summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Paul J. Davis <paul.joseph.davis@gmail.com> 2018-10-25 16:58:48 -0500
committer: Paul J. Davis <paul.joseph.davis@gmail.com> 2019-01-18 13:03:28 -0600
commit: 05678b93d560bceb63a3e19350d3e068cac70dbf (patch)
tree: 33d302b5ed35c9db52aec32cfbb3d0d3c9ed973e
parent: 7bbe8286e3720247e7bea71c7780432d62bdcb60 (diff)
download: couchdb-05678b93d560bceb63a3e19350d3e068cac70dbf.tar.gz
Implement configurable hash functions
This provides the capability for features to specify alternative hash functions for placing documents in a given shard range. Although this implementation provides the functionality, nothing uses it yet.
-rw-r--r-- src/mem3/src/mem3.erl | 8
-rw-r--r-- src/mem3/src/mem3_hash.erl | 73
-rw-r--r-- src/mem3/src/mem3_shards.erl | 4
-rw-r--r-- src/mem3/src/mem3_util.erl | 21
-rw-r--r-- src/mem3/test/mem3_hash_test.erl | 23
-rw-r--r-- src/mem3/test/mem3_util_test.erl | 8
6 files changed, 116 insertions, 21 deletions
diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index f1af0f796..832c88d54 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -237,15 +237,15 @@ dbname(_) ->
%% @doc Determine if DocId belongs in shard (identified by record or filename)
belongs(#shard{}=Shard, DocId) when is_binary(DocId) ->
[Begin, End] = range(Shard),
- belongs(Begin, End, DocId);
+ belongs(Begin, End, Shard, DocId);
belongs(<<"shards/", _/binary>> = ShardName, DocId) when is_binary(DocId) ->
[Begin, End] = range(ShardName),
- belongs(Begin, End, DocId);
+ belongs(Begin, End, ShardName, DocId);
belongs(DbName, DocId) when is_binary(DbName), is_binary(DocId) ->
true.
-belongs(Begin, End, DocId) ->
- HashKey = mem3_util:hash(DocId),
+belongs(Begin, End, Shard, DocId) ->
+ HashKey = mem3_hash:calculate(Shard, DocId),
Begin =< HashKey andalso HashKey =< End.
range(#shard{range = Range}) ->
diff --git a/src/mem3/src/mem3_hash.erl b/src/mem3/src/mem3_hash.erl
new file mode 100644
index 000000000..665c61cb1
--- /dev/null
+++ b/src/mem3/src/mem3_hash.erl
@@ -0,0 +1,73 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mem3_hash).
+
+-export([
+ calculate/2,
+
+ get_hash_fun/1,
+
+ crc32/1
+]).
+
+
+-include_lib("mem3/include/mem3.hrl").
+
+
+%% @doc Hash DocId using the hash function selected by the first argument.
+%% A #shard{} or #ordered_shard{} selects via the `props' entry of its opts,
+%% a binary database name via get_hash_fun/1, and a {Mod, Fun, Args} tuple
+%% is applied directly as Mod:Fun(DocId, Args...).
+calculate(#shard{opts = ShardOpts}, DocId) ->
+    calculate(get_hash_fun_int(couch_util:get_value(props, ShardOpts, [])), DocId);
+
+calculate(#ordered_shard{opts = ShardOpts}, DocId) ->
+    calculate(get_hash_fun_int(couch_util:get_value(props, ShardOpts, [])), DocId);
+
+calculate(DbName, DocId) when is_binary(DbName) ->
+    calculate(get_hash_fun(DbName), DocId);
+
+%% DocId is prepended to Args, so it is always the first call argument.
+calculate({Mod, Fun, Args}, DocId) ->
+    erlang:apply(Mod, Fun, [DocId | Args]).
+
+
+%% @doc Return the {Mod, Fun, Args} hash function for a shard record or a
+%% database name; the hash is read from the `props' entry of the shard opts.
+get_hash_fun(#shard{opts = Opts}) ->
+    get_hash_fun_int(couch_util:get_value(props, Opts, []));
+get_hash_fun(#ordered_shard{opts = Opts}) ->
+    get_hash_fun_int(couch_util:get_value(props, Opts, []));
+get_hash_fun(DbName0) when is_binary(DbName0) ->
+    DbName = mem3:dbname(DbName0),
+    try
+        [#shard{opts = Opts} | _] = mem3_shards:for_db(DbName),
+        get_hash_fun_int(couch_util:get_value(props, Opts, []))
+    catch error:database_does_not_exist ->
+        {?MODULE, crc32, []}  % unknown database: fall back to the default
+    end.
+
+
+%% @doc CRC32 checksum of Item; non-binary terms go through term_to_binary/1.
+crc32(Item) when not is_binary(Item) ->
+    crc32(term_to_binary(Item));
+crc32(Bin) -> erlang:crc32(Bin).
+
+
+%% Pick the {hash, [Mod, Fun, Args]} entry out of Opts as {Mod, Fun, Args};
+%% when there is no such entry the default is this module's crc32/1.
+get_hash_fun_int(Opts) when is_list(Opts) ->
+    hash_mfa(lists:keyfind(hash, 1, Opts)).
+
+hash_mfa({hash, [Mod, Fun, Args]}) -> {Mod, Fun, Args};
+hash_mfa(_NoHashOrMalformed) -> {?MODULE, crc32, []}.
diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl
index 183f28fef..6afc22f57 100644
--- a/src/mem3/src/mem3_shards.erl
+++ b/src/mem3/src/mem3_shards.erl
@@ -67,7 +67,7 @@ for_docid(DbName, DocId) ->
for_docid(DbName, DocId, []).
for_docid(DbName, DocId, Options) ->
- HashKey = mem3_util:hash(DocId),
+ HashKey = mem3_hash:calculate(DbName, DocId),
ShardHead = #shard{
dbname = DbName,
range = ['$1', '$2'],
@@ -397,7 +397,7 @@ load_shards_from_db(ShardDb, DbName) ->
load_shards_from_disk(DbName, DocId)->
Shards = load_shards_from_disk(DbName),
- HashKey = mem3_util:hash(DocId),
+ HashKey = mem3_hash:calculate(hd(Shards), DocId),
[S || S <- Shards, in_range(S, HashKey)].
in_range(Shard, HashKey) ->
diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl
index 927607aff..b44ca2332 100644
--- a/src/mem3/src/mem3_util.erl
+++ b/src/mem3/src/mem3_util.erl
@@ -12,7 +12,7 @@
-module(mem3_util).
--export([hash/1, name_shard/2, create_partition_map/5, build_shards/2,
+-export([name_shard/2, create_partition_map/5, build_shards/2,
n_val/2, q_val/1, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1,
shard_info/1, ensure_exists/1, open_db_doc/1]).
-export([is_deleted/1, rotate_list/2]).
@@ -32,10 +32,6 @@
-include_lib("mem3/include/mem3.hrl").
-include_lib("couch/include/couch_db.hrl").
-hash(Item) when is_binary(Item) ->
- erlang:crc32(Item);
-hash(Item) ->
- erlang:crc32(term_to_binary(Item)).
name_shard(Shard) ->
name_shard(Shard, "").
@@ -165,7 +161,7 @@ build_shards_by_node(DbName, DocProps) ->
dbname = DbName,
node = to_atom(Node),
range = [Beg, End],
- opts = get_engine_opt(DocProps)
+ opts = get_shard_opts(DocProps)
}, Suffix)
end, Ranges)
end, ByNode).
@@ -183,7 +179,7 @@ build_shards_by_range(DbName, DocProps) ->
node = to_atom(Node),
range = [Beg, End],
order = Order,
- opts = get_engine_opt(DocProps)
+ opts = get_shard_opts(DocProps)
}, Suffix)
end, lists:zip(Nodes, lists:seq(1, length(Nodes))))
end, ByRange).
@@ -200,6 +196,9 @@ to_integer(N) when is_binary(N) ->
to_integer(N) when is_list(N) ->
list_to_integer(N).
+get_shard_opts(DocProps) ->  % engine option first, then the props option
+    lists:append(get_engine_opt(DocProps), get_props_opt(DocProps)).
+
get_engine_opt(DocProps) ->
case couch_util:get_value(<<"engine">>, DocProps) of
Engine when is_binary(Engine) ->
@@ -208,6 +207,14 @@ get_engine_opt(DocProps) ->
[]
end.
+%% Wrap the <<"props">> object of DocProps as [{props, Props}]; otherwise [].
+%% NOTE(review): Props keys are presumably binaries, but mem3_hash matches the atom `hash' - confirm.
+get_props_opt(DocProps) ->
+    case couch_util:get_value(<<"props">>, DocProps) of
+        {Props} when is_list(Props) -> [{props, Props}];
+        _NotAnObject -> []
+    end.
+
n_val(undefined, NodeCount) ->
n_val(config:get("cluster", "n", "3"), NodeCount);
n_val(N, NodeCount) when is_list(N) ->
diff --git a/src/mem3/test/mem3_hash_test.erl b/src/mem3/test/mem3_hash_test.erl
new file mode 100644
index 000000000..7a40c5366
--- /dev/null
+++ b/src/mem3/test/mem3_hash_test.erl
@@ -0,0 +1,23 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mem3_hash_test).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%% Pin mem3_hash:crc32/1 for binaries and for terms (hashed via term_to_binary/1).
+hash_test() ->
+    Cases = [{1624516141, 0}, {3816901808, "0"}, {3523407757, <<0>>},
+             {4108050209, <<"0">>}, {3094724072, zero}],
+    Check = fun({Crc, Item}) -> ?assertEqual(Crc, mem3_hash:crc32(Item)) end,
+    lists:foreach(Check, Cases),
+    ok.
diff --git a/src/mem3/test/mem3_util_test.erl b/src/mem3/test/mem3_util_test.erl
index 214217ec4..8b74c4b2b 100644
--- a/src/mem3/test/mem3_util_test.erl
+++ b/src/mem3/test/mem3_util_test.erl
@@ -15,14 +15,6 @@
-include("mem3.hrl").
-include_lib("eunit/include/eunit.hrl").
-hash_test() ->
- ?assertEqual(1624516141,mem3_util:hash(0)),
- ?assertEqual(3816901808,mem3_util:hash("0")),
- ?assertEqual(3523407757,mem3_util:hash(<<0>>)),
- ?assertEqual(4108050209,mem3_util:hash(<<"0">>)),
- ?assertEqual(3094724072,mem3_util:hash(zero)),
- ok.
-
name_shard_test() ->
Shard1 = #shard{},
?assertError(function_clause, mem3_util:name_shard(Shard1, ".1234")),