summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul J. Davis <paul.joseph.davis@gmail.com>2020-07-24 10:59:05 -0500
committerPaul J. Davis <paul.joseph.davis@gmail.com>2020-09-30 10:08:44 -0500
commitb91f193563c9b3dadd4f8de4c49de9cbf4304837 (patch)
treef7800fc82b4285ace19ee371b50b9b37b2f73476
parentddae72a707ce51c229da9188275f18664302cba7 (diff)
downloadcouchdb-b91f193563c9b3dadd4f8de4c49de9cbf4304837.tar.gz
Views on ebtree
-rw-r--r--rel/overlay/etc/default.ini6
-rw-r--r--src/couch_views/include/couch_views.hrl5
-rw-r--r--src/couch_views/src/couch_views.erl55
-rw-r--r--src/couch_views/src/couch_views_fdb.erl331
-rw-r--r--src/couch_views/src/couch_views_indexer.erl54
-rw-r--r--src/couch_views/src/couch_views_reader.erl115
-rw-r--r--src/couch_views/src/couch_views_trees.erl429
-rw-r--r--src/couch_views/src/couch_views_updater.erl13
-rw-r--r--src/couch_views/src/couch_views_util.erl35
-rw-r--r--src/couch_views/test/couch_views_cleanup_test.erl2
-rw-r--r--src/couch_views/test/couch_views_indexer_test.erl64
-rw-r--r--src/couch_views/test/couch_views_size_test.erl25
-rw-r--r--src/couch_views/test/couch_views_updater_test.erl4
-rw-r--r--src/mango/src/mango_cursor_view.erl14
-rw-r--r--src/mango/src/mango_idx_view.erl7
-rw-r--r--src/mango/src/mango_idx_view.hrl13
16 files changed, 687 insertions, 485 deletions
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index abcf0bda2..3a377c7c8 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -337,6 +337,12 @@ iterations = 10 ; iterations for password hashing
; The maximum allowed value size emitted from a view for a document (in bytes)
;value_size_limit = 64000
;
+; The maximum size of B+Tree nodes used by the id btree
+;id_btree_node_size = 100
+;
+; The maximum size of B+Tree nodes used by view btrees
+;view_btree_node_size = 100
+;
; Batch size sensing parameters
; batch_initial_size = 100 ; Initial batch size in number of documents
; batch_search_increment = 500 ; Size change when searching for the threshold
diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl
index 3d0110f65..388219118 100644
--- a/src/couch_views/include/couch_views.hrl
+++ b/src/couch_views/include/couch_views.hrl
@@ -13,6 +13,7 @@
% Index info/data subspaces
-define(VIEW_INFO, 0).
-define(VIEW_DATA, 1).
+-define(VIEW_TREES, 3).
% Index info keys
-define(VIEW_UPDATE_SEQ, 0).
@@ -25,6 +26,10 @@
-define(VIEW_ID_RANGE, 0).
-define(VIEW_MAP_RANGE, 1).
+% Tree keys
+-define(VIEW_ID_TREE, 0).
+-define(VIEW_ROW_TREES, 1).
+
% jobs api
-define(INDEX_JOB_TYPE, <<"views">>).
diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl
index d9ba0c16b..da8a142f9 100644
--- a/src/couch_views/src/couch_views.erl
+++ b/src/couch_views/src/couch_views.erl
@@ -48,11 +48,7 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) ->
Args1 = to_mrargs(Args0),
Args2 = couch_mrview_util:set_view_type(Args1, ViewName, Views),
Args3 = couch_mrview_util:validate_args(Args2),
- ok = check_range(Args3),
- case is_reduce_view(Args3) of
- true -> throw(not_implemented);
- false -> ok
- end,
+ ok = check_range(Mrst, ViewName, Args3),
try
fabric2_fdb:transactional(Db, fun(TxDb) ->
@@ -100,9 +96,10 @@ get_info(Db, DDoc) ->
{ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
Sig = fabric2_util:to_hex(Mrst#mrst.sig),
{UpdateSeq, DataSize, Status} = fabric2_fdb:transactional(Db, fun(TxDb) ->
- Seq = couch_views_fdb:get_update_seq(TxDb, Mrst),
- DataSize = get_total_view_size(TxDb, Mrst),
- JobStatus = case couch_views_jobs:job_state(TxDb, Mrst) of
+ Mrst1 = couch_views_trees:open(TxDb, Mrst),
+ Seq = couch_views_fdb:get_update_seq(TxDb, Mrst1),
+ DataSize = get_total_view_size(TxDb, Mrst1),
+ JobStatus = case couch_views_jobs:job_state(TxDb, Mrst1) of
{ok, pending} -> true;
{ok, running} -> true;
{ok, finished} -> false;
@@ -124,10 +121,9 @@ get_info(Db, DDoc) ->
get_total_view_size(TxDb, Mrst) ->
- ViewIds = [View#mrview.id_num || View <- Mrst#mrst.views],
- lists:foldl(fun (ViewId, Total) ->
- Total + couch_views_fdb:get_kv_size(TxDb, Mrst, ViewId)
- end, 0, ViewIds).
+ lists:foldl(fun(View, Total) ->
+ Total + couch_views_trees:get_kv_size(TxDb, View)
+ end, 0, Mrst#mrst.views).
read_view(Db, Mrst, ViewName, Callback, Acc0, Args) ->
@@ -185,16 +181,29 @@ to_mrargs(#{} = Args) ->
end, #mrargs{}, Args).
-check_range(#mrargs{start_key = undefined}) ->
+check_range(Mrst, ViewName, Args) ->
+ #mrst{
+ language = Lang,
+ views = Views
+ } = Mrst,
+ View = case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of
+ {map, V, _} -> V;
+ {red, {_, _, V}, _} -> V
+ end,
+ Cmp = couch_views_util:collate_fun(View),
+ check_range(Args, Cmp).
+
+
+check_range(#mrargs{start_key = undefined}, _Cmp) ->
ok;
-check_range(#mrargs{end_key = undefined}) ->
+check_range(#mrargs{end_key = undefined}, _Cmp) ->
ok;
-check_range(#mrargs{start_key = K, end_key = K}) ->
+check_range(#mrargs{start_key = K, end_key = K}, _Cmp) ->
ok;
-check_range(Args) ->
+check_range(Args, Cmp) ->
#mrargs{
direction = Dir,
start_key = SK,
@@ -203,10 +212,10 @@ check_range(Args) ->
end_key_docid = EKD
} = Args,
- case {Dir, view_cmp(SK, SKD, EK, EKD)} of
- {fwd, false} ->
+ case {Dir, Cmp({SK, SKD}, {EK, EKD})} of
+ {fwd, gt} ->
throw(check_range_error(<<"true">>));
- {rev, true} ->
+ {rev, lt} ->
throw(check_range_error(<<"false">>));
_ ->
ok
@@ -220,14 +229,6 @@ check_range_error(Descending) ->
Descending/binary>>}.
-view_cmp(SK, SKD, EK, EKD) ->
- BinSK = couch_views_encoding:encode(SK, key),
- BinEK = couch_views_encoding:encode(EK, key),
- PackedSK = erlfdb_tuple:pack({BinSK, SKD}),
- PackedEK = erlfdb_tuple:pack({BinEK, EKD}),
- PackedSK =< PackedEK.
-
-
get_update_options(#mrst{design_opts = Opts}) ->
IncDesign = couch_util:get_value(<<"include_design">>, Opts, false),
LocalSeq = couch_util:get_value(<<"local_seq">>, Opts, false),
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index c95722230..e813f2b61 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -22,15 +22,10 @@
get_update_seq/2,
set_update_seq/3,
- get_row_count/3,
- get_kv_size/3,
-
- fold_map_idx/6,
-
- write_doc/4,
-
list_signatures/1,
- clear_index/2
+ clear_index/2,
+
+ persist_chunks/3
]).
-ifdef(TEST).
@@ -38,10 +33,6 @@
-compile(nowarn_export_all).
-endif.
--define(LIST_VALUE, 0).
--define(JSON_VALUE, 1).
--define(VALUE, 2).
-
-include("couch_views.hrl").
-include_lib("couch_mrview/include/couch_mrview.hrl").
@@ -126,94 +117,6 @@ set_update_seq(TxDb, Sig, Seq) ->
ok = erlfdb:set(Tx, seq_key(DbPrefix, Sig), Seq).
-get_row_count(TxDb, #mrst{sig = Sig}, ViewId) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- case erlfdb:wait(erlfdb:get(Tx, row_count_key(DbPrefix, Sig, ViewId))) of
- not_found -> 0; % Can this happen?
- CountBin -> ?bin2uint(CountBin)
- end.
-
-
-get_kv_size(TxDb, #mrst{sig = Sig}, ViewId) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- case erlfdb:wait(erlfdb:get(Tx, kv_size_key(DbPrefix, Sig, ViewId))) of
- not_found -> 0; % Can this happen?
- SizeBin -> ?bin2uint(SizeBin)
- end.
-
-
-fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) ->
- #{
- db_prefix := DbPrefix
- } = TxDb,
-
- MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
- FoldAcc = #{
- prefix => MapIdxPrefix,
- callback => Callback,
- acc => Acc0
- },
- Fun = aegis:wrap_fold_fun(TxDb, fun fold_fwd/2),
-
- #{
- acc := Acc1
- } = fabric2_fdb:fold_range(TxDb, MapIdxPrefix, Fun, FoldAcc, Options),
-
- Acc1.
-
-
-write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) ->
- #{
- id := DocId
- } = Doc,
-
- ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
-
- clear_id_idx(TxDb, Sig, DocId),
- lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) ->
- clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys),
- update_row_count(TxDb, Sig, ViewId, -TotalKeys),
- update_kv_size(TxDb, Sig, ViewId, -TotalSize)
- end, ExistingViewKeys);
-
-write_doc(TxDb, Sig, ViewIds, Doc) ->
- #{
- id := DocId,
- results := Results,
- kv_sizes := KVSizes
- } = Doc,
-
- ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
-
- clear_id_idx(TxDb, Sig, DocId),
-
- lists:foreach(fun({ViewId, NewRows, KVSize}) ->
- update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize),
-
- ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
- {ViewId, TotalRows, TotalSize, EKeys} ->
- RowChange = length(NewRows) - TotalRows,
- update_row_count(TxDb, Sig, ViewId, RowChange),
- update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize),
- EKeys;
- false ->
- RowChange = length(NewRows),
- update_row_count(TxDb, Sig, ViewId, RowChange),
- update_kv_size(TxDb, Sig, ViewId, KVSize),
- []
- end,
- update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows)
- end, lists:zip3(ViewIds, Results, KVSizes)).
-
-
list_signatures(Db) ->
#{
db_prefix := DbPrefix
@@ -244,145 +147,38 @@ clear_index(Db, Signature) ->
end, Keys),
% Clear index data
- RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
- RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix),
- erlfdb:clear_range_startswith(Tx, RangePrefix).
-
-
-% For each row in a map view we store the the key/value
-% in FoundationDB:
-%
-% `(EncodedSortKey, (EncodedKey, EncodedValue))`
-%
-% The difference between `EncodedSortKey` and `EndcodedKey` is
-% the use of `couch_util:get_sort_key/1` which turns UTF-8
-% strings into binaries that are byte comparable. Given a sort
-% key binary we cannot recover the input so to return unmodified
-% user data we are forced to store the original.
-
-fold_fwd({RowKey, PackedKeyValue}, Acc) ->
- #{
- prefix := Prefix,
- callback := UserCallback,
- acc := UserAcc0
- } = Acc,
-
- {{_SortKey, DocId}, _DupeId} =
- erlfdb_tuple:unpack(RowKey, Prefix),
-
- {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue),
- Value = couch_views_encoding:decode(EncodedValue),
- Key = couch_views_encoding:decode(EncodedOriginalKey),
-
- UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0),
-
- Acc#{
- acc := UserAcc1
- }.
-
-
-clear_id_idx(TxDb, Sig, DocId) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
- ok = erlfdb:clear_range(Tx, Start, End).
-
-
-clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
+ DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature},
+ DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix),
+ erlfdb:clear_range_startswith(Tx, DataPrefix),
- lists:foreach(fun(ViewKey) ->
- {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId),
- ok = erlfdb:clear_range(Tx, Start, End)
- end, ViewKeys).
+ % Clear tree data
+ TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature},
+ TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix),
+ erlfdb:clear_range_startswith(Tx, TreePrefix).
-update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
- Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
- ok = erlfdb:clear(Tx, Key);
-
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- Unique = lists:usort([K || {K, _V} <- NewRows]),
-
- Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
- Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
- ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)).
-
-
-update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- lists:foreach(fun(RemKey) ->
- {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
- ok = erlfdb:clear_range(Tx, Start, End)
- end, ExistingKeys),
-
- KVsToAdd = process_rows(NewRows),
- MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
-
- lists:foreach(fun({DupeId, Key1, Key2, EV}) ->
- KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId),
- Val = erlfdb_tuple:pack({Key2, EV}),
- ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val))
- end, KVsToAdd).
-
-
-get_view_keys(TxDb, Sig, DocId) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
- {Start, End} = id_idx_range(DbPrefix, Sig, DocId),
- lists:map(fun({K, V}) ->
- {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} =
- erlfdb_tuple:unpack(K, DbPrefix),
- [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V),
- {ViewId, TotalKeys, TotalSize, UniqueKeys}
- end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))).
-
-
-update_row_count(TxDb, Sig, ViewId, Increment) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
- Key = row_count_key(DbPrefix, Sig, ViewId),
- erlfdb:add(Tx, Key, Increment).
+persist_chunks(Tx, set, [Key, Value]) ->
+ Chunks = fabric2_fdb:chunkify_binary(Value),
+ LastId = lists:foldl(fun(Chunk, Id) ->
+ ChunkKey = erlfdb_tuple:pack({Id}, Key),
+ erlfdb:set(Tx, ChunkKey, Chunk),
+ Id + 1
+ end, 0, Chunks),
+ % We update nodes in place, so its possible that
+ % a node shrank. This clears any keys that we haven't
+ % just overwritten for the provided key.
+ LastIdKey = erlfdb_tuple:pack({LastId}, Key),
+ EndRange = <<Key/binary, 16#FF>>,
+ erlfdb:clear_range(Tx, LastIdKey, EndRange);
-update_kv_size(TxDb, Sig, ViewId, Increment) ->
- #{
- tx := Tx,
- db_prefix := DbPrefix
- } = TxDb,
-
- % Track a view specific size for calls to
- % GET /dbname/_design/doc/_info`
- IdxKey = kv_size_key(DbPrefix, Sig, ViewId),
- erlfdb:add(Tx, IdxKey, Increment),
+persist_chunks(Tx, get, Key) ->
+ Rows = erlfdb:get_range_startswith(Tx, Key),
+ Values = [V || {_K, V} <- Rows],
+ iolist_to_binary(Values);
- % Track a database level rollup for calls to
- % GET /dbname
- DbKey = db_kv_size_key(DbPrefix),
- erlfdb:add(Tx, DbKey, Increment).
+persist_chunks(Tx, clear, Key) ->
+ erlfdb:clear_range_startswith(Tx, Key).
seq_key(DbPrefix, Sig) ->
@@ -390,54 +186,6 @@ seq_key(DbPrefix, Sig) ->
erlfdb_tuple:pack(Key, DbPrefix).
-row_count_key(DbPrefix, Sig, ViewId) ->
- Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, ViewId},
- erlfdb_tuple:pack(Key, DbPrefix).
-
-
-kv_size_key(DbPrefix, Sig, ViewId) ->
- Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig, ViewId},
- erlfdb_tuple:pack(Key, DbPrefix).
-
-
-db_kv_size_key(DbPrefix) ->
- Key = {?DB_STATS, <<"sizes">>, <<"views">>},
- erlfdb_tuple:pack(Key, DbPrefix).
-
-
-id_idx_key(DbPrefix, Sig, DocId, ViewId) ->
- Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId},
- erlfdb_tuple:pack(Key, DbPrefix).
-
-
-id_idx_range(DbPrefix, Sig, DocId) ->
- Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId},
- erlfdb_tuple:range(Key, DbPrefix).
-
-
-map_idx_prefix(DbPrefix, Sig, ViewId) ->
- Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, ViewId},
- erlfdb_tuple:pack(Key, DbPrefix).
-
-
-map_idx_key(MapIdxPrefix, MapKey, DupeId) ->
- Key = {MapKey, DupeId},
- erlfdb_tuple:pack(Key, MapIdxPrefix).
-
-
-map_idx_range(DbPrefix, Sig, ViewId, MapKey, DocId) ->
- Encoded = couch_views_encoding:encode(MapKey, key),
- Key = {
- ?DB_VIEWS,
- ?VIEW_DATA,
- Sig,
- ?VIEW_MAP_RANGE,
- ViewId,
- {Encoded, DocId}
- },
- erlfdb_tuple:range(Key, DbPrefix).
-
-
creation_vs_key(Db, Sig) ->
#{
db_prefix := DbPrefix
@@ -452,24 +200,3 @@ build_status_key(Db, Sig) ->
} = Db,
Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig},
erlfdb_tuple:pack(Key, DbPrefix).
-
-
-process_rows(Rows) ->
- Encoded = lists:map(fun({K, V}) ->
- EK1 = couch_views_encoding:encode(K, key),
- EK2 = couch_views_encoding:encode(K, value),
- EV = couch_views_encoding:encode(V, value),
- {EK1, EK2, EV}
- end, Rows),
-
- Grouped = lists:foldl(fun({K1, K2, V}, Acc) ->
- dict:append(K1, {K2, V}, Acc)
- end, dict:new(), Encoded),
-
- dict:fold(fun(K1, Vals, DAcc) ->
- Vals1 = lists:keysort(2, Vals),
- {_, Labeled} = lists:foldl(fun({K2, V}, {Count, Acc}) ->
- {Count + 1, [{Count, K1, K2, V} | Acc]}
- end, {0, []}, Vals1),
- Labeled ++ DAcc
- end, [], Grouped).
diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index 17b0daab7..da2393999 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -110,6 +110,10 @@ init() ->
error:database_does_not_exist ->
fail_job(Job, Data, db_deleted, "Database was deleted");
Error:Reason ->
+ Stack = erlang:get_stacktrace(),
+ Fmt = "Error building view for ddoc ~s in ~s: ~p:~p ~p",
+ couch_log:error(Fmt, [DbName, DDocId, Error, Reason, Stack]),
+
NewRetry = Retries + 1,
RetryLimit = retry_limit(),
@@ -196,6 +200,7 @@ do_update(Db, Mrst0, State0) ->
tx := Tx
} = TxDb,
+ Mrst1 = couch_views_trees:open(TxDb, Mrst0),
State1 = get_update_start_state(TxDb, Mrst0, State0),
{ok, State2} = fold_changes(State1),
@@ -212,7 +217,7 @@ do_update(Db, Mrst0, State0) ->
DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc),
- {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1),
+ {Mrst2, MappedDocs} = map_docs(Mrst0, DocAcc1),
TotalKVs = write_docs(TxDb, Mrst1, MappedDocs, State2),
ChangesDone = ChangesDone0 + length(DocAcc),
@@ -225,14 +230,14 @@ do_update(Db, Mrst0, State0) ->
case Count < Limit of
true ->
- maybe_set_build_status(TxDb, Mrst1, ViewVS,
+ maybe_set_build_status(TxDb, Mrst2, ViewVS,
?INDEX_READY),
report_progress(State2#{changes_done := ChangesDone},
finished),
- {Mrst1, finished};
+ {Mrst2, finished};
false ->
State3 = report_progress(State2, update),
- {Mrst1, State3#{
+ {Mrst2, State3#{
tx_db := undefined,
count := 0,
doc_acc := [],
@@ -339,7 +344,7 @@ map_docs(Mrst, Docs) ->
end, Docs),
Deleted1 = lists:map(fun(Doc) ->
- Doc#{results => []}
+ Doc#{results => [[] || _ <- Mrst1#mrst.views]}
end, Deleted0),
DocsToMap = lists:map(fun(Doc) ->
@@ -370,9 +375,8 @@ map_docs(Mrst, Docs) ->
{Mrst1, MappedDocs}.
-write_docs(TxDb, Mrst, Docs, State) ->
+write_docs(TxDb, Mrst, Docs0, State) ->
#mrst{
- views = Views,
sig = Sig
} = Mrst,
@@ -380,15 +384,15 @@ write_docs(TxDb, Mrst, Docs, State) ->
last_seq := LastSeq
} = State,
- ViewIds = [View#mrview.id_num || View <- Views],
KeyLimit = key_size_limit(),
ValLimit = value_size_limit(),
- TotalKVCount = lists:foldl(fun(Doc0, KVCount) ->
- Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit),
- couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1),
- KVCount + count_kvs(Doc1)
- end, 0, Docs),
+ {Docs1, TotalKVCount} = lists:mapfoldl(fun(Doc0, KVCount) ->
+ Doc1 = check_kv_size_limit(Mrst, Doc0, KeyLimit, ValLimit),
+ {Doc1, KVCount + count_kvs(Doc1)}
+ end, 0, Docs0),
+
+ couch_views_trees:update_views(TxDb, Mrst, Docs1),
if LastSeq == false -> ok; true ->
couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq)
@@ -479,7 +483,7 @@ start_query_server(#mrst{} = Mrst) ->
Mrst.
-calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) ->
+check_kv_size_limit(Mrst, Doc, KeyLimit, ValLimit) ->
#mrst{
db_name = DbName,
idx_name = IdxName
@@ -488,10 +492,10 @@ calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) ->
results := Results
} = Doc,
try
- KVSizes = lists:map(fun(ViewRows) ->
- lists:foldl(fun({K, V}, Acc) ->
- KeySize = erlang:external_size(K),
- ValSize = erlang:external_size(V),
+ lists:foreach(fun(ViewRows) ->
+ lists:foreach(fun({K, V}) ->
+ KeySize = couch_ejson_size:encoded_size(K),
+ ValSize = couch_ejson_size:encoded_size(V),
if KeySize =< KeyLimit -> ok; true ->
throw({size_error, key})
@@ -499,18 +503,20 @@ calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) ->
if ValSize =< ValLimit -> ok; true ->
throw({size_error, value})
- end,
-
- Acc + KeySize + ValSize
- end, 0, ViewRows)
+ end
+ end, ViewRows)
end, Results),
- Doc#{kv_sizes => KVSizes}
+ Doc
catch throw:{size_error, Type} ->
#{id := DocId} = Doc,
Fmt = "View ~s size error for docid `~s`, excluded from indexing "
"in db `~s` for design doc `~s`",
couch_log:error(Fmt, [Type, DocId, DbName, IdxName]),
- Doc#{deleted := true, results := [], kv_sizes => []}
+ Doc#{
+ deleted := true,
+ results := [[] || _ <- Mrst#mrst.views],
+ kv_sizes => []
+ }
end.
diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl
index 61a78d7f8..a785c7b35 100644
--- a/src/couch_views/src/couch_views_reader.erl
+++ b/src/couch_views/src/couch_views_reader.erl
@@ -23,24 +23,24 @@
-include_lib("fabric/include/fabric2.hrl").
-read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) ->
- #mrst{
- language = Lang,
- sig = Sig,
- views = Views
- } = Mrst,
-
- ViewId = get_view_id(Lang, Args, ViewName, Views),
- Fun = fun handle_row/4,
-
+read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) ->
try
fabric2_fdb:transactional(Db, fun(TxDb) ->
- Meta = get_meta(TxDb, Mrst, ViewId, Args),
+ #mrst{
+ language = Lang,
+ views = Views
+ } = Mrst = couch_views_trees:open(TxDb, Mrst0),
+
+ View = get_map_view(Lang, Args, ViewName, Views),
+ Fun = fun handle_map_row/4,
+
+ Meta = get_map_meta(TxDb, Mrst, View, Args),
UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)),
Acc0 = #{
db => TxDb,
skip => Args#mrargs.skip,
+ limit => Args#mrargs.limit,
mrargs => undefined,
callback => UserCallback,
acc => UserAcc1
@@ -51,14 +51,7 @@ read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) ->
KeyAcc1 = KeyAcc0#{
mrargs := KeyArgs
},
- couch_views_fdb:fold_map_idx(
- TxDb,
- Sig,
- ViewId,
- Opts,
- Fun,
- KeyAcc1
- )
+ couch_views_trees:fold_map_idx(TxDb, View, Opts, Fun, KeyAcc1)
end, Acc0, expand_keys_args(Args)),
#{
@@ -66,27 +59,35 @@ read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) ->
} = Acc1,
{ok, maybe_stop(UserCallback(complete, UserAcc2))}
end)
- catch throw:{done, Out} ->
- {ok, Out}
+ catch
+ throw:{complete, Out} ->
+ {_, Final} = UserCallback(complete, Out),
+ {ok, Final};
+ throw:{done, Out} ->
+ {ok, Out}
end.
-get_meta(TxDb, Mrst, ViewId, #mrargs{update_seq = true}) ->
- TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId),
+get_map_meta(TxDb, Mrst, View, #mrargs{update_seq = true}) ->
+ TotalRows = couch_views_trees:get_row_count(TxDb, View),
ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst),
{meta, [{update_seq, ViewSeq}, {total, TotalRows}, {offset, null}]};
-get_meta(TxDb, Mrst, ViewId, #mrargs{}) ->
- TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId),
+get_map_meta(TxDb, _Mrst, View, #mrargs{}) ->
+ TotalRows = couch_views_trees:get_row_count(TxDb, View),
{meta, [{total, TotalRows}, {offset, null}]}.
-handle_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 ->
+handle_map_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 ->
Acc#{skip := Skip - 1};
-handle_row(DocId, Key, Value, Acc) ->
+handle_map_row(_DocID, _Key, _Value, #{limit := 0, acc := UserAcc}) ->
+ throw({complete, UserAcc});
+
+handle_map_row(DocId, Key, Value, Acc) ->
#{
db := TxDb,
+ limit := Limit,
mrargs := Args,
callback := UserCallback,
acc := UserAcc0
@@ -111,13 +112,13 @@ handle_row(DocId, Key, Value, Acc) ->
end,
UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)),
- Acc#{acc := UserAcc1}.
+ Acc#{limit := Limit - 1, acc := UserAcc1}.
-get_view_id(Lang, Args, ViewName, Views) ->
+get_map_view(Lang, Args, ViewName, Views) ->
case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of
- {map, View, _Args} -> View#mrview.id_num;
- {red, {_Idx, _Lang, View}} -> View#mrview.id_num
+ {map, View, _Args} -> View;
+ {red, {_Idx, _Lang, View}, _} -> View
end.
@@ -135,57 +136,33 @@ expand_keys_args(#mrargs{keys = Keys} = Args) ->
mrargs_to_fdb_options(Args) ->
#mrargs{
- start_key = StartKey0,
+ start_key = StartKey,
start_key_docid = StartKeyDocId,
- end_key = EndKey0,
- end_key_docid = EndKeyDocId,
+ end_key = EndKey,
+ end_key_docid = EndKeyDocId0,
direction = Direction,
- limit = Limit,
- skip = Skip,
inclusive_end = InclusiveEnd
} = Args,
- StartKey1 = if StartKey0 == undefined -> undefined; true ->
- couch_views_encoding:encode(StartKey0, key)
- end,
-
- StartKeyOpts = case {StartKey1, StartKeyDocId} of
- {undefined, _} ->
- [];
- {StartKey1, StartKeyDocId} ->
- [{start_key, {StartKey1, StartKeyDocId}}]
+ StartKeyOpts = if StartKey == undefined -> []; true ->
+ [{start_key, {StartKey, StartKeyDocId}}]
end,
- EndKey1 = if EndKey0 == undefined -> undefined; true ->
- couch_views_encoding:encode(EndKey0, key)
+ EndKeyDocId = case {Direction, EndKeyDocId0} of
+ {fwd, <<255>>} when InclusiveEnd -> <<255>>;
+ {fwd, <<255>>} when not InclusiveEnd -> <<>>;
+ {rev, <<>>} when InclusiveEnd -> <<>>;
+ {rev, <<>>} when not InclusiveEnd -> <<255>>;
+ _ -> EndKeyDocId0
end,
- EndKeyOpts = case {EndKey1, EndKeyDocId, Direction} of
- {undefined, _, _} ->
- [];
- {EndKey1, <<>>, rev} when not InclusiveEnd ->
- % When we iterate in reverse with
- % inclusive_end=false we have to set the
- % EndKeyDocId to <<255>> so that we don't
- % include matching rows.
- [{end_key_gt, {EndKey1, <<255>>}}];
- {EndKey1, <<255>>, _} when not InclusiveEnd ->
- % When inclusive_end=false we need to
- % elide the default end_key_docid so as
- % to not sort past the docids with the
- % given end key.
- [{end_key_gt, {EndKey1}}];
- {EndKey1, EndKeyDocId, _} when not InclusiveEnd ->
- [{end_key_gt, {EndKey1, EndKeyDocId}}];
- {EndKey1, EndKeyDocId, _} when InclusiveEnd ->
- [{end_key, {EndKey1, EndKeyDocId}}]
+ EndKeyOpts = if EndKey == undefined -> []; true ->
+ [{end_key, {EndKey, EndKeyDocId}}]
end,
[
{dir, Direction},
- {limit, Limit + Skip},
- {streaming_mode, want_all},
- {restart_tx, true}
+ {inclusive_end, InclusiveEnd}
] ++ StartKeyOpts ++ EndKeyOpts.
diff --git a/src/couch_views/src/couch_views_trees.erl b/src/couch_views/src/couch_views_trees.erl
new file mode 100644
index 000000000..0f680a623
--- /dev/null
+++ b/src/couch_views/src/couch_views_trees.erl
@@ -0,0 +1,429 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_trees).
+
+-export([
+ open/2,
+
+ get_row_count/2,
+ get_kv_size/2,
+
+ fold_map_idx/5,
+
+ update_views/3
+]).
+
+-ifdef(TEST).
+-compile(export_all).
+-compile(nowarn_export_all).
+-endif.
+
+
+-include("couch_views.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+
+
+open(TxDb, Mrst) ->
+ #mrst{
+ sig = Sig,
+ language = Lang,
+ views = Views
+ } = Mrst,
+ Mrst#mrst{
+ id_btree = open_id_tree(TxDb, Sig),
+ views = [open_view_tree(TxDb, Sig, Lang, V) || V <- Views]
+ }.
+
+
+get_row_count(TxDb, View) ->
+ #{
+ tx := Tx
+ } = TxDb,
+ {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree),
+ Count.
+
+
+get_kv_size(TxDb, View) ->
+ #{
+ tx := Tx
+ } = TxDb,
+ {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree),
+ TotalSize.
+
+
+fold_map_idx(TxDb, View, Options, Callback, Acc0) ->
+ #{
+ tx := Tx
+ } = TxDb,
+ #mrview{
+ btree = Btree
+ } = View,
+
+ CollateFun = couch_views_util:collate_fun(View),
+
+ {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options),
+
+ Wrapper = fun(KVs0, WAcc) ->
+ % Remove any keys that match Start or End key
+ % depending on direction
+ KVs1 = case InclusiveEnd of
+ true ->
+ KVs0;
+ false when Dir == fwd ->
+ lists:filter(fun({K, _V}) ->
+ case CollateFun(K, EndKey) of
+ lt -> true;
+ eq -> false;
+ gt -> false
+ end
+ end, KVs0);
+ false when Dir == rev ->
+ lists:filter(fun({K, _V}) ->
+ case CollateFun(K, EndKey) of
+ lt -> false;
+ eq -> false;
+ gt -> true
+ end
+ end, KVs0)
+ end,
+ % Expand dups
+ KVs2 = lists:flatmap(fun({K, V}) ->
+ case V of
+ {dups, Dups} when Dir == fwd ->
+ [{K, D} || D <- Dups];
+ {dups, Dups} when Dir == rev ->
+ [{K, D} || D <- lists:reverse(Dups)];
+ _ ->
+ [{K, V}]
+ end
+ end, KVs1),
+ lists:foldl(fun({{Key, DocId}, Value}, WAccInner) ->
+ Callback(DocId, Key, Value, WAccInner)
+ end, WAcc, KVs2)
+ end,
+
+ case Dir of
+ fwd ->
+ ebtree:range(Tx, Btree, StartKey, EndKey, Wrapper, Acc0);
+ rev ->
+ % Start/End keys swapped on purpose because ebtree
+ ebtree:reverse_range(Tx, Btree, EndKey, StartKey, Wrapper, Acc0)
+ end.
+
+
+update_views(TxDb, Mrst, Docs) ->
+ #{
+ tx := Tx
+ } = TxDb,
+
+ % Collect update information
+ #{
+ ids := IdMap,
+ views := ViewMaps,
+ delete_ref := DeleteRef
+ } = gather_update_info(Tx, Mrst, Docs),
+
+ % Update the IdBtree
+ update_btree(Tx, Mrst#mrst.id_btree, IdMap, DeleteRef),
+
+ % Update each view's BTree
+ lists:foreach(fun(View) ->
+ #mrview{
+ id_num = ViewId,
+ btree = BTree
+ } = View,
+
+ ViewMap = maps:get(ViewId, ViewMaps, #{}),
+ update_btree(Tx, BTree, ViewMap, DeleteRef)
+ end, Mrst#mrst.views).
+
+
+open_id_tree(TxDb, Sig) ->
+ #{
+ tx := Tx,
+ db_prefix := DbPrefix
+ } = TxDb,
+ Prefix = id_tree_prefix(DbPrefix, Sig),
+ TreeOpts = [
+ {persist_fun, fun couch_views_fdb:persist_chunks/3},
+ {cache_fun, create_cache_fun(id_tree)}
+ ],
+ ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts).
+
+
+open_view_tree(TxDb, Sig, Lang, View) ->
+ #{
+ tx := Tx,
+ db_prefix := DbPrefix
+ } = TxDb,
+ #mrview{
+ id_num = ViewId
+ } = View,
+ Prefix = view_tree_prefix(DbPrefix, Sig, ViewId),
+ TreeOpts = [
+ {collate_fun, couch_views_util:collate_fun(View)},
+ {reduce_fun, make_reduce_fun(Lang, View)},
+ {persist_fun, fun couch_views_fdb:persist_chunks/3},
+ {cache_fun, create_cache_fun({view, ViewId})}
+ ],
+ View#mrview{
+ btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts)
+ }.
+
+
+get_order(id_btree) ->
+ min_order(config:get_integer("couch_views", "id_btree_node_size", 100));
+get_order(view_btree) ->
+ min_order(config:get_integer("couch_views", "view_btree_node_size", 100)).
+
+
+min_order(V) when is_integer(V), V < 2 ->
+ 2;
+min_order(V) when is_integer(V), V rem 2 == 0 ->
+ V;
+min_order(V) ->
+ V + 1.
+
+
+make_reduce_fun(_Lang, #mrview{}) ->
+ fun
+ (KVs, _ReReduce = false) ->
+ TotalSize = lists:foldl(fun({{K, _DocId}, V}, Acc) ->
+ KSize = couch_ejson_size:encoded_size(K),
+ VSize = case V of
+ {dups, Dups} ->
+ lists:foldl(fun(D, DAcc) ->
+ DAcc + couch_ejson_size:encoded_size(D)
+ end, 0, Dups);
+ _ ->
+ couch_ejson_size:encoded_size(V)
+ end,
+ KSize + VSize + Acc
+ end, 0, KVs),
+ {length(KVs), TotalSize};
+ (KRs, _ReReduce = true) ->
+ lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) ->
+ {Count + CountAcc, Size + SizeAcc}
+ end, {0, 0}, KRs)
+ end.
+
+
+create_cache_fun(TreeId) ->
+ CacheTid = case get(TreeId) of
+ undefined ->
+ Tid = ets:new(?MODULE, [protected, set]),
+ put(TreeId, {ebtree_cache, Tid}),
+ Tid;
+ {ebtree_cache, Tid} ->
+ Tid
+ end,
+ fun
+ (set, [Id, Node]) ->
+ true = ets:insert_new(CacheTid, {Id, Node}),
+ ok;
+ (clear, Id) ->
+ ets:delete(CacheTid, Id),
+ ok;
+ (get, Id) ->
+ case ets:lookup(CacheTid, Id) of
+ [{Id, Node}] -> Node;
+ [] -> undefined
+ end
+ end.
+
+
+to_map_opts(Options) ->
+ Dir = case lists:keyfind(dir, 1, Options) of
+ {dir, D} -> D;
+ _ -> fwd
+ end,
+
+ InclusiveEnd = case lists:keyfind(inclusive_end, 1, Options) of
+ {inclusive_end, IE} -> IE;
+ _ -> true
+ end,
+
+ StartKey = case lists:keyfind(start_key, 1, Options) of
+ {start_key, SK} -> SK;
+ false when Dir == fwd -> ebtree:min();
+ false when Dir == rev -> ebtree:max()
+ end,
+
+ EndKey = case lists:keyfind(end_key, 1, Options) of
+ {end_key, EK} -> EK;
+ false when Dir == fwd -> ebtree:max();
+ false when Dir == rev -> ebtree:min()
+ end,
+
+ {Dir, StartKey, EndKey, InclusiveEnd}.
+
+
+gather_update_info(Tx, Mrst, Docs) ->
+ % A special token used to indicate that the row should be deleted
+ DeleteRef = erlang:make_ref(),
+
+ AllDocIds = [DocId || #{id := DocId} <- Docs],
+
+ BaseIdMap = lists:foldl(fun(DocId, Acc) ->
+ maps:put(DocId, DeleteRef, Acc)
+ end, #{}, AllDocIds),
+
+ % Build the initial set of rows to delete
+ % ExistingViewKeys is a list of {DocId, [{ViewId, [Key | _]} | _]}
+ ExistingViewKeys = ebtree:lookup_multi(Tx, Mrst#mrst.id_btree, AllDocIds),
+
+ % For each view, create an initial map that contains the
+ % list of keys to delete. The final result is a map of
+ % maps:
+ % #{ViewId => #{Key => DeleteRef}}
+ BaseViewMaps = lists:foldl(fun({DocId, ViewIdKeys}, ViewIdAcc1) ->
+ lists:foldl(fun({ViewId, Keys}, ViewIdAcc2) ->
+ OldViewMap = maps:get(ViewId, ViewIdAcc2, #{}),
+ NewViewMap = lists:foldl(fun(Key, ViewMapAcc) ->
+ maps:put({Key, DocId}, DeleteRef, ViewMapAcc)
+ end, OldViewMap, Keys),
+ maps:put(ViewId, NewViewMap, ViewIdAcc2)
+ end, ViewIdAcc1, ViewIdKeys)
+ end, #{}, ExistingViewKeys),
+
+ % Build our base accumulator
+ InfoAcc1 = #{
+ ids => BaseIdMap,
+ views => BaseViewMaps,
+ delete_ref => DeleteRef
+ },
+
+ % Insert results from each document into the map of
+ % maps which leaves us with a final shape of:
+ % #{ViewId => #{Key => Value}}
+ % where Value may be a copy of `DeleteRef` which flags
+ % that the Key should be deleted from the view.
+ lists:foldl(fun(Doc, InfoAcc2) ->
+ insert_doc(Mrst, Doc, InfoAcc2)
+ end, InfoAcc1, Docs).
+
+
+insert_doc(_Mrst, #{deleted := true} = _Doc, InfoAcc) ->
+ InfoAcc;
+insert_doc(Mrst, Doc, InfoAcc0) ->
+ #{
+ id := DocId,
+ results := Results
+ } = Doc,
+
+ FinalAcc = lists:foldl(fun({View, RawNewRows}, {IdKeyAcc, InfoAcc1}) ->
+ #mrview{
+ id_num = ViewId
+ } = View,
+ #{
+ views := ViewMaps
+ } = InfoAcc1,
+
+ DedupedRows = dedupe_rows(View, RawNewRows),
+ IdKeys = lists:usort([K || {K, _V} <- DedupedRows]),
+
+ OldViewMap = maps:get(ViewId, ViewMaps, #{}),
+ NewViewMap = lists:foldl(fun({K, V}, ViewMapAcc) ->
+ maps:put({K, DocId}, V, ViewMapAcc)
+ end, OldViewMap, DedupedRows),
+
+ {[{ViewId, IdKeys} | IdKeyAcc], InfoAcc1#{
+ views := maps:put(ViewId, NewViewMap, ViewMaps)
+ }}
+ end, {[], InfoAcc0}, lists:zip(Mrst#mrst.views, Results)),
+
+ {IdRows, #{ids := IdMap} = InfoAcc2} = FinalAcc,
+
+ % Don't store a row in the id_btree if it hasn't got any
+ % keys that will need to be deleted.
+ NonEmptyRows = [1 || {_ViewId, Rows} <- IdRows, Rows /= []],
+ if length(NonEmptyRows) == 0 -> InfoAcc2; true ->
+ InfoAcc2#{ids := maps:put(DocId, IdRows, IdMap)}
+ end.
+
+
+update_btree(Tx, BTree, Map, DeleteRef) ->
+ {ToRemove, ToInsert} = maps:fold(fun(Key, Value, {Keys, Rows}) ->
+ case Value of
+ DeleteRef -> {[Key | Keys], Rows};
+ _ -> {Keys, [{Key, Value} | Rows]}
+ end
+ end, {[], []}, Map),
+
+ lists:foreach(fun(Key) ->
+ ebtree:delete(Tx, BTree, Key)
+ end, ToRemove),
+
+ ebtree:insert_multi(Tx, BTree, ToInsert).
+
+
+dedupe_rows(View, KVs0) ->
+ CollateFun = couch_views_util:collate_fun(View),
+ KVs1 = lists:sort(fun({KeyA, ValA}, {KeyB, ValB}) ->
+ case CollateFun({KeyA, <<>>}, {KeyB, <<>>}) of
+ lt -> true;
+ eq -> ValA =< ValB;
+ gt -> false
+ end
+ end, KVs0),
+ dedupe_rows_int(CollateFun, KVs1).
+
+
+dedupe_rows_int(_CollateFun, []) ->
+ [];
+
+dedupe_rows_int(_CollateFun, [KV]) ->
+ [KV];
+
+dedupe_rows_int(CollateFun, [{K1, V1} | RestKVs]) ->
+ RestDeduped = dedupe_rows_int(CollateFun, RestKVs),
+ case RestDeduped of
+ [{K2, V2} | RestRestDeduped] ->
+ case CollateFun({K1, <<>>}, {K2, <<>>}) of
+ eq -> [{K1, combine_vals(V1, V2)} | RestRestDeduped];
+ _ -> [{K1, V1} | RestDeduped]
+ end;
+ [] ->
+ [{K1, V1}]
+ end.
+
+
+combine_vals(V1, {dups, V2}) ->
+ {dups, [V1 | V2]};
+combine_vals(V1, V2) ->
+ {dups, [V1, V2]}.
+
+
+id_tree_prefix(DbPrefix, Sig) ->
+ Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ID_TREE},
+ erlfdb_tuple:pack(Key, DbPrefix).
+
+
+view_tree_prefix(DbPrefix, Sig, ViewId) ->
+ Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ROW_TREES, ViewId},
+ erlfdb_tuple:pack(Key, DbPrefix).
+
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+dedupe_basic_test() ->
+ View = #mrview{},
+ ?assertEqual([{1, 1}], dedupe_rows(View, [{1, 1}])).
+
+dedupe_simple_test() ->
+ View = #mrview{},
+ ?assertEqual([{1, {dups, [1, 2]}}], dedupe_rows(View, [{1, 1}, {1, 2}])).
+
+-endif.
diff --git a/src/couch_views/src/couch_views_updater.erl b/src/couch_views/src/couch_views_updater.erl
index ba9fadb51..7e5466eb8 100644
--- a/src/couch_views/src/couch_views_updater.erl
+++ b/src/couch_views/src/couch_views_updater.erl
@@ -87,16 +87,17 @@ write_doc(Db, #doc{deleted = Deleted} = Doc) ->
},
lists:foreach(fun(DDoc) ->
- {ok, Mrst} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc),
+ {ok, Mrst0} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc),
+ Mrst1 = couch_views_trees:open(Db, Mrst0),
- case should_index_doc(Doc, Mrst) of
+ case should_index_doc(Doc, Mrst1) of
true ->
- {Mrst1, Result1} = couch_views_indexer:map_docs(Mrst, Result0),
- DocNumber = couch_views_indexer:write_docs(Db, Mrst1,
+ {Mrst2, Result1} = couch_views_indexer:map_docs(Mrst1, Result0),
+ DocNumber = couch_views_indexer:write_docs(Db, Mrst2,
Result1, State),
- couch_views_plugin:after_interactive_write(Db, Mrst1,
+ couch_views_plugin:after_interactive_write(Db, Mrst2,
Result1, DocNumber),
- couch_eval:release_map_context(Mrst1#mrst.qserver);
+ couch_eval:release_map_context(Mrst2#mrst.qserver);
false ->
ok
end
diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl
index 6298acf33..1e3e4beef 100644
--- a/src/couch_views/src/couch_views_util.erl
+++ b/src/couch_views/src/couch_views_util.erl
@@ -15,6 +15,7 @@
-export([
ddoc_to_mrst/2,
+ collate_fun/1,
validate_args/1,
validate_args/2,
is_paginated/1,
@@ -82,6 +83,40 @@ ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) ->
{ok, IdxState#mrst{sig=couch_hash:md5_hash(term_to_binary(SigInfo))}}.
+collate_fun(View) ->
+ #mrview{
+ options = Options
+ } = View,
+ case couch_util:get_value(<<"collation">>, Options) of
+ <<"raw">> -> fun collate_raw/2;
+ _ -> fun collate_rows/2
+ end.
+
+
+collate_raw(A, A) -> eq;
+collate_raw(A, B) when A < B -> lt;
+collate_raw(A, B) when A > B -> gt.
+
+
+collate_rows({KeyA, DocIdA}, {KeyB, DocIdB}) ->
+ case couch_ejson_compare:less(KeyA, KeyB) of
+ N when N < 0 -> lt;
+ 0 when DocIdA < DocIdB -> lt;
+ 0 when DocIdA == DocIdB -> eq;
+ 0 -> gt; % when DocIdA > DocIdB
+ N when N > 0 -> gt
+ end;
+
+collate_rows(KeyA, KeyB) ->
+ % When collating reduce group keys they don't
+ % come with a docid.
+ case couch_ejson_compare:less(KeyA, KeyB) of
+ N when N < 0 -> lt;
+ 0 -> eq;
+ N when N > 0 -> gt
+ end.
+
+
validate_args(Args) ->
validate_args(Args, []).
diff --git a/src/couch_views/test/couch_views_cleanup_test.erl b/src/couch_views/test/couch_views_cleanup_test.erl
index e4dcdceea..54048c968 100644
--- a/src/couch_views/test/couch_views_cleanup_test.erl
+++ b/src/couch_views/test/couch_views_cleanup_test.erl
@@ -302,7 +302,7 @@ view_has_data(Db, DDoc) ->
SigKey = erlfdb_tuple:pack(SigKeyTuple, DbPrefix),
SigVal = erlfdb:wait(erlfdb:get(Tx, SigKey)),
- RangeKeyTuple = {?DB_VIEWS, ?VIEW_DATA, Sig},
+ RangeKeyTuple = {?DB_VIEWS, ?VIEW_TREES, Sig},
RangeKey = erlfdb_tuple:pack(RangeKeyTuple, DbPrefix),
Range = erlfdb:wait(erlfdb:get_range_startswith(Tx, RangeKey)),
diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl
index 86c0a8195..75be2459f 100644
--- a/src/couch_views/test/couch_views_indexer_test.erl
+++ b/src/couch_views/test/couch_views_indexer_test.erl
@@ -126,13 +126,12 @@ updated_docs_are_reindexed(Db) ->
% Check that our id index is updated properly
% as well.
DbName = fabric2_db:name(Db),
- {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
- Sig = Mrst#mrst.sig,
+ {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
fabric2_fdb:transactional(Db, fun(TxDb) ->
- ?assertMatch(
- [{0, 1, _, [1]}],
- couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>)
- )
+ #{tx := Tx} = TxDb,
+ Mrst1 = couch_views_trees:open(TxDb, Mrst0),
+ IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>),
+ ?assertEqual({<<"0">>, [{1, []}, {0, [1]}]}, IdRow)
end).
@@ -160,13 +159,12 @@ updated_docs_without_changes_are_reindexed(Db) ->
% Check fdb directly to make sure we've also
% removed the id idx keys properly.
DbName = fabric2_db:name(Db),
- {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
- Sig = Mrst#mrst.sig,
+ {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
fabric2_fdb:transactional(Db, fun(TxDb) ->
- ?assertMatch(
- [{0, 1, _, [0]}],
- couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>)
- )
+ #{tx := Tx} = TxDb,
+ Mrst1 = couch_views_trees:open(TxDb, Mrst0),
+ IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>),
+ ?assertEqual({<<"0">>, [{1, []}, {0, [0]}]}, IdRow)
end).
@@ -208,10 +206,12 @@ deleted_docs_are_unindexed(Db) ->
% Check fdb directly to make sure we've also
% removed the id idx keys properly.
DbName = fabric2_db:name(Db),
- {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
- Sig = Mrst#mrst.sig,
+ {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
fabric2_fdb:transactional(Db, fun(TxDb) ->
- ?assertEqual([], couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>))
+ #{tx := Tx} = TxDb,
+ Mrst1 = couch_views_trees:open(TxDb, Mrst0),
+ IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>),
+ ?assertEqual(false, IdRow)
end).
@@ -296,11 +296,9 @@ fewer_multipe_identical_keys_from_same_doc(Db) ->
handle_size_key_limits(Db) ->
ok = meck:new(config, [passthrough]),
- ok = meck:expect(config, get_integer, fun(Section, Key, Default) ->
- case Section == "couch_views" andalso Key == "key_size_limit" of
- true -> 15;
- _ -> Default
- end
+ ok = meck:expect(config, get_integer, fun
+ ("couch_views", "key_size_limit", _Default) -> 15;
+ (_Section, _Key, Default) -> Default
end),
DDoc = create_ddoc(multi_emit_key_limit),
@@ -328,11 +326,9 @@ handle_size_key_limits(Db) ->
handle_size_value_limits(Db) ->
ok = meck:new(config, [passthrough]),
- ok = meck:expect(config, get_integer, fun(Section, _, Default) ->
- case Section of
- "couch_views" -> 15;
- _ -> Default
- end
+ ok = meck:expect(config, get_integer, fun
+ ("couch_views", "value_size_limit", _Default) -> 15;
+ (_Section, _Key, Default) -> Default
end),
DDoc = create_ddoc(multi_emit_key_limit),
@@ -386,12 +382,6 @@ multiple_design_docs(Db) ->
end)
end,
- % This is how we check that no index updates took place
- meck:new(couch_views_fdb, [passthrough]),
- meck:expect(couch_views_fdb, write_doc, fun(TxDb, Sig, ViewIds, Doc) ->
- meck:passthrough([TxDb, Sig, ViewIds, Doc])
- end),
-
DDoc1 = create_ddoc(simple, <<"_design/bar1">>),
DDoc2 = create_ddoc(simple, <<"_design/bar2">>),
@@ -399,7 +389,7 @@ multiple_design_docs(Db) ->
{ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, DDoc1, []),
?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc1, ?MAP_FUN1)),
- % Because run_query/3 can return, and unsurbscribe from the job,
+ % Because run_query/3 can return, and unsubscribe from the job,
% before it actually finishes, ensure we wait for the job to
% finish so we get a deterministic setup every time.
JobId = get_job_id(Db, DDoc1),
@@ -413,10 +403,16 @@ multiple_design_docs(Db) ->
Cleanup(),
- meck:reset(couch_views_fdb),
+ % Assert that no updates are applied
+ meck:new(couch_views_fdb, [passthrough]),
+ meck:expect(couch_views_trees, update_views, fun(TxDb, Mrst, Docs) ->
+ case Docs of
+ [] -> meck:passthrough([TxDb, Mrst, Docs]);
+ [_ | _] -> erlang:error(update_triggered)
+ end
+ end),
?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc2, ?MAP_FUN1)),
?assertEqual(ok, wait_job_finished(JobId, 5000)),
- ?assertEqual(0, meck:num_calls(couch_views_fdb, write_doc, 4)),
DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true},
{ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []),
diff --git a/src/couch_views/test/couch_views_size_test.erl b/src/couch_views/test/couch_views_size_test.erl
index 18fa9e628..cc2fe39fc 100644
--- a/src/couch_views/test/couch_views_size_test.erl
+++ b/src/couch_views/test/couch_views_size_test.erl
@@ -193,16 +193,21 @@ cleanup({Ctx, Db}) ->
create_transition_tests({_Ctx, Db}) ->
- Transitions = generate_transitions(),
- Single = lists:flatmap(fun(T) ->
- Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
- [{Name, fun() -> check_single_transition(Db, T) end}]
- end, lists:sort(Transitions)),
- Multi = lists:flatmap(fun(T) ->
- Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
- [{Name, fun() -> check_multi_transition(Db, T) end}]
- end, lists:sort(group(shuffle(Transitions)))),
- subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+ try
+ throw(disabled),
+ Transitions = generate_transitions(),
+ Single = lists:flatmap(fun(T) ->
+ Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+ [{Name, fun() -> check_single_transition(Db, T) end}]
+ end, lists:sort(Transitions)),
+ Multi = lists:flatmap(fun(T) ->
+ Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+ [{Name, fun() -> check_multi_transition(Db, T) end}]
+ end, lists:sort(group(shuffle(Transitions)))),
+ subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi)
+ catch throw:disabled ->
+ [{"Disabled", fun() -> ok end}]
+ end.
check_single_transition(Db, {Set1, Set2, Transition}) ->
diff --git a/src/couch_views/test/couch_views_updater_test.erl b/src/couch_views/test/couch_views_updater_test.erl
index 89c341a17..aadbe940b 100644
--- a/src/couch_views/test/couch_views_updater_test.erl
+++ b/src/couch_views/test/couch_views_updater_test.erl
@@ -69,7 +69,7 @@ foreach_setup() ->
Docs = make_docs(3),
fabric2_db:update_docs(Db, Docs),
- meck:new(couch_views_fdb, [passthrough]),
+ meck:new(couch_views_trees, [passthrough]),
{Db, DDoc}.
@@ -135,7 +135,7 @@ includes_design_docs({Db, _}) ->
handle_erlfdb_errors({Db, _}) ->
- meck:expect(couch_views_fdb, write_doc, fun(_, _, _, _) ->
+ meck:expect(couch_views_trees, update_views, fun(_, _, _) ->
error({erlfdb_error, 1009})
end),
?assertError({erlfdb_error, 1009}, fabric2_db:update_docs(Db, [doc(4)])).
diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl
index 44ae22093..411f4af65 100644
--- a/src/mango/src/mango_cursor_view.erl
+++ b/src/mango/src/mango_cursor_view.erl
@@ -31,6 +31,7 @@
-include_lib("fabric/include/fabric.hrl").
-include("mango_cursor.hrl").
+-include("mango_idx_view.hrl").
create(Db, Indexes, Selector, Opts) ->
@@ -85,16 +86,15 @@ explain(Cursor) ->
maybe_replace_max_json([]) ->
[];
+maybe_replace_max_json([?MAX_JSON_OBJ | T]) ->
+ [<<"<MAX>">> | maybe_replace_max_json(T)];
+
+maybe_replace_max_json([H | T]) ->
+ [H | maybe_replace_max_json(T)];
+
maybe_replace_max_json(?MAX_STR) ->
<<"<MAX>">>;
-maybe_replace_max_json([H | T] = EndKey) when is_list(EndKey) ->
- MAX_VAL = couch_views_encoding:max(),
- H1 = if H == MAX_VAL -> <<"<MAX>">>;
- true -> H
- end,
- [H1 | maybe_replace_max_json(T)];
-
maybe_replace_max_json(EndKey) ->
EndKey.
diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index f80cc217b..a73d82ae6 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -34,6 +34,7 @@
-include_lib("couch/include/couch_db.hrl").
-include("mango.hrl").
-include("mango_idx.hrl").
+-include("mango_idx_view.hrl").
validate_new(#idx{}=Idx, _Db) ->
@@ -131,7 +132,7 @@ is_usable(Idx, Selector, SortFields) ->
% and the selector is not a text search (so requires a text index)
RequiredFields = columns(Idx),
- % sort fields are required to exist in the results so
+ % sort fields are required to exist in the results so
% we don't need to check the selector for these
RequiredFields1 = ordsets:subtract(lists:usort(RequiredFields), lists:usort(SortFields)),
@@ -182,11 +183,11 @@ start_key([{'$eq', Key, '$eq', Key} | Rest]) ->
end_key([]) ->
- [couch_views_encoding:max()];
+ [?MAX_JSON_OBJ];
end_key([{_, _, '$lt', Key} | Rest]) ->
case mango_json:special(Key) of
true ->
- [couch_views_encoding:max()];
+ [?MAX_JSON_OBJ];
false ->
[Key | end_key(Rest)]
end;
diff --git a/src/mango/src/mango_idx_view.hrl b/src/mango/src/mango_idx_view.hrl
new file mode 100644
index 000000000..d0f46748b
--- /dev/null
+++ b/src/mango/src/mango_idx_view.hrl
@@ -0,0 +1,13 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-define(MAX_JSON_OBJ, {[{<<255, 255, 255, 255>>, <<>>}]}). \ No newline at end of file