summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Paul J. Davis <paul.joseph.davis@gmail.com> 2020-03-04 09:54:53 -0600
committer: Paul J. Davis <paul.joseph.davis@gmail.com> 2020-03-05 14:18:12 -0600
commit: 69fc02bf17545f37c1aad0847552ecefbdc8e539 (patch)
tree: e0b3e0e051ee692d3441500af9b78d5458e74800
parent: 377a3d3903bf2166f5e4b95d90f0a237672b812e (diff)
download: couchdb-69fc02bf17545f37c1aad0847552ecefbdc8e539.tar.gz
Fix handling of duplicate keys
If a map function emits duplicate keys for a document, multiple rows are stored in the map index, differentiated by a `DupeId` counter. Previously we tried to save some work by not clearing the ranges for keys that would be overwritten anyway. However, if a document update caused fewer duplicates to be emitted for the same key, this left orphaned rows in the index. The fix clears the ranges for all of the document's existing keys instead of only those absent from the new rows.
-rw-r--r-- src/couch_views/src/couch_views_fdb.erl 5
-rw-r--r-- src/couch_views/test/couch_views_indexer_test.erl 52
2 files changed, 53 insertions, 4 deletions
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index f2ac01bf3..98257f308 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -224,13 +224,10 @@ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) ->
db_prefix := DbPrefix
} = TxDb,
- Unique = lists:usort([K || {K, _V} <- NewRows]),
-
- KeysToRem = ExistingKeys -- Unique,
lists:foreach(fun(RemKey) ->
{Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId),
ok = erlfdb:clear_range(Tx, Start, End)
- end, KeysToRem),
+ end, ExistingKeys),
KVsToAdd = process_rows(NewRows),
MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId),
diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl
index 9482fdd85..c3f704911 100644
--- a/src/couch_views/test/couch_views_indexer_test.erl
+++ b/src/couch_views/test/couch_views_indexer_test.erl
@@ -39,6 +39,7 @@ indexer_test_() ->
?TDEF_FE(multipe_docs_with_same_key),
?TDEF_FE(multipe_keys_from_same_doc),
?TDEF_FE(multipe_identical_keys_from_same_doc),
+ ?TDEF_FE(fewer_multipe_identical_keys_from_same_doc),
?TDEF_FE(handle_size_key_limits),
?TDEF_FE(handle_size_value_limits)
]
@@ -388,6 +389,53 @@ multipe_identical_keys_from_same_doc(Db) ->
], Out).
+fewer_multipe_identical_keys_from_same_doc(Db) ->
+ DDoc = create_ddoc(multi_emit_same),
+ Doc0 = #doc{
+ id = <<"0">>,
+ body = {[{<<"val">>, 1}, {<<"extra">>, 3}]}
+ },
+
+ {ok, _} = fabric2_db:update_doc(Db, DDoc, []),
+ {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []),
+
+ {ok, Out1} = couch_views:query(
+ Db,
+ DDoc,
+ <<"map_fun1">>,
+ fun fold_fun/2,
+ [],
+ #mrargs{}
+ ),
+
+ ?assertEqual([
+ row(<<"0">>, 1, 1),
+ row(<<"0">>, 1, 2),
+ row(<<"0">>, 1, 3)
+ ], Out1),
+
+ Doc1 = #doc{
+ id = <<"0">>,
+ revs = {Pos, [Rev]},
+ body = {[{<<"val">>, 1}]}
+ },
+ {ok, _} = fabric2_db:update_doc(Db, Doc1, []),
+
+ {ok, Out2} = couch_views:query(
+ Db,
+ DDoc,
+ <<"map_fun1">>,
+ fun fold_fun/2,
+ [],
+ #mrargs{}
+ ),
+
+ ?assertEqual([
+ row(<<"0">>, 1, 1),
+ row(<<"0">>, 1, 2)
+ ], Out2).
+
+
handle_size_key_limits(Db) ->
ok = meck:new(config, [passthrough]),
ok = meck:expect(config, get_integer, fun(Section, Key, Default) ->
@@ -495,6 +543,7 @@ row(Id, Key, Value) ->
{value, Value}
]}.
+
fold_fun({meta, _Meta}, Acc) ->
{ok, Acc};
fold_fun({row, _} = Row, Acc) ->
@@ -544,6 +593,9 @@ create_ddoc(multi_emit_same) ->
{<<"map">>, <<"function(doc) { "
"emit(doc.val, doc.val * 2); "
"emit(doc.val, doc.val); "
+ "if(doc.extra) {"
+ " emit(doc.val, doc.extra);"
+ "}"
"}">>}
]}},
{<<"map_fun2">>, {[