summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTony Sun <tony.sun427@gmail.com>2021-10-20 17:07:06 -0700
committerTony Sun <tony.sun427@gmail.com>2021-10-20 17:31:55 -0700
commit109fcacde85a80c7fff24660d5c235cd83637a74 (patch)
tree61be145c909789f8cee4ce4484ba1507a2bbed08
parent24bc0ce338a5aae91f1f3e8714178360e10ba24b (diff)
downloadcouchdb-fix-case-clause-dreyfus.tar.gz
fix case_clause when HitId and DocId do not match when include_docs=truefix-case-clause-dreyfus
Sometimes, a clouseau index may become out of sync with the db or become corrupt under certain conditions (heap exhaustion, garbage collection). This leads to certain HitIds that get returned which may not exist in the current db. Before this fix, we assume the Hit's Id will automatically match an Id return from: fabric:all_docs(DbName, fun callback/2, [], [{keys, DocIds}, {include_docs, true}]). When the document does not exist, {error, false} is returned for the row. This fix accounts for this scenario and possible other scenarios by returning an error to the user and also logging the error when the Hit Id does not match what is returned from the _all_docs call.
-rw-r--r--src/dreyfus/src/dreyfus_httpd.erl48
1 files changed, 41 insertions, 7 deletions
diff --git a/src/dreyfus/src/dreyfus_httpd.erl b/src/dreyfus/src/dreyfus_httpd.erl
index 007dace8f..a9259662c 100644
--- a/src/dreyfus/src/dreyfus_httpd.erl
+++ b/src/dreyfus/src/dreyfus_httpd.erl
@@ -532,16 +532,18 @@ hits_to_json(DbName, IncludeDocs, Hits) ->
if IncludeDocs ->
chttpd_stats:incr_reads(length(Hits)),
{ok, JsonDocs} = dreyfus_fabric:get_json_docs(DbName, Ids),
- lists:zipwith(fun(Hit, {Id, Doc}) ->
+ lists:zipwith(fun
+ (Hit, {Id, Doc}) ->
case Hit of
- {Id, Order, Fields} ->
- {[{id, Id}, {order, Order}, {fields, {Fields}}, Doc]};
- {Id, Order, Fields, Highlights} ->
+ {HitId, Order, Fields} ->
+ Doc1 = hits_id_validator(HitId, {Id, Doc}),
+ {[{id, Id}, {order, Order}, {fields, {Fields}}, Doc1]};
+ {HitId, Order, Fields, Highlights} ->
+ Doc1 = hits_id_validator(HitId, {Id, Doc}),
{[{id, Id}, {order, Order}, {fields, {Fields}},
- {highlights, {Highlights}}, Doc]}
+ {highlights, {Highlights}}, Doc1]}
end
- end, HitData, JsonDocs);
-
+ end, HitData, JsonDocs);
true ->
lists:map(fun(Hit) ->
case Hit of
@@ -553,6 +555,21 @@ hits_to_json(DbName, IncludeDocs, Hits) ->
end, HitData)
end.
+
+% _all_docs returned {error, false} meaning document doesn't exist
+hits_id_validator(HitId, {error, false}) ->
+ couch_log:error("HitId: ~p does not exist in db. Index may be out of sync or corrupt", [HitId]),
+ {error, <<"not_found">>};
+hits_id_validator(HitId, {error, Reason}) ->
+ couch_log:error("HitId: ~p does not exist in db. Reason: ~p", [HitId, Reason]),
+ {error, Reason};
+hits_id_validator(HitId, {DocId, Doc}) when DocId =:= HitId ->
+ Doc;
+hits_id_validator(HitId, {DocId, _}) ->
+ couch_log:error("HitId: ~p DocId: ~p mismatch. Index maybe out of sync or corrupt.", [HitId, DocId]),
+ {error, <<"docid mismatch">>}.
+
+
get_hit_data(Hit) ->
Id = couch_util:get_value(<<"_id">>, Hit#hit.fields),
Fields = lists:keydelete(<<"_id">>, 1, Hit#hit.fields),
@@ -612,3 +629,20 @@ backoff_and_retry(Req, Db, DDoc, RetryCount, RetryPause, Error) ->
timer:sleep(RetryPause),
handle_search_req(Req, Db, DDoc, RetryCount + 1, RetryPause * 2)
end.
+
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+hits_id_validator_test() ->
+ Doc = {doc, {[
+ {<<"_id">>, <<"foo">>},
+ {<<"_rev">>, <<"1-4c6114c65e295552ab1019e2b046b10e">>},
+ {<<"foo">>, <<"bar">>}
+ ]}},
+ ?assertEqual(Doc, hits_id_validator(<<"foo">>, {<<"foo">>, Doc})),
+ ?assertEqual({error, <<"not_found">>}, hits_id_validator(<<"foo>">>, {error, false})),
+ ?assertEqual({error, some_reason}, hits_id_validator(<<"foo>">>, {error, some_reason})),
+ ?assertEqual({error, <<"docid mismatch">>}, hits_id_validator(<<"bar">>, {<<"foo">>, Doc})).
+
+-endif.