summaryrefslogtreecommitdiff
path: root/src/couch_mrview
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2022-11-07 22:42:14 -0500
committerNick Vatamaniuc <nickva@users.noreply.github.com>2022-11-10 17:51:49 -0500
commita02c483ccf7a6f367eeb37325f520a66ae67d012 (patch)
treea2d2ebed346a0966ec34800e25f96836de82cabd /src/couch_mrview
parent21dfdf504d1e3a11068b71ef55402183e14c193e (diff)
downloadcouchdb-a02c483ccf7a6f367eeb37325f520a66ae67d012.tar.gz
Improve fabric index cleanup
* Clean up stale view purge checkpoints. Previously we didn't, and purge progress could have stalled by keeping around inactive (lagging) purge checkpoints. * couch_mrview_cleanup attempted to clean purge checkpoints but that didn't work for clustered databases, only for local ones. Nowadays most dbs are clustered so make sure those work as well. * DRY-out code from both fabric inactive index cleanup and couch_mrview_cleanup modules. Move some of the common code to couch_mrview_util module. couch_mrview_cleanup is the only place in charge of the cleanup logic now. * Consolidate and improve tests. Utility functions to get all index files, purge checkpoints and signatures are now tested with couch_mrview_util tests, and end-to-end fabric cleanup tests are in fabric_tests. Since fabric_tests covers all the test scenarios from fabric_test.exs, remove fabric_test.exs so we don't have duplicated tests and still get the same coverage.
Diffstat (limited to 'src/couch_mrview')
-rw-r--r--src/couch_mrview/src/couch_mrview_cleanup.erl97
-rw-r--r--src/couch_mrview/src/couch_mrview_util.erl93
-rw-r--r--src/couch_mrview/test/eunit/couch_mrview_util_tests.erl139
3 files changed, 263 insertions, 66 deletions
diff --git a/src/couch_mrview/src/couch_mrview_cleanup.erl b/src/couch_mrview/src/couch_mrview_cleanup.erl
index 417605c55..5b5afbdce 100644
--- a/src/couch_mrview/src/couch_mrview_cleanup.erl
+++ b/src/couch_mrview/src/couch_mrview_cleanup.erl
@@ -12,57 +12,62 @@
-module(couch_mrview_cleanup).
--export([run/1]).
+-export([
+ run/1,
+ cleanup_purges/3,
+ cleanup_indices/2
+]).
-include_lib("couch/include/couch_db.hrl").
--include_lib("couch_mrview/include/couch_mrview.hrl").
run(Db) ->
- RootDir = couch_index_util:root_dir(),
- DbName = couch_db:name(Db),
+ Indices = couch_mrview_util:get_index_files(Db),
+ Checkpoints = couch_mrview_util:get_purge_checkpoints(Db),
+ {ok, Db1} = couch_db:reopen(Db),
+ Sigs = couch_mrview_util:get_signatures(Db1),
+ ok = cleanup_purges(Db1, Sigs, Checkpoints),
+ ok = cleanup_indices(Sigs, Indices).
- {ok, DesignDocs} = couch_db:get_design_docs(Db),
- SigFiles = lists:foldl(
- fun(DDocInfo, SFAcc) ->
- {ok, DDoc} = couch_db:open_doc_int(Db, DDocInfo, [ejson_body]),
- {ok, InitState} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc),
- Sig = InitState#mrst.sig,
- IFName = couch_mrview_util:index_file(DbName, Sig),
- CFName = couch_mrview_util:compaction_file(DbName, Sig),
- [IFName, CFName | SFAcc]
- end,
- [],
- [DD || DD <- DesignDocs, DD#full_doc_info.deleted == false]
- ),
+cleanup_purges(DbName, Sigs, Checkpoints) when is_binary(DbName) ->
+ couch_util:with_db(DbName, fun(Db) ->
+ cleanup_purges(Db, Sigs, Checkpoints)
+ end);
+cleanup_purges(Db, #{} = Sigs, #{} = CheckpointsMap) ->
+ InactiveMap = maps:without(maps:keys(Sigs), CheckpointsMap),
+ InactiveCheckpoints = maps:values(InactiveMap),
+ DeleteFun = fun(DocId) -> delete_checkpoint(Db, DocId) end,
+ lists:foreach(DeleteFun, InactiveCheckpoints).
- IdxDir = couch_index_util:index_dir(mrview, DbName),
- DiskFiles = filelib:wildcard(filename:join(IdxDir, "*")),
+cleanup_indices(#{} = Sigs, #{} = IndexMap) ->
+ Fun = fun(_, Files) -> lists:foreach(fun delete_file/1, Files) end,
+ maps:map(Fun, maps:without(maps:keys(Sigs), IndexMap)),
+ ok.
- % We need to delete files that have no ddoc.
- ToDelete = DiskFiles -- SigFiles,
+delete_file(File) ->
+ RootDir = couch_index_util:root_dir(),
+ couch_log:debug("~p : deleting inactive index : ~s", [?MODULE, File]),
+ try
+ couch_file:delete(RootDir, File, [sync])
+ catch
+ Tag:Error ->
+ ErrLog = "~p : error deleting inactive index file ~s ~p:~p",
+ couch_log:error(ErrLog, [?MODULE, File, Tag, Error]),
+ ok
+ end.
- lists:foreach(
- fun(FN) ->
- couch_log:debug("Deleting stale view file: ~s", [FN]),
- couch_file:delete(RootDir, FN, [sync]),
- case couch_mrview_util:verify_view_filename(FN) of
- true ->
- Sig = couch_mrview_util:get_signature_from_filename(FN),
- DocId = couch_mrview_util:get_local_purge_doc_id(Sig),
- case couch_db:open_doc(Db, DocId, []) of
- {ok, LocalPurgeDoc} ->
- couch_db:update_doc(
- Db,
- LocalPurgeDoc#doc{deleted = true},
- [?ADMIN_CTX]
- );
- {not_found, _} ->
- ok
- end;
- false ->
- ok
- end
- end,
- ToDelete
- ),
- ok.
+delete_checkpoint(Db, DocId) ->
+ DbName = couch_db:name(Db),
+ LogMsg = "~p : deleting inactive purge checkpoint ~s : ~s",
+ couch_log:debug(LogMsg, [?MODULE, DbName, DocId]),
+ try couch_db:open_doc(Db, DocId, []) of
+ {ok, Doc = #doc{}} ->
+ Deleted = Doc#doc{deleted = true, body = {[]}},
+ couch_db:update_doc(Db, Deleted, [?ADMIN_CTX]);
+ {not_found, _} ->
+ ok
+ catch
+ Tag:Error ->
+ ErrLog = "~p : error deleting checkpoint ~s : ~s error: ~p:~p",
+ couch_log:error(ErrLog, [?MODULE, DbName, DocId, Tag, Error]),
+ ok
+ end.
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index 9e3d292ed..e1e75f34f 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -15,6 +15,7 @@
-export([get_view/4, get_view_index_pid/4]).
-export([get_local_purge_doc_id/1, get_value_from_options/2]).
-export([verify_view_filename/1, get_signature_from_filename/1]).
+-export([get_signatures/1, get_purge_checkpoints/1, get_index_files/1]).
-export([ddoc_to_mrst/2, init_state/4, reset_index/3]).
-export([make_header/1]).
-export([index_file/2, compaction_file/2, open_file/1]).
@@ -53,6 +54,11 @@
true -> B
end)
).
+-define(IS_HEX(C),
+ ((C >= $0 andalso C =< $9) orelse
+ (C >= $a andalso C =< $f) orelse
+ (C >= $A andalso C =< $F))
+).
-include_lib("couch/include/couch_db.hrl").
-include_lib("couch_mrview/include/couch_mrview.hrl").
@@ -70,31 +76,78 @@ get_value_from_options(Key, Options) ->
end.
verify_view_filename(FileName) ->
- FilePathList = filename:split(FileName),
- PureFN = lists:last(FilePathList),
- case filename:extension(PureFN) of
+ case filename:extension(FileName) of
".view" ->
- Sig = filename:basename(PureFN),
- case
- [
- Ch
- || Ch <- Sig,
- not (((Ch >= $0) and (Ch =< $9)) orelse
- ((Ch >= $a) and (Ch =< $f)) orelse
- ((Ch >= $A) and (Ch =< $F)))
- ] == []
- of
- true -> true;
- false -> false
- end;
+ Sig = get_signature_from_filename(FileName),
+ lists:all(fun(C) -> ?IS_HEX(C) end, Sig);
_ ->
false
end.
-get_signature_from_filename(FileName) ->
- FilePathList = filename:split(FileName),
- PureFN = lists:last(FilePathList),
- filename:basename(PureFN, ".view").
+get_signature_from_filename(Path) ->
+ filename:basename(filename:basename(Path, ".view"), ".compact").
+
+% Returns map of `Sig => true` elements with all the active signatures.
+% Sig is a hex-encoded binary.
+%
+get_signatures(DbName) when is_binary(DbName) ->
+ couch_util:with_db(DbName, fun get_signatures/1);
+get_signatures(Db) ->
+ DbName = couch_db:name(Db),
+ % get_design_docs/1 returns ejson for clustered shards, and
+ % #full_doc_info{}'s for other cases.
+ {ok, DDocs} = couch_db:get_design_docs(Db),
+ FoldFun = fun
+ ({[_ | _]} = EJsonDoc, Acc) ->
+ Doc = couch_doc:from_json_obj(EJsonDoc),
+ {ok, Mrst} = ddoc_to_mrst(DbName, Doc),
+ Sig = couch_util:to_hex_bin(Mrst#mrst.sig),
+ Acc#{Sig => true};
+ (#full_doc_info{} = FDI, Acc) ->
+ {ok, Doc} = couch_db:open_doc_int(Db, FDI, [ejson_body]),
+ {ok, Mrst} = ddoc_to_mrst(DbName, Doc),
+ Sig = couch_util:to_hex_bin(Mrst#mrst.sig),
+ Acc#{Sig => true}
+ end,
+ lists:foldl(FoldFun, #{}, DDocs).
+
+% Returns a map of `Sig => DocId` elements for all the purge view
+% checkpoint docs. Sig is a hex-encoded binary.
+%
+get_purge_checkpoints(DbName) when is_binary(DbName) ->
+ couch_util:with_db(DbName, fun get_purge_checkpoints/1);
+get_purge_checkpoints(Db) ->
+ FoldFun = fun(#doc{id = Id}, Acc) ->
+ case Id of
+ <<?LOCAL_DOC_PREFIX, "purge-mrview-", Sig/binary>> ->
+ {ok, Acc#{Sig => Id}};
+ _ ->
+ {stop, Acc}
+ end
+ end,
+ Opts = [{start_key, <<?LOCAL_DOC_PREFIX, "purge-mrview-">>}],
+ {ok, Signatures = #{}} = couch_db:fold_local_docs(Db, FoldFun, #{}, Opts),
+ Signatures.
+
+% Returns a map of `Sig => [FilePath, ...]` elements. Sig is a hex-encoded
+% binary and FilePaths are lists as they are intended to be passed to couch_file
+% and file module functions.
+%
+get_index_files(DbName) when is_binary(DbName) ->
+ IdxDir = couch_index_util:index_dir(mrview, DbName),
+ WildcardPath = filename:join(IdxDir, "*"),
+ FoldFun = fun(F, Acc) ->
+ case verify_view_filename(F) of
+ true ->
+ Sig = ?l2b(get_signature_from_filename(F)),
+ maps:update_with(Sig, fun(Fs) -> [F | Fs] end, [F], Acc);
+ false ->
+ Acc
+ end
+ end,
+ lists:foldl(FoldFun, #{}, filelib:wildcard(WildcardPath));
+get_index_files(Db) ->
+ get_index_files(couch_db:name(Db)).
get_view(Db, DDoc, ViewName, Args0) ->
case get_view_index_state(Db, DDoc, ViewName, Args0) of
diff --git a/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl b/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
index a495fd82c..2562bb511 100644
--- a/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
+++ b/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl
@@ -13,8 +13,11 @@
-module(couch_mrview_util_tests).
-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
-include_lib("couch_mrview/include/couch_mrview.hrl").
+-define(DDOC_ID, <<"_design/bar">>).
+
couch_mrview_util_test_() ->
[
?_assertEqual(0, validate_group_level(undefined, undefined)),
@@ -35,3 +38,139 @@ validate_group_level(Group, GroupLevel) ->
Args0 = #mrargs{group = Group, group_level = GroupLevel, view_type = red},
Args1 = couch_mrview_util:validate_args(Args0),
Args1#mrargs.group_level.
+
+get_signature_from_filename_test() ->
+ Sig = "da817c3d3f7413c1a610f25635a0c521",
+ P1 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.view",
+ P2 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.compact.view",
+ P3 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521",
+ ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P1)),
+ ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P2)),
+ ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P3)).
+
+verify_view_filename_test() ->
+ P1 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.view",
+ P2 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.compact.view",
+ P3 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521",
+ ?assert(couch_mrview_util:verify_view_filename(P1)),
+ ?assert(couch_mrview_util:verify_view_filename(P2)),
+ ?assertNot(couch_mrview_util:verify_view_filename(P3)),
+ ?assertNot(couch_mrview_util:verify_view_filename("")),
+ ?assertNot(couch_mrview_util:verify_view_filename("foo.view")).
+
+setup() ->
+ DbName = ?tempdb(),
+ ok = fabric:create_db(DbName, [?ADMIN_CTX, {q, 2}]),
+ DDoc = couch_mrview_test_util:ddoc(map),
+ {ok, _} = fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]),
+ {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+ {ok, Db} = couch_mrview_test_util:init_db(?tempdb(), map),
+ {ok, _} = couch_mrview:query_view(Db, ?DDOC_ID, <<"baz">>),
+ {DbName, Db}.
+
+teardown({DbName, Db}) ->
+ couch_db:close(Db),
+ couch_server:delete(couch_db:name(Db), [?ADMIN_CTX]),
+ ok = fabric:delete_db(DbName, [?ADMIN_CTX]).
+
+get_signatures_test_() ->
+ {
+ setup,
+ fun() -> test_util:start_couch([fabric]) end,
+ fun test_util:stop_couch/1,
+ {
+ foreach,
+ fun setup/0,
+ fun teardown/1,
+ [
+ ?TDEF_FE(t_get_signatures_local),
+ ?TDEF_FE(t_get_signatures_clustered),
+ ?TDEF_FE(t_get_purge_checkpoints_local),
+ ?TDEF_FE(t_get_purge_checkpoints_clustered),
+ ?TDEF_FE(t_get_index_files_local),
+ ?TDEF_FE(t_get_index_files_clustered)
+ ]
+ }
+ }.
+
+t_get_signatures_local({_, Db}) ->
+ DbName = couch_db:name(Db),
+ Sigs = couch_mrview_util:get_signatures(DbName),
+ ?assert(is_map(Sigs)),
+ ?assertEqual(1, map_size(Sigs)),
+ [{Sig, true}] = maps:to_list(Sigs),
+ {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+ ?assertEqual(proplists:get_value(signature, Info), Sig),
+
+ {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [?ADMIN_CTX]),
+ Deleted = DDoc#doc{deleted = true, body = {[]}},
+ {ok, _} = couch_db:update_doc(Db, Deleted, []),
+ ?assertEqual(#{}, couch_mrview_util:get_signatures(DbName)).
+
+t_get_signatures_clustered({DbName, _Db}) ->
+ [Shard1, Shard2] = mem3:local_shards(DbName),
+ ShardName1 = mem3:name(Shard1),
+ ShardName2 = mem3:name(Shard2),
+ Sigs = couch_mrview_util:get_signatures(ShardName1),
+ ?assertEqual(Sigs, couch_mrview_util:get_signatures(ShardName2)),
+ ?assert(is_map(Sigs)),
+ ?assertEqual(1, map_size(Sigs)),
+ [{Sig, true}] = maps:to_list(Sigs),
+ {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+ ?assertEqual(proplists:get_value(signature, Info), Sig),
+
+ {ok, DDoc} = fabric:open_doc(DbName, ?DDOC_ID, [?ADMIN_CTX]),
+ Deleted = DDoc#doc{deleted = true, body = {[]}},
+ {ok, _} = fabric:update_doc(DbName, Deleted, [?ADMIN_CTX]),
+ ?assertEqual(#{}, couch_mrview_util:get_signatures(ShardName1)),
+ ?assertEqual(#{}, couch_mrview_util:get_signatures(ShardName2)).
+
+t_get_purge_checkpoints_local({_, Db}) ->
+ DbName = couch_db:name(Db),
+ Checkpoints = couch_mrview_util:get_purge_checkpoints(DbName),
+ ?assert(is_map(Checkpoints)),
+ ?assertEqual(1, map_size(Checkpoints)),
+ [{Sig, <<"_local/", _/binary>>}] = maps:to_list(Checkpoints),
+ {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+ ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_purge_checkpoints_clustered({DbName, _Db}) ->
+ {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+ [Shard1, Shard2] = mem3:local_shards(DbName),
+ ShardName1 = mem3:name(Shard1),
+ ShardName2 = mem3:name(Shard2),
+ Sigs1 = couch_mrview_util:get_purge_checkpoints(ShardName1),
+ Sigs2 = couch_mrview_util:get_purge_checkpoints(ShardName2),
+ ?assertEqual(lists:sort(maps:keys(Sigs1)), lists:sort(maps:keys(Sigs2))),
+ ?assert(is_map(Sigs1)),
+ ?assertEqual(1, map_size(Sigs1)),
+ [{Sig, <<"_local/", _/binary>>}] = maps:to_list(Sigs1),
+ {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+ ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_index_files_local({_, Db}) ->
+ DbName = couch_db:name(Db),
+ SigFilesMap = couch_mrview_util:get_index_files(DbName),
+ ?assert(is_map(SigFilesMap)),
+ ?assertEqual(1, map_size(SigFilesMap)),
+ [{Sig, [File]}] = maps:to_list(SigFilesMap),
+ ?assertMatch({ok, _}, file:read_file_info(File)),
+ {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID),
+ ?assertEqual(proplists:get_value(signature, Info), Sig).
+
+t_get_index_files_clustered({DbName, _Db}) ->
+ {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}),
+ [Shard1, Shard2] = mem3:local_shards(DbName),
+ ShardName1 = mem3:name(Shard1),
+ ShardName2 = mem3:name(Shard2),
+ SigFilesMap1 = couch_mrview_util:get_index_files(ShardName1),
+ SigFilesMap2 = couch_mrview_util:get_index_files(ShardName2),
+ SigKeys1 = lists:sort(maps:keys(SigFilesMap1)),
+ SigKeys2 = lists:sort(maps:keys(SigFilesMap2)),
+ ?assertEqual(SigKeys1, SigKeys2),
+ ?assert(is_map(SigFilesMap1)),
+ ?assertEqual(1, map_size(SigFilesMap1)),
+ [{Sig, [File]}] = maps:to_list(SigFilesMap1),
+ ?assertMatch({ok, _}, file:read_file_info(File)),
+ {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID),
+ ?assertEqual(proplists:get_value(signature, Info), Sig).