diff options
author | Nick Vatamaniuc <vatamane@gmail.com> | 2022-11-07 22:42:14 -0500 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2022-11-10 17:51:49 -0500 |
commit | a02c483ccf7a6f367eeb37325f520a66ae67d012 (patch) | |
tree | a2d2ebed346a0966ec34800e25f96836de82cabd /src/couch_mrview | |
parent | 21dfdf504d1e3a11068b71ef55402183e14c193e (diff) | |
download | couchdb-a02c483ccf7a6f367eeb37325f520a66ae67d012.tar.gz |
Improve fabric index cleanup
* Clean up stale view purge checkpoints. Previously we didn't, and purge
progress could have stalled because inactive (lagging) purge checkpoints
were kept around.
* couch_mrview_cleanup attempted to clean purge checkpoints but that didn't
work for clustered databases, only for local ones. Nowadays most dbs
are clustered so make sure those work as well.
* DRY-out code from both fabric inactive index cleanup and
couch_mrview_cleanup modules. Move some of the common code to
couch_mrview_util module. couch_mrview_cleanup is the only place in charge
of the cleanup logic now.
* Consolidate and improve tests. Utility functions to get all index files,
purge checkpoints and signatures are now tested with couch_mrview_util tests,
and end-to-end fabric cleanup tests are in fabric_tests. Since fabric_tests
covers all the test scenarios from fabric_test.exs, remove fabric_test.exs
so we don't have duplicated tests and still get the same coverage.
Diffstat (limited to 'src/couch_mrview')
-rw-r--r-- | src/couch_mrview/src/couch_mrview_cleanup.erl | 97 | ||||
-rw-r--r-- | src/couch_mrview/src/couch_mrview_util.erl | 93 | ||||
-rw-r--r-- | src/couch_mrview/test/eunit/couch_mrview_util_tests.erl | 139 |
3 files changed, 263 insertions, 66 deletions
diff --git a/src/couch_mrview/src/couch_mrview_cleanup.erl b/src/couch_mrview/src/couch_mrview_cleanup.erl index 417605c55..5b5afbdce 100644 --- a/src/couch_mrview/src/couch_mrview_cleanup.erl +++ b/src/couch_mrview/src/couch_mrview_cleanup.erl @@ -12,57 +12,62 @@ -module(couch_mrview_cleanup). --export([run/1]). +-export([ + run/1, + cleanup_purges/3, + cleanup_indices/2 +]). -include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). run(Db) -> - RootDir = couch_index_util:root_dir(), - DbName = couch_db:name(Db), + Indices = couch_mrview_util:get_index_files(Db), + Checkpoints = couch_mrview_util:get_purge_checkpoints(Db), + {ok, Db1} = couch_db:reopen(Db), + Sigs = couch_mrview_util:get_signatures(Db1), + ok = cleanup_purges(Db1, Sigs, Checkpoints), + ok = cleanup_indices(Sigs, Indices). - {ok, DesignDocs} = couch_db:get_design_docs(Db), - SigFiles = lists:foldl( - fun(DDocInfo, SFAcc) -> - {ok, DDoc} = couch_db:open_doc_int(Db, DDocInfo, [ejson_body]), - {ok, InitState} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), - Sig = InitState#mrst.sig, - IFName = couch_mrview_util:index_file(DbName, Sig), - CFName = couch_mrview_util:compaction_file(DbName, Sig), - [IFName, CFName | SFAcc] - end, - [], - [DD || DD <- DesignDocs, DD#full_doc_info.deleted == false] - ), +cleanup_purges(DbName, Sigs, Checkpoints) when is_binary(DbName) -> + couch_util:with_db(DbName, fun(Db) -> + cleanup_purges(Db, Sigs, Checkpoints) + end); +cleanup_purges(Db, #{} = Sigs, #{} = CheckpointsMap) -> + InactiveMap = maps:without(maps:keys(Sigs), CheckpointsMap), + InactiveCheckpoints = maps:values(InactiveMap), + DeleteFun = fun(DocId) -> delete_checkpoint(Db, DocId) end, + lists:foreach(DeleteFun, InactiveCheckpoints). 
- IdxDir = couch_index_util:index_dir(mrview, DbName), - DiskFiles = filelib:wildcard(filename:join(IdxDir, "*")), +cleanup_indices(#{} = Sigs, #{} = IndexMap) -> + Fun = fun(_, Files) -> lists:foreach(fun delete_file/1, Files) end, + maps:map(Fun, maps:without(maps:keys(Sigs), IndexMap)), + ok. - % We need to delete files that have no ddoc. - ToDelete = DiskFiles -- SigFiles, +delete_file(File) -> + RootDir = couch_index_util:root_dir(), + couch_log:debug("~p : deleting inactive index : ~s", [?MODULE, File]), + try + couch_file:delete(RootDir, File, [sync]) + catch + Tag:Error -> + ErrLog = "~p : error deleting inactive index file ~s ~p:~p", + couch_log:error(ErrLog, [?MODULE, File, Tag, Error]), + ok + end. - lists:foreach( - fun(FN) -> - couch_log:debug("Deleting stale view file: ~s", [FN]), - couch_file:delete(RootDir, FN, [sync]), - case couch_mrview_util:verify_view_filename(FN) of - true -> - Sig = couch_mrview_util:get_signature_from_filename(FN), - DocId = couch_mrview_util:get_local_purge_doc_id(Sig), - case couch_db:open_doc(Db, DocId, []) of - {ok, LocalPurgeDoc} -> - couch_db:update_doc( - Db, - LocalPurgeDoc#doc{deleted = true}, - [?ADMIN_CTX] - ); - {not_found, _} -> - ok - end; - false -> - ok - end - end, - ToDelete - ), - ok. +delete_checkpoint(Db, DocId) -> + DbName = couch_db:name(Db), + LogMsg = "~p : deleting inactive purge checkpoint ~s : ~s", + couch_log:debug(LogMsg, [?MODULE, DbName, DocId]), + try couch_db:open_doc(Db, DocId, []) of + {ok, Doc = #doc{}} -> + Deleted = Doc#doc{deleted = true, body = {[]}}, + couch_db:update_doc(Db, Deleted, [?ADMIN_CTX]); + {not_found, _} -> + ok + catch + Tag:Error -> + ErrLog = "~p : error deleting checkpoint ~s : ~s error: ~p:~p", + couch_log:error(ErrLog, [?MODULE, DbName, DocId, Tag, Error]), + ok + end. 
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index 9e3d292ed..e1e75f34f 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -15,6 +15,7 @@ -export([get_view/4, get_view_index_pid/4]). -export([get_local_purge_doc_id/1, get_value_from_options/2]). -export([verify_view_filename/1, get_signature_from_filename/1]). +-export([get_signatures/1, get_purge_checkpoints/1, get_index_files/1]). -export([ddoc_to_mrst/2, init_state/4, reset_index/3]). -export([make_header/1]). -export([index_file/2, compaction_file/2, open_file/1]). @@ -53,6 +54,11 @@ true -> B end) ). +-define(IS_HEX(C), + ((C >= $0 andalso C =< $9) orelse + (C >= $a andalso C =< $f) orelse + (C >= $A andalso C =< $F)) +). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). @@ -70,31 +76,78 @@ get_value_from_options(Key, Options) -> end. verify_view_filename(FileName) -> - FilePathList = filename:split(FileName), - PureFN = lists:last(FilePathList), - case filename:extension(PureFN) of + case filename:extension(FileName) of ".view" -> - Sig = filename:basename(PureFN), - case - [ - Ch - || Ch <- Sig, - not (((Ch >= $0) and (Ch =< $9)) orelse - ((Ch >= $a) and (Ch =< $f)) orelse - ((Ch >= $A) and (Ch =< $F))) - ] == [] - of - true -> true; - false -> false - end; + Sig = get_signature_from_filename(FileName), + lists:all(fun(C) -> ?IS_HEX(C) end, Sig); _ -> false end. -get_signature_from_filename(FileName) -> - FilePathList = filename:split(FileName), - PureFN = lists:last(FilePathList), - filename:basename(PureFN, ".view"). +get_signature_from_filename(Path) -> + filename:basename(filename:basename(Path, ".view"), ".compact"). + +% Returns map of `Sig => true` elements with all the active signatures. +% Sig is a hex-encoded binary. 
+% +get_signatures(DbName) when is_binary(DbName) -> + couch_util:with_db(DbName, fun get_signatures/1); +get_signatures(Db) -> + DbName = couch_db:name(Db), + % get_design_docs/1 returns ejson for clustered shards, and + % #full_doc_info{}'s for other cases. + {ok, DDocs} = couch_db:get_design_docs(Db), + FoldFun = fun + ({[_ | _]} = EJsonDoc, Acc) -> + Doc = couch_doc:from_json_obj(EJsonDoc), + {ok, Mrst} = ddoc_to_mrst(DbName, Doc), + Sig = couch_util:to_hex_bin(Mrst#mrst.sig), + Acc#{Sig => true}; + (#full_doc_info{} = FDI, Acc) -> + {ok, Doc} = couch_db:open_doc_int(Db, FDI, [ejson_body]), + {ok, Mrst} = ddoc_to_mrst(DbName, Doc), + Sig = couch_util:to_hex_bin(Mrst#mrst.sig), + Acc#{Sig => true} + end, + lists:foldl(FoldFun, #{}, DDocs). + +% Returns a map of `Sig => DocId` elements for all the purge view +% checkpoint docs. Sig is a hex-encoded binary. +% +get_purge_checkpoints(DbName) when is_binary(DbName) -> + couch_util:with_db(DbName, fun get_purge_checkpoints/1); +get_purge_checkpoints(Db) -> + FoldFun = fun(#doc{id = Id}, Acc) -> + case Id of + <<?LOCAL_DOC_PREFIX, "purge-mrview-", Sig/binary>> -> + {ok, Acc#{Sig => Id}}; + _ -> + {stop, Acc} + end + end, + Opts = [{start_key, <<?LOCAL_DOC_PREFIX, "purge-mrview-">>}], + {ok, Signatures = #{}} = couch_db:fold_local_docs(Db, FoldFun, #{}, Opts), + Signatures. + +% Returns a map of `Sig => [FilePath, ...]` elements. Sig is a hex-encoded +% binary and FilePaths are lists as they intended to be passed to couch_file +% and file module functions. 
+% +get_index_files(DbName) when is_binary(DbName) -> + IdxDir = couch_index_util:index_dir(mrview, DbName), + WildcardPath = filename:join(IdxDir, "*"), + FoldFun = fun(F, Acc) -> + case verify_view_filename(F) of + true -> + Sig = ?l2b(get_signature_from_filename(F)), + maps:update_with(Sig, fun(Fs) -> [F | Fs] end, [F], Acc); + false -> + Acc + end + end, + lists:foldl(FoldFun, #{}, filelib:wildcard(WildcardPath)); +get_index_files(Db) -> + get_index_files(couch_db:name(Db)). get_view(Db, DDoc, ViewName, Args0) -> case get_view_index_state(Db, DDoc, ViewName, Args0) of diff --git a/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl b/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl index a495fd82c..2562bb511 100644 --- a/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl +++ b/src/couch_mrview/test/eunit/couch_mrview_util_tests.erl @@ -13,8 +13,11 @@ -module(couch_mrview_util_tests). -include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). +-define(DDOC_ID, <<"_design/bar">>). + couch_mrview_util_test_() -> [ ?_assertEqual(0, validate_group_level(undefined, undefined)), @@ -35,3 +38,139 @@ validate_group_level(Group, GroupLevel) -> Args0 = #mrargs{group = Group, group_level = GroupLevel, view_type = red}, Args1 = couch_mrview_util:validate_args(Args0), Args1#mrargs.group_level. + +get_signature_from_filename_test() -> + Sig = "da817c3d3f7413c1a610f25635a0c521", + P1 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.view", + P2 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.compact.view", + P3 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521", + ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P1)), + ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P2)), + ?assertEqual(Sig, couch_mrview_util:get_signature_from_filename(P3)). 
+ +verify_view_filename_test() -> + P1 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.view", + P2 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521.compact.view", + P3 = "/x.1667618375_design/mrview/da817c3d3f7413c1a610f25635a0c521", + ?assert(couch_mrview_util:verify_view_filename(P1)), + ?assert(couch_mrview_util:verify_view_filename(P2)), + ?assertNot(couch_mrview_util:verify_view_filename(P3)), + ?assertNot(couch_mrview_util:verify_view_filename("")), + ?assertNot(couch_mrview_util:verify_view_filename("foo.view")). + +setup() -> + DbName = ?tempdb(), + ok = fabric:create_db(DbName, [?ADMIN_CTX, {q, 2}]), + DDoc = couch_mrview_test_util:ddoc(map), + {ok, _} = fabric:update_doc(DbName, DDoc, [?ADMIN_CTX]), + {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}), + {ok, Db} = couch_mrview_test_util:init_db(?tempdb(), map), + {ok, _} = couch_mrview:query_view(Db, ?DDOC_ID, <<"baz">>), + {DbName, Db}. + +teardown({DbName, Db}) -> + couch_db:close(Db), + couch_server:delete(couch_db:name(Db), [?ADMIN_CTX]), + ok = fabric:delete_db(DbName, [?ADMIN_CTX]). + +get_signatures_test_() -> + { + setup, + fun() -> test_util:start_couch([fabric]) end, + fun test_util:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_get_signatures_local), + ?TDEF_FE(t_get_signatures_clustered), + ?TDEF_FE(t_get_purge_checkpoints_local), + ?TDEF_FE(t_get_purge_checkpoints_clustered), + ?TDEF_FE(t_get_index_files_local), + ?TDEF_FE(t_get_index_files_clustered) + ] + } + }. 
+ +t_get_signatures_local({_, Db}) -> + DbName = couch_db:name(Db), + Sigs = couch_mrview_util:get_signatures(DbName), + ?assert(is_map(Sigs)), + ?assertEqual(1, map_size(Sigs)), + [{Sig, true}] = maps:to_list(Sigs), + {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID), + ?assertEqual(proplists:get_value(signature, Info), Sig), + + {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [?ADMIN_CTX]), + Deleted = DDoc#doc{deleted = true, body = {[]}}, + {ok, _} = couch_db:update_doc(Db, Deleted, []), + ?assertEqual(#{}, couch_mrview_util:get_signatures(DbName)). + +t_get_signatures_clustered({DbName, _Db}) -> + [Shard1, Shard2] = mem3:local_shards(DbName), + ShardName1 = mem3:name(Shard1), + ShardName2 = mem3:name(Shard2), + Sigs = couch_mrview_util:get_signatures(ShardName1), + ?assertEqual(Sigs, couch_mrview_util:get_signatures(ShardName2)), + ?assert(is_map(Sigs)), + ?assertEqual(1, map_size(Sigs)), + [{Sig, true}] = maps:to_list(Sigs), + {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID), + ?assertEqual(proplists:get_value(signature, Info), Sig), + + {ok, DDoc} = fabric:open_doc(DbName, ?DDOC_ID, [?ADMIN_CTX]), + Deleted = DDoc#doc{deleted = true, body = {[]}}, + {ok, _} = fabric:update_doc(DbName, Deleted, [?ADMIN_CTX]), + ?assertEqual(#{}, couch_mrview_util:get_signatures(ShardName1)), + ?assertEqual(#{}, couch_mrview_util:get_signatures(ShardName2)). + +t_get_purge_checkpoints_local({_, Db}) -> + DbName = couch_db:name(Db), + Checkpoints = couch_mrview_util:get_purge_checkpoints(DbName), + ?assert(is_map(Checkpoints)), + ?assertEqual(1, map_size(Checkpoints)), + [{Sig, <<"_local/", _/binary>>}] = maps:to_list(Checkpoints), + {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID), + ?assertEqual(proplists:get_value(signature, Info), Sig). 
+ +t_get_purge_checkpoints_clustered({DbName, _Db}) -> + {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}), + [Shard1, Shard2] = mem3:local_shards(DbName), + ShardName1 = mem3:name(Shard1), + ShardName2 = mem3:name(Shard2), + Sigs1 = couch_mrview_util:get_purge_checkpoints(ShardName1), + Sigs2 = couch_mrview_util:get_purge_checkpoints(ShardName2), + ?assertEqual(lists:sort(maps:keys(Sigs1)), lists:sort(maps:keys(Sigs2))), + ?assert(is_map(Sigs1)), + ?assertEqual(1, map_size(Sigs1)), + [{Sig, <<"_local/", _/binary>>}] = maps:to_list(Sigs1), + {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID), + ?assertEqual(proplists:get_value(signature, Info), Sig). + +t_get_index_files_local({_, Db}) -> + DbName = couch_db:name(Db), + SigFilesMap = couch_mrview_util:get_index_files(DbName), + ?assert(is_map(SigFilesMap)), + ?assertEqual(1, map_size(SigFilesMap)), + [{Sig, [File]}] = maps:to_list(SigFilesMap), + ?assertMatch({ok, _}, file:read_file_info(File)), + {ok, Info} = couch_mrview:get_info(Db, ?DDOC_ID), + ?assertEqual(proplists:get_value(signature, Info), Sig). + +t_get_index_files_clustered({DbName, _Db}) -> + {ok, _} = fabric:query_view(DbName, <<"bar">>, <<"baz">>, #mrargs{}), + [Shard1, Shard2] = mem3:local_shards(DbName), + ShardName1 = mem3:name(Shard1), + ShardName2 = mem3:name(Shard2), + SigFilesMap1 = couch_mrview_util:get_index_files(ShardName1), + SigFilesMap2 = couch_mrview_util:get_index_files(ShardName2), + SigKeys1 = lists:sort(maps:keys(SigFilesMap1)), + SigKeys2 = lists:sort(maps:keys(SigFilesMap2)), + ?assertEqual(SigKeys1, SigKeys2), + ?assert(is_map(SigFilesMap1)), + ?assertEqual(1, map_size(SigFilesMap1)), + [{Sig, [File]}] = maps:to_list(SigFilesMap1), + ?assertMatch({ok, _}, file:read_file_info(File)), + {ok, Info} = couch_mrview:get_info(ShardName1, ?DDOC_ID), + ?assertEqual(proplists:get_value(signature, Info), Sig). |