diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2022-02-04 00:11:51 -0500 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2022-02-11 17:55:51 -0500 |
commit | 24d6582dfd341b7c22fcabd8fb85217c923f7bb2 (patch) | |
tree | 311f1645f08e32d081ad8656df4f47964b0d435b | |
parent | 81fe821496259718c91bb9554dbbb7d6a8988712 (diff) | |
download | couchdb-24d6582dfd341b7c22fcabd8fb85217c923f7bb2.tar.gz |
Track libicu collator versions in the view header
Previously, libicu collator versions were not tracked, and during major OS
version upgrades, it was possible to experience apparent data loss due to
collation order changes between libicu library versions. The view order
inconsistency would last until the view is compacted.
This commit introduces a view info map in the header which records the list of
libicu collator versions used by that view. The collator versions list is
checked and updated every time a view is opened.
The new view info map is re-using a previously removed view header field from
2.x views. The upgrade logic from 2.x to 3.x ignores that header field, and
this allows for transparent downgrading back to 3.2.1, and then upgrading back
to 3.2.1+ versions, all while keeping the same view signature.
If there is no collator version recorded in the view header, the first time the
view is opened, the header will be upgraded to record the current libicu
version. It's possible to avoid immediately writing the upgraded header and
instead delay the write until the next view data update with this setting:
```
[view_upgrade]
commit_on_header_upgrade = false
```
By default it's toggled to `true`, meaning the view header will be written
immediately.
The list of collator versions is returned in the _design/*/_info response. This
allows users to easily track the condition when the view is built or opened
with more than one libicu collator version.
Views which have more than one collator version are submitted for
re-compaction to the "upgrade_views" channel. This behavior is triggered both
on update (which is the typical smoosh trigger mechanism), and when opened.
Triggering on open is intended to be used with read-only views, which may not be
updated after libicu upgrades, and so would perpetually emit inconsistent data.
Automatic re-compaction may be disabled with a config setting:
```
[view_upgrade]
compact_on_collator_upgrade = false
```
The default value is `true`.
-rw-r--r-- | rel/overlay/etc/default.ini | 10 | ||||
-rw-r--r-- | src/couch_mrview/include/couch_mrview.hrl | 4 | ||||
-rw-r--r-- | src/couch_mrview/src/couch_mrview_index.erl | 51 | ||||
-rw-r--r-- | src/couch_mrview/src/couch_mrview_util.erl | 112 | ||||
-rw-r--r-- | src/fabric/src/fabric_group_info.erl | 5 | ||||
-rw-r--r-- | src/smoosh/src/smoosh_server.erl | 15 |
6 files changed, 171 insertions, 26 deletions
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 93aa1ca59..3c15ae92c 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -718,3 +718,13 @@ partitioned||* = true additional_port = false bind_address = 127.0.0.1 port = {{prometheus_port}} + +[view_upgrade] +; When enabled, views with more than one collator versions will be submitted +; for auto-compaction to smoosh's "upgrade_views" channel. +;compact_on_collator_upgrade = true + +; Eagerly commit views which been upgraded from older header formats. A reason +; to disable this setting could be if the views need an upgrade but located on +; read-only file system. +;commit_on_header_upgrade = true diff --git a/src/couch_mrview/include/couch_mrview.hrl b/src/couch_mrview/include/couch_mrview.hrl index bb0ab0b46..b31463c53 100644 --- a/src/couch_mrview/include/couch_mrview.hrl +++ b/src/couch_mrview/include/couch_mrview.hrl @@ -29,7 +29,8 @@ doc_acc, doc_queue, write_queue, - qserver=nil + qserver=nil, + view_info=#{} }). @@ -49,6 +50,7 @@ seq=0, purge_seq=0, id_btree_state=nil, + view_info=#{}, % replaces log btree in versions < 3.x view_states=nil }). 
diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl index a024d35c8..1bfdb2818 100644 --- a/src/couch_mrview/src/couch_mrview_index.erl +++ b/src/couch_mrview/src/couch_mrview_index.erl @@ -63,7 +63,8 @@ get(info, State) -> language = Lang, update_seq = UpdateSeq, purge_seq = PurgeSeq, - views = Views + views = Views, + view_info = ViewInfo } = State, {ok, FileSize} = couch_file:bytes(Fd), {ok, ExternalSize} = couch_mrview_util:calculate_external_size(Views), @@ -72,7 +73,8 @@ get(info, State) -> UpdateOptions0 = get(update_options, State), UpdateOptions = [atom_to_binary(O, latin1) || O <- UpdateOptions0], - + CollVsTups = couch_mrview_util:get_collator_versions(ViewInfo), + CollVsBins = [couch_util:version_to_binary(V) || V <- CollVsTups], {ok, [ {signature, list_to_binary(couch_index_util:hexsig(Sig))}, {language, Lang}, @@ -84,7 +86,8 @@ get(info, State) -> ]}}, {update_seq, UpdateSeq}, {purge_seq, PurgeSeq}, - {update_options, UpdateOptions} + {update_options, UpdateOptions}, + {collator_versions, CollVsBins} ]}; get(Other, _) -> throw({unknown_index_property, Other}). @@ -123,15 +126,15 @@ open(Db, State0) -> % upgrade code for <= 2.x {ok, {OldSig, Header}} -> % Matching view signatures. - NewSt = couch_mrview_util:init_state(Db, Fd, State, Header), - ok = commit(NewSt), + NewSt = init_and_upgrade_state(Db, Fd, State, Header), ensure_local_purge_doc(Db, NewSt), {ok, NewSt}; % end of upgrade code for <= 2.x {ok, {Sig, Header}} -> % Matching view signatures. - NewSt = couch_mrview_util:init_state(Db, Fd, State, Header), + NewSt = init_and_upgrade_state(Db, Fd, State, Header), ensure_local_purge_doc(Db, NewSt), + check_collator_versions(DbName, NewSt), {ok, NewSt}; {ok, {WrongSig, _}} -> couch_log:error( @@ -321,3 +324,39 @@ update_local_purge_doc(Db, State, PSeq) -> BaseDoc end, couch_db:update_doc(Db, Doc, []). 
+ +init_and_upgrade_state(Db, Fd, State, Header) -> + {Commit, #mrst{} = Mrst} = couch_mrview_util:init_state(Db, Fd, State, Header), + case Commit of + true -> + case couch_mrview_util:commit_on_header_upgrade() of + true -> + LogMsg = "~p : Index ~s ~s was upgraded", + DbName = couch_db:name(Db), + IdxName = State#mrst.idx_name, + couch_log:warning(LogMsg, [?MODULE, DbName, IdxName]), + ok = commit(Mrst), + Mrst; + false -> + Mrst + end; + false -> + Mrst + end. + +% Check if there are multiple collator versions used to build this view +check_collator_versions(DbName, #mrst{} = Mrst) -> + case couch_mrview_util:compact_on_collator_upgrade() of + true -> + #mrst{view_info = ViewInfo, idx_name = IdxName} = Mrst, + Vers = couch_mrview_util:get_collator_versions(ViewInfo), + case length(Vers) >= 2 of + true -> + Event = {index_collator_upgrade, IdxName}, + couch_event:notify(DbName, Event); + false -> + ok + end; + false -> + ok + end. diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index b7220f71f..9e3d292ed 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -32,6 +32,9 @@ -export([get_view_keys/1, get_view_queries/1]). -export([set_view_type/3]). -export([set_extra/3, get_extra/2, get_extra/3]). +-export([get_collator_versions/1]). +-export([compact_on_collator_upgrade/0]). +-export([commit_on_header_upgrade/0]). -define(MOD, couch_mrview_index). -define(GET_VIEW_RETRY_COUNT, 1). 
@@ -285,6 +288,7 @@ init_state(Db, Fd, #mrst{views = Views} = State, nil) -> seq = 0, purge_seq = PurgeSeq, id_btree_state = nil, + view_info = update_collator_versions(#{}), view_states = [make_view_state(#mrview{}) || _ <- Views] }, init_state(Db, Fd, State, Header); @@ -293,12 +297,14 @@ init_state(Db, Fd, State, Header) -> language = Lang, views = Views } = State, - #mrheader{ + + {ShouldCommit, #mrheader{ seq = Seq, purge_seq = PurgeSeq, id_btree_state = IdBtreeState, + view_info = ViewInfo, view_states = ViewStates - } = maybe_update_header(Header), + }} = maybe_update_header(Header), IdBtOpts = [ {compression, couch_compress:get_compression_method()} @@ -308,14 +314,15 @@ init_state(Db, Fd, State, Header) -> OpenViewFun = fun(St, View) -> open_view(Db, Fd, Lang, St, View) end, Views2 = lists:zipwith(OpenViewFun, ViewStates, Views), - State#mrst{ + {ShouldCommit, State#mrst{ fd = Fd, fd_monitor = erlang:monitor(process, Fd), update_seq = Seq, purge_seq = PurgeSeq, id_btree = IdBtree, - views = Views2 - }. + views = Views2, + view_info = ViewInfo + }}. open_view(_Db, Fd, Lang, ViewState, View) -> ReduceFun = make_reduce_fun(Lang, View#mrview.reduce_funs), @@ -764,14 +771,16 @@ make_header(State) -> update_seq = Seq, purge_seq = PurgeSeq, id_btree = IdBtree, - views = Views + views = Views, + view_info = ViewInfo } = State, #mrheader{ seq = Seq, purge_seq = PurgeSeq, id_btree_state = get_btree_state(IdBtree), - view_states = [make_view_state(V) || V <- Views] + view_info = ViewInfo, + view_states = [make_disk_view_state(V) || V <- Views] }. index_file(DbName, Sig) -> @@ -811,7 +820,8 @@ delete_file(FName) -> reset_index(Db, Fd, #mrst{sig = Sig} = State) -> ok = couch_file:truncate(Fd, 0), ok = couch_file:write_header(Fd, {Sig, nil}), - init_state(Db, Fd, reset_state(State), nil). + {_Commit, NewSt} = init_state(Db, Fd, reset_state(State), nil), + NewSt. 
reset_state(State) -> State#mrst{ @@ -819,7 +829,8 @@ reset_state(State) -> qserver = nil, update_seq = 0, id_btree = nil, - views = [View#mrview{btree = nil} || View <- State#mrst.views] + views = [View#mrview{btree = nil} || View <- State#mrst.views], + view_info = #{} }. all_docs_key_opts(#mrargs{extra = Extra} = Args) -> @@ -1070,18 +1081,41 @@ old_view_format(View, SI, KSI) -> View#mrview.options }. -maybe_update_header(#mrheader{} = Header) -> - Header; -maybe_update_header(Header) when tuple_size(Header) == 6 -> - #mrheader{ - seq = element(2, Header), - purge_seq = element(3, Header), - id_btree_state = element(4, Header), - view_states = [make_view_state(S) || S <- element(6, Header)] - }. +maybe_update_header(#mrheader{view_info = Info} = Header) when is_map(Info) -> + % Latest (3.2.1+) version. The size of the record is the same as + % the <2.3.1 version. The main difference is that the LogBt field + % is now a map. This trick allows for easy downgrading back to + % version 3.2.1 and then upgrading back to 3.2.1+ if needed. + {false, Header#mrheader{ + view_info = update_collator_versions(Info), + view_states = [make_view_state(S) || S <- Header#mrheader.view_states] + }}; +maybe_update_header({mrheader, Seq, PSeq, IDBt, ViewStates}) -> + % Versions >2.3.1 and =<3.2.1 (no view info map) + {true, #mrheader{ + seq = Seq, + purge_seq = PSeq, + id_btree_state = IDBt, + view_info = update_collator_versions(#{}), + view_states = [make_view_state(S) || S <- ViewStates] + }}; +maybe_update_header({mrheader, Seq, PSeq, IDBt, _LogBt, ViewStates}) -> + % Versions <2.3.1. + {true, #mrheader{ + seq = Seq, + purge_seq = PSeq, + id_btree_state = IDBt, + view_info = update_collator_versions(#{}), + view_states = [make_view_state(S) || S <- ViewStates] + }}. %% End of <= 2.x upgrade code. +% Used for creating a new view states or reading (upgrading) from +% disk. 
On disk, the state will be a 5 tuple with nil values in +% positions 2 and 3 to allow downgrading between current version and +% =<3.2.1 views. +% make_view_state(#mrview{} = View) -> BTState = get_btree_state(View#mrview.btree), { @@ -1089,11 +1123,35 @@ make_view_state(#mrview{} = View) -> View#mrview.update_seq, View#mrview.purge_seq }; -make_view_state({BTState, _SeqBTState, _KSeqBTState, UpdateSeq, PurgeSeq}) -> +make_view_state({BTState, UpdateSeq, PurgeSeq}) -> + % Versions >2.x and =<3.2.1 + {BTState, UpdateSeq, PurgeSeq}; +make_view_state({BTState, _SeqBTOrNil, _KSeqBTOrNil, UpdateSeq, PurgeSeq}) -> + % Current disk version and version 2.x views {BTState, UpdateSeq, PurgeSeq}; make_view_state(nil) -> {nil, 0, 0}. +% Used by make_header/1 before committing to disk. The two added nil +% values in position 2 and 3 make the state on disk look like a 2.x +% view, where those fields used to be SeqBTState and KSeqBTState, +% respectively. This is to allow easy downgrading between current +% version and >2.x and =<3.2.1 views. +% +make_disk_view_state(#mrview{} = View) -> + BTState = get_btree_state(View#mrview.btree), + { + BTState, + nil, + nil, + View#mrview.update_seq, + View#mrview.purge_seq + }; +make_disk_view_state({BTState, UpdateSeq, PurgeSeq}) -> + {BTState, nil, nil, UpdateSeq, PurgeSeq}; +make_disk_view_state(nil) -> + {nil, nil, nil, 0, 0}. + get_key_btree_state(ViewState) -> element(1, ViewState). @@ -1216,3 +1274,19 @@ kv_external_size(KVList, Reduction) -> ?term_size(Reduction), KVList ). + +update_collator_versions(#{} = ViewInfo) -> + Versions = maps:get(ucol_vs, ViewInfo, []), + Ver = tuple_to_list(couch_ejson_compare:get_collator_version()), + ViewInfo#{ucol_vs => lists:usort([Ver | Versions])}. + +get_collator_versions(#{ucol_vs := Versions}) when is_list(Versions) -> + Versions; +get_collator_versions(#{}) -> + []. + +compact_on_collator_upgrade() -> + config:get_boolean("view_upgrade", "compact_on_collator_upgrade", true). 
+ +commit_on_header_upgrade() -> + config:get_boolean("view_upgrade", "commit_on_header_upgrade", true). diff --git a/src/fabric/src/fabric_group_info.erl b/src/fabric/src/fabric_group_info.erl index c7d7293fd..ff875aa96 100644 --- a/src/fabric/src/fabric_group_info.erl +++ b/src/fabric/src/fabric_group_info.erl @@ -135,6 +135,11 @@ merge_results(Info) -> [{update_seq, lists:sum(X)} | Acc]; (purge_seq, X, Acc) -> [{purge_seq, lists:sum(X)} | Acc]; + (collator_versions, X, Acc) -> + % Concatenate (undo orddict:append/3), then + % sort and remove duplicates. + Vs = lists:usort(lists:flatmap(fun(V) -> V end, X)), + [{collator_versions, Vs} | Acc]; (_, _, Acc) -> Acc end, diff --git a/src/smoosh/src/smoosh_server.erl b/src/smoosh/src/smoosh_server.erl index 0526625ff..5529e93de 100644 --- a/src/smoosh/src/smoosh_server.erl +++ b/src/smoosh/src/smoosh_server.erl @@ -100,6 +100,9 @@ handle_db_event(DbName, updated, St) -> handle_db_event(DbName, {index_commit, IdxName}, St) -> smoosh_server:enqueue({DbName, IdxName}), {ok, St}; +handle_db_event(DbName, {index_collator_upgrade, IdxName}, St) -> + smoosh_server:enqueue({DbName, IdxName}), + {ok, St}; handle_db_event(DbName, {schema_updated, DDocId}, St) -> smoosh_server:enqueue({schema, DbName, DDocId}), {ok, St}; @@ -480,6 +483,9 @@ get_priority(Channel) -> smoosh_utils:get(Channel, "priority", "ratio"). needs_upgrade(Props) -> + db_needs_upgrade(Props) orelse view_needs_upgrade(Props). + +db_needs_upgrade(Props) -> DiskVersion = couch_util:get_value(disk_format_version, Props), case couch_util:get_value(engine, Props) of couch_bt_engine -> @@ -488,6 +494,15 @@ needs_upgrade(Props) -> false end. +view_needs_upgrade(Props) -> + case couch_util:get_value(collator_versions, Props) of + undefined -> + false; + Versions when is_list(Versions) -> + Enabled = couch_mrview_util:compact_on_collator_upgrade(), + Enabled andalso length(Versions) >= 2 + end. + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). |