summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2022-02-04 00:11:51 -0500
committerNick Vatamaniuc <nickva@users.noreply.github.com>2022-02-11 17:55:51 -0500
commit24d6582dfd341b7c22fcabd8fb85217c923f7bb2 (patch)
tree311f1645f08e32d081ad8656df4f47964b0d435b
parent81fe821496259718c91bb9554dbbb7d6a8988712 (diff)
downloadcouchdb-24d6582dfd341b7c22fcabd8fb85217c923f7bb2.tar.gz
Track libicu collator versions in the view header
Previously, libicu collator versions were not tracked, and during major OS version upgrades, it was possible to experience apparent data loss due to collation order changes between libicu library versions. The view order inconsistency would last until the view was compacted. This commit introduces a view info map in the header which records the list of libicu collator versions used by that view. The collator versions list is checked and updated every time a view is opened. The new view info map is re-using a previously removed view header field from 2.x views. The upgrade logic from 2.x to 3.x ignores that header field, and this allows for transparent downgrading back to 3.2.1, and then upgrading back to 3.2.1+ versions, all while keeping the same view signature. If there is no collator version recorded in the view header, the first time the view is opened, the header will be upgraded to record the current libicu version. It's possible to avoid immediately writing the upgraded header and instead delay it until the next view data update with this setting: ``` [view_upgrade] commit_on_header_upgrade = false ``` By default it's toggled to `true`, meaning the view header will be written immediately. The list of collator versions is returned in the _design/*/_info response. This allows users to easily track the condition when the view is built or opened with more than one libicu collator version. Views which have more than one collator version are submitted for re-compaction to the "upgrade_views" channel. This behavior is triggered both on update (which is the typical smoosh trigger mechanism), and when opened. Triggering on open is intended to be used with read-only views, which may not be updated after libicu upgrades, and so would perpetually emit inconsistent data. Automatic re-compaction may be disabled with a config setting: ``` [view_upgrade] compact_on_collator_upgrade = false ``` The default value is `true`.
-rw-r--r--rel/overlay/etc/default.ini10
-rw-r--r--src/couch_mrview/include/couch_mrview.hrl4
-rw-r--r--src/couch_mrview/src/couch_mrview_index.erl51
-rw-r--r--src/couch_mrview/src/couch_mrview_util.erl112
-rw-r--r--src/fabric/src/fabric_group_info.erl5
-rw-r--r--src/smoosh/src/smoosh_server.erl15
6 files changed, 171 insertions, 26 deletions
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 93aa1ca59..3c15ae92c 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -718,3 +718,13 @@ partitioned||* = true
additional_port = false
bind_address = 127.0.0.1
port = {{prometheus_port}}
+
+[view_upgrade]
+; When enabled, views with more than one collator version will be submitted
+; for auto-compaction to smoosh's "upgrade_views" channel.
+;compact_on_collator_upgrade = true
+
+; Eagerly commit views which have been upgraded from older header formats. A
+; reason to disable this setting could be if the views need an upgrade but are
+; located on a read-only file system.
+;commit_on_header_upgrade = true
diff --git a/src/couch_mrview/include/couch_mrview.hrl b/src/couch_mrview/include/couch_mrview.hrl
index bb0ab0b46..b31463c53 100644
--- a/src/couch_mrview/include/couch_mrview.hrl
+++ b/src/couch_mrview/include/couch_mrview.hrl
@@ -29,7 +29,8 @@
doc_acc,
doc_queue,
write_queue,
- qserver=nil
+ qserver=nil,
+ view_info=#{}
}).
@@ -49,6 +50,7 @@
seq=0,
purge_seq=0,
id_btree_state=nil,
+ view_info=#{}, % replaces log btree in versions < 3.x
view_states=nil
}).
diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl
index a024d35c8..1bfdb2818 100644
--- a/src/couch_mrview/src/couch_mrview_index.erl
+++ b/src/couch_mrview/src/couch_mrview_index.erl
@@ -63,7 +63,8 @@ get(info, State) ->
language = Lang,
update_seq = UpdateSeq,
purge_seq = PurgeSeq,
- views = Views
+ views = Views,
+ view_info = ViewInfo
} = State,
{ok, FileSize} = couch_file:bytes(Fd),
{ok, ExternalSize} = couch_mrview_util:calculate_external_size(Views),
@@ -72,7 +73,8 @@ get(info, State) ->
UpdateOptions0 = get(update_options, State),
UpdateOptions = [atom_to_binary(O, latin1) || O <- UpdateOptions0],
-
+ CollVsTups = couch_mrview_util:get_collator_versions(ViewInfo),
+ CollVsBins = [couch_util:version_to_binary(V) || V <- CollVsTups],
{ok, [
{signature, list_to_binary(couch_index_util:hexsig(Sig))},
{language, Lang},
@@ -84,7 +86,8 @@ get(info, State) ->
]}},
{update_seq, UpdateSeq},
{purge_seq, PurgeSeq},
- {update_options, UpdateOptions}
+ {update_options, UpdateOptions},
+ {collator_versions, CollVsBins}
]};
get(Other, _) ->
throw({unknown_index_property, Other}).
@@ -123,15 +126,15 @@ open(Db, State0) ->
% upgrade code for <= 2.x
{ok, {OldSig, Header}} ->
% Matching view signatures.
- NewSt = couch_mrview_util:init_state(Db, Fd, State, Header),
- ok = commit(NewSt),
+ NewSt = init_and_upgrade_state(Db, Fd, State, Header),
ensure_local_purge_doc(Db, NewSt),
{ok, NewSt};
% end of upgrade code for <= 2.x
{ok, {Sig, Header}} ->
% Matching view signatures.
- NewSt = couch_mrview_util:init_state(Db, Fd, State, Header),
+ NewSt = init_and_upgrade_state(Db, Fd, State, Header),
ensure_local_purge_doc(Db, NewSt),
+ check_collator_versions(DbName, NewSt),
{ok, NewSt};
{ok, {WrongSig, _}} ->
couch_log:error(
@@ -321,3 +324,39 @@ update_local_purge_doc(Db, State, PSeq) ->
BaseDoc
end,
couch_db:update_doc(Db, Doc, []).
+
+init_and_upgrade_state(Db, Fd, State, Header) ->
+ {Commit, #mrst{} = Mrst} = couch_mrview_util:init_state(Db, Fd, State, Header),
+ case Commit of
+ true ->
+ case couch_mrview_util:commit_on_header_upgrade() of
+ true ->
+ LogMsg = "~p : Index ~s ~s was upgraded",
+ DbName = couch_db:name(Db),
+ IdxName = State#mrst.idx_name,
+ couch_log:warning(LogMsg, [?MODULE, DbName, IdxName]),
+ ok = commit(Mrst),
+ Mrst;
+ false ->
+ Mrst
+ end;
+ false ->
+ Mrst
+ end.
+
+% Check if there are multiple collator versions used to build this view
+check_collator_versions(DbName, #mrst{} = Mrst) ->
+ case couch_mrview_util:compact_on_collator_upgrade() of
+ true ->
+ #mrst{view_info = ViewInfo, idx_name = IdxName} = Mrst,
+ Vers = couch_mrview_util:get_collator_versions(ViewInfo),
+ case length(Vers) >= 2 of
+ true ->
+ Event = {index_collator_upgrade, IdxName},
+ couch_event:notify(DbName, Event);
+ false ->
+ ok
+ end;
+ false ->
+ ok
+ end.
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index b7220f71f..9e3d292ed 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -32,6 +32,9 @@
-export([get_view_keys/1, get_view_queries/1]).
-export([set_view_type/3]).
-export([set_extra/3, get_extra/2, get_extra/3]).
+-export([get_collator_versions/1]).
+-export([compact_on_collator_upgrade/0]).
+-export([commit_on_header_upgrade/0]).
-define(MOD, couch_mrview_index).
-define(GET_VIEW_RETRY_COUNT, 1).
@@ -285,6 +288,7 @@ init_state(Db, Fd, #mrst{views = Views} = State, nil) ->
seq = 0,
purge_seq = PurgeSeq,
id_btree_state = nil,
+ view_info = update_collator_versions(#{}),
view_states = [make_view_state(#mrview{}) || _ <- Views]
},
init_state(Db, Fd, State, Header);
@@ -293,12 +297,14 @@ init_state(Db, Fd, State, Header) ->
language = Lang,
views = Views
} = State,
- #mrheader{
+
+ {ShouldCommit, #mrheader{
seq = Seq,
purge_seq = PurgeSeq,
id_btree_state = IdBtreeState,
+ view_info = ViewInfo,
view_states = ViewStates
- } = maybe_update_header(Header),
+ }} = maybe_update_header(Header),
IdBtOpts = [
{compression, couch_compress:get_compression_method()}
@@ -308,14 +314,15 @@ init_state(Db, Fd, State, Header) ->
OpenViewFun = fun(St, View) -> open_view(Db, Fd, Lang, St, View) end,
Views2 = lists:zipwith(OpenViewFun, ViewStates, Views),
- State#mrst{
+ {ShouldCommit, State#mrst{
fd = Fd,
fd_monitor = erlang:monitor(process, Fd),
update_seq = Seq,
purge_seq = PurgeSeq,
id_btree = IdBtree,
- views = Views2
- }.
+ views = Views2,
+ view_info = ViewInfo
+ }}.
open_view(_Db, Fd, Lang, ViewState, View) ->
ReduceFun = make_reduce_fun(Lang, View#mrview.reduce_funs),
@@ -764,14 +771,16 @@ make_header(State) ->
update_seq = Seq,
purge_seq = PurgeSeq,
id_btree = IdBtree,
- views = Views
+ views = Views,
+ view_info = ViewInfo
} = State,
#mrheader{
seq = Seq,
purge_seq = PurgeSeq,
id_btree_state = get_btree_state(IdBtree),
- view_states = [make_view_state(V) || V <- Views]
+ view_info = ViewInfo,
+ view_states = [make_disk_view_state(V) || V <- Views]
}.
index_file(DbName, Sig) ->
@@ -811,7 +820,8 @@ delete_file(FName) ->
reset_index(Db, Fd, #mrst{sig = Sig} = State) ->
ok = couch_file:truncate(Fd, 0),
ok = couch_file:write_header(Fd, {Sig, nil}),
- init_state(Db, Fd, reset_state(State), nil).
+ {_Commit, NewSt} = init_state(Db, Fd, reset_state(State), nil),
+ NewSt.
reset_state(State) ->
State#mrst{
@@ -819,7 +829,8 @@ reset_state(State) ->
qserver = nil,
update_seq = 0,
id_btree = nil,
- views = [View#mrview{btree = nil} || View <- State#mrst.views]
+ views = [View#mrview{btree = nil} || View <- State#mrst.views],
+ view_info = #{}
}.
all_docs_key_opts(#mrargs{extra = Extra} = Args) ->
@@ -1070,18 +1081,41 @@ old_view_format(View, SI, KSI) ->
View#mrview.options
}.
-maybe_update_header(#mrheader{} = Header) ->
- Header;
-maybe_update_header(Header) when tuple_size(Header) == 6 ->
- #mrheader{
- seq = element(2, Header),
- purge_seq = element(3, Header),
- id_btree_state = element(4, Header),
- view_states = [make_view_state(S) || S <- element(6, Header)]
- }.
+maybe_update_header(#mrheader{view_info = Info} = Header) when is_map(Info) ->
+ % Latest (3.2.1+) version. The size of the record is the same as
+ % the <2.3.1 version. The main difference is that the LogBt field
+ % is now a map. This trick allows for easy downgrading back to
+ % version 3.2.1 and then upgrading back to 3.2.1+ if needed.
+ {false, Header#mrheader{
+ view_info = update_collator_versions(Info),
+ view_states = [make_view_state(S) || S <- Header#mrheader.view_states]
+ }};
+maybe_update_header({mrheader, Seq, PSeq, IDBt, ViewStates}) ->
+ % Versions >2.3.1 and =<3.2.1 (no view info map)
+ {true, #mrheader{
+ seq = Seq,
+ purge_seq = PSeq,
+ id_btree_state = IDBt,
+ view_info = update_collator_versions(#{}),
+ view_states = [make_view_state(S) || S <- ViewStates]
+ }};
+maybe_update_header({mrheader, Seq, PSeq, IDBt, _LogBt, ViewStates}) ->
+ % Versions <2.3.1.
+ {true, #mrheader{
+ seq = Seq,
+ purge_seq = PSeq,
+ id_btree_state = IDBt,
+ view_info = update_collator_versions(#{}),
+ view_states = [make_view_state(S) || S <- ViewStates]
+ }}.
%% End of <= 2.x upgrade code.
+% Used for creating new view states or reading (upgrading) from
+% disk. On disk, the state will be a 5 tuple with nil values in
+% positions 2 and 3 to allow downgrading between current version and
+% =<3.2.1 views.
+%
make_view_state(#mrview{} = View) ->
BTState = get_btree_state(View#mrview.btree),
{
@@ -1089,11 +1123,35 @@ make_view_state(#mrview{} = View) ->
View#mrview.update_seq,
View#mrview.purge_seq
};
-make_view_state({BTState, _SeqBTState, _KSeqBTState, UpdateSeq, PurgeSeq}) ->
+make_view_state({BTState, UpdateSeq, PurgeSeq}) ->
+ % Versions >2.x and =<3.2.1
+ {BTState, UpdateSeq, PurgeSeq};
+make_view_state({BTState, _SeqBTOrNil, _KSeqBTOrNil, UpdateSeq, PurgeSeq}) ->
+ % Current disk version and version 2.x views
{BTState, UpdateSeq, PurgeSeq};
make_view_state(nil) ->
{nil, 0, 0}.
+% Used by make_header/1 before committing to disk. The two added nil
+% values in position 2 and 3 make the state on disk look like a 2.x
+% view, where those fields used to be SeqBTState and KSeqBTState,
+% respectively. This is to allow easy downgrading between current
+% version and >2.x and =<3.2.1 views.
+%
+make_disk_view_state(#mrview{} = View) ->
+ BTState = get_btree_state(View#mrview.btree),
+ {
+ BTState,
+ nil,
+ nil,
+ View#mrview.update_seq,
+ View#mrview.purge_seq
+ };
+make_disk_view_state({BTState, UpdateSeq, PurgeSeq}) ->
+ {BTState, nil, nil, UpdateSeq, PurgeSeq};
+make_disk_view_state(nil) ->
+ {nil, nil, nil, 0, 0}.
+
get_key_btree_state(ViewState) ->
element(1, ViewState).
@@ -1216,3 +1274,19 @@ kv_external_size(KVList, Reduction) ->
?term_size(Reduction),
KVList
).
+
+update_collator_versions(#{} = ViewInfo) ->
+ Versions = maps:get(ucol_vs, ViewInfo, []),
+ Ver = tuple_to_list(couch_ejson_compare:get_collator_version()),
+ ViewInfo#{ucol_vs => lists:usort([Ver | Versions])}.
+
+get_collator_versions(#{ucol_vs := Versions}) when is_list(Versions) ->
+ Versions;
+get_collator_versions(#{}) ->
+ [].
+
+compact_on_collator_upgrade() ->
+ config:get_boolean("view_upgrade", "compact_on_collator_upgrade", true).
+
+commit_on_header_upgrade() ->
+ config:get_boolean("view_upgrade", "commit_on_header_upgrade", true).
diff --git a/src/fabric/src/fabric_group_info.erl b/src/fabric/src/fabric_group_info.erl
index c7d7293fd..ff875aa96 100644
--- a/src/fabric/src/fabric_group_info.erl
+++ b/src/fabric/src/fabric_group_info.erl
@@ -135,6 +135,11 @@ merge_results(Info) ->
[{update_seq, lists:sum(X)} | Acc];
(purge_seq, X, Acc) ->
[{purge_seq, lists:sum(X)} | Acc];
+ (collator_versions, X, Acc) ->
+ % Concatenate (undo orddict:append/3), then
+ % sort and remove duplicates.
+ Vs = lists:usort(lists:flatmap(fun(V) -> V end, X)),
+ [{collator_versions, Vs} | Acc];
(_, _, Acc) ->
Acc
end,
diff --git a/src/smoosh/src/smoosh_server.erl b/src/smoosh/src/smoosh_server.erl
index 0526625ff..5529e93de 100644
--- a/src/smoosh/src/smoosh_server.erl
+++ b/src/smoosh/src/smoosh_server.erl
@@ -100,6 +100,9 @@ handle_db_event(DbName, updated, St) ->
handle_db_event(DbName, {index_commit, IdxName}, St) ->
smoosh_server:enqueue({DbName, IdxName}),
{ok, St};
+handle_db_event(DbName, {index_collator_upgrade, IdxName}, St) ->
+ smoosh_server:enqueue({DbName, IdxName}),
+ {ok, St};
handle_db_event(DbName, {schema_updated, DDocId}, St) ->
smoosh_server:enqueue({schema, DbName, DDocId}),
{ok, St};
@@ -480,6 +483,9 @@ get_priority(Channel) ->
smoosh_utils:get(Channel, "priority", "ratio").
needs_upgrade(Props) ->
+ db_needs_upgrade(Props) orelse view_needs_upgrade(Props).
+
+db_needs_upgrade(Props) ->
DiskVersion = couch_util:get_value(disk_format_version, Props),
case couch_util:get_value(engine, Props) of
couch_bt_engine ->
@@ -488,6 +494,15 @@ needs_upgrade(Props) ->
false
end.
+view_needs_upgrade(Props) ->
+ case couch_util:get_value(collator_versions, Props) of
+ undefined ->
+ false;
+ Versions when is_list(Versions) ->
+ Enabled = couch_mrview_util:compact_on_collator_upgrade(),
+ Enabled andalso length(Versions) >= 2
+ end.
+
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").