diff options
author | Gabor Pali <gabor.pali@ibm.com> | 2023-03-16 20:23:44 +0100 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2023-04-18 23:51:32 -0400 |
commit | c83b5ccb6757dafdee982deb3dfe7bb3cee9f67f (patch) | |
tree | cbaffc448d31d95742a97bd7be8f3d10c348d6c2 | |
parent | 1ed0519a15427b0db6d03ecdc543227691fc3bbc (diff) | |
download | couchdb-c83b5ccb6757dafdee982deb3dfe7bb3cee9f67f.tar.gz |
mango: introduce support for covering indexes
As a performance improvement, shorten the gap between Mango
queries and the underlying map-reduce views: try to serve
requests without pulling documents from the primary data set, i.e.
run the query with `include_docs` set to `false` when there is a
chance that it can be "covered" by the chosen index. The rows in
the results are then built from the information stored there.
Extend the response of the `_explain` endpoint with a `covered`
Boolean attribute that shows whether the query would be covered
by the index or not.
Remarks:
- This should be a transparent optimization, without any semantic
effect on the queries.
- Because the main purpose of indexes is to store keys and the
document identifiers, the change will only work in cases when
the selected fields are a subset of those. The chance of being
covered could be increased by adding more non-key fields to the
index, but that is not in scope here.
-rw-r--r-- | src/mango/src/mango_cursor_view.erl | 86 | ||||
-rw-r--r-- | src/mango/src/mango_idx_view.erl | 18 |
2 files changed, 77 insertions, 27 deletions
diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index a8a255f72..8e79f608e 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -39,15 +39,19 @@ % viewcbargs wraps up the arguments that view_cb uses into a single % entry in the mrargs.extra list. We use a Map to allow us to later % add fields without having old messages causing errors/crashes. -viewcbargs_new(Selector, Fields) -> +viewcbargs_new(Selector, Fields, CoveringIndex) -> #{ selector => Selector, - fields => Fields + fields => Fields, + covering_index => CoveringIndex }. + viewcbargs_get(selector, Args) when is_map(Args) -> maps:get(selector, Args, undefined); viewcbargs_get(fields, Args) when is_map(Args) -> - maps:get(fields, Args, undefined). + maps:get(fields, Args, undefined); +viewcbargs_get(covering_index, Args) when is_map(Args) -> + maps:get(covering_index, Args, undefined). create(Db, Indexes, Selector, Opts) -> FieldRanges = mango_idx_view:field_ranges(Selector), @@ -73,13 +77,11 @@ create(Db, Indexes, Selector, Opts) -> bookmark = Bookmark }}. -explain(Cursor) -> - #cursor{ - opts = Opts - } = Cursor, - +explain(#cursor{opts = Opts} = Cursor) -> BaseArgs = base_args(Cursor), - Args = apply_opts(Opts, BaseArgs), + Args0 = apply_opts(Opts, BaseArgs), + #cursor{index = Index, fields = Fields} = Cursor, + Args = consider_index_coverage(Index, Fields, Args0), [ {mrargs, @@ -94,7 +96,8 @@ explain(Cursor) -> {stable, Args#mrargs.stable}, {update, Args#mrargs.update}, {conflicts, Args#mrargs.conflicts} - ]}} + ]}}, + {covered, mango_idx_view:covers(Index, Fields)} ]. 
% replace internal values that cannot @@ -125,6 +128,13 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) - mango_idx:end_key(Idx, Cursor#cursor.ranges) } end, + CoveringIndex = + case mango_idx_view:covers(Idx, Fields) of + true -> + Idx; + false -> + undefined + end, #mrargs{ view_type = map, reduce = false, @@ -137,7 +147,7 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) - {callback, {?MODULE, view_cb}}, % TODO remove selector. It supports older nodes during version upgrades. {selector, Selector}, - {callback_args, viewcbargs_new(Selector, Fields)}, + {callback_args, viewcbargs_new(Selector, Fields, CoveringIndex)}, {ignore_partition_query_limit, true} ] @@ -157,7 +167,8 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu BaseArgs = base_args(Cursor), #cursor{opts = Opts, bookmark = Bookmark} = Cursor, Args0 = apply_opts(Opts, BaseArgs), - Args = mango_json_bookmark:update_args(Bookmark, Args0), + Args1 = consider_index_coverage(Idx, Cursor#cursor.fields, Args0), + Args = mango_json_bookmark:update_args(Bookmark, Args1), UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}), DbOpts = [{user_ctx, UserCtx}], Result = @@ -280,29 +291,25 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) -> % or in the new record in `callback_args`. This is to support mid-upgrade % clusters where the non-upgraded coordinator nodes will send the older style. % TODO remove this in a couple of couchdb versions. 
- {Selector, Fields} = + {Selector, Fields, CoveringIndex} = case couch_util:get_value(callback_args, Options) of % old style undefined -> - {couch_util:get_value(selector, Options), undefined}; + {couch_util:get_value(selector, Options), undefined, undefined}; % new style - assume a viewcbargs Args = #{} -> - {viewcbargs_get(selector, Args), viewcbargs_get(fields, Args)} + { + viewcbargs_get(selector, Args), + viewcbargs_get(fields, Args), + viewcbargs_get(covering_index, Args) + } end, - case ViewRow#view_row.doc of - null -> - maybe_send_mango_ping(); - undefined -> - % include_docs=false. Use quorum fetch at coordinator - ok = rexi:stream2(ViewRow), - set_mango_msg_timestamp(); - Doc -> - % We slightly abuse the doc field in the view response here, + Process = + fun(Doc) -> + % slightly abuse the doc field in the view response here, % because we may return something other than the full document: % we may have projected the requested `fields` from the query. % However, this oddness is confined to being visible in this module. - put(mango_docs_examined, get(mango_docs_examined) + 1), - couch_stats:increment_counter([mango, docs_examined]), case match_and_extract_doc(Doc, Selector, Fields) of {match, FinalDoc} -> FinalViewRow = ViewRow#view_row{doc = FinalDoc}, @@ -311,6 +318,21 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) -> {no_match, undefined} -> maybe_send_mango_ping() end + end, + case {ViewRow#view_row.doc, CoveringIndex} of + {null, _} -> + maybe_send_mango_ping(); + {undefined, Index = #idx{}} -> + Doc = derive_doc_from_index(Index, ViewRow), + Process(Doc); + {undefined, _} -> + % include_docs=false. 
Use quorum fetch at coordinator + ok = rexi:stream2(ViewRow), + set_mango_msg_timestamp(); + {Doc, _} -> + put(mango_docs_examined, get(mango_docs_examined) + 1), + couch_stats:increment_counter([mango, docs_examined]), + Process(Doc) end, {ok, Acc}; view_cb(complete, Acc) -> @@ -338,6 +360,14 @@ match_and_extract_doc(Doc, Selector, Fields) -> {no_match, undefined} end. +derive_doc_from_index(Index, #view_row{id = DocId, key = Keys}) -> + Columns = mango_idx:columns(Index), + lists:foldr( + fun({Column, Key}, Doc) -> mango_doc:set_field(Doc, Column, Key) end, + mango_doc:set_field({[]}, <<"_id">>, DocId), + lists:zip(Columns, Keys) + ). + maybe_send_mango_ping() -> Current = os:timestamp(), LastPing = get(mango_last_msg_timestamp), @@ -482,6 +512,10 @@ apply_opts([{_, _} | Rest], Args) -> % Ignore unknown options apply_opts(Rest, Args). +consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs0} = Args) -> + IncludeDocs = IncludeDocs0 andalso (not mango_idx_view:covers(Index, Fields)), + Args#mrargs{include_docs = IncludeDocs}. + doc_member_and_extract(Cursor, RowProps) -> Db = Cursor#cursor.db, Opts = Cursor#cursor.opts, diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index ff8f6c6bb..3ef410e12 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -26,7 +26,9 @@ indexable_fields/1, field_ranges/1, - field_ranges/2 + field_ranges/2, + + covers/2 ]). -include_lib("couch/include/couch_db.hrl"). @@ -521,3 +523,17 @@ can_use_sort([Col | RestCols], SortFields, Selector) -> true -> can_use_sort(RestCols, SortFields, Selector); false -> false end. + +% There is no information available about the full set of fields which +% comes the following consequences: an index cannot (reliably) cover +% an "all fields" type of query and nested fields are out of scope. 
+covers(_, all_fields) -> + false; +covers(Idx, Fields) -> + case mango_idx:def(Idx) of + all_docs -> + false; + _ -> + Available = [<<"_id">> | columns(Idx)], + sets:is_subset(sets:from_list(Fields), sets:from_list(Available)) + end. |