summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGabor Pali <gabor.pali@ibm.com>2023-03-16 20:23:44 +0100
committerNick Vatamaniuc <nickva@users.noreply.github.com>2023-04-18 23:51:32 -0400
commitc83b5ccb6757dafdee982deb3dfe7bb3cee9f67f (patch)
treecbaffc448d31d95742a97bd7be8f3d10c348d6c2
parent1ed0519a15427b0db6d03ecdc543227691fc3bbc (diff)
downloadcouchdb-c83b5ccb6757dafdee982deb3dfe7bb3cee9f67f.tar.gz
mango: introduce support for covering indexes
As a performance improvement, shorten the gap between Mango queries and the underlying map-reduce views: try to serve requests without pulling documents from the primary data set, i.e. run the query with `include_docs` set to `false` when there is a chance that it can be "covered" by the chosen index. The rows in the results are then built from the information stored there. Extend the response on the `_explain` endpoint with a `covered` Boolean attribute that shows whether the query would be covered by the index or not. Remarks: - This should be a transparent optimization, without any semantic effect on the queries. - Because the main purpose of indexes is to store keys and the document identifiers, the change will only work in cases when the selected fields overlap with those. The chance of being covered could be increased by adding more non-key fields to the index, but that is not in scope here.
-rw-r--r--src/mango/src/mango_cursor_view.erl86
-rw-r--r--src/mango/src/mango_idx_view.erl18
2 files changed, 77 insertions, 27 deletions
diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl
index a8a255f72..8e79f608e 100644
--- a/src/mango/src/mango_cursor_view.erl
+++ b/src/mango/src/mango_cursor_view.erl
@@ -39,15 +39,19 @@
% viewcbargs wraps up the arguments that view_cb uses into a single
% entry in the mrargs.extra list. We use a Map to allow us to later
% add fields without having old messages causing errors/crashes.
-viewcbargs_new(Selector, Fields) ->
+viewcbargs_new(Selector, Fields, CoveringIndex) ->
#{
selector => Selector,
- fields => Fields
+ fields => Fields,
+ covering_index => CoveringIndex
}.
+
viewcbargs_get(selector, Args) when is_map(Args) ->
maps:get(selector, Args, undefined);
viewcbargs_get(fields, Args) when is_map(Args) ->
- maps:get(fields, Args, undefined).
+ maps:get(fields, Args, undefined);
+viewcbargs_get(covering_index, Args) when is_map(Args) ->
+ maps:get(covering_index, Args, undefined).
create(Db, Indexes, Selector, Opts) ->
FieldRanges = mango_idx_view:field_ranges(Selector),
@@ -73,13 +77,11 @@ create(Db, Indexes, Selector, Opts) ->
bookmark = Bookmark
}}.
-explain(Cursor) ->
- #cursor{
- opts = Opts
- } = Cursor,
-
+explain(#cursor{opts = Opts} = Cursor) ->
BaseArgs = base_args(Cursor),
- Args = apply_opts(Opts, BaseArgs),
+ Args0 = apply_opts(Opts, BaseArgs),
+ #cursor{index = Index, fields = Fields} = Cursor,
+ Args = consider_index_coverage(Index, Fields, Args0),
[
{mrargs,
@@ -94,7 +96,8 @@ explain(Cursor) ->
{stable, Args#mrargs.stable},
{update, Args#mrargs.update},
{conflicts, Args#mrargs.conflicts}
- ]}}
+ ]}},
+ {covered, mango_idx_view:covers(Index, Fields)}
].
% replace internal values that cannot
@@ -125,6 +128,13 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) -
mango_idx:end_key(Idx, Cursor#cursor.ranges)
}
end,
+ CoveringIndex =
+ case mango_idx_view:covers(Idx, Fields) of
+ true ->
+ Idx;
+ false ->
+ undefined
+ end,
#mrargs{
view_type = map,
reduce = false,
@@ -137,7 +147,7 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) -
{callback, {?MODULE, view_cb}},
% TODO remove selector. It supports older nodes during version upgrades.
{selector, Selector},
- {callback_args, viewcbargs_new(Selector, Fields)},
+ {callback_args, viewcbargs_new(Selector, Fields, CoveringIndex)},
{ignore_partition_query_limit, true}
]
@@ -157,7 +167,8 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu
BaseArgs = base_args(Cursor),
#cursor{opts = Opts, bookmark = Bookmark} = Cursor,
Args0 = apply_opts(Opts, BaseArgs),
- Args = mango_json_bookmark:update_args(Bookmark, Args0),
+ Args1 = consider_index_coverage(Idx, Cursor#cursor.fields, Args0),
+ Args = mango_json_bookmark:update_args(Bookmark, Args1),
UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}),
DbOpts = [{user_ctx, UserCtx}],
Result =
@@ -280,29 +291,25 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) ->
% or in the new record in `callback_args`. This is to support mid-upgrade
% clusters where the non-upgraded coordinator nodes will send the older style.
% TODO remove this in a couple of couchdb versions.
- {Selector, Fields} =
+ {Selector, Fields, CoveringIndex} =
case couch_util:get_value(callback_args, Options) of
% old style
undefined ->
- {couch_util:get_value(selector, Options), undefined};
+ {couch_util:get_value(selector, Options), undefined, undefined};
% new style - assume a viewcbargs
Args = #{} ->
- {viewcbargs_get(selector, Args), viewcbargs_get(fields, Args)}
+ {
+ viewcbargs_get(selector, Args),
+ viewcbargs_get(fields, Args),
+ viewcbargs_get(covering_index, Args)
+ }
end,
- case ViewRow#view_row.doc of
- null ->
- maybe_send_mango_ping();
- undefined ->
- % include_docs=false. Use quorum fetch at coordinator
- ok = rexi:stream2(ViewRow),
- set_mango_msg_timestamp();
- Doc ->
- % We slightly abuse the doc field in the view response here,
+ Process =
+ fun(Doc) ->
+ % slightly abuse the doc field in the view response here,
% because we may return something other than the full document:
% we may have projected the requested `fields` from the query.
% However, this oddness is confined to being visible in this module.
- put(mango_docs_examined, get(mango_docs_examined) + 1),
- couch_stats:increment_counter([mango, docs_examined]),
case match_and_extract_doc(Doc, Selector, Fields) of
{match, FinalDoc} ->
FinalViewRow = ViewRow#view_row{doc = FinalDoc},
@@ -311,6 +318,21 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) ->
{no_match, undefined} ->
maybe_send_mango_ping()
end
+ end,
+ case {ViewRow#view_row.doc, CoveringIndex} of
+ {null, _} ->
+ maybe_send_mango_ping();
+ {undefined, Index = #idx{}} ->
+ Doc = derive_doc_from_index(Index, ViewRow),
+ Process(Doc);
+ {undefined, _} ->
+ % include_docs=false. Use quorum fetch at coordinator
+ ok = rexi:stream2(ViewRow),
+ set_mango_msg_timestamp();
+ {Doc, _} ->
+ put(mango_docs_examined, get(mango_docs_examined) + 1),
+ couch_stats:increment_counter([mango, docs_examined]),
+ Process(Doc)
end,
{ok, Acc};
view_cb(complete, Acc) ->
@@ -338,6 +360,14 @@ match_and_extract_doc(Doc, Selector, Fields) ->
{no_match, undefined}
end.
+derive_doc_from_index(Index, #view_row{id = DocId, key = Keys}) ->
+ Columns = mango_idx:columns(Index),
+ lists:foldr(
+ fun({Column, Key}, Doc) -> mango_doc:set_field(Doc, Column, Key) end,
+ mango_doc:set_field({[]}, <<"_id">>, DocId),
+ lists:zip(Columns, Keys)
+ ).
+
maybe_send_mango_ping() ->
Current = os:timestamp(),
LastPing = get(mango_last_msg_timestamp),
@@ -482,6 +512,10 @@ apply_opts([{_, _} | Rest], Args) ->
% Ignore unknown options
apply_opts(Rest, Args).
+consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs0} = Args) ->
+ IncludeDocs = IncludeDocs0 andalso (not mango_idx_view:covers(Index, Fields)),
+ Args#mrargs{include_docs = IncludeDocs}.
+
doc_member_and_extract(Cursor, RowProps) ->
Db = Cursor#cursor.db,
Opts = Cursor#cursor.opts,
diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index ff8f6c6bb..3ef410e12 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -26,7 +26,9 @@
indexable_fields/1,
field_ranges/1,
- field_ranges/2
+ field_ranges/2,
+
+ covers/2
]).
-include_lib("couch/include/couch_db.hrl").
@@ -521,3 +523,17 @@ can_use_sort([Col | RestCols], SortFields, Selector) ->
true -> can_use_sort(RestCols, SortFields, Selector);
false -> false
end.
+
+% There is no information available about the full set of fields, which
+% has the following consequences: an index cannot (reliably) cover
+% an "all fields" type of query and nested fields are out of scope.
+covers(_, all_fields) ->
+ false;
+covers(Idx, Fields) ->
+ case mango_idx:def(Idx) of
+ all_docs ->
+ false;
+ _ ->
+ Available = [<<"_id">> | columns(Idx)],
+ sets:is_subset(sets:from_list(Fields), sets:from_list(Available))
+ end.