diff options
author | garren smith <garren.smith@gmail.com> | 2018-06-20 21:12:27 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-20 21:12:27 +0200 |
commit | 5b5c8a1dcb196eb461dcab17a52c3c27998c5372 (patch) | |
tree | 10cc8c3bd16f62c3060a00fc0ef56633ac3cfa1d | |
parent | fe53e437ca5ec9d23aa1b55d7934daced157a9e3 (diff) | |
download | couchdb-5b5c8a1dcb196eb461dcab17a52c3c27998c5372.tar.gz |
Add constant index fields to sort based on the selector (#1376)
This moves the sort check into the is_usable function for all indexes.
For map/reduce indexes it can add any constant fields e.g {a: {"$eq": 4}
to the prefix of the sort because it won't affect the actual sort but
will increase the chance that an index is selected. This is a user
experience fix to help a user if they don't add all the columns for an
index to the sort fields.
-rw-r--r-- | src/mango/src/mango_cursor.erl | 18 | ||||
-rw-r--r-- | src/mango/src/mango_idx.erl | 36 | ||||
-rw-r--r-- | src/mango/src/mango_idx_special.erl | 13 | ||||
-rw-r--r-- | src/mango/src/mango_idx_view.erl | 30 | ||||
-rw-r--r-- | src/mango/src/mango_selector.erl | 113 | ||||
-rw-r--r-- | src/mango/test/02-basic-find-test.py | 7 | ||||
-rw-r--r-- | src/mango/test/18-json-sort.py | 222 |
7 files changed, 392 insertions, 47 deletions
diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl index 5108d36b2..5d2ea717d 100644 --- a/src/mango/src/mango_cursor.erl +++ b/src/mango/src/mango_cursor.erl @@ -48,18 +48,12 @@ create(Db, Selector0, Opts) -> Selector = mango_selector:normalize(Selector0), UsableIndexes = mango_idx:get_usable_indexes(Db, Selector, Opts), - case length(UsableIndexes) of - 0 -> - AllDocs = mango_idx:special(Db), - create_cursor(Db, AllDocs, Selector, Opts); - _ -> - case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of - [] -> - % use_index doesn't match a valid index - fall back to a valid one - create_cursor(Db, UsableIndexes, Selector, Opts); - UserSpecifiedIndex -> - create_cursor(Db, UserSpecifiedIndex, Selector, Opts) - end + case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of + [] -> + % use_index doesn't match a valid index - fall back to a valid one + create_cursor(Db, UsableIndexes, Selector, Opts); + UserSpecifiedIndex -> + create_cursor(Db, UserSpecifiedIndex, Selector, Opts) end. diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index ea5949c02..8af92b946 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -66,13 +66,12 @@ get_usable_indexes(Db, Selector, Opts) -> SortFields = get_sort_fields(Opts), UsableFilter = fun(I) -> is_usable(I, Selector, SortFields) end, - UsableIndexes1 = lists:filter(UsableFilter, UsableIndexes0), - case maybe_filter_by_sort_fields(UsableIndexes1, SortFields) of - {ok, SortIndexes} -> - SortIndexes; - {error, no_usable_index} -> - ?MANGO_ERROR({no_usable_index, missing_sort_index}) + case lists:filter(UsableFilter, UsableIndexes0) of + [] -> + ?MANGO_ERROR({no_usable_index, missing_sort_index}); + UsableIndexes -> + UsableIndexes end. @@ -100,31 +99,6 @@ get_sort_fields(Opts) -> end. -maybe_filter_by_sort_fields(Indexes, []) -> - {ok, Indexes}; - -maybe_filter_by_sort_fields(Indexes, SortFields) -> - FilterFun = fun(Idx) -> - Cols = mango_idx:columns(Idx), - case {mango_idx:type(Idx), Cols} of - {_, all_fields} -> - true; - {<<"text">>, _} -> - sets:is_subset(sets:from_list(SortFields), sets:from_list(Cols)); - {<<"json">>, _} -> - lists:prefix(SortFields, Cols); - {<<"special">>, _} -> - lists:prefix(SortFields, Cols) - end - end, - case lists:filter(FilterFun, Indexes) of - [] -> - {error, no_usable_index}; - FilteredIndexes -> - {ok, FilteredIndexes} - end. - - new(Db, Opts) -> Def = get_idx_def(Opts), Type = get_idx_type(Opts), diff --git a/src/mango/src/mango_idx_special.erl b/src/mango/src/mango_idx_special.erl index 12da1cbe5..ac6efc707 100644 --- a/src/mango/src/mango_idx_special.erl +++ b/src/mango/src/mango_idx_special.erl @@ -63,9 +63,11 @@ columns(#idx{def=all_docs}) -> [<<"_id">>]. -is_usable(#idx{def=all_docs}, Selector, _) -> +is_usable(#idx{def=all_docs}, _Selector, []) -> + true; +is_usable(#idx{def=all_docs} = Idx, Selector, SortFields) -> Fields = mango_idx_view:indexable_fields(Selector), - lists:member(<<"_id">>, Fields). + lists:member(<<"_id">>, Fields) and can_use_sort(Idx, SortFields, Selector). start_key([{'$gt', Key, _, _}]) -> @@ -96,3 +98,10 @@ end_key([{_, _, '$lte', Key}]) -> end_key([{'$eq', Key, '$eq', Key}]) -> false = mango_json:special(Key), Key. + + +can_use_sort(_Idx, [], _Selector) -> + true; +can_use_sort(Idx, SortFields, _Selector) -> + Cols = columns(Idx), + lists:prefix(SortFields, Cols). diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 8956b27b0..2d784b638 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -131,7 +131,8 @@ is_usable(Idx, Selector, SortFields) -> [<<"_id">>, <<"_rev">>]), mango_selector:has_required_fields(Selector, RequiredFields2) - andalso not is_text_search(Selector). + andalso not is_text_search(Selector) + andalso can_use_sort(RequiredFields, SortFields, Selector). is_text_search({[]}) -> @@ -511,3 +512,30 @@ range_pos(Low, Arg, High) -> max end end. + + +% Can_use_sort works as follows: +% +% * no sort fields then we can use this +% * Run out index columns we can't use this index +% * If the current column is the start of the sort, return if sort is a prefix +% * If the current column is constant, drop it and continue, else return false +% +% A constant column is a something that won't affect the sort +% for example A: {$eq: 21}} +% +% Currently we only look at constant fields that are prefixes to the sort fields +% set by the user. We considered adding in constant fields after sort fields +% but were not 100% sure that it would not affect the sorting of the query. + +can_use_sort(_Cols, [], _Selector) -> + true; +can_use_sort([], _SortFields, _Selector) -> + false; +can_use_sort([Col | _] = Cols, [Col | _] = SortFields, _Selector) -> + lists:prefix(SortFields, Cols); +can_use_sort([Col | RestCols], SortFields, Selector) -> + case mango_selector:is_constant_field(Selector, Col) of + true -> can_use_sort(RestCols, SortFields, Selector); + false -> false + end. diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 968dc3c74..fffadcd20 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -16,7 +16,8 @@ -export([ normalize/1, match/2, - has_required_fields/2 + has_required_fields/2, + is_constant_field/2 ]). @@ -638,11 +639,121 @@ has_required_fields_int([{[{Field, Cond}]} | Rest], RequiredFields) -> end. +% Returns true if a field in the selector is a constant value e.g. {a: {$eq: 1}} +is_constant_field({[]}, _Field) -> + false; + +is_constant_field(Selector, Field) when not is_list(Selector) -> + is_constant_field([Selector], Field); + +is_constant_field([], _Field) -> + false; + +is_constant_field([{[{<<"$and">>, Args}]}], Field) when is_list(Args) -> + lists:any(fun(Arg) -> is_constant_field(Arg, Field) end, Args); + +is_constant_field([{[{<<"$and">>, Args}]}], Field) -> + is_constant_field(Args, Field); + +is_constant_field([{[{Field, {[{Cond, _Val}]}}]} | _Rest], Field) -> + Cond =:= <<"$eq">>; + +is_constant_field([{[{_UnMatched, _}]} | Rest], Field) -> + is_constant_field(Rest, Field). + + %%%%%%%% module tests below %%%%%%%% -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +is_constant_field_basic_test() -> + Selector = normalize({[{<<"A">>, <<"foo">>}]}), + Field = <<"A">>, + ?assertEqual(true, is_constant_field(Selector, Field)). + +is_constant_field_basic_two_test() -> + Selector = normalize({[{<<"$and">>, + [ + {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]}, + {[{<<"age">>,{[{<<"$gt">>,10}]}}]} + ] + }]}), + Field = <<"cars">>, + ?assertEqual(true, is_constant_field(Selector, Field)). + +is_constant_field_not_eq_test() -> + Selector = normalize({[{<<"$and">>, + [ + {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]}, + {[{<<"age">>,{[{<<"$gt">>,10}]}}]} + ] + }]}), + Field = <<"age">>, + ?assertEqual(false, is_constant_field(Selector, Field)). + +is_constant_field_missing_field_test() -> + Selector = normalize({[{<<"$and">>, + [ + {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]}, + {[{<<"age">>,{[{<<"$gt">>,10}]}}]} + ] + }]}), + Field = <<"wrong">>, + ?assertEqual(false, is_constant_field(Selector, Field)). + +is_constant_field_or_field_test() -> + Selector = {[{<<"$or">>, + [ + {[{<<"A">>, <<"foo">>}]}, + {[{<<"B">>, <<"foo">>}]} + ] + }]}, + Normalized = normalize(Selector), + Field = <<"A">>, + ?assertEqual(false, is_constant_field(Normalized, Field)). + +is_constant_field_empty_selector_test() -> + Selector = normalize({[]}), + Field = <<"wrong">>, + ?assertEqual(false, is_constant_field(Selector, Field)). + +is_constant_nested_and_test() -> + Selector1 = {[{<<"$and">>, + [ + {[{<<"A">>, <<"foo">>}]} + ] + }]}, + Selector2 = {[{<<"$and">>, + [ + {[{<<"B">>, {[{<<"$gt">>,10}]}}]} + ] + }]}, + Selector = {[{<<"$and">>, + [ + Selector1, + Selector2 + ] + }]}, + + Normalized = normalize(Selector), + ?assertEqual(true, is_constant_field(Normalized, <<"A">>)), + ?assertEqual(false, is_constant_field(Normalized, <<"B">>)). + +is_constant_combined_or_and_equals_test() -> + Selector = {[{<<"A">>, "foo"}, + {<<"$or">>, + [ + {[{<<"B">>, <<"bar">>}]}, + {[{<<"B">>, <<"baz">>}]} + ] + }, + {<<"C">>, "qux"} + ]}, + Normalized = normalize(Selector), + ?assertEqual(true, is_constant_field(Normalized, <<"C">>)), + ?assertEqual(false, is_constant_field(Normalized, <<"B">>)). + has_required_fields_basic_test() -> RequiredFields = [<<"A">>], Selector = {[{<<"A">>, <<"foo">>}]}, diff --git a/src/mango/test/02-basic-find-test.py b/src/mango/test/02-basic-find-test.py index f7e151ad8..6a31d33ee 100644 --- a/src/mango/test/02-basic-find-test.py +++ b/src/mango/test/02-basic-find-test.py @@ -333,3 +333,10 @@ class BasicFindTests(mango.UserDocsTests): assert explain["mrargs"]["start_key"] == [0] assert explain["mrargs"]["end_key"] == ["<MAX>"] assert explain["mrargs"]["include_docs"] == True + + def test_sort_with_all_docs(self): + explain = self.db.find({ + "_id": {"$gt": 0}, + "age": {"$gt": 0} + }, sort=["_id"], explain=True) + self.assertEquals(explain["index"]["type"], "special") diff --git a/src/mango/test/18-json-sort.py b/src/mango/test/18-json-sort.py new file mode 100644 index 000000000..f8d2abe99 --- /dev/null +++ b/src/mango/test/18-json-sort.py @@ -0,0 +1,222 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import mango +import copy +import unittest + +DOCS = [ + { + "_id": "1", + "name": "Jimi", + "age": 10, + "cars": 1 + }, + { + "_id": "2", + "name": "Eddie", + "age": 20, + "cars": 1 + }, + { + "_id": "3", + "name": "Jane", + "age": 30, + "cars": 2 + }, + { + "_id": "4", + "name": "Mary", + "age": 40, + "cars": 2 + }, + { + "_id": "5", + "name": "Sam", + "age": 50, + "cars": 3 + } +] + +class JSONIndexSortOptimisations(mango.DbPerClass): + def setUp(self): + self.db.recreate() + self.db.save_docs(copy.deepcopy(DOCS)) + + def test_works_for_basic_case(self): + self.db.create_index(["cars", "age"], name="cars-age") + selector = { + "cars": "2", + "age": { + "$gt": 10 + } + } + explain = self.db.find(selector, sort=["age"], explain=True) + self.assertEqual(explain["index"]["name"], "cars-age") + self.assertEqual(explain["mrargs"]["direction"], "fwd") + + def test_works_for_all_fields_specified(self): + self.db.create_index(["cars", "age"], name="cars-age") + selector = { + "cars": "2", + "age": { + "$gt": 10 + } + } + explain = self.db.find(selector, sort=["cars", "age"], explain=True) + self.assertEqual(explain["index"]["name"], "cars-age") + + def test_works_for_no_sort_fields_specified(self): + self.db.create_index(["cars", "age"], name="cars-age") + selector = { + "cars": { + "$gt": 10 + }, + "age": { + "$gt": 10 + } + } + explain = self.db.find(selector, explain=True) + self.assertEqual(explain["index"]["name"], "cars-age") + + def test_works_for_opp_dir_sort(self): + self.db.create_index(["cars", "age"], name="cars-age") + selector = { + "cars": "2", + "age": { + "$gt": 10 + } + } + explain = self.db.find(selector, sort=[{"age": "desc"}], explain=True) + self.assertEqual(explain["index"]["name"], "cars-age") + self.assertEqual(explain["mrargs"]["direction"], "rev") + + def test_not_work_for_non_constant_field(self): + self.db.create_index(["cars", "age"], name="cars-age") + selector = { + "cars": { + "$gt": 10 + }, + "age": { + "$gt": 10 + } + } + try: + self.db.find(selector, explain=True, sort=["age"]) + raise Exception("Should not get here") + except Exception as e: + resp = e.response.json() + self.assertEqual(resp["error"], "no_usable_index") + + def test_three_index_one(self): + self.db.create_index(["cars", "age", "name"], name="cars-age-name") + selector = { + "cars": "2", + "age": 10, + "name": { + "$gt": "AA" + } + } + explain = self.db.find(selector, sort=["name"], explain=True) + self.assertEqual(explain["index"]["name"], "cars-age-name") + + def test_three_index_two(self): + self.db.create_index(["cars", "age", "name"], name="cars-age-name") + selector = { + "cars": "2", + "name": "Eddie", + "age": { + "$gt": 10 + } + } + explain = self.db.find(selector, sort=["age"], explain=True) + self.assertEqual(explain["index"]["name"], "cars-age-name") + + def test_three_index_fails(self): + self.db.create_index(["cars", "age", "name"], name="cars-age-name") + selector = { + "name": "Eddie", + "age": { + "$gt": 1 + }, + "cars": { + "$gt": "1" + } + } + try: + self.db.find(selector, explain=True, sort=["name"]) + raise Exception("Should not get here") + except Exception as e: + resp = e.response.json() + self.assertEqual(resp["error"], "no_usable_index") + + def test_empty_sort(self): + self.db.create_index(["cars", "age", "name"], name="cars-age-name") + selector = { + "name": { + "$gt": "Eddie", + }, + "age": 10, + "cars": { + "$gt": "1" + } + } + explain = self.db.find(selector, explain=True) + self.assertEqual(explain["index"]["name"], "cars-age-name") + + def test_in_between(self): + self.db.create_index(["cars", "age", "name"], name="cars-age-name") + selector = { + "name": "Eddie", + "age": 10, + "cars": { + "$gt": "1" + } + } + explain = self.db.find(selector, explain=True) + self.assertEqual(explain["index"]["name"], "cars-age-name") + + try: + self.db.find(selector, sort=["cars", "name"], explain=True) + raise Exception("Should not get here") + except Exception as e: + resp = e.response.json() + self.assertEqual(resp["error"], "no_usable_index") + + def test_ignore_after_set_sort_value(self): + self.db.create_index(["cars", "age", "name"], name="cars-age-name") + selector = { + "age": { + "$gt": 10 + }, + "cars": 2, + "name": { + "$gt": "A" + } + } + explain = self.db.find(selector, sort=["age"], explain=True) + self.assertEqual(explain["index"]["name"], "cars-age-name") + + def test_not_use_index_if_other_fields_in_sort(self): + self.db.create_index(["cars", "age"], name="cars-age") + selector = { + "age": 10, + "cars": { + "$gt": "1" + } + } + try: + self.db.find(selector, sort=["cars", "name"], explain=True) + raise Exception("Should not get here") + except Exception as e: + resp = e.response.json() + self.assertEqual(resp["error"], "no_usable_index") |