summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgarren smith <garren.smith@gmail.com>2018-06-20 21:12:27 +0200
committerGitHub <noreply@github.com>2018-06-20 21:12:27 +0200
commit5b5c8a1dcb196eb461dcab17a52c3c27998c5372 (patch)
tree10cc8c3bd16f62c3060a00fc0ef56633ac3cfa1d
parentfe53e437ca5ec9d23aa1b55d7934daced157a9e3 (diff)
downloadcouchdb-5b5c8a1dcb196eb461dcab17a52c3c27998c5372.tar.gz
Add constant index fields to sort based on the selector (#1376)
This moves the sort check into the is_usable function for all indexes. For map/reduce indexes it can add any constant fields e.g {a: {"$eq": 4} to the prefix of the sort because it won't affect the actual sort but will increase the chance that an index is selected. This is a user experience fix to help a user if they don't add all the columns for an index to the sort fields.
-rw-r--r--src/mango/src/mango_cursor.erl18
-rw-r--r--src/mango/src/mango_idx.erl36
-rw-r--r--src/mango/src/mango_idx_special.erl13
-rw-r--r--src/mango/src/mango_idx_view.erl30
-rw-r--r--src/mango/src/mango_selector.erl113
-rw-r--r--src/mango/test/02-basic-find-test.py7
-rw-r--r--src/mango/test/18-json-sort.py222
7 files changed, 392 insertions, 47 deletions
diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl
index 5108d36b2..5d2ea717d 100644
--- a/src/mango/src/mango_cursor.erl
+++ b/src/mango/src/mango_cursor.erl
@@ -48,18 +48,12 @@
create(Db, Selector0, Opts) ->
Selector = mango_selector:normalize(Selector0),
UsableIndexes = mango_idx:get_usable_indexes(Db, Selector, Opts),
- case length(UsableIndexes) of
- 0 ->
- AllDocs = mango_idx:special(Db),
- create_cursor(Db, AllDocs, Selector, Opts);
- _ ->
- case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of
- [] ->
- % use_index doesn't match a valid index - fall back to a valid one
- create_cursor(Db, UsableIndexes, Selector, Opts);
- UserSpecifiedIndex ->
- create_cursor(Db, UserSpecifiedIndex, Selector, Opts)
- end
+ case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of
+ [] ->
+ % use_index doesn't match a valid index - fall back to a valid one
+ create_cursor(Db, UsableIndexes, Selector, Opts);
+ UserSpecifiedIndex ->
+ create_cursor(Db, UserSpecifiedIndex, Selector, Opts)
end.
diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl
index ea5949c02..8af92b946 100644
--- a/src/mango/src/mango_idx.erl
+++ b/src/mango/src/mango_idx.erl
@@ -66,13 +66,12 @@ get_usable_indexes(Db, Selector, Opts) ->
SortFields = get_sort_fields(Opts),
UsableFilter = fun(I) -> is_usable(I, Selector, SortFields) end,
- UsableIndexes1 = lists:filter(UsableFilter, UsableIndexes0),
- case maybe_filter_by_sort_fields(UsableIndexes1, SortFields) of
- {ok, SortIndexes} ->
- SortIndexes;
- {error, no_usable_index} ->
- ?MANGO_ERROR({no_usable_index, missing_sort_index})
+ case lists:filter(UsableFilter, UsableIndexes0) of
+ [] ->
+ ?MANGO_ERROR({no_usable_index, missing_sort_index});
+ UsableIndexes ->
+ UsableIndexes
end.
@@ -100,31 +99,6 @@ get_sort_fields(Opts) ->
end.
-maybe_filter_by_sort_fields(Indexes, []) ->
- {ok, Indexes};
-
-maybe_filter_by_sort_fields(Indexes, SortFields) ->
- FilterFun = fun(Idx) ->
- Cols = mango_idx:columns(Idx),
- case {mango_idx:type(Idx), Cols} of
- {_, all_fields} ->
- true;
- {<<"text">>, _} ->
- sets:is_subset(sets:from_list(SortFields), sets:from_list(Cols));
- {<<"json">>, _} ->
- lists:prefix(SortFields, Cols);
- {<<"special">>, _} ->
- lists:prefix(SortFields, Cols)
- end
- end,
- case lists:filter(FilterFun, Indexes) of
- [] ->
- {error, no_usable_index};
- FilteredIndexes ->
- {ok, FilteredIndexes}
- end.
-
-
new(Db, Opts) ->
Def = get_idx_def(Opts),
Type = get_idx_type(Opts),
diff --git a/src/mango/src/mango_idx_special.erl b/src/mango/src/mango_idx_special.erl
index 12da1cbe5..ac6efc707 100644
--- a/src/mango/src/mango_idx_special.erl
+++ b/src/mango/src/mango_idx_special.erl
@@ -63,9 +63,11 @@ columns(#idx{def=all_docs}) ->
[<<"_id">>].
-is_usable(#idx{def=all_docs}, Selector, _) ->
+is_usable(#idx{def=all_docs}, _Selector, []) ->
+ true;
+is_usable(#idx{def=all_docs} = Idx, Selector, SortFields) ->
Fields = mango_idx_view:indexable_fields(Selector),
- lists:member(<<"_id">>, Fields).
+ lists:member(<<"_id">>, Fields) and can_use_sort(Idx, SortFields, Selector).
start_key([{'$gt', Key, _, _}]) ->
@@ -96,3 +98,10 @@ end_key([{_, _, '$lte', Key}]) ->
end_key([{'$eq', Key, '$eq', Key}]) ->
false = mango_json:special(Key),
Key.
+
+
+can_use_sort(_Idx, [], _Selector) ->
+ true;
+can_use_sort(Idx, SortFields, _Selector) ->
+ Cols = columns(Idx),
+ lists:prefix(SortFields, Cols).
diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl
index 8956b27b0..2d784b638 100644
--- a/src/mango/src/mango_idx_view.erl
+++ b/src/mango/src/mango_idx_view.erl
@@ -131,7 +131,8 @@ is_usable(Idx, Selector, SortFields) ->
[<<"_id">>, <<"_rev">>]),
mango_selector:has_required_fields(Selector, RequiredFields2)
- andalso not is_text_search(Selector).
+ andalso not is_text_search(Selector)
+ andalso can_use_sort(RequiredFields, SortFields, Selector).
is_text_search({[]}) ->
@@ -511,3 +512,30 @@ range_pos(Low, Arg, High) ->
max
end
end.
+
+
+% Can_use_sort works as follows:
+%
+% * no sort fields then we can use this
+% * Run out index columns we can't use this index
+% * If the current column is the start of the sort, return if sort is a prefix
+% * If the current column is constant, drop it and continue, else return false
+%
+% A constant column is a something that won't affect the sort
+% for example A: {$eq: 21}}
+%
+% Currently we only look at constant fields that are prefixes to the sort fields
+% set by the user. We considered adding in constant fields after sort fields
+% but were not 100% sure that it would not affect the sorting of the query.
+
+can_use_sort(_Cols, [], _Selector) ->
+ true;
+can_use_sort([], _SortFields, _Selector) ->
+ false;
+can_use_sort([Col | _] = Cols, [Col | _] = SortFields, _Selector) ->
+ lists:prefix(SortFields, Cols);
+can_use_sort([Col | RestCols], SortFields, Selector) ->
+ case mango_selector:is_constant_field(Selector, Col) of
+ true -> can_use_sort(RestCols, SortFields, Selector);
+ false -> false
+ end.
diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl
index 968dc3c74..fffadcd20 100644
--- a/src/mango/src/mango_selector.erl
+++ b/src/mango/src/mango_selector.erl
@@ -16,7 +16,8 @@
-export([
normalize/1,
match/2,
- has_required_fields/2
+ has_required_fields/2,
+ is_constant_field/2
]).
@@ -638,11 +639,121 @@ has_required_fields_int([{[{Field, Cond}]} | Rest], RequiredFields) ->
end.
+% Returns true if a field in the selector is a constant value e.g. {a: {$eq: 1}}
+is_constant_field({[]}, _Field) ->
+ false;
+
+is_constant_field(Selector, Field) when not is_list(Selector) ->
+ is_constant_field([Selector], Field);
+
+is_constant_field([], _Field) ->
+ false;
+
+is_constant_field([{[{<<"$and">>, Args}]}], Field) when is_list(Args) ->
+ lists:any(fun(Arg) -> is_constant_field(Arg, Field) end, Args);
+
+is_constant_field([{[{<<"$and">>, Args}]}], Field) ->
+ is_constant_field(Args, Field);
+
+is_constant_field([{[{Field, {[{Cond, _Val}]}}]} | _Rest], Field) ->
+ Cond =:= <<"$eq">>;
+
+is_constant_field([{[{_UnMatched, _}]} | Rest], Field) ->
+ is_constant_field(Rest, Field).
+
+
%%%%%%%% module tests below %%%%%%%%
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
+is_constant_field_basic_test() ->
+ Selector = normalize({[{<<"A">>, <<"foo">>}]}),
+ Field = <<"A">>,
+ ?assertEqual(true, is_constant_field(Selector, Field)).
+
+is_constant_field_basic_two_test() ->
+ Selector = normalize({[{<<"$and">>,
+ [
+ {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]},
+ {[{<<"age">>,{[{<<"$gt">>,10}]}}]}
+ ]
+ }]}),
+ Field = <<"cars">>,
+ ?assertEqual(true, is_constant_field(Selector, Field)).
+
+is_constant_field_not_eq_test() ->
+ Selector = normalize({[{<<"$and">>,
+ [
+ {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]},
+ {[{<<"age">>,{[{<<"$gt">>,10}]}}]}
+ ]
+ }]}),
+ Field = <<"age">>,
+ ?assertEqual(false, is_constant_field(Selector, Field)).
+
+is_constant_field_missing_field_test() ->
+ Selector = normalize({[{<<"$and">>,
+ [
+ {[{<<"cars">>,{[{<<"$eq">>,<<"2">>}]}}]},
+ {[{<<"age">>,{[{<<"$gt">>,10}]}}]}
+ ]
+ }]}),
+ Field = <<"wrong">>,
+ ?assertEqual(false, is_constant_field(Selector, Field)).
+
+is_constant_field_or_field_test() ->
+ Selector = {[{<<"$or">>,
+ [
+ {[{<<"A">>, <<"foo">>}]},
+ {[{<<"B">>, <<"foo">>}]}
+ ]
+ }]},
+ Normalized = normalize(Selector),
+ Field = <<"A">>,
+ ?assertEqual(false, is_constant_field(Normalized, Field)).
+
+is_constant_field_empty_selector_test() ->
+ Selector = normalize({[]}),
+ Field = <<"wrong">>,
+ ?assertEqual(false, is_constant_field(Selector, Field)).
+
+is_constant_nested_and_test() ->
+ Selector1 = {[{<<"$and">>,
+ [
+ {[{<<"A">>, <<"foo">>}]}
+ ]
+ }]},
+ Selector2 = {[{<<"$and">>,
+ [
+ {[{<<"B">>, {[{<<"$gt">>,10}]}}]}
+ ]
+ }]},
+ Selector = {[{<<"$and">>,
+ [
+ Selector1,
+ Selector2
+ ]
+ }]},
+
+ Normalized = normalize(Selector),
+ ?assertEqual(true, is_constant_field(Normalized, <<"A">>)),
+ ?assertEqual(false, is_constant_field(Normalized, <<"B">>)).
+
+is_constant_combined_or_and_equals_test() ->
+ Selector = {[{<<"A">>, "foo"},
+ {<<"$or">>,
+ [
+ {[{<<"B">>, <<"bar">>}]},
+ {[{<<"B">>, <<"baz">>}]}
+ ]
+ },
+ {<<"C">>, "qux"}
+ ]},
+ Normalized = normalize(Selector),
+ ?assertEqual(true, is_constant_field(Normalized, <<"C">>)),
+ ?assertEqual(false, is_constant_field(Normalized, <<"B">>)).
+
has_required_fields_basic_test() ->
RequiredFields = [<<"A">>],
Selector = {[{<<"A">>, <<"foo">>}]},
diff --git a/src/mango/test/02-basic-find-test.py b/src/mango/test/02-basic-find-test.py
index f7e151ad8..6a31d33ee 100644
--- a/src/mango/test/02-basic-find-test.py
+++ b/src/mango/test/02-basic-find-test.py
@@ -333,3 +333,10 @@ class BasicFindTests(mango.UserDocsTests):
assert explain["mrargs"]["start_key"] == [0]
assert explain["mrargs"]["end_key"] == ["<MAX>"]
assert explain["mrargs"]["include_docs"] == True
+
+ def test_sort_with_all_docs(self):
+ explain = self.db.find({
+ "_id": {"$gt": 0},
+ "age": {"$gt": 0}
+ }, sort=["_id"], explain=True)
+ self.assertEquals(explain["index"]["type"], "special")
diff --git a/src/mango/test/18-json-sort.py b/src/mango/test/18-json-sort.py
new file mode 100644
index 000000000..f8d2abe99
--- /dev/null
+++ b/src/mango/test/18-json-sort.py
@@ -0,0 +1,222 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import mango
+import copy
+import unittest
+
+DOCS = [
+ {
+ "_id": "1",
+ "name": "Jimi",
+ "age": 10,
+ "cars": 1
+ },
+ {
+ "_id": "2",
+ "name": "Eddie",
+ "age": 20,
+ "cars": 1
+ },
+ {
+ "_id": "3",
+ "name": "Jane",
+ "age": 30,
+ "cars": 2
+ },
+ {
+ "_id": "4",
+ "name": "Mary",
+ "age": 40,
+ "cars": 2
+ },
+ {
+ "_id": "5",
+ "name": "Sam",
+ "age": 50,
+ "cars": 3
+ }
+]
+
+class JSONIndexSortOptimisations(mango.DbPerClass):
+ def setUp(self):
+ self.db.recreate()
+ self.db.save_docs(copy.deepcopy(DOCS))
+
+ def test_works_for_basic_case(self):
+ self.db.create_index(["cars", "age"], name="cars-age")
+ selector = {
+ "cars": "2",
+ "age": {
+ "$gt": 10
+ }
+ }
+ explain = self.db.find(selector, sort=["age"], explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age")
+ self.assertEqual(explain["mrargs"]["direction"], "fwd")
+
+ def test_works_for_all_fields_specified(self):
+ self.db.create_index(["cars", "age"], name="cars-age")
+ selector = {
+ "cars": "2",
+ "age": {
+ "$gt": 10
+ }
+ }
+ explain = self.db.find(selector, sort=["cars", "age"], explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age")
+
+ def test_works_for_no_sort_fields_specified(self):
+ self.db.create_index(["cars", "age"], name="cars-age")
+ selector = {
+ "cars": {
+ "$gt": 10
+ },
+ "age": {
+ "$gt": 10
+ }
+ }
+ explain = self.db.find(selector, explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age")
+
+ def test_works_for_opp_dir_sort(self):
+ self.db.create_index(["cars", "age"], name="cars-age")
+ selector = {
+ "cars": "2",
+ "age": {
+ "$gt": 10
+ }
+ }
+ explain = self.db.find(selector, sort=[{"age": "desc"}], explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age")
+ self.assertEqual(explain["mrargs"]["direction"], "rev")
+
+ def test_not_work_for_non_constant_field(self):
+ self.db.create_index(["cars", "age"], name="cars-age")
+ selector = {
+ "cars": {
+ "$gt": 10
+ },
+ "age": {
+ "$gt": 10
+ }
+ }
+ try:
+ self.db.find(selector, explain=True, sort=["age"])
+ raise Exception("Should not get here")
+ except Exception as e:
+ resp = e.response.json()
+ self.assertEqual(resp["error"], "no_usable_index")
+
+ def test_three_index_one(self):
+ self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+ selector = {
+ "cars": "2",
+ "age": 10,
+ "name": {
+ "$gt": "AA"
+ }
+ }
+ explain = self.db.find(selector, sort=["name"], explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+ def test_three_index_two(self):
+ self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+ selector = {
+ "cars": "2",
+ "name": "Eddie",
+ "age": {
+ "$gt": 10
+ }
+ }
+ explain = self.db.find(selector, sort=["age"], explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+ def test_three_index_fails(self):
+ self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+ selector = {
+ "name": "Eddie",
+ "age": {
+ "$gt": 1
+ },
+ "cars": {
+ "$gt": "1"
+ }
+ }
+ try:
+ self.db.find(selector, explain=True, sort=["name"])
+ raise Exception("Should not get here")
+ except Exception as e:
+ resp = e.response.json()
+ self.assertEqual(resp["error"], "no_usable_index")
+
+ def test_empty_sort(self):
+ self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+ selector = {
+ "name": {
+ "$gt": "Eddie",
+ },
+ "age": 10,
+ "cars": {
+ "$gt": "1"
+ }
+ }
+ explain = self.db.find(selector, explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+ def test_in_between(self):
+ self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+ selector = {
+ "name": "Eddie",
+ "age": 10,
+ "cars": {
+ "$gt": "1"
+ }
+ }
+ explain = self.db.find(selector, explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+ try:
+ self.db.find(selector, sort=["cars", "name"], explain=True)
+ raise Exception("Should not get here")
+ except Exception as e:
+ resp = e.response.json()
+ self.assertEqual(resp["error"], "no_usable_index")
+
+ def test_ignore_after_set_sort_value(self):
+ self.db.create_index(["cars", "age", "name"], name="cars-age-name")
+ selector = {
+ "age": {
+ "$gt": 10
+ },
+ "cars": 2,
+ "name": {
+ "$gt": "A"
+ }
+ }
+ explain = self.db.find(selector, sort=["age"], explain=True)
+ self.assertEqual(explain["index"]["name"], "cars-age-name")
+
+ def test_not_use_index_if_other_fields_in_sort(self):
+ self.db.create_index(["cars", "age"], name="cars-age")
+ selector = {
+ "age": 10,
+ "cars": {
+ "$gt": "1"
+ }
+ }
+ try:
+ self.db.find(selector, sort=["cars", "name"], explain=True)
+ raise Exception("Should not get here")
+ except Exception as e:
+ resp = e.response.json()
+ self.assertEqual(resp["error"], "no_usable_index")