summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPÁLI Gábor János <gabor.pali@ibm.com>2023-03-10 16:58:14 +0100
committerGitHub <noreply@github.com>2023-03-10 16:58:14 +0100
commitf40147b262ca007d07bb5f48c345b31b46b80629 (patch)
tree061caf70bc4d6b0a1cd4dff34bcbdf7f144f2f62
parent8ec7b578848903b9a152934c7ef0f57e69d8946d (diff)
downloadcouchdb-f40147b262ca007d07bb5f48c345b31b46b80629.tar.gz
mango: correct text index selection for queries with `$regex` (#4458)
* mango: Remove unused `op_insert` The `op_insert` elements in the abstract representation of the translated Lucene queries do not seem to be produced anywhere in the code. This might have been left over from a while ago, so retire it now. * mango: Remove unused directory include * mango: Equip text index selection with tests, specs, and docs - Add specifications for the important functions that play some role in the text index selection. This would help to understand the implicit contracts around them and the associated data flow. - Introduce `test_utils:as_selector/1` to make it easier to build valid Mango selectors for testing. On the top level, it uses Erlang maps to ensure the structural consistency of the input (selectors are JSON objects that can be considered maps). Maps are then validated and normalized by `jiffy` and Mango's internal normalization rules for selectors for additional correctness; they eventually become embedded JSON objects. This facilitates writing better unit tests that are closer to real-world use. At the same time, it comes with a dependency on these tools and their misbehavior can cause test failures. - Add unit tests for the major functions that contribute to the index selection logic and boost the test coverage of the `mango_idx_text` and `mango_selector_text` modules. That is important because running integration tests on a higher level requires a working Clouseau instance, which may not always be available. With these unit tests in place, changes in the code can be tracked easily. Also, the test cases can help the reader get a better understanding of the assumed behavior. - Explain the purpose of `mango_idx_text:is_usable/3` as this is not trivial to grasp at first sight. Thanks @mikerhodes for providing the input. 
* mango: Refactor index selection tests * mango: Correct text index selection for `$regex` For the `$regex` operator, text indexes can be overly permissive, which can cause them to be selected even if they cannot serve the corresponding query. Rework the interpretation of `$regex` to avoid such problems.
-rw-r--r--src/couch/src/test_util.erl6
-rw-r--r--src/mango/src/mango.hrl10
-rw-r--r--src/mango/src/mango_idx_text.erl137
-rw-r--r--src/mango/src/mango_selector_text.erl451
-rw-r--r--src/mango/test/05-index-selection-test.py25
-rw-r--r--src/mango/test/16-index-selectors-test.py59
6 files changed, 666 insertions, 22 deletions
diff --git a/src/couch/src/test_util.erl b/src/couch/src/test_util.erl
index 345359794..e53b11149 100644
--- a/src/couch/src/test_util.erl
+++ b/src/couch/src/test_util.erl
@@ -37,6 +37,8 @@
-export([shuffle/1]).
+-export([as_selector/1]).
+
-include_lib("couch/include/couch_eunit.hrl").
-include_lib("couch/include/couch_db.hrl").
-include("couch_db_int.hrl").
@@ -488,3 +490,7 @@ shuffle(List) ->
Paired = [{couch_rand:uniform(), I} || I <- List],
Sorted = lists:sort(Paired),
[I || {_, I} <- Sorted].
+
+%% Create a valid Mango selector from an Erlang map.
+as_selector(Map) ->
+ mango_selector:normalize(jiffy:decode(jiffy:encode(Map))).
diff --git a/src/mango/src/mango.hrl b/src/mango/src/mango.hrl
index 26a9d43b9..d50d17b6f 100644
--- a/src/mango/src/mango.hrl
+++ b/src/mango/src/mango.hrl
@@ -11,3 +11,13 @@
% the License.
-define(MANGO_ERROR(R), throw({mango_error, ?MODULE, R})).
+
+-type abstract_text_selector() :: {'op_and', [abstract_text_selector()]}
+ | {'op_or', [abstract_text_selector()]}
+ | {'op_not', {abstract_text_selector(), abstract_text_selector()}}
+ | {'op_not', {_, 'false'}}
+ | {'op_field', {iolist() | binary(), _}}
+ | {'op_fieldname', {_, _}}
+ | {'op_null', {_, _}}
+ | {'op_default', _}
+ | {'op_regex', binary()}.
diff --git a/src/mango/src/mango_idx_text.erl b/src/mango/src/mango_idx_text.erl
index b4a46d688..db8af795b 100644
--- a/src/mango/src/mango_idx_text.erl
+++ b/src/mango/src/mango_idx_text.erl
@@ -29,6 +29,10 @@
-include("mango.hrl").
-include("mango_idx.hrl").
+-ifdef(TEST).
+-import(test_util, [as_selector/1]).
+-endif.
+
validate_new(#idx{} = Idx, Db) ->
{ok, Def} = do_validate(Idx#idx.def),
maybe_reject_index_all_req(Def, Db),
@@ -127,6 +131,14 @@ columns(Idx) ->
)
end.
+% Mind that `is_usable/3` is not about "what fields can be answered by
+% the index" but instead more along the lines of "this index will
+% ensure that all the documents that should be returned for the query
+% will be, because we checked that all the bits of the query that
+% imply `$exists` for a field are used when we check that the indexing
+% process will have included all the relevant documents in the index".
+-spec is_usable(#idx{}, SelectorObject, _) -> boolean() when
+ SelectorObject :: any().
is_usable(_, Selector, _) when Selector =:= {[]} ->
false;
is_usable(Idx, Selector, _) ->
@@ -303,10 +315,15 @@ construct_analyzer({Props}) ->
]}
end.
+-spec indexable_fields(SelectorObject) -> Fields when
+ SelectorObject :: any(),
+ Fields :: [binary()].
indexable_fields(Selector) ->
TupleTree = mango_selector_text:convert([], Selector),
indexable_fields([], TupleTree).
+-spec indexable_fields(Fields, abstract_text_selector()) -> Fields when
+ Fields :: [binary()].
indexable_fields(Fields, {op_and, Args}) when is_list(Args) ->
lists:foldl(
fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end,
@@ -344,8 +361,6 @@ indexable_fields(Fields, {op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) ->
% forces "$exists" : false to use _all_docs
indexable_fields(_, {op_not, {_, false}}) ->
[];
-indexable_fields(Fields, {op_insert, Arg}) when is_binary(Arg) ->
- Fields;
%% fieldname.[]:length is not a user defined field.
indexable_fields(Fields, {op_field, {[_, <<":length">>], _}}) ->
Fields;
@@ -360,6 +375,11 @@ indexable_fields(Fields, {op_fieldname, {_, _}}) ->
%% Similar idea to op_fieldname but with fieldname:null
indexable_fields(Fields, {op_null, {_, _}}) ->
Fields;
+%% Regular expression matching should be an exception to the rule
+%% above because the type of the associated field is exact, it must be
+%% a string.
+indexable_fields(Fields, {op_regex, Name}) ->
+ [iolist_to_binary([Name, ":string"]) | Fields];
indexable_fields(Fields, {op_default, _}) ->
[<<"$default">> | Fields].
@@ -456,4 +476,117 @@ warn_index_all({Idx, Db}) ->
?assertThrow({test_error, logged_warning}, validate_new(Idx, Db))
end).
+indexable_fields_test() ->
+ ?assertEqual(
+ [<<"$default">>, <<"field1:boolean">>, <<"field2:number">>, <<"field3:string">>],
+ indexable_fields(
+ as_selector(
+ #{
+ <<"$default">> => #{<<"$text">> => <<"text">>},
+ <<"field1">> => true,
+ <<"field2">> => 42,
+ <<"field3">> => #{<<"$regex">> => <<".*">>}
+ }
+ )
+ )
+ ),
+ ?assertEqual(
+ [<<"f1:string">>, <<"f2:string">>, <<"f3:string">>, <<"f4:string">>, <<"f5:string">>],
+ lists:sort(
+ indexable_fields(
+ as_selector(
+ #{
+ <<"$and">> =>
+ [
+ #{<<"f1">> => <<"v1">>},
+ #{<<"f2">> => <<"v2">>}
+ ],
+ <<"$or">> =>
+ [
+ #{<<"f3">> => <<"v3">>},
+ #{<<"f4">> => <<"v4">>}
+ ],
+ <<"$not">> => #{<<"f5">> => <<"v5">>}
+ }
+ )
+ )
+ )
+ ),
+
+ ?assertEqual(
+ [],
+ indexable_fields(
+ as_selector(
+ #{
+ <<"field1">> => null,
+ <<"field2">> => #{<<"$size">> => 3},
+ <<"field3">> => #{<<"$type">> => <<"type">>}
+ }
+ )
+ )
+ ),
+ ?assertEqual(
+ [],
+ indexable_fields(
+ as_selector(
+ #{
+ <<"$and">> =>
+ [
+ #{<<"f1">> => null},
+ #{<<"f2">> => null}
+ ],
+ <<"$or">> =>
+ [
+ #{<<"f3">> => null},
+ #{<<"f4">> => null}
+ ],
+ <<"$not">> => #{<<"f5">> => null}
+ }
+ )
+ )
+ ).
+
+is_usable_test() ->
+ ?assertNot(is_usable(undefined, {[]}, undefined)),
+
+ AllFieldsIndex = #idx{def = {[{<<"fields">>, <<"all_fields">>}]}},
+ ?assert(is_usable(AllFieldsIndex, undefined, undefined)),
+
+ Field1 = {[{<<"name">>, <<"field1">>}, {<<"type">>, <<"string">>}]},
+ Field2 = {[{<<"name">>, <<"field2">>}, {<<"type">>, <<"number">>}]},
+ Index = #idx{def = {[{<<"fields">>, [Field1, Field2]}]}},
+ ?assert(is_usable(Index, as_selector(#{<<"field1">> => <<"value">>}), undefined)),
+ ?assertNot(is_usable(Index, as_selector(#{<<"field1">> => 42}), undefined)),
+ ?assertNot(is_usable(Index, as_selector(#{<<"field3">> => true}), undefined)),
+ ?assert(
+ is_usable(Index, as_selector(#{<<"field1">> => #{<<"$type">> => <<"string">>}}), undefined)
+ ),
+ ?assert(
+ is_usable(Index, as_selector(#{<<"field1">> => #{<<"$type">> => <<"boolean">>}}), undefined)
+ ),
+ ?assert(
+ is_usable(Index, as_selector(#{<<"field3">> => #{<<"$type">> => <<"boolean">>}}), undefined)
+ ),
+ ?assert(is_usable(Index, as_selector(#{<<"field1">> => #{<<"$exists">> => true}}), undefined)),
+ ?assert(is_usable(Index, as_selector(#{<<"field1">> => #{<<"$exists">> => false}}), undefined)),
+ ?assert(is_usable(Index, as_selector(#{<<"field3">> => #{<<"$exists">> => true}}), undefined)),
+ ?assert(is_usable(Index, as_selector(#{<<"field3">> => #{<<"$exists">> => false}}), undefined)),
+ ?assert(
+ is_usable(Index, as_selector(#{<<"field1">> => #{<<"$regex">> => <<".*">>}}), undefined)
+ ),
+ ?assertNot(
+ is_usable(Index, as_selector(#{<<"field2">> => #{<<"$regex">> => <<".*">>}}), undefined)
+ ),
+ ?assertNot(
+ is_usable(Index, as_selector(#{<<"field3">> => #{<<"$regex">> => <<".*">>}}), undefined)
+ ),
+ ?assertNot(
+ is_usable(Index, as_selector(#{<<"field1">> => #{<<"$nin">> => [1, 2, 3]}}), undefined)
+ ),
+ ?assert(
+ is_usable(Index, as_selector(#{<<"field2">> => #{<<"$nin">> => [1, 2, 3]}}), undefined)
+ ),
+ ?assertNot(
+ is_usable(Index, as_selector(#{<<"field3">> => #{<<"$nin">> => [1, 2, 3]}}), undefined)
+ ).
-endif.
diff --git a/src/mango/src/mango_selector_text.erl b/src/mango/src/mango_selector_text.erl
index ab7a63f01..0d18516c9 100644
--- a/src/mango/src/mango_selector_text.erl
+++ b/src/mango/src/mango_selector_text.erl
@@ -19,16 +19,25 @@
append_sort_type/2
]).
--include_lib("couch/include/couch_db.hrl").
-include("mango.hrl").
+-ifdef(TEST).
+-import(test_util, [as_selector/1]).
+-endif.
+
%% Regex for <<"\\.">>
-define(PERIOD, "\\.").
+-spec convert(SelectorObject) -> LuceneQueryString when
+ SelectorObject :: any(),
+ LuceneQueryString :: binary().
convert(Object) ->
TupleTree = convert([], Object),
iolist_to_binary(to_query(TupleTree)).
+-spec convert(CurrentPath, SelectorObject) -> abstract_text_selector() when
+ CurrentPath :: [binary()],
+ SelectorObject :: any().
convert(Path, {[{<<"$and">>, Args}]}) ->
Parts = [convert(Path, Arg) || Arg <- Args],
{op_and, Parts};
@@ -149,8 +158,11 @@ convert(Path, {[{<<"$mod">>, _}]}) ->
% instead a custom implementation. The syntax is therefore different, so we do
% would get different behavior than our view indexes. To be consistent, we will
% simply return docs for fields that exist and then run our match filter.
+%
+% It has a dedicated type so that `mango_idx_text:indexable_fields/1`
+% could handle this case properly.
convert(Path, {[{<<"$regex">>, _}]}) ->
- field_exists_query(Path, "string");
+ {op_regex, path_str(Path)};
convert(Path, {[{<<"$size">>, Arg}]}) ->
{op_field, {make_field([<<"[]">> | Path], length), value_str(Arg)}};
% All other operators are internal assertion errors for
@@ -209,6 +221,8 @@ to_query_nested(Args) ->
FilterFun = fun(A) -> A =/= [] andalso A =/= "()" end,
lists:filter(FilterFun, QueryArgs).
+-spec to_query(abstract_text_selector()) -> LuceneQueryStringPieces when
+ LuceneQueryStringPieces :: [binary()].
to_query({op_and, []}) ->
[];
to_query({op_and, Args}) when is_list(Args) ->
@@ -231,8 +245,6 @@ to_query({op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) ->
%% For $exists:false
to_query({op_not, {ExistsQuery, false}}) ->
["($fieldnames:/.*/ ", " AND NOT (", to_query(ExistsQuery), "))"];
-to_query({op_insert, Arg}) when is_binary(Arg) ->
- ["(", Arg, ")"];
%% We escape : and / for now for values and all lucene chars for fieldnames
%% This needs to be resolved.
to_query({op_field, {Name, Value}}) ->
@@ -245,6 +257,10 @@ to_query({op_null, {Name, Value}}) ->
to_query({op_fieldname, {Name, Wildcard}}) ->
NameBin = iolist_to_binary(Name),
["($fieldnames:", mango_util:lucene_escape_user(NameBin), Wildcard, ")"];
+%% This is for indexable_fields
+to_query({op_regex, Name}) ->
+ NameBin = iolist_to_binary([Name, ":"]),
+ ["($fieldnames:", mango_util:lucene_escape_user(NameBin), "string)"];
to_query({op_default, Value}) ->
["($default:", Value, ")"].
@@ -421,3 +437,430 @@ replace_array_indexes([Part | Rest], NewPartsAcc, HasIntAcc) ->
[NewPart | NewPartsAcc],
HasInt or HasIntAcc
).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+convert_fields_test() ->
+ ?assertEqual(
+ {op_null, {[[<<"field">>], <<":">>, <<"null">>], <<"true">>}},
+ convert([], as_selector(#{<<"field">> => null}))
+ ),
+ ?assertEqual(
+ {op_field, {[[<<"field">>], <<":">>, <<"boolean">>], <<"true">>}},
+ convert([], as_selector(#{<<"field">> => true}))
+ ),
+ ?assertEqual(
+ {op_field, {[[<<"field">>], <<":">>, <<"number">>], <<"42">>}},
+ convert([], as_selector(#{<<"field">> => 42}))
+ ),
+ ?assertEqual(
+ {op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value\"">>}},
+ convert([], as_selector(#{<<"field">> => <<"value">>}))
+ ),
+ ?assertEqual(
+ {op_and, [
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":length">>], <<"3">>}},
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"1">>}},
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"2">>}},
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"3">>}}
+ ]},
+ convert([], as_selector(#{<<"field">> => [1, 2, 3]}))
+ ),
+ ?assertEqual(
+ {op_field, {
+ [[<<"field1">>, <<".">>, <<"field2">>], <<":">>, <<"string">>], <<"\"value\"">>
+ }},
+ convert([], as_selector(#{<<"field1">> => #{<<"field2">> => <<"value">>}}))
+ ),
+ ?assertEqual(
+ {op_and, [
+ {op_field, {[[<<"field2">>], <<":">>, <<"string">>], <<"\"value2\"">>}},
+ {op_field, {[[<<"field1">>], <<":">>, <<"string">>], <<"\"value1\"">>}}
+ ]},
+ convert([], as_selector(#{<<"field1">> => <<"value1">>, <<"field2">> => <<"value2">>}))
+ ).
+
+convert_default_test() ->
+ ?assertEqual(
+ {op_default, <<"\"text\"">>},
+ convert([], as_selector(#{<<"$default">> => #{<<"$text">> => <<"text">>}}))
+ ).
+
+convert_lt_test() ->
+ ?assertEqual(
+ {op_field,
+ {[[<<"field">>], <<":">>, <<"number">>], [<<"[-Infinity TO ">>, <<"42">>, <<"}">>]}},
+ convert([], as_selector(#{<<"field">> => #{<<"$lt">> => 42}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$lt">> => [1, 2, 3]}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$lt">> => null}}))
+ ).
+
+convert_lte_test() ->
+ ?assertEqual(
+ {op_field,
+ {[[<<"field">>], <<":">>, <<"number">>], [<<"[-Infinity TO ">>, <<"42">>, <<"]">>]}},
+ convert([], as_selector(#{<<"field">> => #{<<"$lte">> => 42}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$lte">> => [1, 2, 3]}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$lte">> => null}}))
+ ).
+
+convert_eq_test() ->
+ ?assertEqual(
+ {op_field, {[[<<"field">>], <<":">>, <<"number">>], <<"42">>}},
+ convert([], as_selector(#{<<"field">> => #{<<"$eq">> => 42}}))
+ ),
+ ?assertEqual(
+ {op_and, [
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":length">>], <<"3">>}},
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"1">>}},
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"2">>}},
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"3">>}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$eq">> => [1, 2, 3]}}))
+ ),
+ ?assertEqual(
+ {op_null, {[[<<"field">>], <<":">>, <<"null">>], <<"true">>}},
+ convert([], as_selector(#{<<"field">> => #{<<"$eq">> => null}}))
+ ).
+
+convert_ne_test() ->
+ ?assertEqual(
+ {op_not, {
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ {op_field, {[[<<"field">>], <<":">>, <<"number">>], <<"42">>}}
+ }},
+ convert([], as_selector(#{<<"field">> => #{<<"$ne">> => 42}}))
+ ).
+
+convert_gte_test() ->
+ ?assertEqual(
+ {op_field,
+ {[[<<"field">>], <<":">>, <<"number">>], [<<"[">>, <<"42">>, <<" TO Infinity]">>]}},
+ convert([], as_selector(#{<<"field">> => #{<<"$gte">> => 42}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$gte">> => [1, 2, 3]}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$gte">> => null}}))
+ ).
+
+convert_gt_test() ->
+ ?assertEqual(
+ {op_field,
+ {[[<<"field">>], <<":">>, <<"number">>], [<<"{">>, <<"42">>, <<" TO Infinity]">>]}},
+ convert([], as_selector(#{<<"field">> => #{<<"$gt">> => 42}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$gt">> => [1, 2, 3]}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$gt">> => null}}))
+ ).
+
+convert_all_test() ->
+ ?assertEqual(
+ {op_and, [
+ {op_field, {
+ [[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value1\"">>
+ }},
+ {op_field, {
+ [[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value2\"">>
+ }}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$all">> => [<<"value1">>, <<"value2">>]}}))
+ ).
+
+convert_elemMatch_test() ->
+ ?assertEqual(
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value\"">>}},
+ convert(
+ [], as_selector(#{<<"field">> => #{<<"$elemMatch">> => #{<<"$eq">> => <<"value">>}}})
+ )
+ ).
+
+convert_allMatch_test() ->
+ ?assertEqual(
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value\"">>}},
+ convert(
+ [], as_selector(#{<<"field">> => #{<<"$allMatch">> => #{<<"$eq">> => <<"value">>}}})
+ )
+ ).
+
+convert_keyMapMatch_test() ->
+ ?assertThrow(
+ {mango_error, mango_selector_text, {invalid_operator, <<"$keyMapMatch">>}},
+ convert(
+ [], as_selector(#{<<"field">> => #{<<"$keyMapMatch">> => #{<<"key">> => <<"value">>}}})
+ )
+ ).
+
+convert_in_test() ->
+ ?assertEqual(
+ {op_or, []},
+ convert([], as_selector(#{<<"field">> => #{<<"$in">> => []}}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_or, [
+ {op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value1\"">>}},
+ {op_field, {
+ [[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value1\"">>
+ }}
+ ]},
+ {op_or, [
+ {op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value2\"">>}},
+ {op_field, {
+ [[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"string">>], <<"\"value2\"">>
+ }}
+ ]}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$in">> => [<<"value1">>, <<"value2">>]}}))
+ ).
+
+convert_nin_test() ->
+ ?assertEqual(
+ {op_not, {
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ {op_or, []}
+ }},
+ convert([], as_selector(#{<<"field">> => #{<<"$nin">> => []}}))
+ ),
+ ?assertEqual(
+ {op_not, {
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ {op_or, [
+ {op_or, [
+ {op_field, {[[<<"field">>], <<":">>, <<"number">>], <<"1">>}},
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"1">>}}
+ ]},
+ {op_or, [
+ {op_field, {[[<<"field">>], <<":">>, <<"number">>], <<"2">>}},
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":">>, <<"number">>], <<"2">>}}
+ ]}
+ ]}
+ }},
+ convert([], as_selector(#{<<"field">> => #{<<"$nin">> => [1, 2]}}))
+ ).
+
+convert_exists_test() ->
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$exists">> => true}}))
+ ),
+ ?assertEqual(
+ {op_not, {
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ false
+ }},
+ convert([], as_selector(#{<<"field">> => #{<<"$exists">> => false}}))
+ ).
+
+convert_type_test() ->
+ ?assertEqual(
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ convert([], as_selector(#{<<"field">> => #{<<"$type">> => <<"string">>}}))
+ ).
+
+convert_mod_test() ->
+ ?assertEqual(
+ {op_fieldname, {[[<<"field">>], ":"], "number"}},
+ convert([], as_selector(#{<<"field">> => #{<<"$mod">> => [2, 0]}}))
+ ).
+
+convert_regex_test() ->
+ ?assertEqual(
+ {op_regex, [<<"field">>]},
+ convert([], as_selector(#{<<"field">> => #{<<"$regex">> => <<".*">>}}))
+ ).
+
+convert_size_test() ->
+ ?assertEqual(
+ {op_field, {[[<<"field">>, <<".">>, <<"[]">>], <<":length">>], <<"6">>}},
+ convert([], as_selector(#{<<"field">> => #{<<"$size">> => 6}}))
+ ).
+
+convert_not_test() ->
+ ?assertEqual(
+ {op_not, {
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ {op_fieldname, {[[<<"field">>], ":"], "number"}}
+ }},
+ convert([], as_selector(#{<<"field">> => #{<<"$not">> => #{<<"$mod">> => [2, 0]}}}))
+ ).
+
+convert_and_test() ->
+ ?assertEqual(
+ {op_and, []},
+ convert([], as_selector(#{<<"$and">> => []}))
+ ),
+ ?assertEqual(
+ {op_and, [{op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value\"">>}}]},
+ convert([], as_selector(#{<<"$and">> => [#{<<"field">> => <<"value">>}]}))
+ ),
+ ?assertEqual(
+ {op_and, [
+ {op_field, {[[<<"field1">>], <<":">>, <<"string">>], <<"\"value1\"">>}},
+ {op_field, {[[<<"field2">>], <<":">>, <<"string">>], <<"\"value2\"">>}}
+ ]},
+ convert(
+ [],
+ as_selector(#{
+ <<"$and">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}]
+ })
+ )
+ ).
+
+convert_or_test() ->
+ ?assertEqual(
+ {op_or, []},
+ convert([], as_selector(#{<<"$or">> => []}))
+ ),
+ ?assertEqual(
+ {op_or, [{op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value\"">>}}]},
+ convert([], as_selector(#{<<"$or">> => [#{<<"field">> => <<"value">>}]}))
+ ),
+ ?assertEqual(
+ {op_or, [
+ {op_field, {[[<<"field1">>], <<":">>, <<"string">>], <<"\"value1\"">>}},
+ {op_field, {[[<<"field2">>], <<":">>, <<"string">>], <<"\"value2\"">>}}
+ ]},
+ convert(
+ [],
+ as_selector(#{
+ <<"$or">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}]
+ })
+ )
+ ).
+
+convert_nor_test() ->
+ ?assertEqual(
+ {op_and, []},
+ convert([], as_selector(#{<<"$nor">> => []}))
+ ),
+ ?assertEqual(
+ {op_and, [
+ {op_not, {
+ {op_or, [
+ {op_fieldname, {[[<<"field">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field">>]], ".*"}}
+ ]},
+ {op_field, {[[<<"field">>], <<":">>, <<"string">>], <<"\"value\"">>}}
+ }}
+ ]},
+ convert([], as_selector(#{<<"$nor">> => [#{<<"field">> => <<"value">>}]}))
+ ),
+ ?assertEqual(
+ {op_and, [
+ {op_not, {
+ {op_or, [
+ {op_fieldname, {[[<<"field1">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field1">>]], ".*"}}
+ ]},
+ {op_field, {[[<<"field1">>], <<":">>, <<"string">>], <<"\"value1\"">>}}
+ }},
+ {op_not, {
+ {op_or, [
+ {op_fieldname, {[[<<"field2">>], ":"], "*"}},
+ {op_fieldname, {[[<<"field2">>]], ".*"}}
+ ]},
+ {op_field, {[[<<"field2">>], <<":">>, <<"string">>], <<"\"value2\"">>}}
+ }}
+ ]},
+ convert(
+ [],
+ as_selector(#{
+ <<"$nor">> => [#{<<"field1">> => <<"value1">>}, #{<<"field2">> => <<"value2">>}]
+ })
+ )
+ ).
+
+to_query_test() ->
+ F = fun(S) -> iolist_to_binary(to_query(S)) end,
+ Input = {<<"name">>, <<"value">>},
+ ?assertEqual(<<"(name:value)">>, F({op_field, Input})),
+ ?assertEqual(
+ <<"(name1.name2_3atype:value)">>,
+ F({op_field, {[[<<"name1">>, <<".">>, <<"name2">>], <<":">>, <<"type">>], <<"value">>}})
+ ),
+ ?assertEqual(<<"(name:value)">>, F({op_null, Input})),
+ ?assertEqual(<<"($fieldnames:name_3astring)">>, F({op_regex, <<"name">>})),
+ ?assertEqual(<<"($fieldnames:name_3a.*)">>, F({op_fieldname, {<<"name">>, <<"_3a.*">>}})),
+ Arg1 = {op_default, <<"value">>},
+ ?assertEqual(<<"($default:value)">>, F(Arg1)),
+ Arg2 = {op_field, Input},
+ ?assertEqual(<<"($fieldnames:/.*/ AND NOT ((name:value)))">>, F({op_not, {Arg2, false}})),
+ ?assertEqual(<<"((name:value))">>, F({op_not, {Arg2, {op_and, []}}})),
+ ?assertEqual(<<"">>, F({op_and, []})),
+ ?assertEqual(<<"(($default:value))">>, F({op_and, [Arg1]})),
+ ?assertEqual(<<"(($default:value) AND (name:value))">>, F({op_and, [Arg1, Arg2]})),
+ ?assertEqual(
+ <<"(($default:value) AND (name:value))">>, F({op_and, [Arg1, {op_and, []}, Arg2]})
+ ),
+ ?assertEqual(<<"">>, F({op_or, []})),
+ ?assertEqual(<<"(($default:value))">>, F({op_or, [Arg1]})),
+ ?assertEqual(<<"(($default:value) OR (name:value))">>, F({op_or, [Arg1, Arg2]})),
+ ?assertEqual(<<"(($default:value) OR (name:value))">>, F({op_or, [Arg1, Arg2, {op_or, []}]})).
+-endif.
diff --git a/src/mango/test/05-index-selection-test.py b/src/mango/test/05-index-selection-test.py
index cb4d32986..d12f076fd 100644
--- a/src/mango/test/05-index-selection-test.py
+++ b/src/mango/test/05-index-selection-test.py
@@ -334,3 +334,28 @@ class MultiTextIndexSelectionTests(mango.UserDocsTests):
def test_use_index_works(self):
resp = self.db.find({"$text": "a query"}, use_index="foo", explain=True)
self.assertEqual(resp["index"]["ddoc"], "_design/foo")
+
+
+@unittest.skipUnless(mango.has_text_service(), "requires text service")
+class RegexVsTextIndexTest(mango.DbPerClass):
+ @classmethod
+ def setUpClass(klass):
+ super(RegexVsTextIndexTest, klass).setUpClass()
+
+ def test_regex_works_with_text_index(self):
+ doc = {"currency": "HUF", "location": "EUROPE"}
+ self.db.save_docs([doc], w=3)
+
+ selector = {"currency": {"$regex": "HUF"}}
+ docs = self.db.find(selector)
+ assert docs == [doc]
+
+ # Now that it is confirmed to be working, try again the
+ # previous query with a text index on `location`. This
+ # attempt should succeed as well.
+ self.db.create_text_index(
+ name="TextIndexByLocation", fields=[{"name": "location", "type": "string"}]
+ )
+
+ docs = self.db.find(selector)
+ assert docs == [doc]
diff --git a/src/mango/test/16-index-selectors-test.py b/src/mango/test/16-index-selectors-test.py
index 4510065f5..fd0675d0e 100644
--- a/src/mango/test/16-index-selectors-test.py
+++ b/src/mango/test/16-index-selectors-test.py
@@ -171,8 +171,28 @@ class IndexSelectorJson(mango.DbPerClass):
docs = self.db.find(selector, use_index="oldschool")
self.assertEqual(len(docs), 3)
- @unittest.skipUnless(mango.has_text_service(), "requires text service")
- def test_text_saves_partialfilterselector_in_index(self):
+ def test_uses_partial_index_with_non_indexable_selector(self):
+ partial_selector = {"location": {"$gte": "FRA"}}
+ selector = {"location": {"$exists": True}}
+ self.db.create_index(
+ ["location"],
+ partial_filter_selector=partial_selector,
+ ddoc="Selected",
+ name="Selected",
+ )
+ resp = self.db.find(selector, explain=True, use_index="Selected")
+ self.assertEqual(resp["index"]["name"], "Selected")
+ docs = self.db.find(selector, use_index="Selected")
+ self.assertEqual(len(docs), 3)
+
+
+@unittest.skipUnless(mango.has_text_service(), "requires text service")
+class IndexSelectorText(mango.DbPerClass):
+ def setUp(self):
+ self.db.recreate()
+ self.db.save_docs(copy.deepcopy(DOCS))
+
+ def test_saves_partialfilterselector_in_index(self):
selector = {"location": {"$gte": "FRA"}}
self.db.create_text_index(
fields=[{"name": "location", "type": "string"}],
@@ -181,8 +201,7 @@ class IndexSelectorJson(mango.DbPerClass):
indexes = self.db.list_indexes()
self.assertEqual(indexes[1]["def"]["partial_filter_selector"], selector)
- @unittest.skipUnless(mango.has_text_service(), "requires text service")
- def test_text_uses_partial_index_for_query_selector(self):
+ def test_uses_partial_index_for_query_selector(self):
selector = {"location": {"$gte": "FRA"}}
self.db.create_text_index(
fields=[{"name": "location", "type": "string"}],
@@ -195,8 +214,7 @@ class IndexSelectorJson(mango.DbPerClass):
docs = self.db.find(selector, use_index="Selected", fields=["_id", "location"])
self.assertEqual(len(docs), 3)
- @unittest.skipUnless(mango.has_text_service(), "requires text service")
- def test_text_uses_partial_index_with_different_selector(self):
+ def test_uses_partial_index_with_different_selector(self):
selector = {"location": {"$gte": "FRA"}}
selector2 = {"location": {"$gte": "A"}}
self.db.create_text_index(
@@ -210,8 +228,7 @@ class IndexSelectorJson(mango.DbPerClass):
docs = self.db.find(selector2, use_index="Selected")
self.assertEqual(len(docs), 3)
- @unittest.skipUnless(mango.has_text_service(), "requires text service")
- def test_text_doesnot_use_selector_when_not_specified(self):
+ def test_doesnot_use_selector_when_not_specified(self):
selector = {"location": {"$gte": "FRA"}}
self.db.create_text_index(
fields=[{"name": "location", "type": "string"}],
@@ -222,8 +239,7 @@ class IndexSelectorJson(mango.DbPerClass):
resp = self.db.find(selector, explain=True)
self.assertEqual(resp["index"]["name"], "_all_docs")
- @unittest.skipUnless(mango.has_text_service(), "requires text service")
- def test_text_doesnot_use_selector_when_not_specified_with_index(self):
+ def test_doesnot_use_selector_when_not_specified_with_index(self):
selector = {"location": {"$gte": "FRA"}}
self.db.create_text_index(
fields=[{"name": "location", "type": "string"}],
@@ -237,8 +253,7 @@ class IndexSelectorJson(mango.DbPerClass):
resp = self.db.find(selector, explain=True)
self.assertEqual(resp["index"]["name"], "NotSelected")
- @unittest.skipUnless(mango.has_text_service(), "requires text service")
- def test_text_old_selector_still_supported(self):
+ def test_old_selector_still_supported(self):
selector = {"location": {"$gte": "FRA"}}
self.db.save_doc(oldschoolddoctext)
resp = self.db.find(selector, explain=True, use_index="oldschooltext")
@@ -246,8 +261,7 @@ class IndexSelectorJson(mango.DbPerClass):
docs = self.db.find(selector, use_index="oldschooltext")
self.assertEqual(len(docs), 3)
- @unittest.skipUnless(mango.has_text_service(), "requires text service")
- def test_text_old_selector_still_supported_via_api(self):
+ def test_old_selector_still_supported_via_api(self):
selector = {"location": {"$gte": "FRA"}}
self.db.create_text_index(
fields=[{"name": "location", "type": "string"}],
@@ -258,8 +272,21 @@ class IndexSelectorJson(mango.DbPerClass):
docs = self.db.find({"location": {"$exists": True}}, use_index="Selected")
self.assertEqual(len(docs), 3)
- @unittest.skipUnless(mango.has_text_service(), "requires text service")
- def test_text_partial_filter_only_in_return_if_not_default(self):
+ def test_partial_filter_only_in_return_if_not_default(self):
self.db.create_text_index(fields=[{"name": "location", "type": "string"}])
index = self.db.list_indexes()[1]
self.assertEqual("partial_filter_selector" in index["def"], False)
+
+ def test_uses_partial_index_with_non_indexable_selector(self):
+ partial_selector = {"location": {"$gte": "FRA"}}
+ selector = {"location": {"$exists": True}}
+ self.db.create_text_index(
+ ["location"],
+ partial_filter_selector=partial_selector,
+ ddoc="Selected",
+ name="Selected",
+ )
+ resp = self.db.find(selector, explain=True, use_index="Selected")
+ self.assertEqual(resp["index"]["name"], "Selected")
+ docs = self.db.find(selector, use_index="Selected")
+ self.assertEqual(len(docs), 3)