diff options
Diffstat (limited to 'src/mango/src/mango_selector_text.erl')
-rw-r--r-- | src/mango/src/mango_selector_text.erl | 423 |
1 files changed, 0 insertions, 423 deletions
% Recovered from the deletion diff of src/mango/src/mango_selector_text.erl.
%
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(mango_selector_text).

-export([
    convert/1,
    convert/2,

    append_sort_type/2
]).

-include_lib("couch/include/couch_db.hrl").
-include("mango.hrl").

%% Regex for <<"\\.">>
-define(PERIOD, "\\.").

%% Convert a selector object into a single binary query string: build the
%% tuple tree with convert/2 starting from an empty path, render it with
%% to_query/1 and flatten the resulting iolist.
convert(Object) ->
    iolist_to_binary(to_query(convert([], Object))).

%% convert(Path, Selector) -> TupleTree
%%
%% Path is the list of field-name parts collected so far, stored innermost
%% part first (see path_str/2). The result is a tree of tagged tuples
%% (op_and, op_or, op_not, op_field, op_null, op_fieldname, op_default)
%% that to_query/1 knows how to render.
convert(Path, {[{<<"$and">>, Args}]}) ->
    {op_and, [convert(Path, Arg) || Arg <- Args]};
convert(Path, {[{<<"$or">>, Args}]}) ->
    {op_or, [convert(Path, Arg) || Arg <- Args]};
convert(Path, {[{<<"$not">>, Arg}]}) ->
    {op_not, {field_exists_query(Path), convert(Path, Arg)}};
convert(Path, {[{<<"$default">>, Arg}]}) ->
    % Only the value part of the converted field query is kept; it is
    % re-targeted at the $default field when rendered.
    {op_field, {_, Query}} = convert(Path, Arg),
    {op_default, Query};
% The $text operator carries a Lucene syntax query, so it is passed
% through as the value of a field query.
convert(Path, {[{<<"$text">>, Query}]}) when is_binary(Query) ->
    {op_field, {make_field(Path, Query), value_str(Query)}};
% $all follows the MongoDB behavior as closely as the available docs
% allow; edge conditions still need to be validated against MongoDB.
%
% When the argument is a single-element array whose element is itself an
% array, a document matches either if Path is that array or if Path
% contains that array as an element. To get every possible candidate back
% from Lucene we search for each value under both Path.[] and Path.[].[]
% and rely on the final selector match to narrow the results.
convert(Path, {[{<<"$all">>, [Values]}]}) when is_list(Values) ->
    Eq = {[{<<"$eq">>, Values}]},
    {op_or, [convert(Path, Eq), convert([<<"[]">> | Path], Eq)]};
% In every other case $all is equivalent to $and over the array elements.
convert(Path, {[{<<"$all">>, Args}]}) ->
    convert([<<"[]">> | Path], {[{<<"$and">>, Args}]});
% The Lucene query for $elemMatch is not an exact translation because we
% cannot require that all conditions hold for the same array item; the
% final selector match filters out the false positives. The `[]` path
% element is added because the operator has to match against an array.
convert(Path, {[{<<"$elemMatch">>, Arg}]}) ->
    convert([<<"[]">> | Path], Arg);
convert(Path, {[{<<"$allMatch">>, Arg}]}) ->
    convert([<<"[]">> | Path], Arg);
% Comparison operators share one implementation, see compare_query/3.
convert(Path, {[{<<"$lt">>, Arg}]}) ->
    compare_query(Path, lt, Arg);
convert(Path, {[{<<"$lte">>, Arg}]}) ->
    compare_query(Path, lte, Arg);
%% This is for indexable_fields
convert(Path, {[{<<"$eq">>, null}]}) ->
    {op_null, {make_field(Path, null), value_str(null)}};
% Equality with an array: the indexed length must match and every element
% must be present under Path.[].
convert(Path, {[{<<"$eq">>, Args}]}) when is_list(Args) ->
    ArrayPath = [<<"[]">> | Path],
    LengthPart = {op_field, {make_field(ArrayPath, length), value_str(length(Args))}},
    ElementParts = [convert(ArrayPath, {[{<<"$eq">>, Arg}]}) || Arg <- Args],
    {op_and, [LengthPart | ElementParts]};
% Equality with an object: descend into the object itself.
convert(Path, {[{<<"$eq">>, {_} = Arg}]}) ->
    convert(Path, Arg);
convert(Path, {[{<<"$eq">>, Arg}]}) ->
    {op_field, {make_field(Path, Arg), value_str(Arg)}};
convert(Path, {[{<<"$ne">>, Arg}]}) ->
    {op_not, {field_exists_query(Path), convert(Path, {[{<<"$eq">>, Arg}]})}};
convert(Path, {[{<<"$gte">>, Arg}]}) ->
    compare_query(Path, gte, Arg);
convert(Path, {[{<<"$gt">>, Arg}]}) ->
    compare_query(Path, gt, Arg);
convert(Path, {[{<<"$in">>, Args}]}) ->
    {op_or, convert_in(Path, Args)};
convert(Path, {[{<<"$nin">>, Args}]}) ->
    {op_not, {field_exists_query(Path), convert(Path, {[{<<"$in">>, Args}]})}};
convert(Path, {[{<<"$exists">>, ShouldExist}]}) ->
    FieldExists = field_exists_query(Path),
    case ShouldExist of
        true -> FieldExists;
        % The `false` marker selects the dedicated $exists:false
        % rendering in to_query/1.
        false -> {op_not, {FieldExists, false}}
    end;
% The actual type is not checked here; we only look for anything that
% could possibly match by testing for the field name, the same way
% $exists does.
convert(Path, {[{<<"$type">>, _}]}) ->
    field_exists_query(Path);
convert(Path, {[{<<"$mod">>, _}]}) ->
    field_exists_query(Path, "number");
% The Lucene regular expression engine is a custom implementation rather
% than Java's regex engine, so its syntax differs and we would get
% different behavior than our view indexes. To be consistent, we simply
% return docs for fields that exist and then run our match filter.
convert(Path, {[{<<"$regex">>, _}]}) ->
    field_exists_query(Path, "string");
convert(Path, {[{<<"$size">>, Arg}]}) ->
    {op_field, {make_field([<<"[]">> | Path], length), value_str(Arg)}};
% Any other operator is an internal assertion error: it should have been
% removed during normalization, or something else broke.
convert(_Path, {[{<<"$", _/binary>> = Op, _}]}) ->
    ?MANGO_ERROR({invalid_operator, Op});
% A field name specifier. The name is split into path parts, and parts
% that look like array positions are additionally converted to `[]`
% before the conversion continues with the nested condition.
convert(Path, {[{Field0, Cond}]}) ->
    {ok, Parsed} = parse_field_parts(Field0),
    % Later on, lucene_escape_user is called on the final path, which
    % calls parse_field again. Parsing twice would turn <<"a\\.b">> into
    % [<<"a">>,<<"b">>] where [<<"a.b">>] is needed. Without breaking the
    % escaping mechanism, we revert the effect of this first parse by
    % replacing every "." with "\\.".
    PeriodRe = mango_util:cached_re(mango_period, ?PERIOD),
    Escaped = [
        re:replace(Part, PeriodRe, <<"\\\\.">>, [global, {return, binary}])
     || Part <- Parsed
    ],
    {Indexed, HasInteger} = replace_array_indexes(Escaped, [], false),
    NewPath = Indexed ++ Path,
    case HasInteger of
        true ->
            % An integer part may be either an array position or a plain
            % field name, so both readings are queried.
            OldPath = lists:reverse(Escaped, Path),
            OldParts = convert(OldPath, Cond),
            NewParts = convert(NewPath, Cond),
            {op_or, [OldParts, NewParts]};
        false ->
            convert(NewPath, Cond)
    end;
%% For $in
convert(Path, Val) when is_binary(Val); is_number(Val); is_boolean(Val) ->
    {op_field, {make_field(Path, Val), value_str(Val)}};
% Anything else is a bad selector.
convert(_Path, {Props} = Sel) when length(Props) > 1 ->
    erlang:error({unnormalized_selector, Sel}).

%% Shared implementation of $lt, $lte, $gte and $gt. Arrays, objects and
%% null have no range representation here, so for those only the
%% existence of the field is queried.
compare_query(Path, _Op, Arg) when
    is_list(Arg);
    is_tuple(Arg);
    Arg =:= null
->
    field_exists_query(Path);
compare_query(Path, Op, Arg) ->
    {op_field, {make_field(Path, Arg), range(Op, Arg)}}.

%% Split a field name into its parts; the empty name has no parts and is
%% not handed to mango_util:parse_field/1.
parse_field_parts(<<>>) ->
    {ok, []};
parse_field_parts(Field) ->
    mango_util:parse_field(Field).

%% Render every argument and drop the empty queries that result from
%% selectors with empty arrays.
to_query_nested(Args) ->
    [Query || Query <- lists:map(fun to_query/1, Args), Query =/= [], Query =/= "()"].

%% Render a tuple tree built by convert/2 as an iolist holding the query
%% text. Empty conjunctions and disjunctions render as [] so that callers
%% can drop them.
to_query({op_and, []}) ->
    [];
to_query({op_and, Args}) when is_list(Args) ->
    join_nested(<<" AND ">>, Args);
to_query({op_or, []}) ->
    [];
to_query({op_or, Args}) when is_list(Args) ->
    join_nested(" OR ", Args);
to_query({op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) ->
    % If the negated part renders to nothing, only the existence check
    % is emitted.
    case to_query(Arg) of
        [] -> ["(", to_query(ExistsQuery), ")"];
        Negated -> ["(", to_query(ExistsQuery), " AND NOT (", Negated, "))"]
    end;
%% For $exists:false
to_query({op_not, {ExistsQuery, false}}) ->
    ["($fieldnames:/.*/ ", " AND NOT (", to_query(ExistsQuery), "))"];
to_query({op_insert, Arg}) when is_binary(Arg) ->
    ["(", Arg, ")"];
%% We escape : and / for now for values and all lucene chars for fieldnames
%% This needs to be resolved.
to_query({op_field, {Name, Value}}) ->
    field_clause(Name, Value);
%% This is for indexable_fields
to_query({op_null, {Name, Value}}) ->
    field_clause(Name, Value);
to_query({op_fieldname, {Name, Wildcard}}) ->
    ["($fieldnames:", mango_util:lucene_escape_user(iolist_to_binary(Name)), Wildcard, ")"];
to_query({op_default, Value}) ->
    ["($default:", Value, ")"].

%% Render the arguments, join the non-empty ones with Separator and wrap
%% the result in parentheses; yields [] when nothing is left to join.
join_nested(Separator, Args) ->
    case to_query_nested(Args) of
        [] -> [];
        Queries -> ["(", mango_util:join(Separator, Queries), ")"]
    end.

%% "(<escaped field name>:<value>)", shared by op_field and op_null.
field_clause(Name, Value) ->
    ["(", mango_util:lucene_escape_user(iolist_to_binary(Name)), ":", Value, ")"].

%% Build the alternatives for $in. Every argument is matched on both
%% fieldname and fieldname.[]
convert_in(Path, Args) ->
    ArrayPath = [<<"[]">> | Path],
    lists:map(fun(Arg) -> convert_in_arg(Path, ArrayPath, Arg) end, Args).

%% One $in argument: an object contributes one alternative per property,
%% any other value is compared directly.
convert_in_arg(Path, ArrayPath, {Object}) ->
    Alternatives = lists:map(
        fun(Property) ->
            Single = {[Property]},
            {op_or, [convert(Path, Single), convert(ArrayPath, Single)]}
        end,
        Object
    ),
    {op_or, Alternatives};
convert_in_arg(Path, ArrayPath, SingleVal) ->
    {op_or, [
        {op_field, {make_field(Path, SingleVal), value_str(SingleVal)}},
        {op_field, {make_field(ArrayPath, SingleVal), value_str(SingleVal)}}
    ]}.

%% Field name as an iolist: the dotted path followed by either ":length"
%% (for the atom `length`) or ":" and the type name of the value.
make_field(Path, length) ->
    [path_str(Path), <<":length">>];
make_field(Path, Arg) ->
    [path_str(Path), <<":">>, type_str(Arg)].

%% Range expression for a comparison operator. `[`/`]` are used for the
%% bounds that include the value and `{`/`}` for the ones that exclude it;
%% the opposite end is the extreme returned by get_range/2.
range(lt, Arg) ->
    range_up_to(Arg, <<"}">>);
range(lte, Arg) ->
    range_up_to(Arg, <<"]">>);
range(gte, Arg) ->
    range_from(<<"[">>, Arg);
range(gt, Arg) ->
    range_from(<<"{">>, Arg).

%% From the minimum for Arg's type up to Arg, closed with Close.
range_up_to(Arg, Close) ->
    Min = get_range(min, Arg),
    [<<"[", Min/binary, " TO ">>, value_str(Arg), Close].

%% From Arg, opened with Open, up to the maximum for Arg's type.
range_from(Open, Arg) ->
    Max = get_range(max, Arg),
    [Open, value_str(Arg), <<" TO ", Max/binary, "]">>].

%% Extreme value used for the open end of a range: infinities for
%% numbers, string bounds for everything else.
get_range(min, Arg) ->
    case is_number(Arg) of
        true -> <<"-Infinity">>;
        false -> <<"\"\"">>
    end;
get_range(max, Arg) ->
    case is_number(Arg) of
        true ->
            <<"Infinity">>;
        false ->
            % NOTE(review): presumably meant to be the highest code point
            % (U+10FFFF), but Erlang strings have no `\u` escape form, so
            % confirm what is actually sent to the search engine here.
            <<"\u0x10FFFF">>
    end.

%% Query that matches documents in which the field at Path exists.
field_exists_query(Path) ->
    % Two patterns are emitted, one for :* and one for .*, so that a path
    % foo.name is not incorrectly matched against foo.name_first (which
    % would happen if we just appended * instead).
    %
    % We need to remove the period from the path list to indicate that it
    % is a path separator. We escape the colon because it is not used as a
    % separator and we escape colons in field names.
    PathStr = path_str(Path),
    {op_or, [
        {op_fieldname, {[PathStr, ":"], "*"}},
        {op_fieldname, {[PathStr], ".*"}}
    ]}.

%% Same as field_exists_query/1, restricted to fields indexed with the
%% given type suffix.
field_exists_query(Path, Type) ->
    {op_fieldname, {[path_str(Path), ":"], Type}}.

%% Dotted field name for Path, as an iolist.
path_str(Path) ->
    path_str(Path, []).

%% Accumulating worker for path_str/1. Path arrives innermost part first,
%% so prepending each part to the accumulator yields the outermost-first
%% order without an explicit reverse.
path_str([], Acc) ->
    Acc;
path_str([Part], Acc) ->
    % Last (outermost) part: no separator in front of it.
    [Part | Acc];
path_str([<<>> | Rest], Acc) ->
    % do not append a period if Part is blank
    path_str(Rest, [Acc]);
path_str([Part | Rest], Acc) ->
    path_str(Rest, [<<".">>, Part | Acc]).

%% Type suffix used in field names for a JSON scalar.
type_str(Value) when is_number(Value) ->
    <<"number">>;
type_str(Value) when is_boolean(Value) ->
    <<"boolean">>;
type_str(Value) when is_binary(Value) ->
    <<"string">>;
type_str(null) ->
    <<"null">>.

%% Textual form of a scalar as it appears in the query. Strings are
%% wrapped in double quotes, and escaped first unless
%% mango_util:is_number_string/1 reports them as numeric.
value_str(Value) when is_binary(Value) ->
    Inner =
        case mango_util:is_number_string(Value) of
            true -> Value;
            false -> mango_util:lucene_escape_query_value(Value)
        end,
    <<"\"", Inner/binary, "\"">>;
value_str(Value) when is_integer(Value) ->
    list_to_binary(integer_to_list(Value));
value_str(Value) when is_float(Value) ->
    list_to_binary(float_to_list(Value));
value_str(true) ->
    <<"true">>;
value_str(false) ->
    <<"false">>;
% NOTE(review): null renders as "true"; presumably the null-typed field
% is indexed with a true value -- confirm against the indexer.
value_str(null) ->
    <<"true">>.

%% Append the sort type marker to a sort field. If the escaped field name
%% already ends in the string or number type suffix only the marker is
%% added; otherwise the type is derived from the selector.
append_sort_type(RawSortField, Selector) ->
    Encoded = mango_util:lucene_escape_user(RawSortField),
    IsString = mango_util:has_suffix(Encoded, <<"_3astring">>),
    IsNumber = mango_util:has_suffix(Encoded, <<"_3anumber">>),
    if
        IsString ->
            <<Encoded/binary, "<string>">>;
        IsNumber ->
            <<Encoded/binary, "<number>">>;
        true ->
            Suffix = get_sort_type(RawSortField, Selector),
            <<Encoded/binary, Suffix/binary>>
    end.

%% Type suffix plus sort marker for Field, derived from the conditions
%% the selector places on it. Raises a text_sort_error unless those
%% conditions agree on exactly one type.
get_sort_type(Field, Selector) ->
    Types = get_sort_types(Field, Selector, []),
    sort_suffix(lists:usort(Types), Field).

sort_suffix([str], _Field) ->
    <<"_3astring<string>">>;
sort_suffix([num], _Field) ->
    <<"_3anumber<number>">>;
sort_suffix(_Other, Field) ->
    ?MANGO_ERROR({text_sort_error, Field}).

%% Collect, in Acc, one tag (`str` or `num`) for every operator condition
%% on Field whose argument is a binary or a number, walking through the
%% lists and objects nested in the selector.
get_sort_types(Field, {[{Field, {[{<<"$", _/binary>>, Cond}]}}]}, Acc) when
    is_binary(Cond); is_number(Cond)
->
    [sort_tag(Cond) | Acc];
get_sort_types(Field, {[{_, Conds}]}, Acc) when is_list(Conds) ->
    lists:foldl(
        fun(Cond, TypesSoFar) ->
            get_sort_types(Field, Cond, TypesSoFar)
        end,
        Acc,
        Conds
    );
get_sort_types(Field, {[{_, Cond}]}, Acc) when is_tuple(Cond) ->
    get_sort_types(Field, Cond, Acc);
get_sort_types(_Field, _, Acc) ->
    Acc.

sort_tag(Cond) when is_binary(Cond) ->
    str;
sort_tag(Cond) when is_number(Cond) ->
    num.

%% Walk the path parts, replacing every part that parses as an integer
%% with <<"[]">>. Parts are prepended to the accumulator, so the returned
%% list is in reverse order of the input. The boolean reports whether any
%% part was replaced (or-ed with the initial flag).
replace_array_indexes([], NewPartsAcc, HasIntAcc) ->
    {NewPartsAcc, HasIntAcc};
replace_array_indexes([Part | Rest], NewPartsAcc, HasIntAcc) ->
    {NewPart, HasInt} = classify_part(Part),
    replace_array_indexes(
        Rest,
        [NewPart | NewPartsAcc],
        HasInt or HasIntAcc
    ).

%% {<<"[]">>, true} when Part parses as an integer, {Part, false}
%% otherwise; a failed parse is the expected "not an index" outcome.
classify_part(Part) ->
    try list_to_integer(binary_to_list(Part)) of
        _Int -> {<<"[]">>, true}
    catch
        _:_ -> {Part, false}
    end.