summaryrefslogtreecommitdiff
path: root/src/mango/src/mango_selector_text.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/mango/src/mango_selector_text.erl')
-rw-r--r--src/mango/src/mango_selector_text.erl423
1 files changed, 0 insertions, 423 deletions
diff --git a/src/mango/src/mango_selector_text.erl b/src/mango/src/mango_selector_text.erl
deleted file mode 100644
index aaa1e3329..000000000
--- a/src/mango/src/mango_selector_text.erl
+++ /dev/null
@@ -1,423 +0,0 @@
-% Licensed under the Apache License, Version 2.0 (the "License"); you may not
-% use this file except in compliance with the License. You may obtain a copy of
-% the License at
-%
-% http://www.apache.org/licenses/LICENSE-2.0
-%
-% Unless required by applicable law or agreed to in writing, software
-% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-% License for the specific language governing permissions and limitations under
-% the License.
-
--module(mango_selector_text).
-
--export([
- convert/1,
- convert/2,
-
- append_sort_type/2
-]).
-
--include_lib("couch/include/couch_db.hrl").
--include("mango.hrl").
-
-%% Regex for <<"\\.">>
--define(PERIOD, "\\.").
-
-%% Translate a Mango selector into a Lucene query binary.
-%%
-%% The selector is converted into a tree of `{op_*, ...}` tuples by
-%% convert/2, starting from an empty path. That tree is rendered by
-%% to_query/1 and the resulting iolist is flattened into one binary.
-convert(Selector) ->
-    iolist_to_binary(to_query(convert([], Selector))).
-
-convert(Path, {[{<<"$and">>, Args}]}) ->
- Parts = [convert(Path, Arg) || Arg <- Args],
- {op_and, Parts};
-convert(Path, {[{<<"$or">>, Args}]}) ->
- Parts = [convert(Path, Arg) || Arg <- Args],
- {op_or, Parts};
-convert(Path, {[{<<"$not">>, Arg}]}) ->
- {op_not, {field_exists_query(Path), convert(Path, Arg)}};
-convert(Path, {[{<<"$default">>, Arg}]}) ->
- {op_field, {_, Query}} = convert(Path, Arg),
- {op_default, Query};
-% The $text operator carries a Lucene syntax query, so its value is
-% passed through value_str/1 and used as the query for the field.
-convert(Path, {[{<<"$text">>, Query}]}) when is_binary(Query) ->
- {op_field, {make_field(Path, Query), value_str(Query)}};
-% The MongoDB docs for $all are confusing and read as though
-% an implementation quirk of this operator was later simply
-% documented as a feature.
-%
-% This implementation matches the documented behavior as closely
-% as possible, but how MongoDB handles the edge conditions
-% still needs to be validated by testing.
-convert(Path, {[{<<"$all">>, Args}]}) ->
- case Args of
- [Values] when is_list(Values) ->
- % If Args is a single element array then we have to
- % either match if Path is that array or if it contains
- % the array as an element of an array (which isn't at all
- % confusing). For Lucene to return us all possible matches
- % that means we just need to search for each value in
- % Path.[] and Path.[].[] and rely on our filtering to limit
- % the results properly.
- Fields1 = convert(Path, {[{<<"$eq">>, Values}]}),
- Fields2 = convert([<<"[]">> | Path], {[{<<"$eq">>, Values}]}),
- {op_or, [Fields1, Fields2]};
- _ ->
- % Otherwise the $all operator is equivalent to an $and
- % operator applied to the array elements (Path.[]).
- convert([<<"[]">> | Path], {[{<<"$and">>, Args}]})
- end;
-% The $elemMatch Lucene query is not an exact translation
-% as we can't enforce that the matches are all for the same
-% item in an array. We just rely on the final selector match
-% to filter out anything that doesn't match. The only trick
-% is that we have to add the `[]` path element since the docs
-% say this has to match against an array.
-convert(Path, {[{<<"$elemMatch">>, Arg}]}) ->
- convert([<<"[]">> | Path], Arg);
-convert(Path, {[{<<"$allMatch">>, Arg}]}) ->
- convert([<<"[]">> | Path], Arg);
-% Comparison operators: list, object and null arguments only check field existence, others use a range
-convert(Path, {[{<<"$lt">>, Arg}]}) when
- is_list(Arg);
- is_tuple(Arg);
- Arg =:= null
-->
- field_exists_query(Path);
-convert(Path, {[{<<"$lt">>, Arg}]}) ->
- {op_field, {make_field(Path, Arg), range(lt, Arg)}};
-convert(Path, {[{<<"$lte">>, Arg}]}) when
- is_list(Arg);
- is_tuple(Arg);
- Arg =:= null
-->
- field_exists_query(Path);
-convert(Path, {[{<<"$lte">>, Arg}]}) ->
- {op_field, {make_field(Path, Arg), range(lte, Arg)}};
-%% This is for indexable_fields: equality with null yields an op_null node
-convert(Path, {[{<<"$eq">>, Arg}]}) when Arg =:= null ->
- {op_null, {make_field(Path, Arg), value_str(Arg)}};
-convert(Path, {[{<<"$eq">>, Args}]}) when is_list(Args) ->
- Path0 = [<<"[]">> | Path],
- LPart = {op_field, {make_field(Path0, length), value_str(length(Args))}},
- Parts0 = [convert(Path0, {[{<<"$eq">>, Arg}]}) || Arg <- Args],
- Parts = [LPart | Parts0],
- {op_and, Parts};
-convert(Path, {[{<<"$eq">>, {_} = Arg}]}) ->
- convert(Path, Arg);
-convert(Path, {[{<<"$eq">>, Arg}]}) ->
- {op_field, {make_field(Path, Arg), value_str(Arg)}};
-convert(Path, {[{<<"$ne">>, Arg}]}) ->
- {op_not, {field_exists_query(Path), convert(Path, {[{<<"$eq">>, Arg}]})}};
-convert(Path, {[{<<"$gte">>, Arg}]}) when
- is_list(Arg);
- is_tuple(Arg);
- Arg =:= null
-->
- field_exists_query(Path);
-convert(Path, {[{<<"$gte">>, Arg}]}) ->
- {op_field, {make_field(Path, Arg), range(gte, Arg)}};
-convert(Path, {[{<<"$gt">>, Arg}]}) when
- is_list(Arg);
- is_tuple(Arg);
- Arg =:= null
-->
- field_exists_query(Path);
-convert(Path, {[{<<"$gt">>, Arg}]}) ->
- {op_field, {make_field(Path, Arg), range(gt, Arg)}};
-convert(Path, {[{<<"$in">>, Args}]}) ->
- {op_or, convert_in(Path, Args)};
-convert(Path, {[{<<"$nin">>, Args}]}) ->
- {op_not, {field_exists_query(Path), convert(Path, {[{<<"$in">>, Args}]})}};
-convert(Path, {[{<<"$exists">>, ShouldExist}]}) ->
- FieldExists = field_exists_query(Path),
- case ShouldExist of
- true -> FieldExists;
- false -> {op_not, {FieldExists, false}}
- end;
-% We're not checking the actual type here, just looking for
-% anything that has a possibility of matching by checking
-% for the field name. We use the same logic for $exists on
-% the actual query.
-convert(Path, {[{<<"$type">>, _}]}) ->
- field_exists_query(Path);
-convert(Path, {[{<<"$mod">>, _}]}) ->
- field_exists_query(Path, "number");
-% The lucene regular expression engine does not use java's regex engine but
-% instead a custom implementation. The syntax is therefore different, so we
-% would get different behavior than our view indexes. To be consistent, we will
-% simply return docs for fields that exist and then run our match filter.
-convert(Path, {[{<<"$regex">>, _}]}) ->
- field_exists_query(Path, "string");
-convert(Path, {[{<<"$size">>, Arg}]}) ->
- {op_field, {make_field([<<"[]">> | Path], length), value_str(Arg)}};
-% All other operators are internal assertion errors for
-% matching because we either should've removed them during
-% normalization or something else broke.
-convert(_Path, {[{<<"$", _/binary>> = Op, _}]}) ->
- ?MANGO_ERROR({invalid_operator, Op});
-% We've hit a field name specifier. Check if the field name is accessing
-% arrays. Convert occurrences of element position references to .[]. Then we
-% need to break the name into path parts and continue our conversion.
-convert(Path, {[{Field0, Cond}]}) ->
- {ok, PP0} =
- case Field0 of
- <<>> ->
- {ok, []};
- _ ->
- mango_util:parse_field(Field0)
- end,
- % Later on, we perform a lucene_escape_user call on the
- % final Path, which calls parse_field again. Calling the function
- % twice converts <<"a\\.b">> to [<<"a">>,<<"b">>]. This leads to
- % an incorrect query since we need [<<"a.b">>]. Without breaking
- % our escaping mechanism, we simply revert this first parse_field
- % effect and replace instances of "." with "\\.".
- MP = mango_util:cached_re(mango_period, ?PERIOD),
- PP1 = [
- re:replace(
- P,
- MP,
- <<"\\\\.">>,
- [global, {return, binary}]
- )
- || P <- PP0
- ],
- {PP2, HasInteger} = replace_array_indexes(PP1, [], false),
- NewPath = PP2 ++ Path,
- case HasInteger of
- true ->
- OldPath = lists:reverse(PP1, Path),
- OldParts = convert(OldPath, Cond),
- NewParts = convert(NewPath, Cond),
- {op_or, [OldParts, NewParts]};
- false ->
- convert(NewPath, Cond)
- end;
-%% Bare scalar values (noted as being for $in; also reached via the $all -> $and rewrite above)
-convert(Path, Val) when is_binary(Val); is_number(Val); is_boolean(Val) ->
- {op_field, {make_field(Path, Val), value_str(Val)}};
-% Objects with more than one key are unnormalized selectors; no clause handles anything else.
-convert(_Path, {Props} = Sel) when length(Props) > 1 ->
- erlang:error({unnormalized_selector, Sel}).
-
-%% Render every sub-tree in Args with to_query/1 and keep only the
-%% non-empty renderings. Results equal to `[]` or to the string "()" are
-%% dropped; these arise from selectors that contain empty arrays.
-to_query_nested(Args) ->
-    Rendered = lists:map(fun to_query/1, Args),
-    [Query || Query <- Rendered, Query =/= [], Query =/= "()"].
-
-%% Render a query tree produced by convert/2 as a Lucene query iolist.
-%%
-%% Each `{op_*, ...}` node becomes a parenthesised query fragment.
-%% `op_and` / `op_or` nodes whose children all render to nothing render
-%% to `[]` themselves, so callers can drop them.
-to_query({op_and, []}) ->
-    [];
-to_query({op_and, SubTrees}) when is_list(SubTrees) ->
-    case to_query_nested(SubTrees) of
-        [] -> [];
-        Clauses -> ["(", mango_util:join(<<" AND ">>, Clauses), ")"]
-    end;
-to_query({op_or, []}) ->
-    [];
-to_query({op_or, SubTrees}) when is_list(SubTrees) ->
-    case to_query_nested(SubTrees) of
-        [] -> [];
-        Clauses -> ["(", mango_util:join(" OR ", Clauses), ")"]
-    end;
-to_query({op_not, {ExistsTree, NegatedTree}}) when is_tuple(NegatedTree) ->
-    % The negated part is rendered first, then the exists part.
-    Negated = to_query(NegatedTree),
-    Exists = to_query(ExistsTree),
-    case Negated of
-        % Nothing to negate: only the exists query remains.
-        [] -> ["(", Exists, ")"];
-        _ -> ["(", Exists, " AND NOT (", Negated, "))"]
-    end;
-%% For $exists:false
-to_query({op_not, {ExistsTree, false}}) ->
-    Exists = to_query(ExistsTree),
-    ["($fieldnames:/.*/ ", " AND NOT (", Exists, "))"];
-to_query({op_insert, Raw}) when is_binary(Raw) ->
-    ["(", Raw, ")"];
-%% We escape : and / for now for values and all lucene chars for fieldnames
-%% This needs to be resolved.
-to_query({op_field, {Name, Value}}) ->
-    Escaped = mango_util:lucene_escape_user(iolist_to_binary(Name)),
-    ["(", Escaped, ":", Value, ")"];
-%% This is for indexable_fields
-to_query({op_null, {Name, Value}}) ->
-    Escaped = mango_util:lucene_escape_user(iolist_to_binary(Name)),
-    ["(", Escaped, ":", Value, ")"];
-to_query({op_fieldname, {Name, Wildcard}}) ->
-    Escaped = mango_util:lucene_escape_user(iolist_to_binary(Name)),
-    ["($fieldnames:", Escaped, Wildcard, ")"];
-to_query({op_default, Value}) ->
-    ["($default:", Value, ")"].
-
-%% Build the list of alternatives for a $in operator.
-%%
-%% Every candidate in Args is matched against both the field itself
-%% (Path) and the elements of an array stored in the field (Path with a
-%% leading `[]` part). Object candidates are converted key by key
-%% through convert/2; any other value becomes a plain field query.
-convert_in(Path, Args) ->
-    ElemPath = [<<"[]">> | Path],
-    EitherPath = fun(Prop) ->
-        Direct = convert(Path, {[Prop]}),
-        InArray = convert(ElemPath, {[Prop]}),
-        {op_or, [Direct, InArray]}
-    end,
-    ConvertArg = fun
-        ({Props}) ->
-            {op_or, lists:map(EitherPath, Props)};
-        (Scalar) ->
-            Direct = {op_field, {make_field(Path, Scalar), value_str(Scalar)}},
-            InArray = {op_field, {make_field(ElemPath, Scalar), value_str(Scalar)}},
-            {op_or, [Direct, InArray]}
-    end,
-    lists:map(ConvertArg, Args).
-
-%% Build the iolist name of an indexed field: the dotted path followed by
-%% a type suffix. The atom `length` selects the `:length` suffix; any
-%% other argument gets `:` plus its type_str/1 name.
-make_field(Path, Arg) ->
-    PathStr = path_str(Path),
-    case Arg of
-        length -> [PathStr, <<":length">>];
-        _ -> [PathStr, <<":">>, type_str(Arg)]
-    end.
-
-%% Build a Lucene range expression for a comparison operator.
-%%
-%% `lt` / `lte` run from the type's minimum up to Arg, `gte` / `gt` from
-%% Arg up to the type's maximum. Curly brackets mark the exclusive end
-%% used by the strict operators (`lt`, `gt`).
-range(Op, Arg) when Op =:= lt; Op =:= lte ->
-    Lower = get_range(min, Arg),
-    Close =
-        case Op of
-            lt -> <<"}">>;
-            lte -> <<"]">>
-        end,
-    [<<"[", Lower/binary, " TO ">>, value_str(Arg), Close];
-range(Op, Arg) when Op =:= gte; Op =:= gt ->
-    Upper = get_range(max, Arg),
-    Open =
-        case Op of
-            gte -> <<"[">>;
-            gt -> <<"{">>
-        end,
-    [Open, value_str(Arg), <<" TO ", Upper/binary, "]">>].
-
-%% Return the open-ended bound used on the other side of a range query.
-%% Numbers use -Infinity / Infinity; every other value uses an empty
-%% quoted string as the minimum and a fixed marker as the maximum.
-get_range(min, Arg) ->
-    case is_number(Arg) of
-        true -> <<"-Infinity">>;
-        false -> <<"\"\"">>
-    end;
-get_range(max, Arg) ->
-    case is_number(Arg) of
-        true ->
-            <<"Infinity">>;
-        false ->
-            % NOTE(review): Erlang has no `\u` escape, so this literal
-            % presumably evaluates to the text u0x10FFFF rather than the
-            % code point U+10FFFF -- confirm that is the intended bound.
-            <<"\u0x10FFFF">>
-    end.
-
-%% Build a query tree that matches when a field exists at Path.
-%%
-%% Two field-name patterns are OR-ed together: the path followed by `:*`
-%% and the path followed by `.*`. Using both, instead of just appending
-%% `*`, avoids incorrectly matching a path foo.name against
-%% foo.name_first.
-field_exists_query(Path) ->
-    PathStr = path_str(Path),
-    Typed = {op_fieldname, {[PathStr, ":"], "*"}},
-    Nested = {op_fieldname, {[PathStr], ".*"}},
-    {op_or, [Typed, Nested]}.
-
-%% Build a field-name query for Path restricted to one type suffix
-%% (the callers in this module pass "number" or "string").
-field_exists_query(Path, Type) ->
-    TypedName = [path_str(Path), ":"],
-    {op_fieldname, {TypedName, Type}}.
-
-%% Join path parts into an iolist separated by periods.
-%%
-%% Path is held innermost-part-first (it is built backwards while
-%% convert/2 recurses), so consing each part onto the accumulator yields
-%% the outermost-first order without a reverse.
-path_str(Path) ->
-    path_str(Path, []).
-
-path_str([], Acc) ->
-    Acc;
-path_str([Outermost], Acc) ->
-    [Outermost | Acc];
-path_str([<<>> | Rest], Acc) ->
-    % A blank part contributes neither text nor a period.
-    path_str(Rest, [Acc]);
-path_str([Part | Rest], Acc) ->
-    path_str(Rest, [<<".">>, Part | Acc]).
-
-%% Name of the index type suffix for a JSON value: "number", "boolean",
-%% "string" or "null". Any other term is a function_clause error.
-type_str(null) ->
-    <<"null">>;
-type_str(Bool) when Bool =:= true; Bool =:= false ->
-    <<"boolean">>;
-type_str(Bin) when is_binary(Bin) ->
-    <<"string">>;
-type_str(Num) when is_integer(Num); is_float(Num) ->
-    <<"number">>.
-
-%% Render a JSON value as the text used on the value side of a query.
-%%
-%% Binaries are wrapped in double quotes; unless
-%% mango_util:is_number_string/1 says the text is a number, it is first
-%% passed through mango_util:lucene_escape_query_value/1. Numbers use
-%% their default text form, booleans their name, and null renders as
-%% "true".
-value_str(null) ->
-    <<"true">>;
-value_str(true) ->
-    <<"true">>;
-value_str(false) ->
-    <<"false">>;
-value_str(Int) when is_integer(Int) ->
-    integer_to_binary(Int);
-value_str(Float) when is_float(Float) ->
-    float_to_binary(Float);
-value_str(Text) when is_binary(Text) ->
-    Inner =
-        case mango_util:is_number_string(Text) of
-            true -> Text;
-            false -> mango_util:lucene_escape_query_value(Text)
-        end,
-    <<"\"", Inner/binary, "\"">>.
-
-%% Append the Lucene sort type marker to a sort field name.
-%%
-%% The field is escaped with mango_util:lucene_escape_user/1. If the
-%% escaped name already ends in `_3astring` or `_3anumber`, only the
-%% matching `<string>` / `<number>` marker is appended. Otherwise the
-%% type is derived from Selector by get_sort_type/2.
-append_sort_type(RawSortField, Selector) ->
-    Escaped = mango_util:lucene_escape_user(RawSortField),
-    IsString = mango_util:has_suffix(Escaped, <<"_3astring">>),
-    IsNumber = mango_util:has_suffix(Escaped, <<"_3anumber">>),
-    if
-        IsString =:= true ->
-            <<Escaped/binary, "<string>">>;
-        IsNumber =:= true ->
-            <<Escaped/binary, "<number>">>;
-        true ->
-            Suffix = get_sort_type(RawSortField, Selector),
-            <<Escaped/binary, Suffix/binary>>
-    end.
-
-%% Work out the sort suffix for Field from the value types it is
-%% compared with in Selector. Exactly one distinct type (string or
-%% number) must be found; anything else raises a text_sort_error.
-get_sort_type(Field, Selector) ->
-    case lists:usort(get_sort_types(Field, Selector, [])) of
-        [num] -> <<"_3anumber<number>">>;
-        [str] -> <<"_3astring<string>">>;
-        _ -> ?MANGO_ERROR({text_sort_error, Field})
-    end.
-
-%% Collect the types (`str` / `num`) of the values Field is compared
-%% with anywhere in a selector, prepending each find to the accumulator.
-%%
-%% A `{Field, {[{<<"$...">>, Operand}]}}` pair contributes `str` for a
-%% binary operand and `num` for a number. List values are searched
-%% element by element, object values are searched recursively, and
-%% anything else contributes nothing.
-get_sort_types(Field, {[{Field, {[{<<"$", _/binary>>, Operand}]}}]}, Types) when
-    is_binary(Operand)
-->
-    [str | Types];
-get_sort_types(Field, {[{Field, {[{<<"$", _/binary>>, Operand}]}}]}, Types) when
-    is_number(Operand)
-->
-    [num | Types];
-get_sort_types(Field, {[{_Key, SubSelectors}]}, Types) when is_list(SubSelectors) ->
-    Collect = fun(SubSelector, Found) ->
-        get_sort_types(Field, SubSelector, Found)
-    end,
-    lists:foldl(Collect, Types, SubSelectors);
-get_sort_types(Field, {[{_Key, SubSelector}]}, Types) when is_tuple(SubSelector) ->
-    get_sort_types(Field, SubSelector, Types);
-get_sort_types(_Field, _Other, Types) ->
-    Types.
-
-%% Replace every path part that is an integer (an array position) with
-%% the generic array marker `[]`.
-%%
-%% Parts are prepended to NewPartsAcc as they are visited, so the
-%% returned list is in reverse order of the input. The second element of
-%% the result is true when at least one part was replaced (or when
-%% HasIntAcc was already true).
-replace_array_indexes([], NewPartsAcc, HasIntAcc) ->
-    {NewPartsAcc, HasIntAcc};
-replace_array_indexes([Part | Rest], NewPartsAcc, HasIntAcc) ->
-    {NewPart, HasInt} =
-        % Only the conversion is protected, and only the badarg that
-        % marks "not an integer" is caught; any other failure surfaces.
-        try binary_to_integer(Part) of
-            _Index -> {<<"[]">>, true}
-        catch
-            error:badarg -> {Part, false}
-        end,
-    replace_array_indexes(
-        Rest,
-        [NewPart | NewPartsAcc],
-        HasInt orelse HasIntAcc
-    ).