diff options
author | Tony Sun <tony.sun@cloudant.com> | 2017-04-05 00:02:57 -0700 |
---|---|---|
committer | Tony Sun <tony.sun@cloudant.com> | 2017-06-21 08:26:04 -0700 |
commit | 4e5e84ecc20808df056163f6a147c154cdf75ee3 (patch) | |
tree | 3e09314a1504f1c487d4da69dcb3dfd6890ffd3e | |
parent | 5bd0b652788cc78eb05c78229d89bf74a70f9c00 (diff) | |
download | couchdb-4e5e84ecc20808df056163f6a147c154cdf75ee3.tar.gz |
Use efficient set storage for field names (branch: 3358-use-efficient-set)
When indexing a set of fields for text search, we also create a special
field called $fieldnames. It contains values for all the fields that
need to be indexed. In order to do that, we need a unique list of the
form [[<<"$fieldnames">>, Name, []] | Rest]. The old code would check for
membership via lists:member/2 before adding each element to the list.
This is inefficient, because every check scans the list built so far. Some documents can contain a large number of
fields, so we will use gb_sets to create a unique set of fields, and
then extract out the field names.
COUCHDB-3358
-rw-r--r-- | src/mango/src/mango_native_proc.erl | 16 |
1 files changed, 6 insertions, 10 deletions
diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index 6d0fb2400..ba17c4867 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -175,7 +175,7 @@ get_text_entries0(IdxProps, Doc) -> Fields = if not DefaultEnabled -> Fields0; true -> add_default_text_field(Fields0) end, - FieldNames = get_field_names(Fields, []), + FieldNames = get_field_names(Fields), Converted = convert_text_fields(Fields), FieldNames ++ Converted. @@ -257,15 +257,11 @@ add_default_text_field([_ | Rest], Acc) -> %% index of all field names -get_field_names([], FAcc) -> - FAcc; -get_field_names([{Name, _Type, _Value} | Rest], FAcc) -> - case lists:member([<<"$fieldnames">>, Name, []], FAcc) of - true -> - get_field_names(Rest, FAcc); - false -> - get_field_names(Rest, [[<<"$fieldnames">>, Name, []] | FAcc]) - end. +get_field_names(Fields) -> + FieldNameSet = lists:foldl(fun({Name, _, _}, Set) -> + gb_sets:add([<<"$fieldnames">>, Name, []], Set) + end, gb_sets:new(), Fields), + gb_sets:to_list(FieldNameSet). convert_text_fields([]) -> |