From 4e5e84ecc20808df056163f6a147c154cdf75ee3 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Wed, 5 Apr 2017 00:02:57 -0700 Subject: Use efficient set storage for field names When indexing a set of fields for text search, we also create a special field called $fieldnames. It contains values for all the fields that need to be indexed. In order to do that, we need a unique list of the form [[<<"$fieldnames">>, Name, []] | Rest]. The old code would check each candidate entry for membership via lists:member/2 before adding it to the list. This is inefficient. Some documents can contain a large number of fields, so we will use gb_sets to create a unique set of fields, and then extract out the field names. COUCHDB-3358 --- src/mango/src/mango_native_proc.erl | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index 6d0fb2400..ba17c4867 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -175,7 +175,7 @@ get_text_entries0(IdxProps, Doc) -> Fields = if not DefaultEnabled -> Fields0; true -> add_default_text_field(Fields0) end, - FieldNames = get_field_names(Fields, []), + FieldNames = get_field_names(Fields), Converted = convert_text_fields(Fields), FieldNames ++ Converted. @@ -257,15 +257,11 @@ add_default_text_field([_ | Rest], Acc) -> %% index of all field names -get_field_names([], FAcc) -> - FAcc; -get_field_names([{Name, _Type, _Value} | Rest], FAcc) -> - case lists:member([<<"$fieldnames">>, Name, []], FAcc) of - true -> - get_field_names(Rest, FAcc); - false -> - get_field_names(Rest, [[<<"$fieldnames">>, Name, []] | FAcc]) - end. +get_field_names(Fields) -> + FieldNameSet = lists:foldl(fun({Name, _, _}, Set) -> + gb_sets:add([<<"$fieldnames">>, Name, []], Set) + end, gb_sets:new(), Fields), + gb_sets:to_list(FieldNameSet). convert_text_fields([]) -> -- cgit v1.2.1