diff options
Diffstat (limited to 'src/mongo/db/fts/fts_spec.cpp')
-rw-r--r-- | src/mongo/db/fts/fts_spec.cpp | 753 |
1 files changed, 352 insertions, 401 deletions
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp index 274d9a6d6ba..eb7e018b522 100644 --- a/src/mongo/db/fts/fts_spec.cpp +++ b/src/mongo/db/fts/fts_spec.cpp @@ -40,457 +40,408 @@ namespace mongo { - namespace fts { - - using std::map; - using std::string; - using namespace mongoutils; - - const double DEFAULT_WEIGHT = 1; - const double MAX_WEIGHT = 1000000000; - const double MAX_WORD_WEIGHT = MAX_WEIGHT / 10000; - - namespace { - // Default language. Used for new indexes. - const std::string moduleDefaultLanguage( "english" ); - - /** Validate the given language override string. */ - bool validateOverride( const string& override ) { - // The override field can't be empty, can't be prefixed with a dollar sign, and - // can't contain a dot. - return !override.empty() && - override[0] != '$' && - override.find('.') == std::string::npos; - } - } - - FTSSpec::FTSSpec( const BSONObj& indexInfo ) { - // indexInfo is a text index spec. Text index specs pass through fixSpec() before - // being saved to the system.indexes collection. fixSpec() enforces a schema, such that - // required fields must exist and be of the correct type (e.g. weights, - // textIndexVersion). - massert( 16739, "found invalid spec for text index", - indexInfo["weights"].isABSONObj() ); - BSONElement textIndexVersionElt = indexInfo["textIndexVersion"]; - massert( 17367, - "found invalid spec for text index, expected number for textIndexVersion", - textIndexVersionElt.isNumber() ); - - // We currently support TEXT_INDEX_VERSION_1 (deprecated) and TEXT_INDEX_VERSION_2. - // Reject all other values. - massert( 17364, - str::stream() << "attempt to use unsupported textIndexVersion " << - textIndexVersionElt.numberInt() << "; versions supported: " << - TEXT_INDEX_VERSION_2 << ", " << TEXT_INDEX_VERSION_1, - textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_2 || - textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_1 ); - - _textIndexVersion = ( textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_2 ) ? - TEXT_INDEX_VERSION_2 : TEXT_INDEX_VERSION_1; - - // Initialize _defaultLanguage. Note that the FTSLanguage constructor requires - // textIndexVersion, since language parsing is version-specific. - auto indexLanguage = indexInfo["default_language"].String(); - auto swl = FTSLanguage::make(indexLanguage , _textIndexVersion ); - - // This can fail if the user originally created the text index under an instance of - // MongoDB that supports different languages then the current instance - // TODO: consder propagating the index ns to here to improve the error message - uassert(28682, - str::stream() << "Unrecognized language " << indexLanguage << - " found for text index. Verify mongod was started with the" - " correct options.", - swl.getStatus().isOK()); - _defaultLanguage = swl.getValue(); - - _languageOverrideField = indexInfo["language_override"].valuestrsafe(); - - _wildcard = false; - - // in this block we fill in the _weights map - { - BSONObjIterator i( indexInfo["weights"].Obj() ); - while ( i.more() ) { - BSONElement e = i.next(); - verify( e.isNumber() ); - - if ( WILDCARD == e.fieldName() ) { - _wildcard = true; - } - else { - double num = e.number(); - _weights[ e.fieldName() ] = num; - verify( num > 0 && num < MAX_WORD_WEIGHT ); - } - } - verify( _wildcard || _weights.size() ); - } - - // extra information - { - BSONObj keyPattern = indexInfo["key"].Obj(); - verify( keyPattern.nFields() >= 2 ); - BSONObjIterator i( keyPattern ); +namespace fts { - bool passedFTS = false; +using std::map; +using std::string; +using namespace mongoutils; - while ( i.more() ) { - BSONElement e = i.next(); - if ( str::equals( e.fieldName(), "_fts" ) || - str::equals( e.fieldName(), "_ftsx" ) ) { - passedFTS = true; - continue; - } +const double DEFAULT_WEIGHT = 1; +const double MAX_WEIGHT = 1000000000; +const double MAX_WORD_WEIGHT = MAX_WEIGHT / 10000; - if ( passedFTS ) - _extraAfter.push_back( e.fieldName() ); - else - _extraBefore.push_back( e.fieldName() ); - } +namespace { +// Default language. Used for new indexes. +const std::string moduleDefaultLanguage("english"); - } - } +/** Validate the given language override string. */ +bool validateOverride(const string& override) { + // The override field can't be empty, can't be prefixed with a dollar sign, and + // can't contain a dot. + return !override.empty() && override[0] != '$' && override.find('.') == std::string::npos; +} +} - const FTSLanguage* FTSSpec::_getLanguageToUseV2( const BSONObj& userDoc, - const FTSLanguage* currentLanguage ) const { - BSONElement e = userDoc[_languageOverrideField]; - if ( e.eoo() ) { - return currentLanguage; +FTSSpec::FTSSpec(const BSONObj& indexInfo) { + // indexInfo is a text index spec. Text index specs pass through fixSpec() before + // being saved to the system.indexes collection. fixSpec() enforces a schema, such that + // required fields must exist and be of the correct type (e.g. weights, + // textIndexVersion). + massert(16739, "found invalid spec for text index", indexInfo["weights"].isABSONObj()); + BSONElement textIndexVersionElt = indexInfo["textIndexVersion"]; + massert(17367, + "found invalid spec for text index, expected number for textIndexVersion", + textIndexVersionElt.isNumber()); + + // We currently support TEXT_INDEX_VERSION_1 (deprecated) and TEXT_INDEX_VERSION_2. + // Reject all other values. + massert(17364, + str::stream() << "attempt to use unsupported textIndexVersion " + << textIndexVersionElt.numberInt() << "; versions supported: " + << TEXT_INDEX_VERSION_2 << ", " << TEXT_INDEX_VERSION_1, + textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_2 || + textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_1); + + _textIndexVersion = (textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_2) + ? TEXT_INDEX_VERSION_2 + : TEXT_INDEX_VERSION_1; + + // Initialize _defaultLanguage. Note that the FTSLanguage constructor requires + // textIndexVersion, since language parsing is version-specific. + auto indexLanguage = indexInfo["default_language"].String(); + auto swl = FTSLanguage::make(indexLanguage, _textIndexVersion); + + // This can fail if the user originally created the text index under an instance of + // MongoDB that supports different languages then the current instance + // TODO: consder propagating the index ns to here to improve the error message + uassert(28682, + str::stream() << "Unrecognized language " << indexLanguage + << " found for text index. Verify mongod was started with the" + " correct options.", + swl.getStatus().isOK()); + _defaultLanguage = swl.getValue(); + + _languageOverrideField = indexInfo["language_override"].valuestrsafe(); + + _wildcard = false; + + // in this block we fill in the _weights map + { + BSONObjIterator i(indexInfo["weights"].Obj()); + while (i.more()) { + BSONElement e = i.next(); + verify(e.isNumber()); + + if (WILDCARD == e.fieldName()) { + _wildcard = true; + } else { + double num = e.number(); + _weights[e.fieldName()] = num; + verify(num > 0 && num < MAX_WORD_WEIGHT); } - uassert( 17261, - "found language override field in document with non-string type", - e.type() == mongo::String ); - StatusWithFTSLanguage swl = FTSLanguage::make( e.String(), TEXT_INDEX_VERSION_2 ); - uassert( 17262, - "language override unsupported: " + e.String(), - swl.getStatus().isOK() ); - return swl.getValue(); } + verify(_wildcard || _weights.size()); + } - void FTSSpec::scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const { - if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) { - return _scoreDocumentV1( obj, term_freqs ); - } + // extra information + { + BSONObj keyPattern = indexInfo["key"].Obj(); + verify(keyPattern.nFields() >= 2); + BSONObjIterator i(keyPattern); - FTSElementIterator it( *this, obj ); + bool passedFTS = false; - while ( it.more() ) { - FTSIteratorValue val = it.next(); - std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer()); - _scoreStringV2( tokenizer.get(), val._text, term_freqs, val._weight ); + while (i.more()) { + BSONElement e = i.next(); + if (str::equals(e.fieldName(), "_fts") || str::equals(e.fieldName(), "_ftsx")) { + passedFTS = true; + continue; } + + if (passedFTS) + _extraAfter.push_back(e.fieldName()); + else + _extraBefore.push_back(e.fieldName()); } + } +} - void FTSSpec::_scoreStringV2( FTSTokenizer* tokenizer, - StringData raw, - TermFrequencyMap* docScores, - double weight ) const { +const FTSLanguage* FTSSpec::_getLanguageToUseV2(const BSONObj& userDoc, + const FTSLanguage* currentLanguage) const { + BSONElement e = userDoc[_languageOverrideField]; + if (e.eoo()) { + return currentLanguage; + } + uassert(17261, + "found language override field in document with non-string type", + e.type() == mongo::String); + StatusWithFTSLanguage swl = FTSLanguage::make(e.String(), TEXT_INDEX_VERSION_2); + uassert(17262, "language override unsupported: " + e.String(), swl.getStatus().isOK()); + return swl.getValue(); +} - ScoreHelperMap terms; +void FTSSpec::scoreDocument(const BSONObj& obj, TermFrequencyMap* term_freqs) const { + if (_textIndexVersion == TEXT_INDEX_VERSION_1) { + return _scoreDocumentV1(obj, term_freqs); + } - unsigned numTokens = 0; + FTSElementIterator it(*this, obj); - tokenizer->reset(raw.rawData(), FTSTokenizer::FilterStopWords ); + while (it.more()) { + FTSIteratorValue val = it.next(); + std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer()); + _scoreStringV2(tokenizer.get(), val._text, term_freqs, val._weight); + } +} - while (tokenizer->moveNext()) { - string term = tokenizer->get().toString(); +void FTSSpec::_scoreStringV2(FTSTokenizer* tokenizer, + StringData raw, + TermFrequencyMap* docScores, + double weight) const { + ScoreHelperMap terms; - ScoreHelperStruct& data = terms[term]; + unsigned numTokens = 0; - if ( data.exp ) { - data.exp *= 2; - } - else { - data.exp = 1; - } - data.count += 1; - data.freq += ( 1 / data.exp ); - numTokens++; - } + tokenizer->reset(raw.rawData(), FTSTokenizer::FilterStopWords); - for ( ScoreHelperMap::const_iterator i = terms.begin(); i != terms.end(); ++i ) { + while (tokenizer->moveNext()) { + string term = tokenizer->get().toString(); - const string& term = i->first; - const ScoreHelperStruct& data = i->second; + ScoreHelperStruct& data = terms[term]; - // in order to adjust weights as a function of term count as it - // relates to total field length. ie. is this the only word or - // a frequently occuring term? or does it only show up once in - // a long block of text? + if (data.exp) { + data.exp *= 2; + } else { + data.exp = 1; + } + data.count += 1; + data.freq += (1 / data.exp); + numTokens++; + } - double coeff = ( 0.5 * data.count / numTokens ) + 0.5; + for (ScoreHelperMap::const_iterator i = terms.begin(); i != terms.end(); ++i) { + const string& term = i->first; + const ScoreHelperStruct& data = i->second; - // if term is identical to the raw form of the - // field (untokenized) give it a small boost. - double adjustment = 1; - if ( raw.size() == term.length() && raw.equalCaseInsensitive( term ) ) - adjustment += 0.1; + // in order to adjust weights as a function of term count as it + // relates to total field length. ie. is this the only word or + // a frequently occuring term? or does it only show up once in + // a long block of text? - double& score = (*docScores)[term]; - score += ( weight * data.freq * coeff * adjustment ); - verify( score <= MAX_WEIGHT ); - } - } + double coeff = (0.5 * data.count / numTokens) + 0.5; - Status FTSSpec::getIndexPrefix( const BSONObj& query, BSONObj* out ) const { - if ( numExtraBefore() == 0 ) { - *out = BSONObj(); - return Status::OK(); - } + // if term is identical to the raw form of the + // field (untokenized) give it a small boost. + double adjustment = 1; + if (raw.size() == term.length() && raw.equalCaseInsensitive(term)) + adjustment += 0.1; - BSONObjBuilder b; - for ( unsigned i = 0; i < numExtraBefore(); i++ ) { - BSONElement e = query.getFieldDotted(extraBefore(i)); - if ( e.eoo() ) - return Status( ErrorCodes::BadValue, - str::stream() - << "need have an equality filter on: " - << extraBefore(i) ); - - if ( e.isABSONObj() && e.Obj().firstElement().getGtLtOp( -1 ) != -1 ) - return Status( ErrorCodes::BadValue, - str::stream() - << "need have an equality filter on: " - << extraBefore(i) ); - - b.append( e ); - } - *out = b.obj(); - return Status::OK(); - } + double& score = (*docScores)[term]; + score += (weight * data.freq * coeff * adjustment); + verify(score <= MAX_WEIGHT); + } +} - namespace { - void _addFTSStuff( BSONObjBuilder* b ) { - b->append( "_fts", INDEX_NAME ); - b->append( "_ftsx", 1 ); - } +Status FTSSpec::getIndexPrefix(const BSONObj& query, BSONObj* out) const { + if (numExtraBefore() == 0) { + *out = BSONObj(); + return Status::OK(); + } - void verifyFieldNameNotReserved( StringData s ) { - uassert( 17289, - "text index with reserved fields _fts/_ftsx not allowed", - s != "_fts" && s != "_ftsx" ); - } - } + BSONObjBuilder b; + for (unsigned i = 0; i < numExtraBefore(); i++) { + BSONElement e = query.getFieldDotted(extraBefore(i)); + if (e.eoo()) + return Status(ErrorCodes::BadValue, + str::stream() << "need have an equality filter on: " << extraBefore(i)); - BSONObj FTSSpec::fixSpec( const BSONObj& spec ) { - if ( spec["textIndexVersion"].numberInt() == TEXT_INDEX_VERSION_1 ) { - return _fixSpecV1( spec ); - } + if (e.isABSONObj() && e.Obj().firstElement().getGtLtOp(-1) != -1) + return Status(ErrorCodes::BadValue, + str::stream() << "need have an equality filter on: " << extraBefore(i)); - map<string,int> m; - - BSONObj keyPattern; - { - BSONObjBuilder b; - - // Populate m and keyPattern. - { - bool addedFtsStuff = false; - BSONObjIterator i( spec["key"].Obj() ); - while ( i.more() ) { - BSONElement e = i.next(); - if ( str::equals( e.fieldName(), "_fts" ) ) { - uassert( 17271, - "expecting _fts:\"text\"", - INDEX_NAME == e.valuestrsafe() ); - addedFtsStuff = true; - b.append( e ); - } - else if ( str::equals( e.fieldName(), "_ftsx" ) ) { - uassert( 17272, "expecting _ftsx:1", e.numberInt() == 1 ); - b.append( e ); - } - else if ( e.type() == String && INDEX_NAME == e.valuestr() ) { - - if ( !addedFtsStuff ) { - _addFTSStuff( &b ); - addedFtsStuff = true; - } - - m[e.fieldName()] = 1; - } - else { - uassert( 17273, - "expected value 1 or -1 for non-text key in compound index", - e.numberInt() == 1 || e.numberInt() == -1 ); - b.append( e ); - } - } - verify( addedFtsStuff ); - } - keyPattern = b.obj(); - - // Verify that index key is in the correct format: extraBefore fields, then text - // fields, then extraAfter fields. - { - BSONObjIterator i( spec["key"].Obj() ); - verify( i.more() ); - BSONElement e = i.next(); - - // extraBefore fields - while ( String != e.type() ) { - verifyFieldNameNotReserved( e.fieldNameStringData() ); - verify( i.more() ); - e = i.next(); - } + b.append(e); + } + *out = b.obj(); + return Status::OK(); +} - // text fields - bool alreadyFixed = str::equals( e.fieldName(), "_fts" ); - if ( alreadyFixed ) { - uassert( 17288, "expected _ftsx after _fts", i.more() ); - e = i.next(); - uassert( 17274, - "expected _ftsx after _fts", - str::equals( e.fieldName(), "_ftsx" ) ); - e = i.next(); - } - else { - do { - verifyFieldNameNotReserved( e.fieldNameStringData() ); - e = i.next(); - } while ( !e.eoo() && e.type() == String ); - } +namespace { +void _addFTSStuff(BSONObjBuilder* b) { + b->append("_fts", INDEX_NAME); + b->append("_ftsx", 1); +} - // extraAfterFields - while ( !e.eoo() ) { - uassert( 17389, - "'text' fields in index must all be adjacent", - e.type() != String ); - verifyFieldNameNotReserved( e.fieldNameStringData() ); - e = i.next(); - } - } +void verifyFieldNameNotReserved(StringData s) { + uassert(17289, + "text index with reserved fields _fts/_ftsx not allowed", + s != "_fts" && s != "_ftsx"); +} +} - } +BSONObj FTSSpec::fixSpec(const BSONObj& spec) { + if (spec["textIndexVersion"].numberInt() == TEXT_INDEX_VERSION_1) { + return _fixSpecV1(spec); + } - if ( spec["weights"].type() == Object ) { - BSONObjIterator i( spec["weights"].Obj() ); - while ( i.more() ) { - BSONElement e = i.next(); - uassert( 17283, - "weight for text index needs numeric type", - e.isNumber() ); - m[e.fieldName()] = e.numberInt(); - } - } - else if ( spec["weights"].str() == WILDCARD ) { - m[WILDCARD] = 1; - } - else if ( !spec["weights"].eoo() ) { - uasserted( 17284, "text index option 'weights' must be an object" ); - } + map<string, int> m; - BSONObj weights; - { - BSONObjBuilder b; - for ( map<string,int>::iterator i = m.begin(); i != m.end(); ++i ) { - uassert( 16674, "score for word too high", - i->second > 0 && i->second < MAX_WORD_WEIGHT ); - - // Verify weight refers to a valid field. - if ( i->first != "$**" ) { - FieldRef keyField( i->first ); - uassert( 17294, - "weight cannot be on an empty field", - keyField.numParts() != 0 ); - for ( size_t partNum = 0; partNum < keyField.numParts(); partNum++ ) { - StringData part = keyField.getPart(partNum); - uassert( 17291, - "weight cannot have empty path component", - !part.empty() ); - uassert( 17292, - "weight cannot have path component with $ prefix", - !part.startsWith( "$" ) ); - } + BSONObj keyPattern; + { + BSONObjBuilder b; + + // Populate m and keyPattern. + { + bool addedFtsStuff = false; + BSONObjIterator i(spec["key"].Obj()); + while (i.more()) { + BSONElement e = i.next(); + if (str::equals(e.fieldName(), "_fts")) { + uassert(17271, "expecting _fts:\"text\"", INDEX_NAME == e.valuestrsafe()); + addedFtsStuff = true; + b.append(e); + } else if (str::equals(e.fieldName(), "_ftsx")) { + uassert(17272, "expecting _ftsx:1", e.numberInt() == 1); + b.append(e); + } else if (e.type() == String && INDEX_NAME == e.valuestr()) { + if (!addedFtsStuff) { + _addFTSStuff(&b); + addedFtsStuff = true; } - b.append( i->first, i->second ); + m[e.fieldName()] = 1; + } else { + uassert(17273, + "expected value 1 or -1 for non-text key in compound index", + e.numberInt() == 1 || e.numberInt() == -1); + b.append(e); } - weights = b.obj(); - } - - BSONElement default_language_elt = spec["default_language"]; - string default_language( default_language_elt.str() ); - if ( default_language_elt.eoo() ) { - default_language = moduleDefaultLanguage; } - else { - uassert( 17263, - "default_language needs a string type", - default_language_elt.type() == String ); + verify(addedFtsStuff); + } + keyPattern = b.obj(); + + // Verify that index key is in the correct format: extraBefore fields, then text + // fields, then extraAfter fields. + { + BSONObjIterator i(spec["key"].Obj()); + verify(i.more()); + BSONElement e = i.next(); + + // extraBefore fields + while (String != e.type()) { + verifyFieldNameNotReserved(e.fieldNameStringData()); + verify(i.more()); + e = i.next(); } - uassert( 17264, - "default_language is not valid", - FTSLanguage::make( default_language, - TEXT_INDEX_VERSION_2 ).getStatus().isOK() ); - - BSONElement language_override_elt = spec["language_override"]; - string language_override( language_override_elt.str() ); - if ( language_override_elt.eoo() ) { - language_override = "language"; + + // text fields + bool alreadyFixed = str::equals(e.fieldName(), "_fts"); + if (alreadyFixed) { + uassert(17288, "expected _ftsx after _fts", i.more()); + e = i.next(); + uassert(17274, "expected _ftsx after _fts", str::equals(e.fieldName(), "_ftsx")); + e = i.next(); + } else { + do { + verifyFieldNameNotReserved(e.fieldNameStringData()); + e = i.next(); + } while (!e.eoo() && e.type() == String); } - else { - uassert( 17136, - "language_override is not valid", - language_override_elt.type() == String - && validateOverride( language_override ) ); + + // extraAfterFields + while (!e.eoo()) { + uassert(17389, "'text' fields in index must all be adjacent", e.type() != String); + verifyFieldNameNotReserved(e.fieldNameStringData()); + e = i.next(); } + } + } - int version = -1; - int textIndexVersion = TEXT_INDEX_VERSION_2; + if (spec["weights"].type() == Object) { + BSONObjIterator i(spec["weights"].Obj()); + while (i.more()) { + BSONElement e = i.next(); + uassert(17283, "weight for text index needs numeric type", e.isNumber()); + m[e.fieldName()] = e.numberInt(); + } + } else if (spec["weights"].str() == WILDCARD) { + m[WILDCARD] = 1; + } else if (!spec["weights"].eoo()) { + uasserted(17284, "text index option 'weights' must be an object"); + } - BSONObjBuilder b; - BSONObjIterator i( spec ); - while ( i.more() ) { - BSONElement e = i.next(); - if ( str::equals( e.fieldName(), "key" ) ) { - b.append( "key", keyPattern ); - } - else if ( str::equals( e.fieldName(), "weights" ) ) { - b.append( "weights", weights ); - weights = BSONObj(); - } - else if ( str::equals( e.fieldName(), "default_language" ) ) { - b.append( "default_language", default_language); - default_language = ""; - } - else if ( str::equals( e.fieldName(), "language_override" ) ) { - b.append( "language_override", language_override); - language_override = ""; - } - else if ( str::equals( e.fieldName(), "v" ) ) { - version = e.numberInt(); - } - else if ( str::equals( e.fieldName(), "textIndexVersion" ) ) { - uassert( 17293, - "text index option 'textIndexVersion' must be a number", - e.isNumber() ); - textIndexVersion = e.numberInt(); - uassert( 16730, - str::stream() << "bad textIndexVersion: " << textIndexVersion, - textIndexVersion == TEXT_INDEX_VERSION_2 ); - } - else { - b.append( e ); + BSONObj weights; + { + BSONObjBuilder b; + for (map<string, int>::iterator i = m.begin(); i != m.end(); ++i) { + uassert(16674, "score for word too high", i->second > 0 && i->second < MAX_WORD_WEIGHT); + + // Verify weight refers to a valid field. + if (i->first != "$**") { + FieldRef keyField(i->first); + uassert(17294, "weight cannot be on an empty field", keyField.numParts() != 0); + for (size_t partNum = 0; partNum < keyField.numParts(); partNum++) { + StringData part = keyField.getPart(partNum); + uassert(17291, "weight cannot have empty path component", !part.empty()); + uassert(17292, + "weight cannot have path component with $ prefix", + !part.startsWith("$")); } } - if ( !weights.isEmpty() ) { - b.append( "weights", weights ); - } - if ( !default_language.empty() ) { - b.append( "default_language", default_language); - } - if ( !language_override.empty() ) { - b.append( "language_override", language_override); - } - if ( version >= 0 ) { - b.append( "v", version ); - } - b.append( "textIndexVersion", textIndexVersion ); + b.append(i->first, i->second); + } + weights = b.obj(); + } + + BSONElement default_language_elt = spec["default_language"]; + string default_language(default_language_elt.str()); + if (default_language_elt.eoo()) { + default_language = moduleDefaultLanguage; + } else { + uassert( + 17263, "default_language needs a string type", default_language_elt.type() == String); + } + uassert(17264, + "default_language is not valid", + FTSLanguage::make(default_language, TEXT_INDEX_VERSION_2).getStatus().isOK()); + + BSONElement language_override_elt = spec["language_override"]; + string language_override(language_override_elt.str()); + if (language_override_elt.eoo()) { + language_override = "language"; + } else { + uassert(17136, + "language_override is not valid", + language_override_elt.type() == String && validateOverride(language_override)); + } - return b.obj(); + int version = -1; + int textIndexVersion = TEXT_INDEX_VERSION_2; + + BSONObjBuilder b; + BSONObjIterator i(spec); + while (i.more()) { + BSONElement e = i.next(); + if (str::equals(e.fieldName(), "key")) { + b.append("key", keyPattern); + } else if (str::equals(e.fieldName(), "weights")) { + b.append("weights", weights); + weights = BSONObj(); + } else if (str::equals(e.fieldName(), "default_language")) { + b.append("default_language", default_language); + default_language = ""; + } else if (str::equals(e.fieldName(), "language_override")) { + b.append("language_override", language_override); + language_override = ""; + } else if (str::equals(e.fieldName(), "v")) { + version = e.numberInt(); + } else if (str::equals(e.fieldName(), "textIndexVersion")) { + uassert(17293, "text index option 'textIndexVersion' must be a number", e.isNumber()); + textIndexVersion = e.numberInt(); + uassert(16730, + str::stream() << "bad textIndexVersion: " << textIndexVersion, + textIndexVersion == TEXT_INDEX_VERSION_2); + } else { + b.append(e); } + } + if (!weights.isEmpty()) { + b.append("weights", weights); + } + if (!default_language.empty()) { + b.append("default_language", default_language); } + if (!language_override.empty()) { + b.append("language_override", language_override); + } + if (version >= 0) { + b.append("v", version); + } + b.append("textIndexVersion", textIndexVersion); + + return b.obj(); +} +} } |