summary | refs | log | tree | commit | diff
path: root/src/mongo/db/fts/fts_spec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/fts/fts_spec.cpp')
-rw-r--r-- src/mongo/db/fts/fts_spec.cpp 753
1 files changed, 352 insertions, 401 deletions
diff --git a/src/mongo/db/fts/fts_spec.cpp b/src/mongo/db/fts/fts_spec.cpp
index 274d9a6d6ba..eb7e018b522 100644
--- a/src/mongo/db/fts/fts_spec.cpp
+++ b/src/mongo/db/fts/fts_spec.cpp
@@ -40,457 +40,408 @@
namespace mongo {
- namespace fts {
-
- using std::map;
- using std::string;
- using namespace mongoutils;
-
- const double DEFAULT_WEIGHT = 1;
- const double MAX_WEIGHT = 1000000000;
- const double MAX_WORD_WEIGHT = MAX_WEIGHT / 10000;
-
- namespace {
- // Default language. Used for new indexes.
- const std::string moduleDefaultLanguage( "english" );
-
- /** Validate the given language override string. */
- bool validateOverride( const string& override ) {
- // The override field can't be empty, can't be prefixed with a dollar sign, and
- // can't contain a dot.
- return !override.empty() &&
- override[0] != '$' &&
- override.find('.') == std::string::npos;
- }
- }
-
- FTSSpec::FTSSpec( const BSONObj& indexInfo ) {
- // indexInfo is a text index spec. Text index specs pass through fixSpec() before
- // being saved to the system.indexes collection. fixSpec() enforces a schema, such that
- // required fields must exist and be of the correct type (e.g. weights,
- // textIndexVersion).
- massert( 16739, "found invalid spec for text index",
- indexInfo["weights"].isABSONObj() );
- BSONElement textIndexVersionElt = indexInfo["textIndexVersion"];
- massert( 17367,
- "found invalid spec for text index, expected number for textIndexVersion",
- textIndexVersionElt.isNumber() );
-
- // We currently support TEXT_INDEX_VERSION_1 (deprecated) and TEXT_INDEX_VERSION_2.
- // Reject all other values.
- massert( 17364,
- str::stream() << "attempt to use unsupported textIndexVersion " <<
- textIndexVersionElt.numberInt() << "; versions supported: " <<
- TEXT_INDEX_VERSION_2 << ", " << TEXT_INDEX_VERSION_1,
- textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_2 ||
- textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_1 );
-
- _textIndexVersion = ( textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_2 ) ?
- TEXT_INDEX_VERSION_2 : TEXT_INDEX_VERSION_1;
-
- // Initialize _defaultLanguage. Note that the FTSLanguage constructor requires
- // textIndexVersion, since language parsing is version-specific.
- auto indexLanguage = indexInfo["default_language"].String();
- auto swl = FTSLanguage::make(indexLanguage , _textIndexVersion );
-
- // This can fail if the user originally created the text index under an instance of
- // MongoDB that supports different languages then the current instance
- // TODO: consder propagating the index ns to here to improve the error message
- uassert(28682,
- str::stream() << "Unrecognized language " << indexLanguage <<
- " found for text index. Verify mongod was started with the"
- " correct options.",
- swl.getStatus().isOK());
- _defaultLanguage = swl.getValue();
-
- _languageOverrideField = indexInfo["language_override"].valuestrsafe();
-
- _wildcard = false;
-
- // in this block we fill in the _weights map
- {
- BSONObjIterator i( indexInfo["weights"].Obj() );
- while ( i.more() ) {
- BSONElement e = i.next();
- verify( e.isNumber() );
-
- if ( WILDCARD == e.fieldName() ) {
- _wildcard = true;
- }
- else {
- double num = e.number();
- _weights[ e.fieldName() ] = num;
- verify( num > 0 && num < MAX_WORD_WEIGHT );
- }
- }
- verify( _wildcard || _weights.size() );
- }
-
- // extra information
- {
- BSONObj keyPattern = indexInfo["key"].Obj();
- verify( keyPattern.nFields() >= 2 );
- BSONObjIterator i( keyPattern );
+namespace fts {
- bool passedFTS = false;
+using std::map;
+using std::string;
+using namespace mongoutils;
- while ( i.more() ) {
- BSONElement e = i.next();
- if ( str::equals( e.fieldName(), "_fts" ) ||
- str::equals( e.fieldName(), "_ftsx" ) ) {
- passedFTS = true;
- continue;
- }
+const double DEFAULT_WEIGHT = 1;  // not referenced in the visible code; presumably the weight for fields without an explicit one -- TODO confirm
+const double MAX_WEIGHT = 1000000000;  // upper limit for a term score accumulated by _scoreStringV2()
+const double MAX_WORD_WEIGHT = MAX_WEIGHT / 10000;  // exclusive upper bound for one field weight (see FTSSpec() and fixSpec())
- if ( passedFTS )
- _extraAfter.push_back( e.fieldName() );
- else
- _extraBefore.push_back( e.fieldName() );
- }
+namespace {
+// Default language. fixSpec() uses it when the spec has no default_language field.
+const std::string moduleDefaultLanguage("english");
- }
- }
+/** Returns true if the given language override string passes all of the checks below. */
+bool validateOverride(const string& override) {
+ // The override field can't be empty, can't be prefixed with a dollar sign, and
+ // can't contain a dot.
+ return !override.empty() && override[0] != '$' && override.find('.') == std::string::npos;
+}
+}  // namespace
- const FTSLanguage* FTSSpec::_getLanguageToUseV2( const BSONObj& userDoc,
- const FTSLanguage* currentLanguage ) const {
- BSONElement e = userDoc[_languageOverrideField];
- if ( e.eoo() ) {
- return currentLanguage;
+FTSSpec::FTSSpec(const BSONObj& indexInfo) {
+ // Builds the spec from indexInfo, a text index spec. Text index specs pass through fixSpec() before
+ // being saved to the system.indexes collection. fixSpec() enforces a schema, such that
+ // required fields must exist and be of the correct type (e.g. weights,
+ // textIndexVersion).
+ massert(16739, "found invalid spec for text index", indexInfo["weights"].isABSONObj());
+ BSONElement textIndexVersionElt = indexInfo["textIndexVersion"];
+ massert(17367,
+ "found invalid spec for text index, expected number for textIndexVersion",
+ textIndexVersionElt.isNumber());
+
+ // We currently support TEXT_INDEX_VERSION_1 (deprecated) and TEXT_INDEX_VERSION_2.
+ // Reject all other values.
+ massert(17364,
+ str::stream() << "attempt to use unsupported textIndexVersion "
+ << textIndexVersionElt.numberInt() << "; versions supported: "
+ << TEXT_INDEX_VERSION_2 << ", " << TEXT_INDEX_VERSION_1,
+ textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_2 ||
+ textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_1);
+
+ _textIndexVersion = (textIndexVersionElt.numberInt() == TEXT_INDEX_VERSION_2)
+ ? TEXT_INDEX_VERSION_2
+ : TEXT_INDEX_VERSION_1;
+
+ // Initialize _defaultLanguage. Note that the FTSLanguage constructor requires
+ // textIndexVersion, since language parsing is version-specific.
+ auto indexLanguage = indexInfo["default_language"].String();
+ auto swl = FTSLanguage::make(indexLanguage, _textIndexVersion);
+
+ // This can fail if the user originally created the text index under an instance of
+ // MongoDB that supports different languages than the current instance.
+ // TODO: consider propagating the index ns to here to improve the error message
+ uassert(28682,
+ str::stream() << "Unrecognized language " << indexLanguage
+ << " found for text index. Verify mongod was started with the"
+ " correct options.",
+ swl.getStatus().isOK());
+ _defaultLanguage = swl.getValue();
+
+ _languageOverrideField = indexInfo["language_override"].valuestrsafe();
+
+ _wildcard = false;  // becomes true if the weights object has a WILDCARD entry
+
+ // Fill in the _weights map. A WILDCARD entry only sets _wildcard; it is not stored.
+ {
+ BSONObjIterator i(indexInfo["weights"].Obj());
+ while (i.more()) {
+ BSONElement e = i.next();
+ verify(e.isNumber());
+
+ if (WILDCARD == e.fieldName()) {
+ _wildcard = true;
+ } else {
+ double num = e.number();
+ _weights[e.fieldName()] = num;
+ verify(num > 0 && num < MAX_WORD_WEIGHT);
}
- uassert( 17261,
- "found language override field in document with non-string type",
- e.type() == mongo::String );
- StatusWithFTSLanguage swl = FTSLanguage::make( e.String(), TEXT_INDEX_VERSION_2 );
- uassert( 17262,
- "language override unsupported: " + e.String(),
- swl.getStatus().isOK() );
- return swl.getValue();
}
+ verify(_wildcard || _weights.size());
+ }
- void FTSSpec::scoreDocument( const BSONObj& obj, TermFrequencyMap* term_freqs ) const {
- if ( _textIndexVersion == TEXT_INDEX_VERSION_1 ) {
- return _scoreDocumentV1( obj, term_freqs );
- }
+ // Extra information: names of the non-text key fields before and after _fts/_ftsx.
+ {
+ BSONObj keyPattern = indexInfo["key"].Obj();
+ verify(keyPattern.nFields() >= 2);
+ BSONObjIterator i(keyPattern);
- FTSElementIterator it( *this, obj );
+ bool passedFTS = false;
- while ( it.more() ) {
- FTSIteratorValue val = it.next();
- std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer());
- _scoreStringV2( tokenizer.get(), val._text, term_freqs, val._weight );
+ while (i.more()) {
+ BSONElement e = i.next();
+ if (str::equals(e.fieldName(), "_fts") || str::equals(e.fieldName(), "_ftsx")) {
+ passedFTS = true;
+ continue;
}
+
+ if (passedFTS)
+ _extraAfter.push_back(e.fieldName());
+ else
+ _extraBefore.push_back(e.fieldName());
}
+ }
+}
- void FTSSpec::_scoreStringV2( FTSTokenizer* tokenizer,
- StringData raw,
- TermFrequencyMap* docScores,
- double weight ) const {
+const FTSLanguage* FTSSpec::_getLanguageToUseV2(const BSONObj& userDoc,
+ const FTSLanguage* currentLanguage) const {  // Returns the language named by the override field of userDoc, or currentLanguage if that field is absent.
+ BSONElement e = userDoc[_languageOverrideField];  // look up the per-document language override
+ if (e.eoo()) {
+ return currentLanguage;  // no override field present: keep the language passed in
+ }
+ uassert(17261,
+ "found language override field in document with non-string type",
+ e.type() == mongo::String);
+ StatusWithFTSLanguage swl = FTSLanguage::make(e.String(), TEXT_INDEX_VERSION_2);  // always parsed as a version 2 language
+ uassert(17262, "language override unsupported: " + e.String(), swl.getStatus().isOK());
+ return swl.getValue();
+}
- ScoreHelperMap terms;
+void FTSSpec::scoreDocument(const BSONObj& obj, TermFrequencyMap* term_freqs) const {  // Scores every text value that FTSElementIterator yields for obj into *term_freqs.
+ if (_textIndexVersion == TEXT_INDEX_VERSION_1) {
+ return _scoreDocumentV1(obj, term_freqs);  // version 1 indexes use the separate V1 scoring routine
+ }
- unsigned numTokens = 0;
+ FTSElementIterator it(*this, obj);
- tokenizer->reset(raw.rawData(), FTSTokenizer::FilterStopWords );
+ while (it.more()) {
+ FTSIteratorValue val = it.next();
+ std::unique_ptr<FTSTokenizer> tokenizer(val._language->createTokenizer());  // tokenizer comes from the language attached to this value
+ _scoreStringV2(tokenizer.get(), val._text, term_freqs, val._weight);  // accumulates into *term_freqs
+ }
+}
- while (tokenizer->moveNext()) {
- string term = tokenizer->get().toString();
+void FTSSpec::_scoreStringV2(FTSTokenizer* tokenizer,
+ StringData raw,
+ TermFrequencyMap* docScores,
+ double weight) const {  // Tokenizes raw and adds a weighted score for each distinct term into *docScores.
+ ScoreHelperMap terms;
- ScoreHelperStruct& data = terms[term];
+ unsigned numTokens = 0;
- if ( data.exp ) {
- data.exp *= 2;
- }
- else {
- data.exp = 1;
- }
- data.count += 1;
- data.freq += ( 1 / data.exp );
- numTokens++;
- }
+ tokenizer->reset(raw.rawData(), FTSTokenizer::FilterStopWords);  // FilterStopWords: presumably drops stop words before scoring -- TODO confirm
- for ( ScoreHelperMap::const_iterator i = terms.begin(); i != terms.end(); ++i ) {
+ while (tokenizer->moveNext()) {
+ string term = tokenizer->get().toString();
- const string& term = i->first;
- const ScoreHelperStruct& data = i->second;
+ ScoreHelperStruct& data = terms[term];
- // in order to adjust weights as a function of term count as it
- // relates to total field length. ie. is this the only word or
- // a frequently occuring term? or does it only show up once in
- // a long block of text?
+ if (data.exp) {
+ data.exp *= 2;  // each further occurrence adds 1/exp, i.e. half of the previous contribution (assuming exp is floating-point -- TODO confirm)
+ } else {
+ data.exp = 1;  // first occurrence of this term (assumes exp starts out as zero -- TODO confirm)
+ }
+ data.count += 1;
+ data.freq += (1 / data.exp);
+ numTokens++;
+ }
- double coeff = ( 0.5 * data.count / numTokens ) + 0.5;
+ for (ScoreHelperMap::const_iterator i = terms.begin(); i != terms.end(); ++i) {
+ const string& term = i->first;
+ const ScoreHelperStruct& data = i->second;
- // if term is identical to the raw form of the
- // field (untokenized) give it a small boost.
- double adjustment = 1;
- if ( raw.size() == term.length() && raw.equalCaseInsensitive( term ) )
- adjustment += 0.1;
+ // Adjust the weight as a function of the term count relative to the
+ // total number of tokens in the field, i.e. is this the only word or
+ // a frequently occurring term? Or does it only show up once in
+ // a long block of text?
- double& score = (*docScores)[term];
- score += ( weight * data.freq * coeff * adjustment );
- verify( score <= MAX_WEIGHT );
- }
- }
+ double coeff = (0.5 * data.count / numTokens) + 0.5;
- Status FTSSpec::getIndexPrefix( const BSONObj& query, BSONObj* out ) const {
- if ( numExtraBefore() == 0 ) {
- *out = BSONObj();
- return Status::OK();
- }
+ // If the term is identical to the raw (untokenized) form of the
+ // field, give it a small boost.
+ double adjustment = 1;
+ if (raw.size() == term.length() && raw.equalCaseInsensitive(term))
+ adjustment += 0.1;
- BSONObjBuilder b;
- for ( unsigned i = 0; i < numExtraBefore(); i++ ) {
- BSONElement e = query.getFieldDotted(extraBefore(i));
- if ( e.eoo() )
- return Status( ErrorCodes::BadValue,
- str::stream()
- << "need have an equality filter on: "
- << extraBefore(i) );
-
- if ( e.isABSONObj() && e.Obj().firstElement().getGtLtOp( -1 ) != -1 )
- return Status( ErrorCodes::BadValue,
- str::stream()
- << "need have an equality filter on: "
- << extraBefore(i) );
-
- b.append( e );
- }
- *out = b.obj();
- return Status::OK();
- }
+ double& score = (*docScores)[term];  // creates the entry if the term has not been scored yet
+ score += (weight * data.freq * coeff * adjustment);
+ verify(score <= MAX_WEIGHT);
+ }
+}
- namespace {
- void _addFTSStuff( BSONObjBuilder* b ) {
- b->append( "_fts", INDEX_NAME );
- b->append( "_ftsx", 1 );
- }
+Status FTSSpec::getIndexPrefix(const BSONObj& query, BSONObj* out) const {  // Builds *out from the query values of the key fields that precede the text key; BadValue if one is missing or not an equality.
+ if (numExtraBefore() == 0) {
+ *out = BSONObj();  // nothing precedes the text key, so the prefix is empty
+ return Status::OK();
+ }
- void verifyFieldNameNotReserved( StringData s ) {
- uassert( 17289,
- "text index with reserved fields _fts/_ftsx not allowed",
- s != "_fts" && s != "_ftsx" );
- }
- }
+ BSONObjBuilder b;
+ for (unsigned i = 0; i < numExtraBefore(); i++) {
+ BSONElement e = query.getFieldDotted(extraBefore(i));
+ if (e.eoo())  // the query does not mention this prefix field at all
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "need have an equality filter on: " << extraBefore(i));
- BSONObj FTSSpec::fixSpec( const BSONObj& spec ) {
- if ( spec["textIndexVersion"].numberInt() == TEXT_INDEX_VERSION_1 ) {
- return _fixSpecV1( spec );
- }
+ if (e.isABSONObj() && e.Obj().firstElement().getGtLtOp(-1) != -1)  // presumably detects an operator expression rather than a plain value -- TODO confirm getGtLtOp semantics
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "need have an equality filter on: " << extraBefore(i));
- map<string,int> m;
-
- BSONObj keyPattern;
- {
- BSONObjBuilder b;
-
- // Populate m and keyPattern.
- {
- bool addedFtsStuff = false;
- BSONObjIterator i( spec["key"].Obj() );
- while ( i.more() ) {
- BSONElement e = i.next();
- if ( str::equals( e.fieldName(), "_fts" ) ) {
- uassert( 17271,
- "expecting _fts:\"text\"",
- INDEX_NAME == e.valuestrsafe() );
- addedFtsStuff = true;
- b.append( e );
- }
- else if ( str::equals( e.fieldName(), "_ftsx" ) ) {
- uassert( 17272, "expecting _ftsx:1", e.numberInt() == 1 );
- b.append( e );
- }
- else if ( e.type() == String && INDEX_NAME == e.valuestr() ) {
-
- if ( !addedFtsStuff ) {
- _addFTSStuff( &b );
- addedFtsStuff = true;
- }
-
- m[e.fieldName()] = 1;
- }
- else {
- uassert( 17273,
- "expected value 1 or -1 for non-text key in compound index",
- e.numberInt() == 1 || e.numberInt() == -1 );
- b.append( e );
- }
- }
- verify( addedFtsStuff );
- }
- keyPattern = b.obj();
-
- // Verify that index key is in the correct format: extraBefore fields, then text
- // fields, then extraAfter fields.
- {
- BSONObjIterator i( spec["key"].Obj() );
- verify( i.more() );
- BSONElement e = i.next();
-
- // extraBefore fields
- while ( String != e.type() ) {
- verifyFieldNameNotReserved( e.fieldNameStringData() );
- verify( i.more() );
- e = i.next();
- }
+ b.append(e);
+ }
+ *out = b.obj();
+ return Status::OK();
+}
- // text fields
- bool alreadyFixed = str::equals( e.fieldName(), "_fts" );
- if ( alreadyFixed ) {
- uassert( 17288, "expected _ftsx after _fts", i.more() );
- e = i.next();
- uassert( 17274,
- "expected _ftsx after _fts",
- str::equals( e.fieldName(), "_ftsx" ) );
- e = i.next();
- }
- else {
- do {
- verifyFieldNameNotReserved( e.fieldNameStringData() );
- e = i.next();
- } while ( !e.eoo() && e.type() == String );
- }
+namespace {
+void _addFTSStuff(BSONObjBuilder* b) {  // appends the _fts and _ftsx key fields to *b
+ b->append("_fts", INDEX_NAME);
+ b->append("_ftsx", 1);
+}
- // extraAfterFields
- while ( !e.eoo() ) {
- uassert( 17389,
- "'text' fields in index must all be adjacent",
- e.type() != String );
- verifyFieldNameNotReserved( e.fieldNameStringData() );
- e = i.next();
- }
- }
+void verifyFieldNameNotReserved(StringData s) {  // uasserts with code 17289 when s is _fts or _ftsx
+ uassert(17289,
+ "text index with reserved fields _fts/_ftsx not allowed",
+ s != "_fts" && s != "_ftsx");
+}
+}  // namespace
- }
+BSONObj FTSSpec::fixSpec(const BSONObj& spec) {  // Validates a text index spec and returns a rebuilt copy with key, weights, languages and versions filled in.
+ if (spec["textIndexVersion"].numberInt() == TEXT_INDEX_VERSION_1) {
+ return _fixSpecV1(spec);  // version 1 specs are handled by the separate V1 routine
+ }
- if ( spec["weights"].type() == Object ) {
- BSONObjIterator i( spec["weights"].Obj() );
- while ( i.more() ) {
- BSONElement e = i.next();
- uassert( 17283,
- "weight for text index needs numeric type",
- e.isNumber() );
- m[e.fieldName()] = e.numberInt();
- }
- }
- else if ( spec["weights"].str() == WILDCARD ) {
- m[WILDCARD] = 1;
- }
- else if ( !spec["weights"].eoo() ) {
- uasserted( 17284, "text index option 'weights' must be an object" );
- }
+ map<string, int> m;  // field name -> weight, filled from the text keys and then the weights option
- BSONObj weights;
- {
- BSONObjBuilder b;
- for ( map<string,int>::iterator i = m.begin(); i != m.end(); ++i ) {
- uassert( 16674, "score for word too high",
- i->second > 0 && i->second < MAX_WORD_WEIGHT );
-
- // Verify weight refers to a valid field.
- if ( i->first != "$**" ) {
- FieldRef keyField( i->first );
- uassert( 17294,
- "weight cannot be on an empty field",
- keyField.numParts() != 0 );
- for ( size_t partNum = 0; partNum < keyField.numParts(); partNum++ ) {
- StringData part = keyField.getPart(partNum);
- uassert( 17291,
- "weight cannot have empty path component",
- !part.empty() );
- uassert( 17292,
- "weight cannot have path component with $ prefix",
- !part.startsWith( "$" ) );
- }
+ BSONObj keyPattern;
+ {
+ BSONObjBuilder b;
+
+ // Populate m and keyPattern. Each text key gets weight 1 in m; _fts/_ftsx take their place in keyPattern.
+ {
+ bool addedFtsStuff = false;
+ BSONObjIterator i(spec["key"].Obj());
+ while (i.more()) {
+ BSONElement e = i.next();
+ if (str::equals(e.fieldName(), "_fts")) {
+ uassert(17271, "expecting _fts:\"text\"", INDEX_NAME == e.valuestrsafe());
+ addedFtsStuff = true;
+ b.append(e);
+ } else if (str::equals(e.fieldName(), "_ftsx")) {
+ uassert(17272, "expecting _ftsx:1", e.numberInt() == 1);
+ b.append(e);
+ } else if (e.type() == String && INDEX_NAME == e.valuestr()) {
+ if (!addedFtsStuff) {
+ _addFTSStuff(&b);
+ addedFtsStuff = true;
}
- b.append( i->first, i->second );
+ m[e.fieldName()] = 1;
+ } else {
+ uassert(17273,
+ "expected value 1 or -1 for non-text key in compound index",
+ e.numberInt() == 1 || e.numberInt() == -1);
+ b.append(e);
}
- weights = b.obj();
- }
-
- BSONElement default_language_elt = spec["default_language"];
- string default_language( default_language_elt.str() );
- if ( default_language_elt.eoo() ) {
- default_language = moduleDefaultLanguage;
}
- else {
- uassert( 17263,
- "default_language needs a string type",
- default_language_elt.type() == String );
+ verify(addedFtsStuff);  // the key must contain _fts or at least one text field
+ }
+ keyPattern = b.obj();
+
+ // Verify that index key is in the correct format: extraBefore fields, then text
+ // fields, then extraAfter fields.
+ {
+ BSONObjIterator i(spec["key"].Obj());
+ verify(i.more());
+ BSONElement e = i.next();
+
+ // extraBefore fields: everything up to the first String-typed key
+ while (String != e.type()) {
+ verifyFieldNameNotReserved(e.fieldNameStringData());
+ verify(i.more());
+ e = i.next();
}
- uassert( 17264,
- "default_language is not valid",
- FTSLanguage::make( default_language,
- TEXT_INDEX_VERSION_2 ).getStatus().isOK() );
-
- BSONElement language_override_elt = spec["language_override"];
- string language_override( language_override_elt.str() );
- if ( language_override_elt.eoo() ) {
- language_override = "language";
+
+ // text fields: either an existing _fts/_ftsx pair or a run of String-typed keys
+ bool alreadyFixed = str::equals(e.fieldName(), "_fts");
+ if (alreadyFixed) {
+ uassert(17288, "expected _ftsx after _fts", i.more());
+ e = i.next();
+ uassert(17274, "expected _ftsx after _fts", str::equals(e.fieldName(), "_ftsx"));
+ e = i.next();
+ } else {
+ do {
+ verifyFieldNameNotReserved(e.fieldNameStringData());
+ e = i.next();
+ } while (!e.eoo() && e.type() == String);
}
- else {
- uassert( 17136,
- "language_override is not valid",
- language_override_elt.type() == String
- && validateOverride( language_override ) );
+
+ // extraAfter fields: no String-typed key may follow the text fields
+ while (!e.eoo()) {
+ uassert(17389, "'text' fields in index must all be adjacent", e.type() != String);
+ verifyFieldNameNotReserved(e.fieldNameStringData());
+ e = i.next();
}
+ }
+ }
- int version = -1;
- int textIndexVersion = TEXT_INDEX_VERSION_2;
+ if (spec["weights"].type() == Object) {
+ BSONObjIterator i(spec["weights"].Obj());
+ while (i.more()) {
+ BSONElement e = i.next();
+ uassert(17283, "weight for text index needs numeric type", e.isNumber());
+ m[e.fieldName()] = e.numberInt();  // NOTE(review): numberInt() presumably truncates a fractional weight -- confirm
+ }
+ } else if (spec["weights"].str() == WILDCARD) {
+ m[WILDCARD] = 1;
+ } else if (!spec["weights"].eoo()) {
+ uasserted(17284, "text index option 'weights' must be an object");
+ }
- BSONObjBuilder b;
- BSONObjIterator i( spec );
- while ( i.more() ) {
- BSONElement e = i.next();
- if ( str::equals( e.fieldName(), "key" ) ) {
- b.append( "key", keyPattern );
- }
- else if ( str::equals( e.fieldName(), "weights" ) ) {
- b.append( "weights", weights );
- weights = BSONObj();
- }
- else if ( str::equals( e.fieldName(), "default_language" ) ) {
- b.append( "default_language", default_language);
- default_language = "";
- }
- else if ( str::equals( e.fieldName(), "language_override" ) ) {
- b.append( "language_override", language_override);
- language_override = "";
- }
- else if ( str::equals( e.fieldName(), "v" ) ) {
- version = e.numberInt();
- }
- else if ( str::equals( e.fieldName(), "textIndexVersion" ) ) {
- uassert( 17293,
- "text index option 'textIndexVersion' must be a number",
- e.isNumber() );
- textIndexVersion = e.numberInt();
- uassert( 16730,
- str::stream() << "bad textIndexVersion: " << textIndexVersion,
- textIndexVersion == TEXT_INDEX_VERSION_2 );
- }
- else {
- b.append( e );
+ BSONObj weights;
+ {
+ BSONObjBuilder b;
+ for (map<string, int>::iterator i = m.begin(); i != m.end(); ++i) {
+ uassert(16674, "score for word too high", i->second > 0 && i->second < MAX_WORD_WEIGHT);
+
+ // Verify weight refers to a valid field.
+ if (i->first != "$**") {  // NOTE(review): this literal presumably equals WILDCARD -- confirm
+ FieldRef keyField(i->first);
+ uassert(17294, "weight cannot be on an empty field", keyField.numParts() != 0);
+ for (size_t partNum = 0; partNum < keyField.numParts(); partNum++) {
+ StringData part = keyField.getPart(partNum);
+ uassert(17291, "weight cannot have empty path component", !part.empty());
+ uassert(17292,
+ "weight cannot have path component with $ prefix",
+ !part.startsWith("$"));
}
}
- if ( !weights.isEmpty() ) {
- b.append( "weights", weights );
- }
- if ( !default_language.empty() ) {
- b.append( "default_language", default_language);
- }
- if ( !language_override.empty() ) {
- b.append( "language_override", language_override);
- }
- if ( version >= 0 ) {
- b.append( "v", version );
- }
- b.append( "textIndexVersion", textIndexVersion );
+ b.append(i->first, i->second);
+ }
+ weights = b.obj();
+ }
+
+ BSONElement default_language_elt = spec["default_language"];
+ string default_language(default_language_elt.str());
+ if (default_language_elt.eoo()) {
+ default_language = moduleDefaultLanguage;
+ } else {
+ uassert(
+ 17263, "default_language needs a string type", default_language_elt.type() == String);
+ }
+ uassert(17264,
+ "default_language is not valid",
+ FTSLanguage::make(default_language, TEXT_INDEX_VERSION_2).getStatus().isOK());
+
+ BSONElement language_override_elt = spec["language_override"];
+ string language_override(language_override_elt.str());
+ if (language_override_elt.eoo()) {
+ language_override = "language";
+ } else {
+ uassert(17136,
+ "language_override is not valid",
+ language_override_elt.type() == String && validateOverride(language_override));
+ }
- return b.obj();
+ int version = -1;  // stays negative when the spec has no v field
+ int textIndexVersion = TEXT_INDEX_VERSION_2;  // used when the spec has no textIndexVersion field
+
+ BSONObjBuilder b;
+ BSONObjIterator i(spec);
+ while (i.more()) {
+ BSONElement e = i.next();
+ if (str::equals(e.fieldName(), "key")) {
+ b.append("key", keyPattern);
+ } else if (str::equals(e.fieldName(), "weights")) {
+ b.append("weights", weights);
+ weights = BSONObj();  // cleared so it is not appended a second time after the loop
+ } else if (str::equals(e.fieldName(), "default_language")) {
+ b.append("default_language", default_language);
+ default_language = "";  // cleared so it is not appended a second time after the loop
+ } else if (str::equals(e.fieldName(), "language_override")) {
+ b.append("language_override", language_override);
+ language_override = "";  // cleared so it is not appended a second time after the loop
+ } else if (str::equals(e.fieldName(), "v")) {
+ version = e.numberInt();  // appended after the loop instead of in place
+ } else if (str::equals(e.fieldName(), "textIndexVersion")) {
+ uassert(17293, "text index option 'textIndexVersion' must be a number", e.isNumber());
+ textIndexVersion = e.numberInt();
+ uassert(16730,
+ str::stream() << "bad textIndexVersion: " << textIndexVersion,
+ textIndexVersion == TEXT_INDEX_VERSION_2);
+ } else {
+ b.append(e);
}
+ }
+ if (!weights.isEmpty()) {
+ b.append("weights", weights);
+ }
+ if (!default_language.empty()) {
+ b.append("default_language", default_language);
}
+ if (!language_override.empty()) {
+ b.append("language_override", language_override);
+ }
+ if (version >= 0) {
+ b.append("v", version);
+ }
+ b.append("textIndexVersion", textIndexVersion);
+
+ return b.obj();
+}
+}
}