diff options
Diffstat (limited to 'src/mongo/db/fts/fts_spec_test.cpp')
-rw-r--r-- | src/mongo/db/fts/fts_spec_test.cpp | 1087 |
1 files changed, 552 insertions, 535 deletions
diff --git a/src/mongo/db/fts/fts_spec_test.cpp b/src/mongo/db/fts/fts_spec_test.cpp index 832279eb18d..c9f628a2b28 100644 --- a/src/mongo/db/fts/fts_spec_test.cpp +++ b/src/mongo/db/fts/fts_spec_test.cpp @@ -36,541 +36,558 @@ namespace mongo { - using std::set; - using std::string; - - namespace fts { - - /** - * Assert that fixSpec() accepts the provided text index spec. - */ - void assertFixSuccess( const std::string& s ) { - BSONObj user = fromjson( s ); - - try { - // fixSpec() should not throw on a valid spec. - BSONObj fixed = FTSSpec::fixSpec( user ); - - // fixSpec() on an already-fixed spec shouldn't change it. - BSONObj fixed2 = FTSSpec::fixSpec( fixed ); - ASSERT_EQUALS( fixed, fixed2 ); - } - catch ( UserException& ) { - ASSERT( false ); - } - } - - /** - * Assert that fixSpec() rejects the provided text index spec. - */ - void assertFixFailure( const std::string& s ) { - BSONObj user = fromjson( s ); - - try { - // fixSpec() on an invalid spec should uassert. - BSONObj fixed = FTSSpec::fixSpec( user ); - } - catch ( UserException& ) { - return; - } - ASSERT( false ); - } - - TEST( FTSSpec, FixNormalKey1 ) { - assertFixSuccess("{key: {a: 'text'}}"); - assertFixSuccess("{key: {a: 'text', b: 'text'}}"); - assertFixSuccess("{key: {a: 'text', b: 'text', c: 'text'}}"); - - assertFixFailure("{key: {_fts: 'text'}}"); // not allowed to index reserved field - assertFixFailure("{key: {_ftsx: 'text'}}"); - } - - TEST( FTSSpec, FixCompoundKey1 ) { - assertFixSuccess("{key: {a: 'text', b: 1.0}}"); - assertFixSuccess("{key: {a: 'text', b: NumberInt(1)}}"); - assertFixSuccess("{key: {a: 'text', b: NumberLong(1)}}"); - assertFixSuccess("{key: {a: 'text', b: -1.0}}"); - assertFixSuccess("{key: {a: 'text', b: NumberInt(-1)}}"); - assertFixSuccess("{key: {a: 'text', b: NumberLong(-1)}}"); - assertFixSuccess("{key: {a: 1.0, b: 'text'}}"); - assertFixSuccess("{key: {a: NumberInt(1), b: 'text'}}"); - assertFixSuccess("{key: {a: NumberLong(1), b: 'text'}}"); - assertFixSuccess("{key: {a: -1, b: 'text'}}"); - assertFixSuccess("{key: {a: 1, b: 1, c: 'text'}}"); - assertFixSuccess("{key: {a: 1, b: -1, c: 'text'}}"); - assertFixSuccess("{key: {a: -1, b: 1, c: 'text'}}"); - assertFixSuccess("{key: {a: 1, b: 'text', c: 1}}"); - assertFixSuccess("{key: {a: 'text', b: 1, c: 1}}"); - assertFixSuccess("{key: {a: 'text', b: 1, c: -1}}"); - assertFixSuccess("{key: {a: 'text', b: 'text', c: 1}}"); - assertFixSuccess("{key: {a: 1, b: 'text', c: 'text'}}"); - - assertFixFailure("{key: {a: 'text', b: 0}}"); - assertFixFailure("{key: {a: 'text', b: '2d'}}"); // not allowed to mix special indexes - assertFixFailure("{key: {a: 'text', b: '1'}}"); - assertFixFailure("{key: {a: 'text', _fts: 1}}"); - assertFixFailure("{key: {a: 'text', _fts: 'text'}}"); - assertFixFailure("{key: {a: 'text', _ftsx: 1}}"); - assertFixFailure("{key: {a: 'text', _ftsx: 'text'}}"); - assertFixFailure("{key: {_fts: 1, a: 'text'}}"); - assertFixFailure("{key: {_fts: 'text', a: 'text'}}"); - assertFixFailure("{key: {_ftsx: 1, a: 'text'}}"); - assertFixFailure("{key: {_ftsx: 'text', a: 'text'}}"); - assertFixFailure("{key: {a: 'text', b: 1, c: 'text'}}"); // 'text' must all be adjacent - assertFixFailure("{key: {a: 'text', b: 1, c: 'text', d: 1}}"); - assertFixFailure("{key: {a: 1, b: 'text', c: 1, d: 'text', e: 1}}"); - } - - TEST( FTSSpec, FixDefaultLanguage1 ) { - assertFixSuccess("{key: {a: 'text'}, default_language: 'english'}"); - assertFixSuccess("{key: {a: 'text'}, default_language: 'engLISH'}"); - assertFixSuccess("{key: {a: 'text'}, default_language: 'en'}"); - assertFixSuccess("{key: {a: 'text'}, default_language: 'eN'}"); - assertFixSuccess("{key: {a: 'text'}, default_language: 'spanish'}"); - assertFixSuccess("{key: {a: 'text'}, default_language: 'none'}"); - - assertFixFailure("{key: {a: 'text'}, default_language: 'engrish'}"); - assertFixFailure("{key: {a: 'text'}, default_language: ' english'}"); - assertFixFailure("{key: {a: 'text'}, default_language: ''}"); - } - - TEST( FTSSpec, FixWeights1 ) { - assertFixSuccess("{key: {a: 'text'}, weights: {}}"); - assertFixSuccess("{key: {a: 'text'}, weights: {a: 1.0}}"); - assertFixSuccess("{key: {a: 'text'}, weights: {a: NumberInt(1)}}"); - assertFixSuccess("{key: {a: 'text'}, weights: {a: NumberLong(1)}}"); - assertFixSuccess("{key: {a: 'text'}, weights: {a: 99999}}"); - assertFixSuccess("{key: {'$**': 'text'}, weights: {'a.b': 2}}"); - assertFixSuccess("{key: {'$**': 'text'}, weights: {a: 2, b: 2}}"); - assertFixSuccess("{key: {'$**': 'text'}, weights: {'$**': 2}}"); - - assertFixFailure("{key: {a: 'text'}, weights: 0}"); - assertFixFailure("{key: {a: 'text'}, weights: []}"); - assertFixFailure("{key: {a: 'text'}, weights: 'x'}"); - assertFixFailure("{key: {a: 'text'}, weights: {a: 0}}"); - assertFixFailure("{key: {a: 'text'}, weights: {a: -1}}"); - assertFixFailure("{key: {a: 'text'}, weights: {a: 100000}}"); // above max weight - assertFixFailure("{key: {a: 'text'}, weights: {a: '1'}}"); - assertFixFailure("{key: {a: 'text'}, weights: {'': 1}}"); // "invalid" path - assertFixFailure("{key: {a: 'text'}, weights: {'a.': 1}}"); - assertFixFailure("{key: {a: 'text'}, weights: {'.a': 1}}"); - assertFixFailure("{key: {a: 'text'}, weights: {'a..a': 1}}"); - assertFixFailure("{key: {a: 'text'}, weights: {$a: 1}}"); - assertFixFailure("{key: {a: 'text'}, weights: {'a.$a': 1}}"); - assertFixFailure("{key: {a: 'text'}, weights: {'a.$**': 1}}"); - } - - TEST( FTSSpec, FixLanguageOverride1 ) { - assertFixSuccess("{key: {a: 'text'}, language_override: 'foo'}"); - assertFixSuccess("{key: {a: 'text'}, language_override: 'foo$bar'}"); - - assertFixFailure("{key: {a: 'text'}, language_override: 'foo.bar'}"); // can't have '.' - assertFixFailure("{key: {a: 'text'}, language_override: ''}"); - assertFixFailure("{key: {a: 'text'}, language_override: '$foo'}"); - } - - TEST( FTSSpec, FixTextIndexVersion1 ) { - assertFixSuccess("{key: {a: 'text'}, textIndexVersion: 1.0}}"); - assertFixSuccess("{key: {a: 'text'}, textIndexVersion: NumberInt(1)}}"); - assertFixSuccess("{key: {a: 'text'}, textIndexVersion: NumberLong(1)}}"); - assertFixSuccess("{key: {a: 'text'}, textIndexVersion: 2.0}}"); - assertFixSuccess("{key: {a: 'text'}, textIndexVersion: NumberInt(2)}}"); - assertFixSuccess("{key: {a: 'text'}, textIndexVersion: NumberLong(2)}}"); - - assertFixFailure("{key: {a: 'text'}, textIndexVersion: 3}"); - assertFixFailure("{key: {a: 'text'}, textIndexVersion: '2'}"); - assertFixFailure("{key: {a: 'text'}, textIndexVersion: {}}"); - } - - TEST( FTSSpec, ScoreSingleField1 ) { - BSONObj user = BSON( "key" << BSON( "title" << "text" << - "text" << "text" ) << - "weights" << BSON( "title" << 10 ) ); - - FTSSpec spec( FTSSpec::fixSpec( user ) ); - - TermFrequencyMap m; - spec.scoreDocument( BSON( "title" << "cat sat run" ), &m ); - ASSERT_EQUALS( 3U, m.size() ); - ASSERT_EQUALS( m["cat"], m["sat"] ); - ASSERT_EQUALS( m["cat"], m["run"] ); - ASSERT( m["cat"] > 0 ); - } - - TEST( FTSSpec, ScoreMultipleField1 ) { - BSONObj user = BSON( "key" << BSON( "title" << "text" << - "text" << "text" ) << - "weights" << BSON( "title" << 10 ) ); - - FTSSpec spec( FTSSpec::fixSpec( user ) ); - - TermFrequencyMap m; - spec.scoreDocument( BSON( "title" << "cat sat run" << "text" << "cat book" ), &m ); - - ASSERT_EQUALS( 4U, m.size() ); - ASSERT_EQUALS( m["sat"], m["run"] ); - ASSERT( m["sat"] > 0 ); - - ASSERT( m["cat"] > m["sat"] ); - ASSERT( m["cat"] > m["book"] ); - ASSERT( m["book"] > 0 ); - ASSERT( m["book"] < m["sat"] ); - } - - TEST( FTSSpec, ScoreMultipleField2 ) { - // Test where one indexed field is a parent component of another indexed field. - BSONObj user = BSON( "key" << BSON( "a" << "text" << "a.b" << "text" ) ); - - FTSSpec spec( FTSSpec::fixSpec( user ) ); - - TermFrequencyMap m; - spec.scoreDocument( BSON( "a" << BSON( "b" << "term" ) ), &m ); - ASSERT_EQUALS( 1U, m.size() ); - } - - TEST( FTSSpec, ScoreRepeatWord ) { - BSONObj user = BSON( "key" << BSON( "title" << "text" << - "text" << "text" ) << - "weights" << BSON( "title" << 10 ) ); - - FTSSpec spec( FTSSpec::fixSpec( user ) ); - - TermFrequencyMap m; - spec.scoreDocument( BSON( "title" << "cat sat sat run run run" ), &m ); - ASSERT_EQUALS( 3U, m.size() ); - ASSERT( m["cat"] > 0 ); - ASSERT( m["sat"] > m["cat"] ); - ASSERT( m["run"] > m["sat"] ); - - } - - TEST( FTSSpec, Extra1 ) { - BSONObj user = BSON( "key" << BSON( "data" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( user ) ); - ASSERT_EQUALS( 0U, spec.numExtraBefore() ); - ASSERT_EQUALS( 0U, spec.numExtraAfter() ); - } - - TEST( FTSSpec, Extra2 ) { - BSONObj user = BSON( "key" << BSON( "data" << "text" << "x" << 1 ) ); - BSONObj fixed = FTSSpec::fixSpec( user ); - FTSSpec spec( fixed ); - ASSERT_EQUALS( 0U, spec.numExtraBefore() ); - ASSERT_EQUALS( 1U, spec.numExtraAfter() ); - ASSERT_EQUALS( StringData("x"), spec.extraAfter(0) ); - - BSONObj fixed2 = FTSSpec::fixSpec( fixed ); - ASSERT_EQUALS( fixed, fixed2 ); - } - - TEST( FTSSpec, Extra3 ) { - BSONObj user = BSON( "key" << BSON( "x" << 1 << "data" << "text" ) ); - BSONObj fixed = FTSSpec::fixSpec( user ); - - ASSERT_EQUALS( BSON( "x" << 1 << - "_fts" << "text" << - "_ftsx" << 1 ), - fixed["key"].Obj() ); - ASSERT_EQUALS( BSON( "data" << 1 ), - fixed["weights"].Obj() ); - - BSONObj fixed2 = FTSSpec::fixSpec( fixed ); - ASSERT_EQUALS( fixed, fixed2 ); - - FTSSpec spec( fixed ); - ASSERT_EQUALS( 1U, spec.numExtraBefore() ); - ASSERT_EQUALS( StringData("x"), spec.extraBefore(0) ); - ASSERT_EQUALS( 0U, spec.numExtraAfter() ); - - BSONObj prefix; - - ASSERT( spec.getIndexPrefix( BSON( "x" << 2 ), &prefix ).isOK() ); - ASSERT_EQUALS( BSON( "x" << 2 ), prefix ); - - ASSERT( spec.getIndexPrefix( BSON( "x" << 3 << "y" << 4 ), &prefix ).isOK() ); - ASSERT_EQUALS( BSON( "x" << 3 ), prefix ); - - ASSERT( !spec.getIndexPrefix( BSON( "x" << BSON( "$gt" << 5 ) ), &prefix ).isOK() ); - ASSERT( !spec.getIndexPrefix( BSON( "y" << 4 ), &prefix ).isOK() ); - ASSERT( !spec.getIndexPrefix( BSONObj(), &prefix ).isOK() ); - } - - // Test for correct behavior when encountering nested arrays (both directly nested and - // indirectly nested). - - TEST( FTSSpec, NestedArraysPos1 ) { - BSONObj user = BSON( "key" << BSON( "a.b" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( user ) ); - - // The following document matches {"a.b": {$type: 2}}, so "term" should be indexed. - BSONObj obj = fromjson("{a: [{b: ['term']}]}"); // indirectly nested arrays - TermFrequencyMap m; - spec.scoreDocument( obj, &m ); - ASSERT_EQUALS( 1U, m.size() ); - } - - TEST( FTSSpec, NestedArraysPos2 ) { - BSONObj user = BSON( "key" << BSON( "$**" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( user ) ); - - // The wildcard spec implies a full recursive traversal, so "term" should be indexed. - BSONObj obj = fromjson("{a: {b: [['term']]}}"); // directly nested arrays - TermFrequencyMap m; - spec.scoreDocument( obj, &m ); - ASSERT_EQUALS( 1U, m.size() ); - } - - TEST( FTSSpec, NestedArraysNeg1 ) { - BSONObj user = BSON( "key" << BSON( "a.b" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( user ) ); - - // The following document does not match {"a.b": {$type: 2}}, so "term" should not be - // indexed. - BSONObj obj = fromjson("{a: {b: [['term']]}}"); // directly nested arrays - TermFrequencyMap m; - spec.scoreDocument( obj, &m ); - ASSERT_EQUALS( 0U, m.size() ); - } - - // Multi-language test_1: test independent stemming per sub-document - TEST( FTSSpec, NestedLanguages_PerArrayItemStemming ) { - BSONObj indexSpec = BSON( "key" << BSON( "a.b.c" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - - BSONObj obj = fromjson( - "{ a :" - " { b :" - " [ { c : \"walked\", language : \"english\" }," - " { c : \"camminato\", language : \"italian\" }," - " { c : \"ging\", language : \"german\" } ]" - " }" - " }" ); - - spec.scoreDocument( obj, &tfm ); - - set<string> hits; - hits.insert("walk"); - hits.insert("cammin"); - hits.insert("ging"); - - for (TermFrequencyMap::const_iterator i = tfm.begin(); i!=tfm.end(); ++i) { - string term = i->first; - ASSERT_EQUALS( 1U, hits.count( term ) ); - } - - } - - // Multi-language test_2: test nested stemming per sub-document - TEST( FTSSpec, NestedLanguages_PerSubdocStemming ) { - BSONObj indexSpec = BSON( "key" << BSON( "a.b.c" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - - BSONObj obj = fromjson( - "{ language : \"english\"," - " a :" - " { language : \"danish\"," - " b :" - " [ { c : \"foredrag\" }," - " { c : \"foredragsholder\" }," - " { c : \"lector\" } ]" - " }" - "}" ); - - spec.scoreDocument( obj, &tfm ); - - set<string> hits; - hits.insert("foredrag"); - hits.insert("foredragshold"); - hits.insert("lector"); - - for (TermFrequencyMap::const_iterator i = tfm.begin(); i!=tfm.end(); ++i) { - string term = i->first; - ASSERT_EQUALS( 1U, hits.count( term ) ); - } - - } - - // Multi-language test_3: test nested arrays - TEST( FTSSpec, NestedLanguages_NestedArrays ) { - BSONObj indexSpec = BSON( "key" << BSON( "a.b.c" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - - BSONObj obj = fromjson( - "{ language : \"english\"," - " a : [" - " { language : \"danish\"," - " b :" - " [ { c : [\"foredrag\"] }," - " { c : [\"foredragsholder\"] }," - " { c : [\"lector\"] } ]" - " } ]" - "}" ); - - spec.scoreDocument( obj, &tfm ); - - set<string> hits; - hits.insert("foredrag"); - hits.insert("foredragshold"); - hits.insert("lector"); - - for (TermFrequencyMap::const_iterator i = tfm.begin(); i!=tfm.end(); ++i) { - string term = i->first; - ASSERT_EQUALS( 1U, hits.count( term ) ); - } - - } - - // Multi-language test_4: test pruning - TEST( FTSSpec, NestedLanguages_PathPruning ) { - BSONObj indexSpec = BSON( "key" << BSON( "a.b.c" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - - BSONObj obj = fromjson( - "{ language : \"english\"," - " a : " - " { language : \"danish\"," - " bc : \"foo\"," - " b : { d: \"bar\" }," - " b :" - " [ { c : \"foredrag\" }," - " { c : \"foredragsholder\" }," - " { c : \"lector\" } ]" - " }" - "}" ); - - spec.scoreDocument( obj, &tfm ); - - set<string> hits; - hits.insert("foredrag"); - hits.insert("foredragshold"); - hits.insert("lector"); - - for (TermFrequencyMap::const_iterator i = tfm.begin(); i!=tfm.end(); ++i) { - string term = i->first; - ASSERT_EQUALS( 1U, hits.count( term ) ); - } - - } - - // Multi-language test_5: test wildcard spec - TEST( FTSSpec, NestedLanguages_Wildcard ) { - BSONObj indexSpec = BSON( "key" << BSON( "$**" << "text" ) ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - - BSONObj obj = fromjson( - "{ language : \"english\"," - " b : \"walking\"," - " c : { e: \"walked\" }," - " d : " - " { language : \"danish\"," - " e :" - " [ { f : \"foredrag\" }," - " { f : \"foredragsholder\" }," - " { f : \"lector\" } ]" - " }" - "}" ); - - spec.scoreDocument( obj, &tfm ); - - set<string> hits; - hits.insert("foredrag"); - hits.insert("foredragshold"); - hits.insert("lector"); - hits.insert("walk"); - - for (TermFrequencyMap::const_iterator i = tfm.begin(); i!=tfm.end(); ++i) { - string term = i->first; - ASSERT_EQUALS( 1U, hits.count( term ) ); - } - - } - - // Multi-language test_6: test wildcard spec with override - TEST( FTSSpec, NestedLanguages_WildcardOverride ) { - BSONObj indexSpec = BSON( "key" << BSON( "$**" << "text" ) << - "weights" << BSON( "d.e.f" << 20 ) ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - - BSONObj obj = fromjson( - "{ language : \"english\"," - " b : \"walking\"," - " c : { e: \"walked\" }," - " d : " - " { language : \"danish\"," - " e :" - " [ { f : \"foredrag\" }," - " { f : \"foredragsholder\" }," - " { f : \"lector\" } ]" - " }" - "}" ); - - spec.scoreDocument( obj, &tfm ); - - set<string> hits; - hits.insert("foredrag"); - hits.insert("foredragshold"); - hits.insert("lector"); - hits.insert("walk"); - - for (TermFrequencyMap::const_iterator i = tfm.begin(); i!=tfm.end(); ++i) { - string term = i->first; - ASSERT_EQUALS( 1U, hits.count( term ) ); - } - - } - - /** Test differences across textIndexVersion values in handling of nested arrays. */ - TEST( FTSSpec, TextIndexLegacyNestedArrays ) { - BSONObj obj = fromjson( "{a: [{b: ['hello']}]}" ); - - // textIndexVersion=1 FTSSpec objects do not index nested arrays. - { - BSONObj indexSpec = fromjson( "{key: {'a.b': 'text'}, textIndexVersion: 1}" ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - spec.scoreDocument( obj, &tfm ); - ASSERT_EQUALS( tfm.size(), 0U ); - } - - // textIndexVersion=2 FTSSpec objects do index nested arrays. - { - BSONObj indexSpec = fromjson( "{key: {'a.b': 'text'}, textIndexVersion: 2}" ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - spec.scoreDocument( obj, &tfm ); - ASSERT_EQUALS( tfm.size(), 1U ); - } - } - - /** Test differences across textIndexVersion values in handling of language annotations. */ - TEST( FTSSpec, TextIndexLegacyLanguageRecognition) { - BSONObj obj = fromjson( "{a: 'the', language: 'EN'}" ); - - // textIndexVersion=1 FTSSpec objects treat two-letter language annotations as "none" - // for purposes of stopword processing. - { - BSONObj indexSpec = fromjson( "{key: {'a': 'text'}, textIndexVersion: 1}" ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - spec.scoreDocument( obj, &tfm ); - ASSERT_EQUALS( tfm.size(), 1U ); // "the" not recognized as stopword - } - - // textIndexVersion=2 FTSSpec objects recognize two-letter codes. - { - BSONObj indexSpec = fromjson( "{key: {'a': 'text'}, textIndexVersion: 2}" ); - FTSSpec spec( FTSSpec::fixSpec( indexSpec ) ); - TermFrequencyMap tfm; - spec.scoreDocument( obj, &tfm ); - ASSERT_EQUALS( tfm.size(), 0U ); // "the" recognized as stopword - } - } +using std::set; +using std::string; +namespace fts { + +/** + * Assert that fixSpec() accepts the provided text index spec. + */ +void assertFixSuccess(const std::string& s) { + BSONObj user = fromjson(s); + + try { + // fixSpec() should not throw on a valid spec. + BSONObj fixed = FTSSpec::fixSpec(user); + + // fixSpec() on an already-fixed spec shouldn't change it. + BSONObj fixed2 = FTSSpec::fixSpec(fixed); + ASSERT_EQUALS(fixed, fixed2); + } catch (UserException&) { + ASSERT(false); + } +} + +/** + * Assert that fixSpec() rejects the provided text index spec. + */ +void assertFixFailure(const std::string& s) { + BSONObj user = fromjson(s); + + try { + // fixSpec() on an invalid spec should uassert. + BSONObj fixed = FTSSpec::fixSpec(user); + } catch (UserException&) { + return; + } + ASSERT(false); +} + +TEST(FTSSpec, FixNormalKey1) { + assertFixSuccess("{key: {a: 'text'}}"); + assertFixSuccess("{key: {a: 'text', b: 'text'}}"); + assertFixSuccess("{key: {a: 'text', b: 'text', c: 'text'}}"); + + assertFixFailure("{key: {_fts: 'text'}}"); // not allowed to index reserved field + assertFixFailure("{key: {_ftsx: 'text'}}"); +} + +TEST(FTSSpec, FixCompoundKey1) { + assertFixSuccess("{key: {a: 'text', b: 1.0}}"); + assertFixSuccess("{key: {a: 'text', b: NumberInt(1)}}"); + assertFixSuccess("{key: {a: 'text', b: NumberLong(1)}}"); + assertFixSuccess("{key: {a: 'text', b: -1.0}}"); + assertFixSuccess("{key: {a: 'text', b: NumberInt(-1)}}"); + assertFixSuccess("{key: {a: 'text', b: NumberLong(-1)}}"); + assertFixSuccess("{key: {a: 1.0, b: 'text'}}"); + assertFixSuccess("{key: {a: NumberInt(1), b: 'text'}}"); + assertFixSuccess("{key: {a: NumberLong(1), b: 'text'}}"); + assertFixSuccess("{key: {a: -1, b: 'text'}}"); + assertFixSuccess("{key: {a: 1, b: 1, c: 'text'}}"); + assertFixSuccess("{key: {a: 1, b: -1, c: 'text'}}"); + assertFixSuccess("{key: {a: -1, b: 1, c: 'text'}}"); + assertFixSuccess("{key: {a: 1, b: 'text', c: 1}}"); + assertFixSuccess("{key: {a: 'text', b: 1, c: 1}}"); + assertFixSuccess("{key: {a: 'text', b: 1, c: -1}}"); + assertFixSuccess("{key: {a: 'text', b: 'text', c: 1}}"); + assertFixSuccess("{key: {a: 1, b: 'text', c: 'text'}}"); + + assertFixFailure("{key: {a: 'text', b: 0}}"); + assertFixFailure("{key: {a: 'text', b: '2d'}}"); // not allowed to mix special indexes + assertFixFailure("{key: {a: 'text', b: '1'}}"); + assertFixFailure("{key: {a: 'text', _fts: 1}}"); + assertFixFailure("{key: {a: 'text', _fts: 'text'}}"); + assertFixFailure("{key: {a: 'text', _ftsx: 1}}"); + assertFixFailure("{key: {a: 'text', _ftsx: 'text'}}"); + assertFixFailure("{key: {_fts: 1, a: 'text'}}"); + assertFixFailure("{key: {_fts: 'text', a: 'text'}}"); + assertFixFailure("{key: {_ftsx: 1, a: 'text'}}"); + assertFixFailure("{key: {_ftsx: 'text', a: 'text'}}"); + assertFixFailure("{key: {a: 'text', b: 1, c: 'text'}}"); // 'text' must all be adjacent + assertFixFailure("{key: {a: 'text', b: 1, c: 'text', d: 1}}"); + assertFixFailure("{key: {a: 1, b: 'text', c: 1, d: 'text', e: 1}}"); +} + +TEST(FTSSpec, FixDefaultLanguage1) { + assertFixSuccess("{key: {a: 'text'}, default_language: 'english'}"); + assertFixSuccess("{key: {a: 'text'}, default_language: 'engLISH'}"); + assertFixSuccess("{key: {a: 'text'}, default_language: 'en'}"); + assertFixSuccess("{key: {a: 'text'}, default_language: 'eN'}"); + assertFixSuccess("{key: {a: 'text'}, default_language: 'spanish'}"); + assertFixSuccess("{key: {a: 'text'}, default_language: 'none'}"); + + assertFixFailure("{key: {a: 'text'}, default_language: 'engrish'}"); + assertFixFailure("{key: {a: 'text'}, default_language: ' english'}"); + assertFixFailure("{key: {a: 'text'}, default_language: ''}"); +} + +TEST(FTSSpec, FixWeights1) { + assertFixSuccess("{key: {a: 'text'}, weights: {}}"); + assertFixSuccess("{key: {a: 'text'}, weights: {a: 1.0}}"); + assertFixSuccess("{key: {a: 'text'}, weights: {a: NumberInt(1)}}"); + assertFixSuccess("{key: {a: 'text'}, weights: {a: NumberLong(1)}}"); + assertFixSuccess("{key: {a: 'text'}, weights: {a: 99999}}"); + assertFixSuccess("{key: {'$**': 'text'}, weights: {'a.b': 2}}"); + assertFixSuccess("{key: {'$**': 'text'}, weights: {a: 2, b: 2}}"); + assertFixSuccess("{key: {'$**': 'text'}, weights: {'$**': 2}}"); + + assertFixFailure("{key: {a: 'text'}, weights: 0}"); + assertFixFailure("{key: {a: 'text'}, weights: []}"); + assertFixFailure("{key: {a: 'text'}, weights: 'x'}"); + assertFixFailure("{key: {a: 'text'}, weights: {a: 0}}"); + assertFixFailure("{key: {a: 'text'}, weights: {a: -1}}"); + assertFixFailure("{key: {a: 'text'}, weights: {a: 100000}}"); // above max weight + assertFixFailure("{key: {a: 'text'}, weights: {a: '1'}}"); + assertFixFailure("{key: {a: 'text'}, weights: {'': 1}}"); // "invalid" path + assertFixFailure("{key: {a: 'text'}, weights: {'a.': 1}}"); + assertFixFailure("{key: {a: 'text'}, weights: {'.a': 1}}"); + assertFixFailure("{key: {a: 'text'}, weights: {'a..a': 1}}"); + assertFixFailure("{key: {a: 'text'}, weights: {$a: 1}}"); + assertFixFailure("{key: {a: 'text'}, weights: {'a.$a': 1}}"); + assertFixFailure("{key: {a: 'text'}, weights: {'a.$**': 1}}"); +} + +TEST(FTSSpec, FixLanguageOverride1) { + assertFixSuccess("{key: {a: 'text'}, language_override: 'foo'}"); + assertFixSuccess("{key: {a: 'text'}, language_override: 'foo$bar'}"); + + assertFixFailure("{key: {a: 'text'}, language_override: 'foo.bar'}"); // can't have '.' + assertFixFailure("{key: {a: 'text'}, language_override: ''}"); + assertFixFailure("{key: {a: 'text'}, language_override: '$foo'}"); +} + +TEST(FTSSpec, FixTextIndexVersion1) { + assertFixSuccess("{key: {a: 'text'}, textIndexVersion: 1.0}}"); + assertFixSuccess("{key: {a: 'text'}, textIndexVersion: NumberInt(1)}}"); + assertFixSuccess("{key: {a: 'text'}, textIndexVersion: NumberLong(1)}}"); + assertFixSuccess("{key: {a: 'text'}, textIndexVersion: 2.0}}"); + assertFixSuccess("{key: {a: 'text'}, textIndexVersion: NumberInt(2)}}"); + assertFixSuccess("{key: {a: 'text'}, textIndexVersion: NumberLong(2)}}"); + + assertFixFailure("{key: {a: 'text'}, textIndexVersion: 3}"); + assertFixFailure("{key: {a: 'text'}, textIndexVersion: '2'}"); + assertFixFailure("{key: {a: 'text'}, textIndexVersion: {}}"); +} + +TEST(FTSSpec, ScoreSingleField1) { + BSONObj user = BSON("key" << BSON("title" + << "text" + << "text" + << "text") << "weights" << BSON("title" << 10)); + + FTSSpec spec(FTSSpec::fixSpec(user)); + + TermFrequencyMap m; + spec.scoreDocument(BSON("title" + << "cat sat run"), + &m); + ASSERT_EQUALS(3U, m.size()); + ASSERT_EQUALS(m["cat"], m["sat"]); + ASSERT_EQUALS(m["cat"], m["run"]); + ASSERT(m["cat"] > 0); +} + +TEST(FTSSpec, ScoreMultipleField1) { + BSONObj user = BSON("key" << BSON("title" + << "text" + << "text" + << "text") << "weights" << BSON("title" << 10)); + + FTSSpec spec(FTSSpec::fixSpec(user)); + + TermFrequencyMap m; + spec.scoreDocument(BSON("title" + << "cat sat run" + << "text" + << "cat book"), + &m); + + ASSERT_EQUALS(4U, m.size()); + ASSERT_EQUALS(m["sat"], m["run"]); + ASSERT(m["sat"] > 0); + + ASSERT(m["cat"] > m["sat"]); + ASSERT(m["cat"] > m["book"]); + ASSERT(m["book"] > 0); + ASSERT(m["book"] < m["sat"]); +} + +TEST(FTSSpec, ScoreMultipleField2) { + // Test where one indexed field is a parent component of another indexed field. + BSONObj user = BSON("key" << BSON("a" + << "text" + << "a.b" + << "text")); + + FTSSpec spec(FTSSpec::fixSpec(user)); + + TermFrequencyMap m; + spec.scoreDocument(BSON("a" << BSON("b" + << "term")), + &m); + ASSERT_EQUALS(1U, m.size()); +} + +TEST(FTSSpec, ScoreRepeatWord) { + BSONObj user = BSON("key" << BSON("title" + << "text" + << "text" + << "text") << "weights" << BSON("title" << 10)); + + FTSSpec spec(FTSSpec::fixSpec(user)); + + TermFrequencyMap m; + spec.scoreDocument(BSON("title" + << "cat sat sat run run run"), + &m); + ASSERT_EQUALS(3U, m.size()); + ASSERT(m["cat"] > 0); + ASSERT(m["sat"] > m["cat"]); + ASSERT(m["run"] > m["sat"]); +} + +TEST(FTSSpec, Extra1) { + BSONObj user = BSON("key" << BSON("data" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(user)); + ASSERT_EQUALS(0U, spec.numExtraBefore()); + ASSERT_EQUALS(0U, spec.numExtraAfter()); +} + +TEST(FTSSpec, Extra2) { + BSONObj user = BSON("key" << BSON("data" + << "text" + << "x" << 1)); + BSONObj fixed = FTSSpec::fixSpec(user); + FTSSpec spec(fixed); + ASSERT_EQUALS(0U, spec.numExtraBefore()); + ASSERT_EQUALS(1U, spec.numExtraAfter()); + ASSERT_EQUALS(StringData("x"), spec.extraAfter(0)); + + BSONObj fixed2 = FTSSpec::fixSpec(fixed); + ASSERT_EQUALS(fixed, fixed2); +} + +TEST(FTSSpec, Extra3) { + BSONObj user = BSON("key" << BSON("x" << 1 << "data" + << "text")); + BSONObj fixed = FTSSpec::fixSpec(user); + + ASSERT_EQUALS(BSON("x" << 1 << "_fts" + << "text" + << "_ftsx" << 1), + fixed["key"].Obj()); + ASSERT_EQUALS(BSON("data" << 1), fixed["weights"].Obj()); + + BSONObj fixed2 = FTSSpec::fixSpec(fixed); + ASSERT_EQUALS(fixed, fixed2); + + FTSSpec spec(fixed); + ASSERT_EQUALS(1U, spec.numExtraBefore()); + ASSERT_EQUALS(StringData("x"), spec.extraBefore(0)); + ASSERT_EQUALS(0U, spec.numExtraAfter()); + + BSONObj prefix; + + ASSERT(spec.getIndexPrefix(BSON("x" << 2), &prefix).isOK()); + ASSERT_EQUALS(BSON("x" << 2), prefix); + + ASSERT(spec.getIndexPrefix(BSON("x" << 3 << "y" << 4), &prefix).isOK()); + ASSERT_EQUALS(BSON("x" << 3), prefix); + + ASSERT(!spec.getIndexPrefix(BSON("x" << BSON("$gt" << 5)), &prefix).isOK()); + ASSERT(!spec.getIndexPrefix(BSON("y" << 4), &prefix).isOK()); + ASSERT(!spec.getIndexPrefix(BSONObj(), &prefix).isOK()); +} + +// Test for correct behavior when encountering nested arrays (both directly nested and +// indirectly nested). + +TEST(FTSSpec, NestedArraysPos1) { + BSONObj user = BSON("key" << BSON("a.b" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(user)); + + // The following document matches {"a.b": {$type: 2}}, so "term" should be indexed. + BSONObj obj = fromjson("{a: [{b: ['term']}]}"); // indirectly nested arrays + TermFrequencyMap m; + spec.scoreDocument(obj, &m); + ASSERT_EQUALS(1U, m.size()); +} + +TEST(FTSSpec, NestedArraysPos2) { + BSONObj user = BSON("key" << BSON("$**" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(user)); + + // The wildcard spec implies a full recursive traversal, so "term" should be indexed. + BSONObj obj = fromjson("{a: {b: [['term']]}}"); // directly nested arrays + TermFrequencyMap m; + spec.scoreDocument(obj, &m); + ASSERT_EQUALS(1U, m.size()); +} + +TEST(FTSSpec, NestedArraysNeg1) { + BSONObj user = BSON("key" << BSON("a.b" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(user)); + + // The following document does not match {"a.b": {$type: 2}}, so "term" should not be + // indexed. + BSONObj obj = fromjson("{a: {b: [['term']]}}"); // directly nested arrays + TermFrequencyMap m; + spec.scoreDocument(obj, &m); + ASSERT_EQUALS(0U, m.size()); +} + +// Multi-language test_1: test independent stemming per sub-document +TEST(FTSSpec, NestedLanguages_PerArrayItemStemming) { + BSONObj indexSpec = BSON("key" << BSON("a.b.c" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + + BSONObj obj = fromjson( + "{ a :" + " { b :" + " [ { c : \"walked\", language : \"english\" }," + " { c : \"camminato\", language : \"italian\" }," + " { c : \"ging\", language : \"german\" } ]" + " }" + " }"); + + spec.scoreDocument(obj, &tfm); + + set<string> hits; + hits.insert("walk"); + hits.insert("cammin"); + hits.insert("ging"); + + for (TermFrequencyMap::const_iterator i = tfm.begin(); i != tfm.end(); ++i) { + string term = i->first; + ASSERT_EQUALS(1U, hits.count(term)); + } +} + +// Multi-language test_2: test nested stemming per sub-document +TEST(FTSSpec, NestedLanguages_PerSubdocStemming) { + BSONObj indexSpec = BSON("key" << BSON("a.b.c" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + + BSONObj obj = fromjson( + "{ language : \"english\"," + " a :" + " { language : \"danish\"," + " b :" + " [ { c : \"foredrag\" }," + " { c : \"foredragsholder\" }," + " { c : \"lector\" } ]" + " }" + "}"); + + spec.scoreDocument(obj, &tfm); + + set<string> hits; + hits.insert("foredrag"); + hits.insert("foredragshold"); + hits.insert("lector"); + + for (TermFrequencyMap::const_iterator i = tfm.begin(); i != tfm.end(); ++i) { + string term = i->first; + ASSERT_EQUALS(1U, hits.count(term)); } } + +// Multi-language test_3: test nested arrays +TEST(FTSSpec, NestedLanguages_NestedArrays) { + BSONObj indexSpec = BSON("key" << BSON("a.b.c" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + + BSONObj obj = fromjson( + "{ language : \"english\"," + " a : [" + " { language : \"danish\"," + " b :" + " [ { c : [\"foredrag\"] }," + " { c : [\"foredragsholder\"] }," + " { c : [\"lector\"] } ]" + " } ]" + "}"); + + spec.scoreDocument(obj, &tfm); + + set<string> hits; + hits.insert("foredrag"); + hits.insert("foredragshold"); + hits.insert("lector"); + + for (TermFrequencyMap::const_iterator i = tfm.begin(); i != tfm.end(); ++i) { + string term = i->first; + ASSERT_EQUALS(1U, hits.count(term)); + } +} + +// Multi-language test_4: test pruning +TEST(FTSSpec, NestedLanguages_PathPruning) { + BSONObj indexSpec = BSON("key" << BSON("a.b.c" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + + BSONObj obj = fromjson( + "{ language : \"english\"," + " a : " + " { language : \"danish\"," + " bc : \"foo\"," + " b : { d: \"bar\" }," + " b :" + " [ { c : \"foredrag\" }," + " { c : \"foredragsholder\" }," + " { c : \"lector\" } ]" + " }" + "}"); + + spec.scoreDocument(obj, &tfm); + + set<string> hits; + hits.insert("foredrag"); + hits.insert("foredragshold"); + hits.insert("lector"); + + for (TermFrequencyMap::const_iterator i = tfm.begin(); i != tfm.end(); ++i) { + string term = i->first; + ASSERT_EQUALS(1U, hits.count(term)); + } +} + +// Multi-language test_5: test wildcard spec +TEST(FTSSpec, NestedLanguages_Wildcard) { + BSONObj indexSpec = BSON("key" << BSON("$**" + << "text")); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + + BSONObj obj = fromjson( + "{ language : \"english\"," + " b : \"walking\"," + " c : { e: \"walked\" }," + " d : " + " { language : \"danish\"," + " e :" + " [ { f : \"foredrag\" }," + " { f : \"foredragsholder\" }," + " { f : \"lector\" } ]" + " }" + "}"); + + spec.scoreDocument(obj, &tfm); + + set<string> hits; + hits.insert("foredrag"); + hits.insert("foredragshold"); + hits.insert("lector"); + hits.insert("walk"); + + for (TermFrequencyMap::const_iterator i = tfm.begin(); i != tfm.end(); ++i) { + string term = i->first; + ASSERT_EQUALS(1U, hits.count(term)); + } +} + +// Multi-language test_6: test wildcard spec with override +TEST(FTSSpec, NestedLanguages_WildcardOverride) { + BSONObj indexSpec = BSON("key" << BSON("$**" + << "text") << "weights" << BSON("d.e.f" << 20)); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + + BSONObj obj = fromjson( + "{ language : \"english\"," + " b : \"walking\"," + " c : { e: \"walked\" }," + " d : " + " { language : \"danish\"," + " e :" + " [ { f : \"foredrag\" }," + " { f : \"foredragsholder\" }," + " { f : \"lector\" } ]" + " }" + "}"); + + spec.scoreDocument(obj, &tfm); + + set<string> hits; + hits.insert("foredrag"); + hits.insert("foredragshold"); + hits.insert("lector"); + hits.insert("walk"); + + for (TermFrequencyMap::const_iterator i = tfm.begin(); i != tfm.end(); ++i) { + string term = i->first; + ASSERT_EQUALS(1U, hits.count(term)); + } +} + +/** Test differences across textIndexVersion values in handling of nested arrays. */ +TEST(FTSSpec, TextIndexLegacyNestedArrays) { + BSONObj obj = fromjson("{a: [{b: ['hello']}]}"); + + // textIndexVersion=1 FTSSpec objects do not index nested arrays. + { + BSONObj indexSpec = fromjson("{key: {'a.b': 'text'}, textIndexVersion: 1}"); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + spec.scoreDocument(obj, &tfm); + ASSERT_EQUALS(tfm.size(), 0U); + } + + // textIndexVersion=2 FTSSpec objects do index nested arrays. + { + BSONObj indexSpec = fromjson("{key: {'a.b': 'text'}, textIndexVersion: 2}"); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + spec.scoreDocument(obj, &tfm); + ASSERT_EQUALS(tfm.size(), 1U); + } +} + +/** Test differences across textIndexVersion values in handling of language annotations. */ +TEST(FTSSpec, TextIndexLegacyLanguageRecognition) { + BSONObj obj = fromjson("{a: 'the', language: 'EN'}"); + + // textIndexVersion=1 FTSSpec objects treat two-letter language annotations as "none" + // for purposes of stopword processing. + { + BSONObj indexSpec = fromjson("{key: {'a': 'text'}, textIndexVersion: 1}"); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + spec.scoreDocument(obj, &tfm); + ASSERT_EQUALS(tfm.size(), 1U); // "the" not recognized as stopword + } + + // textIndexVersion=2 FTSSpec objects recognize two-letter codes. + { + BSONObj indexSpec = fromjson("{key: {'a': 'text'}, textIndexVersion: 2}"); + FTSSpec spec(FTSSpec::fixSpec(indexSpec)); + TermFrequencyMap tfm; + spec.scoreDocument(obj, &tfm); + ASSERT_EQUALS(tfm.size(), 0U); // "the" recognized as stopword + } +} +} +} |