diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2016-02-02 17:47:20 -0500 |
---|---|---|
committer | Charlie Swanson <charlie.swanson@mongodb.com> | 2016-02-04 19:38:27 -0500 |
commit | 1ddd9162e8fd7de3b54053093698ef16abce8aa9 (patch) | |
tree | 3216136a15a9a7398db21dde85b14aef0c8f881c /src/mongo/db/fts | |
parent | ebc481d27e408d8608075a61eba2a68be43c1314 (diff) | |
download | mongo-1ddd9162e8fd7de3b54053093698ef16abce8aa9.tar.gz |
SERVER-20792 Only negate term if directly preceded by a hyphen.
For example, the term "documentation" should be negated in the text
query "MongoDB -documentation", but should not be negated in the text
query "MongoDB - documentation".
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r-- | src/mongo/db/fts/fts_query_impl.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query_impl.h | 17 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query_impl_test.cpp | 99 |
3 files changed, 130 insertions, 7 deletions
diff --git a/src/mongo/db/fts/fts_query_impl.cpp b/src/mongo/db/fts/fts_query_impl.cpp index 674c8e84ab2..2c4b5016f90 100644 --- a/src/mongo/db/fts/fts_query_impl.cpp +++ b/src/mongo/db/fts/fts_query_impl.cpp @@ -75,6 +75,13 @@ Status FTSQueryImpl::parse(TextIndexVersion textIndexVersion) { if (inPhrase && inNegation) { // don't add term } else { + // A negation should only continue until the next whitespace character. For example, + // "-foo" should negate "foo", "- foo" should not negate "foo", and "-foo-bar" + // should negate both "foo" and "bar". + if (inNegation && t.previousWhiteSpace) { + inNegation = false; + } + if (inNegation) { negativeTermSentence.append(s); negativeTermSentence.push_back(' '); @@ -83,9 +90,6 @@ Status FTSQueryImpl::parse(TextIndexVersion textIndexVersion) { positiveTermSentence.push_back(' '); } } - - if (inNegation && !inPhrase) - inNegation = false; } else if (t.type == QueryToken::DELIMITER) { char c = t.data[0]; if (c == '-') { @@ -105,11 +109,20 @@ Status FTSQueryImpl::parse(TextIndexVersion textIndexVersion) { } else { _positivePhrases.push_back(phrase.toString()); } - inNegation = false; + + // Do not reset 'inNegation' here, since a negation should continue until the + // next whitespace character. For example, '-"foo bar"-"baz quux"' should negate + // both the phrase "foo bar" and the phrase "baz quux". + inPhrase = false; } else { // start of a phrase inPhrase = true; + // A "-" should only be treated as a negation if there is no whitespace between + // the "-" and the start of the phrase. 
+ if (inNegation && t.previousWhiteSpace) { + inNegation = false; + } quoteOffset = t.offset; } } diff --git a/src/mongo/db/fts/fts_query_impl.h b/src/mongo/db/fts/fts_query_impl.h index 888cec08e13..a3933a39806 100644 --- a/src/mongo/db/fts/fts_query_impl.h +++ b/src/mongo/db/fts/fts_query_impl.h @@ -67,8 +67,25 @@ public: std::string toString() const; + /** + * Returns a string with the following format: + * <positive terms>||<negative terms>||<positive phrases>||<negative phrases> + * + * Each set of terms or phrases is separated by '|' characters. For example, if we had positive + * terms 'foo' and 'bar', negative term 'baz', and no phrases, the debug string would be + * 'foo|bar||baz||||'. + */ std::string debugString() const; + /** + * Returns a BSON object with the following format: + * { + * terms: <array of positive terms>, + * negatedTerms: <array of negative terms>, + * phrases: <array of positive phrases>, + * negatedPhrases: <array of negative phrases> + * } + */ BSONObj toBSON() const; private: diff --git a/src/mongo/db/fts/fts_query_impl_test.cpp b/src/mongo/db/fts/fts_query_impl_test.cpp index 538ee3755bc..0465fff71d3 100644 --- a/src/mongo/db/fts/fts_query_impl_test.cpp +++ b/src/mongo/db/fts/fts_query_impl_test.cpp @@ -70,9 +70,9 @@ TEST(FTSQueryImpl, ParsePunctuation) { ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms()); } -TEST(FTSQueryImpl, Neg1) { +TEST(FTSQueryImpl, HyphenBeforeWordShouldNegateTerm) { FTSQueryImpl q; - q.setQuery("this is -really fun"); + q.setQuery("-really fun"); q.setLanguage("english"); q.setCaseSensitive(false); q.setDiacriticSensitive(false); @@ -85,6 +85,69 @@ TEST(FTSQueryImpl, Neg1) { ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms()); } +TEST(FTSQueryImpl, HyphenFollowedByWhitespaceShouldNotNegate) { + FTSQueryImpl q; + q.setQuery("- really fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); + + auto 
positiveTerms = q.getPositiveTerms(); + ASSERT_EQUALS(2U, positiveTerms.size()); + ASSERT_EQUALS(1U, positiveTerms.count("fun")); + ASSERT_EQUALS(1U, positiveTerms.count("realli")); + ASSERT_EQUALS(0U, q.getNegatedTerms().size()); + ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms()); +} + +TEST(FTSQueryImpl, TwoHyphensShouldNegate) { + FTSQueryImpl q; + q.setQuery("--really fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); + + ASSERT_EQUALS(1U, q.getPositiveTerms().size()); + ASSERT_EQUALS("fun", *q.getPositiveTerms().begin()); + ASSERT_EQUALS(1U, q.getNegatedTerms().size()); + ASSERT_EQUALS("realli", *q.getNegatedTerms().begin()); + ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms()); +} + +TEST(FTSQueryImpl, HyphenWithNoSurroundingWhitespaceShouldBeTreatedAsDelimiter) { + FTSQueryImpl q; + q.setQuery("really-fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); + + auto positiveTerms = q.getPositiveTerms(); + ASSERT_EQUALS(2U, positiveTerms.size()); + ASSERT_EQUALS(1U, positiveTerms.count("fun")); + ASSERT_EQUALS(1U, positiveTerms.count("realli")); + ASSERT_EQUALS(0U, q.getNegatedTerms().size()); + ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms()); +} + +TEST(FTSQueryImpl, HyphenShouldNegateAllSucceedingTermsSeparatedByHyphens) { + FTSQueryImpl q; + q.setQuery("-really-fun-stuff"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); + + auto negatedTerms = q.getNegatedTerms(); + ASSERT_EQUALS(3U, negatedTerms.size()); + ASSERT_EQUALS(1U, negatedTerms.count("realli")); + ASSERT_EQUALS(1U, negatedTerms.count("fun")); + ASSERT_EQUALS(1U, negatedTerms.count("stuff")); + ASSERT_EQUALS(0U, q.getPositiveTerms().size()); +} + TEST(FTSQueryImpl, Phrase1) { FTSQueryImpl q; 
q.setQuery("doing a \"phrase test\" for fun"); @@ -114,7 +177,7 @@ TEST(FTSQueryImpl, Phrase2) { ASSERT_EQUALS("phrase-test", q.getPositivePhr()[0]); } -TEST(FTSQueryImpl, NegPhrase1) { +TEST(FTSQueryImpl, HyphenDirectlyBeforePhraseShouldNegateEntirePhrase) { FTSQueryImpl q; q.setQuery("doing a -\"phrase test\" for fun"); q.setLanguage("english"); @@ -124,6 +187,36 @@ TEST(FTSQueryImpl, NegPhrase1) { ASSERT_EQUALS("fun||||||phrase test", q.debugString()); } +TEST(FTSQueryImpl, HyphenSurroundedByWhitespaceBeforePhraseShouldNotNegateEntirePhrase) { + FTSQueryImpl q; + q.setQuery("doing a - \"phrase test\" for fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); + ASSERT_EQUALS("fun|phrase|test||||phrase test||", q.debugString()); +} + +TEST(FTSQueryImpl, HyphenBetweenTermAndPhraseShouldBeTreatedAsDelimiter) { + FTSQueryImpl q; + q.setQuery("doing a-\"phrase test\" for fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); + ASSERT_EQUALS("fun|phrase|test||||phrase test||", q.debugString()); +} + +TEST(FTSQueryImpl, HyphenShouldNegateAllSucceedingPhrasesSeparatedByHyphens) { + FTSQueryImpl q; + q.setQuery("-\"really fun\"-\"stuff here\" \"another phrase\""); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); + ASSERT_EQUALS("anoth|phrase||||another phrase||really fun|stuff here", q.debugString()); +} + TEST(FTSQueryImpl, CaseSensitiveOption) { FTSQueryImpl q; q.setQuery("this is fun"); |