summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts
diff options
context:
space:
mode:
author: Charlie Swanson <charlie.swanson@mongodb.com> 2016-02-02 17:47:20 -0500
committer: Charlie Swanson <charlie.swanson@mongodb.com> 2016-02-04 19:38:27 -0500
commit: 1ddd9162e8fd7de3b54053093698ef16abce8aa9 (patch)
tree: 3216136a15a9a7398db21dde85b14aef0c8f881c /src/mongo/db/fts
parent: ebc481d27e408d8608075a61eba2a68be43c1314 (diff)
download: mongo-1ddd9162e8fd7de3b54053093698ef16abce8aa9.tar.gz
SERVER-20792 Only negate term if directly preceded by a hyphen.
For example, the term "documentation" should be negated in the text query "MongoDB -documentation", but should not be negated in the text query "MongoDB - documentation".
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r-- src/mongo/db/fts/fts_query_impl.cpp 21
-rw-r--r-- src/mongo/db/fts/fts_query_impl.h 17
-rw-r--r-- src/mongo/db/fts/fts_query_impl_test.cpp 99
3 files changed, 130 insertions, 7 deletions
diff --git a/src/mongo/db/fts/fts_query_impl.cpp b/src/mongo/db/fts/fts_query_impl.cpp
index 674c8e84ab2..2c4b5016f90 100644
--- a/src/mongo/db/fts/fts_query_impl.cpp
+++ b/src/mongo/db/fts/fts_query_impl.cpp
@@ -75,6 +75,13 @@ Status FTSQueryImpl::parse(TextIndexVersion textIndexVersion) {
if (inPhrase && inNegation) {
// don't add term
} else {
+ // A negation should only continue until the next whitespace character. For example,
+ // "-foo" should negate "foo", "- foo" should not negate "foo", and "-foo-bar"
+ // should negate both "foo" and "bar".
+ if (inNegation && t.previousWhiteSpace) {
+ inNegation = false;
+ }
+
if (inNegation) {
negativeTermSentence.append(s);
negativeTermSentence.push_back(' ');
@@ -83,9 +90,6 @@ Status FTSQueryImpl::parse(TextIndexVersion textIndexVersion) {
positiveTermSentence.push_back(' ');
}
}
-
- if (inNegation && !inPhrase)
- inNegation = false;
} else if (t.type == QueryToken::DELIMITER) {
char c = t.data[0];
if (c == '-') {
@@ -105,11 +109,20 @@ Status FTSQueryImpl::parse(TextIndexVersion textIndexVersion) {
} else {
_positivePhrases.push_back(phrase.toString());
}
- inNegation = false;
+
+ // Do not reset 'inNegation' here, since a negation should continue until the
+ // next whitespace character. For example, '-"foo bar"-"baz quux"' should negate
+ // both the phrase "foo bar" and the phrase "baz quux".
+
inPhrase = false;
} else {
// start of a phrase
inPhrase = true;
+ // A "-" should only be treated as a negation if there is no whitespace between
+ // the "-" and the start of the phrase.
+ if (inNegation && t.previousWhiteSpace) {
+ inNegation = false;
+ }
quoteOffset = t.offset;
}
}
diff --git a/src/mongo/db/fts/fts_query_impl.h b/src/mongo/db/fts/fts_query_impl.h
index 888cec08e13..a3933a39806 100644
--- a/src/mongo/db/fts/fts_query_impl.h
+++ b/src/mongo/db/fts/fts_query_impl.h
@@ -67,8 +67,25 @@ public:
std::string toString() const;
+ /**
+ * Returns a string with the following format:
+ * <positive terms>||<negative terms>||<positive phrases>||<negative phrases>
+ *
+ * Within each set, individual terms or phrases are separated by a single '|' character. For
+ * example, with positive terms 'foo' and 'bar', negative term 'baz', and no phrases, the debug
+ * string would be 'foo|bar||baz||||'.
+ */
std::string debugString() const;
+ /**
+ * Returns a BSON object with the following format:
+ * {
+ * terms: <array of positive terms>,
+ * negatedTerms: <array of negative terms>,
+ * phrases: <array of positive phrases>,
+ * negatedPhrases: <array of negative phrases>
+ * }
+ */
BSONObj toBSON() const;
private:
diff --git a/src/mongo/db/fts/fts_query_impl_test.cpp b/src/mongo/db/fts/fts_query_impl_test.cpp
index 538ee3755bc..0465fff71d3 100644
--- a/src/mongo/db/fts/fts_query_impl_test.cpp
+++ b/src/mongo/db/fts/fts_query_impl_test.cpp
@@ -70,9 +70,9 @@ TEST(FTSQueryImpl, ParsePunctuation) {
ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms());
}
-TEST(FTSQueryImpl, Neg1) {
+TEST(FTSQueryImpl, HyphenBeforeWordShouldNegateTerm) {
FTSQueryImpl q;
- q.setQuery("this is -really fun");
+ q.setQuery("-really fun");
q.setLanguage("english");
q.setCaseSensitive(false);
q.setDiacriticSensitive(false);
@@ -85,6 +85,69 @@ TEST(FTSQueryImpl, Neg1) {
ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms());
}
+TEST(FTSQueryImpl, HyphenFollowedByWhitespaceShouldNotNegate) {
+ FTSQueryImpl q;
+ q.setQuery("- really fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
+
+ auto positiveTerms = q.getPositiveTerms();
+ ASSERT_EQUALS(2U, positiveTerms.size());
+ ASSERT_EQUALS(1U, positiveTerms.count("fun"));
+ ASSERT_EQUALS(1U, positiveTerms.count("realli"));
+ ASSERT_EQUALS(0U, q.getNegatedTerms().size());
+ ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms());
+}
+
+TEST(FTSQueryImpl, TwoHyphensShouldNegate) {
+ FTSQueryImpl q;
+ q.setQuery("--really fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
+
+ ASSERT_EQUALS(1U, q.getPositiveTerms().size());
+ ASSERT_EQUALS("fun", *q.getPositiveTerms().begin());
+ ASSERT_EQUALS(1U, q.getNegatedTerms().size());
+ ASSERT_EQUALS("realli", *q.getNegatedTerms().begin());
+ ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms());
+}
+
+TEST(FTSQueryImpl, HyphenWithNoSurroundingWhitespaceShouldBeTreatedAsDelimiter) {
+ FTSQueryImpl q;
+ q.setQuery("really-fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
+
+ auto positiveTerms = q.getPositiveTerms();
+ ASSERT_EQUALS(2U, positiveTerms.size());
+ ASSERT_EQUALS(1U, positiveTerms.count("fun"));
+ ASSERT_EQUALS(1U, positiveTerms.count("realli"));
+ ASSERT_EQUALS(0U, q.getNegatedTerms().size());
+ ASSERT_TRUE(q.getTermsForBounds() == q.getPositiveTerms());
+}
+
+TEST(FTSQueryImpl, HyphenShouldNegateAllSucceedingTermsSeparatedByHyphens) {
+ FTSQueryImpl q;
+ q.setQuery("-really-fun-stuff");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
+
+ auto negatedTerms = q.getNegatedTerms();
+ ASSERT_EQUALS(3U, negatedTerms.size());
+ ASSERT_EQUALS(1U, negatedTerms.count("realli"));
+ ASSERT_EQUALS(1U, negatedTerms.count("fun"));
+ ASSERT_EQUALS(1U, negatedTerms.count("stuff"));
+ ASSERT_EQUALS(0U, q.getPositiveTerms().size());
+}
+
TEST(FTSQueryImpl, Phrase1) {
FTSQueryImpl q;
q.setQuery("doing a \"phrase test\" for fun");
@@ -114,7 +177,7 @@ TEST(FTSQueryImpl, Phrase2) {
ASSERT_EQUALS("phrase-test", q.getPositivePhr()[0]);
}
-TEST(FTSQueryImpl, NegPhrase1) {
+TEST(FTSQueryImpl, HyphenDirectlyBeforePhraseShouldNegateEntirePhrase) {
FTSQueryImpl q;
q.setQuery("doing a -\"phrase test\" for fun");
q.setLanguage("english");
@@ -124,6 +187,36 @@ TEST(FTSQueryImpl, NegPhrase1) {
ASSERT_EQUALS("fun||||||phrase test", q.debugString());
}
+TEST(FTSQueryImpl, HyphenSurroundedByWhitespaceBeforePhraseShouldNotNegateEntirePhrase) {
+ FTSQueryImpl q;
+ q.setQuery("doing a - \"phrase test\" for fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
+ ASSERT_EQUALS("fun|phrase|test||||phrase test||", q.debugString());
+}
+
+TEST(FTSQueryImpl, HyphenBetweenTermAndPhraseShouldBeTreatedAsDelimiter) {
+ FTSQueryImpl q;
+ q.setQuery("doing a-\"phrase test\" for fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
+ ASSERT_EQUALS("fun|phrase|test||||phrase test||", q.debugString());
+}
+
+TEST(FTSQueryImpl, HyphenShouldNegateAllSucceedingPhrasesSeparatedByHyphens) {
+ FTSQueryImpl q;
+ q.setQuery("-\"really fun\"-\"stuff here\" \"another phrase\"");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
+ ASSERT_EQUALS("anoth|phrase||||another phrase||really fun|stuff here", q.debugString());
+}
+
TEST(FTSQueryImpl, CaseSensitiveOption) {
FTSQueryImpl q;
q.setQuery("this is fun");