summary | refs | log | tree | commit | diff
path: root/src/mongo/db/fts
diff options
context:
space:
mode:
author Jason Rassi <rassi@10gen.com> 2015-11-18 17:31:32 -0500
committer Jason Rassi <rassi@10gen.com> 2015-11-18 17:38:27 -0500
commit 23136883e394b73cbc26f873cd0276779adef3df (patch)
tree b53259aeeaeb8a7e7450796dec662157d9b0a44d /src/mongo/db/fts
parent 7bac6c8f64019082f205c7606c65f173972960a3 (diff)
download mongo-23136883e394b73cbc26f873cd0276779adef3df.tar.gz
SERVER-19510 Move text query parsing to TextMatchExpression::init()
- Introduces FTSQuery, which is now the base class for FTSQueryImpl.
- Introduces a derived class FTSQueryNoop (which TextNoOpMatchExpression now wraps). libfts_query_noop is now linked into db/matcher/expressions.
- TextMatchExpression now parses the text query (which acquires a collection lock as part of the parsing process), and TextNode now stores a parsed version of the query. The FTSQuery::parse() call in buildStages() is removed.

Behavior change: $text against a non-existent collection now returns an error, instead of an empty result set.
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r-- src/mongo/db/fts/SConscript 13
-rw-r--r-- src/mongo/db/fts/fts_matcher_test.cpp 66
-rw-r--r-- src/mongo/db/fts/fts_query.h 107
-rw-r--r-- src/mongo/db/fts/fts_query_impl.cpp 44
-rw-r--r-- src/mongo/db/fts/fts_query_impl.h 47
-rw-r--r-- src/mongo/db/fts/fts_query_impl_test.cpp 181
-rw-r--r-- src/mongo/db/fts/fts_query_noop.cpp 48
-rw-r--r-- src/mongo/db/fts/fts_query_noop.h 49
-rw-r--r-- src/mongo/db/fts/fts_query_noop_test.cpp 57
-rw-r--r-- src/mongo/db/fts/fts_util.h 7
10 files changed, 518 insertions, 101 deletions
diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript
index 300f05c04e0..1a752c1cae8 100644
--- a/src/mongo/db/fts/SConscript
+++ b/src/mongo/db/fts/SConscript
@@ -58,6 +58,16 @@ baseEnv.Library('base', [
"$BUILD_DIR/third_party/shim_stemmer",
])
+env.Library(
+ target='fts_query_noop',
+ source=[
+ 'fts_query_noop.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ ],
+)
+
env.Library( 'server_common', [
'fts_enabled.cpp'
],
@@ -96,6 +106,9 @@ env.CppUnitTest( "fts_matcher_test", "fts_matcher_test.cpp",
env.CppUnitTest( "fts_query_impl_test", "fts_query_impl_test.cpp",
LIBDEPS=["base"] )
+env.CppUnitTest( "fts_query_noop_test", "fts_query_noop_test.cpp",
+ LIBDEPS=["fts_query_noop"] )
+
env.CppUnitTest( "fts_spec_test", "fts_spec_test.cpp",
LIBDEPS=["base"] )
diff --git a/src/mongo/db/fts/fts_matcher_test.cpp b/src/mongo/db/fts/fts_matcher_test.cpp
index 02f6c2b2ba0..9b590fa67a3 100644
--- a/src/mongo/db/fts/fts_matcher_test.cpp
+++ b/src/mongo/db/fts/fts_matcher_test.cpp
@@ -38,7 +38,11 @@ namespace fts {
TEST(FTSMatcher, NegWild1) {
FTSQueryImpl q;
- ASSERT_OK(q.parse("foo -bar", "english", false, false, TEXT_INDEX_VERSION_3));
+ q.setQuery("foo -bar");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("$**"
<< "text")))));
@@ -52,7 +56,11 @@ TEST(FTSMatcher, NegWild1) {
// Regression test for SERVER-11994.
TEST(FTSMatcher, NegWild2) {
FTSQueryImpl q;
- ASSERT_OK(q.parse("pizza -restaurant", "english", false, false, TEXT_INDEX_VERSION_3));
+ q.setQuery("pizza -restaurant");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("$**"
<< "text")))));
@@ -65,7 +73,11 @@ TEST(FTSMatcher, NegWild2) {
TEST(FTSMatcher, Phrase1) {
FTSQueryImpl q;
- ASSERT_OK(q.parse("foo \"table top\"", "english", false, false, TEXT_INDEX_VERSION_3));
+ q.setQuery("foo \"table top\"");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("$**"
<< "text")))));
@@ -87,7 +99,11 @@ TEST(FTSMatcher, Phrase1) {
TEST(FTSMatcher, Phrase2) {
FTSQueryImpl q;
- ASSERT_OK(q.parse("foo \"table top\"", "english", false, false, TEXT_INDEX_VERSION_3));
+ q.setQuery("foo \"table top\"");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x"
<< "text")))));
@@ -98,7 +114,11 @@ TEST(FTSMatcher, Phrase2) {
// language.
TEST(FTSMatcher, ParsesUsingDocLanguage) {
FTSQueryImpl q;
- ASSERT_OK(q.parse("-glad", "none", false, false, TEXT_INDEX_VERSION_3));
+ q.setQuery("-glad");
+ q.setLanguage("none");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x"
<< "text")))));
@@ -112,7 +132,11 @@ TEST(FTSMatcher, ParsesUsingDocLanguage) {
// Test the matcher does not filter out stop words from negative terms
TEST(FTSMatcher, MatcherDoesNotFilterStopWordsNeg) {
FTSQueryImpl q;
- ASSERT_OK(q.parse("-the", "none", false, false, TEXT_INDEX_VERSION_3));
+ q.setQuery("-the");
+ q.setLanguage("none");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x"
<< "text")))));
@@ -124,7 +148,11 @@ TEST(FTSMatcher, MatcherDoesNotFilterStopWordsNeg) {
// Test the matcher does not filter out stop words from positive terms
TEST(FTSMatcher, MatcherDoesNotFilterStopWordsPos) {
FTSQueryImpl q;
- ASSERT_OK(q.parse("the", "none", false, false, TEXT_INDEX_VERSION_3));
+ q.setQuery("the");
+ q.setLanguage("none");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x"
<< "text")))));
@@ -137,7 +165,11 @@ TEST(FTSMatcher, MatcherDoesNotFilterStopWordsPos) {
// case-sensitive text query 'search'.
static bool docHasPositiveTermWithCase(const std::string& doc, const std::string& search) {
FTSQueryImpl q;
- ASSERT_OK(q.parse(search, "english", true, false, TEXT_INDEX_VERSION_3));
+ q.setQuery(search);
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x"
<< "text")))));
@@ -164,7 +196,11 @@ TEST(FTSMatcher, HasPositiveTermCaseSensitive) {
// case-sensitive text query 'search'.
static bool docHasNegativeTermWithCase(const std::string& doc, const std::string& search) {
FTSQueryImpl q;
- ASSERT_OK(q.parse(search, "english", true, false, TEXT_INDEX_VERSION_3));
+ q.setQuery(search);
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x"
<< "text")))));
@@ -191,7 +227,11 @@ TEST(FTSMatcher, HasNegativeTermCaseSensitive) {
// from case-sensitive text query 'search'.
static bool docPositivePhrasesMatchWithCase(const std::string& doc, const std::string& search) {
FTSQueryImpl q;
- ASSERT_OK(q.parse(search, "english", true, false, TEXT_INDEX_VERSION_3));
+ q.setQuery(search);
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x"
<< "text")))));
@@ -214,7 +254,11 @@ TEST(FTSMatcher, PositivePhrasesMatchWithCase) {
// from case-sensitive text query 'search'.
static bool docNegativePhrasesMatchWithCase(const std::string& doc, const std::string& search) {
FTSQueryImpl q;
- ASSERT_OK(q.parse(search, "english", true, false, TEXT_INDEX_VERSION_3));
+ q.setQuery(search);
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
FTSMatcher m(q,
FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x"
<< "text")))));
diff --git a/src/mongo/db/fts/fts_query.h b/src/mongo/db/fts/fts_query.h
new file mode 100644
index 00000000000..c2b2fd87deb
--- /dev/null
+++ b/src/mongo/db/fts/fts_query.h
@@ -0,0 +1,107 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "mongo/db/fts/fts_util.h"
+
+namespace mongo {
+namespace fts {
+
+/**
+ * An FTSQuery represents a parsed text search query.
+ */
+class FTSQuery {
+public:
+ virtual ~FTSQuery() {}
+
+ void setQuery(std::string query) {
+ _query = std::move(query);
+ }
+
+ void setLanguage(std::string language) {
+ _language = std::move(language);
+ }
+
+ void setCaseSensitive(bool caseSensitive) {
+ _caseSensitive = caseSensitive;
+ }
+
+ void setDiacriticSensitive(bool diacriticSensitive) {
+ _diacriticSensitive = diacriticSensitive;
+ }
+
+ const std::string& getQuery() const {
+ return _query;
+ }
+
+ const std::string& getLanguage() const {
+ return _language;
+ }
+
+ bool getCaseSensitive() const {
+ return _caseSensitive;
+ }
+
+ bool getDiacriticSensitive() const {
+ return _diacriticSensitive;
+ }
+
+ /**
+ * Returns true iff '*this' and 'other' have the same unparsed form.
+ */
+ bool equivalent(const FTSQuery& other) const {
+ return _query == other._query && _language == other._language &&
+ _caseSensitive == other._caseSensitive &&
+ _diacriticSensitive == other._diacriticSensitive;
+ }
+
+ /**
+ * Parses the text search query. Before parsing, the FTSQuery needs to be initialized with
+ * the set*() methods above.
+ *
+ * Returns Status::OK() if parsing was successful; returns an error Status otherwise.
+ */
+ virtual Status parse(TextIndexVersion textIndexVersion) = 0;
+
+ /**
+ * Returns a copy of this FTSQuery.
+ */
+ virtual std::unique_ptr<FTSQuery> clone() const = 0;
+
+private:
+ std::string _query;
+ std::string _language;
+ bool _caseSensitive = false;
+ bool _diacriticSensitive = false;
+};
+
+} // namespace fts
+} // namespace mongo
diff --git a/src/mongo/db/fts/fts_query_impl.cpp b/src/mongo/db/fts/fts_query_impl.cpp
index af03d2ed5da..674c8e84ab2 100644
--- a/src/mongo/db/fts/fts_query_impl.cpp
+++ b/src/mongo/db/fts/fts_query_impl.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/fts/fts_tokenizer.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/stringutils.h"
+#include "mongo/stdx/memory.h"
namespace mongo {
@@ -49,18 +50,11 @@ using std::string;
using std::stringstream;
using std::vector;
-Status FTSQueryImpl::parse(const string& query,
- StringData language,
- bool caseSensitive,
- bool diacriticSensitive,
- TextIndexVersion textIndexVersion) {
- StatusWithFTSLanguage swl = FTSLanguage::make(language, textIndexVersion);
- if (!swl.getStatus().isOK()) {
- return swl.getStatus();
+Status FTSQueryImpl::parse(TextIndexVersion textIndexVersion) {
+ StatusWithFTSLanguage ftsLanguage = FTSLanguage::make(getLanguage(), textIndexVersion);
+ if (!ftsLanguage.getStatus().isOK()) {
+ return ftsLanguage.getStatus();
}
- _language = swl.getValue();
- _caseSensitive = caseSensitive;
- _diacriticSensitive = diacriticSensitive;
// Build a space delimited list of words to have the FtsTokenizer tokenize
string positiveTermSentence;
@@ -71,7 +65,7 @@ Status FTSQueryImpl::parse(const string& query,
unsigned quoteOffset = 0;
- FTSQueryParser i(query);
+ FTSQueryParser i(getQuery());
while (i.more()) {
QueryToken t = i.next();
@@ -105,7 +99,7 @@ Status FTSQueryImpl::parse(const string& query,
// end of a phrase
unsigned phraseStart = quoteOffset + 1;
unsigned phraseLength = t.offset - phraseStart;
- StringData phrase = StringData(query).substr(phraseStart, phraseLength);
+ StringData phrase = StringData(getQuery()).substr(phraseStart, phraseLength);
if (inNegation) {
_negatedPhrases.push_back(phrase.toString());
} else {
@@ -124,7 +118,7 @@ Status FTSQueryImpl::parse(const string& query,
}
}
- std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer());
+ std::unique_ptr<FTSTokenizer> tokenizer(ftsLanguage.getValue()->createTokenizer());
_addTerms(tokenizer.get(), positiveTermSentence, false);
_addTerms(tokenizer.get(), negativeTermSentence, true);
@@ -132,6 +126,20 @@ Status FTSQueryImpl::parse(const string& query,
return Status::OK();
}
+std::unique_ptr<FTSQuery> FTSQueryImpl::clone() const {
+ auto clonedQuery = stdx::make_unique<FTSQueryImpl>();
+ clonedQuery->setQuery(getQuery());
+ clonedQuery->setLanguage(getLanguage());
+ clonedQuery->setCaseSensitive(getCaseSensitive());
+ clonedQuery->setDiacriticSensitive(getDiacriticSensitive());
+ clonedQuery->_positiveTerms = _positiveTerms;
+ clonedQuery->_negatedTerms = _negatedTerms;
+ clonedQuery->_positivePhrases = _positivePhrases;
+ clonedQuery->_negatedPhrases = _negatedPhrases;
+ clonedQuery->_termsForBounds = _termsForBounds;
+ return std::move(clonedQuery);
+}
+
void FTSQueryImpl::_addTerms(FTSTokenizer* tokenizer, const string& sentence, bool negated) {
tokenizer->reset(sentence.c_str(), FTSTokenizer::kFilterStopWords);
@@ -150,21 +158,21 @@ void FTSQueryImpl::_addTerms(FTSTokenizer* tokenizer, const string& sentence, bo
// Compute the string corresponding to 'token' that will be used for the matcher.
// For case and diacritic insensitive queries, this is the same string as 'boundsTerm'
// computed above.
- if (!_caseSensitive && !_diacriticSensitive) {
+ if (!getCaseSensitive() && !getDiacriticSensitive()) {
activeTerms.insert(word);
}
}
- if (!_caseSensitive && !_diacriticSensitive) {
+ if (!getCaseSensitive() && !getDiacriticSensitive()) {
return;
}
FTSTokenizer::Options newOptions = FTSTokenizer::kFilterStopWords;
- if (_caseSensitive) {
+ if (getCaseSensitive()) {
newOptions |= FTSTokenizer::kGenerateCaseSensitiveTokens;
}
- if (_diacriticSensitive) {
+ if (getDiacriticSensitive()) {
newOptions |= FTSTokenizer::kGenerateDiacriticSensitiveTokens;
}
diff --git a/src/mongo/db/fts/fts_query_impl.h b/src/mongo/db/fts/fts_query_impl.h
index dbd266983d3..888cec08e13 100644
--- a/src/mongo/db/fts/fts_query_impl.h
+++ b/src/mongo/db/fts/fts_query_impl.h
@@ -30,31 +30,23 @@
#pragma once
+#include <set>
#include <string>
#include <vector>
-#include "mongo/base/status.h"
-#include "mongo/db/fts/stemmer.h"
-#include "mongo/db/fts/stop_words.h"
-#include "mongo/util/stringutils.h"
+#include "mongo/db/fts/fts_query.h"
namespace mongo {
namespace fts {
-class FTSQueryImpl {
+class FTSTokenizer;
+
+class FTSQueryImpl final : public FTSQuery {
public:
- // Initializes an FTSQueryImpl. Note that the parsing of "language" depends on the text
- // index version, since a query which doesn't specify a language and is against a
- // version 1 text index with a version 1 default language string needs to be parsed as
- // version 1 (see fts_language.cpp for a list of language strings specific to version
- // 1). Note that the diacritic sensitive option has no effect on FTS queries below index version
- // 3.
- Status parse(const std::string& query,
- StringData language,
- bool caseSensitive,
- bool diacriticSensitive,
- TextIndexVersion textIndexVersion);
+ Status parse(TextIndexVersion textIndexVersion) final;
+
+ std::unique_ptr<FTSQuery> clone() const final;
const std::set<std::string>& getPositiveTerms() const {
return _positiveTerms;
@@ -73,16 +65,6 @@ public:
return _termsForBounds;
}
- const FTSLanguage& getLanguage() const {
- return *_language;
- }
- bool getCaseSensitive() const {
- return _caseSensitive;
- }
- bool getDiacriticSensitive() const {
- return _diacriticSensitive;
- }
-
std::string toString() const;
std::string debugString() const;
@@ -92,23 +74,10 @@ public:
private:
void _addTerms(FTSTokenizer* tokenizer, const std::string& tokens, bool negated);
- const FTSLanguage* _language;
- bool _caseSensitive;
- bool _diacriticSensitive;
-
- // Positive terms.
std::set<std::string> _positiveTerms;
-
- // Negated terms.
std::set<std::string> _negatedTerms;
-
- // Positive phrases.
std::vector<std::string> _positivePhrases;
-
- // Negated phrases.
std::vector<std::string> _negatedPhrases;
-
- // Terms for bounds.
std::set<std::string> _termsForBounds;
};
}
diff --git a/src/mongo/db/fts/fts_query_impl_test.cpp b/src/mongo/db/fts/fts_query_impl_test.cpp
index 9f336e3c75b..538ee3755bc 100644
--- a/src/mongo/db/fts/fts_query_impl_test.cpp
+++ b/src/mongo/db/fts/fts_query_impl_test.cpp
@@ -37,7 +37,11 @@ namespace fts {
TEST(FTSQueryImpl, Basic1) {
FTSQueryImpl q;
- ASSERT(q.parse("this is fun", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
+ q.setQuery("this is fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(false, q.getCaseSensitive());
ASSERT_EQUALS(1U, q.getPositiveTerms().size());
@@ -50,7 +54,11 @@ TEST(FTSQueryImpl, Basic1) {
TEST(FTSQueryImpl, ParsePunctuation) {
FTSQueryImpl q;
- ASSERT(q.parse("hello.world", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
+ q.setQuery("hello.world");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(false, q.getCaseSensitive());
ASSERT_EQUALS(2U, q.getPositiveTerms().size());
@@ -64,7 +72,11 @@ TEST(FTSQueryImpl, ParsePunctuation) {
TEST(FTSQueryImpl, Neg1) {
FTSQueryImpl q;
- ASSERT(q.parse("this is -really fun", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
+ q.setQuery("this is -really fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(1U, q.getPositiveTerms().size());
ASSERT_EQUALS("fun", *q.getPositiveTerms().begin());
@@ -75,8 +87,11 @@ TEST(FTSQueryImpl, Neg1) {
TEST(FTSQueryImpl, Phrase1) {
FTSQueryImpl q;
- ASSERT(q.parse("doing a \"phrase test\" for fun", "english", false, false, TEXT_INDEX_VERSION_3)
- .isOK());
+ q.setQuery("doing a \"phrase test\" for fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(3U, q.getPositiveTerms().size());
ASSERT_EQUALS(0U, q.getNegatedTerms().size());
@@ -90,29 +105,42 @@ TEST(FTSQueryImpl, Phrase1) {
TEST(FTSQueryImpl, Phrase2) {
FTSQueryImpl q;
- ASSERT(q.parse("doing a \"phrase-test\" for fun", "english", false, false, TEXT_INDEX_VERSION_3)
- .isOK());
+ q.setQuery("doing a \"phrase-test\" for fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(1U, q.getPositivePhr().size());
ASSERT_EQUALS("phrase-test", q.getPositivePhr()[0]);
}
TEST(FTSQueryImpl, NegPhrase1) {
FTSQueryImpl q;
- ASSERT(
- q.parse("doing a -\"phrase test\" for fun", "english", false, false, TEXT_INDEX_VERSION_3)
- .isOK());
+ q.setQuery("doing a -\"phrase test\" for fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS("fun||||||phrase test", q.debugString());
}
TEST(FTSQueryImpl, CaseSensitiveOption) {
FTSQueryImpl q;
- ASSERT(q.parse("this is fun", "english", true, false, TEXT_INDEX_VERSION_3).isOK());
+ q.setQuery("this is fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(true, q.getCaseSensitive());
}
TEST(FTSQueryImpl, CaseSensitivePositiveTerms) {
FTSQueryImpl q;
- ASSERT(q.parse("This is Positively fun", "english", true, false, TEXT_INDEX_VERSION_3).isOK());
+ q.setQuery("This is Positively fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(2U, q.getTermsForBounds().size());
ASSERT_EQUALS(1,
@@ -128,8 +156,11 @@ TEST(FTSQueryImpl, CaseSensitivePositiveTerms) {
TEST(FTSQueryImpl, CaseSensitiveNegativeTerms) {
FTSQueryImpl q;
- ASSERT(q.parse("-This -is -Negatively -miserable", "english", true, false, TEXT_INDEX_VERSION_3)
- .isOK());
+ q.setQuery("-This -is -Negatively -miserable");
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(0U, q.getPositiveTerms().size());
ASSERT_EQUALS(0U, q.getTermsForBounds().size());
@@ -142,8 +173,11 @@ TEST(FTSQueryImpl, CaseSensitiveNegativeTerms) {
TEST(FTSQueryImpl, CaseSensitivePositivePhrases) {
FTSQueryImpl q;
- ASSERT(q.parse("doing a \"Phrase Test\" for fun", "english", true, false, TEXT_INDEX_VERSION_3)
- .isOK());
+ q.setQuery("doing a \"Phrase Test\" for fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(1U, q.getPositivePhr().size());
ASSERT_EQUALS(0U, q.getNegatedPhr().size());
@@ -152,8 +186,11 @@ TEST(FTSQueryImpl, CaseSensitivePositivePhrases) {
TEST(FTSQueryImpl, CaseSensitiveNegativePhrases) {
FTSQueryImpl q;
- ASSERT(q.parse("doing a -\"Phrase Test\" for fun", "english", true, false, TEXT_INDEX_VERSION_3)
- .isOK());
+ q.setQuery("doing a -\"Phrase Test\" for fun");
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(0U, q.getPositivePhr().size());
ASSERT_EQUALS(1U, q.getNegatedPhr().size());
@@ -162,17 +199,34 @@ TEST(FTSQueryImpl, CaseSensitiveNegativePhrases) {
TEST(FTSQueryImpl, Mix1) {
FTSQueryImpl q;
- ASSERT(
- q.parse("\"industry\" -Melbourne -Physics", "english", false, false, TEXT_INDEX_VERSION_3)
- .isOK());
+ q.setQuery("\"industry\" -Melbourne -Physics");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS("industri||melbourn|physic||industry||", q.debugString());
}
TEST(FTSQueryImpl, NegPhrase2) {
FTSQueryImpl q1, q2, q3;
- ASSERT(q1.parse("foo \"bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
- ASSERT(q2.parse("foo \"-bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
- ASSERT(q3.parse("foo \" -bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
+
+ q1.setQuery("foo \"bar\"");
+ q1.setLanguage("english");
+ q1.setCaseSensitive(false);
+ q1.setDiacriticSensitive(false);
+ ASSERT(q1.parse(TEXT_INDEX_VERSION_3).isOK());
+
+ q2.setQuery("foo \"-bar\"");
+ q2.setLanguage("english");
+ q2.setCaseSensitive(false);
+ q2.setDiacriticSensitive(false);
+ ASSERT(q2.parse(TEXT_INDEX_VERSION_3).isOK());
+
+ q3.setQuery("foo \" -bar\"");
+ q3.setLanguage("english");
+ q3.setCaseSensitive(false);
+ q3.setDiacriticSensitive(false);
+ ASSERT(q3.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(2U, q1.getPositiveTerms().size());
ASSERT_EQUALS(2U, q2.getPositiveTerms().size());
@@ -193,9 +247,24 @@ TEST(FTSQueryImpl, NegPhrase2) {
TEST(FTSQueryImpl, NegPhrase3) {
FTSQueryImpl q1, q2, q3;
- ASSERT(q1.parse("foo -\"bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
- ASSERT(q2.parse("foo -\"-bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
- ASSERT(q3.parse("foo -\" -bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK());
+
+ q1.setQuery("foo -\"bar\"");
+ q1.setLanguage("english");
+ q1.setCaseSensitive(false);
+ q1.setDiacriticSensitive(false);
+ ASSERT(q1.parse(TEXT_INDEX_VERSION_3).isOK());
+
+ q2.setQuery("foo -\"-bar\"");
+ q2.setLanguage("english");
+ q2.setCaseSensitive(false);
+ q2.setDiacriticSensitive(false);
+ ASSERT(q2.parse(TEXT_INDEX_VERSION_3).isOK());
+
+ q3.setQuery("foo -\" -bar\"");
+ q3.setLanguage("english");
+ q3.setCaseSensitive(false);
+ q3.setDiacriticSensitive(false);
+ ASSERT(q3.parse(TEXT_INDEX_VERSION_3).isOK());
ASSERT_EQUALS(1U, q1.getPositiveTerms().size());
ASSERT_EQUALS(1U, q2.getPositiveTerms().size());
@@ -218,7 +287,11 @@ TEST(FTSQueryImpl, NegPhrase3) {
// stemmer and stopword list.
TEST(FTSQueryImpl, TextIndexVersion1LanguageEnglish) {
FTSQueryImpl q;
- ASSERT(q.parse("the running", "english", false, false, TEXT_INDEX_VERSION_1).isOK());
+ q.setQuery("the running");
+ q.setLanguage("english");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_1).isOK());
ASSERT_EQUALS(1U, q.getPositiveTerms().size());
ASSERT_EQUALS("run", *q.getPositiveTerms().begin());
ASSERT_EQUALS(0U, q.getNegatedTerms().size());
@@ -230,7 +303,11 @@ TEST(FTSQueryImpl, TextIndexVersion1LanguageEnglish) {
// no stopword list.
TEST(FTSQueryImpl, TextIndexVersion1LanguageEng) {
FTSQueryImpl q;
- ASSERT(q.parse("the running", "eng", false, false, TEXT_INDEX_VERSION_1).isOK());
+ q.setQuery("the running");
+ q.setLanguage("eng");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_1).isOK());
ASSERT_EQUALS(2U, q.getPositiveTerms().size());
ASSERT_EQUALS(1, std::count(q.getPositiveTerms().begin(), q.getPositiveTerms().end(), "the"));
ASSERT_EQUALS(1, std::count(q.getPositiveTerms().begin(), q.getPositiveTerms().end(), "run"));
@@ -243,7 +320,11 @@ TEST(FTSQueryImpl, TextIndexVersion1LanguageEng) {
// and no stopword list will be used.
TEST(FTSQueryImpl, TextIndexVersion1LanguageInvalid) {
FTSQueryImpl q;
- ASSERT(q.parse("the running", "invalid", false, false, TEXT_INDEX_VERSION_1).isOK());
+ q.setQuery("the running");
+ q.setLanguage("invalid");
+ q.setCaseSensitive(false);
+ q.setDiacriticSensitive(false);
+ ASSERT(q.parse(TEXT_INDEX_VERSION_1).isOK());
ASSERT_EQUALS(2U, q.getPositiveTerms().size());
ASSERT_EQUALS(1, std::count(q.getPositiveTerms().begin(), q.getPositiveTerms().end(), "the"));
ASSERT_EQUALS(1,
@@ -252,5 +333,45 @@ TEST(FTSQueryImpl, TextIndexVersion1LanguageInvalid) {
ASSERT_EQUALS(0U, q.getPositivePhr().size());
ASSERT_EQUALS(0U, q.getNegatedPhr().size());
}
+
+TEST(FTSQueryImpl, CloneUnparsedQuery) {
+ FTSQueryImpl q;
+ q.setQuery("foo");
+ q.setLanguage("bar");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(true);
+
+ auto clone = q.clone();
+ ASSERT_EQUALS(clone->getQuery(), q.getQuery());
+ ASSERT_EQUALS(clone->getLanguage(), q.getLanguage());
+ ASSERT_EQUALS(clone->getCaseSensitive(), q.getCaseSensitive());
+ ASSERT_EQUALS(clone->getDiacriticSensitive(), q.getDiacriticSensitive());
+}
+
+TEST(FTSQueryImpl, CloneParsedQuery) {
+ FTSQueryImpl q;
+ q.setQuery("Foo -bar \"baz\" -\"quux\"");
+ q.setLanguage("english");
+ q.setCaseSensitive(true);
+ q.setDiacriticSensitive(true);
+ ASSERT_OK(q.parse(TEXT_INDEX_VERSION_3));
+ ASSERT(std::set<std::string>({"Foo", "baz"}) == q.getPositiveTerms());
+ ASSERT(std::set<std::string>({"bar"}) == q.getNegatedTerms());
+ ASSERT(std::vector<std::string>({"baz"}) == q.getPositivePhr());
+ ASSERT(std::vector<std::string>({"quux"}) == q.getNegatedPhr());
+ ASSERT(std::set<std::string>({"foo", "baz"}) == q.getTermsForBounds());
+
+ auto clone = q.clone();
+ ASSERT_EQUALS(clone->getQuery(), q.getQuery());
+ ASSERT_EQUALS(clone->getLanguage(), q.getLanguage());
+ ASSERT_EQUALS(clone->getCaseSensitive(), q.getCaseSensitive());
+ ASSERT_EQUALS(clone->getDiacriticSensitive(), q.getDiacriticSensitive());
+ FTSQueryImpl* castedClone = static_cast<FTSQueryImpl*>(clone.get());
+ ASSERT(castedClone->getPositiveTerms() == q.getPositiveTerms());
+ ASSERT(castedClone->getNegatedTerms() == q.getNegatedTerms());
+ ASSERT(castedClone->getPositivePhr() == q.getPositivePhr());
+ ASSERT(castedClone->getNegatedPhr() == q.getNegatedPhr());
+ ASSERT(castedClone->getTermsForBounds() == q.getTermsForBounds());
+}
}
}
diff --git a/src/mongo/db/fts/fts_query_noop.cpp b/src/mongo/db/fts/fts_query_noop.cpp
new file mode 100644
index 00000000000..d6da988a3e0
--- /dev/null
+++ b/src/mongo/db/fts/fts_query_noop.cpp
@@ -0,0 +1,48 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/fts/fts_query_noop.h"
+
+#include "mongo/stdx/memory.h"
+
+namespace mongo {
+namespace fts {
+
+std::unique_ptr<FTSQuery> FTSQueryNoop::clone() const {
+ auto clonedQuery = stdx::make_unique<FTSQueryNoop>();
+ clonedQuery->setQuery(getQuery());
+ clonedQuery->setLanguage(getLanguage());
+ clonedQuery->setCaseSensitive(getCaseSensitive());
+ clonedQuery->setDiacriticSensitive(getDiacriticSensitive());
+ return std::move(clonedQuery);
+}
+
+} // namespace fts
+} // namespace mongo
diff --git a/src/mongo/db/fts/fts_query_noop.h b/src/mongo/db/fts/fts_query_noop.h
new file mode 100644
index 00000000000..b24a8f572a1
--- /dev/null
+++ b/src/mongo/db/fts/fts_query_noop.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/fts/fts_query.h"
+
+namespace mongo {
+namespace fts {
+
+/**
+ * A no-op implementation of FTSQuery.
+ */
+class FTSQueryNoop final : public FTSQuery {
+public:
+    std::unique_ptr<FTSQuery> clone() const final;  // Returns a newly allocated query object.
+
+    Status parse(TextIndexVersion textIndexVersion) final {
+        return Status::OK();  // Ignores textIndexVersion; always reports success.
+    }
+};
+
+} // namespace fts
+} // namespace mongo
diff --git a/src/mongo/db/fts/fts_query_noop_test.cpp b/src/mongo/db/fts/fts_query_noop_test.cpp
new file mode 100644
index 00000000000..6876893c6e9
--- /dev/null
+++ b/src/mongo/db/fts/fts_query_noop_test.cpp
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/fts/fts_query_noop.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace fts {
+
+TEST(FTSQueryNoop, Parse) {  // parse() must return OK, even for TEXT_INDEX_VERSION_INVALID.
+ FTSQueryNoop q;
+ ASSERT_OK(q.parse(TEXT_INDEX_VERSION_INVALID));
+}
+
+TEST(FTSQueryNoop, Clone) {  // clone() must reproduce the four settings made on the original.
+ FTSQueryNoop q;
+ q.setQuery("foo");
+ q.setLanguage("bar");  // Arbitrary string; it is only compared against the clone's value.
+ q.setCaseSensitive(true);  // Presumably non-default, so a skipped copy shows - TODO confirm.
+ q.setDiacriticSensitive(true);
+
+ auto clone = q.clone();  // Holds a std::unique_ptr<FTSQuery>, hence the -> accessors below.
+ ASSERT_EQUALS(clone->getQuery(), q.getQuery());
+ ASSERT_EQUALS(clone->getLanguage(), q.getLanguage());
+ ASSERT_EQUALS(clone->getCaseSensitive(), q.getCaseSensitive());
+ ASSERT_EQUALS(clone->getDiacriticSensitive(), q.getDiacriticSensitive());
+}
+
+} // namespace fts
+} // namespace mongo
diff --git a/src/mongo/db/fts/fts_util.h b/src/mongo/db/fts/fts_util.h
index b9fed70a8e8..7286e6fc7a6 100644
--- a/src/mongo/db/fts/fts_util.h
+++ b/src/mongo/db/fts/fts_util.h
@@ -42,9 +42,10 @@ extern const std::string WILDCARD;
extern const std::string INDEX_NAME;
enum TextIndexVersion {
- TEXT_INDEX_VERSION_1 = 1, // Legacy index format. Deprecated.
- TEXT_INDEX_VERSION_2 = 2, // Index format with ASCII support and murmur hashing.
- TEXT_INDEX_VERSION_3 = 3, // Current index format with basic Unicode support.
+ TEXT_INDEX_VERSION_INVALID = 0, // Invalid value; not one of the index formats listed below.
+ TEXT_INDEX_VERSION_1 = 1, // Legacy index format. Deprecated.
+ TEXT_INDEX_VERSION_2 = 2, // Index format with ASCII support and murmur hashing.
+ TEXT_INDEX_VERSION_3 = 3, // Current index format with basic Unicode support.
};
}
}