diff options
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r-- | src/mongo/db/fts/SConscript | 13 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_matcher_test.cpp | 66 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query.h | 107 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query_impl.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query_impl.h | 47 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query_impl_test.cpp | 181 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query_noop.cpp | 48 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query_noop.h | 49 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_query_noop_test.cpp | 57 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_util.h | 7 |
10 files changed, 518 insertions, 101 deletions
diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript index 300f05c04e0..1a752c1cae8 100644 --- a/src/mongo/db/fts/SConscript +++ b/src/mongo/db/fts/SConscript @@ -58,6 +58,16 @@ baseEnv.Library('base', [ "$BUILD_DIR/third_party/shim_stemmer", ]) +env.Library( + target='fts_query_noop', + source=[ + 'fts_query_noop.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + ], +) + env.Library( 'server_common', [ 'fts_enabled.cpp' ], @@ -96,6 +106,9 @@ env.CppUnitTest( "fts_matcher_test", "fts_matcher_test.cpp", env.CppUnitTest( "fts_query_impl_test", "fts_query_impl_test.cpp", LIBDEPS=["base"] ) +env.CppUnitTest( "fts_query_noop_test", "fts_query_noop_test.cpp", + LIBDEPS=["fts_query_noop"] ) + env.CppUnitTest( "fts_spec_test", "fts_spec_test.cpp", LIBDEPS=["base"] ) diff --git a/src/mongo/db/fts/fts_matcher_test.cpp b/src/mongo/db/fts/fts_matcher_test.cpp index 02f6c2b2ba0..9b590fa67a3 100644 --- a/src/mongo/db/fts/fts_matcher_test.cpp +++ b/src/mongo/db/fts/fts_matcher_test.cpp @@ -38,7 +38,11 @@ namespace fts { TEST(FTSMatcher, NegWild1) { FTSQueryImpl q; - ASSERT_OK(q.parse("foo -bar", "english", false, false, TEXT_INDEX_VERSION_3)); + q.setQuery("foo -bar"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("$**" << "text"))))); @@ -52,7 +56,11 @@ TEST(FTSMatcher, NegWild1) { // Regression test for SERVER-11994. 
TEST(FTSMatcher, NegWild2) { FTSQueryImpl q; - ASSERT_OK(q.parse("pizza -restaurant", "english", false, false, TEXT_INDEX_VERSION_3)); + q.setQuery("pizza -restaurant"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("$**" << "text"))))); @@ -65,7 +73,11 @@ TEST(FTSMatcher, NegWild2) { TEST(FTSMatcher, Phrase1) { FTSQueryImpl q; - ASSERT_OK(q.parse("foo \"table top\"", "english", false, false, TEXT_INDEX_VERSION_3)); + q.setQuery("foo \"table top\""); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("$**" << "text"))))); @@ -87,7 +99,11 @@ TEST(FTSMatcher, Phrase1) { TEST(FTSMatcher, Phrase2) { FTSQueryImpl q; - ASSERT_OK(q.parse("foo \"table top\"", "english", false, false, TEXT_INDEX_VERSION_3)); + q.setQuery("foo \"table top\""); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text"))))); @@ -98,7 +114,11 @@ TEST(FTSMatcher, Phrase2) { // language. 
TEST(FTSMatcher, ParsesUsingDocLanguage) { FTSQueryImpl q; - ASSERT_OK(q.parse("-glad", "none", false, false, TEXT_INDEX_VERSION_3)); + q.setQuery("-glad"); + q.setLanguage("none"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text"))))); @@ -112,7 +132,11 @@ TEST(FTSMatcher, ParsesUsingDocLanguage) { // Test the matcher does not filter out stop words from negative terms TEST(FTSMatcher, MatcherDoesNotFilterStopWordsNeg) { FTSQueryImpl q; - ASSERT_OK(q.parse("-the", "none", false, false, TEXT_INDEX_VERSION_3)); + q.setQuery("-the"); + q.setLanguage("none"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text"))))); @@ -124,7 +148,11 @@ TEST(FTSMatcher, MatcherDoesNotFilterStopWordsNeg) { // Test the matcher does not filter out stop words from positive terms TEST(FTSMatcher, MatcherDoesNotFilterStopWordsPos) { FTSQueryImpl q; - ASSERT_OK(q.parse("the", "none", false, false, TEXT_INDEX_VERSION_3)); + q.setQuery("the"); + q.setLanguage("none"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text"))))); @@ -137,7 +165,11 @@ TEST(FTSMatcher, MatcherDoesNotFilterStopWordsPos) { // case-sensitive text query 'search'. 
static bool docHasPositiveTermWithCase(const std::string& doc, const std::string& search) { FTSQueryImpl q; - ASSERT_OK(q.parse(search, "english", true, false, TEXT_INDEX_VERSION_3)); + q.setQuery(search); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text"))))); @@ -164,7 +196,11 @@ TEST(FTSMatcher, HasPositiveTermCaseSensitive) { // case-sensitive text query 'search'. static bool docHasNegativeTermWithCase(const std::string& doc, const std::string& search) { FTSQueryImpl q; - ASSERT_OK(q.parse(search, "english", true, false, TEXT_INDEX_VERSION_3)); + q.setQuery(search); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text"))))); @@ -191,7 +227,11 @@ TEST(FTSMatcher, HasNegativeTermCaseSensitive) { // from case-sensitive text query 'search'. static bool docPositivePhrasesMatchWithCase(const std::string& doc, const std::string& search) { FTSQueryImpl q; - ASSERT_OK(q.parse(search, "english", true, false, TEXT_INDEX_VERSION_3)); + q.setQuery(search); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text"))))); @@ -214,7 +254,11 @@ TEST(FTSMatcher, PositivePhrasesMatchWithCase) { // from case-sensitive text query 'search'. 
static bool docNegativePhrasesMatchWithCase(const std::string& doc, const std::string& search) { FTSQueryImpl q; - ASSERT_OK(q.parse(search, "english", true, false, TEXT_INDEX_VERSION_3)); + q.setQuery(search); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text"))))); diff --git a/src/mongo/db/fts/fts_query.h b/src/mongo/db/fts/fts_query.h new file mode 100644 index 00000000000..c2b2fd87deb --- /dev/null +++ b/src/mongo/db/fts/fts_query.h @@ -0,0 +1,107 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <string> + +#include "mongo/db/fts/fts_util.h" + +namespace mongo { +namespace fts { + +/** + * An FTSQuery represents a parsed text search query. + */ +class FTSQuery { +public: + virtual ~FTSQuery() {} + + void setQuery(std::string query) { + _query = std::move(query); + } + + void setLanguage(std::string language) { + _language = std::move(language); + } + + void setCaseSensitive(bool caseSensitive) { + _caseSensitive = caseSensitive; + } + + void setDiacriticSensitive(bool diacriticSensitive) { + _diacriticSensitive = diacriticSensitive; + } + + const std::string& getQuery() const { + return _query; + } + + const std::string& getLanguage() const { + return _language; + } + + bool getCaseSensitive() const { + return _caseSensitive; + } + + bool getDiacriticSensitive() const { + return _diacriticSensitive; + } + + /** + * Returns true iff '*this' and 'other' have the same unparsed form. + */ + bool equivalent(const FTSQuery& other) const { + return _query == other._query && _language == other._language && + _caseSensitive == other._caseSensitive && + _diacriticSensitive == other._diacriticSensitive; + } + + /** + * Parses the text search query. Before parsing, the FTSQuery needs to be initialized with + * the set*() methods above. + * + * Returns Status::OK() if parsing was successful; returns an error Status otherwise. + */ + virtual Status parse(TextIndexVersion textIndexVersion) = 0; + + /** + * Returns a copy of this FTSQuery. 
+ */ + virtual std::unique_ptr<FTSQuery> clone() const = 0; + +private: + std::string _query; + std::string _language; + bool _caseSensitive = false; + bool _diacriticSensitive = false; +}; + +} // namespace fts +} // namespace mongo diff --git a/src/mongo/db/fts/fts_query_impl.cpp b/src/mongo/db/fts/fts_query_impl.cpp index af03d2ed5da..674c8e84ab2 100644 --- a/src/mongo/db/fts/fts_query_impl.cpp +++ b/src/mongo/db/fts/fts_query_impl.cpp @@ -37,6 +37,7 @@ #include "mongo/db/fts/fts_tokenizer.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/stringutils.h" +#include "mongo/stdx/memory.h" namespace mongo { @@ -49,18 +50,11 @@ using std::string; using std::stringstream; using std::vector; -Status FTSQueryImpl::parse(const string& query, - StringData language, - bool caseSensitive, - bool diacriticSensitive, - TextIndexVersion textIndexVersion) { - StatusWithFTSLanguage swl = FTSLanguage::make(language, textIndexVersion); - if (!swl.getStatus().isOK()) { - return swl.getStatus(); +Status FTSQueryImpl::parse(TextIndexVersion textIndexVersion) { + StatusWithFTSLanguage ftsLanguage = FTSLanguage::make(getLanguage(), textIndexVersion); + if (!ftsLanguage.getStatus().isOK()) { + return ftsLanguage.getStatus(); } - _language = swl.getValue(); - _caseSensitive = caseSensitive; - _diacriticSensitive = diacriticSensitive; // Build a space delimited list of words to have the FtsTokenizer tokenize string positiveTermSentence; @@ -71,7 +65,7 @@ Status FTSQueryImpl::parse(const string& query, unsigned quoteOffset = 0; - FTSQueryParser i(query); + FTSQueryParser i(getQuery()); while (i.more()) { QueryToken t = i.next(); @@ -105,7 +99,7 @@ Status FTSQueryImpl::parse(const string& query, // end of a phrase unsigned phraseStart = quoteOffset + 1; unsigned phraseLength = t.offset - phraseStart; - StringData phrase = StringData(query).substr(phraseStart, phraseLength); + StringData phrase = StringData(getQuery()).substr(phraseStart, phraseLength); if (inNegation) { 
_negatedPhrases.push_back(phrase.toString()); } else { @@ -124,7 +118,7 @@ Status FTSQueryImpl::parse(const string& query, } } - std::unique_ptr<FTSTokenizer> tokenizer(_language->createTokenizer()); + std::unique_ptr<FTSTokenizer> tokenizer(ftsLanguage.getValue()->createTokenizer()); _addTerms(tokenizer.get(), positiveTermSentence, false); _addTerms(tokenizer.get(), negativeTermSentence, true); @@ -132,6 +126,20 @@ Status FTSQueryImpl::parse(const string& query, return Status::OK(); } +std::unique_ptr<FTSQuery> FTSQueryImpl::clone() const { + auto clonedQuery = stdx::make_unique<FTSQueryImpl>(); + clonedQuery->setQuery(getQuery()); + clonedQuery->setLanguage(getLanguage()); + clonedQuery->setCaseSensitive(getCaseSensitive()); + clonedQuery->setDiacriticSensitive(getDiacriticSensitive()); + clonedQuery->_positiveTerms = _positiveTerms; + clonedQuery->_negatedTerms = _negatedTerms; + clonedQuery->_positivePhrases = _positivePhrases; + clonedQuery->_negatedPhrases = _negatedPhrases; + clonedQuery->_termsForBounds = _termsForBounds; + return std::move(clonedQuery); +} + void FTSQueryImpl::_addTerms(FTSTokenizer* tokenizer, const string& sentence, bool negated) { tokenizer->reset(sentence.c_str(), FTSTokenizer::kFilterStopWords); @@ -150,21 +158,21 @@ void FTSQueryImpl::_addTerms(FTSTokenizer* tokenizer, const string& sentence, bo // Compute the string corresponding to 'token' that will be used for the matcher. // For case and diacritic insensitive queries, this is the same string as 'boundsTerm' // computed above. 
- if (!_caseSensitive && !_diacriticSensitive) { + if (!getCaseSensitive() && !getDiacriticSensitive()) { activeTerms.insert(word); } } - if (!_caseSensitive && !_diacriticSensitive) { + if (!getCaseSensitive() && !getDiacriticSensitive()) { return; } FTSTokenizer::Options newOptions = FTSTokenizer::kFilterStopWords; - if (_caseSensitive) { + if (getCaseSensitive()) { newOptions |= FTSTokenizer::kGenerateCaseSensitiveTokens; } - if (_diacriticSensitive) { + if (getDiacriticSensitive()) { newOptions |= FTSTokenizer::kGenerateDiacriticSensitiveTokens; } diff --git a/src/mongo/db/fts/fts_query_impl.h b/src/mongo/db/fts/fts_query_impl.h index dbd266983d3..888cec08e13 100644 --- a/src/mongo/db/fts/fts_query_impl.h +++ b/src/mongo/db/fts/fts_query_impl.h @@ -30,31 +30,23 @@ #pragma once +#include <set> #include <string> #include <vector> -#include "mongo/base/status.h" -#include "mongo/db/fts/stemmer.h" -#include "mongo/db/fts/stop_words.h" -#include "mongo/util/stringutils.h" +#include "mongo/db/fts/fts_query.h" namespace mongo { namespace fts { -class FTSQueryImpl { +class FTSTokenizer; + +class FTSQueryImpl final : public FTSQuery { public: - // Initializes an FTSQueryImpl. Note that the parsing of "language" depends on the text - // index version, since a query which doesn't specify a language and is against a - // version 1 text index with a version 1 default language string needs to be parsed as - // version 1 (see fts_language.cpp for a list of language strings specific to version - // 1). Note that the diacritic sensitive option has no effect on FTS queries below index version - // 3. 
- Status parse(const std::string& query, - StringData language, - bool caseSensitive, - bool diacriticSensitive, - TextIndexVersion textIndexVersion); + Status parse(TextIndexVersion textIndexVersion) final; + + std::unique_ptr<FTSQuery> clone() const final; const std::set<std::string>& getPositiveTerms() const { return _positiveTerms; @@ -73,16 +65,6 @@ public: return _termsForBounds; } - const FTSLanguage& getLanguage() const { - return *_language; - } - bool getCaseSensitive() const { - return _caseSensitive; - } - bool getDiacriticSensitive() const { - return _diacriticSensitive; - } - std::string toString() const; std::string debugString() const; @@ -92,23 +74,10 @@ public: private: void _addTerms(FTSTokenizer* tokenizer, const std::string& tokens, bool negated); - const FTSLanguage* _language; - bool _caseSensitive; - bool _diacriticSensitive; - - // Positive terms. std::set<std::string> _positiveTerms; - - // Negated terms. std::set<std::string> _negatedTerms; - - // Positive phrases. std::vector<std::string> _positivePhrases; - - // Negated phrases. std::vector<std::string> _negatedPhrases; - - // Terms for bounds. 
std::set<std::string> _termsForBounds; }; } diff --git a/src/mongo/db/fts/fts_query_impl_test.cpp b/src/mongo/db/fts/fts_query_impl_test.cpp index 9f336e3c75b..538ee3755bc 100644 --- a/src/mongo/db/fts/fts_query_impl_test.cpp +++ b/src/mongo/db/fts/fts_query_impl_test.cpp @@ -37,7 +37,11 @@ namespace fts { TEST(FTSQueryImpl, Basic1) { FTSQueryImpl q; - ASSERT(q.parse("this is fun", "english", false, false, TEXT_INDEX_VERSION_3).isOK()); + q.setQuery("this is fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(false, q.getCaseSensitive()); ASSERT_EQUALS(1U, q.getPositiveTerms().size()); @@ -50,7 +54,11 @@ TEST(FTSQueryImpl, Basic1) { TEST(FTSQueryImpl, ParsePunctuation) { FTSQueryImpl q; - ASSERT(q.parse("hello.world", "english", false, false, TEXT_INDEX_VERSION_3).isOK()); + q.setQuery("hello.world"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(false, q.getCaseSensitive()); ASSERT_EQUALS(2U, q.getPositiveTerms().size()); @@ -64,7 +72,11 @@ TEST(FTSQueryImpl, ParsePunctuation) { TEST(FTSQueryImpl, Neg1) { FTSQueryImpl q; - ASSERT(q.parse("this is -really fun", "english", false, false, TEXT_INDEX_VERSION_3).isOK()); + q.setQuery("this is -really fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(1U, q.getPositiveTerms().size()); ASSERT_EQUALS("fun", *q.getPositiveTerms().begin()); @@ -75,8 +87,11 @@ TEST(FTSQueryImpl, Neg1) { TEST(FTSQueryImpl, Phrase1) { FTSQueryImpl q; - ASSERT(q.parse("doing a \"phrase test\" for fun", "english", false, false, TEXT_INDEX_VERSION_3) - .isOK()); + q.setQuery("doing a \"phrase test\" for fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + 
ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(3U, q.getPositiveTerms().size()); ASSERT_EQUALS(0U, q.getNegatedTerms().size()); @@ -90,29 +105,42 @@ TEST(FTSQueryImpl, Phrase1) { TEST(FTSQueryImpl, Phrase2) { FTSQueryImpl q; - ASSERT(q.parse("doing a \"phrase-test\" for fun", "english", false, false, TEXT_INDEX_VERSION_3) - .isOK()); + q.setQuery("doing a \"phrase-test\" for fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(1U, q.getPositivePhr().size()); ASSERT_EQUALS("phrase-test", q.getPositivePhr()[0]); } TEST(FTSQueryImpl, NegPhrase1) { FTSQueryImpl q; - ASSERT( - q.parse("doing a -\"phrase test\" for fun", "english", false, false, TEXT_INDEX_VERSION_3) - .isOK()); + q.setQuery("doing a -\"phrase test\" for fun"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS("fun||||||phrase test", q.debugString()); } TEST(FTSQueryImpl, CaseSensitiveOption) { FTSQueryImpl q; - ASSERT(q.parse("this is fun", "english", true, false, TEXT_INDEX_VERSION_3).isOK()); + q.setQuery("this is fun"); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(true, q.getCaseSensitive()); } TEST(FTSQueryImpl, CaseSensitivePositiveTerms) { FTSQueryImpl q; - ASSERT(q.parse("This is Positively fun", "english", true, false, TEXT_INDEX_VERSION_3).isOK()); + q.setQuery("This is Positively fun"); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(2U, q.getTermsForBounds().size()); ASSERT_EQUALS(1, @@ -128,8 +156,11 @@ TEST(FTSQueryImpl, CaseSensitivePositiveTerms) { TEST(FTSQueryImpl, CaseSensitiveNegativeTerms) { FTSQueryImpl q; - ASSERT(q.parse("-This -is -Negatively 
-miserable", "english", true, false, TEXT_INDEX_VERSION_3) - .isOK()); + q.setQuery("-This -is -Negatively -miserable"); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(0U, q.getPositiveTerms().size()); ASSERT_EQUALS(0U, q.getTermsForBounds().size()); @@ -142,8 +173,11 @@ TEST(FTSQueryImpl, CaseSensitiveNegativeTerms) { TEST(FTSQueryImpl, CaseSensitivePositivePhrases) { FTSQueryImpl q; - ASSERT(q.parse("doing a \"Phrase Test\" for fun", "english", true, false, TEXT_INDEX_VERSION_3) - .isOK()); + q.setQuery("doing a \"Phrase Test\" for fun"); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(1U, q.getPositivePhr().size()); ASSERT_EQUALS(0U, q.getNegatedPhr().size()); @@ -152,8 +186,11 @@ TEST(FTSQueryImpl, CaseSensitivePositivePhrases) { TEST(FTSQueryImpl, CaseSensitiveNegativePhrases) { FTSQueryImpl q; - ASSERT(q.parse("doing a -\"Phrase Test\" for fun", "english", true, false, TEXT_INDEX_VERSION_3) - .isOK()); + q.setQuery("doing a -\"Phrase Test\" for fun"); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(0U, q.getPositivePhr().size()); ASSERT_EQUALS(1U, q.getNegatedPhr().size()); @@ -162,17 +199,34 @@ TEST(FTSQueryImpl, CaseSensitiveNegativePhrases) { TEST(FTSQueryImpl, Mix1) { FTSQueryImpl q; - ASSERT( - q.parse("\"industry\" -Melbourne -Physics", "english", false, false, TEXT_INDEX_VERSION_3) - .isOK()); + q.setQuery("\"industry\" -Melbourne -Physics"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS("industri||melbourn|physic||industry||", q.debugString()); } TEST(FTSQueryImpl, NegPhrase2) { FTSQueryImpl q1, q2, q3; - ASSERT(q1.parse("foo \"bar\"", 
"english", false, false, TEXT_INDEX_VERSION_3).isOK()); - ASSERT(q2.parse("foo \"-bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK()); - ASSERT(q3.parse("foo \" -bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK()); + + q1.setQuery("foo \"bar\""); + q1.setLanguage("english"); + q1.setCaseSensitive(false); + q1.setDiacriticSensitive(false); + ASSERT(q1.parse(TEXT_INDEX_VERSION_3).isOK()); + + q2.setQuery("foo \"-bar\""); + q2.setLanguage("english"); + q2.setCaseSensitive(false); + q2.setDiacriticSensitive(false); + ASSERT(q2.parse(TEXT_INDEX_VERSION_3).isOK()); + + q3.setQuery("foo \" -bar\""); + q3.setLanguage("english"); + q3.setCaseSensitive(false); + q3.setDiacriticSensitive(false); + ASSERT(q3.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(2U, q1.getPositiveTerms().size()); ASSERT_EQUALS(2U, q2.getPositiveTerms().size()); @@ -193,9 +247,24 @@ TEST(FTSQueryImpl, NegPhrase2) { TEST(FTSQueryImpl, NegPhrase3) { FTSQueryImpl q1, q2, q3; - ASSERT(q1.parse("foo -\"bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK()); - ASSERT(q2.parse("foo -\"-bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK()); - ASSERT(q3.parse("foo -\" -bar\"", "english", false, false, TEXT_INDEX_VERSION_3).isOK()); + + q1.setQuery("foo -\"bar\""); + q1.setLanguage("english"); + q1.setCaseSensitive(false); + q1.setDiacriticSensitive(false); + ASSERT(q1.parse(TEXT_INDEX_VERSION_3).isOK()); + + q2.setQuery("foo -\"-bar\""); + q2.setLanguage("english"); + q2.setCaseSensitive(false); + q2.setDiacriticSensitive(false); + ASSERT(q2.parse(TEXT_INDEX_VERSION_3).isOK()); + + q3.setQuery("foo -\" -bar\""); + q3.setLanguage("english"); + q3.setCaseSensitive(false); + q3.setDiacriticSensitive(false); + ASSERT(q3.parse(TEXT_INDEX_VERSION_3).isOK()); ASSERT_EQUALS(1U, q1.getPositiveTerms().size()); ASSERT_EQUALS(1U, q2.getPositiveTerms().size()); @@ -218,7 +287,11 @@ TEST(FTSQueryImpl, NegPhrase3) { // stemmer and stopword list. 
TEST(FTSQueryImpl, TextIndexVersion1LanguageEnglish) { FTSQueryImpl q; - ASSERT(q.parse("the running", "english", false, false, TEXT_INDEX_VERSION_1).isOK()); + q.setQuery("the running"); + q.setLanguage("english"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_1).isOK()); ASSERT_EQUALS(1U, q.getPositiveTerms().size()); ASSERT_EQUALS("run", *q.getPositiveTerms().begin()); ASSERT_EQUALS(0U, q.getNegatedTerms().size()); @@ -230,7 +303,11 @@ TEST(FTSQueryImpl, TextIndexVersion1LanguageEnglish) { // no stopword list. TEST(FTSQueryImpl, TextIndexVersion1LanguageEng) { FTSQueryImpl q; - ASSERT(q.parse("the running", "eng", false, false, TEXT_INDEX_VERSION_1).isOK()); + q.setQuery("the running"); + q.setLanguage("eng"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_1).isOK()); ASSERT_EQUALS(2U, q.getPositiveTerms().size()); ASSERT_EQUALS(1, std::count(q.getPositiveTerms().begin(), q.getPositiveTerms().end(), "the")); ASSERT_EQUALS(1, std::count(q.getPositiveTerms().begin(), q.getPositiveTerms().end(), "run")); @@ -243,7 +320,11 @@ TEST(FTSQueryImpl, TextIndexVersion1LanguageEng) { // and no stopword list will be used. 
TEST(FTSQueryImpl, TextIndexVersion1LanguageInvalid) { FTSQueryImpl q; - ASSERT(q.parse("the running", "invalid", false, false, TEXT_INDEX_VERSION_1).isOK()); + q.setQuery("the running"); + q.setLanguage("invalid"); + q.setCaseSensitive(false); + q.setDiacriticSensitive(false); + ASSERT(q.parse(TEXT_INDEX_VERSION_1).isOK()); ASSERT_EQUALS(2U, q.getPositiveTerms().size()); ASSERT_EQUALS(1, std::count(q.getPositiveTerms().begin(), q.getPositiveTerms().end(), "the")); ASSERT_EQUALS(1, @@ -252,5 +333,45 @@ TEST(FTSQueryImpl, TextIndexVersion1LanguageInvalid) { ASSERT_EQUALS(0U, q.getPositivePhr().size()); ASSERT_EQUALS(0U, q.getNegatedPhr().size()); } + +TEST(FTSQueryImpl, CloneUnparsedQuery) { + FTSQueryImpl q; + q.setQuery("foo"); + q.setLanguage("bar"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(true); + + auto clone = q.clone(); + ASSERT_EQUALS(clone->getQuery(), q.getQuery()); + ASSERT_EQUALS(clone->getLanguage(), q.getLanguage()); + ASSERT_EQUALS(clone->getCaseSensitive(), q.getCaseSensitive()); + ASSERT_EQUALS(clone->getDiacriticSensitive(), q.getDiacriticSensitive()); +} + +TEST(FTSQueryImpl, CloneParsedQuery) { + FTSQueryImpl q; + q.setQuery("Foo -bar \"baz\" -\"quux\""); + q.setLanguage("english"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(true); + ASSERT_OK(q.parse(TEXT_INDEX_VERSION_3)); + ASSERT(std::set<std::string>({"Foo", "baz"}) == q.getPositiveTerms()); + ASSERT(std::set<std::string>({"bar"}) == q.getNegatedTerms()); + ASSERT(std::vector<std::string>({"baz"}) == q.getPositivePhr()); + ASSERT(std::vector<std::string>({"quux"}) == q.getNegatedPhr()); + ASSERT(std::set<std::string>({"foo", "baz"}) == q.getTermsForBounds()); + + auto clone = q.clone(); + ASSERT_EQUALS(clone->getQuery(), q.getQuery()); + ASSERT_EQUALS(clone->getLanguage(), q.getLanguage()); + ASSERT_EQUALS(clone->getCaseSensitive(), q.getCaseSensitive()); + ASSERT_EQUALS(clone->getDiacriticSensitive(), q.getDiacriticSensitive()); + FTSQueryImpl* castedClone = 
static_cast<FTSQueryImpl*>(clone.get()); + ASSERT(castedClone->getPositiveTerms() == q.getPositiveTerms()); + ASSERT(castedClone->getNegatedTerms() == q.getNegatedTerms()); + ASSERT(castedClone->getPositivePhr() == q.getPositivePhr()); + ASSERT(castedClone->getNegatedPhr() == q.getNegatedPhr()); + ASSERT(castedClone->getTermsForBounds() == q.getTermsForBounds()); +} } } diff --git a/src/mongo/db/fts/fts_query_noop.cpp b/src/mongo/db/fts/fts_query_noop.cpp new file mode 100644 index 00000000000..d6da988a3e0 --- /dev/null +++ b/src/mongo/db/fts/fts_query_noop.cpp @@ -0,0 +1,48 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/fts/fts_query_noop.h" + +#include "mongo/stdx/memory.h" + +namespace mongo { +namespace fts { + +std::unique_ptr<FTSQuery> FTSQueryNoop::clone() const { + auto clonedQuery = stdx::make_unique<FTSQueryNoop>(); + clonedQuery->setQuery(getQuery()); + clonedQuery->setLanguage(getLanguage()); + clonedQuery->setCaseSensitive(getCaseSensitive()); + clonedQuery->setDiacriticSensitive(getDiacriticSensitive()); + return std::move(clonedQuery); +} + +} // namespace fts +} // namespace mongo diff --git a/src/mongo/db/fts/fts_query_noop.h b/src/mongo/db/fts/fts_query_noop.h new file mode 100644 index 00000000000..b24a8f572a1 --- /dev/null +++ b/src/mongo/db/fts/fts_query_noop.h @@ -0,0 +1,49 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. 
If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/fts/fts_query.h" + +namespace mongo { +namespace fts { + +/** + * A no-op implementation of FTSQuery. + */ +class FTSQueryNoop final : public FTSQuery { +public: + Status parse(TextIndexVersion textIndexVersion) final { + return Status::OK(); + } + + std::unique_ptr<FTSQuery> clone() const final; +}; + +} // namespace fts +} // namespace mongo diff --git a/src/mongo/db/fts/fts_query_noop_test.cpp b/src/mongo/db/fts/fts_query_noop_test.cpp new file mode 100644 index 00000000000..6876893c6e9 --- /dev/null +++ b/src/mongo/db/fts/fts_query_noop_test.cpp @@ -0,0 +1,57 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/fts/fts_query_noop.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace fts { + +TEST(FTSQueryNoop, Parse) { + FTSQueryNoop q; + ASSERT_OK(q.parse(TEXT_INDEX_VERSION_INVALID)); +} + +TEST(FTSQueryNoop, Clone) { + FTSQueryNoop q; + q.setQuery("foo"); + q.setLanguage("bar"); + q.setCaseSensitive(true); + q.setDiacriticSensitive(true); + + auto clone = q.clone(); + ASSERT_EQUALS(clone->getQuery(), q.getQuery()); + ASSERT_EQUALS(clone->getLanguage(), q.getLanguage()); + ASSERT_EQUALS(clone->getCaseSensitive(), q.getCaseSensitive()); + ASSERT_EQUALS(clone->getDiacriticSensitive(), q.getDiacriticSensitive()); +} + +} // namespace fts +} // namespace mongo diff --git a/src/mongo/db/fts/fts_util.h b/src/mongo/db/fts/fts_util.h index b9fed70a8e8..7286e6fc7a6 100644 --- a/src/mongo/db/fts/fts_util.h +++ b/src/mongo/db/fts/fts_util.h @@ -42,9 +42,10 @@ extern const std::string WILDCARD; extern const std::string INDEX_NAME; enum TextIndexVersion { - TEXT_INDEX_VERSION_1 = 1, // Legacy index format. Deprecated. - TEXT_INDEX_VERSION_2 = 2, // Index format with ASCII support and murmur hashing. - TEXT_INDEX_VERSION_3 = 3, // Current index format with basic Unicode support. + TEXT_INDEX_VERSION_INVALID = 0, // Invalid value. + TEXT_INDEX_VERSION_1 = 1, // Legacy index format. Deprecated. + TEXT_INDEX_VERSION_2 = 2, // Index format with ASCII support and murmur hashing. 
+ TEXT_INDEX_VERSION_3 = 3, // Current index format with basic Unicode support. }; } }