// fts_matcher_test.cpp /** * Copyright (C) 2018-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #include "mongo/platform/basic.h" #include "mongo/db/fts/fts_matcher.h" #include "mongo/unittest/unittest.h" namespace mongo { namespace fts { using unittest::assertGet; TEST(FTSMatcher, NegWild1) { FTSQueryImpl q; q.setQuery("foo -bar"); q.setLanguage("english"); q.setCaseSensitive(false); q.setDiacriticSensitive(false); ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("$**" << "text")))))); ASSERT(m.hasNegativeTerm(BSON("x" << BSON("y" << "bar")))); ASSERT(m.hasNegativeTerm(BSON("x" << BSON("y" << "bar")))); } // Regression test for SERVER-11994. 
TEST(FTSMatcher, NegWild2) {
    FTSQueryImpl query;
    query.setQuery("pizza -restaurant");
    query.setLanguage("english");
    query.setCaseSensitive(false);
    query.setDiacriticSensitive(false);
    ASSERT(query.parse(TEXT_INDEX_VERSION_3).isOK());

    const auto indexSpec = BSON("key" << BSON("$**" << "text"));
    const FTSSpec spec(assertGet(FTSSpec::fixSpec(indexSpec)));
    FTSMatcher matcher(query, spec);

    // The query is case-insensitive, so the negated term has to be found in both the
    // lower-case and the upper-case document.
    ASSERT(matcher.hasNegativeTerm(BSON("x" << BSON("y" << "pizza restaurant"))));
    ASSERT(matcher.hasNegativeTerm(BSON("x" << BSON("y" << "PIZZA RESTAURANT"))));
}

// Positive phrase matching against a wildcard text index: the quoted phrase "table top" has to
// occur in the document text as written in the query.
TEST(FTSMatcher, Phrase1) {
    FTSQueryImpl query;
    query.setQuery("foo \"table top\"");
    query.setLanguage("english");
    query.setCaseSensitive(false);
    query.setDiacriticSensitive(false);
    ASSERT(query.parse(TEXT_INDEX_VERSION_3).isOK());

    const auto indexSpec = BSON("key" << BSON("$**" << "text"));
    const FTSSpec spec(assertGet(FTSSpec::fixSpec(indexSpec)));
    FTSMatcher matcher(query, spec);

    // The phrase matches on its own and when surrounded by other words ...
    ASSERT(matcher.positivePhrasesMatch(BSON("x" << "table top")));
    ASSERT(matcher.positivePhrasesMatch(BSON("x" << " asd table top asd")));
    // ... but not when one of its words is misspelled.
    ASSERT(!matcher.positivePhrasesMatch(BSON("x" << "tablz top")));
    ASSERT(!matcher.positivePhrasesMatch(BSON("x" << " asd tablz top asd")));

    // An extra word between the phrase's words breaks the match.
    ASSERT(matcher.positivePhrasesMatch(BSON("x" << "table top")));
    ASSERT(!matcher.positivePhrasesMatch(BSON("x" << "table a top")));
}

// Positive phrase matching when the indexed field "x" holds an array of strings.
TEST(FTSMatcher, Phrase2) {
    FTSQueryImpl query;
    query.setQuery("foo \"table top\"");
    query.setLanguage("english");
    query.setCaseSensitive(false);
    query.setDiacriticSensitive(false);
    ASSERT(query.parse(TEXT_INDEX_VERSION_3).isOK());

    const auto indexSpec = BSON("key" << BSON("x" << "text"));
    const FTSSpec spec(assertGet(FTSSpec::fixSpec(indexSpec)));
    FTSMatcher matcher(query, spec);

    ASSERT(matcher.positivePhrasesMatch(BSON("x" << BSON_ARRAY("table top"))));
}

// Test that the matcher parses the document with the document language, not the search
// language.
TEST(FTSMatcher, ParsesUsingDocLanguage) {
    FTSQueryImpl query;
    query.setQuery("-glad");
    query.setLanguage("none");
    query.setCaseSensitive(false);
    query.setDiacriticSensitive(false);
    ASSERT(query.parse(TEXT_INDEX_VERSION_3).isOK());

    const auto indexSpec = BSON("key" << BSON("x" << "text"));
    const FTSSpec spec(assertGet(FTSSpec::fixSpec(indexSpec)));
    FTSMatcher matcher(query, spec);

    // Even though the search language is "none", the document {x: "gladly"} should be
    // parsed using the English stemmer, and as such should match the negated term "glad".
    ASSERT(matcher.hasNegativeTerm(BSON("x" << "gladly")));
}

// Test the matcher does not filter out stop words from negative terms
TEST(FTSMatcher, MatcherDoesNotFilterStopWordsNeg) {
    FTSQueryImpl query;
    query.setQuery("-the");
    query.setLanguage("none");
    query.setCaseSensitive(false);
    query.setDiacriticSensitive(false);
    ASSERT(query.parse(TEXT_INDEX_VERSION_3).isOK());

    const auto indexSpec = BSON("key" << BSON("x" << "text"));
    const FTSSpec spec(assertGet(FTSSpec::fixSpec(indexSpec)));
    FTSMatcher matcher(query, spec);

    // The negated term "the" must still be detected in the document {x: "the"}.
    ASSERT(matcher.hasNegativeTerm(BSON("x" << "the")));
}

// Test the matcher does not filter out stop words from positive terms
TEST(FTSMatcher, MatcherDoesNotFilterStopWordsPos) {
    FTSQueryImpl query;
    query.setQuery("the");
    query.setLanguage("none");
    query.setCaseSensitive(false);
    query.setDiacriticSensitive(false);
    ASSERT(query.parse(TEXT_INDEX_VERSION_3).isOK());

    const auto indexSpec = BSON("key" << BSON("x" << "text"));
    const FTSSpec spec(assertGet(FTSSpec::fixSpec(indexSpec)));
    FTSMatcher matcher(query, spec);

    // The positive term "the" must still be detected in the document {x: "the"}.
    ASSERT(matcher.hasPositiveTerm(BSON("x" << "the")));
}

// Returns whether a document indexed with text data 'doc' contains any positive terms from
// case-sensitive text query 'search'.
static bool docHasPositiveTermWithCase(const std::string& doc, const std::string& search) { FTSQueryImpl q; q.setQuery(search); q.setLanguage("english"); q.setCaseSensitive(true); q.setDiacriticSensitive(false); ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text")))))); return m.hasPositiveTerm(BSON("x" << doc)); } TEST(FTSMatcher, HasPositiveTermCaseSensitive) { ASSERT_TRUE(docHasPositiveTermWithCase("hello world", "hello")); ASSERT_TRUE(docHasPositiveTermWithCase("Hello World", "Hello")); ASSERT_TRUE(docHasPositiveTermWithCase("Hello World", "World Hello")); ASSERT_TRUE(docHasPositiveTermWithCase("Hello World", "World GoodBye")); ASSERT_TRUE(docHasPositiveTermWithCase("John Runs", "Runs")); ASSERT_TRUE(docHasPositiveTermWithCase("John Runs", "Running")); ASSERT_TRUE(docHasPositiveTermWithCase("John Runs", "Run")); ASSERT_FALSE(docHasPositiveTermWithCase("John Runs", "run")); ASSERT_FALSE(docHasPositiveTermWithCase("Hello World", "HELLO")); ASSERT_FALSE(docHasPositiveTermWithCase("hello world", "Hello")); ASSERT_FALSE(docHasPositiveTermWithCase("Hello World", "hello")); } // Returns whether a document indexed with text data 'doc' contains any negative terms from // case-sensitive text query 'search'. 
static bool docHasNegativeTermWithCase(const std::string& doc, const std::string& search) { FTSQueryImpl q; q.setQuery(search); q.setLanguage("english"); q.setCaseSensitive(true); q.setDiacriticSensitive(false); ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text")))))); return m.hasNegativeTerm(BSON("x" << doc)); } TEST(FTSMatcher, HasNegativeTermCaseSensitive) { ASSERT_TRUE(docHasNegativeTermWithCase("hello world", "hello -world")); ASSERT_TRUE(docHasNegativeTermWithCase("Hello World", "Hello -World")); ASSERT_TRUE(docHasNegativeTermWithCase("Hello World", "-World -Hello")); ASSERT_TRUE(docHasNegativeTermWithCase("Hello World", "-Goodbye -World")); ASSERT_TRUE(docHasNegativeTermWithCase("John Runs", "-Runs")); ASSERT_TRUE(docHasNegativeTermWithCase("John Runs", "-Running")); ASSERT_TRUE(docHasNegativeTermWithCase("John Runs", "-Run")); ASSERT_FALSE(docHasNegativeTermWithCase("John Runs", "-run")); ASSERT_FALSE(docHasNegativeTermWithCase("Hello World", "Hello -WORLD")); ASSERT_FALSE(docHasNegativeTermWithCase("hello world", "hello -World")); ASSERT_FALSE(docHasNegativeTermWithCase("Hello World", "Hello -world")); } // Returns whether a document indexed with text data 'doc' contains all positive phrases // from case-sensitive text query 'search'. 
static bool docPositivePhrasesMatchWithCase(const std::string& doc, const std::string& search) { FTSQueryImpl q; q.setQuery(search); q.setLanguage("english"); q.setCaseSensitive(true); q.setDiacriticSensitive(false); ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text")))))); return m.positivePhrasesMatch(BSON("x" << doc)); } TEST(FTSMatcher, PositivePhrasesMatchWithCase) { ASSERT_TRUE(docPositivePhrasesMatchWithCase("John Runs", "\"John Runs\"")); ASSERT_TRUE(docPositivePhrasesMatchWithCase("John Runs", "\"John Run\"")); ASSERT_TRUE(docPositivePhrasesMatchWithCase("John Runs", "\"John\" \"Run\"")); ASSERT_TRUE(docPositivePhrasesMatchWithCase("John Runs", "\"n R\"")); ASSERT_FALSE(docPositivePhrasesMatchWithCase("John Runs", "\"john runs\"")); ASSERT_FALSE(docPositivePhrasesMatchWithCase("john runs", "\"John Runs\"")); ASSERT_FALSE(docPositivePhrasesMatchWithCase("John Runs", "\"John\" \"Running\"")); } // Returns whether a document indexed with text data 'doc' contains zero negative phrases // from case-sensitive text query 'search'. 
static bool docNegativePhrasesMatchWithCase(const std::string& doc, const std::string& search) { FTSQueryImpl q; q.setQuery(search); q.setLanguage("english"); q.setCaseSensitive(true); q.setDiacriticSensitive(false); ASSERT(q.parse(TEXT_INDEX_VERSION_3).isOK()); FTSMatcher m(q, FTSSpec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("x" << "text")))))); return m.negativePhrasesMatch(BSON("x" << doc)); } TEST(FTSMatcher, NegativePhrasesMatchWithCase) { ASSERT_TRUE(docNegativePhrasesMatchWithCase("John Runs", "-\"john runs\"")); ASSERT_TRUE(docNegativePhrasesMatchWithCase("john runs", "-\"John Runs\"")); ASSERT_TRUE(docNegativePhrasesMatchWithCase("john runs", "-\"John\" -\"Runs\"")); ASSERT_FALSE(docNegativePhrasesMatchWithCase("John Runs", "-\"John Runs\"")); ASSERT_FALSE(docNegativePhrasesMatchWithCase("John Runs", "-\"John Run\"")); ASSERT_FALSE(docNegativePhrasesMatchWithCase("John Runs", "-\"John\" -\"Run\"")); ASSERT_FALSE(docNegativePhrasesMatchWithCase("John Runs", "-\"n R\"")); ASSERT_FALSE(docNegativePhrasesMatchWithCase("John Runs", "-\"John\" -\"Running\"")); } } }