// fts_index_format_test.cpp
/**
* Copyright (C) 2012 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
#include "mongo/platform/basic.h"
#include
#include "mongo/db/fts/fts_index_format.h"
#include "mongo/db/fts/fts_spec.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
namespace fts {
using std::string;
TEST(FTSIndexFormat, Simple1) {
FTSSpec spec(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text"))));
BSONObjSet keys;
FTSIndexFormat::getKeys(spec,
BSON("data"
<< "cat sat"),
&keys);
ASSERT_EQUALS(2U, keys.size());
for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
BSONObj key = *i;
ASSERT_EQUALS(2, key.nFields());
ASSERT_EQUALS(String, key.firstElement().type());
}
}
TEST(FTSIndexFormat, ExtraBack1) {
FTSSpec spec(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text"
<< "x" << 1))));
BSONObjSet keys;
FTSIndexFormat::getKeys(spec,
BSON("data"
<< "cat"
<< "x" << 5),
&keys);
ASSERT_EQUALS(1U, keys.size());
BSONObj key = *(keys.begin());
ASSERT_EQUALS(3, key.nFields());
BSONObjIterator i(key);
ASSERT_EQUALS(StringData("cat"), i.next().valuestr());
ASSERT(i.next().numberDouble() > 0);
ASSERT_EQUALS(5, i.next().numberInt());
}
/*
TEST( FTSIndexFormat, ExtraBackArray1 ) {
FTSSpec spec( FTSSpec::fixSpec( BSON( "key" << BSON( "data" << "text" <<
"x.y" << 1 ) ) ) );
BSONObjSet keys;
FTSIndexFormat::getKeys( spec,
BSON( "data" << "cat" <<
"x" << BSON_ARRAY( BSON( "y" << 1 ) <<
BSON( "y" << 2 ) ) ),
&keys );
ASSERT_EQUALS( 1U, keys.size() );
BSONObj key = *(keys.begin());
log() << "e: " << key << endl;
ASSERT_EQUALS( 3, key.nFields() );
BSONObjIterator i( key );
ASSERT_EQUALS( StringData("cat"), i.next().valuestr() );
ASSERT( i.next().numberDouble() > 0 );
ASSERT_EQUALS( 5, i.next().numberInt() );
}
*/
TEST(FTSIndexFormat, ExtraFront1) {
FTSSpec spec(FTSSpec::fixSpec(BSON("key" << BSON("x" << 1 << "data"
<< "text"))));
BSONObjSet keys;
FTSIndexFormat::getKeys(spec,
BSON("data"
<< "cat"
<< "x" << 5),
&keys);
ASSERT_EQUALS(1U, keys.size());
BSONObj key = *(keys.begin());
ASSERT_EQUALS(3, key.nFields());
BSONObjIterator i(key);
ASSERT_EQUALS(5, i.next().numberInt());
ASSERT_EQUALS(StringData("cat"), i.next().valuestr());
ASSERT(i.next().numberDouble() > 0);
}
TEST(FTSIndexFormat, StopWords1) {
FTSSpec spec(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text"))));
BSONObjSet keys1;
FTSIndexFormat::getKeys(spec,
BSON("data"
<< "computer"),
&keys1);
ASSERT_EQUALS(1U, keys1.size());
BSONObjSet keys2;
FTSIndexFormat::getKeys(spec,
BSON("data"
<< "any computer"),
&keys2);
ASSERT_EQUALS(1U, keys2.size());
}
/**
* Helper function to compare keys returned in getKeys() result
* with expected values.
*/
void assertEqualsIndexKeys(std::set& expectedKeys, const BSONObjSet& keys) {
ASSERT_EQUALS(expectedKeys.size(), keys.size());
for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
BSONObj key = *i;
ASSERT_EQUALS(2, key.nFields());
ASSERT_EQUALS(String, key.firstElement().type());
string s = key.firstElement().String();
std::set::const_iterator j = expectedKeys.find(s);
if (j == expectedKeys.end()) {
mongoutils::str::stream ss;
ss << "unexpected key " << s << " in FTSIndexFormat::getKeys result. "
<< "expected keys:";
for (std::set::const_iterator k = expectedKeys.begin(); k != expectedKeys.end();
++k) {
ss << "\n " << *k;
}
FAIL(ss);
}
}
}
/**
* Tests keys for long terms using text index version 1.
* Terms that are too long are not truncated in version 1.
*/
TEST(FTSIndexFormat, LongWordsTextIndexVersion1) {
FTSSpec spec(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text") << "textIndexVersion" << 1)));
BSONObjSet keys;
string longPrefix(1024U, 'a');
// "aaa...aaacat"
string longWordCat = longPrefix + "cat";
// "aaa...aaasat"
string longWordSat = longPrefix + "sat";
string text = mongoutils::str::stream() << longWordCat << " " << longWordSat;
FTSIndexFormat::getKeys(spec, BSON("data" << text), &keys);
// Hard-coded expected computed keys for future-proofing.
std::set expectedKeys;
// cat
expectedKeys.insert(longWordCat);
// sat
expectedKeys.insert(longWordSat);
assertEqualsIndexKeys(expectedKeys, keys);
}
/**
* Tests keys for long terms using text index version 2.
* In version 2, long terms (longer than 32 characters)
* are hashed with murmur3 and appended to the first 32
* characters of the term to form the index key.
*/
TEST(FTSIndexFormat, LongWordTextIndexVersion2) {
FTSSpec spec(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text") << "textIndexVersion" << 2)));
BSONObjSet keys;
string longPrefix(1024U, 'a');
// "aaa...aaacat"
string longWordCat = longPrefix + "cat";
// "aaa...aaasat"
string longWordSat = longPrefix + "sat";
string text = mongoutils::str::stream() << longWordCat << " " << longWordSat;
FTSIndexFormat::getKeys(spec, BSON("data" << text), &keys);
// Hard-coded expected computed keys for future-proofing.
std::set expectedKeys;
// cat
expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab8e78455d827ebb87cbe87f392bf45f6");
// sat
expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaf2d6f58bb3b81b97e611ae7ccac6dea7");
assertEqualsIndexKeys(expectedKeys, keys);
}
}
}