summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorHenrik Edin <henrik.edin@mongodb.com>2020-05-04 13:06:15 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-05-05 19:24:43 +0000
commitb7d70ba03a92d70b5bda98960c8764007081d575 (patch)
tree19daecbadaae99536252729a1708414f00b3eb99 /src
parent9e80c5c46ec01e61e020681b393ce8e529049836 (diff)
downloadmongo-b7d70ba03a92d70b5bda98960c8764007081d575.tar.gz
SERVER-47928 Fix quadratic KeyString insert behavior in fts indexes.
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/fts/fts_index_format.cpp15
-rw-r--r--src/mongo/db/fts/fts_index_format.h6
-rw-r--r--src/mongo/db/fts/fts_index_format_test.cpp111
-rw-r--r--src/mongo/db/index/expression_keys_private.cpp6
-rw-r--r--src/mongo/db/index/expression_keys_private.h3
-rw-r--r--src/mongo/db/index/fts_access_method.cpp3
6 files changed, 95 insertions, 49 deletions
diff --git a/src/mongo/db/fts/fts_index_format.cpp b/src/mongo/db/fts/fts_index_format.cpp
index f6f5e379c02..1993f518115 100644
--- a/src/mongo/db/fts/fts_index_format.cpp
+++ b/src/mongo/db/fts/fts_index_format.cpp
@@ -110,7 +110,8 @@ MONGO_INITIALIZER(FTSIndexFormat)(InitializerContext* context) {
return Status::OK();
}
-void FTSIndexFormat::getKeys(const FTSSpec& spec,
+void FTSIndexFormat::getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder,
+ const FTSSpec& spec,
const BSONObj& obj,
KeyStringSet* keys,
KeyString::Version keyStringVersion,
@@ -137,11 +138,12 @@ void FTSIndexFormat::getKeys(const FTSSpec& spec,
TermFrequencyMap term_freqs;
spec.scoreDocument(obj, &term_freqs);
+ auto sequence = keys->extract_sequence();
for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) {
const string& term = i->first;
double weight = i->second;
- KeyString::Builder keyString(keyStringVersion, ordering);
+ KeyString::PooledBuilder keyString(pooledBufferBuilder, keyStringVersion, ordering);
for (const auto& elem : extrasBefore) {
keyString.appendBSONElement(elem);
}
@@ -154,11 +156,9 @@ void FTSIndexFormat::getKeys(const FTSSpec& spec,
keyString.appendRecordId(*id);
}
- /*
- * Insert a copy to only allocate as much buffer space as necessary.
- */
- keys->insert(keyString.getValueCopy());
+ sequence.push_back(keyString.release());
}
+ keys->adopt_sequence(std::move(sequence));
}
BSONObj FTSIndexFormat::getIndexKey(double weight,
@@ -179,7 +179,8 @@ BSONObj FTSIndexFormat::getIndexKey(double weight,
return b.appendElements(key).obj();
}
-void FTSIndexFormat::_appendIndexKey(KeyString::Builder& keyString,
+template <typename KeyStringBuilder>
+void FTSIndexFormat::_appendIndexKey(KeyStringBuilder& keyString,
double weight,
const string& term,
TextIndexVersion textIndexVersion) {
diff --git a/src/mongo/db/fts/fts_index_format.h b/src/mongo/db/fts/fts_index_format.h
index a3a9edfa331..f52918aeb4a 100644
--- a/src/mongo/db/fts/fts_index_format.h
+++ b/src/mongo/db/fts/fts_index_format.h
@@ -45,7 +45,8 @@ class FTSSpec;
class FTSIndexFormat {
public:
- static void getKeys(const FTSSpec& spec,
+ static void getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder,
+ const FTSSpec& spec,
const BSONObj& document,
KeyStringSet* keys,
KeyString::Version keyStringVersion,
@@ -84,7 +85,8 @@ private:
* 'term' is the std::string term in the entry.
* 'textIndexVersion' is index version, affects key format.
*/
- static void _appendIndexKey(KeyString::Builder& keyString,
+ template <typename KeyStringBuilder>
+ static void _appendIndexKey(KeyStringBuilder& keyString,
double weight,
const std::string& term,
TextIndexVersion textIndexVersion);
diff --git a/src/mongo/db/fts/fts_index_format_test.cpp b/src/mongo/db/fts/fts_index_format_test.cpp
index 9332d1a53cb..7d5dd4a3eae 100644
--- a/src/mongo/db/fts/fts_index_format_test.cpp
+++ b/src/mongo/db/fts/fts_index_format_test.cpp
@@ -50,8 +50,10 @@ using unittest::assertGet;
TEST(FTSIndexFormat, Simple1) {
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text")))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
- FTSIndexFormat::getKeys(spec,
+ FTSIndexFormat::getKeys(allocator,
+ spec,
BSON("data"
<< "cat sat"),
&keys,
@@ -70,8 +72,10 @@ TEST(FTSIndexFormat, ExtraBack1) {
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text"
<< "x" << 1)))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
- FTSIndexFormat::getKeys(spec,
+ FTSIndexFormat::getKeys(allocator,
+ spec,
BSON("data"
<< "cat"
<< "x" << 5),
@@ -91,8 +95,10 @@ TEST(FTSIndexFormat, ExtraBack1) {
TEST(FTSIndexFormat, ExtraFront1) {
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("x" << 1 << "data"
<< "text")))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
- FTSIndexFormat::getKeys(spec,
+ FTSIndexFormat::getKeys(allocator,
+ spec,
BSON("data"
<< "cat"
<< "x" << 5),
@@ -112,9 +118,10 @@ TEST(FTSIndexFormat, ExtraFront1) {
TEST(FTSIndexFormat, StopWords1) {
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text")))));
-
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys1;
- FTSIndexFormat::getKeys(spec,
+ FTSIndexFormat::getKeys(allocator,
+ spec,
BSON("data"
<< "computer"),
&keys1,
@@ -123,7 +130,8 @@ TEST(FTSIndexFormat, StopWords1) {
ASSERT_EQUALS(1U, keys1.size());
KeyStringSet keys2;
- FTSIndexFormat::getKeys(spec,
+ FTSIndexFormat::getKeys(allocator,
+ spec,
BSON("data"
<< "any computer"),
&keys2,
@@ -165,6 +173,7 @@ TEST(FTSIndexFormat, LongWordsTextIndexVersion1) {
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text")
<< "textIndexVersion" << 1))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
string longPrefix(1024U, 'a');
// "aaa...aaacat"
@@ -172,7 +181,8 @@ TEST(FTSIndexFormat, LongWordsTextIndexVersion1) {
// "aaa...aaasat"
string longWordSat = longPrefix + "sat";
string text = str::stream() << longWordCat << " " << longWordSat;
- FTSIndexFormat::getKeys(spec,
+ FTSIndexFormat::getKeys(allocator,
+ spec,
BSON("data" << text),
&keys,
KeyString::Version::kLatestVersion,
@@ -198,6 +208,7 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) {
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text")
<< "textIndexVersion" << 2))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
string longPrefix(1024U, 'a');
// "aaa...aaacat"
@@ -207,7 +218,8 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) {
// "aaa...aaamongodbfts"
string longWordMongoDBFts = longPrefix + "mongodbfts";
string text = str::stream() << longWordCat << " " << longWordSat << " " << longWordMongoDBFts;
- FTSIndexFormat::getKeys(spec,
+ FTSIndexFormat::getKeys(allocator,
+ spec,
BSON("data" << text),
&keys,
KeyString::Version::kLatestVersion,
@@ -235,6 +247,7 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion3) {
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data"
<< "text")
<< "textIndexVersion" << 3))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
string longPrefix(1024U, 'a');
// "aaa...aaacat"
@@ -242,7 +255,8 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion3) {
// "aaa...aaasat"
string longWordSat = longPrefix + "sat";
string text = str::stream() << longWordCat << " " << longWordSat;
- FTSIndexFormat::getKeys(spec,
+ FTSIndexFormat::getKeys(allocator,
+ spec,
BSON("data" << text),
&keys,
KeyString::Version::kLatestVersion,
@@ -267,70 +281,95 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion3) {
TEST(FTSIndexFormat, GetKeysWithLeadingEmptyArrayThrows) {
BSONObj keyPattern = fromjson("{'a.b': 1, data: 'text'}");
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
BSONObj objToIndex = fromjson("{a: {b: []}, data: 'foo'}");
- ASSERT_THROWS_CODE(
- FTSIndexFormat::getKeys(
- spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())),
- AssertionException,
- ErrorCodes::CannotBuildIndexKeys);
+ ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator,
+ spec,
+ objToIndex,
+ &keys,
+ KeyString::Version::kLatestVersion,
+ Ordering::make(BSONObj())),
+ AssertionException,
+ ErrorCodes::CannotBuildIndexKeys);
}
TEST(FTSIndexFormat, GetKeysWithTrailingEmptyArrayThrows) {
BSONObj keyPattern = fromjson("{data: 'text', 'a.b': 1}");
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
BSONObj objToIndex = fromjson("{a: {b: []}, data: 'foo'}");
- ASSERT_THROWS_CODE(
- FTSIndexFormat::getKeys(
- spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())),
- AssertionException,
- ErrorCodes::CannotBuildIndexKeys);
+ ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator,
+ spec,
+ objToIndex,
+ &keys,
+ KeyString::Version::kLatestVersion,
+ Ordering::make(BSONObj())),
+ AssertionException,
+ ErrorCodes::CannotBuildIndexKeys);
}
TEST(FTSIndexFormat, GetKeysWithLeadingSingleElementArrayThrows) {
BSONObj keyPattern = fromjson("{'a.b': 1, data: 'text'}");
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
BSONObj objToIndex = fromjson("{a: [{b: 9}], data: 'foo'}");
- ASSERT_THROWS_CODE(
- FTSIndexFormat::getKeys(
- spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())),
- AssertionException,
- ErrorCodes::CannotBuildIndexKeys);
+ ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator,
+ spec,
+ objToIndex,
+ &keys,
+ KeyString::Version::kLatestVersion,
+ Ordering::make(BSONObj())),
+ AssertionException,
+ ErrorCodes::CannotBuildIndexKeys);
}
TEST(FTSIndexFormat, GetKeysWithTrailingSingleElementArrayThrows) {
BSONObj keyPattern = fromjson("{data: 'text', 'a.b': 1}");
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
BSONObj objToIndex = fromjson("{a: [{b: 9}], data: 'foo'}");
- ASSERT_THROWS_CODE(
- FTSIndexFormat::getKeys(
- spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())),
- AssertionException,
- ErrorCodes::CannotBuildIndexKeys);
+ ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator,
+ spec,
+ objToIndex,
+ &keys,
+ KeyString::Version::kLatestVersion,
+ Ordering::make(BSONObj())),
+ AssertionException,
+ ErrorCodes::CannotBuildIndexKeys);
}
TEST(FTSIndexFormat, GetKeysWithMultiElementArrayThrows) {
BSONObj keyPattern = fromjson("{'a.b': 1, 'a.c': 'text'}");
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
BSONObj objToIndex = fromjson("{a: [{b: 9, c: 'foo'}, {b: 10, c: 'bar'}]}");
- ASSERT_THROWS_CODE(
- FTSIndexFormat::getKeys(
- spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())),
- AssertionException,
- ErrorCodes::CannotBuildIndexKeys);
+ ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator,
+ spec,
+ objToIndex,
+ &keys,
+ KeyString::Version::kLatestVersion,
+ Ordering::make(BSONObj())),
+ AssertionException,
+ ErrorCodes::CannotBuildIndexKeys);
}
TEST(FTSIndexFormat, GetKeysWithPositionalPathAllowed) {
BSONObj keyPattern = fromjson("{'a.0': 1, 'a.b': 'text'}");
FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3))));
+ SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes);
KeyStringSet keys;
BSONObj objToIndex = fromjson("{a: [{b: 'foo'}, {b: 'bar'}]}");
- FTSIndexFormat::getKeys(
- spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj()));
+ FTSIndexFormat::getKeys(allocator,
+ spec,
+ objToIndex,
+ &keys,
+ KeyString::Version::kLatestVersion,
+ Ordering::make(BSONObj()));
ASSERT_EQ(2U, keys.size());
{
diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp
index e1bc0a8a5f4..09675556ecf 100644
--- a/src/mongo/db/index/expression_keys_private.cpp
+++ b/src/mongo/db/index/expression_keys_private.cpp
@@ -363,13 +363,15 @@ void ExpressionKeysPrivate::get2DKeys(const BSONObj& obj,
}
// static
-void ExpressionKeysPrivate::getFTSKeys(const BSONObj& obj,
+void ExpressionKeysPrivate::getFTSKeys(SharedBufferFragmentBuilder& pooledBufferBuilder,
+ const BSONObj& obj,
const fts::FTSSpec& ftsSpec,
KeyStringSet* keys,
KeyString::Version keyStringVersion,
Ordering ordering,
boost::optional<RecordId> id) {
- fts::FTSIndexFormat::getKeys(ftsSpec, obj, keys, keyStringVersion, ordering, id);
+ fts::FTSIndexFormat::getKeys(
+ pooledBufferBuilder, ftsSpec, obj, keys, keyStringVersion, ordering, id);
}
// static
diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h
index a4cb319c041..3356fd9f869 100644
--- a/src/mongo/db/index/expression_keys_private.h
+++ b/src/mongo/db/index/expression_keys_private.h
@@ -71,7 +71,8 @@ public:
// FTS
//
- static void getFTSKeys(const BSONObj& obj,
+ static void getFTSKeys(SharedBufferFragmentBuilder& pooledBufferBuilder,
+ const BSONObj& obj,
const fts::FTSSpec& ftsSpec,
KeyStringSet* keys,
KeyString::Version keyStringVersion,
diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp
index 294f235ab78..7574cb2885b 100644
--- a/src/mongo/db/index/fts_access_method.cpp
+++ b/src/mongo/db/index/fts_access_method.cpp
@@ -46,7 +46,8 @@ void FTSAccessMethod::doGetKeys(SharedBufferFragmentBuilder& pooledBufferBuilder
KeyStringSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths,
boost::optional<RecordId> id) const {
- ExpressionKeysPrivate::getFTSKeys(obj,
+ ExpressionKeysPrivate::getFTSKeys(pooledBufferBuilder,
+ obj,
_ftsSpec,
keys,
getSortedDataInterface()->getKeyStringVersion(),