diff options
author | Henrik Edin <henrik.edin@mongodb.com> | 2020-05-04 13:06:15 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-05 19:24:43 +0000 |
commit | b7d70ba03a92d70b5bda98960c8764007081d575 (patch) | |
tree | 19daecbadaae99536252729a1708414f00b3eb99 /src | |
parent | 9e80c5c46ec01e61e020681b393ce8e529049836 (diff) | |
download | mongo-b7d70ba03a92d70b5bda98960c8764007081d575.tar.gz |
SERVER-47928 Fix quadratic KeyString insert behavior in fts indexes.
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/fts/fts_index_format.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_index_format.h | 6 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_index_format_test.cpp | 111 | ||||
-rw-r--r-- | src/mongo/db/index/expression_keys_private.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/index/expression_keys_private.h | 3 | ||||
-rw-r--r-- | src/mongo/db/index/fts_access_method.cpp | 3 |
6 files changed, 95 insertions, 49 deletions
diff --git a/src/mongo/db/fts/fts_index_format.cpp b/src/mongo/db/fts/fts_index_format.cpp index f6f5e379c02..1993f518115 100644 --- a/src/mongo/db/fts/fts_index_format.cpp +++ b/src/mongo/db/fts/fts_index_format.cpp @@ -110,7 +110,8 @@ MONGO_INITIALIZER(FTSIndexFormat)(InitializerContext* context) { return Status::OK(); } -void FTSIndexFormat::getKeys(const FTSSpec& spec, +void FTSIndexFormat::getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder, + const FTSSpec& spec, const BSONObj& obj, KeyStringSet* keys, KeyString::Version keyStringVersion, @@ -137,11 +138,12 @@ void FTSIndexFormat::getKeys(const FTSSpec& spec, TermFrequencyMap term_freqs; spec.scoreDocument(obj, &term_freqs); + auto sequence = keys->extract_sequence(); for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) { const string& term = i->first; double weight = i->second; - KeyString::Builder keyString(keyStringVersion, ordering); + KeyString::PooledBuilder keyString(pooledBufferBuilder, keyStringVersion, ordering); for (const auto& elem : extrasBefore) { keyString.appendBSONElement(elem); } @@ -154,11 +156,9 @@ void FTSIndexFormat::getKeys(const FTSSpec& spec, keyString.appendRecordId(*id); } - /* - * Insert a copy to only allocate as much buffer space as necessary. 
- */ - keys->insert(keyString.getValueCopy()); + sequence.push_back(keyString.release()); } + keys->adopt_sequence(std::move(sequence)); } BSONObj FTSIndexFormat::getIndexKey(double weight, @@ -179,7 +179,8 @@ BSONObj FTSIndexFormat::getIndexKey(double weight, return b.appendElements(key).obj(); } -void FTSIndexFormat::_appendIndexKey(KeyString::Builder& keyString, +template <typename KeyStringBuilder> +void FTSIndexFormat::_appendIndexKey(KeyStringBuilder& keyString, double weight, const string& term, TextIndexVersion textIndexVersion) { diff --git a/src/mongo/db/fts/fts_index_format.h b/src/mongo/db/fts/fts_index_format.h index a3a9edfa331..f52918aeb4a 100644 --- a/src/mongo/db/fts/fts_index_format.h +++ b/src/mongo/db/fts/fts_index_format.h @@ -45,7 +45,8 @@ class FTSSpec; class FTSIndexFormat { public: - static void getKeys(const FTSSpec& spec, + static void getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder, + const FTSSpec& spec, const BSONObj& document, KeyStringSet* keys, KeyString::Version keyStringVersion, @@ -84,7 +85,8 @@ private: * 'term' is the std::string term in the entry. * 'textIndexVersion' is index version, affects key format. 
*/ - static void _appendIndexKey(KeyString::Builder& keyString, + template <typename KeyStringBuilder> + static void _appendIndexKey(KeyStringBuilder& keyString, double weight, const std::string& term, TextIndexVersion textIndexVersion); diff --git a/src/mongo/db/fts/fts_index_format_test.cpp b/src/mongo/db/fts/fts_index_format_test.cpp index 9332d1a53cb..7d5dd4a3eae 100644 --- a/src/mongo/db/fts/fts_index_format_test.cpp +++ b/src/mongo/db/fts/fts_index_format_test.cpp @@ -50,8 +50,10 @@ using unittest::assertGet; TEST(FTSIndexFormat, Simple1) { FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data" << "text"))))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; - FTSIndexFormat::getKeys(spec, + FTSIndexFormat::getKeys(allocator, + spec, BSON("data" << "cat sat"), &keys, @@ -70,8 +72,10 @@ TEST(FTSIndexFormat, ExtraBack1) { FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data" << "text" << "x" << 1))))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; - FTSIndexFormat::getKeys(spec, + FTSIndexFormat::getKeys(allocator, + spec, BSON("data" << "cat" << "x" << 5), @@ -91,8 +95,10 @@ TEST(FTSIndexFormat, ExtraBack1) { TEST(FTSIndexFormat, ExtraFront1) { FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("x" << 1 << "data" << "text"))))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; - FTSIndexFormat::getKeys(spec, + FTSIndexFormat::getKeys(allocator, + spec, BSON("data" << "cat" << "x" << 5), @@ -112,9 +118,10 @@ TEST(FTSIndexFormat, ExtraFront1) { TEST(FTSIndexFormat, StopWords1) { FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data" << "text"))))); - + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys1; - FTSIndexFormat::getKeys(spec, + FTSIndexFormat::getKeys(allocator, + spec, BSON("data" << "computer"), &keys1, @@ -123,7 +130,8 @@ 
TEST(FTSIndexFormat, StopWords1) { ASSERT_EQUALS(1U, keys1.size()); KeyStringSet keys2; - FTSIndexFormat::getKeys(spec, + FTSIndexFormat::getKeys(allocator, + spec, BSON("data" << "any computer"), &keys2, @@ -165,6 +173,7 @@ TEST(FTSIndexFormat, LongWordsTextIndexVersion1) { FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data" << "text") << "textIndexVersion" << 1)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; string longPrefix(1024U, 'a'); // "aaa...aaacat" @@ -172,7 +181,8 @@ TEST(FTSIndexFormat, LongWordsTextIndexVersion1) { // "aaa...aaasat" string longWordSat = longPrefix + "sat"; string text = str::stream() << longWordCat << " " << longWordSat; - FTSIndexFormat::getKeys(spec, + FTSIndexFormat::getKeys(allocator, + spec, BSON("data" << text), &keys, KeyString::Version::kLatestVersion, @@ -198,6 +208,7 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) { FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data" << "text") << "textIndexVersion" << 2)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; string longPrefix(1024U, 'a'); // "aaa...aaacat" @@ -207,7 +218,8 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) { // "aaa...aaamongodbfts" string longWordMongoDBFts = longPrefix + "mongodbfts"; string text = str::stream() << longWordCat << " " << longWordSat << " " << longWordMongoDBFts; - FTSIndexFormat::getKeys(spec, + FTSIndexFormat::getKeys(allocator, + spec, BSON("data" << text), &keys, KeyString::Version::kLatestVersion, @@ -235,6 +247,7 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion3) { FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << BSON("data" << "text") << "textIndexVersion" << 3)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; string longPrefix(1024U, 'a'); // "aaa...aaacat" @@ -242,7 +255,8 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion3) { // "aaa...aaasat" string 
longWordSat = longPrefix + "sat"; string text = str::stream() << longWordCat << " " << longWordSat; - FTSIndexFormat::getKeys(spec, + FTSIndexFormat::getKeys(allocator, + spec, BSON("data" << text), &keys, KeyString::Version::kLatestVersion, @@ -267,70 +281,95 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion3) { TEST(FTSIndexFormat, GetKeysWithLeadingEmptyArrayThrows) { BSONObj keyPattern = fromjson("{'a.b': 1, data: 'text'}"); FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; BSONObj objToIndex = fromjson("{a: {b: []}, data: 'foo'}"); - ASSERT_THROWS_CODE( - FTSIndexFormat::getKeys( - spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())), - AssertionException, - ErrorCodes::CannotBuildIndexKeys); + ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator, + spec, + objToIndex, + &keys, + KeyString::Version::kLatestVersion, + Ordering::make(BSONObj())), + AssertionException, + ErrorCodes::CannotBuildIndexKeys); } TEST(FTSIndexFormat, GetKeysWithTrailingEmptyArrayThrows) { BSONObj keyPattern = fromjson("{data: 'text', 'a.b': 1}"); FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; BSONObj objToIndex = fromjson("{a: {b: []}, data: 'foo'}"); - ASSERT_THROWS_CODE( - FTSIndexFormat::getKeys( - spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())), - AssertionException, - ErrorCodes::CannotBuildIndexKeys); + ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator, + spec, + objToIndex, + &keys, + KeyString::Version::kLatestVersion, + Ordering::make(BSONObj())), + AssertionException, + ErrorCodes::CannotBuildIndexKeys); } TEST(FTSIndexFormat, GetKeysWithLeadingSingleElementArrayThrows) { BSONObj keyPattern = fromjson("{'a.b': 1, data: 
'text'}"); FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; BSONObj objToIndex = fromjson("{a: [{b: 9}], data: 'foo'}"); - ASSERT_THROWS_CODE( - FTSIndexFormat::getKeys( - spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())), - AssertionException, - ErrorCodes::CannotBuildIndexKeys); + ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator, + spec, + objToIndex, + &keys, + KeyString::Version::kLatestVersion, + Ordering::make(BSONObj())), + AssertionException, + ErrorCodes::CannotBuildIndexKeys); } TEST(FTSIndexFormat, GetKeysWithTrailingSingleElementArrayThrows) { BSONObj keyPattern = fromjson("{data: 'text', 'a.b': 1}"); FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; BSONObj objToIndex = fromjson("{a: [{b: 9}], data: 'foo'}"); - ASSERT_THROWS_CODE( - FTSIndexFormat::getKeys( - spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())), - AssertionException, - ErrorCodes::CannotBuildIndexKeys); + ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator, + spec, + objToIndex, + &keys, + KeyString::Version::kLatestVersion, + Ordering::make(BSONObj())), + AssertionException, + ErrorCodes::CannotBuildIndexKeys); } TEST(FTSIndexFormat, GetKeysWithMultiElementArrayThrows) { BSONObj keyPattern = fromjson("{'a.b': 1, 'a.c': 'text'}"); FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; BSONObj objToIndex = fromjson("{a: [{b: 9, c: 'foo'}, {b: 10, c: 'bar'}]}"); - ASSERT_THROWS_CODE( - FTSIndexFormat::getKeys( - spec, objToIndex, &keys, KeyString::Version::kLatestVersion, 
Ordering::make(BSONObj())), - AssertionException, - ErrorCodes::CannotBuildIndexKeys); + ASSERT_THROWS_CODE(FTSIndexFormat::getKeys(allocator, + spec, + objToIndex, + &keys, + KeyString::Version::kLatestVersion, + Ordering::make(BSONObj())), + AssertionException, + ErrorCodes::CannotBuildIndexKeys); } TEST(FTSIndexFormat, GetKeysWithPositionalPathAllowed) { BSONObj keyPattern = fromjson("{'a.0': 1, 'a.b': 'text'}"); FTSSpec spec(assertGet(FTSSpec::fixSpec(BSON("key" << keyPattern << "textIndexVersion" << 3)))); + SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet keys; BSONObj objToIndex = fromjson("{a: [{b: 'foo'}, {b: 'bar'}]}"); - FTSIndexFormat::getKeys( - spec, objToIndex, &keys, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())); + FTSIndexFormat::getKeys(allocator, + spec, + objToIndex, + &keys, + KeyString::Version::kLatestVersion, + Ordering::make(BSONObj())); ASSERT_EQ(2U, keys.size()); { diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index e1bc0a8a5f4..09675556ecf 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -363,13 +363,15 @@ void ExpressionKeysPrivate::get2DKeys(const BSONObj& obj, } // static -void ExpressionKeysPrivate::getFTSKeys(const BSONObj& obj, +void ExpressionKeysPrivate::getFTSKeys(SharedBufferFragmentBuilder& pooledBufferBuilder, + const BSONObj& obj, const fts::FTSSpec& ftsSpec, KeyStringSet* keys, KeyString::Version keyStringVersion, Ordering ordering, boost::optional<RecordId> id) { - fts::FTSIndexFormat::getKeys(ftsSpec, obj, keys, keyStringVersion, ordering, id); + fts::FTSIndexFormat::getKeys( + pooledBufferBuilder, ftsSpec, obj, keys, keyStringVersion, ordering, id); } // static diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h index a4cb319c041..3356fd9f869 100644 --- 
a/src/mongo/db/index/expression_keys_private.h +++ b/src/mongo/db/index/expression_keys_private.h @@ -71,7 +71,8 @@ public: // FTS // - static void getFTSKeys(const BSONObj& obj, + static void getFTSKeys(SharedBufferFragmentBuilder& pooledBufferBuilder, + const BSONObj& obj, const fts::FTSSpec& ftsSpec, KeyStringSet* keys, KeyString::Version keyStringVersion, diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp index 294f235ab78..7574cb2885b 100644 --- a/src/mongo/db/index/fts_access_method.cpp +++ b/src/mongo/db/index/fts_access_method.cpp @@ -46,7 +46,8 @@ void FTSAccessMethod::doGetKeys(SharedBufferFragmentBuilder& pooledBufferBuilder KeyStringSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths, boost::optional<RecordId> id) const { - ExpressionKeysPrivate::getFTSKeys(obj, + ExpressionKeysPrivate::getFTSKeys(pooledBufferBuilder, + obj, _ftsSpec, keys, getSortedDataInterface()->getKeyStringVersion(), |