author    | Xiangyu Yao <xiangyu.yao@mongodb.com> | 2018-08-03 14:22:13 -0400
committer | Xiangyu Yao <xiangyu.yao@mongodb.com> | 2018-08-13 19:20:21 -0400
commit    | ab5cf1f214277536f1d4d2d1dc1fa319af911bbe (patch)
tree      | 4965379813dc8ebb75bc1848d484d95996f3f093
parent    | c764ee142f633b3a88954f336b11633e1baeffdc (diff)
download  | mongo-ab5cf1f214277536f1d4d2d1dc1fa319af911bbe.tar.gz
SERVER-22078 Remove term list limits for text index in FCV 4.2
-rw-r--r-- | jstests/core/text_index_limits.js         | 42
-rw-r--r-- | jstests/multiVersion/text_index_limits.js | 53
-rw-r--r-- | src/mongo/db/fts/fts_index_format.cpp     | 37
3 files changed, 119 insertions, 13 deletions
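
In short, after this change the two historical text-index limits — error 16732 (more than 400,000 unique terms in one document) and error 16733 (more than 4 MB of index-key BSON for one document) — are enforced only while featureCompatibilityVersion is 4.0. Below is a minimal mongo-shell sketch of the gated behavior the new tests exercise; the collection and variable names are illustrative, and the error codes come from fts_index_format.cpp in the diff that follows.

    // Minimal sketch, assuming a standalone mongod reachable from the mongo shell.
    // Under FCV 4.0 the insert below fails with code 16732; under FCV 4.2 it succeeds.
    assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: "4.0"}));

    let coll = db.text_limits_demo;  // illustrative collection name
    coll.drop();
    assert.commandWorked(coll.createIndex({comments: "text"}));

    // 26^4 = 456,976 unique four-letter words, exceeding the 400,000-term limit.
    let words = "";
    for (let a = 97; a < 123; a++)
        for (let b = 97; b < 123; b++)
            for (let c = 97; c < 123; c++)
                for (let d = 97; d < 123; d++)
                    words += String.fromCharCode(a, b, c, d) + " ";

    printjson(db.runCommand({insert: coll.getName(), documents: [{comments: words}]}));
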
diff --git a/jstests/core/text_index_limits.js b/jstests/core/text_index_limits.js
new file mode 100644
index 00000000000..4779c58b939
--- /dev/null
+++ b/jstests/core/text_index_limits.js
@@ -0,0 +1,42 @@
+// There is no limit for the total size or the number of unique terms for text index.
+(function() {
+    "use strict";
+
+    var t = db.text_index_limits;
+    t.drop();
+
+    assert.commandWorked(t.createIndex({comments: "text"}));
+
+    // 1. Test number of unique terms exceeds 400,000
+    let commentsWithALotOfUniqueWords = "";
+    // 26^4 = 456,976 > 400,000
+    for (let ch1 = 97; ch1 < 123; ch1++) {
+        for (let ch2 = 97; ch2 < 123; ch2++) {
+            for (let ch3 = 97; ch3 < 123; ch3++) {
+                for (let ch4 = 97; ch4 < 123; ch4++) {
+                    let word = String.fromCharCode(ch1, ch2, ch3, ch4);
+                    commentsWithALotOfUniqueWords += word + " ";
+                }
+            }
+        }
+    }
+    assert.commandWorked(db.runCommand(
+        {insert: t.getName(), documents: [{_id: 1, comments: commentsWithALotOfUniqueWords}]}));
+
+    // 2. Test total size of index keys for unique terms exceeds 4MB
+
+    // 26^3 = 17576 < 400,000
+    let prefix = "a".repeat(400);
+    let commentsWithWordsOfLargeSize = "";
+    for (let ch1 = 97; ch1 < 123; ch1++) {
+        for (let ch2 = 97; ch2 < 123; ch2++) {
+            for (let ch3 = 97; ch3 < 123; ch3++) {
+                let word = String.fromCharCode(ch1, ch2, ch3);
+                commentsWithWordsOfLargeSize += prefix + word + " ";
+            }
+        }
+    }
+    assert.commandWorked(db.runCommand(
+        {insert: t.getName(), documents: [{_id: 2, comments: commentsWithWordsOfLargeSize}]}));
+
+}());
diff --git a/jstests/multiVersion/text_index_limits.js b/jstests/multiVersion/text_index_limits.js
new file mode 100644
index 00000000000..5860d5b0726
--- /dev/null
+++ b/jstests/multiVersion/text_index_limits.js
@@ -0,0 +1,53 @@
+// TODO SERVER-36440: Remove this test
+(function() {
+    "use strict";
+
+    load("jstests/libs/feature_compatibility_version.js");
+
+    // Start the node with FCV 4.0
+    let conn = MongoRunner.runMongod({binVersion: "latest"});
+    assert.commandWorked(conn.adminCommand({setFeatureCompatibilityVersion: "4.0"}));
+    var db = conn.getDB('test');
+    var t = db.text_index_limits;
+    t.drop();
+
+    assert.commandWorked(t.createIndex({comments: "text"}));
+
+    // 1. Test number of unique terms exceeds 400,000
+    let commentsWithALotOfUniqueWords = "";
+    // 26^4 = 456,976 > 400,000
+    for (let ch1 = 97; ch1 < 123; ch1++) {
+        for (let ch2 = 97; ch2 < 123; ch2++) {
+            for (let ch3 = 97; ch3 < 123; ch3++) {
+                for (let ch4 = 97; ch4 < 123; ch4++) {
+                    let word = String.fromCharCode(ch1, ch2, ch3, ch4);
+                    commentsWithALotOfUniqueWords += word + " ";
+                }
+            }
+        }
+    }
+    assert.commandFailedWithCode(
+        db.runCommand(
+            {insert: t.getName(), documents: [{_id: 1, comments: commentsWithALotOfUniqueWords}]}),
+        16732);
+
+    // 2. Test total size of index keys for unique terms exceeds 4MB
+
+    // 26^3 = 17576 < 400,000
+    let prefix = "a".repeat(400);
+    let commentsWithWordsOfLargeSize = "";
+    for (let ch1 = 97; ch1 < 123; ch1++) {
+        for (let ch2 = 97; ch2 < 123; ch2++) {
+            for (let ch3 = 97; ch3 < 123; ch3++) {
+                let word = String.fromCharCode(ch1, ch2, ch3);
+                commentsWithWordsOfLargeSize += prefix + word + " ";
+            }
+        }
+    }
+    assert.commandFailedWithCode(
+        db.runCommand(
+            {insert: t.getName(), documents: [{_id: 2, comments: commentsWithWordsOfLargeSize}]}),
+        16733);
+
+    MongoRunner.stopMongod(conn);
+}());
diff --git a/src/mongo/db/fts/fts_index_format.cpp b/src/mongo/db/fts/fts_index_format.cpp
index 770930e90c1..2b0477a5d31 100644
--- a/src/mongo/db/fts/fts_index_format.cpp
+++ b/src/mongo/db/fts/fts_index_format.cpp
@@ -36,6 +36,7 @@
 #include "mongo/db/bson/dotted_path_support.h"
 #include "mongo/db/fts/fts_index_format.h"
 #include "mongo/db/fts/fts_spec.h"
+#include "mongo/db/server_options.h"
 #include "mongo/util/hex.h"
 #include "mongo/util/md5.hpp"
 #include "mongo/util/mongoutils/str.h"
@@ -160,12 +161,17 @@ void FTSIndexFormat::getKeys(const FTSSpec& spec, const BSONObj& obj, BSONObjSet
 
     // create index keys from raw scores
     // only 1 per string
-    uassert(16732,
-            mongoutils::str::stream() << "too many unique keys for a single document to"
-                                      << " have a text index, max is "
-                                      << term_freqs.size()
-                                      << obj["_id"],
-            term_freqs.size() <= 400000);
+    // TODO SERVER-36440: Completely remove this limit in 4.3.
+    if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
+        serverGlobalParams.featureCompatibility.getVersion() ==
+            ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) {
+        uassert(16732,
+                mongoutils::str::stream() << "too many unique keys for a single document to"
+                                          << " have a text index, max is "
+                                          << term_freqs.size()
+                                          << obj["_id"],
+                term_freqs.size() <= 400000);
+    }
 
     long long keyBSONSize = 0;
     const int MaxKeyBSONSizeMB = 4;
@@ -194,13 +200,18 @@ void FTSIndexFormat::getKeys(const FTSSpec& spec, const BSONObj& obj, BSONObjSet
 
         keys->insert(res);
         keyBSONSize += res.objsize();
 
-        uassert(16733,
-                mongoutils::str::stream()
-                    << "trying to index text where term list is too big, max is "
-                    << MaxKeyBSONSizeMB
-                    << "mb "
-                    << obj["_id"],
-                keyBSONSize <= (MaxKeyBSONSizeMB * 1024 * 1024));
+        // TODO SERVER-36440: Completely remove this limit in 4.3.
+        if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
+            serverGlobalParams.featureCompatibility.getVersion() ==
+                ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) {
+            uassert(16733,
+                    mongoutils::str::stream()
+                        << "trying to index text where term list is too big, max is "
+                        << MaxKeyBSONSizeMB
+                        << "mb "
+                        << obj["_id"],
+                    keyBSONSize <= (MaxKeyBSONSizeMB * 1024 * 1024));
+        }
     }
 }
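
The C++ gating above only fires when the node's FCV is initialized and fully downgraded to 4.0; on a 4.2 node both uasserts are skipped entirely. To see which mode a running node is in, the standard getParameter admin command (not introduced by this patch) reports the current FCV:

    // Reads the node's current featureCompatibilityVersion document.
    printjson(db.adminCommand({getParameter: 1, featureCompatibilityVersion: 1}));
    // e.g. { "featureCompatibilityVersion" : { "version" : "4.0" }, "ok" : 1 }
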