summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts
diff options
context:
space:
mode:
authorGregory Noma <gregory.noma@gmail.com>2019-08-08 15:02:29 -0400
committerGregory Noma <gregory.noma@gmail.com>2019-08-15 13:02:11 -0400
commit55d783629997fffd1a7e6a528de59a5404a51805 (patch)
tree9110249d13339c76c71a57a4b3d5b00f8964af22 /src/mongo/db/fts
parent5bdcbdb7dd33f47d809dc238cd5dfee1d91b0e09 (diff)
downloadmongo-55d783629997fffd1a7e6a528de59a5404a51805.tar.gz
SERVER-41726 Make FTSAccessMethod generate keys with KeyString
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r--src/mongo/db/fts/fts_index_format.cpp82
-rw-r--r--src/mongo/db/fts/fts_index_format.h22
2 files changed, 41 insertions, 63 deletions
diff --git a/src/mongo/db/fts/fts_index_format.cpp b/src/mongo/db/fts/fts_index_format.cpp
index b514a67a6dd..75d343ccedb 100644
--- a/src/mongo/db/fts/fts_index_format.cpp
+++ b/src/mongo/db/fts/fts_index_format.cpp
@@ -78,31 +78,6 @@ const size_t termKeySuffixLengthV3 = 32U;
const size_t termKeyLengthV3 = termKeyPrefixLengthV3 + termKeySuffixLengthV3;
/**
- * Returns size of buffer required to store term in index key.
- * In version 1, terms are stored verbatim in key.
- * In version 2 and above, terms longer than 32 characters are hashed and combined
- * with a prefix.
- */
-int guessTermSize(const std::string& term, TextIndexVersion textIndexVersion) {
- if (TEXT_INDEX_VERSION_1 == textIndexVersion) {
- return term.size();
- } else if (TEXT_INDEX_VERSION_2 == textIndexVersion) {
- if (term.size() <= termKeyPrefixLengthV2) {
- return term.size();
- }
-
- return termKeyLengthV2;
- } else {
- invariant(TEXT_INDEX_VERSION_3 == textIndexVersion);
- if (term.size() <= termKeyPrefixLengthV3) {
- return term.size();
- }
-
- return termKeyLengthV3;
- }
-}
-
-/**
* Given an object being indexed, 'obj', and a path through 'obj', returns the corresponding BSON
* element, according to the indexing rules for the non-text fields of an FTS index key pattern.
*
@@ -162,37 +137,27 @@ void FTSIndexFormat::getKeys(const FTSSpec& spec,
TermFrequencyMap term_freqs;
spec.scoreDocument(obj, &term_freqs);
- // create index keys from raw scores
- // only 1 per string
- long long keyBSONSize = 0;
-
for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) {
const string& term = i->first;
double weight = i->second;
- // guess the total size of the btree entry based on the size of the weight, term tuple
- int guess = 5 /* bson overhead */ + 10 /* weight */ + 8 /* term overhead */ +
- /* term size (could be truncated/hashed) */
- guessTermSize(term, spec.getTextIndexVersion()) + extraSize;
-
- BSONObjBuilder b(guess); // builds a BSON object with guess length.
- for (unsigned k = 0; k < extrasBefore.size(); k++) {
- b.appendAs(extrasBefore[k], "");
+ KeyString::Builder keyString(keyStringVersion, ordering);
+ for (const auto& elem : extrasBefore) {
+ keyString.appendBSONElement(elem);
}
- _appendIndexKey(b, weight, term, spec.getTextIndexVersion());
- for (unsigned k = 0; k < extrasAfter.size(); k++) {
- b.appendAs(extrasAfter[k], "");
+ _appendIndexKey(keyString, weight, term, spec.getTextIndexVersion());
+ for (const auto& elem : extrasAfter) {
+ keyString.appendBSONElement(elem);
}
- BSONObj res = b.obj();
-
- verify(guess >= res.objsize());
- KeyString::HeapBuilder keyString(keyStringVersion, res, ordering);
if (id) {
keyString.appendRecordId(*id);
}
- keys->insert(keyString.release());
- keyBSONSize += res.objsize();
+
+ /*
+ * Insert a copy to only allocate as much buffer space as necessary.
+ */
+ keys->insert(keyString.getValueCopy());
}
}
@@ -207,25 +172,27 @@ BSONObj FTSIndexFormat::getIndexKey(double weight,
b.appendAs(i.next(), "");
}
- _appendIndexKey(b, weight, term, textIndexVersion);
- return b.obj();
+ KeyString::Builder keyString(KeyString::Version::kLatestVersion, KeyString::ALL_ASCENDING);
+ _appendIndexKey(keyString, weight, term, textIndexVersion);
+ auto key = KeyString::toBson(keyString, KeyString::ALL_ASCENDING);
+
+ return b.appendElements(key).obj();
}
-void FTSIndexFormat::_appendIndexKey(BSONObjBuilder& b,
+void FTSIndexFormat::_appendIndexKey(KeyString::Builder& keyString,
double weight,
const string& term,
TextIndexVersion textIndexVersion) {
- verify(weight >= 0 && weight <= MAX_WEIGHT); // FTSmaxweight = defined in fts_header
+ invariant(weight >= 0 && weight <= MAX_WEIGHT); // FTSmaxweight = defined in fts_header
// Terms are added to index key verbatim.
if (TEXT_INDEX_VERSION_1 == textIndexVersion) {
- b.append("", term);
- b.append("", weight);
+ keyString.appendString(term);
}
// See comments at the top of file for termKeyPrefixLengthV2.
// Apply hash for text index version 2 to long terms (longer than 32 characters).
else if (TEXT_INDEX_VERSION_2 == textIndexVersion) {
if (term.size() <= termKeyPrefixLengthV2) {
- b.append("", term);
+ keyString.appendString(term);
} else {
union {
uint64_t hash[2];
@@ -235,20 +202,19 @@ void FTSIndexFormat::_appendIndexKey(BSONObjBuilder& b,
MurmurHash3_x64_128(term.data(), term.size(), seed, t.hash);
string keySuffix = mongo::toHexLower(t.data, sizeof(t.data));
invariant(termKeySuffixLengthV2 == keySuffix.size());
- b.append("", term.substr(0, termKeyPrefixLengthV2) + keySuffix);
+ keyString.appendString(term.substr(0, termKeyPrefixLengthV2) + keySuffix);
}
- b.append("", weight);
} else {
invariant(TEXT_INDEX_VERSION_3 == textIndexVersion);
if (term.size() <= termKeyPrefixLengthV3) {
- b.append("", term);
+ keyString.appendString(term);
} else {
string keySuffix = md5simpledigest(term);
invariant(termKeySuffixLengthV3 == keySuffix.size());
- b.append("", term.substr(0, termKeyPrefixLengthV3) + keySuffix);
+ keyString.appendString(term.substr(0, termKeyPrefixLengthV3) + keySuffix);
}
- b.append("", weight);
}
+ keyString.appendNumberDouble(weight);
}
} // namespace fts
} // namespace mongo
diff --git a/src/mongo/db/fts/fts_index_format.h b/src/mongo/db/fts/fts_index_format.h
index 5ac0823c0c0..a3a9edfa331 100644
--- a/src/mongo/db/fts/fts_index_format.h
+++ b/src/mongo/db/fts/fts_index_format.h
@@ -66,16 +66,28 @@ public:
private:
/**
- * Helper method to get return entry from the FTSIndex as a BSONObj
- * @param b, reference to the BSONOBjBuilder
- * @param weight, the weight of the term in the entry
- * @param term, the std::string term in the entry
- * @param textIndexVersion, index version. affects key format.
+ * Helper method to get return entry from the FTSIndex as a BSONObj.
+ * 'b' is a reference to the BSONOBjBuilder.
+ * 'weight' is the weight of the term in the entry.
+ * 'term' is the std::string term in the entry.
+ * 'textIndexVersion' is index version, affects key format.
*/
static void _appendIndexKey(BSONObjBuilder& b,
double weight,
const std::string& term,
TextIndexVersion textIndexVersion);
+
+ /**
+ * Helper method to get return entry from the FTSIndex as a BSONObj.
+ * 'keyString' is a reference to the KeyString builder.
+ * 'weight' is the weight of the term in the entry.
+ * 'term' is the std::string term in the entry.
+ * 'textIndexVersion' is index version, affects key format.
+ */
+ static void _appendIndexKey(KeyString::Builder& keyString,
+ double weight,
+ const std::string& term,
+ TextIndexVersion textIndexVersion);
};
} // namespace fts
} // namespace mongo