summary refs log tree commit diff
diff options
context:
space:
mode:
author Gregory Noma <gregory.noma@gmail.com> 2019-08-08 15:02:29 -0400
committer Gregory Noma <gregory.noma@gmail.com> 2019-08-15 13:02:11 -0400
commit 55d783629997fffd1a7e6a528de59a5404a51805 (patch)
tree 9110249d13339c76c71a57a4b3d5b00f8964af22
parent 5bdcbdb7dd33f47d809dc238cd5dfee1d91b0e09 (diff)
download mongo-55d783629997fffd1a7e6a528de59a5404a51805.tar.gz
SERVER-41726 Make FTSAccessMethod generate keys with KeyString
-rw-r--r-- src/mongo/db/fts/fts_index_format.cpp 82
-rw-r--r-- src/mongo/db/fts/fts_index_format.h 22
-rw-r--r-- src/mongo/db/storage/key_string.cpp 22
-rw-r--r-- src/mongo/db/storage/key_string.h 15
4 files changed, 71 insertions, 70 deletions
diff --git a/src/mongo/db/fts/fts_index_format.cpp b/src/mongo/db/fts/fts_index_format.cpp
index b514a67a6dd..75d343ccedb 100644
--- a/src/mongo/db/fts/fts_index_format.cpp
+++ b/src/mongo/db/fts/fts_index_format.cpp
@@ -78,31 +78,6 @@ const size_t termKeySuffixLengthV3 = 32U;
const size_t termKeyLengthV3 = termKeyPrefixLengthV3 + termKeySuffixLengthV3;
/**
- * Returns size of buffer required to store term in index key.
- * In version 1, terms are stored verbatim in key.
- * In version 2 and above, terms longer than 32 characters are hashed and combined
- * with a prefix.
- */
-int guessTermSize(const std::string& term, TextIndexVersion textIndexVersion) {
- if (TEXT_INDEX_VERSION_1 == textIndexVersion) {
- return term.size();
- } else if (TEXT_INDEX_VERSION_2 == textIndexVersion) {
- if (term.size() <= termKeyPrefixLengthV2) {
- return term.size();
- }
-
- return termKeyLengthV2;
- } else {
- invariant(TEXT_INDEX_VERSION_3 == textIndexVersion);
- if (term.size() <= termKeyPrefixLengthV3) {
- return term.size();
- }
-
- return termKeyLengthV3;
- }
-}
-
-/**
* Given an object being indexed, 'obj', and a path through 'obj', returns the corresponding BSON
* element, according to the indexing rules for the non-text fields of an FTS index key pattern.
*
@@ -162,37 +137,27 @@ void FTSIndexFormat::getKeys(const FTSSpec& spec,
TermFrequencyMap term_freqs;
spec.scoreDocument(obj, &term_freqs);
- // create index keys from raw scores
- // only 1 per string
- long long keyBSONSize = 0;
-
for (TermFrequencyMap::const_iterator i = term_freqs.begin(); i != term_freqs.end(); ++i) {
const string& term = i->first;
double weight = i->second;
- // guess the total size of the btree entry based on the size of the weight, term tuple
- int guess = 5 /* bson overhead */ + 10 /* weight */ + 8 /* term overhead */ +
- /* term size (could be truncated/hashed) */
- guessTermSize(term, spec.getTextIndexVersion()) + extraSize;
-
- BSONObjBuilder b(guess); // builds a BSON object with guess length.
- for (unsigned k = 0; k < extrasBefore.size(); k++) {
- b.appendAs(extrasBefore[k], "");
+ KeyString::Builder keyString(keyStringVersion, ordering);
+ for (const auto& elem : extrasBefore) {
+ keyString.appendBSONElement(elem);
}
- _appendIndexKey(b, weight, term, spec.getTextIndexVersion());
- for (unsigned k = 0; k < extrasAfter.size(); k++) {
- b.appendAs(extrasAfter[k], "");
+ _appendIndexKey(keyString, weight, term, spec.getTextIndexVersion());
+ for (const auto& elem : extrasAfter) {
+ keyString.appendBSONElement(elem);
}
- BSONObj res = b.obj();
-
- verify(guess >= res.objsize());
- KeyString::HeapBuilder keyString(keyStringVersion, res, ordering);
if (id) {
keyString.appendRecordId(*id);
}
- keys->insert(keyString.release());
- keyBSONSize += res.objsize();
+
+ /*
+ * Insert a copy to only allocate as much buffer space as necessary.
+ */
+ keys->insert(keyString.getValueCopy());
}
}
@@ -207,25 +172,27 @@ BSONObj FTSIndexFormat::getIndexKey(double weight,
b.appendAs(i.next(), "");
}
- _appendIndexKey(b, weight, term, textIndexVersion);
- return b.obj();
+ KeyString::Builder keyString(KeyString::Version::kLatestVersion, KeyString::ALL_ASCENDING);
+ _appendIndexKey(keyString, weight, term, textIndexVersion);
+ auto key = KeyString::toBson(keyString, KeyString::ALL_ASCENDING);
+
+ return b.appendElements(key).obj();
}
-void FTSIndexFormat::_appendIndexKey(BSONObjBuilder& b,
+void FTSIndexFormat::_appendIndexKey(KeyString::Builder& keyString,
double weight,
const string& term,
TextIndexVersion textIndexVersion) {
- verify(weight >= 0 && weight <= MAX_WEIGHT); // FTSmaxweight = defined in fts_header
+ invariant(weight >= 0 && weight <= MAX_WEIGHT); // FTSmaxweight = defined in fts_header
// Terms are added to index key verbatim.
if (TEXT_INDEX_VERSION_1 == textIndexVersion) {
- b.append("", term);
- b.append("", weight);
+ keyString.appendString(term);
}
// See comments at the top of file for termKeyPrefixLengthV2.
// Apply hash for text index version 2 to long terms (longer than 32 characters).
else if (TEXT_INDEX_VERSION_2 == textIndexVersion) {
if (term.size() <= termKeyPrefixLengthV2) {
- b.append("", term);
+ keyString.appendString(term);
} else {
union {
uint64_t hash[2];
@@ -235,20 +202,19 @@ void FTSIndexFormat::_appendIndexKey(BSONObjBuilder& b,
MurmurHash3_x64_128(term.data(), term.size(), seed, t.hash);
string keySuffix = mongo::toHexLower(t.data, sizeof(t.data));
invariant(termKeySuffixLengthV2 == keySuffix.size());
- b.append("", term.substr(0, termKeyPrefixLengthV2) + keySuffix);
+ keyString.appendString(term.substr(0, termKeyPrefixLengthV2) + keySuffix);
}
- b.append("", weight);
} else {
invariant(TEXT_INDEX_VERSION_3 == textIndexVersion);
if (term.size() <= termKeyPrefixLengthV3) {
- b.append("", term);
+ keyString.appendString(term);
} else {
string keySuffix = md5simpledigest(term);
invariant(termKeySuffixLengthV3 == keySuffix.size());
- b.append("", term.substr(0, termKeyPrefixLengthV3) + keySuffix);
+ keyString.appendString(term.substr(0, termKeyPrefixLengthV3) + keySuffix);
}
- b.append("", weight);
}
+ keyString.appendNumberDouble(weight);
}
} // namespace fts
} // namespace mongo
diff --git a/src/mongo/db/fts/fts_index_format.h b/src/mongo/db/fts/fts_index_format.h
index 5ac0823c0c0..a3a9edfa331 100644
--- a/src/mongo/db/fts/fts_index_format.h
+++ b/src/mongo/db/fts/fts_index_format.h
@@ -66,16 +66,28 @@ public:
private:
/**
- * Helper method to get return entry from the FTSIndex as a BSONObj
- * @param b, reference to the BSONOBjBuilder
- * @param weight, the weight of the term in the entry
- * @param term, the std::string term in the entry
- * @param textIndexVersion, index version. affects key format.
+ * Helper method to append the FTS index entry for a term to a BSONObj builder.
+ * 'b' is a reference to the BSONObjBuilder.
+ * 'weight' is the weight of the term in the entry.
+ * 'term' is the std::string term in the entry.
+ * 'textIndexVersion' is the index version, which affects the key format.
*/
static void _appendIndexKey(BSONObjBuilder& b,
double weight,
const std::string& term,
TextIndexVersion textIndexVersion);
+
+ /**
+ * Helper method to append the FTS index entry for a term to a KeyString builder.
+ * 'keyString' is a reference to the KeyString builder.
+ * 'weight' is the weight of the term in the entry.
+ * 'term' is the std::string term in the entry.
+ * 'textIndexVersion' is the index version, which affects the key format.
+ */
+ static void _appendIndexKey(KeyString::Builder& keyString,
+ double weight,
+ const std::string& term,
+ TextIndexVersion textIndexVersion);
};
} // namespace fts
} // namespace mongo
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp
index f8ec7b19180..f446598596a 100644
--- a/src/mongo/db/storage/key_string.cpp
+++ b/src/mongo/db/storage/key_string.cpp
@@ -343,15 +343,23 @@ void BuilderBase<BufferT>::resetToKey(const BSONObj& obj,
template <class BufferT>
void BuilderBase<BufferT>::appendBSONElement(const BSONElement& elem, const StringTransformFn& f) {
- invariant(_state == BuildState::kEmpty || _state == BuildState::kAppendingBSONElements);
+ _verifyAppendingState();
+ _appendBsonValue(elem, _shouldInvertOnAppend(), nullptr, f);
+ _elemCount++;
+}
- const int elemIdx = _elemCount++;
- const bool invert = (_ordering.get(elemIdx) == -1);
+template <class BufferT>
+void BuilderBase<BufferT>::appendString(StringData val) {
+ _verifyAppendingState();
+ _appendString(val, _shouldInvertOnAppend(), nullptr);
+ _elemCount++;
+}
- if (_state == BuildState::kEmpty) {
- _transition(BuildState::kAppendingBSONElements);
- }
- _appendBsonValue(elem, invert, nullptr, f);
+template <class BufferT>
+void BuilderBase<BufferT>::appendNumberDouble(double num) {
+ _verifyAppendingState();
+ _appendNumberDouble(num, _shouldInvertOnAppend());
+ _elemCount++;
}
template <class BufferT>
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index 4707c6671d1..9865c901958 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -450,6 +450,9 @@ public:
*/
void appendBSONElement(const BSONElement& elem, const StringTransformFn& f = nullptr);
+ void appendString(StringData val);
+ void appendNumberDouble(double num);
+
/**
* Resets to an empty state.
* Equivalent to but faster than *this = Builder(ord, discriminator)
@@ -573,6 +576,14 @@ private:
}
}
+ void _verifyAppendingState() {
+ invariant(_state == BuildState::kEmpty || _state == BuildState::kAppendingBSONElements);
+
+ if (_state == BuildState::kEmpty) {
+ _transition(BuildState::kAppendingBSONElements);
+ }
+ }
+
void _transition(BuildState to) {
// We can empty at any point since it just means that we are clearing the buffer.
if (to == BuildState::kEmpty) {
@@ -613,6 +624,10 @@ private:
_state = to;
}
+ bool _shouldInvertOnAppend() const {
+ return _ordering.get(_elemCount) == -1;
+ }
+
TypeBits _typeBits;
BufferT _buffer;