diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2018-04-09 14:12:05 -0400 |
---|---|---|
committer | Charlie Swanson <charlie.swanson@mongodb.com> | 2018-04-13 16:18:35 -0400 |
commit | a820491e9402a52d7575157a9897306d49129370 (patch) | |
tree | 61ed25fdec3912a8fc1407bcb52380b110b697fa /src/mongo/db/pipeline/resume_token.cpp | |
parent | 4b894b4a55467c38bb7910317af00793b493de37 (diff) | |
download | mongo-a820491e9402a52d7575157a9897306d49129370.tar.gz |
SERVER-34313 Use hex-encoded string for resume token
Diffstat (limited to 'src/mongo/db/pipeline/resume_token.cpp')
-rw-r--r-- | src/mongo/db/pipeline/resume_token.cpp | 184 |
1 files changed, 134 insertions, 50 deletions
diff --git a/src/mongo/db/pipeline/resume_token.cpp b/src/mongo/db/pipeline/resume_token.cpp index 2618a70186d..fd1e8cd3ce7 100644 --- a/src/mongo/db/pipeline/resume_token.cpp +++ b/src/mongo/db/pipeline/resume_token.cpp @@ -26,20 +26,59 @@ * it in the license file. */ +#include "mongo/platform/basic.h" + #include "mongo/db/pipeline/resume_token.h" #include <boost/optional/optional_io.hpp> +#include <limits> #include "mongo/bson/bsonmisc.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/pipeline/document_sources_gen.h" #include "mongo/db/pipeline/value_comparator.h" #include "mongo/db/storage/key_string.h" +#include "mongo/util/hex.h" namespace mongo { constexpr StringData ResumeToken::kDataFieldName; constexpr StringData ResumeToken::kTypeBitsFieldName; +namespace { + +/** + * Returns a pair of values representing the key-string encoded data and the type bits respectively. + * Both are of type BinData, but if the type bits of the key string are all zeros then the second + * Value will be the missing value. + */ +std::pair<Value, Value> encodeInBinDataFormat(const ResumeTokenData& data) { + // In the legacy format we serialize clusterTime, then documentKey, then UUID. + BSONObjBuilder builder; + builder.append("", data.clusterTime); + data.documentKey.addToBsonObj(&builder, ""); + if (data.uuid) { + if (data.documentKey.missing()) { + // Never allow a missing document key with a UUID present, as that will mess up + // the field order. + builder.appendNull(""); + } + data.uuid->appendToBuilder(&builder, ""); + } + auto keyObj = builder.obj(); + + // After writing all the pieces to an object, keystring-encode that object into binary. + KeyString encodedToken(KeyString::Version::V1, keyObj, Ordering::make(BSONObj())); + const auto& typeBits = encodedToken.getTypeBits(); + + auto rawBinary = + BSONBinData(encodedToken.getBuffer(), encodedToken.getSize(), BinDataType::BinDataGeneral); + auto typeBitsValue = typeBits.isAllZeros() + ? Value() + : Value(BSONBinData(typeBits.getBuffer(), typeBits.getSize(), BinDataType::BinDataGeneral)); + return {Value(rawBinary), typeBitsValue}; +} +} // namespace + bool ResumeTokenData::operator==(const ResumeTokenData& other) const { return clusterTime == other.clusterTime && (Value::compare(this->documentKey, other.documentKey, nullptr) == 0) && uuid == other.uuid; @@ -56,38 +95,47 @@ ResumeToken::ResumeToken(const Document& resumeDoc) { uassert(40647, str::stream() << "Bad resume token: _data of missing or of wrong type" << resumeDoc.toString(), - _keyStringData.getType() == BinData && - _keyStringData.getBinData().type == BinDataGeneral); + (_keyStringData.getType() == BSONType::BinData && + _keyStringData.getBinData().type == BinDataGeneral) || + _keyStringData.getType() == BSONType::String); uassert(40648, str::stream() << "Bad resume token: _typeBits of wrong type" << resumeDoc.toString(), - _typeBits.missing() || - (_typeBits.getType() == BinData && _typeBits.getBinData().type == BinDataGeneral)); + _typeBits.missing() || (_typeBits.getType() == BSONType::BinData && + _typeBits.getBinData().type == BinDataGeneral)); } -// We encode the resume token as a KeyString with the sequence: clusterTime, documentKey, uuid. +// We encode the resume token as a KeyString with the sequence: clusterTime, uuid, documentKey. // Only the clusterTime is required. ResumeToken::ResumeToken(const ResumeTokenData& data) { BSONObjBuilder builder; builder.append("", data.clusterTime); - data.documentKey.addToBsonObj(&builder, ""); + uassert(50788, + "Unexpected resume token with a documentKey but no UUID", + data.uuid || data.documentKey.missing()); + if (data.uuid) { - if (data.documentKey.missing()) { - // Never allow a missing document key with a UUID present, as that will mess up - // the field order. - builder.appendNull(""); - } data.uuid->appendToBuilder(&builder, ""); } + data.documentKey.addToBsonObj(&builder, ""); auto keyObj = builder.obj(); KeyString encodedToken(KeyString::Version::V1, keyObj, Ordering::make(BSONObj())); - _keyStringData = Value( - BSONBinData(encodedToken.getBuffer(), encodedToken.getSize(), BinDataType::BinDataGeneral)); + _keyStringData = Value(toHex(encodedToken.getBuffer(), encodedToken.getSize())); const auto& typeBits = encodedToken.getTypeBits(); if (!typeBits.isAllZeros()) _typeBits = Value( BSONBinData(typeBits.getBuffer(), typeBits.getSize(), BinDataType::BinDataGeneral)); } +bool ResumeToken::operator==(const ResumeToken& other) const { + // '_keyStringData' is enough to determine equality. The type bits are used to unambiguously + // re-construct the original data, but we do not expect any two resume tokens to have the same + // data and different type bits, since that would imply they have (1) the same timestamp and (2) + // the same documentKey (possibly different types). This should not be possible because + // documents with the same documentKey should be on the same shard and therefore should have + // different timestamps. + return ValueComparator::kInstance.evaluate(_keyStringData == other._keyStringData); +} + ResumeTokenData ResumeToken::getData() const { KeyString::TypeBits typeBits(KeyString::Version::V1); if (!_typeBits.missing()) { @@ -95,7 +143,29 @@ ResumeTokenData ResumeToken::getData() const { BufReader typeBitsReader(typeBitsBinData.data, typeBitsBinData.length); typeBits.resetFromBuffer(&typeBitsReader); } - BSONBinData keyStringBinData = _keyStringData.getBinData(); + + // Accept either serialization format. + BufBuilder hexDecodeBuf; // Keep this in scope until we've decoded the bytes. + BSONBinData keyStringBinData{nullptr, 0, BinDataType::BinDataGeneral}; + boost::optional<std::string> decodedString; + switch (_keyStringData.getType()) { + case BSONType::BinData: { + keyStringBinData = _keyStringData.getBinData(); + break; + } + case BSONType::String: { + uassert(ErrorCodes::FailedToParse, + "resume token string was not a valid hex string", + isValidHex(_keyStringData.getStringData())); + fromHexString(_keyStringData.getStringData(), &hexDecodeBuf); + keyStringBinData = BSONBinData( + hexDecodeBuf.buf(), hexDecodeBuf.getSize(), BinDataType::BinDataGeneral); + break; + } + default: + // We validate the type at parse time. + MONGO_UNREACHABLE; + } auto internalBson = KeyString::toBson(static_cast<const char*>(keyStringBinData.data), keyStringBinData.length, Ordering::make(BSONObj()), @@ -105,47 +175,61 @@ ResumeTokenData ResumeToken::getData() const { ResumeTokenData result; uassert(40649, "invalid empty resume token", i.more()); result.clusterTime = i.next().timestamp(); - if (i.more()) - result.documentKey = Value(i.next()); - if (i.more()) - result.uuid = uassertStatusOK(UUID::parse(i.next())); + if (!i.more()) { + // There was nothing other than the timestamp. + return result; + } + switch (_keyStringData.getType()) { + case BSONType::BinData: { + // In the old format, the documentKey came first, then the UUID. + result.documentKey = Value(i.next()); + if (i.more()) { + result.uuid = uassertStatusOK(UUID::parse(i.next())); + } + break; + } + case BSONType::String: { + // In the new format, the UUID comes first, then the documentKey. + result.uuid = uassertStatusOK(UUID::parse(i.next())); + if (i.more()) { + result.documentKey = Value(i.next()); + } + break; + } + default: { MONGO_UNREACHABLE } + } uassert(40646, "invalid oversized resume token", !i.more()); return result; } -int ResumeToken::compare(const ResumeToken& other) const { - BSONBinData thisData = _keyStringData.getBinData(); - BSONBinData otherData = other._keyStringData.getBinData(); - return StringData(static_cast<const char*>(thisData.data), thisData.length) - .compare(StringData(static_cast<const char*>(otherData.data), otherData.length)); -} - -bool ResumeToken::operator==(const ResumeToken& other) const { - return compare(other) == 0; -} - -bool ResumeToken::operator!=(const ResumeToken& other) const { - return compare(other) != 0; -} - -bool ResumeToken::operator<(const ResumeToken& other) const { - return compare(other) < 0; -} - -bool ResumeToken::operator<=(const ResumeToken& other) const { - return compare(other) <= 0; -} - -bool ResumeToken::operator>(const ResumeToken& other) const { - return compare(other) > 0; -} - -bool ResumeToken::operator>=(const ResumeToken& other) const { - return compare(other) >= 0; -} +Document ResumeToken::toDocument(SerializationFormat format) const { + // In most cases we expect to be serializing in the same format we were given. + const auto dataType = _keyStringData.getType(); + if ((dataType == BSONType::BinData && format == SerializationFormat::kBinData) || + (dataType == BSONType::String && format == SerializationFormat::kHexString)) { + return Document{{kDataFieldName, _keyStringData}, {kTypeBitsFieldName, _typeBits}}; + } -Document ResumeToken::toDocument() const { - return Document{{kDataFieldName, _keyStringData}, {kTypeBitsFieldName, _typeBits}}; + // If we have to switch formats, then decompose the resume token into its pieces and + // re-construct a resume token in the new format. + auto data = getData(); + + switch (format) { + case SerializationFormat::kBinData: { + // Going from the three pieces of data into BinData requires special logic, since + // re-constructing a ResumeToken from 'data' will generate the new format. + Value rawBinary, typeBits; + std::tie(rawBinary, typeBits) = encodeInBinDataFormat(data); + return Document{{kDataFieldName, rawBinary}, {kTypeBitsFieldName, typeBits}}; + } + case SerializationFormat::kHexString: { + // Constructing a new ResumeToken from the three pieces of data will generate a + // hex-encoded KeyString as the token. + const ResumeToken newResumeToken(data); + return newResumeToken.toDocument(format); + } + default: { MONGO_UNREACHABLE; } + } } ResumeToken ResumeToken::parse(const Document& resumeDoc) { |