path: root/src/mongo/db/pipeline/resume_token.cpp
author    Charlie Swanson <charlie.swanson@mongodb.com>  2018-04-09 14:12:05 -0400
committer Charlie Swanson <charlie.swanson@mongodb.com>  2018-04-13 16:18:35 -0400
commit    a820491e9402a52d7575157a9897306d49129370 (patch)
tree      61ed25fdec3912a8fc1407bcb52380b110b697fa /src/mongo/db/pipeline/resume_token.cpp
parent    4b894b4a55467c38bb7910317af00793b493de37 (diff)
download  mongo-a820491e9402a52d7575157a9897306d49129370.tar.gz
SERVER-34313 Use hex-encoded string for resume token
Diffstat (limited to 'src/mongo/db/pipeline/resume_token.cpp')
-rw-r--r--  src/mongo/db/pipeline/resume_token.cpp | 184
1 file changed, 134 insertions(+), 50 deletions(-)
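The patch below switches the serialized `_data` field of the resume token from a BinData value holding the raw KeyString bytes to a hex-encoded string of those same bytes (via `toHex`), while `getData()` learns to accept either representation (decoding strings with `fromHexString`). As a rough illustration of the hex round-trip the new format relies on, here is a minimal, self-contained C++ sketch. It is not mongo's actual `toHex()`/`fromHexString()` from `mongo/util/hex.h`; the helper names, the uppercase digit casing, and the sample bytes are assumptions made for the example only.

```cpp
// Standalone sketch of hex-encoding/decoding a byte buffer, analogous to how the
// new resume-token format stores KeyString bytes as a hex string. Illustrative only.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Encode raw bytes as a hex string (uppercase digits assumed here).
std::string toHexString(const std::vector<std::uint8_t>& bytes) {
    static const char kDigits[] = "0123456789ABCDEF";
    std::string out;
    out.reserve(bytes.size() * 2);
    for (auto b : bytes) {
        out.push_back(kDigits[b >> 4]);
        out.push_back(kDigits[b & 0x0F]);
    }
    return out;
}

// Decode a hex string back into raw bytes; returns false on malformed input,
// mirroring the "valid hex" check the patch performs before decoding.
bool fromHexString(const std::string& hex, std::vector<std::uint8_t>* out) {
    if (hex.size() % 2 != 0)
        return false;
    auto nibble = [](char c, std::uint8_t* v) {
        if (c >= '0' && c <= '9') { *v = static_cast<std::uint8_t>(c - '0'); return true; }
        if (c >= 'a' && c <= 'f') { *v = static_cast<std::uint8_t>(c - 'a' + 10); return true; }
        if (c >= 'A' && c <= 'F') { *v = static_cast<std::uint8_t>(c - 'A' + 10); return true; }
        return false;
    };
    out->clear();
    for (std::size_t i = 0; i < hex.size(); i += 2) {
        std::uint8_t hi = 0, lo = 0;
        if (!nibble(hex[i], &hi) || !nibble(hex[i + 1], &lo))
            return false;
        out->push_back(static_cast<std::uint8_t>((hi << 4) | lo));
    }
    return true;
}

int main() {
    // Pretend these are the bytes of a KeyString-encoded resume token (made-up data).
    std::vector<std::uint8_t> keyStringBytes{0x82, 0x5A, 0xCB, 0x3C, 0x00, 0x00, 0x00, 0x01};
    std::string token = toHexString(keyStringBytes);
    std::cout << "hex token: " << token << "\n";

    std::vector<std::uint8_t> decoded;
    if (fromHexString(token, &decoded) && decoded == keyStringBytes)
        std::cout << "round-trip ok\n";
    return 0;
}
```

Because both the BinData bytes and their hex string preserve byte order, the new `operator==` in the patch can compare only `_keyStringData`, regardless of which representation a given token carries.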
diff --git a/src/mongo/db/pipeline/resume_token.cpp b/src/mongo/db/pipeline/resume_token.cpp
index 2618a70186d..fd1e8cd3ce7 100644
--- a/src/mongo/db/pipeline/resume_token.cpp
+++ b/src/mongo/db/pipeline/resume_token.cpp
@@ -26,20 +26,59 @@
* it in the license file.
*/
+#include "mongo/platform/basic.h"
+
#include "mongo/db/pipeline/resume_token.h"
#include <boost/optional/optional_io.hpp>
+#include <limits>
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/pipeline/document_sources_gen.h"
#include "mongo/db/pipeline/value_comparator.h"
#include "mongo/db/storage/key_string.h"
+#include "mongo/util/hex.h"
namespace mongo {
constexpr StringData ResumeToken::kDataFieldName;
constexpr StringData ResumeToken::kTypeBitsFieldName;
+namespace {
+
+/**
+ * Returns a pair of values representing the key-string encoded data and the type bits respectively.
+ * Both are of type BinData, but if the type bits of the key string are all zeros then the second
+ * Value will be the missing value.
+ */
+std::pair<Value, Value> encodeInBinDataFormat(const ResumeTokenData& data) {
+ // In the legacy format we serialize clusterTime, then documentKey, then UUID.
+ BSONObjBuilder builder;
+ builder.append("", data.clusterTime);
+ data.documentKey.addToBsonObj(&builder, "");
+ if (data.uuid) {
+ if (data.documentKey.missing()) {
+ // Never allow a missing document key with a UUID present, as that will mess up
+ // the field order.
+ builder.appendNull("");
+ }
+ data.uuid->appendToBuilder(&builder, "");
+ }
+ auto keyObj = builder.obj();
+
+ // After writing all the pieces to an object, keystring-encode that object into binary.
+ KeyString encodedToken(KeyString::Version::V1, keyObj, Ordering::make(BSONObj()));
+ const auto& typeBits = encodedToken.getTypeBits();
+
+ auto rawBinary =
+ BSONBinData(encodedToken.getBuffer(), encodedToken.getSize(), BinDataType::BinDataGeneral);
+ auto typeBitsValue = typeBits.isAllZeros()
+ ? Value()
+ : Value(BSONBinData(typeBits.getBuffer(), typeBits.getSize(), BinDataType::BinDataGeneral));
+ return {Value(rawBinary), typeBitsValue};
+}
+} // namespace
+
bool ResumeTokenData::operator==(const ResumeTokenData& other) const {
return clusterTime == other.clusterTime &&
(Value::compare(this->documentKey, other.documentKey, nullptr) == 0) && uuid == other.uuid;
@@ -56,38 +95,47 @@ ResumeToken::ResumeToken(const Document& resumeDoc) {
uassert(40647,
str::stream() << "Bad resume token: _data of missing or of wrong type"
<< resumeDoc.toString(),
- _keyStringData.getType() == BinData &&
- _keyStringData.getBinData().type == BinDataGeneral);
+ (_keyStringData.getType() == BSONType::BinData &&
+ _keyStringData.getBinData().type == BinDataGeneral) ||
+ _keyStringData.getType() == BSONType::String);
uassert(40648,
str::stream() << "Bad resume token: _typeBits of wrong type" << resumeDoc.toString(),
- _typeBits.missing() ||
- (_typeBits.getType() == BinData && _typeBits.getBinData().type == BinDataGeneral));
+ _typeBits.missing() || (_typeBits.getType() == BSONType::BinData &&
+ _typeBits.getBinData().type == BinDataGeneral));
}
-// We encode the resume token as a KeyString with the sequence: clusterTime, documentKey, uuid.
+// We encode the resume token as a KeyString with the sequence: clusterTime, uuid, documentKey.
// Only the clusterTime is required.
ResumeToken::ResumeToken(const ResumeTokenData& data) {
BSONObjBuilder builder;
builder.append("", data.clusterTime);
- data.documentKey.addToBsonObj(&builder, "");
+ uassert(50788,
+ "Unexpected resume token with a documentKey but no UUID",
+ data.uuid || data.documentKey.missing());
+
if (data.uuid) {
- if (data.documentKey.missing()) {
- // Never allow a missing document key with a UUID present, as that will mess up
- // the field order.
- builder.appendNull("");
- }
data.uuid->appendToBuilder(&builder, "");
}
+ data.documentKey.addToBsonObj(&builder, "");
auto keyObj = builder.obj();
KeyString encodedToken(KeyString::Version::V1, keyObj, Ordering::make(BSONObj()));
- _keyStringData = Value(
- BSONBinData(encodedToken.getBuffer(), encodedToken.getSize(), BinDataType::BinDataGeneral));
+ _keyStringData = Value(toHex(encodedToken.getBuffer(), encodedToken.getSize()));
const auto& typeBits = encodedToken.getTypeBits();
if (!typeBits.isAllZeros())
_typeBits = Value(
BSONBinData(typeBits.getBuffer(), typeBits.getSize(), BinDataType::BinDataGeneral));
}
+bool ResumeToken::operator==(const ResumeToken& other) const {
+ // '_keyStringData' is enough to determine equality. The type bits are used to unambiguously
+ // re-construct the original data, but we do not expect any two resume tokens to have the same
+ // data and different type bits, since that would imply they have (1) the same timestamp and (2)
+ // the same documentKey (possibly different types). This should not be possible because
+ // documents with the same documentKey should be on the same shard and therefore should have
+ // different timestamps.
+ return ValueComparator::kInstance.evaluate(_keyStringData == other._keyStringData);
+}
+
ResumeTokenData ResumeToken::getData() const {
KeyString::TypeBits typeBits(KeyString::Version::V1);
if (!_typeBits.missing()) {
@@ -95,7 +143,29 @@ ResumeTokenData ResumeToken::getData() const {
BufReader typeBitsReader(typeBitsBinData.data, typeBitsBinData.length);
typeBits.resetFromBuffer(&typeBitsReader);
}
- BSONBinData keyStringBinData = _keyStringData.getBinData();
+
+ // Accept either serialization format.
+ BufBuilder hexDecodeBuf; // Keep this in scope until we've decoded the bytes.
+ BSONBinData keyStringBinData{nullptr, 0, BinDataType::BinDataGeneral};
+ boost::optional<std::string> decodedString;
+ switch (_keyStringData.getType()) {
+ case BSONType::BinData: {
+ keyStringBinData = _keyStringData.getBinData();
+ break;
+ }
+ case BSONType::String: {
+ uassert(ErrorCodes::FailedToParse,
+ "resume token string was not a valid hex string",
+ isValidHex(_keyStringData.getStringData()));
+ fromHexString(_keyStringData.getStringData(), &hexDecodeBuf);
+ keyStringBinData = BSONBinData(
+ hexDecodeBuf.buf(), hexDecodeBuf.getSize(), BinDataType::BinDataGeneral);
+ break;
+ }
+ default:
+ // We validate the type at parse time.
+ MONGO_UNREACHABLE;
+ }
auto internalBson = KeyString::toBson(static_cast<const char*>(keyStringBinData.data),
keyStringBinData.length,
Ordering::make(BSONObj()),
@@ -105,47 +175,61 @@ ResumeTokenData ResumeToken::getData() const {
ResumeTokenData result;
uassert(40649, "invalid empty resume token", i.more());
result.clusterTime = i.next().timestamp();
- if (i.more())
- result.documentKey = Value(i.next());
- if (i.more())
- result.uuid = uassertStatusOK(UUID::parse(i.next()));
+ if (!i.more()) {
+ // There was nothing other than the timestamp.
+ return result;
+ }
+ switch (_keyStringData.getType()) {
+ case BSONType::BinData: {
+ // In the old format, the documentKey came first, then the UUID.
+ result.documentKey = Value(i.next());
+ if (i.more()) {
+ result.uuid = uassertStatusOK(UUID::parse(i.next()));
+ }
+ break;
+ }
+ case BSONType::String: {
+ // In the new format, the UUID comes first, then the documentKey.
+ result.uuid = uassertStatusOK(UUID::parse(i.next()));
+ if (i.more()) {
+ result.documentKey = Value(i.next());
+ }
+ break;
+ }
+ default: { MONGO_UNREACHABLE }
+ }
uassert(40646, "invalid oversized resume token", !i.more());
return result;
}
-int ResumeToken::compare(const ResumeToken& other) const {
- BSONBinData thisData = _keyStringData.getBinData();
- BSONBinData otherData = other._keyStringData.getBinData();
- return StringData(static_cast<const char*>(thisData.data), thisData.length)
- .compare(StringData(static_cast<const char*>(otherData.data), otherData.length));
-}
-
-bool ResumeToken::operator==(const ResumeToken& other) const {
- return compare(other) == 0;
-}
-
-bool ResumeToken::operator!=(const ResumeToken& other) const {
- return compare(other) != 0;
-}
-
-bool ResumeToken::operator<(const ResumeToken& other) const {
- return compare(other) < 0;
-}
-
-bool ResumeToken::operator<=(const ResumeToken& other) const {
- return compare(other) <= 0;
-}
-
-bool ResumeToken::operator>(const ResumeToken& other) const {
- return compare(other) > 0;
-}
-
-bool ResumeToken::operator>=(const ResumeToken& other) const {
- return compare(other) >= 0;
-}
+Document ResumeToken::toDocument(SerializationFormat format) const {
+ // In most cases we expect to be serializing in the same format we were given.
+ const auto dataType = _keyStringData.getType();
+ if ((dataType == BSONType::BinData && format == SerializationFormat::kBinData) ||
+ (dataType == BSONType::String && format == SerializationFormat::kHexString)) {
+ return Document{{kDataFieldName, _keyStringData}, {kTypeBitsFieldName, _typeBits}};
+ }
-Document ResumeToken::toDocument() const {
- return Document{{kDataFieldName, _keyStringData}, {kTypeBitsFieldName, _typeBits}};
+ // If we have to switch formats, then decompose the resume token into its pieces and
+ // re-construct a resume token in the new format.
+ auto data = getData();
+
+ switch (format) {
+ case SerializationFormat::kBinData: {
+ // Going from the three pieces of data into BinData requires special logic, since
+ // re-constructing a ResumeToken from 'data' will generate the new format.
+ Value rawBinary, typeBits;
+ std::tie(rawBinary, typeBits) = encodeInBinDataFormat(data);
+ return Document{{kDataFieldName, rawBinary}, {kTypeBitsFieldName, typeBits}};
+ }
+ case SerializationFormat::kHexString: {
+ // Constructing a new ResumeToken from the three pieces of data will generate a
+ // hex-encoded KeyString as the token.
+ const ResumeToken newResumeToken(data);
+ return newResumeToken.toDocument(format);
+ }
+ default: { MONGO_UNREACHABLE; }
+ }
}
ResumeToken ResumeToken::parse(const Document& resumeDoc) {