SERVER-34313 Use hex-encoded string for resume token

author: Charlie Swanson <charlie.swanson@mongodb.com> 2018-04-09 14:12:05 -0400
committer: Charlie Swanson <charlie.swanson@mongodb.com> 2018-04-13 16:18:35 -0400
commit: a820491e9402a52d7575157a9897306d49129370 (patch)
tree: 61ed25fdec3912a8fc1407bcb52380b110b697fa /src/mongo/db/pipeline/resume_token.h
parent: 4b894b4a55467c38bb7910317af00793b493de37 (diff)
download: mongo-a820491e9402a52d7575157a9897306d49129370.tar.gz
1 files changed, 56 insertions, 35 deletions
diff --git a/src/mongo/db/pipeline/resume_token.h b/src/mongo/db/pipeline/resume_token.h
index ba41270fbed..7d1bb3c432f 100644
--- a/src/mongo/db/pipeline/resume_token.h
+++ b/src/mongo/db/pipeline/resume_token.h
@@ -59,43 +59,64 @@ struct ResumeTokenData {
 std::ostream& operator<<(std::ostream& out, const ResumeTokenData& tokenData);
 
 /**
- * A token passed in by the user to indicate where in the oplog we should start for
- * $changeStream.  This token has the following format:
- * {
- *   _data: <binary data>,
- *   _typeBits: <binary data>
- * }
- * The _data field data is encoded such that byte by byte comparisons provide the correct
- * ordering of tokens.  The _typeBits field may be missing and should not affect token
- * comparison.
+ * A token passed in by the user to indicate where in the oplog we should start for $changeStream.
+ * This token has one of the following formats:
+ * 1. Using BinData:
+ *   {
+ *     _data: BinData - The keystring encoded resume token, in clusterTime, documentKey, UUID order.
+ *     _typeBits: BinData - The keystring type bits used for deserialization.
+ *   }
+ * 2. Using a hex-encoded string in a similar format:
+ *   {
+ *     _data: String - A hex encoding of the binary generated by keystring encoding the clusterTime,
+ *     UUID, then documentKey in that order.
+ *     _typeBits: BinData - The keystring type bits used for deserialization.
+ *   }
+ *   The _data field is encoded such that string comparisons provide the correct ordering of
+ *   tokens. Unlike the BinData, this can be sorted correctly using a MongoDB sort. BinData
+ *   unfortunately orders by the length of the data first, then by the contents.
+ *
+ *   In both cases, the _typeBits field may be missing and should not affect token comparison.
  */
-
 class ResumeToken {
 public:
+    enum class SerializationFormat {
+        kBinData,
+        kHexString,
+    };
+
+    constexpr static StringData kDataFieldName = "_data"_sd;
+    constexpr static StringData kTypeBitsFieldName = "_typeBits"_sd;
+
+    /**
+     * Parse a resume token from a BSON object; used as an interface to the IDL parser.
+     */
+    static ResumeToken parse(const BSONObj& resumeBson) {
+        return ResumeToken::parse(Document(resumeBson));
+    }
+
+    static ResumeToken parse(const Document& document);
+
     /**
      * The default no-argument constructor is required by the IDL for types used as non-optional
      * fields.
      */
     ResumeToken() = default;
 
-    explicit ResumeToken(const ResumeTokenData& resumeValue);
-
-    bool operator==(const ResumeToken&) const;
-    bool operator!=(const ResumeToken&) const;
-    bool operator<(const ResumeToken&) const;
-    bool operator<=(const ResumeToken&) const;
-    bool operator>(const ResumeToken&) const;
-    bool operator>=(const ResumeToken&) const;
-
-    /** Three way comparison, returns 0 if *this is equal to other, < 0 if *this is less than
-     * other, and > 0 if *this is greater than other.
+    /**
+     * Builds a ResumeToken from 'resumeValue', encoding it using the hex-encoded string format.
      */
-    int compare(const ResumeToken& other) const;
+    explicit ResumeToken(const ResumeTokenData& resumeValue);
 
-    Document toDocument() const;
+    Document toDocument(SerializationFormat) const;
 
-    BSONObj toBSON() const {
-        return toDocument().toBson();
+    /**
+     * Because we use the IDL we require a serializer. However, the serialization format depends on
+     * the feature compatibility version, so a serializer without an argument doesn't make sense.
+     * This should never be used.
+     */
+    BSONObj toBSON_do_not_use() const {
+        MONGO_UNREACHABLE;
     }
 
     ResumeTokenData getData() const;
@@ -104,26 +125,26 @@ public:
         return getData().clusterTime;
     }
 
-    /**
-     * Parse a resume token from a BSON object; used as an interface to the IDL parser.
-     */
-    static ResumeToken parse(const BSONObj& resumeBson) {
-        return ResumeToken::parse(Document(resumeBson));
+    bool operator==(const ResumeToken&) const;
+    bool operator!=(const ResumeToken& other) const {
+        return !(*this == other);
     }
 
-    static ResumeToken parse(const Document& document);
-
     friend std::ostream& operator<<(std::ostream& out, const ResumeToken& token) {
         return out << token.getData();
     }
 
-    constexpr static StringData kDataFieldName = "_data"_sd;
-    constexpr static StringData kTypeBitsFieldName = "_typeBits"_sd;
-
 private:
     explicit ResumeToken(const Document& resumeData);
 
+    // This is either the BinData or the hex-encoded string encoding all the pieces of the
+    // resume token.
     Value _keyStringData;
+
+    // Since we are using a KeyString encoding, we might lose some information about what the
+    // original types of the serialized values were. For example, the integer 2 and the double 2.0
+    // will generate the same KeyString. We keep the type bits around so we can deserialize without
+    // losing information.
     Value _typeBits;
 };
 }  // namespace mongo
author	Charlie Swanson <charlie.swanson@mongodb.com>	2018-04-09 14:12:05 -0400
committer	Charlie Swanson <charlie.swanson@mongodb.com>	2018-04-13 16:18:35 -0400
commit	a820491e9402a52d7575157a9897306d49129370 (patch)
tree	61ed25fdec3912a8fc1407bcb52380b110b697fa /src/mongo/db/pipeline/resume_token.h
parent	4b894b4a55467c38bb7910317af00793b493de37 (diff)
download	mongo-a820491e9402a52d7575157a9897306d49129370.tar.gz