diff options
Diffstat (limited to 'src/mongo/bson/json.h')
-rw-r--r-- | src/mongo/bson/json.h | 865 |
1 files changed, 430 insertions, 435 deletions
diff --git a/src/mongo/bson/json.h b/src/mongo/bson/json.h index 34564765242..7a22d1d2186 100644 --- a/src/mongo/bson/json.h +++ b/src/mongo/bson/json.h @@ -35,454 +35,449 @@ namespace mongo { - /** - * Create a BSONObj from a JSON <http://www.json.org>, - * <http://www.ietf.org/rfc/rfc4627.txt> string. In addition to the JSON - * extensions described here - * <http://dochub.mongodb.org/core/mongodbextendedjson>, this function - * accepts unquoted field names and allows single quotes to optionally be - * used when specifying field names and std::string values instead of double - * quotes. JSON unicode escape sequences (of the form \uXXXX) are - * converted to utf8. +/** + * Create a BSONObj from a JSON <http://www.json.org>, + * <http://www.ietf.org/rfc/rfc4627.txt> string. In addition to the JSON + * extensions described here + * <http://dochub.mongodb.org/core/mongodbextendedjson>, this function + * accepts unquoted field names and allows single quotes to optionally be + * used when specifying field names and std::string values instead of double + * quotes. JSON unicode escape sequences (of the form \uXXXX) are + * converted to utf8. + * + * @throws MsgAssertionException if parsing fails. The message included with + * this assertion includes the character offset where parsing failed. + */ +BSONObj fromjson(const std::string& str); + +/** @param len will be size of JSON object in text chars. */ +BSONObj fromjson(const char* str, int* len = NULL); + +/** + * Tests whether the JSON string is an Array. + * + * Useful for assigning the result of fromjson to the right object type. Either: + * BSONObj + * BSONArray + * + * @example Using the method to select the proper type. + * If this method returns true, the user could store the result of fromjson + * inside a BSONArray, rather than a BSONObj, in order to have it print as an + * array when passed to tojson. + * + * @param str The JSON string to test. 
+ */ +bool isArray(StringData str); + +/** + * Convert a BSONArray to a JSON string. + * + * @param arr The BSON Array. + * @param format The JSON format (JS, TenGen, Strict). + * @param pretty Enables pretty output. + */ +std::string tojson(const BSONArray& arr, JsonStringFormat format = Strict, bool pretty = false); + +/** + * Convert a BSONObj to a JSON string. + * + * @param obj The BSON Object. + * @param format The JSON format (JS, TenGen, Strict). + * @param pretty Enables pretty output. + */ +std::string tojson(const BSONObj& obj, JsonStringFormat format = Strict, bool pretty = false); + +/** + * Parser class. A BSONObj is constructed incrementally by passing a + * BSONObjBuilder to the recursive parsing methods. The grammar for the + * element parsed is described before each function. + */ +class JParse { +public: + explicit JParse(StringData str); + + /* + * Notation: All-uppercase symbols denote non-terminals; all other + * symbols are literals. + */ + + /* + * VALUE : + * STRING + * | NUMBER + * | NUMBERINT + * | NUMBERLONG + * | OBJECT + * | ARRAY * - * @throws MsgAssertionException if parsing fails. The message included with - * this assertion includes the character offset where parsing failed. 
+ * | true + * | false + * | null + * | undefined + * + * | NaN + * | Infinity + * | -Infinity + * + * | DATE + * | TIMESTAMP + * | REGEX + * | OBJECTID + * | DBREF + * + * | new CONSTRUCTOR + */ +private: + Status value(StringData fieldName, BSONObjBuilder&); + + /* + * OBJECT : + * {} + * | { MEMBERS } + * | SPECIALOBJECT + * + * MEMBERS : + * PAIR + * | PAIR , MEMBERS + * + * PAIR : + * FIELD : VALUE + * + * SPECIALOBJECT : + * OIDOBJECT + * | BINARYOBJECT + * | DATEOBJECT + * | TIMESTAMPOBJECT + * | REGEXOBJECT + * | REFOBJECT + * | UNDEFINEDOBJECT + * | NUMBERLONGOBJECT + * | MINKEYOBJECT + * | MAXKEYOBJECT + * + */ +public: + Status object(StringData fieldName, BSONObjBuilder&, bool subObj = true); + Status parse(BSONObjBuilder& builder); + bool isArray(); + +private: + /* The following functions are called with the '{' and the first + * field already parsed since they are both implied given the + * context. */ + /* + * OIDOBJECT : + * { FIELD("$oid") : <24 character hex std::string> } */ - BSONObj fromjson(const std::string& str); + Status objectIdObject(StringData fieldName, BSONObjBuilder&); - /** @param len will be size of JSON object in text chars. */ - BSONObj fromjson(const char* str, int* len=NULL); + /* + * BINARYOBJECT : + * { FIELD("$binary") : <base64 representation of a binary std::string>, + * FIELD("$type") : <hexadecimal representation of a single byte + * indicating the data type> } + */ + Status binaryObject(StringData fieldName, BSONObjBuilder&); - /** - * Tests whether the JSON string is an Array. 
+ /* + * DATEOBJECT : + * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> } + */ + Status dateObject(StringData fieldName, BSONObjBuilder&); + + /* + * TIMESTAMPOBJECT : + * { FIELD("$timestamp") : { + * FIELD("t") : <32 bit unsigned integer for seconds since epoch>, + * FIELD("i") : <32 bit unsigned integer for the increment> } } + */ + Status timestampObject(StringData fieldName, BSONObjBuilder&); + + /* + * NOTE: the rules for the body of the regex are different here, + * since it is quoted instead of surrounded by slashes. + * REGEXOBJECT : + * { FIELD("$regex") : <string representing body of regex> } + * | { FIELD("$regex") : <string representing body of regex>, + * FIELD("$options") : <string representing regex options> } + */ + Status regexObject(StringData fieldName, BSONObjBuilder&); + + /* + * REFOBJECT : + * { FIELD("$ref") : <string representing collection name>, + * FIELD("$id") : <24 character hex std::string> } + * | { FIELD("$ref") : std::string , FIELD("$id") : OBJECTID } + * | { FIELD("$ref") : std::string , FIELD("$id") : OIDOBJECT } + */ + Status dbRefObject(StringData fieldName, BSONObjBuilder&); + + /* + * UNDEFINEDOBJECT : + * { FIELD("$undefined") : true } + */ + Status undefinedObject(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERLONGOBJECT : + * { FIELD("$numberLong") : "<number>" } + */ + Status numberLongObject(StringData fieldName, BSONObjBuilder&); + + /* + * MINKEYOBJECT : + * { FIELD("$minKey") : 1 } + */ + Status minKeyObject(StringData fieldName, BSONObjBuilder& builder); + + /* + * MAXKEYOBJECT : + * { FIELD("$maxKey") : 1 } + */ + Status maxKeyObject(StringData fieldName, BSONObjBuilder& builder); + + /* + * ARRAY : + * [] + * | [ ELEMENTS ] + * + * ELEMENTS : + * VALUE + * | VALUE , ELEMENTS + */ + Status array(StringData fieldName, BSONObjBuilder&, bool subObj = true); + + /* + * NOTE: Currently only Date can be preceded by the "new" keyword + * CONSTRUCTOR : + * DATE + */ + Status 
constructor(StringData fieldName, BSONObjBuilder&); + + /* The following functions only parse the body of the constructor + * between the parentheses, not including the constructor name */ + /* + * DATE : + * Date( <64 bit signed integer for milliseconds since epoch> ) + */ + Status date(StringData fieldName, BSONObjBuilder&); + + /* + * TIMESTAMP : + * Timestamp( <32 bit unsigned integer for seconds since epoch>, + * <32 bit unsigned integer for the increment> ) + */ + Status timestamp(StringData fieldName, BSONObjBuilder&); + + /* + * OBJECTID : + * ObjectId( <24 character hex std::string> ) + */ + Status objectId(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERLONG : + * NumberLong( <number> ) + */ + Status numberLong(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERINT : + * NumberInt( <number> ) + */ + Status numberInt(StringData fieldName, BSONObjBuilder&); + + /* + * DBREF : + * Dbref( <namespace std::string> , <24 character hex std::string> ) + */ + Status dbRef(StringData fieldName, BSONObjBuilder&); + + /* + * REGEX : + * / REGEXCHARS / REGEXOPTIONS + * + * REGEXCHARS : + * REGEXCHAR + * | REGEXCHAR REGEXCHARS * - * Useful for assigning the result of fromjson to the right object type. Either: - * BSONObj - * BSONArray + * REGEXCHAR : + * any-Unicode-character-except-/-or-\-or-CONTROLCHAR + * | \" + * | \' + * | \\ + * | \/ + * | \b + * | \f + * | \n + * | \r + * | \t + * | \v + * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT + * | \any-Unicode-character-except-x-or-[0-7] * - * @example Using the method to select the proper type. - * If this method returns true, the user could store the result of fromjson - * inside a BSONArray, rather than a BSONObj, in order to have it print as an - * array when passed to tojson. + * REGEXOPTIONS : + * REGEXOPTION + * | REGEXOPTION REGEXOPTIONS * - * @param str The JSON string to test. 
+ * REGEXOPTION : + * g | i | m | s */ - bool isArray(StringData str); + Status regex(StringData fieldName, BSONObjBuilder&); + Status regexPat(std::string* result); + Status regexOpt(std::string* result); + Status regexOptCheck(StringData opt); - /** - * Convert a BSONArray to a JSON string. + /* + * NUMBER : * - * @param arr The BSON Array. - * @param format The JSON format (JS, TenGen, Strict). - * @param pretty Enables pretty output. + * NOTE: Number parsing is based on standard library functions, not + * necessarily on the JSON numeric grammar. + * + * Number as value - strtoll and strtod + * Date - strtoll + * Timestamp - strtoul for both timestamp and increment and '-' + * before a number explicitly disallowed */ - std::string tojson( - const BSONArray& arr, - JsonStringFormat format = Strict, - bool pretty = false - ); + Status number(StringData fieldName, BSONObjBuilder&); - /** - * Convert a BSONObj to a JSON string. + /* + * FIELD : + * STRING + * | [a-zA-Z$_] FIELDCHARS + * + * FIELDCHARS : + * [a-zA-Z0-9$_] + * | [a-zA-Z0-9$_] FIELDCHARS + */ + Status field(std::string* result); + + /* + * std::string : + * " " + * | ' ' + * | " CHARS " + * | ' CHARS ' + */ + Status quotedString(std::string* result); + + /* + * CHARS : + * CHAR + * | CHAR CHARS + * + * Note: " or ' may be allowed depending on whether the std::string is + * double or single quoted + * + * CHAR : + * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR + * | \" + * | \' + * | \\ + * | \/ + * | \b + * | \f + * | \n + * | \r + * | \t + * | \v + * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT + * | \any-Unicode-character-except-x-or-[0-9] + * + * HEXDIGIT : [0..9a..fA..F] * - * @param obj The BSON Object. - * @param format The JSON format (JS, TenGen, Strict). - * @param pretty Enables pretty output. + * per http://www.ietf.org/rfc/rfc4627.txt, control characters are + * (U+0000 through U+001F). U+007F is not mentioned as a control + * character. 
+ * CONTROLCHAR : [0x00..0x1F] + * + * If there is not an error, result will contain a null terminated + * string, but there is no guarantee that it will not contain other + * null characters. + */ + Status chars(std::string* result, const char* terminalSet, const char* allowedSet = NULL); + + /** + * Converts the two byte Unicode code point to its UTF8 character + * encoding representation. This function returns a std::string because + * UTF8 encodings for code points from 0x0000 to 0xFFFF can range + * from one to three characters. */ - std::string tojson( - const BSONObj& obj, - JsonStringFormat format = Strict, - bool pretty = false - ); + std::string encodeUTF8(unsigned char first, unsigned char second) const; /** - * Parser class. A BSONObj is constructed incrementally by passing a - * BSONObjBuilder to the recursive parsing methods. The grammar for the - * element parsed is described before each function. - */ - class JParse { - public: - explicit JParse(StringData str); - - /* - * Notation: All-uppercase symbols denote non-terminals; all other - * symbols are literals. 
- */ - - /* - * VALUE : - * STRING - * | NUMBER - * | NUMBERINT - * | NUMBERLONG - * | OBJECT - * | ARRAY - * - * | true - * | false - * | null - * | undefined - * - * | NaN - * | Infinity - * | -Infinity - * - * | DATE - * | TIMESTAMP - * | REGEX - * | OBJECTID - * | DBREF - * - * | new CONSTRUCTOR - */ - private: - Status value(StringData fieldName, BSONObjBuilder&); - - /* - * OBJECT : - * {} - * | { MEMBERS } - * | SPECIALOBJECT - * - * MEMBERS : - * PAIR - * | PAIR , MEMBERS - * - * PAIR : - * FIELD : VALUE - * - * SPECIALOBJECT : - * OIDOBJECT - * | BINARYOBJECT - * | DATEOBJECT - * | TIMESTAMPOBJECT - * | REGEXOBJECT - * | REFOBJECT - * | UNDEFINEDOBJECT - * | NUMBERLONGOBJECT - * | MINKEYOBJECT - * | MAXKEYOBJECT - * - */ - public: - Status object(StringData fieldName, BSONObjBuilder&, bool subObj=true); - Status parse(BSONObjBuilder& builder); - bool isArray(); - - private: - /* The following functions are called with the '{' and the first - * field already parsed since they are both implied given the - * context. 
*/ - /* - * OIDOBJECT : - * { FIELD("$oid") : <24 character hex std::string> } - */ - Status objectIdObject(StringData fieldName, BSONObjBuilder&); - - /* - * BINARYOBJECT : - * { FIELD("$binary") : <base64 representation of a binary std::string>, - * FIELD("$type") : <hexadecimal representation of a single byte - * indicating the data type> } - */ - Status binaryObject(StringData fieldName, BSONObjBuilder&); - - /* - * DATEOBJECT : - * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> } - */ - Status dateObject(StringData fieldName, BSONObjBuilder&); - - /* - * TIMESTAMPOBJECT : - * { FIELD("$timestamp") : { - * FIELD("t") : <32 bit unsigned integer for seconds since epoch>, - * FIELD("i") : <32 bit unsigned integer for the increment> } } - */ - Status timestampObject(StringData fieldName, BSONObjBuilder&); - - /* - * NOTE: the rules for the body of the regex are different here, - * since it is quoted instead of surrounded by slashes. - * REGEXOBJECT : - * { FIELD("$regex") : <string representing body of regex> } - * | { FIELD("$regex") : <string representing body of regex>, - * FIELD("$options") : <string representing regex options> } - */ - Status regexObject(StringData fieldName, BSONObjBuilder&); - - /* - * REFOBJECT : - * { FIELD("$ref") : <string representing collection name>, - * FIELD("$id") : <24 character hex std::string> } - * | { FIELD("$ref") : std::string , FIELD("$id") : OBJECTID } - * | { FIELD("$ref") : std::string , FIELD("$id") : OIDOBJECT } - */ - Status dbRefObject(StringData fieldName, BSONObjBuilder&); - - /* - * UNDEFINEDOBJECT : - * { FIELD("$undefined") : true } - */ - Status undefinedObject(StringData fieldName, BSONObjBuilder&); - - /* - * NUMBERLONGOBJECT : - * { FIELD("$numberLong") : "<number>" } - */ - Status numberLongObject(StringData fieldName, BSONObjBuilder&); - - /* - * MINKEYOBJECT : - * { FIELD("$minKey") : 1 } - */ - Status minKeyObject(StringData fieldName, BSONObjBuilder& builder); - - /* - * 
MAXKEYOBJECT : - * { FIELD("$maxKey") : 1 } - */ - Status maxKeyObject(StringData fieldName, BSONObjBuilder& builder); - - /* - * ARRAY : - * [] - * | [ ELEMENTS ] - * - * ELEMENTS : - * VALUE - * | VALUE , ELEMENTS - */ - Status array(StringData fieldName, BSONObjBuilder&, bool subObj=true); - - /* - * NOTE: Currently only Date can be preceded by the "new" keyword - * CONSTRUCTOR : - * DATE - */ - Status constructor(StringData fieldName, BSONObjBuilder&); - - /* The following functions only parse the body of the constructor - * between the parentheses, not including the constructor name */ - /* - * DATE : - * Date( <64 bit signed integer for milliseconds since epoch> ) - */ - Status date(StringData fieldName, BSONObjBuilder&); - - /* - * TIMESTAMP : - * Timestamp( <32 bit unsigned integer for seconds since epoch>, - * <32 bit unsigned integer for the increment> ) - */ - Status timestamp(StringData fieldName, BSONObjBuilder&); - - /* - * OBJECTID : - * ObjectId( <24 character hex std::string> ) - */ - Status objectId(StringData fieldName, BSONObjBuilder&); - - /* - * NUMBERLONG : - * NumberLong( <number> ) - */ - Status numberLong(StringData fieldName, BSONObjBuilder&); - - /* - * NUMBERINT : - * NumberInt( <number> ) - */ - Status numberInt(StringData fieldName, BSONObjBuilder&); - - /* - * DBREF : - * Dbref( <namespace std::string> , <24 character hex std::string> ) - */ - Status dbRef(StringData fieldName, BSONObjBuilder&); - - /* - * REGEX : - * / REGEXCHARS / REGEXOPTIONS - * - * REGEXCHARS : - * REGEXCHAR - * | REGEXCHAR REGEXCHARS - * - * REGEXCHAR : - * any-Unicode-character-except-/-or-\-or-CONTROLCHAR - * | \" - * | \' - * | \\ - * | \/ - * | \b - * | \f - * | \n - * | \r - * | \t - * | \v - * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT - * | \any-Unicode-character-except-x-or-[0-7] - * - * REGEXOPTIONS : - * REGEXOPTION - * | REGEXOPTION REGEXOPTIONS - * - * REGEXOPTION : - * g | i | m | s - */ - Status regex(StringData fieldName, BSONObjBuilder&); - Status 
regexPat(std::string* result); - Status regexOpt(std::string* result); - Status regexOptCheck(StringData opt); - - /* - * NUMBER : - * - * NOTE: Number parsing is based on standard library functions, not - * necessarily on the JSON numeric grammar. - * - * Number as value - strtoll and strtod - * Date - strtoll - * Timestamp - strtoul for both timestamp and increment and '-' - * before a number explicitly disallowed - */ - Status number(StringData fieldName, BSONObjBuilder&); - - /* - * FIELD : - * STRING - * | [a-zA-Z$_] FIELDCHARS - * - * FIELDCHARS : - * [a-zA-Z0-9$_] - * | [a-zA-Z0-9$_] FIELDCHARS - */ - Status field(std::string* result); - - /* - * std::string : - * " " - * | ' ' - * | " CHARS " - * | ' CHARS ' - */ - Status quotedString(std::string* result); - - /* - * CHARS : - * CHAR - * | CHAR CHARS - * - * Note: " or ' may be allowed depending on whether the std::string is - * double or single quoted - * - * CHAR : - * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR - * | \" - * | \' - * | \\ - * | \/ - * | \b - * | \f - * | \n - * | \r - * | \t - * | \v - * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT - * | \any-Unicode-character-except-x-or-[0-9] - * - * HEXDIGIT : [0..9a..fA..F] - * - * per http://www.ietf.org/rfc/rfc4627.txt, control characters are - * (U+0000 through U+001F). U+007F is not mentioned as a control - * character. - * CONTROLCHAR : [0x00..0x1F] - * - * If there is not an error, result will contain a null terminated - * string, but there is no guarantee that it will not contain other - * null characters. - */ - Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL); - - /** - * Converts the two byte Unicode code point to its UTF8 character - * encoding representation. This function returns a std::string because - * UTF8 encodings for code points from 0x0000 to 0xFFFF can range - * from one to three characters. 
- */ - std::string encodeUTF8(unsigned char first, unsigned char second) const; - - /** - * @return true if the given token matches the next non whitespace - * sequence in our buffer, and false if the token doesn't match or - * we reach the end of our buffer. Do not update the pointer to our - * buffer (same as calling readTokenImpl with advance=false). - */ - inline bool peekToken(const char* token); - - /** - * @return true if the given token matches the next non whitespace - * sequence in our buffer, and false if the token doesn't match or - * we reach the end of our buffer. Updates the pointer to our - * buffer (same as calling readTokenImpl with advance=true). - */ - inline bool readToken(const char* token); - - /** - * @return true if the given token matches the next non whitespace - * sequence in our buffer, and false if the token doesn't match or - * we reach the end of our buffer. Do not update the pointer to our - * buffer if advance is false. - */ - bool readTokenImpl(const char* token, bool advance=true); - - /** - * @return true if the next field in our stream matches field. 
- * Handles single quoted, double quoted, and unquoted field names - */ - bool readField(StringData field); - - /** - * @return true if matchChar is in matchSet - * @return true if matchSet is NULL and false if it is an empty string - */ - bool match(char matchChar, const char* matchSet) const; - - /** - * @return true if every character in the std::string is a hex digit - */ - bool isHexString(StringData) const; - - /** - * @return true if every character in the std::string is a valid base64 - * character - */ - bool isBase64String(StringData) const; - - /** - * @return FailedToParse status with the given message and some - * additional context information - */ - Status parseError(StringData msg); - public: - inline int offset() { return (_input - _buf); } - - private: - /* - * _buf - start of our input buffer - * _input - cursor we advance in our input buffer - * _input_end - sentinel for the end of our input buffer - * - * _buf is the null terminated buffer containing the JSON std::string we - * are parsing. _input_end points to the null byte at the end of - * the buffer. strtoll, strtol, and strtod will access the null - * byte at the end of the buffer because they are assuming a c-style - * string. - */ - const char* const _buf; - const char* _input; - const char* const _input_end; - }; - -} // namespace mongo + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Do not update the pointer to our + * buffer (same as calling readTokenImpl with advance=false). + */ + inline bool peekToken(const char* token); + + /** + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Updates the pointer to our + * buffer (same as calling readTokenImpl with advance=true). 
+ */ + inline bool readToken(const char* token); + + /** + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Do not update the pointer to our + * buffer if advance is false. + */ + bool readTokenImpl(const char* token, bool advance = true); + + /** + * @return true if the next field in our stream matches field. + * Handles single quoted, double quoted, and unquoted field names + */ + bool readField(StringData field); + + /** + * @return true if matchChar is in matchSet + * @return true if matchSet is NULL and false if it is an empty string + */ + bool match(char matchChar, const char* matchSet) const; + + /** + * @return true if every character in the std::string is a hex digit + */ + bool isHexString(StringData) const; + + /** + * @return true if every character in the std::string is a valid base64 + * character + */ + bool isBase64String(StringData) const; + + /** + * @return FailedToParse status with the given message and some + * additional context information + */ + Status parseError(StringData msg); + +public: + inline int offset() { + return (_input - _buf); + } + +private: + /* + * _buf - start of our input buffer + * _input - cursor we advance in our input buffer + * _input_end - sentinel for the end of our input buffer + * + * _buf is the null terminated buffer containing the JSON std::string we + * are parsing. _input_end points to the null byte at the end of + * the buffer. strtoll, strtol, and strtod will access the null + * byte at the end of the buffer because they are assuming a c-style + * string. + */ + const char* const _buf; + const char* _input; + const char* const _input_end; +}; + +} // namespace mongo |