diff options
author | Andrew Morrow <acm@mongodb.com> | 2015-04-30 11:49:52 -0400 |
---|---|---|
committer | Andrew Morrow <acm@mongodb.com> | 2015-05-06 15:47:21 -0400 |
commit | 543ca54c22e13056b4f278e36b4c1b6436c2f1cb (patch) | |
tree | ce86697434bce1152c31df200341405e618472f8 /src/mongo/bson/json.h | |
parent | 0ec9948134ca39df062d59e7eaa212100631ecac (diff) | |
download | mongo-543ca54c22e13056b4f278e36b4c1b6436c2f1cb.tar.gz |
SERVER-9666 Move json utils into bson library
Diffstat (limited to 'src/mongo/bson/json.h')
-rw-r--r-- | src/mongo/bson/json.h | 488 |
1 files changed, 488 insertions, 0 deletions
diff --git a/src/mongo/bson/json.h b/src/mongo/bson/json.h new file mode 100644 index 00000000000..34564765242 --- /dev/null +++ b/src/mongo/bson/json.h @@ -0,0 +1,488 @@ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#pragma once + +#include <string> + +#include "mongo/bson/bsonobj.h" +#include "mongo/base/status.h" + +namespace mongo { + + /** + * Create a BSONObj from a JSON <http://www.json.org>, + * <http://www.ietf.org/rfc/rfc4627.txt> string. 
In addition to the JSON + * extensions described here + * <http://dochub.mongodb.org/core/mongodbextendedjson>, this function + * accepts unquoted field names and allows single quotes to optionally be + * used when specifying field names and string values instead of double + * quotes. JSON unicode escape sequences (of the form \uXXXX) are + * converted to utf8. + * + * @throws MsgAssertionException if parsing fails. The message included with + * this assertion includes the character offset where parsing failed. + */ + BSONObj fromjson(const std::string& str); + + /** @param len will be size of JSON object in text chars. */ + BSONObj fromjson(const char* str, int* len=NULL); + + /** + * Tests whether the JSON string is an Array. + * + * Useful for assigning the result of fromjson to the right object type. Either: + * BSONObj + * BSONArray + * + * @example Using the method to select the proper type. + * If this method returns true, the user could store the result of fromjson + * inside a BSONArray, rather than a BSONObj, in order to have it print as an + * array when passed to tojson. + * + * @param str The JSON string to test. + */ + bool isArray(StringData str); + + /** + * Convert a BSONArray to a JSON string. + * + * @param arr The BSON Array. + * @param format The JSON format (JS, TenGen, Strict). + * @param pretty Enables pretty output. + */ + std::string tojson( + const BSONArray& arr, + JsonStringFormat format = Strict, + bool pretty = false + ); + + /** + * Convert a BSONObj to a JSON string. + * + * @param obj The BSON Object. + * @param format The JSON format (JS, TenGen, Strict). + * @param pretty Enables pretty output. + */ + std::string tojson( + const BSONObj& obj, + JsonStringFormat format = Strict, + bool pretty = false + ); + + /** + * Parser class. A BSONObj is constructed incrementally by passing a + * BSONObjBuilder to the recursive parsing methods. The grammar for the + * element parsed is described before each function. 
+ */ + class JParse { + public: + explicit JParse(StringData str); + + /* + * Notation: All-uppercase symbols denote non-terminals; all other + * symbols are literals. + */ + + /* + * VALUE : + * STRING + * | NUMBER + * | NUMBERINT + * | NUMBERLONG + * | OBJECT + * | ARRAY + * + * | true + * | false + * | null + * | undefined + * + * | NaN + * | Infinity + * | -Infinity + * + * | DATE + * | TIMESTAMP + * | REGEX + * | OBJECTID + * | DBREF + * + * | new CONSTRUCTOR + */ + private: + Status value(StringData fieldName, BSONObjBuilder&); + + /* + * OBJECT : + * {} + * | { MEMBERS } + * | SPECIALOBJECT + * + * MEMBERS : + * PAIR + * | PAIR , MEMBERS + * + * PAIR : + * FIELD : VALUE + * + * SPECIALOBJECT : + * OIDOBJECT + * | BINARYOBJECT + * | DATEOBJECT + * | TIMESTAMPOBJECT + * | REGEXOBJECT + * | REFOBJECT + * | UNDEFINEDOBJECT + * | NUMBERLONGOBJECT + * | MINKEYOBJECT + * | MAXKEYOBJECT + * + */ + public: + Status object(StringData fieldName, BSONObjBuilder&, bool subObj=true); + Status parse(BSONObjBuilder& builder); + bool isArray(); + + private: + /* The following functions are called with the '{' and the first + * field already parsed since they are both implied given the + * context. 
*/ + /* + * OIDOBJECT : + * { FIELD("$oid") : <24 character hex string> } + */ + Status objectIdObject(StringData fieldName, BSONObjBuilder&); + + /* + * BINARYOBJECT : + * { FIELD("$binary") : <base64 representation of a binary string>, + * FIELD("$type") : <hexadecimal representation of a single byte + * indicating the data type> } + */ + Status binaryObject(StringData fieldName, BSONObjBuilder&); + + /* + * DATEOBJECT : + * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> } + */ + Status dateObject(StringData fieldName, BSONObjBuilder&); + + /* + * TIMESTAMPOBJECT : + * { FIELD("$timestamp") : { + * FIELD("t") : <32 bit unsigned integer for seconds since epoch>, + * FIELD("i") : <32 bit unsigned integer for the increment> } } + */ + Status timestampObject(StringData fieldName, BSONObjBuilder&); + + /* + * NOTE: the rules for the body of the regex are different here, + * since it is quoted instead of surrounded by slashes. + * REGEXOBJECT : + * { FIELD("$regex") : <string representing body of regex> } + * | { FIELD("$regex") : <string representing body of regex>, + * FIELD("$options") : <string representing regex options> } + */ + Status regexObject(StringData fieldName, BSONObjBuilder&); + + /* + * REFOBJECT : + * { FIELD("$ref") : <string representing collection name>, + * FIELD("$id") : <24 character hex string> } + * | { FIELD("$ref") : STRING , FIELD("$id") : OBJECTID } + * | { FIELD("$ref") : STRING , FIELD("$id") : OIDOBJECT } + */ + Status dbRefObject(StringData fieldName, BSONObjBuilder&); + + /* + * UNDEFINEDOBJECT : + * { FIELD("$undefined") : true } + */ + Status undefinedObject(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERLONGOBJECT : + * { FIELD("$numberLong") : "<number>" } + */ + Status numberLongObject(StringData fieldName, BSONObjBuilder&); + + /* + * MINKEYOBJECT : + * { FIELD("$minKey") : 1 } + */ + Status minKeyObject(StringData fieldName, BSONObjBuilder& builder); + + /* + * 
MAXKEYOBJECT : + * { FIELD("$maxKey") : 1 } + */ + Status maxKeyObject(StringData fieldName, BSONObjBuilder& builder); + + /* + * ARRAY : + * [] + * | [ ELEMENTS ] + * + * ELEMENTS : + * VALUE + * | VALUE , ELEMENTS + */ + Status array(StringData fieldName, BSONObjBuilder&, bool subObj=true); + + /* + * NOTE: Currently only Date can be preceded by the "new" keyword + * CONSTRUCTOR : + * DATE + */ + Status constructor(StringData fieldName, BSONObjBuilder&); + + /* The following functions only parse the body of the constructor + * between the parentheses, not including the constructor name */ + /* + * DATE : + * Date( <64 bit signed integer for milliseconds since epoch> ) + */ + Status date(StringData fieldName, BSONObjBuilder&); + + /* + * TIMESTAMP : + * Timestamp( <32 bit unsigned integer for seconds since epoch>, + * <32 bit unsigned integer for the increment> ) + */ + Status timestamp(StringData fieldName, BSONObjBuilder&); + + /* + * OBJECTID : + * ObjectId( <24 character hex string> ) + */ + Status objectId(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERLONG : + * NumberLong( <number> ) + */ + Status numberLong(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERINT : + * NumberInt( <number> ) + */ + Status numberInt(StringData fieldName, BSONObjBuilder&); + + /* + * DBREF : + * Dbref( <namespace string> , <24 character hex string> ) + */ + Status dbRef(StringData fieldName, BSONObjBuilder&); + + /* + * REGEX : + * / REGEXCHARS / REGEXOPTIONS + * + * REGEXCHARS : + * REGEXCHAR + * | REGEXCHAR REGEXCHARS + * + * REGEXCHAR : + * any-Unicode-character-except-/-or-\-or-CONTROLCHAR + * | \" + * | \' + * | \\ + * | \/ + * | \b + * | \f + * | \n + * | \r + * | \t + * | \v + * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT + * | \any-Unicode-character-except-x-or-[0-7] + * + * REGEXOPTIONS : + * REGEXOPTION + * | REGEXOPTION REGEXOPTIONS + * + * REGEXOPTION : + * g | i | m | s + */ + Status regex(StringData fieldName, BSONObjBuilder&); + Status 
regexPat(std::string* result); + Status regexOpt(std::string* result); + Status regexOptCheck(StringData opt); + + /* + * NUMBER : + * + * NOTE: Number parsing is based on standard library functions, not + * necessarily on the JSON numeric grammar. + * + * Number as value - strtoll and strtod + * Date - strtoll + * Timestamp - strtoul for both timestamp and increment and '-' + * before a number explicitly disallowed + */ + Status number(StringData fieldName, BSONObjBuilder&); + + /* + * FIELD : + * STRING + * | [a-zA-Z$_] FIELDCHARS + * + * FIELDCHARS : + * [a-zA-Z0-9$_] + * | [a-zA-Z0-9$_] FIELDCHARS + */ + Status field(std::string* result); + + /* + * STRING : + * " " + * | ' ' + * | " CHARS " + * | ' CHARS ' + */ + Status quotedString(std::string* result); + + /* + * CHARS : + * CHAR + * | CHAR CHARS + * + * Note: " or ' may be allowed depending on whether the string is + * double or single quoted + * + * CHAR : + * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR + * | \" + * | \' + * | \\ + * | \/ + * | \b + * | \f + * | \n + * | \r + * | \t + * | \v + * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT + * | \any-Unicode-character-except-x-or-[0-9] + * + * HEXDIGIT : [0..9a..fA..F] + * + * per http://www.ietf.org/rfc/rfc4627.txt, control characters are + * (U+0000 through U+001F). U+007F is not mentioned as a control + * character. + * CONTROLCHAR : [0x00..0x1F] + * + * If there is not an error, result will contain a null terminated + * string, but there is no guarantee that it will not contain other + * null characters. + */ + Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL); + + /** + * Converts the two byte Unicode code point to its UTF8 character + * encoding representation. This function returns a std::string because + * UTF8 encodings for code points from 0x0000 to 0xFFFF can range + * from one to three characters. 
+ */ + std::string encodeUTF8(unsigned char first, unsigned char second) const; + + /** + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Does not update the pointer to our + * buffer (same as calling readTokenImpl with advance=false). + */ + inline bool peekToken(const char* token); + + /** + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Updates the pointer to our + * buffer (same as calling readTokenImpl with advance=true). + */ + inline bool readToken(const char* token); + + /** + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Does not update the pointer to our + * buffer if advance is false. + */ + bool readTokenImpl(const char* token, bool advance=true); + + /** + * @return true if the next field in our stream matches field. 
+ * Handles single quoted, double quoted, and unquoted field names + */ + bool readField(StringData field); + + /** + * @return true if matchChar is in matchSet + * @return true if matchSet is NULL and false if it is an empty string + */ + bool match(char matchChar, const char* matchSet) const; + + /** + * @return true if every character in the string is a hex digit + */ + bool isHexString(StringData) const; + + /** + * @return true if every character in the string is a valid base64 + * character + */ + bool isBase64String(StringData) const; + + /** + * @return FailedToParse status with the given message and some + * additional context information + */ + Status parseError(StringData msg); + public: + inline int offset() { return (_input - _buf); } + + private: + /* + * _buf - start of our input buffer + * _input - cursor we advance in our input buffer + * _input_end - sentinel for the end of our input buffer + * + * _buf is the null terminated buffer containing the JSON string we + * are parsing. _input_end points to the null byte at the end of + * the buffer. strtoll, strtol, and strtod will access the null + * byte at the end of the buffer because they are assuming a c-style + * string. + */ + const char* const _buf; + const char* _input; + const char* const _input_end; + }; + +} // namespace mongo |