/** * Copyright (C) 2008 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #pragma once #include #include "mongo/bson/bsonobj.h" #include "mongo/base/status.h" namespace mongo { /** * Create a BSONObj from a JSON , * string. In addition to the JSON * extensions extensions described here * , this function * accepts unquoted field names and allows single quotes to optionally be * used when specifying field names and std::string values instead of double * quotes. JSON unicode escape sequences (of the form \uXXXX) are * converted to utf8. * * @throws MsgAssertionException if parsing fails. The message included with * this assertion includes the character offset where parsing failed. */ BSONObj fromjson(const std::string& str); /** @param len will be size of JSON object in text chars. */ BSONObj fromjson(const char* str, int* len=NULL); /** * Tests whether the JSON string is an Array. * * Useful for assigning the result of fromjson to the right object type. Either: * BSONObj * BSONArray * * @example Using the method to select the proper type. * If this method returns true, the user could store the result of fromjson * inside a BSONArray, rather than a BSONObj, in order to have it print as an * array when passed to tojson. * * @param obj The JSON string to test. */ bool isArray(StringData str); /** * Convert a BSONArray to a JSON string. * * @param arr The BSON Array. * @param format The JSON format (JS, TenGen, Strict). * @param pretty Enables pretty output. */ std::string tojson( const BSONArray& arr, JsonStringFormat format = Strict, bool pretty = false ); /** * Convert a BSONObj to a JSON string. * * @param obj The BSON Object. * @param format The JSON format (JS, TenGen, Strict). * @param pretty Enables pretty output. */ std::string tojson( const BSONObj& obj, JsonStringFormat format = Strict, bool pretty = false ); /** * Parser class. A BSONObj is constructed incrementally by passing a * BSONObjBuilder to the recursive parsing methods. The grammar for the * element parsed is described before each function. */ class JParse { public: explicit JParse(StringData str); /* * Notation: All-uppercase symbols denote non-terminals; all other * symbols are literals. */ /* * VALUE : * STRING * | NUMBER * | NUMBERINT * | NUMBERLONG * | OBJECT * | ARRAY * * | true * | false * | null * | undefined * * | NaN * | Infinity * | -Infinity * * | DATE * | TIMESTAMP * | REGEX * | OBJECTID * | DBREF * * | new CONSTRUCTOR */ private: Status value(StringData fieldName, BSONObjBuilder&); /* * OBJECT : * {} * | { MEMBERS } * | SPECIALOBJECT * * MEMBERS : * PAIR * | PAIR , MEMBERS * * PAIR : * FIELD : VALUE * * SPECIALOBJECT : * OIDOBJECT * | BINARYOBJECT * | DATEOBJECT * | TIMESTAMPOBJECT * | REGEXOBJECT * | REFOBJECT * | UNDEFINEDOBJECT * | NUMBERLONGOBJECT * | MINKEYOBJECT * | MAXKEYOBJECT * */ public: Status object(StringData fieldName, BSONObjBuilder&, bool subObj=true); Status parse(BSONObjBuilder& builder); bool isArray(); private: /* The following functions are called with the '{' and the first * field already parsed since they are both implied given the * context. */ /* * OIDOBJECT : * { FIELD("$oid") : <24 character hex std::string> } */ Status objectIdObject(StringData fieldName, BSONObjBuilder&); /* * BINARYOBJECT : * { FIELD("$binary") : , * FIELD("$type") : } */ Status binaryObject(StringData fieldName, BSONObjBuilder&); /* * DATEOBJECT : * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> } */ Status dateObject(StringData fieldName, BSONObjBuilder&); /* * TIMESTAMPOBJECT : * { FIELD("$timestamp") : { * FIELD("t") : <32 bit unsigned integer for seconds since epoch>, * FIELD("i") : <32 bit unsigned integer for the increment> } } */ Status timestampObject(StringData fieldName, BSONObjBuilder&); /* * NOTE: the rules for the body of the regex are different here, * since it is quoted instead of surrounded by slashes. * REGEXOBJECT : * { FIELD("$regex") : } * | { FIELD("$regex") : , * FIELD("$options") : } */ Status regexObject(StringData fieldName, BSONObjBuilder&); /* * REFOBJECT : * { FIELD("$ref") : , * FIELD("$id") : <24 character hex std::string> } * | { FIELD("$ref") : std::string , FIELD("$id") : OBJECTID } * | { FIELD("$ref") : std::string , FIELD("$id") : OIDOBJECT } */ Status dbRefObject(StringData fieldName, BSONObjBuilder&); /* * UNDEFINEDOBJECT : * { FIELD("$undefined") : true } */ Status undefinedObject(StringData fieldName, BSONObjBuilder&); /* * NUMBERLONGOBJECT : * { FIELD("$numberLong") : "" } */ Status numberLongObject(StringData fieldName, BSONObjBuilder&); /* * MINKEYOBJECT : * { FIELD("$minKey") : 1 } */ Status minKeyObject(StringData fieldName, BSONObjBuilder& builder); /* * MAXKEYOBJECT : * { FIELD("$maxKey") : 1 } */ Status maxKeyObject(StringData fieldName, BSONObjBuilder& builder); /* * ARRAY : * [] * | [ ELEMENTS ] * * ELEMENTS : * VALUE * | VALUE , ELEMENTS */ Status array(StringData fieldName, BSONObjBuilder&, bool subObj=true); /* * NOTE: Currently only Date can be preceded by the "new" keyword * CONSTRUCTOR : * DATE */ Status constructor(StringData fieldName, BSONObjBuilder&); /* The following functions only parse the body of the constructor * between the parentheses, not including the constructor name */ /* * DATE : * Date( <64 bit signed integer for milliseconds since epoch> ) */ Status date(StringData fieldName, BSONObjBuilder&); /* * TIMESTAMP : * Timestamp( <32 bit unsigned integer for seconds since epoch>, * <32 bit unsigned integer for the increment> ) */ Status timestamp(StringData fieldName, BSONObjBuilder&); /* * OBJECTID : * ObjectId( <24 character hex std::string> ) */ Status objectId(StringData fieldName, BSONObjBuilder&); /* * NUMBERLONG : * NumberLong( ) */ Status numberLong(StringData fieldName, BSONObjBuilder&); /* * NUMBERINT : * NumberInt( ) */ Status numberInt(StringData fieldName, BSONObjBuilder&); /* * DBREF : * Dbref( , <24 character hex std::string> ) */ Status dbRef(StringData fieldName, BSONObjBuilder&); /* * REGEX : * / REGEXCHARS / REGEXOPTIONS * * REGEXCHARS : * REGEXCHAR * | REGEXCHAR REGEXCHARS * * REGEXCHAR : * any-Unicode-character-except-/-or-\-or-CONTROLCHAR * | \" * | \' * | \\ * | \/ * | \b * | \f * | \n * | \r * | \t * | \v * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT * | \any-Unicode-character-except-x-or-[0-7] * * REGEXOPTIONS : * REGEXOPTION * | REGEXOPTION REGEXOPTIONS * * REGEXOPTION : * g | i | m | s */ Status regex(StringData fieldName, BSONObjBuilder&); Status regexPat(std::string* result); Status regexOpt(std::string* result); Status regexOptCheck(StringData opt); /* * NUMBER : * * NOTE: Number parsing is based on standard library functions, not * necessarily on the JSON numeric grammar. * * Number as value - strtoll and strtod * Date - strtoll * Timestamp - strtoul for both timestamp and increment and '-' * before a number explicity disallowed */ Status number(StringData fieldName, BSONObjBuilder&); /* * FIELD : * STRING * | [a-zA-Z$_] FIELDCHARS * * FIELDCHARS : * [a-zA-Z0-9$_] * | [a-zA-Z0-9$_] FIELDCHARS */ Status field(std::string* result); /* * std::string : * " " * | ' ' * | " CHARS " * | ' CHARS ' */ Status quotedString(std::string* result); /* * CHARS : * CHAR * | CHAR CHARS * * Note: " or ' may be allowed depending on whether the std::string is * double or single quoted * * CHAR : * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR * | \" * | \' * | \\ * | \/ * | \b * | \f * | \n * | \r * | \t * | \v * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT * | \any-Unicode-character-except-x-or-[0-9] * * HEXDIGIT : [0..9a..fA..F] * * per http://www.ietf.org/rfc/rfc4627.txt, control characters are * (U+0000 through U+001F). U+007F is not mentioned as a control * character. * CONTROLCHAR : [0x00..0x1F] * * If there is not an error, result will contain a null terminated * string, but there is no guarantee that it will not contain other * null characters. */ Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL); /** * Converts the two byte Unicode code point to its UTF8 character * encoding representation. This function returns a std::string because * UTF8 encodings for code points from 0x0000 to 0xFFFF can range * from one to three characters. */ std::string encodeUTF8(unsigned char first, unsigned char second) const; /** * @return true if the given token matches the next non whitespace * sequence in our buffer, and false if the token doesn't match or * we reach the end of our buffer. Do not update the pointer to our * buffer (same as calling readTokenImpl with advance=false). */ inline bool peekToken(const char* token); /** * @return true if the given token matches the next non whitespace * sequence in our buffer, and false if the token doesn't match or * we reach the end of our buffer. Updates the pointer to our * buffer (same as calling readTokenImpl with advance=true). */ inline bool readToken(const char* token); /** * @return true if the given token matches the next non whitespace * sequence in our buffer, and false if the token doesn't match or * we reach the end of our buffer. Do not update the pointer to our * buffer if advance is false. */ bool readTokenImpl(const char* token, bool advance=true); /** * @return true if the next field in our stream matches field. * Handles single quoted, double quoted, and unquoted field names */ bool readField(StringData field); /** * @return true if matchChar is in matchSet * @return true if matchSet is NULL and false if it is an empty string */ bool match(char matchChar, const char* matchSet) const; /** * @return true if every character in the std::string is a hex digit */ bool isHexString(StringData) const; /** * @return true if every character in the std::string is a valid base64 * character */ bool isBase64String(StringData) const; /** * @return FailedToParse status with the given message and some * additional context information */ Status parseError(StringData msg); public: inline int offset() { return (_input - _buf); } private: /* * _buf - start of our input buffer * _input - cursor we advance in our input buffer * _input_end - sentinel for the end of our input buffer * * _buf is the null terminated buffer containing the JSON std::string we * are parsing. _input_end points to the null byte at the end of * the buffer. strtoll, strtol, and strtod will access the null * byte at the end of the buffer because they are assuming a c-style * string. */ const char* const _buf; const char* _input; const char* const _input_end; }; } // namespace mongo