/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include <string>

#include "mongo/bson/bsonobj.h"
#include "mongo/base/status.h"
#include "mongo/client/export_macros.h"
namespace mongo {
/**
 * Create a BSONObj from a JSON string (JSON as specified by RFC 4627,
 * http://www.ietf.org/rfc/rfc4627.txt).
 *
 * In addition to the MongoDB JSON extensions (the special objects and
 * constructors listed in the JParse grammar comments in this header), this
 * function accepts unquoted field names and allows single quotes to
 * optionally be used instead of double quotes when specifying field names
 * and string values. JSON unicode escape sequences (of the form \uXXXX) are
 * converted to UTF-8.
 *
 * @param str The JSON text to parse.
 * @return The BSONObj built from the parsed text.
 * @throws MsgAssertionException if parsing fails. The message included with
 *         this assertion includes the character offset where parsing failed.
 */
MONGO_CLIENT_API BSONObj fromjson(const std::string& str);
/**
 * Create a BSONObj from JSON text held in a C string.
 *
 * @param str The JSON text to parse. NOTE(review): presumably must be
 *        null-terminated, since no length is passed in -- confirm against
 *        the implementation.
 * @param len Optional out-parameter (defaults to NULL). If supplied, it will
 *        be the size of the JSON object in text chars.
 * @return The BSONObj built from the parsed text.
 */
MONGO_CLIENT_API BSONObj fromjson(const char* str, int* len=NULL);
/**
 * Tests whether the JSON string is an Array.
 *
 * Useful for assigning the result of fromjson to the right object type,
 * which is either:
 *     BSONObj
 *     BSONArray
 *
 * Example: using the method to select the proper type.
 *     If this method returns true, the user could store the result of
 *     fromjson inside a BSONArray, rather than a BSONObj, in order to have
 *     it print as an array when passed to tojson.
 *
 * @param str The JSON string to test.
 * @return true if the JSON string is an Array.
 */
MONGO_CLIENT_API bool isArray(StringData str);
/**
 * Convert a BSONArray to a JSON string.
 *
 * @param arr The BSON Array.
 * @param format The JSON format (JS, TenGen, Strict). Defaults to Strict.
 * @param pretty Enables pretty output. Defaults to false.
 * @return The JSON text for arr.
 */
MONGO_CLIENT_API std::string tojson(
const BSONArray& arr,
JsonStringFormat format = Strict,
bool pretty = false
);
/**
 * Convert a BSONObj to a JSON string.
 *
 * @param obj The BSON Object.
 * @param format The JSON format (JS, TenGen, Strict). Defaults to Strict.
 * @param pretty Enables pretty output. Defaults to false.
 * @return The JSON text for obj.
 */
MONGO_CLIENT_API std::string tojson(
const BSONObj& obj,
JsonStringFormat format = Strict,
bool pretty = false
);
/**
 * Parser class. A BSONObj is constructed incrementally by passing a
 * BSONObjBuilder to the recursive parsing methods. The grammar for the
 * element parsed is described before each function.
 *
 * Grammar notation: all-uppercase symbols denote non-terminals; all other
 * symbols are literals. Text in <angle brackets> is an informal description
 * of a terminal.
 */
class JParse {
public:
    /**
     * @param str The JSON text to parse. The member comments at the bottom
     *        of this class state that the buffer being parsed is null
     *        terminated.
     */
    explicit JParse(StringData str);

private:
    /*
     * VALUE :
     *     STRING
     *   | NUMBER
     *   | NUMBERINT
     *   | NUMBERLONG
     *   | OBJECT
     *   | ARRAY
     *
     *   | true
     *   | false
     *   | null
     *   | undefined
     *
     *   | NaN
     *   | Infinity
     *   | -Infinity
     *
     *   | DATE
     *   | TIMESTAMP
     *   | REGEX
     *   | OBJECTID
     *   | DBREF
     *
     *   | new CONSTRUCTOR
     */
    Status value(StringData fieldName, BSONObjBuilder&);

public:
    /*
     * OBJECT :
     *     {}
     *   | { MEMBERS }
     *   | SPECIALOBJECT
     *
     * MEMBERS :
     *     PAIR
     *   | PAIR , MEMBERS
     *
     * PAIR :
     *     FIELD : VALUE
     *
     * SPECIALOBJECT :
     *     OIDOBJECT
     *   | BINARYOBJECT
     *   | DATEOBJECT
     *   | TIMESTAMPOBJECT
     *   | REGEXOBJECT
     *   | REFOBJECT
     *   | UNDEFINEDOBJECT
     *   | NUMBERLONGOBJECT
     *   | MINKEYOBJECT
     *   | MAXKEYOBJECT
     *
     * NOTE(review): subObj presumably selects whether the parsed object is
     * appended under fieldName as a sub-object or written directly into the
     * builder -- confirm against the implementation.
     */
    Status object(StringData fieldName, BSONObjBuilder&, bool subObj=true);

    /*
     * NOTE(review): presumably the top-level entry point that parses the
     * whole input into builder -- confirm against the implementation.
     */
    Status parse(BSONObjBuilder& builder);

    /*
     * NOTE(review): presumably reports whether the input text is a JSON
     * array (cf. the free function isArray) -- confirm against the
     * implementation.
     */
    bool isArray();

private:
    /* The following functions are called with the '{' and the first
     * field already parsed since they are both implied given the
     * context. */

    /*
     * OIDOBJECT :
     *     { FIELD("$oid") : <24 character hex string> }
     */
    Status objectIdObject(StringData fieldName, BSONObjBuilder&);

    /*
     * BINARYOBJECT :
     *     { FIELD("$binary") : <base64 representation of a binary string>,
     *          FIELD("$type") : <hexadecimal representation of a single byte
     *              indicating the data type> }
     */
    Status binaryObject(StringData fieldName, BSONObjBuilder&);

    /*
     * DATEOBJECT :
     *     { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> }
     */
    Status dateObject(StringData fieldName, BSONObjBuilder&);

    /*
     * TIMESTAMPOBJECT :
     *     { FIELD("$timestamp") : {
     *         FIELD("t") : <32 bit unsigned integer for seconds since epoch>,
     *         FIELD("i") : <32 bit unsigned integer for the increment> } }
     */
    Status timestampObject(StringData fieldName, BSONObjBuilder&);

    /*
     * NOTE: the rules for the body of the regex are different here,
     * since it is quoted instead of surrounded by slashes.
     *
     * REGEXOBJECT :
     *     { FIELD("$regex") : <string representing body of regex> }
     *   | { FIELD("$regex") : <string representing body of regex>,
     *          FIELD("$options") : <string representing regex options> }
     */
    Status regexObject(StringData fieldName, BSONObjBuilder&);

    /*
     * REFOBJECT :
     *     { FIELD("$ref") : <string representing collection name>,
     *          FIELD("$id") : <24 character hex string> }
     *   | { FIELD("$ref") : STRING , FIELD("$id") : OBJECTID }
     *   | { FIELD("$ref") : STRING , FIELD("$id") : OIDOBJECT }
     */
    Status dbRefObject(StringData fieldName, BSONObjBuilder&);

    /*
     * UNDEFINEDOBJECT :
     *     { FIELD("$undefined") : true }
     */
    Status undefinedObject(StringData fieldName, BSONObjBuilder&);

    /*
     * NUMBERLONGOBJECT :
     *     { FIELD("$numberLong") : "<number>" }
     */
    Status numberLongObject(StringData fieldName, BSONObjBuilder&);

    /*
     * MINKEYOBJECT :
     *     { FIELD("$minKey") : 1 }
     */
    Status minKeyObject(StringData fieldName, BSONObjBuilder& builder);

    /*
     * MAXKEYOBJECT :
     *     { FIELD("$maxKey") : 1 }
     */
    Status maxKeyObject(StringData fieldName, BSONObjBuilder& builder);

    /*
     * ARRAY :
     *     []
     *   | [ ELEMENTS ]
     *
     * ELEMENTS :
     *     VALUE
     *   | VALUE , ELEMENTS
     */
    Status array(StringData fieldName, BSONObjBuilder&, bool subObj=true);

    /*
     * NOTE: Currently only Date can be preceded by the "new" keyword
     *
     * CONSTRUCTOR :
     *     DATE
     */
    Status constructor(StringData fieldName, BSONObjBuilder&);

    /* The following functions only parse the body of the constructor
     * between the parentheses, not including the constructor name */

    /*
     * DATE :
     *     Date( <64 bit signed integer for milliseconds since epoch> )
     */
    Status date(StringData fieldName, BSONObjBuilder&);

    /*
     * TIMESTAMP :
     *     Timestamp( <32 bit unsigned integer for seconds since epoch>,
     *          <32 bit unsigned integer for the increment> )
     */
    Status timestamp(StringData fieldName, BSONObjBuilder&);

    /*
     * OBJECTID :
     *     ObjectId( <24 character hex string> )
     */
    Status objectId(StringData fieldName, BSONObjBuilder&);

    /*
     * NUMBERLONG :
     *     NumberLong( <number> )
     */
    Status numberLong(StringData fieldName, BSONObjBuilder&);

    /*
     * NUMBERINT :
     *     NumberInt( <number> )
     */
    Status numberInt(StringData fieldName, BSONObjBuilder&);

    /*
     * DBREF :
     *     Dbref( <namespace string> , <24 character hex string> )
     */
    Status dbRef(StringData fieldName, BSONObjBuilder&);

    /*
     * REGEX :
     *     / REGEXCHARS / REGEXOPTIONS
     *
     * REGEXCHARS :
     *     REGEXCHAR
     *   | REGEXCHAR REGEXCHARS
     *
     * REGEXCHAR :
     *     any-Unicode-character-except-/-or-\-or-CONTROLCHAR
     *   | \"
     *   | \'
     *   | \\
     *   | \/
     *   | \b
     *   | \f
     *   | \n
     *   | \r
     *   | \t
     *   | \v
     *   | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
     *   | \any-Unicode-character-except-x-or-[0-7]
     *
     * REGEXOPTIONS :
     *     REGEXOPTION
     *   | REGEXOPTION REGEXOPTIONS
     *
     * REGEXOPTION :
     *     g | i | m | s
     */
    Status regex(StringData fieldName, BSONObjBuilder&);
    Status regexPat(std::string* result);
    Status regexOpt(std::string* result);
    Status regexOptCheck(StringData opt);

    /*
     * NUMBER :
     *
     * NOTE: Number parsing is based on standard library functions, not
     * necessarily on the JSON numeric grammar.
     *
     * Number as value - strtoll and strtod
     * Date - strtoll
     * Timestamp - strtoul for both timestamp and increment and '-'
     * before a number explicitly disallowed
     */
    Status number(StringData fieldName, BSONObjBuilder&);

    /*
     * FIELD :
     *     STRING
     *   | [a-zA-Z$_] FIELDCHARS
     *
     * FIELDCHARS :
     *     [a-zA-Z0-9$_]
     *   | [a-zA-Z0-9$_] FIELDCHARS
     */
    Status field(std::string* result);

    /*
     * STRING :
     *     " "
     *   | ' '
     *   | " CHARS "
     *   | ' CHARS '
     */
    Status quotedString(std::string* result);

    /*
     * CHARS :
     *     CHAR
     *   | CHAR CHARS
     *
     * Note: " or ' may be allowed depending on whether the string is
     * double or single quoted
     *
     * CHAR :
     *     any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR
     *   | \"
     *   | \'
     *   | \\
     *   | \/
     *   | \b
     *   | \f
     *   | \n
     *   | \r
     *   | \t
     *   | \v
     *   | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
     *   | \any-Unicode-character-except-x-or-[0-9]
     *
     * HEXDIGIT : [0..9a..fA..F]
     *
     * per http://www.ietf.org/rfc/rfc4627.txt, control characters are
     * (U+0000 through U+001F). U+007F is not mentioned as a control
     * character.
     * CONTROLCHAR : [0x00..0x1F]
     *
     * If there is not an error, result will contain a null terminated
     * string, but there is no guarantee that it will not contain other
     * null characters.
     *
     * NOTE(review): terminalSet is presumably the set of characters that end
     * the run and allowedSet the characters permitted unescaped (cf. match(),
     * where a NULL set matches every character) -- confirm against the
     * implementation.
     */
    Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL);

    /**
     * Converts the two byte Unicode code point to its UTF8 character
     * encoding representation. This function returns a std::string because
     * UTF8 encodings for code points from 0x0000 to 0xFFFF can range
     * from one to three characters.
     */
    std::string encodeUTF8(unsigned char first, unsigned char second) const;

    /**
     * @return true if the given token matches the next non whitespace
     * sequence in our buffer, and false if the token doesn't match or
     * we reach the end of our buffer. Do not update the pointer to our
     * buffer (same as calling readTokenImpl with advance=false).
     */
    inline bool peekToken(const char* token);

    /**
     * @return true if the given token matches the next non whitespace
     * sequence in our buffer, and false if the token doesn't match or
     * we reach the end of our buffer. Updates the pointer to our
     * buffer (same as calling readTokenImpl with advance=true).
     */
    inline bool readToken(const char* token);

    /**
     * @return true if the given token matches the next non whitespace
     * sequence in our buffer, and false if the token doesn't match or
     * we reach the end of our buffer. Do not update the pointer to our
     * buffer if advance is false.
     */
    bool readTokenImpl(const char* token, bool advance=true);

    /**
     * @return true if the next field in our stream matches field.
     * Handles single quoted, double quoted, and unquoted field names
     */
    bool readField(StringData field);

    /**
     * @return true if matchChar is in matchSet
     * @return true if matchSet is NULL and false if it is an empty string
     */
    bool match(char matchChar, const char* matchSet) const;

    /**
     * @return true if every character in the string is a hex digit
     */
    bool isHexString(StringData) const;

    /**
     * @return true if every character in the string is a valid base64
     * character
     */
    bool isBase64String(StringData) const;

    /**
     * @return FailedToParse status with the given message and some
     * additional context information
     */
    Status parseError(StringData msg);

public:
    /**
     * @return the distance, in chars, from the start of the input buffer
     * (_buf) to the current cursor (_input).
     *
     * Declared const because it only reads the cursor, so it can be called
     * on a const parser. The pointer difference is a ptrdiff_t; the cast
     * makes the narrowing to the int return type explicit.
     */
    inline int offset() const { return static_cast<int>(_input - _buf); }

private:
    /*
     * _buf - start of our input buffer
     * _input - cursor we advance in our input buffer
     * _input_end - sentinel for the end of our input buffer
     *
     * _buf is the null terminated buffer containing the JSON string we
     * are parsing. _input_end points to the null byte at the end of
     * the buffer. strtoll, strtol, and strtod will access the null
     * byte at the end of the buffer because they are assuming a c-style
     * string.
     */
    const char* const _buf;
    const char* _input;
    const char* const _input_end;
};
} // namespace mongo