diff options
author | Andrew Morrow <acm@mongodb.com> | 2015-04-30 11:49:52 -0400 |
---|---|---|
committer | Andrew Morrow <acm@mongodb.com> | 2015-05-06 15:47:21 -0400 |
commit | 543ca54c22e13056b4f278e36b4c1b6436c2f1cb (patch) | |
tree | ce86697434bce1152c31df200341405e618472f8 /src/mongo/db/json.h | |
parent | 0ec9948134ca39df062d59e7eaa212100631ecac (diff) | |
download | mongo-543ca54c22e13056b4f278e36b4c1b6436c2f1cb.tar.gz |
SERVER-9666 Move json utils into bson library
Diffstat (limited to 'src/mongo/db/json.h')
-rw-r--r-- | src/mongo/db/json.h | 512 |
1 files changed, 27 insertions, 485 deletions
diff --git a/src/mongo/db/json.h b/src/mongo/db/json.h index 34564765242..609393813d8 100644 --- a/src/mongo/db/json.h +++ b/src/mongo/db/json.h @@ -1,488 +1,30 @@ -/** -* Copyright (C) 2008 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ +/* Copyright 2009 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ #pragma once -#include <string> - -#include "mongo/bson/bsonobj.h" -#include "mongo/base/status.h" - -namespace mongo { - - /** - * Create a BSONObj from a JSON <http://www.json.org>, - * <http://www.ietf.org/rfc/rfc4627.txt> string. In addition to the JSON - * extensions extensions described here - * <http://dochub.mongodb.org/core/mongodbextendedjson>, this function - * accepts unquoted field names and allows single quotes to optionally be - * used when specifying field names and std::string values instead of double - * quotes. JSON unicode escape sequences (of the form \uXXXX) are - * converted to utf8. - * - * @throws MsgAssertionException if parsing fails. The message included with - * this assertion includes the character offset where parsing failed. - */ - BSONObj fromjson(const std::string& str); - - /** @param len will be size of JSON object in text chars. */ - BSONObj fromjson(const char* str, int* len=NULL); - - /** - * Tests whether the JSON string is an Array. - * - * Useful for assigning the result of fromjson to the right object type. Either: - * BSONObj - * BSONArray - * - * @example Using the method to select the proper type. - * If this method returns true, the user could store the result of fromjson - * inside a BSONArray, rather than a BSONObj, in order to have it print as an - * array when passed to tojson. - * - * @param obj The JSON string to test. - */ - bool isArray(StringData str); - - /** - * Convert a BSONArray to a JSON string. - * - * @param arr The BSON Array. - * @param format The JSON format (JS, TenGen, Strict). - * @param pretty Enables pretty output. - */ - std::string tojson( - const BSONArray& arr, - JsonStringFormat format = Strict, - bool pretty = false - ); - - /** - * Convert a BSONObj to a JSON string. - * - * @param obj The BSON Object. - * @param format The JSON format (JS, TenGen, Strict). - * @param pretty Enables pretty output. - */ - std::string tojson( - const BSONObj& obj, - JsonStringFormat format = Strict, - bool pretty = false - ); - - /** - * Parser class. A BSONObj is constructed incrementally by passing a - * BSONObjBuilder to the recursive parsing methods. The grammar for the - * element parsed is described before each function. - */ - class JParse { - public: - explicit JParse(StringData str); - - /* - * Notation: All-uppercase symbols denote non-terminals; all other - * symbols are literals. - */ - - /* - * VALUE : - * STRING - * | NUMBER - * | NUMBERINT - * | NUMBERLONG - * | OBJECT - * | ARRAY - * - * | true - * | false - * | null - * | undefined - * - * | NaN - * | Infinity - * | -Infinity - * - * | DATE - * | TIMESTAMP - * | REGEX - * | OBJECTID - * | DBREF - * - * | new CONSTRUCTOR - */ - private: - Status value(StringData fieldName, BSONObjBuilder&); - - /* - * OBJECT : - * {} - * | { MEMBERS } - * | SPECIALOBJECT - * - * MEMBERS : - * PAIR - * | PAIR , MEMBERS - * - * PAIR : - * FIELD : VALUE - * - * SPECIALOBJECT : - * OIDOBJECT - * | BINARYOBJECT - * | DATEOBJECT - * | TIMESTAMPOBJECT - * | REGEXOBJECT - * | REFOBJECT - * | UNDEFINEDOBJECT - * | NUMBERLONGOBJECT - * | MINKEYOBJECT - * | MAXKEYOBJECT - * - */ - public: - Status object(StringData fieldName, BSONObjBuilder&, bool subObj=true); - Status parse(BSONObjBuilder& builder); - bool isArray(); - - private: - /* The following functions are called with the '{' and the first - * field already parsed since they are both implied given the - * context. */ - /* - * OIDOBJECT : - * { FIELD("$oid") : <24 character hex std::string> } - */ - Status objectIdObject(StringData fieldName, BSONObjBuilder&); - - /* - * BINARYOBJECT : - * { FIELD("$binary") : <base64 representation of a binary std::string>, - * FIELD("$type") : <hexadecimal representation of a single byte - * indicating the data type> } - */ - Status binaryObject(StringData fieldName, BSONObjBuilder&); - - /* - * DATEOBJECT : - * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> } - */ - Status dateObject(StringData fieldName, BSONObjBuilder&); - - /* - * TIMESTAMPOBJECT : - * { FIELD("$timestamp") : { - * FIELD("t") : <32 bit unsigned integer for seconds since epoch>, - * FIELD("i") : <32 bit unsigned integer for the increment> } } - */ - Status timestampObject(StringData fieldName, BSONObjBuilder&); - - /* - * NOTE: the rules for the body of the regex are different here, - * since it is quoted instead of surrounded by slashes. - * REGEXOBJECT : - * { FIELD("$regex") : <string representing body of regex> } - * | { FIELD("$regex") : <string representing body of regex>, - * FIELD("$options") : <string representing regex options> } - */ - Status regexObject(StringData fieldName, BSONObjBuilder&); - - /* - * REFOBJECT : - * { FIELD("$ref") : <string representing collection name>, - * FIELD("$id") : <24 character hex std::string> } - * | { FIELD("$ref") : std::string , FIELD("$id") : OBJECTID } - * | { FIELD("$ref") : std::string , FIELD("$id") : OIDOBJECT } - */ - Status dbRefObject(StringData fieldName, BSONObjBuilder&); - - /* - * UNDEFINEDOBJECT : - * { FIELD("$undefined") : true } - */ - Status undefinedObject(StringData fieldName, BSONObjBuilder&); - - /* - * NUMBERLONGOBJECT : - * { FIELD("$numberLong") : "<number>" } - */ - Status numberLongObject(StringData fieldName, BSONObjBuilder&); - - /* - * MINKEYOBJECT : - * { FIELD("$minKey") : 1 } - */ - Status minKeyObject(StringData fieldName, BSONObjBuilder& builder); - - /* - * MAXKEYOBJECT : - * { FIELD("$maxKey") : 1 } - */ - Status maxKeyObject(StringData fieldName, BSONObjBuilder& builder); - - /* - * ARRAY : - * [] - * | [ ELEMENTS ] - * - * ELEMENTS : - * VALUE - * | VALUE , ELEMENTS - */ - Status array(StringData fieldName, BSONObjBuilder&, bool subObj=true); - - /* - * NOTE: Currently only Date can be preceded by the "new" keyword - * CONSTRUCTOR : - * DATE - */ - Status constructor(StringData fieldName, BSONObjBuilder&); - - /* The following functions only parse the body of the constructor - * between the parentheses, not including the constructor name */ - /* - * DATE : - * Date( <64 bit signed integer for milliseconds since epoch> ) - */ - Status date(StringData fieldName, BSONObjBuilder&); - - /* - * TIMESTAMP : - * Timestamp( <32 bit unsigned integer for seconds since epoch>, - * <32 bit unsigned integer for the increment> ) - */ - Status timestamp(StringData fieldName, BSONObjBuilder&); - - /* - * OBJECTID : - * ObjectId( <24 character hex std::string> ) - */ - Status objectId(StringData fieldName, BSONObjBuilder&); - - /* - * NUMBERLONG : - * NumberLong( <number> ) - */ - Status numberLong(StringData fieldName, BSONObjBuilder&); - - /* - * NUMBERINT : - * NumberInt( <number> ) - */ - Status numberInt(StringData fieldName, BSONObjBuilder&); - - /* - * DBREF : - * Dbref( <namespace std::string> , <24 character hex std::string> ) - */ - Status dbRef(StringData fieldName, BSONObjBuilder&); - - /* - * REGEX : - * / REGEXCHARS / REGEXOPTIONS - * - * REGEXCHARS : - * REGEXCHAR - * | REGEXCHAR REGEXCHARS - * - * REGEXCHAR : - * any-Unicode-character-except-/-or-\-or-CONTROLCHAR - * | \" - * | \' - * | \\ - * | \/ - * | \b - * | \f - * | \n - * | \r - * | \t - * | \v - * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT - * | \any-Unicode-character-except-x-or-[0-7] - * - * REGEXOPTIONS : - * REGEXOPTION - * | REGEXOPTION REGEXOPTIONS - * - * REGEXOPTION : - * g | i | m | s - */ - Status regex(StringData fieldName, BSONObjBuilder&); - Status regexPat(std::string* result); - Status regexOpt(std::string* result); - Status regexOptCheck(StringData opt); - - /* - * NUMBER : - * - * NOTE: Number parsing is based on standard library functions, not - * necessarily on the JSON numeric grammar. - * - * Number as value - strtoll and strtod - * Date - strtoll - * Timestamp - strtoul for both timestamp and increment and '-' - * before a number explicity disallowed - */ - Status number(StringData fieldName, BSONObjBuilder&); - - /* - * FIELD : - * STRING - * | [a-zA-Z$_] FIELDCHARS - * - * FIELDCHARS : - * [a-zA-Z0-9$_] - * | [a-zA-Z0-9$_] FIELDCHARS - */ - Status field(std::string* result); - - /* - * std::string : - * " " - * | ' ' - * | " CHARS " - * | ' CHARS ' - */ - Status quotedString(std::string* result); - - /* - * CHARS : - * CHAR - * | CHAR CHARS - * - * Note: " or ' may be allowed depending on whether the std::string is - * double or single quoted - * - * CHAR : - * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR - * | \" - * | \' - * | \\ - * | \/ - * | \b - * | \f - * | \n - * | \r - * | \t - * | \v - * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT - * | \any-Unicode-character-except-x-or-[0-9] - * - * HEXDIGIT : [0..9a..fA..F] - * - * per http://www.ietf.org/rfc/rfc4627.txt, control characters are - * (U+0000 through U+001F). U+007F is not mentioned as a control - * character. - * CONTROLCHAR : [0x00..0x1F] - * - * If there is not an error, result will contain a null terminated - * string, but there is no guarantee that it will not contain other - * null characters. - */ - Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL); - - /** - * Converts the two byte Unicode code point to its UTF8 character - * encoding representation. This function returns a std::string because - * UTF8 encodings for code points from 0x0000 to 0xFFFF can range - * from one to three characters. - */ - std::string encodeUTF8(unsigned char first, unsigned char second) const; - - /** - * @return true if the given token matches the next non whitespace - * sequence in our buffer, and false if the token doesn't match or - * we reach the end of our buffer. Do not update the pointer to our - * buffer (same as calling readTokenImpl with advance=false). - */ - inline bool peekToken(const char* token); - - /** - * @return true if the given token matches the next non whitespace - * sequence in our buffer, and false if the token doesn't match or - * we reach the end of our buffer. Updates the pointer to our - * buffer (same as calling readTokenImpl with advance=true). - */ - inline bool readToken(const char* token); - - /** - * @return true if the given token matches the next non whitespace - * sequence in our buffer, and false if the token doesn't match or - * we reach the end of our buffer. Do not update the pointer to our - * buffer if advance is false. - */ - bool readTokenImpl(const char* token, bool advance=true); - - /** - * @return true if the next field in our stream matches field. - * Handles single quoted, double quoted, and unquoted field names - */ - bool readField(StringData field); - - /** - * @return true if matchChar is in matchSet - * @return true if matchSet is NULL and false if it is an empty string - */ - bool match(char matchChar, const char* matchSet) const; - - /** - * @return true if every character in the std::string is a hex digit - */ - bool isHexString(StringData) const; - - /** - * @return true if every character in the std::string is a valid base64 - * character - */ - bool isBase64String(StringData) const; - - /** - * @return FailedToParse status with the given message and some - * additional context information - */ - Status parseError(StringData msg); - public: - inline int offset() { return (_input - _buf); } - - private: - /* - * _buf - start of our input buffer - * _input - cursor we advance in our input buffer - * _input_end - sentinel for the end of our input buffer - * - * _buf is the null terminated buffer containing the JSON std::string we - * are parsing. _input_end points to the null byte at the end of - * the buffer. strtoll, strtol, and strtod will access the null - * byte at the end of the buffer because they are assuming a c-style - * string. - */ - const char* const _buf; - const char* _input; - const char* const _input_end; - }; - -} // namespace mongo +#include "mongo/bson/json.h" |