summaryrefslogtreecommitdiff
path: root/src/mongo/bson
diff options
context:
space:
mode:
authorAndrew Morrow <acm@mongodb.com>2015-04-30 11:49:52 -0400
committerAndrew Morrow <acm@mongodb.com>2015-05-06 15:47:21 -0400
commit543ca54c22e13056b4f278e36b4c1b6436c2f1cb (patch)
treece86697434bce1152c31df200341405e618472f8 /src/mongo/bson
parent0ec9948134ca39df062d59e7eaa212100631ecac (diff)
downloadmongo-543ca54c22e13056b4f278e36b4c1b6436c2f1cb.tar.gz
SERVER-9666 Move json utils into bson library
Diffstat (limited to 'src/mongo/bson')
-rw-r--r--src/mongo/bson/json.cpp1304
-rw-r--r--src/mongo/bson/json.h488
2 files changed, 1792 insertions, 0 deletions
diff --git a/src/mongo/bson/json.cpp b/src/mongo/bson/json.cpp
new file mode 100644
index 00000000000..2b13e318e44
--- /dev/null
+++ b/src/mongo/bson/json.cpp
@@ -0,0 +1,1304 @@
+/* Copyright 2009 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
+
+#include "mongo/bson/json.h"
+
+#include <boost/scoped_ptr.hpp>
+#include <limits>
+
+#include "mongo/base/parse_number.h"
+#include "mongo/db/jsobj.h"
+#include "mongo/platform/cstdint.h"
+#include "mongo/platform/strtoll.h"
+#include "mongo/util/base64.h"
+#include "mongo/util/hex.h"
+#include "mongo/util/log.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/time_support.h"
+
+namespace mongo {
+
+ using boost::scoped_ptr;
+ using std::ostringstream;
+ using std::string;
+
+// Parser tracing. With the condition below left at 0, MONGO_JSON_DEBUG(message) expands to
+// nothing; changing it to a non-zero value makes each use log the file, line, function and
+// message.
+#if 0
+#define MONGO_JSON_DEBUG(message) log() << "JSON DEBUG @ " << __FILE__\
+ << ":" << __LINE__ << " " << __FUNCTION__ << ": " << message << endl;
+#else
+#define MONGO_JSON_DEBUG(message)
+#endif
+
+// Character classes, written as string literals so they can be concatenated at compile time
+// (e.g. ALPHA DIGIT "_$") and handed to match()/chars() as a set of characters.
+#define ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+#define DIGIT "0123456789"
+#define CONTROL "\a\b\f\n\r\t\v"
+// Characters accepted as regular expression options (see regexOpt / regexOptCheck).
+#define JOPTIONS "gims"
+
+ // Size hints given to char vectors: each value is the initial capacity passed to
+ // std::string::reserve() for the scratch string that holds the corresponding piece of input
+ // (object id, regex pattern, field name, ...). They are capacities only, not length limits.
+ enum {
+ ID_RESERVE_SIZE = 64,
+ PAT_RESERVE_SIZE = 4096,
+ OPT_RESERVE_SIZE = 64,
+ FIELD_RESERVE_SIZE = 4096,
+ STRINGVAL_RESERVE_SIZE = 4096,
+ BINDATA_RESERVE_SIZE = 4096,
+ BINDATATYPE_RESERVE_SIZE = 4096,
+ NS_RESERVE_SIZE = 64,
+ DB_RESERVE_SIZE = 64,
+ NUMBERLONG_RESERVE_SIZE = 64,
+ DATE_RESERVE_SIZE = 64
+ };
+
+ // Single-character punctuation tokens, kept as C strings so they can be passed directly to
+ // readToken() / peekToken().
+ static const char* LBRACE = "{",
+ *RBRACE = "}",
+ *LBRACKET = "[",
+ *RBRACKET = "]",
+ *LPAREN = "(",
+ *RPAREN = ")",
+ *COLON = ":",
+ *COMMA = ",",
+ *FORWARDSLASH = "/",
+ *SINGLEQUOTE = "'",
+ *DOUBLEQUOTE = "\"";
+
+ // Sets up a parser over 'str'. Only pointers into the caller's buffer are stored: _buf marks
+ // the start of the text, _input is the read cursor (initially at _buf) and _input_end is one
+ // past the last character.
+ // NOTE(review): _input is initialized from _buf and _input_end from _input, so this presumably
+ // relies on the members being declared in that order in json.h -- confirm.
+ JParse::JParse(StringData str)
+ : _buf(str.rawData())
+ , _input(_buf)
+ , _input_end(_input + str.size())
+ {}
+
+    /**
+     * Builds a FailedToParse status whose reason is
+     * "<msg>: offset:<current offset> of:<the whole input buffer>".
+     */
+    Status JParse::parseError(StringData msg) {
+        std::ostringstream text;
+        text << msg << ": offset:" << offset() << " of:" << _buf;
+        return Status(ErrorCodes::FailedToParse, text.str());
+    }
+
+    /**
+     * Parses one value at the cursor and appends it to 'builder' under 'fieldName'.
+     *
+     * The alternatives are tried in a fixed order because readToken() consumes what it matches:
+     * objects and arrays, the shell-style constructors, regex literals, quoted strings, the
+     * keyword literals, and finally plain numbers. Returns the first error encountered.
+     */
+    Status JParse::value(StringData fieldName, BSONObjBuilder& builder) {
+        MONGO_JSON_DEBUG("fieldName: " << fieldName);
+
+        // Containers: only peek, the sub-parsers consume the opening bracket themselves.
+        if (peekToken(LBRACE)) {
+            return object(fieldName, builder);
+        }
+        if (peekToken(LBRACKET)) {
+            return array(fieldName, builder);
+        }
+
+        // Constructor-style values; the keyword is consumed here, the arguments by the callee.
+        if (readToken("new")) {
+            return constructor(fieldName, builder);
+        }
+        if (readToken("Date")) {
+            return date(fieldName, builder);
+        }
+        if (readToken("Timestamp")) {
+            return timestamp(fieldName, builder);
+        }
+        if (readToken("ObjectId")) {
+            return objectId(fieldName, builder);
+        }
+        if (readToken("NumberLong")) {
+            return numberLong(fieldName, builder);
+        }
+        if (readToken("NumberInt")) {
+            return numberInt(fieldName, builder);
+        }
+        if (readToken("Dbref") || readToken("DBRef")) {
+            return dbRef(fieldName, builder);
+        }
+
+        // Regex literal: /pattern/options
+        if (peekToken(FORWARDSLASH)) {
+            return regex(fieldName, builder);
+        }
+
+        // Quoted string
+        if (peekToken(DOUBLEQUOTE) || peekToken(SINGLEQUOTE)) {
+            std::string text;
+            text.reserve(STRINGVAL_RESERVE_SIZE);
+            const Status textRet = quotedString(&text);
+            if (!textRet.isOK()) {
+                return textRet;
+            }
+            builder.append(fieldName, text);
+            return Status::OK();
+        }
+
+        // Keyword literals
+        if (readToken("true")) {
+            builder.append(fieldName, true);
+            return Status::OK();
+        }
+        if (readToken("false")) {
+            builder.append(fieldName, false);
+            return Status::OK();
+        }
+        if (readToken("null")) {
+            builder.appendNull(fieldName);
+            return Status::OK();
+        }
+        if (readToken("undefined")) {
+            builder.appendUndefined(fieldName);
+            return Status::OK();
+        }
+        if (readToken("NaN")) {
+            builder.append(fieldName, std::numeric_limits<double>::quiet_NaN());
+            return Status::OK();
+        }
+        if (readToken("Infinity")) {
+            builder.append(fieldName, std::numeric_limits<double>::infinity());
+            return Status::OK();
+        }
+        if (readToken("-Infinity")) {
+            builder.append(fieldName, -std::numeric_limits<double>::infinity());
+            return Status::OK();
+        }
+
+        // Anything else has to be a number.
+        return number(fieldName, builder);
+    }
+
+    /**
+     * Parses the whole input into 'builder' as the base (non-sub) array or object, depending on
+     * what isArray() reports. The field name argument is a placeholder in both cases.
+     */
+    Status JParse::parse(BSONObjBuilder& builder) {
+        if (isArray()) {
+            return array("UNUSED", builder, false);
+        }
+        return object("UNUSED", builder, false);
+    }
+
+    /**
+     * Parses "{ ... }" at the cursor.
+     *
+     * When 'subObject' is true the result is appended to 'builder' under 'fieldName'; when it is
+     * false the fields are written straight into 'builder' and 'fieldName' is not used.
+     *
+     * If the first field is one of the reserved "$" names, the rest of the object is handed to
+     * the matching special-object parser; reserved names are rejected in the base object.
+     */
+    Status JParse::object(StringData fieldName, BSONObjBuilder& builder, bool subObject) {
+        MONGO_JSON_DEBUG("fieldName: " << fieldName);
+        if (!readToken(LBRACE)) {
+            return parseError("Expecting '{'");
+        }
+
+        // "{}": emit an empty sub-object, or nothing at all for the base object.
+        if (readToken(RBRACE)) {
+            if (subObject) {
+                BSONObjBuilder emptyObj(builder.subobjStart(fieldName));
+                emptyObj.done();
+            }
+            return Status::OK();
+        }
+
+        std::string leadingField;
+        leadingField.reserve(FIELD_RESERVE_SIZE);
+        const Status leadingRet = field(&leadingField);
+        if (!leadingRet.isOK()) {
+            return leadingRet;
+        }
+
+        // Reserved first-field names and the parser that takes over for each of them.
+        typedef Status (JParse::*ReservedParser)(StringData, BSONObjBuilder&);
+        struct ReservedField {
+            const char* name;
+            ReservedParser parser;
+        };
+        static const ReservedField reservedFields[] = {
+            { "$oid", &JParse::objectIdObject },
+            { "$binary", &JParse::binaryObject },
+            { "$date", &JParse::dateObject },
+            { "$timestamp", &JParse::timestampObject },
+            { "$regex", &JParse::regexObject },
+            { "$ref", &JParse::dbRefObject },
+            { "$undefined", &JParse::undefinedObject },
+            { "$numberLong", &JParse::numberLongObject },
+            { "$minKey", &JParse::minKeyObject },
+            { "$maxKey", &JParse::maxKeyObject }
+        };
+        static const std::size_t numReserved =
+            sizeof(reservedFields) / sizeof(reservedFields[0]);
+
+        const ReservedField* reserved = NULL;
+        for (std::size_t i = 0; i < numReserved; ++i) {
+            if (leadingField == reservedFields[i].name) {
+                reserved = &reservedFields[i];
+                break;
+            }
+        }
+
+        if (reserved != NULL) {
+            // Special object
+            if (!subObject) {
+                return parseError(std::string("Reserved field name in base object: ") +
+                                  reserved->name);
+            }
+            const Status reservedRet = (this->*(reserved->parser))(fieldName, builder);
+            if (!reservedRet.isOK()) {
+                return reservedRet;
+            }
+        }
+        else {
+            // Normal object. Only create a sub builder if this is not the base object.
+            BSONObjBuilder* target = &builder;
+            scoped_ptr<BSONObjBuilder> nested;
+            if (subObject) {
+                nested.reset(new BSONObjBuilder(builder.subobjStart(fieldName)));
+                target = nested.get();
+            }
+
+            if (!readToken(COLON)) {
+                return parseError("Expecting ':'");
+            }
+            const Status firstValueRet = value(leadingField, *target);
+            if (!firstValueRet.isOK()) {
+                return firstValueRet;
+            }
+
+            // Remaining "name : value" pairs, each introduced by a comma.
+            while (readToken(COMMA)) {
+                std::string nextField;
+                nextField.reserve(FIELD_RESERVE_SIZE);
+                const Status nextFieldRet = field(&nextField);
+                if (!nextFieldRet.isOK()) {
+                    return nextFieldRet;
+                }
+                if (!readToken(COLON)) {
+                    return parseError("Expecting ':'");
+                }
+                const Status nextValueRet = value(nextField, *target);
+                if (!nextValueRet.isOK()) {
+                    return nextValueRet;
+                }
+            }
+        }
+
+        if (!readToken(RBRACE)) {
+            return parseError("Expecting '}' or ','");
+        }
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$oid" : "<24 hex digits>" } after the "$oid" field name and
+     * appends the resulting OID under 'fieldName'.
+     */
+    Status JParse::objectIdObject(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(COLON)) {
+            return parseError("Expected ':'");
+        }
+
+        std::string hexId;
+        hexId.reserve(ID_RESERVE_SIZE);
+        const Status idRet = quotedString(&hexId);
+        if (!idRet.isOK()) {
+            return idRet;
+        }
+
+        // Length is validated before content, so a wrong-length id reports the length error.
+        if (hexId.size() != 24) {
+            return parseError("Expecting 24 hex digits: " + hexId);
+        }
+        if (!isHexString(hexId)) {
+            return parseError("Expecting hex digits: " + hexId);
+        }
+
+        builder.append(fieldName, OID(hexId));
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$binary" : "<base64>", "$type" : "<2 hex digits>" } after the
+     * "$binary" field name and appends the decoded bytes as BinData under 'fieldName'.
+     */
+    Status JParse::binaryObject(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(COLON)) {
+            return parseError("Expected ':'");
+        }
+
+        // Payload: a quoted base64 string whose length is a multiple of four.
+        std::string encoded;
+        encoded.reserve(BINDATA_RESERVE_SIZE);
+        const Status encodedRet = quotedString(&encoded);
+        if (!encodedRet.isOK()) {
+            return encodedRet;
+        }
+        if (encoded.size() % 4 != 0) {
+            return parseError("Invalid length base64 encoded string");
+        }
+        if (!isBase64String(encoded)) {
+            return parseError("Invalid character in base64 encoded string");
+        }
+        const std::string decoded = base64::decode(encoded);
+
+        if (!readToken(COMMA)) {
+            return parseError("Expected ','");
+        }
+
+        // Subtype: a quoted two-character hex string, i.e. one byte.
+        if (!readField("$type")) {
+            return parseError("Expected second field name: \"$type\", in \"$binary\" object");
+        }
+        if (!readToken(COLON)) {
+            return parseError("Expected ':'");
+        }
+        std::string subtypeHex;
+        subtypeHex.reserve(BINDATATYPE_RESERVE_SIZE);
+        const Status subtypeRet = quotedString(&subtypeHex);
+        if (!subtypeRet.isOK()) {
+            return subtypeRet;
+        }
+        const bool subtypeIsOneHexByte = (subtypeHex.size() == 2) && isHexString(subtypeHex);
+        if (!subtypeIsOneHexByte) {
+            return parseError("Argument of $type in $bindata object must be a hex string representation of a single byte");
+        }
+
+        builder.appendBinData(fieldName,
+                              decoded.length(),
+                              BinDataType(fromHex(subtypeHex)),
+                              decoded.data());
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of a { "$date" : ... } object after the "$date" field name and
+     * appends the date under 'fieldName'. Three value forms are accepted:
+     *   - a double-quoted string, converted with dateFromISOString();
+     *   - a nested object { "$numberLong" : "<milliseconds>" };
+     *   - a bare integer number of milliseconds.
+     *
+     * The closing brace of the "$date" object itself is left for the caller. The nested
+     * "$numberLong" object, however, is opened here and therefore must also be closed here;
+     * otherwise the caller would mistake its '}' for the end of the "$date" object and every
+     * enclosing object would end one brace early.
+     */
+    Status JParse::dateObject(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(COLON)) {
+            return parseError("Expected ':'");
+        }
+        errno = 0;
+        char* endptr;
+        Date_t date;
+
+        if (peekToken(DOUBLEQUOTE)) {
+            std::string dateString;
+            dateString.reserve(DATE_RESERVE_SIZE);
+            Status ret = quotedString(&dateString);
+            if (!ret.isOK()) {
+                return ret;
+            }
+            StatusWith<Date_t> dateRet = dateFromISOString(dateString);
+            if (!dateRet.isOK()) {
+                return dateRet.getStatus();
+            }
+            date = dateRet.getValue();
+        }
+        else if (readToken(LBRACE)) {
+            // Named differently from the 'fieldName' parameter, which is still needed below.
+            std::string innerField;
+            innerField.reserve(FIELD_RESERVE_SIZE);
+            Status ret = field(&innerField);
+            if (!ret.isOK()) {
+                return ret;
+            }
+            if (innerField != "$numberLong") {
+                return parseError("Expected field name: $numberLong for $date value object");
+            }
+            if (!readToken(COLON)) {
+                return parseError("Expecting ':'");
+            }
+
+            // The number must be a quoted string, since large long numbers could overflow a double
+            // and thus may not be valid JSON
+            std::string numberLongString;
+            numberLongString.reserve(NUMBERLONG_RESERVE_SIZE);
+            ret = quotedString(&numberLongString);
+            if (!ret.isOK()) {
+                return ret;
+            }
+
+            long long numberLong;
+            ret = parseNumberFromString(numberLongString, &numberLong);
+            if (!ret.isOK()) {
+                return ret;
+            }
+            date = numberLong;
+
+            // Close the nested object that was opened by the readToken(LBRACE) above.
+            if (!readToken(RBRACE)) {
+                return parseError("Expecting '}'");
+            }
+        }
+        else {
+            // SERVER-11920: We should use parseNumberFromString here, but that function requires
+            // that we know ahead of time where the number ends, which is not currently the case.
+            date = static_cast<unsigned long long>(strtoll(_input, &endptr, 10));
+            if (_input == endptr) {
+                return parseError("Date expecting integer milliseconds");
+            }
+            if (errno == ERANGE) {
+                /* Need to handle this because jsonString outputs the value of Date_t as unsigned.
+                 * See SERVER-8330 and SERVER-8573 */
+                errno = 0;
+                // SERVER-11920: We should use parseNumberFromString here, but that function
+                // requires that we know ahead of time where the number ends, which is not currently
+                // the case.
+                date = strtoull(_input, &endptr, 10);
+                if (errno == ERANGE) {
+                    return parseError("Date milliseconds overflow");
+                }
+            }
+            _input = endptr;
+        }
+        builder.appendDate(fieldName, date);
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$timestamp" : { "t" : <seconds>, "i" : <increment> } } after
+     * the "$timestamp" field name and appends a Timestamp under 'fieldName'.
+     *
+     * Both numbers must be non-negative and fit in 32 bits. The parsed value is range-checked
+     * explicitly rather than relying on ERANGE alone: a value such as 4294967296 is in range
+     * for the C conversion function on 64-bit platforms, and assigning it directly to a
+     * uint32_t would silently wrap it.
+     */
+    Status JParse::timestampObject(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(COLON)) {
+            return parseError("Expecting ':'");
+        }
+        if (!readToken(LBRACE)) {
+            return parseError("Expecting '{' to start \"$timestamp\" object");
+        }
+
+        if (!readField("t")) {
+            return parseError("Expected field name \"t\" in \"$timestamp\" sub object");
+        }
+        if (!readToken(COLON)) {
+            return parseError("Expecting ':'");
+        }
+        if (readToken("-")) {
+            return parseError("Negative seconds in \"$timestamp\"");
+        }
+        errno = 0;
+        char* endptr;
+        // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+        // we know ahead of time where the number ends, which is not currently the case.
+        const unsigned long long rawSeconds = strtoull(_input, &endptr, 10);
+        if (errno == ERANGE || rawSeconds > std::numeric_limits<uint32_t>::max()) {
+            return parseError("Timestamp seconds overflow");
+        }
+        if (_input == endptr) {
+            return parseError("Expecting unsigned integer seconds in \"$timestamp\"");
+        }
+        const uint32_t seconds = static_cast<uint32_t>(rawSeconds);
+        _input = endptr;
+        if (!readToken(COMMA)) {
+            return parseError("Expecting ','");
+        }
+
+        if (!readField("i")) {
+            return parseError("Expected field name \"i\" in \"$timestamp\" sub object");
+        }
+        if (!readToken(COLON)) {
+            return parseError("Expecting ':'");
+        }
+        if (readToken("-")) {
+            return parseError("Negative increment in \"$timestamp\"");
+        }
+        errno = 0;
+        // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+        // we know ahead of time where the number ends, which is not currently the case.
+        const unsigned long long rawCount = strtoull(_input, &endptr, 10);
+        if (errno == ERANGE || rawCount > std::numeric_limits<uint32_t>::max()) {
+            return parseError("Timestamp increment overflow");
+        }
+        if (_input == endptr) {
+            return parseError("Expecting unsigned integer increment in \"$timestamp\"");
+        }
+        const uint32_t count = static_cast<uint32_t>(rawCount);
+        _input = endptr;
+
+        // Closes the inner { "t" : ..., "i" : ... } object; the outer brace is the caller's.
+        if (!readToken(RBRACE)) {
+            return parseError("Expecting '}'");
+        }
+        builder.append(fieldName, Timestamp(seconds, count));
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$regex" : "<pattern>" [, "$options" : "<options>"] } after the
+     * "$regex" field name and appends a regex under 'fieldName'. Options default to "".
+     */
+    Status JParse::regexObject(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(COLON)) {
+            return parseError("Expecting ':'");
+        }
+
+        std::string pattern;
+        pattern.reserve(PAT_RESERVE_SIZE);
+        const Status patternRet = quotedString(&pattern);
+        if (!patternRet.isOK()) {
+            return patternRet;
+        }
+
+        // No comma means there is no "$options" field.
+        if (!readToken(COMMA)) {
+            builder.appendRegex(fieldName, pattern, "");
+            return Status::OK();
+        }
+
+        if (!readField("$options")) {
+            return parseError("Expected field name: \"$options\" in \"$regex\" object");
+        }
+        if (!readToken(COLON)) {
+            return parseError("Expecting ':'");
+        }
+        std::string options;
+        options.reserve(OPT_RESERVE_SIZE);
+        const Status optionsRet = quotedString(&options);
+        if (!optionsRet.isOK()) {
+            return optionsRet;
+        }
+        const Status validRet = regexOptCheck(options);
+        if (!validRet.isOK()) {
+            return validRet;
+        }
+        builder.appendRegex(fieldName, pattern, options);
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$ref" : "<ns>", "$id" : <value> [, "$db" : "<db>"] } after the
+     * "$ref" field name. The result is appended under 'fieldName' as a sub-object holding the
+     * same "$ref", "$id" and optional "$db" fields.
+     */
+    Status JParse::dbRefObject(StringData fieldName, BSONObjBuilder& builder) {
+        // The sub-object is opened before anything is parsed, as in the original flow.
+        BSONObjBuilder refBuilder(builder.subobjStart(fieldName));
+
+        // "$ref" : "<namespace>"
+        if (!readToken(COLON)) {
+            return parseError("DBRef: Expecting ':'");
+        }
+        std::string nsName;
+        nsName.reserve(NS_RESERVE_SIZE);
+        Status strRet = quotedString(&nsName);
+        if (!strRet.isOK()) {
+            return strRet;
+        }
+        refBuilder.append("$ref", nsName);
+
+        // "$id" : <any value>
+        if (!readToken(COMMA)) {
+            return parseError("DBRef: Expecting ','");
+        }
+        if (!readField("$id")) {
+            return parseError("DBRef: Expected field name: \"$id\" in \"$ref\" object");
+        }
+        if (!readToken(COLON)) {
+            return parseError("DBRef: Expecting ':'");
+        }
+        const Status idRet = value("$id", refBuilder);
+        if (!idRet.isOK()) {
+            return idRet;
+        }
+
+        // Optional "$db" : "<database>"
+        if (readToken(COMMA)) {
+            if (!readField("$db")) {
+                return parseError("DBRef: Expected field name: \"$db\" in \"$ref\" object");
+            }
+            if (!readToken(COLON)) {
+                return parseError("DBRef: Expecting ':'");
+            }
+            std::string dbName;
+            dbName.reserve(DB_RESERVE_SIZE);
+            strRet = quotedString(&dbName);
+            if (!strRet.isOK()) {
+                return strRet;
+            }
+            refBuilder.append("$db", dbName);
+        }
+
+        refBuilder.done();
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$undefined" : true } after the field name and appends an
+     * undefined value under 'fieldName'. Any value other than the token true is an error.
+     */
+    Status JParse::undefinedObject(StringData fieldName, BSONObjBuilder& builder) {
+        const bool sawColon = readToken(COLON);
+        if (!sawColon) {
+            return parseError("Expecting ':'");
+        }
+        const bool sawTrue = readToken("true");
+        if (!sawTrue) {
+            return parseError("Reserved field \"$undefined\" requires value of true");
+        }
+        builder.appendUndefined(fieldName);
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$numberLong" : "<integer>" } after the field name and appends
+     * the number under 'fieldName'.
+     */
+    Status JParse::numberLongObject(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(COLON)) {
+            return parseError("Expecting ':'");
+        }
+
+        // The number must be a quoted string, since large long numbers could overflow a double and
+        // thus may not be valid JSON
+        std::string digits;
+        digits.reserve(NUMBERLONG_RESERVE_SIZE);
+        const Status quotedRet = quotedString(&digits);
+        if (!quotedRet.isOK()) {
+            return quotedRet;
+        }
+
+        long long parsed;
+        const Status convertRet = parseNumberFromString(digits, &parsed);
+        if (!convertRet.isOK()) {
+            return convertRet;
+        }
+
+        builder.appendNumber(fieldName, parsed);
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$minKey" : 1 } after the field name and appends a MinKey
+     * under 'fieldName'. Any value other than the token 1 is an error.
+     */
+    Status JParse::minKeyObject(StringData fieldName, BSONObjBuilder& builder) {
+        const bool sawColon = readToken(COLON);
+        if (!sawColon) {
+            return parseError("Expecting ':'");
+        }
+        const bool sawOne = readToken("1");
+        if (!sawOne) {
+            return parseError("Reserved field \"$minKey\" requires value of 1");
+        }
+        builder.appendMinKey(fieldName);
+        return Status::OK();
+    }
+
+    /**
+     * Parses the remainder of { "$maxKey" : 1 } after the field name and appends a MaxKey
+     * under 'fieldName'. Any value other than the token 1 is an error.
+     */
+    Status JParse::maxKeyObject(StringData fieldName, BSONObjBuilder& builder) {
+        const bool sawColon = readToken(COLON);
+        if (!sawColon) {
+            return parseError("Expecting ':'");
+        }
+        const bool sawOne = readToken("1");
+        if (!sawOne) {
+            return parseError("Reserved field \"$maxKey\" requires value of 1");
+        }
+        builder.appendMaxKey(fieldName);
+        return Status::OK();
+    }
+
+    /**
+     * Parses "[ v0, v1, ... ]" at the cursor. Elements are appended under the field names
+     * produced by numStr(0), numStr(1), ... When 'subObject' is true they go into a sub-array
+     * started on 'builder' under 'fieldName'; otherwise they go directly into 'builder'.
+     */
+    Status JParse::array(StringData fieldName, BSONObjBuilder& builder, bool subObject) {
+        MONGO_JSON_DEBUG("fieldName: " << fieldName);
+        if (!readToken(LBRACKET)) {
+            return parseError("Expecting '['");
+        }
+
+        BSONObjBuilder* target = &builder;
+        scoped_ptr<BSONObjBuilder> nested;
+        if (subObject) {
+            nested.reset(new BSONObjBuilder(builder.subarrayStart(fieldName)));
+            target = nested.get();
+        }
+
+        // An element is expected unless the array is empty, and again after every comma.
+        uint32_t position = 0;
+        bool expectElement = !peekToken(RBRACKET);
+        while (expectElement) {
+            const Status elementRet = value(builder.numStr(position), *target);
+            if (!elementRet.isOK()) {
+                return elementRet;
+            }
+            ++position;
+            expectElement = readToken(COMMA);
+        }
+
+        target->done();
+        if (!readToken(RBRACKET)) {
+            return parseError("Expecting ']' or ','");
+        }
+        return Status::OK();
+    }
+
+    /* Handles the value that follows the "new" keyword (already consumed by the caller).
+     *
+     * NOTE: this could be easily modified to allow "new" before other
+     * constructors, but for now it only allows "new" before Date().
+     * Also note that unlike the interactive shell "Date(x)" and "new Date(x)"
+     * have the same behavior. XXX: this may not be desired.
+     *
+     * Returns whatever date() returns, so that a malformed "new Date(...)" is reported to the
+     * caller instead of being treated as a successfully parsed value. */
+    Status JParse::constructor(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken("Date")) {
+            return parseError("\"new\" keyword not followed by Date constructor");
+        }
+        return date(fieldName, builder);
+    }
+
+ // Parses the argument list of Date(<milliseconds>) -- the "Date" keyword has already been
+ // consumed by the caller -- and appends the date under 'fieldName'.
+ // The number is first read as a signed 64-bit value; if that overflows it is read again as
+ // an unsigned 64-bit value, and only a second overflow is reported as an error.
+ Status JParse::date(StringData fieldName, BSONObjBuilder& builder) {
+ if (!readToken(LPAREN)) {
+ return parseError("Expecting '('");
+ }
+ // Cleared so that an ERANGE seen below can only come from the conversion that follows.
+ errno = 0;
+ char* endptr;
+ // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+ // we know ahead of time where the number ends, which is not currently the case.
+ Date_t date = static_cast<unsigned long long>(strtoll(_input, &endptr, 10));
+ // The end pointer did not move: there were no digits to convert.
+ if (_input == endptr) {
+ return parseError("Date expecting integer milliseconds");
+ }
+ if (errno == ERANGE) {
+ /* Need to handle this because jsonString outputs the value of Date_t as unsigned.
+ * See SERVER-8330 and SERVER-8573 */
+ errno = 0;
+ // SERVER-11920: We should use parseNumberFromString here, but that function requires
+ // that we know ahead of time where the number ends, which is not currently the case.
+ date = strtoull(_input, &endptr, 10);
+ if (errno == ERANGE) {
+ return parseError("Date milliseconds overflow");
+ }
+ }
+ // Advance the cursor past the digits that were converted.
+ _input = endptr;
+ if (!readToken(RPAREN)) {
+ return parseError("Expecting ')'");
+ }
+ builder.appendDate(fieldName, date);
+ return Status::OK();
+ }
+
+    /**
+     * Parses the argument list of Timestamp(<seconds>, <increment>) -- the "Timestamp" keyword
+     * has already been consumed by the caller -- and appends a Timestamp under 'fieldName'.
+     *
+     * Both numbers must be non-negative and fit in 32 bits. The parsed value is range-checked
+     * explicitly rather than relying on ERANGE alone: a value such as 4294967296 is in range
+     * for the C conversion function on 64-bit platforms, and assigning it directly to a
+     * uint32_t would silently wrap it.
+     */
+    Status JParse::timestamp(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(LPAREN)) {
+            return parseError("Expecting '('");
+        }
+        if (readToken("-")) {
+            return parseError("Negative seconds in \"$timestamp\"");
+        }
+        errno = 0;
+        char* endptr;
+        // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+        // we know ahead of time where the number ends, which is not currently the case.
+        const unsigned long long rawSeconds = strtoull(_input, &endptr, 10);
+        if (errno == ERANGE || rawSeconds > std::numeric_limits<uint32_t>::max()) {
+            return parseError("Timestamp seconds overflow");
+        }
+        if (_input == endptr) {
+            return parseError("Expecting unsigned integer seconds in \"$timestamp\"");
+        }
+        const uint32_t seconds = static_cast<uint32_t>(rawSeconds);
+        _input = endptr;
+        if (!readToken(COMMA)) {
+            return parseError("Expecting ','");
+        }
+        // This sign check guards the increment, so the message names the increment (it used to
+        // repeat the "seconds" wording of the first check).
+        if (readToken("-")) {
+            return parseError("Negative increment in \"$timestamp\"");
+        }
+        errno = 0;
+        // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+        // we know ahead of time where the number ends, which is not currently the case.
+        const unsigned long long rawCount = strtoull(_input, &endptr, 10);
+        if (errno == ERANGE || rawCount > std::numeric_limits<uint32_t>::max()) {
+            return parseError("Timestamp increment overflow");
+        }
+        if (_input == endptr) {
+            return parseError("Expecting unsigned integer increment in \"$timestamp\"");
+        }
+        const uint32_t count = static_cast<uint32_t>(rawCount);
+        _input = endptr;
+        if (!readToken(RPAREN)) {
+            return parseError("Expecting ')'");
+        }
+        builder.append(fieldName, Timestamp(seconds, count));
+        return Status::OK();
+    }
+
+    /**
+     * Parses the argument list of ObjectId("<24 hex digits>") -- the "ObjectId" keyword has
+     * already been consumed by the caller -- and appends the OID under 'fieldName'.
+     */
+    Status JParse::objectId(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(LPAREN)) {
+            return parseError("Expecting '('");
+        }
+
+        std::string hexId;
+        hexId.reserve(ID_RESERVE_SIZE);
+        const Status idRet = quotedString(&hexId);
+        if (!idRet.isOK()) {
+            return idRet;
+        }
+
+        // The closing parenthesis is required before the id itself is validated.
+        if (!readToken(RPAREN)) {
+            return parseError("Expecting ')'");
+        }
+        if (hexId.size() != 24) {
+            return parseError("Expecting 24 hex digits: " + hexId);
+        }
+        if (!isHexString(hexId)) {
+            return parseError("Expecting hex digits: " + hexId);
+        }
+
+        builder.append(fieldName, OID(hexId));
+        return Status::OK();
+    }
+
+    /**
+     * Parses the argument list of NumberLong(<integer>) -- the "NumberLong" keyword has already
+     * been consumed by the caller -- and appends the 64-bit number under 'fieldName'.
+     */
+    Status JParse::numberLong(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(LPAREN)) {
+            return parseError("Expecting '('");
+        }
+
+        // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+        // we know ahead of time where the number ends, which is not currently the case.
+        char* numberEnd;
+        errno = 0;
+        const int64_t parsed = strtoll(_input, &numberEnd, 10);
+        if (errno == ERANGE) {
+            return parseError("NumberLong out of range");
+        }
+        if (numberEnd == _input) {
+            return parseError("Expecting number in NumberLong");
+        }
+        _input = numberEnd;
+
+        if (!readToken(RPAREN)) {
+            return parseError("Expecting ')'");
+        }
+        builder.appendNumber(fieldName, static_cast<long long int>(parsed));
+        return Status::OK();
+    }
+
+    /**
+     * Parses the argument list of NumberInt(<integer>) -- the "NumberInt" keyword has already
+     * been consumed by the caller -- and appends the 32-bit number under 'fieldName'.
+     *
+     * The number is converted as a 64-bit value and then range-checked against int32_t. On
+     * platforms where long is 64 bits wide, strtol() alone would accept e.g. 4294967296 without
+     * ERANGE and the assignment to int32_t would silently wrap it.
+     */
+    Status JParse::numberInt(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(LPAREN)) {
+            return parseError("Expecting '('");
+        }
+        errno = 0;
+        char* endptr;
+        // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+        // we know ahead of time where the number ends, which is not currently the case.
+        const long long raw = strtoll(_input, &endptr, 10);
+        if (errno == ERANGE ||
+            raw < std::numeric_limits<int32_t>::min() ||
+            raw > std::numeric_limits<int32_t>::max()) {
+            return parseError("NumberInt out of range");
+        }
+        if (_input == endptr) {
+            return parseError("Expecting unsigned number in NumberInt");
+        }
+        const int32_t val = static_cast<int32_t>(raw);
+        _input = endptr;
+        if (!readToken(RPAREN)) {
+            return parseError("Expecting ')'");
+        }
+        builder.appendNumber(fieldName, static_cast<int>(val));
+        return Status::OK();
+    }
+
+
+    /**
+     * Parses the argument list of DBRef("<ns>", <id> [, "<db>"]) -- the keyword has already
+     * been consumed by the caller -- and appends a sub-object under 'fieldName' holding "$ref",
+     * "$id" and, when given, "$db".
+     */
+    Status JParse::dbRef(StringData fieldName, BSONObjBuilder& builder) {
+        // The sub-object is opened before anything is parsed, as in the original flow.
+        BSONObjBuilder refBuilder(builder.subobjStart(fieldName));
+
+        if (!readToken(LPAREN)) {
+            return parseError("Expecting '('");
+        }
+
+        // First argument: the namespace.
+        std::string nsName;
+        nsName.reserve(NS_RESERVE_SIZE);
+        const Status nsRet = quotedString(&nsName);
+        if (!nsRet.isOK()) {
+            return nsRet;
+        }
+        refBuilder.append("$ref", nsName);
+
+        // Second argument: the id, which may be any value.
+        if (!readToken(COMMA)) {
+            return parseError("Expecting ','");
+        }
+        const Status idRet = value("$id", refBuilder);
+        if (!idRet.isOK()) {
+            return idRet;
+        }
+
+        // Optional third argument: the database name.
+        if (readToken(COMMA)) {
+            std::string dbName;
+            dbName.reserve(DB_RESERVE_SIZE);
+            const Status dbNameRet = quotedString(&dbName);
+            if (!dbNameRet.isOK()) {
+                return dbNameRet;
+            }
+            refBuilder.append("$db", dbName);
+        }
+
+        if (!readToken(RPAREN)) {
+            return parseError("Expecting ')'");
+        }
+
+        refBuilder.done();
+        return Status::OK();
+    }
+
+    /**
+     * Parses a regex literal of the form /pattern/options at the cursor and appends it under
+     * 'fieldName'.
+     */
+    Status JParse::regex(StringData fieldName, BSONObjBuilder& builder) {
+        if (!readToken(FORWARDSLASH)) {
+            return parseError("Expecting '/'");
+        }
+
+        std::string pattern;
+        pattern.reserve(PAT_RESERVE_SIZE);
+        const Status patternRet = regexPat(&pattern);
+        if (!patternRet.isOK()) {
+            return patternRet;
+        }
+
+        if (!readToken(FORWARDSLASH)) {
+            return parseError("Expecting '/'");
+        }
+
+        std::string options;
+        options.reserve(OPT_RESERVE_SIZE);
+        const Status optionsRet = regexOpt(&options);
+        if (!optionsRet.isOK()) {
+            return optionsRet;
+        }
+        const Status validRet = regexOptCheck(options);
+        if (!validRet.isOK()) {
+            return validRet;
+        }
+
+        builder.appendRegex(fieldName, pattern, options);
+        return Status::OK();
+    }
+
+ // Reads a regex pattern into 'result': everything up to, but not including, the next
+ // unescaped '/' (the terminal set handed to chars()).
+ Status JParse::regexPat(std::string* result) {
+ MONGO_JSON_DEBUG("");
+ return chars(result, "/");
+ }
+
+ // Reads regex options into 'result': chars() is given an empty terminal set and JOPTIONS as
+ // the allowed set, so reading stops at the first character that is not an option letter.
+ Status JParse::regexOpt(std::string* result) {
+ MONGO_JSON_DEBUG("");
+ return chars(result, "", JOPTIONS);
+ }
+
+    /**
+     * Verifies that every character of 'opt' is one of JOPTIONS. Returns a parse error naming
+     * the first offending character, or OK when there is none.
+     */
+    Status JParse::regexOptCheck(StringData opt) {
+        MONGO_JSON_DEBUG("opt: " << opt);
+        for (std::size_t pos = 0; pos < opt.size(); ++pos) {
+            const char flag = opt[pos];
+            if (match(flag, JOPTIONS)) {
+                continue;
+            }
+            return parseError(string("Bad regex option: ") + flag);
+        }
+        return Status::OK();
+    }
+
+ // Parses a bare number at the cursor and appends it under 'fieldName' as the narrowest of
+ // 32-bit int, 64-bit int or double that represents the text.
+ // The text is converted twice, once as a double and once as a 64-bit integer; comparing how
+ // far each conversion got decides whether the text is integral.
+ // NOTE(review): the value is appended to 'builder' before the end-of-input check at the
+ // bottom, so on that error path the builder has already been modified.
+ Status JParse::number(StringData fieldName, BSONObjBuilder& builder) {
+ char* endptrll;
+ char* endptrd;
+ long long retll;
+ double retd;
+
+ // reset errno to make sure that we are getting it from strtod
+ errno = 0;
+ // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+ // we know ahead of time where the number ends, which is not currently the case.
+ retd = strtod(_input, &endptrd);
+ // if pointer does not move, we found no digits
+ if (_input == endptrd) {
+ return parseError("Bad characters in value");
+ }
+ if (errno == ERANGE) {
+ return parseError("Value cannot fit in double");
+ }
+ // reset errno to make sure that we are getting it from strtoll
+ errno = 0;
+ // SERVER-11920: We should use parseNumberFromString here, but that function requires that
+ // we know ahead of time where the number ends, which is not currently the case.
+ retll = strtoll(_input, &endptrll, 10);
+ // The integer conversion stopping short of the double conversion means the text contains
+ // something (fraction, exponent, ...) that only the double conversion understood.
+ if (endptrll < endptrd || errno == ERANGE) {
+ // The number either had characters only meaningful for a double or
+ // could not fit in a 64 bit int
+ MONGO_JSON_DEBUG("Type: double");
+ builder.append(fieldName, retd);
+ }
+ else if (retll == static_cast<int>(retll)) {
+ // The number can fit in a 32 bit int
+ MONGO_JSON_DEBUG("Type: 32 bit int");
+ builder.append(fieldName, static_cast<int>(retll));
+ }
+ else {
+ // The number can fit in a 64 bit int
+ MONGO_JSON_DEBUG("Type: 64 bit int");
+ builder.append(fieldName, retll);
+ }
+ // Advance past the text consumed by the double conversion (the longer of the two).
+ _input = endptrd;
+ if (_input >= _input_end) {
+ return parseError("Trailing number at end of input");
+ }
+ return Status::OK();
+ }
+
+    /**
+     * Reads a field name into 'result'. A name starting with a quote character is read as a
+     * quoted string; otherwise it is an unquoted key that must start with one of [A-Za-z$_]
+     * and continues with characters from [A-Za-z0-9$_].
+     */
+    Status JParse::field(std::string* result) {
+        MONGO_JSON_DEBUG("");
+
+        // Quoted key
+        // TODO: make sure quoted field names cannot contain null characters
+        if (peekToken(DOUBLEQUOTE) || peekToken(SINGLEQUOTE)) {
+            return quotedString(result);
+        }
+
+        // Unquoted key: skip leading whitespace first.
+        // 'isspace()' takes an 'int', so a plain (possibly signed) 'char' would be sign-extended
+        // for bytes >= 0x80 (0x80 would be seen as 0xffffff80). Reading the byte through an
+        // 'unsigned char' pointer keeps it in the 0x00..0xff range.
+        for (; _input < _input_end; ++_input) {
+            if (!isspace(*reinterpret_cast<const unsigned char*>(_input))) {
+                break;
+            }
+        }
+        if (_input >= _input_end) {
+            return parseError("Field name expected");
+        }
+        if (!match(*_input, ALPHA "_$")) {
+            return parseError("First character in field must be [A-Za-z$_]");
+        }
+        return chars(result, "", ALPHA DIGIT "_$");
+    }
+
+    /**
+     * Reads a string delimited by a pair of double quotes or a pair of single quotes into
+     * 'result'. The closing quote must be of the same kind as the opening one.
+     */
+    Status JParse::quotedString(std::string* result) {
+        MONGO_JSON_DEBUG("");
+
+        // Double quotes are tried first; single quotes only if that did not match.
+        const bool doubleQuoted = readToken(DOUBLEQUOTE);
+        if (!doubleQuoted && !readToken(SINGLEQUOTE)) {
+            return parseError("Expecting quoted string");
+        }
+        const char* quote = doubleQuoted ? DOUBLEQUOTE : SINGLEQUOTE;
+
+        // The opening quote character is also the terminator of the string body.
+        const Status bodyRet = chars(result, quote);
+        if (!bodyRet.isOK()) {
+            return bodyRet;
+        }
+        if (!readToken(quote)) {
+            return parseError(doubleQuoted ? "Expecting '\"'" : "Expecting '''");
+        }
+        return Status::OK();
+    }
+
+ /*
+ * Copies characters from the cursor into 'result', decoding backslash escapes, and leaves the
+ * cursor on the character that stopped the scan.
+ *
+ * terminalSet are characters that signal end of string (e.g.) [ :\0]
+ * allowedSet are the characters that are allowed, if this is set
+ *
+ * The scan stops successfully at the first character in terminalSet, or, when allowedSet is
+ * given, at the first character not in allowedSet. Running off the end of the input, or
+ * meeting a raw control character (0x00-0x1F), is an error. On error 'result' may already
+ * hold the characters copied so far.
+ */
+ Status JParse::chars(std::string* result, const char* terminalSet,
+ const char* allowedSet) {
+ MONGO_JSON_DEBUG("terminalSet: " << terminalSet);
+ if (_input >= _input_end) {
+ return parseError("Unexpected end of input");
+ }
+ // 'q' is a scratch cursor; _input is only updated once the scan ends successfully.
+ const char* q = _input;
+ while (q < _input_end && !match(*q, terminalSet)) {
+ MONGO_JSON_DEBUG("q: " << q);
+ if (allowedSet != NULL) {
+ if (!match(*q, allowedSet)) {
+ _input = q;
+ return Status::OK();
+ }
+ }
+ if (0x00 <= *q && *q <= 0x1F) {
+ return parseError("Invalid control character");
+ }
+ // A backslash that is the very last input character is not treated as an escape; it is
+ // copied by the else branch below.
+ if (*q == '\\' && q + 1 < _input_end) {
+ switch (*(++q)) {
+ // Escape characters allowed by the JSON spec
+ case '"': result->push_back('"'); break;
+ case '\'': result->push_back('\''); break;
+ case '\\': result->push_back('\\'); break;
+ case '/': result->push_back('/'); break;
+ case 'b': result->push_back('\b'); break;
+ case 'f': result->push_back('\f'); break;
+ case 'n': result->push_back('\n'); break;
+ case 'r': result->push_back('\r'); break;
+ case 't': result->push_back('\t'); break;
+ case 'u': { //expect 4 hexdigits
+ // TODO: handle UTF-16 surrogate characters
+ ++q;
+ // NOTE(review): '>=' also rejects four hex digits that are the very last characters
+ // of the input. Such input has no terminator and would fail below anyway, so only
+ // the error message differs.
+ if (q + 4 >= _input_end) {
+ return parseError("Expecting 4 hex digits");
+ }
+ if (!isHexString(StringData(q, 4))) {
+ return parseError("Expecting 4 hex digits");
+ }
+ // Two hex digits per byte: 'first' is the high byte, 'second' the low byte.
+ unsigned char first = fromHex(q);
+ unsigned char second = fromHex(q += 2);
+ const std::string& utf8str = encodeUTF8(first, second);
+ for (unsigned int i = 0; i < utf8str.size(); i++) {
+ result->push_back(utf8str[i]);
+ }
+ // Leaves q on the last hex digit; the increment after the switch steps past it.
+ ++q;
+ break;
+ }
+ // Vertical tab character. Not in JSON spec but allowed in
+ // our implementation according to test suite.
+ case 'v': result->push_back('\v'); break;
+ // Escape characters we explicitly disallow
+ case 'x': return parseError("Hex escape not supported");
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': return parseError("Octal escape not supported");
+ // By default pass on the unescaped character
+ default: result->push_back(*q); break;
+ // TODO: check for escaped control characters
+ }
+ ++q;
+ }
+ else {
+ result->push_back(*q++);
+ }
+ }
+ // Stopped on a terminal character before the end of the input: success.
+ if (q < _input_end) {
+ _input = q;
+ return Status::OK();
+ }
+ return parseError("Unexpected end of input");
+ }
+
+ // Returns the UTF-8 encoding of the 16 bit code point whose high byte is 'first' and
+ // whose low byte is 'second'. The result is one, two or three bytes long.
+ std::string JParse::encodeUTF8(unsigned char first, unsigned char second) const {
+     std::string encoded;
+     // Bits 6 and up of the code point, and its low six bits.
+     const int upperBits = first << 2 | second >> 6;
+     const int lowSixBits = ~0xc0 & second;
+     if (first == 0 && second < 0x80) {
+         // Single byte: the code point itself.
+         encoded.push_back(static_cast<char>(second));
+     }
+     else if (first < 0x08) {
+         // Two bytes: 110xxxxx 10xxxxxx
+         encoded.push_back(char( 0xc0 | upperBits ));
+         encoded.push_back(char( 0x80 | lowSixBits ));
+     }
+     else {
+         // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+         encoded.push_back(char( 0xe0 | (first >> 4) ));
+         encoded.push_back(char( 0x80 | (~0xc0 & upperBits) ));
+         encoded.push_back(char( 0x80 | lowSixBits ));
+     }
+     return encoded;
+ }
+
+ // Checks for 'token' at the next non-whitespace position without moving the cursor.
+ inline bool JParse::peekToken(const char* token) {
+     const bool advanceCursor = false;
+     return readTokenImpl(token, advanceCursor);
+ }
+
+ // Checks for 'token' at the next non-whitespace position and, on a match, moves the
+ // cursor past it.
+ inline bool JParse::readToken(const char* token) {
+     const bool advanceCursor = true;
+     return readTokenImpl(token, advanceCursor);
+ }
+
+ // Skips leading whitespace and compares the input against 'token' character by
+ // character. Returns true only if the whole token is present before _input_end.
+ // _input is moved past the token only when it matched and 'advance' is true; a NULL
+ // token never matches.
+ bool JParse::readTokenImpl(const char* token, bool advance) {
+     // Reject a NULL token before it is handed to the debug macro or dereferenced.
+     // NOTE(review): MONGO_JSON_DEBUG is defined outside this view; if it streams its
+     // argument, inserting a NULL 'const char*' would be invalid.
+     if (token == NULL) {
+         return false;
+     }
+     MONGO_JSON_DEBUG("token: " << token);
+     const char* check = _input;
+     // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended
+     // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes
+     // 0xffffff80 as seen by isspace when sign-extended ... we want it to be 0x00000080
+     while (check < _input_end && isspace(static_cast<unsigned char>(*check))) {
+         ++check;
+     }
+     while (*token != '\0') {
+         if (check >= _input_end) {
+             return false;
+         }
+         if (*token++ != *check++) {
+             return false;
+         }
+     }
+     if (advance) { _input = check; }
+     return true;
+ }
+
+ // Parses the next field name and reports whether it parsed successfully and equals
+ // 'expectedField'.
+ bool JParse::readField(StringData expectedField) {
+     MONGO_JSON_DEBUG("expectedField: " << expectedField);
+     std::string parsedName;
+     parsedName.reserve(FIELD_RESERVE_SIZE);
+     Status parseStatus = field(&parsedName);
+     if (parseStatus != Status::OK()) {
+         return false;
+     }
+     const bool differs = (expectedField != parsedName);
+     return !differs;
+ }
+
+ // Returns true if matchChar is one of the characters in matchSet. A NULL matchSet
+ // matches everything, an empty matchSet matches nothing, and a null character is
+ // never treated as a member of the set.
+ inline bool JParse::match(char matchChar, const char* matchSet) const {
+     if (matchSet == NULL) {
+         return true;
+     }
+     if (*matchSet == '\0') {
+         return false;
+     }
+     // strchr() considers the terminating null of matchSet to be part of the string, so
+     // searching for '\0' would always succeed. Rule it out explicitly.
+     if (matchChar == '\0') {
+         return false;
+     }
+     return (strchr(matchSet, matchChar) != NULL);
+ }
+
+ // Returns true if every character of 'str' is a hexadecimal digit. An empty string
+ // yields true.
+ bool JParse::isHexString(StringData str) const {
+     MONGO_JSON_DEBUG("str: " << str);
+     std::size_t i;
+     for (i = 0; i < str.size(); i++) {
+         // isxdigit() requires a value representable as 'unsigned char' (or EOF); a plain
+         // 'char' holding a byte >= 0x80 may be negative, so convert it first.
+         if (!isxdigit(static_cast<unsigned char>(str[i]))) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Returns true if every character of 'str' is found in base64::chars. An empty string
+ // yields true.
+ bool JParse::isBase64String(StringData str) const {
+     MONGO_JSON_DEBUG("str: " << str);
+     std::size_t pos = 0;
+     while (pos < str.size()) {
+         const bool isBase64Char = match(str[pos], base64::chars);
+         if (!isBase64Char) {
+             return false;
+         }
+         pos++;
+     }
+     return true;
+ }
+
+ // Reports whether the next token is LBRACKET. The cursor is not moved.
+ bool JParse::isArray() {
+     const bool startsWithBracket = peekToken(LBRACKET);
+     return startsWithBracket;
+ }
+
+ // Parses the C string 'jsonString' into a BSONObj.
+ //
+ // An empty string yields an empty BSONObj. If 'len' is non-NULL it receives the parser
+ // offset after a successful parse (0 for an empty string).
+ //
+ // Throws MsgAssertionException 17031 if the parser throws a std::exception, and
+ // MsgAssertionException 16619 if the parser returns a non-OK status.
+ BSONObj fromjson(const char* jsonString, int* len) {
+     MONGO_JSON_DEBUG("jsonString: " << jsonString);
+     if (jsonString[0] == '\0') {
+         if (len) *len = 0;
+         return BSONObj();
+     }
+     JParse jparse(jsonString);
+     BSONObjBuilder builder;
+     Status ret = Status::OK();
+     try {
+         ret = jparse.parse(builder);
+     }
+     catch (const std::exception& e) {
+         // Caught by const reference; only the message is needed to build the assertion.
+         std::ostringstream message;
+         message << "caught exception from within JSON parser: " << e.what();
+         throw MsgAssertionException(17031, message.str());
+     }
+
+     if (ret != Status::OK()) {
+         std::ostringstream message;
+         message << "code " << ret.code() << ": " << ret.codeString() << ": " << ret.reason();
+         throw MsgAssertionException(16619, message.str());
+     }
+     if (len) *len = jparse.offset();
+     return builder.obj();
+ }
+
+ // Convenience overload: parses the C string view of 'str'.
+ BSONObj fromjson(const std::string& str) {
+     const char* const jsonText = str.c_str();
+     return fromjson( jsonText );
+ }
+
+ // Renders 'obj' as JSON text by delegating to BSONObj::jsonString.
+ std::string tojson(const BSONObj& obj, JsonStringFormat format, bool pretty) {
+     std::string rendered = obj.jsonString(format, pretty);
+     return rendered;
+ }
+
+ // Renders 'arr' as JSON text by delegating to jsonString.
+ std::string tojson(const BSONArray& arr, JsonStringFormat format, bool pretty) {
+     // NOTE(review): the third argument presumably selects array formatting - confirm
+     // against the jsonString declaration.
+     const bool thirdArgument = true;
+     std::string rendered = arr.jsonString(format, pretty, thirdArgument);
+     return rendered;
+ }
+
+ // Reports whether the JSON text in 'str' begins with an array.
+ bool isArray(StringData str) {
+     return JParse(str).isArray();
+ }
+
+} /* namespace mongo */
diff --git a/src/mongo/bson/json.h b/src/mongo/bson/json.h
new file mode 100644
index 00000000000..34564765242
--- /dev/null
+++ b/src/mongo/bson/json.h
@@ -0,0 +1,488 @@
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects for
+* all of the code used other than as permitted herein. If you modify file(s)
+* with this exception, you may extend this exception to your version of the
+* file(s), but you are not obligated to do so. If you do not wish to do so,
+* delete this exception statement from your version. If you delete this
+* exception statement from all source files in the program, then also delete
+* it in the license file.
+*/
+
+#pragma once
+
+#include <string>
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/base/status.h"
+
+namespace mongo {
+
+ /**
+ * Create a BSONObj from a JSON <http://www.json.org>,
+ * <http://www.ietf.org/rfc/rfc4627.txt> string. In addition to the JSON
+ * extensions described here
+ * <http://dochub.mongodb.org/core/mongodbextendedjson>, this function
+ * accepts unquoted field names and allows single quotes to optionally be
+ * used when specifying field names and string values instead of double
+ * quotes. JSON unicode escape sequences (of the form \uXXXX) are
+ * converted to utf8.
+ *
+ * @param str The JSON text to parse.
+ *
+ * @throws MsgAssertionException if parsing fails. The message included with
+ * this assertion includes the character offset where parsing failed.
+ */
+ BSONObj fromjson(const std::string& str);
+
+ /**
+ * Create a BSONObj from the JSON text in the C string 'str'.
+ *
+ * @param str The JSON text to parse.
+ * @param len if non-NULL, will be size of JSON object in text chars.
+ */
+ BSONObj fromjson(const char* str, int* len=NULL);
+
+ /**
+ * Tests whether the JSON string is an Array.
+ *
+ * Useful for assigning the result of fromjson to the right object type. Either:
+ * BSONObj
+ * BSONArray
+ *
+ * @example Using the method to select the proper type.
+ * If this method returns true, the user could store the result of fromjson
+ * inside a BSONArray, rather than a BSONObj, in order to have it print as an
+ * array when passed to tojson.
+ *
+ * @param str The JSON string to test.
+ * @return true if the JSON string is an array.
+ */
+ bool isArray(StringData str);
+
+ /**
+ * Convert a BSONArray to a JSON string.
+ *
+ * @param arr The BSON Array.
+ * @param format The JSON format (JS, TenGen, Strict).
+ * @param pretty Enables pretty output.
+ * @return The JSON text for the array.
+ */
+ std::string tojson(
+ const BSONArray& arr,
+ JsonStringFormat format = Strict,
+ bool pretty = false
+ );
+
+ /**
+ * Convert a BSONObj to a JSON string.
+ *
+ * @param obj The BSON Object.
+ * @param format The JSON format (JS, TenGen, Strict).
+ * @param pretty Enables pretty output.
+ * @return The JSON text for the object.
+ */
+ std::string tojson(
+ const BSONObj& obj,
+ JsonStringFormat format = Strict,
+ bool pretty = false
+ );
+
+ /**
+ * Parser class. A BSONObj is constructed incrementally by passing a
+ * BSONObjBuilder to the recursive parsing methods. The grammar for the
+ * element parsed is described before each function.
+ */
+ class JParse {
+ public:
+ explicit JParse(StringData str);
+
+ /*
+ * Notation: All-uppercase symbols denote non-terminals; all other
+ * symbols are literals.
+ */
+
+ /*
+ * VALUE :
+ * STRING
+ * | NUMBER
+ * | NUMBERINT
+ * | NUMBERLONG
+ * | OBJECT
+ * | ARRAY
+ *
+ * | true
+ * | false
+ * | null
+ * | undefined
+ *
+ * | NaN
+ * | Infinity
+ * | -Infinity
+ *
+ * | DATE
+ * | TIMESTAMP
+ * | REGEX
+ * | OBJECTID
+ * | DBREF
+ *
+ * | new CONSTRUCTOR
+ */
+ private:
+ Status value(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * OBJECT :
+ * {}
+ * | { MEMBERS }
+ * | SPECIALOBJECT
+ *
+ * MEMBERS :
+ * PAIR
+ * | PAIR , MEMBERS
+ *
+ * PAIR :
+ * FIELD : VALUE
+ *
+ * SPECIALOBJECT :
+ * OIDOBJECT
+ * | BINARYOBJECT
+ * | DATEOBJECT
+ * | TIMESTAMPOBJECT
+ * | REGEXOBJECT
+ * | REFOBJECT
+ * | UNDEFINEDOBJECT
+ * | NUMBERLONGOBJECT
+ * | MINKEYOBJECT
+ * | MAXKEYOBJECT
+ *
+ */
+ public:
+ Status object(StringData fieldName, BSONObjBuilder&, bool subObj=true);
+ Status parse(BSONObjBuilder& builder);
+ bool isArray();
+
+ private:
+ /* The following functions are called with the '{' and the first
+ * field already parsed since they are both implied given the
+ * context. */
+ /*
+ * OIDOBJECT :
+ * { FIELD("$oid") : <24 character hex string> }
+ */
+ Status objectIdObject(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * BINARYOBJECT :
+ * { FIELD("$binary") : <base64 representation of a binary string>,
+ * FIELD("$type") : <hexadecimal representation of a single byte
+ * indicating the data type> }
+ */
+ Status binaryObject(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * DATEOBJECT :
+ * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> }
+ */
+ Status dateObject(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * TIMESTAMPOBJECT :
+ * { FIELD("$timestamp") : {
+ * FIELD("t") : <32 bit unsigned integer for seconds since epoch>,
+ * FIELD("i") : <32 bit unsigned integer for the increment> } }
+ */
+ Status timestampObject(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * NOTE: the rules for the body of the regex are different here,
+ * since it is quoted instead of surrounded by slashes.
+ * REGEXOBJECT :
+ * { FIELD("$regex") : <string representing body of regex> }
+ * | { FIELD("$regex") : <string representing body of regex>,
+ * FIELD("$options") : <string representing regex options> }
+ */
+ Status regexObject(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * REFOBJECT :
+ * { FIELD("$ref") : <string representing collection name>,
+ * FIELD("$id") : <24 character hex string> }
+ * | { FIELD("$ref") : STRING , FIELD("$id") : OBJECTID }
+ * | { FIELD("$ref") : STRING , FIELD("$id") : OIDOBJECT }
+ */
+ Status dbRefObject(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * UNDEFINEDOBJECT :
+ * { FIELD("$undefined") : true }
+ */
+ Status undefinedObject(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * NUMBERLONGOBJECT :
+ * { FIELD("$numberLong") : "<number>" }
+ */
+ Status numberLongObject(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * MINKEYOBJECT :
+ * { FIELD("$minKey") : 1 }
+ */
+ Status minKeyObject(StringData fieldName, BSONObjBuilder& builder);
+
+ /*
+ * MAXKEYOBJECT :
+ * { FIELD("$maxKey") : 1 }
+ */
+ Status maxKeyObject(StringData fieldName, BSONObjBuilder& builder);
+
+ /*
+ * ARRAY :
+ * []
+ * | [ ELEMENTS ]
+ *
+ * ELEMENTS :
+ * VALUE
+ * | VALUE , ELEMENTS
+ */
+ Status array(StringData fieldName, BSONObjBuilder&, bool subObj=true);
+
+ /*
+ * NOTE: Currently only Date can be preceded by the "new" keyword
+ * CONSTRUCTOR :
+ * DATE
+ */
+ Status constructor(StringData fieldName, BSONObjBuilder&);
+
+ /* The following functions only parse the body of the constructor
+ * between the parentheses, not including the constructor name */
+ /*
+ * DATE :
+ * Date( <64 bit signed integer for milliseconds since epoch> )
+ */
+ Status date(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * TIMESTAMP :
+ * Timestamp( <32 bit unsigned integer for seconds since epoch>,
+ * <32 bit unsigned integer for the increment> )
+ */
+ Status timestamp(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * OBJECTID :
+ * ObjectId( <24 character hex string> )
+ */
+ Status objectId(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * NUMBERLONG :
+ * NumberLong( <number> )
+ */
+ Status numberLong(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * NUMBERINT :
+ * NumberInt( <number> )
+ */
+ Status numberInt(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * DBREF :
+ * Dbref( <namespace string> , <24 character hex string> )
+ */
+ Status dbRef(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * REGEX :
+ * / REGEXCHARS / REGEXOPTIONS
+ *
+ * REGEXCHARS :
+ * REGEXCHAR
+ * | REGEXCHAR REGEXCHARS
+ *
+ * REGEXCHAR :
+ * any-Unicode-character-except-/-or-\-or-CONTROLCHAR
+ * | \"
+ * | \'
+ * | \\
+ * | \/
+ * | \b
+ * | \f
+ * | \n
+ * | \r
+ * | \t
+ * | \v
+ * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
+ * | \any-Unicode-character-except-x-or-[0-7]
+ *
+ * REGEXOPTIONS :
+ * REGEXOPTION
+ * | REGEXOPTION REGEXOPTIONS
+ *
+ * REGEXOPTION :
+ * g | i | m | s
+ */
+ Status regex(StringData fieldName, BSONObjBuilder&);
+ Status regexPat(std::string* result);
+ Status regexOpt(std::string* result);
+ Status regexOptCheck(StringData opt);
+
+ /*
+ * NUMBER :
+ *
+ * NOTE: Number parsing is based on standard library functions, not
+ * necessarily on the JSON numeric grammar.
+ *
+ * Number as value - strtoll and strtod
+ * Date - strtoll
+ * Timestamp - strtoul for both timestamp and increment and '-'
+ * before a number explicitly disallowed
+ */
+ Status number(StringData fieldName, BSONObjBuilder&);
+
+ /*
+ * FIELD :
+ * STRING
+ * | [a-zA-Z$_] FIELDCHARS
+ *
+ * FIELDCHARS :
+ * [a-zA-Z0-9$_]
+ * | [a-zA-Z0-9$_] FIELDCHARS
+ */
+ Status field(std::string* result);
+
+ /*
+ * STRING :
+ * " "
+ * | ' '
+ * | " CHARS "
+ * | ' CHARS '
+ */
+ Status quotedString(std::string* result);
+
+ /*
+ * CHARS :
+ * CHAR
+ * | CHAR CHARS
+ *
+ * Note: " or ' may be allowed depending on whether the string is
+ * double or single quoted
+ *
+ * CHAR :
+ * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR
+ * | \"
+ * | \'
+ * | \\
+ * | \/
+ * | \b
+ * | \f
+ * | \n
+ * | \r
+ * | \t
+ * | \v
+ * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
+ * | \any-Unicode-character-except-x-or-[0-7]
+ *
+ * HEXDIGIT : [0..9a..fA..F]
+ *
+ * per http://www.ietf.org/rfc/rfc4627.txt, control characters are
+ * (U+0000 through U+001F). U+007F is not mentioned as a control
+ * character.
+ * CONTROLCHAR : [0x00..0x1F]
+ *
+ * If there is not an error, result will contain a null terminated
+ * string, but there is no guarantee that it will not contain other
+ * null characters.
+ */
+ Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL);
+
+ /**
+ * Converts the two byte Unicode code point to its UTF8 character
+ * encoding representation. This function returns a std::string because
+ * UTF8 encodings for code points from 0x0000 to 0xFFFF can range
+ * from one to three characters.
+ */
+ std::string encodeUTF8(unsigned char first, unsigned char second) const;
+
+ /**
+ * @return true if the given token matches the next non whitespace
+ * sequence in our buffer, and false if the token doesn't match or
+ * we reach the end of our buffer. Do not update the pointer to our
+ * buffer (same as calling readTokenImpl with advance=false).
+ */
+ inline bool peekToken(const char* token);
+
+ /**
+ * @return true if the given token matches the next non whitespace
+ * sequence in our buffer, and false if the token doesn't match or
+ * we reach the end of our buffer. Updates the pointer to our
+ * buffer (same as calling readTokenImpl with advance=true).
+ */
+ inline bool readToken(const char* token);
+
+ /**
+ * @return true if the given token matches the next non whitespace
+ * sequence in our buffer, and false if the token doesn't match or
+ * we reach the end of our buffer. Do not update the pointer to our
+ * buffer if advance is false.
+ */
+ bool readTokenImpl(const char* token, bool advance=true);
+
+ /**
+ * @return true if the next field in our stream matches field.
+ * Handles single quoted, double quoted, and unquoted field names
+ */
+ bool readField(StringData field);
+
+ /**
+ * @return true if matchChar is in matchSet
+ * @return true if matchSet is NULL and false if it is an empty string
+ */
+ bool match(char matchChar, const char* matchSet) const;
+
+ /**
+ * @return true if every character in the string is a hex digit
+ */
+ bool isHexString(StringData) const;
+
+ /**
+ * @return true if every character in the string is a valid base64
+ * character
+ */
+ bool isBase64String(StringData) const;
+
+ /**
+ * @return FailedToParse status with the given message and some
+ * additional context information
+ */
+ Status parseError(StringData msg);
+ public:
+ inline int offset() { return (_input - _buf); } // distance of the cursor from the start of the buffer
+
+ private:
+ /*
+ * _buf - start of our input buffer
+ * _input - cursor we advance in our input buffer
+ * _input_end - sentinel for the end of our input buffer
+ *
+ * _buf is the null terminated buffer containing the JSON string we
+ * are parsing. _input_end points to the null byte at the end of
+ * the buffer. strtoll, strtol, and strtod will access the null
+ * byte at the end of the buffer because they are assuming a c-style
+ * string.
+ */
+ const char* const _buf;
+ const char* _input;
+ const char* const _input_end;
+ };
+
+} // namespace mongo