diff options
author | Andrew Morrow <acm@mongodb.com> | 2015-04-30 11:49:52 -0400 |
---|---|---|
committer | Andrew Morrow <acm@mongodb.com> | 2015-05-06 15:47:21 -0400 |
commit | 543ca54c22e13056b4f278e36b4c1b6436c2f1cb (patch) | |
tree | ce86697434bce1152c31df200341405e618472f8 /src/mongo/bson | |
parent | 0ec9948134ca39df062d59e7eaa212100631ecac (diff) | |
download | mongo-543ca54c22e13056b4f278e36b4c1b6436c2f1cb.tar.gz |
SERVER-9666 Move json utils into bson library
Diffstat (limited to 'src/mongo/bson')
-rw-r--r-- | src/mongo/bson/json.cpp | 1304 | ||||
-rw-r--r-- | src/mongo/bson/json.h | 488 |
2 files changed, 1792 insertions, 0 deletions
diff --git a/src/mongo/bson/json.cpp b/src/mongo/bson/json.cpp new file mode 100644 index 00000000000..2b13e318e44 --- /dev/null +++ b/src/mongo/bson/json.cpp @@ -0,0 +1,1304 @@ +/* Copyright 2009 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault + +#include "mongo/bson/json.h" + +#include <boost/scoped_ptr.hpp> + +#include "mongo/base/parse_number.h" +#include "mongo/db/jsobj.h" +#include "mongo/platform/cstdint.h" +#include "mongo/platform/strtoll.h" +#include "mongo/util/base64.h" +#include "mongo/util/hex.h" +#include "mongo/util/log.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/time_support.h" + +namespace mongo { + + using boost::scoped_ptr; + using std::ostringstream; + using std::string; + +#if 0 +#define MONGO_JSON_DEBUG(message) log() << "JSON DEBUG @ " << __FILE__\ + << ":" << __LINE__ << " " << __FUNCTION__ << ": " << message << endl; +#else +#define MONGO_JSON_DEBUG(message) +#endif + +#define ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +#define DIGIT "0123456789" +#define CONTROL "\a\b\f\n\r\t\v" +#define JOPTIONS "gims" + + // Size hints given to char vectors + enum { + ID_RESERVE_SIZE = 64, + PAT_RESERVE_SIZE = 4096, + OPT_RESERVE_SIZE = 64, + FIELD_RESERVE_SIZE = 4096, + STRINGVAL_RESERVE_SIZE = 4096, + BINDATA_RESERVE_SIZE = 4096, + BINDATATYPE_RESERVE_SIZE = 4096, + NS_RESERVE_SIZE = 64, + DB_RESERVE_SIZE = 64, + NUMBERLONG_RESERVE_SIZE = 64, + DATE_RESERVE_SIZE = 64 + }; + + static const char* LBRACE = "{", + *RBRACE = "}", + *LBRACKET = "[", + *RBRACKET = "]", + *LPAREN = "(", + *RPAREN = ")", + *COLON = ":", + *COMMA = ",", + *FORWARDSLASH = "/", + *SINGLEQUOTE = "'", + *DOUBLEQUOTE = "\""; + + JParse::JParse(StringData str) + : _buf(str.rawData()) + , _input(_buf) + , _input_end(_input + str.size()) + {} + + Status JParse::parseError(StringData msg) { + std::ostringstream ossmsg; + ossmsg << msg; + ossmsg << ": offset:"; + ossmsg << offset(); + ossmsg << " of:"; + ossmsg << _buf; + return Status(ErrorCodes::FailedToParse, ossmsg.str()); + } + + Status JParse::value(StringData fieldName, BSONObjBuilder& builder) { + MONGO_JSON_DEBUG("fieldName: " << fieldName); + if 
(peekToken(LBRACE)) { + Status ret = object(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (peekToken(LBRACKET)) { + Status ret = array(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (readToken("new")) { + Status ret = constructor(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (readToken("Date")) { + Status ret = date(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (readToken("Timestamp")) { + Status ret = timestamp(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (readToken("ObjectId")) { + Status ret = objectId(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (readToken("NumberLong")) { + Status ret = numberLong(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (readToken("NumberInt")) { + Status ret = numberInt(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (readToken("Dbref") || readToken("DBRef")) { + Status ret = dbRef(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (peekToken(FORWARDSLASH)) { + Status ret = regex(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (peekToken(DOUBLEQUOTE) || peekToken(SINGLEQUOTE)) { + std::string valueString; + valueString.reserve(STRINGVAL_RESERVE_SIZE); + Status ret = quotedString(&valueString); + if (ret != Status::OK()) { + return ret; + } + builder.append(fieldName, valueString); + } + else if (readToken("true")) { + builder.append(fieldName, true); + } + else if (readToken("false")) { + builder.append(fieldName, false); + } + else if (readToken("null")) { + builder.appendNull(fieldName); + } + else if (readToken("undefined")) { + builder.appendUndefined(fieldName); + } + else if (readToken("NaN")) { + builder.append(fieldName, std::numeric_limits<double>::quiet_NaN()); + } + else if 
(readToken("Infinity")) { + builder.append(fieldName, std::numeric_limits<double>::infinity()); + } + else if (readToken("-Infinity")) { + builder.append(fieldName, -std::numeric_limits<double>::infinity()); + } + else { + Status ret = number(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + return Status::OK(); + } + + Status JParse::parse(BSONObjBuilder& builder) { + return isArray() ? array("UNUSED", builder, false) : object("UNUSED", builder, false); + } + + Status JParse::object(StringData fieldName, BSONObjBuilder& builder, bool subObject) { + MONGO_JSON_DEBUG("fieldName: " << fieldName); + if (!readToken(LBRACE)) { + return parseError("Expecting '{'"); + } + + // Empty object + if (readToken(RBRACE)) { + if (subObject) { + BSONObjBuilder empty(builder.subobjStart(fieldName)); + empty.done(); + } + return Status::OK(); + } + + // Special object + std::string firstField; + firstField.reserve(FIELD_RESERVE_SIZE); + Status ret = field(&firstField); + if (ret != Status::OK()) { + return ret; + } + + if (firstField == "$oid") { + if (!subObject) { + return parseError("Reserved field name in base object: $oid"); + } + Status ret = objectIdObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$binary") { + if (!subObject) { + return parseError("Reserved field name in base object: $binary"); + } + Status ret = binaryObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$date") { + if (!subObject) { + return parseError("Reserved field name in base object: $date"); + } + Status ret = dateObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$timestamp") { + if (!subObject) { + return parseError("Reserved field name in base object: $timestamp"); + } + Status ret = timestampObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$regex") { + if 
(!subObject) { + return parseError("Reserved field name in base object: $regex"); + } + Status ret = regexObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$ref") { + if (!subObject) { + return parseError("Reserved field name in base object: $ref"); + } + Status ret = dbRefObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$undefined") { + if (!subObject) { + return parseError("Reserved field name in base object: $undefined"); + } + Status ret = undefinedObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$numberLong") { + if (!subObject) { + return parseError("Reserved field name in base object: $numberLong"); + } + Status ret = numberLongObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$minKey") { + if (!subObject) { + return parseError("Reserved field name in base object: $minKey"); + } + Status ret = minKeyObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else if (firstField == "$maxKey") { + if (!subObject) { + return parseError("Reserved field name in base object: $maxKey"); + } + Status ret = maxKeyObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; + } + } + else { // firstField != <reserved field name> + // Normal object + + // Only create a sub builder if this is not the base object + BSONObjBuilder* objBuilder = &builder; + scoped_ptr<BSONObjBuilder> subObjBuilder; + if (subObject) { + subObjBuilder.reset(new BSONObjBuilder(builder.subobjStart(fieldName))); + objBuilder = subObjBuilder.get(); + } + + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + Status valueRet = value(firstField, *objBuilder); + if (valueRet != Status::OK()) { + return valueRet; + } + while (readToken(COMMA)) { + std::string fieldName; + fieldName.reserve(FIELD_RESERVE_SIZE); + Status fieldRet = 
field(&fieldName); + if (fieldRet != Status::OK()) { + return fieldRet; + } + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + Status valueRet = value(fieldName, *objBuilder); + if (valueRet != Status::OK()) { + return valueRet; + } + } + } + if (!readToken(RBRACE)) { + return parseError("Expecting '}' or ','"); + } + return Status::OK(); + } + + Status JParse::objectIdObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + std::string id; + id.reserve(ID_RESERVE_SIZE); + Status ret = quotedString(&id); + if (ret != Status::OK()) { + return ret; + } + if (id.size() != 24) { + return parseError("Expecting 24 hex digits: " + id); + } + if (!isHexString(id)) { + return parseError("Expecting hex digits: " + id); + } + builder.append(fieldName, OID(id)); + return Status::OK(); + } + + Status JParse::binaryObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + std::string binDataString; + binDataString.reserve(BINDATA_RESERVE_SIZE); + Status dataRet = quotedString(&binDataString); + if (dataRet != Status::OK()) { + return dataRet; + } + if (binDataString.size() % 4 != 0) { + return parseError("Invalid length base64 encoded string"); + } + if (!isBase64String(binDataString)) { + return parseError("Invalid character in base64 encoded string"); + } + const std::string& binData = base64::decode(binDataString); + if (!readToken(COMMA)) { + return parseError("Expected ','"); + } + + if (!readField("$type")) { + return parseError("Expected second field name: \"$type\", in \"$binary\" object"); + } + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + std::string binDataType; + binDataType.reserve(BINDATATYPE_RESERVE_SIZE); + Status typeRet = quotedString(&binDataType); + if (typeRet != Status::OK()) { + return typeRet; + } + if ((binDataType.size() != 2) || !isHexString(binDataType)) { + return 
parseError("Argument of $type in $bindata object must be a hex string representation of a single byte"); + } + builder.appendBinData( fieldName, binData.length(), + BinDataType(fromHex(binDataType)), + binData.data()); + return Status::OK(); + } + + Status JParse::dateObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + errno = 0; + char* endptr; + Date_t date; + + if (peekToken(DOUBLEQUOTE)) { + std::string dateString; + dateString.reserve(DATE_RESERVE_SIZE); + Status ret = quotedString(&dateString); + if (!ret.isOK()) { + return ret; + } + StatusWith<Date_t> dateRet = dateFromISOString(dateString); + if (!dateRet.isOK()) { + return dateRet.getStatus(); + } + date = dateRet.getValue(); + } + else if (readToken(LBRACE)) { + std::string fieldName; + fieldName.reserve(FIELD_RESERVE_SIZE); + Status ret = field(&fieldName); + if (ret != Status::OK()) { + return ret; + } + if (fieldName != "$numberLong") { + return parseError("Expected field name: $numberLong for $date value object"); + } + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + + // The number must be a quoted string, since large long numbers could overflow a double + // and thus may not be valid JSON + std::string numberLongString; + numberLongString.reserve(NUMBERLONG_RESERVE_SIZE); + ret = quotedString(&numberLongString); + if (!ret.isOK()) { + return ret; + } + + long long numberLong; + ret = parseNumberFromString(numberLongString, &numberLong); + if (!ret.isOK()) { + return ret; + } + date = numberLong; + } + else { + // SERVER-11920: We should use parseNumberFromString here, but that function requires + // that we know ahead of time where the number ends, which is not currently the case. 
+ date = static_cast<unsigned long long>(strtoll(_input, &endptr, 10)); + if (_input == endptr) { + return parseError("Date expecting integer milliseconds"); + } + if (errno == ERANGE) { + /* Need to handle this because jsonString outputs the value of Date_t as unsigned. + * See SERVER-8330 and SERVER-8573 */ + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function + // requires that we know ahead of time where the number ends, which is not currently + // the case. + date = strtoull(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Date milliseconds overflow"); + } + } + _input = endptr; + } + builder.appendDate(fieldName, date); + return Status::OK(); + } + + Status JParse::timestampObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (!readToken(LBRACE)) { + return parseError("Expecting '{' to start \"$timestamp\" object"); + } + + if (!readField("t")) { + return parseError("Expected field name \"t\" in \"$timestamp\" sub object"); + } + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (readToken("-")) { + return parseError("Negative seconds in \"$timestamp\""); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. 
+ uint32_t seconds = strtoul(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Timestamp seconds overflow"); + } + if (_input == endptr) { + return parseError("Expecting unsigned integer seconds in \"$timestamp\""); + } + _input = endptr; + if (!readToken(COMMA)) { + return parseError("Expecting ','"); + } + + if (!readField("i")) { + return parseError("Expected field name \"i\" in \"$timestamp\" sub object"); + } + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (readToken("-")) { + return parseError("Negative increment in \"$timestamp\""); + } + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + uint32_t count = strtoul(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Timestamp increment overflow"); + } + if (_input == endptr) { + return parseError("Expecting unsigned integer increment in \"$timestamp\""); + } + _input = endptr; + + if (!readToken(RBRACE)) { + return parseError("Expecting '}'"); + } + builder.append(fieldName, Timestamp(seconds, count)); + return Status::OK(); + } + + Status JParse::regexObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + std::string pat; + pat.reserve(PAT_RESERVE_SIZE); + Status patRet = quotedString(&pat); + if (patRet != Status::OK()) { + return patRet; + } + if (readToken(COMMA)) { + if (!readField("$options")) { + return parseError("Expected field name: \"$options\" in \"$regex\" object"); + } + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + std::string opt; + opt.reserve(OPT_RESERVE_SIZE); + Status optRet = quotedString(&opt); + if (optRet != Status::OK()) { + return optRet; + } + Status optCheckRet = regexOptCheck(opt); + if (optCheckRet != Status::OK()) { + return optCheckRet; + } + builder.appendRegex(fieldName, pat, opt); + } + 
else { + builder.appendRegex(fieldName, pat, ""); + } + return Status::OK(); + } + + Status JParse::dbRefObject(StringData fieldName, BSONObjBuilder& builder) { + + BSONObjBuilder subBuilder(builder.subobjStart(fieldName)); + + if (!readToken(COLON)) { + return parseError("DBRef: Expecting ':'"); + } + std::string ns; + ns.reserve(NS_RESERVE_SIZE); + Status ret = quotedString(&ns); + if (ret != Status::OK()) { + return ret; + } + subBuilder.append("$ref", ns); + + if (!readToken(COMMA)) { + return parseError("DBRef: Expecting ','"); + } + + if (!readField("$id")) { + return parseError("DBRef: Expected field name: \"$id\" in \"$ref\" object"); + } + if (!readToken(COLON)) { + return parseError("DBRef: Expecting ':'"); + } + Status valueRet = value("$id", subBuilder); + if (valueRet != Status::OK()) { + return valueRet; + } + + if (readToken(COMMA)) { + if (!readField("$db")) { + return parseError("DBRef: Expected field name: \"$db\" in \"$ref\" object"); + } + if (!readToken(COLON)) { + return parseError("DBRef: Expecting ':'"); + } + std::string db; + db.reserve(DB_RESERVE_SIZE); + ret = quotedString(&db); + if (ret != Status::OK()) { + return ret; + } + subBuilder.append("$db", db); + } + + subBuilder.done(); + return Status::OK(); + } + + Status JParse::undefinedObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (!readToken("true")) { + return parseError("Reserved field \"$undefined\" requires value of true"); + } + builder.appendUndefined(fieldName); + return Status::OK(); + } + + Status JParse::numberLongObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + + // The number must be a quoted string, since large long numbers could overflow a double and + // thus may not be valid JSON + std::string numberLongString; + numberLongString.reserve(NUMBERLONG_RESERVE_SIZE); + Status ret = quotedString(&numberLongString); 
+ if (!ret.isOK()) { + return ret; + } + + long long numberLong; + ret = parseNumberFromString(numberLongString, &numberLong); + if (!ret.isOK()) { + return ret; + } + + builder.appendNumber(fieldName, numberLong); + return Status::OK(); + } + + Status JParse::minKeyObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (!readToken("1")) { + return parseError("Reserved field \"$minKey\" requires value of 1"); + } + builder.appendMinKey(fieldName); + return Status::OK(); + } + + Status JParse::maxKeyObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (!readToken("1")) { + return parseError("Reserved field \"$maxKey\" requires value of 1"); + } + builder.appendMaxKey(fieldName); + return Status::OK(); + } + + Status JParse::array(StringData fieldName, BSONObjBuilder& builder, bool subObject) { + MONGO_JSON_DEBUG("fieldName: " << fieldName); + uint32_t index(0); + if (!readToken(LBRACKET)) { + return parseError("Expecting '['"); + } + + BSONObjBuilder* arrayBuilder = &builder; + scoped_ptr<BSONObjBuilder> subObjBuilder; + if (subObject) { + subObjBuilder.reset(new BSONObjBuilder(builder.subarrayStart(fieldName))); + arrayBuilder = subObjBuilder.get(); + } + + if (!peekToken(RBRACKET)) { + do { + Status ret = value(builder.numStr(index), *arrayBuilder); + if (ret != Status::OK()) { + return ret; + } + index++; + } while (readToken(COMMA)); + } + arrayBuilder->done(); + if (!readToken(RBRACKET)) { + return parseError("Expecting ']' or ','"); + } + return Status::OK(); + } + + /* NOTE: this could be easily modified to allow "new" before other + * constructors, but for now it only allows "new" before Date(). + * Also note that unlike the interactive shell "Date(x)" and "new Date(x)" + * have the same behavior. XXX: this may not be desired. 
*/ + Status JParse::constructor(StringData fieldName, BSONObjBuilder& builder) { + if (readToken("Date")) { + date(fieldName, builder); + } + else { + return parseError("\"new\" keyword not followed by Date constructor"); + } + return Status::OK(); + } + + Status JParse::date(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + Date_t date = static_cast<unsigned long long>(strtoll(_input, &endptr, 10)); + if (_input == endptr) { + return parseError("Date expecting integer milliseconds"); + } + if (errno == ERANGE) { + /* Need to handle this because jsonString outputs the value of Date_t as unsigned. + * See SERVER-8330 and SERVER-8573 */ + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires + // that we know ahead of time where the number ends, which is not currently the case. + date = strtoull(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Date milliseconds overflow"); + } + } + _input = endptr; + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + builder.appendDate(fieldName, date); + return Status::OK(); + } + + Status JParse::timestamp(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + if (readToken("-")) { + return parseError("Negative seconds in \"$timestamp\""); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. 
+ uint32_t seconds = strtoul(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Timestamp seconds overflow"); + } + if (_input == endptr) { + return parseError("Expecting unsigned integer seconds in \"$timestamp\""); + } + _input = endptr; + if (!readToken(COMMA)) { + return parseError("Expecting ','"); + } + if (readToken("-")) { + return parseError("Negative seconds in \"$timestamp\""); + } + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + uint32_t count = strtoul(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Timestamp increment overflow"); + } + if (_input == endptr) { + return parseError("Expecting unsigned integer increment in \"$timestamp\""); + } + _input = endptr; + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + builder.append(fieldName, Timestamp(seconds, count)); + return Status::OK(); + } + + Status JParse::objectId(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + std::string id; + id.reserve(ID_RESERVE_SIZE); + Status ret = quotedString(&id); + if (ret != Status::OK()) { + return ret; + } + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + if (id.size() != 24) { + return parseError("Expecting 24 hex digits: " + id); + } + if (!isHexString(id)) { + return parseError("Expecting hex digits: " + id); + } + builder.append(fieldName, OID(id)); + return Status::OK(); + } + + Status JParse::numberLong(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. 
+ int64_t val = strtoll(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("NumberLong out of range"); + } + if (_input == endptr) { + return parseError("Expecting number in NumberLong"); + } + _input = endptr; + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + builder.appendNumber(fieldName, static_cast<long long int>(val)); + return Status::OK(); + } + + Status JParse::numberInt(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + int32_t val = strtol(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("NumberInt out of range"); + } + if (_input == endptr) { + return parseError("Expecting unsigned number in NumberInt"); + } + _input = endptr; + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + builder.appendNumber(fieldName, static_cast<int>(val)); + return Status::OK(); + } + + + Status JParse::dbRef(StringData fieldName, BSONObjBuilder& builder) { + BSONObjBuilder subBuilder(builder.subobjStart(fieldName)); + + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + std::string ns; + ns.reserve(NS_RESERVE_SIZE); + Status refRet = quotedString(&ns); + if (refRet != Status::OK()) { + return refRet; + } + subBuilder.append("$ref", ns); + + if (!readToken(COMMA)) { + return parseError("Expecting ','"); + } + + Status valueRet = value("$id", subBuilder); + if (valueRet != Status::OK()) { + return valueRet; + } + + if (readToken(COMMA)) { + std::string db; + db.reserve(DB_RESERVE_SIZE); + Status dbRet = quotedString(&db); + if (dbRet != Status::OK()) { + return dbRet; + } + subBuilder.append("$db", db); + } + + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + + subBuilder.done(); + return 
Status::OK(); + } + + Status JParse::regex(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(FORWARDSLASH)) { + return parseError("Expecting '/'"); + } + std::string pat; + pat.reserve(PAT_RESERVE_SIZE); + Status patRet = regexPat(&pat); + if (patRet != Status::OK()) { + return patRet; + } + if (!readToken(FORWARDSLASH)) { + return parseError("Expecting '/'"); + } + std::string opt; + opt.reserve(OPT_RESERVE_SIZE); + Status optRet = regexOpt(&opt); + if (optRet != Status::OK()) { + return optRet; + } + Status optCheckRet = regexOptCheck(opt); + if (optCheckRet != Status::OK()) { + return optCheckRet; + } + builder.appendRegex(fieldName, pat, opt); + return Status::OK(); + } + + Status JParse::regexPat(std::string* result) { + MONGO_JSON_DEBUG(""); + return chars(result, "/"); + } + + Status JParse::regexOpt(std::string* result) { + MONGO_JSON_DEBUG(""); + return chars(result, "", JOPTIONS); + } + + Status JParse::regexOptCheck(StringData opt) { + MONGO_JSON_DEBUG("opt: " << opt); + std::size_t i; + for (i = 0; i < opt.size(); i++) { + if (!match(opt[i], JOPTIONS)) { + return parseError(string("Bad regex option: ") + opt[i]); + } + } + return Status::OK(); + } + + Status JParse::number(StringData fieldName, BSONObjBuilder& builder) { + char* endptrll; + char* endptrd; + long long retll; + double retd; + + // reset errno to make sure that we are getting it from strtod + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. 
+ retd = strtod(_input, &endptrd); + // if pointer does not move, we found no digits + if (_input == endptrd) { + return parseError("Bad characters in value"); + } + if (errno == ERANGE) { + return parseError("Value cannot fit in double"); + } + // reset errno to make sure that we are getting it from strtoll + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + retll = strtoll(_input, &endptrll, 10); + if (endptrll < endptrd || errno == ERANGE) { + // The number either had characters only meaningful for a double or + // could not fit in a 64 bit int + MONGO_JSON_DEBUG("Type: double"); + builder.append(fieldName, retd); + } + else if (retll == static_cast<int>(retll)) { + // The number can fit in a 32 bit int + MONGO_JSON_DEBUG("Type: 32 bit int"); + builder.append(fieldName, static_cast<int>(retll)); + } + else { + // The number can fit in a 64 bit int + MONGO_JSON_DEBUG("Type: 64 bit int"); + builder.append(fieldName, retll); + } + _input = endptrd; + if (_input >= _input_end) { + return parseError("Trailing number at end of input"); + } + return Status::OK(); + } + + Status JParse::field(std::string* result) { + MONGO_JSON_DEBUG(""); + if (peekToken(DOUBLEQUOTE) || peekToken(SINGLEQUOTE)) { + // Quoted key + // TODO: make sure quoted field names cannot contain null characters + return quotedString(result); + } + else { + // Unquoted key + // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended + // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes + // 0xffffff80 as seen by isspace when sign-extended ... 
we want it to be 0x00000080 + while (_input < _input_end && + isspace(*reinterpret_cast<const unsigned char*>(_input))) { + ++_input; + } + if (_input >= _input_end) { + return parseError("Field name expected"); + } + if (!match(*_input, ALPHA "_$")) { + return parseError("First character in field must be [A-Za-z$_]"); + } + return chars(result, "", ALPHA DIGIT "_$"); + } + } + + Status JParse::quotedString(std::string* result) { + MONGO_JSON_DEBUG(""); + if (readToken(DOUBLEQUOTE)) { + Status ret = chars(result, "\""); + if (ret != Status::OK()) { + return ret; + } + if (!readToken(DOUBLEQUOTE)) { + return parseError("Expecting '\"'"); + } + } + else if (readToken(SINGLEQUOTE)) { + Status ret = chars(result, "'"); + if (ret != Status::OK()) { + return ret; + } + if (!readToken(SINGLEQUOTE)) { + return parseError("Expecting '''"); + } + } + else { + return parseError("Expecting quoted string"); + } + return Status::OK(); + } + + /* + * terminalSet are characters that signal end of string (e.g.) 
[ :\0] + * allowedSet are the characters that are allowed, if this is set + */ + Status JParse::chars(std::string* result, const char* terminalSet, + const char* allowedSet) { + MONGO_JSON_DEBUG("terminalSet: " << terminalSet); + if (_input >= _input_end) { + return parseError("Unexpected end of input"); + } + const char* q = _input; + while (q < _input_end && !match(*q, terminalSet)) { + MONGO_JSON_DEBUG("q: " << q); + if (allowedSet != NULL) { + if (!match(*q, allowedSet)) { + _input = q; + return Status::OK(); + } + } + if (0x00 <= *q && *q <= 0x1F) { + return parseError("Invalid control character"); + } + if (*q == '\\' && q + 1 < _input_end) { + switch (*(++q)) { + // Escape characters allowed by the JSON spec + case '"': result->push_back('"'); break; + case '\'': result->push_back('\''); break; + case '\\': result->push_back('\\'); break; + case '/': result->push_back('/'); break; + case 'b': result->push_back('\b'); break; + case 'f': result->push_back('\f'); break; + case 'n': result->push_back('\n'); break; + case 'r': result->push_back('\r'); break; + case 't': result->push_back('\t'); break; + case 'u': { //expect 4 hexdigits + // TODO: handle UTF-16 surrogate characters + ++q; + if (q + 4 >= _input_end) { + return parseError("Expecting 4 hex digits"); + } + if (!isHexString(StringData(q, 4))) { + return parseError("Expecting 4 hex digits"); + } + unsigned char first = fromHex(q); + unsigned char second = fromHex(q += 2); + const std::string& utf8str = encodeUTF8(first, second); + for (unsigned int i = 0; i < utf8str.size(); i++) { + result->push_back(utf8str[i]); + } + ++q; + break; + } + // Vertical tab character. Not in JSON spec but allowed in + // our implementation according to test suite. 
+ case 'v': result->push_back('\v'); break; + // Escape characters we explicity disallow + case 'x': return parseError("Hex escape not supported"); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': return parseError("Octal escape not supported"); + // By default pass on the unescaped character + default: result->push_back(*q); break; + // TODO: check for escaped control characters + } + ++q; + } + else { + result->push_back(*q++); + } + } + if (q < _input_end) { + _input = q; + return Status::OK(); + } + return parseError("Unexpected end of input"); + } + + std::string JParse::encodeUTF8(unsigned char first, unsigned char second) const { + std::ostringstream oss; + if (first == 0 && second < 0x80) { + oss << second; + } + else if (first < 0x08) { + oss << char( 0xc0 | (first << 2 | second >> 6) ); + oss << char( 0x80 | (~0xc0 & second) ); + } + else { + oss << char( 0xe0 | (first >> 4) ); + oss << char( 0x80 | (~0xc0 & (first << 2 | second >> 6) ) ); + oss << char( 0x80 | (~0xc0 & second) ); + } + return oss.str(); + } + + inline bool JParse::peekToken(const char* token) { + return readTokenImpl(token, false); + } + + inline bool JParse::readToken(const char* token) { + return readTokenImpl(token, true); + } + + bool JParse::readTokenImpl(const char* token, bool advance) { + MONGO_JSON_DEBUG("token: " << token); + const char* check = _input; + if (token == NULL) { + return false; + } + // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended + // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes + // 0xffffff80 as seen by isspace when sign-extended ... 
we want it to be 0x00000080 + while (check < _input_end && isspace(*reinterpret_cast<const unsigned char*>(check))) { + ++check; + } + while (*token != '\0') { + if (check >= _input_end) { + return false; + } + if (*token++ != *check++) { + return false; + } + } + if (advance) { _input = check; } + return true; + } + + bool JParse::readField(StringData expectedField) { + MONGO_JSON_DEBUG("expectedField: " << expectedField); + std::string nextField; + nextField.reserve(FIELD_RESERVE_SIZE); + Status ret = field(&nextField); + if (ret != Status::OK()) { + return false; + } + if (expectedField != nextField) { + return false; + } + return true; + } + + inline bool JParse::match(char matchChar, const char* matchSet) const { + if (matchSet == NULL) { + return true; + } + if (*matchSet == '\0') { + return false; + } + return (strchr(matchSet, matchChar) != NULL); + } + + bool JParse::isHexString(StringData str) const { + MONGO_JSON_DEBUG("str: " << str); + std::size_t i; + for (i = 0; i < str.size(); i++) { + if (!isxdigit(str[i])) { + return false; + } + } + return true; + } + + bool JParse::isBase64String(StringData str) const { + MONGO_JSON_DEBUG("str: " << str); + std::size_t i; + for (i = 0; i < str.size(); i++) { + if (!match(str[i], base64::chars)) { + return false; + } + } + return true; + } + + bool JParse::isArray() { + return peekToken(LBRACKET); + } + + BSONObj fromjson(const char* jsonString, int* len) { + MONGO_JSON_DEBUG("jsonString: " << jsonString); + if (jsonString[0] == '\0') { + if (len) *len = 0; + return BSONObj(); + } + JParse jparse(jsonString); + BSONObjBuilder builder; + Status ret = Status::OK(); + try { + ret = jparse.parse(builder); + } + catch(std::exception& e) { + std::ostringstream message; + message << "caught exception from within JSON parser: " << e.what(); + throw MsgAssertionException(17031, message.str()); + } + + if (ret != Status::OK()) { + ostringstream message; + message << "code " << ret.code() << ": " << ret.codeString() << ": " 
<< ret.reason(); + throw MsgAssertionException(16619, message.str()); + } + if (len) *len = jparse.offset(); + return builder.obj(); + } + + BSONObj fromjson(const std::string& str) { + return fromjson( str.c_str() ); + } + + std::string tojson(const BSONObj& obj, JsonStringFormat format, bool pretty) { + return obj.jsonString(format, pretty); + } + + std::string tojson(const BSONArray& arr, JsonStringFormat format, bool pretty) { + return arr.jsonString(format, pretty, true); + } + + bool isArray(StringData str) { + JParse parser(str); + return parser.isArray(); + } + +} /* namespace mongo */ diff --git a/src/mongo/bson/json.h b/src/mongo/bson/json.h new file mode 100644 index 00000000000..34564765242 --- /dev/null +++ b/src/mongo/bson/json.h @@ -0,0 +1,488 @@ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. 
If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#pragma once + +#include <string> + +#include "mongo/bson/bsonobj.h" +#include "mongo/base/status.h" + +namespace mongo { + + /** + * Create a BSONObj from a JSON <http://www.json.org>, + * <http://www.ietf.org/rfc/rfc4627.txt> string. In addition to the JSON + * extensions described here + * <http://dochub.mongodb.org/core/mongodbextendedjson>, this function + * accepts unquoted field names and allows single quotes to optionally be + * used when specifying field names and string values instead of double + * quotes. JSON unicode escape sequences (of the form \uXXXX) are + * converted to utf8. + * + * @throws MsgAssertionException if parsing fails. The message included with + * this assertion includes the character offset where parsing failed. + */ + BSONObj fromjson(const std::string& str); + + /** @param len will be size of JSON object in text chars. */ + BSONObj fromjson(const char* str, int* len=NULL); + + /** + * Tests whether the JSON string is an Array. + * + * Useful for assigning the result of fromjson to the right object type. Either: + * BSONObj + * BSONArray + * + * @example Using the method to select the proper type. + * If this method returns true, the user could store the result of fromjson + * inside a BSONArray, rather than a BSONObj, in order to have it print as an + * array when passed to tojson. + * + * @param str The JSON string to test. + */ + bool isArray(StringData str); + + /** + * Convert a BSONArray to a JSON string. + * + * @param arr The BSON Array. + * @param format The JSON format (JS, TenGen, Strict). + * @param pretty Enables pretty output. + */ + std::string tojson( + const BSONArray& arr, + JsonStringFormat format = Strict, + bool pretty = false + ); + + /** + * Convert a BSONObj to a JSON string.
+ * + * @param obj The BSON Object. + * @param format The JSON format (JS, TenGen, Strict). + * @param pretty Enables pretty output. + */ + std::string tojson( + const BSONObj& obj, + JsonStringFormat format = Strict, + bool pretty = false + ); + + /** + * Parser class. A BSONObj is constructed incrementally by passing a + * BSONObjBuilder to the recursive parsing methods. The grammar for the + * element parsed is described before each function. + */ + class JParse { + public: + explicit JParse(StringData str); + + /* + * Notation: All-uppercase symbols denote non-terminals; all other + * symbols are literals. + */ + + /* + * VALUE : + * STRING + * | NUMBER + * | NUMBERINT + * | NUMBERLONG + * | OBJECT + * | ARRAY + * + * | true + * | false + * | null + * | undefined + * + * | NaN + * | Infinity + * | -Infinity + * + * | DATE + * | TIMESTAMP + * | REGEX + * | OBJECTID + * | DBREF + * + * | new CONSTRUCTOR + */ + private: + Status value(StringData fieldName, BSONObjBuilder&); + + /* + * OBJECT : + * {} + * | { MEMBERS } + * | SPECIALOBJECT + * + * MEMBERS : + * PAIR + * | PAIR , MEMBERS + * + * PAIR : + * FIELD : VALUE + * + * SPECIALOBJECT : + * OIDOBJECT + * | BINARYOBJECT + * | DATEOBJECT + * | TIMESTAMPOBJECT + * | REGEXOBJECT + * | REFOBJECT + * | UNDEFINEDOBJECT + * | NUMBERLONGOBJECT + * | MINKEYOBJECT + * | MAXKEYOBJECT + * + */ + public: + Status object(StringData fieldName, BSONObjBuilder&, bool subObj=true); + Status parse(BSONObjBuilder& builder); + bool isArray(); + + private: + /* The following functions are called with the '{' and the first + * field already parsed since they are both implied given the + * context. 
*/ + /* + * OIDOBJECT : + * { FIELD("$oid") : <24 character hex std::string> } + */ + Status objectIdObject(StringData fieldName, BSONObjBuilder&); + + /* + * BINARYOBJECT : + * { FIELD("$binary") : <base64 representation of a binary std::string>, + * FIELD("$type") : <hexadecimal representation of a single byte + * indicating the data type> } + */ + Status binaryObject(StringData fieldName, BSONObjBuilder&); + + /* + * DATEOBJECT : + * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> } + */ + Status dateObject(StringData fieldName, BSONObjBuilder&); + + /* + * TIMESTAMPOBJECT : + * { FIELD("$timestamp") : { + * FIELD("t") : <32 bit unsigned integer for seconds since epoch>, + * FIELD("i") : <32 bit unsigned integer for the increment> } } + */ + Status timestampObject(StringData fieldName, BSONObjBuilder&); + + /* + * NOTE: the rules for the body of the regex are different here, + * since it is quoted instead of surrounded by slashes. + * REGEXOBJECT : + * { FIELD("$regex") : <string representing body of regex> } + * | { FIELD("$regex") : <string representing body of regex>, + * FIELD("$options") : <string representing regex options> } + */ + Status regexObject(StringData fieldName, BSONObjBuilder&); + + /* + * REFOBJECT : + * { FIELD("$ref") : <string representing collection name>, + * FIELD("$id") : <24 character hex std::string> } + * | { FIELD("$ref") : std::string , FIELD("$id") : OBJECTID } + * | { FIELD("$ref") : std::string , FIELD("$id") : OIDOBJECT } + */ + Status dbRefObject(StringData fieldName, BSONObjBuilder&); + + /* + * UNDEFINEDOBJECT : + * { FIELD("$undefined") : true } + */ + Status undefinedObject(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERLONGOBJECT : + * { FIELD("$numberLong") : "<number>" } + */ + Status numberLongObject(StringData fieldName, BSONObjBuilder&); + + /* + * MINKEYOBJECT : + * { FIELD("$minKey") : 1 } + */ + Status minKeyObject(StringData fieldName, BSONObjBuilder& builder); + + /* + * 
MAXKEYOBJECT : + * { FIELD("$maxKey") : 1 } + */ + Status maxKeyObject(StringData fieldName, BSONObjBuilder& builder); + + /* + * ARRAY : + * [] + * | [ ELEMENTS ] + * + * ELEMENTS : + * VALUE + * | VALUE , ELEMENTS + */ + Status array(StringData fieldName, BSONObjBuilder&, bool subObj=true); + + /* + * NOTE: Currently only Date can be preceded by the "new" keyword + * CONSTRUCTOR : + * DATE + */ + Status constructor(StringData fieldName, BSONObjBuilder&); + + /* The following functions only parse the body of the constructor + * between the parentheses, not including the constructor name */ + /* + * DATE : + * Date( <64 bit signed integer for milliseconds since epoch> ) + */ + Status date(StringData fieldName, BSONObjBuilder&); + + /* + * TIMESTAMP : + * Timestamp( <32 bit unsigned integer for seconds since epoch>, + * <32 bit unsigned integer for the increment> ) + */ + Status timestamp(StringData fieldName, BSONObjBuilder&); + + /* + * OBJECTID : + * ObjectId( <24 character hex std::string> ) + */ + Status objectId(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERLONG : + * NumberLong( <number> ) + */ + Status numberLong(StringData fieldName, BSONObjBuilder&); + + /* + * NUMBERINT : + * NumberInt( <number> ) + */ + Status numberInt(StringData fieldName, BSONObjBuilder&); + + /* + * DBREF : + * Dbref( <namespace std::string> , <24 character hex std::string> ) + */ + Status dbRef(StringData fieldName, BSONObjBuilder&); + + /* + * REGEX : + * / REGEXCHARS / REGEXOPTIONS + * + * REGEXCHARS : + * REGEXCHAR + * | REGEXCHAR REGEXCHARS + * + * REGEXCHAR : + * any-Unicode-character-except-/-or-\-or-CONTROLCHAR + * | \" + * | \' + * | \\ + * | \/ + * | \b + * | \f + * | \n + * | \r + * | \t + * | \v + * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT + * | \any-Unicode-character-except-x-or-[0-7] + * + * REGEXOPTIONS : + * REGEXOPTION + * | REGEXOPTION REGEXOPTIONS + * + * REGEXOPTION : + * g | i | m | s + */ + Status regex(StringData fieldName, BSONObjBuilder&); + Status 
regexPat(std::string* result); + Status regexOpt(std::string* result); + Status regexOptCheck(StringData opt); + + /* + * NUMBER : + * + * NOTE: Number parsing is based on standard library functions, not + * necessarily on the JSON numeric grammar. + * + * Number as value - strtoll and strtod + * Date - strtoll + * Timestamp - strtoul for both timestamp and increment and '-' + * before a number explicitly disallowed + */ + Status number(StringData fieldName, BSONObjBuilder&); + + /* + * FIELD : + * STRING + * | [a-zA-Z$_] FIELDCHARS + * + * FIELDCHARS : + * [a-zA-Z0-9$_] + * | [a-zA-Z0-9$_] FIELDCHARS + */ + Status field(std::string* result); + + /* + * STRING : + * " " + * | ' ' + * | " CHARS " + * | ' CHARS ' + */ + Status quotedString(std::string* result); + + /* + * CHARS : + * CHAR + * | CHAR CHARS + * + * Note: " or ' may be allowed depending on whether the string is + * double or single quoted + * + * CHAR : + * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR + * | \" + * | \' + * | \\ + * | \/ + * | \b + * | \f + * | \n + * | \r + * | \t + * | \v + * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT + * | \any-Unicode-character-except-x-or-[0-7] + * + * HEXDIGIT : [0..9a..fA..F] + * + * per http://www.ietf.org/rfc/rfc4627.txt, control characters are + * (U+0000 through U+001F). U+007F is not mentioned as a control + * character. + * CONTROLCHAR : [0x00..0x1F] + * + * If there is not an error, result will contain a null terminated + * string, but there is no guarantee that it will not contain other + * null characters. + */ + Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL); + + /** + * Converts the two byte Unicode code point to its UTF8 character + * encoding representation. This function returns a std::string because + * UTF8 encodings for code points from 0x0000 to 0xFFFF can range + * from one to three characters.
+ */ + std::string encodeUTF8(unsigned char first, unsigned char second) const; + + /** + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Do not update the pointer to our + * buffer (same as calling readTokenImpl with advance=false). + */ + inline bool peekToken(const char* token); + + /** + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Updates the pointer to our + * buffer (same as calling readTokenImpl with advance=true). + */ + inline bool readToken(const char* token); + + /** + * @return true if the given token matches the next non whitespace + * sequence in our buffer, and false if the token doesn't match or + * we reach the end of our buffer. Do not update the pointer to our + * buffer if advance is false. + */ + bool readTokenImpl(const char* token, bool advance=true); + + /** + * @return true if the next field in our stream matches field. 
+ * Handles single quoted, double quoted, and unquoted field names + */ + bool readField(StringData field); + + /** + * @return true if matchChar is in matchSet + * @return true if matchSet is NULL and false if it is an empty string + */ + bool match(char matchChar, const char* matchSet) const; + + /** + * @return true if every character in the std::string is a hex digit + */ + bool isHexString(StringData) const; + + /** + * @return true if every character in the std::string is a valid base64 + * character + */ + bool isBase64String(StringData) const; + + /** + * @return FailedToParse status with the given message and some + * additional context information + */ + Status parseError(StringData msg); + public: + inline int offset() { return (_input - _buf); } + + private: + /* + * _buf - start of our input buffer + * _input - cursor we advance in our input buffer + * _input_end - sentinel for the end of our input buffer + * + * _buf is the null terminated buffer containing the JSON std::string we + * are parsing. _input_end points to the null byte at the end of + * the buffer. strtoll, strtol, and strtod will access the null + * byte at the end of the buffer because they are assuming a c-style + * string. + */ + const char* const _buf; + const char* _input; + const char* const _input_end; + }; + +} // namespace mongo |