diff options
Diffstat (limited to 'src/mongo/bson/json.cpp')
-rw-r--r-- | src/mongo/bson/json.cpp | 2120 |
1 files changed, 1047 insertions, 1073 deletions
diff --git a/src/mongo/bson/json.cpp b/src/mongo/bson/json.cpp index 961033570da..5e74b9d9a16 100644 --- a/src/mongo/bson/json.cpp +++ b/src/mongo/bson/json.cpp @@ -42,13 +42,14 @@ namespace mongo { - using std::unique_ptr; - using std::ostringstream; - using std::string; +using std::unique_ptr; +using std::ostringstream; +using std::string; #if 0 -#define MONGO_JSON_DEBUG(message) log() << "JSON DEBUG @ " << __FILE__\ - << ":" << __LINE__ << " " << __FUNCTION__ << ": " << message << endl; +#define MONGO_JSON_DEBUG(message) \ + log() << "JSON DEBUG @ " << __FILE__ << ":" << __LINE__ << " " << __FUNCTION__ << ": " \ + << message << endl; #else #define MONGO_JSON_DEBUG(message) #endif @@ -58,613 +59,370 @@ namespace mongo { #define CONTROL "\a\b\f\n\r\t\v" #define JOPTIONS "gims" - // Size hints given to char vectors - enum { - ID_RESERVE_SIZE = 64, - PAT_RESERVE_SIZE = 4096, - OPT_RESERVE_SIZE = 64, - FIELD_RESERVE_SIZE = 4096, - STRINGVAL_RESERVE_SIZE = 4096, - BINDATA_RESERVE_SIZE = 4096, - BINDATATYPE_RESERVE_SIZE = 4096, - NS_RESERVE_SIZE = 64, - DB_RESERVE_SIZE = 64, - NUMBERLONG_RESERVE_SIZE = 64, - DATE_RESERVE_SIZE = 64 - }; - - static const char* LBRACE = "{", - *RBRACE = "}", - *LBRACKET = "[", - *RBRACKET = "]", - *LPAREN = "(", - *RPAREN = ")", - *COLON = ":", - *COMMA = ",", - *FORWARDSLASH = "/", - *SINGLEQUOTE = "'", - *DOUBLEQUOTE = "\""; - - JParse::JParse(StringData str) - : _buf(str.rawData()) - , _input(_buf) - , _input_end(_input + str.size()) - {} - - Status JParse::parseError(StringData msg) { - std::ostringstream ossmsg; - ossmsg << msg; - ossmsg << ": offset:"; - ossmsg << offset(); - ossmsg << " of:"; - ossmsg << _buf; - return Status(ErrorCodes::FailedToParse, ossmsg.str()); - } - - Status JParse::value(StringData fieldName, BSONObjBuilder& builder) { - MONGO_JSON_DEBUG("fieldName: " << fieldName); - if (peekToken(LBRACE)) { - Status ret = object(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (peekToken(LBRACKET)) { - Status ret = array(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (readToken("new")) { - Status ret = constructor(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (readToken("Date")) { - Status ret = date(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (readToken("Timestamp")) { - Status ret = timestamp(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (readToken("ObjectId")) { - Status ret = objectId(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (readToken("NumberLong")) { - Status ret = numberLong(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (readToken("NumberInt")) { - Status ret = numberInt(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } +// Size hints given to char vectors +enum { + ID_RESERVE_SIZE = 64, + PAT_RESERVE_SIZE = 4096, + OPT_RESERVE_SIZE = 64, + FIELD_RESERVE_SIZE = 4096, + STRINGVAL_RESERVE_SIZE = 4096, + BINDATA_RESERVE_SIZE = 4096, + BINDATATYPE_RESERVE_SIZE = 4096, + NS_RESERVE_SIZE = 64, + DB_RESERVE_SIZE = 64, + NUMBERLONG_RESERVE_SIZE = 64, + DATE_RESERVE_SIZE = 64 +}; + +static const char* LBRACE = "{", * RBRACE = "}", * LBRACKET = "[", * RBRACKET = "]", * LPAREN = "(", + * RPAREN = ")", * COLON = ":", * COMMA = ",", * FORWARDSLASH = "/", + * SINGLEQUOTE = "'", * DOUBLEQUOTE = "\""; + +JParse::JParse(StringData str) + : _buf(str.rawData()), _input(_buf), _input_end(_input + str.size()) {} + +Status JParse::parseError(StringData msg) { + std::ostringstream ossmsg; + ossmsg << msg; + ossmsg << ": offset:"; + ossmsg << offset(); + ossmsg << " of:"; + ossmsg << _buf; + return Status(ErrorCodes::FailedToParse, ossmsg.str()); +} + +Status JParse::value(StringData fieldName, BSONObjBuilder& builder) { + MONGO_JSON_DEBUG("fieldName: " << fieldName); + if (peekToken(LBRACE)) { + Status ret = object(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (readToken("Dbref") || readToken("DBRef")) { - Status ret = dbRef(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + } else if (peekToken(LBRACKET)) { + Status ret = array(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (peekToken(FORWARDSLASH)) { - Status ret = regex(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + } else if (readToken("new")) { + Status ret = constructor(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (peekToken(DOUBLEQUOTE) || peekToken(SINGLEQUOTE)) { - std::string valueString; - valueString.reserve(STRINGVAL_RESERVE_SIZE); - Status ret = quotedString(&valueString); - if (ret != Status::OK()) { - return ret; - } - builder.append(fieldName, valueString); + } else if (readToken("Date")) { + Status ret = date(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (readToken("true")) { - builder.append(fieldName, true); + } else if (readToken("Timestamp")) { + Status ret = timestamp(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (readToken("false")) { - builder.append(fieldName, false); + } else if (readToken("ObjectId")) { + Status ret = objectId(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (readToken("null")) { - builder.appendNull(fieldName); + } else if (readToken("NumberLong")) { + Status ret = numberLong(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (readToken("undefined")) { - builder.appendUndefined(fieldName); + } else if (readToken("NumberInt")) { + Status ret = numberInt(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (readToken("NaN")) { - builder.append(fieldName, std::numeric_limits<double>::quiet_NaN()); + } else if (readToken("Dbref") || readToken("DBRef")) { + Status ret = dbRef(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (readToken("Infinity")) { - builder.append(fieldName, std::numeric_limits<double>::infinity()); + } else if (peekToken(FORWARDSLASH)) { + Status ret = regex(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (readToken("-Infinity")) { - builder.append(fieldName, -std::numeric_limits<double>::infinity()); + } else if (peekToken(DOUBLEQUOTE) || peekToken(SINGLEQUOTE)) { + std::string valueString; + valueString.reserve(STRINGVAL_RESERVE_SIZE); + Status ret = quotedString(&valueString); + if (ret != Status::OK()) { + return ret; } - else { - Status ret = number(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + builder.append(fieldName, valueString); + } else if (readToken("true")) { + builder.append(fieldName, true); + } else if (readToken("false")) { + builder.append(fieldName, false); + } else if (readToken("null")) { + builder.appendNull(fieldName); + } else if (readToken("undefined")) { + builder.appendUndefined(fieldName); + } else if (readToken("NaN")) { + builder.append(fieldName, std::numeric_limits<double>::quiet_NaN()); + } else if (readToken("Infinity")) { + builder.append(fieldName, std::numeric_limits<double>::infinity()); + } else if (readToken("-Infinity")) { + builder.append(fieldName, -std::numeric_limits<double>::infinity()); + } else { + Status ret = number(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - return Status::OK(); } + return Status::OK(); +} - Status JParse::parse(BSONObjBuilder& builder) { - return isArray() ? array("UNUSED", builder, false) : object("UNUSED", builder, false); +Status JParse::parse(BSONObjBuilder& builder) { + return isArray() ? array("UNUSED", builder, false) : object("UNUSED", builder, false); +} + +Status JParse::object(StringData fieldName, BSONObjBuilder& builder, bool subObject) { + MONGO_JSON_DEBUG("fieldName: " << fieldName); + if (!readToken(LBRACE)) { + return parseError("Expecting '{'"); } - Status JParse::object(StringData fieldName, BSONObjBuilder& builder, bool subObject) { - MONGO_JSON_DEBUG("fieldName: " << fieldName); - if (!readToken(LBRACE)) { - return parseError("Expecting '{'"); + // Empty object + if (readToken(RBRACE)) { + if (subObject) { + BSONObjBuilder empty(builder.subobjStart(fieldName)); + empty.done(); } + return Status::OK(); + } - // Empty object - if (readToken(RBRACE)) { - if (subObject) { - BSONObjBuilder empty(builder.subobjStart(fieldName)); - empty.done(); - } - return Status::OK(); - } + // Special object + std::string firstField; + firstField.reserve(FIELD_RESERVE_SIZE); + Status ret = field(&firstField); + if (ret != Status::OK()) { + return ret; + } - // Special object - std::string firstField; - firstField.reserve(FIELD_RESERVE_SIZE); - Status ret = field(&firstField); + if (firstField == "$oid") { + if (!subObject) { + return parseError("Reserved field name in base object: $oid"); + } + Status ret = objectIdObject(fieldName, builder); if (ret != Status::OK()) { return ret; } - - if (firstField == "$oid") { - if (!subObject) { - return parseError("Reserved field name in base object: $oid"); - } - Status ret = objectIdObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (firstField == "$binary") { - if (!subObject) { - return parseError("Reserved field name in base object: $binary"); - } - Status ret = binaryObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + } else if (firstField == "$binary") { + if (!subObject) { + return parseError("Reserved field name in base object: $binary"); } - else if (firstField == "$date") { - if (!subObject) { - return parseError("Reserved field name in base object: $date"); - } - Status ret = dateObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + Status ret = binaryObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (firstField == "$timestamp") { - if (!subObject) { - return parseError("Reserved field name in base object: $timestamp"); - } - Status ret = timestampObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + } else if (firstField == "$date") { + if (!subObject) { + return parseError("Reserved field name in base object: $date"); } - else if (firstField == "$regex") { - if (!subObject) { - return parseError("Reserved field name in base object: $regex"); - } - Status ret = regexObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } - } - else if (firstField == "$ref") { - if (!subObject) { - return parseError("Reserved field name in base object: $ref"); - } - Status ret = dbRefObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + Status ret = dateObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (firstField == "$undefined") { - if (!subObject) { - return parseError("Reserved field name in base object: $undefined"); - } - Status ret = undefinedObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + } else if (firstField == "$timestamp") { + if (!subObject) { + return parseError("Reserved field name in base object: $timestamp"); } - else if (firstField == "$numberLong") { - if (!subObject) { - return parseError("Reserved field name in base object: $numberLong"); - } - Status ret = numberLongObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + Status ret = timestampObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else if (firstField == "$minKey") { - if (!subObject) { - return parseError("Reserved field name in base object: $minKey"); - } - Status ret = minKeyObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + } else if (firstField == "$regex") { + if (!subObject) { + return parseError("Reserved field name in base object: $regex"); } - else if (firstField == "$maxKey") { - if (!subObject) { - return parseError("Reserved field name in base object: $maxKey"); - } - Status ret = maxKeyObject(fieldName, builder); - if (ret != Status::OK()) { - return ret; - } + Status ret = regexObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - else { // firstField != <reserved field name> - // Normal object - - // Only create a sub builder if this is not the base object - BSONObjBuilder* objBuilder = &builder; - unique_ptr<BSONObjBuilder> subObjBuilder; - if (subObject) { - subObjBuilder.reset(new BSONObjBuilder(builder.subobjStart(fieldName))); - objBuilder = subObjBuilder.get(); - } - - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - Status valueRet = value(firstField, *objBuilder); - if (valueRet != Status::OK()) { - return valueRet; - } - while (readToken(COMMA)) { - std::string fieldName; - fieldName.reserve(FIELD_RESERVE_SIZE); - Status fieldRet = field(&fieldName); - if (fieldRet != Status::OK()) { - return fieldRet; - } - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - Status valueRet = value(fieldName, *objBuilder); - if (valueRet != Status::OK()) { - return valueRet; - } - } + } else if (firstField == "$ref") { + if (!subObject) { + return parseError("Reserved field name in base object: $ref"); } - if (!readToken(RBRACE)) { - return parseError("Expecting '}' or ','"); + Status ret = dbRefObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - return Status::OK(); - } - - Status JParse::objectIdObject(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(COLON)) { - return parseError("Expected ':'"); + } else if (firstField == "$undefined") { + if (!subObject) { + return parseError("Reserved field name in base object: $undefined"); } - std::string id; - id.reserve(ID_RESERVE_SIZE); - Status ret = quotedString(&id); + Status ret = undefinedObject(fieldName, builder); if (ret != Status::OK()) { return ret; } - if (id.size() != 24) { - return parseError("Expecting 24 hex digits: " + id); - } - if (!isHexString(id)) { - return parseError("Expecting hex digits: " + id); + } else if (firstField == "$numberLong") { + if (!subObject) { + return parseError("Reserved field name in base object: $numberLong"); } - builder.append(fieldName, OID(id)); - return Status::OK(); - } - - Status JParse::binaryObject(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(COLON)) { - return parseError("Expected ':'"); + Status ret = numberLongObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - std::string binDataString; - binDataString.reserve(BINDATA_RESERVE_SIZE); - Status dataRet = quotedString(&binDataString); - if (dataRet != Status::OK()) { - return dataRet; + } else if (firstField == "$minKey") { + if (!subObject) { + return parseError("Reserved field name in base object: $minKey"); } - if (binDataString.size() % 4 != 0) { - return parseError("Invalid length base64 encoded string"); + Status ret = minKeyObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; } - if (!isBase64String(binDataString)) { - return parseError("Invalid character in base64 encoded string"); + } else if (firstField == "$maxKey") { + if (!subObject) { + return parseError("Reserved field name in base object: $maxKey"); } - const std::string& binData = base64::decode(binDataString); - if (!readToken(COMMA)) { - return parseError("Expected ','"); + Status ret = maxKeyObject(fieldName, builder); + if (ret != Status::OK()) { + return ret; } + } else { // firstField != <reserved field name> + // Normal object - if (!readField("$type")) { - return parseError("Expected second field name: \"$type\", in \"$binary\" object"); - } - if (!readToken(COLON)) { - return parseError("Expected ':'"); - } - std::string binDataType; - binDataType.reserve(BINDATATYPE_RESERVE_SIZE); - Status typeRet = quotedString(&binDataType); - if (typeRet != Status::OK()) { - return typeRet; - } - if ((binDataType.size() != 2) || !isHexString(binDataType)) { - return parseError("Argument of $type in $bindata object must be a hex string representation of a single byte"); + // Only create a sub builder if this is not the base object + BSONObjBuilder* objBuilder = &builder; + unique_ptr<BSONObjBuilder> subObjBuilder; + if (subObject) { + subObjBuilder.reset(new BSONObjBuilder(builder.subobjStart(fieldName))); + objBuilder = subObjBuilder.get(); } - builder.appendBinData( fieldName, binData.length(), - BinDataType(fromHex(binDataType)), - binData.data()); - return Status::OK(); - } - Status JParse::dateObject(StringData fieldName, BSONObjBuilder& builder) { if (!readToken(COLON)) { - return parseError("Expected ':'"); + return parseError("Expecting ':'"); } - errno = 0; - char* endptr; - Date_t date; - - if (peekToken(DOUBLEQUOTE)) { - std::string dateString; - dateString.reserve(DATE_RESERVE_SIZE); - Status ret = quotedString(&dateString); - if (!ret.isOK()) { - return ret; - } - StatusWith<Date_t> dateRet = dateFromISOString(dateString); - if (!dateRet.isOK()) { - return dateRet.getStatus(); - } - date = dateRet.getValue(); + Status valueRet = value(firstField, *objBuilder); + if (valueRet != Status::OK()) { + return valueRet; } - else if (readToken(LBRACE)) { + while (readToken(COMMA)) { std::string fieldName; fieldName.reserve(FIELD_RESERVE_SIZE); - Status ret = field(&fieldName); - if (ret != Status::OK()) { - return ret; - } - if (fieldName != "$numberLong") { - return parseError("Expected field name: $numberLong for $date value object"); + Status fieldRet = field(&fieldName); + if (fieldRet != Status::OK()) { + return fieldRet; } if (!readToken(COLON)) { return parseError("Expecting ':'"); } - - // The number must be a quoted string, since large long numbers could overflow a double - // and thus may not be valid JSON - std::string numberLongString; - numberLongString.reserve(NUMBERLONG_RESERVE_SIZE); - ret = quotedString(&numberLongString); - if (!ret.isOK()) { - return ret; - } - - long long numberLong; - ret = parseNumberFromString(numberLongString, &numberLong); - if (!ret.isOK()) { - return ret; - } - date = Date_t::fromMillisSinceEpoch(numberLong); - } - else { - // SERVER-11920: We should use parseNumberFromString here, but that function requires - // that we know ahead of time where the number ends, which is not currently the case. - date = Date_t::fromMillisSinceEpoch(strtoll(_input, &endptr, 10)); - if (_input == endptr) { - return parseError("Date expecting integer milliseconds"); - } - if (errno == ERANGE) { - /* Need to handle this because jsonString outputs the value of Date_t as unsigned. - * See SERVER-8330 and SERVER-8573 */ - errno = 0; - // SERVER-11920: We should use parseNumberFromString here, but that function - // requires that we know ahead of time where the number ends, which is not currently - // the case. - date = Date_t::fromMillisSinceEpoch( - static_cast<long long>(strtoull(_input, &endptr, 10))); - if (errno == ERANGE) { - return parseError("Date milliseconds overflow"); - } + Status valueRet = value(fieldName, *objBuilder); + if (valueRet != Status::OK()) { + return valueRet; } - _input = endptr; } - builder.appendDate(fieldName, date); - return Status::OK(); } - - Status JParse::timestampObject(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - if (!readToken(LBRACE)) { - return parseError("Expecting '{' to start \"$timestamp\" object"); - } - - if (!readField("t")) { - return parseError("Expected field name \"t\" in \"$timestamp\" sub object"); - } - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - if (readToken("-")) { - return parseError("Negative seconds in \"$timestamp\""); - } - errno = 0; - char* endptr; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - uint32_t seconds = strtoul(_input, &endptr, 10); - if (errno == ERANGE) { - return parseError("Timestamp seconds overflow"); - } - if (_input == endptr) { - return parseError("Expecting unsigned integer seconds in \"$timestamp\""); - } - _input = endptr; - if (!readToken(COMMA)) { - return parseError("Expecting ','"); - } - - if (!readField("i")) { - return parseError("Expected field name \"i\" in \"$timestamp\" sub object"); - } - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - if (readToken("-")) { - return parseError("Negative increment in \"$timestamp\""); - } - errno = 0; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - uint32_t count = strtoul(_input, &endptr, 10); - if (errno == ERANGE) { - return parseError("Timestamp increment overflow"); - } - if (_input == endptr) { - return parseError("Expecting unsigned integer increment in \"$timestamp\""); - } - _input = endptr; - - if (!readToken(RBRACE)) { - return parseError("Expecting '}'"); - } - builder.append(fieldName, Timestamp(seconds, count)); - return Status::OK(); + if (!readToken(RBRACE)) { + return parseError("Expecting '}' or ','"); } + return Status::OK(); +} - Status JParse::regexObject(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - std::string pat; - pat.reserve(PAT_RESERVE_SIZE); - Status patRet = quotedString(&pat); - if (patRet != Status::OK()) { - return patRet; - } - if (readToken(COMMA)) { - if (!readField("$options")) { - return parseError("Expected field name: \"$options\" in \"$regex\" object"); - } - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - std::string opt; - opt.reserve(OPT_RESERVE_SIZE); - Status optRet = quotedString(&opt); - if (optRet != Status::OK()) { - return optRet; - } - Status optCheckRet = regexOptCheck(opt); - if (optCheckRet != Status::OK()) { - return optCheckRet; - } - builder.appendRegex(fieldName, pat, opt); - } - else { - builder.appendRegex(fieldName, pat, ""); - } - return Status::OK(); +Status JParse::objectIdObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expected ':'"); } + std::string id; + id.reserve(ID_RESERVE_SIZE); + Status ret = quotedString(&id); + if (ret != Status::OK()) { + return ret; + } + if (id.size() != 24) { + return parseError("Expecting 24 hex digits: " + id); + } + if (!isHexString(id)) { + return parseError("Expecting hex digits: " + id); + } + builder.append(fieldName, OID(id)); + return Status::OK(); +} - Status JParse::dbRefObject(StringData fieldName, BSONObjBuilder& builder) { - - BSONObjBuilder subBuilder(builder.subobjStart(fieldName)); +Status JParse::binaryObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + std::string binDataString; + binDataString.reserve(BINDATA_RESERVE_SIZE); + Status dataRet = quotedString(&binDataString); + if (dataRet != Status::OK()) { + return dataRet; + } + if (binDataString.size() % 4 != 0) { + return parseError("Invalid length base64 encoded string"); + } + if (!isBase64String(binDataString)) { + return parseError("Invalid character in base64 encoded string"); + } + const std::string& binData = base64::decode(binDataString); + if (!readToken(COMMA)) { + return parseError("Expected ','"); + } - if (!readToken(COLON)) { - return parseError("DBRef: Expecting ':'"); - } - std::string ns; - ns.reserve(NS_RESERVE_SIZE); - Status ret = quotedString(&ns); - if (ret != Status::OK()) { + if (!readField("$type")) { + return parseError("Expected second field name: \"$type\", in \"$binary\" object"); + } + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + std::string binDataType; + binDataType.reserve(BINDATATYPE_RESERVE_SIZE); + Status typeRet = quotedString(&binDataType); + if (typeRet != Status::OK()) { + return typeRet; + } + if ((binDataType.size() != 2) || !isHexString(binDataType)) { + return parseError( + "Argument of $type in $bindata object must be a hex string representation of a single " + "byte"); + } + builder.appendBinData( + fieldName, binData.length(), BinDataType(fromHex(binDataType)), binData.data()); + return Status::OK(); +} + +Status JParse::dateObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expected ':'"); + } + errno = 0; + char* endptr; + Date_t date; + + if (peekToken(DOUBLEQUOTE)) { + std::string dateString; + dateString.reserve(DATE_RESERVE_SIZE); + Status ret = quotedString(&dateString); + if (!ret.isOK()) { return ret; } - subBuilder.append("$ref", ns); - - if (!readToken(COMMA)) { - return parseError("DBRef: Expecting ','"); - } - - if (!readField("$id")) { - return parseError("DBRef: Expected field name: \"$id\" in \"$ref\" object"); + StatusWith<Date_t> dateRet = dateFromISOString(dateString); + if (!dateRet.isOK()) { + return dateRet.getStatus(); } - if (!readToken(COLON)) { - return parseError("DBRef: Expecting ':'"); - } - Status valueRet = value("$id", subBuilder); - if (valueRet != Status::OK()) { - return valueRet; - } - - if (readToken(COMMA)) { - if (!readField("$db")) { - return parseError("DBRef: Expected field name: \"$db\" in \"$ref\" object"); - } - if (!readToken(COLON)) { - return parseError("DBRef: Expecting ':'"); - } - std::string db; - db.reserve(DB_RESERVE_SIZE); - ret = quotedString(&db); - if (ret != Status::OK()) { - return ret; - } - subBuilder.append("$db", db); - } - - subBuilder.done(); - return Status::OK(); - } - - Status JParse::undefinedObject(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(COLON)) { - return parseError("Expecting ':'"); + date = dateRet.getValue(); + } else if (readToken(LBRACE)) { + std::string fieldName; + fieldName.reserve(FIELD_RESERVE_SIZE); + Status ret = field(&fieldName); + if (ret != Status::OK()) { + return ret; } - if (!readToken("true")) { - return parseError("Reserved field \"$undefined\" requires value of true"); + if (fieldName != "$numberLong") { + return parseError("Expected field name: $numberLong for $date value object"); } - builder.appendUndefined(fieldName); - return Status::OK(); - } - - Status JParse::numberLongObject(StringData fieldName, BSONObjBuilder& builder) { if (!readToken(COLON)) { return parseError("Expecting ':'"); } - // The number must be a quoted string, since large long numbers could overflow a double and - // thus may not be valid JSON + // The number must be a quoted string, since large long numbers could overflow a double + // and thus may not be valid JSON std::string numberLongString; numberLongString.reserve(NUMBERLONG_RESERVE_SIZE); - Status ret = quotedString(&numberLongString); + ret = quotedString(&numberLongString); if (!ret.isOK()) { return ret; } @@ -674,86 +432,11 @@ namespace mongo { if (!ret.isOK()) { return ret; } - - builder.appendNumber(fieldName, numberLong); - return Status::OK(); - } - - Status JParse::minKeyObject(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - if (!readToken("1")) { - return parseError("Reserved field \"$minKey\" requires value of 1"); - } - builder.appendMinKey(fieldName); - return Status::OK(); - } - - Status JParse::maxKeyObject(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(COLON)) { - return parseError("Expecting ':'"); - } - if (!readToken("1")) { - return parseError("Reserved field \"$maxKey\" requires value of 1"); - } - builder.appendMaxKey(fieldName); - return Status::OK(); - } - - Status JParse::array(StringData fieldName, BSONObjBuilder& builder, bool subObject) { - MONGO_JSON_DEBUG("fieldName: " << fieldName); - uint32_t index(0); - if (!readToken(LBRACKET)) { - return parseError("Expecting '['"); - } - - BSONObjBuilder* arrayBuilder = &builder; - unique_ptr<BSONObjBuilder> subObjBuilder; - if (subObject) { - subObjBuilder.reset(new BSONObjBuilder(builder.subarrayStart(fieldName))); - arrayBuilder = subObjBuilder.get(); - } - - if (!peekToken(RBRACKET)) { - do { - Status ret = value(builder.numStr(index), *arrayBuilder); - if (ret != Status::OK()) { - return ret; - } - index++; - } while (readToken(COMMA)); - } - arrayBuilder->done(); - if (!readToken(RBRACKET)) { - return parseError("Expecting ']' or ','"); - } - return Status::OK(); - } - - /* NOTE: this could be easily modified to allow "new" before other - * constructors, but for now it only allows "new" before Date(). - * Also note that unlike the interactive shell "Date(x)" and "new Date(x)" - * have the same behavior. XXX: this may not be desired. */ - Status JParse::constructor(StringData fieldName, BSONObjBuilder& builder) { - if (readToken("Date")) { - date(fieldName, builder); - } - else { - return parseError("\"new\" keyword not followed by Date constructor"); - } - return Status::OK(); - } - - Status JParse::date(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(LPAREN)) { - return parseError("Expecting '('"); - } - errno = 0; - char* endptr; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - Date_t date = Date_t::fromMillisSinceEpoch(strtoll(_input, &endptr, 10)); + date = Date_t::fromMillisSinceEpoch(numberLong); + } else { + // SERVER-11920: We should use parseNumberFromString here, but that function requires + // that we know ahead of time where the number ends, which is not currently the case. + date = Date_t::fromMillisSinceEpoch(strtoll(_input, &endptr, 10)); if (_input == endptr) { return parseError("Date expecting integer milliseconds"); } @@ -761,192 +444,102 @@ namespace mongo { /* Need to handle this because jsonString outputs the value of Date_t as unsigned. * See SERVER-8330 and SERVER-8573 */ errno = 0; - // SERVER-11920: We should use parseNumberFromString here, but that function requires - // that we know ahead of time where the number ends, which is not currently the case. - date = Date_t::fromMillisSinceEpoch( - static_cast<long long>(strtoull(_input, &endptr, 10))); + // SERVER-11920: We should use parseNumberFromString here, but that function + // requires that we know ahead of time where the number ends, which is not currently + // the case. + date = + Date_t::fromMillisSinceEpoch(static_cast<long long>(strtoull(_input, &endptr, 10))); if (errno == ERANGE) { return parseError("Date milliseconds overflow"); } } _input = endptr; - if (!readToken(RPAREN)) { - return parseError("Expecting ')'"); - } - builder.appendDate(fieldName, date); - return Status::OK(); } + builder.appendDate(fieldName, date); + return Status::OK(); +} - Status JParse::timestamp(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(LPAREN)) { - return parseError("Expecting '('"); - } - if (readToken("-")) { - return parseError("Negative seconds in \"$timestamp\""); - } - errno = 0; - char* endptr; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - uint32_t seconds = strtoul(_input, &endptr, 10); - if (errno == ERANGE) { - return parseError("Timestamp seconds overflow"); - } - if (_input == endptr) { - return parseError("Expecting unsigned integer seconds in \"$timestamp\""); - } - _input = endptr; - if (!readToken(COMMA)) { - return parseError("Expecting ','"); - } - if (readToken("-")) { - return parseError("Negative seconds in \"$timestamp\""); - } - errno = 0; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - uint32_t count = strtoul(_input, &endptr, 10); - if (errno == ERANGE) { - return parseError("Timestamp increment overflow"); - } - if (_input == endptr) { - return parseError("Expecting unsigned integer increment in \"$timestamp\""); - } - _input = endptr; - if (!readToken(RPAREN)) { - return parseError("Expecting ')'"); - } - builder.append(fieldName, Timestamp(seconds, count)); - return Status::OK(); +Status JParse::timestampObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); } - - Status JParse::objectId(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(LPAREN)) { - return parseError("Expecting '('"); - } - std::string id; - id.reserve(ID_RESERVE_SIZE); - Status ret = quotedString(&id); - if (ret != Status::OK()) { - return ret; - } - if (!readToken(RPAREN)) { - return parseError("Expecting ')'"); - } - if (id.size() != 24) { - return parseError("Expecting 24 hex digits: " + id); - } - if (!isHexString(id)) { - return parseError("Expecting hex digits: " + id); - } - builder.append(fieldName, OID(id)); - return Status::OK(); + if (!readToken(LBRACE)) { + return parseError("Expecting '{' to start \"$timestamp\" object"); } - Status JParse::numberLong(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(LPAREN)) { - return parseError("Expecting '('"); - } - errno = 0; - char* endptr; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - int64_t val = strtoll(_input, &endptr, 10); - if (errno == ERANGE) { - return parseError("NumberLong out of range"); - } - if (_input == endptr) { - return parseError("Expecting number in NumberLong"); - } - _input = endptr; - if (!readToken(RPAREN)) { - return parseError("Expecting ')'"); - } - builder.appendNumber(fieldName, static_cast<long long int>(val)); - return Status::OK(); + if (!readField("t")) { + return parseError("Expected field name \"t\" in \"$timestamp\" sub object"); } - - Status JParse::numberInt(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(LPAREN)) { - return parseError("Expecting '('"); - } - errno = 0; - char* endptr; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - int32_t val = strtol(_input, &endptr, 10); - if (errno == ERANGE) { - return parseError("NumberInt out of range"); - } - if (_input == endptr) { - return parseError("Expecting unsigned number in NumberInt"); - } - _input = endptr; - if (!readToken(RPAREN)) { - return parseError("Expecting ')'"); - } - builder.appendNumber(fieldName, static_cast<int>(val)); - return Status::OK(); + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (readToken("-")) { + return parseError("Negative seconds in \"$timestamp\""); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + uint32_t seconds = strtoul(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Timestamp seconds overflow"); + } + if (_input == endptr) { + return parseError("Expecting unsigned integer seconds in \"$timestamp\""); + } + _input = endptr; + if (!readToken(COMMA)) { + return parseError("Expecting ','"); } + if (!readField("i")) { + return parseError("Expected field name \"i\" in \"$timestamp\" sub object"); + } + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (readToken("-")) { + return parseError("Negative increment in \"$timestamp\""); + } + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + uint32_t count = strtoul(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Timestamp increment overflow"); + } + if (_input == endptr) { + return parseError("Expecting unsigned integer increment in \"$timestamp\""); + } + _input = endptr; - Status JParse::dbRef(StringData fieldName, BSONObjBuilder& builder) { - BSONObjBuilder subBuilder(builder.subobjStart(fieldName)); - - if (!readToken(LPAREN)) { - return parseError("Expecting '('"); - } - std::string ns; - ns.reserve(NS_RESERVE_SIZE); - Status refRet = quotedString(&ns); - if (refRet != Status::OK()) { - return refRet; - } - subBuilder.append("$ref", ns); - - if (!readToken(COMMA)) { - return parseError("Expecting ','"); - } - - Status valueRet = value("$id", subBuilder); - if (valueRet != Status::OK()) { - return valueRet; - } - - if (readToken(COMMA)) { - std::string db; - db.reserve(DB_RESERVE_SIZE); - Status dbRet = quotedString(&db); - if (dbRet != Status::OK()) { - return dbRet; - } - subBuilder.append("$db", db); - } - - if (!readToken(RPAREN)) { - return parseError("Expecting ')'"); - } - - subBuilder.done(); - return Status::OK(); + if (!readToken(RBRACE)) { + return parseError("Expecting '}'"); } + builder.append(fieldName, Timestamp(seconds, count)); + return Status::OK(); +} - Status JParse::regex(StringData fieldName, BSONObjBuilder& builder) { - if (!readToken(FORWARDSLASH)) { - return parseError("Expecting '/'"); - } - std::string pat; - pat.reserve(PAT_RESERVE_SIZE); - Status patRet = regexPat(&pat); - if (patRet != Status::OK()) { - return patRet; +Status JParse::regexObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + std::string pat; + pat.reserve(PAT_RESERVE_SIZE); + Status patRet = quotedString(&pat); + if (patRet != Status::OK()) { + return patRet; + } + if (readToken(COMMA)) { + if (!readField("$options")) { + return parseError("Expected field name: \"$options\" in \"$regex\" object"); } - if (!readToken(FORWARDSLASH)) { - return parseError("Expecting '/'"); + if (!readToken(COLON)) { + return parseError("Expecting ':'"); } std::string opt; opt.reserve(OPT_RESERVE_SIZE); - Status optRet = regexOpt(&opt); + Status optRet = quotedString(&opt); if (optRet != Status::OK()) { return optRet; } @@ -955,351 +548,732 @@ namespace mongo { return optCheckRet; } builder.appendRegex(fieldName, pat, opt); - return Status::OK(); + } else { + builder.appendRegex(fieldName, pat, ""); } + return Status::OK(); +} - Status JParse::regexPat(std::string* result) { - MONGO_JSON_DEBUG(""); - return chars(result, "/"); - } +Status JParse::dbRefObject(StringData fieldName, BSONObjBuilder& builder) { + BSONObjBuilder subBuilder(builder.subobjStart(fieldName)); - Status JParse::regexOpt(std::string* result) { - MONGO_JSON_DEBUG(""); - return chars(result, "", JOPTIONS); + if (!readToken(COLON)) { + return parseError("DBRef: Expecting ':'"); } + std::string ns; + ns.reserve(NS_RESERVE_SIZE); + Status ret = quotedString(&ns); + if (ret != Status::OK()) { + return ret; + } + subBuilder.append("$ref", ns); - Status JParse::regexOptCheck(StringData opt) { - MONGO_JSON_DEBUG("opt: " << opt); - std::size_t i; - for (i = 0; i < opt.size(); i++) { - if (!match(opt[i], JOPTIONS)) { - return parseError(string("Bad regex option: ") + opt[i]); - } - } - return Status::OK(); + if (!readToken(COMMA)) { + return parseError("DBRef: Expecting ','"); } - Status JParse::number(StringData fieldName, BSONObjBuilder& builder) { - char* endptrll; - char* endptrd; - long long retll; - double retd; + if (!readField("$id")) { + return parseError("DBRef: Expected field name: \"$id\" in \"$ref\" object"); + } + if (!readToken(COLON)) { + return parseError("DBRef: Expecting ':'"); + } + Status valueRet = value("$id", subBuilder); + if (valueRet != Status::OK()) { + return valueRet; + } - // reset errno to make sure that we are getting it from strtod - errno = 0; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - retd = strtod(_input, &endptrd); - // if pointer does not move, we found no digits - if (_input == endptrd) { - return parseError("Bad characters in value"); + if (readToken(COMMA)) { + if (!readField("$db")) { + return parseError("DBRef: Expected field name: \"$db\" in \"$ref\" object"); } - if (errno == ERANGE) { - return parseError("Value cannot fit in double"); + if (!readToken(COLON)) { + return parseError("DBRef: Expecting ':'"); } - // reset errno to make sure that we are getting it from strtoll - errno = 0; - // SERVER-11920: We should use parseNumberFromString here, but that function requires that - // we know ahead of time where the number ends, which is not currently the case. - retll = strtoll(_input, &endptrll, 10); - if (endptrll < endptrd || errno == ERANGE) { - // The number either had characters only meaningful for a double or - // could not fit in a 64 bit int - MONGO_JSON_DEBUG("Type: double"); - builder.append(fieldName, retd); - } - else if (retll == static_cast<int>(retll)) { - // The number can fit in a 32 bit int - MONGO_JSON_DEBUG("Type: 32 bit int"); - builder.append(fieldName, static_cast<int>(retll)); - } - else { - // The number can fit in a 64 bit int - MONGO_JSON_DEBUG("Type: 64 bit int"); - builder.append(fieldName, retll); - } - _input = endptrd; - if (_input >= _input_end) { - return parseError("Trailing number at end of input"); + std::string db; + db.reserve(DB_RESERVE_SIZE); + ret = quotedString(&db); + if (ret != Status::OK()) { + return ret; } - return Status::OK(); + subBuilder.append("$db", db); } - Status JParse::field(std::string* result) { - MONGO_JSON_DEBUG(""); - if (peekToken(DOUBLEQUOTE) || peekToken(SINGLEQUOTE)) { - // Quoted key - // TODO: make sure quoted field names cannot contain null characters - return quotedString(result); - } - else { - // Unquoted key - // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended - // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes - // 0xffffff80 as seen by isspace when sign-extended ... we want it to be 0x00000080 - while (_input < _input_end && - isspace(*reinterpret_cast<const unsigned char*>(_input))) { - ++_input; - } - if (_input >= _input_end) { - return parseError("Field name expected"); - } - if (!match(*_input, ALPHA "_$")) { - return parseError("First character in field must be [A-Za-z$_]"); - } - return chars(result, "", ALPHA DIGIT "_$"); - } + subBuilder.done(); + return Status::OK(); +} + +Status JParse::undefinedObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); } + if (!readToken("true")) { + return parseError("Reserved field \"$undefined\" requires value of true"); + } + builder.appendUndefined(fieldName); + return Status::OK(); +} - Status JParse::quotedString(std::string* result) { - MONGO_JSON_DEBUG(""); - if (readToken(DOUBLEQUOTE)) { - Status ret = chars(result, "\""); - if (ret != Status::OK()) { - return ret; - } - if (!readToken(DOUBLEQUOTE)) { - return parseError("Expecting '\"'"); - } - } - else if (readToken(SINGLEQUOTE)) { - Status ret = chars(result, "'"); +Status JParse::numberLongObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + + // The number must be a quoted string, since large long numbers could overflow a double and + // thus may not be valid JSON + std::string numberLongString; + numberLongString.reserve(NUMBERLONG_RESERVE_SIZE); + Status ret = quotedString(&numberLongString); + if (!ret.isOK()) { + return ret; + } + + long long numberLong; + ret = parseNumberFromString(numberLongString, &numberLong); + if (!ret.isOK()) { + return ret; + } + + builder.appendNumber(fieldName, numberLong); + return Status::OK(); +} + +Status JParse::minKeyObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (!readToken("1")) { + return parseError("Reserved field \"$minKey\" requires value of 1"); + } + builder.appendMinKey(fieldName); + return Status::OK(); +} + +Status JParse::maxKeyObject(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(COLON)) { + return parseError("Expecting ':'"); + } + if (!readToken("1")) { + return parseError("Reserved field \"$maxKey\" requires value of 1"); + } + builder.appendMaxKey(fieldName); + return Status::OK(); +} + +Status JParse::array(StringData fieldName, BSONObjBuilder& builder, bool subObject) { + MONGO_JSON_DEBUG("fieldName: " << fieldName); + uint32_t index(0); + if (!readToken(LBRACKET)) { + return parseError("Expecting '['"); + } + + BSONObjBuilder* arrayBuilder = &builder; + unique_ptr<BSONObjBuilder> subObjBuilder; + if (subObject) { + subObjBuilder.reset(new BSONObjBuilder(builder.subarrayStart(fieldName))); + arrayBuilder = subObjBuilder.get(); + } + + if (!peekToken(RBRACKET)) { + do { + Status ret = value(builder.numStr(index), *arrayBuilder); if (ret != Status::OK()) { return ret; } - if (!readToken(SINGLEQUOTE)) { - return parseError("Expecting '''"); - } - } - else { - return parseError("Expecting quoted string"); - } - return Status::OK(); + index++; + } while (readToken(COMMA)); + } + arrayBuilder->done(); + if (!readToken(RBRACKET)) { + return parseError("Expecting ']' or ','"); } + return Status::OK(); +} + +/* NOTE: this could be easily modified to allow "new" before other + * constructors, but for now it only allows "new" before Date(). + * Also note that unlike the interactive shell "Date(x)" and "new Date(x)" + * have the same behavior. XXX: this may not be desired. */ +Status JParse::constructor(StringData fieldName, BSONObjBuilder& builder) { + if (readToken("Date")) { + date(fieldName, builder); + } else { + return parseError("\"new\" keyword not followed by Date constructor"); + } + return Status::OK(); +} - /* - * terminalSet are characters that signal end of string (e.g.) [ :\0] - * allowedSet are the characters that are allowed, if this is set - */ - Status JParse::chars(std::string* result, const char* terminalSet, - const char* allowedSet) { - MONGO_JSON_DEBUG("terminalSet: " << terminalSet); - if (_input >= _input_end) { - return parseError("Unexpected end of input"); - } - const char* q = _input; - while (q < _input_end && !match(*q, terminalSet)) { - MONGO_JSON_DEBUG("q: " << q); - if (allowedSet != NULL) { - if (!match(*q, allowedSet)) { - _input = q; - return Status::OK(); - } - } - if (0x00 <= *q && *q <= 0x1F) { - return parseError("Invalid control character"); - } - if (*q == '\\' && q + 1 < _input_end) { - switch (*(++q)) { - // Escape characters allowed by the JSON spec - case '"': result->push_back('"'); break; - case '\'': result->push_back('\''); break; - case '\\': result->push_back('\\'); break; - case '/': result->push_back('/'); break; - case 'b': result->push_back('\b'); break; - case 'f': result->push_back('\f'); break; - case 'n': result->push_back('\n'); break; - case 'r': result->push_back('\r'); break; - case 't': result->push_back('\t'); break; - case 'u': { //expect 4 hexdigits - // TODO: handle UTF-16 surrogate characters - ++q; - if (q + 4 >= _input_end) { - return parseError("Expecting 4 hex digits"); - } - if (!isHexString(StringData(q, 4))) { - return parseError("Expecting 4 hex digits"); - } - unsigned char first = fromHex(q); - unsigned char second = fromHex(q += 2); - const std::string& utf8str = encodeUTF8(first, second); - for (unsigned int i = 0; i < utf8str.size(); i++) { - result->push_back(utf8str[i]); - } - ++q; - break; - } - // Vertical tab character. Not in JSON spec but allowed in - // our implementation according to test suite. - case 'v': result->push_back('\v'); break; - // Escape characters we explicity disallow - case 'x': return parseError("Hex escape not supported"); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': return parseError("Octal escape not supported"); - // By default pass on the unescaped character - default: result->push_back(*q); break; - // TODO: check for escaped control characters - } - ++q; - } - else { - result->push_back(*q++); - } - } - if (q < _input_end) { - _input = q; - return Status::OK(); +Status JParse::date(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + Date_t date = Date_t::fromMillisSinceEpoch(strtoll(_input, &endptr, 10)); + if (_input == endptr) { + return parseError("Date expecting integer milliseconds"); + } + if (errno == ERANGE) { + /* Need to handle this because jsonString outputs the value of Date_t as unsigned. + * See SERVER-8330 and SERVER-8573 */ + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires + // that we know ahead of time where the number ends, which is not currently the case. + date = Date_t::fromMillisSinceEpoch(static_cast<long long>(strtoull(_input, &endptr, 10))); + if (errno == ERANGE) { + return parseError("Date milliseconds overflow"); } - return parseError("Unexpected end of input"); } + _input = endptr; + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + builder.appendDate(fieldName, date); + return Status::OK(); +} - std::string JParse::encodeUTF8(unsigned char first, unsigned char second) const { - std::ostringstream oss; - if (first == 0 && second < 0x80) { - oss << second; - } - else if (first < 0x08) { - oss << char( 0xc0 | (first << 2 | second >> 6) ); - oss << char( 0x80 | (~0xc0 & second) ); - } - else { - oss << char( 0xe0 | (first >> 4) ); - oss << char( 0x80 | (~0xc0 & (first << 2 | second >> 6) ) ); - oss << char( 0x80 | (~0xc0 & second) ); - } - return oss.str(); +Status JParse::timestamp(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); } + if (readToken("-")) { + return parseError("Negative seconds in \"$timestamp\""); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + uint32_t seconds = strtoul(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Timestamp seconds overflow"); + } + if (_input == endptr) { + return parseError("Expecting unsigned integer seconds in \"$timestamp\""); + } + _input = endptr; + if (!readToken(COMMA)) { + return parseError("Expecting ','"); + } + if (readToken("-")) { + return parseError("Negative seconds in \"$timestamp\""); + } + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + uint32_t count = strtoul(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("Timestamp increment overflow"); + } + if (_input == endptr) { + return parseError("Expecting unsigned integer increment in \"$timestamp\""); + } + _input = endptr; + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + builder.append(fieldName, Timestamp(seconds, count)); + return Status::OK(); +} - inline bool JParse::peekToken(const char* token) { - return readTokenImpl(token, false); +Status JParse::objectId(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + std::string id; + id.reserve(ID_RESERVE_SIZE); + Status ret = quotedString(&id); + if (ret != Status::OK()) { + return ret; } + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + if (id.size() != 24) { + return parseError("Expecting 24 hex digits: " + id); + } + if (!isHexString(id)) { + return parseError("Expecting hex digits: " + id); + } + builder.append(fieldName, OID(id)); + return Status::OK(); +} - inline bool JParse::readToken(const char* token) { - return readTokenImpl(token, true); +Status JParse::numberLong(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + int64_t val = strtoll(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("NumberLong out of range"); + } + if (_input == endptr) { + return parseError("Expecting number in NumberLong"); } + _input = endptr; + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + builder.appendNumber(fieldName, static_cast<long long int>(val)); + return Status::OK(); +} - bool JParse::readTokenImpl(const char* token, bool advance) { - MONGO_JSON_DEBUG("token: " << token); - const char* check = _input; - if (token == NULL) { - return false; +Status JParse::numberInt(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + errno = 0; + char* endptr; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + int32_t val = strtol(_input, &endptr, 10); + if (errno == ERANGE) { + return parseError("NumberInt out of range"); + } + if (_input == endptr) { + return parseError("Expecting unsigned number in NumberInt"); + } + _input = endptr; + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + builder.appendNumber(fieldName, static_cast<int>(val)); + return Status::OK(); +} + + +Status JParse::dbRef(StringData fieldName, BSONObjBuilder& builder) { + BSONObjBuilder subBuilder(builder.subobjStart(fieldName)); + + if (!readToken(LPAREN)) { + return parseError("Expecting '('"); + } + std::string ns; + ns.reserve(NS_RESERVE_SIZE); + Status refRet = quotedString(&ns); + if (refRet != Status::OK()) { + return refRet; + } + subBuilder.append("$ref", ns); + + if (!readToken(COMMA)) { + return parseError("Expecting ','"); + } + + Status valueRet = value("$id", subBuilder); + if (valueRet != Status::OK()) { + return valueRet; + } + + if (readToken(COMMA)) { + std::string db; + db.reserve(DB_RESERVE_SIZE); + Status dbRet = quotedString(&db); + if (dbRet != Status::OK()) { + return dbRet; + } + subBuilder.append("$db", db); + } + + if (!readToken(RPAREN)) { + return parseError("Expecting ')'"); + } + + subBuilder.done(); + return Status::OK(); +} + +Status JParse::regex(StringData fieldName, BSONObjBuilder& builder) { + if (!readToken(FORWARDSLASH)) { + return parseError("Expecting '/'"); + } + std::string pat; + pat.reserve(PAT_RESERVE_SIZE); + Status patRet = regexPat(&pat); + if (patRet != Status::OK()) { + return patRet; + } + if (!readToken(FORWARDSLASH)) { + return parseError("Expecting '/'"); + } + std::string opt; + opt.reserve(OPT_RESERVE_SIZE); + Status optRet = regexOpt(&opt); + if (optRet != Status::OK()) { + return optRet; + } + Status optCheckRet = regexOptCheck(opt); + if (optCheckRet != Status::OK()) { + return optCheckRet; + } + builder.appendRegex(fieldName, pat, opt); + return Status::OK(); +} + +Status JParse::regexPat(std::string* result) { + MONGO_JSON_DEBUG(""); + return chars(result, "/"); +} + +Status JParse::regexOpt(std::string* result) { + MONGO_JSON_DEBUG(""); + return chars(result, "", JOPTIONS); +} + +Status JParse::regexOptCheck(StringData opt) { + MONGO_JSON_DEBUG("opt: " << opt); + std::size_t i; + for (i = 0; i < opt.size(); i++) { + if (!match(opt[i], JOPTIONS)) { + return parseError(string("Bad regex option: ") + opt[i]); } + } + return Status::OK(); +} + +Status JParse::number(StringData fieldName, BSONObjBuilder& builder) { + char* endptrll; + char* endptrd; + long long retll; + double retd; + + // reset errno to make sure that we are getting it from strtod + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + retd = strtod(_input, &endptrd); + // if pointer does not move, we found no digits + if (_input == endptrd) { + return parseError("Bad characters in value"); + } + if (errno == ERANGE) { + return parseError("Value cannot fit in double"); + } + // reset errno to make sure that we are getting it from strtoll + errno = 0; + // SERVER-11920: We should use parseNumberFromString here, but that function requires that + // we know ahead of time where the number ends, which is not currently the case. + retll = strtoll(_input, &endptrll, 10); + if (endptrll < endptrd || errno == ERANGE) { + // The number either had characters only meaningful for a double or + // could not fit in a 64 bit int + MONGO_JSON_DEBUG("Type: double"); + builder.append(fieldName, retd); + } else if (retll == static_cast<int>(retll)) { + // The number can fit in a 32 bit int + MONGO_JSON_DEBUG("Type: 32 bit int"); + builder.append(fieldName, static_cast<int>(retll)); + } else { + // The number can fit in a 64 bit int + MONGO_JSON_DEBUG("Type: 64 bit int"); + builder.append(fieldName, retll); + } + _input = endptrd; + if (_input >= _input_end) { + return parseError("Trailing number at end of input"); + } + return Status::OK(); +} + +Status JParse::field(std::string* result) { + MONGO_JSON_DEBUG(""); + if (peekToken(DOUBLEQUOTE) || peekToken(SINGLEQUOTE)) { + // Quoted key + // TODO: make sure quoted field names cannot contain null characters + return quotedString(result); + } else { + // Unquoted key // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes // 0xffffff80 as seen by isspace when sign-extended ... we want it to be 0x00000080 - while (check < _input_end && isspace(*reinterpret_cast<const unsigned char*>(check))) { - ++check; + while (_input < _input_end && isspace(*reinterpret_cast<const unsigned char*>(_input))) { + ++_input; } - while (*token != '\0') { - if (check >= _input_end) { - return false; - } - if (*token++ != *check++) { - return false; - } + if (_input >= _input_end) { + return parseError("Field name expected"); } - if (advance) { _input = check; } - return true; + if (!match(*_input, ALPHA "_$")) { + return parseError("First character in field must be [A-Za-z$_]"); + } + return chars(result, "", ALPHA DIGIT "_$"); } +} - bool JParse::readField(StringData expectedField) { - MONGO_JSON_DEBUG("expectedField: " << expectedField); - std::string nextField; - nextField.reserve(FIELD_RESERVE_SIZE); - Status ret = field(&nextField); +Status JParse::quotedString(std::string* result) { + MONGO_JSON_DEBUG(""); + if (readToken(DOUBLEQUOTE)) { + Status ret = chars(result, "\""); if (ret != Status::OK()) { - return false; + return ret; } - if (expectedField != nextField) { - return false; + if (!readToken(DOUBLEQUOTE)) { + return parseError("Expecting '\"'"); } - return true; - } - - inline bool JParse::match(char matchChar, const char* matchSet) const { - if (matchSet == NULL) { - return true; + } else if (readToken(SINGLEQUOTE)) { + Status ret = chars(result, "'"); + if (ret != Status::OK()) { + return ret; } - if (*matchSet == '\0') { - return false; + if (!readToken(SINGLEQUOTE)) { + return parseError("Expecting '''"); } - return (strchr(matchSet, matchChar) != NULL); + } else { + return parseError("Expecting quoted string"); } + return Status::OK(); +} - bool JParse::isHexString(StringData str) const { - MONGO_JSON_DEBUG("str: " << str); - std::size_t i; - for (i = 0; i < str.size(); i++) { - if (!isxdigit(str[i])) { - return false; - } - } - return true; +/* + * terminalSet are characters that signal end of string (e.g.) [ :\0] + * allowedSet are the characters that are allowed, if this is set + */ +Status JParse::chars(std::string* result, const char* terminalSet, const char* allowedSet) { + MONGO_JSON_DEBUG("terminalSet: " << terminalSet); + if (_input >= _input_end) { + return parseError("Unexpected end of input"); } - - bool JParse::isBase64String(StringData str) const { - MONGO_JSON_DEBUG("str: " << str); - std::size_t i; - for (i = 0; i < str.size(); i++) { - if (!match(str[i], base64::chars)) { - return false; + const char* q = _input; + while (q < _input_end && !match(*q, terminalSet)) { + MONGO_JSON_DEBUG("q: " << q); + if (allowedSet != NULL) { + if (!match(*q, allowedSet)) { + _input = q; + return Status::OK(); + } + } + if (0x00 <= *q && *q <= 0x1F) { + return parseError("Invalid control character"); + } + if (*q == '\\' && q + 1 < _input_end) { + switch (*(++q)) { + // Escape characters allowed by the JSON spec + case '"': + result->push_back('"'); + break; + case '\'': + result->push_back('\''); + break; + case '\\': + result->push_back('\\'); + break; + case '/': + result->push_back('/'); + break; + case 'b': + result->push_back('\b'); + break; + case 'f': + result->push_back('\f'); + break; + case 'n': + result->push_back('\n'); + break; + case 'r': + result->push_back('\r'); + break; + case 't': + result->push_back('\t'); + break; + case 'u': { // expect 4 hexdigits + // TODO: handle UTF-16 surrogate characters + ++q; + if (q + 4 >= _input_end) { + return parseError("Expecting 4 hex digits"); + } + if (!isHexString(StringData(q, 4))) { + return parseError("Expecting 4 hex digits"); + } + unsigned char first = fromHex(q); + unsigned char second = fromHex(q += 2); + const std::string& utf8str = encodeUTF8(first, second); + for (unsigned int i = 0; i < utf8str.size(); i++) { + result->push_back(utf8str[i]); + } + ++q; + break; + } + // Vertical tab character. Not in JSON spec but allowed in + // our implementation according to test suite. + case 'v': + result->push_back('\v'); + break; + // Escape characters we explicity disallow + case 'x': + return parseError("Hex escape not supported"); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + return parseError("Octal escape not supported"); + // By default pass on the unescaped character + default: + result->push_back(*q); + break; + // TODO: check for escaped control characters } + ++q; + } else { + result->push_back(*q++); } - return true; } - - bool JParse::isArray() { - return peekToken(LBRACKET); + if (q < _input_end) { + _input = q; + return Status::OK(); } - - BSONObj fromjson(const char* jsonString, int* len) { - MONGO_JSON_DEBUG("jsonString: " << jsonString); - if (jsonString[0] == '\0') { - if (len) *len = 0; - return BSONObj(); - } - JParse jparse(jsonString); - BSONObjBuilder builder; - Status ret = Status::OK(); - try { - ret = jparse.parse(builder); + return parseError("Unexpected end of input"); +} + +std::string JParse::encodeUTF8(unsigned char first, unsigned char second) const { + std::ostringstream oss; + if (first == 0 && second < 0x80) { + oss << second; + } else if (first < 0x08) { + oss << char(0xc0 | (first << 2 | second >> 6)); + oss << char(0x80 | (~0xc0 & second)); + } else { + oss << char(0xe0 | (first >> 4)); + oss << char(0x80 | (~0xc0 & (first << 2 | second >> 6))); + oss << char(0x80 | (~0xc0 & second)); + } + return oss.str(); +} + +inline bool JParse::peekToken(const char* token) { + return readTokenImpl(token, false); +} + +inline bool JParse::readToken(const char* token) { + return readTokenImpl(token, true); +} + +bool JParse::readTokenImpl(const char* token, bool advance) { + MONGO_JSON_DEBUG("token: " << token); + const char* check = _input; + if (token == NULL) { + return false; + } + // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended + // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes + // 0xffffff80 as seen by isspace when sign-extended ... we want it to be 0x00000080 + while (check < _input_end && isspace(*reinterpret_cast<const unsigned char*>(check))) { + ++check; + } + while (*token != '\0') { + if (check >= _input_end) { + return false; } - catch(std::exception& e) { - std::ostringstream message; - message << "caught exception from within JSON parser: " << e.what(); - throw MsgAssertionException(17031, message.str()); + if (*token++ != *check++) { + return false; } + } + if (advance) { + _input = check; + } + return true; +} + +bool JParse::readField(StringData expectedField) { + MONGO_JSON_DEBUG("expectedField: " << expectedField); + std::string nextField; + nextField.reserve(FIELD_RESERVE_SIZE); + Status ret = field(&nextField); + if (ret != Status::OK()) { + return false; + } + if (expectedField != nextField) { + return false; + } + return true; +} - if (ret != Status::OK()) { - ostringstream message; - message << "code " << ret.code() << ": " << ret.codeString() << ": " << ret.reason(); - throw MsgAssertionException(16619, message.str()); +inline bool JParse::match(char matchChar, const char* matchSet) const { + if (matchSet == NULL) { + return true; + } + if (*matchSet == '\0') { + return false; + } + return (strchr(matchSet, matchChar) != NULL); +} + +bool JParse::isHexString(StringData str) const { + MONGO_JSON_DEBUG("str: " << str); + std::size_t i; + for (i = 0; i < str.size(); i++) { + if (!isxdigit(str[i])) { + return false; } - if (len) *len = jparse.offset(); - return builder.obj(); } - - BSONObj fromjson(const std::string& str) { - return fromjson( str.c_str() ); + return true; +} + +bool JParse::isBase64String(StringData str) const { + MONGO_JSON_DEBUG("str: " << str); + std::size_t i; + for (i = 0; i < str.size(); i++) { + if (!match(str[i], base64::chars)) { + return false; + } } - - std::string tojson(const BSONObj& obj, JsonStringFormat format, bool pretty) { - return obj.jsonString(format, pretty); + return true; +} + +bool JParse::isArray() { + return peekToken(LBRACKET); +} + +BSONObj fromjson(const char* jsonString, int* len) { + MONGO_JSON_DEBUG("jsonString: " << jsonString); + if (jsonString[0] == '\0') { + if (len) + *len = 0; + return BSONObj(); } - - std::string tojson(const BSONArray& arr, JsonStringFormat format, bool pretty) { - return arr.jsonString(format, pretty, true); + JParse jparse(jsonString); + BSONObjBuilder builder; + Status ret = Status::OK(); + try { + ret = jparse.parse(builder); + } catch (std::exception& e) { + std::ostringstream message; + message << "caught exception from within JSON parser: " << e.what(); + throw MsgAssertionException(17031, message.str()); } - bool isArray(StringData str) { - JParse parser(str); - return parser.isArray(); + if (ret != Status::OK()) { + ostringstream message; + message << "code " << ret.code() << ": " << ret.codeString() << ": " << ret.reason(); + throw MsgAssertionException(16619, message.str()); } + if (len) + *len = jparse.offset(); + return builder.obj(); +} + +BSONObj fromjson(const std::string& str) { + return fromjson(str.c_str()); +} + +std::string tojson(const BSONObj& obj, JsonStringFormat format, bool pretty) { + return obj.jsonString(format, pretty); +} + +std::string tojson(const BSONArray& arr, JsonStringFormat format, bool pretty) { + return arr.jsonString(format, pretty, true); +} + +bool isArray(StringData str) { + JParse parser(str); + return parser.isArray(); +} -} /* namespace mongo */ +} /* namespace mongo */ |