// bson_validate.cpp /* Copyright 2012 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects * for all of the code used other than as permitted herein. If you modify * file(s) with this exception, you may extend this exception to your * version of the file(s), but you are not obligated to do so. If you do not * wish to do so, delete this exception statement from your version. If you * delete this exception statement from all source files in the program, * then also delete it in the license file. */ #include #include #include #include "mongo/base/data_view.h" #include "mongo/bson/bson_validate.h" #include "mongo/bson/oid.h" #include "mongo/db/jsobj.h" #include "mongo/platform/decimal128.h" namespace mongo { namespace { /** * Creates a status with InvalidBSON code and adds information about _id if available. * WARNING: only pass in a non-EOO idElem if it has been fully validated already! */ Status makeError(std::string baseMsg, BSONElement idElem) { if (idElem.eoo()) { baseMsg += " in object with unknown _id"; } else { baseMsg += " in object with " + idElem.toString(/*field name=*/true, /*full=*/true); } return Status(ErrorCodes::InvalidBSON, baseMsg); } class Buffer { public: Buffer(const char* buffer, uint64_t maxLength) : _buffer(buffer), _position(0), _maxLength(maxLength) {} template bool readNumber(N* out) { if ((_position + sizeof(N)) > _maxLength) return false; if (out) { *out = ConstDataView(_buffer).read>(_position); } _position += sizeof(N); return true; } Status readCString(StringData* out) { const void* x = memchr(_buffer + _position, 0, _maxLength - _position); if (!x) return makeError("no end of c-string", _idElem); uint64_t len = static_cast(static_cast(x) - (_buffer + _position)); StringData data(_buffer + _position, len); _position += len + 1; if (out) { *out = data; } return Status::OK(); } Status readUTF8String(StringData* out) { int sz; if (!readNumber(&sz)) return makeError("invalid bson", _idElem); if (sz <= 0) { // must have NULL at the very least return makeError("invalid bson", _idElem); } if (out) { *out = StringData(_buffer + _position, sz); } if (!skip(sz - 1)) return makeError("invalid bson", _idElem); char c; if (!readNumber(&c)) return makeError("invalid bson", _idElem); if (c != 0) return makeError("not null terminated string", _idElem); return Status::OK(); } bool skip(uint64_t sz) { _position += sz; return _position < _maxLength; } uint64_t position() const { return _position; } const char* getBasePtr() const { return _buffer; } /** * WARNING: only pass in a non-EOO idElem if it has been fully validated already! */ void setIdElem(BSONElement idElem) { _idElem = idElem; } private: const char* _buffer; uint64_t _position; uint64_t _maxLength; BSONElement _idElem; }; struct ValidationState { enum State { BeginObj = 1, WithinObj, EndObj, BeginCodeWScope, EndCodeWScope, Done }; }; class ValidationObjectFrame { public: int startPosition() const { return _startPosition & ~(1 << 31); } bool isCodeWithScope() const { return _startPosition & (1 << 31); } void setStartPosition(int pos) { _startPosition = (_startPosition & (1 << 31)) | (pos & ~(1 << 31)); } void setIsCodeWithScope(bool isCodeWithScope) { if (isCodeWithScope) { _startPosition |= 1 << 31; } else { _startPosition &= ~(1 << 31); } } int expectedSize; private: int _startPosition; }; /** * WARNING: only pass in a non-EOO idElem if it has been fully validated already! */ Status validateElementInfo(Buffer* buffer, ValidationState::State* nextState, BSONElement idElem) { Status status = Status::OK(); signed char type; if (!buffer->readNumber(&type)) return makeError("invalid bson", idElem); if (type == EOO) { *nextState = ValidationState::EndObj; return Status::OK(); } StringData name; status = buffer->readCString(&name); if (!status.isOK()) return status; switch (type) { case MinKey: case MaxKey: case jstNULL: case Undefined: return Status::OK(); case jstOID: if (!buffer->skip(OID::kOIDSize)) return makeError("invalid bson", idElem); return Status::OK(); case NumberInt: if (!buffer->skip(sizeof(int32_t))) return makeError("invalid bson", idElem); return Status::OK(); case Bool: uint8_t val; if (!buffer->readNumber(&val)) return makeError("invalid bson", idElem); if ((val != 0) && (val != 1)) return makeError("invalid boolean value", idElem); return Status::OK(); case NumberDouble: case NumberLong: case bsonTimestamp: case Date: if (!buffer->skip(sizeof(int64_t))) return makeError("invalid bson", idElem); return Status::OK(); case NumberDecimal: if (Decimal128::enabled) { if (!buffer->skip(sizeof(Decimal128::Value))) return makeError("Invalid bson", idElem); return Status::OK(); } else { return Status(ErrorCodes::InvalidBSON, "Attempt to use a decimal BSON type when experimental decimal " "server support is not currently enabled."); } case DBRef: status = buffer->readUTF8String(NULL); if (!status.isOK()) return status; buffer->skip(OID::kOIDSize); return Status::OK(); case RegEx: status = buffer->readCString(NULL); if (!status.isOK()) return status; status = buffer->readCString(NULL); if (!status.isOK()) return status; return Status::OK(); case Code: case Symbol: case String: status = buffer->readUTF8String(NULL); if (!status.isOK()) return status; return Status::OK(); case BinData: { int sz; if (!buffer->readNumber(&sz)) return makeError("invalid bson", idElem); if (sz < 0 || sz == std::numeric_limits::max()) return makeError("invalid size in bson", idElem); if (!buffer->skip(1 + sz)) return makeError("invalid bson", idElem); return Status::OK(); } case CodeWScope: *nextState = ValidationState::BeginCodeWScope; return Status::OK(); case Object: case Array: *nextState = ValidationState::BeginObj; return Status::OK(); default: return makeError("invalid bson type", idElem); } } Status validateBSONIterative(Buffer* buffer) { std::deque frames; ValidationObjectFrame* curr = NULL; ValidationState::State state = ValidationState::BeginObj; uint64_t idElemStartPos = 0; // will become idElem once validated BSONElement idElem; while (state != ValidationState::Done) { switch (state) { case ValidationState::BeginObj: frames.push_back(ValidationObjectFrame()); curr = &frames.back(); curr->setStartPosition(buffer->position()); curr->setIsCodeWithScope(false); if (!buffer->readNumber(&curr->expectedSize)) { return makeError("bson size is larger than buffer size", idElem); } state = ValidationState::WithinObj; // fall through case ValidationState::WithinObj: { const bool atTopLevel = frames.size() == 1; // check if we've finished validating idElem and are at start of next element. if (atTopLevel && idElemStartPos) { idElem = BSONElement(buffer->getBasePtr() + idElemStartPos); buffer->setIdElem(idElem); idElemStartPos = 0; } const uint64_t elemStartPos = buffer->position(); ValidationState::State nextState = state; Status status = validateElementInfo(buffer, &nextState, idElem); if (!status.isOK()) return status; // we've already validated that fieldname is safe to access as long as we aren't // at the end of the object, since EOO doesn't have a fieldname. if (nextState != ValidationState::EndObj && idElem.eoo() && atTopLevel) { if (strcmp(buffer->getBasePtr() + elemStartPos + 1 /*type*/, "_id") == 0) { idElemStartPos = elemStartPos; } } state = nextState; break; } case ValidationState::EndObj: { int actualLength = buffer->position() - curr->startPosition(); if (actualLength != curr->expectedSize) { return makeError("bson length doesn't match what we found", idElem); } frames.pop_back(); if (frames.empty()) { state = ValidationState::Done; } else { curr = &frames.back(); if (curr->isCodeWithScope()) state = ValidationState::EndCodeWScope; else state = ValidationState::WithinObj; } break; } case ValidationState::BeginCodeWScope: { frames.push_back(ValidationObjectFrame()); curr = &frames.back(); curr->setStartPosition(buffer->position()); curr->setIsCodeWithScope(true); if (!buffer->readNumber(&curr->expectedSize)) return makeError("invalid bson CodeWScope size", idElem); Status status = buffer->readUTF8String(NULL); if (!status.isOK()) return status; state = ValidationState::BeginObj; break; } case ValidationState::EndCodeWScope: { int actualLength = buffer->position() - curr->startPosition(); if (actualLength != curr->expectedSize) { return makeError("bson length for CodeWScope doesn't match what we found", idElem); } frames.pop_back(); if (frames.empty()) return makeError("unnested CodeWScope", idElem); curr = &frames.back(); state = ValidationState::WithinObj; break; } case ValidationState::Done: break; } } return Status::OK(); } } // namespace Status validateBSON(const char* originalBuffer, uint64_t maxLength) { if (maxLength < 5) { return Status(ErrorCodes::InvalidBSON, "bson data has to be at least 5 bytes"); } Buffer buf(originalBuffer, maxLength); return validateBSONIterative(&buf); } } // namespace mongo