diff options
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/bson/bson-inl.h | 40 | ||||
-rw-r--r-- | src/mongo/bson/bson_validate.cpp | 96 | ||||
-rw-r--r-- | src/mongo/bson/bson_validate_test.cpp | 60 | ||||
-rw-r--r-- | src/mongo/bson/bsonelement.h | 3 | ||||
-rw-r--r-- | src/mongo/db/cloner.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/commands/validate.cpp | 17 | ||||
-rw-r--r-- | src/mongo/tools/bsondump.cpp | 21 | ||||
-rw-r--r-- | src/mongo/tools/tool.cpp | 28 |
8 files changed, 173 insertions, 108 deletions
diff --git a/src/mongo/bson/bson-inl.h b/src/mongo/bson/bson-inl.h index 70bd7f714f0..29ce7f7af04 100644 --- a/src/mongo/bson/bson-inl.h +++ b/src/mongo/bson/bson-inl.h @@ -474,7 +474,6 @@ dodouble: int offset = (int) (e.rawdata() - this->objdata()); massert( 10330 , "Element extends past end of object", e.size() + offset <= this->objsize() ); - e.validate(); bool end = ( e.size() + offset == this->objsize() ); if ( e.eoo() ) { massert( 10331 , "EOO Before end of object", end ); @@ -489,45 +488,6 @@ dodouble: s << ( isArray ? " ]" : " }" ); } - inline void BSONElement::validate() const { - const BSONType t = type(); - - switch( t ) { - case DBRef: - case Code: - case Symbol: - case mongo::String: { - unsigned x = (unsigned) valuestrsize(); - bool lenOk = x > 0 && x < (unsigned) BSONObjMaxInternalSize; - if( lenOk && valuestr()[x-1] == 0 ) - return; - StringBuilder buf; - buf << "Invalid dbref/code/string/symbol size: " << x; - if( lenOk ) - buf << " strnlen:" << mongo::strnlen( valuestr() , x ); - msgasserted( 10321 , buf.str() ); - break; - } - case CodeWScope: { - int totalSize = *( int * )( value() ); - massert( 10322 , "Invalid CodeWScope size", totalSize >= 8 ); - int strSizeWNull = *( int * )( value() + 4 ); - massert( 10323 , "Invalid CodeWScope string size", totalSize >= strSizeWNull + 4 + 4 ); - massert( 10324 , "Invalid CodeWScope string size", - strSizeWNull > 0 && - (strSizeWNull - 1) == mongo::strnlen( codeWScopeCode(), strSizeWNull ) ); - massert( 10325 , "Invalid CodeWScope size", totalSize >= strSizeWNull + 4 + 4 + 4 ); - int objSize = *( int * )( value() + 4 + 4 + strSizeWNull ); - massert( 10326 , "Invalid CodeWScope object size", totalSize == 4 + 4 + strSizeWNull + objSize ); - // Subobject validation handled elsewhere. - } - case Object: - // We expect Object size validation to be handled elsewhere. - default: - break; - } - } - inline int BSONElement::size( int maxLen ) const { if ( totalSize >= 0 ) return totalSize; diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp index 88e6d481b10..998ac79ffd6 100644 --- a/src/mongo/bson/bson_validate.cpp +++ b/src/mongo/bson/bson_validate.cpp @@ -15,15 +15,31 @@ * limitations under the License. */ +#include <cstring> #include <deque> #include "mongo/bson/bson_validate.h" #include "mongo/bson/oid.h" +#include "mongo/db/jsobj.h" namespace mongo { namespace { + /** + * Creates a status with InvalidBSON code and adds information about _id if available. + * WARNING: only pass in a non-EOO idElem if it has been fully validated already! + */ + Status makeError(std::string baseMsg, BSONElement idElem) { + if (idElem.eoo()) { + baseMsg += " in object with unknown _id"; + } + else { + baseMsg += " in object with " + idElem.toString(/*field name=*/true, /*full=*/true); + } + return Status(ErrorCodes::InvalidBSON, baseMsg); + } + class Buffer { public: Buffer( const char* buffer, uint64_t maxLength ) @@ -45,7 +61,7 @@ namespace mongo { Status readCString( StringData* out ) { const void* x = memchr( _buffer + _position, 0, _maxLength - _position ); if ( !x ) - return Status( ErrorCodes::InvalidBSON, "no end of c-string" ); + return makeError("no end of c-string", _idElem); uint64_t len = static_cast<uint64_t>( static_cast<const char*>(x) - ( _buffer + _position ) ); StringData data( _buffer + _position, len ); @@ -60,21 +76,21 @@ namespace mongo { Status readUTF8String( StringData* out ) { int sz; if ( !readNumber<int>( &sz ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", _idElem); if ( out ) { *out = StringData( _buffer + _position, sz ); } if ( !skip( sz - 1 ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", _idElem); char c; if ( !readNumber<char>( &c ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", _idElem); if ( c != 0 ) - return Status( ErrorCodes::InvalidBSON, "not null terminate string" ); + return makeError("not null terminated string", _idElem); return Status::OK(); } @@ -88,10 +104,22 @@ namespace mongo { return _position; } + const char* getBasePtr() const { + return _buffer; + } + + /** + * WARNING: only pass in a non-EOO idElem if it has been fully validated already! + */ + void setIdElem(BSONElement idElem) { + _idElem = idElem; + } + private: const char* _buffer; uint64_t _position; uint64_t _maxLength; + BSONElement _idElem; }; struct ValidationState { @@ -127,12 +155,17 @@ namespace mongo { int _startPosition; }; - Status validateElementInfo(Buffer* buffer, ValidationState::State* nextState) { + /** + * WARNING: only pass in a non-EOO idElem if it has been fully validated already! + */ + Status validateElementInfo(Buffer* buffer, + ValidationState::State* nextState, + BSONElement idElem) { Status status = Status::OK(); signed char type; if ( !buffer->readNumber<signed char>(&type) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", idElem); if ( type == EOO ) { *nextState = ValidationState::EndObj; @@ -153,17 +186,17 @@ namespace mongo { case jstOID: if ( !buffer->skip( sizeof(OID) ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", idElem); return Status::OK(); case NumberInt: if ( !buffer->skip( sizeof(int32_t) ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", idElem); return Status::OK(); case Bool: if ( !buffer->skip( sizeof(int8_t) ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", idElem); return Status::OK(); @@ -172,7 +205,7 @@ namespace mongo { case Timestamp: case Date: if ( !buffer->skip( sizeof(int64_t) ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", idElem); return Status::OK(); case DBRef: @@ -203,9 +236,9 @@ namespace mongo { case BinData: { int sz; if ( !buffer->readNumber<int>( &sz ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", idElem); if ( !buffer->skip( 1 + sz ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson" ); + return makeError("invalid bson", idElem); return Status::OK(); } case CodeWScope: @@ -217,7 +250,7 @@ namespace mongo { return Status::OK(); default: - return Status( ErrorCodes::InvalidBSON, "invalid bson type" ); + return makeError("invalid bson type", idElem); } } @@ -226,6 +259,9 @@ namespace mongo { ValidationObjectFrame* curr = NULL; ValidationState::State state = ValidationState::BeginObj; + uint64_t idElemStartPos = 0; // will become idElem once validated + BSONElement idElem; + while (state != ValidationState::Done) { switch (state) { case ValidationState::BeginObj: @@ -234,22 +270,36 @@ namespace mongo { curr->setStartPosition(buffer->position()); curr->setIsCodeWithScope(false); if (!buffer->readNumber<int>(&curr->expectedSize)) { - return Status(ErrorCodes::InvalidBSON, - "bson size is larger than buffer size"); + return makeError("bson size is larger than buffer size", idElem); } state = ValidationState::WithinObj; // fall through case ValidationState::WithinObj: { - Status status = validateElementInfo(buffer, &state); + const bool atTopLevel = frames.size() == 1; + // check if we've finished validating idElem and are at start of next element. + if (atTopLevel && idElemStartPos) { + idElem = BSONElement(buffer->getBasePtr() + idElemStartPos); + buffer->setIdElem(idElem); + idElemStartPos = 0; + } + + const uint64_t elemStartPos = buffer->position(); + Status status = validateElementInfo(buffer, &state, idElem); if (!status.isOK()) return status; + + if (idElem.eoo() && atTopLevel) { + // we've already validated that fieldname is safe to access. + if (strcmp(buffer->getBasePtr() + elemStartPos + 1/*type*/, "_id") == 0) { + idElemStartPos = elemStartPos; + } + } break; } case ValidationState::EndObj: { int actualLength = buffer->position() - curr->startPosition(); if ( actualLength != curr->expectedSize ) { - return Status( ErrorCodes::InvalidBSON, - "bson length doesn't match what we found" ); + return makeError("bson length doesn't match what we found", idElem); } frames.pop_back(); if (frames.empty()) { @@ -270,7 +320,7 @@ namespace mongo { curr->setStartPosition(buffer->position()); curr->setIsCodeWithScope(true); if ( !buffer->readNumber<int>( &curr->expectedSize ) ) - return Status( ErrorCodes::InvalidBSON, "invalid bson CodeWScope size" ); + return makeError("invalid bson CodeWScope size", idElem); Status status = buffer->readUTF8String( NULL ); if ( !status.isOK() ) return status; @@ -280,12 +330,12 @@ namespace mongo { case ValidationState::EndCodeWScope: { int actualLength = buffer->position() - curr->startPosition(); if ( actualLength != curr->expectedSize ) { - return Status( ErrorCodes::InvalidBSON, - "bson length for CodeWScope doesn't match what we found" ); + return makeError("bson length for CodeWScope doesn't match what we found", + idElem); } frames.pop_back(); if (frames.empty()) - return Status(ErrorCodes::InvalidBSON, "unnested CodeWScope"); + return makeError("unnested CodeWScope", idElem); curr = &frames.back(); state = ValidationState::WithinObj; break; diff --git a/src/mongo/bson/bson_validate_test.cpp b/src/mongo/bson/bson_validate_test.cpp index 61e67f33c46..2d381e31183 100644 --- a/src/mongo/bson/bson_validate_test.cpp +++ b/src/mongo/bson/bson_validate_test.cpp @@ -22,6 +22,14 @@ namespace { using namespace mongo; + void appendInvalidStringElement(const char* fieldName, BufBuilder* bb) { + // like a BSONObj string, but without a NUL terminator. + bb->appendChar(String); + bb->appendStr("name", /*withNUL*/true); + bb->appendNum(4); + bb->appendStr("asdf", /*withNUL*/false); + } + TEST(BSONValidate, Basic) { BSONObj x; ASSERT_TRUE( x.valid() ); @@ -213,4 +221,56 @@ namespace { ASSERT_NOT_OK(validateBSON(x.objdata(), x.objsize() / 2)); } + TEST(BSONValidateFast, ErrorWithId) { + BufBuilder bb; + BSONObjBuilder ob(bb); + ob.append("_id", 1); + appendInvalidStringElement("not_id", &bb); + const BSONObj x = ob.done(); + const Status status = validateBSON(x.objdata(), x.objsize()); + ASSERT_NOT_OK(status); + ASSERT_EQUALS(status.reason(), "not null terminated string in object with _id: 1"); + } + + TEST(BSONValidateFast, ErrorBeforeId) { + BufBuilder bb; + BSONObjBuilder ob(bb); + appendInvalidStringElement("not_id", &bb); + ob.append("_id", 1); + const BSONObj x = ob.done(); + const Status status = validateBSON(x.objdata(), x.objsize()); + ASSERT_NOT_OK(status); + ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id"); + } + + TEST(BSONValidateFast, ErrorNoId) { + BufBuilder bb; + BSONObjBuilder ob(bb); + appendInvalidStringElement("not_id", &bb); + const BSONObj x = ob.done(); + const Status status = validateBSON(x.objdata(), x.objsize()); + ASSERT_NOT_OK(status); + ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id"); + } + + TEST(BSONValidateFast, ErrorIsInId) { + BufBuilder bb; + BSONObjBuilder ob(bb); + appendInvalidStringElement("_id", &bb); + const BSONObj x = ob.done(); + const Status status = validateBSON(x.objdata(), x.objsize()); + ASSERT_NOT_OK(status); + ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id"); + } + + TEST(BSONValidateFast, NonTopLevelId) { + BufBuilder bb; + BSONObjBuilder ob(bb); + ob.append("not_id1", BSON("_id" << "not the real _id")); + appendInvalidStringElement("not_id2", &bb); + const BSONObj x = ob.done(); + const Status status = validateBSON(x.objdata(), x.objsize()); + ASSERT_NOT_OK(status); + ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id"); + } } diff --git a/src/mongo/bson/bsonelement.h b/src/mongo/bson/bsonelement.h index a3dd39ee56d..7a150992936 100644 --- a/src/mongo/bson/bsonelement.h +++ b/src/mongo/bson/bsonelement.h @@ -376,9 +376,6 @@ namespace mongo { /** Constructs an empty element */ BSONElement(); - /** Check that data is internally consistent. */ - void validate() const; - /** True if this element may contain subobjects. */ bool mayEncapsulate() const { switch ( type() ) { diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp index 03d1195b841..3a8cdd08bbf 100644 --- a/src/mongo/db/cloner.cpp +++ b/src/mongo/db/cloner.cpp @@ -154,18 +154,10 @@ namespace mongo { BSONObj tmp = i.nextSafe(); /* assure object is valid. note this will slow us down a little. */ - if ( !tmp.valid() ) { - stringstream ss; - ss << "Cloner: skipping corrupt object from " << from_collection; - BSONElement e = tmp.firstElement(); - try { - e.validate(); - ss << " firstElement: " << e; - } - catch( ... ) { - ss << " firstElement corrupt"; - } - out() << ss.str() << endl; + const Status status = validateBSON(tmp.objdata(), tmp.objsize()); + if (!status.isOK()) { + out() << "Cloner: skipping corrupt object from " << from_collection + << ": " << status.reason(); continue; } diff --git a/src/mongo/db/commands/validate.cpp b/src/mongo/db/commands/validate.cpp index 84a819e0255..c69024e05c2 100644 --- a/src/mongo/db/commands/validate.cpp +++ b/src/mongo/db/commands/validate.cpp @@ -266,24 +266,15 @@ namespace mongo { if (full){ BSONObj obj = BSONObj::make(r); - if (!obj.isValid() || !obj.valid()){ // both fast and deep checks + const Status status = validateBSON(obj.objdata(), obj.objsize()); + if (!status.isOK()) { valid = false; if (nInvalid == 0) // only log once; errors << "invalid bson object detected (see logs for more info)"; nInvalid++; - if (strcmp("_id", obj.firstElementFieldName()) == 0){ - try { - obj.firstElement().validate(); // throws on error - log() << "Invalid bson detected in " << ns << " with _id: " << obj.firstElement().toString(false) << endl; - } - catch(...){ - log() << "Invalid bson detected in " << ns << " with corrupt _id" << endl; - } - } - else { - log() << "Invalid bson detected in " << ns << " and couldn't find _id" << endl; - } + log() << "Invalid bson detected in " << ns + << ": " << status.reason(); } else { bsonLen += obj.objsize(); diff --git a/src/mongo/tools/bsondump.cpp b/src/mongo/tools/bsondump.cpp index 31c45c22bea..ef0c02be5b3 100644 --- a/src/mongo/tools/bsondump.cpp +++ b/src/mongo/tools/bsondump.cpp @@ -84,17 +84,31 @@ public: try { cout << prefix << "--- new object ---\n"; cout << prefix << "\t size : " << o.objsize() << "\n"; + + // Note: this will recursively check each level of the bson and will also be called by + // this function at each level. While inefficient, it shouldn't effect correctness. + const Status status = validateBSON(o.objdata(), o.objsize()); + if (!status.isOK()) { + cout << prefix << "\t OBJECT IS INVALID: " << status.reason() << '\n' + << prefix << "\t attempting to print as much as possible" << endl; + } + BSONObjIterator i(o); while ( i.more() ) { - BSONElement e = i.next(); - cout << prefix << "\t\t " << e.fieldName() << "\n" << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size() << endl; + // This call verifies it is safe to call size() and fieldName() but doesn't check + // whether the element extends past the end of the object. That is done below. + BSONElement e = i.next(/*checkEnd=*/true); + + cout << prefix << "\t\t " << e.fieldName() << "\n" + << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size() + << endl; + if ( ( read + e.size() ) > o.objsize() ) { cout << prefix << " SIZE DOES NOT WORK" << endl; return false; } read += e.size(); try { - e.validate(); if ( e.isABSONObj() ) { if ( ! debug( e.Obj() , depth + 1 ) ) { //return false; @@ -111,7 +125,6 @@ public: else if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ) { cout << prefix << "\t\t\t" << e << endl; } - } catch ( std::exception& e ) { cout << prefix << "\t\t\t bad value: " << e.what() << endl; diff --git a/src/mongo/tools/tool.cpp b/src/mongo/tools/tool.cpp index e6ac3785aef..e90e46257c7 100644 --- a/src/mongo/tools/tool.cpp +++ b/src/mongo/tools/tool.cpp @@ -312,21 +312,23 @@ namespace mongo { verify( amt == (size_t)( size - 4 ) ); BSONObj o( buf ); - if (bsonToolGlobalParams.objcheck && !o.valid()) { - toolError() << "INVALID OBJECT - going to try and print out " << std::endl; - toolError() << "size: " << size << std::endl; - BSONObjIterator i(o); - while ( i.more() ) { - BSONElement e = i.next(); + if (bsonToolGlobalParams.objcheck) { + const Status status = validateBSON(buf, size); + if (!status.isOK()) { + toolError() << "INVALID OBJECT - going to try and print out " << std::endl; + toolError() << "size: " << size << std::endl; + toolError() << "error: " << status.reason() << std::endl; + + StringBuilder sb; try { - e.validate(); + o.toString(sb); // using StringBuilder version to get as much as possible + } catch (...) { + toolError() << "object up to error: " << sb.str() << endl; + throw; } - catch ( ... ) { - toolError() << "\t\t NEXT ONE IS INVALID" << std::endl; - } - toolError() << "\t name : " << e.fieldName() << " " << typeName(e.type()) - << std::endl; - toolError() << "\t " << e << std::endl; + toolError() << "complete object: " << sb.str() << endl; + + // NOTE: continuing with object even though we know it is invalid. } } |