summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mongo/bson/bson-inl.h40
-rw-r--r--src/mongo/bson/bson_validate.cpp96
-rw-r--r--src/mongo/bson/bson_validate_test.cpp60
-rw-r--r--src/mongo/bson/bsonelement.h3
-rw-r--r--src/mongo/db/cloner.cpp16
-rw-r--r--src/mongo/db/commands/validate.cpp17
-rw-r--r--src/mongo/tools/bsondump.cpp21
-rw-r--r--src/mongo/tools/tool.cpp28
8 files changed, 173 insertions, 108 deletions
diff --git a/src/mongo/bson/bson-inl.h b/src/mongo/bson/bson-inl.h
index 70bd7f714f0..29ce7f7af04 100644
--- a/src/mongo/bson/bson-inl.h
+++ b/src/mongo/bson/bson-inl.h
@@ -474,7 +474,6 @@ dodouble:
int offset = (int) (e.rawdata() - this->objdata());
massert( 10330 , "Element extends past end of object",
e.size() + offset <= this->objsize() );
- e.validate();
bool end = ( e.size() + offset == this->objsize() );
if ( e.eoo() ) {
massert( 10331 , "EOO Before end of object", end );
@@ -489,45 +488,6 @@ dodouble:
s << ( isArray ? " ]" : " }" );
}
- inline void BSONElement::validate() const {
- const BSONType t = type();
-
- switch( t ) {
- case DBRef:
- case Code:
- case Symbol:
- case mongo::String: {
- unsigned x = (unsigned) valuestrsize();
- bool lenOk = x > 0 && x < (unsigned) BSONObjMaxInternalSize;
- if( lenOk && valuestr()[x-1] == 0 )
- return;
- StringBuilder buf;
- buf << "Invalid dbref/code/string/symbol size: " << x;
- if( lenOk )
- buf << " strnlen:" << mongo::strnlen( valuestr() , x );
- msgasserted( 10321 , buf.str() );
- break;
- }
- case CodeWScope: {
- int totalSize = *( int * )( value() );
- massert( 10322 , "Invalid CodeWScope size", totalSize >= 8 );
- int strSizeWNull = *( int * )( value() + 4 );
- massert( 10323 , "Invalid CodeWScope string size", totalSize >= strSizeWNull + 4 + 4 );
- massert( 10324 , "Invalid CodeWScope string size",
- strSizeWNull > 0 &&
- (strSizeWNull - 1) == mongo::strnlen( codeWScopeCode(), strSizeWNull ) );
- massert( 10325 , "Invalid CodeWScope size", totalSize >= strSizeWNull + 4 + 4 + 4 );
- int objSize = *( int * )( value() + 4 + 4 + strSizeWNull );
- massert( 10326 , "Invalid CodeWScope object size", totalSize == 4 + 4 + strSizeWNull + objSize );
- // Subobject validation handled elsewhere.
- }
- case Object:
- // We expect Object size validation to be handled elsewhere.
- default:
- break;
- }
- }
-
inline int BSONElement::size( int maxLen ) const {
if ( totalSize >= 0 )
return totalSize;
diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp
index 88e6d481b10..998ac79ffd6 100644
--- a/src/mongo/bson/bson_validate.cpp
+++ b/src/mongo/bson/bson_validate.cpp
@@ -15,15 +15,31 @@
* limitations under the License.
*/
+#include <cstring>
#include <deque>
#include "mongo/bson/bson_validate.h"
#include "mongo/bson/oid.h"
+#include "mongo/db/jsobj.h"
namespace mongo {
namespace {
+ /**
+ * Creates a status with InvalidBSON code and adds information about _id if available.
+ * WARNING: only pass in a non-EOO idElem if it has been fully validated already!
+ */
+ Status makeError(std::string baseMsg, BSONElement idElem) {
+ if (idElem.eoo()) {
+ baseMsg += " in object with unknown _id";
+ }
+ else {
+ baseMsg += " in object with " + idElem.toString(/*field name=*/true, /*full=*/true);
+ }
+ return Status(ErrorCodes::InvalidBSON, baseMsg);
+ }
+
class Buffer {
public:
Buffer( const char* buffer, uint64_t maxLength )
@@ -45,7 +61,7 @@ namespace mongo {
Status readCString( StringData* out ) {
const void* x = memchr( _buffer + _position, 0, _maxLength - _position );
if ( !x )
- return Status( ErrorCodes::InvalidBSON, "no end of c-string" );
+ return makeError("no end of c-string", _idElem);
uint64_t len = static_cast<uint64_t>( static_cast<const char*>(x) - ( _buffer + _position ) );
StringData data( _buffer + _position, len );
@@ -60,21 +76,21 @@ namespace mongo {
Status readUTF8String( StringData* out ) {
int sz;
if ( !readNumber<int>( &sz ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", _idElem);
if ( out ) {
*out = StringData( _buffer + _position, sz );
}
if ( !skip( sz - 1 ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", _idElem);
char c;
if ( !readNumber<char>( &c ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", _idElem);
if ( c != 0 )
- return Status( ErrorCodes::InvalidBSON, "not null terminate string" );
+ return makeError("not null terminated string", _idElem);
return Status::OK();
}
@@ -88,10 +104,22 @@ namespace mongo {
return _position;
}
+ const char* getBasePtr() const {
+ return _buffer;
+ }
+
+ /**
+ * WARNING: only pass in a non-EOO idElem if it has been fully validated already!
+ */
+ void setIdElem(BSONElement idElem) {
+ _idElem = idElem;
+ }
+
private:
const char* _buffer;
uint64_t _position;
uint64_t _maxLength;
+ BSONElement _idElem;
};
struct ValidationState {
@@ -127,12 +155,17 @@ namespace mongo {
int _startPosition;
};
- Status validateElementInfo(Buffer* buffer, ValidationState::State* nextState) {
+ /**
+ * WARNING: only pass in a non-EOO idElem if it has been fully validated already!
+ */
+ Status validateElementInfo(Buffer* buffer,
+ ValidationState::State* nextState,
+ BSONElement idElem) {
Status status = Status::OK();
signed char type;
if ( !buffer->readNumber<signed char>(&type) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
if ( type == EOO ) {
*nextState = ValidationState::EndObj;
@@ -153,17 +186,17 @@ namespace mongo {
case jstOID:
if ( !buffer->skip( sizeof(OID) ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
case NumberInt:
if ( !buffer->skip( sizeof(int32_t) ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
case Bool:
if ( !buffer->skip( sizeof(int8_t) ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
@@ -172,7 +205,7 @@ namespace mongo {
case Timestamp:
case Date:
if ( !buffer->skip( sizeof(int64_t) ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
case DBRef:
@@ -203,9 +236,9 @@ namespace mongo {
case BinData: {
int sz;
if ( !buffer->readNumber<int>( &sz ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
if ( !buffer->skip( 1 + sz ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
}
case CodeWScope:
@@ -217,7 +250,7 @@ namespace mongo {
return Status::OK();
default:
- return Status( ErrorCodes::InvalidBSON, "invalid bson type" );
+ return makeError("invalid bson type", idElem);
}
}
@@ -226,6 +259,9 @@ namespace mongo {
ValidationObjectFrame* curr = NULL;
ValidationState::State state = ValidationState::BeginObj;
+ uint64_t idElemStartPos = 0; // will become idElem once validated
+ BSONElement idElem;
+
while (state != ValidationState::Done) {
switch (state) {
case ValidationState::BeginObj:
@@ -234,22 +270,36 @@ namespace mongo {
curr->setStartPosition(buffer->position());
curr->setIsCodeWithScope(false);
if (!buffer->readNumber<int>(&curr->expectedSize)) {
- return Status(ErrorCodes::InvalidBSON,
- "bson size is larger than buffer size");
+ return makeError("bson size is larger than buffer size", idElem);
}
state = ValidationState::WithinObj;
// fall through
case ValidationState::WithinObj: {
- Status status = validateElementInfo(buffer, &state);
+ const bool atTopLevel = frames.size() == 1;
+ // check if we've finished validating idElem and are at start of next element.
+ if (atTopLevel && idElemStartPos) {
+ idElem = BSONElement(buffer->getBasePtr() + idElemStartPos);
+ buffer->setIdElem(idElem);
+ idElemStartPos = 0;
+ }
+
+ const uint64_t elemStartPos = buffer->position();
+ Status status = validateElementInfo(buffer, &state, idElem);
if (!status.isOK())
return status;
+
+ if (idElem.eoo() && atTopLevel) {
+ // we've already validated that fieldname is safe to access.
+ if (strcmp(buffer->getBasePtr() + elemStartPos + 1/*type*/, "_id") == 0) {
+ idElemStartPos = elemStartPos;
+ }
+ }
break;
}
case ValidationState::EndObj: {
int actualLength = buffer->position() - curr->startPosition();
if ( actualLength != curr->expectedSize ) {
- return Status( ErrorCodes::InvalidBSON,
- "bson length doesn't match what we found" );
+ return makeError("bson length doesn't match what we found", idElem);
}
frames.pop_back();
if (frames.empty()) {
@@ -270,7 +320,7 @@ namespace mongo {
curr->setStartPosition(buffer->position());
curr->setIsCodeWithScope(true);
if ( !buffer->readNumber<int>( &curr->expectedSize ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson CodeWScope size" );
+ return makeError("invalid bson CodeWScope size", idElem);
Status status = buffer->readUTF8String( NULL );
if ( !status.isOK() )
return status;
@@ -280,12 +330,12 @@ namespace mongo {
case ValidationState::EndCodeWScope: {
int actualLength = buffer->position() - curr->startPosition();
if ( actualLength != curr->expectedSize ) {
- return Status( ErrorCodes::InvalidBSON,
- "bson length for CodeWScope doesn't match what we found" );
+ return makeError("bson length for CodeWScope doesn't match what we found",
+ idElem);
}
frames.pop_back();
if (frames.empty())
- return Status(ErrorCodes::InvalidBSON, "unnested CodeWScope");
+ return makeError("unnested CodeWScope", idElem);
curr = &frames.back();
state = ValidationState::WithinObj;
break;
diff --git a/src/mongo/bson/bson_validate_test.cpp b/src/mongo/bson/bson_validate_test.cpp
index 61e67f33c46..2d381e31183 100644
--- a/src/mongo/bson/bson_validate_test.cpp
+++ b/src/mongo/bson/bson_validate_test.cpp
@@ -22,6 +22,14 @@ namespace {
using namespace mongo;
+ void appendInvalidStringElement(const char* fieldName, BufBuilder* bb) {
+ // like a BSONObj string, but without a NUL terminator.
+ bb->appendChar(String);
+ bb->appendStr("name", /*withNUL*/true);
+ bb->appendNum(4);
+ bb->appendStr("asdf", /*withNUL*/false);
+ }
+
TEST(BSONValidate, Basic) {
BSONObj x;
ASSERT_TRUE( x.valid() );
@@ -213,4 +221,56 @@ namespace {
ASSERT_NOT_OK(validateBSON(x.objdata(), x.objsize() / 2));
}
+ TEST(BSONValidateFast, ErrorWithId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ ob.append("_id", 1);
+ appendInvalidStringElement("not_id", &bb);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with _id: 1");
+ }
+
+ TEST(BSONValidateFast, ErrorBeforeId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ appendInvalidStringElement("not_id", &bb);
+ ob.append("_id", 1);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id");
+ }
+
+ TEST(BSONValidateFast, ErrorNoId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ appendInvalidStringElement("not_id", &bb);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id");
+ }
+
+ TEST(BSONValidateFast, ErrorIsInId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ appendInvalidStringElement("_id", &bb);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id");
+ }
+
+ TEST(BSONValidateFast, NonTopLevelId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ ob.append("not_id1", BSON("_id" << "not the real _id"));
+ appendInvalidStringElement("not_id2", &bb);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id");
+ }
}
diff --git a/src/mongo/bson/bsonelement.h b/src/mongo/bson/bsonelement.h
index a3dd39ee56d..7a150992936 100644
--- a/src/mongo/bson/bsonelement.h
+++ b/src/mongo/bson/bsonelement.h
@@ -376,9 +376,6 @@ namespace mongo {
/** Constructs an empty element */
BSONElement();
- /** Check that data is internally consistent. */
- void validate() const;
-
/** True if this element may contain subobjects. */
bool mayEncapsulate() const {
switch ( type() ) {
diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp
index 03d1195b841..3a8cdd08bbf 100644
--- a/src/mongo/db/cloner.cpp
+++ b/src/mongo/db/cloner.cpp
@@ -154,18 +154,10 @@ namespace mongo {
BSONObj tmp = i.nextSafe();
/* assure object is valid. note this will slow us down a little. */
- if ( !tmp.valid() ) {
- stringstream ss;
- ss << "Cloner: skipping corrupt object from " << from_collection;
- BSONElement e = tmp.firstElement();
- try {
- e.validate();
- ss << " firstElement: " << e;
- }
- catch( ... ) {
- ss << " firstElement corrupt";
- }
- out() << ss.str() << endl;
+ const Status status = validateBSON(tmp.objdata(), tmp.objsize());
+ if (!status.isOK()) {
+ out() << "Cloner: skipping corrupt object from " << from_collection
+ << ": " << status.reason();
continue;
}
diff --git a/src/mongo/db/commands/validate.cpp b/src/mongo/db/commands/validate.cpp
index 84a819e0255..c69024e05c2 100644
--- a/src/mongo/db/commands/validate.cpp
+++ b/src/mongo/db/commands/validate.cpp
@@ -266,24 +266,15 @@ namespace mongo {
if (full){
BSONObj obj = BSONObj::make(r);
- if (!obj.isValid() || !obj.valid()){ // both fast and deep checks
+ const Status status = validateBSON(obj.objdata(), obj.objsize());
+ if (!status.isOK()) {
valid = false;
if (nInvalid == 0) // only log once;
errors << "invalid bson object detected (see logs for more info)";
nInvalid++;
- if (strcmp("_id", obj.firstElementFieldName()) == 0){
- try {
- obj.firstElement().validate(); // throws on error
- log() << "Invalid bson detected in " << ns << " with _id: " << obj.firstElement().toString(false) << endl;
- }
- catch(...){
- log() << "Invalid bson detected in " << ns << " with corrupt _id" << endl;
- }
- }
- else {
- log() << "Invalid bson detected in " << ns << " and couldn't find _id" << endl;
- }
+ log() << "Invalid bson detected in " << ns
+ << ": " << status.reason();
}
else {
bsonLen += obj.objsize();
diff --git a/src/mongo/tools/bsondump.cpp b/src/mongo/tools/bsondump.cpp
index 31c45c22bea..ef0c02be5b3 100644
--- a/src/mongo/tools/bsondump.cpp
+++ b/src/mongo/tools/bsondump.cpp
@@ -84,17 +84,31 @@ public:
try {
cout << prefix << "--- new object ---\n";
cout << prefix << "\t size : " << o.objsize() << "\n";
+
+ // Note: this will recursively check each level of the bson and will also be called by
+ // this function at each level. While inefficient, it shouldn't effect correctness.
+ const Status status = validateBSON(o.objdata(), o.objsize());
+ if (!status.isOK()) {
+ cout << prefix << "\t OBJECT IS INVALID: " << status.reason() << '\n'
+ << prefix << "\t attempting to print as much as possible" << endl;
+ }
+
BSONObjIterator i(o);
while ( i.more() ) {
- BSONElement e = i.next();
- cout << prefix << "\t\t " << e.fieldName() << "\n" << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size() << endl;
+ // This call verifies it is safe to call size() and fieldName() but doesn't check
+ // whether the element extends past the end of the object. That is done below.
+ BSONElement e = i.next(/*checkEnd=*/true);
+
+ cout << prefix << "\t\t " << e.fieldName() << "\n"
+ << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size()
+ << endl;
+
if ( ( read + e.size() ) > o.objsize() ) {
cout << prefix << " SIZE DOES NOT WORK" << endl;
return false;
}
read += e.size();
try {
- e.validate();
if ( e.isABSONObj() ) {
if ( ! debug( e.Obj() , depth + 1 ) ) {
//return false;
@@ -111,7 +125,6 @@ public:
else if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ) {
cout << prefix << "\t\t\t" << e << endl;
}
-
}
catch ( std::exception& e ) {
cout << prefix << "\t\t\t bad value: " << e.what() << endl;
diff --git a/src/mongo/tools/tool.cpp b/src/mongo/tools/tool.cpp
index e6ac3785aef..e90e46257c7 100644
--- a/src/mongo/tools/tool.cpp
+++ b/src/mongo/tools/tool.cpp
@@ -312,21 +312,23 @@ namespace mongo {
verify( amt == (size_t)( size - 4 ) );
BSONObj o( buf );
- if (bsonToolGlobalParams.objcheck && !o.valid()) {
- toolError() << "INVALID OBJECT - going to try and print out " << std::endl;
- toolError() << "size: " << size << std::endl;
- BSONObjIterator i(o);
- while ( i.more() ) {
- BSONElement e = i.next();
+ if (bsonToolGlobalParams.objcheck) {
+ const Status status = validateBSON(buf, size);
+ if (!status.isOK()) {
+ toolError() << "INVALID OBJECT - going to try and print out " << std::endl;
+ toolError() << "size: " << size << std::endl;
+ toolError() << "error: " << status.reason() << std::endl;
+
+ StringBuilder sb;
try {
- e.validate();
+ o.toString(sb); // using StringBuilder version to get as much as possible
+ } catch (...) {
+ toolError() << "object up to error: " << sb.str() << endl;
+ throw;
}
- catch ( ... ) {
- toolError() << "\t\t NEXT ONE IS INVALID" << std::endl;
- }
- toolError() << "\t name : " << e.fieldName() << " " << typeName(e.type())
- << std::endl;
- toolError() << "\t " << e << std::endl;
+ toolError() << "complete object: " << sb.str() << endl;
+
+ // NOTE: continuing with object even though we know it is invalid.
}
}