summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorMathias Stearn <mathias@10gen.com>2013-12-24 15:27:14 -0500
committerMathias Stearn <mathias@10gen.com>2014-01-03 12:50:26 -0500
commitab48b396e5bc48cb27ce3bc2dbd28bf3b6a96851 (patch)
tree309ae1ffe7b09e05f4a594d97cee9a599755c092 /src/mongo
parent04221630900335fc6ee0d9922edae9d29f02d66d (diff)
downloadmongo-ab48b396e5bc48cb27ce3bc2dbd28bf3b6a96851.tar.gz
SERVER-11903 Remove BSONElement::validate()
It was mostly used for improving the error messages about BSON that already failed validation, but it did not cover all validation cases. To replace this use case validateBSON was enhanced to include the _id of the invalid object in the error message.
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/bson/bson-inl.h40
-rw-r--r--src/mongo/bson/bson_validate.cpp96
-rw-r--r--src/mongo/bson/bson_validate_test.cpp60
-rw-r--r--src/mongo/bson/bsonelement.h3
-rw-r--r--src/mongo/db/cloner.cpp16
-rw-r--r--src/mongo/db/commands/validate.cpp17
-rw-r--r--src/mongo/tools/bsondump.cpp21
-rw-r--r--src/mongo/tools/tool.cpp28
8 files changed, 173 insertions, 108 deletions
diff --git a/src/mongo/bson/bson-inl.h b/src/mongo/bson/bson-inl.h
index 70bd7f714f0..29ce7f7af04 100644
--- a/src/mongo/bson/bson-inl.h
+++ b/src/mongo/bson/bson-inl.h
@@ -474,7 +474,6 @@ dodouble:
int offset = (int) (e.rawdata() - this->objdata());
massert( 10330 , "Element extends past end of object",
e.size() + offset <= this->objsize() );
- e.validate();
bool end = ( e.size() + offset == this->objsize() );
if ( e.eoo() ) {
massert( 10331 , "EOO Before end of object", end );
@@ -489,45 +488,6 @@ dodouble:
s << ( isArray ? " ]" : " }" );
}
- inline void BSONElement::validate() const {
- const BSONType t = type();
-
- switch( t ) {
- case DBRef:
- case Code:
- case Symbol:
- case mongo::String: {
- unsigned x = (unsigned) valuestrsize();
- bool lenOk = x > 0 && x < (unsigned) BSONObjMaxInternalSize;
- if( lenOk && valuestr()[x-1] == 0 )
- return;
- StringBuilder buf;
- buf << "Invalid dbref/code/string/symbol size: " << x;
- if( lenOk )
- buf << " strnlen:" << mongo::strnlen( valuestr() , x );
- msgasserted( 10321 , buf.str() );
- break;
- }
- case CodeWScope: {
- int totalSize = *( int * )( value() );
- massert( 10322 , "Invalid CodeWScope size", totalSize >= 8 );
- int strSizeWNull = *( int * )( value() + 4 );
- massert( 10323 , "Invalid CodeWScope string size", totalSize >= strSizeWNull + 4 + 4 );
- massert( 10324 , "Invalid CodeWScope string size",
- strSizeWNull > 0 &&
- (strSizeWNull - 1) == mongo::strnlen( codeWScopeCode(), strSizeWNull ) );
- massert( 10325 , "Invalid CodeWScope size", totalSize >= strSizeWNull + 4 + 4 + 4 );
- int objSize = *( int * )( value() + 4 + 4 + strSizeWNull );
- massert( 10326 , "Invalid CodeWScope object size", totalSize == 4 + 4 + strSizeWNull + objSize );
- // Subobject validation handled elsewhere.
- }
- case Object:
- // We expect Object size validation to be handled elsewhere.
- default:
- break;
- }
- }
-
inline int BSONElement::size( int maxLen ) const {
if ( totalSize >= 0 )
return totalSize;
diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp
index 88e6d481b10..998ac79ffd6 100644
--- a/src/mongo/bson/bson_validate.cpp
+++ b/src/mongo/bson/bson_validate.cpp
@@ -15,15 +15,31 @@
* limitations under the License.
*/
+#include <cstring>
#include <deque>
#include "mongo/bson/bson_validate.h"
#include "mongo/bson/oid.h"
+#include "mongo/db/jsobj.h"
namespace mongo {
namespace {
+ /**
+ * Creates a status with InvalidBSON code and adds information about _id if available.
+ * WARNING: only pass in a non-EOO idElem if it has been fully validated already!
+ */
+ Status makeError(std::string baseMsg, BSONElement idElem) {
+ if (idElem.eoo()) {
+ baseMsg += " in object with unknown _id";
+ }
+ else {
+ baseMsg += " in object with " + idElem.toString(/*field name=*/true, /*full=*/true);
+ }
+ return Status(ErrorCodes::InvalidBSON, baseMsg);
+ }
+
class Buffer {
public:
Buffer( const char* buffer, uint64_t maxLength )
@@ -45,7 +61,7 @@ namespace mongo {
Status readCString( StringData* out ) {
const void* x = memchr( _buffer + _position, 0, _maxLength - _position );
if ( !x )
- return Status( ErrorCodes::InvalidBSON, "no end of c-string" );
+ return makeError("no end of c-string", _idElem);
uint64_t len = static_cast<uint64_t>( static_cast<const char*>(x) - ( _buffer + _position ) );
StringData data( _buffer + _position, len );
@@ -60,21 +76,21 @@ namespace mongo {
Status readUTF8String( StringData* out ) {
int sz;
if ( !readNumber<int>( &sz ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", _idElem);
if ( out ) {
*out = StringData( _buffer + _position, sz );
}
if ( !skip( sz - 1 ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", _idElem);
char c;
if ( !readNumber<char>( &c ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", _idElem);
if ( c != 0 )
- return Status( ErrorCodes::InvalidBSON, "not null terminate string" );
+ return makeError("not null terminated string", _idElem);
return Status::OK();
}
@@ -88,10 +104,22 @@ namespace mongo {
return _position;
}
+ const char* getBasePtr() const {
+ return _buffer;
+ }
+
+ /**
+ * WARNING: only pass in a non-EOO idElem if it has been fully validated already!
+ */
+ void setIdElem(BSONElement idElem) {
+ _idElem = idElem;
+ }
+
private:
const char* _buffer;
uint64_t _position;
uint64_t _maxLength;
+ BSONElement _idElem;
};
struct ValidationState {
@@ -127,12 +155,17 @@ namespace mongo {
int _startPosition;
};
- Status validateElementInfo(Buffer* buffer, ValidationState::State* nextState) {
+ /**
+ * WARNING: only pass in a non-EOO idElem if it has been fully validated already!
+ */
+ Status validateElementInfo(Buffer* buffer,
+ ValidationState::State* nextState,
+ BSONElement idElem) {
Status status = Status::OK();
signed char type;
if ( !buffer->readNumber<signed char>(&type) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
if ( type == EOO ) {
*nextState = ValidationState::EndObj;
@@ -153,17 +186,17 @@ namespace mongo {
case jstOID:
if ( !buffer->skip( sizeof(OID) ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
case NumberInt:
if ( !buffer->skip( sizeof(int32_t) ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
case Bool:
if ( !buffer->skip( sizeof(int8_t) ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
@@ -172,7 +205,7 @@ namespace mongo {
case Timestamp:
case Date:
if ( !buffer->skip( sizeof(int64_t) ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
case DBRef:
@@ -203,9 +236,9 @@ namespace mongo {
case BinData: {
int sz;
if ( !buffer->readNumber<int>( &sz ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
if ( !buffer->skip( 1 + sz ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson" );
+ return makeError("invalid bson", idElem);
return Status::OK();
}
case CodeWScope:
@@ -217,7 +250,7 @@ namespace mongo {
return Status::OK();
default:
- return Status( ErrorCodes::InvalidBSON, "invalid bson type" );
+ return makeError("invalid bson type", idElem);
}
}
@@ -226,6 +259,9 @@ namespace mongo {
ValidationObjectFrame* curr = NULL;
ValidationState::State state = ValidationState::BeginObj;
+ uint64_t idElemStartPos = 0; // will become idElem once validated
+ BSONElement idElem;
+
while (state != ValidationState::Done) {
switch (state) {
case ValidationState::BeginObj:
@@ -234,22 +270,36 @@ namespace mongo {
curr->setStartPosition(buffer->position());
curr->setIsCodeWithScope(false);
if (!buffer->readNumber<int>(&curr->expectedSize)) {
- return Status(ErrorCodes::InvalidBSON,
- "bson size is larger than buffer size");
+ return makeError("bson size is larger than buffer size", idElem);
}
state = ValidationState::WithinObj;
// fall through
case ValidationState::WithinObj: {
- Status status = validateElementInfo(buffer, &state);
+ const bool atTopLevel = frames.size() == 1;
+ // check if we've finished validating idElem and are at start of next element.
+ if (atTopLevel && idElemStartPos) {
+ idElem = BSONElement(buffer->getBasePtr() + idElemStartPos);
+ buffer->setIdElem(idElem);
+ idElemStartPos = 0;
+ }
+
+ const uint64_t elemStartPos = buffer->position();
+ Status status = validateElementInfo(buffer, &state, idElem);
if (!status.isOK())
return status;
+
+ if (idElem.eoo() && atTopLevel) {
+ // we've already validated that fieldname is safe to access.
+ if (strcmp(buffer->getBasePtr() + elemStartPos + 1/*type*/, "_id") == 0) {
+ idElemStartPos = elemStartPos;
+ }
+ }
break;
}
case ValidationState::EndObj: {
int actualLength = buffer->position() - curr->startPosition();
if ( actualLength != curr->expectedSize ) {
- return Status( ErrorCodes::InvalidBSON,
- "bson length doesn't match what we found" );
+ return makeError("bson length doesn't match what we found", idElem);
}
frames.pop_back();
if (frames.empty()) {
@@ -270,7 +320,7 @@ namespace mongo {
curr->setStartPosition(buffer->position());
curr->setIsCodeWithScope(true);
if ( !buffer->readNumber<int>( &curr->expectedSize ) )
- return Status( ErrorCodes::InvalidBSON, "invalid bson CodeWScope size" );
+ return makeError("invalid bson CodeWScope size", idElem);
Status status = buffer->readUTF8String( NULL );
if ( !status.isOK() )
return status;
@@ -280,12 +330,12 @@ namespace mongo {
case ValidationState::EndCodeWScope: {
int actualLength = buffer->position() - curr->startPosition();
if ( actualLength != curr->expectedSize ) {
- return Status( ErrorCodes::InvalidBSON,
- "bson length for CodeWScope doesn't match what we found" );
+ return makeError("bson length for CodeWScope doesn't match what we found",
+ idElem);
}
frames.pop_back();
if (frames.empty())
- return Status(ErrorCodes::InvalidBSON, "unnested CodeWScope");
+ return makeError("unnested CodeWScope", idElem);
curr = &frames.back();
state = ValidationState::WithinObj;
break;
diff --git a/src/mongo/bson/bson_validate_test.cpp b/src/mongo/bson/bson_validate_test.cpp
index 61e67f33c46..2d381e31183 100644
--- a/src/mongo/bson/bson_validate_test.cpp
+++ b/src/mongo/bson/bson_validate_test.cpp
@@ -22,6 +22,14 @@ namespace {
using namespace mongo;
+ void appendInvalidStringElement(const char* fieldName, BufBuilder* bb) {
+ // like a BSONObj string, but without a NUL terminator.
+ bb->appendChar(String);
+ bb->appendStr("name", /*withNUL*/true);
+ bb->appendNum(4);
+ bb->appendStr("asdf", /*withNUL*/false);
+ }
+
TEST(BSONValidate, Basic) {
BSONObj x;
ASSERT_TRUE( x.valid() );
@@ -213,4 +221,56 @@ namespace {
ASSERT_NOT_OK(validateBSON(x.objdata(), x.objsize() / 2));
}
+ TEST(BSONValidateFast, ErrorWithId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ ob.append("_id", 1);
+ appendInvalidStringElement("not_id", &bb);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with _id: 1");
+ }
+
+ TEST(BSONValidateFast, ErrorBeforeId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ appendInvalidStringElement("not_id", &bb);
+ ob.append("_id", 1);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id");
+ }
+
+ TEST(BSONValidateFast, ErrorNoId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ appendInvalidStringElement("not_id", &bb);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id");
+ }
+
+ TEST(BSONValidateFast, ErrorIsInId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ appendInvalidStringElement("_id", &bb);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id");
+ }
+
+ TEST(BSONValidateFast, NonTopLevelId) {
+ BufBuilder bb;
+ BSONObjBuilder ob(bb);
+ ob.append("not_id1", BSON("_id" << "not the real _id"));
+ appendInvalidStringElement("not_id2", &bb);
+ const BSONObj x = ob.done();
+ const Status status = validateBSON(x.objdata(), x.objsize());
+ ASSERT_NOT_OK(status);
+ ASSERT_EQUALS(status.reason(), "not null terminated string in object with unknown _id");
+ }
}
diff --git a/src/mongo/bson/bsonelement.h b/src/mongo/bson/bsonelement.h
index a3dd39ee56d..7a150992936 100644
--- a/src/mongo/bson/bsonelement.h
+++ b/src/mongo/bson/bsonelement.h
@@ -376,9 +376,6 @@ namespace mongo {
/** Constructs an empty element */
BSONElement();
- /** Check that data is internally consistent. */
- void validate() const;
-
/** True if this element may contain subobjects. */
bool mayEncapsulate() const {
switch ( type() ) {
diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp
index 03d1195b841..3a8cdd08bbf 100644
--- a/src/mongo/db/cloner.cpp
+++ b/src/mongo/db/cloner.cpp
@@ -154,18 +154,10 @@ namespace mongo {
BSONObj tmp = i.nextSafe();
/* assure object is valid. note this will slow us down a little. */
- if ( !tmp.valid() ) {
- stringstream ss;
- ss << "Cloner: skipping corrupt object from " << from_collection;
- BSONElement e = tmp.firstElement();
- try {
- e.validate();
- ss << " firstElement: " << e;
- }
- catch( ... ) {
- ss << " firstElement corrupt";
- }
- out() << ss.str() << endl;
+ const Status status = validateBSON(tmp.objdata(), tmp.objsize());
+ if (!status.isOK()) {
+ out() << "Cloner: skipping corrupt object from " << from_collection
+ << ": " << status.reason();
continue;
}
diff --git a/src/mongo/db/commands/validate.cpp b/src/mongo/db/commands/validate.cpp
index 84a819e0255..c69024e05c2 100644
--- a/src/mongo/db/commands/validate.cpp
+++ b/src/mongo/db/commands/validate.cpp
@@ -266,24 +266,15 @@ namespace mongo {
if (full){
BSONObj obj = BSONObj::make(r);
- if (!obj.isValid() || !obj.valid()){ // both fast and deep checks
+ const Status status = validateBSON(obj.objdata(), obj.objsize());
+ if (!status.isOK()) {
valid = false;
if (nInvalid == 0) // only log once;
errors << "invalid bson object detected (see logs for more info)";
nInvalid++;
- if (strcmp("_id", obj.firstElementFieldName()) == 0){
- try {
- obj.firstElement().validate(); // throws on error
- log() << "Invalid bson detected in " << ns << " with _id: " << obj.firstElement().toString(false) << endl;
- }
- catch(...){
- log() << "Invalid bson detected in " << ns << " with corrupt _id" << endl;
- }
- }
- else {
- log() << "Invalid bson detected in " << ns << " and couldn't find _id" << endl;
- }
+ log() << "Invalid bson detected in " << ns
+ << ": " << status.reason();
}
else {
bsonLen += obj.objsize();
diff --git a/src/mongo/tools/bsondump.cpp b/src/mongo/tools/bsondump.cpp
index 31c45c22bea..ef0c02be5b3 100644
--- a/src/mongo/tools/bsondump.cpp
+++ b/src/mongo/tools/bsondump.cpp
@@ -84,17 +84,31 @@ public:
try {
cout << prefix << "--- new object ---\n";
cout << prefix << "\t size : " << o.objsize() << "\n";
+
+ // Note: this will recursively check each level of the bson and will also be called by
+ // this function at each level. While inefficient, it shouldn't effect correctness.
+ const Status status = validateBSON(o.objdata(), o.objsize());
+ if (!status.isOK()) {
+ cout << prefix << "\t OBJECT IS INVALID: " << status.reason() << '\n'
+ << prefix << "\t attempting to print as much as possible" << endl;
+ }
+
BSONObjIterator i(o);
while ( i.more() ) {
- BSONElement e = i.next();
- cout << prefix << "\t\t " << e.fieldName() << "\n" << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size() << endl;
+ // This call verifies it is safe to call size() and fieldName() but doesn't check
+ // whether the element extends past the end of the object. That is done below.
+ BSONElement e = i.next(/*checkEnd=*/true);
+
+ cout << prefix << "\t\t " << e.fieldName() << "\n"
+ << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size()
+ << endl;
+
if ( ( read + e.size() ) > o.objsize() ) {
cout << prefix << " SIZE DOES NOT WORK" << endl;
return false;
}
read += e.size();
try {
- e.validate();
if ( e.isABSONObj() ) {
if ( ! debug( e.Obj() , depth + 1 ) ) {
//return false;
@@ -111,7 +125,6 @@ public:
else if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1)) ) {
cout << prefix << "\t\t\t" << e << endl;
}
-
}
catch ( std::exception& e ) {
cout << prefix << "\t\t\t bad value: " << e.what() << endl;
diff --git a/src/mongo/tools/tool.cpp b/src/mongo/tools/tool.cpp
index e6ac3785aef..e90e46257c7 100644
--- a/src/mongo/tools/tool.cpp
+++ b/src/mongo/tools/tool.cpp
@@ -312,21 +312,23 @@ namespace mongo {
verify( amt == (size_t)( size - 4 ) );
BSONObj o( buf );
- if (bsonToolGlobalParams.objcheck && !o.valid()) {
- toolError() << "INVALID OBJECT - going to try and print out " << std::endl;
- toolError() << "size: " << size << std::endl;
- BSONObjIterator i(o);
- while ( i.more() ) {
- BSONElement e = i.next();
+ if (bsonToolGlobalParams.objcheck) {
+ const Status status = validateBSON(buf, size);
+ if (!status.isOK()) {
+ toolError() << "INVALID OBJECT - going to try and print out " << std::endl;
+ toolError() << "size: " << size << std::endl;
+ toolError() << "error: " << status.reason() << std::endl;
+
+ StringBuilder sb;
try {
- e.validate();
+ o.toString(sb); // using StringBuilder version to get as much as possible
+ } catch (...) {
+ toolError() << "object up to error: " << sb.str() << endl;
+ throw;
}
- catch ( ... ) {
- toolError() << "\t\t NEXT ONE IS INVALID" << std::endl;
- }
- toolError() << "\t name : " << e.fieldName() << " " << typeName(e.type())
- << std::endl;
- toolError() << "\t " << e << std::endl;
+ toolError() << "complete object: " << sb.str() << endl;
+
+ // NOTE: continuing with object even though we know it is invalid.
}
}