/** @file jsobj.h
BSON classes
*/
/**
BSONObj and its helpers
"BSON" stands for "binary JSON" -- ie a binary way to represent objects that would be
represented in JSON (plus a few extensions useful for databases & other languages).
http://www.mongodb.org/display/DOCS/BSON
*/
/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "../stdafx.h"
#include "../util/builder.h"
#include "../util/optime.h"
#include "boost/utility.hpp"
#include <set>
namespace mongo {
// Forward declarations: these classes are only named (not defined) at this point in the header.
class BSONObj;
class Record;
class BSONObjBuilder;
class BSONObjBuilderValueStream;
// Pack the following definitions on 1-byte boundaries until the matching "#pragma pack()" below.
#pragma pack(1)
/**
    Every type tag a BSON element can carry.
    Each enumerator is pinned to an explicit numeric value.
*/
enum BSONType {
    /** sorts below every other type */
    MinKey = -1,
    /** marks the end of an object */
    EOO = 0,
    /** floating point value, double precision */
    NumberDouble = 1,
    /** utf8 encoded character string */
    String = 2,
    /** a nested (embedded) object */
    Object = 3,
    /** a nested (embedded) array */
    Array = 4,
    /** raw binary data */
    BinData = 5,
    /** the Undefined type */
    Undefined = 6,
    /** an ObjectId */
    jstOID = 7,
    /** a boolean */
    Bool = 8,
    /** a date */
    Date = 9,
    /** the null type */
    jstNULL = 10,
    /** a regular expression: pattern plus options */
    RegEx = 11,
    /** deprecated / slated for redesign */
    DBRef = 12,
    /** deprecated / prefer CodeWScope */
    Code = 13,
    /** a symbol from a programming language (e.g., Python) */
    Symbol = 14,
    /** javascript code, with context, executable on the database server */
    CodeWScope = 15,
    /** signed 32 bit integer */
    NumberInt = 16,
    /** on insert, updated to a Date holding the next OpTime value */
    Timestamp = 17,
    /** 64 bit integer */
    NumberLong = 18,
    /** highest type value other than MaxKey */
    JSTypeMax = 18,
    /** sorts above every other type */
    MaxKey = 127
};
/* Subtype tags for BinData elements.
   Values from bdtCustom upward are understood by the JS compiler but are
   opaque to the database.
*/
enum BinDataType {
    Function = 1,
    ByteArray = 2,
    bdtUUID = 3,
    MD5Type = 5,
    bdtCustom = 128
};
/** Object ID type.
    BSON objects typically have an _id field for the object id.  This field should be the first
    member of the object when present.  class OID is a special type that is a 12 byte id which
    is likely to be unique to the system.  You may also use other types for _id's.
    When _id field is missing from a BSON object, on an insert the database may insert one
    automatically in certain circumstances.
*/
class OID {
    // First 8 bytes of the id, viewable either as one integer or as raw bytes.
    union {
        long long a;
        unsigned char data[8];
    };
    // Remaining 4 bytes of the id.
    unsigned b;
public:
    /** @return pointer to the leading 8 bytes of the id (the `data` member). */
    const unsigned char *getData() const { return data; }

    /** Equality on all 12 bytes.  const-qualified so const OIDs (e.g. the references
        handed out by BSONElement) can be compared. */
    bool operator==(const OID& r) const {
        return a==r.a && b==r.b;
    }
    /** Negation of operator==. */
    bool operator!=(const OID& r) const {
        return !( *this == r );
    }

    /** The object ID output as 24 hex digits: the 8 bytes of `data` followed by the
        4 bytes of `b` in memory order, each as two lowercase hex digits. */
    string str() const {
        stringstream s;
        s << hex;
        const unsigned char * tail = (const unsigned char*)&b;
        for( int i = 0; i < 12; i++ ) {
            unsigned u = i < 8 ? data[i] : tail[i - 8];
            // stream fill()/width() wasn't working here, so pad to two digits manually
            if( u < 16 ) s << '0';
            s << u;
        }
        return s.str();
    }

    /**
       sets the contents to a new oid / randomized value
     */
    void init();

    /** Set to the hex string value specified. */
    void init( string s );
};
/** Stream insertion operator for OID.  Only declared here; the output format is
    determined by the definition, which is outside this section. */
ostream& operator<<( ostream &s, const OID &o );
/** Selects the output dialect used when generating JSON from BSON.
    See the Mongo Extended JSON documentation for details.
*/
enum JsonStringFormat {
    /** strict RFC format */
    Strict,

    /** 10gen format, close to JS format; understandable by javascript
        running inside the Mongo server via eval() */
    TenGen,

    /** compatible with Javascript JSON */
    JS
};
#pragma pack()
/* internals
   <type><fieldName    ><value>
   -------- size() ------------
         -fieldNameSize-
                        value()
   type()
*/
/** BSONElement represents an "element" in a BSONObj.  So for the object { a : 3, b : "abc" },
    'a : 3' is the first element (key+value).

    The BSONElement object points into the BSONObj's data.  Thus the BSONObj must stay in scope
    for the life of the BSONElement.

    Layout assumed by the accessors below: one type byte, then the NUL-terminated
    field name, then the value bytes.
*/
class BSONElement {
    friend class BSONObjIterator;
    friend class BSONObj;
public:
    string toString( bool includeFieldName = true ) const;
    operator string() const { return toString(); }
    string jsonString( JsonStringFormat format, bool includeFieldNames = true ) const;

    /** Returns the type of the element (the first byte of the element data). */
    BSONType type() const {
        return (BSONType) *data;
    }

    /** Returns the type of the element, normalized to its main type.
        The main purpose is numbers: any numeric type will return NumberDouble.
    */
    BSONType canonicalType() const {
        BSONType t = type();
        switch ( t ){
        case NumberInt:
        case NumberLong:
            return NumberDouble;
        default:
            return t;
        }
    }

    /** Indicates if it is the end-of-object element, which is present at the end of
        every BSON object.
    */
    bool eoo() const {
        return type() == EOO;
    }

    /** Size of the element.
        @param maxLen If maxLen is specified, don't scan more than maxLen bytes to calculate size.
    */
    int size( int maxLen = -1 ) const;

    /** Wrap this element up as a singleton object. */
    BSONObj wrap() const;

    /** field name of the element.  e.g., for
        name : "Joe"
        "name" is the fieldname
    */
    const char * fieldName() const {
        if ( eoo() ) return ""; // no fieldname for it.
        return data + 1;
    }

    /** raw data of the element's value (so be careful). */
    const char * value() const {
        return (data + fieldNameSize() + 1);
    }
    /** size in bytes of the element's value (when applicable). */
    int valuesize() const {
        return size() - fieldNameSize() - 1;
    }

    bool isBoolean() const {
        return type() == Bool;
    }

    /** @return value of a boolean element.
        You must assure element is a boolean before
        calling. */
    bool boolean() const {
        return *value() ? true : false;
    }

    /** Retrieve a java style date value from the element.
        Ensure element is of type Date before calling.
    */
    unsigned long long date() const {
        return *reinterpret_cast< const unsigned long long* >( value() );
    }

    /** Convert the value to boolean, regardless of its type, in a javascript-like fashion
        (i.e., treat zero and null as false).  Every type not listed below is true.
    */
    bool trueValue() const {
        switch( type() ) {
        case NumberLong:
            return _numberLong() != 0;
        case NumberDouble:
            return _numberDouble() != 0;
        case NumberInt:
            return _numberInt() != 0;
        case Bool:
            return boolean();
        case jstNULL:
            return false;
        default:
            ;
        }
        return true;
    }

    /** True if element is of a numeric type. */
    bool isNumber() const {
        switch( type() ) {
        case NumberLong:
        case NumberDouble:
        case NumberInt:
            return true;
        default:
            return false;
        }
    }

    /** True for the numeric types, String, Bool and Date. */
    bool isSimpleType() const {
        switch( type() ){
        case NumberLong:
        case NumberDouble:
        case NumberInt:
        case String:
        case Bool:
        case Date:
            return true;
        default:
            return false;
        }
    }

    /** Return double value for this field. MUST be NumberDouble type. */
    double _numberDouble() const {return *reinterpret_cast< const double* >( value() ); }
    /** Return int value for this field. MUST be NumberInt type. */
    int _numberInt() const {return *reinterpret_cast< const int* >( value() ); }
    /** Return long long value for this field. MUST be NumberLong type. */
    long long _numberLong() const {return *reinterpret_cast< const long long* >( value() ); }

    /** Retrieve int value for the element safely.  Zero returned if not a number.
        Double and long values are converted with a plain cast. */
    int numberInt() const {
        switch( type() ) {
        case NumberDouble:
            return (int) _numberDouble();
        case NumberInt:
            return _numberInt();
        case NumberLong:
            return (int) _numberLong();
        default:
            return 0;
        }
    }

    /** Retrieve long value for the element safely.  Zero returned if not a number. */
    long long numberLong() const {
        switch( type() ) {
        case NumberDouble:
            return (long long) _numberDouble();
        case NumberInt:
            return _numberInt();
        case NumberLong:
            return _numberLong();
        default:
            return 0;
        }
    }

    /** Retrieve the numeric value of the element.  If not of a numeric type, returns 0.
        NOTE: casts to double, data loss may occur with large (>52 bit) NumberLong values.
    */
    double numberDouble() const {
        switch( type() ) {
        case NumberDouble:
            return _numberDouble();
        case NumberInt:
            return _numberInt();
        case NumberLong:
            return (double) _numberLong();
        default:
            return 0;
        }
    }

    /** Retrieve the numeric value of the element.  If not of a numeric type, returns 0.
        NOTE: casts to double, data loss may occur with large (>52 bit) NumberLong values.
    */
    double number() const { return numberDouble(); }

    /** Retrieve the object ID stored in the object.
        You must ensure the element is of type jstOID first. */
    const OID &__oid() const {
        return *reinterpret_cast< const OID* >( value() );
    }

    /** True if element is null. */
    bool isNull() const {
        return type() == jstNULL;
    }

    /** Size (length) of a string element.
        You must assure of type String first. */
    int valuestrsize() const {
        return *reinterpret_cast< const int* >( value() );
    }

    // for objects the size *includes* the size of the size field
    int objsize() const {
        return *reinterpret_cast< const int* >( value() );
    }

    /** Get a string's value.  Also gives you start of the real data for an embedded object.
        You must assure data is of an appropriate type first -- see also valuestrsafe().
    */
    const char * valuestr() const {
        return value() + 4;
    }

    /** Get the string value of the element.  If not a string returns "". */
    const char *valuestrsafe() const {
        return type() == String ? valuestr() : "";
    }
    /** Get the string value of the element.  If not a string returns "". */
    string str() const { return valuestrsafe(); }

    /** Get javascript code of a CodeWScope data element. */
    const char * codeWScopeCode() const {
        return value() + 8;
    }
    /** Get the scope context of a CodeWScope data element.
        Located by scanning for the NUL that ends the code string. */
    const char * codeWScopeScopeData() const {
        // TODO fix
        return codeWScopeCode() + strlen( codeWScopeCode() ) + 1;
    }

    /** Get the embedded object this element holds. */
    BSONObj embeddedObject() const;

    /* uasserts if not an object */
    BSONObj embeddedObjectUserCheck();

    BSONObj codeWScopeObject() const;

    /** Return the element's text as code.  Handles String, Code and CodeWScope;
        any other type is logged and then fails the "not code" uassert. */
    string ascode() const {
        switch( type() ){
        case String:
        case Code:
            return valuestr();
        case CodeWScope:
            return codeWScopeCode();
        default:
            log() << "can't convert type: " << (int)(type()) << " to code" << endl;
        }
        uassert( "not code" , 0 );
        return "";
    }

    /** Get binary data.  Element must be of type BinData.
        @param len receives the stored length (the leading int of the value).
        @return pointer just past the length and the one-byte subtype. */
    const char *binData(int& len) const {
        // BinData: <int len> <byte subtype> <byte[len] data>
        assert( type() == BinData );
        len = valuestrsize();
        return value() + 5;
    }

    /** Get the BinData subtype.  Element must be of type BinData. */
    BinDataType binDataType() const {
        // BinData: <int len> <byte subtype> <byte[len] data>
        assert( type() == BinData );
        // Read the subtype as unsigned: through a plain (possibly signed) char,
        // subtypes >= 128 such as bdtCustom would come out negative.
        unsigned char c = static_cast< unsigned char >( (value() + 4)[0] );
        return (BinDataType)c;
    }

    /** Retrieve the regex string for a Regex element */
    const char *regex() const {
        assert(type() == RegEx);
        return value();
    }

    /** returns a string that when used as a matcher, would match a super set of regex()
        returns "" for complex regular expressions
        used to optimize queries in some simple regex cases that start with '^'
    */
    string simpleRegex() const;

    /** Retrieve the regex flags (options) for a Regex element */
    const char *regexFlags() const {
        const char *p = regex();
        return p + strlen(p) + 1;
    }

    /** like operator== but doesn't check the fieldname,
        just the value.

        Numeric elements compare by value across numeric types.  Integer-vs-double
        comparisons are done in double and integer-vs-integer in long long, so the
        right-hand side is never truncated (int 3 no longer "equals" double 3.5)
        and the result matches the NumberDouble case when the operands are swapped.
    */
    bool valuesEqual(const BSONElement& r) const {
        switch( type() ) {
        case NumberLong:
            if ( !r.isNumber() )
                return false;
            if ( r.type() == NumberDouble )
                return (double) _numberLong() == r._numberDouble();
            return _numberLong() == r.numberLong();
        case NumberDouble:
            return _numberDouble() == r.number() && r.isNumber();
        case NumberInt:
            if ( !r.isNumber() )
                return false;
            if ( r.type() == NumberDouble )
                return _numberInt() == r._numberDouble();
            return _numberInt() == r.numberLong();
        default:
            ;
        }
        // Non-numeric: same type and byte-identical value.
        bool match= valuesize() == r.valuesize() &&
                    memcmp(value(),r.value(),valuesize()) == 0;
        return match && type() == r.type();
    }

    /** Returns true if elements are equal (same field name and equal values). */
    bool operator==(const BSONElement& r) const {
        if ( strcmp(fieldName(), r.fieldName()) != 0 )
            return false;
        return valuesEqual(r);
    }

    /** Well ordered comparison.
        @return <0: l<r. 0:l==r. >0:l>r
        order by type, field name, and field value.
        If considerFieldName is true, pay attention to the field name.
    */
    int woCompare( const BSONElement &e, bool considerFieldName = true ) const;

    /** Pointer to the start of the element (its type byte). */
    const char * rawdata() const {
        return data;
    }

    int getGtLtOp() const;

    /** Constructs an empty element */
    BSONElement();

    /** Check that data is internally consistent. */
    void validate() const;

    /** True if this element may contain subobjects. */
    bool mayEncapsulate() const {
        return type() == Object ||
               type() == Array ||
               type() == CodeWScope;
    }

    /** Second 32-bit word of the value, multiplied by 1000
        (presumably seconds converted to milliseconds -- TODO confirm). */
    unsigned long long timestampTime() const{
        unsigned long long t = reinterpret_cast< const unsigned int* >( value() + 4 )[0];
        return t * 1000;
    }
    /** First 32-bit word of the value. */
    unsigned int timestampInc() const{
        return reinterpret_cast< const unsigned int* >( value() )[0];
    }

    /** Namespace string of a DBRef element; uasserts if the type is not DBRef. */
    const char * dbrefNS() const {
        uassert( "not a dbref" , type() == DBRef );
        return value() + 4;
    }

    /** OID of a DBRef element; uasserts if the type is not DBRef.
        The OID is located by skipping the length int and the namespace string. */
    const OID& dbrefOID() const {
        uassert( "not a dbref" , type() == DBRef );
        const char * start = value();
        start += 4 + *reinterpret_cast< const int* >( start );
        return *reinterpret_cast< const OID* >( start );
    }

protected:
    // If maxLen is specified, don't scan more than maxLen bytes.
    BSONElement(const char *d, int maxLen = -1) : data(d), fieldNameSize_(-1), totalSize(-1) {
        if ( eoo() )
            fieldNameSize_ = 0;
        else if ( maxLen != -1 ) {
            // NOTE(review): the check below relies on strnlen() reporting -1 when no
            // terminator is found within the limit -- assumed to be a project helper
            // rather than the POSIX function; TODO confirm.
            int len = strnlen( fieldName(), maxLen - 1 );
            massert( "Invalid field name", len != -1 );
            fieldNameSize_ = len + 1;
        }
        // Otherwise fieldNameSize_ stays -1 and is computed lazily by fieldNameSize().
    }
private:
    const char *data;
    mutable int fieldNameSize_; // cached value; -1 means not computed yet
    /** Length of the field name including its NUL terminator (computed on first use). */
    int fieldNameSize() const {
        if ( fieldNameSize_ == -1 )
            fieldNameSize_ = strlen( fieldName() ) + 1;
        return fieldNameSize_;
    }
    mutable int totalSize; /* caches the computed size */
};
/* Compares the values of two elements.
   l and r MUST have same type when called: check that first.
   (compareValues() below performs that type check before delegating here.) */
int compareElementValues(const BSONElement& l, const BSONElement& r);
/* Free-function form of getGtLtOp taking the element as an argument; presumably
   mirrors BSONElement::getGtLtOp() -- TODO confirm against the definition. */
int getGtLtOp(const BSONElement& e);
/* Type-checked value comparison: elements of different types are ordered by the
   numeric difference of their type tags; same-typed elements are handed to
   compareElementValues().
   note: as is now, not smart about int/double comingling. TODO
*/
inline int compareValues(const BSONElement& l, const BSONElement& r)
{
    const int typeDelta = (int) l.type() - (int) r.type();
    return typeDelta != 0 ? typeDelta : compareElementValues(l, r);
}
/** "Less than" functor for BSONElements that ignores field names.
    woCompare() returns a three-way result (<0, 0, >0); an ordered container needs
    a strict weak ordering, so only a negative result means l sorts before r.
    Returning the raw int would report true for every unequal pair in both
    directions, which breaks std::set lookups and uniqueness.
*/
struct BSONElementCmpWithoutField {
    bool operator()( const BSONElement &l, const BSONElement &r ) const {
        return l.woCompare( r, false ) < 0;
    }
};
/** Set of BSONElements that uses BSONElementCmpWithoutField as its comparator. */
typedef set< BSONElement, BSONElementCmpWithoutField > BSONElementSet;
/**
C++ representation of a "BSON" object -- that is, an extended JSON-style
object in a binary representation.
Note that BSONObj's have a smart pointer capability built in -- so you can
pass them around by value. The reference counts used to implement this
do not use locking, so copying and destroying BSONObj's are not thread-safe
operations.
BSON object format:
\code
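<unsigned totalSize> {<byte BSONType><cstring FieldName><Data>}* EOO
totalSize includes itself.
Data:
Bool: <byte>
EOO: nothing follows
Undefined: nothing follows
OID: an OID object
NumberDouble: <double>
NumberInt: <int32>
String: <unsigned32 strsizewithnull><cstring>
Date: <8bytes>
Regex: <cstring regex><cstring options>
Object: a nested object, leading with its entire size, which terminates with EOO.
Array: same as object
DBRef: <strlen> <cstring ns> <oid>
DBRef: a database reference: basically a collection name plus an Object ID
BinData: <int len> <byte subtype> <byte[len] data>
Code: a function (not a closure): same format as String.
Symbol: a language symbol (say a python symbol). same format as String.
Code With Scope: <total size><String><Object>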