diff options
author | Mathias Stearn <mathias@10gen.com> | 2012-11-29 14:54:48 -0500 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2012-12-10 18:54:04 -0500 |
commit | fefb4334afe40664438668a289c6daed6813b3c3 (patch) | |
tree | a44b9d8f6fd94122c551647382a17173bfdaa474 /src | |
parent | 46bdfdaf6cd175e20b3225ea227d5803fb8fd1da (diff) | |
download | mongo-fefb4334afe40664438668a289c6daed6813b3c3.tar.gz |
Add at least minimal support for all types to agg
Minimal support means conversion to/from BSON, comparison and hashing.
This means that they can be passed through the pipeline correctly, used
in $sort, and used in _id expressions in $group.
SERVER-4608 - Binary pass through
SERVER-5718 - Code/CodeWScope pass through
SERVER-6470 - Don't convert Regex to String
SERVER-7185 - Symbol support
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/pipeline/document.h | 5 | ||||
-rwxr-xr-x | src/mongo/db/pipeline/document_source_group.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value.cpp | 389 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value.h | 67 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value_internal.h | 87 | ||||
-rw-r--r-- | src/mongo/dbtests/documentsourcetests.cpp | 2 | ||||
-rw-r--r-- | src/mongo/dbtests/documenttests.cpp | 123 | ||||
-rw-r--r-- | src/mongo/util/intrusive_counter.cpp | 3 |
9 files changed, 484 insertions, 253 deletions
diff --git a/src/mongo/db/pipeline/document.h b/src/mongo/db/pipeline/document.h index dbf158edcfb..bfae94badd9 100644 --- a/src/mongo/db/pipeline/document.h +++ b/src/mongo/db/pipeline/document.h @@ -109,7 +109,10 @@ namespace mongo { */ static int compare(const Document& lhs, const Document& rhs); - string toString() const; // TODO support streams + string toString() const; + + friend + ostream& operator << (ostream& out, const Document& doc) { return out << doc.toString(); } /** Calculate a hash value. * diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp index 1fca0cd9467..5ac8d83345b 100755 --- a/src/mongo/db/pipeline/document_source_group.cpp +++ b/src/mongo/db/pipeline/document_source_group.cpp @@ -194,47 +194,17 @@ namespace mongo { else if (groupType == String) { string groupString(groupField.str()); const char *pGroupString = groupString.c_str(); - if ((groupString.length() == 0) || - (pGroupString[0] != '$')) - goto StringConstantId; - - string pathString( - Expression::removeFieldPrefix(groupString)); - intrusive_ptr<ExpressionFieldPath> pFieldPath( - ExpressionFieldPath::create(pathString)); - pGroup->setIdExpression(pFieldPath); - idSet = true; - } - else { - /* pick out the constant types that are allowed */ - switch(groupType) { - case NumberDouble: - case String: - case Object: - case Array: - case jstOID: - case Bool: - case Date: - case NumberInt: - case Timestamp: - case NumberLong: - case jstNULL: - StringConstantId: // from string case above - { - Value pValue( - Value::createFromBsonElement(&groupField)); - intrusive_ptr<ExpressionConstant> pConstant( - ExpressionConstant::create(pValue)); - pGroup->setIdExpression(pConstant); + if (pGroupString[0] == '$') { + string pathString = Expression::removeFieldPrefix(groupString); + pGroup->setIdExpression(ExpressionFieldPath::create(pathString)); idSet = true; - break; } + } - default: - uassert(15949, str::stream() << - "a group's _id may not include fields of BSON type " << groupType, - false); - } + if (!idSet) { + // constant id - single group + pGroup->setIdExpression(ExpressionConstant::create(Value(groupField))); + idSet = true; } } else { diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index cd7a0407808..2c64159af53 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -343,12 +343,17 @@ namespace mongo { Value pValue(vpOperand[i]->evaluate(pDocument)); BSONType valueType = pValue.getType(); + // leaving explicit checks for now since these were supported in alpha releases uassert(16415, "$add does not support dates", valueType != Date); uassert(16416, "$add does not support strings", valueType != String); - totalType = Value::getWidestNumeric(totalType, pValue.getType()); + totalType = Value::getWidestNumeric(totalType, valueType); + + uassert(16554, "$add only supports numeric types", + totalType != Undefined); + doubleTotal += pValue.coerceToDouble(); longTotal += pValue.coerceToLong(); } @@ -1072,7 +1077,7 @@ namespace mongo { /* Don't add non-existent values (note: different from NULL); this is consistent with existing selection syntax which doesn't - force the appearnance of non-existent fields. + force the appearance of non-existent fields. */ // TODO make missing distinct from Undefined if (pValue.getType() != Undefined) @@ -1845,6 +1850,9 @@ namespace mongo { uassert(16375, "$multiply does not support dates", pValue.getType() != Date); productType = Value::getWidestNumeric(productType, pValue.getType()); + uassert(16555, "$mutiply only supports numeric types", + productType != Undefined); + doubleProduct *= pValue.coerceToDouble(); longProduct *= pValue.coerceToLong(); } @@ -2408,6 +2416,9 @@ namespace mongo { "$subtract does not support dates", pLeft.getType() != Date && pRight.getType() != Date); + uassert(16556, "$subtract only supports numeric types", + productType != Undefined); + if (productType == NumberDouble) { double right = pRight.coerceToDouble(); double left = pLeft.coerceToDouble(); diff --git a/src/mongo/db/pipeline/value.cpp b/src/mongo/db/pipeline/value.cpp index 65afae9d450..8e4cb838e1c 100644 --- a/src/mongo/db/pipeline/value.cpp +++ b/src/mongo/db/pipeline/value.cpp @@ -28,37 +28,42 @@ namespace mongo { using namespace mongoutils; - void ValueStorage::putString(StringData s) { - const size_t sizeWithNUL = s.size() + 1; - if (sizeWithNUL <= sizeof(shortStrStorage)) { + void ValueStorage::putString(const StringData& s) { + // Note: this also stores data portion of BinData + const size_t sizeNoNUL = s.size(); + if (sizeNoNUL <= sizeof(shortStrStorage)) { shortStr = true; shortStrSize = s.size(); - s.copyTo( shortStrStorage, true ); + s.copyTo(shortStrStorage, false); // no NUL + + // All memory is zeroed before this is called. + // Note this may be past end of shortStrStorage and into nulTerminator + dassert(shortStrStorage[sizeNoNUL] == '\0'); } else { - intrusive_ptr<const RCString> rcs = RCString::create(s); - fassert(16492, rcs); - genericRCPtr = rcs.get(); - intrusive_ptr_add_ref(genericRCPtr); - refCounter = true; + putRefCountable(RCString::create(s)); } } void ValueStorage::putDocument(const Document& d) { - genericRCPtr = d._storage.get(); - - if (genericRCPtr) { // NULL here means empty document - intrusive_ptr_add_ref(genericRCPtr); - refCounter = true; - } + putRefCountable(d._storage); } void ValueStorage::putVector(const RCVector* vec) { fassert(16485, vec); + putRefCountable(vec); + } + + void ValueStorage::putRegEx(const BSONRegEx& re) { + const size_t patternLen = re.pattern.size(); + const size_t flagsLen = re.flags.size(); + const size_t totalLen = patternLen + 1/*middle NUL*/ + flagsLen; - genericRCPtr = vec; - intrusive_ptr_add_ref(genericRCPtr); - refCounter = true; + // Need to copy since putString doesn't support scatter-gather. + boost::scoped_array<char> buf (new char[totalLen]); + re.pattern.copyTo(buf.get(), true); + re.flags.copyTo(buf.get() + patternLen + 1, false); // no NUL + putString(StringData(buf.get(), totalLen)); } Document ValueStorage::getDocument() const { @@ -70,8 +75,12 @@ namespace mongo { return Document(documentPtr); } + // not in header because document is fwd declared + Value::Value(const BSONObj& obj) : _storage(Object, Document(obj)) {} + Value::Value(BSONType theType): _storage(theType) { - switch(getType()) { + switch(theType) { + case EOO: case Undefined: case jstNULL: case Object: // empty @@ -119,11 +128,21 @@ namespace mongo { } Value::Value(const BSONElement& elem) : _storage(elem.type()) { - switch(getType()) { + switch(elem.type()) { + // These are all type-only, no data + case EOO: + case MinKey: + case MaxKey: + case Undefined: + case jstNULL: + break; + case NumberDouble: _storage.doubleValue = elem.Double(); break; + case Code: + case Symbol: case String: _storage.putString(StringData(elem.valuestr(), elem.valuestrsize()-1)); break; @@ -156,10 +175,10 @@ namespace mongo { _storage.dateValue = static_cast<long long>(elem.date().millis); break; - case RegEx: - _storage.putString(elem.regex()); - // TODO elem.regexFlags(); + case RegEx: { + _storage.putRegEx(BSONRegEx(elem.regex(), elem.regexFlags())); break; + } case NumberInt: _storage.intValue = elem.numberInt(); @@ -174,22 +193,21 @@ namespace mongo { _storage.longValue = elem.numberLong(); break; - case Undefined: - case jstNULL: + case CodeWScope: { + StringData code (elem.codeWScopeCode(), elem.codeWScopeCodeLen()-1); + _storage.putCodeWScope(BSONCodeWScope(code, elem.codeWScopeObject())); break; + } - case BinData: - case Symbol: - case CodeWScope: + case BinData: { + int len; + const char* data = elem.binData(len); + _storage.putBinData(BSONBinData(data, len, elem.binDataType())); + break; + } - /* these shouldn't happen in this context */ - case MinKey: - case EOO: case DBRef: - case Code: - case MaxKey: - uassert(16002, str::stream() << - "can't create Value of BSON type " << typeName(getType()), false); + _storage.putDBRef(BSONDBRef(elem.dbrefNS(), elem.dbrefOID())); break; } } @@ -246,6 +264,9 @@ namespace mongo { return builder.builder(); switch(val.getType()) { + case EOO: return builder.builder(); // nothing appended + case MinKey: return builder << MINKEY; + case MaxKey: return builder << MAXKEY; case jstNULL: return builder << BSONNULL; case Undefined: return builder << BSONUndefined; case jstOID: return builder << val.getOid(); @@ -257,6 +278,21 @@ namespace mongo { case Date: return builder << Date_t(val.getDate()); case Timestamp: return builder << val.getTimestamp(); case Object: return builder << val.getDocument(); + case Symbol: return builder << BSONSymbol(val.getStringData()); + case Code: return builder << BSONCode(val.getStringData()); + case RegEx: return builder << BSONRegEx(val.getRegex(), val.getRegexFlags()); + + case DBRef: + return builder << BSONDBRef(val._storage.getDBRef()->ns, val._storage.getDBRef()->oid); + + case BinData: + return builder << BSONBinData(val.getStringData().rawData(), // looking for void* + val.getStringData().size(), + val._storage.binDataType()); + + case CodeWScope: + return builder << BSONCodeWScope(val._storage.getCodeWScope()->code, + val._storage.getCodeWScope()->scope); case Array: { const vector<Value>& array = val.getArray(); @@ -268,27 +304,12 @@ namespace mongo { arrayBuilder.doneFast(); return builder.builder(); } - - - // TODO: these need to not be appended as strings SERVER-6470 - case RegEx: return builder << val.getRegex(); - case Symbol: return builder << val.getSymbol(); - - /* these shouldn't appear in this context */ - case BinData: - case CodeWScope: - case MinKey: - case EOO: - case DBRef: - case Code: - case MaxKey: - verify(false); // CW TODO better message } verify(false); } void Value::addToBsonObj(BSONObjBuilder* pBuilder, StringData fieldName) const { - *pBuilder << fieldName.data() << *this; + *pBuilder << fieldName.__data() << *this; } void Value::addToBsonArray(BSONArrayBuilder* pBuilder) const { @@ -298,8 +319,16 @@ namespace mongo { } bool Value::coerceToBool() const { + if (missing()) + return false; + // TODO Unify the implementation with BSONElement::trueValue(). switch(getType()) { + case CodeWScope: + case MinKey: + case DBRef: + case Code: + case MaxKey: case String: case Object: case Array: @@ -311,6 +340,7 @@ namespace mongo { case Timestamp: return true; + case EOO: case jstNULL: case Undefined: return false; @@ -319,17 +349,8 @@ namespace mongo { case NumberInt: return _storage.intValue; case NumberLong: return _storage.longValue; case NumberDouble: return _storage.doubleValue; - - /* these shouldn't happen in this context */ - case CodeWScope: - case MinKey: - case EOO: - case DBRef: - case Code: - case MaxKey: - default: - verify(false); // CW TODO better message } + verify(false); } int Value::coerceToInt() const { @@ -347,7 +368,6 @@ namespace mongo { case Undefined: return 0; - case String: default: uassert(16003, str::stream() << "can't convert from BSON type " << typeName(getType()) << @@ -371,7 +391,6 @@ namespace mongo { case Undefined: return 0; - case String: default: uassert(16004, str::stream() << "can't convert from BSON type " << typeName(getType()) << @@ -395,7 +414,6 @@ namespace mongo { case Undefined: return 0; - case String: default: uassert(16005, str::stream() << "can't convert from BSON type " << typeName(getType()) << @@ -485,8 +503,10 @@ namespace mongo { ss << _storage.longValue; return ss.str(); + case Code: + case Symbol: case String: - return getString(); + return getStringData().toString(); case Timestamp: ss << getTimestamp().toStringPretty(); @@ -550,6 +570,9 @@ namespace mongo { } int Value::compare(const Value& rL, const Value& rR) { + // Note, this function needs to behave identically to BSON's compareElementValues(). + // Additionally, any changes here must be replicated in hash_combine(). + // TODO: remove conditional after SERVER-6571 BSONType lType = rL.missing() ? EOO : rL.getType(); BSONType rType = rR.missing() ? EOO : rR.getType(); @@ -563,13 +586,14 @@ namespace mongo { return ret; switch(lType) { - // For supported types, order is the same as in compareElementValues(). - // All unsupported types at end. + // Order of types is the same as in compareElementValues() to make it easier to verify // These are valueless types case EOO: case Undefined: case jstNULL: + case MaxKey: + case MinKey: return ret; case Bool: @@ -583,9 +607,9 @@ namespace mongo { return cmp(rL._storage.dateValue, rR._storage.dateValue); // Numbers should compare by equivalence even if different types - case NumberDouble: case NumberLong: case NumberInt: + case NumberDouble: switch (getWidestNumeric(lType, rType)) { case NumberDouble: return cmp(rL.getDouble(), rR.getDouble()); case NumberLong: return cmp(rL.getLong(), rR.getLong()); @@ -596,6 +620,8 @@ namespace mongo { case jstOID: return memcmp(rL._storage.oid, rR._storage.oid, sizeof(OID)); + case Code: + case Symbol: case String: return rL.getStringData().compare(rR.getStringData()); @@ -609,40 +635,91 @@ namespace mongo { const size_t elems = min(lArr.size(), rArr.size()); for (size_t i = 0; i < elems; i++ ) { // compare the two corresponding elements - const int cmp = Value::compare(lArr[i], rArr[i]); - if (cmp) - return cmp; // values are unequal + ret = Value::compare(lArr[i], rArr[i]); + if (ret) + return ret; // values are unequal } // if we get here we are either equal or one is prefix of the other return cmp(lArr.size(), rArr.size()); } - case RegEx: // TODO: consider flags - return rL.getRegex().compare(rR.getRegex()); + case DBRef: { + intrusive_ptr<const RCDBRef> l = rL._storage.getDBRef(); + intrusive_ptr<const RCDBRef> r = rR._storage.getDBRef(); + ret = cmp(l->ns.size(), r->ns.size()); + if (ret) + return ret; - // unsupported types - case BinData: - case Symbol: - case CodeWScope: - case MinKey: - case DBRef: - case Code: - case MaxKey: - uassert(16017, str::stream() << - "comparisons of values of BSON type " << typeName(lType) << - " are not supported", false); - } // switch(lType) + return l->oid.compare(r->oid); + } + + case BinData: { + ret = cmp(rL.getStringData().size(), rR.getStringData().size()); + if (ret) + return ret; + + // Need to compare as an unsigned char rather than enum since BSON uses memcmp + ret = cmp(rL._storage.binSubType, rR._storage.binSubType); + if (ret) + return ret; + + return rL.getStringData().compare(rR.getStringData()); + } + + case RegEx: // same as String in this impl but keeping order same as compareElementValues + return rL.getStringData().compare(rR.getStringData()); + + case CodeWScope: { + // This case crazy, but identical to how they are compared in BSON (SERVER-7804) + + intrusive_ptr<const RCCodeWScope> l = rL._storage.getCodeWScope(); + intrusive_ptr<const RCCodeWScope> r = rR._storage.getCodeWScope(); + + // This triggers two bugs in codeWScope. + // Since this is a very rare case I'm not handling it here. + uassert(16557, "can't compare CodeWScope values containing a NUL byte in the code.", + strlen(l->code.c_str()) == l->code.size() + && strlen(r->code.c_str()) == r->code.size()); + + ret = l->code.compare(r->code); + if (ret) + return ret; + // SERVER-7804 + return strcmp(l->scope.objdata(), r->scope.objdata()); + } + } verify(false); } void Value::hash_combine(size_t &seed) const { // TODO: remove conditional after SERVER-6571 - if (missing()) { - return; // same as Undefined - } - switch(getType()) { + BSONType type = missing() ? EOO : getType(); + + boost::hash_combine(seed, canonicalizeBSONType(type)); + + switch (type) { + // Order of types is the same as in Value::compare() and compareElementValues(). + + // These are valueless types + case EOO: + case Undefined: + case jstNULL: + case MaxKey: + case MinKey: + return; + + case Bool: + boost::hash_combine(seed, getBool()); + break; + + case Timestamp: + case Date: + BOOST_STATIC_ASSERT(sizeof(_storage.dateValue) == sizeof(_storage.timestampValue)); + boost::hash_combine(seed, _storage.dateValue); + break; + /* Numbers whose values are equal need to hash to the same thing as well. Note that Value::compare() promotes numeric values to @@ -656,10 +733,22 @@ namespace mongo { case NumberDouble: case NumberLong: case NumberInt: { - boost::hash_combine(seed, getDouble()); + const double dbl = getDouble(); + if (isnan(dbl)) { + boost::hash_combine(seed, numeric_limits<double>::quiet_NaN()); + } + else { + boost::hash_combine(seed, dbl); + } break; } + case jstOID: + getOid().hash_combine(seed); + break; + + case Code: + case Symbol: case String: { StringData sd = getStringData(); boost::hash_range(seed, sd.rawData(), (sd.rawData() + sd.size())); @@ -677,47 +766,34 @@ namespace mongo { break; } - case BinData: - case Symbol: - case CodeWScope: - uassert(16018, str::stream() << - "hashes of values of BSON type " << typeName(getType()) << - " are not supported", false); - break; - - case jstOID: - getOid().hash_combine(seed); - break; - - case Bool: - boost::hash_combine(seed, getBool()); - break; - - case Date: - boost::hash_combine(seed, getDate()); + case DBRef: + boost::hash_combine(seed, _storage.getDBRef()->ns); + _storage.getDBRef()->oid.hash_combine(seed); break; - case RegEx: - boost::hash_combine(seed, getRegex()); - break; - case Timestamp: - boost::hash_combine(seed, _storage.timestampValue); + case BinData: { + StringData sd = getStringData(); + boost::hash_range(seed, sd.rawData(), (sd.rawData() + sd.size())); + boost::hash_combine(seed, _storage.binDataType()); break; + } - case Undefined: - case jstNULL: + case RegEx: { + StringData sd = getStringData(); + boost::hash_range(seed, sd.rawData(), (sd.rawData() + sd.size())); break; + } - /* these shouldn't happen in this context */ - case MinKey: - case EOO: - case DBRef: - case Code: - case MaxKey: - verify(false); // CW TODO better message + case CodeWScope: { + // SERVER-7804 + const char * code = _storage.getCodeWScope()->code.c_str(); + boost::hash_range(seed, code, (code + strlen(code))); + // Not going to bother hashing scope. Too many edge cases. Will fall back to + // Value::compare when code is same, so this is ok. break; - } // switch(getType()) + } + } } BSONType Value::getWidestNumeric(BSONType lType, BSONType rType) { @@ -788,8 +864,14 @@ namespace mongo { size_t Value::getApproximateSize() const { switch(getType()) { + case Code: + case RegEx: + case Symbol: + case BinData: case String: - return sizeof(Value) + sizeof(RCString) + getStringData().size(); + return sizeof(Value) + (_storage.shortStr + ? sizeof(RCString) + _storage.getString().size() + : 0); case Object: return sizeof(Value) + getDocument()->getApproximateSize(); @@ -804,37 +886,28 @@ namespace mongo { return size; } + case CodeWScope: + return sizeof(Value) + sizeof(RCCodeWScope) + _storage.getCodeWScope()->code.size() + + _storage.getCodeWScope()->scope.objsize(); + + case DBRef: + return sizeof(Value) + sizeof(RCDBRef) + _storage.getDBRef()->ns.size(); + + // These types are always contained within the Value + case EOO: + case MinKey: + case MaxKey: case NumberDouble: - case BinData: case jstOID: case Bool: case Date: - case RegEx: - case Symbol: - case CodeWScope: case NumberInt: case Timestamp: case NumberLong: case jstNULL: case Undefined: return sizeof(Value); - - /* these shouldn't happen in this context */ - case MinKey: - case EOO: - case DBRef: - case Code: - case MaxKey: - verify(false); // CW TODO better message } - - /* - We shouldn't get here. In order to make the implementor think about - these cases, they are all listed explicitly, above. The compiler - should complain if they aren't all listed, because there's no - default. However, not all the compilers seem to do that. Therefore, - this final catch-all is here. - */ verify(false); } @@ -849,17 +922,21 @@ namespace mongo { if (val.missing()) return out << "MISSING"; switch(val.getType()) { + case EOO: return out << "MISSING"; + case MinKey: return out << "MinKey"; + case MaxKey: return out << "MaxKey"; case jstOID: return out << val.getOid(); case String: return out << '"' << val.getString() << '"'; - case RegEx: return out << '/' << val.getRegex() << '/'; - case Symbol: return out << val.getSymbol(); + case RegEx: return out << '/' << val.getRegex() << '/' << val.getRegexFlags(); + case Symbol: return out << "Symbol(\"" << val.getSymbol() << "\")"; + case Code: return out << "Code(\"" << val.getCode() << "\")"; case Bool: return out << (val.getBool() ? "true" : "false"); case NumberDouble: return out << val.getDouble(); case NumberLong: return out << val.getLong(); case NumberInt: return out << val.getInt(); case jstNULL: return out << "null"; case Undefined: return out << "undefined"; - case Date: return out << time_t_to_String_short(val.coerceToTimeT()); + case Date: return out << tmToISODateString(val.coerceToTm()); case Timestamp: return out << val.getTimestamp().toString(); case Object: return out << val.getDocument()->toString(); case Array: { @@ -874,20 +951,22 @@ namespace mongo { return out; } - /* these shouldn't happen in this context */ case CodeWScope: + return out << "CodeWScope(\"" << val._storage.getCodeWScope()->code << "\", " + << val._storage.getCodeWScope()->scope << ')'; + case BinData: - case MinKey: - case EOO: + return out << "BinData(" << val._storage.binDataType() << ", \"" + << toHex(val._storage.getString().rawData() + ,val._storage.getString().size()) + << "\")"; + case DBRef: - case Code: - case MaxKey: - verify(false); // CW TODO better message + return out << "DBRef(\"" << val._storage.getDBRef()->ns << "\", " + << val._storage.getDBRef()->oid << ')'; } - // Not in default case to trigger better warning if a case is missing verify(false); } - } diff --git a/src/mongo/db/pipeline/value.h b/src/mongo/db/pipeline/value.h index 5a81e99e87c..95b90a354e5 100644 --- a/src/mongo/db/pipeline/value.h +++ b/src/mongo/db/pipeline/value.h @@ -54,17 +54,31 @@ namespace mongo { */ Value(): _storage() {} // "Missing" value - explicit Value(bool value) : _storage(Bool, value) {} - explicit Value(int value) : _storage(NumberInt, value) {} - explicit Value(long long value) : _storage(NumberLong, value) {} - explicit Value(double value) : _storage(NumberDouble, value) {} - explicit Value(const OpTime& value) : _storage(Timestamp, value.asDate()) {} - explicit Value(const OID& value) : _storage(jstOID, value) {} - explicit Value(StringData value) : _storage(String, value) {} - explicit Value(const string& value) : _storage(String, StringData(value)) {} - explicit Value(const char* value) : _storage(String, StringData(value)) {} - explicit Value(const Document& doc) : _storage(Object, doc) {} - explicit Value(const vector<Value>& vec) : _storage(Array, new RCVector(vec)) {} + explicit Value(bool value) : _storage(Bool, value) {} + explicit Value(int value) : _storage(NumberInt, value) {} + explicit Value(long long value) : _storage(NumberLong, value) {} + explicit Value(double value) : _storage(NumberDouble, value) {} + explicit Value(const OpTime& value) : _storage(Timestamp, value.asDate()) {} + explicit Value(const OID& value) : _storage(jstOID, value) {} + explicit Value(const StringData& value) : _storage(String, value) {} + explicit Value(const string& value) : _storage(String, StringData(value)) {} + explicit Value(const char* value) : _storage(String, StringData(value)) {} + explicit Value(const Document& doc) : _storage(Object, doc) {} + explicit Value(const BSONObj& obj);// : _storage(Object, Document(obj)) {} // in cpp + explicit Value(const vector<Value>& vec) : _storage(Array, new RCVector(vec)) {} + explicit Value(const BSONBinData& bd) : _storage(BinData, bd) {} + explicit Value(const BSONRegEx& re) : _storage(RegEx, re) {} + explicit Value(const BSONCodeWScope& cws) : _storage(CodeWScope, cws) {} + explicit Value(const BSONDBRef& dbref) : _storage(DBRef, dbref) {} + explicit Value(const BSONSymbol& sym) : _storage(Symbol, sym.symbol) {} + explicit Value(const BSONCode& code) : _storage(Code, code.code) {} + explicit Value(const NullLabeler&) : _storage(jstNULL) {} // BSONNull + explicit Value(const UndefinedLabeler&) : _storage(Undefined) {} // BSONUndefined + explicit Value(const MinKeyLabeler&) : _storage(MinKey) {} // MINKEY + explicit Value(const MaxKeyLabeler&) : _storage(MaxKey) {} // MAXKEY + explicit Value(const Date_t& date) + : _storage(Date, static_cast<long long>(date.millis)) // millis really signed + {} /** Creates an empty or zero value of specified type. * This is currently the only way to create Undefined or Null Values. @@ -100,14 +114,15 @@ namespace mongo { */ double getDouble() const; string getString() const; - StringData getStringData() const; // May contain embedded NUL bytes Document getDocument() const; OID getOid() const; bool getBool() const; long long getDate() const; // in milliseconds OpTime getTimestamp() const; - string getRegex() const; + const char* getRegex() const; + const char* getRegexFlags() const; string getSymbol() const; + string getCode() const; int getInt() const; long long getLong() const; const vector<Value>& getArray() const { return _storage.getArray(); } @@ -217,6 +232,9 @@ namespace mongo { template <typename InvalidArgumentType> explicit Value(const InvalidArgumentType& invalidArgument); + // does no type checking + StringData getStringData() const; // May contain embedded NUL bytes + ValueStorage _storage; friend class MutableValue; // gets and sets _storage.genericRCPtr }; @@ -245,14 +263,12 @@ namespace mongo { } inline StringData Value::getStringData() const { - verify(getType() == String); return _storage.getString(); } inline string Value::getString() const { verify(getType() == String); - StringData sd = _storage.getString(); - return sd.toString(); + return _storage.getString().toString(); } inline OID Value::getOid() const { @@ -275,16 +291,25 @@ namespace mongo { return _storage.timestampValue; } - inline string Value::getRegex() const { + inline const char* Value::getRegex() const { verify(getType() == RegEx); - StringData sd = _storage.getString(); - return sd.toString(); + return _storage.getString().rawData(); // this is known to be NUL terminated + } + inline const char* Value::getRegexFlags() const { + verify(getType() == RegEx); + const char* pattern = _storage.getString().rawData(); // this is known to be NUL terminated + const char* flags = pattern + strlen(pattern) + 1; // first byte after pattern's NUL + dassert(flags + strlen(flags) == pattern + _storage.getString().size()); + return flags; } inline string Value::getSymbol() const { verify(getType() == Symbol); - StringData sd = _storage.getString(); - return sd.toString(); + return _storage.getString().toString(); + } + inline string Value::getCode() const { + verify(getType() == Code); + return _storage.getString().toString(); } inline int Value::getInt() const { diff --git a/src/mongo/db/pipeline/value_internal.h b/src/mongo/db/pipeline/value_internal.h index 239eb335f7c..e9176c8394e 100644 --- a/src/mongo/db/pipeline/value_internal.h +++ b/src/mongo/db/pipeline/value_internal.h @@ -17,7 +17,9 @@ #pragma once #include <algorithm> +#include "bson/bsonobj.h" #include "bson/bsontypes.h" +#include "bson/bsonmisc.h" #include "bson/oid.h" #include "util/intrusive_counter.h" #include "util/optime.h" @@ -37,21 +39,42 @@ namespace mongo { vector<Value> vec; }; + class RCCodeWScope : public RefCountable { + public: + RCCodeWScope(const string& str, BSONObj obj) :code(str), scope(obj.getOwned()) {} + const string code; + const BSONObj scope; // Not worth converting to Document for now + }; + + class RCDBRef : public RefCountable { + public: + RCDBRef(const string& str, const OID& o) :ns(str), oid(o) {} + const string ns; + const OID oid; + }; + #pragma pack(1) class ValueStorage { public: + // Note: it is important the memory is zeroed out (by calling zero()) at the start of every + // constructor. Much code relies on every byte being predictably initialized to zero. + // This is a "missing" Value ValueStorage() { zero(); type = EOO; } - explicit ValueStorage(BSONType t) { zero(); type = t;} - ValueStorage(BSONType t, int i) { zero(); type = t; intValue = i; } - ValueStorage(BSONType t, long long l) { zero(); type = t; longValue = l; } - ValueStorage(BSONType t, double d) { zero(); type = t; doubleValue = d; } - ValueStorage(BSONType t, ReplTime r) { zero(); type = t; timestampValue = r; } - ValueStorage(BSONType t, bool b) { zero(); type = t; boolValue = b; } - ValueStorage(BSONType t, const Document& d) { zero(); type = t; putDocument(d); } - ValueStorage(BSONType t, const RCVector* a) { zero(); type = t; putVector(a); } - ValueStorage(BSONType t, StringData s) { zero(); type = t; putString(s); } + explicit ValueStorage(BSONType t) { zero(); type = t; } + ValueStorage(BSONType t, int i) { zero(); type = t; intValue = i; } + ValueStorage(BSONType t, long long l) { zero(); type = t; longValue = l; } + ValueStorage(BSONType t, double d) { zero(); type = t; doubleValue = d; } + ValueStorage(BSONType t, ReplTime r) { zero(); type = t; timestampValue = r; } + ValueStorage(BSONType t, bool b) { zero(); type = t; boolValue = b; } + ValueStorage(BSONType t, const Document& d) { zero(); type = t; putDocument(d); } + ValueStorage(BSONType t, const RCVector* a) { zero(); type = t; putVector(a); } + ValueStorage(BSONType t, const StringData& s) { zero(); type = t; putString(s); } + ValueStorage(BSONType t, const BSONBinData& bd) { zero(); type = t; putBinData(bd); } + ValueStorage(BSONType t, const BSONRegEx& re) { zero(); type = t; putRegEx(re); } + ValueStorage(BSONType t, const BSONCodeWScope& cs) { zero(); type = t; putCodeWScope(cs); } + ValueStorage(BSONType t, const BSONDBRef& dbref) { zero(); type = t; putDBRef(dbref); } ValueStorage(BSONType t, const OID& o) { zero(); @@ -91,9 +114,31 @@ namespace mongo { } /// These are only to be called during Value construction on an empty Value - void putString(StringData s); + void putString(const StringData& s); void putVector(const RCVector* v); void putDocument(const Document& d); + void putRegEx(const BSONRegEx& re); + void putBinData(const BSONBinData& bd) { + putString(StringData(static_cast<const char*>(bd.data), bd.length)); + binSubType = bd.type; + } + + void putDBRef(const BSONDBRef& dbref) { + putRefCountable(new RCDBRef(dbref.ns.toString(), dbref.oid)); + } + + void putCodeWScope(const BSONCodeWScope& cws) { + putRefCountable(new RCCodeWScope(cws.code.toString(), cws.scope)); + } + + void putRefCountable(intrusive_ptr<const RefCountable> ptr) { + genericRCPtr = ptr.get(); + + if (genericRCPtr) { + intrusive_ptr_add_ref(genericRCPtr); + refCounter = true; + } + } StringData getString() const { if (shortStr) { @@ -112,6 +157,16 @@ namespace mongo { return arrayPtr->vec; } + intrusive_ptr<const RCCodeWScope> getCodeWScope() const { + dassert(typeid(*genericRCPtr) == typeid(const RCCodeWScope)); + return static_cast<const RCCodeWScope*>(genericRCPtr); + } + + intrusive_ptr<const RCDBRef> getDBRef() const { + dassert(typeid(*genericRCPtr) == typeid(const RCDBRef)); + return static_cast<const RCDBRef*>(genericRCPtr); + } + // Document is incomplete here so this can't be inline Document getDocument() const; @@ -120,8 +175,12 @@ namespace mongo { return BSONType(type); } + BinDataType binDataType() const { + dassert(type == BinData); + return BinDataType(binSubType); + } + void zero() { - // This is important for identical() memset(this, 0, sizeof(*this)); } @@ -150,7 +209,11 @@ namespace mongo { struct { char shortStrSize; // TODO Consider moving into flags union (4 bits) - char shortStrStorage[16 - 3]; // ValueStorage is 16 bytes, 3 byte offset + char shortStrStorage[16/*total bytes*/ - 3/*offset*/ - 1/*NUL byte*/]; + union { + char nulTerminator; + unsigned char binSubType; // type always goes here even if !shortStr + }; }; struct { diff --git a/src/mongo/dbtests/documentsourcetests.cpp b/src/mongo/dbtests/documentsourcetests.cpp index 8287c9327fc..67e0ddebfd3 100644 --- a/src/mongo/dbtests/documentsourcetests.cpp +++ b/src/mongo/dbtests/documentsourcetests.cpp @@ -518,7 +518,7 @@ namespace DocumentSourceTests { }; /** $group _id is a regular expression (not supported). */ - class IdRegularExpression : public ParseErrorBase { + class IdRegularExpression : public IdConstantBase { BSONObj spec() { return fromjson( "{_id:/a/}" ); } }; diff --git a/src/mongo/dbtests/documenttests.cpp b/src/mongo/dbtests/documenttests.cpp index 6f7532e769d..145f47993ca 100644 --- a/src/mongo/dbtests/documenttests.cpp +++ b/src/mongo/dbtests/documenttests.cpp @@ -318,7 +318,83 @@ namespace DocumentTests { ASSERT( !iterator.more() ); } }; - + + class AllTypesDoc { + public: + void run() { + // These are listed in order of BSONType with some duplicates + append("minkey", MINKEY); + // EOO not valid in middle of BSONObj + append("double", 1.0); + append("c-string", "string\0after NUL"); // after NULL is ignored + append("c++", StringData("string\0after NUL", StringData::LiteralTag()).toString()); + append("StringData", StringData("string\0after NUL", StringData::LiteralTag())); + append("emptyObj", BSONObj()); + append("filledObj", BSON("a" << 1)); + append("emptyArray", BSON("" << BSONArray()).firstElement()); + append("filledArray", BSON("" << BSON_ARRAY(1 << "a")).firstElement()); + append("binData", BSONBinData("a\0b", 3, BinDataGeneral)); + append("binDataCustom", BSONBinData("a\0b", 3, bdtCustom)); + append("undefined", BSONUndefined); + append("oid", OID()); + append("true", true); + append("false", false); + append("date", jsTime()); + append("null", BSONNULL); + append("regex", BSONRegEx(".*")); + append("regexFlags", BSONRegEx(".*", "i")); + append("regexEmpty", BSONRegEx("", "")); + append("dbref", BSONDBRef("foo", OID())); + append("code", BSONCode("function() {}")); + append("codeNul", BSONCode(StringData("var nul = '\0'", StringData::LiteralTag()))); + append("symbol", BSONSymbol("foo")); + append("symbolNul", BSONSymbol(StringData("f\0o", StringData::LiteralTag()))); + append("codeWScope", BSONCodeWScope("asdf", BSONObj())); + append("codeWScopeWScope", BSONCodeWScope("asdf", BSON("one" << 1))); + append("int", 1); + append("timestamp", OpTime()); + append("long", 1LL); + append("very long", 1LL << 40); + append("maxkey", MAXKEY); + + const BSONArray arr = arrBuilder.arr(); + + // can't use append any more since arrBuilder is done + objBuilder << "mega array" << arr; + docBuilder["mega array"] = Value(values); + + const BSONObj obj = objBuilder.obj(); + const Document doc = docBuilder.freeze(); + + const BSONObj obj2 = toBson(doc); + const Document doc2 = fromBson(obj); + + // logical equality + ASSERT_EQUALS(obj, obj2); + if (Document::compare(doc, doc2)) { + PRINT(doc); + PRINT(doc2); + } + ASSERT_EQUALS(Document::compare(doc, doc2), 0); + + // binary equality + ASSERT_EQUALS(obj.objsize(), obj2.objsize()); + ASSERT_EQUALS(memcmp(obj.objdata(), obj2.objdata(), obj.objsize()), 0); + } + + template <typename T> + void append(const char* name, const T& thing) { + objBuilder << name << thing; + arrBuilder << thing; + docBuilder[name] = Value(thing); + values.push_back(Value(thing)); + } + + vector<Value> values; + MutableDocument docBuilder; + BSONObjBuilder objBuilder; + BSONArrayBuilder arrBuilder; + }; } // namespace Document namespace Value { @@ -344,7 +420,8 @@ namespace DocumentTests { Value value2 = fromBson( obj1 ); BSONObj obj2 = toBson( value2 ); ASSERT_EQUALS( obj1, obj2 ); - ASSERT( value1 == value2 ); + ASSERT_EQUALS(value1, value2); + ASSERT_EQUALS(value1.getType(), value2.getType()); } /** Int type. */ @@ -528,11 +605,9 @@ namespace DocumentTests { public: void run() { Value value = fromBson( fromjson( "{'':/abc/}" ) ); - ASSERT_EQUALS( "abc", value.getRegex() ); + ASSERT_EQUALS( string("abc"), value.getRegex() ); ASSERT_EQUALS( RegEx, value.getType() ); - if ( 0 ) { // SERVER-6470 assertRoundTrips( value ); - } } }; @@ -540,9 +615,7 @@ namespace DocumentTests { class Symbol { public: void run() { - BSONObjBuilder bob; - bob.appendSymbol( "", "FOOBAR" ); - Value value = fromBson( bob.obj() ); + Value value (BSONSymbol("FOOBAR")); ASSERT_EQUALS( "FOOBAR", value.getSymbol() ); ASSERT_EQUALS( mongo::Symbol, value.getType() ); assertRoundTrips( value ); @@ -1139,9 +1212,7 @@ namespace DocumentTests { // Regex. assertComparison( 0, fromjson( "{'':/a/}" ), fromjson( "{'':/a/}" ) ); - assertComparison( 0, fromjson( "{'':/a/}" ), - // Regex options are ignored. - fromjson( "{'':/a/i}" ) ); + assertComparison( -1, fromjson( "{'':/a/}" ), fromjson( "{'':/a/i}" ) ); assertComparison( -1, fromjson( "{'':/a/}" ), fromjson( "{'':/aa/}" ) ); // Timestamp. @@ -1149,24 +1220,27 @@ namespace DocumentTests { assertComparison( -1, OpTime( 4 ), OpTime( 1234 ) ); // Cross-type comparisons. Listed in order of canonical types. + assertComparison(-1, Value(mongo::MINKEY), Value()); + assertComparison(0, Value(), Value(mongo::EOO)); assertComparison(0, Value(), Value(mongo::Undefined)); assertComparison(-1, Value(mongo::Undefined), Value(mongo::jstNULL)); assertComparison(-1, Value(mongo::jstNULL), Value(1)); assertComparison(0, Value(1), Value(1LL)); assertComparison(0, Value(1), Value(1.0)); assertComparison(-1, Value(1), Value("string")); - // Symbol not supported (SERVER-7185) + assertComparison(0, Value("string"), Value(BSONSymbol("string"))); assertComparison(-1, Value("string"), Value(mongo::Document())); assertComparison(-1, Value(mongo::Document()), Value(mongo::Array)); - // BinData not supported (SERVER-4608) - assertComparison(-1, Value(mongo::Array), Value(mongo::OID())); + assertComparison(-1, Value(mongo::Array), Value(BSONBinData("", 0, MD5Type))); + assertComparison(-1, Value(BSONBinData("", 0, MD5Type)), Value(mongo::OID())); assertComparison(-1, Value(mongo::OID()), Value(false)); assertComparison(-1, Value(false), Value(OpTime())); - assertComparison(0, Value(OpTime()), Value::createDate(0)); - // Regex not fully supported (SERVER-6470) - // DBRef not supported - // Code not supported (SERVER-5718) - // CodeWScope not supported (SERVER-5718) + assertComparison(0, Value(OpTime()), Value(Date_t(0))); + assertComparison(-1, Value(Date_t(0)), Value(BSONRegEx(""))); + assertComparison(-1, Value(BSONRegEx("")), Value(BSONDBRef("", mongo::OID()))); + assertComparison(-1, Value(BSONDBRef("", mongo::OID())), Value(BSONCode(""))); + assertComparison(-1, Value(BSONCode("")), Value(BSONCodeWScope("", BSONObj()))); + assertComparison(-1, Value(BSONCodeWScope("", BSONObj())), Value(mongo::MAXKEY)); } private: template<class T,class U> @@ -1193,10 +1267,16 @@ namespace DocumentTests { } void assertComparison(int expectedResult, const Value& a, const Value& b) { log() << "testing " << a.toString() << " and " << b.toString() << endl; + // reflexivity + ASSERT_EQUALS(0, cmp(a, a)); + ASSERT_EQUALS(0, cmp(b, b)); + + // symmetry ASSERT_EQUALS( expectedResult, cmp( a, b ) ); ASSERT_EQUALS( -expectedResult, cmp( b, a ) ); + + // equal values must hash equally. if ( expectedResult == 0 ) { - // Equal values must hash equally. ASSERT_EQUALS( hash( a ), hash( b ) ); } @@ -1262,6 +1342,7 @@ namespace DocumentTests { add<Document::FieldIteratorEmpty>(); add<Document::FieldIteratorSingle>(); add<Document::FieldIteratorMultiple>(); + add<Document::AllTypesDoc>(); add<Value::Int>(); add<Value::Long>(); @@ -1276,9 +1357,7 @@ namespace DocumentTests { add<Value::Oid>(); add<Value::Bool>(); add<Value::Regex>(); - if ( 0 ) { add<Value::Symbol>(); - } add<Value::Undefined>(); add<Value::Null>(); add<Value::True>(); diff --git a/src/mongo/util/intrusive_counter.cpp b/src/mongo/util/intrusive_counter.cpp index 2ef51797f0e..594b26ba998 100644 --- a/src/mongo/util/intrusive_counter.cpp +++ b/src/mongo/util/intrusive_counter.cpp @@ -32,7 +32,8 @@ namespace mongo { ptr->_size = s.size(); char* stringStart = reinterpret_cast<char*>(ptr.get()) + sizeof(RCString); - s.copyTo( stringStart, true ); + s.copyTo(stringStart, true); + return ptr; } |