// matcher.cpp
/* Matcher is our boolean expression evaluator for "where" clauses */
/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
#include "pch.h"
#include "matcher.h"
#include "../util/goodies.h"
#include "../util/startup_test.h"
#include "diskloc.h"
#include "../scripting/engine.h"
#include "db.h"
#include "queryutil.h"
#include "client.h"
#include "pdfile.h"
namespace {
inline pcrecpp::RE_Options flags2options(const char* flags) {
pcrecpp::RE_Options options;
options.set_utf8(true);
while ( flags && *flags ) {
if ( *flags == 'i' )
options.set_caseless(true);
else if ( *flags == 'm' )
options.set_multiline(true);
else if ( *flags == 'x' )
options.set_extended(true);
else if ( *flags == 's' )
options.set_dotall(true);
flags++;
}
return options;
}
}
//#define DEBUGMATCHER(x) cout << x << endl;
#define DEBUGMATCHER(x)
namespace mongo {
extern BSONObj staticNull;
class Where : boost::noncopyable {
public:
Where( const string& ns ) {
_ns = ns;
_func = 0;
_initCalled = false;
}
~Where() {
if ( _scope.get() ){
try {
_scope->execSetup( "_mongo.readOnly = false;" , "make not read only" );
}
catch( DBException& e ){
warning() << "javascript scope cleanup interrupted" << causedBy( e ) << endl;
}
}
_func = 0;
}
void init() {
if ( _initCalled )
return;
_initCalled = true;
_scope = globalScriptEngine->getPooledScope( _ns );
NamespaceString ns( _ns );
_scope->localConnect( ns.db.c_str() );
massert( 10341 , "code has to be set first!" , ! _jsCode.empty() );
_func = _scope->createFunction( _jsCode.c_str() );
_scope->execSetup( "_mongo.readOnly = true;" , "make read only" );
}
void setScope( const BSONObj& scope ) {
_jsScope = scope.copy();
}
void setCode( const string& code ) {
_jsCode = code;
}
bool exec( const BSONObj& obj ) {
init();
uassert( 10070 , "$where compile error", _func != 0 );
if ( ! _jsScope.isEmpty() ) {
_scope->init( &_jsScope );
}
_scope->setObject( "obj", const_cast< BSONObj & >( obj ) );
_scope->setBoolean( "fullObject" , true ); // this is a hack b/c fullObject used to be relevant
int err = _scope->invoke( _func , 0, &obj , 1000 * 60 , false );
if ( err == -3 ) { // INVOKE_ERROR
stringstream ss;
ss << "error on invocation of $where function:\n"
<< _scope->getError();
uassert( 10071 , ss.str(), false);
}
else if ( err != 0 ) { // ! INVOKE_SUCCESS
uassert( 10072 , "unknown error in invocation of $where function", false);
}
return _scope->getBoolean( "return" ) != 0;
}
private:
bool _initCalled;
string _ns;
BSONObj _jsScope;
string _jsCode;
auto_ptr _scope;
ScriptingFunction _func;
};
Matcher::~Matcher() {
delete _where;
_where = 0;
}
ElementMatcher::ElementMatcher( BSONElement e , int op, bool isNot )
: _toMatch( e ) , _compareOp( op ), _isNot( isNot ), _subMatcherOnPrimitives(false) {
if ( op == BSONObj::opMOD ) {
BSONObj o = e.embeddedObject();
_mod = o["0"].numberInt();
_modm = o["1"].numberInt();
uassert( 10073 , "mod can't be 0" , _mod );
}
else if ( op == BSONObj::opTYPE ) {
_type = (BSONType)(e.numberInt());
}
else if ( op == BSONObj::opELEM_MATCH ) {
BSONElement m = e;
uassert( 12517 , "$elemMatch needs an Object" , m.type() == Object );
BSONObj x = m.embeddedObject();
if ( x.firstElement().getGtLtOp() == BSONObj::Equality &&
!str::equals( x.firstElement().fieldName(), "$not" ) ) {
_subMatcher.reset( new Matcher( x ) );
_subMatcherOnPrimitives = false;
}
else {
// meant to act on primitives
_subMatcher.reset( new Matcher( BSON( "" << x ) ) );
_subMatcherOnPrimitives = true;
}
}
}
ElementMatcher::ElementMatcher( BSONElement e , int op , const BSONObj& array, bool isNot )
: _toMatch( e ) , _compareOp( op ), _isNot( isNot ), _subMatcherOnPrimitives(false) {
_myset.reset( new set() );
BSONObjIterator i( array );
while ( i.more() ) {
BSONElement ie = i.next();
if ( op == BSONObj::opALL && ie.type() == Object && ie.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) {
shared_ptr s;
s.reset( new Matcher( ie.embeddedObject().firstElement().embeddedObjectUserCheck() ) );
_allMatchers.push_back( s );
}
else if ( ie.type() == RegEx ) {
if ( !_myregex.get() ) {
_myregex.reset( new vector< RegexMatcher >() );
}
_myregex->push_back( RegexMatcher() );
RegexMatcher &rm = _myregex->back();
rm._re.reset( new pcrecpp::RE( ie.regex(), flags2options( ie.regexFlags() ) ) );
uassert(16431, "Regular expression is too long",
rm._re->pattern().size() <= RegexMatcher::MaxPatternSize);
rm._fieldName = 0; // no need for field name
rm._regex = ie.regex();
rm._flags = ie.regexFlags();
rm._isNot = false;
bool purePrefix;
string prefix = simpleRegex(rm._regex, rm._flags, &purePrefix);
if (purePrefix)
rm._prefix = prefix;
}
else {
uassert( 15882, "$elemMatch not allowed within $in",
ie.type() != Object ||
ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH );
_myset->insert(ie);
}
}
if ( _allMatchers.size() ) {
uassert( 13020 , "with $all, can't mix $elemMatch and others" , _myset->size() == 0 && !_myregex.get());
}
}
int ElementMatcher::inverseOfNegativeCompareOp() const {
verify( negativeCompareOp() );
return _compareOp == BSONObj::NE ? BSONObj::Equality : BSONObj::opIN;
}
bool ElementMatcher::negativeCompareOpContainsNull() const {
verify( negativeCompareOp() );
return (_compareOp == BSONObj::NE && _toMatch.type() != jstNULL) ||
(_compareOp == BSONObj::NIN && _myset->count( staticNull.firstElement()) == 0 );
}
void ElementMatcher::visit( MatcherVisitor& visitor ) const {
if ( _subMatcher ) {
_subMatcher->visit( visitor );
}
for( vector >::const_iterator j = _allMatchers.begin();
j != _allMatchers.end(); ++j ) {
(*j)->visit( visitor );
}
}
MatchDetails::MatchDetails() :
_elemMatchKeyRequested() {
resetOutput();
}
void MatchDetails::resetOutput() {
_loadedRecord = false;
_elemMatchKeyFound = false;
_elemMatchKey = "";
}
string MatchDetails::toString() const {
stringstream ss;
ss << "loadedRecord: " << _loadedRecord << " ";
ss << "elemMatchKeyRequested: " << _elemMatchKeyRequested << " ";
ss << "elemMatchKey: " << ( _elemMatchKeyFound ? _elemMatchKey : "NONE" ) << " ";
return ss.str();
}
void Matcher::addRegex(const char *fieldName, const char *regex, const char *flags, bool isNot) {
RegexMatcher rm;
rm._re.reset( new pcrecpp::RE(regex, flags2options(flags)) );
uassert(16432, "Regular expression is too long",
rm._re->pattern().size() <= RegexMatcher::MaxPatternSize);
rm._fieldName = fieldName;
rm._regex = regex;
rm._flags = flags;
rm._isNot = isNot;
_regexs.push_back(rm);
if (!isNot) { //TODO something smarter
bool purePrefix;
string prefix = simpleRegex(regex, flags, &purePrefix);
if (purePrefix)
rm._prefix = prefix;
}
}
bool Matcher::addOp( const BSONElement &e, const BSONElement &fe, bool isNot, const char *& regex, const char *&flags ) {
const char *fn = fe.fieldName();
int op = fe.getGtLtOp( -1 );
if ( op == -1 ) {
if ( !isNot && fn[1] == 'r' && fn[2] == 'e' && fn[3] == 'f' && fn[4] == 0 ) {
return false; // { $ref : xxx } - treat as normal object
}
uassert( 10068 , (string)"invalid operator: " + fn , op != -1 );
}
switch ( op ) {
case BSONObj::GT:
case BSONObj::GTE:
case BSONObj::LT:
case BSONObj::LTE: {
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
_builders.push_back( b );
b->appendAs(fe, e.fieldName());
addBasic(b->done().firstElement(), op, isNot);
break;
}
case BSONObj::NE: {
_haveNeg = true;
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
_builders.push_back( b );
b->appendAs(fe, e.fieldName());
addBasic(b->done().firstElement(), BSONObj::NE, isNot);
break;
}
case BSONObj::opALL:
_all = true;
case BSONObj::opIN: {
uassert( 13276 , "$in needs an array" , fe.isABSONObj() );
_basics.push_back( ElementMatcher( e , op , fe.embeddedObject(), isNot ) );
BSONObjIterator i( fe.embeddedObject() );
while( i.more() ) {
if ( i.next().type() == Array ) {
_hasArray = true;
}
}
break;
}
case BSONObj::NIN:
uassert( 13277 , "$nin needs an array" , fe.isABSONObj() );
_haveNeg = true;
_basics.push_back( ElementMatcher( e , op , fe.embeddedObject(), isNot ) );
break;
case BSONObj::opMOD:
case BSONObj::opTYPE:
case BSONObj::opELEM_MATCH: {
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
_builders.push_back( b );
b->appendAs(fe, e.fieldName());
// these are types where ElementMatcher has all the info
_basics.push_back( ElementMatcher( b->done().firstElement() , op, isNot ) );
break;
}
case BSONObj::opSIZE: {
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
_builders.push_back( b );
b->appendAs(fe, e.fieldName());
addBasic(b->done().firstElement(), BSONObj::opSIZE, isNot);
_haveSize = true;
break;
}
case BSONObj::opEXISTS: {
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
_builders.push_back( b );
b->appendAs(fe, e.fieldName());
addBasic(b->done().firstElement(), BSONObj::opEXISTS, isNot);
break;
}
case BSONObj::opREGEX: {
uassert( 13032, "can't use $not with $regex, use BSON regex type instead", !isNot );
if ( fe.type() == RegEx ) {
regex = fe.regex();
flags = fe.regexFlags();
}
else {
regex = fe.valuestrsafe();
}
break;
}
case BSONObj::opOPTIONS: {
uassert( 13029, "can't use $not with $options, use BSON regex type instead", !isNot );
flags = fe.valuestrsafe();
break;
}
case BSONObj::opWITHIN: {
BSONObj shapeObj = fe.embeddedObject();
BSONObjIterator argIt(shapeObj);
uassert(16515, "Empty obj for $within: " + shapeObj.toString(), argIt.more());
BSONElement elt = argIt.next();
uassert(16516, "Within must be provided a BSONObj: " + elt.toString(),
elt.isABSONObj());
BSONObj obj = elt.Obj();
if (str::equals(elt.fieldName(), "$box")) {
uassert(16615, "Malformed $box: " + obj.toString(), GeoParser::isLegacyBox(obj));
_geo.push_back(GeoMatcher::makeBoxMatcher(e.fieldName(), obj, isNot));
} else if (str::equals(elt.fieldName(), "$center")) {
uassert(16616, "Malformed $center: " + obj.toString(),
GeoParser::isLegacyCenter(obj));
_geo.push_back(GeoMatcher::makeCircleMatcher(e.fieldName(), obj, isNot));
} else if (str::equals(elt.fieldName(), "$polygon")) {
uassert(16617, "Malformed $polygon: " + obj.toString(),
GeoParser::isLegacyPolygon(obj));
_geo.push_back(GeoMatcher::makePolygonMatcher(e.fieldName(), obj, isNot));
} else {
uasserted(16529, "Couldn't pull any geometry out of $within query: "
+ obj.toString());
}
break;
}
case BSONObj::opNEAR:
case BSONObj::opGEO_INTERSECTS:
case BSONObj::opMAX_DISTANCE:
break;
default:
uassert( 10069 , (string)"BUG - can't operator for: " + fn , 0 );
}
return true;
}
void Matcher::parseExtractedClause( const BSONElement &e, list< shared_ptr< Matcher > > &matchers ) {
uassert( 13086, "$and/$or/$nor must be a nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 );
BSONObjIterator j( e.embeddedObject() );
while( j.more() ) {
BSONElement f = j.next();
uassert( 13087, "$and/$or/$nor match element must be an object", f.type() == Object );
matchers.push_back( shared_ptr< Matcher >( new Matcher( f.embeddedObject(), true ) ) );
}
}
bool Matcher::parseClause( const BSONElement &e ) {
const char *ef = e.fieldName();
if ( ef[ 0 ] != '$' )
return false;
// $and
if ( ef[ 1 ] == 'a' && ef[ 2 ] == 'n' && ef[ 3 ] == 'd' ) {
parseExtractedClause( e, _andMatchers );
return true;
}
// $or
if ( ef[ 1 ] == 'o' && ef[ 2 ] == 'r' && ef[ 3 ] == 0 ) {
parseExtractedClause( e, _orMatchers );
return true;
}
// $nor
if ( ef[ 1 ] == 'n' && ef[ 2 ] == 'o' && ef[ 3 ] == 'r' && ef[ 4 ] == 0 ) {
parseExtractedClause( e, _norMatchers );
return true;
}
// $comment
if ( ef[ 1 ] == 'c' && ef[ 2 ] == 'o' && ef[ 3 ] == 'm' && str::equals( ef , "$comment" ) ) {
return true;
}
return false;
}
bool inConstructorChain( bool printOffending=false );
// $where: function()...
NOINLINE_DECL void Matcher::parseWhere( const BSONElement &e ) {
uassert(15902 , "$where expression has an unexpected type", e.type() == String || e.type() == CodeWScope || e.type() == Code );
uassert( 10066 , "$where may only appear once in query", _where == 0 );
uassert( 10067 , "$where query, but no script engine", globalScriptEngine );
massert( 13089 , "no current client needed for $where" , haveClient() );
_where = new Where( cc().ns() );
if ( e.type() == CodeWScope ) {
_where->setCode( e.codeWScopeCode() );
_where->setScope( BSONObj( e.codeWScopeScopeDataUnsafe() ) );
}
else {
_where->setCode( e.valuestr() );
}
}
void Matcher::parseMatchExpressionElement( const BSONElement &e, bool nested ) {
uassert( 13629 , "can't have undefined in a query expression" , e.type() != Undefined );
if ( parseClause( e ) ) {
return;
}
const char *fn = e.fieldName();
if ( str::equals(fn, "$where") ) {
parseWhere(e);
return;
}
if ( e.type() == RegEx ) {
addRegex( fn, e.regex(), e.regexFlags() );
return;
}
// greater than / less than...
// e.g., e == { a : { $gt : 3 } }
// or
// { a : { $in : [1,2,3] } }
if ( e.type() == Object ) {
// support {$regex:"a|b", $options:"imx"}
const char* regex = NULL;
const char* flags = "";
// e.g., fe == { $gt : 3 }
BSONObjIterator j(e.embeddedObject());
bool isOperator = false;
while ( j.more() ) {
BSONElement fe = j.next();
const char *fn = fe.fieldName();
if ( fn[0] == '$' && fn[1] ) {
isOperator = true;
if ( fn[1] == 'n' && fn[2] == 'o' && fn[3] == 't' && fn[4] == 0 ) {
_haveNeg = true;
switch( fe.type() ) {
case Object: {
BSONObjIterator k( fe.embeddedObject() );
uassert( 13030, "$not cannot be empty", k.more() );
while( k.more() ) {
addOp( e, k.next(), true, regex, flags );
}
break;
}
case RegEx:
addRegex( e.fieldName(), fe.regex(), fe.regexFlags(), true );
break;
default:
uassert( 13031, "invalid use of $not", false );
}
}
else {
if ( !addOp( e, fe, false, regex, flags ) ) {
isOperator = false;
break;
}
}
}
else {
isOperator = false;
break;
}
}
if (regex) {
addRegex(e.fieldName(), regex, flags);
}
if ( isOperator )
return;
}
if ( e.type() == Array ) {
_hasArray = true;
}
else if( *fn == '$' ) {
if( str::equals(fn, "$atomic") || str::equals(fn, "$isolated") ) {
uassert( 14844, "$atomic specifier must be a top level field", !nested );
_atomic = e.trueValue();
return;
}
}
// normal, simple case e.g. { a : "foo" }
addBasic(e, BSONObj::Equality, false);
}
/* _jsobj - the query pattern
*/
Matcher::Matcher(const BSONObj &jsobj, bool nested) :
_where(0), _jsobj(jsobj), _haveSize(), _all(), _hasArray(0), _haveNeg(), _atomic(false) {
BSONObjIterator i(_jsobj);
while ( i.more() ) {
parseMatchExpressionElement( i.next(), nested );
}
}
Matcher::Matcher( const Matcher &docMatcher, const BSONObj &key ) :
_where(0), _constrainIndexKey( key ), _haveSize(), _all(), _hasArray(0), _haveNeg(), _atomic(false) {
// Filter out match components that will provide an incorrect result
// given a key from a single key index.
for( vector< ElementMatcher >::const_iterator i = docMatcher._basics.begin(); i != docMatcher._basics.end(); ++i ) {
if ( key.hasField( i->_toMatch.fieldName() ) ) {
switch( i->_compareOp ) {
case BSONObj::opSIZE:
case BSONObj::opALL:
case BSONObj::NE:
case BSONObj::NIN:
case BSONObj::opEXISTS: // We can't match on index in this case.
case BSONObj::opTYPE: // For $type:10 (null), a null key could be a missing
// field or a null value field.
case BSONObj::opELEM_MATCH: // $elemMatch operates on arrays and no key match
// version has been implemented.
break;
case BSONObj::opIN: {
bool inContainsArray = false;
for( set::const_iterator j = i->_myset->begin(); j != i->_myset->end(); ++j ) {
if ( j->type() == Array ) {
inContainsArray = true;
break;
}
}
// Can't match an array to its first indexed element.
if ( !i->_isNot && !inContainsArray ) {
_basics.push_back( *i );
}
break;
}
default: {
// Can't match an array to its first indexed element.
if ( !i->_isNot && i->_toMatch.type() != Array ) {
_basics.push_back( *i );
}
}
}
}
}
for( vector::const_iterator it = docMatcher._regexs.begin();
it != docMatcher._regexs.end();
++it) {
if ( !it->_isNot && key.hasField( it->_fieldName ) ) {
_regexs.push_back(*it);
}
}
// Recursively filter match components for and and or matchers.
for( list< shared_ptr< Matcher > >::const_iterator i = docMatcher._andMatchers.begin(); i != docMatcher._andMatchers.end(); ++i ) {
_andMatchers.push_back( shared_ptr< Matcher >( new Matcher( **i, key ) ) );
}
for( list< shared_ptr< Matcher > >::const_iterator i = docMatcher._orMatchers.begin(); i != docMatcher._orMatchers.end(); ++i ) {
_orMatchers.push_back( shared_ptr< Matcher >( new Matcher( **i, key ) ) );
}
}
inline bool regexMatches(const RegexMatcher& rm, const BSONElement& e) {
switch (e.type()) {
case String:
case Symbol:
if (rm._prefix.empty())
return rm._re->PartialMatch(e.valuestr());
else
return !strncmp(e.valuestr(), rm._prefix.c_str(), rm._prefix.size());
case RegEx:
return !strcmp(rm._regex, e.regex()) && !strcmp(rm._flags, e.regexFlags());
default:
return false;
}
}
inline int Matcher::valuesMatch(const BSONElement& l, const BSONElement& r, int op, const ElementMatcher& bm) const {
verify( op != BSONObj::NE && op != BSONObj::NIN && op != BSONObj::opELEM_MATCH );
if ( op == BSONObj::Equality ) {
return l.valuesEqual(r);
}
if ( op == BSONObj::opIN ) {
// { $in : [1,2,3] }
int count = bm._myset->count(l);
if ( count )
return count;
if ( bm._myregex.get() ) {
for( vector::const_iterator i = bm._myregex->begin(); i != bm._myregex->end(); ++i ) {
if ( regexMatches( *i, l ) ) {
return true;
}
}
}
}
if ( op == BSONObj::opSIZE ) {
if ( l.type() != Array )
return 0;
int count = 0;
BSONObjIterator i( l.embeddedObject() );
while( i.moreWithEOO() ) {
BSONElement e = i.next();
if ( e.eoo() )
break;
++count;
}
return count == r.number();
}
if ( op == BSONObj::opMOD ) {
if ( ! l.isNumber() )
return false;
return l.numberLong() % bm._mod == bm._modm;
}
if ( op == BSONObj::opTYPE ) {
return bm._type == l.type();
}
/* check LT, GTE, ... */
if ( l.canonicalType() != r.canonicalType() )
return false;
int c = compareElementValues(l, r);
if ( c < -1 ) c = -1;
if ( c > 1 ) c = 1;
int z = 1 << (c+1);
return (op & z);
}
int Matcher::inverseMatch(const char *fieldName, const BSONElement &toMatch, const BSONObj &obj, const ElementMatcher& bm , MatchDetails * details ) const {
int inverseRet = matchesDotted( fieldName, toMatch, obj, bm.inverseOfNegativeCompareOp(), bm , false , details );
if ( bm.negativeCompareOpContainsNull() ) {
return ( inverseRet <= 0 ) ? 1 : 0;
}
return -inverseRet;
}
int retExistsFound( const ElementMatcher &bm ) {
return bm._toMatch.trueValue() ? 1 : -1;
}
/* Check if a particular field matches.
fieldName - field to match "a.b" if we are reaching into an embedded object.
toMatch - element we want to match.
obj - database object to check against
compareOp - Equality, LT, GT, etc. This may be different than, and should supersede, the compare op in em.
isArr -
Special forms:
{ "a.b" : 3 } means obj.a.b == 3
{ a : { $lt : 3 } } means obj.a < 3
{ a : { $in : [1,2] } } means [1,2].contains(obj.a)
return value
-1 mismatch
0 missing element
1 match
*/
int Matcher::matchesDotted(const char *fieldName, const BSONElement& toMatch, const BSONObj& obj, int compareOp, const ElementMatcher& em , bool isArr, MatchDetails * details ) const {
DEBUGMATCHER( "\t matchesDotted : " << fieldName << " hasDetails: " << ( details ? "yes" : "no" ) );
if ( compareOp == BSONObj::opALL ) {
if ( em._allMatchers.size() ) {
// $all query matching will not be performed against indexes, so the field
// to match is always extracted from the full document.
BSONElement e = obj.getFieldDotted( fieldName );
// The $all/$elemMatch operator only matches arrays.
if ( e.type() != Array ) {
return -1;
}
for ( unsigned i=0; imatches( f.embeddedObject() ) ) {
found = true;
break;
}
}
if ( ! found )
return -1;
}
return 1;
}
if ( em._myset->size() == 0 && !em._myregex.get() )
return -1; // is this desired?
BSONElementSet myValues;
obj.getFieldsDotted( fieldName , myValues );
for( set< BSONElement, element_lt >::const_iterator i = em._myset->begin(); i != em._myset->end(); ++i ) {
// ignore nulls
if ( i->type() == jstNULL )
continue;
if ( myValues.count( *i ) == 0 )
return -1;
}
if ( !em._myregex.get() )
return 1;
for( vector< RegexMatcher >::const_iterator i = em._myregex->begin(); i != em._myregex->end(); ++i ) {
bool match = false;
for( BSONElementSet::const_iterator j = myValues.begin(); j != myValues.end(); ++j ) {
if ( regexMatches( *i, *j ) ) {
match = true;
break;
}
}
if ( !match )
return -1;
}
return 1;
} // end opALL
if ( compareOp == BSONObj::NE || compareOp == BSONObj::NIN ) {
return inverseMatch( fieldName, toMatch, obj, em , details );
}
BSONElement e;
bool indexed = !_constrainIndexKey.isEmpty();
if ( indexed ) {
e = obj.getFieldUsingIndexNames(fieldName, _constrainIndexKey);
if( e.eoo() ) {
cout << "obj: " << obj << endl;
cout << "fieldName: " << fieldName << endl;
cout << "_constrainIndexKey: " << _constrainIndexKey << endl;
verify( !e.eoo() );
}
}
else {
const char *p = strchr(fieldName, '.');
if ( p ) {
string left(fieldName, p-fieldName);
BSONElement se = obj.getField(left.c_str());
if ( se.eoo() )
;
else if ( se.type() != Object && se.type() != Array )
;
else {
BSONObj eo = se.embeddedObject();
return matchesDotted(p+1, toMatch, eo, compareOp, em, se.type() == Array , details );
}
}
// An array was encountered while scanning for components of the field name.
if ( isArr ) {
DEBUGMATCHER( "\t\t isArr 1 : obj : " << obj );
BSONObjIterator ai(obj);
bool found = false;
while ( ai.moreWithEOO() ) {
BSONElement z = ai.next();
if( strcmp(z.fieldName(),fieldName) == 0 ) {
if ( compareOp == BSONObj::opEXISTS ) {
return retExistsFound( em );
}
if ( compareOp != BSONObj::opELEM_MATCH &&
valuesMatch(z, toMatch, compareOp, em) ) {
// "field." array notation was used
if ( details )
details->setElemMatchKey( z.fieldName() );
return 1;
}
}
if ( z.type() == Object ) {
BSONObj eo = z.embeddedObject();
int cmp = matchesDotted(fieldName, toMatch, eo, compareOp, em, false, details );
if ( cmp > 0 ) {
if ( details )
details->setElemMatchKey( z.fieldName() );
return 1;
}
else if ( cmp < 0 ) {
found = true;
}
}
}
return found ? -1 : 0;
}
if( p ) {
// Left portion of field name was not found or wrong type.
return 0;
}
else {
e = obj.getField(fieldName);
}
}
if ( compareOp == BSONObj::opEXISTS ) {
if( e.eoo() ) {
return 0;
} else {
return retExistsFound( em );
}
}
else if ( ( e.type() != Array || indexed || compareOp == BSONObj::opSIZE ) &&
compareOp != BSONObj::opELEM_MATCH &&
valuesMatch(e, toMatch, compareOp, em ) ) {
return 1;
}
else if ( e.type() == Array && compareOp != BSONObj::opSIZE ) {
BSONObjIterator ai(e.embeddedObject());
while ( ai.more() ) {
BSONElement z = ai.next();
bool match = false;
if ( compareOp == BSONObj::opELEM_MATCH ) {
if ( em._subMatcherOnPrimitives ) {
match = em._subMatcher->matches( z.wrap( "" ) );
}
else {
match = ( z.isABSONObj() && em._subMatcher->matches( z.embeddedObject() ) );
}
}
else {
match = valuesMatch( z, toMatch, compareOp, em );
}
if ( match ) {
if ( details ) {
details->setElemMatchKey( z.fieldName() );
}
return 1;
}
}
// match an entire array to itself
if ( compareOp == BSONObj::Equality && e.woCompare( toMatch , false ) == 0 ) {
return 1;
}
if ( compareOp == BSONObj::opIN && valuesMatch( e, toMatch, compareOp, em ) ) {
return 1;
}
}
else if ( e.eoo() ) {
return 0;
}
return -1;
}
extern int dump;
/* See if an object matches the query.
*/
bool Matcher::matches(const BSONObj& jsobj , MatchDetails * details ) const {
/*
NB: if any modifications are made to how this operates, make sure
they are reflected in visitReferences(), whose implementation
parallels this.
*/
LOG(5) << "Matcher::matches() " << jsobj.toString() << endl;
/* assuming there is usually only one thing to match. if more this
could be slow sometimes. */
// check normal non-regex cases:
for ( unsigned i = 0; i < _basics.size(); i++ ) {
const ElementMatcher& bm = _basics[i];
const BSONElement& m = bm._toMatch;
// -1=mismatch. 0=missing element. 1=match
int cmp = matchesDotted(m.fieldName(), m, jsobj, bm._compareOp, bm , false , details );
if ( cmp == 0 && bm._compareOp == BSONObj::opEXISTS ) {
// If missing, match cmp is opposite of $exists spec.
cmp = -retExistsFound(bm);
}
if ( bm._isNot )
cmp = -cmp;
if ( cmp < 0 )
return false;
if ( cmp == 0 ) {
/* missing is ok iff we were looking for null */
if ( m.type() == jstNULL || m.type() == Undefined ||
( ( bm._compareOp == BSONObj::opIN || bm._compareOp == BSONObj::NIN ) && bm._myset->count( staticNull.firstElement() ) > 0 ) ) {
if ( bm.negativeCompareOp() ^ bm._isNot ) {
return false;
}
}
else {
if ( !bm._isNot ) {
return false;
}
}
}
}
for (vector::const_iterator it = _geo.begin(); it != _geo.end(); ++it) {
verify(_constrainIndexKey.isEmpty());
BSONElementSet s;
jsobj.getFieldsDotted(it->getFieldName().c_str(), s, false);
int matches = 0;
for (BSONElementSet::const_iterator i = s.begin(); i != s.end(); ++i) {
if (!i->isABSONObj()) { continue; }
Point p;
if (!GeoMatcher::pointFrom(i->Obj(), &p)) { continue; }
if (it->containsPoint(p)) { ++matches; break; }
}
if (0 == matches) { return false; }
}
for (vector::const_iterator it = _regexs.begin();
it != _regexs.end();
++it) {
BSONElementSet s;
if ( !_constrainIndexKey.isEmpty() ) {
BSONElement e = jsobj.getFieldUsingIndexNames(it->_fieldName, _constrainIndexKey);
// Should only have keys nested one deep here, for geo-indices
// TODO: future indices may nest deeper?
if( e.type() == Array ){
BSONObjIterator i( e.Obj() );
while( i.more() ){
s.insert( i.next() );
}
}
else if ( !e.eoo() )
s.insert( e );
}
else {
jsobj.getFieldsDotted( it->_fieldName, s );
}
bool match = false;
for( BSONElementSet::const_iterator i = s.begin(); i != s.end(); ++i )
if ( regexMatches(*it, *i) )
match = true;
if ( !match ^ it->_isNot )
return false;
}
if ( _andMatchers.size() > 0 ) {
for( list< shared_ptr< Matcher > >::const_iterator i = _andMatchers.begin();
i != _andMatchers.end(); ++i ) {
// SERVER-3192 Track field matched using details the same as for
// top level fields, at least for now.
if ( !(*i)->matches( jsobj, details ) ) {
return false;
}
}
}
if ( _orMatchers.size() > 0 ) {
bool match = false;
for( list< shared_ptr< Matcher > >::const_iterator i = _orMatchers.begin();
i != _orMatchers.end(); ++i ) {
// SERVER-205 don't submit details - we don't want to track field
// matched within $or
if ( (*i)->matches( jsobj ) ) {
match = true;
break;
}
}
if ( !match ) {
return false;
}
}
if ( _norMatchers.size() > 0 ) {
for( list< shared_ptr< Matcher > >::const_iterator i = _norMatchers.begin();
i != _norMatchers.end(); ++i ) {
// SERVER-205 don't submit details - we don't want to track field
// matched within $nor
if ( (*i)->matches( jsobj ) ) {
return false;
}
}
}
if ( _where ) {
return _where->exec( jsobj );
}
return true;
}
#ifdef MONGO_LATER_SERVER_4644
void Matcher::visitReferences(FieldSink *pSink) const {
// check normal non-regex cases:
for ( unsigned i = 0; i < _basics.size(); i++ ) {
const ElementMatcher& bm = _basics[i];
const BSONElement& m = bm._toMatch;
// -1=mismatch. 0=missing element. 1=match
int cmp = matchesDotted(m.fieldName(), m, jsobj, bm._compareOp, bm , false , details );
if ( cmp == 0 && bm._compareOp == BSONObj::opEXISTS ) {
// If missing, match cmp is opposite of $exists spec.
cmp = -retExistsFound(bm);
}
if ( bm._isNot )
cmp = -cmp;
if ( cmp < 0 )
return false;
if ( cmp == 0 ) {
/* missing is ok iff we were looking for null */
if ( m.type() == jstNULL || m.type() == Undefined ||
( ( bm._compareOp == BSONObj::opIN || bm._compareOp == BSONObj::NIN ) && bm._myset->count( staticNull.firstElement() ) > 0 ) ) {
if ( bm.negativeCompareOp() ^ bm._isNot ) {
return false;
}
}
else {
if ( !bm._isNot ) {
return false;
}
}
}
}
for (vector::const_iterator it = _regexs.begin();
it != _regexs.end();
++it) {
BSONElementSet s;
if ( !_constrainIndexKey.isEmpty() ) {
BSONElement e = jsobj.getFieldUsingIndexNames(it->_fieldName, _constrainIndexKey);
// Should only have keys nested one deep here, for geo-indices
// TODO: future indices may nest deeper?
if( e.type() == Array ){
BSONObjIterator i( e.Obj() );
while( i.more() ){
s.insert( i.next() );
}
}
else if ( !e.eoo() )
s.insert( e );
}
else {
jsobj.getFieldsDotted( it->_fieldName, s );
}
bool match = false;
for( BSONElementSet::const_iterator i = s.begin(); i != s.end(); ++i )
if ( regexMatches(*it, *i) )
match = true;
if ( !match ^ it->_isNot )
return false;
}
if ( _andMatchers.size() > 0 ) {
for( list< shared_ptr< Matcher > >::const_iterator i = _andMatchers.begin();
i != _andMatchers.end(); ++i ) {
// SERVER-3192 Track field matched using details the same as for
// top level fields, at least for now.
if ( !(*i)->matches( jsobj, details ) ) {
return false;
}
}
}
if ( _orMatchers.size() > 0 ) {
bool match = false;
for( list< shared_ptr< Matcher > >::const_iterator i = _orMatchers.begin();
i != _orMatchers.end(); ++i ) {
// SERVER-205 don't submit details - we don't want to track field
// matched within $or
if ( (*i)->matches( jsobj ) ) {
match = true;
break;
}
}
if ( !match ) {
return false;
}
}
if ( _norMatchers.size() > 0 ) {
for( list< shared_ptr< Matcher > >::const_iterator i = _norMatchers.begin();
i != _norMatchers.end(); ++i ) {
// SERVER-205 don't submit details - we don't want to track field
// matched within $nor
if ( (*i)->matches( jsobj ) ) {
return false;
}
}
}
if ( _where ) {
if ( _where->func == 0 ) {
u_assert( 10070 , "$where compile error", false);
return false; // didn't compile
}
if ( _where->jsScope ) {
_where->scope->init( _where->jsScope );
}
_where->scope->setObject( "obj", const_cast< BSONObj & >( jsobj ) );
_where->scope->setBoolean( "fullObject" , true ); // this is a hack b/c fullObject used to be relevant
int err = _where->scope->invoke( _where->func , 0, &jsobj , 1000 * 60 , false );
if ( err == -3 ) { // INVOKE_ERROR
stringstream ss;
ss << "error on invocation of $where function:\n"
<< _where->scope->getError();
u_assert( 10071 , ss.str(), false);
return false;
}
else if ( err != 0 ) { // ! INVOKE_SUCCESS
u_assert( 10072 , "unknown error in invocation of $where function", false);
return false;
}
return _where->scope->getBoolean( "return" ) != 0;
}
}
#endif /* MONGO_LATER_SERVER_4644 */
static void visitList( MatcherVisitor& visitor,
const list >& matchers ) {
for( list >::const_iterator i = matchers.begin(); i != matchers.end();
++i ) {
(*i)->visit( visitor );
}
}
void Matcher::visit( MatcherVisitor& visitor ) const {
visitor.visitMatcher( *this );
// Visit the _basics ElementMatchers.
for( vector::const_iterator i = _basics.begin(); i != _basics.end(); ++i ) {
visitor.visitElementMatcher( *i );
}
// Visit Matchers contained within the _basics ElementMatchers.
for( vector::const_iterator i = _basics.begin(); i != _basics.end(); ++i ) {
i->visit( visitor );
}
visitList( visitor, _andMatchers );
visitList( visitor, _orMatchers );
visitList( visitor, _norMatchers );
}
bool Matcher::keyMatch( const Matcher &docMatcher ) const {
// Quick check certain non key match cases.
if ( docMatcher._all
|| docMatcher._haveSize
|| docMatcher._hasArray // We can't match an array to its first indexed element using keymatch
|| docMatcher._haveNeg ) {
return false;
}
// Check that all match components are available in the index matcher.
if ( !( _basics.size() == docMatcher._basics.size() && _regexs.size() == docMatcher._regexs.size() && !docMatcher._where ) ) {
return false;
}
if (_geo.size() != docMatcher._geo.size()) {
return false;
}
if ( _andMatchers.size() != docMatcher._andMatchers.size() ) {
return false;
}
if ( _orMatchers.size() != docMatcher._orMatchers.size() ) {
return false;
}
if ( docMatcher._norMatchers.size() > 0 ) {
return false;
}
// Recursively check that all submatchers support key match.
{
list< shared_ptr< Matcher > >::const_iterator i = _andMatchers.begin();
list< shared_ptr< Matcher > >::const_iterator j = docMatcher._andMatchers.begin();
while( i != _andMatchers.end() ) {
if ( !(*i)->keyMatch( **j ) ) {
return false;
}
++i; ++j;
}
}
{
list< shared_ptr< Matcher > >::const_iterator i = _orMatchers.begin();
list< shared_ptr< Matcher > >::const_iterator j = docMatcher._orMatchers.begin();
while( i != _orMatchers.end() ) {
if ( !(*i)->keyMatch( **j ) ) {
return false;
}
++i; ++j;
}
}
// Nor matchers and or dedup constraints aren't created for index matchers,
// so no need to check those here.
return true;
}
/*- just for testing -- */
#pragma pack(1)
struct JSObj1 {
JSObj1() {
totsize=sizeof(JSObj1);
n = NumberDouble;
strcpy_s(nname, 5, "abcd");
N = 3.1;
s = String;
strcpy_s(sname, 7, "abcdef");
slen = 10;
strcpy_s(sval, 10, "123456789");
eoo = EOO;
}
unsigned totsize;
char n;
char nname[5];
double N;
char s;
char sname[7];
unsigned slen;
char sval[10];
char eoo;
};
#pragma pack()
struct JSObj1 js1;
#pragma pack(1)
struct JSObj2 {
JSObj2() {
totsize=sizeof(JSObj2);
s = String;
strcpy_s(sname, 7, "abcdef");
slen = 10;
strcpy_s(sval, 10, "123456789");
eoo = EOO;
}
unsigned totsize;
char s;
char sname[7];
unsigned slen;
char sval[10];
char eoo;
} js2;
struct JSUnitTest : public StartupTest {
void run() {
{
// a quick check that we are using our mongo assert macro
int x = 1;
verify( ++x );
if( x != 2 ) {
log() << "bad build - wrong assert macro" << endl;
::abort();
}
}
BSONObj j1((const char *) &js1);
BSONObj j2((const char *) &js2);
Matcher m(j2);
verify( m.matches(j1) );
js2.sval[0] = 'z';
verify( !m.matches(j1) );
Matcher n(j1);
verify( n.matches(j1) );
verify( !n.matches(j2) );
BSONObj j0 = BSONObj();
// BSONObj j0((const char *) &js0);
Matcher p(j0);
verify( p.matches(j1) );
verify( p.matches(j2) );
}
} jsunittest;
#pragma pack()
struct RXTest : public StartupTest {
RXTest() {
}
void run() {
/*
static const boost::regex e("(\\d{4}[- ]){3}\\d{4}");
static const boost::regex b(".....");
out() << "regex result: " << regex_match("hello", e) << endl;
out() << "regex result: " << regex_match("abcoo", b) << endl;
*/
int ret = 0;
pcre_config( PCRE_CONFIG_UTF8 , &ret );
massert( 10342 , "pcre not compiled with utf8 support" , ret );
pcrecpp::RE re1(")({a}h.*o");
pcrecpp::RE re("h.llo");
verify( re.FullMatch("hello") );
verify( !re1.FullMatch("hello") );
pcrecpp::RE_Options options;
options.set_utf8(true);
pcrecpp::RE part("dwi", options);
verify( part.PartialMatch("dwight") );
pcre_config( PCRE_CONFIG_UNICODE_PROPERTIES , &ret );
if ( ! ret )
cout << "warning: some regex utf8 things will not work. pcre build doesn't have --enable-unicode-properties" << endl;
}
} rxtest;
} // namespace mongo