diff options
author | Kevin Matulef <matulef@10gen.com> | 2012-10-15 16:12:18 -0400 |
---|---|---|
committer | Kevin Matulef <matulef@10gen.com> | 2012-10-15 16:22:13 -0400 |
commit | 5a28905a8359f336aaa9c6acffe96448ab88e2a1 (patch) | |
tree | f9631cd581539ce3afe48c191782ce476198ea5d /src/mongo/db/keypattern.cpp | |
parent | 732f351f4699758ed30d0a2c7e74969b03004d1a (diff) | |
download | mongo-5a28905a8359f336aaa9c6acffe96448ab88e2a1.tar.gz |
SERVER-2001 calculate query bounds using more general key expressions
In sharding, given a key expression like {a : 1} or {a : -1} we
must translate a query to a set of bounds to figure out which shards
are relevant. This patch amends the keyBounds calculation function
so that patterns which start with "hashed" fields calculate the
right bounds.
Diffstat (limited to 'src/mongo/db/keypattern.cpp')
-rw-r--r-- | src/mongo/db/keypattern.cpp | 119 |
1 files changed, 88 insertions, 31 deletions
diff --git a/src/mongo/db/keypattern.cpp b/src/mongo/db/keypattern.cpp index 93b4ab050fc..1f3fd6233ec 100644 --- a/src/mongo/db/keypattern.cpp +++ b/src/mongo/db/keypattern.cpp @@ -47,64 +47,78 @@ namespace mongo { return false; } + typedef vector<pair<BSONObj,BSONObj> >::const_iterator BoundListIter; + BoundList KeyPattern::keyBounds( const FieldRangeSet& queryConstraints ) const { + // To construct our bounds we will generate intervals based on constraints for + // the first field, then compound intervals based on constraints for the first + // 2 fields, then compound intervals for the first 3 fields, etc. + // As we loop through the fields, we start generating new intervals that will later + // get extended in another iteration of the loop. We define these partially constructed + // intervals using pairs of BSONObjBuilders (shared_ptrs, since after one iteration of the + // loop they still must exist outside their scope). typedef vector< pair< shared_ptr<BSONObjBuilder> , shared_ptr<BSONObjBuilder> > > BoundBuilders; BoundBuilders builders; builders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) ); BSONObjIterator i( _pattern ); - // until equalityOnly is false, we are just dealing with equality (no range or $in querys). + // until equalityOnly is false, we are just dealing with equality (no range or $in queries). bool equalityOnly = true; while( i.more() ) { BSONElement e = i.next(); + + // get the relevant intervals for this field, but we may have to transform the + // list of what's relevant according to the expression for this field const FieldRange &fr = queryConstraints.range( e.fieldName() ); - int number = (int) e.number(); // returns 0.0 if not numeric - bool forward = ( number >= 0 ); + const vector<FieldInterval> &oldIntervals = fr.intervals(); + BoundList fieldBounds = _transformFieldBounds( oldIntervals , e ); + if ( equalityOnly ) { - if ( fr.equality() ) { - for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) { - j->first->appendAs( fr.min(), "" ); - j->second->appendAs( fr.min(), "" ); + if ( fieldBounds.size() == 1 && + ( fieldBounds.front().first == fieldBounds.front().second ) ){ + // this field is only a single point-interval + BoundBuilders::const_iterator j; + for( j = builders.begin(); j != builders.end(); ++j ) { + j->first->appendElements( fieldBounds.front().first ); + j->second->appendElements( fieldBounds.front().first ); } } else { + // This clause is the first to generate more than a single point. + // We only execute this clause once. After that, we simplify the bound + // extensions to prevent combinatorial explosion. equalityOnly = false; BoundBuilders newBuilders; - const vector<FieldInterval> &intervals = fr.intervals(); - for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) { + BoundBuilders::const_iterator i; + for( i = builders.begin(); i != builders.end(); ++i ) { BSONObj first = i->first->obj(); BSONObj second = i->second->obj(); - if ( forward ) { - for( vector<FieldInterval>::const_iterator j = intervals.begin(); j != intervals.end(); ++j ) { - uassert( 16449, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < MAX_IN_COMBINATIONS ); - newBuilders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) ); - newBuilders.back().first->appendElements( first ); - newBuilders.back().second->appendElements( second ); - newBuilders.back().first->appendAs( j->_lower._bound, "" ); - newBuilders.back().second->appendAs( j->_upper._bound, "" ); - } - } - else { - for( vector<FieldInterval>::const_reverse_iterator j = intervals.rbegin(); j != intervals.rend(); ++j ) { - uassert( 16450, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < MAX_IN_COMBINATIONS ); - newBuilders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) ); - newBuilders.back().first->appendElements( first ); - newBuilders.back().second->appendElements( second ); - newBuilders.back().first->appendAs( j->_upper._bound, "" ); - newBuilders.back().second->appendAs( j->_lower._bound, "" ); - } + for(BoundListIter j = fieldBounds.begin(); j != fieldBounds.end(); ++j ) { + uassert( 16452, + "combinatorial limit of $in partitioning of results exceeded" , + newBuilders.size() < MAX_IN_COMBINATIONS ); + newBuilders.push_back( + make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), + shared_ptr<BSONObjBuilder>( new BSONObjBuilder()))); + newBuilders.back().first->appendElements( first ); + newBuilders.back().second->appendElements( second ); + newBuilders.back().first->appendElements( j->first ); + newBuilders.back().second->appendElements( j->second ); } } builders = newBuilders; } } else { - for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) { - j->first->appendAs( forward ? fr.min() : fr.max(), "" ); - j->second->appendAs( forward ? fr.max() : fr.min(), "" ); + // if we've already generated a range or multiple point-intervals + // just extend what we've generated with min/max bounds for this field + BoundBuilders::const_iterator j; + for( j = builders.begin(); j != builders.end(); ++j ) { + j->first->appendElements( fieldBounds.front().first ); + j->second->appendElements( fieldBounds.back().second ); } } } @@ -114,4 +128,47 @@ namespace mongo { return ret; } + BoundList KeyPattern::_transformFieldBounds( const vector<FieldInterval>& oldIntervals , + const BSONElement& field ) const { + + BoundList ret; + vector<FieldInterval>::const_iterator i; + for( i = oldIntervals.begin(); i != oldIntervals.end(); ++i ) { + if ( isAscending( field ) ){ + // straightforward map [a,b] --> [a,b] + ret.push_back( make_pair( BSON( field.fieldName() << i->_lower._bound ) , + BSON( field.fieldName() << i->_upper._bound ) ) ); + } else if ( isDescending( field ) ) { + // reverse [a,b] --> [b,a] + ret.push_back( make_pair( BSON( field.fieldName() << i->_upper._bound ) , + BSON( field.fieldName() << i->_lower._bound ) ) ); + } else if ( isHashed( field ) ){ + if ( i->equality() ) { + // hash [a,a] --> [hash(a),hash(a)] + long long int h = BSONElementHasher::hash64( i->_lower._bound , + BSONElementHasher::DEFAULT_HASH_SEED ); + ret.push_back( make_pair( BSON( field.fieldName() << h ) , + BSON( field.fieldName() << h ) ) ); + } else { + // if it's a range interval and this field is hashed, just generate one + // big interval from MinKey to MaxKey, since these vals could lie anywhere + ret.clear(); + ret.push_back( make_pair( BSON( field.fieldName() << MINKEY ) , + BSON( field.fieldName() << MAXKEY ) ) ); + break; + } + } + } + + if ( isDescending( field ) ) { + // now order is [ [2,1], [4,3] , [6,5]....[n,n-1] ]. Reverse to get decreasing order. + reverse( ret.begin() , ret.end() ); + } else if ( isHashed( field ) ){ + // [ hash(a) , hash(b) , hash(c) ...] no longer in order, so sort before returning + sort( ret.begin() , ret.end() ); + } + + return ret; + } + } // namespace mongo |