author    Kevin Matulef <matulef@10gen.com>    2012-10-15 16:12:18 -0400
committer Kevin Matulef <matulef@10gen.com>    2012-10-15 16:22:13 -0400
commit    5a28905a8359f336aaa9c6acffe96448ab88e2a1 (patch)
tree      f9631cd581539ce3afe48c191782ce476198ea5d /src/mongo/db/keypattern.cpp
parent    732f351f4699758ed30d0a2c7e74969b03004d1a (diff)
download  mongo-5a28905a8359f336aaa9c6acffe96448ab88e2a1.tar.gz
SERVER-2001 calculate query bounds using more general key expressions
In sharding, given a key expression like {a : 1} or {a : -1}, we must translate a query into a set of bounds to figure out which shards are relevant. This patch amends the keyBounds calculation so that patterns that start with "hashed" fields compute the correct bounds.
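As a rough, standalone illustration of the per-field transformation this commit introduces (a toy sketch, not the mongo code: plain integer intervals replace BSON bounds, std::hash replaces BSONElementHasher::hash64, and FieldKind is a made-up stand-in for the 1 / -1 / "hashed" pattern values), ascending fields keep an interval [a,b] as-is, descending fields flip it to [b,a], and hashed fields map a point [a,a] to [hash(a),hash(a)] but must widen any true range to cover everything:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <limits>
#include <utility>
#include <vector>

using Interval = std::pair<int64_t, int64_t>;  // [lower, upper]

// Hypothetical stand-in for the three key-pattern directions ({a:1}, {a:-1}, {a:"hashed"}).
enum class FieldKind { Ascending, Descending, Hashed };

// Map one field's query intervals according to the key-pattern direction,
// mirroring the idea behind _transformFieldBounds (integer endpoints stand in
// for BSON bounds, std::hash for BSONElementHasher::hash64).
std::vector<Interval> transformFieldBounds(const std::vector<Interval>& in, FieldKind kind) {
    std::vector<Interval> out;
    for (const Interval& iv : in) {
        if (kind == FieldKind::Ascending) {
            out.push_back(iv);                               // [a,b] --> [a,b]
        } else if (kind == FieldKind::Descending) {
            out.push_back({iv.second, iv.first});            // [a,b] --> [b,a]
        } else if (iv.first == iv.second) {                  // hashed point: [a,a] --> [h(a),h(a)]
            int64_t h = static_cast<int64_t>(std::hash<int64_t>{}(iv.first));
            out.push_back({h, h});
        } else {                                             // hashed range: the values can hash anywhere,
            return {{std::numeric_limits<int64_t>::min(),    // so return one interval covering everything
                     std::numeric_limits<int64_t>::max()}};  // (the MinKey..MaxKey analogue)
        }
    }
    if (kind == FieldKind::Descending)
        std::reverse(out.begin(), out.end());                // keep the overall list in decreasing order
    else if (kind == FieldKind::Hashed)
        std::sort(out.begin(), out.end());                   // hashed points are unordered; sort them
    return out;
}

int main() {
    std::vector<Interval> ranges = {{3, 3}, {7, 9}};         // e.g. a point plus a range on one field
    for (const Interval& iv : transformFieldBounds(ranges, FieldKind::Descending))
        std::cout << "[" << iv.first << ", " << iv.second << "]\n";  // prints [9, 7] then [3, 3]
}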
Diffstat (limited to 'src/mongo/db/keypattern.cpp')
-rw-r--r--  src/mongo/db/keypattern.cpp | 119
1 file changed, 88 insertions, 31 deletions
diff --git a/src/mongo/db/keypattern.cpp b/src/mongo/db/keypattern.cpp
index 93b4ab050fc..1f3fd6233ec 100644
--- a/src/mongo/db/keypattern.cpp
+++ b/src/mongo/db/keypattern.cpp
@@ -47,64 +47,78 @@ namespace mongo {
return false;
}
+ typedef vector<pair<BSONObj,BSONObj> >::const_iterator BoundListIter;
+
BoundList KeyPattern::keyBounds( const FieldRangeSet& queryConstraints ) const {
+ // To construct our bounds we will generate intervals based on constraints for
+ // the first field, then compound intervals based on constraints for the first
+ // 2 fields, then compound intervals for the first 3 fields, etc.
+ // As we loop through the fields, we start generating new intervals that will later
+ // get extended in another iteration of the loop. We define these partially constructed
+ // intervals using pairs of BSONObjBuilders (shared_ptrs, since after one iteration of the
+ // loop they still must exist outside their scope).
typedef vector< pair< shared_ptr<BSONObjBuilder> ,
shared_ptr<BSONObjBuilder> > > BoundBuilders;
BoundBuilders builders;
builders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ),
shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) );
BSONObjIterator i( _pattern );
- // until equalityOnly is false, we are just dealing with equality (no range or $in querys).
+ // until equalityOnly is false, we are just dealing with equality (no range or $in queries).
bool equalityOnly = true;
while( i.more() ) {
BSONElement e = i.next();
+
+ // get the relevant intervals for this field, but we may have to transform the
+ // list of what's relevant according to the expression for this field
const FieldRange &fr = queryConstraints.range( e.fieldName() );
- int number = (int) e.number(); // returns 0.0 if not numeric
- bool forward = ( number >= 0 );
+ const vector<FieldInterval> &oldIntervals = fr.intervals();
+ BoundList fieldBounds = _transformFieldBounds( oldIntervals , e );
+
if ( equalityOnly ) {
- if ( fr.equality() ) {
- for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) {
- j->first->appendAs( fr.min(), "" );
- j->second->appendAs( fr.min(), "" );
+ if ( fieldBounds.size() == 1 &&
+ ( fieldBounds.front().first == fieldBounds.front().second ) ){
+ // this field is only a single point-interval
+ BoundBuilders::const_iterator j;
+ for( j = builders.begin(); j != builders.end(); ++j ) {
+ j->first->appendElements( fieldBounds.front().first );
+ j->second->appendElements( fieldBounds.front().first );
}
}
else {
+ // This clause is the first to generate more than a single point.
+ // We only execute this clause once. After that, we simplify the bound
+ // extensions to prevent combinatorial explosion.
equalityOnly = false;
BoundBuilders newBuilders;
- const vector<FieldInterval> &intervals = fr.intervals();
- for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) {
+ BoundBuilders::const_iterator i;
+ for( i = builders.begin(); i != builders.end(); ++i ) {
BSONObj first = i->first->obj();
BSONObj second = i->second->obj();
- if ( forward ) {
- for( vector<FieldInterval>::const_iterator j = intervals.begin(); j != intervals.end(); ++j ) {
- uassert( 16449, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < MAX_IN_COMBINATIONS );
- newBuilders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) );
- newBuilders.back().first->appendElements( first );
- newBuilders.back().second->appendElements( second );
- newBuilders.back().first->appendAs( j->_lower._bound, "" );
- newBuilders.back().second->appendAs( j->_upper._bound, "" );
- }
- }
- else {
- for( vector<FieldInterval>::const_reverse_iterator j = intervals.rbegin(); j != intervals.rend(); ++j ) {
- uassert( 16450, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < MAX_IN_COMBINATIONS );
- newBuilders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) );
- newBuilders.back().first->appendElements( first );
- newBuilders.back().second->appendElements( second );
- newBuilders.back().first->appendAs( j->_upper._bound, "" );
- newBuilders.back().second->appendAs( j->_lower._bound, "" );
- }
+ for(BoundListIter j = fieldBounds.begin(); j != fieldBounds.end(); ++j ) {
+ uassert( 16452,
+ "combinatorial limit of $in partitioning of results exceeded" ,
+ newBuilders.size() < MAX_IN_COMBINATIONS );
+ newBuilders.push_back(
+ make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ),
+ shared_ptr<BSONObjBuilder>( new BSONObjBuilder())));
+ newBuilders.back().first->appendElements( first );
+ newBuilders.back().second->appendElements( second );
+ newBuilders.back().first->appendElements( j->first );
+ newBuilders.back().second->appendElements( j->second );
}
}
builders = newBuilders;
}
}
else {
- for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) {
- j->first->appendAs( forward ? fr.min() : fr.max(), "" );
- j->second->appendAs( forward ? fr.max() : fr.min(), "" );
+ // if we've already generated a range or multiple point-intervals
+ // just extend what we've generated with min/max bounds for this field
+ BoundBuilders::const_iterator j;
+ for( j = builders.begin(); j != builders.end(); ++j ) {
+ j->first->appendElements( fieldBounds.front().first );
+ j->second->appendElements( fieldBounds.back().second );
}
}
}
@@ -114,4 +128,47 @@ namespace mongo {
return ret;
}
+ BoundList KeyPattern::_transformFieldBounds( const vector<FieldInterval>& oldIntervals ,
+ const BSONElement& field ) const {
+
+ BoundList ret;
+ vector<FieldInterval>::const_iterator i;
+ for( i = oldIntervals.begin(); i != oldIntervals.end(); ++i ) {
+ if ( isAscending( field ) ){
+ // straightforward map [a,b] --> [a,b]
+ ret.push_back( make_pair( BSON( field.fieldName() << i->_lower._bound ) ,
+ BSON( field.fieldName() << i->_upper._bound ) ) );
+ } else if ( isDescending( field ) ) {
+ // reverse [a,b] --> [b,a]
+ ret.push_back( make_pair( BSON( field.fieldName() << i->_upper._bound ) ,
+ BSON( field.fieldName() << i->_lower._bound ) ) );
+ } else if ( isHashed( field ) ){
+ if ( i->equality() ) {
+ // hash [a,a] --> [hash(a),hash(a)]
+ long long int h = BSONElementHasher::hash64( i->_lower._bound ,
+ BSONElementHasher::DEFAULT_HASH_SEED );
+ ret.push_back( make_pair( BSON( field.fieldName() << h ) ,
+ BSON( field.fieldName() << h ) ) );
+ } else {
+ // if it's a range interval and this field is hashed, just generate one
+ // big interval from MinKey to MaxKey, since these vals could lie anywhere
+ ret.clear();
+ ret.push_back( make_pair( BSON( field.fieldName() << MINKEY ) ,
+ BSON( field.fieldName() << MAXKEY ) ) );
+ break;
+ }
+ }
+ }
+
+ if ( isDescending( field ) ) {
+ // now order is [ [2,1], [4,3] , [6,5]....[n,n-1] ]. Reverse to get decreasing order.
+ reverse( ret.begin() , ret.end() );
+ } else if ( isHashed( field ) ){
+ // [ hash(a) , hash(b) , hash(c) ...] no longer in order, so sort before returning
+ sort( ret.begin() , ret.end() );
+ }
+
+ return ret;
+ }
+
} // namespace mongo
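For intuition about the cross-product step in keyBounds() above (the newBuilders loop guarded by uassert 16452), here is a minimal compilable sketch under assumed simplifications: string prefixes stand in for the BSONObjBuilder pairs, and maxCombinations plays the role of MAX_IN_COMBINATIONS. Each partially built bound is extended with every interval of the current field, and the total number of combinations is capped.

#include <cstddef>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

using Bound = std::pair<std::string, std::string>;   // (min side, max side) of one compound bound

// Hypothetical helper: extend every existing partial bound with every interval
// of the next field, failing once the number of combinations exceeds
// maxCombinations (the analogue of MAX_IN_COMBINATIONS / uassert 16452).
std::vector<Bound> extendBounds(const std::vector<Bound>& builders,
                                const std::vector<Bound>& fieldBounds,
                                std::size_t maxCombinations) {
    std::vector<Bound> next;
    for (const Bound& b : builders) {
        for (const Bound& f : fieldBounds) {
            if (next.size() >= maxCombinations)
                throw std::runtime_error(
                    "combinatorial limit of $in partitioning of results exceeded");
            next.push_back({b.first + f.first, b.second + f.second});  // compound the key prefixes
        }
    }
    return next;
}

Note that in the actual keyBounds() this expansion runs only once, for the first field that is not a single point; later fields simply extend each existing bound with that field's overall min and max, which is what keeps the number of combinations from multiplying across fields.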