summaryrefslogtreecommitdiff
path: root/src/mongo/db/queryutil.h
diff options
context:
space:
mode:
authorEliot Horowitz <eliot@10gen.com>2011-12-24 15:33:26 -0500
committerEliot Horowitz <eliot@10gen.com>2011-12-24 15:33:45 -0500
commitae1ecd9c786911f9f1f0242f0f7d702b3e5dfeba (patch)
tree92f8e1649e6f080b251ff5f1763679a72eb59b34 /src/mongo/db/queryutil.h
parentdfa4cd7e2cf109b072440155fabc08a93c8045a0 (diff)
downloadmongo-ae1ecd9c786911f9f1f0242f0f7d702b3e5dfeba.tar.gz
bulk move of code to src/ SERVER-4551
Diffstat (limited to 'src/mongo/db/queryutil.h')
-rw-r--r--src/mongo/db/queryutil.h443
1 files changed, 443 insertions, 0 deletions
diff --git a/src/mongo/db/queryutil.h b/src/mongo/db/queryutil.h
new file mode 100644
index 00000000000..aefef27cc8b
--- /dev/null
+++ b/src/mongo/db/queryutil.h
@@ -0,0 +1,443 @@
+// @file queryutil.h - Utility classes representing ranges of valid BSONElement values for a query.
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "jsobj.h"
+#include "indexkey.h"
+
+namespace mongo {
+
+ /**
+ * One side of an interval of valid BSONElements, specified by a value and a
+ * boolean indicating whether the interval includes the value.
+ */
+ struct FieldBound {
+ BSONElement _bound;
+ bool _inclusive;
+ bool operator==( const FieldBound &other ) const {
+ return _bound.woCompare( other._bound ) == 0 &&
+ _inclusive == other._inclusive;
+ }
+ void flipInclusive() { _inclusive = !_inclusive; }
+ };
+
+ /** A closed interval composed of a lower and an upper FieldBound. */
+ struct FieldInterval {
+ FieldInterval() : _cachedEquality( -1 ) {}
+ FieldInterval( const BSONElement& e ) : _cachedEquality( -1 ) {
+ _lower._bound = _upper._bound = e;
+ _lower._inclusive = _upper._inclusive = true;
+ }
+ FieldBound _lower;
+ FieldBound _upper;
+ /** @return true iff no single element can be contained in the interval. */
+ bool strictValid() const {
+ int cmp = _lower._bound.woCompare( _upper._bound, false );
+ return ( cmp < 0 || ( cmp == 0 && _lower._inclusive && _upper._inclusive ) );
+ }
+ /** @return true iff the interval is an equality constraint. */
+ bool equality() const;
+ mutable int _cachedEquality;
+
+ string toString() const;
+ };
+
+ /**
+ * An ordered list of FieldIntervals expressing constraints on valid
+ * BSONElement values for a field.
+ */
+ class FieldRange {
+ public:
+ FieldRange( const BSONElement &e , bool singleKey , bool isNot=false , bool optimize=true );
+
+ /** @return Range intersection with 'other'. */
+ const FieldRange &operator&=( const FieldRange &other );
+ /** @return Range union with 'other'. */
+ const FieldRange &operator|=( const FieldRange &other );
+ /** @return Range of elements elements included in 'this' but not 'other'. */
+ const FieldRange &operator-=( const FieldRange &other );
+ /** @return true iff this range is a subset of 'other'. */
+ bool operator<=( const FieldRange &other ) const;
+
+ /**
+ * If there are any valid values for this range, the extreme values can
+ * be extracted.
+ */
+
+ BSONElement min() const { assert( !empty() ); return _intervals[ 0 ]._lower._bound; }
+ BSONElement max() const { assert( !empty() ); return _intervals[ _intervals.size() - 1 ]._upper._bound; }
+ bool minInclusive() const { assert( !empty() ); return _intervals[ 0 ]._lower._inclusive; }
+ bool maxInclusive() const { assert( !empty() ); return _intervals[ _intervals.size() - 1 ]._upper._inclusive; }
+
+ /** @return true iff this range expresses a single equality interval. */
+ bool equality() const;
+ /** @return true if all the intervals for this range are equalities */
+ bool inQuery() const;
+ /** @return true iff this range does not include every BSONElement */
+ bool nontrivial() const;
+ /** @return true iff this range matches no BSONElements. */
+ bool empty() const { return _intervals.empty(); }
+
+ /** Empty the range so it matches no BSONElements. */
+ void makeEmpty() { _intervals.clear(); }
+ const vector<FieldInterval> &intervals() const { return _intervals; }
+ string getSpecial() const { return _special; }
+ /** Make component intervals noninclusive. */
+ void setExclusiveBounds();
+ /**
+ * Constructs a range where all FieldIntervals and FieldBounds are in
+ * the opposite order of the current range.
+ * NOTE the resulting intervals might not be strictValid().
+ */
+ void reverse( FieldRange &ret ) const;
+
+ string toString() const;
+ private:
+ BSONObj addObj( const BSONObj &o );
+ void finishOperation( const vector<FieldInterval> &newIntervals, const FieldRange &other );
+ vector<FieldInterval> _intervals;
+ // Owns memory for our BSONElements.
+ vector<BSONObj> _objData;
+ string _special;
+ bool _singleKey;
+ };
+
+ /**
+ * A BoundList contains intervals specified by inclusive start
+ * and end bounds. The intervals should be nonoverlapping and occur in
+ * the specified direction of traversal. For example, given a simple index {i:1}
+ * and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList
+ * would be valid for index {i:-1} with direction -1.
+ */
+ typedef vector<pair<BSONObj,BSONObj> > BoundList;
+
+ class QueryPattern;
+
+ /**
+ * A set of FieldRanges determined from constraints on the fields of a query,
+ * that may be used to determine index bounds.
+ */
+ class FieldRangeSet {
+ public:
+ friend class OrRangeGenerator;
+ friend class FieldRangeVector;
+ FieldRangeSet( const char *ns, const BSONObj &query , bool singleKey , bool optimize=true );
+
+ /** @return true if there is a nontrivial range for the given field. */
+ bool hasRange( const char *fieldName ) const {
+ map<string, FieldRange>::const_iterator f = _ranges.find( fieldName );
+ return f != _ranges.end();
+ }
+ /** @return range for the given field. */
+ const FieldRange &range( const char *fieldName ) const;
+ /** @return range for the given field. */
+ FieldRange &range( const char *fieldName );
+ /** @return the number of nontrivial ranges. */
+ int nNontrivialRanges() const;
+ /** @return the field ranges comprising this set. */
+ const map<string,FieldRange> &ranges() const { return _ranges; }
+ /**
+ * @return true if a match could be possible on every field. Generally this
+ * is not useful information for a single key FieldRangeSet and
+ * matchPossibleForIndex() should be used instead.
+ */
+ bool matchPossible() const;
+ /**
+ * @return true if a match could be possible given the value of _singleKey
+ * and index key 'keyPattern'.
+ * @param keyPattern May be {} or {$natural:1} for a non index scan.
+ */
+ bool matchPossibleForIndex( const BSONObj &keyPattern ) const;
+
+ const char *ns() const { return _ns; }
+
+ /**
+ * @return a simplified query from the extreme values of the nontrivial
+ * fields.
+ * @param fields If specified, the fields of the returned object are
+ * ordered to match those of 'fields'.
+ */
+ BSONObj simplifiedQuery( const BSONObj &fields = BSONObj() ) const;
+
+ QueryPattern pattern( const BSONObj &sort = BSONObj() ) const;
+ string getSpecial() const;
+
+ /**
+ * @return a FieldRangeSet approximation of the documents in 'this' but
+ * not in 'other'. The approximation will be a superset of the documents
+ * in 'this' but not 'other'.
+ */
+ const FieldRangeSet &operator-=( const FieldRangeSet &other );
+ /** @return intersection of 'this' with 'other'. */
+ const FieldRangeSet &operator&=( const FieldRangeSet &other );
+
+ /**
+ * @return an ordered list of bounds generated using an index key pattern
+ * and traversal direction.
+ *
+ * NOTE This function is deprecated in the query optimizer and only
+ * currently used by the sharding code.
+ */
+ BoundList indexBounds( const BSONObj &keyPattern, int direction ) const;
+
+ /**
+ * @return - A new FieldRangeSet based on this FieldRangeSet, but with only
+ * a subset of the fields.
+ * @param fields - Only fields which are represented as field names in this object
+ * will be included in the returned FieldRangeSet.
+ */
+ FieldRangeSet *subset( const BSONObj &fields ) const;
+
+ bool singleKey() const { return _singleKey; }
+
+ BSONObj originalQuery() const { return _queries[ 0 ]; }
+ private:
+ void appendQueries( const FieldRangeSet &other );
+ void makeEmpty();
+ void processQueryField( const BSONElement &e, bool optimize );
+ void processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize );
+ static FieldRange *__singleKeyTrivialRange;
+ static FieldRange *__multiKeyTrivialRange;
+ const FieldRange &trivialRange() const;
+ map<string,FieldRange> _ranges;
+ const char *_ns;
+ // Owns memory for FieldRange BSONElements.
+ vector<BSONObj> _queries;
+ bool _singleKey;
+ };
+
+ class NamespaceDetails;
+
+ /**
+ * A pair of FieldRangeSets, one representing constraints for single key
+ * indexes and the other representing constraints for multi key indexes and
+ * unindexed scans. In several member functions the caller is asked to
+ * supply an index so that the implementation may utilize the proper
+ * FieldRangeSet and return results that are appropriate with respect to that
+ * supplied index.
+ */
+ class FieldRangeSetPair {
+ public:
+ FieldRangeSetPair( const char *ns, const BSONObj &query, bool optimize=true )
+ :_singleKey( ns, query, true, optimize ), _multiKey( ns, query, false, optimize ) {}
+
+ /**
+ * @return the appropriate single or multi key FieldRangeSet for the specified index.
+ * @param idxNo -1 for non index scan.
+ */
+ const FieldRangeSet &frsForIndex( const NamespaceDetails* nsd, int idxNo ) const;
+
+ /** @return a field range in the single key FieldRangeSet. */
+ const FieldRange &singleKeyRange( const char *fieldName ) const {
+ return _singleKey.range( fieldName );
+ }
+ /** @return true if the range limits are equivalent to an empty query. */
+ bool noNontrivialRanges() const;
+ /** @return false if a match is impossible regardless of index. */
+ bool matchPossible() const { return _multiKey.matchPossible(); }
+ /**
+ * @return false if a match is impossible on the specified index.
+ * @param idxNo -1 for non index scan.
+ */
+ bool matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const;
+
+ const char *ns() const { return _singleKey.ns(); }
+
+ string getSpecial() const { return _singleKey.getSpecial(); }
+
+ /** Intersect with another FieldRangeSetPair. */
+ FieldRangeSetPair &operator&=( const FieldRangeSetPair &other );
+ /**
+ * Subtract a FieldRangeSet, generally one expressing a range that has
+ * already been scanned.
+ */
+ FieldRangeSetPair &operator-=( const FieldRangeSet &scanned );
+
+ BoundList singleKeyIndexBounds( const BSONObj &keyPattern, int direction ) const {
+ return _singleKey.indexBounds( keyPattern, direction );
+ }
+
+ BSONObj originalQuery() const { return _singleKey.originalQuery(); }
+
+ private:
+ FieldRangeSetPair( const FieldRangeSet &singleKey, const FieldRangeSet &multiKey )
+ :_singleKey( singleKey ), _multiKey( multiKey ) {}
+ void assertValidIndex( const NamespaceDetails *d, int idxNo ) const;
+ void assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const;
+ /** matchPossibleForIndex() must be true. */
+ BSONObj simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const;
+ FieldRangeSet _singleKey;
+ FieldRangeSet _multiKey;
+ friend class OrRangeGenerator;
+ friend struct QueryUtilIndexed;
+ };
+
+ class IndexSpec;
+
+ /**
+ * An ordered list of fields and their FieldRanges, corresponding to valid
+ * index keys for a given index spec.
+ */
+ class FieldRangeVector {
+ public:
+ /**
+ * @param frs The valid ranges for all fields, as defined by the query spec
+ * @param indexSpec The index spec (key pattern and info)
+ * @param direction The direction of index traversal
+ */
+ FieldRangeVector( const FieldRangeSet &frs, const IndexSpec &indexSpec, int direction );
+
+ /** @return the number of index ranges represented by 'this' */
+ long long size();
+ /** @return starting point for an index traversal. */
+ BSONObj startKey() const;
+ /** @return end point for an index traversal. */
+ BSONObj endKey() const;
+ /** @return a client readable representation of 'this' */
+ BSONObj obj() const;
+
+ const IndexSpec& getSpec(){ return _indexSpec; }
+
+ /**
+ * @return true iff the provided document matches valid ranges on all
+ * of this FieldRangeVector's fields, which is the case iff this document
+ * would be returned while scanning the index corresponding to this
+ * FieldRangeVector. This function is used for $or clause deduping.
+ */
+ bool matches( const BSONObj &obj ) const;
+
+ /**
+ * @return first key of 'obj' that would be encountered by a forward
+ * index scan using this FieldRangeVector, BSONObj() if no such key.
+ */
+ BSONObj firstMatch( const BSONObj &obj ) const;
+
+ private:
+ int matchingLowElement( const BSONElement &e, int i, bool direction, bool &lowEquality ) const;
+ bool matchesElement( const BSONElement &e, int i, bool direction ) const;
+ bool matchesKey( const BSONObj &key ) const;
+ vector<FieldRange> _ranges;
+ const IndexSpec _indexSpec;
+ int _direction;
+ vector<BSONObj> _queries; // make sure mem owned
+ friend class FieldRangeVectorIterator;
+ };
+
+ /**
+ * Helper class for iterating through an ordered representation of keys
+ * to find those keys that match a specified FieldRangeVector.
+ */
+ class FieldRangeVectorIterator {
+ public:
+ FieldRangeVectorIterator( const FieldRangeVector &v ) : _v( v ), _i( _v._ranges.size(), -1 ), _cmp( _v._ranges.size(), 0 ), _inc( _v._ranges.size(), false ), _after() {
+ }
+ static BSONObj minObject() {
+ BSONObjBuilder b; b.appendMinKey( "" );
+ return b.obj();
+ }
+ static BSONObj maxObject() {
+ BSONObjBuilder b; b.appendMaxKey( "" );
+ return b.obj();
+ }
+ /**
+ * @return Suggested advance method, based on current key.
+ * -2 Iteration is complete, no need to advance.
+ * -1 Advance to the next key, without skipping.
+ * >=0 Skip parameter. If @return is r, skip to the key comprised
+ * of the first r elements of curr followed by the (r+1)th and
+ * remaining elements of cmp() (with inclusivity specified by
+ * the (r+1)th and remaining elements of inc()). If after() is
+ * true, skip past this key not to it.
+ */
+ int advance( const BSONObj &curr );
+ const vector<const BSONElement *> &cmp() const { return _cmp; }
+ const vector<bool> &inc() const { return _inc; }
+ bool after() const { return _after; }
+ void prepDive();
+ void setZero( int i ) { for( int j = i; j < (int)_i.size(); ++j ) _i[ j ] = 0; }
+ void setMinus( int i ) { for( int j = i; j < (int)_i.size(); ++j ) _i[ j ] = -1; }
+ bool ok() { return _i[ 0 ] < (int)_v._ranges[ 0 ].intervals().size(); }
+ BSONObj startKey();
+ // temp
+ BSONObj endKey();
+ private:
+ const FieldRangeVector &_v;
+ vector<int> _i;
+ vector<const BSONElement*> _cmp;
+ vector<bool> _inc;
+ bool _after;
+ };
+
+ /**
+ * As we iterate through $or clauses this class generates a FieldRangeSetPair
+ * for the current $or clause, in some cases by excluding ranges that were
+ * included in a previous clause.
+ */
+ class OrRangeGenerator {
+ public:
+ OrRangeGenerator( const char *ns, const BSONObj &query , bool optimize=true );
+
+ /**
+ * @return true iff we are done scanning $or clauses. if there's a
+ * useless or clause, we won't use or index ranges to help with scanning.
+ */
+ bool orFinished() const { return _orFound && _orSets.empty(); }
+ /** Iterates to the next $or clause by removing the current $or clause. */
+ void popOrClause( NamespaceDetails *nsd, int idxNo, const BSONObj &keyPattern );
+ void popOrClauseSingleKey();
+ /** @return FieldRangeSetPair for the current $or clause. */
+ FieldRangeSetPair *topFrsp() const;
+ /**
+ * @return original FieldRangeSetPair for the current $or clause. While the
+ * original bounds are looser, they are composed of fewer ranges and it
+ * is faster to do operations with them; when they can be used instead of
+ * more precise bounds, they should.
+ */
+ FieldRangeSetPair *topFrspOriginal() const;
+
+ string getSpecial() const { return _baseSet.getSpecial(); }
+
+ bool moreOrClauses() const { return !_orSets.empty(); }
+ private:
+ void assertMayPopOrClause();
+ void popOrClause( const FieldRangeSet *toDiff, NamespaceDetails *d = 0, int idxNo = -1, const BSONObj &keyPattern = BSONObj() );
+ FieldRangeSetPair _baseSet;
+ list<FieldRangeSetPair> _orSets;
+ list<FieldRangeSetPair> _originalOrSets;
+ // ensure memory is owned
+ list<FieldRangeSetPair> _oldOrSets;
+ bool _orFound;
+ friend struct QueryUtilIndexed;
+ };
+
+ /** returns a string that when used as a matcher, would match a super set of regex()
+ returns "" for complex regular expressions
+ used to optimize queries in some simple regex cases that start with '^'
+
+ if purePrefix != NULL, sets it to whether the regex can be converted to a range query
+ */
+ string simpleRegex(const char* regex, const char* flags, bool* purePrefix=NULL);
+
+ /** returns the upper bound of a query that matches prefix */
+ string simpleRegexEnd( string prefix );
+
+ long long applySkipLimit( long long num , const BSONObj& cmd );
+
+} // namespace mongo
+
+#include "queryutil-inl.h"