SERVER-10471 add s2near stage, enable all 2dsphere queries, enable 2d queries (just slow).

author: Hari Khalsa <hkhalsa@10gen.com> 2013-10-01 22:43:48 -0400
committer: Hari Khalsa <hkhalsa@10gen.com> 2013-10-04 23:27:44 -0400
commit: ab167c642f49206b4328882286cd5b83c19088bd (patch)
tree: 1f7c57c90a81cb53b8d37e96fe37ed1621036853 /src/mongo/db/index/expression_index.h
parent: d09e608691aae000f3176b27cc67a7900229cd1e (diff)
download: mongo-ab167c642f49206b4328882286cd5b83c19088bd.tar.gz
1 files changed, 105 insertions, 2 deletions
diff --git a/src/mongo/db/index/expression_index.h b/src/mongo/db/index/expression_index.h
index 104c9580a44..d172af5836b 100644
--- a/src/mongo/db/index/expression_index.h
+++ b/src/mongo/db/index/expression_index.h
@@ -30,14 +30,14 @@
 
 #include "mongo/db/jsobj.h"
 #include "mongo/db/hasher.h"
+#include "mongo/db/query/index_bounds_builder.h"
 
 namespace mongo {
 
     /**
      * Functions that compute expression index mappings.
      *
-     * TODO: In the general case we would have (key pattern field, query value) -> projected value.
-     * Given that our only acceptable key pattern field is "hashed" we do something less general.
+     * TODO: I think we could structure this more generally with respect to planning.
      */
     class ExpressionMapping {
     public:
@@ -46,6 +46,109 @@ namespace mongo {
             bob.append("", BSONElementHasher::hash64(value, BSONElementHasher::DEFAULT_HASH_SEED));
             return bob.obj();
         }
+
+        static void cover2dsphere(const S2Region& region, OrderedIntervalList* oilOut) {
+            // XXX: should grab coarsest level from the index since the user can possibly change it.
+            int coarsestIndexedLevel = 
+                        S2::kAvgEdge.GetClosestLevel(100 * 1000.0 / kRadiusOfEarthInMeters);
+
+
+            // The min level of our covering is the level whose cells are the closest match to the
+            // *area* of the region (or the max indexed level, whichever is smaller) The max level
+            // is 4 sizes larger.
+            double edgeLen = sqrt(region.GetRectBound().Area());
+            S2RegionCoverer coverer;
+            coverer.set_min_level(min(coarsestIndexedLevel,
+                                      2 + S2::kAvgEdge.GetClosestLevel(edgeLen)));
+            coverer.set_max_level(4 + coverer.min_level());
+
+            vector<S2CellId> cover;
+            coverer.GetCovering(region, &cover);
+
+            // Look at the cells we cover and all cells that are within our covering and finer.
+            // Anything with our cover as a strict prefix is contained within the cover and should
+            // be intersection tested.
+            bool considerCoarser = false;
+            set<string> intervalSet;
+            for (size_t i = 0; i < cover.size(); ++i) {
+                intervalSet.insert(cover[i].toString());
+                // If any of our covers could be covered by something in the index, we have
+                // to look at things coarser.
+                if (cover[i].level() > coarsestIndexedLevel) {
+                    considerCoarser = true;
+                }
+            }
+
+            set<string> exactSet;
+            if (considerCoarser) {
+                // Look at the cells that cover us.  We want to look at every cell that contains the
+                // covering we would index on if we were to insert the query geometry.  We generate
+                // the would-index-with-this-covering and find all the cells strictly containing the
+                // cells in that set, until we hit the coarsest indexed cell.  We use equality, not
+                // a prefix match.  Why not prefix?  Because we've already looked at everything
+                // finer or as fine as our initial covering.
+                //
+                // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't
+                // want to look at cells 21212[not-1] because we know they're not going to intersect
+                // with 212121, but entries inserted with cell value 21212 (no trailing digits) may.
+                // And we've already looked at points with the cell id 211111 from the regex search
+                // created above, so we only want things where the value of the last digit is not
+                // stored (and therefore could be 1).
+                for (size_t i = 0; i < cover.size(); ++i) {
+                    for (S2CellId id = cover[i].parent(); id.level() >= coarsestIndexedLevel;
+                            id = id.parent()) {
+                        exactSet.insert(id.toString());
+                    }
+                }
+            }
+
+            // We turned the cell IDs into strings which define point intervals or prefixes of
+            // strings we want to look for.
+            set<string>::iterator exactIt = exactSet.begin();
+            set<string>::iterator intervalIt = intervalSet.begin();
+            while (exactSet.end() != exactIt && intervalSet.end() != intervalIt) {
+                const string& exact = *exactIt;
+                const string& ival = *intervalIt;
+                if (exact < ival) {
+                    // add exact
+                    oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(exact));
+                    exactIt++;
+                }
+                else {
+                    string end = ival;
+                    end[end.size() - 1]++;
+                    oilOut->intervals.push_back(
+                        IndexBoundsBuilder::makeRangeInterval(ival, end, true, false));
+                    intervalIt++;
+                }
+            }
+
+            if (exactSet.end() != exactIt) {
+                verify(intervalSet.end() == intervalIt);
+                do {
+                    oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(*exactIt));
+                    exactIt++;
+                } while (exactSet.end() != exactIt);
+            }
+            else if (intervalSet.end() != intervalIt) {
+                verify(exactSet.end() == exactIt);
+                do {
+                    const string& ival = *intervalIt;
+                    string end = ival;
+                    end[end.size() - 1]++;
+                    oilOut->intervals.push_back(
+                        IndexBoundsBuilder::makeRangeInterval(ival, end, true, false));
+                    intervalIt++;
+                } while (intervalSet.end() != intervalIt);
+            }
+
+            // Make sure that our intervals don't overlap each other and are ordered correctly.
+            // This perhaps should only be done in debug mode.
+            if (!oilOut->isValidFor(1)) {
+                cout << "check your assumptions! OIL = " << oilOut->toString() << endl;
+                verify(0);
+            }
+        }
     };
 
 }  // namespace mongo
author	Hari Khalsa <hkhalsa@10gen.com>	2013-10-01 22:43:48 -0400
committer	Hari Khalsa <hkhalsa@10gen.com>	2013-10-04 23:27:44 -0400
commit	ab167c642f49206b4328882286cd5b83c19088bd (patch)
tree	1f7c57c90a81cb53b8d37e96fe37ed1621036853 /src/mongo/db/index/expression_index.h
parent	d09e608691aae000f3176b27cc67a7900229cd1e (diff)
download	mongo-ab167c642f49206b4328882286cd5b83c19088bd.tar.gz