summaryrefslogtreecommitdiff
path: root/src/mongo/db/index/expression_index.h
diff options
context:
space:
mode:
authorHari Khalsa <hkhalsa@10gen.com>2013-10-01 22:43:48 -0400
committerHari Khalsa <hkhalsa@10gen.com>2013-10-04 23:27:44 -0400
commitab167c642f49206b4328882286cd5b83c19088bd (patch)
tree1f7c57c90a81cb53b8d37e96fe37ed1621036853 /src/mongo/db/index/expression_index.h
parentd09e608691aae000f3176b27cc67a7900229cd1e (diff)
downloadmongo-ab167c642f49206b4328882286cd5b83c19088bd.tar.gz
SERVER-10471 add s2near stage, enable all 2dsphere queries, enable 2d queries (just slow).
Diffstat (limited to 'src/mongo/db/index/expression_index.h')
-rw-r--r--src/mongo/db/index/expression_index.h107
1 files changed, 105 insertions, 2 deletions
diff --git a/src/mongo/db/index/expression_index.h b/src/mongo/db/index/expression_index.h
index 104c9580a44..d172af5836b 100644
--- a/src/mongo/db/index/expression_index.h
+++ b/src/mongo/db/index/expression_index.h
@@ -30,14 +30,14 @@
#include "mongo/db/jsobj.h"
#include "mongo/db/hasher.h"
+#include "mongo/db/query/index_bounds_builder.h"
namespace mongo {
/**
* Functions that compute expression index mappings.
*
- * TODO: In the general case we would have (key pattern field, query value) -> projected value.
- * Given that our only acceptable key pattern field is "hashed" we do something less general.
+ * TODO: I think we could structure this more generally with respect to planning.
*/
class ExpressionMapping {
public:
@@ -46,6 +46,109 @@ namespace mongo {
bob.append("", BSONElementHasher::hash64(value, BSONElementHasher::DEFAULT_HASH_SEED));
return bob.obj();
}
+
+ static void cover2dsphere(const S2Region& region, OrderedIntervalList* oilOut) {
+ // XXX: should grab coarsest level from the index since the user can possibly change it.
+ int coarsestIndexedLevel =
+ S2::kAvgEdge.GetClosestLevel(100 * 1000.0 / kRadiusOfEarthInMeters);
+
+
+ // The min level of our covering is the level whose cells are the closest match to the
+ // *area* of the region (or the max indexed level, whichever is smaller) The max level
+ // is 4 sizes larger.
+ double edgeLen = sqrt(region.GetRectBound().Area());
+ S2RegionCoverer coverer;
+ coverer.set_min_level(min(coarsestIndexedLevel,
+ 2 + S2::kAvgEdge.GetClosestLevel(edgeLen)));
+ coverer.set_max_level(4 + coverer.min_level());
+
+ vector<S2CellId> cover;
+ coverer.GetCovering(region, &cover);
+
+ // Look at the cells we cover and all cells that are within our covering and finer.
+ // Anything with our cover as a strict prefix is contained within the cover and should
+ // be intersection tested.
+ bool considerCoarser = false;
+ set<string> intervalSet;
+ for (size_t i = 0; i < cover.size(); ++i) {
+ intervalSet.insert(cover[i].toString());
+ // If any of our covers could be covered by something in the index, we have
+ // to look at things coarser.
+ if (cover[i].level() > coarsestIndexedLevel) {
+ considerCoarser = true;
+ }
+ }
+
+ set<string> exactSet;
+ if (considerCoarser) {
+ // Look at the cells that cover us. We want to look at every cell that contains the
+ // covering we would index on if we were to insert the query geometry. We generate
+ // the would-index-with-this-covering and find all the cells strictly containing the
+ // cells in that set, until we hit the coarsest indexed cell. We use equality, not
+ // a prefix match. Why not prefix? Because we've already looked at everything
+ // finer or as fine as our initial covering.
+ //
+ // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't
+ // want to look at cells 21212[not-1] because we know they're not going to intersect
+ // with 212121, but entries inserted with cell value 21212 (no trailing digits) may.
+ // And we've already looked at points with the cell id 211111 from the regex search
+ // created above, so we only want things where the value of the last digit is not
+ // stored (and therefore could be 1).
+ for (size_t i = 0; i < cover.size(); ++i) {
+ for (S2CellId id = cover[i].parent(); id.level() >= coarsestIndexedLevel;
+ id = id.parent()) {
+ exactSet.insert(id.toString());
+ }
+ }
+ }
+
+ // We turned the cell IDs into strings which define point intervals or prefixes of
+ // strings we want to look for.
+ set<string>::iterator exactIt = exactSet.begin();
+ set<string>::iterator intervalIt = intervalSet.begin();
+ while (exactSet.end() != exactIt && intervalSet.end() != intervalIt) {
+ const string& exact = *exactIt;
+ const string& ival = *intervalIt;
+ if (exact < ival) {
+ // add exact
+ oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(exact));
+ exactIt++;
+ }
+ else {
+ string end = ival;
+ end[end.size() - 1]++;
+ oilOut->intervals.push_back(
+ IndexBoundsBuilder::makeRangeInterval(ival, end, true, false));
+ intervalIt++;
+ }
+ }
+
+ if (exactSet.end() != exactIt) {
+ verify(intervalSet.end() == intervalIt);
+ do {
+ oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(*exactIt));
+ exactIt++;
+ } while (exactSet.end() != exactIt);
+ }
+ else if (intervalSet.end() != intervalIt) {
+ verify(exactSet.end() == exactIt);
+ do {
+ const string& ival = *intervalIt;
+ string end = ival;
+ end[end.size() - 1]++;
+ oilOut->intervals.push_back(
+ IndexBoundsBuilder::makeRangeInterval(ival, end, true, false));
+ intervalIt++;
+ } while (intervalSet.end() != intervalIt);
+ }
+
+ // Make sure that our intervals don't overlap each other and are ordered correctly.
+ // This perhaps should only be done in debug mode.
+ if (!oilOut->isValidFor(1)) {
+ cout << "check your assumptions! OIL = " << oilOut->toString() << endl;
+ verify(0);
+ }
+ }
};
} // namespace mongo