diff options
author | Greg Studer <greg@10gen.com> | 2014-07-18 15:28:37 -0400 |
---|---|---|
committer | Greg Studer <greg@10gen.com> | 2014-07-23 16:52:36 -0400 |
commit | 2cbd9f1d77ce6b25e9957d3121fccb521caf848f (patch) | |
tree | 98376940b7d7de3f23877f72cbe2e9008a64b8a7 /src/mongo/db/query/expression_index.cpp | |
parent | 7174d92cc1ad3dd24430769b72c05b1ee6fbcd74 (diff) | |
download | mongo-2cbd9f1d77ce6b25e9957d3121fccb521caf848f.tar.gz |
SERVER-5800 changes for geo expression index performance, avoid preprojecting
- additional covering parameters
- only project points when required
Diffstat (limited to 'src/mongo/db/query/expression_index.cpp')
-rw-r--r-- | src/mongo/db/query/expression_index.cpp | 209 |
1 files changed, 209 insertions, 0 deletions
diff --git a/src/mongo/db/query/expression_index.cpp b/src/mongo/db/query/expression_index.cpp new file mode 100644 index 00000000000..ee49ee574b0 --- /dev/null +++ b/src/mongo/db/query/expression_index.cpp @@ -0,0 +1,209 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/expression_index.h" + +#include "third_party/s2/s2regioncoverer.h" + +#include "mongo/db/geo/geoconstants.h" +#include "mongo/db/geo/hash.h" +#include "mongo/db/geo/r2_region_coverer.h" +#include "mongo/db/hasher.h" + +namespace mongo { + + BSONObj ExpressionMapping::hash(const BSONElement& value) { + BSONObjBuilder bob; + bob.append("", BSONElementHasher::hash64(value, BSONElementHasher::DEFAULT_HASH_SEED)); + return bob.obj(); + } + + // For debugging only + static std::string toCoveringString(const GeoHashConverter& hashConverter, + const set<GeoHash>& covering) { + string result = "["; + for (set<GeoHash>::const_iterator it = covering.begin(); it != covering.end(); + ++it) { + + if (it != covering.begin()) result += ", "; + + const GeoHash& geoHash = *it; + + result += hashConverter.unhashToBox(geoHash).toString(); + result += " (" + geoHash.toStringHex1() + ")"; + } + + return result + "]"; + } + + void ExpressionMapping::cover2d(const R2Region& region, + const BSONObj& indexInfoObj, + int maxCoveringCells, + OrderedIntervalList* oil) { + + GeoHashConverter::Parameters hashParams; + Status paramStatus = GeoHashConverter::parseParameters(indexInfoObj, &hashParams); + verify(paramStatus.isOK()); // We validated the parameters when creating the index + + GeoHashConverter hashConverter(hashParams); + R2RegionCoverer coverer(&hashConverter); + coverer.setMaxLevel(hashConverter.getBits()); + coverer.setMaxCells(maxCoveringCells); + + // TODO: Maybe slightly optimize by returning results in order + vector<GeoHash> unorderedCovering; + coverer.getCovering(region, &unorderedCovering); + set<GeoHash> covering(unorderedCovering.begin(), unorderedCovering.end()); + + for (set<GeoHash>::const_iterator it = covering.begin(); it != covering.end(); + ++it) { + + const GeoHash& geoHash = *it; + BSONObjBuilder builder; + geoHash.appendHashMin(&builder, ""); + geoHash.appendHashMax(&builder, ""); + + oil->intervals.push_back(IndexBoundsBuilder::makeRangeInterval(builder.obj(), + true, + true)); + } + } + + // TODO: what should we really pass in for indexInfoObj? + void ExpressionMapping::cover2dsphere(const S2Region& region, + const BSONObj& indexInfoObj, + OrderedIntervalList* oilOut) { + + int coarsestIndexedLevel; + BSONElement ce = indexInfoObj["coarsestIndexedLevel"]; + if (ce.isNumber()) { + coarsestIndexedLevel = ce.numberInt(); + } + else { + coarsestIndexedLevel = + S2::kAvgEdge.GetClosestLevel(100 * 1000.0 / kRadiusOfEarthInMeters); + } + + // The min level of our covering is the level whose cells are the closest match to the + // *area* of the region (or the max indexed level, whichever is smaller) The max level + // is 4 sizes larger. + double edgeLen = sqrt(region.GetRectBound().Area()); + S2RegionCoverer coverer; + coverer.set_min_level(min(coarsestIndexedLevel, + 2 + S2::kAvgEdge.GetClosestLevel(edgeLen))); + coverer.set_max_level(4 + coverer.min_level()); + + std::vector<S2CellId> cover; + coverer.GetCovering(region, &cover); + + // Look at the cells we cover and all cells that are within our covering and finer. + // Anything with our cover as a strict prefix is contained within the cover and should + // be intersection tested. + bool considerCoarser = false; + std::set<std::string> intervalSet; + for (size_t i = 0; i < cover.size(); ++i) { + intervalSet.insert(cover[i].toString()); + // If any of our covers could be covered by something in the index, we have + // to look at things coarser. + if (cover[i].level() > coarsestIndexedLevel) { + considerCoarser = true; + } + } + + std::set<std::string> exactSet; + if (considerCoarser) { + // Look at the cells that cover us. We want to look at every cell that contains the + // covering we would index on if we were to insert the query geometry. We generate + // the would-index-with-this-covering and find all the cells strictly containing the + // cells in that set, until we hit the coarsest indexed cell. We use equality, not + // a prefix match. Why not prefix? Because we've already looked at everything + // finer or as fine as our initial covering. + // + // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't + // want to look at cells 21212[not-1] because we know they're not going to intersect + // with 212121, but entries inserted with cell value 21212 (no trailing digits) may. + // And we've already looked at points with the cell id 211111 from the regex search + // created above, so we only want things where the value of the last digit is not + // stored (and therefore could be 1). + for (size_t i = 0; i < cover.size(); ++i) { + for (S2CellId id = cover[i].parent(); id.level() >= coarsestIndexedLevel; + id = id.parent()) { + exactSet.insert(id.toString()); + } + } + } + + // We turned the cell IDs into strings which define point intervals or prefixes of + // strings we want to look for. + std::set<std::string>::iterator exactIt = exactSet.begin(); + std::set<std::string>::iterator intervalIt = intervalSet.begin(); + while (exactSet.end() != exactIt && intervalSet.end() != intervalIt) { + const std::string& exact = *exactIt; + const std::string& ival = *intervalIt; + if (exact < ival) { + // add exact + oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(exact)); + exactIt++; + } + else { + std::string end = ival; + end[end.size() - 1]++; + oilOut->intervals.push_back( + IndexBoundsBuilder::makeRangeInterval(ival, end, true, false)); + intervalIt++; + } + } + + if (exactSet.end() != exactIt) { + verify(intervalSet.end() == intervalIt); + do { + oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(*exactIt)); + exactIt++; + } while (exactSet.end() != exactIt); + } + else if (intervalSet.end() != intervalIt) { + verify(exactSet.end() == exactIt); + do { + const std::string& ival = *intervalIt; + std::string end = ival; + end[end.size() - 1]++; + oilOut->intervals.push_back( + IndexBoundsBuilder::makeRangeInterval(ival, end, true, false)); + intervalIt++; + } while (intervalSet.end() != intervalIt); + } + + // Make sure that our intervals don't overlap each other and are ordered correctly. + // This perhaps should only be done in debug mode. + if (!oilOut->isValidFor(1)) { + cout << "check your assumptions! OIL = " << oilOut->toString() << std::endl; + verify(0); + } + } + +} // namespace mongo |