diff options
author | Kevin Albertson <kevin.albertson@10gen.com> | 2015-07-16 10:52:42 -0400 |
---|---|---|
committer | Kevin Albertson <kevin.albertson@10gen.com> | 2015-07-17 12:07:32 -0400 |
commit | f4d695fcf382820c7ea082bd4b1beb84e15cf2c5 (patch) | |
tree | da7a9eed960e8b02c1af25f70d2925a1d644ed5e /src/mongo/db/index | |
parent | fa5851d3313e74086eb2c1340758e6b535decdf3 (diff) | |
download | mongo-f4d695fcf382820c7ea082bd4b1beb84e15cf2c5.tar.gz |
SERVER-19072 Make S2 index keys numeric
Diffstat (limited to 'src/mongo/db/index')
-rw-r--r-- | src/mongo/db/index/SConscript | 9 | ||||
-rw-r--r-- | src/mongo/db/index/expression_keys_private.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/index/s2_keys.cpp | 172 | ||||
-rw-r--r-- | src/mongo/db/index/s2_keys.h | 53 |
4 files changed, 246 insertions, 16 deletions
diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index ecafab77727..abe30cdf819 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -19,11 +19,13 @@ env.Library( ], LIBDEPS=[ 'expression_params', + 's2_keys', '$BUILD_DIR/mongo/bson/bson', '$BUILD_DIR/mongo/db/fts/base', '$BUILD_DIR/mongo/db/geo/geoparser', '$BUILD_DIR/mongo/db/index_names', '$BUILD_DIR/mongo/db/mongohasher', + '$BUILD_DIR/mongo/db/query/index_bounds', # needed for s2_keys '$BUILD_DIR/third_party/s2/s2', ], ) @@ -35,6 +37,7 @@ env.Library( ], LIBDEPS=[ 'key_generator', + 'expression_params', '$BUILD_DIR/mongo/bson/bson', ], ) @@ -63,3 +66,9 @@ env.CppUnitTest( '$BUILD_DIR/mongo/db/mongohasher', ], ) + +env.Library('s2_keys', [ 's2_keys.cpp' ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/geo/geometry', + ], +)
\ No newline at end of file diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index 82c4b8d5400..b7d4e5dadd0 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -40,6 +40,7 @@ #include "mongo/db/index_names.h" #include "mongo/db/index/2d_common.h" #include "mongo/db/index/s2_indexing_params.h" +#include "mongo/db/index/s2_keys.h" #include "mongo/util/assert_util.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" @@ -75,20 +76,9 @@ void addKey(const string& root, const BSONElement& e, BSONObjSet* keys) { // Helper functions for getS2Keys // -static void S2KeysFromRegion(S2RegionCoverer* coverer, - const S2Region& region, - vector<string>* out) { - vector<S2CellId> covering; - coverer->GetCovering(region, &covering); - for (size_t i = 0; i < covering.size(); ++i) { - out->push_back(covering[i].toString()); - } -} - - Status S2GetKeysForElement(const BSONElement& element, const S2IndexingParams& params, - vector<string>* out) { + vector<S2CellId>* out) { GeometryContainer geoContainer; Status status = geoContainer.parseFromStorage(element); if (!status.isOK()) @@ -119,7 +109,7 @@ Status S2GetKeysForElement(const BSONElement& element, invariant(geoContainer.hasS2Region()); - S2KeysFromRegion(&coverer, geoContainer.getS2Region(), out); + coverer.GetCovering(geoContainer.getS2Region(), out); return Status::OK(); } @@ -133,7 +123,7 @@ void getS2GeoKeys(const BSONObj& document, const S2IndexingParams& params, BSONObjSet* out) { for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { - vector<string> cells; + vector<S2CellId> cells; Status status = S2GetKeysForElement(*i, params, &cells); uassert(16755, str::stream() << "Can't extract geo keys: " << document << " " << status.reason(), @@ -143,9 +133,15 @@ void getS2GeoKeys(const BSONObj& document, "Unable to generate keys for (likely malformed) geometry: " + document.toString(), cells.size() > 0); - for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { + for (vector<S2CellId>::const_iterator it = cells.begin(); it != cells.end(); ++it) { BSONObjBuilder b; - b.append("", *it); + if (params.indexVersion < S2_INDEX_VERSION_3) { + b.append("", it->toString()); + } else { + b.append("", S2CellIdToIndexKey(*it)); + } + + out->insert(b.obj()); } } diff --git a/src/mongo/db/index/s2_keys.cpp b/src/mongo/db/index/s2_keys.cpp new file mode 100644 index 00000000000..4ea95f20a3c --- /dev/null +++ b/src/mongo/db/index/s2_keys.cpp @@ -0,0 +1,172 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "s2_keys.h" + +#include "mongo/db/index/expression_params.h" +#include "mongo/db/index/s2_indexing_params.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/query/index_bounds_builder.h" + +namespace mongo { + +using mongo::Interval; + +long long S2CellIdToIndexKey(const S2CellId& cellId) { + // The range of an unsigned long long is + // |-----------------|------------------| + // 0 2^32 2^64 - 1 + // 000... 100... 111... + // The range of a signed long long is + // |-----------------|------------------| + // -2^63 0 2^63 - 1 + // 100... 000... 011... + // S2 gives us an unsigned long long, and we need + // to use signed long longs for the index. + // + // The relative ordering may be changed with unsigned + // numbers around 2^32 being converted to signed + // + // However, because a single cell cannot span over + // more than once face, individual intervals will + // never cross that threshold. Thus, scans will still + // produce the same results. + return static_cast<long long>(cellId.id()); +} + +S2CellId S2CellIdFromIndexKey(long long indexKey) { + S2CellId cellId(static_cast<unsigned long long>(indexKey)); + return cellId; +} + +void S2CellIdToIndexRange(const S2CellId& cellId, long long* start, long long* end) { + *start = S2CellIdToIndexKey(cellId.range_min()); + *end = S2CellIdToIndexKey(cellId.range_max()); + invariant(*start <= *end); +} + +namespace { +bool compareIntervals(const Interval& a, const Interval& b) { + return a.precedes(b); +} + +void S2CellIdsToIntervalsUnsorted(const std::vector<S2CellId>& intervalSet, + const S2IndexingParams& indexParams, + OrderedIntervalList* oilOut) { + for (const S2CellId& interval : intervalSet) { + BSONObjBuilder b; + if (indexParams.indexVersion < S2_INDEX_VERSION_3) { + // for backwards compatibility, use strings + std::string start = interval.toString(); + std::string end = start; + end[start.size() - 1]++; + b.append("start", start); + b.append("end", end); + oilOut->intervals.push_back( + IndexBoundsBuilder::makeRangeInterval(b.obj(), true, false)); + } else { + long long start, end; + S2CellIdToIndexRange(interval, &start, &end); + b.append("start", start); + b.append("end", end); + // note: numeric form of S2CellId is end inclusive + oilOut->intervals.push_back(IndexBoundsBuilder::makeRangeInterval(b.obj(), true, true)); + } + } +} +} + +void S2CellIdsToIntervals(const std::vector<S2CellId>& intervalSet, + const S2IndexingParams& indexParams, + OrderedIntervalList* oilOut) { + // Order is not preserved in changing from numeric to string + // form of index key. Therefore, sorting is deferred to after + // intervals are made + S2CellIdsToIntervalsUnsorted(intervalSet, indexParams, oilOut); + std::sort(oilOut->intervals.begin(), oilOut->intervals.end(), compareIntervals); + // Make sure that our intervals don't overlap each other and are ordered correctly. + // This perhaps should only be done in debug mode. + if (!oilOut->isValidFor(1)) { + cout << "check your assumptions! OIL = " << oilOut->toString() << std::endl; + verify(0); + } +} + +void S2CellIdsToIntervalsWithParents(const std::vector<S2CellId>& intervalSet, + const S2IndexingParams& indexParams, + OrderedIntervalList* oilOut) { + // There may be duplicates when going up parent cells if two cells share a parent + std::unordered_set<S2CellId> exactSet; + for (const S2CellId& interval : intervalSet) { + S2CellId coveredCell = interval; + // Look at the cells that cover us. We want to look at every cell that contains the + // covering we would index on if we were to insert the query geometry. We generate + // the would-index-with-this-covering and find all the cells strictly containing the + // cells in that set, until we hit the coarsest indexed cell. We use equality, not + // a prefix match. Why not prefix? Because we've already looked at everything + // finer or as fine as our initial covering. + // + // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't + // want to look at cells 21212[not-1] because we know they're not going to intersect + // with 212121, but entries inserted with cell value 21212 (no trailing digits) may. + // And we've already looked at points with the cell id 211111 from the regex search + // created above, so we only want things where the value of the last digit is not + // stored (and therefore could be 1). + + while (coveredCell.level() > indexParams.coarsestIndexedLevel) { + // Add the parent cell of the currently covered cell since we aren't at the + // coarsest level yet + // NOTE: Be careful not to generate cells strictly less than the + // coarsestIndexedLevel - this can result in S2 failures when level < 0. + + coveredCell = coveredCell.parent(); + exactSet.insert(coveredCell); + } + } + for (const S2CellId& exact : exactSet) { + BSONObjBuilder b; + if (indexParams.indexVersion < S2_INDEX_VERSION_3) { + // for backwards compatibility, use strings + b.append("", exact.toString()); + } else { + b.append("", S2CellIdToIndexKey(exact)); + } + oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(b.obj())); + } + + S2CellIdsToIntervalsUnsorted(intervalSet, indexParams, oilOut); + std::sort(oilOut->intervals.begin(), oilOut->intervals.end(), compareIntervals); + // Make sure that our intervals don't overlap each other and are ordered correctly. + // This perhaps should only be done in debug mode. + if (!oilOut->isValidFor(1)) { + cout << "check your assumptions! OIL = " << oilOut->toString() << std::endl; + verify(0); + } +} +} diff --git a/src/mongo/db/index/s2_keys.h b/src/mongo/db/index/s2_keys.h new file mode 100644 index 00000000000..47e9d6d8c99 --- /dev/null +++ b/src/mongo/db/index/s2_keys.h @@ -0,0 +1,53 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/index/s2_indexing_params.h" +#include "mongo/db/query/index_bounds.h" +#include "third_party/s2/s2cellid.h" + +// Functions for abstracting S2CellIds and index keys +namespace mongo { + +long long S2CellIdToIndexKey(const S2CellId& cellId); + +// Initializes a new S2CellId from the passed index key +S2CellId S2CellIdFromIndexKey(long long indexKey); + +void S2CellIdsToIntervals(const std::vector<S2CellId>& intervalSet, + const S2IndexingParams& indexParams, + OrderedIntervalList* oilOut); + +// Creates an ordered interval list from range intervals and +// traverses cell parents for exact intervals up to coarsestIndexedLevel +void S2CellIdsToIntervalsWithParents(const std::vector<S2CellId>& interval, + const S2IndexingParams& indexParams, + OrderedIntervalList* out); +} |