summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mongo/db/exec/geo_near.cpp4
-rw-r--r--src/mongo/db/geo/hash.cpp81
-rw-r--r--src/mongo/db/geo/hash.h25
-rw-r--r--src/mongo/db/geo/hash_test.cpp283
-rw-r--r--src/mongo/db/geo/r2_region_coverer.cpp2
-rw-r--r--src/mongo/db/geo/r2_region_coverer_test.cpp7
-rw-r--r--src/mongo/db/query/expression_index.cpp2
7 files changed, 360 insertions, 44 deletions
diff --git a/src/mongo/db/exec/geo_near.cpp b/src/mongo/db/exec/geo_near.cpp
index a586960a8b2..32059002265 100644
--- a/src/mongo/db/exec/geo_near.cpp
+++ b/src/mongo/db/exec/geo_near.cpp
@@ -274,7 +274,7 @@ namespace mongo {
invariant(status.isOK()); // The index status should always be valid
GeoHashConverter converter(hashParams);
- return 5 * converter.sizeEdge(GeoHash(0u, 0u, hashParams.bits));
+ return 5 * converter.sizeEdge(hashParams.bits);
}
else {
return kMaxEarthDistanceInMeters / 1000.0;
@@ -388,7 +388,7 @@ namespace mongo {
virtual bool matchesSingleElement(const BSONElement& e) const {
// Something has gone terribly wrong if this doesn't hold.
invariant(BinData == e.type());
- return !_region->fastDisjoint(_unhasher.unhashToBox(e));
+ return !_region->fastDisjoint(_unhasher.unhashToBoxCovering(_unhasher.hash(e)));
}
//
diff --git a/src/mongo/db/geo/hash.cpp b/src/mongo/db/geo/hash.cpp
index 57996e0ba1e..ea481e14e14 100644
--- a/src/mongo/db/geo/hash.cpp
+++ b/src/mongo/db/geo/hash.cpp
@@ -32,6 +32,8 @@
#include "mongo/db/geo/shapes.h"
#include "mongo/util/mongoutils/str.h"
+#include <algorithm> // for max()
+
// So we can get at the str namespace.
using namespace mongoutils;
@@ -498,6 +500,21 @@ namespace mongo {
static BSONField<double> maxField("max", 180.0);
static BSONField<double> minField("min", -180.0);
+ // a x b
+ // | | |
+ // -----|---o-----|---------|-- "|" is a representable double number.
+ //
+ // In the above figure, b is the next representable double number after a, so
+ // |a - b|/|a| = epsilon (ULP) ~= 2.22E-16.
+ //
+ // An exact number x will be represented as the nearest representable double, which is a.
+ // |x - a|/|a| <= 0.5 ULP ~= 1.11e-16
+ //
+ // IEEE floating-point operations have a maximum error of 0.5 ULPS (units in
+ // the last place). For double-precision numbers, this works out to 2**-53
+ // (about 1.11e-16) times the magnitude of the result.
+ double const GeoHashConverter::kMachinePrecision = 0.5 * std::numeric_limits<double>::epsilon();
+
Status GeoHashConverter::parseParameters(const BSONObj& paramDoc,
GeoHashConverter::Parameters* params) {
@@ -556,6 +573,9 @@ namespace mongo {
// Error in radians
_errorSphere = deg2rad(_error);
+
+ // 8 * max(|max|, |min|) * u
+ _errorUnhashToBox = calcUnhashToBoxError(_params);
}
double GeoHashConverter::distanceBetweenHashes(const GeoHash& a, const GeoHash& b) const {
@@ -672,16 +692,25 @@ namespace mongo {
*y = convertFromHashScale(b);
}
- Box GeoHashConverter::unhashToBox(const GeoHash &h) const {
- double sizeEdgeBox = sizeEdge(h);
+ Box GeoHashConverter::unhashToBoxCovering(const GeoHash &h) const {
+ if (h.getBits() == 0) {
+ // Return the result without any error.
+ return Box(Point(_params.min, _params.min), Point(_params.max, _params.max));
+ }
+
+ double sizeEdgeBox = sizeEdge(h.getBits());
Point min(unhashToPoint(h));
Point max(min.x + sizeEdgeBox, min.y + sizeEdgeBox);
+
+ // Expand the box by the error bound
Box box(min, max);
+ box.fudge(_errorUnhashToBox);
return box;
}
- Box GeoHashConverter::unhashToBox(const BSONElement &e) const {
- return unhashToBox(hash(e));
+ double GeoHashConverter::calcUnhashToBoxError(const GeoHashConverter::Parameters& params) {
+ return std::max(fabs(params.min), fabs(params.max))
+ * GeoHashConverter::kMachinePrecision* 8;
}
double GeoHashConverter::sizeOfDiag(const GeoHash& a) const {
@@ -690,34 +719,29 @@ namespace mongo {
return distanceBetweenHashes(a, b);
}
- // TODO: Use ldexp(max - min, -level)
- double GeoHashConverter::sizeEdge(const GeoHash& a) const {
- if (!a.constrains())
- return _params.max - _params.min;
-
- double ax, ay, bx, by;
- GeoHash b = a;
- b.move(1, 1);
- unhash(a, &ax, &ay);
- unhash(b, &bx, &by);
-
- // _min and _max are a singularity
- if (bx == _params.min)
- bx = _params.max;
- return fabs(ax - bx);
+ // Relative error = epsilon_(max-min). ldexp() is just a direct translation to
+ // floating point exponent, and should be exact.
+ double GeoHashConverter::sizeEdge(unsigned level) const {
+ invariant(level >= 0);
+ invariant((int)level <= _params.bits);
+ return ldexp(_params.max - _params.min, -level);
}
- // Convert from an unsigned in [0, (max-min)*scaling] to [min, max]
- double GeoHashConverter::convertFromHashScale(unsigned in) const {
- double x = in;
+ // Convert from a double in [0, (max-min)*scaling] to [min, max]
+ double GeoHashConverter::convertDoubleFromHashScale(double x) const {
x /= _params.scaling;
x += _params.min;
return x;
}
- // Convert from a double that is [min, max] to an unsigned in [0, (max-min)*scaling]
- unsigned GeoHashConverter::convertToHashScale(double in) const {
+ // Convert from an unsigned in [0, (max-min)*scaling] to [min, max]
+ double GeoHashConverter::convertFromHashScale(unsigned in) const {
+ return convertDoubleFromHashScale((double)in);
+ }
+
+ // Convert from a double that is [min, max] to a double in [0, (max-min)*scaling]
+ double GeoHashConverter::convertToDoubleHashScale(double in) const {
verify(in <= _params.max && in >= _params.min);
if (in == _params.max) {
@@ -728,6 +752,13 @@ namespace mongo {
in -= _params.min;
verify(in >= 0);
- return static_cast<unsigned>(in * _params.scaling);
+ return in * _params.scaling;
}
+
+ // Convert from a double that is [min, max] to an unsigned in [0, (max-min)*scaling]
+ unsigned GeoHashConverter::convertToHashScale(double in) const {
+ return static_cast<unsigned>(convertToDoubleHashScale(in));
+ }
+
+
} // namespace mongo
diff --git a/src/mongo/db/geo/hash.h b/src/mongo/db/geo/hash.h
index 57d2083c430..dbd357983e9 100644
--- a/src/mongo/db/geo/hash.h
+++ b/src/mongo/db/geo/hash.h
@@ -155,6 +155,8 @@ namespace mongo {
*/
class GeoHashConverter {
public:
+ static double const kMachinePrecision; // = 1.1e-16
+
struct Parameters {
// How many bits to use for the hash?
int bits;
@@ -172,6 +174,8 @@ namespace mongo {
*/
static Status parseParameters(const BSONObj& paramDoc, Parameters* params);
+ static double calcUnhashToBoxError(const GeoHashConverter::Parameters& params);
+
/**
* Return converter parameterss which can be used to
* construct an copy of this converter.
@@ -215,17 +219,18 @@ namespace mongo {
void unhash(const GeoHash &h, double *x, double *y) const;
/**
- * Generates bounding box from geo hash using converter.
- * Used in GeoBrowse::fillStack and db/query/explain_plan.cpp
- * to generate index bounds from
- * geo hashes in plan stats.
+ * Generates bounding box from geohash, expanded by the error bound
*/
- Box unhashToBox(const GeoHash &h) const;
- Box unhashToBox(const BSONElement &e) const;
+ Box unhashToBoxCovering(const GeoHash &h) const;
double sizeOfDiag(const GeoHash& a) const;
- // XXX: understand/clean this.
- double sizeEdge(const GeoHash& a) const;
+
+ // Return the sizeEdge of a cell at a given level.
+ double sizeEdge(unsigned level) const;
+
+ // Used by test.
+ double convertDoubleFromHashScale(double in) const;
+ double convertToDoubleHashScale(double in) const;
private:
void init();
@@ -240,5 +245,9 @@ namespace mongo {
// We compute these based on the _params:
double _error;
double _errorSphere;
+
+ // Error bound of unhashToBox, see hash_test.cpp for its proof.
+ // 8 * max(|max|, |min|) * u
+ double _errorUnhashToBox;
};
} // namespace mongo
diff --git a/src/mongo/db/geo/hash_test.cpp b/src/mongo/db/geo/hash_test.cpp
index dc9be31fe97..7534a2efc04 100644
--- a/src/mongo/db/geo/hash_test.cpp
+++ b/src/mongo/db/geo/hash_test.cpp
@@ -32,9 +32,12 @@
#include <string>
#include <sstream>
+#include <iomanip>
#include <cmath>
+#include <algorithm> // For max()
#include "mongo/db/geo/hash.h"
+#include "mongo/db/geo/shapes.h"
#include "mongo/platform/random.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
@@ -97,10 +100,280 @@ namespace {
GeoHashConverter converter(params);
- ASSERT_APPROX_EQUAL(100.0, converter.sizeEdge(GeoHash(0, 0, 0)), kError);
- ASSERT_APPROX_EQUAL(50.0, converter.sizeEdge(GeoHash(0, 0, 1)), kError);
- ASSERT_APPROX_EQUAL(25.0, converter.sizeEdge(GeoHash(0, 0, 2)), kError);
- ASSERT_APPROX_EQUAL(ldexp(100.0, -26), converter.sizeEdge(GeoHash(0, 0, 26)), kError);
- ASSERT_APPROX_EQUAL(ldexp(100.0, -32), converter.sizeEdge(GeoHash(0, 0, 32)), kError);
+ ASSERT_APPROX_EQUAL(100.0, converter.sizeEdge(0), kError);
+ ASSERT_APPROX_EQUAL(50.0, converter.sizeEdge(1), kError);
+ ASSERT_APPROX_EQUAL(25.0, converter.sizeEdge(2), kError);
+ }
+
+ /**
+ * ==========================
+ * Error Bound of UnhashToBox
+ * ==========================
+ *
+ * Compute the absolute error when unhashing a GeoHash to a box, so that expanding
+ * the box by this absolute error can guarantee a point is always contained by the box
+ * of its GeoHash. Thus, the absolute error of box should consist of 3 components:
+ *
+ * 1) The error introduced by hashing x to GeoHash. The extreme example would be a point
+ * close to the boundary of a cell is hashed to an adjacent box.
+ *
+ * For a hash/unhash functions h(x)/uh(x) and computed functions h'(x),uh'(x):
+ *
+ * x uh(h'(x))
+ * |--------|----|--------------------> min-max scale
+ * min \
+ * \
+ * \
+ * \
+ * |--------|--|-|--------------------> hash scale for cells c
+ * 0 h(x) c h'(x)
+ *
+ * 2) The error introduced by unhashing an (int) GeoHash to its lower left corner in x-y
+ * space.
+ *
+ * uh(c)
+ * x | uh'(c)
+ * |--------|--|----|-----------------> min-max scale
+ * min \ /
+ * \ /
+ * \ /
+ * X
+ * |--------|--|-|--------------------> hash scale for cells c
+ * 0 h(x) c h'(x)
+ *
+ * 3) The error introduced by adding the edge length to get the top-right corner of box.
+ * Instead of directly computing uh'(c+1), we add the computed box edge length to the computed
+ * value uh(c), giving us an extra error.
+ *
+ * |edge(min,max)|
+ * | |
+ * | uh(c)+edge
+ * uh(c) |
+ * |-------------|------[uh(c)+edge']-----------> min-max scale
+ * min
+ *
+ * |-------------|-------------|----------------> hash scale
+ * 0 c c+1
+ * Hash and unhash definitions
+ * -------------------------
+ * h(x) = (x - min) * scaling = 2^32 * (x - min) / (max - min)
+ * uh(h) = h / scaling + min,
+ * where
+ * scaling = 2^32 / (max - min)
+ *
+ * Again, h(x)/uh(x) are the exact hash functions and h'(x)/uh'(x) are the computational hash
+ * functions which have small rounding errors.
+ *
+ * | h'(x) - h(x) | == | delta_h(x; max, min) |
+ * where delta_fn = the absolute difference between the computed and actual value of a
+ * function.
+ *
+ * Restating the problem, we're looking for:
+ * |delta_box| = | delta_x_{h'(x)=H} + delta_uh(h) + delta_edge_length |
+ * <= | delta_x_{h'(x)=H} | + | delta_uh(h) | + | delta_edge_length |
+ *
+ * 1. Error bounds calculation
+ * ---------------------------
+ *
+ * 1.1 Error: | delta_x_{h'(x)=H} |
+ * --------------------------------
+ * The first error | delta_x_{h'(x)=H} | means, given GeoHash H, we can find
+ * the range of x and only the range of x that may be mapped to H.
+ * In other words, given H, for any x that is far enough from uh(H) by at least d,
+ * it is impossible for x to be mapped to H.
+ * Mathematical, find d, such that for any x satisfying |x - uh(H)| > d,
+ * |h(x) - H| >= | delta_h(x) |
+ * => |h(x) - H| - | delta_h(x) | >= 0
+ * => |h(x) - H + delta_h(x) | >= 0 (|a + b| >= |a| - |b|)
+ * => |h'(x) - H| >= 0 (h'(x) = h(x) + delta_h(x))
+ * which guarantees h'(x) != H.
+ *
+ *
+ * uh(H)-d
+ * |
+ * x | uh(H)
+ * |--------|---[----|----]-----------> min-max scale
+ * min / \ \ /
+ * / \ \ /
+ * / \ \ /
+ * / \ \ /
+ * |---[----|--|-]---|----------------> hash scale for cells c
+ * 0 h(x) | H
+ * h'(x)
+ * =h(x)+delta_h(x)
+ *
+ *
+ * Let's consider one case of the above inequality. We need to find the d,
+ * such that, when
+ * x < uh(H) - d, (1)
+ * we have
+ * h(x) + |delta_h(x)| <= H. (2)
+ *
+ * Due to the monotonicity of h(x), apply h(x) to both side of inequality (1),
+ * we have
+ * h(x) < h(uh(H) - d) <= H - |delta_h(x)| (from (2))
+ *
+ * By solving it, we have
+ * d = |delta_h(x)| / scaling
+ * <= 2Mu * (1 + |x-min|/|max-min|) (see calculation for |delta_h(x)| below)
+ * <= 4Mu
+ *
+ * | delta_x_{h'(x)=H} | <= d <= 4Mu
+ * The similar calculation applies for the other side of the above inequality.
+ *
+ * 1.2 Error of h(x)
+ * -----------------
+ *
+ * Rules of error propagation
+ * --------------------------
+ * Absolute error of x is |delta_x|
+ * Relative error of x is epsilon_x = |delta_x| / |x|
+ * For any double number x, the relative error of x is bounded by "u". We assume all inputs
+ * have this error to make deduction clear.
+ * epsilon_x <= u = 0.5 * unit of least precision(ULP) ~= 1.1 * 10E-16
+ *
+ * |delta_(x + y)| <= |delta_x| + |delta_y|
+ * |delta_(x - y)| <= |delta_x| + |delta_y|
+ * epsilon_(x * y) <= epsilon_x + epsilon_y
+ * epsilon_(x / y) <= epsilon_x + epsilon_y
+ *
+ * For a given min, max scale, the maximum delta in a computation is bounded by the maximum
+ * value in the scale - M * u = max(|max|, |min|) * u.
+ *
+ * For the hash function h(x)
+ * --------------------------
+ *
+ * epsilon_h(x) = epsilon_(x-min) + epsilon_scaling
+ *
+ * epsilon_(x-min) = (|delta_x| + |delta_min|) / |x - min|
+ * <= 2Mu / |x - min|
+ *
+ * epsilon_scaling = epsilon_(2^32) + epsilon_(max - min)
+ * = 0 + epsilon_(max - min)
+ * <= 2Mu / |max - min|
+ *
+ * Hence, epsilon_h(x) <= 2Mu * (1/|x - min| + 1/|max - min|)
+ *
+ * |delta_h(x)| = 2Mu * (1 + |x-min|/|max-min|) * 2^32 / |max - min|
+ * <= 4Mu * 2^32 / |max-min|
+ *
+ * 2. Error: unhashing GeoHash to point
+ * ------------------------------------
+ * Similarly, we can calculate the error for uh(h) function, assuming h is exactly
+ * represented in form of GeoHash, since integer is represented exactly.
+ *
+ * |delta_uh(h)| = epsilon_(h/scaling) * |h/scaling| + delta_min
+ * = epsilon_(scaling) * |h/scaling| + delta_min
+ * <= 2Mu / |max-min| * |max-min| + |min| * u
+ * <= 3Mu
+ *
+ * Thus, the second error |delta_uh(h)| <= 3Mu
+ * Totally, the absolute error we need to add to unhashing to a point <= 4Mu + 3Mu = 7Mu
+ *
+ * 3. Error: edge length
+ * ---------------------
+ * The third part is easy to compute, since ldexp() doesn't introduce extra
+ * relative error.
+ *
+ * edge_length = ldexp(max - min, -level)
+ *
+ * epsilon_edge = epsilon_(max - min) <= 2 * M * u / |max - min|
+ *
+ * | delta_edge | = epsilon_edge * (max - min) * 2^(-level)
+ * = 2Mu * 2^(-level) <= Mu (level >= 1)
+ *
+ * This error is neglectable when level >> 0.
+ *
+ * In conclusion, | delta_box | <= 8Mu
+ *
+ *
+ * Test
+ * ====
+ * This first two component errors can be simulated by uh'(h'(x)).
+ * Let h = h'(x)
+ * |delta_(uh'(h'(x)))|
+ * = epsilon_(h/scaling) * |h/scaling| + delta_min
+ * = (epsilon_(h) + epsilon_(scaling)) * |h/scaling| + delta_min
+ * = epsilon_(h) * h/scaling + epsilon_(scaling) * |h/scaling| + delta_min
+ * = |delta_h|/scaling + |delta_uh(h)|
+ * ~= |delta_box| when level = 32
+ *
+ * Another way to think about it is the error of uh'(h'(x)) also consists of
+ * the same two components that constitute the error of unhashing to a point,
+ * by substituting c with h'(x).
+ *
+ * | delta_(uh'(h'(x))) | = | x - uh'(h(x)) |
+ *
+ * uh(h'(x))
+ * |
+ * x | uh'(h(x))
+ * |--------|---|---|----------------> min-max scale
+ * min \ /
+ * \ /
+ * \ /
+ * |--------|---|--------------------> hash scale for cells c
+ * 0 h(x) h'(x)
+ *
+ *
+ * We can get the maximum of the error by making max very large and min = -min, x -> max
+ */
+ TEST(GeoHashConverter, UnhashToBoxError) {
+ GeoHashConverter::Parameters params;
+ // Test max from 2^-20 to 2^20
+ for (int times = -20; times <= 20; times += 2) {
+ // Construct parameters
+ params.max = ldexp(1 + 0.01 * times, times);
+ params.min = -params.max;
+ params.bits = 32;
+ double numBuckets = (1024 * 1024 * 1024 * 4.0);
+ params.scaling = numBuckets / (params.max - params.min);
+
+ GeoHashConverter converter(params);
+ // Assume level == 32, so we ignore the error of edge length here.
+ double delta_box = 7.0 / 8.0 * GeoHashConverter::calcUnhashToBoxError(params);
+ double cellEdge = 1 / params.scaling;
+ double x;
+
+ // We are not able to test all the FP numbers to verify the error bound by design,
+ // so we consider the numbers in the cell near the point we are interested in.
+ //
+ // FP numbers starting at max, working downward in minimal increments
+ x = params.max;
+ while (x > params.max - cellEdge) {
+ x = nextafter(x, params.min);
+ double x_prime = converter.convertDoubleFromHashScale(
+ converter.convertToDoubleHashScale(x));
+ double delta = fabs(x - x_prime);
+ ASSERT_LESS_THAN(delta, delta_box);
+ }
+
+ // FP numbers starting between first and second cell, working downward to min
+ x = params.min + cellEdge;
+ while (x > params.min) {
+ x = nextafter(x, params.min);
+ double x_prime = converter.convertDoubleFromHashScale(
+ converter.convertToDoubleHashScale(x));
+ double delta = fabs(x - x_prime);
+ ASSERT_LESS_THAN(delta, delta_box);
+ }
+ }
+ }
+
+ // SERVER-15576 Verify a point is contained by its GeoHash box.
+ TEST(GeoHashConverter, GeoHashBox) {
+ GeoHashConverter::Parameters params;
+ params.max = 100000000.3;
+ params.min = -params.max;
+ params.bits = 32;
+ double numBuckets = (1024 * 1024 * 1024 * 4.0);
+ params.scaling = numBuckets / (params.max - params.min);
+
+ GeoHashConverter converter(params);
+
+ // Without expanding the box, the following point is not contained by its GeoHash box.
+ mongo::Point p(-7201198.6497758823, -0.1);
+ mongo::GeoHash hash = converter.hash(p);
+ mongo::Box box = converter.unhashToBoxCovering(hash);
+ ASSERT(box.inside(p));
}
}
diff --git a/src/mongo/db/geo/r2_region_coverer.cpp b/src/mongo/db/geo/r2_region_coverer.cpp
index 37dde0ac8e9..22812bccc77 100644
--- a/src/mongo/db/geo/r2_region_coverer.cpp
+++ b/src/mongo/db/geo/r2_region_coverer.cpp
@@ -134,7 +134,7 @@ namespace mongo {
// Caller owns the returned pointer
R2RegionCoverer::Candidate* R2RegionCoverer::newCandidate( const GeoHash& cell ) {
// Exclude the cell that doesn't intersect with the geometry.
- Box box = _hashConverter->unhashToBox(cell);
+ Box box = _hashConverter->unhashToBoxCovering(cell);
if (_region->fastDisjoint(box)) {
return NULL;
diff --git a/src/mongo/db/geo/r2_region_coverer_test.cpp b/src/mongo/db/geo/r2_region_coverer_test.cpp
index 15726598b69..5f61d40357a 100644
--- a/src/mongo/db/geo/r2_region_coverer_test.cpp
+++ b/src/mongo/db/geo/r2_region_coverer_test.cpp
@@ -161,7 +161,10 @@ namespace {
GeoHash id( (long long) rand.nextInt64(),
(unsigned) rand.nextInt32( GeoHash::kMaxBits + 1 ) );
vector<GeoHash> covering;
- HashBoxRegion region(converter.unhashToBox(id));
+ Box box = converter.unhashToBoxCovering(id);
+ // Since the unhashed box is expanded by the error 8Mu, we need to shrink it.
+ box.fudge(-GeoHashConverter::kMachinePrecision * MAXBOUND * 20);
+ HashBoxRegion region(box);
coverer.getCovering(region, &covering);
ASSERT_EQUALS( covering.size(), (size_t)1 );
ASSERT_EQUALS( covering[0], id );
@@ -183,7 +186,7 @@ namespace {
const R2CellUnion& covering,
const GeoHash cellId = GeoHash()) {
- Box cell = converter.unhashToBox(cellId);
+ Box cell = converter.unhashToBoxCovering(cellId);
// The covering may or may not contain this disjoint cell, we don't care.
if (region.fastDisjoint(cell)) return;
diff --git a/src/mongo/db/query/expression_index.cpp b/src/mongo/db/query/expression_index.cpp
index 7e134d0d79d..eb6301dfb57 100644
--- a/src/mongo/db/query/expression_index.cpp
+++ b/src/mongo/db/query/expression_index.cpp
@@ -54,7 +54,7 @@ namespace mongo {
const GeoHash& geoHash = *it;
- result += hashConverter.unhashToBox(geoHash).toString();
+ result += hashConverter.unhashToBoxCovering(geoHash).toString();
result += " (" + geoHash.toStringHex1() + ")";
}