/**
* Copyright (C) 2012 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/field_parser.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/geo/hash.h"
#include "mongo/db/geo/shapes.h"
#include "mongo/util/mongoutils/str.h"
#include // for max()
#include
// So we can get at the str namespace.
using namespace mongoutils;
namespace mongo {
using std::stringstream;
std::ostream& operator<<(std::ostream &s, const GeoHash &h) {
return s << h.toString();
}
/*
* GeoBitSets fills out various bit patterns that are used by GeoHash.
* What patterns? Look at the comments next to the fields.
* TODO(hk): hashedToNormal is still a bit of a mystery.
*/
class GeoBitSets {
public:
GeoBitSets() {
for (unsigned i = 0; i < 16; i++) {
unsigned fixed = 0;
for (int j = 0; j < 4; j++) {
if (i & (1 << j))
fixed |= (1 << (j * 2));
}
hashedToNormal[fixed] = i;
}
// Generate all 32 + 1 all-on bit patterns by repeatedly shifting the next bit to the
// correct position
long long currAllX = 0, currAllY = 0;
for (int i = 0; i < 64 + 2; i++){
long long thisBit = 1LL << (63 >= i ? 63 - i : 0);
if (i % 2 == 0) {
allX[i / 2] = currAllX;
currAllX |= thisBit;
} else{
allY[i / 2] = currAllY;
currAllY |= thisBit;
}
}
}
// The 0-th entries of each all[XY] is 0.
// The i-th entry of allX has i alternating bits turned on starting
// with the most significant. Example:
// allX[1] = 8000000000000000
// allX[2] = a000000000000000
// allX[3] = a800000000000000
// Note that 32 + 1 entries are needed, since 0 and 32 are both valid numbers of bits.
long long allX[33];
// Same alternating bits but starting with one from the MSB:
// allY[1] = 4000000000000000
// allY[2] = 5000000000000000
// allY[3] = 5400000000000000
long long allY[33];
unsigned hashedToNormal[256];
};
// Oh global variables.
GeoBitSets geoBitSets;
// For i return the i-th most significant bit.
// masks(0) = 80000..000
// masks(1) = 40000..000
// etc.
// Number of 0s depends on 32 vs. 64 bit.
inline static int mask32For(const int i) {
return 1 << (31 - i);
}
inline static long long mask64For(const int i) {
return 1LL << (63 - i);
}
// Binary data is stored in some particular byte ordering that requires this.
static void copyAndReverse(char *dst, const char *src) {
for (unsigned a = 0; a < 8; a++) {
dst[a] = src[7 - a];
}
}
// Definition
unsigned int const GeoHash::kMaxBits = 32;
/* This class maps an x,y coordinate pair to a hash value.
* This should probably be renamed/generalized so that it's more of a planar hash,
* and we also have a spherical hash, etc.
*/
GeoHash::GeoHash() : _hash(0), _bits(0) { }
GeoHash::GeoHash(const string& hash) {
initFromString(hash.c_str());
}
GeoHash::GeoHash(const char *s) {
initFromString(s);
}
void GeoHash::initFromString(const char *s) {
int length = strlen(s);
uassert(16457, "initFromString passed a too-long string", length <= 64);
uassert(16458, "initFromString passed an odd length string ", 0 == (length % 2));
_hash = 0;
// _bits is how many bits for X or Y, not both, so we divide by 2.
_bits = length / 2;
for (int i = 0; s[i] != '\0'; ++i)
if (s[i] == '1')
setBit(i, 1);
}
// This only works if e is BinData.
GeoHash::GeoHash(const BSONElement& e, unsigned bits) {
_bits = bits;
if (e.type() == BinData) {
int len = 0;
copyAndReverse((char*)&_hash, e.binData(len));
verify(len == 8);
} else {
cout << "GeoHash bad element: " << e << endl;
uassert(13047, "wrong type for geo index. if you're using a pre-release version,"
" need to rebuild index", 0);
}
clearUnusedBits();
}
GeoHash::GeoHash(unsigned x, unsigned y, unsigned bits) {
verify(bits <= 32);
_hash = 0;
_bits = bits;
for (unsigned i = 0; i < bits; i++) {
if (isBitSet(x, i)) _hash |= mask64For(i * 2);
if (isBitSet(y, i)) _hash |= mask64For((i * 2) + 1);
}
}
GeoHash::GeoHash(const GeoHash& old) {
_hash = old._hash;
_bits = old._bits;
}
GeoHash::GeoHash(long long hash, unsigned bits) : _hash(hash) , _bits(bits) {
clearUnusedBits();
}
// TODO(hk): This is nasty and has no examples.
void GeoHash::unhash_fast(unsigned *x, unsigned *y) const {
*x = 0;
*y = 0;
const char *c = reinterpret_cast(&_hash);
for (int i = 0; i < 8; i++) {
unsigned t = (unsigned)(c[i]) & 0x55;
*y |= (geoBitSets.hashedToNormal[t] << (4 * i));
t = ((unsigned)(c[i]) >> 1) & 0x55;
*x |= (geoBitSets.hashedToNormal[t] << (4 * i));
}
}
void GeoHash::unhash_slow(unsigned *x, unsigned *y) const {
*x = 0;
*y = 0;
for (unsigned i = 0; i < _bits; i++) {
if (getBitX(i))
*x |= mask32For(i);
if (getBitY(i))
*y |= mask32For(i);
}
}
void GeoHash::unhash(unsigned *x, unsigned *y) const {
unhash_fast(x, y);
}
/** Is the 'bit'-th most significant bit set? (NOT the least significant) */
bool GeoHash::isBitSet(unsigned val, unsigned bit) {
return mask32For(bit) & val;
}
/** Return a GeoHash with one bit of precision lost. */
GeoHash GeoHash::up() const {
return GeoHash(_hash, _bits - 1);
}
bool GeoHash::hasPrefix(const GeoHash& other) const {
verify(other._bits <= _bits);
if (other._bits == 0)
return true;
long long x = other._hash ^ _hash;
// We only care about the leftmost other._bits (well, really _bits*2 since we have x and
// y)
x = x >> (64 - (other._bits * 2));
return x == 0;
}
string GeoHash::toString() const {
StringBuilder buf;
for (unsigned x = 0; x < _bits * 2; x++)
buf.append((_hash & mask64For(x)) ? "1" : "0");
return buf.str();
}
string GeoHash::toStringHex1() const {
stringstream ss;
ss << std::hex << _hash;
return ss.str();
}
void GeoHash::setBit(unsigned pos, bool value) {
verify(pos < _bits * 2);
const long long mask = mask64For(pos);
if (value)
_hash |= mask;
else // if (_hash & mask)
_hash &= ~mask;
}
bool GeoHash::getBit(unsigned pos) const {
return _hash & mask64For(pos);
}
bool GeoHash::getBitX(unsigned pos) const {
verify(pos < 32);
return getBit(pos * 2);
}
bool GeoHash::getBitY(unsigned pos) const {
verify(pos < 32);
return getBit((pos * 2) + 1);
}
// TODO(hk): Comment this.
BSONObj GeoHash::wrap(const char* name) const {
BSONObjBuilder b(20);
appendHashMin(&b, name);
BSONObj o = b.obj();
if ('\0' == name[0]) verify(o.objsize() == 20);
return o;
}
// Do we have a non-trivial GeoHash?
bool GeoHash::constrains() const {
return _bits > 0;
}
// Could our GeoHash have higher precision?
bool GeoHash::canRefine() const {
return _bits < 32;
}
/**
* Hashing works like this:
* Divide the world into 4 buckets. Label each one as such:
* -----------------
* | | |
* | | |
* | 0,1 | 1,1 |
* -----------------
* | | |
* | | |
* | 0,0 | 1,0 |
* -----------------
* We recursively divide each cell, furthermore.
* The functions below tell us what quadrant we're in *at the finest level
* of the subdivision.*
*/
bool GeoHash::atMinX() const {
return (_hash & geoBitSets.allX[_bits]) == 0;
}
bool GeoHash::atMinY() const {
return (_hash & geoBitSets.allY[_bits]) == 0;
}
bool GeoHash::atMaxX() const {
return (_hash & geoBitSets.allX[_bits]) == geoBitSets.allX[_bits];
}
bool GeoHash::atMaxY() const {
return (_hash & geoBitSets.allY[_bits]) == geoBitSets.allY[_bits];
}
// TODO(hk): comment better
void GeoHash::move(int x, int y) {
verify(_bits);
_move(0, x);
_move(1, y);
}
// TODO(hk): comment much better
void GeoHash::_move(unsigned offset, int d) {
if (d == 0)
return;
verify(d <= 1 && d>= -1); // TEMP
bool from, to;
if (d > 0) {
from = 0;
to = 1;
}
else {
from = 1;
to = 0;
}
unsigned pos = (_bits * 2) - 1;
if (offset == 0)
pos--;
while (true) {
if (getBit(pos) == from) {
setBit(pos , to);
return;
}
if (pos < 2) {
// overflow
for (; pos < (_bits * 2) ; pos += 2) {
setBit(pos , from);
}
return;
}
setBit(pos , from);
pos -= 2;
}
verify(0);
}
GeoHash& GeoHash::operator=(const GeoHash& h) {
_hash = h._hash;
_bits = h._bits;
return *this;
}
bool GeoHash::operator==(const GeoHash& h) const {
return _hash == h._hash && _bits == h._bits;
}
bool GeoHash::operator!=(const GeoHash& h) const {
return !(*this == h);
}
bool GeoHash::operator<(const GeoHash& h) const {
if (_hash != h._hash) {
return static_cast(_hash) <
static_cast(h._hash);
}
return _bits < h._bits;
}
// Append the hash in s to our current hash. We expect s to be '0' or '1' or '\0',
// though we also treat non-'1' values as '0'.
GeoHash& GeoHash::operator+=(const char* s) {
unsigned pos = _bits * 2;
_bits += strlen(s) / 2;
verify(_bits <= 32);
while ('\0' != s[0]) {
if (s[0] == '1')
setBit(pos , 1);
pos++;
s++;
}
return *this;
}
GeoHash GeoHash::operator+(const char *s) const {
GeoHash n = *this;
n += s;
return n;
}
GeoHash GeoHash::operator+(const std::string& s) const {
return operator+(s.c_str());
}
/*
* Keep the upper _bits*2 bits of _hash, clear the lower bits.
* Maybe there's junk in there? Not sure why this is done.
*/
void GeoHash::clearUnusedBits() {
// Left shift count should be less than 64
if (_bits == 0) {
_hash = 0;
return;
}
static long long FULL = 0xFFFFFFFFFFFFFFFFLL;
long long mask = FULL << (64 - (_bits * 2));
_hash &= mask;
}
static void appendHashToBuilder(long long hash,
BSONObjBuilder* builder,
const char* fieldName) {
char buf[8];
copyAndReverse(buf, (char*) &hash);
builder->appendBinData(fieldName, 8, bdtCustom, buf);
}
void GeoHash::appendHashMin(BSONObjBuilder* builder, const char* fieldName) const {
// The min bound of a GeoHash region has all the unused suffix bits set to 0
appendHashToBuilder(_hash, builder, fieldName);
}
void GeoHash::appendHashMax(BSONObjBuilder* builder, const char* fieldName) const {
// The max bound of a GeoHash region has all the unused suffix bits set to 1
long long suffixMax = ~(geoBitSets.allX[_bits] | geoBitSets.allY[_bits]);
long long hashMax = _hash | suffixMax;
appendHashToBuilder(hashMax, builder, fieldName);
}
long long GeoHash::getHash() const {
return _hash;
}
unsigned GeoHash::getBits() const {
return _bits;
}
GeoHash GeoHash::commonPrefix(const GeoHash& other) const {
unsigned i = 0;
for (; i < _bits && i < other._bits; i++) {
if (getBitX(i) == other.getBitX(i) && getBitY(i) == other.getBitY(i))
continue;
break;
}
// i is how many bits match between this and other.
return GeoHash(_hash, i);
}
bool GeoHash::subdivide( GeoHash children[4] ) const {
if ( _bits == 32 ) {
return false;
}
children[0] = GeoHash( _hash, _bits + 1 ); // (0, 0)
children[1] = children[0];
children[1].setBit( _bits * 2 + 1, 1 ); // (0, 1)
children[2] = children[0];
children[2].setBit( _bits * 2, 1 ); // (1, 0)
children[3] = GeoHash(children[1]._hash | children[2]._hash, _bits + 1); // (1, 1)
return true;
}
bool GeoHash::contains(const GeoHash& other) const {
return _bits <= other._bits && other.hasPrefix(*this);
}
GeoHash GeoHash::parent(unsigned int level) const {
return GeoHash(_hash, level);
}
GeoHash GeoHash::parent() const {
verify(_bits > 0);
return GeoHash(_hash, _bits - 1);
}
void GeoHash::appendVertexNeighbors(unsigned level, vector* output) const {
invariant(level >= 0 && level < _bits);
// Parent at the given level.
GeoHash parentHash = parent(level);
output->push_back(parentHash);
// Generate the neighbors of parent that are closest to me.
unsigned px, py, parentBits;
parentHash.unhash(&px, &py);
parentBits = parentHash.getBits();
// No Neighbors for the top level.
if (parentBits == 0U) return;
// Position in parent
// Y
// ^
// | 01, 11
// | 00, 10
// +----------> X
// We can guarantee _bits > 0.
long long posInParent = (_hash >> (64 - 2 * (parentBits + 1))) & 3LL;
// 1 bit at parent's level, the least significant bit of parent.
unsigned parentMask = 1U << (32 - parentBits);
// Along X Axis
if ((posInParent & 2LL) == 0LL) {
// Left side of parent, X - 1
if (!parentHash.atMinX()) output->push_back(GeoHash(px - parentMask, py, parentBits));
} else {
// Right side of parent, X + 1
if (!parentHash.atMaxX()) output->push_back(GeoHash(px + parentMask, py, parentBits));
}
// Along Y Axis
if ((posInParent & 1LL) == 0LL) {
// Bottom of parent, Y - 1
if (!parentHash.atMinY()) output->push_back(GeoHash(px, py - parentMask, parentBits));
} else {
// Top of parent, Y + 1
if (!parentHash.atMaxY()) output->push_back(GeoHash(px, py + parentMask, parentBits));
}
// Four corners
if (posInParent == 0LL) {
if (!parentHash.atMinX() && !parentHash.atMinY())
output->push_back(GeoHash(px - parentMask, py - parentMask, parentBits));
} else if (posInParent == 1LL) {
if (!parentHash.atMinX() && !parentHash.atMaxY())
output->push_back(GeoHash(px - parentMask, py + parentMask, parentBits));
} else if (posInParent == 2LL) {
if (!parentHash.atMaxX() && !parentHash.atMinY())
output->push_back(GeoHash(px + parentMask, py - parentMask, parentBits));
} else {
// PosInParent == 3LL
if (!parentHash.atMaxX() && !parentHash.atMaxY())
output->push_back(GeoHash(px + parentMask, py + parentMask, parentBits));
}
}
static BSONField bitsField("bits", 26);
static BSONField maxField("max", 180.0);
static BSONField minField("min", -180.0);
// a x b
// | | |
// -----|---o-----|---------|-- "|" is a representable double number.
//
// In the above figure, b is the next representable double number after a, so
// |a - b|/|a| = epsilon (ULP) ~= 2.22E-16.
//
// An exact number x will be represented as the nearest representable double, which is a.
// |x - a|/|a| <= 0.5 ULP ~= 1.11e-16
//
// IEEE floating-point operations have a maximum error of 0.5 ULPS (units in
// the last place). For double-precision numbers, this works out to 2**-53
// (about 1.11e-16) times the magnitude of the result.
double const GeoHashConverter::kMachinePrecision = 0.5 * std::numeric_limits::epsilon();
Status GeoHashConverter::parseParameters(const BSONObj& paramDoc,
GeoHashConverter::Parameters* params) {
string errMsg;
if (FieldParser::FIELD_INVALID
== FieldParser::extractNumber(paramDoc, bitsField, ¶ms->bits, &errMsg)) {
return Status(ErrorCodes::InvalidOptions, errMsg);
}
if (FieldParser::FIELD_INVALID
== FieldParser::extractNumber(paramDoc, maxField, ¶ms->max, &errMsg)) {
return Status(ErrorCodes::InvalidOptions, errMsg);
}
if (FieldParser::FIELD_INVALID
== FieldParser::extractNumber(paramDoc, minField, ¶ms->min, &errMsg)) {
return Status(ErrorCodes::InvalidOptions, errMsg);
}
if (params->bits < 1 || params->bits > 32) {
return Status(ErrorCodes::InvalidOptions,
str::stream() << "bits for hash must be > 0 and <= 32, "
<< "but " << params->bits << " bits were specified");
}
if (params->min >= params->max) {
return Status(ErrorCodes::InvalidOptions,
str::stream() << "region for hash must be valid and have positive area, "
<< "but [" << params->min << ", " << params->max << "] "
<< "was specified");
}
double numBuckets = (1024 * 1024 * 1024 * 4.0);
params->scaling = numBuckets / (params->max - params->min);
return Status::OK();
}
GeoHashConverter::GeoHashConverter(const Parameters& params) : _params(params) {
init();
}
void GeoHashConverter::init() {
// TODO(hk): What do we require of the values in params?
// Compute how much error there is so it can be used as a fudge factor.
GeoHash a(0, 0, _params.bits);
GeoHash b = a;
b.move(1, 1);
// Epsilon is 1/100th of a bucket size
// TODO: Can we actually find error bounds for the sqrt function?
double epsilon = 0.001 / _params.scaling;
_error = distanceBetweenHashes(a, b) + epsilon;
// Error in radians
_errorSphere = deg2rad(_error);
// 8 * max(|max|, |min|) * u
_errorUnhashToBox = calcUnhashToBoxError(_params);
}
double GeoHashConverter::distanceBetweenHashes(const GeoHash& a, const GeoHash& b) const {
double ax, ay, bx, by;
unhash(a, &ax, &ay);
unhash(b, &bx, &by);
double dx = bx - ax;
double dy = by - ay;
return sqrt((dx * dx) + (dy * dy));
}
/**
* Hashing functions. Convert the following types (which have a double precision point)
* to a GeoHash:
* BSONElement
* BSONObj
* Point
* double, double
*/
GeoHash GeoHashConverter::hash(const Point &p) const {
return hash(p.x, p.y);
}
GeoHash GeoHashConverter::hash(const BSONElement& e) const {
if (e.isABSONObj())
return hash(e.embeddedObject());
return GeoHash(e, _params.bits);
}
GeoHash GeoHashConverter::hash(const BSONObj& o) const {
return hash(o, NULL);
}
// src is printed out as debugging information. Maybe it is actually somehow the 'source' of o?
GeoHash GeoHashConverter::hash(const BSONObj& o, const BSONObj* src) const {
BSONObjIterator i(o);
uassert(13067,
str::stream() << "geo field is empty"
<< (src ? causedBy((*src).toString()) : ""),
i.more());
BSONElement x = i.next();
uassert(13068,
str::stream() << "geo field only has 1 element"
<< causedBy(src ? (*src).toString() : x.toString()),
i.more());
BSONElement y = i.next();
uassert(13026,
str::stream() << "geo values must be 'legacy coordinate pairs' for 2d indexes"
<< causedBy(src ? (*src).toString() :
BSON_ARRAY(x << y).toString()),
x.isNumber() && y.isNumber());
uassert(13027,
str::stream() << "point not in interval of [ " << _params.min << ", "
<< _params.max << " ]"
<< causedBy(src ? (*src).toString() :
BSON_ARRAY(x.number() << y.number()).toString()),
x.number() <= _params.max && x.number() >= _params.min &&
y.number() <= _params.max && y.number() >= _params.min);
return GeoHash(convertToHashScale(x.number()), convertToHashScale(y.number()),
_params.bits);
}
GeoHash GeoHashConverter::hash(double x, double y) const {
uassert(16433,
str::stream() << "point not in interval of [ " << _params.min << ", "
<< _params.max << " ]"
<< causedBy(BSON_ARRAY(x << y).toString()),
x <= _params.max && x >= _params.min &&
y <= _params.max && y >= _params.min);
return GeoHash(convertToHashScale(x), convertToHashScale(y) , _params.bits);
}
/**
* Unhashing functions. These convert from a "discretized" GeoHash to the "continuous"
* doubles according to our scaling parameters.
*
* Possible outputs:
* double, double
* Point
* BSONObj
*/
// TODO(hk): these should have consistent naming
Point GeoHashConverter::unhashToPoint(const GeoHash &h) const {
Point point;
unhash(h, &point.x, &point.y);
return point;
}
Point GeoHashConverter::unhashToPoint(const BSONElement &e) const {
return unhashToPoint(hash(e));
}
BSONObj GeoHashConverter::unhashToBSONObj(const GeoHash& h) const {
unsigned x, y;
h.unhash(&x, &y);
BSONObjBuilder b;
b.append("x", convertFromHashScale(x));
b.append("y", convertFromHashScale(y));
return b.obj();
}
void GeoHashConverter::unhash(const GeoHash &h, double *x, double *y) const {
unsigned a, b;
h.unhash(&a, &b);
*x = convertFromHashScale(a);
*y = convertFromHashScale(b);
}
Box GeoHashConverter::unhashToBoxCovering(const GeoHash &h) const {
if (h.getBits() == 0) {
// Return the result without any error.
return Box(Point(_params.min, _params.min), Point(_params.max, _params.max));
}
double sizeEdgeBox = sizeEdge(h.getBits());
Point min(unhashToPoint(h));
Point max(min.x + sizeEdgeBox, min.y + sizeEdgeBox);
// Expand the box by the error bound
Box box(min, max);
box.fudge(_errorUnhashToBox);
return box;
}
double GeoHashConverter::calcUnhashToBoxError(const GeoHashConverter::Parameters& params) {
return std::max(fabs(params.min), fabs(params.max))
* GeoHashConverter::kMachinePrecision* 8;
}
double GeoHashConverter::sizeOfDiag(const GeoHash& a) const {
GeoHash b = a;
b.move(1, 1);
return distanceBetweenHashes(a, b);
}
// Relative error = epsilon_(max-min). ldexp() is just a direct translation to
// floating point exponent, and should be exact.
double GeoHashConverter::sizeEdge(unsigned level) const {
invariant(level >= 0);
invariant((int)level <= _params.bits);
return ldexp(_params.max - _params.min, -level);
}
// Convert from a double in [0, (max-min)*scaling] to [min, max]
double GeoHashConverter::convertDoubleFromHashScale(double x) const {
x /= _params.scaling;
x += _params.min;
return x;
}
// Convert from an unsigned in [0, (max-min)*scaling] to [min, max]
double GeoHashConverter::convertFromHashScale(unsigned in) const {
return convertDoubleFromHashScale((double)in);
}
// Convert from a double that is [min, max] to a double in [0, (max-min)*scaling]
double GeoHashConverter::convertToDoubleHashScale(double in) const {
verify(in <= _params.max && in >= _params.min);
if (in == _params.max) {
// prevent aliasing with _min by moving inside the "box"
// makes 180 == 179.999 (roughly)
in -= _error / 2;
}
in -= _params.min;
verify(in >= 0);
return in * _params.scaling;
}
// Convert from a double that is [min, max] to an unsigned in [0, (max-min)*scaling]
unsigned GeoHashConverter::convertToHashScale(double in) const {
return static_cast(convertToDoubleHashScale(in));
}
} // namespace mongo