/**
* Copyright (C) 2012 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "mongo/pch.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/geo/core.h"
#include "mongo/db/geo/hash.h"
#include "mongo/db/geo/shapes.h"
#include "mongo/util/mongoutils/str.h"
// So we can get at the str namespace.
using namespace mongoutils;
namespace mongo {
// Stream a GeoHash as its binary-string form ("0110..."), for debugging/logs.
inline std::ostream& operator<<(std::ostream &s, const GeoHash &h) {
    return s << h.toString();
}
/*
 * GeoBitSets fills out various bit patterns that are used by GeoHash.
 * What patterns? Look at the comments next to the fields.
 */
class GeoBitSets {
public:
    GeoBitSets() {
        // Zero the whole table first.  The loop below only writes entries
        // whose index has bits in even positions; without this, lookups of
        // any other index would read uninitialized memory.
        for (unsigned i = 0; i < 256; i++) {
            hashedToNormal[i] = 0;
        }
        // hashedToNormal inverts bit-spreading: for each 4-bit value i,
        // 'fixed' is i with bit j moved to bit 2*j, and hashedToNormal[fixed]
        // maps that spread pattern back to i.
        for (unsigned i = 0; i < 16; i++) {
            unsigned fixed = 0;
            for (int j = 0; j < 4; j++) {
                if (i & (1 << j))
                    fixed |= (1 << (j * 2));
            }
            hashedToNormal[fixed] = i;
        }
        // Fill allX/allY: allX[i] has i alternating bits set starting at the
        // MSB; allY[i] is the same pattern shifted down one bit.
        long long currAllX = 0, currAllY = 0;
        for (int i = 0; i < 64; i++) {
            long long thisBit = 1LL << (63 - i);
            if (i % 2 == 0) {
                allX[i / 2] = currAllX;
                currAllX |= thisBit;
            } else {
                allY[i / 2] = currAllY;
                currAllY |= thisBit;
            }
        }
        // The 33rd entry (index 32) holds the fully-populated patterns.
        // GeoHash::atMinX() etc. index all[XY][_bits] and _bits may be as
        // large as 32, so without these entries that read is out of bounds.
        allX[32] = currAllX;
        allY[32] = currAllY;
    }

    // The 0-th entries of each all[XY] is 0.
    // The i-th entry of allX has i alternating bits turned on starting
    // with the most significant. Example:
    // allX[1] = 8000000000000000
    // allX[2] = a000000000000000
    // allX[3] = a800000000000000
    long long allX[33];
    // Same alternating bits but starting one bit down from the MSB:
    // allY[1] = 4000000000000000
    // allY[2] = 5000000000000000
    // allY[3] = 5400000000000000
    long long allY[33];
    // Maps a byte whose even bit positions carry a spread 4-bit value back
    // to that 4-bit value; all other entries are 0.
    unsigned hashedToNormal[256];
};

// Oh global variables.
GeoBitSets geoBitSets;
// For i return the i-th most significant bit.
// mask32For(0) = 0x80000000
// mask32For(1) = 0x40000000
// etc.
// The shift is performed on an unsigned value: shifting a 1 into the sign
// bit of a signed int (i == 0) is undefined behavior in C++.
inline static int mask32For(const int i) {
    return static_cast<int>(1u << (31 - i));
}

// Same, for the 64-bit interleaved hash: mask64For(0) is the MSB.
inline static long long mask64For(const int i) {
    return static_cast<long long>(1ULL << (63 - i));
}
/* This class maps an x,y coordinate pair to a hash value.
* This should probably be renamed/generalized so that it's more of a planar hash,
* and we also have a spherical hash, etc.
*/
// Default: an empty hash with zero bits of precision.
GeoHash::GeoHash() : _hash(0), _bits(0) { }

// Construct from a string of interleaved '0'/'1' characters, e.g. "0011".
GeoHash::GeoHash(const string& hash) {
    initFromString(hash.c_str());
}

GeoHash::GeoHash(const char *s) {
    initFromString(s);
}
// Parse a string of interleaved '0'/'1' characters into the hash.
// Characters other than '1' are treated as '0'.  The string must have even
// length (bits come in x/y pairs) and at most 64 characters.
void GeoHash::initFromString(const char *s) {
    const int length = strlen(s);
    uassert(16457, "initFromString passed a too-long string", length <= 64);
    uassert(16458, "initFromString passed an odd length string ", 0 == (length % 2));
    _hash = 0;
    // _bits is how many bits for X or Y, not both, so we divide by 2.
    _bits = length / 2;
    for (int i = 0; i < length; ++i) {
        if ('1' == s[i]) {
            setBit(i, 1);
        }
    }
}
// Construct from a BSON element.  This only works if e is BinData: the 8
// raw bytes are copied (byte-reversed) into _hash.  Any other element type
// is an error -- e.g. an index built by a pre-release version.
GeoHash::GeoHash(const BSONElement& e, unsigned bits) {
    _bits = bits;
    if (e.type() == BinData) {
        int len = 0;
        _copyAndReverse((char*)&_hash, e.binData(len));
        // A stored geohash is always exactly 8 bytes.
        verify(len == 8);
    } else {
        cout << "GeoHash bad element: " << e << endl;
        uassert(13047, "wrong type for geo index. if you're using a pre-release version,"
                " need to rebuild index", 0);
    }
    // The stored bytes may contain junk beyond the first _bits*2 bits.
    clearUnusedBits();
}
// Build a hash by interleaving the top 'bits' bits of x and y: the i-th
// bit of x lands at interleaved position 2i, the i-th bit of y at 2i+1.
GeoHash::GeoHash(unsigned x, unsigned y, unsigned bits) {
    verify(bits <= 32);
    _hash = 0;
    _bits = bits;
    for (unsigned pos = 0; pos < bits; pos++) {
        if (isBitSet(x, pos)) {
            _hash |= mask64For(2 * pos);
        }
        if (isBitSet(y, pos)) {
            _hash |= mask64For(2 * pos + 1);
        }
    }
}
GeoHash::GeoHash(const GeoHash& old) : _hash(old._hash), _bits(old._bits) { }

// Construct from a raw 64-bit value; any bits beyond the stated precision
// are cleared.
GeoHash::GeoHash(long long hash, unsigned bits) : _hash(hash), _bits(bits) {
    clearUnusedBits();
}
/*
 * De-interleave _hash into its x and y components, one byte at a time.
 * Within each byte the even bit positions (mask 0x55) are y bits and the
 * odd positions are x bits; geoBitSets.hashedToNormal compacts the four
 * spread bits of a byte into a 4-bit nibble.
 * NOTE(review): indexing c[0..7] from the low address assumes the low
 * bytes of _hash hold the least significant interleaved bits (i.e. a
 * little-endian host) -- confirm against _copyAndReverse's serialization.
 */
void GeoHash::unhash_fast(unsigned *x, unsigned *y) const {
    *x = 0;
    *y = 0;
    // BUGFIX: the cast was missing its target type and did not compile.
    const char* c = reinterpret_cast<const char*>(&_hash);
    for (int i = 0; i < 8; i++) {
        // Even (0x55) bits of this byte are y bits...
        unsigned t = (unsigned)(c[i]) & 0x55;
        *y |= (geoBitSets.hashedToNormal[t] << (4 * i));
        // ...and the odd bits, shifted down one, are x bits.
        t = ((unsigned)(c[i]) >> 1) & 0x55;
        *x |= (geoBitSets.hashedToNormal[t] << (4 * i));
    }
}
// Reference implementation of de-interleaving: walk the bit pairs one at a
// time and rebuild x and y from their individual bits.
void GeoHash::unhash_slow(unsigned *x, unsigned *y) const {
    *x = 0;
    *y = 0;
    for (unsigned pos = 0; pos < _bits; pos++) {
        if (getBitX(pos)) {
            *x |= mask32For(pos);
        }
        if (getBitY(pos)) {
            *y |= mask32For(pos);
        }
    }
}

// Public entry point; delegates to the table-driven fast path.
void GeoHash::unhash(unsigned *x, unsigned *y) const {
    unhash_fast(x, y);
}
/** Is the 'bit'-th most significant bit of val set? (NOT the least significant) */
bool GeoHash::isBitSet(unsigned val, unsigned bit) {
    return 0 != (val & mask32For(bit));
}

/** Return a GeoHash with one bit of precision lost.
 *  NOTE(review): assumes _bits > 0; with _bits == 0 the unsigned subtraction
 *  wraps around -- confirm callers never call up() on an empty hash. */
GeoHash GeoHash::up() const {
    return GeoHash(_hash, _bits - 1);
}
// Does 'other' name an ancestor cell of this hash, i.e. do the first
// other._bits x/y bit pairs of both hashes agree?
bool GeoHash::hasPrefix(const GeoHash& other) const {
    verify(other._bits <= _bits);
    if (other._bits == 0)
        return true;
    // XOR leaves a 1 wherever the two hashes differ.
    long long x = other._hash ^ _hash;
    // We only care about the leftmost other._bits (well, really _bits*2 since we have x and
    // y).  _hash is signed so this is an arithmetic shift, but that's fine:
    // any differing prefix bit still leaves x nonzero.
    x = x >> (64 - (other._bits * 2));
    return x == 0;
}
// Render the hash as a string of '0'/'1' characters, MSB first,
// one character per interleaved bit (so length is _bits * 2).
string GeoHash::toString() const {
    string out;
    out.reserve(_bits * 2);
    for (unsigned pos = 0; pos < _bits * 2; pos++) {
        out.push_back((_hash & mask64For(pos)) ? '1' : '0');
    }
    return out;
}

// Render the raw 64-bit hash value in hexadecimal.
string GeoHash::toStringHex1() const {
    stringstream os;
    os << hex << _hash;
    return os.str();
}
// Set or clear the pos-th most significant bit of the interleaved hash.
void GeoHash::setBit(unsigned pos, bool value) {
    verify(pos < _bits * 2);
    const long long mask = mask64For(pos);
    if (value) {
        _hash |= mask;
    } else {
        _hash &= ~mask;
    }
}

// Is the pos-th most significant bit of the interleaved hash set?
bool GeoHash::getBit(unsigned pos) const {
    return 0 != (_hash & mask64For(pos));
}

// The i-th x bit lives at interleaved position 2i.
bool GeoHash::getBitX(unsigned pos) const {
    verify(pos < 32);
    return getBit(2 * pos);
}

// The i-th y bit lives at interleaved position 2i + 1.
bool GeoHash::getBitY(unsigned pos) const {
    verify(pos < 32);
    return getBit(2 * pos + 1);
}
// Wrap the hash as a one-field BSONObj: 8 bytes of BinData under 'name'.
BSONObj GeoHash::wrap(const char* name) const {
    BSONObjBuilder b(20);
    appendToBuilder(&b, name);
    BSONObj o = b.obj();
    // With an empty field name the resulting object has a fixed size;
    // sanity-check it.
    if ('\0' == name[0]) verify(o.objsize() == 20);
    return o;
}

// Do we have a non-trivial GeoHash?
bool GeoHash::constrains() const {
    return _bits > 0;
}

// Could our GeoHash have higher precision?  32 bits per coordinate is max.
bool GeoHash::canRefine() const {
    return _bits < 32;
}
/**
 * Hashing works like this:
 * Divide the world into 4 buckets.  Label each one as such:
 * -----------------
 * |       |       |
 * |       |       |
 * |  0,1  |  1,1  |
 * -----------------
 * |       |       |
 * |       |       |
 * |  0,0  |  1,0  |
 * -----------------
 * We recursively divide each cell, furthermore.
 * The functions below tell us what quadrant we're in *at the finest level
 * of the subdivision.*
 *
 * NOTE(review): these index geoBitSets.all[XY][_bits]; _bits can be as
 * large as 32 while the allX/allY arrays have 32 entries -- confirm _bits
 * is < 32 on these paths or enlarge the tables.
 */
// Are all x bits 0, i.e. is the cell on the minimum-x edge of the grid?
bool GeoHash::atMinX() const {
    return (_hash & geoBitSets.allX[_bits]) == 0;
}
// Are all y bits 0?
bool GeoHash::atMinY() const {
    return (_hash & geoBitSets.allY[_bits]) == 0;
}
// Are all x bits 1, i.e. is the cell on the maximum-x edge of the grid?
bool GeoHash::atMaxX() const {
    return (_hash & geoBitSets.allX[_bits]) == geoBitSets.allX[_bits];
}
// Are all y bits 1?
bool GeoHash::atMaxY() const {
    return (_hash & geoBitSets.allY[_bits]) == geoBitSets.allY[_bits];
}
// Move this cell by x steps along the x axis and y steps along the y axis.
// Only steps of -1, 0, or 1 are supported (see _move).
void GeoHash::move(int x, int y) {
    verify(_bits);
    _move(0, x);
    _move(1, y);
}

// Add d (+1 or -1) to one coordinate of the hash in place.  offset selects
// the coordinate: 0 = x (even interleaved positions, counted from the MSB),
// 1 = y (odd positions).  This is a binary increment/decrement with
// carry/borrow propagated over every other bit of _hash.
void GeoHash::_move(unsigned offset, int d) {
    if (d == 0)
        return;
    verify(d <= 1 && d>= -1); // TEMP
    // Incrementing flips trailing 1s to 0 until it can flip a 0 to 1;
    // decrementing is the mirror image.  'from' is the bit value we flip
    // while carrying; 'to' is the value that terminates the carry.
    bool from, to;
    if (d > 0) {
        from = 0;
        to = 1;
    }
    else {
        from = 1;
        to = 0;
    }
    // Start at the least significant bit of the chosen coordinate.
    unsigned pos = (_bits * 2) - 1;
    if (offset == 0)
        pos--;
    while (true) {
        if (getBit(pos) == from) {
            setBit(pos , to);
            return;
        }
        // pos < 2 means we just consumed the coordinate's most significant
        // bit: the carry overflowed, so wrap around (the grid is a torus)
        // by resetting every bit of this coordinate to 'from'.
        if (pos < 2) {
            // overflow
            for (; pos < (_bits * 2) ; pos += 2) {
                setBit(pos , from);
            }
            return;
        }
        setBit(pos , from);
        pos -= 2;
    }
    verify(0);  // unreachable: the loop above always returns
}
GeoHash& GeoHash::operator=(const GeoHash& h) {
    _hash = h._hash;
    _bits = h._bits;
    return *this;
}

// Two hashes are equal only if both the bit pattern and the precision match.
bool GeoHash::operator==(const GeoHash& h) const {
    return _hash == h._hash && _bits == h._bits;
}

bool GeoHash::operator!=(const GeoHash& h) const {
    return !(*this == h);
}

// Order by hash value, then by precision.
// NOTE(review): _hash is a signed long long, so this is a signed comparison
// (hashes with the top bit set sort first) -- confirm callers don't expect
// unsigned / bitwise-lexicographic ordering.
bool GeoHash::operator<(const GeoHash& h) const {
    if (_hash != h._hash) return _hash < h._hash;
    return _bits < h._bits;
}
// Append the bits in s to our current hash.  We expect s to contain '0' or
// '1' characters, though any non-'1' character is treated as '0'.
GeoHash& GeoHash::operator+=(const char* s) {
    unsigned pos = _bits * 2;
    // Each pair of appended characters adds one bit of precision.
    _bits += strlen(s) / 2;
    verify(_bits <= 32);
    for (; *s != '\0'; ++s, ++pos) {
        if ('1' == *s)
            setBit(pos, 1);
    }
    return *this;
}

GeoHash GeoHash::operator+(const char *s) const {
    GeoHash result = *this;
    result += s;
    return result;
}

GeoHash GeoHash::operator+(const std::string& s) const {
    return operator+(s.c_str());
}
/*
 * Keep the most significant _bits*2 bits of _hash and clear the rest.
 * Raw hash data handed to the constructors (and precision reductions via
 * up()) can leave junk in the unused low-order bits.
 */
void GeoHash::clearUnusedBits() {
    // BUGFIX: a shift by 64 (the _bits == 0 case) is undefined behavior in
    // C++.  A zero-precision hash keeps no bits, so clear everything.
    if (_bits == 0) {
        _hash = 0;
        return;
    }
    static const long long FULL = 0xFFFFFFFFFFFFFFFFLL;
    long long mask = FULL << (64 - (_bits * 2));
    _hash &= mask;
}
// Append the hash to the builder provided, as 8 bytes of BinData with the
// byte order reversed (the on-disk/index byte ordering).
void GeoHash::appendToBuilder(BSONObjBuilder* b, const char * name) const {
    char buf[8];
    _copyAndReverse(buf, (char*)&_hash);
    b->appendBinData(name, 8, bdtCustom, buf);
}

// The raw 64-bit interleaved hash value.
long long GeoHash::getHash() const {
    return _hash;
}

// Bits of precision per coordinate (not the total interleaved bit count).
unsigned GeoHash::getBits() const {
    return _bits;
}
// Return the hash of the smallest-precision cell containing both this cell
// and other: our bits, truncated to the length of the shared prefix.
GeoHash GeoHash::commonPrefix(const GeoHash& other) const {
    unsigned matched = 0;
    while (matched < _bits && matched < other._bits &&
           getBitX(matched) == other.getBitX(matched) &&
           getBitY(matched) == other.getBitY(matched)) {
        matched++;
    }
    // matched is how many x/y bit pairs agree between this and other.
    return GeoHash(_hash, matched);
}

// Binary data is stored in some particular byte ordering that requires this.
void GeoHash::_copyAndReverse(char *dst, const char *src) {
    const unsigned kHashBytes = 8;
    for (unsigned i = 0; i < kHashBytes; i++) {
        dst[i] = src[(kHashBytes - 1) - i];
    }
}
/*
 * Set up the converter and precompute _error: the distance from a cell to
 * its (1,1)-diagonal neighbor at this precision plus a small epsilon, used
 * elsewhere as a fudge factor.
 * BUGFIX: the parameter was mangled ("¶ms", an HTML-entity artifact of
 * "&params") and did not compile.
 */
GeoHashConverter::GeoHashConverter(const Parameters &params) : _params(params) {
    // TODO(hk): What do we require of the values in params?
    // Compute how much error there is so it can be used as a fudge factor.
    GeoHash a(0, 0, _params.bits);
    GeoHash b = a;
    b.move(1, 1);
    // NOTE(review): the original comment said "1/100th of a bucket size"
    // but 0.001 is 1/1000 in unhashed units -- confirm the intended value.
    // TODO: Can we actually find error bounds for the sqrt function?
    double epsilon = 0.001 / _params.scaling;
    _error = distanceBetweenHashes(a, b) + epsilon;
    // Error in radians
    _errorSphere = deg2rad(_error);
}
double GeoHashConverter::distanceBetweenHashes(const GeoHash& a, const GeoHash& b) const {
double ax, ay, bx, by;
unhash(a, &ax, &ay);
unhash(b, &bx, &by);
double dx = bx - ax;
double dy = by - ay;
return sqrt((dx * dx) + (dy * dy));
}
/**
 * Hashing functions.  Convert the following types (which have a double
 * precision point) to a GeoHash:
 * BSONElement
 * BSONObj
 * Point
 * double, double
 */
GeoHash GeoHashConverter::hash(const Point &p) const {
    return hash(p.x, p.y);
}

// An element that is an object/array is treated as a coordinate pair;
// anything else is handed to the GeoHash constructor (which requires
// BinData -- an already-hashed value).
GeoHash GeoHashConverter::hash(const BSONElement& e) const {
    if (e.isABSONObj())
        return hash(e.embeddedObject());
    return GeoHash(e, _params.bits);
}

GeoHash GeoHashConverter::hash(const BSONObj& o) const {
    return hash(o, NULL);
}
// Hash the first two numeric elements of o (a legacy coordinate pair).
// src, when provided, is only used to make error messages more informative
// -- presumably the document the pair came from.
GeoHash GeoHashConverter::hash(const BSONObj& o, const BSONObj* src) const {
    BSONObjIterator i(o);
    // The object must have at least two elements...
    uassert(13067,
            str::stream() << "geo field is empty"
                          << (src ? causedBy((*src).toString()) : ""),
            i.more());
    BSONElement x = i.next();
    uassert(13068,
            str::stream() << "geo field only has 1 element"
                          << causedBy(src ? (*src).toString() : x.toString()),
            i.more());
    BSONElement y = i.next();
    // ...both of which must be numbers...
    uassert(13026,
            str::stream() << "geo values must be 'legacy coordinate pairs' for 2d indexes"
                          << causedBy(src ? (*src).toString() :
                                      BSON_ARRAY(x << y).toString()),
            x.isNumber() && y.isNumber());
    // ...and both must lie within the configured [min, max] interval.
    uassert(13027,
            str::stream() << "point not in interval of [ " << _params.min << ", "
                          << _params.max << " ]"
                          << causedBy(src ? (*src).toString() :
                                      BSON_ARRAY(x.number() << y.number()).toString()),
            x.number() <= _params.max && x.number() >= _params.min &&
            y.number() <= _params.max && y.number() >= _params.min);
    return GeoHash(convertToHashScale(x.number()), convertToHashScale(y.number()),
                   _params.bits);
}
// Hash a raw coordinate pair.  Both values must lie in [min, max].
GeoHash GeoHashConverter::hash(double x, double y) const {
    uassert(16433,
            str::stream() << "point not in interval of [ " << _params.min << ", "
                          << _params.max << " ]"
                          << causedBy(BSON_ARRAY(x << y).toString()),
            x <= _params.max && x >= _params.min &&
            y <= _params.max && y >= _params.min);
    return GeoHash(convertToHashScale(x), convertToHashScale(y) , _params.bits);
}
/**
 * Unhashing functions.  These convert from a "discretized" GeoHash to the
 * "continuous" doubles according to our scaling parameters.
 *
 * Possible outputs:
 * double, double
 * Point
 * BSONObj
 */
// TODO(hk): these should have consistent naming
Point GeoHashConverter::unhashToPoint(const GeoHash &h) const {
    Point point;
    unhash(h, &point.x, &point.y);
    return point;
}

// Hash the element first (validating it), then unhash: yields the center
// of the cell the input point falls into.
Point GeoHashConverter::unhashToPoint(const BSONElement &e) const {
    return unhashToPoint(hash(e));
}

// Unhash to a {x: ..., y: ...} document.
BSONObj GeoHashConverter::unhashToBSONObj(const GeoHash& h) const {
    unsigned x, y;
    h.unhash(&x, &y);
    BSONObjBuilder b;
    b.append("x", convertFromHashScale(x));
    b.append("y", convertFromHashScale(y));
    return b.obj();
}
void GeoHashConverter::unhash(const GeoHash &h, double *x, double *y) const {
unsigned a, b;
h.unhash(&a, &b);
*x = convertFromHashScale(a);
*y = convertFromHashScale(b);
}
double GeoHashConverter::sizeOfDiag(const GeoHash& a) const {
GeoHash b = a;
b.move(1, 1);
return distanceBetweenHashes(a, b);
}
// Side length of cell a along the x axis.  An unconstrained hash covers
// the whole world.
double GeoHashConverter::sizeEdge(const GeoHash& a) const {
    if (!a.constrains())
        return _params.max - _params.min;
    double ax, ay, bx, by;
    GeoHash b = a;
    // Compare against the diagonal neighbor; only the x delta is used.
    b.move(1, 1);
    unhash(a, &ax, &ay);
    unhash(b, &bx, &by);
    // _min and _max are a singularity: moving off the max edge wraps the
    // neighbor around to the min edge, so undo that wrap.
    if (bx == _params.min)
        bx = _params.max;
    return fabs(ax - bx);
}
// Convert from an unsigned in [0, (max-min)*scaling] back to a double in
// [min, max]: undo the scaling, then shift back by the minimum.
double GeoHashConverter::convertFromHashScale(unsigned in) const {
    const double scaled = static_cast<double>(in) / _params.scaling;
    return scaled + _params.min;
}
// Convert from a double in [min, max] to an unsigned in
// [0, (max-min)*scaling]: shift down by the minimum, then scale up.
unsigned GeoHashConverter::convertToHashScale(double in) const {
    verify(in <= _params.max && in >= _params.min);
    if (in == _params.max) {
        // prevent aliasing with _min by moving inside the "box"
        // makes 180 == 179.999 (roughly)
        in -= _error / 2;
    }
    in -= _params.min;
    verify(in >= 0);
    // BUGFIX: the cast was missing its target type and did not compile.
    return static_cast<unsigned>(in * _params.scaling);
}
} // namespace mongo