summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2016-07-27 11:07:23 +0200
committerantirez <antirez@gmail.com>2016-07-27 11:34:25 +0200
commit356a6304ec77783e7fdaf00668a09dc293b810a0 (patch)
treeca7f1d239f61e1b6f731168efcda1f3c20e684cf
parent03f5b508e5e939c941fb0c456b0f48b8e7a2c657 (diff)
downloadredis-356a6304ec77783e7fdaf00668a09dc293b810a0.tar.gz
Multiple GEORADIUS bugs fixed.
By grepping the continuous integration errors log a number of GEORADIUS tests failures were detected. Fortunately when a GEORADIUS failure happens, the test suite logs enough information in order to reproduce the problem: the PRNG seed, coordinates and radius of the query. By reproducing the issues, three different bugs were discovered and fixed in this commit. This commit also improves the already good reporting of the fuzzer and adds the failure vectors as regression tests. The issues found: 1. We need larger squares around the poles in order to cover the area requested by the user. There were already checks in order to use a smaller step (larger squares) but the limit set (+/- 67 degrees) is not enough in certain edge cases, so 66 is used now. 2. Even near the equator, when the search area center is very near the edge of the square, the north, south, west or ovest square may not be able to fully cover the specified radius. Now a test is performed at the edge of the initial guessed search area, and larger squares are used in case the test fails. 3. Because of rounding errors between Redis and Tcl, sometimes the test signaled false positives. This is now addressed. Whenever possible the original code was improved a bit in other ways. A debugging example stanza was added in order to make the next debugging session simpler when the next bug is found.
-rw-r--r--src/debugmacro.h41
-rw-r--r--src/geo.c3
-rw-r--r--src/geohash.c4
-rw-r--r--src/geohash.h2
-rw-r--r--src/geohash_helper.c89
-rw-r--r--tests/unit/geo.tcl81
6 files changed, 199 insertions, 21 deletions
diff --git a/src/debugmacro.h b/src/debugmacro.h
new file mode 100644
index 000000000..df237bad3
--- /dev/null
+++ b/src/debugmacro.h
@@ -0,0 +1,41 @@
+/* This file contains debugging macros to be used when investigating issues.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#define D(...) \
+ do { \
+ FILE *fp = fopen("/tmp/log.txt","a"); \
+ fprintf(fp,"%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ fprintf(fp,__VA_ARGS__); \
+ fprintf(fp,"\n"); \
+ fclose(fp); \
+ } while (0);
diff --git a/src/geo.c b/src/geo.c
index 28cb433dc..331d22435 100644
--- a/src/geo.c
+++ b/src/geo.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
- * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,6 +30,7 @@
#include "geo.h"
#include "geohash_helper.h"
+#include "debugmacro.h"
/* Things exported from t_zset.c only for geo.c, since it is the only other
* part of Redis that requires close zset introspection. */
diff --git a/src/geohash.c b/src/geohash.c
index a5e1dffbf..1ae7a7e05 100644
--- a/src/geohash.c
+++ b/src/geohash.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
* Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
- * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -118,7 +118,7 @@ void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range) {
lat_range->min = GEO_LAT_MIN;
}
-int geohashEncode(GeoHashRange *long_range, GeoHashRange *lat_range,
+int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
double longitude, double latitude, uint8_t step,
GeoHashBits *hash) {
/* Check basic arguments sanity. */
diff --git a/src/geohash.h b/src/geohash.h
index c2f57bed0..ed2ef9336 100644
--- a/src/geohash.h
+++ b/src/geohash.h
@@ -95,7 +95,7 @@ typedef struct {
* -1:failed
*/
void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range);
-int geohashEncode(GeoHashRange *long_range, GeoHashRange *lat_range,
+int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
double longitude, double latitude, uint8_t step,
GeoHashBits *hash);
int geohashEncodeType(double longitude, double latitude,
diff --git a/src/geohash_helper.c b/src/geohash_helper.c
index a65759bc4..139bcea11 100644
--- a/src/geohash_helper.c
+++ b/src/geohash_helper.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
* Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
- * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,6 +36,7 @@
#include "fmacros.h"
#include "geohash_helper.h"
+#include "debugmacro.h"
#include <math.h>
#define D_R (M_PI / 180.0)
@@ -56,8 +57,8 @@ const double MERCATOR_MIN = -20037726.37;
static inline double deg_rad(double ang) { return ang * D_R; }
static inline double rad_deg(double ang) { return ang / D_R; }
-/* You must *ONLY* estimate steps when you are encoding.
- * If you are decoding, always decode to GEO_STEP_MAX (26). */
+/* This function is used in order to estimate the step (bits precision)
+ * of the 9 search area boxes during radius queries. */
uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {
if (range_meters == 0) return 26;
int step = 1;
@@ -65,12 +66,15 @@ uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {
range_meters *= 2;
step++;
}
- step -= 2; /* Make sure range is included in the worst case. */
+ step -= 2; /* Make sure range is included in most of the base cases. */
+
/* Wider range torwards the poles... Note: it is possible to do better
* than this approximation by computing the distance between meridians
* at this latitude, but this does the trick for now. */
- if (lat > 67 || lat < -67) step--;
- if (lat > 80 || lat < -80) step--;
+ if (lat > 66 || lat < -66) {
+ step--;
+ if (lat > 80 || lat < -80) step--;
+ }
/* Frame to valid range. */
if (step < 1) step = 1;
@@ -105,12 +109,14 @@ int geohashBoundingBox(double longitude, double latitude, double radius_meters,
return 1;
}
+/* Return a set of areas (center + 8) that are able to cover a range query
+ * for the specified position and radius. */
GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double radius_meters) {
GeoHashRange long_range, lat_range;
- GeoHashRadius radius = {{0}};
- GeoHashBits hash = {0,0};
- GeoHashNeighbors neighbors = {{0}};
- GeoHashArea area = {{0}};
+ GeoHashRadius radius;
+ GeoHashBits hash;
+ GeoHashNeighbors neighbors;
+ GeoHashArea area;
double min_lon, max_lon, min_lat, max_lat;
double bounds[4];
int steps;
@@ -123,12 +129,65 @@ GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double
steps = geohashEstimateStepsByRadius(radius_meters,latitude);
- geohashGetCoordRange(&long_range, &lat_range);
- geohashEncode(&long_range, &lat_range, longitude, latitude, steps, &hash);
- geohashNeighbors(&hash, &neighbors);
- geohashGetCoordRange(&long_range, &lat_range);
- geohashDecode(long_range, lat_range, hash, &area);
+ geohashGetCoordRange(&long_range,&lat_range);
+ geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);
+ geohashNeighbors(&hash,&neighbors);
+ geohashDecode(long_range,lat_range,hash,&area);
+
+ /* Check if the step is enough at the limits of the covered area.
+ * Sometimes when the search area is near an edge of the
+ * area, the estimated step is not small enough, since one of the
+ * north / south / west / east square is too near to the search area
+ * to cover everything. */
+ int decrease_step = 0;
+ {
+ GeoHashArea north, south, east, west;
+
+ geohashDecode(long_range, lat_range, neighbors.north, &north);
+ geohashDecode(long_range, lat_range, neighbors.south, &south);
+ geohashDecode(long_range, lat_range, neighbors.east, &east);
+ geohashDecode(long_range, lat_range, neighbors.west, &west);
+
+ if (geohashGetDistance(longitude,latitude,longitude,north.latitude.max)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,longitude,south.latitude.min)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,east.longitude.max,latitude)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,west.longitude.min,latitude)
+ < radius_meters) decrease_step = 1;
+ }
+
+ if (decrease_step) {
+ steps--;
+ geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);
+ geohashNeighbors(&hash,&neighbors);
+ geohashDecode(long_range,lat_range,hash,&area);
+ }
+
+ /* Example debug info. This turns to be very useful every time there is
+ * to investigate radius search potential bugs. So better to leave it
+ * here. */
+ if (0) {
+ GeoHashArea myarea = {{0}};
+ geohashDecode(long_range, lat_range, neighbors.west, &myarea);
+
+ /* Dump West. */
+ D("Neighbors");
+ D("area.longitude.min: %f\n", myarea.longitude.min);
+ D("area.longitude.max: %f\n", myarea.longitude.max);
+ D("area.latitude.min: %f\n", myarea.latitude.min);
+ D("area.latitude.max: %f\n", myarea.latitude.max);
+
+ /* Dump center square. */
+ D("Area");
+ D("area.longitude.min: %f\n", area.longitude.min);
+ D("area.longitude.max: %f\n", area.longitude.max);
+ D("area.latitude.min: %f\n", area.latitude.min);
+ D("area.latitude.max: %f\n", area.latitude.max);
+ }
+ /* Exclude the search areas that are useless. */
if (area.latitude.min < min_lat) {
GZERO(neighbors.south);
GZERO(neighbors.south_west);
diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl
index 3aa06c99c..a08726d2e 100644
--- a/tests/unit/geo.tcl
+++ b/tests/unit/geo.tcl
@@ -1,4 +1,4 @@
-# Helper functins to simulate search-in-radius in the Tcl side in order to
+# Helper functions to simulate search-in-radius in the Tcl side in order to
# verify the Redis implementation with a fuzzy test.
proc geo_degrad deg {expr {$deg*atan(1)*8/360}}
@@ -23,6 +23,44 @@ proc geo_random_point {lonvar latvar} {
set lat [expr {-70 + rand()*140}]
}
+# Return elements non common to both the lists.
+# This code is from http://wiki.tcl.tk/15489
+proc compare_lists {List1 List2} {
+ set DiffList {}
+ foreach Item $List1 {
+ if {[lsearch -exact $List2 $Item] == -1} {
+ lappend DiffList $Item
+ }
+ }
+ foreach Item $List2 {
+ if {[lsearch -exact $List1 $Item] == -1} {
+ if {[lsearch -exact $DiffList $Item] == -1} {
+ lappend DiffList $Item
+ }
+ }
+ }
+ return $DiffList
+}
+
+# The following list represents sets of random seed, search position
+# and radius that caused bugs in the past. It is used by the randomized
+# test later as a starting point. When the regression vectors are scanned
+# the code reverts to using random data.
+#
+# The format is: seed km lon lat
+set regression_vectors {
+ {1412 156 149.29737817929004 15.95807862745508}
+ {441574 143 59.235461856813856 66.269555127373678}
+ {160645 187 -101.88575239939883 49.061997951502917}
+ {750269 154 -90.187939661642517 66.615930412251487}
+ {342880 145 163.03472387745728 64.012747720821181}
+ {729955 143 137.86663517256579 63.986745399416776}
+ {939895 151 59.149620271823181 65.204186651485145}
+ {1412 156 149.29737817929004 15.95807862745508}
+ {564862 149 84.062063109158544 -65.685403922426232}
+}
+set rv_idx 0
+
start_server {tags {"geo"}} {
test {GEOADD create} {
r geoadd nyc -73.9454966 40.747533 "lic market"
@@ -183,16 +221,25 @@ start_server {tags {"geo"}} {
}
test {GEOADD + GEORANGE randomized test} {
- set attempt 10
+ set attempt 20
while {[incr attempt -1]} {
+ set rv [lindex $regression_vectors $rv_idx]
+ incr rv_idx
+
unset -nocomplain debuginfo
set srand_seed [randomInt 1000000]
+ if {$rv ne {}} {set srand_seed [lindex $rv 0]}
lappend debuginfo "srand_seed is $srand_seed"
expr {srand($srand_seed)} ; # If you need a reproducible run
r del mypoints
set radius_km [expr {[randomInt 200]+10}]
+ if {$rv ne {}} {set radius_km [lindex $rv 1]}
set radius_m [expr {$radius_km*1000}]
geo_random_point search_lon search_lat
+ if {$rv ne {}} {
+ set search_lon [lindex $rv 2]
+ set search_lat [lindex $rv 3]
+ }
lappend debuginfo "Search area: $search_lon,$search_lat $radius_km km"
set tcl_result {}
set argv {}
@@ -208,10 +255,40 @@ start_server {tags {"geo"}} {
set res [lsort [r georadius mypoints $search_lon $search_lat $radius_km km]]
set res2 [lsort $tcl_result]
set test_result OK
+
if {$res != $res2} {
+ set rounding_errors 0
+ set diff [compare_lists $res $res2]
+ foreach place $diff {
+ set mydist [geo_distance $lon $lat $search_lon $search_lat]
+ set mydist [expr $mydist/1000]
+ if {($mydist / $radius_km) > 0.999} {incr rounding_errors}
+ }
+ # Make sure this is a real error and not a rounidng issue.
+ if {[llength $diff] == $rounding_errors} {
+ set res $res2; # Error silenced
+ }
+ }
+
+ if {$res != $res2} {
+ set diff [compare_lists $res $res2]
+ puts "*** Possible problem in GEO radius query ***"
puts "Redis: $res"
puts "Tcl : $res2"
+ puts "Diff : $diff"
puts [join $debuginfo "\n"]
+ foreach place $diff {
+ if {[lsearch -exact $res2 $place] != -1} {
+ set where "(only in Tcl)"
+ } else {
+ set where "(only in Redis)"
+ }
+ lassign [lindex [r geopos mypoints $place] 0] lon lat
+ set mydist [geo_distance $lon $lat $search_lon $search_lat]
+ set mydist [expr $mydist/1000]
+ puts "$place -> [r geopos mypoints $place] $mydist $where"
+ if {($mydist / $radius_km) > 0.999} {incr rounding_errors}
+ }
set test_result FAIL
}
unset -nocomplain debuginfo