hll-gnuplot-graph.rb added to plot HyperLogLog error graphs.

author: antirez <antirez@gmail.com> 2014-03-31 10:01:42 +0200
committer: antirez <antirez@gmail.com> 2014-03-31 10:09:43 +0200
commit: 7f9d289e100725a8eab67ec1a0d069e8d1a6221e (patch)
tree: b0579cb4af42613b8b03c937ed0364574a4a0027
parent: 307a189900c06bb4f76638277275f70f2e480558 (diff)
download: redis-7f9d289e100725a8eab67ec1a0d069e8d1a6221e.tar.gz
1 files changed, 68 insertions, 0 deletions
diff --git a/utils/hyperloglog/hll-gnuplot-graph.rb b/utils/hyperloglog/hll-gnuplot-graph.rb
new file mode 100644
index 000000000..1cccbf4be
--- /dev/null
+++ b/utils/hyperloglog/hll-gnuplot-graph.rb
@@ -0,0 +1,68 @@
+# hll-gnuplot-graph.rb - Copyright (C) 2014 Salvatore Sanfilippo
+# BSD license, See the COPYING file for more information.
+#
+# This program is suited to output average and maximum errors of
+# the Redis HyperLogLog implementation in a format suitable to print
+# graphs using gnuplot.
+
+require 'rubygems'
+require 'redis'
+require 'digest/sha1'
+
+# Generate an array of [cardinality,relative_error] pairs
+# in the 0 - max range with step of 1000*step.
+#
+# 'r' is the Redis object used to perform the queries.
+# 'seed' must be different every time you want a test performed
+# with a different set. The function guarantees that if 'seed' is the
+# same, exactly the same dataset is used, and when it is different,
+# a totally unrelated different data set is used (without any common
+# element in practice).
+def run_experiment(r,seed,max,step)
+    # A step that is not positive would never advance 'i' (endless loop).
+    raise ArgumentError, "step must be > 0" if step <= 0
+    r.del('hll')
+    i = 0
+    samples = []
+    while i < max do
+        step.times {
+            # The ':' separator keeps sets with different seeds disjoint:
+            # with plain concatenation i=11,seed=1 and i=1,seed=11 would
+            # both end up hashing the very same string "111".
+            elements = (i...i+1000).map{|j| Digest::SHA1.hexdigest("#{j}:#{seed}")}
+            i += 1000
+            r.hlladd('hll',*elements)
+        }
+        approx = r.hllcount('hll')
+        # Signed relative error, in percentage of the real cardinality 'i'.
+        samples << [i,100.to_f*(approx-i)/i]
+    end
+    samples
+end
+
+# Run 'numsets' experiments (seeds 0..numsets-1) and print, for every
+# sampled cardinality, a "cardinality error" line suitable for gnuplot.
+# filter :max selects the largest absolute relative error among the sets,
+# filter :avg the mean of the signed relative errors.
+# Raises RuntimeError if 'filter' is neither :max nor :avg.
+def filter_samples(numsets,filter)
+    # Validate first so a bad filter fails before any experiment is run.
+    raise "Unknown filter #{filter}" unless [:max,:avg].include?(filter)
+    r = Redis.new
+    dataset = (0...numsets).map{|i| run_experiment(r,i,100000,1)}
+    return if dataset.empty?
+    dataset[0].each_with_index{|ele,index|
+        card = ele[0]
+        errs = dataset.map{|samples| samples[index][1]}
+        if filter == :max
+            # Over and under estimations are both errors: compare magnitudes.
+            err = errs.map{|e| e.abs}.max
+        else
+            err = errs.inject(:+) / numsets
+        end
+        puts "#{card} #{err}"
+    }
+end
+
+filter_samples(100,:max) # Output the maximum error found among 100 sets.
+#filter_samples(100,:avg) # Use this call instead to output the average error.
author	antirez <antirez@gmail.com>	2014-03-31 10:01:42 +0200
committer	antirez <antirez@gmail.com>	2014-03-31 10:09:43 +0200
commit	7f9d289e100725a8eab67ec1a0d069e8d1a6221e (patch)
tree	b0579cb4af42613b8b03c937ed0364574a4a0027
parent	307a189900c06bb4f76638277275f70f2e480558 (diff)
download	redis-7f9d289e100725a8eab67ec1a0d069e8d1a6221e.tar.gz