summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2022-10-28 10:10:01 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2022-10-28 10:57:40 -0400
commit8c6ac52cc5dd591fd4b47f1f4619c3ce225dbc1f (patch)
tree836acdadc58f6c3d23c35353ab92258f5f2dcf93
parentc0476adc21477f7cf076b8527c1327b158e2b190 (diff)
downloadcouchdb-8c6ac52cc5dd591fd4b47f1f4619c3ce225dbc1f.tar.gz
Integrate b64url, ets_lru and khash into the main repo
As discussed on the mailing list https://lists.apache.org/thread/opvsmz1pwlnv96wozy5kp7ss896l9lfp
-rw-r--r--.gitignore3
-rw-r--r--rebar.config.script6
-rw-r--r--src/b64url/.gitignore10
-rw-r--r--src/b64url/LICENSE203
-rw-r--r--src/b64url/Makefile46
-rw-r--r--src/b64url/README.md46
-rw-r--r--src/b64url/c_src/b64url.c665
-rw-r--r--src/b64url/rebar.config32
-rw-r--r--src/b64url/src/b64url.app.src18
-rw-r--r--src/b64url/src/b64url.erl88
-rw-r--r--src/b64url/test/b64url_tests.erl144
-rwxr-xr-xsrc/b64url/test/benchmark.escript165
-rw-r--r--src/ets_lru/.gitignore3
-rw-r--r--src/ets_lru/LICENSE202
-rw-r--r--src/ets_lru/Makefile41
-rw-r--r--src/ets_lru/src/ets_lru.app.src21
-rw-r--r--src/ets_lru/src/ets_lru.erl335
-rw-r--r--src/ets_lru/test/ets_lru_test.erl339
-rw-r--r--src/khash/.gitignore10
-rw-r--r--src/khash/LICENSE76
-rw-r--r--src/khash/Makefile44
-rw-r--r--src/khash/README.md4
-rw-r--r--src/khash/c_src/hash.c843
-rw-r--r--src/khash/c_src/hash.h240
-rw-r--r--src/khash/c_src/khash.c658
-rw-r--r--src/khash/rebar.config14
-rw-r--r--src/khash/src/khash.app.src10
-rw-r--r--src/khash/src/khash.erl136
-rw-r--r--src/khash/test/gen_term.erl113
-rw-r--r--src/khash/test/khash_test.erl374
30 files changed, 4883 insertions, 6 deletions
diff --git a/.gitignore b/.gitignore
index acf98e5c0..816ece6d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,7 +39,6 @@ rel/tmpdata
share/server/main-coffee.js
share/server/main.js
share/www
-src/b64url/
src/bear/
src/certifi/
src/couch/priv/couch_js/**/config.h
@@ -49,7 +48,6 @@ src/couch/priv/couch_ejson_compare/couch_ejson_compare.d
src/couch/priv/couch_js/**/*.d
src/couch/priv/icu_driver/couch_icu_driver.d
src/mango/src/mango_cursor_text.nocompile
-src/ets_lru/
src/excoveralls/
src/fauxton/
src/folsom/
@@ -59,7 +57,6 @@ src/hyper/
src/ibrowse/
src/idna/
src/jiffy/
-src/khash/
src/meck/
src/metrics/
src/mimerl/
diff --git a/rebar.config.script b/rebar.config.script
index 38a57c966..3952ad857 100644
--- a/rebar.config.script
+++ b/rebar.config.script
@@ -110,6 +110,9 @@ SubDirs = [
"src/couch_epi",
"src/config",
"src/couch_log",
+ "src/khash",
+ "src/b64url",
+ "src/ets_lru",
"src/chttpd",
"src/couch",
"src/couch_event",
@@ -142,9 +145,6 @@ SubDirs = [
DepDescs = [
%% Independent Apps
-{b64url, "b64url", {tag, "1.0.3"}},
-{ets_lru, "ets-lru", {tag, "1.1.0"}},
-{khash, "khash", {tag, "1.1.0"}},
{snappy, "snappy", {tag, "CouchDB-1.0.8"}},
%% %% Non-Erlang deps
diff --git a/src/b64url/.gitignore b/src/b64url/.gitignore
new file mode 100644
index 000000000..a3fd77fce
--- /dev/null
+++ b/src/b64url/.gitignore
@@ -0,0 +1,10 @@
+.eunit/
+c_src/*.o
+ebin/
+priv/
+.rebar/
+vc110.pdb
+compile_commands.json
+
+_build/
+c_src/*.d \ No newline at end of file
diff --git a/src/b64url/LICENSE b/src/b64url/LICENSE
new file mode 100644
index 000000000..94ad231b8
--- /dev/null
+++ b/src/b64url/LICENSE
@@ -0,0 +1,203 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/src/b64url/Makefile b/src/b64url/Makefile
new file mode 100644
index 000000000..754165e5c
--- /dev/null
+++ b/src/b64url/Makefile
@@ -0,0 +1,46 @@
+REBAR?=rebar
+
+
+.PHONY: all
+# target: all - Makes everything
+all: build
+
+
+.PHONY: build
+# target: build - Builds the project
+build:
+ $(REBAR) compile
+
+
+.PHONY: check
+# target: check - Checks if project builds and passes all the tests
+check: build eunit
+
+
+.PHONY: clean
+# target: clean - Removes build artifacts
+clean:
+ $(REBAR) clean
+ rm -f test/*.beam
+
+
+.PHONY: distclean
+# target: distclean - Removes all unversioned files
+distclean: clean
+ git clean -fxd
+
+
+.PHONY: eunit
+# target: eunit - Runs eunit test suite
+eunit:
+ $(REBAR) eunit
+
+
+.PHONY: help
+# target: help - Prints this help
+help:
+ @egrep "^# target:" Makefile | sed -e 's/^# target: //g' | sort
+
+
+%.beam: %.erl
+ erlc -o test/ $<
diff --git a/src/b64url/README.md b/src/b64url/README.md
new file mode 100644
index 000000000..c3a3497f2
--- /dev/null
+++ b/src/b64url/README.md
@@ -0,0 +1,46 @@
+# Base64 encoder with URL-safe scheme
+
+[![CI](https://github.com/apache/couchdb-b64url/actions/workflows/ci.yml/badge.svg)](https://github.com/apache/couchdb-b64url/actions/workflows/ci.yml)
+
+This is a simple NIF that is responsible for quickly encoding and
+decoding Base64 URL values:
+
+```erlang
+1> Thing = b64url:encode("Hello, CouchDB!").
+<<"SGVsbG8sIENvdWNoREIh">>
+2> b64url:decode(Thing).
+<<"Hello, CouchDB!">>
+```
+
+## Performance
+
+This implementation is significantly faster than the Erlang version it replaced
+in CouchDB. The `benchmark.escript` file contains the original implementation
+(using regular expressions to replace unsafe characters in the output of the
+`base64` module) and can be used to compare the two for strings of various
+lengths. For example:
+
+```
+ERL_LIBS=_build/default/lib/b64url/ ./test/benchmark.escript 4 10 100 30
+erl : 75491270 bytes / 30 seconds = 2516375.67 bps
+nif : 672299342 bytes / 30 seconds = 22409978.07 bps
+```
+
+This test invocation spawns four workers that generate random strings between 10
+and 100 bytes in length and then perform an encode/decode on them in a tight
+loop for 30 seconds, and then reports the aggregate encoded data volume. Note
+that the generator overhead (`crypto:strong_rand_bytes/1`) is included in these
+results, so the relative difference in encoder throughput is rather larger than
+what's reported here.
+
+## Timeslice Consumption
+
+NIF implementations must take care to avoid doing [lengthy
+work](https://www.erlang.org/doc/man/erl_nif.html#lengthy_work) in a single
+invocation. This library will yield back to the Erlang VM as needed when
+operating on a large string, maintaining a partial result until it can resume
+operation. The current implementation uses a conservative heuristic that
+estimates 64 bytes of encoding / decoding to consume 1% of a timeslice, so input
+strings shorter than ~6k should be encoded / decoded within a single invocation,
+and the library should not adversely affect the responsiveness of the VM in any
+way.
diff --git a/src/b64url/c_src/b64url.c b/src/b64url/c_src/b64url.c
new file mode 100644
index 000000000..d9e2e0448
--- /dev/null
+++ b/src/b64url/c_src/b64url.c
@@ -0,0 +1,665 @@
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include <assert.h>
+#include <string.h>
+
+#include "erl_nif.h"
+
+#ifdef _WIN32
+#define INLINE __inline
+#else
+#define INLINE inline
+#endif
+
+
+typedef ERL_NIF_TERM ENTERM;
+
+
+typedef struct
+{
+ ENTERM atom_ok;
+ ENTERM atom_error;
+ ENTERM atom_partial;
+
+ ENTERM atom_nomem;
+ ENTERM atom_bad_block;
+
+ ErlNifResourceType* res_st;
+} b64url_priv;
+
+
+typedef struct
+{
+ ErlNifPid pid;
+ ErlNifBinary tgt;
+ size_t len;
+ size_t si;
+ size_t ti;
+ char res_released;
+ char bin_released;
+} b64url_st;
+
+
+typedef enum
+{
+ ST_OK,
+ ST_ERROR,
+ ST_PARTIAL
+} b64url_status;
+
+
+const unsigned char B64URL_B2A[256] = {
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, // 0 - 15
+ 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99,100,101,102, // 16 - 31
+ 103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118, // 32 - 47
+ 119,120,121,122, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 45, 95, // 48 - 63
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 64 - 79
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 80 - 95
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 96 - 111
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 112 - 127
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 128 - 143
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 144 - 159
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 160 - 175
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 176 - 191
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 192 - 207
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 208 - 223
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 224 - 239
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 // 240 - 255
+};
+
+const unsigned char B64URL_A2B[256] = {
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0 - 15
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 16 - 31
+ 255,255,255,255,255,255,255,255,255,255,255,255,255, 62,255,255, // 32 - 47
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,255,255,255, // 48 - 63
+ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64 - 79
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255, 63, // 80 - 95
+ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96 - 111
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, // 112 - 127
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 128 - 143
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 144 - 159
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 160 - 175
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 176 - 191
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 192 - 207
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 208 - 223
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 224 - 239
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 // 240 - 255
+};
+
+
+#define BYTES_PER_PERCENT 64
+
+static INLINE int
+do_consume_timeslice(ErlNifEnv* env) {
+#if(ERL_NIF_MAJOR_VERSION >= 2 && ERL_NIF_MINOR_VERSION >= 4)
+ return enif_consume_timeslice(env, 1);
+#else
+ return 0;
+#endif
+}
+
+
+static INLINE ENTERM
+make_atom(ErlNifEnv* env, const char* name)
+{
+ ENTERM ret;
+ if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) {
+ return ret;
+ }
+ return enif_make_atom(env, name);
+}
+
+
+static INLINE ENTERM
+make_ok(ErlNifEnv* env, b64url_priv* priv, ENTERM value)
+{
+ return enif_make_tuple2(env, priv->atom_ok, value);
+}
+
+
+static INLINE ENTERM
+make_error(ErlNifEnv* env, b64url_priv* priv, ENTERM value)
+{
+ return enif_make_tuple2(env, priv->atom_error, value);
+}
+
+
+static INLINE ENTERM
+make_bad_block(ErlNifEnv* env, b64url_priv* priv, size_t pos)
+{
+ ENTERM pterm = enif_make_uint64(env, pos);
+ return enif_make_tuple2(env, priv->atom_bad_block, pterm);
+}
+
+
+static INLINE ENTERM
+make_partial(ErlNifEnv* env, b64url_priv* priv, ENTERM value)
+{
+ return enif_make_tuple2(env, priv->atom_partial, value);
+}
+
+
+static INLINE int
+check_pid(ErlNifEnv* env, b64url_st* st)
+{
+ ErlNifPid self_pid;
+ ENTERM self;
+ ENTERM orig;
+
+ enif_self(env, &self_pid);
+ self = enif_make_pid(env, &self_pid);
+ orig = enif_make_pid(env, &(st->pid));
+
+ if(enif_compare(self, orig) == 0) {
+ return 1;
+ }
+
+ return 0;
+}
+
+
+static b64url_st*
+b64url_st_alloc(ErlNifEnv* env, b64url_priv* priv, ErlNifBinary* src, size_t tlen)
+{
+ b64url_st* st = enif_alloc_resource(priv->res_st, sizeof(b64url_st));
+ if(st == NULL) {
+ goto error;
+ }
+
+ memset(st, '\0', sizeof(b64url_st));
+ enif_self(env, &(st->pid));
+ st->len = src->size;
+ st->si = 0;
+ st->ti = 0;
+ st->res_released = 0;
+ st->bin_released = 0;
+
+ if(!enif_alloc_binary(tlen, &(st->tgt))) {
+ goto error;
+ }
+
+ return st;
+
+error:
+ if(st != NULL) {
+ st->res_released = 1;
+ enif_release_resource(st);
+ }
+ return NULL;
+}
+
+
+static void
+b64url_st_free(ErlNifEnv* env, void* obj)
+{
+ b64url_st* st = (b64url_st*) obj;
+
+ if(!st->bin_released) {
+ enif_release_binary(&(st->tgt));
+ }
+}
+
+static ENTERM
+b64url_st_enc_ret(ErlNifEnv* env, b64url_st* st, int status)
+{
+ b64url_priv* priv = (b64url_priv*) enif_priv_data(env);
+ ENTERM ret;
+
+ if(status == ST_OK) {
+ ret = make_ok(env, priv, enif_make_binary(env, &(st->tgt)));
+ st->bin_released = 1;
+ } else if(status == ST_PARTIAL) {
+ ret = make_partial(env, priv, enif_make_resource(env, st));
+ } else {
+ assert(0 == 1 && "invalid status encoder status");
+ ret = enif_make_badarg(env);
+ }
+
+ if(!st->res_released) {
+ st->res_released = 1;
+ enif_release_resource(st);
+ }
+
+ return ret;
+}
+
+static ENTERM
+b64url_st_dec_ret(ErlNifEnv* env, b64url_st* st, int status, ENTERM ret)
+{
+ b64url_priv* priv = (b64url_priv*) enif_priv_data(env);
+
+ if(status == ST_OK) {
+ ret = make_ok(env, priv, enif_make_binary(env, &(st->tgt)));
+ st->bin_released = 1;
+ } else if(status == ST_ERROR) {
+ ret = make_error(env, priv, ret);
+ } else if(status == ST_PARTIAL) {
+ ret = make_partial(env, priv, enif_make_resource(env, st));
+ } else {
+ assert(0 == 1 && "invalid status decoder status");
+ ret = enif_make_badarg(env);
+ }
+
+ if(!st->res_released) {
+ st->res_released = 1;
+ enif_release_resource(st);
+ }
+
+ return ret;
+}
+
+static int
+load(ErlNifEnv* env, void** priv, ENTERM info)
+{
+ int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER;
+ ErlNifResourceType* res;
+
+ b64url_priv* new_priv = (b64url_priv*) enif_alloc(sizeof(b64url_priv));
+ if(new_priv == NULL) {
+ return 1;
+ }
+
+ res = enif_open_resource_type(
+ env, NULL, "b64url_st", b64url_st_free, flags, NULL);
+ if(res == NULL) {
+ return 1;
+ }
+ new_priv->res_st = res;
+
+ new_priv->atom_ok = make_atom(env, "ok");
+ new_priv->atom_error = make_atom(env, "error");
+ new_priv->atom_partial = make_atom(env, "partial");
+
+ new_priv->atom_nomem = make_atom(env, "enomem");
+ new_priv->atom_bad_block = make_atom(env, "bad_block");
+
+ *priv = (void*) new_priv;
+
+ return 0;
+}
+
+
+static int
+reload(ErlNifEnv* env, void** priv, ENTERM info)
+{
+ return 0;
+}
+
+
+static int
+upgrade(ErlNifEnv* env, void** priv, void** old_priv, ENTERM info)
+{
+ return 0;
+}
+
+
+static void
+unload(ErlNifEnv* env, void* priv)
+{
+ return;
+}
+
+
+static INLINE b64url_status
+b64url_encode(ErlNifEnv* env, ErlNifBinary* src, b64url_st* st)
+{
+ size_t chunk_start = st->si;
+ size_t rem;
+ unsigned char c1;
+ unsigned char c2;
+ unsigned char c3;
+
+ assert(st->si % 3 == 0 && "invalid source index");
+ assert(st->ti % 4 == 0 && "invalid target index");
+
+ while(st->si + 3 <= src->size) {
+ c1 = src->data[st->si++];
+ c2 = src->data[st->si++];
+ c3 = src->data[st->si++];
+
+ st->tgt.data[st->ti++] = B64URL_B2A[(c1 >> 2) & 0x3F];
+ st->tgt.data[st->ti++] = B64URL_B2A[((c1 << 4) | (c2 >> 4)) & 0x3F];
+ st->tgt.data[st->ti++] = B64URL_B2A[((c2 << 2) | (c3 >> 6)) & 0x3F];
+ st->tgt.data[st->ti++] = B64URL_B2A[c3 & 0x3F];
+
+ if(st->si - chunk_start > BYTES_PER_PERCENT) {
+ if(do_consume_timeslice(env)) {
+ return ST_PARTIAL;
+ } else {
+ chunk_start = st->si;
+ }
+ }
+ }
+
+ rem = src->size % 3;
+ if(rem == 1) {
+ c1 = src->data[st->si];
+ st->tgt.data[st->ti++] = B64URL_B2A[(c1 >> 2) & 0x3F];
+ st->tgt.data[st->ti++] = B64URL_B2A[(c1 << 4) & 0x3F];
+ } else if(rem == 2) {
+ c1 = src->data[st->si];
+ c2 = src->data[st->si+1];
+ st->tgt.data[st->ti++] = B64URL_B2A[(c1 >> 2) & 0x3F];
+ st->tgt.data[st->ti++] = B64URL_B2A[((c1 << 4) | (c2 >> 4)) & 0x3F];
+ st->tgt.data[st->ti++] = B64URL_B2A[(c2 << 2) & 0x3F];
+ } else if(rem != 0) {
+ assert(0 == 1 && "Inavlid length calculation");
+ }
+
+ return ST_OK;
+}
+
+
+static ENTERM
+b64url_encode_init(ErlNifEnv* env, int argc, const ENTERM argv[])
+{
+ ErlNifBinary src;
+ b64url_priv* priv = (b64url_priv*) enif_priv_data(env);
+ b64url_st* st = NULL;
+ size_t tlen;
+ size_t rem;
+ int status;
+ ENTERM ret;
+
+ if(argc != 1) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_inspect_iolist_as_binary(env, argv[0], &src)) {
+ ret = enif_make_badarg(env);
+ goto error;
+ }
+
+ // The target length is defined as 4 * ceil(src_len/3) but we
+ // don't use '=' padding for URLs so we only add exactly the
+ // extra bytes we need.
+ tlen = (src.size / 3) * 4;
+ rem = src.size % 3;
+ if(rem == 1) {
+ tlen += 2;
+ } else if(rem == 2) {
+ tlen += 3;
+ }
+
+ st = b64url_st_alloc(env, priv, &src, tlen);
+ if(st == NULL) {
+ ret = make_error(env, priv, priv->atom_nomem);
+ goto error;
+ }
+
+ status = b64url_encode(env, &src, st);
+
+ return b64url_st_enc_ret(env, st, status);
+
+error:
+ if(st != NULL) {
+ st->res_released = 1;
+ enif_release_resource(st);
+ }
+
+ return ret;
+}
+
+
+static ENTERM
+b64url_encode_cont(ErlNifEnv* env, int argc, const ENTERM argv[])
+{
+ ErlNifBinary src;
+ b64url_priv* priv = (b64url_priv*) enif_priv_data(env);
+ b64url_st* st = NULL;
+ void* res = NULL;
+ int status;
+
+ if(argc != 2) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_inspect_iolist_as_binary(env, argv[0], &src)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[1], priv->res_st, &res)) {
+ return enif_make_badarg(env);
+ }
+
+ st = (b64url_st*) res;
+
+ if(!check_pid(env, st)) {
+ return enif_make_badarg(env);
+ }
+
+ if(src.size != st->len) {
+ return enif_make_badarg(env);
+ }
+
+ status = b64url_encode(env, &src, st);
+
+ return b64url_st_enc_ret(env, st, status);
+}
+
+
+static INLINE b64url_status
+b64url_decode(ErlNifEnv* env, ErlNifBinary* src, b64url_st* st, ENTERM* ret)
+{
+ b64url_priv* priv = (b64url_priv*) enif_priv_data(env);
+ size_t chunk_start = st->si;
+ size_t rem;
+ unsigned char c1;
+ unsigned char c2;
+ unsigned char c3;
+ unsigned char c4;
+
+ assert(st->si % 4 == 0 && "invalid source index");
+ assert(st->ti % 3 == 0 && "invalid target index");
+
+ while(st->si + 4 <= src->size) {
+ c1 = B64URL_A2B[src->data[st->si++]];
+ c2 = B64URL_A2B[src->data[st->si++]];
+ c3 = B64URL_A2B[src->data[st->si++]];
+ c4 = B64URL_A2B[src->data[st->si++]];
+
+ if(c1 == 255 || c2 == 255 || c3 == 255 || c4 == 255) {
+ *ret = make_bad_block(env, priv, st->si-4);
+ return ST_ERROR;
+ }
+
+ st->tgt.data[st->ti++] = (c1 << 2) | (c2 >> 4);
+ st->tgt.data[st->ti++] = (c2 << 4) | (c3 >> 2);
+ st->tgt.data[st->ti++] = (c3 << 6) | c4;
+
+ if(st->si - chunk_start > BYTES_PER_PERCENT) {
+ if(do_consume_timeslice(env)) {
+ return ST_PARTIAL;
+ } else {
+ chunk_start = st->si;
+ }
+ }
+ }
+
+ rem = src->size % 4;
+ if(rem == 2) {
+ c1 = B64URL_A2B[src->data[st->si]];
+ c2 = B64URL_A2B[src->data[st->si+1]];
+
+ if(c1 == 255 || c2 == 255) {
+ *ret = make_bad_block(env, priv, st->si);
+ return ST_ERROR;
+ }
+
+ st->tgt.data[st->ti++] = (c1 << 2) | (c2 >> 4);
+ } else if(rem == 3) {
+ c1 = B64URL_A2B[src->data[st->si]];
+ c2 = B64URL_A2B[src->data[st->si+1]];
+ c3 = B64URL_A2B[src->data[st->si+2]];
+
+ if(c1 == 255 || c2 == 255 || c3 == 255) {
+ *ret = make_bad_block(env, priv, st->si);
+ return ST_ERROR;
+ }
+
+ st->tgt.data[st->ti++] = (c1 << 2) | (c2 >> 4);
+ st->tgt.data[st->ti++] = (c2 << 4) | (c3 >> 2);
+ } else if(rem != 0) {
+ assert(0 == 1 && "invalid source length");
+ }
+
+ return ST_OK;
+}
+
+
+static ENTERM
+b64url_decode_init(ErlNifEnv* env, int argc, const ENTERM argv[])
+{
+ ErlNifBinary src;
+ b64url_priv* priv = (b64url_priv*) enif_priv_data(env);
+ b64url_st* st = NULL;
+ size_t tlen;
+ int status;
+ ENTERM ret = priv->atom_error;
+
+ if(argc != 1) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_inspect_iolist_as_binary(env, argv[0], &src)) {
+ return enif_make_badarg(env);
+ }
+
+ // We don't use '=' padding for URLs so our target length
+ // will be the number of blocks of 4 bytes times 3, plus 1 or 2
+ // depending on the number of bytes in the last block.
+ tlen = (src.size / 4) * 3;
+ if(src.size % 4 == 1) {
+ ret = enif_make_badarg(env);
+ goto error;
+ } else if(src.size % 4 == 2) {
+ tlen += 1;
+ } else if(src.size % 4 == 3) {
+ tlen += 2;
+ }
+
+ st = b64url_st_alloc(env, priv, &src, tlen);
+ if(st == NULL) {
+ ret = make_error(env, priv, priv->atom_nomem);
+ goto error;
+ }
+
+ status = b64url_decode(env, &src, st, &ret);
+
+ return b64url_st_dec_ret(env, st, status, ret);
+
+error:
+ if(st != NULL) {
+ st->res_released = 1;
+ enif_release_resource(st);
+ }
+
+ return ret;
+}
+
+
+static ENTERM
+b64url_decode_cont(ErlNifEnv* env, int argc, const ENTERM argv[])
+{
+ ErlNifBinary src;
+ b64url_priv* priv = (b64url_priv*) enif_priv_data(env);
+ b64url_st* st = NULL;
+ void* res = NULL;
+ ENTERM ret = priv->atom_error;
+ int status;
+
+ if(argc != 2) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_inspect_iolist_as_binary(env, argv[0], &src)) {
+ return enif_make_badarg(env);
+ }
+
+ if(!enif_get_resource(env, argv[1], priv->res_st, &res)) {
+ return enif_make_badarg(env);
+ }
+
+ st = (b64url_st*) res;
+
+ if(!check_pid(env, st)) {
+ return enif_make_badarg(env);
+ }
+
+ if(src.size != st->len) {
+ return enif_make_badarg(env);
+ }
+
+ status = b64url_decode(env, &src, st, &ret);
+
+ return b64url_st_dec_ret(env, st, status, ret);
+}
+
+
+static unsigned char CHECK_A2B[64] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+
+static ENTERM
+b64url_check_tables(ErlNifEnv* env, int argc, const ENTERM argv[])
+{
+ b64url_priv* priv = (b64url_priv*) enif_priv_data(env);
+ ENTERM pos;
+ int i;
+
+ for(i = 0; i < 64; i++) {
+ if(B64URL_B2A[i] != CHECK_A2B[i]) {
+ pos = enif_make_int(env, i);
+ return enif_make_tuple2(env, priv->atom_error, pos);
+ }
+ }
+
+ for(i = 64; i < 256; i++) {
+ if(B64URL_B2A[i] != 255) {
+ pos = enif_make_int(env, i);
+ return enif_make_tuple2(env, priv->atom_error, pos);
+ }
+ }
+
+ for(i = 0; i < 64; i++) {
+ if(B64URL_A2B[CHECK_A2B[i]] != i) {
+ pos = enif_make_int(env, i);
+ return enif_make_tuple2(env, priv->atom_error, pos);
+ }
+ }
+
+ for(i = 0; i < 256; i++) {
+ if(B64URL_A2B[i] == 255) {
+ continue;
+ }
+ if(CHECK_A2B[B64URL_A2B[i]] != i) {
+ pos = enif_make_int(env, i);
+ return enif_make_tuple2(env, priv->atom_error, pos);
+ }
+ }
+
+ return priv->atom_ok;
+}
+
+static ErlNifFunc funcs[] = {
+ {"encode_init", 1, b64url_encode_init},
+ {"encode_cont", 2, b64url_encode_cont},
+ {"decode_init", 1, b64url_decode_init},
+ {"decode_cont", 2, b64url_decode_cont},
+ {"check_tables", 0, b64url_check_tables}
+};
+
+
+ERL_NIF_INIT(b64url, funcs, &load, &reload, &upgrade, &unload);
+
+
diff --git a/src/b64url/rebar.config b/src/b64url/rebar.config
new file mode 100644
index 000000000..d9512aed3
--- /dev/null
+++ b/src/b64url/rebar.config
@@ -0,0 +1,32 @@
+{plugins, [
+ pc
+]}.
+
+{project_plugins, [
+ erlfmt
+]}.
+
+{provider_hooks, [
+ {pre, [
+ {compile, {pc, compile}},
+ {clean, {pc, clean}}
+ ]}
+]}.
+
+{port_specs, [
+ {"priv/b64url.so", ["c_src/*.c"]}
+]}.
+
+{port_env, [
+ % Development compilation
+ % {".*", "CFLAGS", "$CFLAGS -g -Wall -Werror -fPIC"}
+
+ % Production compilation
+ {"(linux|solaris|darwin|freebsd)", "CFLAGS", "$CFLAGS -Wall -Werror -DNDEBUG -O3"},
+ {"win32", "CFLAGS", "$CFLAGS /O2 /DNDEBUG /Wall"}
+]}.
+
+{eunit_opts, [
+ debug_info,
+ verbose
+]}.
diff --git a/src/b64url/src/b64url.app.src b/src/b64url/src/b64url.app.src
new file mode 100644
index 000000000..aaf8e9fc4
--- /dev/null
+++ b/src/b64url/src/b64url.app.src
@@ -0,0 +1,18 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+{application, b64url, [
+ {description, "A small NIF for encoding Base64 URL encoding/decoding."},
+ {vsn, git},
+ {registered, []},
+ {applications, [kernel, stdlib]}
+]}.
diff --git a/src/b64url/src/b64url.erl b/src/b64url/src/b64url.erl
new file mode 100644
index 000000000..6e6ddaccd
--- /dev/null
+++ b/src/b64url/src/b64url.erl
@@ -0,0 +1,88 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(b64url).
+-on_load(init/0).
+
+-export([
+ encode/1,
+ decode/1
+]).
+
+% Internal sanity checks
+-export([
+ check_tables/0
+]).
+
+-define(NOT_LOADED, not_loaded(?LINE)).
+
+-spec encode(iodata()) -> binary().
+encode(IoData) ->
+ case encode_init(IoData) of
+ {ok, Bin} ->
+ Bin;
+ {partial, St} ->
+ encode_loop(IoData, St)
+ end.
+
+-spec decode(iodata()) -> binary() | {error, any()}.
+decode(IoData) ->
+ case decode_init(IoData) of
+ {ok, Bin} ->
+ Bin;
+ {error, _} = Ret ->
+ Ret;
+ {partial, St} ->
+ decode_loop(IoData, St)
+ end.
+
+-spec check_tables() -> ok | {error, non_neg_integer()}.
+check_tables() ->
+ ?NOT_LOADED.
+
+init() ->
+ PrivDir =
+ case code:priv_dir(?MODULE) of
+ {error, _} ->
+ EbinDir = filename:dirname(code:which(?MODULE)),
+ AppPath = filename:dirname(EbinDir),
+ filename:join(AppPath, "priv");
+ Path ->
+ Path
+ end,
+ erlang:load_nif(filename:join(PrivDir, "b64url"), 0).
+
+encode_loop(IoData, St) ->
+ case encode_cont(IoData, St) of
+ {ok, Bin} ->
+ Bin;
+ {partial, St} ->
+ encode_loop(IoData, St)
+ end.
+
+decode_loop(IoData, St) ->
+ case decode_cont(IoData, St) of
+ {ok, Bin} ->
+ Bin;
+ {error, _} = Ret ->
+ Ret;
+ {partial, St} ->
+ decode_loop(IoData, St)
+ end.
+
+encode_init(_) -> ?NOT_LOADED.
+encode_cont(_, _) -> ?NOT_LOADED.
+decode_init(_) -> ?NOT_LOADED.
+decode_cont(_, _) -> ?NOT_LOADED.
+
+not_loaded(Line) ->
+ erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}).
diff --git a/src/b64url/test/b64url_tests.erl b/src/b64url/test/b64url_tests.erl
new file mode 100644
index 000000000..5e8e57502
--- /dev/null
+++ b/src/b64url/test/b64url_tests.erl
@@ -0,0 +1,144 @@
+-module(b64url_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(MAX_SIZE, 6401).
+-define(NUM_TESTS, 500).
+
+table_test() ->
+ ?assertEqual(ok, b64url:check_tables()).
+
+encode_binary_test() ->
+ lists:foreach(
+ fun(_) ->
+ Bin = gen_binary(),
+ A = couch_encode_base64url(Bin),
+ B = b64url:encode(Bin),
+ ?assertEqual(A, B)
+ end,
+ lists:seq(1, ?NUM_TESTS)
+ ).
+
+encode_iolist_test() ->
+ lists:foreach(
+ fun(_) ->
+ IoList = shallow_iolist(),
+ A = couch_encode_base64url(iolist_to_binary(IoList)),
+ B = b64url:encode(IoList),
+ ?assertEqual(A, B)
+ end,
+ lists:seq(1, ?NUM_TESTS)
+ ).
+
+decode_binary_test() ->
+ lists:foreach(
+ fun(_) ->
+ Bin = gen_binary(),
+ B64UrlBin = couch_encode_base64url(Bin),
+ Dec = b64url:decode(B64UrlBin),
+ ?assertEqual(Bin, Dec)
+ end,
+ lists:seq(1, ?NUM_TESTS)
+ ).
+
+decode_iolist_test() ->
+ lists:foreach(
+ fun(_) ->
+ IoList = shallow_b64_iolist(),
+ A = couch_decode_base64url(iolist_to_binary(IoList)),
+ B = b64url:decode(IoList),
+ ?assertEqual(A, B)
+ end,
+ lists:seq(1, ?NUM_TESTS)
+ ).
+
+decode_binary_error_test() ->
+ lists:foreach(
+ fun(_) ->
+ {ErrBin, BlockPos} = bad_binary(),
+ Dec = b64url:decode(ErrBin),
+ ?assertEqual({error, {bad_block, BlockPos}}, Dec)
+ end,
+ lists:seq(1, ?NUM_TESTS)
+ ).
+
+decode_bad_length_test() ->
+ lists:foreach(
+ fun(_) ->
+ Bin = bad_len_binary(),
+ ?assertError(badarg, b64url:decode(Bin))
+ end,
+ lists:seq(1, ?NUM_TESTS)
+ ).
+
+gen_binary() ->
+ % -1 to allow for 0 length
+ Length = rand:uniform(?MAX_SIZE) - 1,
+ crypto:strong_rand_bytes(Length).
+
+shallow_iolist() ->
+ to_iolist(gen_binary()).
+
+shallow_b64_iolist() ->
+ to_iolist(couch_encode_base64url(gen_binary())).
+
+bad_binary() ->
+ insert_error(gen_binary()).
+
+bad_len_binary() ->
+ make_bad_len(gen_binary()).
+
+to_iolist(<<>>) ->
+ case rand:uniform(2) of
+ 1 -> <<>>;
+ 2 -> [<<>>]
+ end;
+to_iolist(B) when is_binary(B), size(B) > 0 ->
+ S = rand:uniform(size(B)),
+ <<First:S/binary, Second/binary>> = B,
+ case rand:uniform(3) of
+ 1 ->
+ [to_iolist(First), Second];
+ 2 ->
+ [First, to_iolist(Second)];
+ 3 ->
+ [First, Second]
+ end.
+
+insert_error(B) when is_binary(B), size(B) < 2 ->
+ case rand:uniform(2) of
+ 1 -> {<<122, 255>>, 0};
+ 2 -> {<<122, 122, 255>>, 0}
+ end;
+insert_error(B) when is_binary(B) ->
+ B64 = couch_encode_base64url(B),
+ S = rand:uniform(size(B64) - 1),
+ <<First:S/binary, _:1/binary, Second/binary>> = B64,
+ {<<First:S/binary, 255, Second/binary>>, 4 * (S div 4)}.
+
+make_bad_len(Bin) when size(Bin) rem 4 == 1 ->
+ Bin;
+make_bad_len(Bin) when size(Bin) rem 4 == 2 ->
+ <<"AAA", Bin/binary>>;
+make_bad_len(Bin) when size(Bin) rem 4 == 3 ->
+ <<"AA", Bin/binary>>;
+make_bad_len(Bin) when size(Bin) rem 4 == 0 ->
+ <<"A", Bin/binary>>.
+
+% These functions are copy/pasted from couch_util to avoid
+% the direct dependency. The goal of this project is to replace
+% these in couch_util anyway, so when that happens they'll only
+% exist here for these tests.
+
+couch_encode_base64url(Url) ->
+ Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")),
+ Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])),
+ iolist_to_binary(re:replace(Url2, "\\+", "-", [global])).
+
+couch_decode_base64url(Url64) ->
+ Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]),
+ Url2 = iolist_to_binary(
+ re:replace(iolist_to_binary(Url1), "_", "/", [global])
+ ),
+ Padding = list_to_binary(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)),
+ base64:decode(<<Url2/binary, Padding/binary>>).
diff --git a/src/b64url/test/benchmark.escript b/src/b64url/test/benchmark.escript
new file mode 100755
index 000000000..00a6f0dda
--- /dev/null
+++ b/src/b64url/test/benchmark.escript
@@ -0,0 +1,165 @@
+#!/usr/bin/env escript
+
+-mode(compile).
+
+
+-export([
+ encode/1,
+ decode/1,
+ run_worker/1
+]).
+
+
+-record(st, {
+ parent,
+ module,
+ workers,
+ minsize,
+ maxsize,
+ duration,
+ total_bytes
+}).
+
+
+main([Workers0, MinSize0, MaxSize0, Duration0]) ->
+ code:add_path("./ebin"),
+ code:add_path("../ebin"),
+ Workers = to_int(Workers0),
+ MinSize = to_int(MinSize0),
+ MaxSize = to_int(MaxSize0),
+ Duration = to_int(Duration0),
+ if Workers > 0 -> ok; true ->
+ die("Worker count must be positive~n")
+ end,
+ if MinSize > 0 -> ok; true ->
+ die("Minimum size must be positive.~n")
+ end,
+ if MaxSize > 0 -> ok; true ->
+ die("Maximum size must be positive.~n")
+ end,
+ if MinSize < MaxSize -> ok; true ->
+ die("Minimum size must be less than maximum size.~n")
+ end,
+ if Duration > 0 -> ok; true ->
+ die("Duration must be positive.~n")
+ end,
+ St = #st{
+ parent = self(),
+ workers = Workers,
+ minsize = MinSize,
+ maxsize = MaxSize,
+ duration = Duration
+ },
+ lists:foreach(fun(M) ->
+ run_test(St#st{module=M})
+ end, randomize([b64url, ?MODULE]));
+
+main(_) ->
+ Args = [escript:script_name()],
+ die("usage: ~s num_workers min_size max_size time_per_test~n", Args).
+
+
+run_test(St) ->
+ Workers = spawn_workers(St#st.workers, St),
+ start_workers(Workers),
+ Results = wait_for_workers(Workers),
+ report(St#st.module, St#st.duration, Results).
+
+
+start_workers(Pids) ->
+ lists:foreach(fun(P) ->
+ P ! start
+ end, Pids).
+
+
+wait_for_workers(Pids) ->
+ lists:map(fun(P) ->
+ receive
+ {P, TotalBytes} -> TotalBytes
+ end
+ end, Pids).
+
+
+report(Module, Duration, TotalByteList) ->
+ ModDesc = case Module of
+ ?MODULE -> "erl";
+ b64url -> "nif"
+ end,
+ TotalBytes = lists:sum(TotalByteList),
+ io:format("~s : ~14b bytes / ~3b seconds = ~14.2f bps~n", [
+ ModDesc, TotalBytes, Duration, TotalBytes / Duration]).
+
+
+spawn_workers(NumWorkers, St) ->
+ lists:map(fun(_) ->
+ spawn_link(?MODULE, run_worker, [St])
+ end, lists:seq(1, NumWorkers)).
+
+
+run_worker(St) ->
+ receive
+ start -> ok
+ end,
+ run_worker(St#st{total_bytes=0}, os:timestamp()).
+
+
+run_worker(St, Started) ->
+ HasRun = timer:now_diff(os:timestamp(), Started),
+ case HasRun div 1000000 > St#st.duration of
+ true ->
+ St#st.parent ! {self(), St#st.total_bytes};
+ false ->
+ NewSt = do_round_trip(St),
+ run_worker(NewSt, Started)
+ end.
+
+
+do_round_trip(St) ->
+ Size = St#st.minsize + rand:uniform(St#st.maxsize - St#st.minsize),
+ Data = crypto:strong_rand_bytes(Size),
+ Encoded = (St#st.module):encode(Data),
+ Data = (St#st.module):decode(Encoded),
+ St#st{total_bytes=St#st.total_bytes+Size}.
+
+
+encode(Url) ->
+ Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")),
+ Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])),
+ iolist_to_binary(re:replace(Url2, "\\+", "-", [global])).
+
+
+decode(Url64) ->
+ Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]),
+ Url2 = iolist_to_binary(
+ re:replace(iolist_to_binary(Url1), "_", "/", [global])
+ ),
+ Padding = list_to_binary(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)),
+ base64:decode(<<Url2/binary, Padding/binary>>).
+
+randomize(List) ->
+ List0 = [{rand:uniform(), L} || L <- List],
+ List1 = lists:sort(List0),
+ [L || {_, L} <- List1].
+
+
+to_int(Val) when is_integer(Val) ->
+ Val;
+to_int(Val) when is_binary(Val) ->
+ to_int(binary_to_list(Val));
+to_int(Val) when is_list(Val) ->
+ try
+ list_to_integer(Val)
+ catch _:_ ->
+ die("Invalid integer: ~w~n", [Val])
+ end;
+to_int(Val) ->
+ die("Invalid integer: ~w~n", [Val]).
+
+
+die(Message) ->
+ die(Message, []).
+
+die(Format, Args) ->
+ io:format(Format, Args),
+ init:stop().
+
diff --git a/src/ets_lru/.gitignore b/src/ets_lru/.gitignore
new file mode 100644
index 000000000..3164eee31
--- /dev/null
+++ b/src/ets_lru/.gitignore
@@ -0,0 +1,3 @@
+.rebar
+ebin/
+*~
diff --git a/src/ets_lru/LICENSE b/src/ets_lru/LICENSE
new file mode 100644
index 000000000..f6cd2bc80
--- /dev/null
+++ b/src/ets_lru/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/src/ets_lru/Makefile b/src/ets_lru/Makefile
new file mode 100644
index 000000000..058a7ecfc
--- /dev/null
+++ b/src/ets_lru/Makefile
@@ -0,0 +1,41 @@
+REBAR?=rebar
+
+
+.PHONY: all
+# target: all - Makes everything
+all: build
+
+
+.PHONY: build
+# target: build - Builds the project
+build:
+ $(REBAR) compile
+
+
+.PHONY: check
+# target: check - Checks if project builds and passes all the tests
+check: build eunit
+
+
+.PHONY: clean
+# target: clean - Removes build artifacts
+clean:
+ $(REBAR) clean
+
+
+.PHONY: distclean
+# target: distclean - Removes all unversioned files
+distclean: clean
+ git clean -fxd
+
+
+.PHONY: eunit
+# target: eunit - Runs eunit test suite
+eunit:
+ $(REBAR) eunit
+
+
+.PHONY: help
+# target: help - Prints this help
+help:
+ @egrep "^# target:" Makefile | sed -e 's/^# target: //g' | sort
diff --git a/src/ets_lru/src/ets_lru.app.src b/src/ets_lru/src/ets_lru.app.src
new file mode 100644
index 000000000..c81ce11bd
--- /dev/null
+++ b/src/ets_lru/src/ets_lru.app.src
@@ -0,0 +1,21 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+{application, ets_lru, [
+ {description, "ETS Base LRU Cache"},
+ {vsn, git},
+ {registered, []},
+ {applications, [
+ kernel,
+ stdlib
+ ]}
+]}.
diff --git a/src/ets_lru/src/ets_lru.erl b/src/ets_lru/src/ets_lru.erl
new file mode 100644
index 000000000..891aeeb57
--- /dev/null
+++ b/src/ets_lru/src/ets_lru.erl
@@ -0,0 +1,335 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(ets_lru).
+-behaviour(gen_server).
+-vsn(2).
+
+-export([
+ start_link/2,
+ stop/1,
+
+ insert/3,
+ lookup/2,
+ match/3,
+ match_object/3,
+ remove/2,
+ clear/1,
+
+    % Dirty functions read straight from
+    % the ETS tables, which means there are
+    % race conditions with concurrent access.
+    lookup_d/2
+]).
+
+-export([
+ init/1,
+ terminate/2,
+
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+
+ code_change/3
+]).
+
+-define(DEFAULT_TIME_UNIT, millisecond).
+
+-type time_value() :: integer().
+-type strict_monotonic_time() :: {time_value(), integer()}.
+
+-record(entry, {
+ key :: term(),
+ val :: term(),
+ atime :: strict_monotonic_time(),
+ ctime :: strict_monotonic_time()
+}).
+
+-record(st, {
+ objects,
+ atimes,
+ ctimes,
+
+ max_objs :: non_neg_integer() | undefined,
+ max_size :: non_neg_integer() | undefined,
+ max_lifetime :: non_neg_integer() | undefined,
+ time_unit = ?DEFAULT_TIME_UNIT :: atom()
+}).
+
+start_link(Name, Options) when is_atom(Name) ->
+ gen_server:start_link({local, Name}, ?MODULE, {Name, Options}, []).
+
+stop(LRU) ->
+ gen_server:cast(LRU, stop).
+
+lookup(LRU, Key) ->
+ gen_server:call(LRU, {lookup, Key}).
+
+insert(LRU, Key, Val) ->
+ gen_server:call(LRU, {insert, Key, Val}).
+
+remove(LRU, Key) ->
+ gen_server:call(LRU, {remove, Key}).
+
+%% @doc match/3 provides an efficient way to retrieve parts of the
+%% keys and values without copying or requiring circumvention of the
+%% ets_lru API. The KeySpec and ValueSpec parameters are used as part
+%% of one larger match spec so keep in mind that all capturing
+%% placeholders will be aliased between the key and value parts.
+-spec match(atom() | pid(), term(), term()) -> [[any()]].
+match(LRU, KeySpec, ValueSpec) ->
+ gen_server:call(LRU, {match, KeySpec, ValueSpec}).
+
+%% @doc match_object/3 provides an efficient way to retrieve multiple
+%% values using match conditions. The KeySpec and ValueSpec parameters
+%% are used as part of one larger match spec so keep in mind that all
+%% capturing placeholders will be aliased between the key and value
+%% parts.
+-spec match_object(atom() | pid(), term(), term()) -> [any()].
+match_object(Name, KeySpec, ValueSpec) when is_atom(Name) ->
+ Pattern = #entry{key = KeySpec, val = ValueSpec, _ = '_'},
+ Entries = ets:match_object(obj_table(Name), Pattern),
+ lists:map(
+ fun(#entry{key = Key, val = Val}) ->
+ gen_server:cast(Name, {accessed, Key}),
+ Val
+ end,
+ Entries
+ );
+match_object(LRU, KeySpec, ValueSpec) ->
+ gen_server:call(LRU, {match_object, KeySpec, ValueSpec}).
+
+clear(LRU) ->
+ gen_server:call(LRU, clear).
+
+lookup_d(Name, Key) when is_atom(Name) ->
+ case ets:lookup(obj_table(Name), Key) of
+ [#entry{val = Val}] ->
+ gen_server:cast(Name, {accessed, Key}),
+ {ok, Val};
+ [] ->
+ not_found
+ end.
+
+init({Name, Options}) ->
+ St = set_options(#st{}, Options),
+ ObjOpts = [set, named_table, protected, {keypos, #entry.key}],
+ TimeOpts = [ordered_set, named_table, protected],
+
+ {ok, St#st{
+ objects = ets:new(obj_table(Name), ObjOpts),
+ atimes = ets:new(at_table(Name), TimeOpts),
+ ctimes = ets:new(ct_table(Name), TimeOpts)
+ }}.
+
+terminate(_Reason, St) ->
+ true = ets:delete(St#st.objects),
+ true = ets:delete(St#st.atimes),
+ true = ets:delete(St#st.ctimes),
+ ok.
+
+handle_call({lookup, Key}, _From, St) ->
+ Reply =
+ case ets:lookup(St#st.objects, Key) of
+ [#entry{val = Val} | _] ->
+ accessed(Key, St),
+ {ok, Val};
+ [] ->
+ not_found
+ end,
+ {reply, Reply, St, 0};
+handle_call({match_object, KeySpec, ValueSpec}, _From, St) ->
+ Pattern = #entry{key = KeySpec, val = ValueSpec, _ = '_'},
+ Entries = ets:match_object(St#st.objects, Pattern),
+ Values = lists:map(
+ fun(#entry{key = Key, val = Val}) ->
+ accessed(Key, St),
+ Val
+ end,
+ Entries
+ ),
+ {reply, Values, St, 0};
+handle_call({match, KeySpec, ValueSpec}, _From, St) ->
+ Pattern = #entry{key = KeySpec, val = ValueSpec, _ = '_'},
+ Values = ets:match(St#st.objects, Pattern),
+ {reply, Values, St, 0};
+handle_call({insert, Key, Val}, _From, St) ->
+ NewATime = strict_monotonic_time(St#st.time_unit),
+ Pattern = #entry{key = Key, atime = '$1', _ = '_'},
+ case ets:match(St#st.objects, Pattern) of
+ [[ATime]] ->
+ Update = {#entry.val, Val},
+ true = ets:update_element(St#st.objects, Key, Update),
+ true = ets:delete(St#st.atimes, ATime),
+ true = ets:insert(St#st.atimes, {NewATime, Key});
+ [] ->
+ Entry = #entry{key = Key, val = Val, atime = NewATime, ctime = NewATime},
+ true = ets:insert(St#st.objects, Entry),
+ true = ets:insert(St#st.atimes, {NewATime, Key}),
+ true = ets:insert(St#st.ctimes, {NewATime, Key})
+ end,
+ {reply, ok, St, 0};
+handle_call({remove, Key}, _From, St) ->
+ Pattern = #entry{key = Key, atime = '$1', ctime = '$2', _ = '_'},
+ Reply =
+ case ets:match(St#st.objects, Pattern) of
+ [[ATime, CTime]] ->
+ true = ets:delete(St#st.objects, Key),
+ true = ets:delete(St#st.atimes, ATime),
+ true = ets:delete(St#st.ctimes, CTime),
+ ok;
+ [] ->
+ not_found
+ end,
+ {reply, Reply, St, 0};
+handle_call(clear, _From, St) ->
+ true = ets:delete_all_objects(St#st.objects),
+ true = ets:delete_all_objects(St#st.atimes),
+ true = ets:delete_all_objects(St#st.ctimes),
+    % No need to timeout here and evict cache
+    % entries because it's now empty.
+ {reply, ok, St};
+handle_call(Msg, _From, St) ->
+ {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}.
+
+handle_cast({accessed, Key}, St) ->
+ accessed(Key, St),
+ {noreply, St, 0};
+handle_cast(stop, St) ->
+ {stop, normal, St};
+handle_cast(Msg, St) ->
+ {stop, {invalid_cast, Msg}, St}.
+
+handle_info(timeout, St) ->
+ trim(St),
+ {noreply, St, next_timeout(St)};
+handle_info(Msg, St) ->
+ {stop, {invalid_info, Msg}, St}.
+
+code_change(_OldVsn, St, _Extra) ->
+ {ok, St}.
+
+accessed(Key, St) ->
+ Pattern = #entry{key = Key, atime = '$1', _ = '_'},
+ case ets:match(St#st.objects, Pattern) of
+ [[ATime]] ->
+ NewATime = strict_monotonic_time(St#st.time_unit),
+ Update = {#entry.atime, NewATime},
+ true = ets:update_element(St#st.objects, Key, Update),
+ true = ets:delete(St#st.atimes, ATime),
+ true = ets:insert(St#st.atimes, {NewATime, Key}),
+ ok;
+ [] ->
+ ok
+ end.
+
+trim(St) ->
+ trim_count(St),
+ trim_size(St),
+ trim_lifetime(St).
+
+trim_count(#st{max_objs = undefined}) ->
+ ok;
+trim_count(#st{max_objs = Max} = St) ->
+ case ets:info(St#st.objects, size) > Max of
+ true ->
+ drop_lru(St, fun trim_count/1);
+ false ->
+ ok
+ end.
+
+trim_size(#st{max_size = undefined}) ->
+ ok;
+trim_size(#st{max_size = Max} = St) ->
+ case ets:info(St#st.objects, memory) > Max of
+ true ->
+ drop_lru(St, fun trim_size/1);
+ false ->
+ ok
+ end.
+
+trim_lifetime(#st{max_lifetime = undefined}) ->
+ ok;
+trim_lifetime(#st{max_lifetime = Max} = St) ->
+ Now = erlang:monotonic_time(St#st.time_unit),
+ case ets:first(St#st.ctimes) of
+ '$end_of_table' ->
+ ok;
+ CTime = {Time, _} ->
+ case Now - Time > Max of
+ true ->
+ [{CTime, Key}] = ets:lookup(St#st.ctimes, CTime),
+ Pattern = #entry{key = Key, atime = '$1', _ = '_'},
+ [[ATime]] = ets:match(St#st.objects, Pattern),
+ true = ets:delete(St#st.objects, Key),
+ true = ets:delete(St#st.atimes, ATime),
+ true = ets:delete(St#st.ctimes, CTime),
+ trim_lifetime(St);
+ false ->
+ ok
+ end
+ end.
+
+drop_lru(St, Continue) ->
+ case ets:first(St#st.atimes) of
+ '$end_of_table' ->
+ empty;
+ ATime ->
+ [{ATime, Key}] = ets:lookup(St#st.atimes, ATime),
+ Pattern = #entry{key = Key, ctime = '$1', _ = '_'},
+ [[CTime]] = ets:match(St#st.objects, Pattern),
+ true = ets:delete(St#st.objects, Key),
+ true = ets:delete(St#st.atimes, ATime),
+ true = ets:delete(St#st.ctimes, CTime),
+ Continue(St)
+ end.
+
+next_timeout(#st{max_lifetime = undefined}) ->
+ infinity;
+next_timeout(St) ->
+ case ets:first(St#st.ctimes) of
+ '$end_of_table' ->
+ infinity;
+ {Time, _} ->
+ Now = erlang:monotonic_time(St#st.time_unit),
+ TimeDiff = Now - Time,
+ erlang:max(St#st.max_lifetime - TimeDiff, 0)
+ end.
+
+set_options(St, []) ->
+ St;
+set_options(St, [{max_objects, N} | Rest]) when is_integer(N), N >= 0 ->
+ set_options(St#st{max_objs = N}, Rest);
+set_options(St, [{max_size, N} | Rest]) when is_integer(N), N >= 0 ->
+ set_options(St#st{max_size = N}, Rest);
+set_options(St, [{max_lifetime, N} | Rest]) when is_integer(N), N >= 0 ->
+ set_options(St#st{max_lifetime = N}, Rest);
+set_options(St, [{time_unit, T} | Rest]) when is_atom(T) ->
+ set_options(St#st{time_unit = T}, Rest);
+set_options(_, [Opt | _]) ->
+ throw({invalid_option, Opt}).
+
+obj_table(Name) ->
+ table_name(Name, "_objects").
+
+at_table(Name) ->
+ table_name(Name, "_atimes").
+
+ct_table(Name) ->
+ table_name(Name, "_ctimes").
+
+table_name(Name, Ext) ->
+ list_to_atom(atom_to_list(Name) ++ Ext).
+
+-spec strict_monotonic_time(atom()) -> strict_monotonic_time().
+strict_monotonic_time(TimeUnit) ->
+ {erlang:monotonic_time(TimeUnit), erlang:unique_integer([monotonic])}.
diff --git a/src/ets_lru/test/ets_lru_test.erl b/src/ets_lru/test/ets_lru_test.erl
new file mode 100644
index 000000000..5dd193f8d
--- /dev/null
+++ b/src/ets_lru/test/ets_lru_test.erl
@@ -0,0 +1,339 @@
+-module(ets_lru_test).
+
+-include_lib("eunit/include/eunit.hrl").
+
+lifecyle_test_() ->
+ {
+ "Test LRU lifecycle",
+ {setup, fun() -> ets_lru:start_link(?MODULE, []) end,
+ fun({ok, LRU}) -> is_process_alive(LRU) == false end, fun({ok, LRU}) ->
+ [
+ {
+ "ets_lru:start_link/2 returned an LRU",
+ ?_assert(is_pid(LRU))
+ },
+ {
+ "Destroyed the LRU ok",
+ ?_assertEqual(ok, ets_lru:stop(LRU))
+ }
+ ]
+ end}
+ }.
+
+table_names_test_() ->
+ {
+ "Test tables names",
+ {setup, fun() -> ets_lru:start_link(foo, []) end, fun({ok, LRU}) ->
+ [
+ {
+ "foo_objects exists",
+ ?_assertEqual(0, ets:info(foo_objects, size))
+ },
+ {
+ "foo_atimes exists",
+ ?_assertEqual(0, ets:info(foo_atimes, size))
+ },
+ {
+ "foo_ctimes exists",
+ ?_assertEqual(0, ets:info(foo_ctimes, size))
+ },
+ {
+ "LRU stopped normally",
+ ?_test(begin
+ Reason = stop_lru({ok, LRU}),
+ ?assertEqual(normal, Reason)
+ end)
+ },
+ {
+ "foo_objects doesn't exists",
+ ?_assertEqual(undefined, ets:info(foo_objects, size))
+ },
+ {
+ "foo_atimes doesn't exists",
+ ?_assertEqual(undefined, ets:info(foo_atimes, size))
+ },
+ {
+ "foo_ctimes doesn't exists",
+ ?_assertEqual(undefined, ets:info(foo_ctimes, size))
+ }
+ ]
+ end}
+ }.
+
+basic_behavior_test_() ->
+ {
+ "Test basic behavior",
+ {foreach,
+ fun() ->
+ {ok, LRU} = ets_lru:start_link(test_lru, []),
+ ok = ets_lru:insert(LRU, foo, bar),
+ {ok, LRU}
+ end,
+ fun stop_lru/1, [
+ fun({ok, LRU}) ->
+ {
+ "Lookup returned the inserted value",
+ ?_assertEqual({ok, bar}, ets_lru:lookup(LRU, foo))
+ }
+ end,
+ fun({ok, _LRU}) ->
+ {
+ "Dirty lookup returned the inserted value",
+ ?_assertEqual(
+ {ok, bar},
+ ets_lru:lookup_d(test_lru, foo)
+ )
+ }
+ end,
+ fun({ok, LRU}) ->
+ [
+ {
+ "Lookup returned the inserted value",
+ ?_assertEqual({ok, bar}, ets_lru:lookup(LRU, foo))
+ },
+ {
+ "Insert new value",
+ ?_assertEqual(ok, ets_lru:insert(LRU, foo, bam))
+ },
+ {
+ "Lookup returned the newly inserted value",
+ ?_assertEqual({ok, bam}, ets_lru:lookup(LRU, foo))
+ }
+ ]
+ end,
+ fun({ok, LRU}) ->
+ [
+ {
+ "Lookup returned the inserted value",
+ ?_assertEqual({ok, bar}, ets_lru:lookup(LRU, foo))
+ },
+ {
+ "Remove value",
+ ?_assertEqual(ok, ets_lru:remove(LRU, foo))
+ },
+ {
+ "Lookup returned not_found for removed value",
+ ?_assertEqual(not_found, ets_lru:lookup(LRU, foo))
+ }
+ ]
+ end,
+ fun({ok, LRU}) ->
+ [
+ {
+ "Lookup returned the inserted value",
+ ?_assertEqual({ok, bar}, ets_lru:lookup(LRU, foo))
+ },
+ {
+ "Clear LRU",
+ ?_assertEqual(ok, ets_lru:clear(LRU))
+ },
+ {
+ "Lookup returned not_found after a clear",
+ ?_assertEqual(not_found, ets_lru:lookup(LRU, foo))
+ }
+ ]
+ end
+ ]}
+ }.
+
+lru_good_options_test_() ->
+ {
+ "Test good LRU options",
+ {foreachx,
+ fun(Opts) ->
+ process_flag(trap_exit, true),
+ ets_lru:start_link(?MODULE, Opts)
+ end,
+ fun(_, Cfg) -> stop_lru(Cfg) end, [
+ {[{max_objects, 1}], fun test_good_opts/2},
+ {[{max_objects, 5}], fun test_good_opts/2},
+ {[{max_objects, 923928342098203942}], fun test_good_opts/2},
+ {[{max_size, 1}], fun test_good_opts/2},
+ {[{max_size, 5}], fun test_good_opts/2},
+ {[{max_size, 2342923423942309423094}], fun test_good_opts/2},
+ {[{max_lifetime, 1}], fun test_good_opts/2},
+ {[{max_lifetime, 5}], fun test_good_opts/2},
+ {[{max_lifetime, 1244209909180928348}], fun test_good_opts/2}
+ ]}
+ }.
+
+lru_bad_options_test_() ->
+ {
+ "Test bad LRU options",
+ {foreachx,
+ fun(Opts) ->
+ process_flag(trap_exit, true),
+ ets_lru:start_link(?MODULE, Opts)
+ end,
+ fun(_, _) ->
+ case whereis(?MODULE) of
+ Pid when is_pid(Pid) ->
+ stop_lru({ok, Pid});
+ undefined ->
+ ok
+ end
+ end,
+ [
+ {[{bingo, bango}], fun test_bad_opts/2},
+ {[12], fun test_bad_opts/2},
+ {[true], fun test_bad_opts/2}
+ ]}
+ }.
+
+lru_limits_test_() ->
+ {
+ "Test LRU limits",
+ {foreachx, fun(Opts) -> ets_lru:start_link(lru, Opts) end, fun(_, Cfg) -> stop_lru(Cfg) end,
+ [
+ {[{max_objects, 25}], fun test_limits/2},
+ {[{max_size, 1024}], fun test_limits/2},
+ {[{max_lifetime, 100}], fun test_limits/2}
+ ]}
+ }.
+
+lru_match_test_() ->
+ {
+ "Test match functions",
+ {foreach, fun() -> ets_lru:start_link(test_lru, []) end, fun stop_lru/1, [
+ fun({ok, LRU}) ->
+ {
+ "Empty match",
+ ?_assertEqual([], ets_lru:match(LRU, a, '$1'))
+ }
+ end,
+ fun({ok, LRU}) ->
+ ets_lru:insert(LRU, b, {x, y}),
+ {
+ "Single match",
+ ?_assertEqual(
+ [[x, y]],
+ ets_lru:match(LRU, b, {'$1', '$2'})
+ )
+ }
+ end,
+ fun({ok, LRU}) ->
+ ets_lru:insert(LRU, boston, {state, "MA"}),
+ ets_lru:insert(LRU, new_york, {state, "NY"}),
+ Values = ets_lru:match(LRU, '_', {state, '$1'}),
+ {
+ "Multiple match",
+ ?_assertEqual([["MA"], ["NY"]], lists:sort(Values))
+ }
+ end,
+ fun({ok, LRU}) ->
+ {
+ "Empty match_object",
+ ?_assertEqual([], ets_lru:match_object(LRU, a, '$1'))
+ }
+ end,
+ fun({ok, LRU}) ->
+ ets_lru:insert(LRU, ans, 42),
+ [
+ {
+ "Single match_object (registered)",
+ ?_assertEqual(
+ [42],
+ ets_lru:match_object(test_lru, ans, '$1')
+ )
+ },
+ {
+ "Single match_object (pid)",
+ ?_assertEqual(
+ [42],
+ ets_lru:match_object(LRU, ans, '$1')
+ )
+ }
+ ]
+ end,
+ fun({ok, LRU}) ->
+ ets_lru:insert(LRU, {color, blue}, a),
+ ets_lru:insert(LRU, {color, red}, b),
+ Values = ets_lru:match_object(LRU, {color, '_'}, '_'),
+ {
+ "Multiple match_object",
+ ?_assertEqual(lists:sort(Values), [a, b])
+ }
+ end
+ ]}
+ }.
+
+test_good_opts(Opts, {ok, LRU}) ->
+ Msg = io_lib:format("LRU created ok with options: ~w", [Opts]),
+ {lists:flatten(Msg), ?_assert(is_pid(LRU))};
+test_good_opts(Opts, ErrorMsg) ->
+ Msg = io_lib:format("LRU created ok with options: ~w", [Opts]),
+ {lists:flatten(Msg), ?_assertEqual(ok, ErrorMsg)}.
+
+test_bad_opts([Opts], {error, {bad_return_value, {invalid_option, Opts2}}}) ->
+ Msg = io_lib:format("LRU failed with bad options: ~w", [Opts]),
+ {lists:flatten(Msg), ?_assertEqual(Opts, Opts2)}.
+
+test_limits([{max_objects, N}], {ok, LRU}) ->
+ {
+ "Max object count ok",
+ ?_assert(insert_kvs(size, LRU, 100 * N, N))
+ };
+test_limits([{max_size, N}], {ok, LRU}) ->
+ {
+ "Max size ok",
+ ?_assert(insert_kvs(memory, LRU, 10 * N, N))
+ };
+test_limits([{max_lifetime, N}], {ok, LRU}) ->
+ [
+ {
+ "Expire leaves new entries",
+ ?_test(begin
+ ets_lru:insert(LRU, foo, bar),
+ ?assertEqual({ok, bar}, ets_lru:lookup(LRU, foo))
+ end)
+ },
+ {
+ "Entry was expired",
+ ?_test(begin
+ timer:sleep(round(N * 1.5)),
+ ?assertEqual(not_found, ets_lru:lookup(LRU, foo))
+ end)
+ }
+ ].
+
+insert_kvs(_, _, 0, _) ->
+ true;
+insert_kvs(Info, LRU, Count, Limit) ->
+ ets_lru:insert(LRU, Count, 1.5234),
+ case ets:info(lru_objects, Info) > Limit of
+ true ->
+ % Retry again as eviction statistics
+ % returned by ets:info() can be delayed.
+ timer:sleep(1),
+ case ets:info(lru_objects, Info) > Limit of
+ true ->
+ erlang:error(exceeded_limit);
+ false ->
+ true
+ end;
+ false ->
+ true
+ end,
+ insert_kvs(Info, LRU, Count - 1, Limit).
+
+stop_lru({ok, LRU}) ->
+ Ref = erlang:monitor(process, LRU),
+ ets_lru:stop(LRU),
+ receive
+ {'DOWN', Ref, process, LRU, Reason} -> Reason
+ end;
+stop_lru({error, _}) ->
+ ok.
+
+valid_parameterized_time_unit_test() ->
+ Opts = [{time_unit, microsecond}],
+ {ok, LRU} = ets_lru:start_link(lru_test, Opts),
+ ?assert(is_process_alive(LRU)),
+ ok = ets_lru:insert(LRU, foo, bar),
+ ?assertEqual({ok, bar}, ets_lru:lookup(LRU, foo)),
+ ?assertEqual(ok, ets_lru:stop(LRU)).
+
+invalid_parameterized_time_unit_test() ->
+ Opts = [{time_unit, invalid}],
+ {ok, LRU} = ets_lru:start_link(lru_test, Opts),
+ ?assertExit(_, ets_lru:insert(LRU, foo, bar)).
diff --git a/src/khash/.gitignore b/src/khash/.gitignore
new file mode 100644
index 000000000..a14ed6b30
--- /dev/null
+++ b/src/khash/.gitignore
@@ -0,0 +1,10 @@
+/c_src/hash.d
+/c_src/khash.d
+c_src/*.o
+ebin/
+test/*.beam
+.eunit
+.rebar
+priv/*.so
+vc*.pdb
+compile_commands.json
diff --git a/src/khash/LICENSE b/src/khash/LICENSE
new file mode 100644
index 000000000..873b78bd3
--- /dev/null
+++ b/src/khash/LICENSE
@@ -0,0 +1,76 @@
+khash
+=====
+
+Files: c_src/khash.c src/* test/*.t test/util.erl test/gen_term.erl
+
+Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+
+Kazlib - Hash Table Data Type
+=============================
+
+Files: c_src/hash.*
+
+Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
+
+Free Software License:
+
+All rights are reserved by the author, with the following exceptions:
+Permission is granted to freely reproduce and distribute this software,
+possibly in exchange for a fee, provided that this copyright notice appears
+intact. Permission is also granted to adapt this software to produce
+derivative works, as long as the modified versions carry this copyright
+notice and additional notices stating that the work has been modified.
+This source code may be translated into executable form and incorporated
+into proprietary software; there is no requirement for such software to
+contain a copyright notice related to this source.
+
+
+Etap
+====
+
+Files: test/etap.erl
+
+Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net>
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/src/khash/Makefile b/src/khash/Makefile
new file mode 100644
index 000000000..ccc4a8ab0
--- /dev/null
+++ b/src/khash/Makefile
@@ -0,0 +1,44 @@
+REBAR?=rebar
+
+
+.PHONY: all
+# target: all - Makes everything
+all: build
+
+
+.PHONY: build
+# target: build - Builds the project
+build:
+ $(REBAR) compile
+
+
+.PHONY: check
+# target: check - Checks if project builds and passes all the tests
+check: build eunit
+
+
+.PHONY: clean
+# target: clean - Removes build artifacts
+clean:
+ $(REBAR) clean
+ rm -f test/*.beam
+
+
+.PHONY: distclean
+# target: distclean - Removes all unversioned files
+distclean: clean
+ git clean -fxd
+
+
+.PHONY: eunit
+# target: eunit - Runs eunit test suite
+eunit:
+ $(REBAR) eunit
+
+
+.PHONY: help
+# target: help - Prints this help
+help:
+ @egrep "^# target:" Makefile | sed -e 's/^# target: //g' | sort
+
+
diff --git a/src/khash/README.md b/src/khash/README.md
new file mode 100644
index 000000000..c04d00829
--- /dev/null
+++ b/src/khash/README.md
@@ -0,0 +1,4 @@
+khash
+=====
+
+This is a basic NIF wrapper around Kazlib's hash data structure.
diff --git a/src/khash/c_src/hash.c b/src/khash/c_src/hash.c
new file mode 100644
index 000000000..e3da0da20
--- /dev/null
+++ b/src/khash/c_src/hash.c
@@ -0,0 +1,843 @@
+/*
+ * Hash Table Data Type
+ * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
+ *
+ * Free Software License:
+ *
+ * All rights are reserved by the author, with the following exceptions:
+ * Permission is granted to freely reproduce and distribute this software,
+ * possibly in exchange for a fee, provided that this copyright notice appears
+ * intact. Permission is also granted to adapt this software to produce
+ * derivative works, as long as the modified versions carry this copyright
+ * notice and additional notices stating that the work has been modified.
+ * This source code may be translated into executable form and incorporated
+ * into proprietary software; there is no requirement for such software to
+ * contain a copyright notice related to this source.
+ *
+ * $Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $
+ * $Name: kazlib_1_20 $
+ */
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <assert.h>
+#include <string.h>
+#define HASH_IMPLEMENTATION
+#include "hash.h"
+
+#ifdef KAZLIB_RCSID
+static const char rcsid[] = "$Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $";
+#endif
+
+#define INIT_BITS 6
+#define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */
+#define INIT_MASK ((INIT_SIZE) - 1)
+
+#define next hash_next
+#define key hash_key
+#define data hash_data
+#define hkey hash_hkey
+
+#define table hash_table
+#define nchains hash_nchains
+#define nodecount hash_nodecount
+#define maxcount hash_maxcount
+#define highmark hash_highmark
+#define lowmark hash_lowmark
+#define compare hash_compare
+#define function hash_function
+#define allocnode hash_allocnode
+#define freenode hash_freenode
+#define context hash_context
+#define mask hash_mask
+#define dynamic hash_dynamic
+
+#define table hash_table
+#define chain hash_chain
+
+static hnode_t *kl_hnode_alloc(void *context);
+static void kl_hnode_free(hnode_t *node, void *context);
+static hash_val_t hash_fun_default(const void *key);
+static int hash_comp_default(const void *key1, const void *key2);
+
+int hash_val_t_bit;
+
+/*
+ * Compute the number of bits in the hash_val_t type. We know that hash_val_t
+ * is an unsigned integral type. Thus the highest value it can hold is a
+ * Mersenne number (power of two, less one). We initialize a hash_val_t
+ * object with this value and then shift bits out one by one while counting.
+ * Notes:
+ * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power
+ * of two. This means that its binary representation consists of all one
+ * bits, and hence ``val'' is initialized to all one bits.
+ * 2. While bits remain in val, we increment the bit count and shift it to the
+ * right, replacing the topmost bit by zero.
+ */
+
+static void compute_bits(void)
+{
+ hash_val_t val = HASH_VAL_T_MAX; /* 1 */
+ int bits = 0;
+
+ while (val) { /* 2 */
+ bits++;
+ val >>= 1;
+ }
+
+ hash_val_t_bit = bits;
+}
+
+/*
+ * Verify whether the given argument is a power of two.
+ */
+
+static int is_power_of_two(hash_val_t arg)
+{
+ if (arg == 0)
+ return 0;
+ while ((arg & 1) == 0)
+ arg >>= 1;
+ return (arg == 1);
+}
+
+/*
+ * Compute a shift amount from a given table size
+ */
+
+static hash_val_t compute_mask(hashcount_t size)
+{
+ assert (is_power_of_two(size));
+ assert (size >= 2);
+
+ return size - 1;
+}
+
+/*
+ * Initialize the table of pointers to null.
+ */
+
+static void clear_table(hash_t *hash)
+{
+ hash_val_t i;
+
+ for (i = 0; i < hash->nchains; i++)
+ hash->table[i] = NULL;
+}
+
+/*
+ * Double the size of a dynamic table. This works as follows. Each chain splits
+ * into two adjacent chains. The shift amount increases by one, exposing an
+ * additional bit of each hashed key. For each node in the original chain, the
+ * value of this newly exposed bit will decide which of the two new chains will
+ * receive the node: if the bit is 1, the chain with the higher index will have
+ * the node, otherwise the lower chain will receive the node. In this manner,
+ * the hash table will continue to function exactly as before without having to
+ * rehash any of the keys.
+ * Notes:
+ * 1. Overflow check.
+ * 2. The new number of chains is twice the old number of chains.
+ * 3. The new mask is one bit wider than the previous, revealing a
+ * new bit in all hashed keys.
+ * 4. Allocate a new table of chain pointers that is twice as large as the
+ * previous one.
+ * 5. If the reallocation was successful, we perform the rest of the growth
+ * algorithm, otherwise we do nothing.
+ * 6. The exposed_bit variable holds a mask with which each hashed key can be
+ * AND-ed to test the value of its newly exposed bit.
+ * 7. Now loop over each chain in the table and sort its nodes into two
+ * chains based on the value of each node's newly exposed hash bit.
+ * 8. The low chain replaces the current chain. The high chain goes
+ * into the corresponding sister chain in the upper half of the table.
+ * 9. We have finished dealing with the chains and nodes. We now update
+ * the various bookkeeping fields of the hash structure.
+ */
+
+static void grow_table(hash_t *hash)
+{
+ hnode_t **newtable;
+
+ assert (2 * hash->nchains > hash->nchains); /* 1 */
+
+ newtable = realloc(hash->table,
+ sizeof *newtable * hash->nchains * 2); /* 4 */
+
+ if (newtable) { /* 5 */
+ hash_val_t mask = (hash->mask << 1) | 1; /* 3 */
+ hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */
+ hash_val_t chain;
+
+ assert (mask != hash->mask);
+
+ for (chain = 0; chain < hash->nchains; chain++) { /* 7 */
+ hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next;
+
+ for (hptr = newtable[chain]; hptr != 0; hptr = next) {
+ next = hptr->next;
+
+ if (hptr->hkey & exposed_bit) {
+ hptr->next = high_chain;
+ high_chain = hptr;
+ } else {
+ hptr->next = low_chain;
+ low_chain = hptr;
+ }
+ }
+
+ newtable[chain] = low_chain; /* 8 */
+ newtable[chain + hash->nchains] = high_chain;
+ }
+
+ hash->table = newtable; /* 9 */
+ hash->mask = mask;
+ hash->nchains *= 2;
+ hash->lowmark *= 2;
+ hash->highmark *= 2;
+ }
+ assert (kl_hash_verify(hash));
+}
+
+/*
+ * Cut a table size in half. This is done by folding together adjacent chains
+ * and populating the lower half of the table with these chains. The chains are
+ * simply spliced together. Once this is done, the whole table is reallocated
+ * to a smaller object.
+ * Notes:
+ * 1. It is illegal to have a hash table with one slot. This would mean that
+ * hash->shift is equal to hash_val_t_bit, an illegal shift value.
+ * Also, other things could go wrong, such as hash->lowmark becoming zero.
+ * 2. Looping over each pair of sister chains, the low_chain is set to
+ * point to the head node of the chain in the lower half of the table,
+ * and high_chain points to the head node of the sister in the upper half.
+ * 3. The intent here is to compute a pointer to the last node of the
+ * lower chain into the low_tail variable. If this chain is empty,
+ * low_tail ends up with a null value.
+ * 4. If the lower chain is not empty, we simply tack the upper chain onto it.
+ * If the upper chain is a null pointer, nothing happens.
+ * 5. Otherwise, if the lower chain is empty but the upper one is not,
+ *    the upper (high) chain is simply transferred into the corresponding
+ *    slot in the lower half of the table.
+ * 6. Otherwise if both chains are empty, there is nothing to do.
+ * 7. All the chain pointers are in the lower half of the table now, so
+ * we reallocate it to a smaller object. This, of course, invalidates
+ * all pointer-to-pointers which reference into the table from the
+ * first node of each chain.
+ * 8. Though it's unlikely, the reallocation may fail. In this case we
+ * pretend that the table _was_ reallocated to a smaller object.
+ * 9. Finally, update the various table parameters to reflect the new size.
+ */
+
+static void shrink_table(hash_t *hash)
+{
+ hash_val_t chain, nchains;
+ hnode_t **newtable, *low_tail, *low_chain, *high_chain;
+
+ assert (hash->nchains >= 2); /* 1 */
+ nchains = hash->nchains / 2;
+
+ for (chain = 0; chain < nchains; chain++) {
+ low_chain = hash->table[chain]; /* 2 */
+ high_chain = hash->table[chain + nchains];
+ for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next)
+ ; /* 3 */
+ if (low_chain != 0) /* 4 */
+ low_tail->next = high_chain;
+ else if (high_chain != 0) /* 5 */
+ hash->table[chain] = high_chain;
+ else
+ assert (hash->table[chain] == NULL); /* 6 */
+ }
+ newtable = realloc(hash->table,
+ sizeof *newtable * nchains); /* 7 */
+ if (newtable) /* 8 */
+ hash->table = newtable;
+ hash->mask >>= 1; /* 9 */
+ hash->nchains = nchains;
+ hash->lowmark /= 2;
+ hash->highmark /= 2;
+ assert (kl_hash_verify(hash));
+}
+
+
+/*
+ * Create a dynamic hash table. Both the hash table structure and the table
+ * itself are dynamically allocated. Furthermore, the table is extendible in
+ * that it will automatically grow as its load factor increases beyond a
+ * certain threshold.
+ * Notes:
+ * 1. If the number of bits in the hash_val_t type has not been computed yet,
+ * we do so here, because this is likely to be the first function that the
+ * user calls.
+ * 2. Allocate a hash table control structure.
+ * 3. If a hash table control structure is successfully allocated, we
+ * proceed to initialize it. Otherwise we return a null pointer.
+ * 4. We try to allocate the table of hash chains.
+ * 5. If we were able to allocate the hash chain table, we can finish
+ * initializing the hash structure and the table. Otherwise, we must
+ * backtrack by freeing the hash structure.
+ * 6. INIT_SIZE should be a power of two. The high and low marks are always set
+ * to be twice the table size and half the table size respectively. When the
+ * number of nodes in the table grows beyond the high size (beyond load
+ * factor 2), it will double in size to cut the load factor down to about
+ *    1. If the table shrinks down to or beneath load factor 0.5,
+ * it will shrink, bringing the load up to about 1. However, the table
+ * will never shrink beneath INIT_SIZE even if it's emptied.
+ * 7. This indicates that the table is dynamically allocated and dynamically
+ * resized on the fly. A table that has this value set to zero is
+ * assumed to be statically allocated and will not be resized.
+ * 8. The table of chains must be properly reset to all null pointers.
+ */
+
+hash_t *kl_hash_create(hashcount_t maxcount, hash_comp_t compfun,
+ hash_fun_t hashfun)
+{
+ hash_t *hash;
+
+ if (hash_val_t_bit == 0) /* 1 */
+ compute_bits();
+
+ hash = malloc(sizeof *hash); /* 2 */
+
+ if (hash) { /* 3 */
+ hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */
+ if (hash->table) { /* 5 */
+ hash->nchains = INIT_SIZE; /* 6 */
+ hash->highmark = INIT_SIZE * 2;
+ hash->lowmark = INIT_SIZE / 2;
+ hash->nodecount = 0;
+ hash->maxcount = maxcount;
+ hash->compare = compfun ? compfun : hash_comp_default;
+ hash->function = hashfun ? hashfun : hash_fun_default;
+ hash->allocnode = kl_hnode_alloc;
+ hash->freenode = kl_hnode_free;
+ hash->context = NULL;
+ hash->mask = INIT_MASK;
+ hash->dynamic = 1; /* 7 */
+ clear_table(hash); /* 8 */
+ assert (kl_hash_verify(hash));
+ return hash;
+ }
+ free(hash);
+ }
+
+ return NULL;
+}
+
+/*
+ * Select a different set of node allocator routines.
+ */
+
+void kl_hash_set_allocator(hash_t *hash, hnode_alloc_t al,
+ hnode_free_t fr, void *context)
+{
+ assert (kl_hash_count(hash) == 0);
+ assert ((al == 0 && fr == 0) || (al != 0 && fr != 0));
+
+ hash->allocnode = al ? al : kl_hnode_alloc;
+ hash->freenode = fr ? fr : kl_hnode_free;
+ hash->context = context;
+}
+
+/*
+ * Free every node in the hash using the hash->freenode() function pointer, and
+ * cause the hash to become empty.
+ */
+
+void kl_hash_free_nodes(hash_t *hash)
+{
+ hscan_t hs;
+ hnode_t *node;
+ kl_hash_scan_begin(&hs, hash);
+ while ((node = kl_hash_scan_next(&hs))) {
+ kl_hash_scan_delete(hash, node);
+ hash->freenode(node, hash->context);
+ }
+ hash->nodecount = 0;
+ clear_table(hash);
+}
+
+/*
+ * Obsolescent function for removing all nodes from a table,
+ * freeing them and then freeing the table all in one step.
+ */
+
+void kl_hash_free(hash_t *hash)
+{
+#ifdef KAZLIB_OBSOLESCENT_DEBUG
+ assert ("call to obsolescent function hash_free()" && 0);
+#endif
+ kl_hash_free_nodes(hash);
+ kl_hash_destroy(hash);
+}
+
+/*
+ * Free a dynamic hash table structure.
+ */
+
+void kl_hash_destroy(hash_t *hash)
+{
+ assert (hash_val_t_bit != 0);
+ assert (kl_hash_isempty(hash));
+ free(hash->table);
+ free(hash);
+}
+
+/*
+ * Initialize a user supplied hash structure. The user also supplies a table of
+ * chains which is assigned to the hash structure. The table is static---it
+ * will not grow or shrink.
+ * 1. See note 1. in hash_create().
+ * 2. The user supplied array of pointers hopefully contains nchains nodes.
+ * 3. See note 7. in hash_create().
+ * 4. We must dynamically compute the mask from the given power of two table
+ * size.
+ * 5. The user supplied table can't be assumed to contain null pointers,
+ * so we reset it here.
+ */
+
+hash_t *kl_hash_init(hash_t *hash, hashcount_t maxcount,
+ hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table,
+ hashcount_t nchains)
+{
+ if (hash_val_t_bit == 0) /* 1 */
+ compute_bits();
+
+ assert (is_power_of_two(nchains));
+
+ hash->table = table; /* 2 */
+ hash->nchains = nchains;
+ hash->nodecount = 0;
+ hash->maxcount = maxcount;
+ hash->compare = compfun ? compfun : hash_comp_default;
+ hash->function = hashfun ? hashfun : hash_fun_default;
+ hash->dynamic = 0; /* 3 */
+ hash->mask = compute_mask(nchains); /* 4 */
+ clear_table(hash); /* 5 */
+
+ assert (kl_hash_verify(hash));
+
+ return hash;
+}
+
+/*
+ * Reset the hash scanner so that the next element retrieved by
+ * hash_scan_next() shall be the first element on the first non-empty chain.
+ * Notes:
+ * 1. Locate the first non empty chain.
+ * 2. If an empty chain is found, remember which one it is and set the next
+ * pointer to refer to its first element.
+ * 3. Otherwise if a chain is not found, set the next pointer to NULL
+ * so that hash_scan_next() shall indicate failure.
+ */
+
+void kl_hash_scan_begin(hscan_t *scan, hash_t *hash)
+{
+ hash_val_t nchains = hash->nchains;
+ hash_val_t chain;
+
+ scan->table = hash;
+
+ /* 1 */
+
+ for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++)
+ ;
+
+ if (chain < nchains) { /* 2 */
+ scan->chain = chain;
+ scan->next = hash->table[chain];
+ } else { /* 3 */
+ scan->next = NULL;
+ }
+}
+
+/*
+ * Retrieve the next node from the hash table, and update the pointer
+ * for the next invocation of hash_scan_next().
+ * Notes:
+ * 1. Remember the next pointer in a temporary value so that it can be
+ * returned.
+ * 2. This assertion essentially checks whether the module has been properly
+ * initialized. The first point of interaction with the module should be
+ * either hash_create() or hash_init(), both of which set hash_val_t_bit to
+ * a non zero value.
+ * 3. If the next pointer we are returning is not NULL, then the user is
+ * allowed to call hash_scan_next() again. We prepare the new next pointer
+ * for that call right now. That way the user is allowed to delete the node
+ * we are about to return, since we will no longer be needing it to locate
+ * the next node.
+ * 4. If there is a next node in the chain (next->next), then that becomes the
+ * new next node, otherwise ...
+ * 5. We have exhausted the current chain, and must locate the next subsequent
+ * non-empty chain in the table.
+ * 6. If a non-empty chain is found, the first element of that chain becomes
+ * the new next node. Otherwise there is no new next node and we set the
+ * pointer to NULL so that the next time hash_scan_next() is called, a null
+ * pointer shall be immediately returned.
+ */
+
+
+hnode_t *kl_hash_scan_next(hscan_t *scan)
+{
+ hnode_t *next = scan->next; /* 1 */
+ hash_t *hash = scan->table;
+ hash_val_t chain = scan->chain + 1;
+ hash_val_t nchains = hash->nchains;
+
+ assert (hash_val_t_bit != 0); /* 2 */
+
+ if (next) { /* 3 */
+ if (next->next) { /* 4 */
+ scan->next = next->next;
+ } else {
+ while (chain < nchains && hash->table[chain] == 0) /* 5 */
+ chain++;
+ if (chain < nchains) { /* 6 */
+ scan->chain = chain;
+ scan->next = hash->table[chain];
+ } else {
+ scan->next = NULL;
+ }
+ }
+ }
+ return next;
+}
+
+/*
+ * Insert a node into the hash table.
+ * Notes:
+ * 1. It's illegal to insert more than the maximum number of nodes. The client
+ * should verify that the hash table is not full before attempting an
+ * insertion.
+ * 2. The same key may not be inserted into a table twice.
+ * 3. If the table is dynamic and the load factor is already at >= 2,
+ * grow the table.
+ * 4. We take the bottom N bits of the hash value to derive the chain index,
+ * where N is the base 2 logarithm of the size of the hash table.
+ */
+
+void kl_hash_insert(hash_t *hash, hnode_t *node, const void *key)
+{
+ hash_val_t hkey, chain;
+
+ assert (hash_val_t_bit != 0);
+ assert (node->next == NULL);
+ assert (hash->nodecount < hash->maxcount); /* 1 */
+ assert (kl_hash_lookup(hash, key) == NULL); /* 2 */
+
+ if (hash->dynamic && hash->nodecount >= hash->highmark) /* 3 */
+ grow_table(hash);
+
+ hkey = hash->function(key);
+ chain = hkey & hash->mask; /* 4 */
+
+ node->key = key;
+ node->hkey = hkey;
+ node->next = hash->table[chain];
+ hash->table[chain] = node;
+ hash->nodecount++;
+
+ assert (kl_hash_verify(hash));
+}
+
+/*
+ * Find a node in the hash table and return a pointer to it.
+ * Notes:
+ * 1. We hash the key and keep the entire hash value. As an optimization, when
+ * we descend down the chain, we can compare hash values first and only if
+ * hash values match do we perform a full key comparison.
+ * 2. To locate the chain from among 2^N chains, we look at the lower N bits of
+ * the hash value by anding them with the current mask.
+ * 3. Looping through the chain, we compare the stored hash value inside each
+ * node against our computed hash. If they match, then we do a full
+ * comparison between the unhashed keys. If these match, we have located the
+ * entry.
+ */
+
+hnode_t *kl_hash_lookup(hash_t *hash, const void *key)
+{
+ hash_val_t hkey, chain;
+ hnode_t *nptr;
+
+ hkey = hash->function(key); /* 1 */
+ chain = hkey & hash->mask; /* 2 */
+
+ for (nptr = hash->table[chain]; nptr; nptr = nptr->next) { /* 3 */
+ if (nptr->hkey == hkey && hash->compare(nptr->key, key) == 0)
+ return nptr;
+ }
+
+ return NULL;
+}
+
+/*
+ * Delete the given node from the hash table. Since the chains
+ * are singly linked, we must locate the start of the node's chain
+ * and traverse.
+ * Notes:
+ * 1. The node must belong to this hash table, and its key must not have
+ *    been tampered with.
+ * 2. If this deletion will take the node count below the low mark, we
+ *    shrink the table now.
+ * 3. Determine which chain the node belongs to, and fetch the pointer
+ *    to the first node in this chain.
+ * 4. If the node being deleted is the first node in the chain, then
+ *    simply update the chain head pointer.
+ * 5. Otherwise advance to the node's predecessor, and splice out
+ *    by updating the predecessor's next pointer.
+ * 6. Indicate that the node is no longer in a hash table.
+ */
+
+hnode_t *kl_hash_delete(hash_t *hash, hnode_t *node)
+{
+    hash_val_t chain;
+    hnode_t *hptr;
+
+    assert (kl_hash_lookup(hash, node->key) == node); /* 1 */
+    assert (hash_val_t_bit != 0);
+
+    if (hash->dynamic && hash->nodecount <= hash->lowmark
+        && hash->nodecount > INIT_SIZE)
+    shrink_table(hash); /* 2 */
+
+    chain = node->hkey & hash->mask; /* 3 */
+    hptr = hash->table[chain];
+
+    if (hptr == node) { /* 4 */
+        hash->table[chain] = node->next;
+    } else {
+        /* NOTE(review): the loop condition dereferences hptr->next before
+         * the assert below can catch a NULL hptr, so the assert is only a
+         * partial guard. The preceding kl_hash_lookup assert guarantees
+         * membership in debug builds. */
+        while (hptr->next != node) { /* 5 */
+            assert (hptr != 0);
+            hptr = hptr->next;
+        }
+        assert (hptr->next == node);
+        hptr->next = node->next;
+    }
+
+    hash->nodecount--;
+    assert (kl_hash_verify(hash));
+
+    node->next = NULL; /* 6 */
+    return node;
+}
+
+/*
+ * Allocate a node via the table's allocnode callback, bind it to the
+ * given data, and insert it under the given key. Returns 1 on success,
+ * 0 if the allocator returned NULL.
+ */
+int kl_hash_alloc_insert(hash_t *hash, const void *key, void *data)
+{
+    hnode_t *node = hash->allocnode(hash->context);
+
+    if (node) {
+        kl_hnode_init(node, data);
+        kl_hash_insert(hash, node, key);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Remove the node from the table, then release it through the table's
+ * freenode callback.
+ */
+void kl_hash_delete_free(hash_t *hash, hnode_t *node)
+{
+    kl_hash_delete(hash, node);
+    hash->freenode(node, hash->context);
+}
+
+/*
+ * Exactly like hash_delete, except does not trigger table shrinkage. This is to be
+ * used from within a hash table scan operation. See notes for hash_delete.
+ */
+
+hnode_t *kl_hash_scan_delete(hash_t *hash, hnode_t *node)
+{
+    hash_val_t chain;
+    hnode_t *hptr;
+
+    assert (kl_hash_lookup(hash, node->key) == node);
+    assert (hash_val_t_bit != 0);
+
+    chain = node->hkey & hash->mask;
+    hptr = hash->table[chain];
+
+    /* Splice the node out of its singly linked chain. */
+    if (hptr == node) {
+        hash->table[chain] = node->next;
+    } else {
+        while (hptr->next != node)
+            hptr = hptr->next;
+        hptr->next = node->next;
+    }
+
+    hash->nodecount--;
+    assert (kl_hash_verify(hash));
+    node->next = NULL;
+
+    return node;
+}
+
+/*
+ * Like hash_delete_free but based on hash_scan_delete: removes and frees
+ * the node without shrinking the table, so it is safe during a scan.
+ */
+
+void kl_hash_scan_delfree(hash_t *hash, hnode_t *node)
+{
+    kl_hash_scan_delete(hash, node);
+    hash->freenode(node, hash->context);
+}
+
+/*
+ * Verify whether the given object is a valid hash table.
+ * Notes:
+ * 1. If the hash table is dynamic, verify whether the high and
+ *    low expansion/shrinkage thresholds are powers of two.
+ * 2. Count all nodes in the table, and test each hash value
+ *    to see whether it is correct for the node's chain.
+ */
+
+int kl_hash_verify(hash_t *hash)
+{
+    hashcount_t count = 0;
+    hash_val_t chain;
+    hnode_t *hptr;
+
+    if (hash->dynamic) { /* 1 */
+        if (hash->lowmark >= hash->highmark)
+            return 0;
+        if (!is_power_of_two(hash->highmark))
+            return 0;
+        if (!is_power_of_two(hash->lowmark))
+            return 0;
+    }
+
+    for (chain = 0; chain < hash->nchains; chain++) { /* 2 */
+        for (hptr = hash->table[chain]; hptr != 0; hptr = hptr->next) {
+            if ((hptr->hkey & hash->mask) != chain)
+                return 0;
+            count++;
+        }
+    }
+
+    if (count != hash->nodecount)
+        return 0;
+
+    return 1;
+}
+
+/*
+ * Test whether the hash table is full and return 1 if this is true,
+ * 0 if it is false. The #undef strips the macro version from hash.h so
+ * a real linkable function is emitted here.
+ */
+
+#undef kl_hash_isfull
+int kl_hash_isfull(hash_t *hash)
+{
+    return hash->nodecount == hash->maxcount;
+}
+
+/*
+ * Test whether the hash table is empty and return 1 if this is true,
+ * 0 if it is false.
+ */
+
+#undef kl_hash_isempty
+int kl_hash_isempty(hash_t *hash)
+{
+    return hash->nodecount == 0;
+}
+
+/* Default node allocator. The sizeof operand is never evaluated, so the
+ * self-referential call simply yields sizeof(hnode_t) -- no recursion. */
+static hnode_t *kl_hnode_alloc(void *context)
+{
+    return malloc(sizeof *kl_hnode_alloc(NULL));
+}
+
+/* Default node deallocator, paired with kl_hnode_alloc. */
+static void kl_hnode_free(hnode_t *node, void *context)
+{
+    free(node);
+}
+
+
+/*
+ * Create a hash table node dynamically and assign it the given data.
+ * Returns NULL if allocation fails. The key/hash fields are left unset;
+ * they are filled in by kl_hash_insert.
+ */
+
+hnode_t *kl_hnode_create(void *data)
+{
+    hnode_t *node = malloc(sizeof *node);
+    if (node) {
+        node->data = data;
+        node->next = NULL;
+    }
+    return node;
+}
+
+/*
+ * Initialize a client-supplied node. Returns the node for call chaining.
+ */
+
+hnode_t *kl_hnode_init(hnode_t *hnode, void *data)
+{
+    hnode->data = data;
+    hnode->next = NULL;
+    return hnode;
+}
+
+/*
+ * Destroy a dynamically allocated node (free semantics: NULL is a no-op).
+ */
+
+void kl_hnode_destroy(hnode_t *hnode)
+{
+    free(hnode);
+}
+
+/* Out-of-line versions of the accessor macros defined in hash.h; each
+ * #undef strips the macro so a real linkable function is emitted. */
+#undef kl_hnode_put
+void kl_hnode_put(hnode_t *node, void *data)
+{
+    node->data = data;
+}
+
+#undef kl_hnode_get
+void *kl_hnode_get(hnode_t *node)
+{
+    return node->data;
+}
+
+#undef kl_hnode_getkey
+const void *kl_hnode_getkey(hnode_t *node)
+{
+    return node->key;
+}
+
+#undef kl_hash_count
+hashcount_t kl_hash_count(hash_t *hash)
+{
+    return hash->nodecount;
+}
+
+#undef kl_hash_size
+hashcount_t kl_hash_size(hash_t *hash)
+{
+    return hash->nchains;
+}
+
+/* Default hash function for NUL-terminated byte strings: mixes each
+ * character's nibbles through a fixed random table with rotates. Only
+ * valid when keys are C strings -- khash supplies its own callbacks. */
+static hash_val_t hash_fun_default(const void *key)
+{
+    static unsigned long randbox[] = {
+        0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
+        0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
+        0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
+        0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
+    };
+
+    const unsigned char *str = key;
+    hash_val_t acc = 0;
+
+    while (*str) {
+        acc ^= randbox[(*str + acc) & 0xf];
+        acc = (acc << 1) | (acc >> 31);
+        acc &= 0xffffffffU;
+        acc ^= randbox[((*str++ >> 4) + acc) & 0xf];
+        acc = (acc << 2) | (acc >> 30);
+        acc &= 0xffffffffU;
+    }
+    return acc;
+}
+
+/* Default comparison: keys are NUL-terminated strings, ordered by strcmp. */
+static int hash_comp_default(const void *key1, const void *key2)
+{
+    return strcmp(key1, key2);
+}
diff --git a/src/khash/c_src/hash.h b/src/khash/c_src/hash.h
new file mode 100644
index 000000000..0c75ed00f
--- /dev/null
+++ b/src/khash/c_src/hash.h
@@ -0,0 +1,240 @@
+/*
+ * Hash Table Data Type
+ * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
+ *
+ * Free Software License:
+ *
+ * All rights are reserved by the author, with the following exceptions:
+ * Permission is granted to freely reproduce and distribute this software,
+ * possibly in exchange for a fee, provided that this copyright notice appears
+ * intact. Permission is also granted to adapt this software to produce
+ * derivative works, as long as the modified versions carry this copyright
+ * notice and additional notices stating that the work has been modified.
+ * This source code may be translated into executable form and incorporated
+ * into proprietary software; there is no requirement for such software to
+ * contain a copyright notice related to this source.
+ *
+ * $Id: hash.h,v 1.22.2.7 2000/11/13 01:36:45 kaz Exp $
+ * $Name: kazlib_1_20 $
+ */
+
+#ifndef HASH_H
+#define HASH_H
+
+#include <limits.h>
+#ifdef KAZLIB_SIDEEFFECT_DEBUG
+#include "sfx.h"
+#endif
+
+/*
+ * Blurb for inclusion into C++ translation units
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned long hashcount_t;
+#define HASHCOUNT_T_MAX ULONG_MAX
+
+typedef unsigned long hash_val_t;
+#define HASH_VAL_T_MAX ULONG_MAX
+
+extern int hash_val_t_bit;
+
+#ifndef HASH_VAL_T_BIT
+#define HASH_VAL_T_BIT ((int) hash_val_t_bit)
+#endif
+
+/*
+ * Hash chain node structure.
+ * Notes:
+ * 1. This preprocessing directive is for debugging purposes. The effect is
+ * that if the preprocessor symbol KAZLIB_OPAQUE_DEBUG is defined prior to the
+ * inclusion of this header, then the structure shall be declared as having
+ * the single member int __OPAQUE__. This way, any attempts by the
+ * client code to violate the principles of information hiding (by accessing
+ * the structure directly) can be diagnosed at translation time. However,
+ * note the resulting compiled unit is not suitable for linking.
+ * 2. This is a pointer to the next node in the chain. In the last node of a
+ * chain, this pointer is null.
+ * 3. The key is a pointer to some user supplied data that contains a unique
+ * identifier for each hash node in a given table. The interpretation of
+ * the data is up to the user. When creating or initializing a hash table,
+ * the user must supply a pointer to a function for comparing two keys,
+ * and a pointer to a function for hashing a key into a numeric value.
+ * 4. The value is a user-supplied pointer to void which may refer to
+ * any data object. It is not interpreted in any way by the hashing
+ * module.
+ * 5. The hashed key is stored in each node so that we don't have to rehash
+ * each key when the table must grow or shrink.
+ */
+
+typedef struct hnode_t {
+ #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) /* 1 */
+ struct hnode_t *hash_next; /* 2 */
+ const void *hash_key; /* 3 */
+ void *hash_data; /* 4 */
+ hash_val_t hash_hkey; /* 5 */
+ #else
+ int hash_dummy;
+ #endif
+} hnode_t;
+
+/*
+ * The comparison function pointer type. A comparison function takes two keys
+ * and produces a value of -1 if the left key is less than the right key, a
+ * value of 0 if the keys are equal, and a value of 1 if the left key is
+ * greater than the right key.
+ */
+
+typedef int (*hash_comp_t)(const void *, const void *);
+
+/*
+ * The hashing function performs some computation on a key and produces an
+ * integral value of type hash_val_t based on that key. For best results, the
+ * function should have good randomness properties in *all* significant bits
+ * over the set of keys that are being inserted into a given hash table. In
+ * particular, the most significant bits of hash_val_t are most significant to
+ * the hash module. Only as the hash table expands are less significant bits
+ * examined. Thus a function that has good distribution in its upper bits but
+ * not lower is preferable to one that has poor distribution in the upper bits
+ * but not the lower ones.
+ */
+
+typedef hash_val_t (*hash_fun_t)(const void *);
+
+/*
+ * allocator functions
+ */
+
+typedef hnode_t *(*hnode_alloc_t)(void *);
+typedef void (*hnode_free_t)(hnode_t *, void *);
+
+/*
+ * This is the hash table control structure. It keeps track of information
+ * about a hash table, as well as the hash table itself.
+ * Notes:
+ * 1. Pointer to the hash table proper. The table is an array of pointers to
+ * hash nodes (of type hnode_t). If the table is empty, every element of
+ * this table is a null pointer. A non-null entry points to the first
+ * element of a chain of nodes.
+ * 2. This member keeps track of the size of the hash table---that is, the
+ * number of chain pointers.
+ * 3. The count member maintains the number of elements that are presently
+ * in the hash table.
+ * 4. The maximum count is the greatest number of nodes that can populate this
+ * table. If the table contains this many nodes, no more can be inserted,
+ * and the hash_isfull() function returns true.
+ * 5. The high mark is a population threshold, measured as a number of nodes,
+ * which, if exceeded, will trigger a table expansion. Only dynamic hash
+ * tables are subject to this expansion.
+ * 6. The low mark is a minimum population threshold, measured as a number of
+ * nodes. If the table population drops below this value, a table shrinkage
+ * will occur. Only dynamic tables are subject to this reduction. No table
+ * will shrink beneath a certain absolute minimum number of nodes.
+ * 7. This is a pointer to the hash table's comparison function. The
+ * function is set once at initialization or creation time.
+ * 8. Pointer to the table's hashing function, set once at creation or
+ * initialization time.
+ * 9. The current hash table mask. If the size of the hash table is 2^N,
+ * this value has its low N bits set to 1, and the others clear. It is used
+ * to select bits from the result of the hashing function to compute an
+ * index into the table.
+ * 10. A flag which indicates whether the table is to be dynamically resized. It
+ * is set to 1 in dynamically allocated tables, 0 in tables that are
+ * statically allocated.
+ */
+
+typedef struct hash_t {
+ #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG)
+ struct hnode_t **hash_table; /* 1 */
+ hashcount_t hash_nchains; /* 2 */
+ hashcount_t hash_nodecount; /* 3 */
+ hashcount_t hash_maxcount; /* 4 */
+ hashcount_t hash_highmark; /* 5 */
+ hashcount_t hash_lowmark; /* 6 */
+ hash_comp_t hash_compare; /* 7 */
+ hash_fun_t hash_function; /* 8 */
+ hnode_alloc_t hash_allocnode;
+ hnode_free_t hash_freenode;
+ void *hash_context;
+ hash_val_t hash_mask; /* 9 */
+ int hash_dynamic; /* 10 */
+ #else
+ int hash_dummy;
+ #endif
+} hash_t;
+
+/*
+ * Hash scanner structure, used for traversals of the data structure.
+ * Notes:
+ * 1. Pointer to the hash table that is being traversed.
+ * 2. Reference to the current chain in the table being traversed (the chain
+ * that contains the next node that shall be retrieved).
+ * 3. Pointer to the node that will be retrieved by the subsequent call to
+ * hash_scan_next().
+ */
+
+typedef struct hscan_t {
+ #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG)
+ hash_t *hash_table; /* 1 */
+ hash_val_t hash_chain; /* 2 */
+ hnode_t *hash_next; /* 3 */
+ #else
+ int hash_dummy;
+ #endif
+} hscan_t;
+
+extern hash_t *kl_hash_create(hashcount_t, hash_comp_t, hash_fun_t);
+extern void kl_hash_set_allocator(hash_t *, hnode_alloc_t, hnode_free_t, void *);
+extern void kl_hash_destroy(hash_t *);
+extern void kl_hash_free_nodes(hash_t *);
+extern void kl_hash_free(hash_t *);
+extern hash_t *kl_hash_init(hash_t *, hashcount_t, hash_comp_t,
+ hash_fun_t, hnode_t **, hashcount_t);
+extern void kl_hash_insert(hash_t *, hnode_t *, const void *);
+extern hnode_t *kl_hash_lookup(hash_t *, const void *);
+extern hnode_t *kl_hash_delete(hash_t *, hnode_t *);
+extern int kl_hash_alloc_insert(hash_t *, const void *, void *);
+extern void kl_hash_delete_free(hash_t *, hnode_t *);
+
+extern void kl_hnode_put(hnode_t *, void *);
+extern void *kl_hnode_get(hnode_t *);
+extern const void *kl_hnode_getkey(hnode_t *);
+extern hashcount_t kl_hash_count(hash_t *);
+extern hashcount_t kl_hash_size(hash_t *);
+
+extern int kl_hash_isfull(hash_t *);
+extern int kl_hash_isempty(hash_t *);
+
+extern void kl_hash_scan_begin(hscan_t *, hash_t *);
+extern hnode_t *kl_hash_scan_next(hscan_t *);
+extern hnode_t *kl_hash_scan_delete(hash_t *, hnode_t *);
+extern void kl_hash_scan_delfree(hash_t *, hnode_t *);
+
+extern int kl_hash_verify(hash_t *);
+
+extern hnode_t *kl_hnode_create(void *);
+extern hnode_t *kl_hnode_init(hnode_t *, void *);
+extern void kl_hnode_destroy(hnode_t *);
+
+#if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG)
+#ifdef KAZLIB_SIDEEFFECT_DEBUG
+#define kl_hash_isfull(H) (SFX_CHECK(H)->hash_nodecount == (H)->hash_maxcount)
+#else
+#define kl_hash_isfull(H) ((H)->hash_nodecount == (H)->hash_maxcount)
+#endif
+#define kl_hash_isempty(H) ((H)->hash_nodecount == 0)
+#define kl_hash_count(H) ((H)->hash_nodecount)
+#define kl_hash_size(H) ((H)->hash_nchains)
+#define kl_hnode_get(N) ((N)->hash_data)
+#define kl_hnode_getkey(N) ((N)->hash_key)
+#define kl_hnode_put(N, V) ((N)->hash_data = (V))
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/khash/c_src/khash.c b/src/khash/c_src/khash.c
new file mode 100644
index 000000000..b15a18e64
--- /dev/null
+++ b/src/khash/c_src/khash.c
@@ -0,0 +1,658 @@
+// This file is part of khash released under the MIT license.
+// See the LICENSE file for more information.
+// Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+#include <assert.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "erl_nif.h"
+#include "hash.h"
+
+#ifdef _WIN32
+#define INLINE __inline
+#else
+#define INLINE inline
+#endif
+
+#define KHASH_VERSION 0
+
+
+typedef struct
+{
+ ERL_NIF_TERM atom_ok;
+ ERL_NIF_TERM atom_error;
+ ERL_NIF_TERM atom_value;
+ ERL_NIF_TERM atom_not_found;
+ ERL_NIF_TERM atom_end_of_table;
+ ERL_NIF_TERM atom_expired_iterator;
+ ErlNifResourceType* res_hash;
+ ErlNifResourceType* res_iter;
+} khash_priv;
+
+
+typedef struct
+{
+ unsigned int hval;
+ ErlNifEnv* env;
+ ERL_NIF_TERM key;
+ ERL_NIF_TERM val;
+} khnode_t;
+
+
+typedef struct
+{
+ int version;
+ unsigned int gen;
+ hash_t* h;
+ ErlNifPid p;
+} khash_t;
+
+
+typedef struct
+{
+ int version;
+ unsigned int gen;
+ khash_t* khash;
+ hscan_t scan;
+} khash_iter_t;
+
+
+/* Return the atom named 'name', reusing an existing atom when possible
+ * (enif_make_existing_atom) to avoid growing the atom table. */
+static INLINE ERL_NIF_TERM
+make_atom(ErlNifEnv* env, const char* name)
+{
+    ERL_NIF_TERM ret;
+    if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) {
+        return ret;
+    }
+    return enif_make_atom(env, name);
+}
+
+
+/* Build the {ok, Value} reply tuple. */
+static INLINE ERL_NIF_TERM
+make_ok(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM value)
+{
+    return enif_make_tuple2(env, priv->atom_ok, value);
+}
+
+
+/* Build the {error, Reason} reply tuple. */
+static INLINE ERL_NIF_TERM
+make_error(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM reason)
+{
+    return enif_make_tuple2(env, priv->atom_error, reason);
+}
+
+
+/* Return 1 iff the calling process is the one that created this khash.
+ * Every NIF entry point uses this to restrict the hash to its owner. */
+static INLINE int
+check_pid(ErlNifEnv* env, khash_t* khash)
+{
+    ErlNifPid pid;
+    enif_self(env, &pid);
+
+    if(enif_compare(pid.pid, khash->p.pid) == 0) {
+        return 1;
+    }
+
+    return 0;
+}
+
+
+/* Allocate a kazlib node plus its khnode_t payload; the payload is
+ * stored as the kazlib "key" and owns a private env for copied terms.
+ * NOTE(review): the enif_alloc/enif_alloc_env results are used without
+ * NULL checks -- confirm the intended allocation-failure policy. */
+hnode_t*
+khnode_alloc(void* ctx)
+{
+    hnode_t* ret = (hnode_t*) enif_alloc(sizeof(hnode_t));
+    khnode_t* node = (khnode_t*) enif_alloc(sizeof(khnode_t));
+
+    memset(ret, '\0', sizeof(hnode_t));
+    memset(node, '\0', sizeof(khnode_t));
+
+    node->env = enif_alloc_env();
+    ret->hash_key = node;
+
+    return ret;
+}
+
+
+/* Free a node created by khnode_alloc, including its private env. */
+void
+khnode_free(hnode_t* obj, void* ctx)
+{
+    khnode_t* node = (khnode_t*) kl_hnode_getkey(obj);
+    enif_free_env(node->env);
+    enif_free(node);
+    enif_free(obj);
+    return;
+}
+
+
+/* kazlib comparison callback: "keys" are khnode_t pointers; order by
+ * enif_compare of the stored Erlang terms, clamped to -1/0/1. */
+int
+khash_cmp_fun(const void* l, const void* r)
+{
+    khnode_t* left = (khnode_t*) l;
+    khnode_t* right = (khnode_t*) r;
+    int cmp = enif_compare(left->key, right->key);
+
+    if(cmp < 0) {
+        return -1;
+    } else if(cmp == 0) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+
+/* kazlib hash callback: return the erlang:phash2 value precomputed on
+ * the Erlang side and stored in the node. */
+hash_val_t
+khash_hash_fun(const void* obj)
+{
+    khnode_t* node = (khnode_t*) obj;
+    return (hash_val_t) node->hval;
+}
+
+
+/* Allocate and initialize a khash resource: version tag, generation
+ * counter (used to expire iterators), an unbounded kazlib table wired to
+ * the term hash/compare callbacks, and the owning pid. Returns NULL if
+ * the table cannot be created. The opts argument is currently unused. */
+static INLINE khash_t*
+khash_create_int(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM opts)
+{
+    khash_t* khash = NULL;
+
+    assert(priv != NULL && "missing private data member");
+
+    khash = (khash_t*) enif_alloc_resource(priv->res_hash, sizeof(khash_t));
+    memset(khash, '\0', sizeof(khash_t));
+    khash->version = KHASH_VERSION;
+    khash->gen = 0;
+
+    khash->h = kl_hash_create(HASHCOUNT_T_MAX, khash_cmp_fun, khash_hash_fun);
+
+    if(khash->h == NULL ) {
+        enif_release_resource(khash);
+        return NULL;
+    }
+
+    kl_hash_set_allocator(khash->h, khnode_alloc, khnode_free, NULL);
+    enif_self(env, &(khash->p));
+
+    return khash;
+}
+
+
+/* khash:new/1 -- create a hash owned by the calling process; returns
+ * {ok, Hash} or badarg on failure. */
+static ERL_NIF_TERM
+khash_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_t* khash;
+    ERL_NIF_TERM ret;
+
+    if(argc != 1) {
+        return enif_make_badarg(env);
+    }
+
+    khash = khash_create_int(env, priv, argv[0]);
+    if(khash == NULL) {
+        return enif_make_badarg(env);
+    }
+
+    /* Hand ownership to the term; the resource dies with its last ref. */
+    ret = enif_make_resource(env, khash);
+    enif_release_resource(khash);
+
+    return make_ok(env, priv, ret);
+}
+
+
+/* Resource destructor for khash_t: free every node, then the table. */
+static void
+khash_free(ErlNifEnv* env, void* obj)
+{
+    khash_t* khash = (khash_t*) obj;
+
+    if(khash->h != NULL) {
+        kl_hash_free_nodes(khash->h);
+        kl_hash_destroy(khash->h);
+    }
+
+    return;
+}
+
+
+/* khash:to_list/1 -- scan every entry, copying each key/value into the
+ * caller's env and consing {Key, Val} onto the result (order undefined). */
+static ERL_NIF_TERM
+khash_to_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = (khash_priv*) enif_priv_data(env);
+    ERL_NIF_TERM ret = enif_make_list(env, 0);
+    khash_t* khash = NULL;
+    void* res = NULL;
+    hscan_t scan;
+    hnode_t* entry;
+    khnode_t* node;
+    ERL_NIF_TERM key;
+    ERL_NIF_TERM val;
+    ERL_NIF_TERM tuple;
+
+    if(argc != 1) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) {
+        return enif_make_badarg(env);
+    }
+
+    khash = (khash_t*) res;
+
+    if(!check_pid(env, khash)) {
+        return enif_make_badarg(env);
+    }
+
+    kl_hash_scan_begin(&scan, khash->h);
+
+    while((entry = kl_hash_scan_next(&scan)) != NULL) {
+        node = (khnode_t*) kl_hnode_getkey(entry);
+        key = enif_make_copy(env, node->key);
+        val = enif_make_copy(env, node->val);
+        tuple = enif_make_tuple2(env, key, val);
+        ret = enif_make_list_cell(env, tuple, ret);
+    }
+
+    return ret;
+}
+
+
+/* khash:clear/1 -- drop every node (the table itself is kept) and bump
+ * the generation so outstanding iterators expire. */
+static ERL_NIF_TERM
+khash_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_t* khash = NULL;
+    void* res = NULL;
+
+    if(argc != 1) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) {
+        return enif_make_badarg(env);
+    }
+
+    khash = (khash_t*) res;
+
+    if(!check_pid(env, khash)) {
+        return enif_make_badarg(env);
+    }
+
+    kl_hash_free_nodes(khash->h);
+
+    khash->gen += 1;
+
+    return priv->atom_ok;
+}
+
+
+/* Probe the kazlib table using a stack-allocated khnode_t as the search
+ * key; only hval/env/key are read by the hash/compare callbacks. */
+static INLINE hnode_t*
+khash_lookup_int(ErlNifEnv* env, uint32_t hv, ERL_NIF_TERM key, khash_t* khash)
+{
+    khnode_t node;
+    node.hval = hv;
+    node.env = env;
+    node.key = key;
+    return kl_hash_lookup(khash->h, &node);
+}
+
+
+/* khash:lookup_int/3 (Hash, PHash2, Key) -- returns {value, V} or
+ * not_found; the value term is copied into the caller's env. */
+static ERL_NIF_TERM
+khash_lookup(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_t* khash = NULL;
+    void* res = NULL;
+    uint32_t hval;
+    hnode_t* entry;
+    khnode_t* node;
+    ERL_NIF_TERM ret;
+
+    if(argc != 3) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) {
+        return enif_make_badarg(env);
+    }
+
+    khash = (khash_t*) res;
+
+    if(!check_pid(env, khash)) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_uint(env, argv[1], &hval)) {
+        return enif_make_badarg(env);
+    }
+
+    entry = khash_lookup_int(env, hval, argv[2], khash);
+    if(entry == NULL) {
+        ret = priv->atom_not_found;
+    } else {
+        node = (khnode_t*) kl_hnode_getkey(entry);
+        ret = enif_make_copy(env, node->val);
+        ret = enif_make_tuple2(env, priv->atom_value, ret);
+    }
+
+    return ret;
+}
+
+
+/* khash:get_int/4 (Hash, PHash2, Key, Default) -- returns the stored
+ * value, or the caller-supplied default (argv[3]) when the key is absent. */
+static ERL_NIF_TERM
+khash_get(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_t* khash = NULL;
+    void* res = NULL;
+    uint32_t hval;
+    hnode_t* entry;
+    khnode_t* node;
+    ERL_NIF_TERM ret;
+
+    if(argc != 4) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) {
+        return enif_make_badarg(env);
+    }
+
+    khash = (khash_t*) res;
+
+    if(!check_pid(env, khash)) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_uint(env, argv[1], &hval)) {
+        return enif_make_badarg(env);
+    }
+
+    entry = khash_lookup_int(env, hval, argv[2], khash);
+    if(entry == NULL) {
+        ret = argv[3];
+    } else {
+        node = (khnode_t*) kl_hnode_getkey(entry);
+        ret = enif_make_copy(env, node->val);
+    }
+
+    return ret;
+}
+
+
+/* khash:put_int/4 (Hash, PHash2, Key, Val) -- insert or overwrite. Key
+ * and value terms are copied into the node's private env; on overwrite
+ * the env is cleared first so the previous copies can be reclaimed. The
+ * khnode_t doubles as the kazlib "key". Bumps the generation, expiring
+ * outstanding iterators. */
+static ERL_NIF_TERM
+khash_put(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_t* khash = NULL;
+    void* res = NULL;
+    uint32_t hval;
+    hnode_t* entry;
+    khnode_t* node;
+
+    if(argc != 4) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) {
+        return enif_make_badarg(env);
+    }
+
+    khash = (khash_t*) res;
+
+    if(!check_pid(env, khash)) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_uint(env, argv[1], &hval)) {
+        return enif_make_badarg(env);
+    }
+
+    entry = khash_lookup_int(env, hval, argv[2], khash);
+    if(entry == NULL) {
+        entry = khnode_alloc(NULL);
+        node = (khnode_t*) kl_hnode_getkey(entry);
+        node->hval = hval;
+        node->key = enif_make_copy(node->env, argv[2]);
+        node->val = enif_make_copy(node->env, argv[3]);
+        kl_hash_insert(khash->h, entry, node);
+    } else {
+        node = (khnode_t*) kl_hnode_getkey(entry);
+        enif_clear_env(node->env);
+        node->key = enif_make_copy(node->env, argv[2]);
+        node->val = enif_make_copy(node->env, argv[3]);
+    }
+
+    khash->gen += 1;
+
+    return priv->atom_ok;
+}
+
+
+/* khash:del_int/3 (Hash, PHash2, Key) -- remove an entry; returns ok or
+ * not_found. The generation is bumped on either outcome, so even a
+ * failed delete expires outstanding iterators. */
+static ERL_NIF_TERM
+khash_del(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_t* khash = NULL;
+    void* res = NULL;
+    uint32_t hval;
+    hnode_t* entry;
+    ERL_NIF_TERM ret;
+
+    if(argc != 3) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) {
+        return enif_make_badarg(env);
+    }
+
+    khash = (khash_t*) res;
+
+    if(!check_pid(env, khash)) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_uint(env, argv[1], &hval)) {
+        return enif_make_badarg(env);
+    }
+
+    entry = khash_lookup_int(env, hval, argv[2], khash);
+    if(entry == NULL) {
+        ret = priv->atom_not_found;
+    } else {
+        kl_hash_delete_free(khash->h, entry);
+        ret = priv->atom_ok;
+    }
+
+    khash->gen += 1;
+
+    return ret;
+}
+
+
+/* khash:size/1 -- number of entries currently stored. */
+static ERL_NIF_TERM
+khash_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_t* khash;
+
+    if(argc != 1) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_hash, (void*) &khash)) {
+        return enif_make_badarg(env);
+    }
+
+    if(!check_pid(env, khash)) {
+        return enif_make_badarg(env);
+    }
+
+    return enif_make_uint64(env, kl_hash_count(khash->h));
+}
+
+
+/* khash:iter/1 -- create an iterator resource pinned to the hash's
+ * current generation; any subsequent mutation expires it. */
+static ERL_NIF_TERM
+khash_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_t* khash = NULL;
+    void* res = NULL;
+    khash_iter_t* iter;
+    ERL_NIF_TERM ret;
+
+    if(argc != 1) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) {
+        return enif_make_badarg(env);
+    }
+
+    khash = (khash_t*) res;
+
+    if(!check_pid(env, khash)) {
+        return enif_make_badarg(env);
+    }
+
+    iter = (khash_iter_t*) enif_alloc_resource(
+        priv->res_iter, sizeof(khash_iter_t));
+    memset(iter, '\0', sizeof(khash_iter_t));
+    iter->version = KHASH_VERSION;
+    iter->gen = khash->gen;
+    iter->khash = khash;
+    kl_hash_scan_begin(&(iter->scan), iter->khash->h);
+
+    // The iterator needs to guarantee that the khash
+    // remains alive for the life of the iterator.
+    enif_keep_resource(khash);
+
+    ret = enif_make_resource(env, iter);
+    enif_release_resource(iter);
+
+    return make_ok(env, priv, ret);
+}
+
+
+/* Iterator destructor: drop the reference that kept the khash alive. */
+static void
+khash_iter_free(ErlNifEnv* env, void* obj)
+{
+    khash_iter_t* iter = (khash_iter_t*) obj;
+    enif_release_resource(iter->khash);
+}
+
+
+/* khash:iter_next/1 -- next {Key, Val}, end_of_table when exhausted, or
+ * {error, expired_iterator} if the hash mutated since iter/1. */
+static ERL_NIF_TERM
+khash_iter_next(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    khash_priv* priv = enif_priv_data(env);
+    khash_iter_t* iter = NULL;
+    void* res = NULL;
+    hnode_t* entry;
+    khnode_t* node;
+    ERL_NIF_TERM key;
+    ERL_NIF_TERM val;
+
+    if(argc != 1) {
+        return enif_make_badarg(env);
+    }
+
+    if(!enif_get_resource(env, argv[0], priv->res_iter, &res)) {
+        return enif_make_badarg(env);
+    }
+
+    iter = (khash_iter_t*) res;
+
+    if(!check_pid(env, iter->khash)) {
+        return enif_make_badarg(env);
+    }
+
+    if(iter->gen != iter->khash->gen) {
+        return make_error(env, priv, priv->atom_expired_iterator);
+    }
+
+    entry = kl_hash_scan_next(&(iter->scan));
+    if(entry == NULL) {
+        return priv->atom_end_of_table;
+    }
+
+    node = (khnode_t*) kl_hnode_getkey(entry);
+    key = enif_make_copy(env, node->key);
+    val = enif_make_copy(env, node->val);
+    return enif_make_tuple2(env, key, val);
+}
+
+
+/* Module load: allocate private state, open both resource types, and
+ * intern the reply atoms. Returns 0 on success, nonzero to fail loading.
+ * NOTE(review): new_priv is leaked on the enif_open_resource_type
+ * failure paths below -- it should be freed before returning 1. */
+static int
+load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
+{
+    int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER;
+    ErlNifResourceType* res;
+
+    khash_priv* new_priv = (khash_priv*) enif_alloc(sizeof(khash_priv));
+    if(new_priv == NULL) {
+        return 1;
+    }
+
+    res = enif_open_resource_type(
+        env, NULL, "khash", khash_free, flags, NULL);
+    if(res == NULL) {
+        return 1;
+    }
+    new_priv->res_hash = res;
+
+    res = enif_open_resource_type(
+        env, NULL, "khash_iter", khash_iter_free, flags, NULL);
+    if(res == NULL) {
+        return 1;
+    }
+    new_priv->res_iter = res;
+
+    new_priv->atom_ok = make_atom(env, "ok");
+    new_priv->atom_error = make_atom(env, "error");
+    new_priv->atom_value = make_atom(env, "value");
+    new_priv->atom_not_found = make_atom(env, "not_found");
+    new_priv->atom_end_of_table = make_atom(env, "end_of_table");
+    new_priv->atom_expired_iterator = make_atom(env, "expired_iterator");
+
+    *priv = (void*) new_priv;
+
+    return 0;
+}
+
+
+/* Hot code reload: nothing to update. */
+static int
+reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
+{
+    return 0;
+}
+
+
+/* Upgrade: re-run load to take over the resource types.
+ * NOTE(review): the previous priv_data is not freed here -- confirm it
+ * is reclaimed via unload on the old module instance. */
+static int
+upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info)
+{
+    return load(env, priv, info);
+}
+
+
+/* Module unload: release the private state allocated in load. */
+static void
+unload(ErlNifEnv* env, void* priv)
+{
+    enif_free(priv);
+    return;
+}
+
+
+/* NIF dispatch table: Erlang name/arity -> C implementation. The *_int
+ * variants receive the precomputed erlang:phash2 value as argument 2. */
+static ErlNifFunc funcs[] = {
+    {"new", 1, khash_new},
+    {"to_list", 1, khash_to_list},
+    {"clear", 1, khash_clear},
+    {"lookup_int", 3, khash_lookup},
+    {"get_int", 4, khash_get},
+    {"put_int", 4, khash_put},
+    {"del_int", 3, khash_del},
+    {"size", 1, khash_size},
+    {"iter", 1, khash_iter},
+    {"iter_next", 1, khash_iter_next}
+};
+
+
+ERL_NIF_INIT(khash, funcs, &load, &reload, &upgrade, &unload);
diff --git a/src/khash/rebar.config b/src/khash/rebar.config
new file mode 100644
index 000000000..0bf93a8d1
--- /dev/null
+++ b/src/khash/rebar.config
@@ -0,0 +1,14 @@
+% vim: set ft=erlang : -*- erlang -*- % Magic lines for code editors
+
+{port_specs, [
+ {"priv/khash.so", ["c_src/*.c"]}
+]}.
+
+{port_env, [
+ % Development compilation
+ % {".*", "CFLAGS", "$CFLAGS -g -Wall -Werror -fPIC"}
+
+ % Production compilation
+ {"(linux|solaris|darwin|freebsd)", "CFLAGS", "$CFLAGS -Wall -Werror -DNDEBUG -O3"},
+ {"win32", "CFLAGS", "$CFLAGS /O2 /DNDEBUG /Wall"}
+]}.
diff --git a/src/khash/src/khash.app.src b/src/khash/src/khash.app.src
new file mode 100644
index 000000000..14a45c77d
--- /dev/null
+++ b/src/khash/src/khash.app.src
@@ -0,0 +1,10 @@
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+{application, khash, [
+ {description, "A NIF wrapper for Kazlib's hash structure."},
+ {vsn, git},
+ {registered, []},
+ {applications, [kernel]}
+]}.
diff --git a/src/khash/src/khash.erl b/src/khash/src/khash.erl
new file mode 100644
index 000000000..1db4c4cdb
--- /dev/null
+++ b/src/khash/src/khash.erl
@@ -0,0 +1,136 @@
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+-module(khash).
+-on_load(init/0).
+
+-export([
+ new/0,
+ new/1,
+ from_list/1,
+ from_list/2,
+ to_list/1,
+ clear/1,
+ lookup/2,
+ get/2,
+ get/3,
+ put/3,
+ del/2,
+ size/1,
+ iter/1,
+ iter_next/1,
+ fold/3
+]).
+
+-define(NOT_LOADED, not_loaded(?LINE)).
+
+-type kv() :: {any(), any()}.
+-type khash() :: term().
+-type khash_iter() :: term().
+-type option() :: [].
+
+%% Create an empty khash owned by the calling process.
+-spec new() -> {ok, khash()}.
+new() ->
+    new([]).
+
+%% NIF stub; replaced by the C implementation when the library loads.
+-spec new([option()]) -> {ok, khash()}.
+new(_Options) ->
+    ?NOT_LOADED.
+
+%% Build a khash from a [{Key, Value}] list.
+-spec from_list([kv()]) -> {ok, khash()}.
+from_list(KVList) ->
+    from_list(KVList, []).
+
+%% Build a khash from a [{Key, Value}] list; later duplicate keys
+%% overwrite earlier ones.
+-spec from_list([kv()], [option()]) -> {ok, khash()}.
+from_list(KVList, Options) ->
+    {ok, Hash} = ?MODULE:new(Options),
+    lists:foreach(
+        fun({Key, Val}) ->
+            ?MODULE:put(Hash, Key, Val)
+        end,
+        KVList
+    ),
+    {ok, Hash}.
+
+%% Return all entries as [{Key, Value}] in undefined order (NIF).
+-spec to_list(khash()) -> [kv()].
+to_list(_Hash) ->
+    ?NOT_LOADED.
+
+%% Remove every entry; expires outstanding iterators (NIF).
+-spec clear(khash()) -> ok.
+clear(_Hash) ->
+    ?NOT_LOADED.
+
+%% Look a key up, distinguishing a stored value from absence. The key's
+%% erlang:phash2 is computed here and passed to the NIF.
+-spec lookup(khash(), any()) -> {value, any()} | not_found.
+lookup(Hash, Key) ->
+    lookup_int(Hash, erlang:phash2(Key), Key).
+
+%% Fetch a key's value, defaulting to 'undefined' when absent.
+-spec get(khash(), any()) -> any().
+get(Hash, Key) ->
+    get(Hash, Key, undefined).
+
+%% Fetch a key's value, or Default when absent.
+-spec get(khash(), any(), any()) -> any().
+get(Hash, Key, Default) ->
+    get_int(Hash, erlang:phash2(Key), Key, Default).
+
+%% Insert or overwrite a key's value.
+-spec put(khash(), any(), any()) -> ok.
+put(Hash, Key, Value) ->
+    put_int(Hash, erlang:phash2(Key), Key, Value).
+
+%% Delete a key (the NIF returns not_found when absent).
+-spec del(khash(), any()) -> ok.
+del(Hash, Key) ->
+    del_int(Hash, erlang:phash2(Key), Key).
+
+%% Number of entries (NIF).
+-spec size(khash()) -> non_neg_integer().
+size(_Hash) ->
+    ?NOT_LOADED.
+
+%% Create an iterator; it expires when the hash is mutated (NIF).
+-spec iter(khash()) -> {ok, khash_iter()}.
+iter(_Hash) ->
+    ?NOT_LOADED.
+
+%% Advance an iterator (NIF).
+-spec iter_next(khash_iter()) ->
+    kv() | end_of_table | {error, expired_iterator}.
+iter_next(_Iter) ->
+    ?NOT_LOADED.
+
+%% Fold FoldFun(Key, Value, Acc) over every entry, in undefined order.
+-spec fold(khash(), fun(), any()) -> any().
+fold(Hash, FoldFun, Acc) ->
+    {ok, Iter} = ?MODULE:iter(Hash),
+    fold_int(Iter, FoldFun, Acc).
+
+%% Drive the iterator to end_of_table, threading the accumulator.
+%% NOTE(review): an {error, expired_iterator} result from iter_next/1
+%% also matches the {Key, Value} clause, so a mutation during the fold
+%% would call FoldFun(error, expired_iterator, _) and loop -- confirm
+%% whether this should match and raise explicitly instead.
+fold_int(Iter, FoldFun, Acc) ->
+    case ?MODULE:iter_next(Iter) of
+        {Key, Value} ->
+            NewAcc = FoldFun(Key, Value, Acc),
+            fold_int(Iter, FoldFun, NewAcc);
+        end_of_table ->
+            Acc
+    end.
+
+%% -on_load hook: locate the priv/ directory (falling back to ../priv
+%% relative to the beam for dev checkouts) and load the khash NIF.
+init() ->
+    PrivDir =
+        case code:priv_dir(?MODULE) of
+            {error, _} ->
+                EbinDir = filename:dirname(code:which(?MODULE)),
+                AppPath = filename:dirname(EbinDir),
+                filename:join(AppPath, "priv");
+            Path ->
+                Path
+        end,
+    erlang:load_nif(filename:join(PrivDir, "khash"), 0).
+
+%% NIF stubs below are replaced when the shared library loads; each takes
+%% the precomputed erlang:phash2 of the key as its second argument.
+lookup_int(_Hash, _HashValue, _Key) ->
+    ?NOT_LOADED.
+
+get_int(_Hash, _HashValue, _Key, _Default) ->
+    ?NOT_LOADED.
+
+put_int(_Hash, _HashValue, _Key, _Value) ->
+    ?NOT_LOADED.
+
+del_int(_Hash, _HashValue, _Key) ->
+    ?NOT_LOADED.
+
+%% Raise a descriptive error when a stub is hit (NIF failed to load).
+not_loaded(Line) ->
+    erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}).
diff --git a/src/khash/test/gen_term.erl b/src/khash/test/gen_term.erl
new file mode 100644
index 000000000..d4244f11f
--- /dev/null
+++ b/src/khash/test/gen_term.erl
@@ -0,0 +1,113 @@
+%% This file is part of khash released under the MIT license.
+%% See the LICENSE file for more information.
+%% Copyright 2013 Cloudant, Inc <support@cloudant.com>
+
+%% Random Erlang term generator for the khash test suite. any/0,1
+%% produce an arbitrary term whose nesting is bounded by a size
+%% budget: container generators (tuples, lists) spend part of the
+%% budget, and once it drops to zero only flat value types remain.
+-module(gen_term).
+
+-export([
+    any/0,
+    any/1,
+
+    gen_atom/1,
+    gen_integer/1,
+    gen_float/1,
+    gen_reference/1,
+    gen_port/1,
+    gen_pid/1,
+    gen_tuple/1,
+    gen_list/1,
+    gen_short_string/1,
+    gen_string/1,
+    gen_binary/1,
+    gen_bitstring/1,
+    gen_bignum/1,
+    gen_function/1
+]).
+
+%% Generate a random term with the default size budget of 16.
+any() ->
+    any(16).
+
+%% Budget exhausted: pick only from the non-container value types so
+%% recursion terminates; otherwise any type, containers included.
+any(MaxSize) when MaxSize =< 0 ->
+    Fun = choice(value_types()),
+    ?MODULE:Fun(MaxSize);
+any(MaxSize) ->
+    Fun = choice(all_types()),
+    ?MODULE:Fun(MaxSize).
+
+%% NOTE(review): creates fresh atoms from random strings; atoms are
+%% never garbage collected, so this is only acceptable in test code.
+gen_atom(MaxSize) ->
+    list_to_atom(gen_short_string(MaxSize)).
+
+%% Small (=< 127) or up to 32-bit magnitude, with a random sign.
+gen_integer(_) ->
+    Value =
+        case rand:uniform() < 0.5 of
+            true -> rand:uniform(127);
+            false -> rand:uniform(16#FFFFFFFF)
+        end,
+    case rand:uniform() < 0.5 of
+        true -> -1 * Value;
+        false -> Value
+    end.
+
+%% Non-negative float in [0.0, 16#FFFFFFFF).
+gen_float(_) ->
+    rand:uniform() * float(16#FFFFFFFF).
+
+gen_reference(_) ->
+    erlang:make_ref().
+
+%% Pick an existing port at random (erlang:ports/0 is never empty in
+%% a running node).
+gen_port(_) ->
+    Ports = erlang:ports(),
+    lists:nth(rand:uniform(length(Ports)), Ports).
+
+%% Pick an existing process at random.
+gen_pid(_) ->
+    Pids = erlang:processes(),
+    lists:nth(rand:uniform(length(Pids)), Pids).
+
+gen_tuple(MaxSize) ->
+    list_to_tuple(gen_list(MaxSize)).
+
+%% List of Width elements, each generated with the remaining budget
+%% (MaxSize - Width), so wide lists are shallow and vice versa.
+gen_list(MaxSize) ->
+    Width = rand:uniform(MaxSize),
+    [any(MaxSize - Width) || _ <- lists:seq(1, Width)].
+
+%% ASCII-ish char list, 1..255 chars (budget argument ignored).
+gen_short_string(_) ->
+    Size = rand:uniform(255),
+    [rand:uniform(127) || _ <- lists:seq(1, Size)].
+
+%% ASCII-ish char list, 1..4096 chars (budget argument ignored).
+gen_string(_) ->
+    Size = rand:uniform(4096),
+    [rand:uniform(127) || _ <- lists:seq(1, Size)].
+
+gen_binary(MaxSize) ->
+    list_to_binary(gen_string(MaxSize)).
+
+%% Prefix 4 extra bits so the result is a bitstring, not a binary.
+gen_bitstring(MaxSize) ->
+    B = gen_binary(MaxSize),
+    <<2:4/integer, B/binary>>.
+
+%% Integer guaranteed to exceed a 64-bit small, forcing a bignum.
+gen_bignum(_) ->
+    16#FFFFFFFFFFFFFFFF + rand:uniform(16#FFFFFFFF).
+
+%% NOTE(review): despite the name this returns an atom naming a
+%% generator, not a fun — confirm this is intentional.
+gen_function(_) ->
+    choice(all_types()).
+
+%% Uniformly pick one element of Options.
+choice(Options) ->
+    lists:nth(rand:uniform(length(Options)), Options).
+
+%% Flat (non-container) generators, safe at zero budget.
+value_types() ->
+    [
+        gen_atom,
+        gen_integer,
+        gen_float,
+        gen_reference,
+        gen_port,
+        gen_pid,
+        gen_short_string,
+        gen_string,
+        gen_binary,
+        gen_bitstring,
+        gen_bignum,
+        gen_function
+    ].
+
+%% Everything, including the recursive container generators.
+all_types() ->
+    value_types() ++ [gen_tuple, gen_list].
diff --git a/src/khash/test/khash_test.erl b/src/khash/test/khash_test.erl
new file mode 100644
index 000000000..09e23a14e
--- /dev/null
+++ b/src/khash/test/khash_test.erl
@@ -0,0 +1,374 @@
+%% EUnit suite for the khash NIF hash table, plus exported benchmark
+%% helpers (khash_fetch/0, khash_store/0) meant to be run manually.
+-module(khash_test).
+
+-export([
+    khash_fetch/0,
+    khash_store/0,
+    run_keys/1
+]).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%% Iterations for the randomized dict-vs-khash comparison test.
+-define(NUM_RAND_CYCLES, 10000).
+%% Iterations for the benchmark helpers.
+-define(NUM_CYCLES, 1000000).
+%% Number of {K, V} pairs loaded by the benchmark helpers.
+-define(NUM_KVS, 5000).
+%% EUnit timeout (seconds) for the randomized test.
+-define(TIMEOUT, 15).
+
+%% Sanity check: the khash module (and with it the NIF) loads.
+load_test_() ->
+    {
+        "Loaded khash",
+        ?_assertEqual({module, khash}, code:load_file(khash))
+    }.
+
+%% Core single-hash operations: lookup/get/del on a missing key,
+%% put, size, del, and clear — all against one shared hash C created
+%% by the setup fun (tests run in order and mutate C deliberately).
+basic_test_() ->
+    {
+        "khash basic operations",
+        {setup, local, fun() -> khash:new() end, fun({ok, _}) -> ok end, fun({ok, C}) ->
+            [
+                {
+                    "Lookup missing is ok",
+                    ?_assertEqual(not_found, khash:lookup(C, <<"foo">>))
+                },
+                {
+                    "Get missing is ok",
+                    ?_assertEqual(undefined, khash:get(C, <<"foo">>))
+                },
+                {
+                    "Del missing is ok",
+                    ?_assertEqual(not_found, khash:del(C, <<"foo">>))
+                },
+                {
+                    "Stored a key",
+                    ?_assertEqual(ok, khash:put(C, <<"foo">>, bar))
+                },
+                {
+                    "Lookuped a key",
+                    ?_assertEqual({value, bar}, khash:lookup(C, <<"foo">>))
+                },
+                {
+                    "Retrieved a key",
+                    ?_assertEqual(bar, khash:get(C, <<"foo">>))
+                },
+                {
+                    "Stored a key",
+                    ?_assertEqual(ok, khash:put(C, <<"bar">>, foo))
+                },
+                {
+                    "Correct size for hash",
+                    ?_assertEqual(2, khash:size(C))
+                },
+                {
+                    "Deleted a key",
+                    ?_assertEqual(ok, khash:del(C, <<"foo">>))
+                },
+                {
+                    "Correct size after delete",
+                    ?_assertEqual(1, khash:size(C))
+                },
+                {
+                    "Cleared the hash",
+                    ?_assertEqual(ok, khash:clear(C))
+                },
+                {
+                    "Correct size after clear",
+                    ?_assertEqual(0, khash:size(C))
+                }
+            ]
+        end}
+    }.
+
+%% Randomized model test: apply ?NUM_RAND_CYCLES weighted random
+%% actions to a khash and a reference dict in lockstep, checking
+%% after every step that their contents agree.
+randomized_test_() ->
+    {
+        "khash randomized test",
+        {setup, local,
+            fun() ->
+                Dict = dict:new(),
+                {ok, KHash} = khash:new(),
+                %% {Weight, ActionFun} pairs consumed by
+                %% weighted_choice/1; higher weight = more frequent.
+                Actions = [
+                    {0.1, fun run_clear/1},
+                    {1.0, fun run_get2/1},
+                    {1.0, fun run_get3/1},
+                    {1.0, fun run_put/1},
+                    {1.0, fun run_del/1},
+                    {0.5, fun run_size/1},
+                    % run_keys/1 is disabled here but still exported —
+                    % presumably awaiting khash:keys support; confirm.
+                    % {0.3, fun run_keys/1},
+                    {0.3, fun run_to_list/1}
+                ],
+                {ok, Actions, ?NUM_RAND_CYCLES, {Dict, KHash}}
+            end,
+            fun(State) ->
+                {timeout, ?TIMEOUT, {
+                    "State matches dict implementation",
+                    ?_assert(run_randomized(State, true))
+                }}
+            end}
+    }.
+
+%% Iterator basics: a single-entry hash yields exactly one {K, V}
+%% pair then end_of_table, and fold/3 visits that pair. The original
+%% passed (Actual, Expected) to ?_assertEqual — EUnit's convention is
+%% (Expected, Actual), and swapping them garbles failure output — and
+%% the description had a typo ("itrators basics").
+basic_iterators_test_() ->
+    {
+        "khash iterators basic operations",
+        {setup, local,
+            fun() ->
+                {ok, H} = khash:new(),
+                khash:put(H, foo, bar),
+                {ok, I} = khash:iter(H),
+                {ok, H, I}
+            end,
+            fun({ok, H, I}) ->
+                [
+                    {
+                        "Got only kv pair as first element",
+                        ?_assertEqual({foo, bar}, khash:iter_next(I))
+                    },
+                    {
+                        "Only the one kv pair exists",
+                        ?_assertEqual(end_of_table, khash:iter_next(I))
+                    },
+                    {
+                        "Fold works",
+                        ?_test(begin
+                            Fold = fun(K, V, Acc) -> [{K, V} | Acc] end,
+                            ?assertEqual([{foo, bar}], khash:fold(H, Fold, []))
+                        end)
+                    }
+                ]
+            end}
+    }.
+
+%% A 10-entry hash read back through an iterator yields exactly the
+%% inserted key/value pairs (khash_iterate/2 returns them sorted;
+%% kv_data/1 produces them already sorted). The original passed
+%% (Actual, Expected) to ?_assertEqual; EUnit expects (Expected,
+%% Actual), so failures would have reported the values backwards.
+multiread_iterators_test_() ->
+    {
+        "khash iterators multi-read test",
+        {setup, local,
+            fun() ->
+                {ok, H} = khash:new(),
+                KVs = kv_data(10),
+                lists:foreach(fun({K, V}) -> khash:put(H, K, V) end, KVs),
+                {ok, I} = khash:iter(H),
+                {ok, I, KVs}
+            end,
+            fun({ok, I, KVs}) ->
+                {
+                    "Read the same exact key/val pairs",
+                    ?_assertEqual(KVs, khash_iterate(I, []))
+                }
+            end}
+    }.
+
+%% Mutating the hash (put/del/clear) must expire any live iterator.
+%% Fixed here: ?_assertEqual args were swapped (EUnit convention is
+%% (Expected, Actual)) and the description misspelled "expiration".
+expiration_iterators_test_() ->
+    {
+        "khash iterators expiration functions",
+        {foreach, local,
+            fun() ->
+                Err = {error, expired_iterator},
+                {ok, H} = khash:new(),
+                khash:put(H, foo, bar),
+                {ok, I} = khash:iter(H),
+                {ok, H, I, Err}
+            end,
+            [
+                fun({ok, H, I, Err}) ->
+                    khash:put(H, foo, bar2),
+                    {
+                        "put expires iterators",
+                        ?_assertEqual(Err, khash:iter_next(I))
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    khash:del(H, foo),
+                    {
+                        "del expires iterators",
+                        ?_assertEqual(Err, khash:iter_next(I))
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    khash:clear(H),
+                    {
+                        "clear expires iterators",
+                        ?_assertEqual(Err, khash:iter_next(I))
+                    }
+                end
+            ]}
+    }.
+
+%% Read-only operations (to_list/lookup/get/size) and creating a
+%% second iterator must NOT expire a live iterator. Fixed here: the
+%% description misspelled "expiration", and ?_assertNot(X == Err) is
+%% replaced with the equivalent, idiomatic ?_assertNotEqual(Err, X),
+%% which reports both values on failure.
+no_expiration_iterators_test_() ->
+    {
+        "khash iterators no expiration functions",
+        {foreach, local,
+            fun() ->
+                Err = {error, expired_iterator},
+                {ok, H} = khash:new(),
+                khash:put(H, foo, bar),
+                {ok, I} = khash:iter(H),
+                {ok, H, I, Err}
+            end,
+            [
+                fun({ok, H, I, Err}) ->
+                    [{foo, bar}] = khash:to_list(H),
+                    {
+                        "to_list doesn't expire iterators",
+                        ?_assertNotEqual(Err, khash:iter_next(I))
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    {value, bar} = khash:lookup(H, foo),
+                    {
+                        "lookup doesn't expire iterators",
+                        ?_assertNotEqual(Err, khash:iter_next(I))
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    bar = khash:get(H, foo),
+                    {
+                        "get doesn't expire iterators",
+                        ?_assertNotEqual(Err, khash:iter_next(I))
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    1 = khash:size(H),
+                    {
+                        "size doesn't expire iterators",
+                        ?_assertNotEqual(Err, khash:iter_next(I))
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    {ok, _OtherI} = khash:iter(H),
+                    {foo, bar} = khash:iter_next(I),
+                    end_of_table = khash:iter_next(I),
+                    {
+                        "iteration doesn't expire iterators",
+                        ?_assertNotEqual(Err, khash:iter_next(I))
+                    }
+                end
+            ]}
+    }.
+
+%% Benchmark helper (exported, run manually): repeatedly read key 1
+%% from a ?NUM_KVS-entry hash. The ?assertMatch doubles as a
+%% correctness check on every read.
+khash_fetch() ->
+    erlang:garbage_collect(),
+    List = kv_data(?NUM_KVS),
+    {ok, KHash} = khash:from_list(List),
+    khash_fetch(KHash, ?NUM_CYCLES * 10).
+
+khash_fetch(_, 0) ->
+    ok;
+khash_fetch(KHash, NumCycles) ->
+    ?assertMatch(1, khash:get(KHash, 1)),
+    khash_fetch(KHash, NumCycles - 1).
+
+%% Benchmark helper (exported, run manually): repeatedly overwrite
+%% key 1 in a pre-populated hash of ?NUM_KVS * 2 entries.
+khash_store() ->
+    List = kv_data(?NUM_KVS * 2),
+    {ok, KHash} = khash:from_list(List),
+    khash_store(KHash, ?NUM_CYCLES).
+
+khash_store(_, 0) ->
+    ok;
+khash_store(KHash, NumCycles) ->
+    khash:put(KHash, 1, 1),
+    khash_store(KHash, NumCycles - 1).
+
+%% Drain the iterator into an accumulator and return the collected
+%% {Key, Value} pairs sorted.
+khash_iterate(Iter, Seen) ->
+    case khash:iter_next(Iter) of
+        end_of_table ->
+            lists:sort(Seen);
+        {Key, Val} ->
+            khash_iterate(Iter, [{Key, Val} | Seen])
+    end.
+
+%% Build the deterministic fixture [{1, 1}, {2, 2}, ..., {NumKVs, NumKVs}].
+kv_data(NumKVs) ->
+    lists:map(fun(I) -> {I, I} end, lists:seq(1, NumKVs)).
+
+run_randomized({ok, _, N, _}, Valid) when N =< 0 ->
+ Valid;
+run_randomized({ok, Actions, N, S0}, Valid) ->
+ Action = weighted_choice(Actions),
+ S1 = Action(S0),
+ Assertion = Valid andalso validate_randomized_state(S1),
+ run_randomized({ok, Actions, N - 1, S1}, Assertion).
+
+validate_randomized_state({D, H}) ->
+ DKVs = lists:sort(dict:to_list(D)),
+ HKVs = lists:sort(khash:to_list(H)),
+ DKVs == HKVs.
+
+%% Empty both models: the khash via clear/1, the dict by replacing it
+%% with a fresh one.
+run_clear({_Dict, Hash}) ->
+    ?assertMatch(ok, khash:clear(Hash)),
+    {dict:new(), Hash}.
+
+%% Exercise lookup/2 and get/2 against the reference dict: a present
+%% key must return its stored value; an absent key must yield
+%% not_found / undefined respectively. State is unchanged.
+run_get2({D, H}) ->
+    K = random_key(D),
+    case dict:find(K, D) of
+        {ok, Value} ->
+            ?assertMatch({value, Value}, khash:lookup(H, K)),
+            ?assertMatch(Value, khash:get(H, K));
+        error ->
+            ?assertMatch(not_found, khash:lookup(H, K)),
+            ?assertMatch(undefined, khash:get(H, K))
+    end,
+    {D, H}.
+
+%% Exercise get/3: for a present key the stored value must win over
+%% any supplied default; for a missing key the default is returned.
+run_get3({D, H}) ->
+    K = random_key(D),
+    case dict:find(K, D) of
+        {ok, Value} ->
+            ?assertMatch({value, Value}, khash:lookup(H, K)),
+            ?assertMatch(Value, khash:get(H, K)),
+            % The original never exercised the 3-arity default path on
+            % a hit (it duplicated run_get2); the stored value must
+            % shadow a fresh, never-matching default.
+            ?assertMatch(Value, khash:get(H, K, make_ref()));
+        error ->
+            Val = random_val(),
+            ?assertMatch(Val, khash:get(H, K, Val))
+    end,
+    {D, H}.
+
+%% Store a random key/value pair in both models.
+run_put({Dict0, Hash}) ->
+    Key = random_key(Dict0),
+    Val = random_val(),
+    ?assertMatch(ok, khash:put(Hash, Key, Val)),
+    {dict:store(Key, Val, Dict0), Hash}.
+
+%% Delete a random key from both models. del/2 must return ok for a
+%% key that was present and not_found for one that was absent; the
+%% dict is only erased in the present case.
+run_del({D0, H}) ->
+    K = random_key(D0),
+    D1 =
+        case dict:is_key(K, D0) of
+            true ->
+                ?assertMatch(ok, khash:del(H, K)),
+                dict:erase(K, D0);
+            false ->
+                ?assertMatch(not_found, khash:del(H, K)),
+                D0
+        end,
+    {D1, H}.
+
+%% Both models must agree on the number of stored entries.
+run_size({Dict, Hash} = State) ->
+    ?assertEqual(dict:size(Dict), khash:size(Hash)),
+    State.
+
+%% Compare the key sets of both models. Currently disabled in
+%% randomized_test_ (see the commented-out Actions entry) but still
+%% exported for manual use.
+run_keys({D, H}) ->
+    DKeys = dict:fetch_keys(D),
+    {ok, HKeys} = khash:keys(H),
+    % The original sorted HKeys twice (once into HKeys, then again
+    % inside the assertion); sorting once is sufficient.
+    ?assertEqual(lists:sort(DKeys), lists:sort(HKeys)),
+    {D, H}.
+
+%% Full contents of both models must match as sorted {K, V} lists.
+run_to_list({Dict, Hash}) ->
+    Expected = lists:sort(dict:to_list(Dict)),
+    Actual = lists:sort(khash:to_list(Hash)),
+    ?assertEqual(Expected, Actual),
+    {Dict, Hash}.
+
+%% Pick an Item from a [{Weight, Item}] list with probability
+%% proportional to Weight: draw Choice uniformly in [0, Sum) and walk
+%% the (sorted) list accumulating weights until Choice is covered.
+weighted_choice(Items0) ->
+    Items = lists:sort(Items0),
+    Sum = lists:sum([W || {W, _} <- Items]),
+    Choice = rand:uniform() * Sum,
+    weighted_choice(Items, 0.0, Choice).
+
+%% Unreachable when Choice < Sum; kept as a guard against misuse.
+weighted_choice([], _, _) ->
+    throw(bad_choice);
+%% Cumulative weight still short of Choice: skip this item.
+weighted_choice([{W, _} | Rest], S, C) when W + S < C ->
+    weighted_choice(Rest, W + S, C);
+weighted_choice([{_, I} | _], _, _) ->
+    I.
+
+%% Pick a key from the dict's key set; 'foo' is always added to the
+%% pool so it is never empty and missing-key code paths get probed
+%% even on a fresh dict.
+random_key(D) ->
+    Keys = lists:usort(dict:fetch_keys(D) ++ [foo]),
+    lists:nth(rand:uniform(length(Keys)), Keys).
+
+%% Arbitrary random Erlang term (see gen_term).
+random_val() ->
+    gen_term:any().