diff options
author | Nick Vatamaniuc <vatamane@gmail.com> | 2022-10-28 10:10:01 -0400 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2022-10-28 10:57:40 -0400 |
commit | 8c6ac52cc5dd591fd4b47f1f4619c3ce225dbc1f (patch) | |
tree | 836acdadc58f6c3d23c35353ab92258f5f2dcf93 | |
parent | c0476adc21477f7cf076b8527c1327b158e2b190 (diff) | |
download | couchdb-8c6ac52cc5dd591fd4b47f1f4619c3ce225dbc1f.tar.gz |
Integrate b64url, ets_lru and khash into the main repo
As discussed on the mailing list https://lists.apache.org/thread/opvsmz1pwlnv96wozy5kp7ss896l9lfp
30 files changed, 4883 insertions, 6 deletions
diff --git a/.gitignore b/.gitignore index acf98e5c0..816ece6d4 100644 --- a/.gitignore +++ b/.gitignore @@ -39,7 +39,6 @@ rel/tmpdata share/server/main-coffee.js share/server/main.js share/www -src/b64url/ src/bear/ src/certifi/ src/couch/priv/couch_js/**/config.h @@ -49,7 +48,6 @@ src/couch/priv/couch_ejson_compare/couch_ejson_compare.d src/couch/priv/couch_js/**/*.d src/couch/priv/icu_driver/couch_icu_driver.d src/mango/src/mango_cursor_text.nocompile -src/ets_lru/ src/excoveralls/ src/fauxton/ src/folsom/ @@ -59,7 +57,6 @@ src/hyper/ src/ibrowse/ src/idna/ src/jiffy/ -src/khash/ src/meck/ src/metrics/ src/mimerl/ diff --git a/rebar.config.script b/rebar.config.script index 38a57c966..3952ad857 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -110,6 +110,9 @@ SubDirs = [ "src/couch_epi", "src/config", "src/couch_log", + "src/khash", + "src/b64url", + "src/ets_lru", "src/chttpd", "src/couch", "src/couch_event", @@ -142,9 +145,6 @@ SubDirs = [ DepDescs = [ %% Independent Apps -{b64url, "b64url", {tag, "1.0.3"}}, -{ets_lru, "ets-lru", {tag, "1.1.0"}}, -{khash, "khash", {tag, "1.1.0"}}, {snappy, "snappy", {tag, "CouchDB-1.0.8"}}, %% %% Non-Erlang deps diff --git a/src/b64url/.gitignore b/src/b64url/.gitignore new file mode 100644 index 000000000..a3fd77fce --- /dev/null +++ b/src/b64url/.gitignore @@ -0,0 +1,10 @@ +.eunit/ +c_src/*.o +ebin/ +priv/ +.rebar/ +vc110.pdb +compile_commands.json + +_build/ +c_src/*.d
\ No newline at end of file diff --git a/src/b64url/LICENSE b/src/b64url/LICENSE new file mode 100644 index 000000000..94ad231b8 --- /dev/null +++ b/src/b64url/LICENSE @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ diff --git a/src/b64url/Makefile b/src/b64url/Makefile new file mode 100644 index 000000000..754165e5c --- /dev/null +++ b/src/b64url/Makefile @@ -0,0 +1,46 @@ +REBAR?=rebar + + +.PHONY: all +# target: all - Makes everything +all: build + + +.PHONY: build +# target: build - Builds the project +build: + $(REBAR) compile + + +.PHONY: check +# target: check - Checks if project builds and passes all the tests +check: build eunit + + +.PHONY: clean +# target: clean - Removes build artifacts +clean: + $(REBAR) clean + rm -f test/*.beam + + +.PHONY: distclean +# target: distclean - Removes all unversioned files +distclean: clean + git clean -fxd + + +.PHONY: eunit +# target: eunit - Runs eunit test suite +eunit: + $(REBAR) eunit + + +.PHONY: help +# target: help - Prints this help +help: + @egrep "^# target:" Makefile | sed -e 's/^# target: //g' | sort + + +%.beam: %.erl + erlc -o test/ $< diff --git a/src/b64url/README.md b/src/b64url/README.md new file mode 100644 index 000000000..c3a3497f2 --- /dev/null +++ b/src/b64url/README.md @@ -0,0 +1,46 @@ +# Base64 encoder with URL-safe scheme + +[![CI](https://github.com/apache/couchdb-b64url/actions/workflows/ci.yml/badge.svg)](https://github.com/apache/couchdb-b64url/actions/workflows/ci.yml) + +This is a simple NIF that is responsible for quickly encoding and +decoding Base64 URL values: + +```erlang +1> Thing = b64url:encode("Hello, CouchDB!"). +<<"SGVsbG8sIENvdWNoREIh">> +2> b64url:decode(Thing). +<<"Hello, CouchDB!">> +``` + +## Performance + +This implementation is significantly faster than the Erlang version it replaced +in CouchDB. The `benchmark.escript` file contains the original implementation +(using regular expressions to replace unsafe characters in the output of the +`base64` module) and can be used to compare the two for strings of various +lengths. 
For example: + +``` +ERL_LIBS=_build/default/lib/b64url/ ./test/benchmark.escript 4 10 100 30 +erl : 75491270 bytes / 30 seconds = 2516375.67 bps +nif : 672299342 bytes / 30 seconds = 22409978.07 bps +``` + +This test invocation spawns four workers that generate random strings between 10 +and 100 bytes in length and then perform an encode/decode on them in a tight +loop for 30 seconds, and then reports the aggregate encoded data volume. Note +that the generator overhead (`crypto:strong_rand_bytes/1`) is included in these +results, so the relative difference in encoder throughput is rather larger than +what's reported here. + +## Timeslice Consumption + +NIF implementations must take care to avoid doing [lengthy +work](https://www.erlang.org/doc/man/erl_nif.html#lengthy_work) in a single +invocation. This library will yield back to the Erlang VM as needed when +operating on a large string, maintaining a partial result until it can resume +operation. The current implementation uses a conservative heuristic that +estimates 64 bytes of encoding / decoding to consume 1% of a timeslice, so input +strings shorter than ~6k should be encoded / decoded within a single invocation, +and the library should not adversely affect the responsiveness of the VM in any +way. diff --git a/src/b64url/c_src/b64url.c b/src/b64url/c_src/b64url.c new file mode 100644 index 000000000..d9e2e0448 --- /dev/null +++ b/src/b64url/c_src/b64url.c @@ -0,0 +1,665 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +// License for the specific language governing permissions and limitations under +// the License. + +#include <assert.h> +#include <string.h> + +#include "erl_nif.h" + +#ifdef _WIN32 +#define INLINE __inline +#else +#define INLINE inline +#endif + + +typedef ERL_NIF_TERM ENTERM; + + +typedef struct +{ + ENTERM atom_ok; + ENTERM atom_error; + ENTERM atom_partial; + + ENTERM atom_nomem; + ENTERM atom_bad_block; + + ErlNifResourceType* res_st; +} b64url_priv; + + +typedef struct +{ + ErlNifPid pid; + ErlNifBinary tgt; + size_t len; + size_t si; + size_t ti; + char res_released; + char bin_released; +} b64url_st; + + +typedef enum +{ + ST_OK, + ST_ERROR, + ST_PARTIAL +} b64url_status; + + +const unsigned char B64URL_B2A[256] = { + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, // 0 - 15 + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99,100,101,102, // 16 - 31 + 103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118, // 32 - 47 + 119,120,121,122, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 45, 95, // 48 - 63 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 64 - 79 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 80 - 95 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 96 - 111 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 112 - 127 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 128 - 143 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 144 - 159 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 160 - 175 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 176 - 191 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 192 - 207 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 208 - 223 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 224 - 239 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 // 
240 - 255 +}; + +const unsigned char B64URL_A2B[256] = { + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0 - 15 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 16 - 31 + 255,255,255,255,255,255,255,255,255,255,255,255,255, 62,255,255, // 32 - 47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,255,255,255, // 48 - 63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64 - 79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255, 63, // 80 - 95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96 - 111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, // 112 - 127 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 128 - 143 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 144 - 159 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 160 - 175 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 176 - 191 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 192 - 207 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 208 - 223 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 224 - 239 + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 // 240 - 255 +}; + + +#define BYTES_PER_PERCENT 64 + +static INLINE int +do_consume_timeslice(ErlNifEnv* env) { +#if(ERL_NIF_MAJOR_VERSION >= 2 && ERL_NIF_MINOR_VERSION >= 4) + return enif_consume_timeslice(env, 1); +#else + return 0; +#endif +} + + +static INLINE ENTERM +make_atom(ErlNifEnv* env, const char* name) +{ + ENTERM ret; + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) { + return ret; + } + return enif_make_atom(env, name); +} + + +static INLINE ENTERM +make_ok(ErlNifEnv* env, b64url_priv* priv, ENTERM value) +{ + return enif_make_tuple2(env, priv->atom_ok, value); +} + + +static INLINE ENTERM +make_error(ErlNifEnv* env, b64url_priv* priv, ENTERM value) +{ + return 
enif_make_tuple2(env, priv->atom_error, value); +} + + +static INLINE ENTERM +make_bad_block(ErlNifEnv* env, b64url_priv* priv, size_t pos) +{ + ENTERM pterm = enif_make_uint64(env, pos); + return enif_make_tuple2(env, priv->atom_bad_block, pterm); +} + + +static INLINE ENTERM +make_partial(ErlNifEnv* env, b64url_priv* priv, ENTERM value) +{ + return enif_make_tuple2(env, priv->atom_partial, value); +} + + +static INLINE int +check_pid(ErlNifEnv* env, b64url_st* st) +{ + ErlNifPid self_pid; + ENTERM self; + ENTERM orig; + + enif_self(env, &self_pid); + self = enif_make_pid(env, &self_pid); + orig = enif_make_pid(env, &(st->pid)); + + if(enif_compare(self, orig) == 0) { + return 1; + } + + return 0; +} + + +static b64url_st* +b64url_st_alloc(ErlNifEnv* env, b64url_priv* priv, ErlNifBinary* src, size_t tlen) +{ + b64url_st* st = enif_alloc_resource(priv->res_st, sizeof(b64url_st)); + if(st == NULL) { + goto error; + } + + memset(st, '\0', sizeof(b64url_st)); + enif_self(env, &(st->pid)); + st->len = src->size; + st->si = 0; + st->ti = 0; + st->res_released = 0; + st->bin_released = 0; + + if(!enif_alloc_binary(tlen, &(st->tgt))) { + goto error; + } + + return st; + +error: + if(st != NULL) { + st->res_released = 1; + enif_release_resource(st); + } + return NULL; +} + + +static void +b64url_st_free(ErlNifEnv* env, void* obj) +{ + b64url_st* st = (b64url_st*) obj; + + if(!st->bin_released) { + enif_release_binary(&(st->tgt)); + } +} + +static ENTERM +b64url_st_enc_ret(ErlNifEnv* env, b64url_st* st, int status) +{ + b64url_priv* priv = (b64url_priv*) enif_priv_data(env); + ENTERM ret; + + if(status == ST_OK) { + ret = make_ok(env, priv, enif_make_binary(env, &(st->tgt))); + st->bin_released = 1; + } else if(status == ST_PARTIAL) { + ret = make_partial(env, priv, enif_make_resource(env, st)); + } else { + assert(0 == 1 && "invalid status encoder status"); + ret = enif_make_badarg(env); + } + + if(!st->res_released) { + st->res_released = 1; + enif_release_resource(st); + 
} + + return ret; +} + +static ENTERM +b64url_st_dec_ret(ErlNifEnv* env, b64url_st* st, int status, ENTERM ret) +{ + b64url_priv* priv = (b64url_priv*) enif_priv_data(env); + + if(status == ST_OK) { + ret = make_ok(env, priv, enif_make_binary(env, &(st->tgt))); + st->bin_released = 1; + } else if(status == ST_ERROR) { + ret = make_error(env, priv, ret); + } else if(status == ST_PARTIAL) { + ret = make_partial(env, priv, enif_make_resource(env, st)); + } else { + assert(0 == 1 && "invalid status decoder status"); + ret = enif_make_badarg(env); + } + + if(!st->res_released) { + st->res_released = 1; + enif_release_resource(st); + } + + return ret; +} + +static int +load(ErlNifEnv* env, void** priv, ENTERM info) +{ + int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + ErlNifResourceType* res; + + b64url_priv* new_priv = (b64url_priv*) enif_alloc(sizeof(b64url_priv)); + if(new_priv == NULL) { + return 1; + } + + res = enif_open_resource_type( + env, NULL, "b64url_st", b64url_st_free, flags, NULL); + if(res == NULL) { + return 1; + } + new_priv->res_st = res; + + new_priv->atom_ok = make_atom(env, "ok"); + new_priv->atom_error = make_atom(env, "error"); + new_priv->atom_partial = make_atom(env, "partial"); + + new_priv->atom_nomem = make_atom(env, "enomem"); + new_priv->atom_bad_block = make_atom(env, "bad_block"); + + *priv = (void*) new_priv; + + return 0; +} + + +static int +reload(ErlNifEnv* env, void** priv, ENTERM info) +{ + return 0; +} + + +static int +upgrade(ErlNifEnv* env, void** priv, void** old_priv, ENTERM info) +{ + return 0; +} + + +static void +unload(ErlNifEnv* env, void* priv) +{ + return; +} + + +static INLINE b64url_status +b64url_encode(ErlNifEnv* env, ErlNifBinary* src, b64url_st* st) +{ + size_t chunk_start = st->si; + size_t rem; + unsigned char c1; + unsigned char c2; + unsigned char c3; + + assert(st->si % 3 == 0 && "invalid source index"); + assert(st->ti % 4 == 0 && "invalid target index"); + + while(st->si + 3 <= src->size) { + c1 = 
src->data[st->si++]; + c2 = src->data[st->si++]; + c3 = src->data[st->si++]; + + st->tgt.data[st->ti++] = B64URL_B2A[(c1 >> 2) & 0x3F]; + st->tgt.data[st->ti++] = B64URL_B2A[((c1 << 4) | (c2 >> 4)) & 0x3F]; + st->tgt.data[st->ti++] = B64URL_B2A[((c2 << 2) | (c3 >> 6)) & 0x3F]; + st->tgt.data[st->ti++] = B64URL_B2A[c3 & 0x3F]; + + if(st->si - chunk_start > BYTES_PER_PERCENT) { + if(do_consume_timeslice(env)) { + return ST_PARTIAL; + } else { + chunk_start = st->si; + } + } + } + + rem = src->size % 3; + if(rem == 1) { + c1 = src->data[st->si]; + st->tgt.data[st->ti++] = B64URL_B2A[(c1 >> 2) & 0x3F]; + st->tgt.data[st->ti++] = B64URL_B2A[(c1 << 4) & 0x3F]; + } else if(rem == 2) { + c1 = src->data[st->si]; + c2 = src->data[st->si+1]; + st->tgt.data[st->ti++] = B64URL_B2A[(c1 >> 2) & 0x3F]; + st->tgt.data[st->ti++] = B64URL_B2A[((c1 << 4) | (c2 >> 4)) & 0x3F]; + st->tgt.data[st->ti++] = B64URL_B2A[(c2 << 2) & 0x3F]; + } else if(rem != 0) { + assert(0 == 1 && "Inavlid length calculation"); + } + + return ST_OK; +} + + +static ENTERM +b64url_encode_init(ErlNifEnv* env, int argc, const ENTERM argv[]) +{ + ErlNifBinary src; + b64url_priv* priv = (b64url_priv*) enif_priv_data(env); + b64url_st* st = NULL; + size_t tlen; + size_t rem; + int status; + ENTERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_inspect_iolist_as_binary(env, argv[0], &src)) { + ret = enif_make_badarg(env); + goto error; + } + + // The target length is defined as 4 * ceil(src_len/3) but we + // don't use '=' padding for URLs so we only add exactly the + // extra bytes we need. 
+ tlen = (src.size / 3) * 4; + rem = src.size % 3; + if(rem == 1) { + tlen += 2; + } else if(rem == 2) { + tlen += 3; + } + + st = b64url_st_alloc(env, priv, &src, tlen); + if(st == NULL) { + ret = make_error(env, priv, priv->atom_nomem); + goto error; + } + + status = b64url_encode(env, &src, st); + + return b64url_st_enc_ret(env, st, status); + +error: + if(st != NULL) { + st->res_released = 1; + enif_release_resource(st); + } + + return ret; +} + + +static ENTERM +b64url_encode_cont(ErlNifEnv* env, int argc, const ENTERM argv[]) +{ + ErlNifBinary src; + b64url_priv* priv = (b64url_priv*) enif_priv_data(env); + b64url_st* st = NULL; + void* res = NULL; + int status; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_inspect_iolist_as_binary(env, argv[0], &src)) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[1], priv->res_st, &res)) { + return enif_make_badarg(env); + } + + st = (b64url_st*) res; + + if(!check_pid(env, st)) { + return enif_make_badarg(env); + } + + if(src.size != st->len) { + return enif_make_badarg(env); + } + + status = b64url_encode(env, &src, st); + + return b64url_st_enc_ret(env, st, status); +} + + +static INLINE b64url_status +b64url_decode(ErlNifEnv* env, ErlNifBinary* src, b64url_st* st, ENTERM* ret) +{ + b64url_priv* priv = (b64url_priv*) enif_priv_data(env); + size_t chunk_start = st->si; + size_t rem; + unsigned char c1; + unsigned char c2; + unsigned char c3; + unsigned char c4; + + assert(st->si % 4 == 0 && "invalid source index"); + assert(st->ti % 3 == 0 && "invalid target index"); + + while(st->si + 4 <= src->size) { + c1 = B64URL_A2B[src->data[st->si++]]; + c2 = B64URL_A2B[src->data[st->si++]]; + c3 = B64URL_A2B[src->data[st->si++]]; + c4 = B64URL_A2B[src->data[st->si++]]; + + if(c1 == 255 || c2 == 255 || c3 == 255 || c4 == 255) { + *ret = make_bad_block(env, priv, st->si-4); + return ST_ERROR; + } + + st->tgt.data[st->ti++] = (c1 << 2) | (c2 >> 4); + st->tgt.data[st->ti++] = (c2 << 4) 
| (c3 >> 2); + st->tgt.data[st->ti++] = (c3 << 6) | c4; + + if(st->si - chunk_start > BYTES_PER_PERCENT) { + if(do_consume_timeslice(env)) { + return ST_PARTIAL; + } else { + chunk_start = st->si; + } + } + } + + rem = src->size % 4; + if(rem == 2) { + c1 = B64URL_A2B[src->data[st->si]]; + c2 = B64URL_A2B[src->data[st->si+1]]; + + if(c1 == 255 || c2 == 255) { + *ret = make_bad_block(env, priv, st->si); + return ST_ERROR; + } + + st->tgt.data[st->ti++] = (c1 << 2) | (c2 >> 4); + } else if(rem == 3) { + c1 = B64URL_A2B[src->data[st->si]]; + c2 = B64URL_A2B[src->data[st->si+1]]; + c3 = B64URL_A2B[src->data[st->si+2]]; + + if(c1 == 255 || c2 == 255 || c3 == 255) { + *ret = make_bad_block(env, priv, st->si); + return ST_ERROR; + } + + st->tgt.data[st->ti++] = (c1 << 2) | (c2 >> 4); + st->tgt.data[st->ti++] = (c2 << 4) | (c3 >> 2); + } else if(rem != 0) { + assert(0 == 1 && "invalid source length"); + } + + return ST_OK; +} + + +static ENTERM +b64url_decode_init(ErlNifEnv* env, int argc, const ENTERM argv[]) +{ + ErlNifBinary src; + b64url_priv* priv = (b64url_priv*) enif_priv_data(env); + b64url_st* st = NULL; + size_t tlen; + int status; + ENTERM ret = priv->atom_error; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_inspect_iolist_as_binary(env, argv[0], &src)) { + return enif_make_badarg(env); + } + + // We don't do use '=' padding for URLs so our target length + // will be the number of blocks of 4 bytes times 3, plus 1 or 2 + // depending on the number of bytes in the last block. 
+ tlen = (src.size / 4) * 3; + if(src.size % 4 == 1) { + ret = enif_make_badarg(env); + goto error; + } else if(src.size % 4 == 2) { + tlen += 1; + } else if(src.size % 4 == 3) { + tlen += 2; + } + + st = b64url_st_alloc(env, priv, &src, tlen); + if(st == NULL) { + ret = make_error(env, priv, priv->atom_nomem); + goto error; + } + + status = b64url_decode(env, &src, st, &ret); + + return b64url_st_dec_ret(env, st, status, ret); + +error: + if(st != NULL) { + st->res_released = 1; + enif_release_resource(st); + } + + return ret; +} + + +static ENTERM +b64url_decode_cont(ErlNifEnv* env, int argc, const ENTERM argv[]) +{ + ErlNifBinary src; + b64url_priv* priv = (b64url_priv*) enif_priv_data(env); + b64url_st* st = NULL; + void* res = NULL; + ENTERM ret = priv->atom_error; + int status; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_inspect_iolist_as_binary(env, argv[0], &src)) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[1], priv->res_st, &res)) { + return enif_make_badarg(env); + } + + st = (b64url_st*) res; + + if(!check_pid(env, st)) { + return enif_make_badarg(env); + } + + if(src.size != st->len) { + return enif_make_badarg(env); + } + + status = b64url_decode(env, &src, st, &ret); + + return b64url_st_dec_ret(env, st, status, ret); +} + + +static unsigned char CHECK_A2B[64] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + + +static ENTERM +b64url_check_tables(ErlNifEnv* env, int argc, const ENTERM argv[]) +{ + b64url_priv* priv = (b64url_priv*) enif_priv_data(env); + ENTERM pos; + int i; + + for(i = 0; i < 64; i++) { + if(B64URL_B2A[i] != CHECK_A2B[i]) { + pos = enif_make_int(env, i); + return enif_make_tuple2(env, priv->atom_error, pos); + } + } + + for(i = 64; i < 256; i++) { + if(B64URL_B2A[i] != 255) { + pos = enif_make_int(env, i); + return enif_make_tuple2(env, priv->atom_error, pos); + } + } + + for(i = 0; i < 64; i++) { + if(B64URL_A2B[CHECK_A2B[i]] != i) { + pos = 
enif_make_int(env, i); + return enif_make_tuple2(env, priv->atom_error, pos); + } + } + + for(i = 0; i < 256; i++) { + if(B64URL_A2B[i] == 255) { + continue; + } + if(CHECK_A2B[B64URL_A2B[i]] != i) { + pos = enif_make_int(env, i); + return enif_make_tuple2(env, priv->atom_error, pos); + } + } + + return priv->atom_ok; +} + +static ErlNifFunc funcs[] = { + {"encode_init", 1, b64url_encode_init}, + {"encode_cont", 2, b64url_encode_cont}, + {"decode_init", 1, b64url_decode_init}, + {"decode_cont", 2, b64url_decode_cont}, + {"check_tables", 0, b64url_check_tables} +}; + + +ERL_NIF_INIT(b64url, funcs, &load, &reload, &upgrade, &unload); + + diff --git a/src/b64url/rebar.config b/src/b64url/rebar.config new file mode 100644 index 000000000..d9512aed3 --- /dev/null +++ b/src/b64url/rebar.config @@ -0,0 +1,32 @@ +{plugins, [ + pc +]}. + +{project_plugins, [ + erlfmt +]}. + +{provider_hooks, [ + {pre, [ + {compile, {pc, compile}}, + {clean, {pc, clean}} + ]} +]}. + +{port_specs, [ + {"priv/b64url.so", ["c_src/*.c"]} +]}. + +{port_env, [ + % Development compilation + % {".*", "CFLAGS", "$CFLAGS -g -Wall -Werror -fPIC"} + + % Production compilation + {"(linux|solaris|darwin|freebsd)", "CFLAGS", "$CFLAGS -Wall -Werror -DNDEBUG -O3"}, + {"win32", "CFLAGS", "$CFLAGS /O2 /DNDEBUG /Wall"} +]}. + +{eunit_opts, [ + debug_info, + verbose +]}. diff --git a/src/b64url/src/b64url.app.src b/src/b64url/src/b64url.app.src new file mode 100644 index 000000000..aaf8e9fc4 --- /dev/null +++ b/src/b64url/src/b64url.app.src @@ -0,0 +1,18 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, b64url, [ + {description, "A small NIF for encoding Base64 URL encoding/decoding."}, + {vsn, git}, + {registered, []}, + {applications, [kernel, stdlib]} +]}. diff --git a/src/b64url/src/b64url.erl b/src/b64url/src/b64url.erl new file mode 100644 index 000000000..6e6ddaccd --- /dev/null +++ b/src/b64url/src/b64url.erl @@ -0,0 +1,88 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(b64url). +-on_load(init/0). + +-export([ + encode/1, + decode/1 +]). + +% Internal sanity checks +-export([ + check_tables/0 +]). + +-define(NOT_LOADED, not_loaded(?LINE)). + +-spec encode(iodata()) -> binary(). +encode(IoData) -> + case encode_init(IoData) of + {ok, Bin} -> + Bin; + {partial, St} -> + encode_loop(IoData, St) + end. + +-spec decode(iodata()) -> binary() | {error, any()}. +decode(IoData) -> + case decode_init(IoData) of + {ok, Bin} -> + Bin; + {error, _} = Ret -> + Ret; + {partial, St} -> + decode_loop(IoData, St) + end. + +-spec check_tables() -> ok | {error, non_neg_integer()}. +check_tables() -> + ?NOT_LOADED. + +init() -> + PrivDir = + case code:priv_dir(?MODULE) of + {error, _} -> + EbinDir = filename:dirname(code:which(?MODULE)), + AppPath = filename:dirname(EbinDir), + filename:join(AppPath, "priv"); + Path -> + Path + end, + erlang:load_nif(filename:join(PrivDir, "b64url"), 0). 
+ +encode_loop(IoData, St) -> + case encode_cont(IoData, St) of + {ok, Bin} -> + Bin; + {partial, St} -> + encode_loop(IoData, St) + end. + +decode_loop(IoData, St) -> + case decode_cont(IoData, St) of + {ok, Bin} -> + Bin; + {error, _} = Ret -> + Ret; + {partial, St} -> + decode_loop(IoData, St) + end. + +encode_init(_) -> ?NOT_LOADED. +encode_cont(_, _) -> ?NOT_LOADED. +decode_init(_) -> ?NOT_LOADED. +decode_cont(_, _) -> ?NOT_LOADED. + +not_loaded(Line) -> + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). diff --git a/src/b64url/test/b64url_tests.erl b/src/b64url/test/b64url_tests.erl new file mode 100644 index 000000000..5e8e57502 --- /dev/null +++ b/src/b64url/test/b64url_tests.erl @@ -0,0 +1,144 @@ +-module(b64url_tests). + +-include_lib("eunit/include/eunit.hrl"). + +-define(MAX_SIZE, 6401). +-define(NUM_TESTS, 500). + +table_test() -> + ?assertEqual(ok, b64url:check_tables()). + +encode_binary_test() -> + lists:foreach( + fun(_) -> + Bin = gen_binary(), + A = couch_encode_base64url(Bin), + B = b64url:encode(Bin), + ?assertEqual(A, B) + end, + lists:seq(1, ?NUM_TESTS) + ). + +encode_iolist_test() -> + lists:foreach( + fun(_) -> + IoList = shallow_iolist(), + A = couch_encode_base64url(iolist_to_binary(IoList)), + B = b64url:encode(IoList), + ?assertEqual(A, B) + end, + lists:seq(1, ?NUM_TESTS) + ). + +decode_binary_test() -> + lists:foreach( + fun(_) -> + Bin = gen_binary(), + B64UrlBin = couch_encode_base64url(Bin), + Dec = b64url:decode(B64UrlBin), + ?assertEqual(Bin, Dec) + end, + lists:seq(1, ?NUM_TESTS) + ). + +decode_iolist_test() -> + lists:foreach( + fun(_) -> + IoList = shallow_b64_iolist(), + A = couch_decode_base64url(iolist_to_binary(IoList)), + B = b64url:decode(IoList), + ?assertEqual(A, B) + end, + lists:seq(1, ?NUM_TESTS) + ). 
+ +decode_binary_error_test() -> + lists:foreach( + fun(_) -> + {ErrBin, BlockPos} = bad_binary(), + Dec = b64url:decode(ErrBin), + ?assertEqual({error, {bad_block, BlockPos}}, Dec) + end, + lists:seq(1, ?NUM_TESTS) + ). + +decode_bad_length_test() -> + lists:foreach( + fun(_) -> + Bin = bad_len_binary(), + ?assertError(badarg, b64url:decode(Bin)) + end, + lists:seq(1, ?NUM_TESTS) + ). + +gen_binary() -> + % -1 to allow for 0 length + Length = rand:uniform(?MAX_SIZE) - 1, + crypto:strong_rand_bytes(Length). + +shallow_iolist() -> + to_iolist(gen_binary()). + +shallow_b64_iolist() -> + to_iolist(couch_encode_base64url(gen_binary())). + +bad_binary() -> + insert_error(gen_binary()). + +bad_len_binary() -> + make_bad_len(gen_binary()). + +to_iolist(<<>>) -> + case rand:uniform(2) of + 1 -> <<>>; + 2 -> [<<>>] + end; +to_iolist(B) when is_binary(B), size(B) > 0 -> + S = rand:uniform(size(B)), + <<First:S/binary, Second/binary>> = B, + case rand:uniform(3) of + 1 -> + [to_iolist(First), Second]; + 2 -> + [First, to_iolist(Second)]; + 3 -> + [First, Second] + end. + +insert_error(B) when is_binary(B), size(B) < 2 -> + case rand:uniform(2) of + 1 -> {<<122, 255>>, 0}; + 2 -> {<<122, 122, 255>>, 0} + end; +insert_error(B) when is_binary(B) -> + B64 = couch_encode_base64url(B), + S = rand:uniform(size(B64) - 1), + <<First:S/binary, _:1/binary, Second/binary>> = B64, + {<<First:S/binary, 255, Second/binary>>, 4 * (S div 4)}. + +make_bad_len(Bin) when size(Bin) rem 4 == 1 -> + Bin; +make_bad_len(Bin) when size(Bin) rem 4 == 2 -> + <<"AAA", Bin/binary>>; +make_bad_len(Bin) when size(Bin) rem 4 == 3 -> + <<"AA", Bin/binary>>; +make_bad_len(Bin) when size(Bin) rem 4 == 0 -> + <<"A", Bin/binary>>. + +% These functions are copy/pasted from couch_util to avoid +% the direct dependency. The goal of this project is to replace +% these in couch_util anyway so when that happens they'll only +% exist here for these tests. 
+ +couch_encode_base64url(Url) -> + Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")), + Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])), + iolist_to_binary(re:replace(Url2, "\\+", "-", [global])). + +couch_decode_base64url(Url64) -> + Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]), + Url2 = iolist_to_binary( + re:replace(iolist_to_binary(Url1), "_", "/", [global]) + ), + Padding = list_to_binary(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)), + base64:decode(<<Url2/binary, Padding/binary>>). diff --git a/src/b64url/test/benchmark.escript b/src/b64url/test/benchmark.escript new file mode 100755 index 000000000..00a6f0dda --- /dev/null +++ b/src/b64url/test/benchmark.escript @@ -0,0 +1,165 @@ +#!/usr/bin/env escript + +-mode(compile). + + +-export([ + encode/1, + decode/1, + run_worker/1 +]). + + +-record(st, { + parent, + module, + workers, + minsize, + maxsize, + duration, + total_bytes +}). + + +main([Workers0, MinSize0, MaxSize0, Duration0]) -> + code:add_path("./ebin"), + code:add_path("../ebin"), + Workers = to_int(Workers0), + MinSize = to_int(MinSize0), + MaxSize = to_int(MaxSize0), + Duration = to_int(Duration0), + if Workers > 0 -> ok; true -> + die("Worker count must be positive~n") + end, + if MinSize > 0 -> ok; true -> + die("Minimum size must be positive.~n") + end, + if MaxSize > 0 -> ok; true -> + die("Maximum size must be positive.~n") + end, + if MinSize < MaxSize -> ok; true -> + die("Minimum size must be less than maximum size.~n") + end, + if Duration > 0 -> ok; true -> + die("Duration must be positive.~n") + end, + St = #st{ + parent = self(), + workers = Workers, + minsize = MinSize, + maxsize = MaxSize, + duration = Duration + }, + lists:foreach(fun(M) -> + run_test(St#st{module=M}) + end, randomize([b64url, ?MODULE])); + +main(_) -> + Args = [escript:script_name()], + die("usage: ~s num_workers min_size max_size time_per_test~n", Args). 
+ + +run_test(St) -> + Workers = spawn_workers(St#st.workers, St), + start_workers(Workers), + Results = wait_for_workers(Workers), + report(St#st.module, St#st.duration, Results). + + +start_workers(Pids) -> + lists:foreach(fun(P) -> + P ! start + end, Pids). + + +wait_for_workers(Pids) -> + lists:map(fun(P) -> + receive + {P, TotalBytes} -> TotalBytes + end + end, Pids). + + +report(Module, Duration, TotalByteList) -> + ModDesc = case Module of + ?MODULE -> "erl"; + b64url -> "nif" + end, + TotalBytes = lists:sum(TotalByteList), + io:format("~s : ~14b bytes / ~3b seconds = ~14.2f bps~n", [ + ModDesc, TotalBytes, Duration, TotalBytes / Duration]). + + +spawn_workers(NumWorkers, St) -> + lists:map(fun(_) -> + spawn_link(?MODULE, run_worker, [St]) + end, lists:seq(1, NumWorkers)). + + +run_worker(St) -> + receive + start -> ok + end, + run_worker(St#st{total_bytes=0}, os:timestamp()). + + +run_worker(St, Started) -> + HasRun = timer:now_diff(os:timestamp(), Started), + case HasRun div 1000000 > St#st.duration of + true -> + St#st.parent ! {self(), St#st.total_bytes}; + false -> + NewSt = do_round_trip(St), + run_worker(NewSt, Started) + end. + + +do_round_trip(St) -> + Size = St#st.minsize + rand:uniform(St#st.maxsize - St#st.minsize), + Data = crypto:strong_rand_bytes(Size), + Encoded = (St#st.module):encode(Data), + Data = (St#st.module):decode(Encoded), + St#st{total_bytes=St#st.total_bytes+Size}. + + +encode(Url) -> + Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")), + Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])), + iolist_to_binary(re:replace(Url2, "\\+", "-", [global])). + + +decode(Url64) -> + Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]), + Url2 = iolist_to_binary( + re:replace(iolist_to_binary(Url1), "_", "/", [global]) + ), + Padding = list_to_binary(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)), + base64:decode(<<Url2/binary, Padding/binary>>). 
+ +randomize(List) -> + List0 = [{rand:uniform(), L} || L <- List], + List1 = lists:sort(List0), + [L || {_, L} <- List1]. + + +to_int(Val) when is_integer(Val) -> + Val; +to_int(Val) when is_binary(Val) -> + to_int(binary_to_list(Val)); +to_int(Val) when is_list(Val) -> + try + list_to_integer(Val) + catch _:_ -> + die("Invalid integer: ~w~n", [Val]) + end; +to_int(Val) -> + die("Invalid integer: ~w~n", [Val]). + + +die(Message) -> + die(Message, []). + +die(Format, Args) -> + io:format(Format, Args), + init:stop(). + diff --git a/src/ets_lru/.gitignore b/src/ets_lru/.gitignore new file mode 100644 index 000000000..3164eee31 --- /dev/null +++ b/src/ets_lru/.gitignore @@ -0,0 +1,3 @@ +.rebar +ebin/ +*~ diff --git a/src/ets_lru/LICENSE b/src/ets_lru/LICENSE new file mode 100644 index 000000000..f6cd2bc80 --- /dev/null +++ b/src/ets_lru/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/ets_lru/Makefile b/src/ets_lru/Makefile new file mode 100644 index 000000000..058a7ecfc --- /dev/null +++ b/src/ets_lru/Makefile @@ -0,0 +1,41 @@ +REBAR?=rebar + + +.PHONY: all +# target: all - Makes everything +all: build + + +.PHONY: build +# target: build - Builds the project +build: + $(REBAR) compile + + +.PHONY: check +# target: check - Checks if project builds and passes all the tests +check: build eunit + + +.PHONY: clean +# target: clean - Removes build artifacts +clean: + $(REBAR) clean + + +.PHONY: distclean +# target: distclean - Removes all unversioned files +distclean: clean + git clean -fxd + + +.PHONY: eunit +# target: eunit - Runs eunit test suite +eunit: + $(REBAR) eunit + + +.PHONY: help +# target: help - Prints this help +help: + @egrep "^# target:" Makefile | sed -e 's/^# target: //g' | sort diff --git a/src/ets_lru/src/ets_lru.app.src b/src/ets_lru/src/ets_lru.app.src new file mode 100644 index 000000000..c81ce11bd --- /dev/null +++ b/src/ets_lru/src/ets_lru.app.src @@ -0,0 +1,21 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, ets_lru, [ + {description, "ETS Base LRU Cache"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib + ]} +]}. diff --git a/src/ets_lru/src/ets_lru.erl b/src/ets_lru/src/ets_lru.erl new file mode 100644 index 000000000..891aeeb57 --- /dev/null +++ b/src/ets_lru/src/ets_lru.erl @@ -0,0 +1,335 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ets_lru). +-behaviour(gen_server). +-vsn(2). + +-export([ + start_link/2, + stop/1, + + insert/3, + lookup/2, + match/3, + match_object/3, + remove/2, + clear/1, + + % Dirty functions read straight from + % the ETS tables which means there are + % race conditions with concurrent access. + lookup_d/2 +]). + +-export([ + init/1, + terminate/2, + + handle_call/3, + handle_cast/2, + handle_info/2, + + code_change/3 +]). + +-define(DEFAULT_TIME_UNIT, millisecond). + +-type time_value() :: integer(). +-type strict_monotonic_time() :: {time_value(), integer()}. 
+ +-record(entry, { + key :: term(), + val :: term(), + atime :: strict_monotonic_time(), + ctime :: strict_monotonic_time() +}). + +-record(st, { + objects, + atimes, + ctimes, + + max_objs :: non_neg_integer() | undefined, + max_size :: non_neg_integer() | undefined, + max_lifetime :: non_neg_integer() | undefined, + time_unit = ?DEFAULT_TIME_UNIT :: atom() +}). + +start_link(Name, Options) when is_atom(Name) -> + gen_server:start_link({local, Name}, ?MODULE, {Name, Options}, []). + +stop(LRU) -> + gen_server:cast(LRU, stop). + +lookup(LRU, Key) -> + gen_server:call(LRU, {lookup, Key}). + +insert(LRU, Key, Val) -> + gen_server:call(LRU, {insert, Key, Val}). + +remove(LRU, Key) -> + gen_server:call(LRU, {remove, Key}). + +%% @doc match/3 provides an efficient way to retrieve parts of the +%% keys and values without copying or requiring circumvention of the +%% ets_lru API. The KeySpec and ValueSpec parameters are used as part +%% of one larger match spec so keep in mind that all capturing +%% placeholders will be aliased between the key and value parts. +-spec match(atom() | pid(), term(), term()) -> [[any()]]. +match(LRU, KeySpec, ValueSpec) -> + gen_server:call(LRU, {match, KeySpec, ValueSpec}). + +%% @doc match_object/3 provides an efficient way to retrieve multiple +%% values using match conditions. The KeySpec and ValueSpec parameters +%% are used as part of one larger match spec so keep in mind that all +%% capturing placeholders will be aliased between the key and value +%% parts. +-spec match_object(atom() | pid(), term(), term()) -> [any()]. +match_object(Name, KeySpec, ValueSpec) when is_atom(Name) -> + Pattern = #entry{key = KeySpec, val = ValueSpec, _ = '_'}, + Entries = ets:match_object(obj_table(Name), Pattern), + lists:map( + fun(#entry{key = Key, val = Val}) -> + gen_server:cast(Name, {accessed, Key}), + Val + end, + Entries + ); +match_object(LRU, KeySpec, ValueSpec) -> + gen_server:call(LRU, {match_object, KeySpec, ValueSpec}). 
+ +clear(LRU) -> + gen_server:call(LRU, clear). + +lookup_d(Name, Key) when is_atom(Name) -> + case ets:lookup(obj_table(Name), Key) of + [#entry{val = Val}] -> + gen_server:cast(Name, {accessed, Key}), + {ok, Val}; + [] -> + not_found + end. + +init({Name, Options}) -> + St = set_options(#st{}, Options), + ObjOpts = [set, named_table, protected, {keypos, #entry.key}], + TimeOpts = [ordered_set, named_table, protected], + + {ok, St#st{ + objects = ets:new(obj_table(Name), ObjOpts), + atimes = ets:new(at_table(Name), TimeOpts), + ctimes = ets:new(ct_table(Name), TimeOpts) + }}. + +terminate(_Reason, St) -> + true = ets:delete(St#st.objects), + true = ets:delete(St#st.atimes), + true = ets:delete(St#st.ctimes), + ok. + +handle_call({lookup, Key}, _From, St) -> + Reply = + case ets:lookup(St#st.objects, Key) of + [#entry{val = Val} | _] -> + accessed(Key, St), + {ok, Val}; + [] -> + not_found + end, + {reply, Reply, St, 0}; +handle_call({match_object, KeySpec, ValueSpec}, _From, St) -> + Pattern = #entry{key = KeySpec, val = ValueSpec, _ = '_'}, + Entries = ets:match_object(St#st.objects, Pattern), + Values = lists:map( + fun(#entry{key = Key, val = Val}) -> + accessed(Key, St), + Val + end, + Entries + ), + {reply, Values, St, 0}; +handle_call({match, KeySpec, ValueSpec}, _From, St) -> + Pattern = #entry{key = KeySpec, val = ValueSpec, _ = '_'}, + Values = ets:match(St#st.objects, Pattern), + {reply, Values, St, 0}; +handle_call({insert, Key, Val}, _From, St) -> + NewATime = strict_monotonic_time(St#st.time_unit), + Pattern = #entry{key = Key, atime = '$1', _ = '_'}, + case ets:match(St#st.objects, Pattern) of + [[ATime]] -> + Update = {#entry.val, Val}, + true = ets:update_element(St#st.objects, Key, Update), + true = ets:delete(St#st.atimes, ATime), + true = ets:insert(St#st.atimes, {NewATime, Key}); + [] -> + Entry = #entry{key = Key, val = Val, atime = NewATime, ctime = NewATime}, + true = ets:insert(St#st.objects, Entry), + true = ets:insert(St#st.atimes, 
{NewATime, Key}), + true = ets:insert(St#st.ctimes, {NewATime, Key}) + end, + {reply, ok, St, 0}; +handle_call({remove, Key}, _From, St) -> + Pattern = #entry{key = Key, atime = '$1', ctime = '$2', _ = '_'}, + Reply = + case ets:match(St#st.objects, Pattern) of + [[ATime, CTime]] -> + true = ets:delete(St#st.objects, Key), + true = ets:delete(St#st.atimes, ATime), + true = ets:delete(St#st.ctimes, CTime), + ok; + [] -> + not_found + end, + {reply, Reply, St, 0}; +handle_call(clear, _From, St) -> + true = ets:delete_all_objects(St#st.objects), + true = ets:delete_all_objects(St#st.atimes), + true = ets:delete_all_objects(St#st.ctimes), + % No need to timeout here and evict cache + % entries because its now empty. + {reply, ok, St}; +handle_call(Msg, _From, St) -> + {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}. + +handle_cast({accessed, Key}, St) -> + accessed(Key, St), + {noreply, St, 0}; +handle_cast(stop, St) -> + {stop, normal, St}; +handle_cast(Msg, St) -> + {stop, {invalid_cast, Msg}, St}. + +handle_info(timeout, St) -> + trim(St), + {noreply, St, next_timeout(St)}; +handle_info(Msg, St) -> + {stop, {invalid_info, Msg}, St}. + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + +accessed(Key, St) -> + Pattern = #entry{key = Key, atime = '$1', _ = '_'}, + case ets:match(St#st.objects, Pattern) of + [[ATime]] -> + NewATime = strict_monotonic_time(St#st.time_unit), + Update = {#entry.atime, NewATime}, + true = ets:update_element(St#st.objects, Key, Update), + true = ets:delete(St#st.atimes, ATime), + true = ets:insert(St#st.atimes, {NewATime, Key}), + ok; + [] -> + ok + end. + +trim(St) -> + trim_count(St), + trim_size(St), + trim_lifetime(St). + +trim_count(#st{max_objs = undefined}) -> + ok; +trim_count(#st{max_objs = Max} = St) -> + case ets:info(St#st.objects, size) > Max of + true -> + drop_lru(St, fun trim_count/1); + false -> + ok + end. 
+ +trim_size(#st{max_size = undefined}) -> + ok; +trim_size(#st{max_size = Max} = St) -> + case ets:info(St#st.objects, memory) > Max of + true -> + drop_lru(St, fun trim_size/1); + false -> + ok + end. + +trim_lifetime(#st{max_lifetime = undefined}) -> + ok; +trim_lifetime(#st{max_lifetime = Max} = St) -> + Now = erlang:monotonic_time(St#st.time_unit), + case ets:first(St#st.ctimes) of + '$end_of_table' -> + ok; + CTime = {Time, _} -> + case Now - Time > Max of + true -> + [{CTime, Key}] = ets:lookup(St#st.ctimes, CTime), + Pattern = #entry{key = Key, atime = '$1', _ = '_'}, + [[ATime]] = ets:match(St#st.objects, Pattern), + true = ets:delete(St#st.objects, Key), + true = ets:delete(St#st.atimes, ATime), + true = ets:delete(St#st.ctimes, CTime), + trim_lifetime(St); + false -> + ok + end + end. + +drop_lru(St, Continue) -> + case ets:first(St#st.atimes) of + '$end_of_table' -> + empty; + ATime -> + [{ATime, Key}] = ets:lookup(St#st.atimes, ATime), + Pattern = #entry{key = Key, ctime = '$1', _ = '_'}, + [[CTime]] = ets:match(St#st.objects, Pattern), + true = ets:delete(St#st.objects, Key), + true = ets:delete(St#st.atimes, ATime), + true = ets:delete(St#st.ctimes, CTime), + Continue(St) + end. + +next_timeout(#st{max_lifetime = undefined}) -> + infinity; +next_timeout(St) -> + case ets:first(St#st.ctimes) of + '$end_of_table' -> + infinity; + {Time, _} -> + Now = erlang:monotonic_time(St#st.time_unit), + TimeDiff = Now - Time, + erlang:max(St#st.max_lifetime - TimeDiff, 0) + end. 
+ +set_options(St, []) -> + St; +set_options(St, [{max_objects, N} | Rest]) when is_integer(N), N >= 0 -> + set_options(St#st{max_objs = N}, Rest); +set_options(St, [{max_size, N} | Rest]) when is_integer(N), N >= 0 -> + set_options(St#st{max_size = N}, Rest); +set_options(St, [{max_lifetime, N} | Rest]) when is_integer(N), N >= 0 -> + set_options(St#st{max_lifetime = N}, Rest); +set_options(St, [{time_unit, T} | Rest]) when is_atom(T) -> + set_options(St#st{time_unit = T}, Rest); +set_options(_, [Opt | _]) -> + throw({invalid_option, Opt}). + +obj_table(Name) -> + table_name(Name, "_objects"). + +at_table(Name) -> + table_name(Name, "_atimes"). + +ct_table(Name) -> + table_name(Name, "_ctimes"). + +table_name(Name, Ext) -> + list_to_atom(atom_to_list(Name) ++ Ext). + +-spec strict_monotonic_time(atom()) -> strict_monotonic_time(). +strict_monotonic_time(TimeUnit) -> + {erlang:monotonic_time(TimeUnit), erlang:unique_integer([monotonic])}. diff --git a/src/ets_lru/test/ets_lru_test.erl b/src/ets_lru/test/ets_lru_test.erl new file mode 100644 index 000000000..5dd193f8d --- /dev/null +++ b/src/ets_lru/test/ets_lru_test.erl @@ -0,0 +1,339 @@ +-module(ets_lru_test). + +-include_lib("eunit/include/eunit.hrl"). + +lifecyle_test_() -> + { + "Test LRU lifecycle", + {setup, fun() -> ets_lru:start_link(?MODULE, []) end, + fun({ok, LRU}) -> is_process_alive(LRU) == false end, fun({ok, LRU}) -> + [ + { + "ets_lru:start_link/2 returned an LRU", + ?_assert(is_pid(LRU)) + }, + { + "Destroyed the LRU ok", + ?_assertEqual(ok, ets_lru:stop(LRU)) + } + ] + end} + }. 
+ +table_names_test_() -> + { + "Test tables names", + {setup, fun() -> ets_lru:start_link(foo, []) end, fun({ok, LRU}) -> + [ + { + "foo_objects exists", + ?_assertEqual(0, ets:info(foo_objects, size)) + }, + { + "foo_atimes exists", + ?_assertEqual(0, ets:info(foo_atimes, size)) + }, + { + "foo_ctimes exists", + ?_assertEqual(0, ets:info(foo_ctimes, size)) + }, + { + "LRU stopped normally", + ?_test(begin + Reason = stop_lru({ok, LRU}), + ?assertEqual(normal, Reason) + end) + }, + { + "foo_objects doesn't exists", + ?_assertEqual(undefined, ets:info(foo_objects, size)) + }, + { + "foo_atimes doesn't exists", + ?_assertEqual(undefined, ets:info(foo_atimes, size)) + }, + { + "foo_ctimes doesn't exists", + ?_assertEqual(undefined, ets:info(foo_ctimes, size)) + } + ] + end} + }. + +basic_behavior_test_() -> + { + "Test basic behavior", + {foreach, + fun() -> + {ok, LRU} = ets_lru:start_link(test_lru, []), + ok = ets_lru:insert(LRU, foo, bar), + {ok, LRU} + end, + fun stop_lru/1, [ + fun({ok, LRU}) -> + { + "Lookup returned the inserted value", + ?_assertEqual({ok, bar}, ets_lru:lookup(LRU, foo)) + } + end, + fun({ok, _LRU}) -> + { + "Dirty lookup returned the inserted value", + ?_assertEqual( + {ok, bar}, + ets_lru:lookup_d(test_lru, foo) + ) + } + end, + fun({ok, LRU}) -> + [ + { + "Lookup returned the inserted value", + ?_assertEqual({ok, bar}, ets_lru:lookup(LRU, foo)) + }, + { + "Insert new value", + ?_assertEqual(ok, ets_lru:insert(LRU, foo, bam)) + }, + { + "Lookup returned the newly inserted value", + ?_assertEqual({ok, bam}, ets_lru:lookup(LRU, foo)) + } + ] + end, + fun({ok, LRU}) -> + [ + { + "Lookup returned the inserted value", + ?_assertEqual({ok, bar}, ets_lru:lookup(LRU, foo)) + }, + { + "Remove value", + ?_assertEqual(ok, ets_lru:remove(LRU, foo)) + }, + { + "Lookup returned not_found for removed value", + ?_assertEqual(not_found, ets_lru:lookup(LRU, foo)) + } + ] + end, + fun({ok, LRU}) -> + [ + { + "Lookup returned the inserted value", + 
?_assertEqual({ok, bar}, ets_lru:lookup(LRU, foo)) + }, + { + "Clear LRU", + ?_assertEqual(ok, ets_lru:clear(LRU)) + }, + { + "Lookup returned not_found after a clear", + ?_assertEqual(not_found, ets_lru:lookup(LRU, foo)) + } + ] + end + ]} + }. + +lru_good_options_test_() -> + { + "Test good LRU options", + {foreachx, + fun(Opts) -> + process_flag(trap_exit, true), + ets_lru:start_link(?MODULE, Opts) + end, + fun(_, Cfg) -> stop_lru(Cfg) end, [ + {[{max_objects, 1}], fun test_good_opts/2}, + {[{max_objects, 5}], fun test_good_opts/2}, + {[{max_objects, 923928342098203942}], fun test_good_opts/2}, + {[{max_size, 1}], fun test_good_opts/2}, + {[{max_size, 5}], fun test_good_opts/2}, + {[{max_size, 2342923423942309423094}], fun test_good_opts/2}, + {[{max_lifetime, 1}], fun test_good_opts/2}, + {[{max_lifetime, 5}], fun test_good_opts/2}, + {[{max_lifetime, 1244209909180928348}], fun test_good_opts/2} + ]} + }. + +lru_bad_options_test_() -> + { + "Test bad LRU options", + {foreachx, + fun(Opts) -> + process_flag(trap_exit, true), + ets_lru:start_link(?MODULE, Opts) + end, + fun(_, _) -> + case whereis(?MODULE) of + Pid when is_pid(Pid) -> + stop_lru({ok, Pid}); + undefined -> + ok + end + end, + [ + {[{bingo, bango}], fun test_bad_opts/2}, + {[12], fun test_bad_opts/2}, + {[true], fun test_bad_opts/2} + ]} + }. + +lru_limits_test_() -> + { + "Test LRU limits", + {foreachx, fun(Opts) -> ets_lru:start_link(lru, Opts) end, fun(_, Cfg) -> stop_lru(Cfg) end, + [ + {[{max_objects, 25}], fun test_limits/2}, + {[{max_size, 1024}], fun test_limits/2}, + {[{max_lifetime, 100}], fun test_limits/2} + ]} + }. 
%% Tests for ets_lru:match/3 and ets_lru:match_object/3: empty result,
%% single match, multiple matches, and addressing the LRU both by pid
%% and by its registered name (test_lru). Each case runs against a
%% fresh LRU (foreach fixture) torn down by stop_lru/1.
lru_match_test_() ->
    {
        "Test match functions",
        {foreach, fun() -> ets_lru:start_link(test_lru, []) end, fun stop_lru/1, [
            fun({ok, LRU}) ->
                {
                    "Empty match",
                    ?_assertEqual([], ets_lru:match(LRU, a, '$1'))
                }
            end,
            fun({ok, LRU}) ->
                ets_lru:insert(LRU, b, {x, y}),
                {
                    "Single match",
                    ?_assertEqual(
                        [[x, y]],
                        ets_lru:match(LRU, b, {'$1', '$2'})
                    )
                }
            end,
            fun({ok, LRU}) ->
                ets_lru:insert(LRU, boston, {state, "MA"}),
                ets_lru:insert(LRU, new_york, {state, "NY"}),
                Values = ets_lru:match(LRU, '_', {state, '$1'}),
                %% Sorted because match order across keys is unspecified.
                {
                    "Multiple match",
                    ?_assertEqual([["MA"], ["NY"]], lists:sort(Values))
                }
            end,
            fun({ok, LRU}) ->
                {
                    "Empty match_object",
                    ?_assertEqual([], ets_lru:match_object(LRU, a, '$1'))
                }
            end,
            fun({ok, LRU}) ->
                ets_lru:insert(LRU, ans, 42),
                [
                    {
                        "Single match_object (registered)",
                        ?_assertEqual(
                            [42],
                            ets_lru:match_object(test_lru, ans, '$1')
                        )
                    },
                    {
                        "Single match_object (pid)",
                        ?_assertEqual(
                            [42],
                            ets_lru:match_object(LRU, ans, '$1')
                        )
                    }
                ]
            end,
            fun({ok, LRU}) ->
                ets_lru:insert(LRU, {color, blue}, a),
                ets_lru:insert(LRU, {color, red}, b),
                Values = ets_lru:match_object(LRU, {color, '_'}, '_'),
                {
                    "Multiple match_object",
                    ?_assertEqual(lists:sort(Values), [a, b])
                }
            end
        ]}
    }.

%% foreachx body for lru_good_options_test_/0. First clause: startup
%% succeeded, assert we got a pid. Second clause: startup did not return
%% {ok, _}; ?_assertEqual(ok, ErrorMsg) then fails, surfacing the actual
%% error term in the test output.
test_good_opts(Opts, {ok, LRU}) ->
    Msg = io_lib:format("LRU created ok with options: ~w", [Opts]),
    {lists:flatten(Msg), ?_assert(is_pid(LRU))};
test_good_opts(Opts, ErrorMsg) ->
    Msg = io_lib:format("LRU created ok with options: ~w", [Opts]),
    {lists:flatten(Msg), ?_assertEqual(ok, ErrorMsg)}.

%% foreachx body for lru_bad_options_test_/0: the invalid option thrown
%% by ets_lru's set_options/2 must round-trip through the
%% {bad_return_value, {invalid_option, _}} start error unchanged.
test_bad_opts([Opts], {error, {bad_return_value, {invalid_option, Opts2}}}) ->
    Msg = io_lib:format("LRU failed with bad options: ~w", [Opts]),
    {lists:flatten(Msg), ?_assertEqual(Opts, Opts2)}.
%% foreachx body for lru_limits_test_/0, dispatching on the option under
%% test. For max_objects/max_size we deliberately over-insert (100x /
%% 10x the limit) and assert the table never exceeds the bound; for
%% max_lifetime we check a fresh insert is readable, then gone after
%% ~1.5x the lifetime (timer:sleep/1 takes milliseconds, so the
%% configured lifetime is assumed to be in ms here).
test_limits([{max_objects, N}], {ok, LRU}) ->
    {
        "Max object count ok",
        ?_assert(insert_kvs(size, LRU, 100 * N, N))
    };
test_limits([{max_size, N}], {ok, LRU}) ->
    {
        "Max size ok",
        ?_assert(insert_kvs(memory, LRU, 10 * N, N))
    };
test_limits([{max_lifetime, N}], {ok, LRU}) ->
    [
        {
            "Expire leaves new entries",
            ?_test(begin
                ets_lru:insert(LRU, foo, bar),
                ?assertEqual({ok, bar}, ets_lru:lookup(LRU, foo))
            end)
        },
        {
            "Entry was expired",
            ?_test(begin
                timer:sleep(round(N * 1.5)),
                ?assertEqual(not_found, ets_lru:lookup(LRU, foo))
            end)
        }
    ].

%% Insert Count entries, checking after each insert that
%% ets:info(lru_objects, Info) -- Info is 'size' (row count) or 'memory'
%% (words) -- stays within Limit. The hardcoded table name lru_objects
%% corresponds to the LRU registered as 'lru' in lru_limits_test_/0.
%% Errors with exceeded_limit on a persistent violation; returns true
%% once Count reaches 0.
insert_kvs(_, _, 0, _) ->
    true;
insert_kvs(Info, LRU, Count, Limit) ->
    ets_lru:insert(LRU, Count, 1.5234),
    case ets:info(lru_objects, Info) > Limit of
        true ->
            % Retry again as eviction statistics
            % returned by ets:info() can be delayed.
            timer:sleep(1),
            case ets:info(lru_objects, Info) > Limit of
                true ->
                    erlang:error(exceeded_limit);
                false ->
                    true
            end;
        false ->
            true
    end,
    insert_kvs(Info, LRU, Count - 1, Limit).

%% Fixture cleanup: stop the LRU and block until it is actually down,
%% returning the exit reason observed via a monitor (so tests can assert
%% a 'normal' shutdown). The {error, _} clause tolerates fixtures whose
%% setup failed (e.g. the bad-options tests).
stop_lru({ok, LRU}) ->
    Ref = erlang:monitor(process, LRU),
    ets_lru:stop(LRU),
    receive
        {'DOWN', Ref, process, LRU, Reason} -> Reason
    end;
stop_lru({error, _}) ->
    ok.

%% A recognized time_unit option (microsecond) must yield a usable LRU.
valid_parameterized_time_unit_test() ->
    Opts = [{time_unit, microsecond}],
    {ok, LRU} = ets_lru:start_link(lru_test, Opts),
    ?assert(is_process_alive(LRU)),
    ok = ets_lru:insert(LRU, foo, bar),
    ?assertEqual({ok, bar}, ets_lru:lookup(LRU, foo)),
    ?assertEqual(ok, ets_lru:stop(LRU)).

%% An unrecognized time_unit atom passes option validation but makes
%% erlang:monotonic_time/1 fail on first use, crashing the server; the
%% linked caller then sees the exit from the insert call.
invalid_parameterized_time_unit_test() ->
    Opts = [{time_unit, invalid}],
    {ok, LRU} = ets_lru:start_link(lru_test, Opts),
    ?assertExit(_, ets_lru:insert(LRU, foo, bar)).
diff --git a/src/khash/.gitignore b/src/khash/.gitignore new file mode 100644 index 000000000..a14ed6b30 --- /dev/null +++ b/src/khash/.gitignore @@ -0,0 +1,10 @@ +/c_src/hash.d +/c_src/khash.d +c_src/*.o +ebin/ +test/*.beam +.eunit +.rebar +priv/*.so +vc*.pdb +compile_commands.json diff --git a/src/khash/LICENSE b/src/khash/LICENSE new file mode 100644 index 000000000..873b78bd3 --- /dev/null +++ b/src/khash/LICENSE @@ -0,0 +1,76 @@ +khash +===== + +Files: c_src/khash.c src/* test/*.t test/util.erl test/gen_term.erl + +Copyright 2013 Cloudant, Inc <support@cloudant.com> + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ + +Kazlib - Hash Table Data Type +============================= + +Files: c_src/hash.* + +Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net> + +Free Software License: + +All rights are reserved by the author, with the following exceptions: +Permission is granted to freely reproduce and distribute this software, +possibly in exchange for a fee, provided that this copyright notice appears +intact. Permission is also granted to adapt this software to produce +derivative works, as long as the modified versions carry this copyright +notice and additional notices stating that the work has been modified. +This source code may be translated into executable form and incorporated +into proprietary software; there is no requirement for such software to +contain a copyright notice related to this source. + + +Etap +==== + +Files: test/etap.erl + +Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/src/khash/Makefile b/src/khash/Makefile new file mode 100644 index 000000000..ccc4a8ab0 --- /dev/null +++ b/src/khash/Makefile @@ -0,0 +1,44 @@ +REBAR?=rebar + + +.PHONY: all +# target: all - Makes everything +all: build + + +.PHONY: build +# target: build - Builds the project +build: + $(REBAR) compile + + +.PHONY: check +# target: check - Checks if project builds and passes all the tests +check: build eunit + + +.PHONY: clean +# target: clean - Removes build artifacts +clean: + $(REBAR) clean + rm -f test/*.beam + + +.PHONY: distclean +# target: distclean - Removes all unversioned files +distclean: clean + git clean -fxd + + +.PHONY: eunit +# target: eunit - Runs eunit test suite +eunit: + $(REBAR) eunit + + +.PHONY: help +# target: help - Prints this help +help: + @egrep "^# target:" Makefile | sed -e 's/^# target: //g' | sort + + diff --git a/src/khash/README.md b/src/khash/README.md new file mode 100644 index 000000000..c04d00829 --- /dev/null +++ b/src/khash/README.md @@ -0,0 +1,4 @@ +khash +===== + +This is a basic NIF wrapper around Kazlib's hash data structure. diff --git a/src/khash/c_src/hash.c b/src/khash/c_src/hash.c new file mode 100644 index 000000000..e3da0da20 --- /dev/null +++ b/src/khash/c_src/hash.c @@ -0,0 +1,843 @@ +/* + * Hash Table Data Type + * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net> + * + * Free Software License: + * + * All rights are reserved by the author, with the following exceptions: + * Permission is granted to freely reproduce and distribute this software, + * possibly in exchange for a fee, provided that this copyright notice appears + * intact. Permission is also granted to adapt this software to produce + * derivative works, as long as the modified versions carry this copyright + * notice and additional notices stating that the work has been modified. 
+ * This source code may be translated into executable form and incorporated + * into proprietary software; there is no requirement for such software to + * contain a copyright notice related to this source. + * + * $Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $ + * $Name: kazlib_1_20 $ + */ + +#include <stdlib.h> +#include <stddef.h> +#include <assert.h> +#include <string.h> +#define HASH_IMPLEMENTATION +#include "hash.h" + +#ifdef KAZLIB_RCSID +static const char rcsid[] = "$Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $"; +#endif + +#define INIT_BITS 6 +#define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */ +#define INIT_MASK ((INIT_SIZE) - 1) + +#define next hash_next +#define key hash_key +#define data hash_data +#define hkey hash_hkey + +#define table hash_table +#define nchains hash_nchains +#define nodecount hash_nodecount +#define maxcount hash_maxcount +#define highmark hash_highmark +#define lowmark hash_lowmark +#define compare hash_compare +#define function hash_function +#define allocnode hash_allocnode +#define freenode hash_freenode +#define context hash_context +#define mask hash_mask +#define dynamic hash_dynamic + +#define table hash_table +#define chain hash_chain + +static hnode_t *kl_hnode_alloc(void *context); +static void kl_hnode_free(hnode_t *node, void *context); +static hash_val_t hash_fun_default(const void *key); +static int hash_comp_default(const void *key1, const void *key2); + +int hash_val_t_bit; + +/* + * Compute the number of bits in the hash_val_t type. We know that hash_val_t + * is an unsigned integral type. Thus the highest value it can hold is a + * Mersenne number (power of two, less one). We initialize a hash_val_t + * object with this value and then shift bits out one by one while counting. + * Notes: + * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power + * of two. 
This means that its binary representation consists of all one + * bits, and hence ``val'' is initialized to all one bits. + * 2. While bits remain in val, we increment the bit count and shift it to the + * right, replacing the topmost bit by zero. + */ + +static void compute_bits(void) +{ + hash_val_t val = HASH_VAL_T_MAX; /* 1 */ + int bits = 0; + + while (val) { /* 2 */ + bits++; + val >>= 1; + } + + hash_val_t_bit = bits; +} + +/* + * Verify whether the given argument is a power of two. + */ + +static int is_power_of_two(hash_val_t arg) +{ + if (arg == 0) + return 0; + while ((arg & 1) == 0) + arg >>= 1; + return (arg == 1); +} + +/* + * Compute a shift amount from a given table size + */ + +static hash_val_t compute_mask(hashcount_t size) +{ + assert (is_power_of_two(size)); + assert (size >= 2); + + return size - 1; +} + +/* + * Initialize the table of pointers to null. + */ + +static void clear_table(hash_t *hash) +{ + hash_val_t i; + + for (i = 0; i < hash->nchains; i++) + hash->table[i] = NULL; +} + +/* + * Double the size of a dynamic table. This works as follows. Each chain splits + * into two adjacent chains. The shift amount increases by one, exposing an + * additional bit of each hashed key. For each node in the original chain, the + * value of this newly exposed bit will decide which of the two new chains will + * receive the node: if the bit is 1, the chain with the higher index will have + * the node, otherwise the lower chain will receive the node. In this manner, + * the hash table will continue to function exactly as before without having to + * rehash any of the keys. + * Notes: + * 1. Overflow check. + * 2. The new number of chains is twice the old number of chains. + * 3. The new mask is one bit wider than the previous, revealing a + * new bit in all hashed keys. + * 4. Allocate a new table of chain pointers that is twice as large as the + * previous one. + * 5. 
If the reallocation was successful, we perform the rest of the growth + * algorithm, otherwise we do nothing. + * 6. The exposed_bit variable holds a mask with which each hashed key can be + * AND-ed to test the value of its newly exposed bit. + * 7. Now loop over each chain in the table and sort its nodes into two + * chains based on the value of each node's newly exposed hash bit. + * 8. The low chain replaces the current chain. The high chain goes + * into the corresponding sister chain in the upper half of the table. + * 9. We have finished dealing with the chains and nodes. We now update + * the various bookeeping fields of the hash structure. + */ + +static void grow_table(hash_t *hash) +{ + hnode_t **newtable; + + assert (2 * hash->nchains > hash->nchains); /* 1 */ + + newtable = realloc(hash->table, + sizeof *newtable * hash->nchains * 2); /* 4 */ + + if (newtable) { /* 5 */ + hash_val_t mask = (hash->mask << 1) | 1; /* 3 */ + hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */ + hash_val_t chain; + + assert (mask != hash->mask); + + for (chain = 0; chain < hash->nchains; chain++) { /* 7 */ + hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next; + + for (hptr = newtable[chain]; hptr != 0; hptr = next) { + next = hptr->next; + + if (hptr->hkey & exposed_bit) { + hptr->next = high_chain; + high_chain = hptr; + } else { + hptr->next = low_chain; + low_chain = hptr; + } + } + + newtable[chain] = low_chain; /* 8 */ + newtable[chain + hash->nchains] = high_chain; + } + + hash->table = newtable; /* 9 */ + hash->mask = mask; + hash->nchains *= 2; + hash->lowmark *= 2; + hash->highmark *= 2; + } + assert (kl_hash_verify(hash)); +} + +/* + * Cut a table size in half. This is done by folding together adjacent chains + * and populating the lower half of the table with these chains. The chains are + * simply spliced together. Once this is done, the whole table is reallocated + * to a smaller object. + * Notes: + * 1. It is illegal to have a hash table with one slot. 
This would mean that
 * hash->shift is equal to hash_val_t_bit, an illegal shift value.
 * Also, other things could go wrong, such as hash->lowmark becoming zero.
 * 2. Looping over each pair of sister chains, the low_chain is set to
 * point to the head node of the chain in the lower half of the table,
 * and high_chain points to the head node of the sister in the upper half.
 * 3. The intent here is to compute a pointer to the last node of the
 * lower chain into the low_tail variable. If this chain is empty,
 * low_tail ends up with a null value.
 * 4. If the lower chain is not empty, we simply tack the upper chain onto it.
 * If the upper chain is a null pointer, nothing happens.
 * 5. Otherwise, if the low chain is empty but the high chain is not, the
 * high chain is simply transferred to the lower half of the table.
 * 6. Otherwise if both chains are empty, there is nothing to do.
 * 7. All the chain pointers are in the lower half of the table now, so
 * we reallocate it to a smaller object. This, of course, invalidates
 * all pointer-to-pointers which reference into the table from the
 * first node of each chain.
 * 8. Though it's unlikely, the reallocation may fail. In this case we
 * pretend that the table _was_ reallocated to a smaller object.
 * 9. Finally, update the various table parameters to reflect the new size.
+ */ + +static void shrink_table(hash_t *hash) +{ + hash_val_t chain, nchains; + hnode_t **newtable, *low_tail, *low_chain, *high_chain; + + assert (hash->nchains >= 2); /* 1 */ + nchains = hash->nchains / 2; + + for (chain = 0; chain < nchains; chain++) { + low_chain = hash->table[chain]; /* 2 */ + high_chain = hash->table[chain + nchains]; + for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next) + ; /* 3 */ + if (low_chain != 0) /* 4 */ + low_tail->next = high_chain; + else if (high_chain != 0) /* 5 */ + hash->table[chain] = high_chain; + else + assert (hash->table[chain] == NULL); /* 6 */ + } + newtable = realloc(hash->table, + sizeof *newtable * nchains); /* 7 */ + if (newtable) /* 8 */ + hash->table = newtable; + hash->mask >>= 1; /* 9 */ + hash->nchains = nchains; + hash->lowmark /= 2; + hash->highmark /= 2; + assert (kl_hash_verify(hash)); +} + + +/* + * Create a dynamic hash table. Both the hash table structure and the table + * itself are dynamically allocated. Furthermore, the table is extendible in + * that it will automatically grow as its load factor increases beyond a + * certain threshold. + * Notes: + * 1. If the number of bits in the hash_val_t type has not been computed yet, + * we do so here, because this is likely to be the first function that the + * user calls. + * 2. Allocate a hash table control structure. + * 3. If a hash table control structure is successfully allocated, we + * proceed to initialize it. Otherwise we return a null pointer. + * 4. We try to allocate the table of hash chains. + * 5. If we were able to allocate the hash chain table, we can finish + * initializing the hash structure and the table. Otherwise, we must + * backtrack by freeing the hash structure. + * 6. INIT_SIZE should be a power of two. The high and low marks are always set + * to be twice the table size and half the table size respectively. 
When the + * number of nodes in the table grows beyond the high size (beyond load + * factor 2), it will double in size to cut the load factor down to about + * about 1. If the table shrinks down to or beneath load factor 0.5, + * it will shrink, bringing the load up to about 1. However, the table + * will never shrink beneath INIT_SIZE even if it's emptied. + * 7. This indicates that the table is dynamically allocated and dynamically + * resized on the fly. A table that has this value set to zero is + * assumed to be statically allocated and will not be resized. + * 8. The table of chains must be properly reset to all null pointers. + */ + +hash_t *kl_hash_create(hashcount_t maxcount, hash_comp_t compfun, + hash_fun_t hashfun) +{ + hash_t *hash; + + if (hash_val_t_bit == 0) /* 1 */ + compute_bits(); + + hash = malloc(sizeof *hash); /* 2 */ + + if (hash) { /* 3 */ + hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */ + if (hash->table) { /* 5 */ + hash->nchains = INIT_SIZE; /* 6 */ + hash->highmark = INIT_SIZE * 2; + hash->lowmark = INIT_SIZE / 2; + hash->nodecount = 0; + hash->maxcount = maxcount; + hash->compare = compfun ? compfun : hash_comp_default; + hash->function = hashfun ? hashfun : hash_fun_default; + hash->allocnode = kl_hnode_alloc; + hash->freenode = kl_hnode_free; + hash->context = NULL; + hash->mask = INIT_MASK; + hash->dynamic = 1; /* 7 */ + clear_table(hash); /* 8 */ + assert (kl_hash_verify(hash)); + return hash; + } + free(hash); + } + + return NULL; +} + +/* + * Select a different set of node allocator routines. + */ + +void kl_hash_set_allocator(hash_t *hash, hnode_alloc_t al, + hnode_free_t fr, void *context) +{ + assert (kl_hash_count(hash) == 0); + assert ((al == 0 && fr == 0) || (al != 0 && fr != 0)); + + hash->allocnode = al ? al : kl_hnode_alloc; + hash->freenode = fr ? 
fr : kl_hnode_free; + hash->context = context; +} + +/* + * Free every node in the hash using the hash->freenode() function pointer, and + * cause the hash to become empty. + */ + +void kl_hash_free_nodes(hash_t *hash) +{ + hscan_t hs; + hnode_t *node; + kl_hash_scan_begin(&hs, hash); + while ((node = kl_hash_scan_next(&hs))) { + kl_hash_scan_delete(hash, node); + hash->freenode(node, hash->context); + } + hash->nodecount = 0; + clear_table(hash); +} + +/* + * Obsolescent function for removing all nodes from a table, + * freeing them and then freeing the table all in one step. + */ + +void kl_hash_free(hash_t *hash) +{ +#ifdef KAZLIB_OBSOLESCENT_DEBUG + assert ("call to obsolescent function hash_free()" && 0); +#endif + kl_hash_free_nodes(hash); + kl_hash_destroy(hash); +} + +/* + * Free a dynamic hash table structure. + */ + +void kl_hash_destroy(hash_t *hash) +{ + assert (hash_val_t_bit != 0); + assert (kl_hash_isempty(hash)); + free(hash->table); + free(hash); +} + +/* + * Initialize a user supplied hash structure. The user also supplies a table of + * chains which is assigned to the hash structure. The table is static---it + * will not grow or shrink. + * 1. See note 1. in hash_create(). + * 2. The user supplied array of pointers hopefully contains nchains nodes. + * 3. See note 7. in hash_create(). + * 4. We must dynamically compute the mask from the given power of two table + * size. + * 5. The user supplied table can't be assumed to contain null pointers, + * so we reset it here. + */ + +hash_t *kl_hash_init(hash_t *hash, hashcount_t maxcount, + hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table, + hashcount_t nchains) +{ + if (hash_val_t_bit == 0) /* 1 */ + compute_bits(); + + assert (is_power_of_two(nchains)); + + hash->table = table; /* 2 */ + hash->nchains = nchains; + hash->nodecount = 0; + hash->maxcount = maxcount; + hash->compare = compfun ? compfun : hash_comp_default; + hash->function = hashfun ? 
hashfun : hash_fun_default; + hash->dynamic = 0; /* 3 */ + hash->mask = compute_mask(nchains); /* 4 */ + clear_table(hash); /* 5 */ + + assert (kl_hash_verify(hash)); + + return hash; +} + +/* + * Reset the hash scanner so that the next element retrieved by + * hash_scan_next() shall be the first element on the first non-empty chain. + * Notes: + * 1. Locate the first non empty chain. + * 2. If an empty chain is found, remember which one it is and set the next + * pointer to refer to its first element. + * 3. Otherwise if a chain is not found, set the next pointer to NULL + * so that hash_scan_next() shall indicate failure. + */ + +void kl_hash_scan_begin(hscan_t *scan, hash_t *hash) +{ + hash_val_t nchains = hash->nchains; + hash_val_t chain; + + scan->table = hash; + + /* 1 */ + + for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++) + ; + + if (chain < nchains) { /* 2 */ + scan->chain = chain; + scan->next = hash->table[chain]; + } else { /* 3 */ + scan->next = NULL; + } +} + +/* + * Retrieve the next node from the hash table, and update the pointer + * for the next invocation of hash_scan_next(). + * Notes: + * 1. Remember the next pointer in a temporary value so that it can be + * returned. + * 2. This assertion essentially checks whether the module has been properly + * initialized. The first point of interaction with the module should be + * either hash_create() or hash_init(), both of which set hash_val_t_bit to + * a non zero value. + * 3. If the next pointer we are returning is not NULL, then the user is + * allowed to call hash_scan_next() again. We prepare the new next pointer + * for that call right now. That way the user is allowed to delete the node + * we are about to return, since we will no longer be needing it to locate + * the next node. + * 4. If there is a next node in the chain (next->next), then that becomes the + * new next node, otherwise ... + * 5. 
We have exhausted the current chain, and must locate the next subsequent + * non-empty chain in the table. + * 6. If a non-empty chain is found, the first element of that chain becomes + * the new next node. Otherwise there is no new next node and we set the + * pointer to NULL so that the next time hash_scan_next() is called, a null + * pointer shall be immediately returned. + */ + + +hnode_t *kl_hash_scan_next(hscan_t *scan) +{ + hnode_t *next = scan->next; /* 1 */ + hash_t *hash = scan->table; + hash_val_t chain = scan->chain + 1; + hash_val_t nchains = hash->nchains; + + assert (hash_val_t_bit != 0); /* 2 */ + + if (next) { /* 3 */ + if (next->next) { /* 4 */ + scan->next = next->next; + } else { + while (chain < nchains && hash->table[chain] == 0) /* 5 */ + chain++; + if (chain < nchains) { /* 6 */ + scan->chain = chain; + scan->next = hash->table[chain]; + } else { + scan->next = NULL; + } + } + } + return next; +} + +/* + * Insert a node into the hash table. + * Notes: + * 1. It's illegal to insert more than the maximum number of nodes. The client + * should verify that the hash table is not full before attempting an + * insertion. + * 2. The same key may not be inserted into a table twice. + * 3. If the table is dynamic and the load factor is already at >= 2, + * grow the table. + * 4. We take the bottom N bits of the hash value to derive the chain index, + * where N is the base 2 logarithm of the size of the hash table. 
+ */ + +void kl_hash_insert(hash_t *hash, hnode_t *node, const void *key) +{ + hash_val_t hkey, chain; + + assert (hash_val_t_bit != 0); + assert (node->next == NULL); + assert (hash->nodecount < hash->maxcount); /* 1 */ + assert (kl_hash_lookup(hash, key) == NULL); /* 2 */ + + if (hash->dynamic && hash->nodecount >= hash->highmark) /* 3 */ + grow_table(hash); + + hkey = hash->function(key); + chain = hkey & hash->mask; /* 4 */ + + node->key = key; + node->hkey = hkey; + node->next = hash->table[chain]; + hash->table[chain] = node; + hash->nodecount++; + + assert (kl_hash_verify(hash)); +} + +/* + * Find a node in the hash table and return a pointer to it. + * Notes: + * 1. We hash the key and keep the entire hash value. As an optimization, when + * we descend down the chain, we can compare hash values first and only if + * hash values match do we perform a full key comparison. + * 2. To locate the chain from among 2^N chains, we look at the lower N bits of + * the hash value by anding them with the current mask. + * 3. Looping through the chain, we compare the stored hash value inside each + * node against our computed hash. If they match, then we do a full + * comparison between the unhashed keys. If these match, we have located the + * entry. + */ + +hnode_t *kl_hash_lookup(hash_t *hash, const void *key) +{ + hash_val_t hkey, chain; + hnode_t *nptr; + + hkey = hash->function(key); /* 1 */ + chain = hkey & hash->mask; /* 2 */ + + for (nptr = hash->table[chain]; nptr; nptr = nptr->next) { /* 3 */ + if (nptr->hkey == hkey && hash->compare(nptr->key, key) == 0) + return nptr; + } + + return NULL; +} + +/* + * Delete the given node from the hash table. Since the chains + * are singly linked, we must locate the start of the node's chain + * and traverse. + * Notes: + * 1. The node must belong to this hash table, and its key must not have + * been tampered with. + * 2. If this deletion will take the node count below the low mark, we + * shrink the table now. + * 3. 
Determine which chain the node belongs to, and fetch the pointer + * to the first node in this chain. + * 4. If the node being deleted is the first node in the chain, then + * simply update the chain head pointer. + * 5. Otherwise advance to the node's predecessor, and splice out + * by updating the predecessor's next pointer. + * 6. Indicate that the node is no longer in a hash table. + */ + +hnode_t *kl_hash_delete(hash_t *hash, hnode_t *node) +{ + hash_val_t chain; + hnode_t *hptr; + + assert (kl_hash_lookup(hash, node->key) == node); /* 1 */ + assert (hash_val_t_bit != 0); + + if (hash->dynamic && hash->nodecount <= hash->lowmark + && hash->nodecount > INIT_SIZE) + shrink_table(hash); /* 2 */ + + chain = node->hkey & hash->mask; /* 3 */ + hptr = hash->table[chain]; + + if (hptr == node) { /* 4 */ + hash->table[chain] = node->next; + } else { + while (hptr->next != node) { /* 5 */ + assert (hptr != 0); + hptr = hptr->next; + } + assert (hptr->next == node); + hptr->next = node->next; + } + + hash->nodecount--; + assert (kl_hash_verify(hash)); + + node->next = NULL; /* 6 */ + return node; +} + +int kl_hash_alloc_insert(hash_t *hash, const void *key, void *data) +{ + hnode_t *node = hash->allocnode(hash->context); + + if (node) { + kl_hnode_init(node, data); + kl_hash_insert(hash, node, key); + return 1; + } + return 0; +} + +void kl_hash_delete_free(hash_t *hash, hnode_t *node) +{ + kl_hash_delete(hash, node); + hash->freenode(node, hash->context); +} + +/* + * Exactly like hash_delete, except does not trigger table shrinkage. This is to be + * used from within a hash table scan operation. See notes for hash_delete. 
+ */ + +hnode_t *kl_hash_scan_delete(hash_t *hash, hnode_t *node) +{ + hash_val_t chain; + hnode_t *hptr; + + assert (kl_hash_lookup(hash, node->key) == node); + assert (hash_val_t_bit != 0); + + chain = node->hkey & hash->mask; + hptr = hash->table[chain]; + + if (hptr == node) { + hash->table[chain] = node->next; + } else { + while (hptr->next != node) + hptr = hptr->next; + hptr->next = node->next; + } + + hash->nodecount--; + assert (kl_hash_verify(hash)); + node->next = NULL; + + return node; +} + +/* + * Like hash_delete_free but based on hash_scan_delete. + */ + +void kl_hash_scan_delfree(hash_t *hash, hnode_t *node) +{ + kl_hash_scan_delete(hash, node); + hash->freenode(node, hash->context); +} + +/* + * Verify whether the given object is a valid hash table. This means + * Notes: + * 1. If the hash table is dynamic, verify whether the high and + * low expansion/shrinkage thresholds are powers of two. + * 2. Count all nodes in the table, and test each hash value + * to see whether it is correct for the node's chain. + */ + +int kl_hash_verify(hash_t *hash) +{ + hashcount_t count = 0; + hash_val_t chain; + hnode_t *hptr; + + if (hash->dynamic) { /* 1 */ + if (hash->lowmark >= hash->highmark) + return 0; + if (!is_power_of_two(hash->highmark)) + return 0; + if (!is_power_of_two(hash->lowmark)) + return 0; + } + + for (chain = 0; chain < hash->nchains; chain++) { /* 2 */ + for (hptr = hash->table[chain]; hptr != 0; hptr = hptr->next) { + if ((hptr->hkey & hash->mask) != chain) + return 0; + count++; + } + } + + if (count != hash->nodecount) + return 0; + + return 1; +} + +/* + * Test whether the hash table is full and return 1 if this is true, + * 0 if it is false. + */ + +#undef kl_hash_isfull +int kl_hash_isfull(hash_t *hash) +{ + return hash->nodecount == hash->maxcount; +} + +/* + * Test whether the hash table is empty and return 1 if this is true, + * 0 if it is false. 
+ */ + +#undef kl_hash_isempty +int kl_hash_isempty(hash_t *hash) +{ + return hash->nodecount == 0; +} + +static hnode_t *kl_hnode_alloc(void *context) +{ + return malloc(sizeof *kl_hnode_alloc(NULL)); +} + +static void kl_hnode_free(hnode_t *node, void *context) +{ + free(node); +} + + +/* + * Create a hash table node dynamically and assign it the given data. + */ + +hnode_t *kl_hnode_create(void *data) +{ + hnode_t *node = malloc(sizeof *node); + if (node) { + node->data = data; + node->next = NULL; + } + return node; +} + +/* + * Initialize a client-supplied node + */ + +hnode_t *kl_hnode_init(hnode_t *hnode, void *data) +{ + hnode->data = data; + hnode->next = NULL; + return hnode; +} + +/* + * Destroy a dynamically allocated node. + */ + +void kl_hnode_destroy(hnode_t *hnode) +{ + free(hnode); +} + +#undef kl_hnode_put +void kl_hnode_put(hnode_t *node, void *data) +{ + node->data = data; +} + +#undef kl_hnode_get +void *kl_hnode_get(hnode_t *node) +{ + return node->data; +} + +#undef kl_hnode_getkey +const void *kl_hnode_getkey(hnode_t *node) +{ + return node->key; +} + +#undef kl_hash_count +hashcount_t kl_hash_count(hash_t *hash) +{ + return hash->nodecount; +} + +#undef kl_hash_size +hashcount_t kl_hash_size(hash_t *hash) +{ + return hash->nchains; +} + +static hash_val_t hash_fun_default(const void *key) +{ + static unsigned long randbox[] = { + 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U, + 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU, + 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU, + 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU, + }; + + const unsigned char *str = key; + hash_val_t acc = 0; + + while (*str) { + acc ^= randbox[(*str + acc) & 0xf]; + acc = (acc << 1) | (acc >> 31); + acc &= 0xffffffffU; + acc ^= randbox[((*str++ >> 4) + acc) & 0xf]; + acc = (acc << 2) | (acc >> 30); + acc &= 0xffffffffU; + } + return acc; +} + +static int hash_comp_default(const void *key1, const void *key2) +{ + return strcmp(key1, key2); +} diff 
--git a/src/khash/c_src/hash.h b/src/khash/c_src/hash.h new file mode 100644 index 000000000..0c75ed00f --- /dev/null +++ b/src/khash/c_src/hash.h @@ -0,0 +1,240 @@ +/* + * Hash Table Data Type + * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net> + * + * Free Software License: + * + * All rights are reserved by the author, with the following exceptions: + * Permission is granted to freely reproduce and distribute this software, + * possibly in exchange for a fee, provided that this copyright notice appears + * intact. Permission is also granted to adapt this software to produce + * derivative works, as long as the modified versions carry this copyright + * notice and additional notices stating that the work has been modified. + * This source code may be translated into executable form and incorporated + * into proprietary software; there is no requirement for such software to + * contain a copyright notice related to this source. + * + * $Id: hash.h,v 1.22.2.7 2000/11/13 01:36:45 kaz Exp $ + * $Name: kazlib_1_20 $ + */ + +#ifndef HASH_H +#define HASH_H + +#include <limits.h> +#ifdef KAZLIB_SIDEEFFECT_DEBUG +#include "sfx.h" +#endif + +/* + * Blurb for inclusion into C++ translation units + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned long hashcount_t; +#define HASHCOUNT_T_MAX ULONG_MAX + +typedef unsigned long hash_val_t; +#define HASH_VAL_T_MAX ULONG_MAX + +extern int hash_val_t_bit; + +#ifndef HASH_VAL_T_BIT +#define HASH_VAL_T_BIT ((int) hash_val_t_bit) +#endif + +/* + * Hash chain node structure. + * Notes: + * 1. This preprocessing directive is for debugging purposes. The effect is + * that if the preprocessor symbol KAZLIB_OPAQUE_DEBUG is defined prior to the + * inclusion of this header, then the structure shall be declared as having + * the single member int __OPAQUE__. 
This way, any attempts by the + * client code to violate the principles of information hiding (by accessing + * the structure directly) can be diagnosed at translation time. However, + * note the resulting compiled unit is not suitable for linking. + * 2. This is a pointer to the next node in the chain. In the last node of a + * chain, this pointer is null. + * 3. The key is a pointer to some user supplied data that contains a unique + * identifier for each hash node in a given table. The interpretation of + * the data is up to the user. When creating or initializing a hash table, + * the user must supply a pointer to a function for comparing two keys, + * and a pointer to a function for hashing a key into a numeric value. + * 4. The value is a user-supplied pointer to void which may refer to + * any data object. It is not interpreted in any way by the hashing + * module. + * 5. The hashed key is stored in each node so that we don't have to rehash + * each key when the table must grow or shrink. + */ + +typedef struct hnode_t { + #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) /* 1 */ + struct hnode_t *hash_next; /* 2 */ + const void *hash_key; /* 3 */ + void *hash_data; /* 4 */ + hash_val_t hash_hkey; /* 5 */ + #else + int hash_dummy; + #endif +} hnode_t; + +/* + * The comparison function pointer type. A comparison function takes two keys + * and produces a value of -1 if the left key is less than the right key, a + * value of 0 if the keys are equal, and a value of 1 if the left key is + * greater than the right key. + */ + +typedef int (*hash_comp_t)(const void *, const void *); + +/* + * The hashing function performs some computation on a key and produces an + * integral value of type hash_val_t based on that key. For best results, the + * function should have a good randomness properties in *all* significant bits + * over the set of keys that are being inserted into a given hash table. 
In + * particular, the most significant bits of hash_val_t are most significant to + * the hash module. Only as the hash table expands are less significant bits + * examined. Thus a function that has good distribution in its upper bits but + * not lower is preferrable to one that has poor distribution in the upper bits + * but not the lower ones. + */ + +typedef hash_val_t (*hash_fun_t)(const void *); + +/* + * allocator functions + */ + +typedef hnode_t *(*hnode_alloc_t)(void *); +typedef void (*hnode_free_t)(hnode_t *, void *); + +/* + * This is the hash table control structure. It keeps track of information + * about a hash table, as well as the hash table itself. + * Notes: + * 1. Pointer to the hash table proper. The table is an array of pointers to + * hash nodes (of type hnode_t). If the table is empty, every element of + * this table is a null pointer. A non-null entry points to the first + * element of a chain of nodes. + * 2. This member keeps track of the size of the hash table---that is, the + * number of chain pointers. + * 3. The count member maintains the number of elements that are presently + * in the hash table. + * 4. The maximum count is the greatest number of nodes that can populate this + * table. If the table contains this many nodes, no more can be inserted, + * and the hash_isfull() function returns true. + * 5. The high mark is a population threshold, measured as a number of nodes, + * which, if exceeded, will trigger a table expansion. Only dynamic hash + * tables are subject to this expansion. + * 6. The low mark is a minimum population threshold, measured as a number of + * nodes. If the table population drops below this value, a table shrinkage + * will occur. Only dynamic tables are subject to this reduction. No table + * will shrink beneath a certain absolute minimum number of nodes. + * 7. This is the a pointer to the hash table's comparison function. The + * function is set once at initialization or creation time. + * 8. 
Pointer to the table's hashing function, set once at creation or + * initialization time. + * 9. The current hash table mask. If the size of the hash table is 2^N, + * this value has its low N bits set to 1, and the others clear. It is used + * to select bits from the result of the hashing function to compute an + * index into the table. + * 10. A flag which indicates whether the table is to be dynamically resized. It + * is set to 1 in dynamically allocated tables, 0 in tables that are + * statically allocated. + */ + +typedef struct hash_t { + #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) + struct hnode_t **hash_table; /* 1 */ + hashcount_t hash_nchains; /* 2 */ + hashcount_t hash_nodecount; /* 3 */ + hashcount_t hash_maxcount; /* 4 */ + hashcount_t hash_highmark; /* 5 */ + hashcount_t hash_lowmark; /* 6 */ + hash_comp_t hash_compare; /* 7 */ + hash_fun_t hash_function; /* 8 */ + hnode_alloc_t hash_allocnode; + hnode_free_t hash_freenode; + void *hash_context; + hash_val_t hash_mask; /* 9 */ + int hash_dynamic; /* 10 */ + #else + int hash_dummy; + #endif +} hash_t; + +/* + * Hash scanner structure, used for traversals of the data structure. + * Notes: + * 1. Pointer to the hash table that is being traversed. + * 2. Reference to the current chain in the table being traversed (the chain + * that contains the next node that shall be retrieved). + * 3. Pointer to the node that will be retrieved by the subsequent call to + * hash_scan_next(). 
+ */ + +typedef struct hscan_t { + #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) + hash_t *hash_table; /* 1 */ + hash_val_t hash_chain; /* 2 */ + hnode_t *hash_next; /* 3 */ + #else + int hash_dummy; + #endif +} hscan_t; + +extern hash_t *kl_hash_create(hashcount_t, hash_comp_t, hash_fun_t); +extern void kl_hash_set_allocator(hash_t *, hnode_alloc_t, hnode_free_t, void *); +extern void kl_hash_destroy(hash_t *); +extern void kl_hash_free_nodes(hash_t *); +extern void kl_hash_free(hash_t *); +extern hash_t *kl_hash_init(hash_t *, hashcount_t, hash_comp_t, + hash_fun_t, hnode_t **, hashcount_t); +extern void kl_hash_insert(hash_t *, hnode_t *, const void *); +extern hnode_t *kl_hash_lookup(hash_t *, const void *); +extern hnode_t *kl_hash_delete(hash_t *, hnode_t *); +extern int kl_hash_alloc_insert(hash_t *, const void *, void *); +extern void kl_hash_delete_free(hash_t *, hnode_t *); + +extern void kl_hnode_put(hnode_t *, void *); +extern void *kl_hnode_get(hnode_t *); +extern const void *kl_hnode_getkey(hnode_t *); +extern hashcount_t kl_hash_count(hash_t *); +extern hashcount_t kl_hash_size(hash_t *); + +extern int kl_hash_isfull(hash_t *); +extern int kl_hash_isempty(hash_t *); + +extern void kl_hash_scan_begin(hscan_t *, hash_t *); +extern hnode_t *kl_hash_scan_next(hscan_t *); +extern hnode_t *kl_hash_scan_delete(hash_t *, hnode_t *); +extern void kl_hash_scan_delfree(hash_t *, hnode_t *); + +extern int kl_hash_verify(hash_t *); + +extern hnode_t *kl_hnode_create(void *); +extern hnode_t *kl_hnode_init(hnode_t *, void *); +extern void kl_hnode_destroy(hnode_t *); + +#if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) +#ifdef KAZLIB_SIDEEFFECT_DEBUG +#define kl_hash_isfull(H) (SFX_CHECK(H)->hash_nodecount == (H)->hash_maxcount) +#else +#define kl_hash_isfull(H) ((H)->hash_nodecount == (H)->hash_maxcount) +#endif +#define kl_hash_isempty(H) ((H)->hash_nodecount == 0) +#define kl_hash_count(H) ((H)->hash_nodecount) +#define 
kl_hash_size(H) ((H)->hash_nchains) +#define kl_hnode_get(N) ((N)->hash_data) +#define kl_hnode_getkey(N) ((N)->hash_key) +#define kl_hnode_put(N, V) ((N)->hash_data = (V)) +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/khash/c_src/khash.c b/src/khash/c_src/khash.c new file mode 100644 index 000000000..b15a18e64 --- /dev/null +++ b/src/khash/c_src/khash.c @@ -0,0 +1,658 @@ +// This file is part of khash released under the MIT license. +// See the LICENSE file for more information. +// Copyright 2013 Cloudant, Inc <support@cloudant.com> + +#include <assert.h> +#include <string.h> +#include <stdint.h> + +#include "erl_nif.h" +#include "hash.h" + +#ifdef _WIN32 +#define INLINE __inline +#else +#define INLINE inline +#endif + +#define KHASH_VERSION 0 + + +typedef struct +{ + ERL_NIF_TERM atom_ok; + ERL_NIF_TERM atom_error; + ERL_NIF_TERM atom_value; + ERL_NIF_TERM atom_not_found; + ERL_NIF_TERM atom_end_of_table; + ERL_NIF_TERM atom_expired_iterator; + ErlNifResourceType* res_hash; + ErlNifResourceType* res_iter; +} khash_priv; + + +typedef struct +{ + unsigned int hval; + ErlNifEnv* env; + ERL_NIF_TERM key; + ERL_NIF_TERM val; +} khnode_t; + + +typedef struct +{ + int version; + unsigned int gen; + hash_t* h; + ErlNifPid p; +} khash_t; + + +typedef struct +{ + int version; + unsigned int gen; + khash_t* khash; + hscan_t scan; +} khash_iter_t; + + +static INLINE ERL_NIF_TERM +make_atom(ErlNifEnv* env, const char* name) +{ + ERL_NIF_TERM ret; + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) { + return ret; + } + return enif_make_atom(env, name); +} + + +static INLINE ERL_NIF_TERM +make_ok(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM value) +{ + return enif_make_tuple2(env, priv->atom_ok, value); +} + + +static INLINE ERL_NIF_TERM +make_error(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM reason) +{ + return enif_make_tuple2(env, priv->atom_error, reason); +} + + +static INLINE int +check_pid(ErlNifEnv* env, khash_t* khash) +{ + 
ErlNifPid pid; + enif_self(env, &pid); + + if(enif_compare(pid.pid, khash->p.pid) == 0) { + return 1; + } + + return 0; +} + + +hnode_t* +khnode_alloc(void* ctx) +{ + hnode_t* ret = (hnode_t*) enif_alloc(sizeof(hnode_t)); + khnode_t* node = (khnode_t*) enif_alloc(sizeof(khnode_t)); + + memset(ret, '\0', sizeof(hnode_t)); + memset(node, '\0', sizeof(khnode_t)); + + node->env = enif_alloc_env(); + ret->hash_key = node; + + return ret; +} + + +void +khnode_free(hnode_t* obj, void* ctx) +{ + khnode_t* node = (khnode_t*) kl_hnode_getkey(obj); + enif_free_env(node->env); + enif_free(node); + enif_free(obj); + return; +} + + +int +khash_cmp_fun(const void* l, const void* r) +{ + khnode_t* left = (khnode_t*) l; + khnode_t* right = (khnode_t*) r; + int cmp = enif_compare(left->key, right->key); + + if(cmp < 0) { + return -1; + } else if(cmp == 0) { + return 0; + } else { + return 1; + } +} + + +hash_val_t +khash_hash_fun(const void* obj) +{ + khnode_t* node = (khnode_t*) obj; + return (hash_val_t) node->hval; +} + + +static INLINE khash_t* +khash_create_int(ErlNifEnv* env, khash_priv* priv, ERL_NIF_TERM opts) +{ + khash_t* khash = NULL; + + assert(priv != NULL && "missing private data member"); + + khash = (khash_t*) enif_alloc_resource(priv->res_hash, sizeof(khash_t)); + memset(khash, '\0', sizeof(khash_t)); + khash->version = KHASH_VERSION; + khash->gen = 0; + + khash->h = kl_hash_create(HASHCOUNT_T_MAX, khash_cmp_fun, khash_hash_fun); + + if(khash->h == NULL ) { + enif_release_resource(khash); + return NULL; + } + + kl_hash_set_allocator(khash->h, khnode_alloc, khnode_free, NULL); + enif_self(env, &(khash->p)); + + return khash; +} + + +static ERL_NIF_TERM +khash_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_t* khash; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + khash = khash_create_int(env, priv, argv[0]); + if(khash == NULL) { + return enif_make_badarg(env); + } + + ret = 
enif_make_resource(env, khash); + enif_release_resource(khash); + + return make_ok(env, priv, ret); +} + + +static void +khash_free(ErlNifEnv* env, void* obj) +{ + khash_t* khash = (khash_t*) obj; + + if(khash->h != NULL) { + kl_hash_free_nodes(khash->h); + kl_hash_destroy(khash->h); + } + + return; +} + + +static ERL_NIF_TERM +khash_to_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = (khash_priv*) enif_priv_data(env); + ERL_NIF_TERM ret = enif_make_list(env, 0); + khash_t* khash = NULL; + void* res = NULL; + hscan_t scan; + hnode_t* entry; + khnode_t* node; + ERL_NIF_TERM key; + ERL_NIF_TERM val; + ERL_NIF_TERM tuple; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) { + return enif_make_badarg(env); + } + + khash = (khash_t*) res; + + if(!check_pid(env, khash)) { + return enif_make_badarg(env); + } + + kl_hash_scan_begin(&scan, khash->h); + + while((entry = kl_hash_scan_next(&scan)) != NULL) { + node = (khnode_t*) kl_hnode_getkey(entry); + key = enif_make_copy(env, node->key); + val = enif_make_copy(env, node->val); + tuple = enif_make_tuple2(env, key, val); + ret = enif_make_list_cell(env, tuple, ret); + } + + return ret; +} + + +static ERL_NIF_TERM +khash_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_t* khash = NULL; + void* res = NULL; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) { + return enif_make_badarg(env); + } + + khash = (khash_t*) res; + + if(!check_pid(env, khash)) { + return enif_make_badarg(env); + } + + kl_hash_free_nodes(khash->h); + + khash->gen += 1; + + return priv->atom_ok; +} + + +static INLINE hnode_t* +khash_lookup_int(ErlNifEnv* env, uint32_t hv, ERL_NIF_TERM key, khash_t* khash) +{ + khnode_t node; + node.hval = hv; + node.env = env; + node.key = key; + return kl_hash_lookup(khash->h, &node); +} + + 
+static ERL_NIF_TERM +khash_lookup(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_t* khash = NULL; + void* res = NULL; + uint32_t hval; + hnode_t* entry; + khnode_t* node; + ERL_NIF_TERM ret; + + if(argc != 3) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) { + return enif_make_badarg(env); + } + + khash = (khash_t*) res; + + if(!check_pid(env, khash)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &hval)) { + return enif_make_badarg(env); + } + + entry = khash_lookup_int(env, hval, argv[2], khash); + if(entry == NULL) { + ret = priv->atom_not_found; + } else { + node = (khnode_t*) kl_hnode_getkey(entry); + ret = enif_make_copy(env, node->val); + ret = enif_make_tuple2(env, priv->atom_value, ret); + } + + return ret; +} + + +static ERL_NIF_TERM +khash_get(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_t* khash = NULL; + void* res = NULL; + uint32_t hval; + hnode_t* entry; + khnode_t* node; + ERL_NIF_TERM ret; + + if(argc != 4) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) { + return enif_make_badarg(env); + } + + khash = (khash_t*) res; + + if(!check_pid(env, khash)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &hval)) { + return enif_make_badarg(env); + } + + entry = khash_lookup_int(env, hval, argv[2], khash); + if(entry == NULL) { + ret = argv[3]; + } else { + node = (khnode_t*) kl_hnode_getkey(entry); + ret = enif_make_copy(env, node->val); + } + + return ret; +} + + +static ERL_NIF_TERM +khash_put(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_t* khash = NULL; + void* res = NULL; + uint32_t hval; + hnode_t* entry; + khnode_t* node; + + if(argc != 4) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], 
priv->res_hash, &res)) { + return enif_make_badarg(env); + } + + khash = (khash_t*) res; + + if(!check_pid(env, khash)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &hval)) { + return enif_make_badarg(env); + } + + entry = khash_lookup_int(env, hval, argv[2], khash); + if(entry == NULL) { + entry = khnode_alloc(NULL); + node = (khnode_t*) kl_hnode_getkey(entry); + node->hval = hval; + node->key = enif_make_copy(node->env, argv[2]); + node->val = enif_make_copy(node->env, argv[3]); + kl_hash_insert(khash->h, entry, node); + } else { + node = (khnode_t*) kl_hnode_getkey(entry); + enif_clear_env(node->env); + node->key = enif_make_copy(node->env, argv[2]); + node->val = enif_make_copy(node->env, argv[3]); + } + + khash->gen += 1; + + return priv->atom_ok; +} + + +static ERL_NIF_TERM +khash_del(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_t* khash = NULL; + void* res = NULL; + uint32_t hval; + hnode_t* entry; + ERL_NIF_TERM ret; + + if(argc != 3) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) { + return enif_make_badarg(env); + } + + khash = (khash_t*) res; + + if(!check_pid(env, khash)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &hval)) { + return enif_make_badarg(env); + } + + entry = khash_lookup_int(env, hval, argv[2], khash); + if(entry == NULL) { + ret = priv->atom_not_found; + } else { + kl_hash_delete_free(khash->h, entry); + ret = priv->atom_ok; + } + + khash->gen += 1; + + return ret; +} + + +static ERL_NIF_TERM +khash_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_t* khash; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hash, (void*) &khash)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, khash)) { + return enif_make_badarg(env); + } + + return 
enif_make_uint64(env, kl_hash_count(khash->h)); +} + + +static ERL_NIF_TERM +khash_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_t* khash = NULL; + void* res = NULL; + khash_iter_t* iter; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hash, &res)) { + return enif_make_badarg(env); + } + + khash = (khash_t*) res; + + if(!check_pid(env, khash)) { + return enif_make_badarg(env); + } + + iter = (khash_iter_t*) enif_alloc_resource( + priv->res_iter, sizeof(khash_iter_t)); + memset(iter, '\0', sizeof(khash_iter_t)); + iter->version = KHASH_VERSION; + iter->gen = khash->gen; + iter->khash = khash; + kl_hash_scan_begin(&(iter->scan), iter->khash->h); + + // The iterator needs to guarantee that the khash + // remains alive for the life of the iterator. + enif_keep_resource(khash); + + ret = enif_make_resource(env, iter); + enif_release_resource(iter); + + return make_ok(env, priv, ret); +} + + +static void +khash_iter_free(ErlNifEnv* env, void* obj) +{ + khash_iter_t* iter = (khash_iter_t*) obj; + enif_release_resource(iter->khash); +} + + +static ERL_NIF_TERM +khash_iter_next(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + khash_priv* priv = enif_priv_data(env); + khash_iter_t* iter = NULL; + void* res = NULL; + hnode_t* entry; + khnode_t* node; + ERL_NIF_TERM key; + ERL_NIF_TERM val; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_iter, &res)) { + return enif_make_badarg(env); + } + + iter = (khash_iter_t*) res; + + if(!check_pid(env, iter->khash)) { + return enif_make_badarg(env); + } + + if(iter->gen != iter->khash->gen) { + return make_error(env, priv, priv->atom_expired_iterator); + } + + entry = kl_hash_scan_next(&(iter->scan)); + if(entry == NULL) { + return priv->atom_end_of_table; + } + + node = (khnode_t*) kl_hnode_getkey(entry); + key = 
enif_make_copy(env, node->key); + val = enif_make_copy(env, node->val); + return enif_make_tuple2(env, key, val); +} + + +static int +load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + ErlNifResourceType* res; + + khash_priv* new_priv = (khash_priv*) enif_alloc(sizeof(khash_priv)); + if(new_priv == NULL) { + return 1; + } + + res = enif_open_resource_type( + env, NULL, "khash", khash_free, flags, NULL); + if(res == NULL) { + return 1; + } + new_priv->res_hash = res; + + res = enif_open_resource_type( + env, NULL, "khash_iter", khash_iter_free, flags, NULL); + if(res == NULL) { + return 1; + } + new_priv->res_iter = res; + + new_priv->atom_ok = make_atom(env, "ok"); + new_priv->atom_error = make_atom(env, "error"); + new_priv->atom_value = make_atom(env, "value"); + new_priv->atom_not_found = make_atom(env, "not_found"); + new_priv->atom_end_of_table = make_atom(env, "end_of_table"); + new_priv->atom_expired_iterator = make_atom(env, "expired_iterator"); + + *priv = (void*) new_priv; + + return 0; +} + + +static int +reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static int +upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return load(env, priv, info); +} + + +static void +unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); + return; +} + + +static ErlNifFunc funcs[] = { + {"new", 1, khash_new}, + {"to_list", 1, khash_to_list}, + {"clear", 1, khash_clear}, + {"lookup_int", 3, khash_lookup}, + {"get_int", 4, khash_get}, + {"put_int", 4, khash_put}, + {"del_int", 3, khash_del}, + {"size", 1, khash_size}, + {"iter", 1, khash_iter}, + {"iter_next", 1, khash_iter_next} +}; + + +ERL_NIF_INIT(khash, funcs, &load, &reload, &upgrade, &unload); diff --git a/src/khash/rebar.config b/src/khash/rebar.config new file mode 100644 index 000000000..0bf93a8d1 --- /dev/null +++ b/src/khash/rebar.config @@ -0,0 +1,14 @@ +% vim: set ft=erlang : -*- erlang -*- % 
Magic lines for code editors + +{port_specs, [ + {"priv/khash.so", ["c_src/*.c"]} +]}. + +{port_env, [ + % Development compilation + % {".*", "CFLAGS", "$CFLAGS -g -Wall -Werror -fPIC"} + + % Production compilation + {"(linux|solaris|darwin|freebsd)", "CFLAGS", "$CFLAGS -Wall -Werror -DNDEBUG -O3"}, + {"win32", "CFLAGS", "$CFLAGS /O2 /DNDEBUG /Wall"} +]}. diff --git a/src/khash/src/khash.app.src b/src/khash/src/khash.app.src new file mode 100644 index 000000000..14a45c77d --- /dev/null +++ b/src/khash/src/khash.app.src @@ -0,0 +1,10 @@ +%% This file is part of khash released under the MIT license. +%% See the LICENSE file for more information. +%% Copyright 2013 Cloudant, Inc <support@cloudant.com> + +{application, khash, [ + {description, "A NIF wrapper for Kazlib's hash structure."}, + {vsn, git}, + {registered, []}, + {applications, [kernel]} +]}. diff --git a/src/khash/src/khash.erl b/src/khash/src/khash.erl new file mode 100644 index 000000000..1db4c4cdb --- /dev/null +++ b/src/khash/src/khash.erl @@ -0,0 +1,136 @@ +%% This file is part of khash released under the MIT license. +%% See the LICENSE file for more information. +%% Copyright 2013 Cloudant, Inc <support@cloudant.com> + +-module(khash). +-on_load(init/0). + +-export([ + new/0, + new/1, + from_list/1, + from_list/2, + to_list/1, + clear/1, + lookup/2, + get/2, + get/3, + put/3, + del/2, + size/1, + iter/1, + iter_next/1, + fold/3 +]). + +-define(NOT_LOADED, not_loaded(?LINE)). + +-type kv() :: {any(), any()}. +-type khash() :: term(). +-type khash_iter() :: term(). +-type option() :: []. + +-spec new() -> {ok, khash()}. +new() -> + new([]). + +-spec new([option()]) -> {ok, khash()}. +new(_Options) -> + ?NOT_LOADED. + +-spec from_list([kv()]) -> {ok, khash()}. +from_list(KVList) -> + from_list(KVList, []). + +-spec from_list([kv()], [option()]) -> {ok, khash()}. 
+from_list(KVList, Options) -> + {ok, Hash} = ?MODULE:new(Options), + lists:foreach( + fun({Key, Val}) -> + ?MODULE:put(Hash, Key, Val) + end, + KVList + ), + {ok, Hash}. + +-spec to_list(khash()) -> [kv()]. +to_list(_Hash) -> + ?NOT_LOADED. + +-spec clear(khash()) -> ok. +clear(_Hash) -> + ?NOT_LOADED. + +-spec lookup(khash(), any()) -> {value, any()} | not_found. +lookup(Hash, Key) -> + lookup_int(Hash, erlang:phash2(Key), Key). + +-spec get(khash(), any()) -> any(). +get(Hash, Key) -> + get(Hash, Key, undefined). + +-spec get(khash(), any(), any()) -> any(). +get(Hash, Key, Default) -> + get_int(Hash, erlang:phash2(Key), Key, Default). + +-spec put(khash(), any(), any()) -> ok. +put(Hash, Key, Value) -> + put_int(Hash, erlang:phash2(Key), Key, Value). + +-spec del(khash(), any()) -> ok. +del(Hash, Key) -> + del_int(Hash, erlang:phash2(Key), Key). + +-spec size(khash()) -> non_neg_integer(). +size(_Hash) -> + ?NOT_LOADED. + +-spec iter(khash()) -> {ok, khash_iter()}. +iter(_Hash) -> + ?NOT_LOADED. + +-spec iter_next(khash_iter()) -> + kv() | end_of_table | {error, expired_iterator}. +iter_next(_Iter) -> + ?NOT_LOADED. + +-spec fold(khash(), fun(), any()) -> any(). +fold(Hash, FoldFun, Acc) -> + {ok, Iter} = ?MODULE:iter(Hash), + fold_int(Iter, FoldFun, Acc). + +fold_int(Iter, FoldFun, Acc) -> + case ?MODULE:iter_next(Iter) of + {Key, Value} -> + NewAcc = FoldFun(Key, Value, Acc), + fold_int(Iter, FoldFun, NewAcc); + end_of_table -> + Acc + end. + +init() -> + PrivDir = + case code:priv_dir(?MODULE) of + {error, _} -> + EbinDir = filename:dirname(code:which(?MODULE)), + AppPath = filename:dirname(EbinDir), + filename:join(AppPath, "priv"); + Path -> + Path + end, + erlang:load_nif(filename:join(PrivDir, "khash"), 0). + +lookup_int(_Hash, _HashValue, _Key) -> + ?NOT_LOADED. + +get_int(_Hash, _HashValue, _Key, _Default) -> + ?NOT_LOADED. + +put_int(_Hash, _HashValue, _Key, _Value) -> + ?NOT_LOADED. + +del_int(_Hash, _HashValue, _Key) -> + ?NOT_LOADED. 
+ +not_loaded(Line) -> + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). diff --git a/src/khash/test/gen_term.erl b/src/khash/test/gen_term.erl new file mode 100644 index 000000000..d4244f11f --- /dev/null +++ b/src/khash/test/gen_term.erl @@ -0,0 +1,113 @@ +%% This file is part of khash released under the MIT license. +%% See the LICENSE file for more information. +%% Copyright 2013 Cloudant, Inc <support@cloudant.com> + +-module(gen_term). + +-export([ + any/0, + any/1, + + gen_atom/1, + gen_integer/1, + gen_float/1, + gen_reference/1, + gen_port/1, + gen_pid/1, + gen_tuple/1, + gen_list/1, + gen_short_string/1, + gen_string/1, + gen_binary/1, + gen_bitstring/1, + gen_bignum/1, + gen_function/1 +]). + +any() -> + any(16). + +any(MaxSize) when MaxSize =< 0 -> + Fun = choice(value_types()), + ?MODULE:Fun(MaxSize); +any(MaxSize) -> + Fun = choice(all_types()), + ?MODULE:Fun(MaxSize). + +gen_atom(MaxSize) -> + list_to_atom(gen_short_string(MaxSize)). + +gen_integer(_) -> + Value = + case rand:uniform() < 0.5 of + true -> rand:uniform(127); + false -> rand:uniform(16#FFFFFFFF) + end, + case rand:uniform() < 0.5 of + true -> -1 * Value; + false -> Value + end. + +gen_float(_) -> + rand:uniform() * float(16#FFFFFFFF). + +gen_reference(_) -> + erlang:make_ref(). + +gen_port(_) -> + Ports = erlang:ports(), + lists:nth(rand:uniform(length(Ports)), Ports). + +gen_pid(_) -> + Pids = erlang:processes(), + lists:nth(rand:uniform(length(Pids)), Pids). + +gen_tuple(MaxSize) -> + list_to_tuple(gen_list(MaxSize)). + +gen_list(MaxSize) -> + Width = rand:uniform(MaxSize), + [any(MaxSize - Width) || _ <- lists:seq(1, Width)]. + +gen_short_string(_) -> + Size = rand:uniform(255), + [rand:uniform(127) || _ <- lists:seq(1, Size)]. + +gen_string(_) -> + Size = rand:uniform(4096), + [rand:uniform(127) || _ <- lists:seq(1, Size)]. + +gen_binary(MaxSize) -> + list_to_binary(gen_string(MaxSize)). 
+
+%% Prepend a 4-bit segment so the result's bit size is deliberately not a
+%% multiple of 8 (a true bitstring, not a binary).
+gen_bitstring(MaxSize) ->
+    B = gen_binary(MaxSize),
+    <<2:4/integer, B/binary>>.
+
+%% Always exceeds 64 bits, forcing a bignum representation.
+gen_bignum(_) ->
+    16#FFFFFFFFFFFFFFFF + rand:uniform(16#FFFFFFFF).
+
+%% NOTE(review): despite the name this returns a generator-name atom, not
+%% a fun — presumably intentional (an atom is still a valid term), but
+%% confirm against the original khash repo before "fixing".
+gen_function(_) ->
+    choice(all_types()).
+
+%% Uniform random pick from a list.
+choice(Options) ->
+    lists:nth(rand:uniform(length(Options)), Options).
+
+%% Leaf generators — safe to use when the size budget is exhausted.
+value_types() ->
+    [
+        gen_atom,
+        gen_integer,
+        gen_float,
+        gen_reference,
+        gen_port,
+        gen_pid,
+        gen_short_string,
+        gen_string,
+        gen_binary,
+        gen_bitstring,
+        gen_bignum,
+        gen_function
+    ].
+
+%% Leaf generators plus the compound (recursive) ones.
+all_types() ->
+    value_types() ++ [gen_tuple, gen_list].
diff --git a/src/khash/test/khash_test.erl b/src/khash/test/khash_test.erl
new file mode 100644
index 000000000..09e23a14e
--- /dev/null
+++ b/src/khash/test/khash_test.erl
@@ -0,0 +1,374 @@
+%% EUnit suite for the khash NIF: basic ops, a randomized model check
+%% against dict, iterator semantics, and manual benchmark entry points.
+-module(khash_test).
+
+%% khash_fetch/0 and khash_store/0 are benchmark drivers meant to be run
+%% by hand, not by EUnit; run_keys/1 is exported for the (currently
+%% disabled) randomized keys action.
+-export([
+    khash_fetch/0,
+    khash_store/0,
+    run_keys/1
+]).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(NUM_RAND_CYCLES, 10000).
+-define(NUM_CYCLES, 1000000).
+-define(NUM_KVS, 5000).
+-define(TIMEOUT, 15).
+
+%% Sanity check that the khash module (and therefore its NIF) loads.
+load_test_() ->
+    {
+        "Loaded khash",
+        ?_assertEqual({module, khash}, code:load_file(khash))
+    }.
+
+%% One shared hash is created by the setup fun and threaded through an
+%% ordered list of assertions; later assertions depend on the mutations
+%% made by earlier ones (put, del, clear), so order matters.
+basic_test_() ->
+    {
+        "khash basic operations",
+        {setup, local, fun() -> khash:new() end, fun({ok, _}) -> ok end, fun({ok, C}) ->
+            [
+                {
+                    "Lookup missing is ok",
+                    ?_assertEqual(not_found, khash:lookup(C, <<"foo">>))
+                },
+                {
+                    "Get missing is ok",
+                    ?_assertEqual(undefined, khash:get(C, <<"foo">>))
+                },
+                {
+                    "Del missing is ok",
+                    ?_assertEqual(not_found, khash:del(C, <<"foo">>))
+                },
+                {
+                    "Stored a key",
+                    ?_assertEqual(ok, khash:put(C, <<"foo">>, bar))
+                },
+                {
+                    "Lookuped a key",
+                    ?_assertEqual({value, bar}, khash:lookup(C, <<"foo">>))
+                },
+                {
+                    "Retrieved a key",
+                    ?_assertEqual(bar, khash:get(C, <<"foo">>))
+                },
+                {
+                    "Stored a key",
+                    ?_assertEqual(ok, khash:put(C, <<"bar">>, foo))
+                },
+                {
+                    "Correct size for hash",
+                    ?_assertEqual(2, khash:size(C))
+                },
+                {
+                    "Deleted a key",
+                    ?_assertEqual(ok, khash:del(C, <<"foo">>))
+                },
+                {
+                    "Correct size after delete",
+                    ?_assertEqual(1, khash:size(C))
+                },
+                {
+                    "Cleared the hash",
+                    ?_assertEqual(ok, khash:clear(C))
+                },
+                {
+                    "Correct size after clear",
+                    ?_assertEqual(0, khash:size(C))
+                }
+            ]
+        end}
+    }.
+
+%% Model-based randomized test: apply ?NUM_RAND_CYCLES weighted-random
+%% actions to a khash and a reference dict in lockstep, asserting after
+%% every step that both hold the same key/value pairs. Weights are
+%% relative, not probabilities (see weighted_choice/1).
+randomized_test_() ->
+    {
+        "khash randomized test",
+        {setup, local,
+            fun() ->
+                Dict = dict:new(),
+                {ok, KHash} = khash:new(),
+                Actions = [
+                    {0.1, fun run_clear/1},
+                    {1.0, fun run_get2/1},
+                    {1.0, fun run_get3/1},
+                    {1.0, fun run_put/1},
+                    {1.0, fun run_del/1},
+                    {0.5, fun run_size/1},
+                    % NOTE(review): keys action disabled — presumably
+                    % khash:keys/1 is not implemented; confirm upstream.
+                    % {0.3, fun run_keys/1},
+                    {0.3, fun run_to_list/1}
+                ],
+                {ok, Actions, ?NUM_RAND_CYCLES, {Dict, KHash}}
+            end,
+            fun(State) ->
+                {timeout, ?TIMEOUT, {
+                    "State matches dict implementation",
+                    ?_assert(run_randomized(State, true))
+                }}
+            end}
+    }.
+
+%% Iterator basics over a single-entry hash: one {K, V}, then
+%% end_of_table, and fold/3 visits the single pair.
+%% Fix: test description typos "itrators basics" -> "iterators basic".
+basic_iterators_test_() ->
+    {
+        "khash iterators basic operations",
+        {setup, local,
+            fun() ->
+                {ok, H} = khash:new(),
+                khash:put(H, foo, bar),
+                {ok, I} = khash:iter(H),
+                {ok, H, I}
+            end,
+            fun({ok, H, I}) ->
+                [
+                    {
+                        "Got only kv pair as first element",
+                        ?_assertEqual(khash:iter_next(I), {foo, bar})
+                    },
+                    {
+                        "Only the one kv pair exists",
+                        ?_assertEqual(khash:iter_next(I), end_of_table)
+                    },
+                    {
+                        "Fold works",
+                        ?_test(begin
+                            Fold = fun(K, V, Acc) -> [{K, V} | Acc] end,
+                            ?assertEqual(khash:fold(H, Fold, []), [{foo, bar}])
+                        end)
+                    }
+                ]
+            end}
+    }.
+
+%% An iterator created before any mutation must yield exactly the pairs
+%% that were inserted (compared sorted, since khash order is unspecified).
+multiread_iterators_test_() ->
+    {
+        "khash iterators multi-read test",
+        {setup, local,
+            fun() ->
+                {ok, H} = khash:new(),
+                KVs = kv_data(10),
+                lists:foreach(fun({K, V}) -> khash:put(H, K, V) end, KVs),
+                {ok, I} = khash:iter(H),
+                {ok, I, KVs}
+            end,
+            fun({ok, I, KVs}) ->
+                {
+                    "Read the same exact key/val pairs",
+                    ?_assertEqual(khash_iterate(I, []), KVs)
+                }
+            end}
+    }.
+
+%% Every mutating operation (put/del/clear) must invalidate outstanding
+%% iterators, which then return {error, expired_iterator}.
+%% Fix: test description typo "exipiration" -> "expiration".
+expiration_iterators_test_() ->
+    {
+        "khash iterators expiration functions",
+        {foreach, local,
+            fun() ->
+                Err = {error, expired_iterator},
+                {ok, H} = khash:new(),
+                khash:put(H, foo, bar),
+                {ok, I} = khash:iter(H),
+                {ok, H, I, Err}
+            end,
+            [
+                fun({ok, H, I, Err}) ->
+                    khash:put(H, foo, bar2),
+                    {
+                        "put expires iterators",
+                        ?_assertEqual(khash:iter_next(I), Err)
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    khash:del(H, foo),
+                    {
+                        "del expires iterators",
+                        ?_assertEqual(khash:iter_next(I), Err)
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    khash:clear(H),
+                    {
+                        "clear expires iterators",
+                        ?_assertEqual(khash:iter_next(I), Err)
+                    }
+                end
+            ]}
+    }.
+
+%% Read-only operations (to_list, lookup, get, size, creating another
+%% iterator, iterating) must NOT invalidate an outstanding iterator.
+%% Fix: test description typo "exipiration" -> "expiration".
+no_expiration_iterators_test_() ->
+    {
+        "khash iterators no expiration functions",
+        {foreach, local,
+            fun() ->
+                Err = {error, expired_iterator},
+                {ok, H} = khash:new(),
+                khash:put(H, foo, bar),
+                {ok, I} = khash:iter(H),
+                {ok, H, I, Err}
+            end,
+            [
+                fun({ok, H, I, Err}) ->
+                    [{foo, bar}] = khash:to_list(H),
+                    {
+                        "to_list doesn't expire iterators",
+                        ?_assertNot(khash:iter_next(I) == Err)
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    {value, bar} = khash:lookup(H, foo),
+                    {
+                        "lookup doesn't expire iterators",
+                        ?_assertNot(khash:iter_next(I) == Err)
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    bar = khash:get(H, foo),
+                    {
+                        "get doesn't expire iterators",
+                        ?_assertNot(khash:iter_next(I) == Err)
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    1 = khash:size(H),
+                    {
+                        "size doesn't expire iterators",
+                        ?_assertNot(khash:iter_next(I) == Err)
+                    }
+                end,
+                fun({ok, H, I, Err}) ->
+                    {ok, _OtherI} = khash:iter(H),
+                    {foo, bar} = khash:iter_next(I),
+                    end_of_table = khash:iter_next(I),
+                    {
+                        "iteration doesn't expire iterators",
+                        ?_assertNot(khash:iter_next(I) == Err)
+                    }
+                end
+            ]}
+    }.
+
+%% Manual benchmark: time repeated khash:get/2 hits on a prepopulated
+%% hash. Not run by EUnit (no _test suffix); invoke from a shell.
+khash_fetch() ->
+    erlang:garbage_collect(),
+    List = kv_data(?NUM_KVS),
+    {ok, KHash} = khash:from_list(List),
+    khash_fetch(KHash, ?NUM_CYCLES * 10).
+
+khash_fetch(_, 0) ->
+    ok;
+khash_fetch(KHash, NumCycles) ->
+    ?assertMatch(1, khash:get(KHash, 1)),
+    khash_fetch(KHash, NumCycles - 1).
+
+%% Manual benchmark: time repeated overwrites of the same key.
+khash_store() ->
+    List = kv_data(?NUM_KVS * 2),
+    {ok, KHash} = khash:from_list(List),
+    khash_store(KHash, ?NUM_CYCLES).
+
+khash_store(_, 0) ->
+    ok;
+khash_store(KHash, NumCycles) ->
+    khash:put(KHash, 1, 1),
+    khash_store(KHash, NumCycles - 1).
+
+%% Drain an iterator into a sorted list of {K, V} pairs.
+khash_iterate(I, Acc) ->
+    case khash:iter_next(I) of
+        {K, V} ->
+            khash_iterate(I, [{K, V} | Acc]);
+        end_of_table ->
+            lists:sort(Acc)
+    end.
+
+%% Fixture data: [{1,1}, {2,2}, ..., {NumKVs,NumKVs}].
+kv_data(NumKVs) ->
+    [{I, I} || I <- lists:seq(1, NumKVs)].
+
+%% Drive N random actions over the {Dict, KHash} pair, re-validating the
+%% model after every step. Valid stays true only if every intermediate
+%% state matched.
+run_randomized({ok, _, N, _}, Valid) when N =< 0 ->
+    Valid;
+run_randomized({ok, Actions, N, S0}, Valid) ->
+    Action = weighted_choice(Actions),
+    S1 = Action(S0),
+    Assertion = Valid andalso validate_randomized_state(S1),
+    run_randomized({ok, Actions, N - 1, S1}, Assertion).
+
+%% The invariant: sorted {K, V} contents of the dict model and the khash
+%% must be identical.
+validate_randomized_state({D, H}) ->
+    DKVs = lists:sort(dict:to_list(D)),
+    HKVs = lists:sort(khash:to_list(H)),
+    DKVs == HKVs.
+
+%% Each run_* action mutates/queries both structures and returns the new
+%% {Dict, KHash} state.
+run_clear({_D0, H}) ->
+    ?assertMatch(ok, khash:clear(H)),
+    {dict:new(), H}.
+
+%% get/2 path: khash must agree with the dict on both hit and miss.
+run_get2({D, H}) ->
+    K = random_key(D),
+    case dict:find(K, D) of
+        {ok, Value} ->
+            ?assertMatch({value, Value}, khash:lookup(H, K)),
+            ?assertMatch(Value, khash:get(H, K));
+        error ->
+            ?assertMatch(not_found, khash:lookup(H, K)),
+            ?assertMatch(undefined, khash:get(H, K))
+        end,
+    {D, H}.
+
+%% get/3 path: on a miss, a random default must be returned unchanged.
+run_get3({D, H}) ->
+    K = random_key(D),
+    case dict:find(K, D) of
+        {ok, Value} ->
+            ?assertMatch({value, Value}, khash:lookup(H, K)),
+            ?assertMatch(Value, khash:get(H, K));
+        error ->
+            Val = random_val(),
+            ?assertMatch(Val, khash:get(H, K, Val))
+    end,
+    {D, H}.
+
+run_put({D0, H}) ->
+    K = random_key(D0),
+    V = random_val(),
+    D1 = dict:store(K, V, D0),
+    ?assertMatch(ok, khash:put(H, K, V)),
+    {D1, H}.
+
+%% del must return ok for present keys and not_found for absent ones.
+run_del({D0, H}) ->
+    K = random_key(D0),
+    D1 =
+        case dict:is_key(K, D0) of
+            true ->
+                ?assertMatch(ok, khash:del(H, K)),
+                dict:erase(K, D0);
+            false ->
+                ?assertMatch(not_found, khash:del(H, K)),
+                D0
+        end,
+    {D1, H}.
+
+run_size({D, H}) ->
+    ?assertEqual(dict:size(D), khash:size(H)),
+    {D, H}.
+
+%% Currently unused by randomized_test_ (action commented out there);
+%% relies on khash:keys/1.
+run_keys({D, H}) ->
+    DKeys = dict:fetch_keys(D),
+    {ok, HKeys0} = khash:keys(H),
+    HKeys = lists:sort(HKeys0),
+    ?assertEqual(lists:sort(DKeys), lists:sort(HKeys)),
+    {D, H}.
+
+run_to_list({D, H}) ->
+    DKVs = dict:to_list(D),
+    HKVs = khash:to_list(H),
+    ?assertEqual(lists:sort(DKVs), lists:sort(HKVs)),
+    {D, H}.
+
+%% Pick an item from [{Weight, Item}] with probability proportional to
+%% Weight. Items are sorted (ascending by weight) and a uniform draw in
+%% [0, Sum) is walked through the cumulative weights.
+weighted_choice(Items0) ->
+    Items = lists:sort(Items0),
+    Sum = lists:sum([W || {W, _} <- Items]),
+    Choice = rand:uniform() * Sum,
+    weighted_choice(Items, 0.0, Choice).
+
+%% Unreachable unless the cumulative sum misses Choice (float edge case).
+weighted_choice([], _, _) ->
+    throw(bad_choice);
+weighted_choice([{W, _} | Rest], S, C) when W + S < C ->
+    weighted_choice(Rest, W + S, C);
+weighted_choice([{_, I} | _], _, _) ->
+    I.
+
+%% Pick a key from the dict's keys; 'foo' is always included so that
+%% lookups can miss even when the dict already contains keys.
+random_key(D) ->
+    Keys = lists:usort(dict:fetch_keys(D) ++ [foo]),
+    lists:nth(rand:uniform(length(Keys)), Keys).
+
+%% Arbitrary random term via the gen_term fuzzer.
+random_val() ->
+    gen_term:any().