From afb505724cb1e9376be8908bb2e43db64e789771 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Thu, 26 Feb 2015 09:54:00 -0800 Subject: test-hash: Test hash_bytes128() with single 128-bit word. This commit adds a new test for hash_bytes128() using single 128-bit word. The test shows that there is no collision in all 19 consecutive bits checks, which indicates the hash function is good. Signed-off-by: Alex Wang Acked-by: Joe Stringer --- tests/test-hash.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 12 deletions(-) (limited to 'tests/test-hash.c') diff --git a/tests/test-hash.c b/tests/test-hash.c index 5032d323b..317466e62 100644 --- a/tests/test-hash.c +++ b/tests/test-hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2012, 2014 Nicira, Inc. + * Copyright (c) 2009, 2012, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,22 +35,30 @@ set_bit(uint32_t array[3], int bit) } } +/* When bit == n_bits, the function just 0 sets the 'values'. */ static void -set_bit128(ovs_u128 array[16], int bit) +set_bit128(ovs_u128 *values, int bit, int n_bits) { assert(bit >= 0 && bit <= 2048); - memset(array, 0, sizeof(ovs_u128) * 16); - if (bit < 2048) { + memset(values, 0, n_bits/8); + if (bit < n_bits) { int b = bit % 128; if (b < 64) { - array[bit / 128].u64.lo = UINT64_C(1) << (b % 64); + values[bit / 128].u64.lo = UINT64_C(1) << (b % 64); } else { - array[bit / 128].u64.hi = UINT64_C(1) << (b % 64); + values[bit / 128].u64.hi = UINT64_C(1) << (b % 64); } } } +static uint64_t +get_range128(ovs_u128 *value, int ofs, uint64_t mask) +{ + return ((ofs < 64 ? (value->u64.lo >> ofs) : 0) & mask) + | ((ofs <= 64 ? (value->u64.hi << (64 - ofs)) : (value->u64.hi >> (ofs - 64)) & mask)); +} + static uint32_t hash_words_cb(uint32_t input) { @@ -134,12 +142,64 @@ check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t), } } +static void +check_hash_bytes128(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), + const char *name, const int min_unique) +{ + const uint64_t unique_mask = (UINT64_C(1) << min_unique) - 1; + const int n_bits = sizeof(ovs_u128) * 8; + int i, j; + + for (i = 0; i <= n_bits; i++) { + OVS_PACKED(struct offset_ovs_u128 { + uint32_t a; + ovs_u128 b; + }) in0_data; + ovs_u128 *in0, in1; + ovs_u128 out0, out1; + + in0 = &in0_data.b; + set_bit128(in0, i, n_bits); + set_bit128(&in1, i, n_bits); + hash(in0, sizeof(ovs_u128), 0, &out0); + hash(&in1, sizeof(ovs_u128), 0, &out1); + if (!ovs_u128_equal(&out0, &out1)) { + printf("%s hash not the same for non-64 aligned data " + "%016"PRIx64"%016"PRIx64" != %016"PRIx64"%016"PRIx64"\n", + name, out0.u64.lo, out0.u64.hi, out1.u64.lo, out1.u64.hi); + } + + for (j = i + 1; j <= n_bits; j++) { + ovs_u128 in2; + ovs_u128 out2; + int ofs; + + set_bit128(&in2, j, n_bits); + hash(&in2, sizeof(ovs_u128), 0, &out2); + for (ofs = 0; ofs < 128 - min_unique; ofs++) { + uint64_t bits1 = get_range128(&out1, ofs, unique_mask); + uint64_t bits2 = get_range128(&out2, ofs, unique_mask); + + if (bits1 == bits2) { + printf("%s has a partial collision:\n", name); + printf("hash(1 << %d) == %016"PRIx64"%016"PRIx64"\n", + i, out1.u64.hi, out1.u64.lo); + printf("hash(1 << %d) == %016"PRIx64"%016"PRIx64"\n", + j, out2.u64.hi, out2.u64.lo); + printf("%d bits of output starting at bit %d " + "are both 0x%016"PRIx64"\n", min_unique, ofs, bits1); + } + } + } + } +} + static void check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), const char *name, const int min_unique) { const uint64_t unique_mask = (UINT64_C(1) << min_unique) - 1; - const int n_bits = 256 * 8; + const int n_bits = sizeof(ovs_u128) * 8 * 16; int i, j; for (i = 0; i <= n_bits; i++) { @@ -151,10 +211,8 @@ check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), ovs_u128 out0, out1; in0 = in0_data.b; - /* When, i or j == n_bits, the set_bit128() will just - * zero set the input variables. */ - set_bit128(in0, i); - set_bit128(in1, i); + set_bit128(in0, i, n_bits); + set_bit128(in1, i, n_bits); hash(in0, sizeof(ovs_u128) * 16, 0, &out0); hash(in1, sizeof(ovs_u128) * 16, 0, &out1); if (!ovs_u128_equal(&out0, &out1)) { @@ -167,7 +225,7 @@ check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), ovs_u128 in2[16]; ovs_u128 out2; - set_bit128(in2, j); + set_bit128(in2, j, n_bits); hash(in2, sizeof(ovs_u128) * 16, 0, &out2); if ((out1.u64.lo & unique_mask) == (out2.u64.lo & unique_mask)) { printf("%s has a partial collision:\n", name); @@ -241,6 +299,25 @@ test_hash_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) */ check_word_hash(hash_int_cb, "hash_int", 12); + /* Check that all hashes computed with hash_bytes128 with one 1-bit (or no + * 1-bits) set within a single 128-bit word have different values in all + * 19-bit consecutive runs. + * + * Given a random distribution, the probability of at least one collision + * in any set of 19 bits is approximately + * + * 1 - ((2**19 - 1)/2**19)**C(129,2) + * == 1 - (524,287/524,288)**8256 + * =~ 0.0156 + * + * There are 111 ways to pick 19 consecutive bits in a 128-bit word, so if + * we assumed independence then the chance of having no collisions in any of + * those 19-bit runs would be (1-0.0156)**111 =~ 0.174. This refutes our + * assumption of independence, which makes it seem like a good hash + * function. + */ + check_hash_bytes128(hash_bytes128, "hash_bytes128", 19); + /* Check that all hashes computed with hash_bytes128 with 1-bit (or no * 1-bits) set within 16 128-bit words have different values in their * lowest 23 bits. -- cgit v1.2.1