author    Luca Versari <veluca@google.com>   2020-11-09 23:32:45 +0000
committer Victor Costan <costan@google.com>  2020-11-18 23:20:58 +0000
commit    6835abd953cd66c6cf7716dd8347e9c5f245662a
tree      78b3fced1d08f056a55a7dc09cbd07b1438c73da /snappy.cc
parent    368b01c8dd29fee6c00775800ec7c2b792ad5482
Change hash function for Compress.
((a*b)>>18) & mask has higher throughput than (a*b)>>shift, and produces the same results when the hash table size is 2**14. In other cases the hash function is still good, but it is less critical for it to be, since the input is small anyway. This speeds up encoding, especially in cases where hashing is a significant part of the encoding critical path (small or incompressible files).

PiperOrigin-RevId: 341498741
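To see why the two forms agree at the maximum table size: with a 2**14-entry table, shift == 32 - 14 == 18, so (a*b)>>18 already fits in 14 bits and the & mask is a no-op. A minimal standalone check of that claim (kMul and the 14-bit size come from the patch; the sample inputs are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  constexpr uint32_t kMul = 0x1e35a7bd;
  constexpr uint32_t kTableBits = 14;              // hash table size 2**14
  constexpr uint32_t mask = (1u << kTableBits) - 1;
  constexpr uint32_t shift = 32 - kTableBits;      // == 18
  const uint32_t samples[] = {0u, 1u, 0x12345678u, 0xffffffffu};
  for (uint32_t bytes : samples) {
    uint32_t old_hash = (bytes * kMul) >> shift;        // previous form
    uint32_t new_hash = ((bytes * kMul) >> 18) & mask;  // new form
    assert(old_hash == new_hash);  // identical at the 2**14 table size
  }
  return 0;
}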
Diffstat (limited to 'snappy.cc')
-rw-r--r--  snappy.cc | 22
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/snappy.cc b/snappy.cc
index 29e1e8e..41776a9 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -91,9 +91,9 @@ using internal::LITERAL;
// compression for compressible input, and more speed for incompressible
// input. Of course, it doesn't hurt if the hash function is reasonably fast
// either, as it gets called a lot.
-static inline uint32_t HashBytes(uint32_t bytes, int shift) {
- uint32_t kMul = 0x1e35a7bd;
- return (bytes * kMul) >> shift;
+static inline uint32_t HashBytes(uint32_t bytes, uint32_t mask) {
+ constexpr uint32_t kMagic = 0x1e35a7bd;
+ return ((kMagic * bytes) >> (32 - kMaxHashTableBits)) & mask;
}
size_t MaxCompressedLength(size_t source_bytes) {
@@ -260,7 +260,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
}
return IncrementalCopySlow(src, op, op_limit);
-#else // !SNAPPY_HAVE_SSSE3
+#else // !SNAPPY_HAVE_SSSE3
// If plenty of buffer space remains, expand the pattern to at least 8
// bytes. The way the following loop is written, we need 8 bytes of buffer
// space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10
@@ -510,8 +510,7 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
const char* ip = input;
assert(input_size <= kBlockSize);
assert((table_size & (table_size - 1)) == 0); // table must be power of two
- const int shift = 32 - Bits::Log2Floor(table_size);
- assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
+ const uint32_t mask = table_size - 1;
const char* ip_end = input + input_size;
const char* base_ip = ip;
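Because table_size is asserted to be a power of two, table_size - 1 is a contiguous run of low one-bits, so hash & mask is always a valid table index. A quick illustration with two hypothetical sizes:

static_assert(((1u << 14) - 1) == 0x3fffu, "max table: mask keeps 14 bits");
static_assert(((1u << 8) - 1) == 0x00ffu, "small table: mask keeps 8 bits");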
@@ -562,7 +561,7 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
// loaded in preload.
uint32_t dword = i == 0 ? preload : static_cast<uint32_t>(data);
assert(dword == LittleEndian::Load32(ip + i));
- uint32_t hash = HashBytes(dword, shift);
+ uint32_t hash = HashBytes(dword, mask);
candidate = base_ip + table[hash];
assert(candidate >= base_ip);
assert(candidate < ip + i);
@@ -583,7 +582,7 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
}
while (true) {
assert(static_cast<uint32_t>(data) == LittleEndian::Load32(ip));
- uint32_t hash = HashBytes(data, shift);
+ uint32_t hash = HashBytes(data, mask);
uint32_t bytes_between_hash_lookups = skip >> 5;
skip += bytes_between_hash_lookups;
const char* next_ip = ip + bytes_between_hash_lookups;
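For context, the probing loop around this call (unchanged by the patch) scans less densely the longer it goes without a match: skip >> 5 starts at 1 and the stride grows as probes keep failing. A standalone sketch of that schedule, assuming the initial skip value of 32 used by CompressFragment:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t skip = 32;  // assumed initial value, as in CompressFragment
  uint32_t last_step = 0;
  for (int probe = 0; probe < 200; ++probe) {
    uint32_t step = skip >> 5;  // bytes_between_hash_lookups
    if (step != last_step) {
      std::printf("probe %3d: step grows to %u byte(s)\n", probe, step);
      last_step = step;
    }
    skip += step;  // stride increases after each failed probe
  }
  return 0;
}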
@@ -642,10 +641,9 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
(LittleEndian::Load64(ip) & 0xFFFFFFFFFF));
// We are now looking for a 4-byte match again. We read
// table[Hash(ip, shift)] for that. To improve compression,
- // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
- table[HashBytes(LittleEndian::Load32(ip - 1), shift)] =
- ip - base_ip - 1;
- uint32_t hash = HashBytes(data, shift);
+ // we also update table[Hash(ip - 1, mask)] and table[Hash(ip, mask)].
+ table[HashBytes(LittleEndian::Load32(ip - 1), mask)] = ip - base_ip - 1;
+ uint32_t hash = HashBytes(data, mask);
candidate = base_ip + table[hash];
table[hash] = ip - base_ip;
// Measurements on the benchmarks have shown the following probabilities