author     Snappy Team <no-reply@google.com>  2020-03-27 23:17:50 +0000
committer  Victor Costan <costan@google.com>  2020-04-11 04:41:07 +0000
commit     4dfcad9f4e7e3ee0a226758e3b8ebc5b57098dbe (patch)
tree       55d89ac03e696292ef4aa03f1de380cb73e13524 /snappy.cc
parent     e19178748f6dd4bb9380b69efd95ab104ed24d0f (diff)
download   snappy-git-4dfcad9f4e7e3ee0a226758e3b8ebc5b57098dbe.tar.gz
assertion failure on darwin_x86_64, have to investigate
PiperOrigin-RevId: 303428229
Diffstat (limited to 'snappy.cc')
-rw-r--r--  snappy.cc | 25 ++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/snappy.cc b/snappy.cc
index 6bc3651..0807f13 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -385,11 +385,18 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
assert(offset < 65536);
assert(len_less_than_12 == (len < 12));
- if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
- // offset fits in 11 bits. The 3 highest go in the top of the first byte,
- // and the rest go in the second byte.
- *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
- *op++ = offset & 0xff;
+ if (len_less_than_12) {
+ uint32 u = (len << 2) + (offset << 8);
+ uint32 copy1 = COPY_1_BYTE_OFFSET - (4 << 2) + ((offset >> 3) & 0xe0);
+ uint32 copy2 = COPY_2_BYTE_OFFSET - (1 << 2);
+ // It turns out that offset < 2048 is a difficult-to-predict branch:
+ // `perf record` shows it has the highest percentage of branch misses in
+ // benchmarks. This code is branch-free, and the data-dependency chain
+ // that bottlenecks the throughput is so long that a few extra
+ // instructions are completely free (IPC << 6 because of data deps).
+ u += offset < 2048 ? copy1 : copy2;
+ LittleEndian::Store32(op, u);
+ op += offset < 2048 ? 2 : 3;
} else {
// Write 4 bytes, though we only care about 3 of them. The output buffer
// is required to have some slack, so the extra byte won't overrun it.
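
The added fast path above packs both copy-tag layouts into a single 32-bit word and selects between them with conditional moves instead of a branch. The standalone sketch below illustrates the same trick under these assumptions: a little-endian host, the Snappy tag values COPY_1_BYTE_OFFSET = 1 and COPY_2_BYTE_OFFSET = 2, and hypothetical helpers Store32/EmitShortCopy standing in for LittleEndian::Store32 and the real EmitCopyAtMost64.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Snappy tag types (low 2 bits of the first tag byte).
constexpr uint32_t COPY_1_BYTE_OFFSET = 1;
constexpr uint32_t COPY_2_BYTE_OFFSET = 2;

// Hypothetical stand-in for LittleEndian::Store32 on a little-endian host.
static inline void Store32(char* p, uint32_t v) {
  std::memcpy(p, &v, sizeof(v));
}

// Mirrors the patched fast path: build both candidate encodings in one
// 32-bit word, pick one with conditional moves, store 4 bytes, and advance
// op by the true tag size (2 or 3). The 4-byte store needs output slack.
static char* EmitShortCopy(char* op, size_t offset, size_t len) {
  assert(len >= 4 && len < 12);
  assert(offset < 65536);
  uint32_t u = (len << 2) + (offset << 8);
  uint32_t copy1 = COPY_1_BYTE_OFFSET - (4 << 2) + ((offset >> 3) & 0xe0);
  uint32_t copy2 = COPY_2_BYTE_OFFSET - (1 << 2);
  u += offset < 2048 ? copy1 : copy2;
  Store32(op, u);
  return op + (offset < 2048 ? 2 : 3);
}

int main() {
  char buf[8] = {0};
  // offset < 2048: 2-byte tag, len - 4 in bits 2..4, offset split 3 + 8.
  char* end = EmitShortCopy(buf, 100, 7);
  assert(end - buf == 2);
  assert((buf[0] & 3) == COPY_1_BYTE_OFFSET);
  assert(((buf[0] >> 2) & 7) == 7 - 4);
  assert((unsigned char)buf[1] == 100);
  // offset >= 2048: 3-byte tag, len - 1 in bits 2..7, 16-bit offset after.
  end = EmitShortCopy(buf, 5000, 7);
  assert(end - buf == 3);
  assert((buf[0] & 3) == COPY_2_BYTE_OFFSET);
  assert(((unsigned char)buf[1] | ((unsigned char)buf[2] << 8)) == 5000);
  std::printf("ok\n");
  return 0;
}

Writing a constant-width 4-byte store and varying only the pointer advance is what removes the branch from the store's critical path; only the final `op +=` still depends on the offset comparison, and that compiles to a conditional move.
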
@@ -615,7 +622,7 @@ char* CompressFragment(const char* input,
// "literal bytes" prior to ip.
const char* base = ip;
std::pair<size_t, bool> p =
- FindMatchLength(candidate + 4, ip + 4, ip_end);
+ FindMatchLength(candidate + 4, ip + 4, ip_end, &data);
size_t matched = 4 + p.first;
ip += matched;
size_t offset = base - candidate;
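
The extra &data argument lets FindMatchLength hand back the 8 bytes at the position where matching stopped, so the main loop can continue without re-loading them. A simplified sketch of such a variant follows; the body is an assumption in the spirit of snappy-internal.h rather than the verbatim implementation, and it presumes a little-endian host plus the input slack snappy already requires for its unaligned loads.

#include <cstdint>
#include <cstring>
#include <utility>

// Hypothetical stand-in for snappy's unaligned little-endian 64-bit load.
static inline uint64_t Load64(const char* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

// Returns {matched_bytes, matched_bytes < 8} and leaves, via *data, the
// 8 bytes at the final s2 position, so the caller's
// assert(LittleEndian::Load64(ip) == data) holds without a fresh load.
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                      const char* s2,
                                                      const char* s2_limit,
                                                      uint64_t* data) {
  size_t matched = 0;
  while (s2 + 8 <= s2_limit) {
    uint64_t a = Load64(s1 + matched);
    uint64_t b = Load64(s2);
    if (a == b) {
      s2 += 8;
      matched += 8;
      continue;
    }
    // Count matching low-order bytes of the mismatching word (little-endian).
    size_t matching_bytes = __builtin_ctzll(a ^ b) >> 3;
    // May read a few bytes past the last compared position; relies on the
    // input-slack guarantee snappy already imposes on its buffers.
    *data = Load64(s2 + matching_bytes);
    return {matched + matching_bytes, (matched + matching_bytes) < 8};
  }
  // Within 8 bytes of the end: compare byte by byte. The caller jumps to
  // emit_remainder before consulting *data this close to ip_end.
  while (s2 < s2_limit && s1[matched] == *s2) {
    ++s2;
    ++matched;
  }
  *data = 0;
  return {matched, matched < 8};
}
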
@@ -629,12 +636,12 @@ char* CompressFragment(const char* input,
if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
+ assert(LittleEndian::Load64(ip) == data);
// We are now looking for a 4-byte match again. We read
// table[Hash(ip, shift)] for that. To improve compression,
// we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
- data = LittleEndian::Load64(ip - 1);
- table[HashBytes(data, shift)] = ip - base_ip - 1;
- data >>= 8;
+ table[HashBytes(LittleEndian::Load32(ip - 1), shift)] =
+ ip - base_ip - 1;
uint32 hash = HashBytes(data, shift);
candidate = base_ip + table[hash];
table[hash] = ip - base_ip;
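
For context, candidate positions come from a small table indexed by a multiplicative hash of 4 input bytes; the loop above refreshes the entry for ip - 1 and then probes and updates the entry for ip. The demo below shows that probe/update pattern in isolation. The table size and helper names are illustrative assumptions; kMul matches the multiplier snappy's HashBytes used at the time, and buckets can alias, which is why the real code verifies candidates by comparing bytes.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Hypothetical stand-in for snappy's LittleEndian::Load32.
static inline uint32_t Load32(const char* p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

// Multiplicative hash over 4 bytes; kMul matches the constant snappy's
// HashBytes used in this era.
static inline uint32_t HashBytes(uint32_t bytes, int shift) {
  const uint32_t kMul = 0x1e35a7bd;
  return (bytes * kMul) >> shift;
}

int main() {
  // Illustrative table size: 1 << 14 entries, so shift = 32 - 14 keeps the
  // hash in range. Entries store offsets from base_ip, as in snappy.
  const int kBits = 14;
  const int shift = 32 - kBits;
  std::vector<uint16_t> table(1 << kBits, 0);

  const char input[] = "abcdXXXXabcd";  // "abcd" repeats 8 bytes later
  const char* base_ip = input;

  // Record position 0 under the hash of its first 4 bytes.
  table[HashBytes(Load32(base_ip), shift)] = 0;

  // Probing at position 8 maps to the same bucket and yields the earlier
  // occurrence; a byte comparison confirms it is a real 4-byte match.
  const char* ip = base_ip + 8;
  uint32_t hash = HashBytes(Load32(ip), shift);
  const char* candidate = base_ip + table[hash];
  table[hash] = static_cast<uint16_t>(ip - base_ip);  // update for next time

  std::printf("candidate at offset %td, 4-byte match: %s\n",
              candidate - base_ip,
              Load32(candidate) == Load32(ip) ? "yes" : "no");
  return 0;
}
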