author     Snappy Team <no-reply@google.com>  2020-03-27 23:17:50 +0000
committer  Victor Costan <costan@google.com>  2020-04-11 04:41:07 +0000
commit     4dfcad9f4e7e3ee0a226758e3b8ebc5b57098dbe (patch)
tree       55d89ac03e696292ef4aa03f1de380cb73e13524 /snappy.cc
parent     e19178748f6dd4bb9380b69efd95ab104ed24d0f (diff)
download   snappy-git-4dfcad9f4e7e3ee0a226758e3b8ebc5b57098dbe.tar.gz
assertion failure on darwin_x86_64, have to investigate
PiperOrigin-RevId: 303428229
Diffstat (limited to 'snappy.cc')
-rw-r--r--  snappy.cc | 25 ++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/snappy.cc b/snappy.cc
index 6bc3651..0807f13 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -385,11 +385,18 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
assert(offset < 65536);
assert(len_less_than_12 == (len < 12));
- if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
- // offset fits in 11 bits. The 3 highest go in the top of the first byte,
- // and the rest go in the second byte.
- *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
- *op++ = offset & 0xff;
+ if (len_less_than_12) {
+ uint32 u = (len << 2) + (offset << 8);
+ uint32 copy1 = COPY_1_BYTE_OFFSET - (4 << 2) + ((offset >> 3) & 0xe0);
+ uint32 copy2 = COPY_2_BYTE_OFFSET - (1 << 2);
+ // It turns out that offset < 2048 is a difficult-to-predict branch:
+ // `perf record` shows it has the highest percentage of branch misses in
+ // benchmarks. This code is branch-free, and the data-dependency chain
+ // that bottlenecks the throughput is so long that a few extra
+ // instructions are completely free (IPC << 6 because of data deps).
+ u += offset < 2048 ? copy1 : copy2;
+ LittleEndian::Store32(op, u);
+ op += offset < 2048 ? 2 : 3;
} else {
// Write 4 bytes, though we only care about 3 of them. The output buffer
// is required to have some slack, so the extra byte won't overrun it.
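
The added fast path above packs both copy-tag layouts into a single 32-bit word and selects between them with conditional moves instead of a branch. The standalone sketch below illustrates the same trick under these assumptions: a little-endian host, the Snappy tag values COPY_1_BYTE_OFFSET = 1 and COPY_2_BYTE_OFFSET = 2, and hypothetical helpers Store32/EmitShortCopy standing in for LittleEndian::Store32 and the real EmitCopyAtMost64.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Snappy tag types (low 2 bits of the first tag byte).
constexpr uint32_t COPY_1_BYTE_OFFSET = 1;
constexpr uint32_t COPY_2_BYTE_OFFSET = 2;

// Hypothetical stand-in for LittleEndian::Store32 on a little-endian host.
static inline void Store32(char* p, uint32_t v) {
  std::memcpy(p, &v, sizeof(v));
}

// Mirrors the patched fast path: build both candidate encodings in one
// 32-bit word, pick one with conditional moves, store 4 bytes, and advance
// op by the true tag size (2 or 3). The 4-byte store needs output slack.
static char* EmitShortCopy(char* op, size_t offset, size_t len) {
  assert(len >= 4 && len < 12);
  assert(offset < 65536);
  uint32_t u = (len << 2) + (offset << 8);
  uint32_t copy1 = COPY_1_BYTE_OFFSET - (4 << 2) + ((offset >> 3) & 0xe0);
  uint32_t copy2 = COPY_2_BYTE_OFFSET - (1 << 2);
  u += offset < 2048 ? copy1 : copy2;
  Store32(op, u);
  return op + (offset < 2048 ? 2 : 3);
}

int main() {
  char buf[8] = {0};
  // offset < 2048: 2-byte tag, len - 4 in bits 2..4, offset split 3 + 8.
  char* end = EmitShortCopy(buf, 100, 7);
  assert(end - buf == 2);
  assert((buf[0] & 3) == COPY_1_BYTE_OFFSET);
  assert(((buf[0] >> 2) & 7) == 7 - 4);
  assert((unsigned char)buf[1] == 100);
  // offset >= 2048: 3-byte tag, len - 1 in bits 2..7, 16-bit offset after.
  end = EmitShortCopy(buf, 5000, 7);
  assert(end - buf == 3);
  assert((buf[0] & 3) == COPY_2_BYTE_OFFSET);
  assert(((unsigned char)buf[1] | ((unsigned char)buf[2] << 8)) == 5000);
  std::printf("ok\n");
  return 0;
}

Writing a constant-width 4-byte store and varying only the pointer advance is what removes the branch from the store's critical path; only the final `op +=` still depends on the offset comparison, and that compiles to a conditional move.
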
@@ -615,7 +622,7 @@ char* CompressFragment(const char* input,
// "literal bytes" prior to ip.
const char* base = ip;
std::pair<size_t, bool> p =
- FindMatchLength(candidate + 4, ip + 4, ip_end);
+ FindMatchLength(candidate + 4, ip + 4, ip_end, &data);
size_t matched = 4 + p.first;
ip += matched;
size_t offset = base - candidate;
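
The extra &data argument lets FindMatchLength hand back the 8 bytes at the position where matching stopped, so the main loop can continue without re-loading them. A simplified sketch of such a variant follows; the body is an assumption in the spirit of snappy-internal.h rather than the verbatim implementation, and it presumes a little-endian host plus the input slack snappy already requires for its unaligned loads.

#include <cstdint>
#include <cstring>
#include <utility>

// Hypothetical stand-in for snappy's unaligned little-endian 64-bit load.
static inline uint64_t Load64(const char* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

// Returns {matched_bytes, matched_bytes < 8} and leaves, via *data, the
// 8 bytes at the final s2 position, so the caller's
// assert(LittleEndian::Load64(ip) == data) holds without a fresh load.
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                      const char* s2,
                                                      const char* s2_limit,
                                                      uint64_t* data) {
  size_t matched = 0;
  while (s2 + 8 <= s2_limit) {
    uint64_t a = Load64(s1 + matched);
    uint64_t b = Load64(s2);
    if (a == b) {
      s2 += 8;
      matched += 8;
      continue;
    }
    // Count matching low-order bytes of the mismatching word (little-endian).
    size_t matching_bytes = __builtin_ctzll(a ^ b) >> 3;
    // May read a few bytes past the last compared position; relies on the
    // input-slack guarantee snappy already imposes on its buffers.
    *data = Load64(s2 + matching_bytes);
    return {matched + matching_bytes, (matched + matching_bytes) < 8};
  }
  // Within 8 bytes of the end: compare byte by byte. The caller jumps to
  // emit_remainder before consulting *data this close to ip_end.
  while (s2 < s2_limit && s1[matched] == *s2) {
    ++s2;
    ++matched;
  }
  *data = 0;
  return {matched, matched < 8};
}
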
@@ -629,12 +636,12 @@ char* CompressFragment(const char* input,
if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
+ assert(LittleEndian::Load64(ip) == data);
// We are now looking for a 4-byte match again. We read
// table[Hash(ip, shift)] for that. To improve compression,
// we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
- data = LittleEndian::Load64(ip - 1);
- table[HashBytes(data, shift)] = ip - base_ip - 1;
- data >>= 8;
+ table[HashBytes(LittleEndian::Load32(ip - 1), shift)] =
+ ip - base_ip - 1;
uint32 hash = HashBytes(data, shift);
candidate = base_ip + table[hash];
table[hash] = ip - base_ip;
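
For context, candidate positions come from a small table indexed by a multiplicative hash of 4 input bytes; the loop above refreshes the entry for ip - 1 and then probes and updates the entry for ip. The demo below shows that probe/update pattern in isolation. The table size and helper names are illustrative assumptions; kMul matches the multiplier snappy's HashBytes used at the time, and buckets can alias, which is why the real code verifies candidates by comparing bytes.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Hypothetical stand-in for snappy's LittleEndian::Load32.
static inline uint32_t Load32(const char* p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

// Multiplicative hash over 4 bytes; kMul matches the constant snappy's
// HashBytes used in this era.
static inline uint32_t HashBytes(uint32_t bytes, int shift) {
  const uint32_t kMul = 0x1e35a7bd;
  return (bytes * kMul) >> shift;
}

int main() {
  // Illustrative table size: 1 << 14 entries, so shift = 32 - 14 keeps the
  // hash in range. Entries store offsets from base_ip, as in snappy.
  const int kBits = 14;
  const int shift = 32 - kBits;
  std::vector<uint16_t> table(1 << kBits, 0);

  const char input[] = "abcdXXXXabcd";  // "abcd" repeats 8 bytes later
  const char* base_ip = input;

  // Record position 0 under the hash of its first 4 bytes.
  table[HashBytes(Load32(base_ip), shift)] = 0;

  // Probing at position 8 maps to the same bucket and yields the earlier
  // occurrence; a byte comparison confirms it is a real 4-byte match.
  const char* ip = base_ip + 8;
  uint32_t hash = HashBytes(Load32(ip), shift);
  const char* candidate = base_ip + table[hash];
  table[hash] = static_cast<uint16_t>(ip - base_ip);  // update for next time

  std::printf("candidate at offset %td, 4-byte match: %s\n",
              candidate - base_ip,
              Load32(candidate) == Load32(ip) ? "yes" : "no");
  return 0;
}
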