diff options
author | nafi <nafi@google.com> | 2018-12-03 17:27:56 -0800 |
---|---|---|
committer | Victor Costan <pwnall@chromium.org> | 2019-01-04 19:08:30 -0800 |
commit | eb47f79631fc2e23739a5b09c0d46497fd4b95d1 (patch) | |
tree | 6d7cfa963a6e9dda681d06d7270ec0310e5a3694 /snappy.cc | |
parent | 254966c71e0d6e44e432e03dc8f9121816868ac6 (diff) | |
download | snappy-git-eb47f79631fc2e23739a5b09c0d46497fd4b95d1.tar.gz |
Optimize by about 0.5%.
How? Move boolean args of EmitLiteral, EmitCopyAtMost64 and EmitCopy to
template args so that compiler generates two separate pruned versions of
the functions for arg=true and arg=false. FWIW, CompressFragment
function calls 1) EmitLiteral inside from a 1-level loop and 2) EmitCopy
from a 2-level nested loop. CompressFragment is called from inside
another while-loop from the public 'Compress' function.
name old time/op new time/op delta
BM_UFlat/0 [html ] 41.9µs ± 0% 41.1µs ± 0% -1.92% (p=0.000 n=10+10)
BM_UFlat/1 [urls ] 576µs ± 0% 572µs ± 0% -0.68% (p=0.000 n=10+10)
BM_UFlat/2 [jpg ] 7.25µs ± 6% 7.13µs ± 1% ~ (p=0.074 n=9+8)
BM_UFlat/3 [jpg_200 ] 132ns ± 1% 130ns ± 0% -1.45% (p=0.000 n=10+8)
BM_UFlat/4 [pdf ] 8.27µs ± 3% 8.22µs ± 0% ~ (p=0.277 n=9+8)
BM_UFlat/5 [html4 ] 220µs ± 0% 219µs ± 0% -0.75% (p=0.000 n=10+10)
BM_UFlat/6 [txt1 ] 192µs ± 0% 190µs ± 0% -0.80% (p=0.000 n=10+10)
BM_UFlat/7 [txt2 ] 169µs ± 0% 168µs ± 0% -0.69% (p=0.000 n=10+10)
BM_UFlat/8 [txt3 ] 510µs ± 0% 508µs ± 0% -0.42% (p=0.000 n=10+10)
BM_UFlat/9 [txt4 ] 707µs ± 0% 702µs ± 0% -0.67% (p=0.000 n=10+10)
BM_UFlat/10 [pb ] 38.5µs ± 0% 37.4µs ± 1% -2.84% (p=0.000 n=10+10)
BM_UFlat/11 [gaviota ] 189µs ± 0% 190µs ± 0% +0.55% (p=0.000 n=10+10)
BM_UFlat/12 [cp ] 14.2µs ± 0% 14.1µs ± 0% -0.44% (p=0.000 n=10+10)
BM_UFlat/13 [c ] 7.31µs ± 1% 7.35µs ± 0% +0.54% (p=0.002 n=10+10)
BM_UFlat/14 [lsp ] 2.27µs ± 0% 2.27µs ± 1% ~ (p=0.161 n=9+9)
BM_UFlat/15 [xls ] 905µs ± 0% 903µs ± 0% -0.25% (p=0.000 n=10+10)
BM_UFlat/16 [xls_200 ] 214ns ± 1% 213ns ± 1% -0.57% (p=0.043 n=10+10)
BM_UFlat/17 [bin ] 275µs ± 0% 274µs ± 0% -0.31% (p=0.000 n=10+10)
BM_UFlat/18 [bin_200 ] 102ns ± 5% 101ns ± 3% ~ (p=0.161 n=9+9)
BM_UFlat/19 [sum ] 27.9µs ± 0% 27.2µs ± 0% -2.68% (p=0.000 n=10+10)
BM_UFlat/20 [man ] 2.97µs ± 1% 2.97µs ± 0% ~ (p=0.400 n=9+10)
BM_UValidate/0 [html ] 33.3µs ± 0% 33.7µs ± 0% +1.18% (p=0.000 n=10+10)
BM_UValidate/1 [urls ] 442µs ± 0% 442µs ± 0% ~ (p=0.353 n=10+10)
BM_UValidate/2 [jpg ] 146ns ± 0% 146ns ± 0% ~ (p=0.063 n=10+10)
BM_UValidate/3 [jpg_200 ] 98.4ns ± 0% 98.5ns ± 0% ~ (p=0.184 n=10+10)
BM_UValidate/4 [pdf ] 2.88µs ± 0% 2.90µs ± 1% +0.68% (p=0.000 n=10+10)
BM_UIOVec/0 [html ] 122µs ± 0% 122µs ± 0% -0.39% (p=0.000 n=10+10)
BM_UIOVec/1 [urls ] 1.08ms ± 0% 1.08ms ± 0% ~ (p=0.529 n=10+10)
BM_UIOVec/2 [jpg ] 7.71µs ±11% 7.76µs ± 9% ~ (p=0.853 n=10+10)
BM_UIOVec/3 [jpg_200 ] 327ns ± 0% 328ns ± 0% ~ (p=0.146 n=8+10)
BM_UIOVec/4 [pdf ] 12.1µs ± 1% 12.1µs ± 3% ~ (p=0.315 n=10+10)
BM_UFlatSink/0 [html ] 41.8µs ± 0% 41.0µs ± 0% -1.87% (p=0.000 n=10+9)
BM_UFlatSink/1 [urls ] 576µs ± 0% 572µs ± 0% -0.74% (p=0.000 n=9+10)
BM_UFlatSink/2 [jpg ] 7.58µs ± 8% 7.56µs ± 9% ~ (p=0.739 n=10+10)
BM_UFlatSink/3 [jpg_200 ] 133ns ± 0% 134ns ± 0% +0.60% (p=0.000 n=10+9)
BM_UFlatSink/4 [pdf ] 8.44µs ± 3% 8.30µs ± 1% -1.65% (p=0.029 n=10+10)
BM_UFlatSink/5 [html4 ] 220µs ± 0% 218µs ± 0% -0.81% (p=0.000 n=10+10)
BM_UFlatSink/6 [txt1 ] 192µs ± 0% 190µs ± 0% -0.78% (p=0.000 n=10+10)
BM_UFlatSink/7 [txt2 ] 169µs ± 0% 168µs ± 0% -0.59% (p=0.000 n=10+10)
BM_UFlatSink/8 [txt3 ] 510µs ± 0% 508µs ± 0% -0.39% (p=0.000 n=10+10)
BM_UFlatSink/9 [txt4 ] 707µs ± 0% 703µs ± 0% -0.62% (p=0.000 n=10+10)
BM_UFlatSink/10 [pb ] 38.4µs ± 0% 37.4µs ± 0% -2.62% (p=0.000 n=9+9)
BM_UFlatSink/11 [gaviota ] 189µs ± 0% 190µs ± 0% +0.63% (p=0.000 n=10+10)
BM_UFlatSink/12 [cp ] 14.2µs ± 0% 14.1µs ± 0% -0.27% (p=0.011 n=10+10)
BM_UFlatSink/13 [c ] 7.33µs ± 1% 7.35µs ± 1% ~ (p=0.243 n=10+9)
BM_UFlatSink/14 [lsp ] 2.27µs ± 0% 2.26µs ± 0% -0.39% (p=0.000 n=9+9)
BM_UFlatSink/15 [xls ] 904µs ± 0% 902µs ± 0% -0.28% (p=0.000 n=10+10)
BM_UFlatSink/16 [xls_200 ] 216ns ± 1% 217ns ± 1% ~ (p=0.661 n=10+9)
BM_UFlatSink/17 [bin ] 275µs ± 0% 274µs ± 0% -0.24% (p=0.000 n=8+9)
BM_UFlatSink/18 [bin_200 ] 104ns ± 2% 104ns ± 1% -0.70% (p=0.043 n=9+10)
BM_UFlatSink/19 [sum ] 27.8µs ± 0% 27.1µs ± 0% -2.51% (p=0.000 n=9+10)
BM_UFlatSink/20 [man ] 3.02µs ± 1% 3.00µs ± 1% ~ (p=0.079 n=10+9)
BM_ZFlat/0 [html (22.31 %) ] 126µs ± 0% 126µs ± 0% -0.24% (p=0.000 n=10+10)
BM_ZFlat/1 [urls (47.78 %) ] 1.68ms ± 0% 1.67ms ± 0% -1.06% (p=0.000 n=10+10)
BM_ZFlat/2 [jpg (99.95 %) ] 11.8µs ± 5% 11.6µs ± 5% ~ (p=0.165 n=10+10)
BM_ZFlat/3 [jpg_200 (73.00 %)] 360ns ± 3% 358ns ± 1% ~ (p=0.762 n=10+8)
BM_ZFlat/4 [pdf (83.30 %) ] 14.8µs ± 2% 14.6µs ± 1% -1.57% (p=0.022 n=10+9)
BM_ZFlat/5 [html4 (22.52 %) ] 556µs ± 0% 552µs ± 0% -0.87% (p=0.000 n=10+10)
BM_ZFlat/6 [txt1 (57.88 %) ] 542µs ± 0% 540µs ± 0% -0.47% (p=0.000 n=10+10)
BM_ZFlat/7 [txt2 (61.91 %) ] 483µs ± 0% 480µs ± 0% -0.62% (p=0.000 n=10+10)
BM_ZFlat/8 [txt3 (54.99 %) ] 1.45ms ± 0% 1.44ms ± 0% -0.47% (p=0.000 n=10+10)
BM_ZFlat/9 [txt4 (66.26 %) ] 1.98ms ± 0% 1.97ms ± 0% -0.19% (p=0.007 n=10+10)
BM_ZFlat/10 [pb (19.68 %) ] 111µs ± 0% 109µs ± 0% -1.75% (p=0.000 n=10+10)
BM_ZFlat/11 [gaviota (37.72 %)] 411µs ± 0% 410µs ± 0% -0.21% (p=0.004 n=10+10)
BM_ZFlat/12 [cp (48.12 %) ] 45.9µs ± 0% 45.5µs ± 0% -0.76% (p=0.000 n=10+10)
BM_ZFlat/13 [c (42.47 %) ] 17.6µs ± 0% 17.5µs ± 0% -0.80% (p=0.000 n=10+10)
BM_ZFlat/14 [lsp (48.37 %) ] 5.50µs ± 0% 5.44µs ± 0% -1.19% (p=0.000 n=9+10)
BM_ZFlat/15 [xls (41.23 %) ] 1.63ms ± 0% 1.61ms ± 0% -1.21% (p=0.000 n=10+10)
BM_ZFlat/16 [xls_200 (78.00 %)] 389ns ± 2% 391ns ± 1% ~ (p=0.182 n=10+9)
BM_ZFlat/17 [bin (18.11 %) ] 509µs ± 0% 506µs ± 0% -0.51% (p=0.000 n=10+10)
BM_ZFlat/18 [bin_200 (7.50 %) ] 92.7ns ± 0% 89.4ns ± 1% -3.55% (p=0.000 n=8+8)
BM_ZFlat/19 [sum (48.96 %) ] 80.2µs ± 0% 78.9µs ± 0% -1.65% (p=0.000 n=10+10)
BM_ZFlat/20 [man (59.21 %) ] 7.59µs ± 1% 7.59µs ± 1% ~ (p=0.912 n=10+10)
name old allocs/op new allocs/op delta
BM_UFlat/0 [html ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/1 [urls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/2 [jpg ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/3 [jpg_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/4 [pdf ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/5 [html4 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/6 [txt1 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/7 [txt2 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/8 [txt3 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/9 [txt4 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/10 [pb ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/11 [gaviota ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/12 [cp ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/13 [c ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/14 [lsp ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/15 [xls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/16 [xls_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/17 [bin ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/18 [bin_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/19 [sum ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlat/20 [man ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UValidate/0 [html ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UValidate/1 [urls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UValidate/2 [jpg ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UValidate/3 [jpg_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UValidate/4 [pdf ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UIOVec/0 [html ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UIOVec/1 [urls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UIOVec/2 [jpg ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UIOVec/3 [jpg_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UIOVec/4 [pdf ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/0 [html ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/1 [urls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/2 [jpg ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/3 [jpg_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/4 [pdf ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/5 [html4 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/6 [txt1 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/7 [txt2 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/8 [txt3 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/9 [txt4 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/10 [pb ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/11 [gaviota ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/12 [cp ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/13 [c ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/14 [lsp ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/15 [xls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/16 [xls_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/17 [bin ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/18 [bin_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/19 [sum ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_UFlatSink/20 [man ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal)
BM_ZFlat/0 [html (22.31 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/1 [urls (47.78 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/2 [jpg (99.95 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/3 [jpg_200 (73.00 %)] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/4 [pdf (83.30 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/5 [html4 (22.52 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/6 [txt1 (57.88 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/7 [txt2 (61.91 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/8 [txt3 (54.99 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/9 [txt4 (66.26 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/10 [pb (19.68 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/11 [gaviota (37.72 %)] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/12 [cp (48.12 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/13 [c (42.47 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/14 [lsp (48.37 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/15 [xls (41.23 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/16 [xls_200 (78.00 %)] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/17 [bin (18.11 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/18 [bin_200 (7.50 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/19 [sum (48.96 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ZFlat/20 [man (59.21 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
name old peak-mem(Bytes)/op new peak-mem(Bytes)/op delta
BM_UFlat/0 [html ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/1 [urls ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/2 [jpg ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/3 [jpg_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/4 [pdf ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/5 [html4 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/6 [txt1 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/7 [txt2 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/8 [txt3 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/9 [txt4 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/10 [pb ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/11 [gaviota ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/12 [cp ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/13 [c ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/14 [lsp ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/15 [xls ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/16 [xls_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/17 [bin ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/18 [bin_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/19 [sum ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlat/20 [man ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UValidate/0 [html ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UValidate/1 [urls ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UValidate/2 [jpg ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UValidate/3 [jpg_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UValidate/4 [pdf ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UIOVec/0 [html ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UIOVec/1 [urls ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UIOVec/2 [jpg ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UIOVec/3 [jpg_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UIOVec/4 [pdf ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal)
BM_UFlatSink/0 [html ] 102k ± 0% 102k ± 0% ~ (all samples are equal)
BM_UFlatSink/1 [urls ] 702k ± 0% 702k ± 0% ~ (all samples are equal)
BM_UFlatSink/2 [jpg ] 123k ± 0% 123k ± 0% ~ (all samples are equal)
BM_UFlatSink/3 [jpg_200 ] 201 ± 0% 201 ± 0% ~ (all samples are equal)
BM_UFlatSink/4 [pdf ] 102k ± 0% 102k ± 0% ~ (all samples are equal)
BM_UFlatSink/5 [html4 ] 410k ± 0% 410k ± 0% ~ (all samples are equal)
BM_UFlatSink/6 [txt1 ] 152k ± 0% 152k ± 0% ~ (all samples are equal)
BM_UFlatSink/7 [txt2 ] 125k ± 0% 125k ± 0% ~ (all samples are equal)
BM_UFlatSink/8 [txt3 ] 427k ± 0% 427k ± 0% ~ (all samples are equal)
BM_UFlatSink/9 [txt4 ] 482k ± 0% 482k ± 0% ~ (all samples are equal)
BM_UFlatSink/10 [pb ] 119k ± 0% 119k ± 0% ~ (all samples are equal)
BM_UFlatSink/11 [gaviota ] 184k ± 0% 184k ± 0% ~ (all samples are equal)
BM_UFlatSink/12 [cp ] 24.6k ± 0% 24.6k ± 0% ~ (all samples are equal)
BM_UFlatSink/13 [c ] 11.2k ± 0% 11.2k ± 0% ~ (all samples are equal)
BM_UFlatSink/14 [lsp ] 3.72k ± 0% 3.72k ± 0% ~ (all samples are equal)
BM_UFlatSink/15 [xls ] 1.03M ± 0% 1.03M ± 0% ~ (all samples are equal)
BM_UFlatSink/16 [xls_200 ] 201 ± 0% 201 ± 0% ~ (all samples are equal)
BM_UFlatSink/17 [bin ] 513k ± 0% 513k ± 0% ~ (all samples are equal)
BM_UFlatSink/18 [bin_200 ] 201 ± 0% 201 ± 0% ~ (all samples are equal)
BM_UFlatSink/19 [sum ] 38.2k ± 0% 38.2k ± 0% ~ (all samples are equal)
BM_UFlatSink/20 [man ] 4.23k ± 0% 4.23k ± 0% ~ (all samples are equal)
BM_ZFlat/0 [html (22.31 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/1 [urls (47.78 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/2 [jpg (99.95 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/3 [jpg_200 (73.00 %)] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal)
BM_ZFlat/4 [pdf (83.30 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/5 [html4 (22.52 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/6 [txt1 (57.88 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/7 [txt2 (61.91 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/8 [txt3 (54.99 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/9 [txt4 (66.26 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/10 [pb (19.68 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/11 [gaviota (37.72 %)] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/12 [cp (48.12 %) ] 86.1k ± 0% 86.1k ± 0% ~ (all samples are equal)
BM_ZFlat/13 [c (42.47 %) ] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal)
BM_ZFlat/14 [lsp (48.37 %) ] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal)
BM_ZFlat/15 [xls (41.23 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/16 [xls_200 (78.00 %)] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal)
BM_ZFlat/17 [bin (18.11 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal)
BM_ZFlat/18 [bin_200 (7.50 %) ] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal)
BM_ZFlat/19 [sum (48.96 %) ] 116k ± 0% 116k ± 0% ~ (all samples are equal)
BM_ZFlat/20 [man (59.21 %) ] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal)
name old speed new speed delta
BM_UFlat/0 [html ] 2.45GB/s ± 0% 2.50GB/s ± 0% +1.96% (p=0.000 n=10+10)
BM_UFlat/1 [urls ] 1.22GB/s ± 0% 1.23GB/s ± 0% +0.69% (p=0.000 n=10+10)
BM_UFlat/2 [jpg ] 17.0GB/s ± 5% 17.3GB/s ± 1% ~ (p=0.074 n=9+8)
BM_UFlat/3 [jpg_200 ] 1.52GB/s ± 1% 1.54GB/s ± 0% +1.44% (p=0.000 n=10+8)
BM_UFlat/4 [pdf ] 12.5GB/s ± 1% 12.5GB/s ± 0% ~ (p=0.721 n=8+8)
BM_UFlat/5 [html4 ] 1.87GB/s ± 0% 1.88GB/s ± 0% +0.76% (p=0.000 n=10+10)
BM_UFlat/6 [txt1 ] 795MB/s ± 0% 801MB/s ± 0% +0.79% (p=0.000 n=10+10)
BM_UFlat/7 [txt2 ] 741MB/s ± 0% 746MB/s ± 0% +0.68% (p=0.000 n=10+10)
BM_UFlat/8 [txt3 ] 840MB/s ± 0% 844MB/s ± 0% +0.44% (p=0.000 n=10+10)
BM_UFlat/9 [txt4 ] 684MB/s ± 0% 688MB/s ± 0% +0.65% (p=0.000 n=9+10)
BM_UFlat/10 [pb ] 3.09GB/s ± 0% 3.18GB/s ± 0% +2.88% (p=0.000 n=10+9)
BM_UFlat/11 [gaviota ] 980MB/s ± 0% 975MB/s ± 0% -0.57% (p=0.000 n=10+10)
BM_UFlat/12 [cp ] 1.74GB/s ± 0% 1.75GB/s ± 0% +0.38% (p=0.001 n=10+9)
BM_UFlat/13 [c ] 1.53GB/s ± 1% 1.52GB/s ± 0% -0.55% (p=0.003 n=10+10)
BM_UFlat/14 [lsp ] 1.64GB/s ± 0% 1.64GB/s ± 1% ~ (p=0.400 n=9+10)
BM_UFlat/15 [xls ] 1.14GB/s ± 0% 1.14GB/s ± 0% +0.23% (p=0.000 n=10+10)
BM_UFlat/16 [xls_200 ] 936MB/s ± 1% 941MB/s ± 1% ~ (p=0.052 n=10+10)
BM_UFlat/17 [bin ] 1.87GB/s ± 0% 1.88GB/s ± 0% +0.28% (p=0.000 n=10+10)
BM_UFlat/18 [bin_200 ] 1.97GB/s ± 5% 1.99GB/s ± 3% ~ (p=0.136 n=9+9)
BM_UFlat/19 [sum ] 1.37GB/s ± 0% 1.41GB/s ± 0% +2.82% (p=0.000 n=10+9)
BM_UFlat/20 [man ] 1.42GB/s ± 1% 1.42GB/s ± 0% ~ (p=0.579 n=10+10)
BM_UValidate/0 [html ] 3.08GB/s ± 0% 3.05GB/s ± 0% -1.18% (p=0.000 n=10+10)
BM_UValidate/1 [urls ] 1.59GB/s ± 0% 1.59GB/s ± 0% ~ (p=0.247 n=10+10)
BM_UValidate/2 [jpg ] 845GB/s ± 0% 846GB/s ± 0% +0.09% (p=0.000 n=10+10)
BM_UValidate/3 [jpg_200 ] 2.04GB/s ± 0% 2.04GB/s ± 0% -0.09% (p=0.019 n=10+10)
BM_UValidate/4 [pdf ] 35.7GB/s ± 0% 35.4GB/s ± 1% -0.70% (p=0.000 n=10+10)
BM_UIOVec/0 [html ] 841MB/s ± 0% 844MB/s ± 0% +0.36% (p=0.000 n=10+10)
BM_UIOVec/1 [urls ] 650MB/s ± 0% 650MB/s ± 0% ~ (p=0.105 n=10+10)
BM_UIOVec/2 [jpg ] 16.1GB/s ±10% 15.9GB/s ± 8% ~ (p=0.853 n=10+10)
BM_UIOVec/3 [jpg_200 ] 612MB/s ± 1% 612MB/s ± 0% ~ (p=0.243 n=9+10)
BM_UIOVec/4 [pdf ] 8.52GB/s ± 2% 8.46GB/s ± 3% ~ (p=0.436 n=10+10)
BM_UFlatSink/0 [html ] 2.46GB/s ± 0% 2.50GB/s ± 0% +1.83% (p=0.000 n=9+10)
BM_UFlatSink/1 [urls ] 1.22GB/s ± 0% 1.23GB/s ± 0% +0.73% (p=0.000 n=10+10)
BM_UFlatSink/2 [jpg ] 16.3GB/s ± 8% 16.4GB/s ± 9% ~ (p=0.739 n=10+10)
BM_UFlatSink/3 [jpg_200 ] 1.51GB/s ± 0% 1.50GB/s ± 0% -0.62% (p=0.000 n=10+9)
BM_UFlatSink/4 [pdf ] 12.2GB/s ± 3% 12.4GB/s ± 1% +1.62% (p=0.029 n=10+10)
BM_UFlatSink/5 [html4 ] 1.87GB/s ± 0% 1.88GB/s ± 0% +0.79% (p=0.000 n=10+10)
BM_UFlatSink/6 [txt1 ] 795MB/s ± 0% 801MB/s ± 0% +0.74% (p=0.000 n=10+9)
BM_UFlatSink/7 [txt2 ] 741MB/s ± 0% 745MB/s ± 0% +0.59% (p=0.000 n=10+9)
BM_UFlatSink/8 [txt3 ] 840MB/s ± 0% 843MB/s ± 0% +0.37% (p=0.000 n=9+10)
BM_UFlatSink/9 [txt4 ] 684MB/s ± 0% 688MB/s ± 0% +0.57% (p=0.000 n=9+10)
BM_UFlatSink/10 [pb ] 3.10GB/s ± 0% 3.18GB/s ± 0% +2.64% (p=0.000 n=9+10)
BM_UFlatSink/11 [gaviota ] 980MB/s ± 0% 974MB/s ± 0% -0.64% (p=0.000 n=10+10)
BM_UFlatSink/12 [cp ] 1.74GB/s ± 0% 1.75GB/s ± 0% +0.26% (p=0.005 n=10+10)
BM_UFlatSink/13 [c ] 1.52GB/s ± 1% 1.52GB/s ± 1% ~ (p=0.123 n=10+10)
BM_UFlatSink/14 [lsp ] 1.64GB/s ± 0% 1.65GB/s ± 0% +0.46% (p=0.000 n=10+8)
BM_UFlatSink/15 [xls ] 1.14GB/s ± 0% 1.15GB/s ± 0% +0.27% (p=0.000 n=10+10)
BM_UFlatSink/16 [xls_200 ] 927MB/s ± 1% 926MB/s ± 1% ~ (p=0.497 n=10+9)
BM_UFlatSink/17 [bin ] 1.87GB/s ± 0% 1.88GB/s ± 0% +0.27% (p=0.000 n=10+10)
BM_UFlatSink/18 [bin_200 ] 1.92GB/s ± 2% 1.93GB/s ± 1% +0.70% (p=0.035 n=9+10)
BM_UFlatSink/19 [sum ] 1.38GB/s ± 0% 1.41GB/s ± 0% +2.59% (p=0.000 n=9+10)
BM_UFlatSink/20 [man ] 1.40GB/s ± 1% 1.41GB/s ± 1% ~ (p=0.079 n=10+9)
BM_ZFlat/0 [html (22.31 %) ] 814MB/s ± 0% 816MB/s ± 0% +0.23% (p=0.000 n=10+10)
BM_ZFlat/1 [urls (47.78 %) ] 418MB/s ± 0% 423MB/s ± 0% +1.06% (p=0.000 n=10+10)
BM_ZFlat/2 [jpg (99.95 %) ] 10.5GB/s ± 5% 10.7GB/s ± 5% ~ (p=0.165 n=10+10)
BM_ZFlat/3 [jpg_200 (73.00 %)] 558MB/s ± 3% 560MB/s ± 1% ~ (p=0.696 n=10+8)
BM_ZFlat/4 [pdf (83.30 %) ] 6.94GB/s ± 2% 7.05GB/s ± 1% +1.59% (p=0.028 n=10+9)
BM_ZFlat/5 [html4 (22.52 %) ] 739MB/s ± 0% 745MB/s ± 0% +0.86% (p=0.000 n=10+10)
BM_ZFlat/6 [txt1 (57.88 %) ] 281MB/s ± 0% 283MB/s ± 0% +0.46% (p=0.000 n=10+10)
BM_ZFlat/7 [txt2 (61.91 %) ] 260MB/s ± 0% 261MB/s ± 0% +0.59% (p=0.000 n=10+10)
BM_ZFlat/8 [txt3 (54.99 %) ] 296MB/s ± 0% 297MB/s ± 0% +0.45% (p=0.000 n=10+10)
BM_ZFlat/9 [txt4 (66.26 %) ] 244MB/s ± 0% 245MB/s ± 0% +0.16% (p=0.000 n=10+10)
BM_ZFlat/10 [pb (19.68 %) ] 1.07GB/s ± 0% 1.09GB/s ± 0% +1.75% (p=0.000 n=10+10)
BM_ZFlat/11 [gaviota (37.72 %)] 450MB/s ± 0% 451MB/s ± 0% +0.17% (p=0.000 n=9+10)
BM_ZFlat/12 [cp (48.12 %) ] 538MB/s ± 0% 542MB/s ± 0% +0.74% (p=0.000 n=10+10)
BM_ZFlat/13 [c (42.47 %) ] 635MB/s ± 0% 640MB/s ± 0% +0.80% (p=0.000 n=10+10)
BM_ZFlat/14 [lsp (48.37 %) ] 678MB/s ± 0% 686MB/s ± 1% +1.18% (p=0.000 n=9+10)
BM_ZFlat/15 [xls (41.23 %) ] 633MB/s ± 0% 641MB/s ± 0% +1.23% (p=0.000 n=10+7)
BM_ZFlat/16 [xls_200 (78.00 %)] 516MB/s ± 2% 513MB/s ± 1% ~ (p=0.156 n=10+9)
BM_ZFlat/17 [bin (18.11 %) ] 1.01GB/s ± 0% 1.02GB/s ± 0% +0.49% (p=0.000 n=10+10)
BM_ZFlat/18 [bin_200 (7.50 %) ] 2.16GB/s ± 0% 2.24GB/s ± 1% +3.65% (p=0.000 n=8+8)
BM_ZFlat/19 [sum (48.96 %) ] 478MB/s ± 0% 486MB/s ± 0% +1.66% (p=0.000 n=10+10)
BM_ZFlat/20 [man (59.21 %) ] 558MB/s ± 1% 558MB/s ± 1% ~ (p=0.912 n=10+10)
Diffstat (limited to 'snappy.cc')
-rw-r--r-- | snappy.cc | 35 |
1 files changed, 22 insertions, 13 deletions
@@ -313,10 +313,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, } // namespace +template <bool allow_fast_path> static inline char* EmitLiteral(char* op, const char* literal, - int len, - bool allow_fast_path) { + int len) { // The vast majority of copies are below 16 bytes, for which a // call to memcpy is overkill. This fast path can sometimes // copy up to 15 bytes too much, but that is okay in the @@ -358,8 +358,8 @@ static inline char* EmitLiteral(char* op, return op + len; } -static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len, - bool len_less_than_12) { +template <bool len_less_than_12> +static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { assert(len <= 64); assert(len >= 4); assert(offset < 65536); @@ -380,29 +380,33 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len, return op; } -static inline char* EmitCopy(char* op, size_t offset, size_t len, - bool len_less_than_12) { +template <bool len_less_than_12> +static inline char* EmitCopy(char* op, size_t offset, size_t len) { assert(len_less_than_12 == (len < 12)); if (len_less_than_12) { - return EmitCopyAtMost64(op, offset, len, true); + return EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len); } else { // A special case for len <= 64 might help, but so far measurements suggest // it's in the noise. // Emit 64 byte copies but make sure to keep at least four bytes reserved. while (SNAPPY_PREDICT_FALSE(len >= 68)) { - op = EmitCopyAtMost64(op, offset, 64, false); + op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 64); len -= 64; } // One or two copies will now finish the job. if (len > 64) { - op = EmitCopyAtMost64(op, offset, 60, false); + op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 60); len -= 60; } // Emit remainder. - op = EmitCopyAtMost64(op, offset, len, len < 12); + if (len < 12) { + op = EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len); + } else { + op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, len); + } return op; } } @@ -586,7 +590,7 @@ char* CompressFragment(const char* input, // than 4 bytes match. But, prior to the match, input // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." assert(next_emit + 16 <= ip_end); - op = EmitLiteral(op, next_emit, ip - next_emit, true); + op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit, ip - next_emit); // Step 3: Call EmitCopy, and then see if another EmitCopy could // be our next move. Repeat until we find no match for the @@ -609,7 +613,11 @@ char* CompressFragment(const char* input, ip += matched; size_t offset = base - candidate; assert(0 == memcmp(base, candidate, matched)); - op = EmitCopy(op, offset, matched, p.second); + if (p.second) { + op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched); + } else { + op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched); + } next_emit = ip; if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { goto emit_remainder; @@ -634,7 +642,8 @@ char* CompressFragment(const char* input, emit_remainder: // Emit the remaining bytes as a literal if (next_emit < ip_end) { - op = EmitLiteral(op, next_emit, ip_end - next_emit, false); + op = EmitLiteral</*allow_fast_path=*/false>(op, next_emit, + ip_end - next_emit); } return op; |