summary refs log tree commit diff
diff options
context:
space:
mode:
author nafi <nafi@google.com> 2018-12-03 17:27:56 -0800
committer Victor Costan <pwnall@chromium.org> 2019-01-04 19:08:30 -0800
commit eb47f79631fc2e23739a5b09c0d46497fd4b95d1 (patch)
tree 6d7cfa963a6e9dda681d06d7270ec0310e5a3694
parent 254966c71e0d6e44e432e03dc8f9121816868ac6 (diff)
download snappy-git-eb47f79631fc2e23739a5b09c0d46497fd4b95d1.tar.gz
Optimize by about 0.5%.
How? Move boolean args of EmitLiteral, EmitCopyAtMost64 and EmitCopy to template args so that compiler generates two separate pruned versions of the functions for arg=true and arg=false. FWIW, CompressFragment function calls 1) EmitLiteral inside from a 1-level loop and 2) EmitCopy from a 2-level nested loop. CompressFragment is called from inside another while-loop from the public 'Compress' function. name old time/op new time/op delta BM_UFlat/0 [html ] 41.9µs ± 0% 41.1µs ± 0% -1.92% (p=0.000 n=10+10) BM_UFlat/1 [urls ] 576µs ± 0% 572µs ± 0% -0.68% (p=0.000 n=10+10) BM_UFlat/2 [jpg ] 7.25µs ± 6% 7.13µs ± 1% ~ (p=0.074 n=9+8) BM_UFlat/3 [jpg_200 ] 132ns ± 1% 130ns ± 0% -1.45% (p=0.000 n=10+8) BM_UFlat/4 [pdf ] 8.27µs ± 3% 8.22µs ± 0% ~ (p=0.277 n=9+8) BM_UFlat/5 [html4 ] 220µs ± 0% 219µs ± 0% -0.75% (p=0.000 n=10+10) BM_UFlat/6 [txt1 ] 192µs ± 0% 190µs ± 0% -0.80% (p=0.000 n=10+10) BM_UFlat/7 [txt2 ] 169µs ± 0% 168µs ± 0% -0.69% (p=0.000 n=10+10) BM_UFlat/8 [txt3 ] 510µs ± 0% 508µs ± 0% -0.42% (p=0.000 n=10+10) BM_UFlat/9 [txt4 ] 707µs ± 0% 702µs ± 0% -0.67% (p=0.000 n=10+10) BM_UFlat/10 [pb ] 38.5µs ± 0% 37.4µs ± 1% -2.84% (p=0.000 n=10+10) BM_UFlat/11 [gaviota ] 189µs ± 0% 190µs ± 0% +0.55% (p=0.000 n=10+10) BM_UFlat/12 [cp ] 14.2µs ± 0% 14.1µs ± 0% -0.44% (p=0.000 n=10+10) BM_UFlat/13 [c ] 7.31µs ± 1% 7.35µs ± 0% +0.54% (p=0.002 n=10+10) BM_UFlat/14 [lsp ] 2.27µs ± 0% 2.27µs ± 1% ~ (p=0.161 n=9+9) BM_UFlat/15 [xls ] 905µs ± 0% 903µs ± 0% -0.25% (p=0.000 n=10+10) BM_UFlat/16 [xls_200 ] 214ns ± 1% 213ns ± 1% -0.57% (p=0.043 n=10+10) BM_UFlat/17 [bin ] 275µs ± 0% 274µs ± 0% -0.31% (p=0.000 n=10+10) BM_UFlat/18 [bin_200 ] 102ns ± 5% 101ns ± 3% ~ (p=0.161 n=9+9) BM_UFlat/19 [sum ] 27.9µs ± 0% 27.2µs ± 0% -2.68% (p=0.000 n=10+10) BM_UFlat/20 [man ] 2.97µs ± 1% 2.97µs ± 0% ~ (p=0.400 n=9+10) BM_UValidate/0 [html ] 33.3µs ± 0% 33.7µs ± 0% +1.18% (p=0.000 n=10+10) BM_UValidate/1 [urls ] 442µs ± 0% 442µs ± 0% ~ (p=0.353 n=10+10) BM_UValidate/2 [jpg ] 146ns ± 0% 146ns ± 
0% ~ (p=0.063 n=10+10) BM_UValidate/3 [jpg_200 ] 98.4ns ± 0% 98.5ns ± 0% ~ (p=0.184 n=10+10) BM_UValidate/4 [pdf ] 2.88µs ± 0% 2.90µs ± 1% +0.68% (p=0.000 n=10+10) BM_UIOVec/0 [html ] 122µs ± 0% 122µs ± 0% -0.39% (p=0.000 n=10+10) BM_UIOVec/1 [urls ] 1.08ms ± 0% 1.08ms ± 0% ~ (p=0.529 n=10+10) BM_UIOVec/2 [jpg ] 7.71µs ±11% 7.76µs ± 9% ~ (p=0.853 n=10+10) BM_UIOVec/3 [jpg_200 ] 327ns ± 0% 328ns ± 0% ~ (p=0.146 n=8+10) BM_UIOVec/4 [pdf ] 12.1µs ± 1% 12.1µs ± 3% ~ (p=0.315 n=10+10) BM_UFlatSink/0 [html ] 41.8µs ± 0% 41.0µs ± 0% -1.87% (p=0.000 n=10+9) BM_UFlatSink/1 [urls ] 576µs ± 0% 572µs ± 0% -0.74% (p=0.000 n=9+10) BM_UFlatSink/2 [jpg ] 7.58µs ± 8% 7.56µs ± 9% ~ (p=0.739 n=10+10) BM_UFlatSink/3 [jpg_200 ] 133ns ± 0% 134ns ± 0% +0.60% (p=0.000 n=10+9) BM_UFlatSink/4 [pdf ] 8.44µs ± 3% 8.30µs ± 1% -1.65% (p=0.029 n=10+10) BM_UFlatSink/5 [html4 ] 220µs ± 0% 218µs ± 0% -0.81% (p=0.000 n=10+10) BM_UFlatSink/6 [txt1 ] 192µs ± 0% 190µs ± 0% -0.78% (p=0.000 n=10+10) BM_UFlatSink/7 [txt2 ] 169µs ± 0% 168µs ± 0% -0.59% (p=0.000 n=10+10) BM_UFlatSink/8 [txt3 ] 510µs ± 0% 508µs ± 0% -0.39% (p=0.000 n=10+10) BM_UFlatSink/9 [txt4 ] 707µs ± 0% 703µs ± 0% -0.62% (p=0.000 n=10+10) BM_UFlatSink/10 [pb ] 38.4µs ± 0% 37.4µs ± 0% -2.62% (p=0.000 n=9+9) BM_UFlatSink/11 [gaviota ] 189µs ± 0% 190µs ± 0% +0.63% (p=0.000 n=10+10) BM_UFlatSink/12 [cp ] 14.2µs ± 0% 14.1µs ± 0% -0.27% (p=0.011 n=10+10) BM_UFlatSink/13 [c ] 7.33µs ± 1% 7.35µs ± 1% ~ (p=0.243 n=10+9) BM_UFlatSink/14 [lsp ] 2.27µs ± 0% 2.26µs ± 0% -0.39% (p=0.000 n=9+9) BM_UFlatSink/15 [xls ] 904µs ± 0% 902µs ± 0% -0.28% (p=0.000 n=10+10) BM_UFlatSink/16 [xls_200 ] 216ns ± 1% 217ns ± 1% ~ (p=0.661 n=10+9) BM_UFlatSink/17 [bin ] 275µs ± 0% 274µs ± 0% -0.24% (p=0.000 n=8+9) BM_UFlatSink/18 [bin_200 ] 104ns ± 2% 104ns ± 1% -0.70% (p=0.043 n=9+10) BM_UFlatSink/19 [sum ] 27.8µs ± 0% 27.1µs ± 0% -2.51% (p=0.000 n=9+10) BM_UFlatSink/20 [man ] 3.02µs ± 1% 3.00µs ± 1% ~ (p=0.079 n=10+9) BM_ZFlat/0 [html (22.31 %) ] 126µs ± 0% 126µs ± 0% 
-0.24% (p=0.000 n=10+10) BM_ZFlat/1 [urls (47.78 %) ] 1.68ms ± 0% 1.67ms ± 0% -1.06% (p=0.000 n=10+10) BM_ZFlat/2 [jpg (99.95 %) ] 11.8µs ± 5% 11.6µs ± 5% ~ (p=0.165 n=10+10) BM_ZFlat/3 [jpg_200 (73.00 %)] 360ns ± 3% 358ns ± 1% ~ (p=0.762 n=10+8) BM_ZFlat/4 [pdf (83.30 %) ] 14.8µs ± 2% 14.6µs ± 1% -1.57% (p=0.022 n=10+9) BM_ZFlat/5 [html4 (22.52 %) ] 556µs ± 0% 552µs ± 0% -0.87% (p=0.000 n=10+10) BM_ZFlat/6 [txt1 (57.88 %) ] 542µs ± 0% 540µs ± 0% -0.47% (p=0.000 n=10+10) BM_ZFlat/7 [txt2 (61.91 %) ] 483µs ± 0% 480µs ± 0% -0.62% (p=0.000 n=10+10) BM_ZFlat/8 [txt3 (54.99 %) ] 1.45ms ± 0% 1.44ms ± 0% -0.47% (p=0.000 n=10+10) BM_ZFlat/9 [txt4 (66.26 %) ] 1.98ms ± 0% 1.97ms ± 0% -0.19% (p=0.007 n=10+10) BM_ZFlat/10 [pb (19.68 %) ] 111µs ± 0% 109µs ± 0% -1.75% (p=0.000 n=10+10) BM_ZFlat/11 [gaviota (37.72 %)] 411µs ± 0% 410µs ± 0% -0.21% (p=0.004 n=10+10) BM_ZFlat/12 [cp (48.12 %) ] 45.9µs ± 0% 45.5µs ± 0% -0.76% (p=0.000 n=10+10) BM_ZFlat/13 [c (42.47 %) ] 17.6µs ± 0% 17.5µs ± 0% -0.80% (p=0.000 n=10+10) BM_ZFlat/14 [lsp (48.37 %) ] 5.50µs ± 0% 5.44µs ± 0% -1.19% (p=0.000 n=9+10) BM_ZFlat/15 [xls (41.23 %) ] 1.63ms ± 0% 1.61ms ± 0% -1.21% (p=0.000 n=10+10) BM_ZFlat/16 [xls_200 (78.00 %)] 389ns ± 2% 391ns ± 1% ~ (p=0.182 n=10+9) BM_ZFlat/17 [bin (18.11 %) ] 509µs ± 0% 506µs ± 0% -0.51% (p=0.000 n=10+10) BM_ZFlat/18 [bin_200 (7.50 %) ] 92.7ns ± 0% 89.4ns ± 1% -3.55% (p=0.000 n=8+8) BM_ZFlat/19 [sum (48.96 %) ] 80.2µs ± 0% 78.9µs ± 0% -1.65% (p=0.000 n=10+10) BM_ZFlat/20 [man (59.21 %) ] 7.59µs ± 1% 7.59µs ± 1% ~ (p=0.912 n=10+10) name old allocs/op new allocs/op delta BM_UFlat/0 [html ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/1 [urls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/2 [jpg ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/3 [jpg_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/4 [pdf ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/5 [html4 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) 
BM_UFlat/6 [txt1 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/7 [txt2 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/8 [txt3 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/9 [txt4 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/10 [pb ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/11 [gaviota ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/12 [cp ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/13 [c ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/14 [lsp ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/15 [xls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/16 [xls_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/17 [bin ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/18 [bin_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/19 [sum ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlat/20 [man ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UValidate/0 [html ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UValidate/1 [urls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UValidate/2 [jpg ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UValidate/3 [jpg_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UValidate/4 [pdf ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UIOVec/0 [html ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UIOVec/1 [urls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UIOVec/2 [jpg ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UIOVec/3 [jpg_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UIOVec/4 [pdf ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/0 [html ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/1 [urls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/2 [jpg ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/3 [jpg_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) 
BM_UFlatSink/4 [pdf ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/5 [html4 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/6 [txt1 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/7 [txt2 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/8 [txt3 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/9 [txt4 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/10 [pb ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/11 [gaviota ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/12 [cp ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/13 [c ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/14 [lsp ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/15 [xls ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/16 [xls_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/17 [bin ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/18 [bin_200 ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/19 [sum ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_UFlatSink/20 [man ] 0.00 ±NaN% 0.00 ±NaN% ~ (all samples are equal) BM_ZFlat/0 [html (22.31 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/1 [urls (47.78 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/2 [jpg (99.95 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/3 [jpg_200 (73.00 %)] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/4 [pdf (83.30 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/5 [html4 (22.52 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/6 [txt1 (57.88 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/7 [txt2 (61.91 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/8 [txt3 (54.99 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/9 [txt4 (66.26 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/10 [pb (19.68 %) ] 1.00 
± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/11 [gaviota (37.72 %)] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/12 [cp (48.12 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/13 [c (42.47 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/14 [lsp (48.37 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/15 [xls (41.23 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/16 [xls_200 (78.00 %)] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/17 [bin (18.11 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/18 [bin_200 (7.50 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/19 [sum (48.96 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) BM_ZFlat/20 [man (59.21 %) ] 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal) name old peak-mem(Bytes)/op new peak-mem(Bytes)/op delta BM_UFlat/0 [html ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/1 [urls ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/2 [jpg ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/3 [jpg_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/4 [pdf ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/5 [html4 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/6 [txt1 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/7 [txt2 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/8 [txt3 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/9 [txt4 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/10 [pb ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/11 [gaviota ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/12 [cp ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/13 [c ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/14 [lsp ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/15 [xls ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/16 [xls_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/17 [bin ] 4.00 ± 0% 4.00 
± 0% ~ (all samples are equal) BM_UFlat/18 [bin_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/19 [sum ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlat/20 [man ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UValidate/0 [html ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UValidate/1 [urls ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UValidate/2 [jpg ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UValidate/3 [jpg_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UValidate/4 [pdf ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UIOVec/0 [html ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UIOVec/1 [urls ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UIOVec/2 [jpg ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UIOVec/3 [jpg_200 ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UIOVec/4 [pdf ] 4.00 ± 0% 4.00 ± 0% ~ (all samples are equal) BM_UFlatSink/0 [html ] 102k ± 0% 102k ± 0% ~ (all samples are equal) BM_UFlatSink/1 [urls ] 702k ± 0% 702k ± 0% ~ (all samples are equal) BM_UFlatSink/2 [jpg ] 123k ± 0% 123k ± 0% ~ (all samples are equal) BM_UFlatSink/3 [jpg_200 ] 201 ± 0% 201 ± 0% ~ (all samples are equal) BM_UFlatSink/4 [pdf ] 102k ± 0% 102k ± 0% ~ (all samples are equal) BM_UFlatSink/5 [html4 ] 410k ± 0% 410k ± 0% ~ (all samples are equal) BM_UFlatSink/6 [txt1 ] 152k ± 0% 152k ± 0% ~ (all samples are equal) BM_UFlatSink/7 [txt2 ] 125k ± 0% 125k ± 0% ~ (all samples are equal) BM_UFlatSink/8 [txt3 ] 427k ± 0% 427k ± 0% ~ (all samples are equal) BM_UFlatSink/9 [txt4 ] 482k ± 0% 482k ± 0% ~ (all samples are equal) BM_UFlatSink/10 [pb ] 119k ± 0% 119k ± 0% ~ (all samples are equal) BM_UFlatSink/11 [gaviota ] 184k ± 0% 184k ± 0% ~ (all samples are equal) BM_UFlatSink/12 [cp ] 24.6k ± 0% 24.6k ± 0% ~ (all samples are equal) BM_UFlatSink/13 [c ] 11.2k ± 0% 11.2k ± 0% ~ (all samples are equal) BM_UFlatSink/14 [lsp ] 3.72k ± 0% 3.72k ± 0% ~ (all samples are equal) BM_UFlatSink/15 [xls ] 1.03M ± 0% 1.03M ± 0% ~ (all 
samples are equal) BM_UFlatSink/16 [xls_200 ] 201 ± 0% 201 ± 0% ~ (all samples are equal) BM_UFlatSink/17 [bin ] 513k ± 0% 513k ± 0% ~ (all samples are equal) BM_UFlatSink/18 [bin_200 ] 201 ± 0% 201 ± 0% ~ (all samples are equal) BM_UFlatSink/19 [sum ] 38.2k ± 0% 38.2k ± 0% ~ (all samples are equal) BM_UFlatSink/20 [man ] 4.23k ± 0% 4.23k ± 0% ~ (all samples are equal) BM_ZFlat/0 [html (22.31 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/1 [urls (47.78 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/2 [jpg (99.95 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/3 [jpg_200 (73.00 %)] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal) BM_ZFlat/4 [pdf (83.30 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/5 [html4 (22.52 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/6 [txt1 (57.88 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/7 [txt2 (61.91 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/8 [txt3 (54.99 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/9 [txt4 (66.26 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/10 [pb (19.68 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/11 [gaviota (37.72 %)] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/12 [cp (48.12 %) ] 86.1k ± 0% 86.1k ± 0% ~ (all samples are equal) BM_ZFlat/13 [c (42.47 %) ] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal) BM_ZFlat/14 [lsp (48.37 %) ] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal) BM_ZFlat/15 [xls (41.23 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/16 [xls_200 (78.00 %)] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal) BM_ZFlat/17 [bin (18.11 %) ] 175k ± 0% 175k ± 0% ~ (all samples are equal) BM_ZFlat/18 [bin_200 (7.50 %) ] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal) BM_ZFlat/19 [sum (48.96 %) ] 116k ± 0% 116k ± 0% ~ (all samples are equal) BM_ZFlat/20 [man (59.21 %) ] 63.3k ± 0% 63.3k ± 0% ~ (all samples are equal) name old speed new speed delta 
BM_UFlat/0 [html ] 2.45GB/s ± 0% 2.50GB/s ± 0% +1.96% (p=0.000 n=10+10) BM_UFlat/1 [urls ] 1.22GB/s ± 0% 1.23GB/s ± 0% +0.69% (p=0.000 n=10+10) BM_UFlat/2 [jpg ] 17.0GB/s ± 5% 17.3GB/s ± 1% ~ (p=0.074 n=9+8) BM_UFlat/3 [jpg_200 ] 1.52GB/s ± 1% 1.54GB/s ± 0% +1.44% (p=0.000 n=10+8) BM_UFlat/4 [pdf ] 12.5GB/s ± 1% 12.5GB/s ± 0% ~ (p=0.721 n=8+8) BM_UFlat/5 [html4 ] 1.87GB/s ± 0% 1.88GB/s ± 0% +0.76% (p=0.000 n=10+10) BM_UFlat/6 [txt1 ] 795MB/s ± 0% 801MB/s ± 0% +0.79% (p=0.000 n=10+10) BM_UFlat/7 [txt2 ] 741MB/s ± 0% 746MB/s ± 0% +0.68% (p=0.000 n=10+10) BM_UFlat/8 [txt3 ] 840MB/s ± 0% 844MB/s ± 0% +0.44% (p=0.000 n=10+10) BM_UFlat/9 [txt4 ] 684MB/s ± 0% 688MB/s ± 0% +0.65% (p=0.000 n=9+10) BM_UFlat/10 [pb ] 3.09GB/s ± 0% 3.18GB/s ± 0% +2.88% (p=0.000 n=10+9) BM_UFlat/11 [gaviota ] 980MB/s ± 0% 975MB/s ± 0% -0.57% (p=0.000 n=10+10) BM_UFlat/12 [cp ] 1.74GB/s ± 0% 1.75GB/s ± 0% +0.38% (p=0.001 n=10+9) BM_UFlat/13 [c ] 1.53GB/s ± 1% 1.52GB/s ± 0% -0.55% (p=0.003 n=10+10) BM_UFlat/14 [lsp ] 1.64GB/s ± 0% 1.64GB/s ± 1% ~ (p=0.400 n=9+10) BM_UFlat/15 [xls ] 1.14GB/s ± 0% 1.14GB/s ± 0% +0.23% (p=0.000 n=10+10) BM_UFlat/16 [xls_200 ] 936MB/s ± 1% 941MB/s ± 1% ~ (p=0.052 n=10+10) BM_UFlat/17 [bin ] 1.87GB/s ± 0% 1.88GB/s ± 0% +0.28% (p=0.000 n=10+10) BM_UFlat/18 [bin_200 ] 1.97GB/s ± 5% 1.99GB/s ± 3% ~ (p=0.136 n=9+9) BM_UFlat/19 [sum ] 1.37GB/s ± 0% 1.41GB/s ± 0% +2.82% (p=0.000 n=10+9) BM_UFlat/20 [man ] 1.42GB/s ± 1% 1.42GB/s ± 0% ~ (p=0.579 n=10+10) BM_UValidate/0 [html ] 3.08GB/s ± 0% 3.05GB/s ± 0% -1.18% (p=0.000 n=10+10) BM_UValidate/1 [urls ] 1.59GB/s ± 0% 1.59GB/s ± 0% ~ (p=0.247 n=10+10) BM_UValidate/2 [jpg ] 845GB/s ± 0% 846GB/s ± 0% +0.09% (p=0.000 n=10+10) BM_UValidate/3 [jpg_200 ] 2.04GB/s ± 0% 2.04GB/s ± 0% -0.09% (p=0.019 n=10+10) BM_UValidate/4 [pdf ] 35.7GB/s ± 0% 35.4GB/s ± 1% -0.70% (p=0.000 n=10+10) BM_UIOVec/0 [html ] 841MB/s ± 0% 844MB/s ± 0% +0.36% (p=0.000 n=10+10) BM_UIOVec/1 [urls ] 650MB/s ± 0% 650MB/s ± 0% ~ (p=0.105 n=10+10) BM_UIOVec/2 [jpg ] 
16.1GB/s ±10% 15.9GB/s ± 8% ~ (p=0.853 n=10+10) BM_UIOVec/3 [jpg_200 ] 612MB/s ± 1% 612MB/s ± 0% ~ (p=0.243 n=9+10) BM_UIOVec/4 [pdf ] 8.52GB/s ± 2% 8.46GB/s ± 3% ~ (p=0.436 n=10+10) BM_UFlatSink/0 [html ] 2.46GB/s ± 0% 2.50GB/s ± 0% +1.83% (p=0.000 n=9+10) BM_UFlatSink/1 [urls ] 1.22GB/s ± 0% 1.23GB/s ± 0% +0.73% (p=0.000 n=10+10) BM_UFlatSink/2 [jpg ] 16.3GB/s ± 8% 16.4GB/s ± 9% ~ (p=0.739 n=10+10) BM_UFlatSink/3 [jpg_200 ] 1.51GB/s ± 0% 1.50GB/s ± 0% -0.62% (p=0.000 n=10+9) BM_UFlatSink/4 [pdf ] 12.2GB/s ± 3% 12.4GB/s ± 1% +1.62% (p=0.029 n=10+10) BM_UFlatSink/5 [html4 ] 1.87GB/s ± 0% 1.88GB/s ± 0% +0.79% (p=0.000 n=10+10) BM_UFlatSink/6 [txt1 ] 795MB/s ± 0% 801MB/s ± 0% +0.74% (p=0.000 n=10+9) BM_UFlatSink/7 [txt2 ] 741MB/s ± 0% 745MB/s ± 0% +0.59% (p=0.000 n=10+9) BM_UFlatSink/8 [txt3 ] 840MB/s ± 0% 843MB/s ± 0% +0.37% (p=0.000 n=9+10) BM_UFlatSink/9 [txt4 ] 684MB/s ± 0% 688MB/s ± 0% +0.57% (p=0.000 n=9+10) BM_UFlatSink/10 [pb ] 3.10GB/s ± 0% 3.18GB/s ± 0% +2.64% (p=0.000 n=9+10) BM_UFlatSink/11 [gaviota ] 980MB/s ± 0% 974MB/s ± 0% -0.64% (p=0.000 n=10+10) BM_UFlatSink/12 [cp ] 1.74GB/s ± 0% 1.75GB/s ± 0% +0.26% (p=0.005 n=10+10) BM_UFlatSink/13 [c ] 1.52GB/s ± 1% 1.52GB/s ± 1% ~ (p=0.123 n=10+10) BM_UFlatSink/14 [lsp ] 1.64GB/s ± 0% 1.65GB/s ± 0% +0.46% (p=0.000 n=10+8) BM_UFlatSink/15 [xls ] 1.14GB/s ± 0% 1.15GB/s ± 0% +0.27% (p=0.000 n=10+10) BM_UFlatSink/16 [xls_200 ] 927MB/s ± 1% 926MB/s ± 1% ~ (p=0.497 n=10+9) BM_UFlatSink/17 [bin ] 1.87GB/s ± 0% 1.88GB/s ± 0% +0.27% (p=0.000 n=10+10) BM_UFlatSink/18 [bin_200 ] 1.92GB/s ± 2% 1.93GB/s ± 1% +0.70% (p=0.035 n=9+10) BM_UFlatSink/19 [sum ] 1.38GB/s ± 0% 1.41GB/s ± 0% +2.59% (p=0.000 n=9+10) BM_UFlatSink/20 [man ] 1.40GB/s ± 1% 1.41GB/s ± 1% ~ (p=0.079 n=10+9) BM_ZFlat/0 [html (22.31 %) ] 814MB/s ± 0% 816MB/s ± 0% +0.23% (p=0.000 n=10+10) BM_ZFlat/1 [urls (47.78 %) ] 418MB/s ± 0% 423MB/s ± 0% +1.06% (p=0.000 n=10+10) BM_ZFlat/2 [jpg (99.95 %) ] 10.5GB/s ± 5% 10.7GB/s ± 5% ~ (p=0.165 n=10+10) BM_ZFlat/3 
[jpg_200 (73.00 %)] 558MB/s ± 3% 560MB/s ± 1% ~ (p=0.696 n=10+8) BM_ZFlat/4 [pdf (83.30 %) ] 6.94GB/s ± 2% 7.05GB/s ± 1% +1.59% (p=0.028 n=10+9) BM_ZFlat/5 [html4 (22.52 %) ] 739MB/s ± 0% 745MB/s ± 0% +0.86% (p=0.000 n=10+10) BM_ZFlat/6 [txt1 (57.88 %) ] 281MB/s ± 0% 283MB/s ± 0% +0.46% (p=0.000 n=10+10) BM_ZFlat/7 [txt2 (61.91 %) ] 260MB/s ± 0% 261MB/s ± 0% +0.59% (p=0.000 n=10+10) BM_ZFlat/8 [txt3 (54.99 %) ] 296MB/s ± 0% 297MB/s ± 0% +0.45% (p=0.000 n=10+10) BM_ZFlat/9 [txt4 (66.26 %) ] 244MB/s ± 0% 245MB/s ± 0% +0.16% (p=0.000 n=10+10) BM_ZFlat/10 [pb (19.68 %) ] 1.07GB/s ± 0% 1.09GB/s ± 0% +1.75% (p=0.000 n=10+10) BM_ZFlat/11 [gaviota (37.72 %)] 450MB/s ± 0% 451MB/s ± 0% +0.17% (p=0.000 n=9+10) BM_ZFlat/12 [cp (48.12 %) ] 538MB/s ± 0% 542MB/s ± 0% +0.74% (p=0.000 n=10+10) BM_ZFlat/13 [c (42.47 %) ] 635MB/s ± 0% 640MB/s ± 0% +0.80% (p=0.000 n=10+10) BM_ZFlat/14 [lsp (48.37 %) ] 678MB/s ± 0% 686MB/s ± 1% +1.18% (p=0.000 n=9+10) BM_ZFlat/15 [xls (41.23 %) ] 633MB/s ± 0% 641MB/s ± 0% +1.23% (p=0.000 n=10+7) BM_ZFlat/16 [xls_200 (78.00 %)] 516MB/s ± 2% 513MB/s ± 1% ~ (p=0.156 n=10+9) BM_ZFlat/17 [bin (18.11 %) ] 1.01GB/s ± 0% 1.02GB/s ± 0% +0.49% (p=0.000 n=10+10) BM_ZFlat/18 [bin_200 (7.50 %) ] 2.16GB/s ± 0% 2.24GB/s ± 1% +3.65% (p=0.000 n=8+8) BM_ZFlat/19 [sum (48.96 %) ] 478MB/s ± 0% 486MB/s ± 0% +1.66% (p=0.000 n=10+10) BM_ZFlat/20 [man (59.21 %) ] 558MB/s ± 1% 558MB/s ± 1% ~ (p=0.912 n=10+10)
-rw-r--r-- snappy.cc 35
1 files changed, 22 insertions, 13 deletions
diff --git a/snappy.cc b/snappy.cc
index 34a6346..c8ccad8 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -313,10 +313,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
} // namespace
+template <bool allow_fast_path>
static inline char* EmitLiteral(char* op,
const char* literal,
- int len,
- bool allow_fast_path) {
+ int len) {
// The vast majority of copies are below 16 bytes, for which a
// call to memcpy is overkill. This fast path can sometimes
// copy up to 15 bytes too much, but that is okay in the
@@ -358,8 +358,8 @@ static inline char* EmitLiteral(char* op,
return op + len;
}
-static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
- bool len_less_than_12) {
+template <bool len_less_than_12>
+static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
assert(len <= 64);
assert(len >= 4);
assert(offset < 65536);
@@ -380,29 +380,33 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
return op;
}
-static inline char* EmitCopy(char* op, size_t offset, size_t len,
- bool len_less_than_12) {
+template <bool len_less_than_12>
+static inline char* EmitCopy(char* op, size_t offset, size_t len) {
assert(len_less_than_12 == (len < 12));
if (len_less_than_12) {
- return EmitCopyAtMost64(op, offset, len, true);
+ return EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
} else {
// A special case for len <= 64 might help, but so far measurements suggest
// it's in the noise.
// Emit 64 byte copies but make sure to keep at least four bytes reserved.
while (SNAPPY_PREDICT_FALSE(len >= 68)) {
- op = EmitCopyAtMost64(op, offset, 64, false);
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 64);
len -= 64;
}
// One or two copies will now finish the job.
if (len > 64) {
- op = EmitCopyAtMost64(op, offset, 60, false);
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 60);
len -= 60;
}
// Emit remainder.
- op = EmitCopyAtMost64(op, offset, len, len < 12);
+ if (len < 12) {
+ op = EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
+ } else {
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, len);
+ }
return op;
}
}
@@ -586,7 +590,7 @@ char* CompressFragment(const char* input,
// than 4 bytes match. But, prior to the match, input
// bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
assert(next_emit + 16 <= ip_end);
- op = EmitLiteral(op, next_emit, ip - next_emit, true);
+ op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit, ip - next_emit);
// Step 3: Call EmitCopy, and then see if another EmitCopy could
// be our next move. Repeat until we find no match for the
@@ -609,7 +613,11 @@ char* CompressFragment(const char* input,
ip += matched;
size_t offset = base - candidate;
assert(0 == memcmp(base, candidate, matched));
- op = EmitCopy(op, offset, matched, p.second);
+ if (p.second) {
+ op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
+ } else {
+ op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
+ }
next_emit = ip;
if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
@@ -634,7 +642,8 @@ char* CompressFragment(const char* input,
emit_remainder:
// Emit the remaining bytes as a literal
if (next_emit < ip_end) {
- op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
+ op = EmitLiteral</*allow_fast_path=*/false>(op, next_emit,
+ ip_end - next_emit);
}
return op;