/* * Copyright 2011 Avery Pennarun. All rights reserved. * * (This license applies to bupsplit.c and bupsplit.h only.) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY AVERY PENNARUN ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bupsplit.h" #include #include #include #include // According to librsync/rollsum.h: // "We should make this something other than zero to improve the // checksum algorithm: tridge suggests a prime number." // apenwarr: I unscientifically tried 0 and 7919, and they both ended up // slightly worse than the librsync value of 31 for my arbitrary test data. #define ROLLSUM_CHAR_OFFSET 31 typedef struct { unsigned s1, s2; uint8_t window[BUP_WINDOWSIZE]; int wofs; } Rollsum; // These formulas are based on rollsum.h in the librsync project. static void rollsum_add (Rollsum *r, uint8_t drop, uint8_t add) { r->s1 += add - drop; r->s2 += r->s1 - (BUP_WINDOWSIZE * (drop + ROLLSUM_CHAR_OFFSET)); } static void rollsum_init (Rollsum *r) { r->s1 = BUP_WINDOWSIZE * ROLLSUM_CHAR_OFFSET; r->s2 = BUP_WINDOWSIZE * (BUP_WINDOWSIZE - 1) * ROLLSUM_CHAR_OFFSET; r->wofs = 0; memset (r->window, 0, BUP_WINDOWSIZE); } // For some reason, gcc 4.3 (at least) optimizes badly if find_ofs() // is static and rollsum_roll is an inline function. Let's use a macro // here instead to help out the optimizer. #define rollsum_roll(r, ch) \ do \ { \ rollsum_add ((r), (r)->window[(r)->wofs], (ch)); \ (r)->window[(r)->wofs] = (ch); \ (r)->wofs = ((r)->wofs + 1) % BUP_WINDOWSIZE; \ } \ while (0) static uint32_t rollsum_digest (Rollsum *r) { return (r->s1 << 16) | (r->s2 & 0xffff); } uint32_t bupsplit_sum (uint8_t *buf, size_t ofs, size_t len) { size_t count; Rollsum r; rollsum_init (&r); for (count = ofs; count < len; count++) rollsum_roll (&r, buf[count]); return rollsum_digest (&r); } int bupsplit_find_ofs (const unsigned char *buf, int len, int *bits) { Rollsum r; int count; rollsum_init (&r); for (count = 0; count < len; count++) { rollsum_roll (&r, buf[count]); if ((r.s2 & (BUP_BLOBSIZE - 1)) == ((~0) & (BUP_BLOBSIZE - 1))) { if (bits) { unsigned rsum = rollsum_digest (&r); *bits = BUP_BLOBBITS; rsum >>= BUP_BLOBBITS; for (*bits = BUP_BLOBBITS; (rsum >>= 1) & 1; (*bits)++) ; } return count + 1; } } return 0; }