diff options
Diffstat (limited to 'subversion/libsvn_delta/xdelta.c')
-rw-r--r-- | subversion/libsvn_delta/xdelta.c | 190 |
1 files changed, 153 insertions, 37 deletions
diff --git a/subversion/libsvn_delta/xdelta.c b/subversion/libsvn_delta/xdelta.c index ea810e9..2075a51 100644 --- a/subversion/libsvn_delta/xdelta.c +++ b/subversion/libsvn_delta/xdelta.c @@ -27,10 +27,9 @@ #include <apr_general.h> /* for APR_INLINE */ #include <apr_hash.h> +#include "svn_hash.h" #include "svn_delta.h" #include "delta.h" - -#include "private/svn_adler32.h" /* This is pseudo-adler32. It is adler32 without the prime modulus. The idea is borrowed from monotone, and is a translation of the C++ @@ -44,6 +43,10 @@ */ #define MATCH_BLOCKSIZE 64 +/* "no" / "invalid" / "unused" value for positions within the delta windows + */ +#define NO_POSITION ((apr_uint32_t)-1) + /* Feed C_IN into the adler32 checksum and remove C_OUT at the same time. * This function may (and will) only be called for characters that are * MATCH_BLOCKSIZE positions apart. @@ -63,7 +66,7 @@ adler32_replace(apr_uint32_t adler32, const char c_out, const char c_in) return adler32 + adler32 * 0x10000; } -/* Calculate an peudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting +/* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting at DATA. Return the checksum value. */ static APR_INLINE apr_uint32_t @@ -96,17 +99,27 @@ init_adler32(const char *data) struct block { apr_uint32_t adlersum; - apr_size_t pos; + +/* Even in 64 bit systems, store only 32 bit offsets in our hash table + (our delta window size much much smaller then 4GB). + That reduces the hash table size by 50% from 32to 16KB + and makes it easier to fit into the CPU's L1 cache. */ + apr_uint32_t pos; /* NO_POSITION -> block is not used */ }; /* A hash table, using open addressing, of the blocks of the source. */ struct blocks { - /* The largest valid index of slots. */ - apr_size_t max; + /* The largest valid index of slots. + This value has an upper bound proportionate to the text delta + window size, so unless we dramatically increase the window size, + it's safe to make this a 32-bit value. 
In any case, it has to be + the same width as the block position index, (struct + block).pos. */ + apr_uint32_t max; /* Source buffer that the positions in SLOTS refer to. */ const char* data; - /* The vector of blocks. A pos value of (apr_size_t)-1 represents an unused + /* The vector of blocks. A pos value of NO_POSITION represents an unused slot. */ struct block *slots; }; @@ -114,7 +127,7 @@ struct blocks /* Return a hash value calculated from the adler32 SUM, suitable for use with our hash table. */ -static apr_size_t hash_func(apr_uint32_t sum) +static apr_uint32_t hash_func(apr_uint32_t sum) { /* Since the adl32 checksum have a bad distribution for the 11th to 16th bits when used for our small block size, we add some bits from the @@ -126,12 +139,12 @@ static apr_size_t hash_func(apr_uint32_t sum) data into the table BLOCKS. Ignore true duplicates, i.e. blocks with actually the same content. */ static void -add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_size_t pos) +add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos) { - apr_size_t h = hash_func(adlersum) & blocks->max; + apr_uint32_t h = hash_func(adlersum) & blocks->max; /* This will terminate, since we know that we will not fill the table. */ - for (; blocks->slots[h].pos != (apr_size_t)-1; h = (h + 1) & blocks->max) + for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) if (blocks->slots[h].adlersum == adlersum) if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos, MATCH_BLOCKSIZE) == 0) @@ -143,21 +156,21 @@ add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_size_t pos) /* Find a block in BLOCKS with the checksum ADLERSUM and matching the content at DATA, returning its position in the source data. If there is no such - block, return (apr_size_t)-1. */ -static apr_size_t + block, return NO_POSITION. 
*/ +static apr_uint32_t find_block(const struct blocks *blocks, apr_uint32_t adlersum, const char* data) { - apr_size_t h = hash_func(adlersum) & blocks->max; + apr_uint32_t h = hash_func(adlersum) & blocks->max; - for (; blocks->slots[h].pos != (apr_size_t)-1; h = (h + 1) & blocks->max) + for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) if (blocks->slots[h].adlersum == adlersum) if (memcmp(blocks->data + blocks->slots[h].pos, data, MATCH_BLOCKSIZE) == 0) return blocks->slots[h].pos; - return (apr_size_t)-1; + return NO_POSITION; } /* Initialize the matches table from DATA of size DATALEN. This goes @@ -169,17 +182,30 @@ init_blocks_table(const char *data, struct blocks *blocks, apr_pool_t *pool) { - apr_size_t i; apr_size_t nblocks; - apr_size_t nslots = 1; + apr_size_t wnslots = 1; + apr_uint32_t nslots; + apr_uint32_t i; - /* Be pesimistic about the block count. */ + /* Be pessimistic about the block count. */ nblocks = datalen / MATCH_BLOCKSIZE + 1; /* Find nearest larger power of two. */ - while (nslots <= nblocks) - nslots *= 2; + while (wnslots <= nblocks) + wnslots *= 2; /* Double the number of slots to avoid a too high load. */ - nslots *= 2; + wnslots *= 2; + /* Narrow the number of slots to 32 bits, which is the size of the + block position index in the hash table. + Sanity check: On 64-bit platforms, apr_size_t is likely to be + larger than apr_uint32_t. Make sure that the number of slots + actually fits into blocks->max. It's safe to use a hard assert + here, because the largest possible value for nslots is + proportional to the text delta window size and is therefore much + smaller than the range of an apr_uint32_t. If we ever happen to + increase the window size too much, this assertion will get + triggered by the test suite. 
*/ + nslots = (apr_uint32_t) wnslots; + SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots); blocks->max = nslots - 1; blocks->data = data; blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots))); @@ -187,7 +213,7 @@ init_blocks_table(const char *data, { /* Avoid using an indeterminate value in the lookup. */ blocks->slots[i].adlersum = 0; - blocks->slots[i].pos = (apr_size_t)-1; + blocks->slots[i].pos = NO_POSITION; } /* If there is an odd block at the end of the buffer, we will @@ -226,10 +252,48 @@ match_length(const char *a, const char *b, apr_size_t max_len) return pos; } +/* Return the number of bytes before A and B that don't differ. If no + * difference can be found in the first MAX_LEN characters, MAX_LEN will + * be returned. Please note that A-MAX_LEN and B-MAX_LEN must both be + * valid addresses. + */ +static apr_size_t +reverse_match_length(const char *a, const char *b, apr_size_t max_len) +{ + apr_size_t pos = 0; + +#if SVN_UNALIGNED_ACCESS_IS_OK + + /* Chunky processing is so much faster ... + * + * We can't make this work on architectures that require aligned access + * because A and B will probably have different alignment. So, skipping + * the first few chars until alignment is reached is not an option. + */ + for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) + if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) + break; + + pos -= sizeof(apr_size_t); + +#endif + + /* If we find a mismatch at -pos, pos-1 characters matched. + */ + while (++pos <= max_len) + if (a[0-pos] != b[0-pos]) + return pos - 1; + + /* No mismatch found -> at least MAX_LEN matching chars. + */ + return max_len; +} + + /* Try to find a match for the target data B in BLOCKS, and then extend the match as long as data in A and B at the match position continues to match. 
We set the position in A we ended up in (in - case we extended it backwards) in APOSP and update the correspnding + case we extended it backwards) in APOSP and update the corresponding position within B given in BPOSP. PENDING_INSERT_START sets the lower limit to BPOSP. Return number of matching bytes starting at ASOP. Return 0 if @@ -252,7 +316,7 @@ find_match(const struct blocks *blocks, apos = find_block(blocks, rolling, b + bpos); /* See if we have a match. */ - if (apos == (apr_size_t)-1) + if (apos == NO_POSITION) return 0; /* Extend the match forward as far as possible */ @@ -278,6 +342,38 @@ find_match(const struct blocks *blocks, return MATCH_BLOCKSIZE + delta; } +/* Utility for compute_delta() that compares the range B[START,BSIZE) with + * the range of similar size before A[ASIZE]. Create corresponding copy and + * insert operations. + * + * BUILD_BATON and POOL will be passed through from compute_delta(). + */ +static void +store_delta_trailer(svn_txdelta__ops_baton_t *build_baton, + const char *a, + apr_size_t asize, + const char *b, + apr_size_t bsize, + apr_size_t start, + apr_pool_t *pool) +{ + apr_size_t end_match; + apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize; + if (max_len == 0) + return; + + end_match = reverse_match_length(a + asize, b + bsize, max_len); + if (end_match <= 4) + end_match = 0; + + if (bsize - start > end_match) + svn_txdelta__insert_op(build_baton, svn_txdelta_new, + start, bsize - start - end_match, b + start, pool); + if (end_match) + svn_txdelta__insert_op(build_baton, svn_txdelta_source, + asize - end_match, end_match, NULL, pool); +} + /* Compute a delta from A to B using xdelta. 
@@ -306,21 +402,33 @@ find_match(const struct blocks *blocks, static void compute_delta(svn_txdelta__ops_baton_t *build_baton, const char *a, - apr_uint32_t asize, + apr_size_t asize, const char *b, - apr_uint32_t bsize, + apr_size_t bsize, apr_pool_t *pool) { struct blocks blocks; apr_uint32_t rolling; apr_size_t lo = 0, pending_insert_start = 0; + /* Optimization: directly compare window starts. If more than 4 + * bytes match, we can immediately create a matching window. + * Shorter sequences result in a net data increase. */ + lo = match_length(a, b, asize > bsize ? bsize : asize); + if ((lo > 4) || (lo == bsize)) + { + svn_txdelta__insert_op(build_baton, svn_txdelta_source, + 0, lo, NULL, pool); + pending_insert_start = lo; + } + else + lo = 0; + /* If the size of the target is smaller than the match blocksize, just insert the entire target. */ - if (bsize < MATCH_BLOCKSIZE) + if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE)) { - svn_txdelta__insert_op(build_baton, svn_txdelta_new, - 0, bsize, b, pool); + store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool); return; } @@ -328,7 +436,7 @@ compute_delta(svn_txdelta__ops_baton_t *build_baton, init_blocks_table(a, asize, &blocks, pool); /* Initialize our rolling checksum. */ - rolling = init_adler32(b); + rolling = init_adler32(b + lo); while (lo < bsize) { apr_size_t matchlen = 0; @@ -356,6 +464,19 @@ compute_delta(svn_txdelta__ops_baton_t *build_baton, svn_txdelta__insert_op(build_baton, svn_txdelta_new, 0, lo - pending_insert_start, b + pending_insert_start, pool); + else + { + /* the match borders on the previous op. Maybe, we found a + * match that is better than / overlapping the previous one. */ + apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo); + if (len > 0) + { + len = svn_txdelta__remove_copy(build_baton, len); + apos -= len; + matchlen += len; + lo -= len; + } + } /* Reset the pending insert start to immediately after the match. 
*/ @@ -373,12 +494,7 @@ compute_delta(svn_txdelta__ops_baton_t *build_baton, } /* If we still have an insert pending at the end, throw it in. */ - if (lo - pending_insert_start > 0) - { - svn_txdelta__insert_op(build_baton, svn_txdelta_new, - 0, lo - pending_insert_start, - b + pending_insert_start, pool); - } + store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool); } void |