summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDonovan Baarda <abo@minkirri.apana.org.au>2003-08-21 08:44:50 +0000
committerDonovan Baarda <abo@minkirri.apana.org.au>2003-08-21 08:44:50 +0000
commita6037692a1f5814ab2621e0faa527c25ecd2b48b (patch)
tree74a33185c318866c443d3ef80a9d1992df76225f
parentcf14c5d42389fa0b3c3bfe73c07f1a53de55124e (diff)
downloadlibrsync-a6037692a1f5814ab2621e0faa527c25ecd2b48b.tar.gz
Applied delta-refactor patch.
-rw-r--r--Makefile.am1
-rw-r--r--TODO23
-rw-r--r--delta.c532
-rw-r--r--job.h28
-rw-r--r--rollsum.c35
-rw-r--r--rollsum.h70
6 files changed, 408 insertions, 281 deletions
diff --git a/Makefile.am b/Makefile.am
index 1dec444..c50b4ed 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -33,6 +33,7 @@ librsync_la_SOURCES = prototab.c prototab.h \
command.h delta.c emit.c emit.h fileutil.c fileutil.h \
hex.c job.c job.h mdfour.c mdfour.h mksum.c msg.c netint.c netint.h \
patch.c protocol.h readsums.c librsync.h librsync-config.h \
+ rollsum.c rollsum.h \
scoop.c search.c search.h stats.c stream.c stream.h sumset.c \
sumset.h trace.c trace.h tube.c types.h util.c util.h \
version.c whole.c whole.h snprintf.h
diff --git a/TODO b/TODO
index e5821d4..152229b 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,26 @@
-*- indented-text -*-
$Id$
+ * Optimisations and code cleanups;
+
+ scoop.c: Scoop needs major refactor. Perhaps the API needs
+ tweaking?
+
+ rsync.h: documentation refers to rs_work(), which has been replaced
+ by rs_job_iter. Vestigial rs_work_options enum typedef should be
+ removed. rs_buffers_s and rs_buffers_t should be one typedef? Just
+ how useful is rs_job_drive anyway? Not implemented, rs_accum_value
+ Not implemented rs_mdfour_file
+
+ patch.c: rs_patch_s_copying() does alloc, copy free, when it could
+ just copy directly into rs_buffer_t buffer. This _does_ mean the
+ callback can't allocate it's own data, though this can be done by
+ checking if the callback changed the pointer.
+
+ mdfour.c: This code has a different API to the RSA code in libmd
+ and is coupled with librsync in unhealthy ways (trace?). Recommend
+ changing to RSA API.
+
* Change popt library to proper automake conditional recursive
SUBDIR instead of an EXTRA_LIBRARIES in a seperate dir (or just
remove it from the distribution and make the library a dependency).
@@ -17,9 +37,6 @@ $Id$
Make snprintf.c into two proper replacement functions for snprintf
and vsnprintf instead of using conditional compilation.
- * Most urgent: do rolling checksums rather than from-scratch if
- possible.
-
* Don't use the rs_buffers_t structure.
There's something confusing about the existence of this structure.
diff --git a/delta.c b/delta.c
index 85a0826..611cf2a 100644
--- a/delta.c
+++ b/delta.c
@@ -75,316 +75,316 @@
#include "sumset.h"
#include "job.h"
#include "trace.h"
-#include "checksum.h"
#include "search.h"
+#include "types.h"
+#include "rollsum.h"
/**
- * Turn this on to make all rolling checksums be checked from scratch.
+ * 2002-06-26: Donovan Baarda
+ *
+ * The following is based entirely on pysync. It is much cleaner than the
+ * previous incarnation of this code. It is slightly complicated because in
+ * this case the output can block, so the main delta loop needs to stop
+ * when this happens.
+ *
+ * In pysync a 'last' attribute is used to hold the last miss or match for
+ * extending if possible. In this code, basis_len and scoop_pos are used
+ * instead of 'last'. When basis_len > 0, last is a match. When basis_len =
+ * 0 and scoop_pos is > 0, last is a miss. When both are 0, last is None
+ * (ie, nothing).
+ *
+ * Pysync is also slightly different in that a 'flush' method is available
+ * to force output of accumulated data. This 'flush' is use to finalise
+ * delta calculation. In librsync input is terminated with an eof flag on
+ * the input stream. I have structured this code similar to pysync with a
+ * seperate flush function that is used when eof is reached. This allows
+ * for a flush style API if one is ever needed. Note that flush in pysync
+ * can be used for more than just terminating delta calculation, so a flush
+ * based API can in some ways be more flexible...
+ *
+ * The input data is first scanned, then processed. Scanning identifies
+ * input data as misses or matches, and emits the instruction stream.
+ * Processing the data consumes it off the input scoop and outputs the
+ * processed miss data into the tube.
+ *
+ * The scoop contains all data yet to be processed. The scoop_pos is an
+ * index into the scoop that indicates the point scanned to. As data is
+ * scanned, scoop_pos is incremented. As data is processed, it is removed
+ * from the scoop and scoop_pos adjusted. Everything gets complicated
+ * because the tube can block. When the tube is blocked, no data can be
+ * processed.
+ *
*/
-int rs_roll_paranoia = 0;
-
-
-static rs_result rs_delta_scan(rs_job_t *, rs_long_t avail_len, void *);
-static rs_result rs_delta_match(rs_job_t *, rs_long_t avail_len, void *);
-
-static rs_result rs_delta_s_deferred_advance(rs_job_t *job);
+/* used by rdiff, but now redundant */
+int rs_roll_paranoia = 0;
-
-static rs_result rs_delta_s_end(rs_job_t *job)
-{
- rs_emit_end_cmd(job);
- return RS_DONE;
-}
-
+static rs_result rs_delta_s_scan(rs_job_t *job);
+static rs_result rs_delta_s_flush(rs_job_t *job);
+static rs_result rs_delta_s_end(rs_job_t *job);
+void rs_getinput(rs_job_t *job);
+inline int rs_findmatch(rs_job_t *job, rs_long_t *match_pos, size_t *match_len);
+inline rs_result rs_appendmatch(rs_job_t *job, rs_long_t match_pos, size_t match_len);
+inline rs_result rs_appendmiss(rs_job_t *job, size_t miss_len);
+inline rs_result rs_appendflush(rs_job_t *job);
+inline rs_result rs_processmatch(rs_job_t *job);
+inline rs_result rs_processmiss(rs_job_t *job);
/**
* \brief Get a block of data if possible, and see if it matches.
- *
- * On each call, we try to process all of the input data available on
- * the scoop and input buffer.
- */
-static rs_result
-rs_delta_s_scan(rs_job_t *job)
+ *
+ * On each call, we try to process all of the input data available on the
+ * scoop and input buffer. */
+static rs_result rs_delta_s_scan(rs_job_t *job)
{
- size_t this_len, avail_len;
- int is_ending;
- void *inptr;
+ rs_long_t match_pos;
+ size_t match_len;
rs_result result;
+ Rollsum test;
rs_job_check(job);
-
- avail_len = rs_scoop_total_avail(job);
- this_len = job->block_len;
- is_ending = job->stream->eof_in;
-
- /* Now, we have avail_len bytes, and we need to scan through them
- * looking for a match. We may end up emitting a bunch of
- * commands depending on how the blocks match with the signature */
- if ((avail_len == 0) && (job->basis_len == 0)) {
- if (is_ending) {
- /* no more delta to do */
- job->statefn = rs_delta_s_end;
+ /* read the input into the scoop */
+ rs_getinput(job);
+ /* output any pending output from the tube */
+ result=rs_tube_catchup(job);
+ /* while output is not blocked and there is a block of data */
+ while ((result==RS_DONE) &&
+ ((job->scoop_pos + job->block_len) < job->scoop_avail)) {
+ /* check if this block matches */
+ if (rs_findmatch(job,&match_pos,&match_len)) {
+ /* append the match and reset the weak_sum */
+ result=rs_appendmatch(job,match_pos,match_len);
+ RollsumInit(&job->weak_sum);
+ } else {
+ /* rotate the weak_sum and append the miss byte */
+ RollsumRotate(&job->weak_sum,job->scoop_next[job->scoop_pos],
+ job->scoop_next[job->scoop_pos+job->block_len]);
+ result=rs_appendmiss(job,1);
+ if (rs_roll_paranoia) {
+ RollsumInit(&test);
+ RollsumUpdate(&test,job->scoop_next+job->scoop_pos,
+ job->block_len);
+ if (RollsumDigest(&test) != RollsumDigest(&job->weak_sum)) {
+ rs_fatal("mismatch between rolled sum %#x and check %#x",
+ (int)RollsumDigest(&job->weak_sum),
+ (int)RollsumDigest(&test));
+ }
+
+ }
}
- return RS_BLOCKED;
- }
-
- /* must read at least one block, or give up */
- if ((avail_len < job->block_len) && !is_ending) {
- /* we know we won't get it, but we have to try for a whole
- * block anyhow so that it gets into the scoop. */
- rs_scoop_input(job, job->block_len);
- return RS_BLOCKED;
}
-
- result = rs_scoop_readahead(job, avail_len, &inptr);
- if (result != RS_DONE)
- return result;
-
- if (!job->basis_len)
- return rs_delta_scan(job, avail_len, inptr);
- else
- return rs_delta_match(job, avail_len, inptr);
+ /* if we completed OK */
+ if (result==RS_DONE) {
+ /* if we reached eof, we can flush the last fragment */
+ if (job->stream->eof_in) {
+ job->statefn=rs_delta_s_flush;
+ return RS_RUNNING;
+ } else {
+ /* we are blocked waiting for more data */
+ return RS_BLOCKED;
+ }
+ }
+ return result;
}
-/**
- * Scan for a matching block in the next \p avail_len bytes of input.
- *
- * If nonmatching data is found, then a LITERAL command will be put in
- * the tube immediately. If matching data is found, then its position
- * will be saved in the job, and the job state set up to to perform
- * RLL encoding after handling the literal.
- */
-static rs_result
-rs_delta_scan(rs_job_t *job, rs_long_t avail_len, void *p)
+static rs_result rs_delta_s_flush(rs_job_t *job)
{
- rs_long_t match_where;
- int search_pos, end_pos;
- unsigned char *inptr = (unsigned char *) p;
- unsigned s1 = job->weak_sig & 0xFFFF;
- unsigned s2 = job->weak_sig >> 16;
+ rs_long_t match_pos;
+ size_t match_len;
+ rs_result result;
- if (job->basis_len) {
- rs_log(RS_LOG_ERR, "somehow got nonzero basis_len");
- return RS_INTERNAL_ERROR;
+ rs_job_check(job);
+ /* read the input into the scoop */
+ rs_getinput(job);
+ /* output any pending output */
+ result=rs_tube_catchup(job);
+ /* while output is not blocked and there is any remaining data */
+ while ((result==RS_DONE) && (job->scoop_pos < job->scoop_avail)) {
+ /* check if this block matches */
+ if (rs_findmatch(job,&match_pos,&match_len)) {
+ /* append the match and reset the weak_sum */
+ result=rs_appendmatch(job,match_pos,match_len);
+ RollsumInit(&job->weak_sum);
+ } else {
+ /* rollout from weak_sum and append the miss byte */
+ RollsumRollout(&job->weak_sum,job->scoop_next[job->scoop_pos]);
+ rs_trace("block reduced to %d", (int)job->weak_sum.count);
+ result=rs_appendmiss(job,1);
+ }
+ }
+ /* if we are not blocked, flush and set end statefn. */
+ if (result==RS_DONE) {
+ result=rs_appendflush(job);
+ job->statefn=rs_delta_s_end;
}
+ if (result==RS_DONE) {
+ return RS_RUNNING;
+ }
+ return result;
+}
-
- /* So, we have avail_len bytes of data, and we want to look
- * through it for a match at some point. It's OK if it's not at
- * the start of the available input data. If we're approaching
- * the end and can't get a match, then we just block and get more
- * later. */
-
- /* FIXME: Perhaps we should be working in signed chars for the
- * rolling sum? */
-
- if (job->stream->eof_in)
- end_pos = avail_len - 1;
- else
- end_pos = avail_len - job->block_len;
-
- for (search_pos = 0; search_pos <= end_pos; search_pos++) {
- size_t this_len = job->block_len;
-
- /* Did we inherit the signature from rs_delta_match, if
- * so we know this block won't match and we should simply
- * skip the first char.
- */
- if (job->have_weak_sig < 0) {
- /* advance by one; roll out the byte we just skipped over. */
- unsigned char a = inptr[search_pos];
- unsigned shift = a + RS_CHAR_OFFSET;
-
- s1 -= shift;
- s2 -= this_len * shift;
- job->weak_sig = (s1 & 0xffff) | (s2 << 16);
- job->have_weak_sig = 1;
- /* We already know that this block won't match!*/
- continue;
- }
- if (search_pos + this_len > avail_len) {
- this_len = avail_len - search_pos;
- rs_trace("block reduced to %d", this_len);
- } else if (job->have_weak_sig > 0) {
- unsigned char a = inptr[search_pos + this_len - 1];
- /* roll in the newly added byte, if any */
- s1 += a + RS_CHAR_OFFSET;
- s2 += s1;
+static rs_result rs_delta_s_end(rs_job_t *job)
+{
+ rs_emit_end_cmd(job);
+ return RS_DONE;
+}
- job->weak_sig = (s1 & 0xffff) | (s2 << 16);
- }
- if (!job->have_weak_sig) {
- rs_trace("calculate weak sum from scratch");
- job->weak_sig = rs_calc_weak_sum(inptr + search_pos, this_len);
- s1 = job->weak_sig & 0xFFFF;
- s2 = job->weak_sig >> 16;
- job->have_weak_sig = 1;
- }
+void rs_getinput(rs_job_t *job) {
+ size_t len;
+
+ len=rs_scoop_total_avail(job);
+ if (job->scoop_avail < len) {
+ rs_scoop_input(job,len);
+ }
+}
- if (rs_roll_paranoia) {
- rs_weak_sum_t verify = rs_calc_weak_sum(inptr + search_pos, this_len);
- if (verify != job->weak_sig) {
- rs_fatal("mismatch between rolled sum %#x and check %#x",
- job->weak_sig, verify);
- }
- }
- if (rs_search_for_block(job->weak_sig, inptr + search_pos, this_len,
- job->signature, &job->stats, &match_where)) {
- /* So, we got a match. Cool. However, there may be
- * leading unmatched data that we need to flush. Thus we
- * set our statefn to be rs_delta_s_deferred_advance so that
- * we can skip bytes and write out the copy command later. */
-
- rs_trace("matched %.0f bytes at %.0f!",
- (double) this_len, (double) match_where);
- job->basis_pos = match_where;
- job->basis_len = this_len;
- job->statefn = rs_delta_s_deferred_advance;
- job->have_weak_sig = 0;
- break;
- } else {
- /* advance by one; roll out the byte we just moved over. */
- unsigned char a = inptr[search_pos];
- unsigned shift = a + RS_CHAR_OFFSET;
-
- s1 -= shift;
- s2 -= this_len * shift;
- job->weak_sig = (s1 & 0xffff) | (s2 << 16);
+/**
+ * find a match at scoop_pos, returning the match_pos and match_len.
+ * Note that this will calculate weak_sum if required. It will also
+ * determine the match_len.
+ *
+ * Note that this routine could be modified to do xdelta style matches that
+ * would extend matches past block boundaries by matching backwards and
+ * forwards beyond the block boundaries. Extending backwards would require
+ * decrementing scoop_pos as appropriate.
+ */
+inline int rs_findmatch(rs_job_t *job, rs_long_t *match_pos, size_t *match_len) {
+ /* calculate the weak_sum if we don't have one */
+ if (job->weak_sum.count == 0) {
+ /* set match_len to min(block_len, scan_avail) */
+ *match_len=job->scoop_avail - job->scoop_pos;
+ if (*match_len > job->block_len) {
+ *match_len = job->block_len;
}
+ /* Update the weak_sum */
+ RollsumUpdate(&job->weak_sum,job->scoop_next+job->scoop_pos,*match_len);
+ rs_trace("calculate weak sum from scratch length %d",(int)job->weak_sum.count);
+ } else {
+ /* set the match_len to the weak_sum count */
+ *match_len=job->weak_sum.count;
}
-
- if (search_pos > 0) {
- /* We may or may not have found a block, but we know we found
- * some literal data at the start of the buffer. Therefore,
- * we have to flush that out before we can continue on and
- * emit the copy command or keep searching. */
-
- /* FIXME: At the moment, if you call with very short buffers,
- * then you will get a series of very short LITERAL commands.
- * Perhaps this is what you deserve, or perhaps we should try
- * to get more readahead and avoid that. */
-
- /* There's some literal data at the start of this window which
- * we know is not in any block. */
- rs_trace("got %d bytes of literal data", search_pos);
- rs_emit_literal_cmd(job, search_pos);
- rs_tube_copy(job, search_pos);
- }
-
- return RS_RUNNING;
+ return rs_search_for_block(RollsumDigest(&job->weak_sum),
+ job->scoop_next+job->scoop_pos,
+ *match_len,
+ job->signature,
+ &job->stats,
+ match_pos);
}
+
/**
- * advance the scoop pointer to skip a matched block.
- *
- * We can't do this greedily within rs_delta_scan since rs_tube_copy is lazy.
- * Instead we use this intermediate state to advance the scoop.
- */
-static rs_result
-rs_delta_s_deferred_advance(rs_job_t *job)
+ * Append a match at match_pos of length match_len to the delta, extending
+ * a previous match if possible, or flushing any previous miss/match. */
+inline rs_result rs_appendmatch(rs_job_t *job, rs_long_t match_pos, size_t match_len)
{
- if (!job->basis_len) {
- rs_log(RS_LOG_ERR, "somehow got zero basis_len");
- return RS_INTERNAL_ERROR;
+ rs_result result=RS_DONE;
+
+ /* if last was a match that can be extended, extend it */
+ if (job->basis_len && (job->basis_pos + job->basis_len) == match_pos) {
+ job->basis_len+=match_len;
+ } else {
+ /* else appendflush the last value */
+ result=rs_appendflush(job);
+ /* make this the new match value */
+ job->basis_pos=match_pos;
+ job->basis_len=match_len;
}
-
- rs_scoop_advance(job,job->basis_len);
- job->statefn=rs_delta_s_scan;
-
- return RS_RUNNING;
+ /* increment scoop_pos to point at next unscanned data */
+ job->scoop_pos+=match_len;
+ /* we can only process from the scoop if output is not blocked */
+ if (result==RS_DONE) {
+ /* process the match data off the scoop*/
+ result=rs_processmatch(job);
+ }
+ return result;
}
+
/**
- * Do RLL coding of output.
- *
- * When a matched block is found we are in this state. We try to accumulate
- * adjacent blocks for RLL encoding of the output. If a non-adjacent block is
- * matched, we emit a copy command for the accumulated blocks and start a
- * new RLL sequence. If a block can't be matched we need to rescan.
- */
-static rs_result
-rs_delta_match(rs_job_t *job, rs_long_t avail_len, void *p)
+ * Append a miss of length miss_len to the delta, extending a previous miss
+ * if possible, or flushing any previous match.
+ *
+ * This also breaks misses up into block_len segments to avoid accumulating
+ * too much in memory. */
+inline rs_result rs_appendmiss(rs_job_t *job, size_t miss_len)
{
- rs_long_t match_where;
- int search_pos;
- unsigned char *inptr = (unsigned char *) p;
- int ending= job->stream->eof_in;
-
- if (!job->basis_len) {
- rs_log(RS_LOG_ERR, "somehow got zero basis_len");
- return RS_INTERNAL_ERROR;
+ rs_result result=RS_DONE;
+
+ /* if last was a match, or block_len misses, appendflush it */
+ if (job->basis_len || (job->scoop_pos >= rs_outbuflen)) {
+ result=rs_appendflush(job);
}
+ /* increment scoop_pos */
+ job->scoop_pos+=miss_len;
+ return result;
+}
- /* So, we have avail_len bytes of data, and we previously matched
- * one or more blocks. We now look for adjacent matches to roll into the
- * the current match. If we hit a block that has no match, we need to
- * go back rs_delta_scan and rescan. */
-
- for (search_pos = 0; search_pos <= avail_len; search_pos+=job->block_len) {
- size_t this_len = job->block_len;
-
- if (search_pos + this_len > avail_len) {
- /* We only allow short blocks at the end of stream*/
- if (!ending) {
- rs_trace("waiting for more input");
- return RS_BLOCKED;
- }
- this_len = avail_len - search_pos;
- rs_trace("block reduced to %d", this_len);
- }
-
- rs_trace("calculate weak sum from scratch");
- job->weak_sig = rs_calc_weak_sum(inptr + search_pos, this_len);
- job->have_weak_sig = -1;
-
- if (rs_search_for_block(job->weak_sig, inptr + search_pos, this_len,
- job->signature, &job->stats, &match_where)) {
- /* So, we got a match. Cool. Now try to roll it into the previous
- * match. If we can't we start a new rll sequence. */
- rs_trace("matched %.0f bytes at %.0f!",
- (double) this_len, (double) match_where);
- /* At this point we have matched this block so skip it*/
- /* We do this now since we might return in the IF block*/
- rs_scoop_advance(job,this_len);
-
- if (match_where == (job->basis_pos + job->basis_len)) {
- job->basis_len += this_len;
- rs_trace("adjacent match: accumulated %.0f bytes at %.0f",
- (double)job->basis_len,(double)job->basis_pos);
- } else {
- rs_trace("new match, flushing %.0f bytes at %.0f",
- (double)job->basis_pos,(double)job->basis_len);
- rs_emit_copy_cmd(job, job->basis_pos, job->basis_len);
- job->basis_pos = match_where;
- job->basis_len = this_len;
- /* Give the tube a chance to catchup */
- return RS_RUNNING;
- }
- } else {
- /* Copy blocks that we acummulated, there should be at least one */
- rs_trace("no match, copying %.0f bytes at %.0f",
- (double)job->basis_len,(double)job->basis_pos);
- rs_emit_copy_cmd(job, job->basis_pos, job->basis_len);
- /* Unmatched data...we need to rescan*/
- job->basis_len=0;
- return RS_RUNNING;
- }
- }
-
- if (ending) {
- /* The job ended with a matching block..we must copy everything*/
+/**
+ * Flush any accumulating hit or miss, appending it to the delta.
+ */
+inline rs_result rs_appendflush(rs_job_t *job)
+{
+ /* if last is a match, emit it and reset last by resetting basis_len */
+ if (job->basis_len) {
+ rs_trace("matched %.0f bytes at %.0f!",
+ (double) job->basis_len, (double) job->basis_pos);
rs_emit_copy_cmd(job, job->basis_pos, job->basis_len);
job->basis_len=0;
+ return rs_processmatch(job);
+ /* else if last is a miss, emit and process it*/
+ } else if (job->scoop_pos) {
+ rs_trace("got %d bytes of literal data", job->scoop_pos);
+ rs_emit_literal_cmd(job, job->scoop_pos);
+ return rs_processmiss(job);
}
+ /* otherwise, nothing to flush so we are done */
+ return RS_DONE;
+}
- return RS_RUNNING;
+
+/**
+ * The scoop contains match data at scoop_next of length scoop_pos. This
+ * function processes that match data, returning RS_DONE if it completes,
+ * or RS_BLOCKED if it gets blocked. After it completes scoop_pos is reset
+ * to still point at the next unscanned data.
+ *
+ * This function currently just removes data from the scoop and adjusts
+ * scoop_pos appropriately. In the future this could be used for something
+ * like context compressing of miss data. Note that it also calls
+ * rs_tube_catchup to output any pending output. */
+inline rs_result rs_processmatch(rs_job_t *job)
+{
+ job->scoop_avail-=job->scoop_pos;
+ job->scoop_next+=job->scoop_pos;
+ job->scoop_pos=0;
+ return rs_tube_catchup(job);
+}
+
+/**
+ * The scoop contains miss data at scoop_next of length scoop_pos. This
+ * function processes that miss data, returning RS_DONE if it completes, or
+ * RS_BLOCKED if it gets blocked. After it completes scoop_pos is reset to
+ * still point at the next unscanned data.
+ *
+ * This function uses rs_tube_copy to queue copying from the scoop into
+ * output. and uses rs_tube_catchup to do the copying. This automaticly
+ * removes data from the scoop, but this can block. While rs_tube_catchup
+ * is blocked, scoop_pos does not point at legit data, so scanning can also
+ * not proceed.
+ *
+ * In the future this could do compression of miss data before outputing
+ * it. */
+inline rs_result rs_processmiss(rs_job_t *job)
+{
+ rs_tube_copy(job, job->scoop_pos);
+ job->scoop_pos=0;
+ return rs_tube_catchup(job);
}
@@ -445,7 +445,9 @@ rs_job_t *rs_delta_begin(rs_signature_t *sig)
job = rs_job_new("delta", rs_delta_s_header);
job->signature = sig;
-
+
+ RollsumInit(&job->weak_sum);
+
if ((job->block_len = sig->block_len) < 0) {
rs_log(RS_LOG_ERR, "unreasonable block_len %d in signature",
job->block_len);
diff --git a/job.h b/job.h
index 2ca2764..4999ddb 100644
--- a/job.h
+++ b/job.h
@@ -21,6 +21,7 @@
*/
#include "mdfour.h"
+#include "rollsum.h"
struct rs_job {
int dogtag;
@@ -49,13 +50,11 @@ struct rs_job {
/** Command byte currently being processed, if any. */
unsigned char op;
- /** If in the middle of reading a signature (rs_loadsig_s_weak()),
- * or generating a delta, this contains the weak signature. */
+ /** The weak signature digest used by readsums.c */
rs_weak_sum_t weak_sig;
-
- /** If generating a delta, this is true if we have a valid weak signature and
- * can roll it forward. */
- int have_weak_sig;
+
+ /** The rollsum weak signature accumulator used by delta.c */
+ Rollsum weak_sum;
/** Lengths of expected parameters. */
rs_long_t param1, param2;
@@ -66,13 +65,16 @@ struct rs_job {
/** Encoding statistics. */
rs_stats_t stats;
- /** Buffer of data left over in the scoop. Allocation is
- * scoop_buf..scoop_alloc, and scoop_next[0..scoop_avail]
- * contains valid data. */
- char *scoop_buf;
- char *scoop_next;
- size_t scoop_alloc;
- size_t scoop_avail;
+ /**
+ * Buffer of data in the scoop. Allocation is
+ * scoop_buf[0..scoop_alloc], and scoop_next[0..scoop_avail] contains
+ * data yet to be processed. scoop_next[scoop_pos..scoop_avail] is the
+ * data yet to be scanned. */
+ char *scoop_buf; /* the allocation pointer */
+ char *scoop_next; /* the data pointer */
+ size_t scoop_alloc; /* the allocation size */
+ size_t scoop_avail; /* the data size */
+ size_t scoop_pos; /* the scan position */
/** If USED is >0, then buf contains that much write data to
* be sent out. */
diff --git a/rollsum.c b/rollsum.c
new file mode 100644
index 0000000..fce3f82
--- /dev/null
+++ b/rollsum.c
@@ -0,0 +1,35 @@
+/* rollsum.c -- compute the rsync rolling checksum of a data stream
+ */
+
+/* @(#) $Id$ */
+
+#include "rollsum.h"
+
+#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
+#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf) DO8(buf,0); DO8(buf,8);
+#define OF16(off) {s1 += 16*off; s2 += 136*off;}
+
+void RollsumUpdate(Rollsum *sum,const unsigned char *buf,unsigned int len) {
+ /* ANSI C says no overflow for unsigned.
+ zlib's adler 32 goes to extra effort to avoid overflow*/
+ unsigned long s1 = sum->s1;
+ unsigned long s2 = sum->s2;
+
+ sum->count+=len; /* increment sum count */
+ while (len >= 16) {
+ DO16(buf);
+ OF16(ROLLSUM_CHAR_OFFSET);
+ buf += 16;
+ len -= 16;
+ }
+ while (len != 0) {
+ s1 += (*buf++ + ROLLSUM_CHAR_OFFSET);
+ s2 += s1;
+ len--;
+ }
+ sum->s1=s1;
+ sum->s2=s2;
+}
diff --git a/rollsum.h b/rollsum.h
new file mode 100644
index 0000000..b36f8c1
--- /dev/null
+++ b/rollsum.h
@@ -0,0 +1,70 @@
+/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ *
+ * rollsum -- the librsync rolling checksum
+ * $Id$
+ *
+ * Author: Donovan Baarda <abo@minkirri.apana.org.au>, based on work,
+ * Copyright (C) 2000, 2001 by Martin Pool <mbp@samba.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _ROLLSUM_H_
+#define _ROLLSUM_H_
+
+/* We should make this something other than zero to improve the
+ * checksum algorithm: tridge suggests a prime number. */
+#define ROLLSUM_CHAR_OFFSET 31
+
+/* the Rollsum struct type*/
+typedef struct _Rollsum {
+ unsigned long count; /* count of bytes included in sum */
+ unsigned long s1; /* s1 part of sum */
+ unsigned long s2; /* s2 part of sum */
+} Rollsum;
+
+void RollsumUpdate(Rollsum *sum,const unsigned char *buf,unsigned int len);
+/* The following are implemented as macros.
+void RollsumInit(Rollsum *sum);
+void RollsumRotate(Rollsum *sum,unsigned char out, unsigned char in);
+void RollsumRollin(Rollsum *sum,unsigned char c);
+void RollsumRollout(Rollsum *sum,unsigned char c);
+unsigned long RollsumDigest(Rollsum *sum);
+*/
+
+/* macro implementations of simple routines */
+#define RollsumInit(sum) { \
+ (sum)->count=(sum)->s1=(sum)->s2=0; \
+}
+
+#define RollsumRotate(sum,out,in) { \
+ (sum)->s1 += (unsigned char)(in) - (unsigned char)(out); \
+ (sum)->s2 += (sum)->s1 - (sum)->count*((unsigned char)(out)+ROLLSUM_CHAR_OFFSET); \
+}
+
+#define RollsumRollin(sum,c) { \
+ (sum)->s1 += ((unsigned char)(c)+ROLLSUM_CHAR_OFFSET); \
+ (sum)->s2 += (sum)->s1; \
+ (sum)->count++; \
+}
+
+#define RollsumRollout(sum,c) { \
+ (sum)->s1 -= ((unsigned char)(c)+ROLLSUM_CHAR_OFFSET); \
+ (sum)->s2 -= (sum)->count*((unsigned char)(c)+ROLLSUM_CHAR_OFFSET); \
+ (sum)->count--; \
+}
+
+#define RollsumDigest(sum) (((sum)->s2 << 16) | ((sum)->s1 & 0xffff))
+
+#endif /* _ROLLSUM_H_ */