From ddcc8c5b469d2564dbacd629a873e7703f2dbd83 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 25 Dec 2010 05:11:32 -0600 Subject: vcs-svn: skeleton of an svn delta parser A delta in the subversion delta (svndiff0) format consists of the magic bytes SVN\0 followed by a sequence of windows of a certain well specified format (starting with five integers). Add an svndiff0_apply function and test-svn-fe -d commandline tool to parse such a delta in the special case of not including any windows. Later patches will add features to turn this into a fully functional delta applier for svn-fe to use to parse the streams produced by "svnrdump dump" and "svnadmin dump --deltas". The content of symlinks starts with the word "link " in Subversion's worldview, so we need to be able to prepend that text to input for the sake of delta application. So initialization of the input state of the delta preimage is left to the calling program, giving callers a chance to seed the buffer with text of their choice. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder --- t/t9011-svn-da.sh | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 t/t9011-svn-da.sh (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh new file mode 100755 index 0000000000..ee0c1e2087 --- /dev/null +++ b/t/t9011-svn-da.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +test_description='test parsing of svndiff0 files + +Using the "test-svn-fe -d" helper, check that svn-fe correctly +interprets deltas using various facilities (some from the spec, +some only learned from practice). +' +. ./test-lib.sh + +>empty +printf foo >preimage + +test_expect_success 'reject empty delta' ' + test_must_fail test-svn-fe -d preimage empty 0 +' + +test_expect_success 'delta can empty file' ' + printf "SVNQ" | q_to_nul >clear.delta && + test-svn-fe -d preimage clear.delta 4 >actual && + test_cmp empty actual +' + +test_expect_success 'reject svndiff2' ' + printf "SVN\002" >bad.filetype && + test_must_fail test-svn-fe -d preimage bad.filetype 4 +' + +test_expect_failure 'one-window empty delta' ' + printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow && + test-svn-fe -d preimage clear.onewindow 9 >actual && + test_cmp empty actual +' + +test_done -- cgit v1.2.1 From 252712111fad127db365e3dd764309fe5658679a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:21:43 -0500 Subject: vcs-svn: parse svndiff0 window header Each window in a subversion delta (svndiff0-format file) starts with a window header, consisting of five integers with variable-length representation: source view offset source view length output length instructions length auxiliary data length Parse it. The result is not usable for deltas with nonempty postimage yet; in fact, this only adds support for deltas without any instructions or auxiliary data. This is a good place to stop, though, since that little support lets us add some simple passing tests concerning error handling to the test suite. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder --- t/t9011-svn-da.sh | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index ee0c1e2087..be1234094f 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -26,10 +26,64 @@ test_expect_success 'reject svndiff2' ' test_must_fail test-svn-fe -d preimage bad.filetype 4 ' -test_expect_failure 'one-window empty delta' ' +test_expect_success 'one-window empty delta' ' printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow && test-svn-fe -d preimage clear.onewindow 9 >actual && test_cmp empty actual ' +test_expect_success 'reject incomplete window header' ' + printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow && + printf "SVNQ%s" "QQ" | q_to_nul >clear.partialwindow && + test_must_fail test-svn-fe -d preimage clear.onewindow 6 && + test_must_fail test-svn-fe -d preimage clear.partialwindow 6 +' + +test_expect_success 'reject declared delta longer than actual delta' ' + printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow && + printf "SVNQ%s" "QQ" | q_to_nul >clear.partialwindow && + test_must_fail test-svn-fe -d preimage clear.onewindow 14 && + test_must_fail test-svn-fe -d preimage clear.partialwindow 9 +' + +test_expect_success 'two-window empty delta' ' + printf "SVNQ%s%s" "QQQQQ" "QQQQQ" | q_to_nul >clear.twowindow && + test-svn-fe -d preimage clear.twowindow 14 >actual && + test_must_fail test-svn-fe -d preimage clear.twowindow 13 && + test_cmp empty actual +' + +test_expect_success 'noisy zeroes' ' + printf "SVNQ%s" \ + "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRQQQQQ" | + tr R "\200" | + q_to_nul >clear.noisy && + len=$(wc -c clear.badmagic && + test_must_fail test-svn-fe -d preimage clear.badmagic 5 +' + +test_expect_success 'reject truncated integer' ' + printf "SVNQ%s%s" "QQQQQ" "QQQQRRQ" | + tr R "\200" | + q_to_nul >clear.fullint && + printf "SVNQ%s%s" "QQQQQ" "QQQQRR" | + tr RT "\201" | + q_to_nul >clear.partialint && + test_must_fail test-svn-fe -d preimage clear.fullint 15 && + test-svn-fe -d preimage clear.fullint 16 && + test_must_fail test-svn-fe -d preimage clear.partialint 15 +' + +test_expect_success 'nonempty (but unused) preimage view' ' + printf "SVNQ%b" "Q\003QQQ" | q_to_nul >clear.readpreimage && + test-svn-fe -d preimage clear.readpreimage 9 >actual && + test_cmp empty actual +' + test_done -- cgit v1.2.1 From bcd254621f9a98794cdc32906db10af7135824c4 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:30:37 -0500 Subject: vcs-svn: read the preimage when applying deltas The source view offset heading each svndiff0 window represents a number of bytes past the beginning of the preimage. Together with the source view length, it dictates to the delta applier what portion of the preimage instructions will refer to. Read that portion right away using the sliding window code. Maybe some day we will use mmap to read data more lazily. Subversion's implementation tolerates source view offsets pointing past the end of the preimage file but we do not, for simplicity. This does not teach the delta applier to read instructions or copy data from the source view. Deltas that could produce nonempty output will still be rejected. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- t/t9011-svn-da.sh | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index be1234094f..90b6058ab6 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -86,4 +86,39 @@ test_expect_success 'nonempty (but unused) preimage view' ' test_cmp empty actual ' +test_expect_success 'preimage view: right endpoint cannot backtrack' ' + printf "SVNQ%b%b" "Q\003QQQ" "Q\002QQQ" | + q_to_nul >clear.backtrack && + test_must_fail test-svn-fe -d preimage clear.backtrack 14 +' + +test_expect_success 'preimage view: left endpoint can advance' ' + printf "SVNQ%b%b" "Q\003QQQ" "\001\002QQQ" | + q_to_nul >clear.preshrink && + printf "SVNQ%b%b" "Q\003QQQ" "\001\001QQQ" | + q_to_nul >clear.shrinkbacktrack && + test-svn-fe -d preimage clear.preshrink 14 >actual && + test_must_fail test-svn-fe -d preimage clear.shrinkbacktrack 14 && + test_cmp empty actual +' + +test_expect_success 'preimage view: offsets compared by value' ' + printf "SVNQ%b%b" "\001\001QQQ" "\0200Q\003QQQ" | + q_to_nul >clear.noisybacktrack && + printf "SVNQ%b%b" "\001\001QQQ" "\0200\001\002QQQ" | + q_to_nul >clear.noisyadvance && + test_must_fail test-svn-fe -d preimage clear.noisybacktrack 15 && + test-svn-fe -d preimage clear.noisyadvance 15 && + test_cmp empty actual +' + +test_expect_success 'preimage view: reject truncated preimage' ' + printf "SVNQ%b" "\010QQQQ" | q_to_nul >clear.lateemptyread && + printf "SVNQ%b" "\010\001QQQ" | q_to_nul >clear.latenonemptyread && + printf "SVNQ%b" "\001\010QQQ" | q_to_nul >clear.longread && + test_must_fail test-svn-fe -d preimage clear.lateemptyread 9 && + test_must_fail test-svn-fe -d preimage clear.latenonemptyread 9 && + test_must_fail test-svn-fe -d preimage clear.longread 9 +' + test_done -- cgit v1.2.1 From fc4ae43b2cbd53da6ac2a0047fb4e53175921696 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:35:59 -0500 Subject: vcs-svn: read inline data from deltas Each window of an svndiff0-format delta includes a section for novel text to be copied to the postimage (in the order it appears in the window, possibly interspersed with other data). Slurp in this data when encountering it. It is not actually necessary to do so --- it would be just as easy to copy from delta to output as part of interpreting the relevant instructions --- but this way, the code that interprets svndiff0 instructions can proceed very quickly because it does not require I/O. Subversion's svndiff0 parser rejects deltas that do not consume all the novel text that was provided. Omit that check for now so we can test the new functionality right away, rather than waiting to learn instructions that consume data. Do check for truncated data sections. Subversion's parser rejects deltas that end in the middle of a declared novel-text section, so it should be safe for us to reject them, too. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- t/t9011-svn-da.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index 90b6058ab6..26a4a3694a 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -121,4 +121,16 @@ test_expect_success 'preimage view: reject truncated preimage' ' test_must_fail test-svn-fe -d preimage clear.longread 9 ' +test_expect_success 'inline data' ' + printf "SVNQ%b%s%b%s" "QQQQ\003" "bar" "QQQQ\001" "x" | + q_to_nul >inline.clear && + test-svn-fe -d preimage inline.clear 18 >actual && + test_cmp empty actual +' + +test_expect_success 'reject truncated inline data' ' + printf "SVNQ%b%s" "QQQQ\003" "b" | q_to_nul >inline.trunc && + test_must_fail test-svn-fe -d preimage inline.trunc 10 +' + test_done -- cgit v1.2.1 From ef2ac77e9f8f4819f75cf52721567463e60a805c Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:38:01 -0500 Subject: vcs-svn: read instructions from deltas Buffer the instruction section upon encountering it for later interpretation. An alternative design would involve parsing the instructions at this point and buffering them in some processed form. Using the unprocessed form is simpler. Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- t/t9011-svn-da.sh | 5 +++++ 1 file changed, 5 insertions(+) (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index 26a4a3694a..d9acd0c8a6 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -133,4 +133,9 @@ test_expect_success 'reject truncated inline data' ' test_must_fail test-svn-fe -d preimage inline.trunc 10 ' +test_expect_success 'reject truncated inline data (after instruction section)' ' + printf "SVNQ%b%b%s" "QQ\001\001\003" "\0201" "b" | q_to_nul >insn.trunc && + test_must_fail test-svn-fe -d preimage insn.trunc 11 +' + test_done -- cgit v1.2.1 From ec71aa2e1f229b90092e6678ac7c2dca3d15b5f3 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:39:44 -0500 Subject: vcs-svn: implement copyfrom_data delta instruction The copyfrom_data instruction copies a few bytes verbatim from the novel text section of a window to the postimage. [jn: with memory leak fix from David] Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- t/t9011-svn-da.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index d9acd0c8a6..ba8ce052aa 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -138,4 +138,35 @@ test_expect_success 'reject truncated inline data (after instruction section)' ' test_must_fail test-svn-fe -d preimage insn.trunc 11 ' +test_expect_success 'copyfrom_data' ' + echo hi >expect && + printf "SVNQ%b%b%b" "QQ\003\001\003" "\0203" "hi\n" | q_to_nul >copydat && + test-svn-fe -d preimage copydat 13 >actual && + test_cmp expect actual +' + +test_expect_success 'multiple copyfrom_data' ' + echo hi >expect && + printf "SVNQ%b%b%b%b%b" "QQ\003\002\003" "\0201\0202" "hi\n" \ + "QQQ\002Q" "\0200Q" | q_to_nul >copy.multi && + len=$(wc -c actual && + test_cmp expect actual +' + +test_expect_success 'incomplete multiple insn' ' + printf "SVNQ%b%b%b" "QQ\003\002\003" "\0203\0200" "hi\n" | + q_to_nul >copy.partial && + len=$(wc -c copy.incomplete && + len=$(wc -c Date: Wed, 13 Oct 2010 04:48:07 -0500 Subject: vcs-svn: verify that deltas consume all inline data By constraining the format of deltas, we can more easily detect corruption and other breakage. Requiring deltas not to provide unconsumed data also opens the possibility of ignoring the declared amount of novel data and simply streaming the data as needed to fulfill copyfrom_data requests. Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- t/t9011-svn-da.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index ba8ce052aa..72691b9379 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -121,11 +121,10 @@ test_expect_success 'preimage view: reject truncated preimage' ' test_must_fail test-svn-fe -d preimage clear.longread 9 ' -test_expect_success 'inline data' ' +test_expect_success 'forbid unconsumed inline data' ' printf "SVNQ%b%s%b%s" "QQQQ\003" "bar" "QQQQ\001" "x" | q_to_nul >inline.clear && - test-svn-fe -d preimage inline.clear 18 >actual && - test_cmp empty actual + test_must_fail test-svn-fe -d preimage inline.clear 18 >actual ' test_expect_success 'reject truncated inline data' ' -- cgit v1.2.1 From d3f131b57ec0e69a37bca882fa6bf39aa4c1c387 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:50:07 -0500 Subject: vcs-svn: let deltas use data from postimage The copyfrom_target instruction copies appends data that is already present in the current output view to the end of output. (The offset argument is relative to the beginning of output produced in the current window.) The region copied is allowed to run past the end of the existing output. To support that case, copy one character at a time rather than calling memcpy or memmove. This allows copyfrom_target to be used once to repeat a string many times. For example: COPYFROM_DATA 2 COPYFROM_OUTPUT 10, 0 DATA "ab" would produce the output "ababababababababababab". Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- t/t9011-svn-da.sh | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index 72691b9379..f9573a1c40 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -168,4 +168,46 @@ test_expect_success 'catch attempt to copy missing data' ' test_must_fail test-svn-fe -d preimage copy.incomplete $len ' +test_expect_success 'copyfrom target to repeat data' ' + printf foofoo >expect && + printf "SVNQ%b%b%s" "QQ\006\004\003" "\0203\0100\003Q" "foo" | + q_to_nul >copytarget.repeat && + len=$(wc -c actual && + test_cmp expect actual +' + +test_expect_success 'copyfrom target out of order' ' + printf foooof >expect && + printf "SVNQ%b%b%s" \ + "QQ\006\007\003" "\0203\0101\002\0101\001\0101Q" "foo" | + q_to_nul >copytarget.reverse && + len=$(wc -c actual && + test_cmp expect actual +' + +test_expect_success 'catch copyfrom future' ' + printf "SVNQ%b%b%s" "QQ\004\004\003" "\0202\0101\002\0201" "XYZ" | + q_to_nul >copytarget.infuture && + len=$(wc -c expect && + printf "SVNQ%b%b%s" "QQ\014\004\003" "\0202\0111Q\0201" "XYZ" | + q_to_nul >copytarget.sustain && + len=$(wc -c actual && + test_cmp expect actual +' + +test_expect_success 'catch copy that overflows' ' + printf "SVNQ%b%b%s" "QQ\003\003\001" "\0201\0177Q" X | + q_to_nul >copytarget.overflow && + len=$(wc -c Date: Wed, 13 Oct 2010 04:58:30 -0500 Subject: vcs-svn: let deltas use data from preimage The copyfrom_source instruction appends data from the preimage buffer to the end of output. Its arguments are a length and an offset relative to the beginning of the source view. With this change, the delta applier is able to reproduce all 5,636,613 blobs in the early history of the ASF repository. Tested with mkfifo backflow svn-fe backflow with svn-asf-public-r0:940166 produced by whatever version of Subversion the dumps in /dump/ on svn.apache.org use (presumably 1.6.something). Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- t/t9011-svn-da.sh | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 't/t9011-svn-da.sh') diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index f9573a1c40..b38d16f9db 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -210,4 +210,39 @@ test_expect_success 'catch copy that overflows' ' test_must_fail test-svn-fe -d preimage copytarget.overflow $len ' +test_expect_success 'copyfrom source' ' + printf foo >expect && + printf "SVNQ%b%b" "Q\003\003\002Q" "\003Q" | q_to_nul >copysource.all && + test-svn-fe -d preimage copysource.all 11 >actual && + test_cmp expect actual +' + +test_expect_success 'copy backwards' ' + printf oof >expect && + printf "SVNQ%b%b" "Q\003\003\006Q" "\001\002\001\001\001Q" | + q_to_nul >copysource.rev && + test-svn-fe -d preimage copysource.rev 15 >actual && + test_cmp expect actual +' + +test_expect_success 'offsets are relative to window' ' + printf fo >expect && + printf "SVNQ%b%b%b%b" "Q\003\001\002Q" "\001Q" \ + "\002\001\001\002Q" "\001Q" | + q_to_nul >copysource.two && + test-svn-fe -d preimage copysource.two 18 >actual && + test_cmp expect actual +' + +test_expect_success 'example from notes/svndiff' ' + printf aaaaccccdddddddd >expect && + printf aaaabbbbcccc >source && + printf "SVNQ%b%b%s" "Q\014\020\007\001" \ + "\004Q\004\010\0201\0107\010" d | + q_to_nul >delta.example && + len=$(wc -c actual && + test_cmp expect actual +' + test_done -- cgit v1.2.1