summaryrefslogtreecommitdiff
path: root/rust
diff options
context:
space:
mode:
authorColin Walters <walters@verbum.org>2017-01-24 21:43:53 -0500
committerAtomic Bot <atomic-devel@projectatomic.io>2017-02-03 14:29:00 +0000
commitd894f609dbacd6d66aaff0402cab60a59901f73e (patch)
tree899a2ddd63656b2e2915d97effdf989bb606dcbd /rust
parent7803fe1d60f32f659555acf32a8812a45ab15792 (diff)
downloadostree-d894f609dbacd6d66aaff0402cab60a59901f73e.tar.gz
oxidation: Add implementation of bupsplit in Rust
This is an initial drop of "oxidation", or adding implementation of components in Rust. The bupsplit code is a good target - no dependencies, just computation. Translation into Rust had a few twists - - The C code relies a lot on overflowing unsigned ints, and also on the C promotion rules for e.g. `uint8_t -> int32_t` - There were some odd loops that I introduced bugs in while translating...in particular, the function always returns `len`, but I mistakenly translated to `len+1`, resulting in an OOB read on the C side, which was hard to debug. On the plus side, an off-by-one array indexing in the Rust code paniced nicely. In practice, we'll need a lot more build infrastructure to make this work, such as using `cargo vendor` when producing build artifacts for example. Also, Cargo is yet another thing we need to cache. Where do we go with this? Well, I think we should merge this, it's not a lot of code. We can just have it be an alternative CI target. Should we do a lot more right now? Probably not immediately, but I find the medium/long term prospects pretty exciting! Closes: #656 Approved by: jlebon
Diffstat (limited to 'rust')
-rw-r--r--rust/Cargo.toml16
-rw-r--r--rust/src/bupsplit.rs129
2 files changed, 145 insertions, 0 deletions
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
new file mode 100644
index 00000000..4da5ac32
--- /dev/null
+++ b/rust/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "bupsplit"
+version = "0.0.1"
+authors = ["Colin Walters <walters@verbum.org>"]
+
+[dependencies]
+libc = "0.2"
+
+[lib]
+name = "bupsplit_rs"
+path = "src/bupsplit.rs"
+crate-type = ["staticlib"]
+
+[profile.release]
+panic = "abort"
+lto = true
diff --git a/rust/src/bupsplit.rs b/rust/src/bupsplit.rs
new file mode 100644
index 00000000..af97e968
--- /dev/null
+++ b/rust/src/bupsplit.rs
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2017 Colin Walters <walters@verbum.org>
+ * Based on original bupsplit.c:
+ * Copyright 2011 Avery Pennarun. All rights reserved.
+ *
+ * (This license applies to bupsplit.c and bupsplit.h only.)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AVERY PENNARUN ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+extern crate libc;
+
+use std::slice;
+
+// According to librsync/rollsum.h:
+// "We should make this something other than zero to improve the
+// checksum algorithm: tridge suggests a prime number."
+// apenwarr: I unscientifically tried 0 and 7919, and they both ended up
+// slightly worse than the librsync value of 31 for my arbitrary test data.
+const ROLLSUM_CHAR_OFFSET: u32 = 31;
+
+// Previously in the header file
+const BUP_BLOBBITS: u32= 13;
+const BUP_BLOBSIZE: u32 = (1<<BUP_BLOBBITS);
+const BUP_WINDOWBITS: u32 = 7;
+const BUP_WINDOWSIZE: u32 = (1<<(BUP_WINDOWBITS-1));
+
+struct Rollsum {
+ s1: u32,
+ s2: u32,
+ window: [u8; BUP_WINDOWSIZE as usize],
+ wofs: i32,
+}
+
+impl Rollsum {
+ pub fn new() -> Rollsum {
+ Rollsum { s1: BUP_WINDOWSIZE * ROLLSUM_CHAR_OFFSET,
+ s2: BUP_WINDOWSIZE * (BUP_WINDOWSIZE-1) * ROLLSUM_CHAR_OFFSET,
+ window: [0; 64],
+ wofs: 0
+ }
+ }
+
+ // These formulas are based on rollsum.h in the librsync project.
+ pub fn add(&mut self, drop: u8, add: u8) -> () {
+ let drop_expanded = drop as u32;
+ let add_expanded = add as u32;
+ self.s1 = self.s1.wrapping_add(add_expanded.wrapping_sub(drop_expanded));
+ self.s2 = self.s2.wrapping_add(self.s1.wrapping_sub(BUP_WINDOWSIZE * (drop_expanded + ROLLSUM_CHAR_OFFSET)));
+ }
+
+ pub fn roll(&mut self, ch: u8) -> () {
+ let wofs = self.wofs as usize;
+ let dval = self.window[wofs];
+ self.add(dval, ch);
+ self.window[wofs] = ch;
+ self.wofs = (self.wofs + 1) % (BUP_WINDOWSIZE as i32);
+ }
+
+ pub fn digest(&self) -> u32 {
+ (self.s1 << 16) | (self.s2 & 0xFFFF)
+ }
+}
+
+#[no_mangle]
+pub extern fn bupsplit_sum(buf: *const u8, ofs: libc::size_t, len: libc::size_t) -> u32 {
+ let sbuf = unsafe {
+ assert!(!buf.is_null());
+ slice::from_raw_parts(buf.offset(ofs as isize), (len - ofs) as usize)
+ };
+
+ let mut r = Rollsum::new();
+ for x in sbuf {
+ r.roll(*x);
+ }
+ r.digest()
+}
+
+#[no_mangle]
+pub extern fn bupsplit_find_ofs(buf: *const u8, len: libc::size_t,
+ bits: *mut libc::c_int) -> libc::c_int
+{
+ let sbuf = unsafe {
+ assert!(!buf.is_null());
+ slice::from_raw_parts(buf, len as usize)
+ };
+
+ let mut r = Rollsum::new();
+ for x in sbuf {
+ r.roll(*x);
+ if (r.s2 & (BUP_BLOBSIZE-1)) == ((u32::max_value()) & (BUP_BLOBSIZE-1)) {
+ if !bits.is_null() {
+ let mut sum = r.digest() >> BUP_BLOBBITS;
+ let mut rbits : libc::c_int = BUP_BLOBBITS as i32;
+ while sum & 1 != 0 {
+ sum = sum >> 1;
+ rbits = rbits + 1;
+ }
+ unsafe {
+ *bits = rbits;
+ }
+ }
+ return len as i32
+ }
+ }
+ 0
+}