diff options
author | Pietro Albini <pietro@pietroalbini.org> | 2020-12-07 15:00:51 +0100 |
---|---|---|
committer | Pietro Albini <pietro@pietroalbini.org> | 2020-12-10 16:25:32 +0100 |
commit | a16646f77e9bf6b77f4e2300581804fbb2cfdb12 (patch) | |
tree | cd78b09a4f83d710d35aa833443fa20e65096bad | |
parent | bb072fe8e77e0533d256a47a0e36d001d6fb18e5 (diff) | |
download | rust-installer-a16646f77e9bf6b77f4e2300581804fbb2cfdb12.tar.gz |
compression: support a dynamic list of output formats
-rw-r--r-- | src/compression.rs | 102 | ||||
-rw-r--r-- | src/lib.rs | 1 | ||||
-rw-r--r-- | src/tarballer.rs | 68 |
3 files changed, 114 insertions, 57 deletions
diff --git a/src/compression.rs b/src/compression.rs new file mode 100644 index 0000000..949c0a2 --- /dev/null +++ b/src/compression.rs @@ -0,0 +1,102 @@ +use anyhow::{Context, Error}; +use flate2::write::GzEncoder; +use rayon::prelude::*; +use std::{io::Write, path::Path}; +use xz2::write::XzEncoder; + +pub(crate) enum CompressionFormat { + Gz, + Xz, +} + +impl CompressionFormat { + pub(crate) fn encode(&self, path: impl AsRef<Path>) -> Result<Box<dyn Encoder>, Error> { + let extension = match self { + CompressionFormat::Gz => ".gz", + CompressionFormat::Xz => ".xz", + }; + let mut os = path.as_ref().as_os_str().to_os_string(); + os.push(extension); + let path = Path::new(&os); + + if path.exists() { + crate::util::remove_file(path)?; + } + let file = crate::util::create_new_file(path)?; + + Ok(match self { + CompressionFormat::Gz => Box::new(GzEncoder::new(file, flate2::Compression::best())), + CompressionFormat::Xz => { + // Note that preset 6 takes about 173MB of memory per thread, so we limit the number of + // threads to not blow out 32-bit hosts. (We could be more precise with + // `MtStreamBuilder::memusage()` if desired.) + let stream = xz2::stream::MtStreamBuilder::new() + .threads(Ord::min(num_cpus::get(), 8) as u32) + .preset(6) + .encoder()?; + Box::new(XzEncoder::new_stream(file, stream)) + } + }) + } +} + +pub(crate) trait Encoder: Send + Write { + fn finish(self: Box<Self>) -> Result<(), Error>; +} + +impl<W: Send + Write> Encoder for GzEncoder<W> { + fn finish(self: Box<Self>) -> Result<(), Error> { + GzEncoder::finish(*self).context("failed to finish .gz file")?; + Ok(()) + } +} + +impl<W: Send + Write> Encoder for XzEncoder<W> { + fn finish(self: Box<Self>) -> Result<(), Error> { + XzEncoder::finish(*self).context("failed to finish .xz file")?; + Ok(()) + } +} + +pub(crate) struct CombinedEncoder { + encoders: Vec<Box<dyn Encoder>>, +} + +impl CombinedEncoder { + pub(crate) fn new(encoders: Vec<Box<dyn Encoder>>) -> Box<dyn Encoder> { + Box::new(Self { encoders }) + } +} + +impl Write for CombinedEncoder { + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { + self.write_all(buf)?; + Ok(buf.len()) + } + + fn write_all(&mut self, buf: &[u8]) -> std::io::Result<()> { + self.encoders + .par_iter_mut() + .map(|w| w.write_all(buf)) + .collect::<std::io::Result<Vec<()>>>()?; + Ok(()) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.encoders + .par_iter_mut() + .map(|w| w.flush()) + .collect::<std::io::Result<Vec<()>>>()?; + Ok(()) + } +} + +impl Encoder for CombinedEncoder { + fn finish(self: Box<Self>) -> Result<(), Error> { + self.encoders + .into_par_iter() + .map(|e| e.finish()) + .collect::<Result<Vec<()>, Error>>()?; + Ok(()) + } +} @@ -2,6 +2,7 @@ mod util; mod combiner; +mod compression; mod generator; mod scripter; mod tarballer; diff --git a/src/tarballer.rs b/src/tarballer.rs index 42a4ffa..c23cac1 100644 --- a/src/tarballer.rs +++ b/src/tarballer.rs @@ -1,13 +1,11 @@ use anyhow::{bail, Context, Result}; -use flate2::write::GzEncoder; use std::fs::{read_link, symlink_metadata}; -use std::io::{self, empty, BufWriter, Write}; +use std::io::{empty, BufWriter, Write}; use std::path::Path; use tar::{Builder, Header}; use walkdir::WalkDir; -use xz2::write::XzEncoder; -use crate::util::*; +use crate::{compression::CombinedEncoder, compression::CompressionFormat, util::*}; actor! { #[derive(Debug)] @@ -26,15 +24,11 @@ actor! { impl Tarballer { /// Generates the actual tarballs pub fn run(self) -> Result<()> { - let tar_gz = self.output.clone() + ".tar.gz"; - let tar_xz = self.output.clone() + ".tar.xz"; - - // Remove any existing files. - for file in &[&tar_gz, &tar_xz] { - if Path::new(file).exists() { - remove_file(file)?; - } - } + let tarball_name = self.output.clone() + ".tar"; + let encoder = CombinedEncoder::new(vec![ + CompressionFormat::Gz.encode(&tarball_name)?, + CompressionFormat::Xz.encode(&tarball_name)?, + ]); // Sort files by their suffix, to group files with the same name from // different locations (likely identical) and files with the same @@ -43,22 +37,9 @@ impl Tarballer { .context("failed to collect file paths")?; files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev())); - // Prepare the `.tar.gz` file. - let gz = GzEncoder::new(create_new_file(tar_gz)?, flate2::Compression::best()); - - // Prepare the `.tar.xz` file. Note that preset 6 takes about 173MB of memory - // per thread, so we limit the number of threads to not blow out 32-bit hosts. - // (We could be more precise with `MtStreamBuilder::memusage()` if desired.) - let stream = xz2::stream::MtStreamBuilder::new() - .threads(Ord::min(num_cpus::get(), 8) as u32) - .preset(6) - .encoder()?; - let xz = XzEncoder::new_stream(create_new_file(tar_xz)?, stream); - // Write the tar into both encoded files. We write all directories // first, so files may be directly created. (See rust-lang/rustup.rs#1092.) - let tee = RayonTee(xz, gz); - let buf = BufWriter::with_capacity(1024 * 1024, tee); + let buf = BufWriter::with_capacity(1024 * 1024, encoder); let mut builder = Builder::new(buf); let pool = rayon::ThreadPoolBuilder::new() @@ -77,20 +58,14 @@ impl Tarballer { append_path(&mut builder, &src, &path) .with_context(|| format!("failed to tar file '{}'", src.display()))?; } - let RayonTee(xz, gz) = builder + builder .into_inner() .context("failed to finish writing .tar stream")? .into_inner() .ok() - .unwrap(); + .unwrap() + .finish()?; - // Finish both encoded files. - let (rxz, rgz) = rayon::join( - || xz.finish().context("failed to finish .tar.xz file"), - || gz.finish().context("failed to finish .tar.gz file"), - ); - rxz?; - rgz?; Ok(()) }) } @@ -154,24 +129,3 @@ where } Ok((dirs, files)) } - -struct RayonTee<A, B>(A, B); - -impl<A: Write + Send, B: Write + Send> Write for RayonTee<A, B> { - fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - self.write_all(buf)?; - Ok(buf.len()) - } - - fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { - let (a, b) = (&mut self.0, &mut self.1); - let (ra, rb) = rayon::join(|| a.write_all(buf), || b.write_all(buf)); - ra.and(rb) - } - - fn flush(&mut self) -> io::Result<()> { - let (a, b) = (&mut self.0, &mut self.1); - let (ra, rb) = rayon::join(|| a.flush(), || b.flush()); - ra.and(rb) - } -} |