use anyhow::{Context, Error}; use flate2::{read::GzDecoder, write::GzEncoder}; use rayon::prelude::*; use std::{convert::TryFrom, fmt, io::Read, io::Write, path::Path, str::FromStr}; use xz2::{read::XzDecoder, write::XzEncoder}; #[derive(Debug, Copy, Clone)] pub enum CompressionFormat { Gz, Xz, } impl CompressionFormat { pub(crate) fn detect_from_path(path: impl AsRef) -> Option { match path.as_ref().extension().and_then(|e| e.to_str()) { Some("gz") => Some(CompressionFormat::Gz), Some("xz") => Some(CompressionFormat::Xz), _ => None, } } pub(crate) fn extension(&self) -> &'static str { match self { CompressionFormat::Gz => "gz", CompressionFormat::Xz => "xz", } } pub(crate) fn encode(&self, path: impl AsRef) -> Result, Error> { let mut os = path.as_ref().as_os_str().to_os_string(); os.push(format!(".{}", self.extension())); let path = Path::new(&os); if path.exists() { crate::util::remove_file(path)?; } let file = crate::util::create_new_file(path)?; Ok(match self { CompressionFormat::Gz => Box::new(GzEncoder::new(file, flate2::Compression::best())), CompressionFormat::Xz => { // Note that preset 6 takes about 173MB of memory per thread, so we limit the number of // threads to not blow out 32-bit hosts. (We could be more precise with // `MtStreamBuilder::memusage()` if desired.) let stream = xz2::stream::MtStreamBuilder::new() .threads(Ord::min(num_cpus::get(), 8) as u32) .preset(6) .encoder()?; Box::new(XzEncoder::new_stream(file, stream)) } }) } pub(crate) fn decode(&self, path: impl AsRef) -> Result, Error> { let file = crate::util::open_file(path.as_ref())?; Ok(match self { CompressionFormat::Gz => Box::new(GzDecoder::new(file)), CompressionFormat::Xz => Box::new(XzDecoder::new(file)), }) } } /// This struct wraps Vec in order to parse the value from the command line. #[derive(Debug, Clone)] pub struct CompressionFormats(Vec); impl TryFrom<&'_ str> for CompressionFormats { type Error = Error; fn try_from(value: &str) -> Result { let mut parsed = Vec::new(); for format in value.split(',') { match format.trim() { "gz" => parsed.push(CompressionFormat::Gz), "xz" => parsed.push(CompressionFormat::Xz), other => anyhow::bail!("unknown compression format: {}", other), } } Ok(CompressionFormats(parsed)) } } impl FromStr for CompressionFormats { type Err = Error; fn from_str(value: &str) -> Result { Self::try_from(value) } } impl fmt::Display for CompressionFormats { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for (i, format) in self.iter().enumerate() { if i != 0 { write!(f, ",")?; } fmt::Display::fmt(match format { CompressionFormat::Xz => "xz", CompressionFormat::Gz => "gz", }, f)?; } Ok(()) } } impl Default for CompressionFormats { fn default() -> Self { Self(vec![CompressionFormat::Gz, CompressionFormat::Xz]) } } impl CompressionFormats { pub(crate) fn iter(&self) -> impl Iterator + '_ { self.0.iter().map(|i| *i) } } pub(crate) trait Encoder: Send + Write { fn finish(self: Box) -> Result<(), Error>; } impl Encoder for GzEncoder { fn finish(self: Box) -> Result<(), Error> { GzEncoder::finish(*self).context("failed to finish .gz file")?; Ok(()) } } impl Encoder for XzEncoder { fn finish(self: Box) -> Result<(), Error> { XzEncoder::finish(*self).context("failed to finish .xz file")?; Ok(()) } } pub(crate) struct CombinedEncoder { encoders: Vec>, } impl CombinedEncoder { pub(crate) fn new(encoders: Vec>) -> Box { Box::new(Self { encoders }) } } impl Write for CombinedEncoder { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.write_all(buf)?; Ok(buf.len()) } fn write_all(&mut self, buf: &[u8]) -> std::io::Result<()> { self.encoders .par_iter_mut() .map(|w| w.write_all(buf)) .collect::>>()?; Ok(()) } fn flush(&mut self) -> std::io::Result<()> { self.encoders .par_iter_mut() .map(|w| w.flush()) .collect::>>()?; Ok(()) } } impl Encoder for CombinedEncoder { fn finish(self: Box) -> Result<(), Error> { self.encoders .into_par_iter() .map(|e| e.finish()) .collect::, Error>>()?; Ok(()) } }