diff --git a/src/archive.rs b/src/archive.rs index 4d569c63..ad9a16ec 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -9,10 +9,10 @@ use std::path::Path; use crate::entry::{EntryFields, EntryIo}; use crate::error::TarError; -use crate::header::BLOCK_SIZE; +use crate::header::{SparseEntry, BLOCK_SIZE}; use crate::other; use crate::pax::*; -use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; +use crate::{Entry, GnuExtSparseHeader, Header}; /// A top-level representation of an archive file. /// @@ -282,6 +282,7 @@ impl<'a, R: Read> Iterator for Entries<'a, R> { } } +#[allow(unused_assignments)] impl<'a> EntriesFields<'a> { fn next_entry_raw( &mut self, @@ -430,26 +431,68 @@ impl<'a> EntriesFields<'a> { )); } pax_extensions = Some(EntryFields::from(entry).read_all()?); + // This entry has two headers. + // Keep pax_extensions for the next ustar header. + processed -= 1; continue; } let mut fields = EntryFields::from(entry); - fields.long_pathname = gnu_longname; - fields.long_linkname = gnu_longlink; fields.pax_extensions = pax_extensions; + // False positive: unused assignment + // https://github.com/rust-lang/rust/issues/22630 + pax_extensions = None; // Reset pax_extensions after use + fields.long_pathname = if is_recognized_header && fields.is_pax_sparse() { + fields.pax_sparse_name() + } else { + gnu_longname + }; + fields.long_linkname = gnu_longlink; self.parse_sparse_header(&mut fields)?; return Ok(Some(fields.into_entry())); } } fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { - if !entry.header.entry_type().is_gnu_sparse() { + if !entry.is_pax_sparse() && !entry.header.entry_type().is_gnu_sparse() { return Ok(()); } - let gnu = match entry.header.as_gnu() { - Some(gnu) => gnu, - None => return Err(other("sparse entry type listed but not GNU header")), - }; + let mut sparse_map = Vec::::new(); + let mut real_size = 0; + if entry.is_pax_sparse() { + real_size = entry.pax_sparse_realsize()?; + let mut 
num_bytes_read = 0; + let mut reader = io::BufReader::with_capacity(BLOCK_SIZE as usize, &self.archive.inner); + let mut read_decimal_line = || -> io::Result { + let mut str = String::new(); + num_bytes_read += reader.read_line(&mut str)?; + str.strip_suffix("\n") + .and_then(|s| s.parse::().ok()) + .ok_or_else(|| other("failed to read a decimal line")) + }; + + let num_entries = read_decimal_line()?; + for _ in 0..num_entries { + let offset = read_decimal_line()?; + let size = read_decimal_line()?; + sparse_map.push(SparseEntry { offset, size }); + } + let rem = BLOCK_SIZE as usize - (num_bytes_read % BLOCK_SIZE as usize); + entry.size -= (num_bytes_read + rem) as u64; + } else if entry.header.entry_type().is_gnu_sparse() { + let gnu = match entry.header.as_gnu() { + Some(gnu) => gnu, + None => return Err(other("sparse entry type listed but not GNU header")), + }; + real_size = gnu.real_size()?; + for block in gnu.sparse.iter() { + if !block.is_empty() { + let offset = block.offset()?; + let size = block.length()?; + sparse_map.push(SparseEntry { offset, size }); + } + } + } // Sparse files are represented internally as a list of blocks that are // read. Blocks are either a bunch of 0's or they're data from the @@ -478,12 +521,7 @@ impl<'a> EntriesFields<'a> { let data = &mut entry.data; let reader = &self.archive.inner; let size = entry.size; - let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { - if block.is_empty() { - return Ok(()); - } - let off = block.offset()?; - let len = block.length()?; + let mut add_block = |off: u64, len: u64| -> io::Result<_> { if len != 0 && (size - remaining) % BLOCK_SIZE != 0 { return Err(other( "previous block in sparse file was not \ @@ -510,10 +548,10 @@ impl<'a> EntriesFields<'a> { data.push(EntryIo::Data(reader.take(len))); Ok(()) }; - for block in gnu.sparse.iter() { - add_block(block)? + for block in sparse_map { + add_block(block.offset, block.size)? 
} - if gnu.is_extended() { + if entry.header.as_gnu().map(|gnu| gnu.is_extended()) == Some(true) { let mut ext = GnuExtSparseHeader::new(); ext.isextended[0] = 1; while ext.is_extended() { @@ -523,12 +561,14 @@ impl<'a> EntriesFields<'a> { self.next += BLOCK_SIZE; for block in ext.sparse.iter() { - add_block(block)?; + if !block.is_empty() { + add_block(block.offset()?, block.length()?)?; + } } } } } - if cur != gnu.real_size()? { + if cur != real_size { return Err(other( "mismatch in sparse file chunks and \ size in header", diff --git a/src/entry.rs b/src/entry.rs index b6b48b47..9187276b 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -13,6 +13,7 @@ use crate::archive::ArchiveInner; use crate::error::TarError; use crate::header::bytes2path; use crate::other; +use crate::pax::{GNU_SPARSE_MAJOR_EXTENSION, GNU_SPARSE_MINOR_EXTENSION}; use crate::{Archive, Header, PaxExtensions}; /// A read-only view into an entry of an archive. @@ -300,6 +301,47 @@ impl<'a> EntryFields<'a> { self.read_to_end(&mut v).map(|_| v) } + /// Check if the tar file is using PAX sparse extensions. 
+ pub fn is_pax_sparse(&mut self) -> bool { + if let Some(ref pax) = self.pax_extensions { + let mut extensions = PaxExtensions::new(pax).filter_map(|f| f.ok()); + return extensions + .find(|f| *f == GNU_SPARSE_MAJOR_EXTENSION) + .is_some() + && extensions + .find(|f| *f == GNU_SPARSE_MINOR_EXTENSION) + .is_some(); + } + false + } + + pub fn pax_sparse_name(&mut self) -> Option> { + if let Some(ref pax) = self.pax_extensions { + return PaxExtensions::new(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"GNU.sparse.name") + .map(|f| f.value_bytes().to_vec()); + } + None + } + + pub fn pax_sparse_realsize(&mut self) -> io::Result { + if let Some(ref pax) = self.pax_extensions { + let pax = PaxExtensions::new(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"GNU.sparse.realsize") + .map(|f| f.value_bytes()); + if let Some(field) = pax { + let str = + std::str::from_utf8(&field).map_err(|_| other("failed to read string"))?; + return str + .parse::() + .map_err(|_| other("failed to parse the real size")); + } + } + Err(other("PAX extension GNU.sparse.realsize not found")) + } + fn path(&self) -> io::Result> { bytes2path(self.path_bytes()) } diff --git a/src/header.rs b/src/header.rs index 0c577b9a..562f02ac 100644 --- a/src/header.rs +++ b/src/header.rs @@ -124,6 +124,12 @@ pub struct GnuHeader { pub pad: [u8; 17], } +/// Description of a sparse entry. +pub struct SparseEntry { + pub offset: u64, + pub size: u64, +} + /// Description of the header of a spare entry. /// /// Specifies the offset/number of bytes of a chunk of data in octal. diff --git a/src/pax.rs b/src/pax.rs index d1494282..95367a37 100644 --- a/src/pax.rs +++ b/src/pax.rs @@ -56,11 +56,24 @@ impl<'entry> PaxExtensions<'entry> { } /// A key/value pair corresponding to a pax extension. +#[derive(PartialEq)] pub struct PaxExtension<'entry> { key: &'entry [u8], value: &'entry [u8], } +/// Constant of the GNU sparse major extension.
+pub const GNU_SPARSE_MAJOR_EXTENSION: PaxExtension<'_> = PaxExtension { + key: b"GNU.sparse.major", + value: b"1", +}; + +/// Constant of the GNU sparse minor extension. +pub const GNU_SPARSE_MINOR_EXTENSION: PaxExtension<'_> = PaxExtension { + key: b"GNU.sparse.minor", + value: b"0", +}; + pub fn pax_extensions_value(a: &[u8], key: &str) -> Option { for extension in PaxExtensions::new(a) { let current_extension = match extension { diff --git a/tests/all.rs b/tests/all.rs index 0ad67f98..e75bb67f 100644 --- a/tests/all.rs +++ b/tests/all.rs @@ -1285,6 +1285,22 @@ fn sparse_with_trailing() { assert_eq!(&s[0x100_000..], "1MB through\n"); } +#[test] +fn pax_sparse() { + let rdr = Cursor::new(tar!("pax_sparse.tar")); + let mut ar = Archive::new(rdr); + let td = TempBuilder::new().prefix("tar-rs").tempdir().unwrap(); + ar.unpack(td.path()).unwrap(); + + let mut s = String::new(); + File::open(td.path().join("sparse_begin.txt")) + .unwrap() + .read_to_string(&mut s) + .unwrap(); + assert_eq!(&s[..5], "test\n"); + assert!(s[5..].chars().all(|x| x == '\u{0}')); +} + #[test] fn writing_sparse() { let mut ar = Builder::new(Vec::new()); diff --git a/tests/archives/pax_sparse.tar b/tests/archives/pax_sparse.tar new file mode 100644 index 00000000..d74bef7b Binary files /dev/null and b/tests/archives/pax_sparse.tar differ