-
Notifications
You must be signed in to change notification settings - Fork 236
Add support for PAX Format, Version 1.0 #298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2f5ea4f
95d44da
3340fc2
23684c8
827a3a0
d08f7df
16d4dbd
8ad8efd
880f591
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,10 +9,10 @@ use std::path::Path; | |
|
|
||
| use crate::entry::{EntryFields, EntryIo}; | ||
| use crate::error::TarError; | ||
| use crate::header::BLOCK_SIZE; | ||
| use crate::header::{SparseEntry, BLOCK_SIZE}; | ||
| use crate::other; | ||
| use crate::pax::*; | ||
| use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; | ||
| use crate::{Entry, GnuExtSparseHeader, Header}; | ||
|
|
||
| /// A top-level representation of an archive file. | ||
| /// | ||
|
|
@@ -282,6 +282,7 @@ impl<'a, R: Read> Iterator for Entries<'a, R> { | |
| } | ||
| } | ||
|
|
||
| #[allow(unused_assignments)] | ||
| impl<'a> EntriesFields<'a> { | ||
| fn next_entry_raw( | ||
| &mut self, | ||
|
|
@@ -430,26 +431,68 @@ impl<'a> EntriesFields<'a> { | |
| )); | ||
| } | ||
| pax_extensions = Some(EntryFields::from(entry).read_all()?); | ||
| // This entry has two headers. | ||
| // Keep pax_extensions for the next ustar header. | ||
| processed -= 1; | ||
| continue; | ||
| } | ||
|
|
||
| let mut fields = EntryFields::from(entry); | ||
| fields.long_pathname = gnu_longname; | ||
| fields.long_linkname = gnu_longlink; | ||
| fields.pax_extensions = pax_extensions; | ||
| // False positive: unused assignment | ||
| // https://github.com/rust-lang/rust/issues/22630 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like this has been fixed so we should be able to drop the assignment.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't have extra assignments here.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can't we address this by doing: ? |
||
| pax_extensions = None; // Reset pax_extensions after use | ||
| fields.long_pathname = if is_recognized_header && fields.is_pax_sparse() { | ||
| fields.pax_sparse_name() | ||
| } else { | ||
| gnu_longname | ||
| }; | ||
| fields.long_linkname = gnu_longlink; | ||
| self.parse_sparse_header(&mut fields)?; | ||
| return Ok(Some(fields.into_entry())); | ||
| } | ||
| } | ||
|
|
||
| fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { | ||
| if !entry.header.entry_type().is_gnu_sparse() { | ||
| if !entry.is_pax_sparse() && !entry.header.entry_type().is_gnu_sparse() { | ||
| return Ok(()); | ||
| } | ||
| let gnu = match entry.header.as_gnu() { | ||
| Some(gnu) => gnu, | ||
| None => return Err(other("sparse entry type listed but not GNU header")), | ||
| }; | ||
| let mut sparse_map = Vec::<SparseEntry>::new(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. One of the main goals I tried to keep for the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think so. Here we need to convert strings to numbers, and the number of pairs is not fixed. |
||
| let mut real_size = 0; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Something about this doesn't feel quite right because
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure what you mean. It is set in both branches, lines 428 and 452.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could do though: |
||
| if entry.is_pax_sparse() { | ||
| real_size = entry.pax_sparse_realsize()?; | ||
| let mut num_bytes_read = 0; | ||
| let mut reader = io::BufReader::with_capacity(BLOCK_SIZE as usize, &self.archive.inner); | ||
| let mut read_decimal_line = || -> io::Result<u64> { | ||
| let mut str = String::new(); | ||
| num_bytes_read += reader.read_line(&mut str)?; | ||
|
cgwalters marked this conversation as resolved.
|
||
| str.strip_suffix("\n") | ||
| .and_then(|s| s.parse::<u64>().ok()) | ||
| .ok_or_else(|| other("failed to read a decimal line")) | ||
| }; | ||
|
|
||
| let num_entries = read_decimal_line()?; | ||
| for _ in 0..num_entries { | ||
| let offset = read_decimal_line()?; | ||
| let size = read_decimal_line()?; | ||
| sparse_map.push(SparseEntry { offset, size }); | ||
| } | ||
| let rem = BLOCK_SIZE as usize - (num_bytes_read % BLOCK_SIZE as usize); | ||
| entry.size -= (num_bytes_read + rem) as u64; | ||
| } else if entry.header.entry_type().is_gnu_sparse() { | ||
| let gnu = match entry.header.as_gnu() { | ||
| Some(gnu) => gnu, | ||
| None => return Err(other("sparse entry type listed but not GNU header")), | ||
| }; | ||
| real_size = gnu.real_size()?; | ||
| for block in gnu.sparse.iter() { | ||
| if !block.is_empty() { | ||
| let offset = block.offset()?; | ||
| let size = block.length()?; | ||
| sparse_map.push(SparseEntry { offset, size }); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Sparse files are represented internally as a list of blocks that are | ||
| // read. Blocks are either a bunch of 0's or they're data from the | ||
|
|
@@ -478,12 +521,7 @@ impl<'a> EntriesFields<'a> { | |
| let data = &mut entry.data; | ||
| let reader = &self.archive.inner; | ||
| let size = entry.size; | ||
| let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { | ||
| if block.is_empty() { | ||
| return Ok(()); | ||
| } | ||
| let off = block.offset()?; | ||
| let len = block.length()?; | ||
| let mut add_block = |off: u64, len: u64| -> io::Result<_> { | ||
| if len != 0 && (size - remaining) % BLOCK_SIZE != 0 { | ||
| return Err(other( | ||
| "previous block in sparse file was not \ | ||
|
|
@@ -510,10 +548,10 @@ impl<'a> EntriesFields<'a> { | |
| data.push(EntryIo::Data(reader.take(len))); | ||
| Ok(()) | ||
| }; | ||
| for block in gnu.sparse.iter() { | ||
| add_block(block)? | ||
| for block in sparse_map { | ||
| add_block(block.offset, block.size)? | ||
| } | ||
| if gnu.is_extended() { | ||
| if entry.header.as_gnu().map(|gnu| gnu.is_extended()) == Some(true) { | ||
| let mut ext = GnuExtSparseHeader::new(); | ||
| ext.isextended[0] = 1; | ||
| while ext.is_extended() { | ||
|
|
@@ -523,12 +561,14 @@ impl<'a> EntriesFields<'a> { | |
|
|
||
| self.next += BLOCK_SIZE; | ||
| for block in ext.sparse.iter() { | ||
| add_block(block)?; | ||
| if !block.is_empty() { | ||
| add_block(block.offset()?, block.length()?)?; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| if cur != gnu.real_size()? { | ||
| if cur != real_size { | ||
| return Err(other( | ||
| "mismatch in sparse file chunks and \ | ||
| size in header", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ use crate::archive::ArchiveInner; | |
| use crate::error::TarError; | ||
| use crate::header::bytes2path; | ||
| use crate::other; | ||
| use crate::pax::{GNU_SPARSE_MAJOR_EXTENSION, GNU_SPARSE_MINOR_EXTENSION}; | ||
| use crate::{Archive, Header, PaxExtensions}; | ||
|
|
||
| /// A read-only view into an entry of an archive. | ||
|
|
@@ -300,6 +301,47 @@ impl<'a> EntryFields<'a> { | |
| self.read_to_end(&mut v).map(|_| v) | ||
| } | ||
|
|
||
| /// Check if the tar file is using PAX sparse extensions. | ||
| pub fn is_pax_sparse(&mut self) -> bool { | ||
|
ncihnegn marked this conversation as resolved.
|
||
| if let Some(ref pax) = self.pax_extensions { | ||
| let mut extensions = PaxExtensions::new(pax).filter_map(|f| f.ok()); | ||
| return extensions | ||
| .find(|f| *f == GNU_SPARSE_MAJOR_EXTENSION) | ||
| .is_some() | ||
| && extensions | ||
| .find(|f| *f == GNU_SPARSE_MINOR_EXTENSION) | ||
| .is_some(); | ||
| } | ||
| false | ||
| } | ||
|
|
||
| pub fn pax_sparse_name(&mut self) -> Option<Vec<u8>> { | ||
| if let Some(ref pax) = self.pax_extensions { | ||
| return PaxExtensions::new(pax) | ||
| .filter_map(|f| f.ok()) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not a big fan of "swallowing" errors like this; my preference would be to make this function return
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't understand. To propagate the error using |
||
| .find(|f| f.key_bytes() == b"GNU.sparse.name") | ||
| .map(|f| f.value_bytes().to_vec()); | ||
| } | ||
| None | ||
| } | ||
|
|
||
| pub fn pax_sparse_realsize(&mut self) -> io::Result<u64> { | ||
| if let Some(ref pax) = self.pax_extensions { | ||
| let pax = PaxExtensions::new(pax) | ||
| .filter_map(|f| f.ok()) | ||
| .find(|f| f.key_bytes() == b"GNU.sparse.realsize") | ||
| .map(|f| f.value_bytes()); | ||
| if let Some(field) = pax { | ||
| let str = | ||
| std::str::from_utf8(&field).map_err(|_| other("failed to read string"))?; | ||
| return str | ||
| .parse::<u64>() | ||
| .map_err(|_| other("failed to parse the real size")); | ||
| } | ||
| } | ||
| Err(other("PAX extension GNU.sparse.realsize not found")) | ||
| } | ||
|
|
||
| fn path(&self) -> io::Result<Cow<Path>> { | ||
| bytes2path(self.path_bytes()) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -124,6 +124,12 @@ pub struct GnuHeader { | |
| pub pad: [u8; 17], | ||
| } | ||
|
|
||
| /// Description of a sparse entry. | |
| pub struct SparseEntry { | ||
| pub offset: u64, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's also document the fields, please.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Offset and size names are self-explanatory.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's definitely true, but I think there's a general principle here that everything In some other crates I maintain we use deny(missing_docs). |
||
| pub size: u64, | ||
| } | ||
|
|
||
| /// Description of the header of a sparse entry. | |
| /// | ||
| /// Specifies the offset/number of bytes of a chunk of data in octal. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1285,6 +1285,22 @@ fn sparse_with_trailing() { | |
| assert_eq!(&s[0x100_000..], "1MB through\n"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn pax_sparse() { | ||
| let rdr = Cursor::new(tar!("pax_sparse.tar")); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Post the xz fiasco, let's be a bit more sensitive about committing binary data to git. Can you add the script that generates this at least? Or probably better honestly for tests, just assume we have a working external |
||
| let mut ar = Archive::new(rdr); | ||
| let td = TempBuilder::new().prefix("tar-rs").tempdir().unwrap(); | ||
| ar.unpack(td.path()).unwrap(); | ||
|
|
||
| let mut s = String::new(); | ||
| File::open(td.path().join("sparse_begin.txt")) | ||
| .unwrap() | ||
| .read_to_string(&mut s) | ||
| .unwrap(); | ||
| assert_eq!(&s[..5], "test\n"); | ||
| assert!(s[5..].chars().all(|x| x == '\u{0}')); | ||
| } | ||
|
|
||
| #[test] | ||
| fn writing_sparse() { | ||
| let mut ar = Builder::new(Vec::new()); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hopefully we can drop this now
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, rustc 1.87.0-nightly still complains.