Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions fuzz/generate_corpus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,9 @@ fn main() {
.unwrap()
.size(0)
.unwrap();
builder.add_pax("SCHILY.xattr.user.test", b"value123");
builder
.add_pax("SCHILY.xattr.user.test", b"value123")
.unwrap();
let hdr = builder.finish_bytes();
let mut archive = hdr;
archive.extend_from_slice(&EOA);
Expand All @@ -292,10 +294,12 @@ fn main() {
.unwrap()
.mtime(9999999999)
.unwrap();
builder.add_pax(
"SCHILY.xattr.security.selinux",
b"system_u:object_r:usr_t:s0",
);
builder
.add_pax(
"SCHILY.xattr.security.selinux",
b"system_u:object_r:usr_t:s0",
)
.unwrap();
let hdr = builder.finish_bytes();
let mut archive = hdr;
archive.extend_from_slice(b"payload");
Expand Down Expand Up @@ -512,7 +516,7 @@ fn main() {
.unwrap()
.mtime(1700000000)
.unwrap();
builder.add_pax("mtime", b"1700000000.123456789");
builder.add_pax("mtime", b"1700000000.123456789").unwrap();
let hdr = builder.finish_bytes();
let mut archive = hdr;
archive.extend_from_slice(&EOA);
Expand Down
55 changes: 51 additions & 4 deletions src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -863,11 +863,23 @@ impl EntryBuilder {
/// Add a custom PAX extension record.
///
/// This is useful for adding metadata that doesn't fit in standard
/// header fields. The PAX extension will be emitted regardless of
/// the extension mode setting.
pub fn add_pax(&mut self, key: &str, value: impl AsRef<[u8]>) -> &mut Self {
/// header fields (e.g. extended attributes, high-precision timestamps).
///
/// # Errors
///
/// Returns [`HeaderError::IncompatibleMode`] if the extension mode is
/// [`ExtensionMode::Gnu`]. PAX records are only emitted in PAX mode;
/// call [`Self::set_extension_mode`] first or use [`Self::new_ustar`]
/// to create a PAX-mode builder.
pub fn add_pax(&mut self, key: &str, value: impl AsRef<[u8]>) -> Result<&mut Self> {
if self.mode != ExtensionMode::Pax {
return Err(HeaderError::IncompatibleMode {
required: ExtensionMode::Pax,
current: self.mode,
});
}
self.pax_mut().add(key, value);
self
Ok(self)
}

/// Get or create the PAX builder for this entry.
Expand Down Expand Up @@ -1861,6 +1873,7 @@ mod tests {
.size(0)
.unwrap()
.add_pax("SCHILY.xattr.user.test", b"value")
.unwrap()
.entry_type(EntryType::Regular);

assert!(builder.needs_extension()); // Due to custom PAX
Expand All @@ -1876,6 +1889,40 @@ mod tests {
assert!(pax_str.contains("SCHILY.xattr.user.test=value"));
}

#[test]
fn test_add_pax_rejects_gnu_mode() {
let mut builder = EntryBuilder::new_gnu();
let result = builder.add_pax("SCHILY.xattr.user.test", b"value");
assert!(result.is_err());
let err = result.unwrap_err();
assert!(
err.to_string().contains("Pax"),
"error should mention Pax mode: {err}"
);
}

#[test]
fn test_add_pax_after_mode_switch() {
// Switching from GNU to PAX mode should allow add_pax().
let mut builder = EntryBuilder::new_gnu();
assert!(builder.add_pax("key", b"val").is_err());

builder.set_extension_mode(ExtensionMode::Pax);
builder
.path(b"file.txt")
.mode(0o644)
.unwrap()
.size(0)
.unwrap()
.add_pax("SCHILY.xattr.user.test", b"value")
.unwrap();

let blocks = builder.finish();
// Should have PAX 'x' header + PAX data + main header
assert!(blocks.len() >= 3);
assert_eq!(blocks[0].entry_type(), EntryType::XHeader);
}

#[test]
fn test_entry_builder_extension_mode_switching() {
let long_path = "x/".repeat(60);
Expand Down
31 changes: 30 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,15 @@ pub enum HeaderError {
detail: String,
},

/// Operation requires a different extension mode.
#[error("operation requires {required:?} extension mode, but builder is in {current:?} mode")]
IncompatibleMode {
/// The mode the operation requires.
required: crate::builder::ExtensionMode,
/// The mode the builder is currently in.
current: crate::builder::ExtensionMode,
},

/// The header checksum does not match the computed value.
#[error("checksum mismatch: expected {expected}, computed {computed}")]
ChecksumMismatch {
Expand Down Expand Up @@ -1662,6 +1671,13 @@ pub const PAX_MTIME: &str = "mtime";
pub const PAX_ATIME: &str = "atime";
/// PAX extended header key for change time.
pub const PAX_CTIME: &str = "ctime";

// TODO: POSIX defines `hdrcharset` and `charset` PAX keys for signaling
// non-UTF-8 header values and file data encoding respectively. No major
// implementation (GNU tar, Go archive/tar, Rust tar crate) acts on these
// in practice -- they all just accept raw bytes. We should add support if
// interoperability requires it.

/// PAX extended header prefix for SCHILY extended attributes.
pub const PAX_SCHILY_XATTR: &str = "SCHILY.xattr.";

Expand Down Expand Up @@ -2272,6 +2288,18 @@ mod tests {
assert_eq!(pax.get("path"), Some("日本語/ファイル.txt"));
}

#[test]
fn test_pax_non_utf8_value() {
// PAX values may contain non-UTF-8 bytes in practice.
// value() should fail but value_bytes() should preserve them.
let record = b"25 path=etc/\x80\xfe\xff/file.txt\n";
let mut pax = PaxExtensions::new(record);
let ext = pax.next().unwrap().unwrap();
assert_eq!(ext.key().unwrap(), "path");
assert!(ext.value().is_err(), "non-UTF-8 value should fail value()");
assert_eq!(ext.value_bytes(), b"etc/\x80\xfe\xff/file.txt");
}

#[test]
fn test_pax_mtime_fractional() {
// PAX mtime can have fractional seconds
Expand Down Expand Up @@ -4084,7 +4112,8 @@ mod tests {
.mtime(params.mtime)
.unwrap()
.entry_type(EntryType::Regular)
.add_pax(&params.xattr_key, params.xattr_value.as_bytes());
.add_pax(&params.xattr_key, params.xattr_value.as_bytes())
.unwrap();

builder.finish()
}
Expand Down
183 changes: 183 additions & 0 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2004,6 +2004,93 @@ mod tests {
}
}

#[test]
#[allow(invalid_from_utf8)]
fn test_parser_pax_non_utf8_path() {
// Non-UTF-8 bytes in PAX path values are accepted, matching the
// pragmatic behavior of Go's archive/tar and the Rust tar crate.
// The POSIX spec says PAX path values SHOULD be UTF-8, but real-world
// archives (e.g. from Docker/BuildKit) may contain non-UTF-8 paths.
// See bootc-dev/bootc#2073 for a concrete example.
let non_utf8_path: &[u8] = b"etc/ssl/certs/F\xf5tan\xfas\xedtv\xe1ny.pem";
assert!(
core::str::from_utf8(non_utf8_path).is_err(),
"test data must be non-UTF-8"
);

let mut archive = Vec::new();
archive.extend(make_pax_header(&[("path", non_utf8_path)]));
archive.extend_from_slice(&make_header(b"placeholder.pem", 0, b'0'));
archive.extend(zeroes(1024));

let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();

match event {
ParseEvent::Entry { entry, .. } => {
assert_eq!(entry.path.as_ref(), non_utf8_path);
// The lossy accessor should replace invalid bytes
let lossy = entry.path_lossy();
assert!(
lossy.contains('\u{FFFD}'),
"lossy conversion should have replacement chars"
);
}
other => panic!("Expected Entry, got {:?}", other),
}
}

#[test]
fn test_pax_non_utf8_path_roundtrip() {
// Verify that a non-UTF-8 PAX path survives a builder -> parser
// roundtrip. The path must exceed 100 bytes to trigger PAX emission.
use crate::builder::EntryBuilder;

// 101+ byte path with non-UTF-8 bytes embedded
let mut long_path =
b"a/very/deep/directory/structure/that/needs/to/exceed/one/hundred/bytes/\xf0\xf1\xf2/"
.to_vec();
long_path.extend(b"and/some/more/nested/dirs/to/be/safe/file.bin");
assert!(long_path.len() > 100, "path must exceed 100 bytes");
assert!(
core::str::from_utf8(&long_path).is_err(),
"path must contain non-UTF-8"
);

let mut builder = EntryBuilder::new_ustar();
builder
.path(&long_path)
.mode(0o644)
.unwrap()
.size(5)
.unwrap()
.mtime(0)
.unwrap()
.uid(0)
.unwrap()
.gid(0)
.unwrap();

let mut archive = Vec::new();
archive.extend_from_slice(&builder.finish_bytes());
// 5 bytes of content, padded to 512
let mut content_block = [0u8; 512];
content_block[..5].copy_from_slice(b"hello");
archive.extend_from_slice(&content_block);
archive.extend(zeroes(1024));

let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();

match event {
ParseEvent::Entry { entry, .. } => {
assert_eq!(entry.path.as_ref(), long_path.as_slice());
assert_eq!(entry.size, 5);
}
other => panic!("Expected Entry, got {:?}", other),
}
}

#[test]
fn test_parser_pax_size_override() {
// PAX header should override the size in the actual header
Expand Down Expand Up @@ -2186,6 +2273,29 @@ mod tests {
}
}

#[test]
#[allow(invalid_from_utf8)]
fn test_parser_pax_non_utf8_linkpath() {
// Non-UTF-8 bytes in PAX linkpath values should be preserved.
let non_utf8_target: &[u8] = b"targets/\xc0\xc1invalid.so";
assert!(core::str::from_utf8(non_utf8_target).is_err());

let mut archive = Vec::new();
archive.extend(make_pax_header(&[("linkpath", non_utf8_target)]));
archive.extend_from_slice(&make_header(b"link.so", 0, b'2')); // symlink
archive.extend(zeroes(1024));

let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();

match event {
ParseEvent::Entry { entry, .. } => {
assert_eq!(entry.link_target.as_deref(), Some(non_utf8_target.as_ref()));
}
other => panic!("Expected Entry, got {:?}", other),
}
}

// =========================================================================
// PAX global header tests
// =========================================================================
Expand Down Expand Up @@ -2453,6 +2563,79 @@ mod tests {
}
}

#[test]
fn test_parser_pax_before_gnu_long_name() {
// PAX 'x' -> GNU 'L' -> real entry: this is what tar-rs's builder
// produces when you call append_pax_extensions() (e.g. for xattrs)
// followed by append_data() with a long path. The PAX metadata
// should still be associated with the real entry, and PAX path
// (if present) should take precedence over the GNU long name.
//
// This ordering matters for ecosystem compatibility with bootc
// (see bootc-dev/bootc#2073).
let gnu_name =
"gnu/long/name/that/exceeds/one/hundred/bytes/".to_string() + &"g".repeat(60);
let xattr_value = b"some xattr value";

let mut archive = Vec::new();
// PAX header first (with xattr but no path -- simulating bootc's
// copy_entry which strips path/linkpath from PAX)
archive.extend(make_pax_header(&[(
"SCHILY.xattr.user.test",
xattr_value.as_slice(),
)]));
// GNU long name second
archive.extend(make_gnu_long_name(gnu_name.as_bytes()));
// Real entry last
archive.extend_from_slice(&make_header(b"placeholder", 0, b'0'));
archive.extend(zeroes(1024));

let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();

match event {
ParseEvent::Entry { entry, .. } => {
// GNU long name should be used (no PAX path to override it)
assert_eq!(entry.path.as_ref(), gnu_name.as_bytes());
// PAX xattr should still be preserved
assert!(entry.pax.is_some());
let pax = PaxExtensions::new(entry.pax.unwrap());
let xattr = pax
.filter_map(|e| e.ok())
.find(|e| e.key_bytes().starts_with(b"SCHILY.xattr."));
assert!(xattr.is_some(), "xattr should be preserved");
assert_eq!(xattr.unwrap().value_bytes(), xattr_value);
}
other => panic!("Expected Entry, got {:?}", other),
}
}

#[test]
fn test_parser_pax_path_overrides_gnu_long_name_reversed_order() {
// Same as test_parser_combined_gnu_pax but with reversed ordering:
// PAX 'x' (with path) -> GNU 'L' -> real entry.
// PAX path should still win regardless of order.
let gnu_name = "gnu/long/name/".to_string() + &"g".repeat(100);
let pax_path = "pax/should/still/win/file.txt";

let mut archive = Vec::new();
// PAX first this time (reversed from test_parser_combined_gnu_pax)
archive.extend(make_pax_header(&[("path", pax_path.as_bytes())]));
archive.extend(make_gnu_long_name(gnu_name.as_bytes()));
archive.extend_from_slice(&make_header(b"header.txt", 0, b'0'));
archive.extend(zeroes(1024));

let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();

match event {
ParseEvent::Entry { entry, .. } => {
assert_eq!(entry.path.as_ref(), pax_path.as_bytes());
}
other => panic!("Expected Entry, got {:?}", other),
}
}

#[test]
fn test_parser_gnu_long_name_and_link_combined() {
// Both GNU long name and long link for the same entry
Expand Down
Loading