diff --git a/fuzz/generate_corpus.rs b/fuzz/generate_corpus.rs index a899e3b..97077d6 100644 --- a/fuzz/generate_corpus.rs +++ b/fuzz/generate_corpus.rs @@ -272,7 +272,9 @@ fn main() { .unwrap() .size(0) .unwrap(); - builder.add_pax("SCHILY.xattr.user.test", b"value123"); + builder + .add_pax("SCHILY.xattr.user.test", b"value123") + .unwrap(); let hdr = builder.finish_bytes(); let mut archive = hdr; archive.extend_from_slice(&EOA); @@ -292,10 +294,12 @@ fn main() { .unwrap() .mtime(9999999999) .unwrap(); - builder.add_pax( - "SCHILY.xattr.security.selinux", - b"system_u:object_r:usr_t:s0", - ); + builder + .add_pax( + "SCHILY.xattr.security.selinux", + b"system_u:object_r:usr_t:s0", + ) + .unwrap(); let hdr = builder.finish_bytes(); let mut archive = hdr; archive.extend_from_slice(b"payload"); @@ -512,7 +516,7 @@ fn main() { .unwrap() .mtime(1700000000) .unwrap(); - builder.add_pax("mtime", b"1700000000.123456789"); + builder.add_pax("mtime", b"1700000000.123456789").unwrap(); let hdr = builder.finish_bytes(); let mut archive = hdr; archive.extend_from_slice(&EOA); diff --git a/src/builder.rs b/src/builder.rs index 87407cc..27cf465 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -863,11 +863,23 @@ impl EntryBuilder { /// Add a custom PAX extension record. /// /// This is useful for adding metadata that doesn't fit in standard - /// header fields. The PAX extension will be emitted regardless of - /// the extension mode setting. - pub fn add_pax(&mut self, key: &str, value: impl AsRef<[u8]>) -> &mut Self { + /// header fields (e.g. extended attributes, high-precision timestamps). + /// + /// # Errors + /// + /// Returns [`HeaderError::IncompatibleMode`] if the extension mode is + /// [`ExtensionMode::Gnu`]. PAX records are only emitted in PAX mode; + /// call [`Self::set_extension_mode`] first or use [`Self::new_ustar`] + /// to create a PAX-mode builder. + pub fn add_pax(&mut self, key: &str, value: impl AsRef<[u8]>) -> Result<&mut Self> { + if self.mode != ExtensionMode::Pax { + return Err(HeaderError::IncompatibleMode { + required: ExtensionMode::Pax, + current: self.mode, + }); + } self.pax_mut().add(key, value); - self + Ok(self) } /// Get or create the PAX builder for this entry. @@ -1861,6 +1873,7 @@ mod tests { .size(0) .unwrap() .add_pax("SCHILY.xattr.user.test", b"value") + .unwrap() .entry_type(EntryType::Regular); assert!(builder.needs_extension()); // Due to custom PAX @@ -1876,6 +1889,40 @@ mod tests { assert!(pax_str.contains("SCHILY.xattr.user.test=value")); } + #[test] + fn test_add_pax_rejects_gnu_mode() { + let mut builder = EntryBuilder::new_gnu(); + let result = builder.add_pax("SCHILY.xattr.user.test", b"value"); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("Pax"), + "error should mention Pax mode: {err}" + ); + } + + #[test] + fn test_add_pax_after_mode_switch() { + // Switching from GNU to PAX mode should allow add_pax(). + let mut builder = EntryBuilder::new_gnu(); + assert!(builder.add_pax("key", b"val").is_err()); + + builder.set_extension_mode(ExtensionMode::Pax); + builder + .path(b"file.txt") + .mode(0o644) + .unwrap() + .size(0) + .unwrap() + .add_pax("SCHILY.xattr.user.test", b"value") + .unwrap(); + + let blocks = builder.finish(); + // Should have PAX 'x' header + PAX data + main header + assert!(blocks.len() >= 3); + assert_eq!(blocks[0].entry_type(), EntryType::XHeader); + } + #[test] fn test_entry_builder_extension_mode_switching() { let long_path = "x/".repeat(60); diff --git a/src/lib.rs b/src/lib.rs index c09f4d1..85a7c71 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,6 +94,15 @@ pub enum HeaderError { detail: String, }, + /// Operation requires a different extension mode. + #[error("operation requires {required:?} extension mode, but builder is in {current:?} mode")] + IncompatibleMode { + /// The mode the operation requires. + required: crate::builder::ExtensionMode, + /// The mode the builder is currently in. + current: crate::builder::ExtensionMode, + }, + /// The header checksum does not match the computed value. #[error("checksum mismatch: expected {expected}, computed {computed}")] ChecksumMismatch { @@ -1662,6 +1671,13 @@ pub const PAX_MTIME: &str = "mtime"; pub const PAX_ATIME: &str = "atime"; /// PAX extended header key for change time. pub const PAX_CTIME: &str = "ctime"; + +// TODO: POSIX defines `hdrcharset` and `charset` PAX keys for signaling +// non-UTF-8 header values and file data encoding respectively. No major +// implementation (GNU tar, Go archive/tar, Rust tar crate) acts on these +// in practice -- they all just accept raw bytes. We should add support if +// interoperability requires it. + /// PAX extended header prefix for SCHILY extended attributes. pub const PAX_SCHILY_XATTR: &str = "SCHILY.xattr."; @@ -2272,6 +2288,18 @@ mod tests { assert_eq!(pax.get("path"), Some("日本語/ファイル.txt")); } + #[test] + fn test_pax_non_utf8_value() { + // PAX values may contain non-UTF-8 bytes in practice. + // value() should fail but value_bytes() should preserve them. + let record = b"25 path=etc/\x80\xfe\xff/file.txt\n"; + let mut pax = PaxExtensions::new(record); + let ext = pax.next().unwrap().unwrap(); + assert_eq!(ext.key().unwrap(), "path"); + assert!(ext.value().is_err(), "non-UTF-8 value should fail value()"); + assert_eq!(ext.value_bytes(), b"etc/\x80\xfe\xff/file.txt"); + } + #[test] fn test_pax_mtime_fractional() { // PAX mtime can have fractional seconds @@ -4084,7 +4112,8 @@ mod tests { .mtime(params.mtime) .unwrap() .entry_type(EntryType::Regular) - .add_pax(¶ms.xattr_key, params.xattr_value.as_bytes()); + .add_pax(¶ms.xattr_key, params.xattr_value.as_bytes()) + .unwrap(); builder.finish() } diff --git a/src/parse.rs b/src/parse.rs index a4d3541..28e9ef2 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -2004,6 +2004,93 @@ mod tests { } } + #[test] + #[allow(invalid_from_utf8)] + fn test_parser_pax_non_utf8_path() { + // Non-UTF-8 bytes in PAX path values are accepted, matching the + // pragmatic behavior of Go's archive/tar and the Rust tar crate. + // The POSIX spec says PAX path values SHOULD be UTF-8, but real-world + // archives (e.g. from Docker/BuildKit) may contain non-UTF-8 paths. + // See bootc-dev/bootc#2073 for a concrete example. + let non_utf8_path: &[u8] = b"etc/ssl/certs/F\xf5tan\xfas\xedtv\xe1ny.pem"; + assert!( + core::str::from_utf8(non_utf8_path).is_err(), + "test data must be non-UTF-8" + ); + + let mut archive = Vec::new(); + archive.extend(make_pax_header(&[("path", non_utf8_path)])); + archive.extend_from_slice(&make_header(b"placeholder.pem", 0, b'0')); + archive.extend(zeroes(1024)); + + let mut parser = Parser::new(Limits::default()); + let event = parser.parse(&archive).unwrap(); + + match event { + ParseEvent::Entry { entry, .. } => { + assert_eq!(entry.path.as_ref(), non_utf8_path); + // The lossy accessor should replace invalid bytes + let lossy = entry.path_lossy(); + assert!( + lossy.contains('\u{FFFD}'), + "lossy conversion should have replacement chars" + ); + } + other => panic!("Expected Entry, got {:?}", other), + } + } + + #[test] + fn test_pax_non_utf8_path_roundtrip() { + // Verify that a non-UTF-8 PAX path survives a builder -> parser + // roundtrip. The path must exceed 100 bytes to trigger PAX emission. + use crate::builder::EntryBuilder; + + // 101+ byte path with non-UTF-8 bytes embedded + let mut long_path = + b"a/very/deep/directory/structure/that/needs/to/exceed/one/hundred/bytes/\xf0\xf1\xf2/" + .to_vec(); + long_path.extend(b"and/some/more/nested/dirs/to/be/safe/file.bin"); + assert!(long_path.len() > 100, "path must exceed 100 bytes"); + assert!( + core::str::from_utf8(&long_path).is_err(), + "path must contain non-UTF-8" + ); + + let mut builder = EntryBuilder::new_ustar(); + builder + .path(&long_path) + .mode(0o644) + .unwrap() + .size(5) + .unwrap() + .mtime(0) + .unwrap() + .uid(0) + .unwrap() + .gid(0) + .unwrap(); + + let mut archive = Vec::new(); + archive.extend_from_slice(&builder.finish_bytes()); + // 5 bytes of content, padded to 512 + let mut content_block = [0u8; 512]; + content_block[..5].copy_from_slice(b"hello"); + archive.extend_from_slice(&content_block); + archive.extend(zeroes(1024)); + + let mut parser = Parser::new(Limits::default()); + let event = parser.parse(&archive).unwrap(); + + match event { + ParseEvent::Entry { entry, .. } => { + assert_eq!(entry.path.as_ref(), long_path.as_slice()); + assert_eq!(entry.size, 5); + } + other => panic!("Expected Entry, got {:?}", other), + } + } + #[test] fn test_parser_pax_size_override() { // PAX header should override the size in the actual header @@ -2186,6 +2273,29 @@ mod tests { } } + #[test] + #[allow(invalid_from_utf8)] + fn test_parser_pax_non_utf8_linkpath() { + // Non-UTF-8 bytes in PAX linkpath values should be preserved. + let non_utf8_target: &[u8] = b"targets/\xc0\xc1invalid.so"; + assert!(core::str::from_utf8(non_utf8_target).is_err()); + + let mut archive = Vec::new(); + archive.extend(make_pax_header(&[("linkpath", non_utf8_target)])); + archive.extend_from_slice(&make_header(b"link.so", 0, b'2')); // symlink + archive.extend(zeroes(1024)); + + let mut parser = Parser::new(Limits::default()); + let event = parser.parse(&archive).unwrap(); + + match event { + ParseEvent::Entry { entry, .. } => { + assert_eq!(entry.link_target.as_deref(), Some(non_utf8_target.as_ref())); + } + other => panic!("Expected Entry, got {:?}", other), + } + } + // ========================================================================= // PAX global header tests // ========================================================================= @@ -2453,6 +2563,79 @@ mod tests { } } + #[test] + fn test_parser_pax_before_gnu_long_name() { + // PAX 'x' -> GNU 'L' -> real entry: this is what tar-rs's builder + // produces when you call append_pax_extensions() (e.g. for xattrs) + // followed by append_data() with a long path. The PAX metadata + // should still be associated with the real entry, and PAX path + // (if present) should take precedence over the GNU long name. + // + // This ordering matters for ecosystem compatibility with bootc + // (see bootc-dev/bootc#2073). + let gnu_name = + "gnu/long/name/that/exceeds/one/hundred/bytes/".to_string() + &"g".repeat(60); + let xattr_value = b"some xattr value"; + + let mut archive = Vec::new(); + // PAX header first (with xattr but no path -- simulating bootc's + // copy_entry which strips path/linkpath from PAX) + archive.extend(make_pax_header(&[( + "SCHILY.xattr.user.test", + xattr_value.as_slice(), + )])); + // GNU long name second + archive.extend(make_gnu_long_name(gnu_name.as_bytes())); + // Real entry last + archive.extend_from_slice(&make_header(b"placeholder", 0, b'0')); + archive.extend(zeroes(1024)); + + let mut parser = Parser::new(Limits::default()); + let event = parser.parse(&archive).unwrap(); + + match event { + ParseEvent::Entry { entry, .. } => { + // GNU long name should be used (no PAX path to override it) + assert_eq!(entry.path.as_ref(), gnu_name.as_bytes()); + // PAX xattr should still be preserved + assert!(entry.pax.is_some()); + let pax = PaxExtensions::new(entry.pax.unwrap()); + let xattr = pax + .filter_map(|e| e.ok()) + .find(|e| e.key_bytes().starts_with(b"SCHILY.xattr.")); + assert!(xattr.is_some(), "xattr should be preserved"); + assert_eq!(xattr.unwrap().value_bytes(), xattr_value); + } + other => panic!("Expected Entry, got {:?}", other), + } + } + + #[test] + fn test_parser_pax_path_overrides_gnu_long_name_reversed_order() { + // Same as test_parser_combined_gnu_pax but with reversed ordering: + // PAX 'x' (with path) -> GNU 'L' -> real entry. + // PAX path should still win regardless of order. + let gnu_name = "gnu/long/name/".to_string() + &"g".repeat(100); + let pax_path = "pax/should/still/win/file.txt"; + + let mut archive = Vec::new(); + // PAX first this time (reversed from test_parser_combined_gnu_pax) + archive.extend(make_pax_header(&[("path", pax_path.as_bytes())])); + archive.extend(make_gnu_long_name(gnu_name.as_bytes())); + archive.extend_from_slice(&make_header(b"header.txt", 0, b'0')); + archive.extend(zeroes(1024)); + + let mut parser = Parser::new(Limits::default()); + let event = parser.parse(&archive).unwrap(); + + match event { + ParseEvent::Entry { entry, .. } => { + assert_eq!(entry.path.as_ref(), pax_path.as_bytes()); + } + other => panic!("Expected Entry, got {:?}", other), + } + } + #[test] fn test_parser_gnu_long_name_and_link_combined() { // Both GNU long name and long link for the same entry