From 3510b8ce814cf2838df15975688b1e96acb8501c Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 17 Mar 2026 18:06:48 +0000 Subject: [PATCH 1/2] dumpfile: Canonicalize parsed entries per composefs-dump(5) The composefs-dump(5) spec leaves several fields unspecified or explicitly ignored. Canonicalize them at parse time so that parsed entries have a single canonical representation regardless of which implementation produced them: - **Directory sizes**: "This is ignored for directories." Drop the size field from Item::Directory, always emit 0. - **Hardlink metadata**: "We ignore all the fields except the payload." Zero uid/gid/mode/mtime and skip xattrs, matching the C parser which bails out early (mkcomposefs.c:477-491). - **Xattr ordering**: The spec doesn't define an order. Sort lexicographically so output is deterministic regardless of on-disk ordering. The parser still accepts any input values for backward compatibility. Assisted-by: OpenCode (Claude Opus 4) Signed-off-by: Colin Walters --- crates/composefs/src/dumpfile.rs | 6 +- crates/composefs/src/dumpfile_parse.rs | 114 ++++++++++++++---- crates/composefs/src/erofs/reader.rs | 82 +++++++------ .../composefs/src/tests/assets/special.dump | 4 +- 4 files changed, 138 insertions(+), 68 deletions(-) diff --git a/crates/composefs/src/dumpfile.rs b/crates/composefs/src/dumpfile.rs index 88159bcc..ad6c645e 100644 --- a/crates/composefs/src/dumpfile.rs +++ b/crates/composefs/src/dumpfile.rs @@ -561,7 +561,7 @@ mod tests { use super::*; use crate::fsverity::Sha256HashValue; - const SIMPLE_DUMP: &str = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + const SIMPLE_DUMP: &str = r#"/ 0 40755 2 0 0 0 1000.0 - - - /empty_file 0 100644 1 0 0 0 1000.0 - - - /small_file 5 100644 1 0 0 0 1000.0 - hello - /symlink 7 120777 1 0 0 0 1000.0 /target - - @@ -592,10 +592,10 @@ mod tests { // The nlink/uid/gid/rdev fields on hardlink lines use `-` here, // matching the C composefs writer convention. The parser must // accept these without trying to parse them as integers. - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /original 11 100644 2 0 0 0 1000.0 - hello_world - /hardlink1 0 @120000 - - - - 0.0 /original - - -/dir1 4096 40755 2 0 0 0 1000.0 - - - +/dir1 0 40755 2 0 0 0 1000.0 - - - /dir1/hardlink2 0 @120000 - - - - 0.0 /original - - "#; diff --git a/crates/composefs/src/dumpfile_parse.rs b/crates/composefs/src/dumpfile_parse.rs index ea4902c3..7e3e436b 100644 --- a/crates/composefs/src/dumpfile_parse.rs +++ b/crates/composefs/src/dumpfile_parse.rs @@ -33,7 +33,7 @@ const XATTR_LIST_MAX: usize = u16::MAX as usize; // See above const XATTR_SIZE_MAX: usize = u16::MAX as usize; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] /// An extended attribute entry pub struct Xattr<'k> { /// key @@ -122,8 +122,6 @@ pub enum Item<'p> { }, /// A directory Directory { - /// Size of a directory is not necessarily meaningful - size: u64, /// Number of links nlink: u32, }, @@ -375,16 +373,14 @@ impl<'p> Entry<'p> { (false, u32::from_str_radix(modeval, 8)?) }; - // For hardlinks, the C parser skips the remaining numeric fields - // (nlink, uid, gid, rdev, mtime) and only reads the payload (target - // path). We match that: consume the tokens without parsing them as - // integers, so values like `-` are accepted. + // Per composefs-dump(5): for hardlinks "we ignore all the fields + // except the payload." The C parser does the same (mkcomposefs.c + // bails out early). Skip everything and zero ignored fields. if is_hardlink { let ty = FileType::from_raw_mode(mode); if ty == FileType::Directory { anyhow::bail!("Invalid hardlinked directory"); } - // Skip nlink, uid, gid, rdev, mtime for field in ["nlink", "uid", "gid", "rdev", "mtime"] { next(field)?; } @@ -395,7 +391,7 @@ impl<'p> Entry<'p> { path, uid: 0, gid: 0, - mode, + mode: 0, mtime: Mtime { sec: 0, nsec: 0 }, item: Item::Hardlink { target }, xattrs: Vec::new(), @@ -410,7 +406,7 @@ impl<'p> Entry<'p> { let payload = optional_str(next("payload")?); let content = optional_str(next("content")?); let fsverity_digest = optional_str(next("digest")?); - let xattrs = components + let mut xattrs = components .try_fold((Vec::new(), 0usize), |(mut acc, total_namelen), line| { let xattr = Xattr::parse(line)?; // Limit the total length of keys. @@ -422,6 +418,10 @@ impl<'p> Entry<'p> { Ok((acc, total_namelen)) })? .0; + // Canonicalize xattr ordering — the composefs-dump(5) spec doesn't + // define an order, and different implementations emit them differently + // (C uses EROFS on-disk order, Rust uses BTreeMap order). + xattrs.sort(); let ty = FileType::from_raw_mode(mode); let item = { @@ -474,8 +474,9 @@ impl<'p> Entry<'p> { FileType::Directory => { Self::check_nonregfile(content, fsverity_digest)?; Self::check_rdev(rdev)?; - - Item::Directory { size, nlink } + // Per composefs-dump(5): "SIZE: The size of the file. + // This is ignored for directories." We discard it. + Item::Directory { nlink } } FileType::Socket => { anyhow::bail!("sockets are not supported"); @@ -512,8 +513,10 @@ impl<'p> Entry<'p> { impl Item<'_> { pub(crate) fn size(&self) -> u64 { match self { - Item::Regular { size, .. } | Item::Directory { size, .. } => *size, + Item::Regular { size, .. } => *size, Item::RegularInline { content, .. } => content.len() as u64, + // Directories always report 0; the spec says size is ignored. + Item::Directory { .. } => 0, _ => 0, } } @@ -556,9 +559,14 @@ impl Display for Mtime { impl Display for Entry<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { escape(f, self.path.as_os_str().as_bytes(), EscapeMode::Standard)?; + let hardlink_prefix = if matches!(self.item, Item::Hardlink { .. }) { + "@" + } else { + "" + }; write!( f, - " {} {:o} {} {} {} {} {} ", + " {} {hardlink_prefix}{:o} {} {} {} {} {} ", self.item.size(), self.mode, self.item.nlink(), @@ -858,18 +866,78 @@ mod tests { fn test_parse() { const CONTENT: &str = include_str!("tests/assets/special.dump"); for line in CONTENT.lines() { - // Test a full round trip by parsing, serialize, parsing again + // Test a full round trip by parsing, serializing, parsing again. + // The serialized form may differ from the input (e.g. xattr + // ordering is canonicalized), so we check structural equality + // and that serialization is idempotent. let e = Entry::parse(line).unwrap(); let serialized = e.to_string(); - if line != serialized { - dbg!(&line, &e, &serialized); - } - similar_asserts::assert_eq!(line, serialized); let e2 = Entry::parse(&serialized).unwrap(); similar_asserts::assert_eq!(e, e2); + // Serialization must be idempotent + similar_asserts::assert_eq!(serialized, e2.to_string()); } } + #[test] + fn test_canonicalize_directory_size() { + // Directory size should be discarded — any input value becomes 0 + let e = Entry::parse("/ 4096 40755 2 0 0 0 1000.0 - - -").unwrap(); + assert_eq!(e.item.size(), 0); + assert!(e.to_string().starts_with("/ 0 40755")); + + let e = Entry::parse("/ 99999 40755 2 0 0 0 1000.0 - - -").unwrap(); + assert_eq!(e.item.size(), 0); + } + + #[test] + fn test_canonicalize_hardlink_metadata() { + // Hardlink metadata fields should all be zeroed — only path and + // target (payload) are meaningful per composefs-dump(5). + let e = Entry::parse( + "/link 259 @100644 3 1000 1000 0 1695368732.385062094 /original - \ + 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408 \ + security.selinux=foo", + ) + .unwrap(); + + // All metadata zeroed + assert_eq!(e.uid, 0); + assert_eq!(e.gid, 0); + assert_eq!(e.mode, 0); + assert_eq!(e.mtime, Mtime { sec: 0, nsec: 0 }); + assert!(e.xattrs.is_empty()); + + // Target preserved + match &e.item { + Item::Hardlink { target } => assert_eq!(target.as_ref(), Path::new("/original")), + other => panic!("Expected Hardlink, got {other:?}"), + } + + // Serialization uses @0 for mode, zeroed fields + let s = e.to_string(); + assert!(s.contains("@0 0 0 0 0 0.0"), "got: {s}"); + } + + #[test] + fn test_canonicalize_xattr_ordering() { + // Xattrs should be sorted by key regardless of input order + let e = Entry::parse("/ 0 40755 2 0 0 0 0.0 - - - user.z=1 security.ima=2 trusted.a=3") + .unwrap(); + + let keys: Vec<&[u8]> = e.xattrs.iter().map(|x| x.key.as_bytes()).collect(); + assert_eq!( + keys, + vec![b"security.ima".as_slice(), b"trusted.a", b"user.z"], + "xattrs should be sorted by key" + ); + + // Re-serialization preserves sorted order + let s = e.to_string(); + let e2 = Entry::parse(&s).unwrap(); + assert_eq!(e, e2); + } + fn parse_all(name: &str, s: &str) -> Result<()> { for line in s.lines() { if line.is_empty() { @@ -886,10 +954,10 @@ mod tests { const CASES: &[(&str, &str)] = &[ ( "content in fifo", - "/ 4096 40755 2 0 0 0 0.0 - - -\n/fifo 0 10777 1 0 0 0 0.0 - foobar -", + "/ 0 40755 2 0 0 0 0.0 - - -\n/fifo 0 10777 1 0 0 0 0.0 - foobar -", ), - ("root with rdev", "/ 4096 40755 2 0 0 42 0.0 - - -"), - ("root with fsverity", "/ 4096 40755 2 0 0 0 0.0 - - 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408"), + ("root with rdev", "/ 0 40755 2 0 0 42 0.0 - - -"), + ("root with fsverity", "/ 0 40755 2 0 0 0 0.0 - - 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408"), ]; for (name, case) in CASES.iter().copied() { assert!( @@ -919,7 +987,7 @@ mod tests { #[test] fn test_load_cfs_filtered() -> Result<()> { const FILTERED: &str = - "/ 4096 40555 2 0 0 0 1633950376.0 - - - trusted.foo1=bar-1 user.foo2=bar-2\n\ + "/ 0 40555 2 0 0 0 1633950376.0 - - - trusted.foo1=bar-1 user.foo2=bar-2\n\ /blockdev 0 60777 1 0 0 107690 1633950376.0 - - - trusted.bar=bar-2\n\ /inline 15 100777 1 0 0 0 1633950376.0 - FOOBAR\\nINAFILE\\n - user.foo=bar-2\n"; let mut tmpf = tempfile::tempfile()?; diff --git a/crates/composefs/src/erofs/reader.rs b/crates/composefs/src/erofs/reader.rs index c52f275e..5d6a049a 100644 --- a/crates/composefs/src/erofs/reader.rs +++ b/crates/composefs/src/erofs/reader.rs @@ -1448,8 +1448,8 @@ mod tests { #[test] fn test_empty_directory() { // Create filesystem with empty directory - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/empty_dir 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/empty_dir 0 40755 2 0 0 0 1000.0 - - - "#; let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); @@ -1491,8 +1491,8 @@ mod tests { #[test] fn test_directory_with_inline_entries() { // Create filesystem with directory that has a few entries (should be inline) - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/dir1 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/dir1 0 40755 2 0 0 0 1000.0 - - - /dir1/file1 5 100644 1 0 0 0 1000.0 - hello - /dir1/file2 5 100644 1 0 0 0 1000.0 - world - "#; @@ -1532,8 +1532,8 @@ mod tests { #[test] fn test_directory_with_many_entries() { // Create a directory with many entries to force block storage - let mut dumpfile = String::from("/ 4096 40755 2 0 0 0 1000.0 - - -\n"); - dumpfile.push_str("/bigdir 4096 40755 2 0 0 0 1000.0 - - -\n"); + let mut dumpfile = String::from("/ 0 40755 2 0 0 0 1000.0 - - -\n"); + dumpfile.push_str("/bigdir 0 40755 2 0 0 0 1000.0 - - -\n"); // Add many files to force directory blocks for i in 0..100 { @@ -1585,10 +1585,10 @@ mod tests { #[test] fn test_nested_directories() { // Test deeply nested directory structure - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/a 4096 40755 2 0 0 0 1000.0 - - - -/a/b 4096 40755 2 0 0 0 1000.0 - - - -/a/b/c 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/a 0 40755 2 0 0 0 1000.0 - - - +/a/b 0 40755 2 0 0 0 1000.0 - - - +/a/b/c 0 40755 2 0 0 0 1000.0 - - - /a/b/c/file.txt 5 100644 1 0 0 0 1000.0 - hello - "#; @@ -1622,12 +1622,12 @@ mod tests { #[test] fn test_mixed_entry_types() { // Test directory with various file types - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/mixed 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/mixed 0 40755 2 0 0 0 1000.0 - - - /mixed/regular 10 100644 1 0 0 0 1000.0 - content123 - /mixed/symlink 7 120777 1 0 0 0 1000.0 /target - - /mixed/fifo 0 10644 1 0 0 0 1000.0 - - - -/mixed/subdir 4096 40755 2 0 0 0 1000.0 - - - +/mixed/subdir 0 40755 2 0 0 0 1000.0 - - - "#; let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); @@ -1668,11 +1668,11 @@ mod tests { #[test] fn test_collect_objects_traversal() { // Test that object collection properly traverses all directories - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/dir1 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/dir1 0 40755 2 0 0 0 1000.0 - - - /dir1/file1 5 100644 1 0 0 0 1000.0 - hello - -/dir2 4096 40755 2 0 0 0 1000.0 - - - -/dir2/subdir 4096 40755 2 0 0 0 1000.0 - - - +/dir2 0 40755 2 0 0 0 1000.0 - - - +/dir2/subdir 0 40755 2 0 0 0 1000.0 - - - /dir2/subdir/file2 5 100644 1 0 0 0 1000.0 - world - "#; @@ -1705,8 +1705,8 @@ mod tests { // . and .. entries). // Generate a C-generated erofs image using mkcomposefs - let dumpfile_content = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/empty_dir 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile_content = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/empty_dir 0 40755 2 0 0 0 1000.0 - - - "#; // Create temporary files for dumpfile and erofs output @@ -1745,10 +1745,10 @@ mod tests { #[test] fn test_round_trip_basic() { // Full round-trip: dumpfile -> tree -> erofs -> read back -> validate - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /file1 5 100644 1 0 0 0 1000.0 - hello - /file2 6 100644 1 0 0 0 1000.0 - world! - -/dir1 4096 40755 2 0 0 0 1000.0 - - - +/dir1 0 40755 2 0 0 0 1000.0 - - - /dir1/nested 8 100644 1 0 0 0 1000.0 - content1 - "#; @@ -1812,14 +1812,14 @@ mod tests { #[test] fn test_erofs_to_filesystem_empty_root() { - let dumpfile = "/ 4096 40755 2 0 0 0 1000.0 - - -\n"; + let dumpfile = "/ 0 40755 2 0 0 0 1000.0 - - -\n"; let (orig, rt) = round_trip_dumpfile(dumpfile); assert_eq!(orig, rt); } #[test] fn test_erofs_to_filesystem_inline_files() { - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /empty 0 100644 1 0 0 0 1000.0 - - - /hello 5 100644 1 0 0 0 1000.0 - hello - /world 6 100644 1 0 0 0 1000.0 - world! - @@ -1830,7 +1830,7 @@ mod tests { #[test] fn test_erofs_to_filesystem_symlinks() { - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /link1 7 120777 1 0 0 0 1000.0 /target - - /link2 11 120777 1 0 0 0 1000.0 /other/path - - "#; @@ -1840,10 +1840,10 @@ mod tests { #[test] fn test_erofs_to_filesystem_nested_dirs() { - let dumpfile = r#"/ 4096 40755 3 0 0 0 1000.0 - - - -/a 4096 40755 3 0 0 0 1000.0 - - - -/a/b 4096 40755 3 0 0 0 1000.0 - - - -/a/b/c 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 3 0 0 0 1000.0 - - - +/a 0 40755 3 0 0 0 1000.0 - - - +/a/b 0 40755 3 0 0 0 1000.0 - - - +/a/b/c 0 40755 2 0 0 0 1000.0 - - - /a/b/c/file.txt 5 100644 1 0 0 0 1000.0 - hello - /a/b/other 3 100644 1 0 0 0 1000.0 - abc - "#; @@ -1853,7 +1853,7 @@ mod tests { #[test] fn test_erofs_to_filesystem_devices_and_fifos() { - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /blk 0 60660 1 0 0 2049 1000.0 - - - /chr 0 20666 1 0 0 1025 1000.0 - - - /fifo 0 10644 1 0 0 0 1000.0 - - - @@ -1865,7 +1865,7 @@ mod tests { #[test] fn test_erofs_to_filesystem_xattrs() { let dumpfile = - "/ 4096 40755 2 0 0 0 1000.0 - - - security.selinux=system_u:object_r:root_t:s0\n\ + "/ 0 40755 2 0 0 0 1000.0 - - - security.selinux=system_u:object_r:root_t:s0\n\ /file 5 100644 1 0 0 0 1000.0 - hello - user.myattr=myvalue\n"; let (orig, rt) = round_trip_dumpfile(dumpfile); assert_eq!(orig, rt); @@ -1875,7 +1875,7 @@ mod tests { fn test_erofs_to_filesystem_escaped_overlay_xattrs() { // The writer escapes trusted.overlay.X to trusted.overlay.overlay.X. // Round-tripping must preserve the original xattr name. - let dumpfile = "/ 4096 40755 2 0 0 0 1000.0 - - -\n\ + let dumpfile = "/ 0 40755 2 0 0 0 1000.0 - - -\n\ /file 5 100644 1 0 0 0 1000.0 - hello - trusted.overlay.custom=val\n"; let (orig, rt) = round_trip_dumpfile(dumpfile); assert_eq!(orig, rt); @@ -1891,7 +1891,7 @@ mod tests { let digest = "a".repeat(64); let pathname = format!("{}/{}", &digest[..2], &digest[2..]); let dumpfile = format!( - "/ 4096 40755 2 0 0 0 1000.0 - - -\n\ + "/ 0 40755 2 0 0 0 1000.0 - - -\n\ /ext 1000000000 100644 1 0 0 0 1000.0 {pathname} - {digest}\n" ); let (orig, rt) = round_trip_dumpfile(&dumpfile); @@ -1900,7 +1900,7 @@ mod tests { #[test] fn test_erofs_to_filesystem_hardlinks() { - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /original 11 100644 2 0 0 0 1000.0 - hello_world - /hardlink 0 @120000 2 0 0 0 0.0 /original - - "#; @@ -1933,10 +1933,10 @@ mod tests { #[test] fn test_erofs_to_filesystem_mixed_types() { - let dumpfile = r#"/ 4096 40755 3 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 3 0 0 0 1000.0 - - - /blk 0 60660 1 0 6 259 1000.0 - - - /chr 0 20666 1 0 6 1025 1000.0 - - - -/dir 4096 40755 2 42 42 0 2000.0 - - - +/dir 0 40755 2 42 42 0 2000.0 - - - /dir/nested 3 100644 1 42 42 0 2000.0 - abc - /fifo 0 10644 1 0 0 0 1000.0 - - - /hello 5 100644 1 1000 1000 0 1500.0 - hello - @@ -1949,7 +1949,7 @@ mod tests { #[test] fn test_restrict_to_composefs_rejects_unsupported_features() { // Build a minimal valid composefs image (just a root directory). - let dumpfile = "/ 4096 40755 2 0 0 0 1000.0 - - -\n"; + let dumpfile = "/ 0 40755 2 0 0 0 1000.0 - - -\n"; let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); let base_image = mkfs_erofs(&fs); @@ -2079,7 +2079,7 @@ mod tests { // The corrupted size must be a multiple of block_size so that // additional_bytes() (which uses `size % block_size` for FlatInline) // stays the same and the inode still parses successfully. - let dumpfile = "/ 4096 40755 1 0 0 0 0.0 - - -\n"; + let dumpfile = "/ 0 40755 1 0 0 0 0.0 - - -\n"; let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); let mut image = mkfs_erofs(&fs); @@ -2124,8 +2124,7 @@ mod tests { use crate::test::proptest_strategies::{build_filesystem, filesystem_spec}; use proptest::prelude::*; - /// Round-trip a FileSystem through erofs with a given ObjectID type - /// and compare dumpfile output before and after. + /// Round-trip a FileSystem through erofs and compare dumpfile output. fn round_trip_filesystem( fs_orig: &tree::FileSystem, ) { @@ -2138,7 +2137,10 @@ mod tests { let mut rt_output = Vec::new(); write_dumpfile(&mut rt_output, &fs_rt).unwrap(); - assert_eq!(orig_output, rt_output); + similar_asserts::assert_eq!( + String::from_utf8_lossy(&orig_output), + String::from_utf8_lossy(&rt_output) + ); } proptest! { diff --git a/crates/composefs/src/tests/assets/special.dump b/crates/composefs/src/tests/assets/special.dump index 7665fdd7..2afe62f4 100644 --- a/crates/composefs/src/tests/assets/special.dump +++ b/crates/composefs/src/tests/assets/special.dump @@ -1,7 +1,7 @@ -/ 4096 40555 2 0 0 0 1633950376.0 - - - trusted.foo1=bar-1 user.foo2=bar-2 +/ 0 40555 2 0 0 0 1633950376.0 - - - trusted.foo1=bar-1 user.foo2=bar-2 /blockdev 0 60777 1 0 0 107690 1633950376.0 - - - trusted.bar=bar-2 /chardev 0 20777 1 0 0 10769 1633950376.0 - - - trusted.foo=bar-2 -/escaped-xattr 0 100777 1 0 0 0 1633950376.0 - - - trusted.overlay.redirect=/foo\n user.overlay.redirect=/foo\n user.foo=bar-2 +/escaped-xattr 0 100777 1 0 0 0 1633950376.0 - - - trusted.overlay.redirect=/foo\n user.foo=bar-2 user.overlay.redirect=/foo\n /external 42 100755 1 0 0 0 1731497312.0 70/a9125438f7255245f596c54cebb6621cb9a64f062752cf26763c1b690e7340 - 70a9125438f7255245f596c54cebb6621cb9a64f062752cf26763c1b690e7340 /fifo 0 10777 1 0 0 0 1633950376.0 - - - trusted.bar=bar-2 /inline 15 100777 1 0 0 0 1633950376.0 - FOOBAR\nINAFILE\n - user.foo=bar-2 From 6dfb4c414c7bac91a8a3c8653d808c15ed4b4296 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 17 Mar 2026 22:25:58 +0000 Subject: [PATCH 2/2] Limit symlink targets to 1024 bytes (XFS_SYMLINK_MAXLEN) XFS limits symlink targets to 1024 bytes, and since generic Linux containers are commonly backed by XFS, enforce that limit in both the dumpfile parser and the EROFS reader rather than allowing up to PATH_MAX (4096). This also avoids exercising a known limitation in our EROFS reader where symlink data that spills into a non-inline data block (which can happen with long symlinks + xattrs) is not read back correctly. See https://github.com/composefs/composefs/pull/342 for the corresponding C fix for that edge case. Assisted-by: OpenCode (Claude Opus 4) Signed-off-by: Colin Walters --- crates/composefs/src/dumpfile_parse.rs | 7 +++++-- crates/composefs/src/erofs/reader.rs | 8 ++++++++ crates/composefs/src/erofs/writer.rs | 6 ++++++ crates/composefs/src/lib.rs | 7 +++++++ crates/composefs/src/test.rs | 10 ++++------ 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/crates/composefs/src/dumpfile_parse.rs b/crates/composefs/src/dumpfile_parse.rs index 7e3e436b..dd6acdb9 100644 --- a/crates/composefs/src/dumpfile_parse.rs +++ b/crates/composefs/src/dumpfile_parse.rs @@ -24,6 +24,7 @@ use crate::MAX_INLINE_CONTENT; /// https://github.com/torvalds/linux/blob/47ac09b91befbb6a235ab620c32af719f8208399/include/uapi/linux/limits.h#L13 const PATH_MAX: u32 = 4096; +use crate::SYMLINK_MAX; /// https://github.com/torvalds/linux/blob/47ac09b91befbb6a235ab620c32af719f8208399/include/uapi/linux/limits.h#L15 /// This isn't exposed in libc/rustix, and in any case we should be conservative...if this ever /// gets bumped it'd be a hazard. @@ -456,8 +457,10 @@ impl<'p> Entry<'p> { let target = unescape_to_path(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; let targetlen = target.as_os_str().as_bytes().len(); - if targetlen > PATH_MAX as usize { - anyhow::bail!("Target length too large {targetlen}"); + if targetlen > SYMLINK_MAX { + anyhow::bail!( + "Symlink target length {targetlen} exceeds limit {SYMLINK_MAX}" + ); } Item::Symlink { nlink, target } } diff --git a/crates/composefs/src/erofs/reader.rs b/crates/composefs/src/erofs/reader.rs index 5d6a049a..3acffbce 100644 --- a/crates/composefs/src/erofs/reader.rs +++ b/crates/composefs/src/erofs/reader.rs @@ -1350,6 +1350,14 @@ fn populate_directory( } S_IFLNK => { let target_data = child_inode.inline().unwrap_or(&[]); + if target_data.len() > crate::SYMLINK_MAX { + anyhow::bail!( + "symlink target for {:?} is {} bytes (max {})", + name, + target_data.len(), + crate::SYMLINK_MAX, + ); + } let target = OsStr::from_bytes(target_data); tree::LeafContent::Symlink(Box::from(target)) } diff --git a/crates/composefs/src/erofs/writer.rs b/crates/composefs/src/erofs/writer.rs index 78858162..33f5a09e 100644 --- a/crates/composefs/src/erofs/writer.rs +++ b/crates/composefs/src/erofs/writer.rs @@ -293,6 +293,12 @@ impl Leaf<'_, ObjectID> { (format::DataLayout::FlatPlain, 0, 0) } tree::LeafContent::Symlink(target) => { + assert!( + target.len() <= crate::SYMLINK_MAX, + "symlink target is {} bytes (max {})", + target.len(), + crate::SYMLINK_MAX, + ); (format::DataLayout::FlatInline, 0, target.len() as u64) } }; diff --git a/crates/composefs/src/lib.rs b/crates/composefs/src/lib.rs index 521a9a73..38d55e1f 100644 --- a/crates/composefs/src/lib.rs +++ b/crates/composefs/src/lib.rs @@ -45,6 +45,13 @@ pub const INLINE_CONTENT_MAX_V0: usize = 64; /// ). pub const MAX_INLINE_CONTENT: usize = 512; +/// Maximum symlink target length in bytes. +/// +/// XFS limits symlink targets to 1024 bytes (`XFS_SYMLINK_MAXLEN`). Since +/// generic Linux containers are commonly backed by XFS, we enforce that +/// limit rather than the Linux VFS `PATH_MAX` of 4096. +pub const SYMLINK_MAX: usize = 1024; + /// Internal constants shared across workspace crates. /// /// Not part of the public API — may change without notice. diff --git a/crates/composefs/src/test.rs b/crates/composefs/src/test.rs index 3ff1a433..7e165f76 100644 --- a/crates/composefs/src/test.rs +++ b/crates/composefs/src/test.rs @@ -131,8 +131,7 @@ pub(crate) mod proptest_strategies { /// EROFS limit (`EROFS_NAME_LEN`). const NAME_MAX: usize = 255; - /// Maximum symlink target length on Linux (`PATH_MAX`). - const PATH_MAX: usize = 4096; + use crate::SYMLINK_MAX; /// Strategy for valid filenames as OsString. /// @@ -230,8 +229,7 @@ pub(crate) mod proptest_strategies { /// Strategy for symlink targets as OsString. /// /// Symlink targets on Linux are arbitrary bytes except `\0`, up to - /// [`PATH_MAX`] (4096) bytes. We generate a mix of path-like ASCII - /// targets and binary targets, occasionally long. + /// [`SYMLINK_MAX`] (1024) bytes, matching the XFS limit. fn symlink_target() -> impl Strategy { prop_oneof![ // Short path-like ASCII target (common case) @@ -241,8 +239,8 @@ pub(crate) mod proptest_strategies { // Binary target with arbitrary bytes (no NUL) 3 => prop::collection::vec(1..=0xFFu8, 1..=100) .prop_map(OsString::from_vec), - // Long ASCII target (up to PATH_MAX) - 1 => proptest::string::string_regex(&format!("[a-zA-Z0-9/._-]{{100,{PATH_MAX}}}")) + // Long ASCII target (up to SYMLINK_MAX) + 1 => proptest::string::string_regex(&format!("[a-zA-Z0-9/._-]{{100,{SYMLINK_MAX}}}")) .expect("valid regex") .prop_map(OsString::from), ]