From ec6f4af2be25035368fa053c3c07cd21bee3f062 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 10 Mar 2026 14:58:16 +0000 Subject: [PATCH 1/4] erofs: Add nlink() to InodeHeader trait Needed for later commits which want to access the hardlink count conveniently. Assisted-by: OpenCode (Claude claude-opus-4-6) Signed-off-by: Colin Walters --- crates/composefs/src/erofs/reader.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/crates/composefs/src/erofs/reader.rs b/crates/composefs/src/erofs/reader.rs index b33ca2e5..4eff3f97 100644 --- a/crates/composefs/src/erofs/reader.rs +++ b/crates/composefs/src/erofs/reader.rs @@ -37,6 +37,8 @@ pub trait InodeHeader { fn size(&self) -> u64; /// Returns the union field value (block address, device number, etc.) fn u(&self) -> u32; + /// Returns the number of hard links + fn nlink(&self) -> u32; /// Calculates the number of additional bytes after the header fn additional_bytes(&self, blkszbits: u8) -> usize { @@ -78,6 +80,10 @@ impl InodeHeader for ExtendedInodeHeader { fn u(&self) -> u32 { self.u.get() } + + fn nlink(&self) -> u32 { + self.nlink.get() + } } impl InodeHeader for CompactInodeHeader { @@ -100,6 +106,10 @@ impl InodeHeader for CompactInodeHeader { fn u(&self) -> u32 { self.u.get() } + + fn nlink(&self) -> u32 { + self.nlink.get().into() + } } /// Extended attribute entry with header and variable-length data @@ -192,6 +202,10 @@ impl InodeHeader for &Inode
{ fn u(&self) -> u32 { self.header.u() } + + fn nlink(&self) -> u32 { + self.header.nlink() + } } impl InodeOps for &Inode
{ @@ -277,6 +291,13 @@ impl InodeHeader for InodeType<'_> { Self::Extended(inode) => inode.mode(), } } + + fn nlink(&self) -> u32 { + match self { + Self::Compact(inode) => inode.nlink(), + Self::Extended(inode) => inode.nlink(), + } + } } impl InodeOps for InodeType<'_> { From 7c1f367c3fc8120a940afacd0a841c6562699ef8 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 10 Mar 2026 19:04:47 +0000 Subject: [PATCH 2/4] dumpfile: Fix escaping bugs and match C parser behavior for hardlinks Fix write_escaped to escape a bare '-' as '\x2d'. The dumpfile parser uses '-' as the sentinel for empty/none fields, so a symlink target or other field value that is literally '-' would be misinterpreted on parse. The escape function in dumpfile_parse.rs already handled this. Split out write_escaped_raw for xattr values, which must not use the '-' sentinel at all. Previously, an empty xattr value was written as '-' (the empty sentinel), but the parser doesn't apply optional_str to xattr values, so it would be read back as the literal byte 0x2d. This matches the C composefs behavior where xattr values use ESCAPE_EQUAL without ESCAPE_LONE_DASH. Fix the Rust dumpfile parser to match the C parser for hardlink entries: detect the '@' prefix early and skip integer parsing of the remaining numeric fields (nlink, uid, gid, rdev, mtime). The C parser splits all fields as strings first, detects '@', and returns early without ever calling parse_int_field on those values. Our parser was unconditionally parsing them as integers, which unnecessarily rejected '-' placeholders. Add comments cross-referencing C composefs escaping behavior throughout. 
Assisted-by: OpenCode (Claude claude-opus-4-6) Signed-off-by: Colin Walters --- crates/composefs/src/dumpfile.rs | 174 ++++++++++++++++++++++++- crates/composefs/src/dumpfile_parse.rs | 53 ++++++-- 2 files changed, 214 insertions(+), 13 deletions(-) diff --git a/crates/composefs/src/dumpfile.rs b/crates/composefs/src/dumpfile.rs index 79103476..c4454bbc 100644 --- a/crates/composefs/src/dumpfile.rs +++ b/crates/composefs/src/dumpfile.rs @@ -31,14 +31,47 @@ fn write_empty(writer: &mut impl fmt::Write) -> fmt::Result { writer.write_str("-") } +/// Escape a byte slice for a space-delimited dumpfile field. +/// +/// This corresponds to `print_escaped_optional` in the C composefs +/// `composefs-info.c`, combining `ESCAPE_STANDARD | ESCAPE_LONE_DASH`. +/// Empty values map to `-` (the "none" sentinel), and a bare `-` is +/// hex-escaped so it is not confused with the sentinel. +/// +/// Not appropriate for xattr values — use [`write_escaped_raw`] instead. fn write_escaped(writer: &mut impl fmt::Write, bytes: &[u8]) -> fmt::Result { if bytes.is_empty() { return write_empty(writer); } + // Matches C ESCAPE_LONE_DASH: a bare `-` must be escaped because + // the parser uses `-` as the sentinel for "empty/none". + if bytes == b"-" { + return writer.write_str("\\x2d"); + } + + write_escaped_raw(writer, bytes) +} + +/// Escape a byte slice without the `-` sentinel logic. +/// +/// This corresponds to `print_escaped` with `ESCAPE_EQUAL` (but without +/// `ESCAPE_LONE_DASH`) in the C composefs `composefs-info.c`. Used for +/// xattr values where `-` and empty are valid literal values, not +/// sentinels. +/// +/// Note: we unconditionally escape `=` in all fields, whereas the C code +/// only uses `ESCAPE_EQUAL` for xattr keys and values. This is harmless +/// since `\x3d` round-trips correctly, but means our output for paths +/// containing `=` is slightly more verbose than the C output. 
+fn write_escaped_raw(writer: &mut impl fmt::Write, bytes: &[u8]) -> fmt::Result { for c in bytes { let c = *c; + // The set of hex-escaped characters matches C `!isgraph(c)` in the + // POSIX locale (i.e. outside 0x21..=0x7E), plus `=` and `\`. + // The C code uses named escapes for `\\`, `\n`, `\r`, `\t` while + // we hex-escape everything uniformly; both forms parse correctly. if c < b'!' || c == b'=' || c == b'\\' || c > b'~' { write!(writer, "\\x{c:02x}")?; } else { @@ -86,7 +119,9 @@ fn write_entry( write!(writer, " ")?; write_escaped(writer, key.as_bytes())?; write!(writer, "=")?; - write_escaped(writer, value)?; + // Xattr values don't use the `-` sentinel — they're always present + // when the key=value pair exists, and empty or `-` are valid values. + write_escaped_raw(writer, value)?; } Ok(()) @@ -218,7 +253,10 @@ pub fn write_leaf( /// Writes a hardlink entry to the dumpfile format. /// /// Creates a special entry that links the given path to an existing target path -/// that was already written to the dumpfile. +/// that was already written to the dumpfile. The nlink/uid/gid/rdev fields +/// are written as `-` and mtime as `0.0` (all ignored); both the C and Rust +/// parsers detect the `@` hardlink prefix on the mode field and skip parsing +/// the remaining numeric fields. pub fn write_hardlink(writer: &mut impl fmt::Write, path: &Path, target: &OsStr) -> fmt::Result { write_escaped(writer, path.as_os_str().as_bytes())?; write!(writer, " 0 @120000 - - - - 0.0 ")?; @@ -525,11 +563,14 @@ mod tests { #[test] fn test_hardlinks() -> Result<()> { + // The nlink/uid/gid/rdev fields on hardlink lines use `-` here, + // matching the C composefs writer convention. The parser must + // accept these without trying to parse them as integers. 
let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - /original 11 100644 2 0 0 0 1000.0 - hello_world - -/hardlink1 0 @120000 2 0 0 0 0.0 /original - - +/hardlink1 0 @120000 - - - - 0.0 /original - - /dir1 4096 40755 2 0 0 0 1000.0 - - - -/dir1/hardlink2 0 @120000 2 0 0 0 0.0 /original - - +/dir1/hardlink2 0 @120000 - - - - 0.0 /original - - "#; let fs = dumpfile_to_filesystem::(dumpfile)?; @@ -572,4 +613,129 @@ mod tests { Ok(()) } + + /// Verify that a symlink whose target is literally "-" survives a + /// write → parse → write round-trip. Previously `write_escaped` + /// did not escape a bare "-", so the parser treated it as "none". + #[test] + fn test_symlink_target_dash_round_trip() -> Result<()> { + let dumpfile = "/ 0 40755 2 0 0 0 0.0 - - -\n\ + /link 1 120777 1 0 0 0 0.0 \\x2d - -\n"; + let fs = dumpfile_to_filesystem::(dumpfile)?; + let link = fs.root.lookup(OsStr::new("link")).unwrap(); + match link { + Inode::Leaf(ref l) => match &l.content { + LeafContent::Symlink(target) => assert_eq!(target.as_ref(), OsStr::new("-")), + other => panic!("expected symlink, got {other:?}"), + }, + _ => panic!("expected leaf"), + } + + // Re-serialize and verify it round-trips + let mut out = Vec::new(); + write_dumpfile(&mut out, &fs)?; + let out_str = std::str::from_utf8(&out).unwrap(); + let fs2 = dumpfile_to_filesystem::(out_str)?; + let mut out2 = Vec::new(); + write_dumpfile(&mut out2, &fs2)?; + assert_eq!(out, out2); + Ok(()) + } + + /// Verify that xattrs with empty values and with a value of "-" + /// both survive a round-trip. Previously `write_escaped` used + /// the "-" sentinel for empty bytes, which the xattr parser does + /// not treat specially. 
+ #[test] + fn test_xattr_empty_and_dash_values_round_trip() -> Result<()> { + use std::cell::RefCell; + use std::collections::BTreeMap; + + let mut xattrs = BTreeMap::new(); + xattrs.insert( + Box::from(OsStr::new("user.empty")), + Vec::new().into_boxed_slice(), + ); + xattrs.insert( + Box::from(OsStr::new("user.dash")), + vec![b'-'].into_boxed_slice(), + ); + + let mut fs = FileSystem::::new(Stat { + st_mode: 0o755, + st_uid: 0, + st_gid: 0, + st_mtim_sec: 0, + xattrs: RefCell::new(BTreeMap::new()), + }); + let leaf = std::rc::Rc::new(Leaf { + stat: Stat { + st_mode: 0o644, + st_uid: 0, + st_gid: 0, + st_mtim_sec: 0, + xattrs: RefCell::new(xattrs), + }, + content: LeafContent::Regular(RegularFile::Inline(b"test".to_vec().into())), + }); + fs.root.insert(OsStr::new("f"), Inode::Leaf(leaf)); + + let mut out = Vec::new(); + write_dumpfile(&mut out, &fs)?; + let out_str = std::str::from_utf8(&out).unwrap(); + let fs2 = dumpfile_to_filesystem::(out_str)?; + let mut out2 = Vec::new(); + write_dumpfile(&mut out2, &fs2)?; + assert_eq!(out, out2, "xattr round-trip mismatch:\n{out_str}"); + Ok(()) + } + + /// Verify that write_dumpfile → dumpfile_to_filesystem round-trips + /// hardlinks correctly. 
+ #[test] + fn test_hardlink_write_round_trip() -> Result<()> { + use std::cell::RefCell; + use std::collections::BTreeMap; + + let stat = || Stat { + st_mode: 0o644, + st_uid: 0, + st_gid: 0, + st_mtim_sec: 0, + xattrs: RefCell::new(BTreeMap::new()), + }; + + let mut fs = FileSystem::::new(Stat { + st_mode: 0o755, + ..stat() + }); + let leaf = std::rc::Rc::new(Leaf { + stat: stat(), + content: LeafContent::Regular(RegularFile::Inline(b"data".to_vec().into())), + }); + // Insert original + hardlink (same Rc) + fs.root + .insert(OsStr::new("original"), Inode::Leaf(leaf.clone())); + fs.root.insert(OsStr::new("link"), Inode::Leaf(leaf)); + + let mut out = Vec::new(); + write_dumpfile(&mut out, &fs)?; + let out_str = std::str::from_utf8(&out).unwrap(); + + let fs2 = dumpfile_to_filesystem::(out_str)?; + + // Verify the hardlink is preserved + let orig = fs2.root.lookup(OsStr::new("original")).unwrap(); + let link = fs2.root.lookup(OsStr::new("link")).unwrap(); + match (orig, link) { + (Inode::Leaf(a), Inode::Leaf(b)) => assert!(Rc::ptr_eq(a, b)), + _ => panic!("expected both to be leaves"), + } + + // And re-serialization is stable + let mut out2 = Vec::new(); + write_dumpfile(&mut out2, &fs2)?; + assert_eq!(out, out2); + Ok(()) + } } diff --git a/crates/composefs/src/dumpfile_parse.rs b/crates/composefs/src/dumpfile_parse.rs index 89f321aa..0b4a642c 100644 --- a/crates/composefs/src/dumpfile_parse.rs +++ b/crates/composefs/src/dumpfile_parse.rs @@ -270,6 +270,15 @@ enum EscapeMode { } /// Escape a byte array according to the composefs dump file text format. +/// +/// Note: this function unconditionally maps empty → `-` and escapes a +/// bare `-`. That matches C `ESCAPE_LONE_DASH` and is correct for +/// space-delimited fields (path, payload, content), but the C code does +/// NOT set `ESCAPE_LONE_DASH` for xattr values — there, `-` and empty +/// are valid literals. 
The `Entry` Display impl currently uses this for +/// xattr values via `EscapeMode::Standard`, which diverges from C. +/// The `write_dumpfile` writer in `dumpfile.rs` avoids this by using +/// a separate `write_escaped_raw` for xattr values. fn escape(out: &mut W, s: &[u8], mode: EscapeMode) -> std::fmt::Result { // Empty content must be represented by `-` if s.is_empty() { @@ -369,6 +378,34 @@ impl<'p> Entry<'p> { } else { (false, u32::from_str_radix(modeval, 8)?) }; + + // For hardlinks, the C parser skips the remaining numeric fields + // (nlink, uid, gid, rdev, mtime) and only reads the payload (target + // path). We match that: consume the tokens without parsing them as + // integers, so values like `-` are accepted. + if is_hardlink { + let ty = FileType::from_raw_mode(mode); + if ty == FileType::Directory { + anyhow::bail!("Invalid hardlinked directory"); + } + // Skip nlink, uid, gid, rdev, mtime + for field in ["nlink", "uid", "gid", "rdev", "mtime"] { + next(field)?; + } + let payload = optional_str(next("payload")?); + let target = + unescape_to_path_canonical(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; + return Ok(Entry { + path, + uid: 0, + gid: 0, + mode, + mtime: Mtime { sec: 0, nsec: 0 }, + item: Item::Hardlink { target }, + xattrs: Vec::new(), + }); + } + let nlink = u32::from_str(next("nlink")?)?; let uid = u32::from_str(next("uid")?)?; let gid = u32::from_str(next("gid")?)?; @@ -391,15 +428,7 @@ impl<'p> Entry<'p> { .0; let ty = FileType::from_raw_mode(mode); - let item = if is_hardlink { - if ty == FileType::Directory { - anyhow::bail!("Invalid hardlinked directory"); - } - let target = - unescape_to_path_canonical(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; - // TODO: the dumpfile format suggests to retain all the metadata on hardlink lines - Item::Hardlink { target } - } else { + let item = { match ty { FileType::RegularFile => { Self::check_rdev(rdev)?; @@ -568,6 +597,12 @@ impl Display for Entry<'_> { f.write_char(' ')?; 
escape(f, xattr.key.as_bytes(), EscapeMode::XattrKey)?; f.write_char('=')?; + // NOTE: the C code uses ESCAPE_EQUAL (not ESCAPE_LONE_DASH) + // for xattr values, meaning it does not escape bare `-` or + // map empty to `-`. Using `Standard` mode here diverges from C: + // a bare `-` is harmless since `\x2d` parses back to `-`, but an + // empty value is written as `-` and reads back as a literal `-`. + // The `write_dumpfile` writer uses `write_escaped_raw`, which matches C. escape(f, &xattr.value, EscapeMode::Standard)?; } std::fmt::Result::Ok(()) From 9b857a5f43684e35eb598eebc4597035fd098fb5 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 10 Mar 2026 15:29:33 +0000 Subject: [PATCH 3/4] tree: Add proptest strategies for generating random FileSystem instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add hash-agnostic proptest strategies that build tree::FileSystem directly, covering hardlinks, all xattr namespaces (including the trusted.overlay.* escape path), binary filenames, binary symlink targets, and all file types. Also add dumpfile round-trip proptests that exercise write_dumpfile → dumpfile_to_filesystem → write_dumpfile, using prop_assume! to skip non-UTF-8 cases that the text-based dumpfile format cannot represent. 
Assisted-by: OpenCode (Claude claude-opus-4-6) Signed-off-by: Colin Walters --- crates/composefs/Cargo.toml | 1 + crates/composefs/src/dumpfile.rs | 50 ++++ crates/composefs/src/test.rs | 395 +++++++++++++++++++++++++++++++ 3 files changed, 446 insertions(+) diff --git a/crates/composefs/Cargo.toml b/crates/composefs/Cargo.toml index 97f71ef3..62ec2fbc 100644 --- a/crates/composefs/Cargo.toml +++ b/crates/composefs/Cargo.toml @@ -34,6 +34,7 @@ rand = { version = "0.9.1", default-features = true } [dev-dependencies] insta = "1.42.2" +proptest = "1" similar-asserts = "1.7.0" tempfile = { version = "3.8.0", default-features = false } test-with = { version = "0.14", default-features = false, features = ["executable", "runtime"] } diff --git a/crates/composefs/src/dumpfile.rs b/crates/composefs/src/dumpfile.rs index c4454bbc..4a9fcf26 100644 --- a/crates/composefs/src/dumpfile.rs +++ b/crates/composefs/src/dumpfile.rs @@ -738,4 +738,54 @@ mod tests { assert_eq!(out, out2); Ok(()) } + + mod proptest_tests { + use super::*; + use crate::fsverity::Sha512HashValue; + use crate::test::proptest_strategies::{build_filesystem, filesystem_spec}; + use proptest::prelude::*; + + /// Serialize filesystem to dumpfile bytes, returning None if the + /// output contains non-UTF-8 data (binary filenames) which the + /// text-based dumpfile parser cannot round-trip. + fn dumpfile_bytes( + fs: &FileSystem, + ) -> Option> { + let mut bytes = Vec::new(); + write_dumpfile(&mut bytes, fs).unwrap(); + // dumpfile_to_filesystem requires &str, so reject non-UTF-8 + std::str::from_utf8(&bytes).ok()?; + Some(bytes) + } + + fn round_trip_dumpfile(orig_bytes: &[u8]) { + let orig_str = std::str::from_utf8(orig_bytes).unwrap(); + let fs_rt = dumpfile_to_filesystem::(orig_str).unwrap(); + + let mut rt_bytes = Vec::new(); + write_dumpfile(&mut rt_bytes, &fs_rt).unwrap(); + + assert_eq!(orig_bytes, &rt_bytes); + } + + proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(64))] + + #[test] + fn test_dumpfile_round_trip_sha256(spec in filesystem_spec()) { + let fs = build_filesystem::(spec); + let bytes = dumpfile_bytes(&fs); + prop_assume!(bytes.is_some(), "dumpfile can't round-trip binary names"); + round_trip_dumpfile::(&bytes.unwrap()); + } + + #[test] + fn test_dumpfile_round_trip_sha512(spec in filesystem_spec()) { + let fs = build_filesystem::(spec); + let bytes = dumpfile_bytes(&fs); + prop_assume!(bytes.is_some(), "dumpfile can't round-trip binary names"); + round_trip_dumpfile::(&bytes.unwrap()); + } + } + } } diff --git a/crates/composefs/src/test.rs b/crates/composefs/src/test.rs index 136c4cce..7f8a26f4 100644 --- a/crates/composefs/src/test.rs +++ b/crates/composefs/src/test.rs @@ -74,3 +74,398 @@ impl Default for TestRepo { Self::new() } } + +/// Proptest strategies for generating random `tree::FileSystem` instances. +/// +/// These strategies build the tree directly (not through dumpfile strings), +/// which means they can express things like hardlinks (shared `Rc`) +/// that are awkward to generate as text. +/// +/// The spec types are hash-type-agnostic: external file references store +/// raw random bytes, and `build_filesystem` constructs the appropriate +/// `ObjectID` from them via `from_hex`. This lets the same generated spec +/// be used with both `Sha256HashValue` and `Sha512HashValue`. +#[cfg(test)] +pub(crate) mod proptest_strategies { + use std::{ + cell::RefCell, + collections::BTreeMap, + ffi::{OsStr, OsString}, + mem, + os::unix::ffi::OsStringExt, + rc::Rc, + }; + + use proptest::prelude::*; + + use crate::{ + fsverity::FsVerityHashValue, + tree::{self, RegularFile}, + }; + + /// Maximum filename length (single directory entry name) on Linux. + /// This is `NAME_MAX` from POSIX / ``, and also the + /// EROFS limit (`EROFS_NAME_LEN`). + const NAME_MAX: usize = 255; + + /// Maximum symlink target length on Linux (`PATH_MAX`). 
+ const PATH_MAX: usize = 4096; + + /// Strategy for valid filenames as OsString. + /// + /// Linux filenames are arbitrary bytes except `/` (0x2F) and `\0` (0x00), + /// with a max length of [`NAME_MAX`] (255) bytes. We generate a mix of + /// ASCII names and binary names, occasionally long, to exercise directory + /// entry layout edge cases. + pub fn filename() -> impl Strategy { + prop_oneof![ + // Short ASCII names (common case) + 6 => proptest::string::string_regex("[a-zA-Z0-9._-]{1,20}") + .expect("valid regex") + .prop_map(OsString::from), + // Binary names with arbitrary bytes (no NUL or /) + 3 => prop::collection::vec(1..=0xFEu8, 1..=30) + .prop_map(|mut v| { v.iter_mut().for_each(|b| if *b == b'/' { *b = b'_' }); OsString::from_vec(v) }), + // Long ASCII names (up to NAME_MAX) + 1 => proptest::string::string_regex(&format!("[a-zA-Z0-9._-]{{100,{NAME_MAX}}}")) + .expect("valid regex") + .prop_map(OsString::from), + ] + .prop_filter("reserved names", |s| s != "." && s != "..") + } + + /// Strategy for `tree::Stat` with random metadata. + pub fn stat() -> impl Strategy { + ( + 0..=0o7777u32, // permission bits + 0..=65535u32, // uid + 0..=65535u32, // gid + 0..=2_000_000_000i64, // mtime + xattrs(), + ) + .prop_map(|(mode, uid, gid, mtime, xattrs)| tree::Stat { + st_mode: mode, + st_uid: uid, + st_gid: gid, + st_mtim_sec: mtime, + xattrs: RefCell::new(xattrs), + }) + } + + /// Strategy for xattr keys covering all erofs prefix namespaces. + /// + /// The erofs format uses prefix indices to compress xattr names: + /// 0 = "" (fallback), 1 = "user.", 2 = "system.posix_acl_access", + /// 3 = "system.posix_acl_default", 4 = "trusted.", 5 = "lustre.", + /// 6 = "security." + /// + /// The writer also escapes `trusted.overlay.*` → `trusted.overlay.overlay.*`, + /// so we must test that path too. 
+ fn xattr_key() -> impl Strategy { + prop_oneof![ + // user.* namespace (index 1) — most common + 3 => (0..5u32).prop_map(|n| format!("user.test_{n}")), + // security.* namespace (index 6) — e.g. SELinux + 2 => prop_oneof![ + Just("security.selinux".to_string()), + Just("security.ima".to_string()), + Just("security.capability".to_string()), + ], + // trusted.* but NOT overlay (index 4) + 1 => (0..3u32).prop_map(|n| format!("trusted.test_{n}")), + // trusted.overlay.* — exercises the escape/unescape path + 2 => prop_oneof![ + Just("trusted.overlay.custom".to_string()), + Just("trusted.overlay.origin".to_string()), + Just("trusted.overlay.upper".to_string()), + // This one tests double-escaping: it becomes + // trusted.overlay.overlay.overlay.nested on disk + Just("trusted.overlay.overlay.nested".to_string()), + ], + // system.posix_acl_access (index 2) — exact name, no suffix + 1 => Just("system.posix_acl_access".to_string()), + // system.posix_acl_default (index 3) — exact name, no suffix + 1 => Just("system.posix_acl_default".to_string()), + ] + } + + /// Strategy for 0-4 extended attributes across diverse namespaces. + fn xattrs() -> impl Strategy, Box<[u8]>>> { + prop::collection::vec( + (xattr_key(), prop::collection::vec(any::(), 0..=20)), + 0..=4, + ) + .prop_map(|pairs| { + let mut map = BTreeMap::new(); + for (key, value) in pairs { + map.insert(Box::from(OsStr::new(&key)), value.into_boxed_slice()); + } + map + }) + } + + /// Strategy for symlink targets as OsString. + /// + /// Symlink targets on Linux are arbitrary bytes except `\0`, up to + /// [`PATH_MAX`] (4096) bytes. We generate a mix of path-like ASCII + /// targets and binary targets, occasionally long. 
+ fn symlink_target() -> impl Strategy { + prop_oneof![ + // Short path-like ASCII target (common case) + 6 => proptest::string::string_regex("[a-zA-Z0-9/._-]{1,50}") + .expect("valid regex") + .prop_map(OsString::from), + // Binary target with arbitrary bytes (no NUL) + 3 => prop::collection::vec(1..=0xFFu8, 1..=100) + .prop_map(OsString::from_vec), + // Long ASCII target (up to PATH_MAX) + 1 => proptest::string::string_regex(&format!("[a-zA-Z0-9/._-]{{100,{PATH_MAX}}}")) + .expect("valid regex") + .prop_map(OsString::from), + ] + } + + /// Hash-type-agnostic leaf content for the spec. + /// + /// External file references store raw hash bytes rather than a concrete + /// `ObjectID` type, so the same spec works with any hash algorithm. + #[derive(Debug)] + pub enum LeafContentSpec { + Inline(Vec), + /// External file: random hash bytes (truncated to hash size at build time) and size. + External(Vec, u64), + Symlink(OsString), + BlockDevice(u64), + CharacterDevice(u64), + Fifo, + } + + /// Strategy for hash-type-agnostic leaf content. + fn leaf_content_spec() -> impl Strategy { + // Generate 64 random bytes — enough for both Sha256 (32) and Sha512 (64). + // build_filesystem will truncate to the right size. + ( + 0..10u8, + prop::collection::vec(any::(), 0..=100), + symlink_target(), + prop::collection::vec(any::(), 64..=64), + 1..=1_000_000u64, + 0..=65535u64, + ) + .prop_map( + |(tag, file_data, symlink_target, hash_bytes, ext_size, rdev)| match tag { + 0..=3 => LeafContentSpec::Inline(file_data), + 4 => LeafContentSpec::External(hash_bytes, ext_size), + 5..=6 => LeafContentSpec::Symlink(symlink_target), + 7 => LeafContentSpec::BlockDevice(rdev), + 8 => LeafContentSpec::CharacterDevice(rdev), + _ => LeafContentSpec::Fifo, + }, + ) + } + + /// A hash-type-agnostic leaf node specification. 
+ #[derive(Debug)] + pub struct LeafSpec { + pub stat: tree::Stat, + pub content: LeafContentSpec, + } + + fn leaf_spec() -> impl Strategy { + (stat(), leaf_content_spec()).prop_map(|(stat, content)| LeafSpec { stat, content }) + } + + /// Strategy for a list of uniquely-named leaf specs. + fn named_leaf_specs(max_entries: usize) -> impl Strategy> { + prop::collection::vec((filename(), leaf_spec()), 0..=max_entries).prop_map(|entries| { + let mut seen = std::collections::HashSet::new(); + entries + .into_iter() + .filter(|(name, _)| seen.insert(name.clone())) + .collect() + }) + } + + /// Description of a directory to be built, including potential hardlinks. + #[derive(Debug)] + pub struct DirSpec { + /// Stat metadata for this directory. + pub stat: tree::Stat, + /// Leaf entries in this directory. + pub leaves: Vec<(OsString, LeafSpec)>, + /// Subdirectory entries. + pub subdirs: Vec<(OsString, DirSpec)>, + } + + /// Description of a filesystem to be built, with hardlink info. + #[derive(Debug)] + pub struct FsSpec { + /// Root directory specification. + pub root: DirSpec, + /// Hardlink pairs: which leaf to link and where. + pub hardlinks: Vec, + } + + /// Specification for a hardlink: which leaf to link and where. + #[derive(Debug, Clone)] + pub struct HardlinkSpec { + /// Index into the flat list of all leaves (to pick which one to hardlink). + pub source_index: usize, + /// Name for the hardlink in the root directory. + pub link_name: OsString, + } + + /// Strategy for a subdirectory (no further nesting). + fn subdir_spec() -> impl Strategy { + (filename(), stat(), named_leaf_specs(10)).prop_map(|(name, stat, leaves)| { + ( + name, + DirSpec { + stat, + leaves, + subdirs: vec![], + }, + ) + }) + } + + /// Strategy for unique subdirectories. 
+ fn unique_subdirs(max: usize) -> impl Strategy> { + prop::collection::vec(subdir_spec(), 0..=max).prop_map(|dirs| { + let mut seen = std::collections::HashSet::new(); + dirs.into_iter() + .filter(|(name, _)| seen.insert(name.clone())) + .collect() + }) + } + + /// Strategy for generating a complete `FsSpec`. + /// + /// Generates a root directory with up to 15 file entries and up to 5 + /// subdirectories (each with up to 10 entries, max depth 2). Then + /// optionally generates 0-3 hardlinks that reference existing leaves. + pub fn filesystem_spec() -> impl Strategy { + ( + stat(), + named_leaf_specs(15), + unique_subdirs(5), + // Hardlink candidates: (source index placeholder, link name) + prop::collection::vec((any::(), filename()), 0..=3), + ) + .prop_map( + |(root_stat, mut root_leaves, mut root_subdirs, hl_candidates)| { + // Deduplicate names across files and subdirs + let mut seen: std::collections::HashSet = + std::collections::HashSet::new(); + root_subdirs.retain(|(name, _)| seen.insert(name.clone())); + root_leaves.retain(|(name, _)| seen.insert(name.clone())); + + // Count total leaves for hardlink source index range + let total_leaves: usize = root_leaves.len() + + root_subdirs + .iter() + .map(|(_, d)| d.leaves.len()) + .sum::(); + + let hardlinks = if total_leaves > 0 { + hl_candidates + .into_iter() + .map(|(idx, name)| HardlinkSpec { + source_index: idx % total_leaves, + link_name: name, + }) + .collect() + } else { + vec![] + }; + + FsSpec { + root: DirSpec { + stat: root_stat, + leaves: root_leaves, + subdirs: root_subdirs, + }, + hardlinks, + } + }, + ) + } + + /// Convert a `LeafContentSpec` into a concrete `tree::LeafContent`. 
+ fn build_leaf_content( + spec: LeafContentSpec, + ) -> tree::LeafContent { + match spec { + LeafContentSpec::Inline(data) => { + tree::LeafContent::Regular(RegularFile::Inline(data.into_boxed_slice())) + } + LeafContentSpec::External(hash_bytes, size) => { + let hash_len = mem::size_of::(); + let hex = hex::encode(&hash_bytes[..hash_len]); + let hash = ObjectID::from_hex(&hex).unwrap(); + tree::LeafContent::Regular(RegularFile::External(hash, size)) + } + LeafContentSpec::Symlink(target) => { + tree::LeafContent::Symlink(target.into_boxed_os_str()) + } + LeafContentSpec::BlockDevice(rdev) => tree::LeafContent::BlockDevice(rdev), + LeafContentSpec::CharacterDevice(rdev) => tree::LeafContent::CharacterDevice(rdev), + LeafContentSpec::Fifo => tree::LeafContent::Fifo, + } + } + + /// Build a `tree::FileSystem` from an `FsSpec`, consuming it. + /// + /// Generic over `ObjectID` — the same spec produces correctly-typed + /// external file references for any hash algorithm. + pub fn build_filesystem( + spec: FsSpec, + ) -> tree::FileSystem { + let mut fs = tree::FileSystem::new(spec.root.stat); + + let mut all_leaves: Vec>> = Vec::new(); + let mut used_names: std::collections::HashSet = std::collections::HashSet::new(); + + // Insert root-level leaves + for (name, leaf_spec) in spec.root.leaves { + let leaf = Rc::new(tree::Leaf { + stat: leaf_spec.stat, + content: build_leaf_content(leaf_spec.content), + }); + all_leaves.push(Rc::clone(&leaf)); + used_names.insert(name.clone()); + fs.root.insert(&name, tree::Inode::Leaf(leaf)); + } + + // Insert subdirectories + for (dir_name, dir_spec) in spec.root.subdirs { + let mut subdir = tree::Directory::new(dir_spec.stat); + for (name, leaf_spec) in dir_spec.leaves { + let leaf = Rc::new(tree::Leaf { + stat: leaf_spec.stat, + content: build_leaf_content(leaf_spec.content), + }); + all_leaves.push(Rc::clone(&leaf)); + subdir.insert(&name, tree::Inode::Leaf(leaf)); + } + used_names.insert(dir_name.clone()); + fs.root + 
.insert(&dir_name, tree::Inode::Directory(Box::new(subdir))); + } + + // Insert hardlinks into the root directory + for hl in &spec.hardlinks { + if !all_leaves.is_empty() { + let idx = hl.source_index % all_leaves.len(); + if used_names.insert(hl.link_name.clone()) { + let leaf = Rc::clone(&all_leaves[idx]); + fs.root.insert(&hl.link_name, tree::Inode::Leaf(leaf)); + } + } + } + // Drop the temporary refs used for hardlink indexing + drop(all_leaves); + + fs + } +} From b6dc460ba12c3bf42a09fbe40b7f2607d721d7b4 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 10 Mar 2026 15:01:00 +0000 Subject: [PATCH 4/4] erofs: Add erofs_to_filesystem to convert EROFS images back to tree::FileSystem The composefs dumpfile was invented to precisely describe exactly what's in a "composefs EROFS" in a human readable textual fashion. However our internal FileSystem is an efficient in-memory metadata tree which we use in various places. Wire these three things more together by supporting going from EROFS -> FileSystem. By adding proptests that cover this we strengthen test coverage of all of these components together. Assisted-by: OpenCode (Claude claude-opus-4-6) Signed-off-by: Colin Walters --- crates/composefs/src/erofs/reader.rs | 469 ++++++++++++++++++++++++++- 1 file changed, 466 insertions(+), 3 deletions(-) diff --git a/crates/composefs/src/erofs/reader.rs b/crates/composefs/src/erofs/reader.rs index 4eff3f97..0bfda609 100644 --- a/crates/composefs/src/erofs/reader.rs +++ b/crates/composefs/src/erofs/reader.rs @@ -5,9 +5,14 @@ //! reference collection for garbage collection. 
use core::mem::size_of; -use std::collections::{BTreeSet, HashSet}; +use std::cell::RefCell; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::ffi::OsStr; use std::ops::Range; +use std::os::unix::ffi::OsStrExt; +use std::rc::Rc; +use anyhow::Context; use thiserror::Error; use zerocopy::{little_endian::U32, FromBytes, Immutable, KnownLayout}; @@ -15,10 +20,12 @@ use super::{ composefs::OverlayMetacopy, format::{ CompactInodeHeader, ComposefsHeader, DataLayout, DirectoryEntryHeader, ExtendedInodeHeader, - InodeXAttrHeader, ModeField, Superblock, XAttrHeader, + InodeXAttrHeader, ModeField, Superblock, XAttrHeader, S_IFBLK, S_IFCHR, S_IFIFO, S_IFLNK, + S_IFMT, S_IFREG, S_IFSOCK, XATTR_PREFIXES, }, }; use crate::fsverity::FsVerityHashValue; +use crate::tree; /// Rounds up a value to the nearest multiple of `to` pub fn round_up(n: usize, to: usize) -> usize { @@ -675,11 +682,277 @@ pub fn collect_objects(image: &[u8]) -> ReadResult< Ok(this.objects) } +/// Construct the full xattr name from a prefix index and suffix. 
+fn construct_xattr_name(xattr: &XAttr) -> Vec<u8> {
+    // Panics if `name_index` is not a valid index into `XATTR_PREFIXES`.
+    let prefix = XATTR_PREFIXES[xattr.header.name_index as usize];
+    let suffix = xattr.suffix();
+    [prefix, suffix].concat()
+}
+
+/// Build a `tree::Stat` from an erofs inode, reversing the xattr namespace
+/// transformations applied by the writer:
+/// - Strips `trusted.overlay.metacopy` and `trusted.overlay.redirect`
+/// - Unescapes `trusted.overlay.overlay.X` back to `trusted.overlay.X`
+///
+/// Only the low twelve mode bits (`mode & 0o7777`) are stored in `st_mode`;
+/// the file-type bits are dropped.  Shared xattrs are processed before local
+/// ones, so a local xattr wins when both map to the same name.
+fn stat_from_inode_for_tree(img: &Image, inode: &InodeType) -> tree::Stat {
+    let (st_mode, st_uid, st_gid, st_mtim_sec) = match inode {
+        InodeType::Compact(inode) => (
+            u32::from(inode.header.mode.0.get()) & 0o7777,
+            u32::from(inode.header.uid.get()),
+            u32::from(inode.header.gid.get()),
+            // Compact inodes have no mtime field.  Per the original author's
+            // note the writer always emits extended headers, so this arm is
+            // not reached for images it produced; 0 is a placeholder.
+            0i64,
+        ),
+        InodeType::Extended(inode) => (
+            u32::from(inode.header.mode.0.get()) & 0o7777,
+            inode.header.uid.get(),
+            inode.header.gid.get(),
+            inode.header.mtime.get() as i64,
+        ),
+    };
+
+    let mut xattrs = BTreeMap::new();
+
+    if let Some(xattrs_section) = inode.xattrs() {
+        // Shared xattrs are stored as ids that are resolved through the image.
+        for id in xattrs_section.shared() {
+            let xattr = img.shared_xattr(id.get());
+            if let Some((name, value)) = transform_xattr(xattr) {
+                xattrs.insert(name, value);
+            }
+        }
+        // Local xattrs are read directly from the inode's xattr section.
+        for xattr in xattrs_section.local() {
+            if let Some((name, value)) = transform_xattr(xattr) {
+                xattrs.insert(name, value);
+            }
+        }
+    }
+
+    tree::Stat {
+        st_mode,
+        st_uid,
+        st_gid,
+        st_mtim_sec,
+        xattrs: RefCell::new(xattrs),
+    }
+}
+
+/// Transform a single xattr, reversing writer escaping.
+/// Returns None for internal overlay xattrs that should be stripped.
+fn transform_xattr(xattr: &XAttr) -> Option<(Box<OsStr>, Box<[u8]>)> {
+    // The writer escapes user-supplied `trusted.overlay.X` by inserting one
+    // extra `overlay.` component; stripping it again restores the original.
+    const ESCAPED_PREFIX: &[u8] = b"trusted.overlay.overlay.";
+    const OVERLAY_PREFIX: &[u8] = b"trusted.overlay.";
+
+    let full_name = construct_xattr_name(xattr);
+
+    // Skip internal overlay xattrs added by the writer
+    if full_name == b"trusted.overlay.metacopy" || full_name == b"trusted.overlay.redirect" {
+        return None;
+    }
+
+    // Unescape: trusted.overlay.overlay.X -> trusted.overlay.X
+    let final_name = match full_name.strip_prefix(ESCAPED_PREFIX) {
+        Some(rest) => [OVERLAY_PREFIX, rest].concat(),
+        None => full_name,
+    };
+
+    let name = Box::from(OsStr::from_bytes(&final_name));
+    let value = Box::from(xattr.value());
+    Some((name, value))
+}
+
+/// Extract file data from an inode (inline and block data combined).
+///
+/// Block data is appended first and the inline data after it; the result is
+/// then cut down to the size recorded in the inode.  An inode of size zero
+/// yields an empty vector without touching any block.
+fn extract_all_file_data(img: &Image, inode: &InodeType) -> Vec<u8> {
+    let file_size = inode.size() as usize;
+    if file_size == 0 {
+        return Vec::new();
+    }
+
+    let mut data = Vec::with_capacity(file_size);
+
+    // Read block data first
+    for blkid in inode.blocks(img.blkszbits) {
+        data.extend_from_slice(img.block(blkid));
+    }
+
+    // Read inline data
+    if let Some(inline) = inode.inline() {
+        data.extend_from_slice(inline);
+    }
+
+    // Whole blocks were copied, so drop whatever lies beyond the file size.
+    data.truncate(file_size);
+    data
+}
+
+/// Try to extract a metacopy digest from an inode's xattrs.
+///
+/// Shared xattrs are searched before local ones and the first valid
+/// `overlay.metacopy` value wins.  Returns `None` when the inode has no xattr
+/// section or no valid metacopy xattr.
+fn extract_metacopy_digest<ObjectID: FsVerityHashValue>(
+    img: &Image,
+    inode: &InodeType,
+) -> Option<ObjectID> {
+    let xattrs_section = inode.xattrs()?;
+
+    for id in xattrs_section.shared() {
+        let digest = check_metacopy_xattr(img.shared_xattr(id.get()));
+        if digest.is_some() {
+            return digest;
+        }
+    }
+    for xattr in xattrs_section.local() {
+        let digest = check_metacopy_xattr(xattr);
+        if digest.is_some() {
+            return digest;
+        }
+    }
+    None
+}
+
+/// Check if a single xattr is a valid overlay.metacopy and return the digest.
+fn check_metacopy_xattr<ObjectID: FsVerityHashValue>(xattr: &XAttr) -> Option<ObjectID> {
+    // name_index 4 = "trusted.", suffix = "overlay.metacopy"
+    if xattr.header.name_index != 4 || xattr.suffix() != b"overlay.metacopy" {
+        return None;
+    }
+    // A value that cannot be read as a metacopy record for this digest type
+    // is treated the same as an absent one.
+    let value = OverlayMetacopy::<ObjectID>::read_from_bytes(xattr.value()).ok()?;
+    value.valid().then(|| value.digest.clone())
+}
+
+/// Iterate over directory entries from an inode, yielding (name_bytes, nid) pairs.
+/// Skips "." and "..".
+///
+/// Entries stored in the inode's blocks are yielded before entries stored
+/// inline.  Inline data that cannot be viewed as a `DirectoryBlock` is
+/// ignored.
+fn dir_entries<'a>(
+    img: &'a Image<'a>,
+    dir_inode: &'a InodeType<'a>,
+) -> impl Iterator<Item = (&'a [u8], u64)> {
+    /// True for the "." and ".." entries.
+    fn is_dot_entry(name: &[u8]) -> bool {
+        matches!(name, b"." | b"..")
+    }
+
+    // Block-based entries
+    let block_entries = dir_inode.blocks(img.blkszbits).flat_map(move |blkid| {
+        img.directory_block(blkid)
+            .entries()
+            .filter(|e| !is_dot_entry(e.name))
+            .map(|e| (e.name, e.nid()))
+    });
+
+    // Inline entries
+    let inline_entries = dir_inode
+        .inline()
+        .and_then(|data| DirectoryBlock::ref_from_bytes(data).ok())
+        .into_iter()
+        .flat_map(|block| {
+            block
+                .entries()
+                .filter(|e| !is_dot_entry(e.name))
+                .map(|e| (e.name, e.nid()))
+        });
+
+    block_entries.chain(inline_entries)
+}
+
+/// Recursively populate a `tree::Directory` from an erofs directory inode.
+fn populate_directory<ObjectID: FsVerityHashValue>(
+    img: &Image,
+    dir_inode: &InodeType,
+    dir: &mut tree::Directory<ObjectID>,
+    hardlinks: &mut HashMap<u64, Rc<tree::Leaf<ObjectID>>>,
+) -> anyhow::Result<()> {
+    // The nids of the directories currently being descended into.  A valid
+    // image is a tree, but a corrupt one can make a directory its own
+    // descendant, which would otherwise recurse until the stack overflows.
+    let mut ancestors = HashSet::new();
+    populate_directory_inner(img, dir_inode, dir, hardlinks, &mut ancestors)
+}
+
+/// Recursive worker behind `populate_directory`.
+///
+/// `hardlinks` maps the nid of every leaf whose link count is above one to
+/// the `Rc` created for it, so later entries with the same nid share it.
+/// `ancestors` holds the nids of the directories on the current descent path.
+///
+/// Fails when a directory is found beneath itself or when an inode has a
+/// file type this reader does not know.
+fn populate_directory_inner<ObjectID: FsVerityHashValue>(
+    img: &Image,
+    dir_inode: &InodeType,
+    dir: &mut tree::Directory<ObjectID>,
+    hardlinks: &mut HashMap<u64, Rc<tree::Leaf<ObjectID>>>,
+    ancestors: &mut HashSet<u64>,
+) -> anyhow::Result<()> {
+    for (name_bytes, nid) in dir_entries(img, dir_inode) {
+        let name = OsStr::from_bytes(name_bytes);
+        let child_inode = img.inode(nid);
+
+        if child_inode.mode().is_dir() {
+            // `insert` returns false when the nid is already on the path.
+            if !ancestors.insert(nid) {
+                anyhow::bail!("directory {:?} is nested inside itself", name);
+            }
+            let child_stat = stat_from_inode_for_tree(img, &child_inode);
+            let mut child_dir = tree::Directory::new(child_stat);
+            populate_directory_inner(img, &child_inode, &mut child_dir, hardlinks, ancestors)
+                .with_context(|| format!("reading directory {:?}", name))?;
+            ancestors.remove(&nid);
+            dir.insert(name, tree::Inode::Directory(Box::new(child_dir)));
+            continue;
+        }
+
+        // Check if this is a hardlink (same nid seen before)
+        if let Some(existing_leaf) = hardlinks.get(&nid) {
+            dir.insert(name, tree::Inode::Leaf(Rc::clone(existing_leaf)));
+            continue;
+        }
+
+        let stat = stat_from_inode_for_tree(img, &child_inode);
+        let file_type = child_inode.mode().0.get() & S_IFMT;
+
+        let content = match file_type {
+            // A regular file with a valid metacopy xattr is an external
+            // reference; otherwise its bytes are stored in the image.
+            S_IFREG => match extract_metacopy_digest::<ObjectID>(img, &child_inode) {
+                Some(digest) => tree::LeafContent::Regular(tree::RegularFile::External(
+                    digest,
+                    child_inode.size(),
+                )),
+                None => {
+                    let data = extract_all_file_data(img, &child_inode);
+                    tree::LeafContent::Regular(tree::RegularFile::Inline(data.into()))
+                }
+            },
+            S_IFLNK => {
+                // Read the target like file data so that a target stored
+                // (partly) in blocks is not silently cut down to its inline
+                // part.  For a purely inline target the result is the same.
+                let target_data = extract_all_file_data(img, &child_inode);
+                let target = OsStr::from_bytes(&target_data);
+                tree::LeafContent::Symlink(Box::from(target))
+            }
+            S_IFBLK => tree::LeafContent::BlockDevice(u64::from(child_inode.u())),
+            S_IFCHR => tree::LeafContent::CharacterDevice(u64::from(child_inode.u())),
+            S_IFIFO => tree::LeafContent::Fifo,
+            S_IFSOCK => tree::LeafContent::Socket,
+            _ => anyhow::bail!("unknown file type {:#o} for {:?}", file_type, name),
+        };
+
+        let leaf = Rc::new(tree::Leaf { stat, content });
+
+        // Track for hardlink detection if nlink > 1
+        if child_inode.nlink() > 1 {
+            hardlinks.insert(nid, Rc::clone(&leaf));
+        }
+
+        dir.insert(name, tree::Inode::Leaf(leaf));
+    }
+
+    Ok(())
+}
+
+/// Converts an EROFS image into a `tree::FileSystem`.
+///
+/// This is the inverse of `mkfs_erofs`: it reads an EROFS image and
+/// reconstructs the tree structure, including proper handling of hardlinks
+/// (via `Rc` sharing), xattr namespace transformations, and metacopy-based
+/// external file references.
+///
+/// # Errors
+///
+/// Returns an error when an inode has an unknown file type or when a
+/// directory is nested inside itself.
+pub fn erofs_to_filesystem<ObjectID: FsVerityHashValue>(
+    image_data: &[u8],
+) -> anyhow::Result<tree::FileSystem<ObjectID>> {
+    let img = Image::open(image_data);
+    let root_inode = img.root();
+
+    let root_stat = stat_from_inode_for_tree(&img, &root_inode);
+    let mut fs = tree::FileSystem::new(root_stat);
+
+    // nid -> leaf, for inodes that are linked from more than one name.
+    let mut hardlinks: HashMap<u64, Rc<tree::Leaf<ObjectID>>> = HashMap::new();
+
+    populate_directory(&img, &root_inode, &mut fs.root, &mut hardlinks)
+        .context("reading root directory")?;
+
+    Ok(fs)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::{
-        dumpfile::dumpfile_to_filesystem, erofs::writer::mkfs_erofs, fsverity::Sha256HashValue,
+        dumpfile::{dumpfile_to_filesystem, write_dumpfile},
+        erofs::writer::mkfs_erofs,
+        fsverity::Sha256HashValue,
     };
     use std::collections::HashMap;
 
@@ -1070,4 +1343,194 @@ mod tests {
         let inline_data = file1_inode.inline();
         assert_eq!(inline_data, Some(b"hello".as_slice()));
     }
+
+    /// Helper: round-trip a dumpfile through erofs and compare the result.
+    fn round_trip_dumpfile(input: &str) -> (String, String) {
+        let fs_orig = dumpfile_to_filesystem::<Sha256HashValue>(input).unwrap();
+        let orig_str = dump_to_string(&fs_orig);
+
+        let image = mkfs_erofs(&fs_orig);
+        let fs_rt = erofs_to_filesystem::<Sha256HashValue>(&image).unwrap();
+        let rt_str = dump_to_string(&fs_rt);
+
+        (orig_str, rt_str)
+    }
+
+    /// Helper: serialize a filesystem with `write_dumpfile` and return the
+    /// text.  Panics if writing fails or the output is not UTF-8.
+    fn dump_to_string(fs: &tree::FileSystem<Sha256HashValue>) -> String {
+        let mut output = Vec::new();
+        write_dumpfile(&mut output, fs).unwrap();
+        String::from_utf8(output).unwrap()
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_empty_root() {
+        let dumpfile = "/ 4096 40755 2 0 0 0 1000.0 - - -\n";
+        let (orig, rt) = round_trip_dumpfile(dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_inline_files() {
+        let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - -
+/empty 0 100644 1 0 0 0 1000.0 - - -
+/hello 5 100644 1 0 0 0 1000.0 - hello -
+/world 6 100644 1 0 0 0 1000.0 - world! -
+"#;
+        let (orig, rt) = round_trip_dumpfile(dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_symlinks() {
+        let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - -
+/link1 7 120777 1 0 0 0 1000.0 /target - -
+/link2 11 120777 1 0 0 0 1000.0 /other/path - -
+"#;
+        let (orig, rt) = round_trip_dumpfile(dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_nested_dirs() {
+        let dumpfile = r#"/ 4096 40755 3 0 0 0 1000.0 - - -
+/a 4096 40755 3 0 0 0 1000.0 - - -
+/a/b 4096 40755 3 0 0 0 1000.0 - - -
+/a/b/c 4096 40755 2 0 0 0 1000.0 - - -
+/a/b/c/file.txt 5 100644 1 0 0 0 1000.0 - hello -
+/a/b/other 3 100644 1 0 0 0 1000.0 - abc -
+"#;
+        let (orig, rt) = round_trip_dumpfile(dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_devices_and_fifos() {
+        let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - -
+/blk 0 60660 1 0 0 2049 1000.0 - - -
+/chr 0 20666 1 0 0 1025 1000.0 - - -
+/fifo 0 10644 1 0 0 0 1000.0 - - -
+"#;
+        let (orig, rt) = round_trip_dumpfile(dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_xattrs() {
+        let dumpfile =
+            "/ 4096 40755 2 0 0 0 1000.0 - - - security.selinux=system_u:object_r:root_t:s0\n\
+             /file 5 100644 1 0 0 0 1000.0 - hello - user.myattr=myvalue\n";
+        let (orig, rt) = round_trip_dumpfile(dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_escaped_overlay_xattrs() {
+        // The writer escapes trusted.overlay.X to trusted.overlay.overlay.X.
+        // Round-tripping must preserve the original xattr name.
+        let dumpfile = "/ 4096 40755 2 0 0 0 1000.0 - - -\n\
+             /file 5 100644 1 0 0 0 1000.0 - hello - trusted.overlay.custom=val\n";
+        let (orig, rt) = round_trip_dumpfile(dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_external_file() {
+        // External file with a known fsverity digest
+        let digest = "a".repeat(64);
+        let pathname = format!("{}/{}", &digest[..2], &digest[2..]);
+        let dumpfile = format!(
+            "/ 4096 40755 2 0 0 0 1000.0 - - -\n\
+             /ext 1024 100644 1 0 0 0 1000.0 {pathname} - {digest}\n"
+        );
+        let (orig, rt) = round_trip_dumpfile(&dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_hardlinks() {
+        let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - -
+/original 11 100644 2 0 0 0 1000.0 - hello_world -
+/hardlink 0 @120000 2 0 0 0 0.0 /original - -
+"#;
+
+        let fs_orig = dumpfile_to_filesystem::<Sha256HashValue>(dumpfile).unwrap();
+        let image = mkfs_erofs(&fs_orig);
+        let fs_rt = erofs_to_filesystem::<Sha256HashValue>(&image).unwrap();
+
+        // Verify hardlink Rc sharing (scope the extra refs so strong_count
+        // is correct when write_dumpfile checks nlink)
+        {
+            let orig_leaf = fs_rt.root.ref_leaf(OsStr::new("original")).unwrap();
+            let hardlink_leaf = fs_rt.root.ref_leaf(OsStr::new("hardlink")).unwrap();
+            assert!(
+                Rc::ptr_eq(&orig_leaf, &hardlink_leaf),
+                "hardlink entries should share the same Rc"
+            );
+        }
+
+        // Verify dumpfile round-trips correctly
+        let orig_str = dump_to_string(&fs_orig);
+        let rt_str = dump_to_string(&fs_rt);
+        assert_eq!(orig_str, rt_str);
+    }
+
+    #[test]
+    fn test_erofs_to_filesystem_mixed_types() {
+        let dumpfile = r#"/ 4096 40755 3 0 0 0 1000.0 - - -
+/blk 0 60660 1 0 6 259 1000.0 - - -
+/chr 0 20666 1 0 6 1025 1000.0 - - -
+/dir 4096 40755 2 42 42 0 2000.0 - - -
+/dir/nested 3 100644 1 42 42 0 2000.0 - abc -
+/fifo 0 10644 1 0 0 0 1000.0 - - -
+/hello 5 100644 1 1000 1000 0 1500.0 - hello -
+/link 7 120777 1 0 0 0 1000.0 /target - -
+"#;
+        let (orig, rt) = round_trip_dumpfile(dumpfile);
+        assert_eq!(orig, rt);
+    }
+
+    mod proptest_tests {
+        use super::*;
+        use crate::fsverity::Sha512HashValue;
+        use crate::test::proptest_strategies::{build_filesystem, filesystem_spec};
+        use proptest::prelude::*;
+
+        /// Round-trip a FileSystem through erofs with a given ObjectID type
+        /// and compare dumpfile output before and after.
+        fn round_trip_filesystem<ObjectID: FsVerityHashValue>(
+            fs_orig: &tree::FileSystem<ObjectID>,
+        ) {
+            let mut orig_output = Vec::new();
+            write_dumpfile(&mut orig_output, fs_orig).unwrap();
+
+            let image = mkfs_erofs(fs_orig);
+            let fs_rt = erofs_to_filesystem::<ObjectID>(&image).unwrap();
+
+            let mut rt_output = Vec::new();
+            write_dumpfile(&mut rt_output, &fs_rt).unwrap();
+
+            assert_eq!(orig_output, rt_output);
+        }
+
+        proptest! {
+            #![proptest_config(ProptestConfig::with_cases(64))]
+
+            #[test]
+            fn test_erofs_round_trip_sha256(spec in filesystem_spec()) {
+                let fs = build_filesystem::<Sha256HashValue>(spec);
+                round_trip_filesystem(&fs);
+            }
+
+            #[test]
+            fn test_erofs_round_trip_sha512(spec in filesystem_spec()) {
+                let fs = build_filesystem::<Sha512HashValue>(spec);
+                round_trip_filesystem(&fs);
+            }
+        }
+    }
 }