diff --git a/Cargo.toml b/Cargo.toml index d36fdf62..33be1ab8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,10 +17,12 @@ unsafe_code = "deny" # https://github.com/containers/composefs-rs/issues/123 [workspace.dependencies] composefs = { version = "0.3.0", path = "crates/composefs", default-features = false } +composefs-types = { version = "0.3.0", path = "crates/composefs-types", default-features = false } cfsctl = { version = "0.3.0", path = "crates/cfsctl", default-features = false } composefs-ioctls = { version = "0.3.0", path = "crates/composefs-ioctls", default-features = false } composefs-oci = { version = "0.3.0", path = "crates/composefs-oci", default-features = false } composefs-boot = { version = "0.3.0", path = "crates/composefs-boot", default-features = false } +composefs-erofs = { version = "0.3.0", path = "crates/composefs-erofs", default-features = false } composefs-http = { version = "0.3.0", path = "crates/composefs-http", default-features = false } [profile.dev.package.sha2] diff --git a/crates/composefs-erofs/Cargo.toml b/crates/composefs-erofs/Cargo.toml new file mode 100644 index 00000000..67712062 --- /dev/null +++ b/crates/composefs-erofs/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "composefs-erofs" +description = "EROFS format support for composefs" +publish = false + +edition.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[dependencies] +anyhow = { version = "1.0.87", default-features = false } +composefs-types = { workspace = true } +hex = { version = "0.4.0", default-features = false, features = ["std"] } +thiserror = { version = "2.0.0", default-features = false } +zerocopy = { version = "0.8.0", default-features = false, features = ["derive", "std"] } + +[lints] +workspace = true diff --git a/crates/composefs-erofs/src/composefs.rs b/crates/composefs-erofs/src/composefs.rs new file mode 100644 index 00000000..cf7bd597 --- 
/dev/null +++ b/crates/composefs-erofs/src/composefs.rs @@ -0,0 +1,43 @@ +//! Composefs-specific EROFS structures and overlay metadata. +//! +//! This module defines EROFS structures specific to composefs usage, +//! particularly overlay metadata for fs-verity integration. + +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; + +use composefs_types::fsverity::FsVerityHashValue; + +/// Overlay metacopy xattr structure for fs-verity digest storage. +/// +/// From linux/fs/overlayfs/overlayfs.h struct ovl_metacopy +#[derive(Debug, FromBytes, Immutable, KnownLayout, IntoBytes)] +#[repr(C)] +pub struct OverlayMetacopy { + version: u8, + len: u8, + flags: u8, + digest_algo: u8, + /// The fs-verity digest value. + pub digest: H, +} + +impl OverlayMetacopy { + /// Creates a new overlay metacopy entry with the given digest. + pub fn new(digest: &H) -> Self { + Self { + version: 0, + len: size_of::() as u8, + flags: 0, + digest_algo: H::ALGORITHM, + digest: digest.clone(), + } + } + + /// Checks whether this metacopy entry is valid. + pub fn valid(&self) -> bool { + self.version == 0 + && self.len == size_of::() as u8 + && self.flags == 0 + && self.digest_algo == H::ALGORITHM + } +} diff --git a/crates/composefs-erofs/src/debug.rs b/crates/composefs-erofs/src/debug.rs new file mode 100644 index 00000000..c1a687eb --- /dev/null +++ b/crates/composefs-erofs/src/debug.rs @@ -0,0 +1,541 @@ +//! Debug implementations and utilities for EROFS on-disk format structures. +//! +//! This module provides [`fmt::Debug`] implementations for EROFS format and +//! reader types, as well as tools for inspecting and debugging EROFS filesystem +//! images, including detailed structure dumping and space usage analysis. 
/// Writes the listed fields of `$struct` to `$fmt`, skipping every field whose
/// value equals the `Default` value of its type.
///
/// Field names are separated by `;`. Each field that is printed goes through
/// `write_with_offset!`, labelled with the field's name and positioned
/// relative to `$base`. The expansion contains `?`, so the macro can only be
/// used inside a function that can propagate the writer's error.
macro_rules! write_fields {
    // Base case: exactly one field.
    ($fmt: expr, $base: expr, $struct: expr, $field: ident) => {{
        let value = &$struct.$field;
        // The `if false` arm never runs; it exists only so that both arms must
        // have the same type, which lets `Default::default()` be inferred as
        // the field's own type without naming it.
        let default = if false { value } else { &Default::default() };
        if value != default {
            write_with_offset!($fmt, $base, stringify!($field), value)?;
        }
    }};
    // Recursive case: emit the first field, then recurse on the remainder.
    ($fmt: expr, $base: expr, $struct: expr, $head: ident; $($tail: ident);+) => {{
        write_fields!($fmt, $base, $struct, $head);
        write_fields!($fmt, $base, $struct, $($tail);+);
    }};
}
ComposefsHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + writeln!(f, "ComposefsHeader")?; + write_fields!(f, self, self, + magic; flags; version; composefs_version; unused + ); + Ok(()) + } +} + +impl fmt::Debug for Superblock { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + writeln!(f, "Superblock")?; + write_fields!(f, self, self, + magic; checksum; feature_compat; blkszbits; extslots; root_nid; inos; build_time; + build_time_nsec; blocks; meta_blkaddr; xattr_blkaddr; uuid; volume_name; + feature_incompat; available_compr_algs; extra_devices; devt_slotoff; dirblkbits; + xattr_prefix_count; xattr_prefix_start; packed_nid; xattr_filter_reserved; reserved2 + ); + Ok(()) + } +} + +fn utf8_or_hex(data: &[u8]) -> String { + if let Ok(string) = std::str::from_utf8(data) { + format!("{string:?}") + } else { + hex::encode(data) + } +} + +fn hexdump(f: &mut impl fmt::Write, data: &[u8], rel: usize) -> fmt::Result { + let start = match rel { + 0 => 0, + ptr => data.as_ptr() as usize - ptr, + }; + let end = start + data.len(); + let start_row = start / 16; + let end_row = end.div_ceil(16); + + for row in start_row..end_row { + let row_start = row * 16; + let row_end = row * 16 + 16; + write!(f, "{row_start:+8x} ")?; + + for idx in row_start..row_end { + if start <= idx && idx < end { + write!(f, "{:02x} ", data[idx - start])?; + } else { + write!(f, " ")?; + } + if idx % 8 == 7 { + write!(f, " ")?; + } + } + write!(f, "|")?; + + for idx in row_start..row_end { + if start <= idx && idx < end { + let c = data[idx - start]; + if c.is_ascii() && !c.is_ascii_control() { + write!(f, "{}", c as char)?; + } else { + write!(f, ".")?; + } + } else { + write!(f, " ")?; + } + } + writeln!(f, "|")?; + } + + Ok(()) +} + +impl fmt::Debug for XAttr { + // Injective (ie: accounts for every byte in the input) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "({} {} {}) {}{} = {}", + self.header.name_index, + self.header.name_len, + 
self.header.value_size, + std::str::from_utf8(format::XATTR_PREFIXES[self.header.name_index as usize]).unwrap(), + utf8_or_hex(self.suffix()), + utf8_or_hex(self.value()), + )?; + if self.padding().iter().any(|c| *c != 0) { + write!(f, " {:?}", self.padding())?; + } + Ok(()) + } +} + +impl fmt::Debug for Inode { + // Injective (ie: accounts for every byte in the input) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&self.header, f)?; + + if let Some(xattrs) = self.xattrs() { + write_fields!(f, self, xattrs.header, name_filter; shared_count; reserved); + + if !xattrs.shared().is_empty() { + write_with_offset!(f, self, "shared xattrs", xattrs.shared())?; + } + + for xattr in xattrs.local() { + write_with_offset!(f, self, "xattr", xattr)?; + } + } + + // We want to print one of four things for inline data: + // - no data: print nothing + // - directory data: dump the entries + // - small inline text string: print it + // - otherwise, hexdump + let Some(inline) = self.inline() else { + // No inline data + return Ok(()); + }; + + // Directory dump + if self.header.mode().is_dir() { + let dir = DirectoryBlock::ref_from_bytes(inline).unwrap(); + let offset = addr!(dir) - addr!(self); + return write!( + f, + " +{offset:02x} --- inline directory entries ---{dir:#?}" + ); + } + + // Small string (<= 128 bytes, utf8, no control characters). 
// Dumps every entry of a directory block.
//
// For each entry this writes an empty line, then the entry-header fields that
// differ from their default value (via `write_fields!`, positioned relative to
// the start of this block), then a `# name:` line whose leading column is the
// entry's `name_offset` and whose text is the name rendered by `utf8_or_hex`.
impl fmt::Debug for DirectoryBlock {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for entry in self.entries() {
            // Blank separator line before each entry.
            writeln!(f)?;
            write_fields!(f, self, entry.header, inode_offset; name_offset; file_type; reserved);
            writeln!(
                f,
                "{:+8x} # name: {}",
                entry.header.name_offset.get(),
                utf8_or_hex(entry.name)
            )?;
        }
        // TODO: trailing junk inside of st_size
        // TODO: padding up to block or inode boundary
        Ok(())
    }
}
+impl SegmentType<'_> { + fn addr(&self) -> usize { + match self { + SegmentType::Header(h) => addr!(*h), + SegmentType::Superblock(sb) => addr!(*sb), + SegmentType::CompactInode(i) => addr!(*i), + SegmentType::ExtendedInode(i) => addr!(*i), + SegmentType::XAttr(x) => addr!(*x), + SegmentType::DataBlock(b) => addr!(*b), + SegmentType::DirectoryBlock(b) => addr!(*b), + } + } + + fn size(&self) -> usize { + match self { + SegmentType::Header(h) => size_of_val(*h), + SegmentType::Superblock(sb) => size_of_val(*sb), + SegmentType::CompactInode(i) => size_of_val(*i), + SegmentType::ExtendedInode(i) => size_of_val(*i), + SegmentType::XAttr(x) => size_of_val(*x), + SegmentType::DataBlock(b) => size_of_val(*b), + SegmentType::DirectoryBlock(b) => size_of_val(*b), + } + } + + fn typename(&self) -> &'static str { + match self { + SegmentType::Header(..) => "header", + SegmentType::Superblock(..) => "superblock", + SegmentType::CompactInode(..) => "compact inode", + SegmentType::ExtendedInode(..) => "extended inode", + SegmentType::XAttr(..) => "shared xattr", + SegmentType::DataBlock(..) => "data block", + SegmentType::DirectoryBlock(..) => "directory block", + } + } +} + +#[allow(missing_debug_implementations)] +struct ImageVisitor<'img> { + image: &'img Image<'img>, + visited: BTreeMap, Vec>)>, +} + +impl<'img> ImageVisitor<'img> { + fn note(&mut self, segment: SegmentType<'img>, path: Option<&Path>) -> bool { + let offset = segment.addr() - self.image.image.as_ptr() as usize; + match self.visited.entry(offset) { + std::collections::btree_map::Entry::Occupied(mut e) => { + let (existing, paths) = e.get_mut(); + // TODO: figure out pointer value equality... 
/// Records the inode `id` (reached via `path`) and everything it references.
///
/// The inode itself is noted first. If it was already recorded, only the
/// extra path is remembered and the walk stops here, which also prevents an
/// inode from being descended into twice. Otherwise its shared xattrs are
/// noted, and then either its directory contents are walked recursively
/// (for directories) or its data blocks are noted (for everything else).
///
/// Panics if inline directory data cannot be viewed as a `DirectoryBlock`
/// (the conversion result is unwrapped).
fn visit_inode(&mut self, id: u64, path: &Path) {
    let inode = self.image.inode(id);
    // Wrap the inode in the segment variant matching its header layout.
    let segment = match inode {
        InodeType::Compact(inode) => SegmentType::CompactInode(inode),
        InodeType::Extended(inode) => SegmentType::ExtendedInode(inode),
    };
    // `note` returns true when a segment at this offset was already recorded.
    if self.note(segment, Some(path)) {
        // TODO: maybe we want to throw an error if we detect loops
        /* already processed */
        return;
    }

    // Shared xattrs are separate segments referenced by id from the inode.
    if let Some(xattrs) = inode.xattrs() {
        for id in xattrs.shared() {
            self.note(
                SegmentType::XAttr(self.image.shared_xattr(id.get())),
                Some(path),
            );
        }
    }

    if inode.mode().is_dir() {
        // Inline directory data is part of the inode segment itself, so it is
        // walked for its entries but not noted as a separate segment.
        if let Some(inline) = inode.inline() {
            let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap();
            self.visit_directory_block(inline_block, path);
        }

        for id in inode.blocks(self.image.blkszbits) {
            let block = self.image.directory_block(id);
            // Children are visited before the block itself is noted.
            self.visit_directory_block(block, path);
            self.note(SegmentType::DirectoryBlock(block), Some(path));
        }
    } else {
        // Non-directories: just record each data block.
        for id in inode.blocks(self.image.blkszbits) {
            let block = self.image.data_block(id);
            self.note(SegmentType::DataBlock(block), Some(path));
        }
    }
}
/// Adds `count` to the running total stored under `key`.
///
/// When `key` is not yet present it is cloned and inserted with `count` as its
/// initial total; an existing key is never cloned.
fn addto<T: Clone + Ord>(map: &mut BTreeMap<T, usize>, key: &T, count: usize) {
    match map.get_mut(key) {
        Some(total) => *total += count,
        None => {
            map.insert(key.clone(), count);
        }
    }
}
/// Writes an annotated dump of the EROFS image in `data` to `output`.
///
/// The image is opened and walked with `ImageVisitor::visit_image`; the
/// recorded segments are then printed in the order the returned map yields
/// them. Gaps between segments are reported through `dump_unassigned`, and
/// overlapping segments or segments running past the end of `data` are
/// flagged with `***` lines. A per-segment-type and per-gap size summary is
/// printed at the end.
///
/// # Errors
///
/// Returns an error if writing to `output` fails.
pub fn debug_img(output: &mut impl std::io::Write, data: &[u8]) -> Result<()> {
    let image = Image::open(data);
    let visited = ImageVisitor::visit_image(&image);

    // Byte offsets at which the superblock says the metadata and xattr areas begin.
    let inode_start = (image.sb.meta_blkaddr.get() as usize) << image.sb.blkszbits;
    let xattr_start = (image.sb.xattr_blkaddr.get() as usize) << image.sb.blkszbits;

    // Bytes used per segment type, and bytes of gap per (previous type, next type).
    let mut space_stats = BTreeMap::new();
    let mut padding_stats = BTreeMap::new();

    let mut last_segment_type = "";
    // `offset` is where the previous segment ended, i.e. where the next one
    // would start if the segments were packed without gaps.
    let mut offset = 0;
    for (start, (segment, paths)) in visited {
        let segment_type = segment.typename();
        addto(&mut space_stats, &segment_type, segment.size());

        match offset.cmp(&start) {
            Ordering::Less => {
                // Gap between the previous segment and this one.
                dump_unassigned(output, offset, &data[offset..start])?;
                addto(
                    &mut padding_stats,
                    &(last_segment_type, segment_type),
                    start - offset,
                );
                offset = start;
            }
            Ordering::Greater => {
                // This segment starts before the previous one ended.
                writeln!(output, "*** Overlapping segments!")?;
                writeln!(output)?;
                offset = start;
            }
            _ => {}
        }

        last_segment_type = segment_type;

        // Every path through which this segment was reached.
        for path in paths {
            writeln!(
                output,
                "# Filename {}",
                utf8_or_hex(path.as_os_str().as_bytes())
            )?;
        }

        match segment {
            SegmentType::Header(header) => {
                writeln!(output, "{offset:08x} {header:?}")?;
            }
            SegmentType::Superblock(sb) => {
                writeln!(output, "{offset:08x} {sb:?}")?;
            }
            SegmentType::CompactInode(inode) => {
                // The printed nid is the distance from `inode_start` in 32-byte units.
                writeln!(output, "# nid #{}", (offset - inode_start) / 32)?;
                writeln!(output, "{offset:08x} {inode:#?}")?;
            }
            SegmentType::ExtendedInode(inode) => {
                writeln!(output, "# nid #{}", (offset - inode_start) / 32)?;
                writeln!(output, "{offset:08x} {inode:#?}")?;
            }
            SegmentType::XAttr(xattr) => {
                // The printed xattr id is the distance from `xattr_start` in 4-byte units.
                writeln!(output, "# xattr #{}", (offset - xattr_start) / 4)?;
                writeln!(output, "{offset:08x} {xattr:?}")?;
            }
            SegmentType::DirectoryBlock(block) => {
                writeln!(output, "# block #{}", offset / image.block_size)?;
                writeln!(output, "{offset:08x} Directory block{block:?}")?;
            }
            SegmentType::DataBlock(block) => {
                writeln!(output, "# block #{}", offset / image.block_size)?;
                writeln!(output, "{offset:08x} Data block\n{block:?}")?;
            }
        }

        offset += segment.size();
    }

    // Anything after the last segment is reported as a final gap up to "eof".
    if offset < data.len() {
        let unassigned = &data[offset..];
        dump_unassigned(output, offset, unassigned)?;
        addto(
            &mut padding_stats,
            &(last_segment_type, "eof"),
            unassigned.len(),
        );
        offset = data.len();
        writeln!(output)?;
    }

    // The last segment claimed more bytes than the image contains.
    if offset > data.len() {
        writeln!(output, "*** Segments past EOF!")?;
        offset = data.len();
    }

    // From here on `offset` equals `data.len()` and serves as the total size.
    writeln!(output, "Space statistics (total size {offset}B):")?;
    for (key, value) in space_stats {
        writeln!(
            output,
            " {key} = {value}B, {:.2}%",
            (100. * value as f64) / (offset as f64)
        )?;
    }
    for ((from, to), value) in padding_stats {
        writeln!(
            output,
            " padding {from} -> {to} = {value}B, {:.2}%",
            (100. * value as f64) / (offset as f64)
        )?;
    }

    Ok(())
}
/// Errors that can occur when parsing EROFS format structures
///
/// Implements [`std::error::Error`], so it can be propagated with `?` into
/// boxed or `anyhow`-style error types and rendered for users via `Display`.
#[derive(Debug)]
pub enum FormatError {
    /// The data layout field contains an invalid value
    InvalidDataLayout,
}

impl fmt::Display for FormatError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::InvalidDataLayout => f.write_str("invalid data layout in inode format field"),
        }
    }
}

impl std::error::Error for FormatError {}
+#[derive(Debug)] +#[repr(u16)] +pub enum DataLayout { + /// File data stored in separate blocks + FlatPlain = 0, + /// File data stored inline within the inode + FlatInline = 4, + /// File data stored using chunk-based addressing + ChunkBased = 8, +} + +impl TryFrom for DataLayout { + type Error = FormatError; + + fn try_from(value: FormatField) -> Result { + match value.0.get() & INODE_DATALAYOUT_MASK { + INODE_DATALAYOUT_FLAT_PLAIN => Ok(DataLayout::FlatPlain), + INODE_DATALAYOUT_FLAT_INLINE => Ok(DataLayout::FlatInline), + INODE_DATALAYOUT_CHUNK_BASED => Ok(DataLayout::ChunkBased), + // This is non-injective, but only occurs in error cases. + _ => Err(FormatError::InvalidDataLayout), + } + } +} + +impl std::ops::BitOr for InodeLayout { + type Output = FormatField; + + // Convert InodeLayout | DataLayout into a format field + fn bitor(self, datalayout: DataLayout) -> FormatField { + FormatField((self as u16 | datalayout as u16).into()) + } +} + +/// File type mask for st_mode +pub const S_IFMT: u16 = 0o170000; +/// Regular file mode bit +pub const S_IFREG: u16 = 0o100000; +/// Character device mode bit +pub const S_IFCHR: u16 = 0o020000; +/// Directory mode bit +pub const S_IFDIR: u16 = 0o040000; +/// Block device mode bit +pub const S_IFBLK: u16 = 0o060000; +/// FIFO mode bit +pub const S_IFIFO: u16 = 0o010000; +/// Symbolic link mode bit +pub const S_IFLNK: u16 = 0o120000; +/// Socket mode bit +pub const S_IFSOCK: u16 = 0o140000; + +// FileTypeField == FileType +/// Unknown file type value +pub const FILE_TYPE_UNKNOWN: u8 = 0; +/// Regular file type value +pub const FILE_TYPE_REGULAR_FILE: u8 = 1; +/// Directory file type value +pub const FILE_TYPE_DIRECTORY: u8 = 2; +/// Character device file type value +pub const FILE_TYPE_CHARACTER_DEVICE: u8 = 3; +/// Block device file type value +pub const FILE_TYPE_BLOCK_DEVICE: u8 = 4; +/// FIFO file type value +pub const FILE_TYPE_FIFO: u8 = 5; +/// Socket file type value +pub const FILE_TYPE_SOCKET: u8 = 6; +/// 
/// File type enumeration for directory entries
///
/// The enum is `#[repr(u8)]` and every discriminant is the matching
/// `FILE_TYPE_*` constant, so `file_type as u8` yields that constant's value.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum FileType {
    /// Unknown or invalid file type
    Unknown = FILE_TYPE_UNKNOWN,
    /// Regular file
    RegularFile = FILE_TYPE_REGULAR_FILE,
    /// Directory
    Directory = FILE_TYPE_DIRECTORY,
    /// Character device
    CharacterDevice = FILE_TYPE_CHARACTER_DEVICE,
    /// Block device
    BlockDevice = FILE_TYPE_BLOCK_DEVICE,
    /// FIFO (named pipe)
    Fifo = FILE_TYPE_FIFO,
    /// Socket
    Socket = FILE_TYPE_SOCKET,
    /// Symbolic link
    Symlink = FILE_TYPE_SYMLINK,
}
+ _ => Self::Unknown, + } + } +} + +impl From for FileTypeField { + fn from(value: FileType) -> Self { + FileTypeField(value as u8) + } +} + +impl std::ops::BitOr for FileType { + type Output = U16; + + // Convert ifmt | permissions into a st_mode field + fn bitor(self, permissions: u16) -> U16 { + (match self { + Self::RegularFile => S_IFREG, + Self::CharacterDevice => S_IFCHR, + Self::Directory => S_IFDIR, + Self::BlockDevice => S_IFBLK, + Self::Fifo => S_IFIFO, + Self::Symlink => S_IFLNK, + Self::Socket => S_IFSOCK, + Self::Unknown => unreachable!(), + } | permissions) + .into() + } +} + +/// Raw file type field as stored in directory entries +#[derive(Copy, Clone, FromBytes, Immutable, IntoBytes, KnownLayout, PartialEq)] +pub struct FileTypeField(u8); + +impl fmt::Debug for FileTypeField { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&FileType::from(*self), f) + } +} + +impl Default for FileTypeField { + fn default() -> Self { + FileTypeField(0xff) + } +} + +/* ModeField */ +/// File mode field combining file type and permissions +#[derive(Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout, PartialEq)] +pub struct ModeField(pub U16); + +impl ModeField { + /// Checks if this mode field represents a directory + pub fn is_dir(self) -> bool { + self.0.get() & S_IFMT == S_IFDIR + } +} + +impl fmt::Debug for ModeField { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mode = self.0.get(); + let fmt = match mode & S_IFMT { + S_IFREG => "regular file", + S_IFCHR => "chardev", + S_IFDIR => "directory", + S_IFBLK => "blockdev", + S_IFIFO => "fifo", + S_IFLNK => "symlink", + S_IFSOCK => "socket", + _ => "INVALID", + }; + + write!(f, "0{mode:06o} ({fmt})") + } +} + +impl std::ops::BitOr for FileType { + type Output = ModeField; + + fn bitor(self, permissions: u32) -> ModeField { + ModeField(self | (permissions as u16)) + } +} + +/* composefs Header */ + +/// EROFS format version number +pub const VERSION: U32 = 
/// Composefs-specific header preceding the standard EROFS superblock
///
/// Laid out `#[repr(C)]` from little-endian 32-bit fields only: four scalar
/// words followed by four reserved words, eight `U32` values in total.
#[derive(Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
pub struct ComposefsHeader {
    /// Magic number for identification
    pub magic: U32,
    /// EROFS format version
    pub version: U32,
    /// Composefs feature flags
    pub flags: U32,
    /// Composefs format version
    pub composefs_version: U32,
    /// Reserved for future use
    pub unused: [U32; 4],
}
/// Compact 32-byte inode header for basic file metadata
///
/// Fields are declared in on-disk order (`#[repr(C)]`); the blank lines group
/// them into 8-byte runs. Compared with the extended header, the size, uid,
/// gid and link count are stored in narrower integers and there is no mtime.
#[derive(Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
pub struct CompactInodeHeader {
    /// Format field combining inode layout and data layout
    pub format: FormatField,
    /// Extended attribute inode count
    pub xattr_icount: U16,
    /// File mode (type and permissions)
    pub mode: ModeField,
    /// Number of hard links
    pub nlink: U16,

    /// File size in bytes
    pub size: U32,
    /// Reserved field
    pub reserved: U32,

    /// Union field (block address, device number, etc.)
    pub u: U32,
    /// Inode number for 32-bit stat compatibility
    pub ino: U32, // only used for 32-bit stat compatibility

    /// User ID
    pub uid: U16,
    /// Group ID
    pub gid: U16,
    /// Reserved field
    pub reserved2: [u8; 4],
}
/// Header for a directory entry
///
/// Fixed-size part of an entry (`#[repr(C)]`): a 64-bit inode reference, a
/// 16-bit name offset and two single bytes. The name itself is not stored
/// here; `name_offset` says where to find it within the directory block.
#[derive(Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
pub struct DirectoryEntryHeader {
    /// Inode number of the entry
    pub inode_offset: U64,
    /// Offset to the entry name within the directory block
    pub name_offset: U16,
    /// File type of the entry
    pub file_type: FileTypeField,
    /// Reserved field
    pub reserved: u8,
}
+ +use core::mem::size_of; +use std::collections::{BTreeSet, HashSet}; +use std::ops::Range; + +use thiserror::Error; +use zerocopy::{little_endian::U32, FromBytes, Immutable, KnownLayout}; + +use crate::{ + composefs::OverlayMetacopy, + format::{ + CompactInodeHeader, ComposefsHeader, DataLayout, DirectoryEntryHeader, ExtendedInodeHeader, + InodeXAttrHeader, ModeField, Superblock, XAttrHeader, + }, +}; +use composefs_types::fsverity::FsVerityHashValue; + +/// Rounds `n` up to the nearest multiple of `to`, which must be a power of two (bit-mask rounding) +pub fn round_up(n: usize, to: usize) -> usize { + (n + to - 1) & !(to - 1) +} + +/// Common interface for accessing inode header fields across different layouts +pub trait InodeHeader { + /// Returns the data layout method used by this inode + fn data_layout(&self) -> DataLayout; + /// Returns the extended attribute inode count + fn xattr_icount(&self) -> u16; + /// Returns the file mode + fn mode(&self) -> ModeField; + /// Returns the file size in bytes + fn size(&self) -> u64; + /// Returns the union field value (block address, device number, etc.) 
+ fn u(&self) -> u32; + + /// Calculates the number of additional bytes after the header + fn additional_bytes(&self, blkszbits: u8) -> usize { + let block_size = 1 << blkszbits; + self.xattr_size() + + match self.data_layout() { + DataLayout::FlatPlain => 0, + DataLayout::FlatInline => self.size() as usize % block_size, + DataLayout::ChunkBased => 4, + } + } + + /// Calculates the size of the extended attributes section + fn xattr_size(&self) -> usize { + match self.xattr_icount() { + 0 => 0, + n => (n as usize - 1) * 4 + 12, + } + } +} + +impl InodeHeader for ExtendedInodeHeader { + fn data_layout(&self) -> DataLayout { + self.format.try_into().unwrap() + } + + fn xattr_icount(&self) -> u16 { + self.xattr_icount.get() + } + + fn mode(&self) -> ModeField { + self.mode + } + + fn size(&self) -> u64 { + self.size.get() + } + + fn u(&self) -> u32 { + self.u.get() + } +} + +impl InodeHeader for CompactInodeHeader { + fn data_layout(&self) -> DataLayout { + self.format.try_into().unwrap() + } + + fn xattr_icount(&self) -> u16 { + self.xattr_icount.get() + } + + fn mode(&self) -> ModeField { + self.mode + } + + fn size(&self) -> u64 { + self.size.get() as u64 + } + + fn u(&self) -> u32 { + self.u.get() + } +} + +/// Extended attribute entry with header and variable-length data +#[repr(C)] +#[derive(FromBytes, Immutable, KnownLayout)] +pub struct XAttr { + /// Extended attribute header + pub header: XAttrHeader, + /// Variable-length data containing name suffix and value + pub data: [u8], +} + +/// Inode structure with header and variable-length data +#[repr(C)] +#[derive(FromBytes, Immutable, KnownLayout)] +pub struct Inode { + /// Inode header (compact or extended) + pub header: Header, + /// Variable-length data containing xattrs and inline content + pub data: [u8], +} + +/// Extended attributes section of an inode +#[repr(C)] +#[derive(Debug, FromBytes, Immutable, KnownLayout)] +pub struct InodeXAttrs { + /// Extended attributes header + pub header: InodeXAttrHeader, 
+ /// Variable-length data containing shared xattr refs and local xattrs + pub data: [u8], +} + +impl XAttrHeader { + /// Calculates the total number of elements for an xattr entry including padding + pub fn calculate_n_elems(&self) -> usize { + round_up(self.name_len as usize + self.value_size.get() as usize, 4) + } +} + +impl XAttr { + /// Parses an xattr from a byte slice, returning the xattr and remaining bytes + pub fn from_prefix(data: &[u8]) -> (&XAttr, &[u8]) { + let header = XAttrHeader::ref_from_bytes(&data[..4]).unwrap(); + Self::ref_from_prefix_with_elems(data, header.calculate_n_elems()).unwrap() + } + + /// Returns the attribute name suffix + pub fn suffix(&self) -> &[u8] { + &self.data[..self.header.name_len as usize] + } + + /// Returns the attribute value + pub fn value(&self) -> &[u8] { + &self.data[self.header.name_len as usize..][..self.header.value_size.get() as usize] + } + + /// Returns the padding bytes after the value + pub fn padding(&self) -> &[u8] { + &self.data[self.header.name_len as usize + self.header.value_size.get() as usize..] + } +} + +/// Operations on inode data +pub trait InodeOps { + /// Returns the extended attributes section if present + fn xattrs(&self) -> Option<&InodeXAttrs>; + /// Returns the inline data portion + fn inline(&self) -> Option<&[u8]>; + /// Returns the range of block IDs used by this inode + fn blocks(&self, blkszbits: u8) -> Range; +} + +impl InodeHeader for &Inode
{ + fn data_layout(&self) -> DataLayout { + self.header.data_layout() + } + + fn xattr_icount(&self) -> u16 { + self.header.xattr_icount() + } + + fn mode(&self) -> ModeField { + self.header.mode() + } + + fn size(&self) -> u64 { + self.header.size() + } + + fn u(&self) -> u32 { + self.header.u() + } +} + +impl InodeOps for &Inode
{ + fn xattrs(&self) -> Option<&InodeXAttrs> { + match self.header.xattr_size() { + 0 => None, + n => Some(InodeXAttrs::ref_from_bytes(&self.data[..n]).unwrap()), + } + } + + fn inline(&self) -> Option<&[u8]> { + let data = &self.data[self.header.xattr_size()..]; + + if data.is_empty() { + return None; + } + + Some(data) + } + + fn blocks(&self, blkszbits: u8) -> Range { + let size = self.header.size(); + let block_size = 1 << blkszbits; + let start = self.header.u() as u64; + + match self.header.data_layout() { + DataLayout::FlatPlain => Range { + start, + end: start + size.div_ceil(block_size), + }, + DataLayout::FlatInline => Range { + start, + end: start + size / block_size, + }, + DataLayout::ChunkBased => Range { start, end: start }, + } + } +} + +// this lets us avoid returning Box from Image.inode() +// but ... wow. +/// Inode type enum allowing static dispatch for different header layouts +#[derive(Debug)] +pub enum InodeType<'img> { + /// Compact inode with 32-byte header + Compact(&'img Inode), + /// Extended inode with 64-byte header + Extended(&'img Inode), +} + +impl InodeHeader for InodeType<'_> { + fn u(&self) -> u32 { + match self { + Self::Compact(inode) => inode.u(), + Self::Extended(inode) => inode.u(), + } + } + + fn size(&self) -> u64 { + match self { + Self::Compact(inode) => inode.size(), + Self::Extended(inode) => inode.size(), + } + } + + fn xattr_icount(&self) -> u16 { + match self { + Self::Compact(inode) => inode.xattr_icount(), + Self::Extended(inode) => inode.xattr_icount(), + } + } + + fn data_layout(&self) -> DataLayout { + match self { + Self::Compact(inode) => inode.data_layout(), + Self::Extended(inode) => inode.data_layout(), + } + } + + fn mode(&self) -> ModeField { + match self { + Self::Compact(inode) => inode.mode(), + Self::Extended(inode) => inode.mode(), + } + } +} + +impl InodeOps for InodeType<'_> { + fn xattrs(&self) -> Option<&InodeXAttrs> { + match self { + Self::Compact(inode) => inode.xattrs(), + 
Self::Extended(inode) => inode.xattrs(), + } + } + + fn inline(&self) -> Option<&[u8]> { + match self { + Self::Compact(inode) => inode.inline(), + Self::Extended(inode) => inode.inline(), + } + } + + fn blocks(&self, blkszbits: u8) -> Range { + match self { + Self::Compact(inode) => inode.blocks(blkszbits), + Self::Extended(inode) => inode.blocks(blkszbits), + } + } +} + +/// Parsed EROFS image with references to key structures +#[derive(Debug)] +pub struct Image<'i> { + /// Raw image bytes + pub image: &'i [u8], + /// Composefs header + pub header: &'i ComposefsHeader, + /// Block size in bits + pub blkszbits: u8, + /// Block size in bytes + pub block_size: usize, + /// Superblock + pub sb: &'i Superblock, + /// Inode metadata region + pub inodes: &'i [u8], + /// Extended attributes region + pub xattrs: &'i [u8], +} + +impl<'img> Image<'img> { + /// Opens an EROFS image from raw bytes + pub fn open(image: &'img [u8]) -> Self { + let header = ComposefsHeader::ref_from_prefix(image) + .expect("header err") + .0; + let sb = Superblock::ref_from_prefix(&image[1024..]) + .expect("superblock err") + .0; + let blkszbits = sb.blkszbits; + let block_size = 1usize << blkszbits; + assert!(block_size != 0); + let inodes = &image[sb.meta_blkaddr.get() as usize * block_size..]; + let xattrs = &image[sb.xattr_blkaddr.get() as usize * block_size..]; + Image { + image, + header, + blkszbits, + block_size, + sb, + inodes, + xattrs, + } + } + + /// Returns an inode by its ID + pub fn inode(&self, id: u64) -> InodeType<'_> { + let inode_data = &self.inodes[id as usize * 32..]; + if inode_data[0] & 1 != 0 { + let header = ExtendedInodeHeader::ref_from_bytes(&inode_data[..64]).unwrap(); + InodeType::Extended( + Inode::::ref_from_prefix_with_elems( + inode_data, + header.additional_bytes(self.blkszbits), + ) + .unwrap() + .0, + ) + } else { + let header = CompactInodeHeader::ref_from_bytes(&inode_data[..32]).unwrap(); + InodeType::Compact( + Inode::::ref_from_prefix_with_elems( + 
inode_data, + header.additional_bytes(self.blkszbits), + ) + .unwrap() + .0, + ) + } + } + + /// Returns a shared extended attribute by its ID + pub fn shared_xattr(&self, id: u32) -> &XAttr { + let xattr_data = &self.xattrs[id as usize * 4..]; + let header = XAttrHeader::ref_from_bytes(&xattr_data[..4]).unwrap(); + XAttr::ref_from_prefix_with_elems(xattr_data, header.calculate_n_elems()) + .unwrap() + .0 + } + + /// Returns a data block by its ID + pub fn block(&self, id: u64) -> &[u8] { + &self.image[id as usize * self.block_size..][..self.block_size] + } + + /// Returns a data block by its ID as a DataBlock reference + pub fn data_block(&self, id: u64) -> &DataBlock { + DataBlock::ref_from_bytes(self.block(id)).unwrap() + } + + /// Returns a directory block by its ID + pub fn directory_block(&self, id: u64) -> &DirectoryBlock { + DirectoryBlock::ref_from_bytes(self.block(id)).unwrap() + } + + /// Returns the root directory inode + pub fn root(&self) -> InodeType<'_> { + self.inode(self.sb.root_nid.get() as u64) + } +} + +// TODO: there must be an easier way... 
+#[derive(FromBytes, Immutable, KnownLayout)] +#[repr(C)] +struct Array([T]); + +impl InodeXAttrs { + /// Returns the array of shared xattr IDs + pub fn shared(&self) -> &[U32] { + &Array::ref_from_prefix_with_elems(&self.data, self.header.shared_count as usize) + .unwrap() + .0 + .0 + } + + /// Returns an iterator over local (non-shared) xattrs + pub fn local(&self) -> XAttrIter<'_> { + XAttrIter { + data: &self.data[self.header.shared_count as usize * 4..], + } + } +} + +/// Iterator over local extended attributes +#[derive(Debug)] +pub struct XAttrIter<'img> { + data: &'img [u8], +} + +impl<'img> Iterator for XAttrIter<'img> { + type Item = &'img XAttr; + + fn next(&mut self) -> Option { + if !self.data.is_empty() { + let (result, rest) = XAttr::from_prefix(self.data); + self.data = rest; + Some(result) + } else { + None + } + } +} + +/// Data block containing file content +#[repr(C)] +#[derive(FromBytes, Immutable, KnownLayout)] +pub struct DataBlock(pub [u8]); + +/// Directory block containing directory entries +#[repr(C)] +#[derive(FromBytes, Immutable, KnownLayout)] +pub struct DirectoryBlock(pub [u8]); + +impl DirectoryBlock { + /// Returns the directory entry header at the given index + pub fn get_entry_header(&self, n: usize) -> &DirectoryEntryHeader { + let entry_data = &self.0 + [n * size_of::()..(n + 1) * size_of::()]; + DirectoryEntryHeader::ref_from_bytes(entry_data).unwrap() + } + + /// Returns all directory entry headers as a slice + pub fn get_entry_headers(&self) -> &[DirectoryEntryHeader] { + &Array::ref_from_prefix_with_elems(&self.0, self.n_entries()) + .unwrap() + .0 + .0 + } + + /// Returns the number of entries in this directory block + pub fn n_entries(&self) -> usize { + let first = self.get_entry_header(0); + let offset = first.name_offset.get(); + assert!(offset != 0); + assert!(offset.is_multiple_of(12)); + offset as usize / 12 + } + + /// Returns an iterator over directory entries + pub fn entries(&self) -> DirectoryEntries<'_> { + 
DirectoryEntries { + block: self, + length: self.n_entries(), + position: 0, + } + } +} + +// High-level iterator interface +/// A single directory entry with header and name +#[derive(Debug)] +pub struct DirectoryEntry<'a> { + /// Directory entry header + pub header: &'a DirectoryEntryHeader, + /// Entry name + pub name: &'a [u8], +} + +impl DirectoryEntry<'_> { + /// Returns the inode number (nid) for this directory entry. + pub fn nid(&self) -> u64 { + self.header.inode_offset.get() + } +} + +/// Iterator over directory entries in a directory block +#[derive(Debug)] +pub struct DirectoryEntries<'d> { + block: &'d DirectoryBlock, + length: usize, + position: usize, +} + +impl<'d> Iterator for DirectoryEntries<'d> { + type Item = DirectoryEntry<'d>; + + fn next(&mut self) -> Option { + if self.position < self.length { + let header = self.block.get_entry_header(self.position); + let name_start = header.name_offset.get() as usize; + self.position += 1; + + let name = if self.position == self.length { + let with_padding = &self.block.0[name_start..]; + let end = with_padding.partition_point(|c| *c != 0); + &with_padding[..end] + } else { + let next = self.block.get_entry_header(self.position); + let name_end = next.name_offset.get() as usize; + &self.block.0[name_start..name_end] + }; + + Some(DirectoryEntry { header, name }) + } else { + None + } + } +} + +/// Errors that can occur when reading EROFS images +#[derive(Error, Debug)] +pub enum ErofsReaderError { + /// Directory has multiple hard links (not allowed) + #[error("Hardlinked directories detected")] + DirectoryHardlinks, + /// Directory nesting exceeds maximum depth + #[error("Maximum directory depth exceeded")] + DepthExceeded, + /// The '.' entry is invalid + #[error("Invalid '.' entry in directory")] + InvalidSelfReference, + /// The '..' entry is invalid + #[error("Invalid '..' 
entry in directory")] + InvalidParentReference, + /// File type in directory entry doesn't match inode + #[error("File type in dirent doesn't match type in inode")] + FileTypeMismatch, +} + +type ReadResult = Result; + +/// Collects object references from an EROFS image for garbage collection +#[derive(Debug)] +pub struct ObjectCollector { + visited_nids: HashSet, + nids_to_visit: BTreeSet, + objects: HashSet, +} + +impl ObjectCollector { + fn visit_xattr(&mut self, attr: &XAttr) { + // This is the index of "trusted". See XATTR_PREFIXES in format.rs. + if attr.header.name_index != 4 { + return; + } + if attr.suffix() != b"overlay.metacopy" { + return; + } + if let Ok(value) = OverlayMetacopy::read_from_bytes(attr.value()) { + if value.valid() { + self.objects.insert(value.digest); + } + } + } + + fn visit_xattrs(&mut self, img: &Image, xattrs: &InodeXAttrs) -> ReadResult<()> { + for id in xattrs.shared() { + self.visit_xattr(img.shared_xattr(id.get())); + } + for attr in xattrs.local() { + self.visit_xattr(attr); + } + Ok(()) + } + + fn visit_directory_block(&mut self, block: &DirectoryBlock) { + for entry in block.entries() { + if entry.name != b"." && entry.name != b".." 
{ + let nid = entry.nid(); + if !self.visited_nids.contains(&nid) { + self.nids_to_visit.insert(nid); + } + } + } + } + + fn visit_nid(&mut self, img: &Image, nid: u64) -> ReadResult<()> { + let first_time = self.visited_nids.insert(nid); + assert!(first_time); // should not have been added to the "to visit" list otherwise + + let inode = img.inode(nid); + + if let Some(xattrs) = inode.xattrs() { + self.visit_xattrs(img, xattrs)?; + } + + if inode.mode().is_dir() { + for blkid in inode.blocks(img.sb.blkszbits) { + self.visit_directory_block(img.directory_block(blkid)); + } + + if let Some(inline) = inode.inline() { + let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); + self.visit_directory_block(inline_block); + } + } + + Ok(()) + } +} + +/// Collects all object references from an EROFS image +/// +/// This function walks the directory tree and extracts fsverity object IDs +/// from overlay.metacopy xattrs for garbage collection purposes. +/// +/// Returns a set of all referenced object IDs. +pub fn collect_objects(image: &[u8]) -> ReadResult> { + let img = Image::open(image); + let mut this = ObjectCollector { + visited_nids: HashSet::new(), + nids_to_visit: BTreeSet::new(), + objects: HashSet::new(), + }; + + // nids_to_visit is initialized with the root directory. Visiting directory nids will add + // more nids to the "to visit" list. Keep iterating until it's empty. 
+ this.nids_to_visit.insert(img.sb.root_nid.get() as u64); + while let Some(nid) = this.nids_to_visit.pop_first() { + this.visit_nid(&img, nid)?; + } + Ok(this.objects) +} diff --git a/crates/composefs-types/Cargo.toml b/crates/composefs-types/Cargo.toml new file mode 100644 index 00000000..1fb84c34 --- /dev/null +++ b/crates/composefs-types/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "composefs-types" +description = "Core types for composefs (FsVerityHashValue, tree structures)" +publish = false + +edition.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[dependencies] +hex = { version = "0.4.0", default-features = false, features = ["std"] } +sha2 = { version = "0.10.1", default-features = false, features = ["std"] } +zerocopy = { version = "0.8.0", default-features = false, features = ["derive", "std"] } + +[lints] +workspace = true diff --git a/crates/composefs-types/src/fsverity.rs b/crates/composefs-types/src/fsverity.rs new file mode 100644 index 00000000..4581da84 --- /dev/null +++ b/crates/composefs-types/src/fsverity.rs @@ -0,0 +1,277 @@ +//! Hash value types and trait definitions for fs-verity. +//! +//! This module defines the FsVerityHashValue trait and concrete implementations +//! for SHA-256 and SHA-512 hash values, including parsing from hex strings +//! and object pathnames. + +use core::{fmt, hash::Hash}; + +use hex::FromHexError; +use sha2::{digest::FixedOutputReset, digest::Output, Digest, Sha256, Sha512}; +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned}; + +/// Trait for fs-verity hash value types supporting SHA-256 and SHA-512. +/// +/// This trait defines the interface for hash values used in fs-verity operations, +/// including serialization to/from hex strings and object store pathnames. 
+pub trait FsVerityHashValue +where + Self: Clone, + Self: From>, + Self: FromBytes + Immutable + IntoBytes + KnownLayout + Unaligned, + Self: Hash + Eq, + Self: fmt::Debug, + Self: Send + Sync + Unpin + 'static, +{ + /// The underlying hash digest algorithm type. + type Digest: Digest + FixedOutputReset + fmt::Debug; + /// The fs-verity algorithm identifier (1 for SHA-256, 2 for SHA-512). + const ALGORITHM: u8; + /// An empty hash value with all bytes set to zero. + const EMPTY: Self; + /// The algorithm identifier string ("sha256" or "sha512"). + const ID: &str; + + /// Parse a hash value from a hexadecimal string. + /// + /// # Arguments + /// * `hex` - A hexadecimal string representation of the hash + /// + /// # Returns + /// The parsed hash value, or an error if the input is invalid. + fn from_hex(hex: impl AsRef<[u8]>) -> Result { + let mut value = Self::EMPTY; + hex::decode_to_slice(hex.as_ref(), value.as_mut_bytes())?; + Ok(value) + } + + /// Parse a hash value from an object store directory number and basename. + /// + /// Object stores typically use a two-level hierarchy where the first byte + /// of the hash determines the directory name and the remaining bytes form + /// the basename. + /// + /// # Arguments + /// * `dirnum` - The directory number (first byte of the hash) + /// * `basename` - The hexadecimal basename (remaining bytes) + /// + /// # Returns + /// The parsed hash value, or an error if the input is invalid. + fn from_object_dir_and_basename( + dirnum: u8, + basename: impl AsRef<[u8]>, + ) -> Result { + let expected_size = 2 * (size_of::() - 1); + let bytes = basename.as_ref(); + if bytes.len() != expected_size { + return Err(FromHexError::InvalidStringLength); + } + let mut result = Self::EMPTY; + result.as_mut_bytes()[0] = dirnum; + hex::decode_to_slice(bytes, &mut result.as_mut_bytes()[1..])?; + Ok(result) + } + + /// Parse a hash value from a full object pathname. 
+ /// + /// Parses a pathname in the format "xx/yyyyyy" where "xxyyyyyy" is the + /// full hexadecimal hash. The prefix before the two-level hierarchy is ignored. + /// + /// # Arguments + /// * `pathname` - The object pathname (e.g., "ab/cdef1234...") + /// + /// # Returns + /// The parsed hash value, or an error if the input is invalid. + fn from_object_pathname(pathname: impl AsRef<[u8]>) -> Result { + // We want the trailing part of "....../xx/yyyyyy", where xxyyyyyy has our hex length + let min_size = 2 * size_of::() + 1; + let bytes = pathname.as_ref(); + if bytes.len() < min_size { + return Err(FromHexError::InvalidStringLength); + } + + let trailing = &bytes[bytes.len() - min_size..]; + let mut result = Self::EMPTY; + hex::decode_to_slice(&trailing[0..2], &mut result.as_mut_bytes()[0..1])?; + if trailing[2] != b'/' { + return Err(FromHexError::InvalidHexCharacter { + c: trailing[2] as char, + index: 2, + }); + } + hex::decode_to_slice(&trailing[3..], &mut result.as_mut_bytes()[1..])?; + Ok(result) + } + + /// Convert the hash value to an object pathname. + /// + /// Formats the hash as "xx/yyyyyy" where xx is the first byte in hex + /// and yyyyyy is the remaining bytes in hex. + /// + /// # Returns + /// A string in object pathname format. + fn to_object_pathname(&self) -> String { + format!( + "{:02x}/{}", + self.as_bytes()[0], + hex::encode(&self.as_bytes()[1..]) + ) + } + + /// Convert the hash value to an object directory name. + /// + /// Returns just the first byte of the hash as a two-character hex string. + /// + /// # Returns + /// A string representing the directory name. + fn to_object_dir(&self) -> String { + format!("{:02x}", self.as_bytes()[0]) + } + + /// Convert the hash value to a hexadecimal string. + /// + /// # Returns + /// The full hash as a hex string. + fn to_hex(&self) -> String { + hex::encode(self.as_bytes()) + } + + /// Convert the hash value to an identifier string with algorithm prefix. 
+ /// + /// # Returns + /// A string in the format "algorithm:hexhash" (e.g., "sha256:abc123..."). + fn to_id(&self) -> String { + format!("{}:{}", Self::ID, self.to_hex()) + } +} + +impl fmt::Debug for Sha256HashValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "sha256:{}", self.to_hex()) + } +} + +impl fmt::Debug for Sha512HashValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "sha512:{}", self.to_hex()) + } +} + +/// A SHA-256 hash value for fs-verity operations. +/// +/// This is a 32-byte hash value using the SHA-256 algorithm. +#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned)] +#[repr(C)] +pub struct Sha256HashValue([u8; 32]); + +impl From> for Sha256HashValue { + fn from(value: Output) -> Self { + Self(value.into()) + } +} + +impl FsVerityHashValue for Sha256HashValue { + type Digest = Sha256; + const ALGORITHM: u8 = 1; + const EMPTY: Self = Self([0; 32]); + const ID: &str = "sha256"; +} + +/// A SHA-512 hash value for fs-verity operations. +/// +/// This is a 64-byte hash value using the SHA-512 algorithm. 
+#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned)] +#[repr(C)] +pub struct Sha512HashValue([u8; 64]); + +impl From> for Sha512HashValue { + fn from(value: Output) -> Self { + Self(value.into()) + } +} + +impl FsVerityHashValue for Sha512HashValue { + type Digest = Sha512; + const ALGORITHM: u8 = 2; + const EMPTY: Self = Self([0; 64]); + const ID: &str = "sha512"; +} + +#[cfg(test)] +mod test { + use super::*; + + fn test_fsverity_hash() { + let len = size_of::(); + let hexlen = len * 2; + + let hex = H::EMPTY.to_hex(); + assert_eq!(hex.as_bytes(), [b'0'].repeat(hexlen)); + + assert_eq!(H::EMPTY.to_id(), format!("{}:{}", H::ID, hex)); + assert_eq!(format!("{:?}", H::EMPTY), format!("{}:{}", H::ID, hex)); + + assert_eq!(H::from_hex(&hex), Ok(H::EMPTY)); + + assert_eq!(H::from_hex("lol"), Err(FromHexError::OddLength)); + assert_eq!(H::from_hex("lolo"), Err(FromHexError::InvalidStringLength)); + assert_eq!( + H::from_hex([b'l'].repeat(hexlen)), + Err(FromHexError::InvalidHexCharacter { c: 'l', index: 0 }) + ); + + assert_eq!(H::from_object_dir_and_basename(0, &hex[2..]), Ok(H::EMPTY)); + + assert_eq!(H::from_object_dir_and_basename(0, &hex[2..]), Ok(H::EMPTY)); + + assert_eq!( + H::from_object_dir_and_basename(0, "lol"), + Err(FromHexError::InvalidStringLength) + ); + + assert_eq!( + H::from_object_dir_and_basename(0, [b'l'].repeat(hexlen - 2)), + Err(FromHexError::InvalidHexCharacter { c: 'l', index: 0 }) + ); + + assert_eq!( + H::from_object_pathname(format!("{}/{}", &hex[0..2], &hex[2..])), + Ok(H::EMPTY) + ); + + assert_eq!( + H::from_object_pathname(format!("../this/is/ignored/{}/{}", &hex[0..2], &hex[2..])), + Ok(H::EMPTY) + ); + + assert_eq!( + H::from_object_pathname(&hex), + Err(FromHexError::InvalidStringLength) + ); + + assert_eq!( + H::from_object_pathname("lol"), + Err(FromHexError::InvalidStringLength) + ); + + assert_eq!( + H::from_object_pathname([b'l'].repeat(hexlen + 1)), + 
Err(FromHexError::InvalidHexCharacter { c: 'l', index: 0 }) + ); + + assert_eq!( + H::from_object_pathname(format!("{}0{}", &hex[0..2], &hex[2..])), + Err(FromHexError::InvalidHexCharacter { c: '0', index: 2 }) + ); + } + + #[test] + fn test_sha256hashvalue() { + test_fsverity_hash::(); + } + + #[test] + fn test_sha512hashvalue() { + test_fsverity_hash::(); + } +} diff --git a/crates/composefs-types/src/lib.rs b/crates/composefs-types/src/lib.rs new file mode 100644 index 00000000..d16c6b83 --- /dev/null +++ b/crates/composefs-types/src/lib.rs @@ -0,0 +1,10 @@ +//! Core types for composefs: hash values and constants. + +#![forbid(unsafe_code)] + +pub mod fsverity; + +/// All files that contain 64 or fewer bytes (size <= INLINE_CONTENT_MAX) should be stored inline +/// in the erofs image (and also in splitstreams). All files with 65 or more bytes (size > MAX) +/// should be written to the object storage and referred to from the image (and splitstreams). +pub const INLINE_CONTENT_MAX: usize = 64; diff --git a/crates/composefs/Cargo.toml b/crates/composefs/Cargo.toml index 97f71ef3..178b316d 100644 --- a/crates/composefs/Cargo.toml +++ b/crates/composefs/Cargo.toml @@ -17,7 +17,9 @@ test = ["tempfile"] [dependencies] anyhow = { version = "1.0.87", default-features = false } +composefs-erofs = { workspace = true } composefs-ioctls = { workspace = true } +composefs-types = { workspace = true } fn-error-context = "0.2" hex = { version = "0.4.0", default-features = false, features = ["std"] } log = { version = "0.4.8", default-features = false } diff --git a/crates/composefs/src/erofs/composefs.rs b/crates/composefs/src/erofs/composefs.rs index 4fc2e4ef..50747355 100644 --- a/crates/composefs/src/erofs/composefs.rs +++ b/crates/composefs/src/erofs/composefs.rs @@ -1,38 +1,4 @@ //! Composefs-specific EROFS structures and overlay metadata. //! -//! This module defines EROFS structures specific to composefs usage, -//! particularly overlay metadata for fs-verity integration. 
- -use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; - -use crate::fsverity::FsVerityHashValue; - -/* From linux/fs/overlayfs/overlayfs.h struct ovl_metacopy */ -#[derive(Debug, FromBytes, Immutable, KnownLayout, IntoBytes)] -#[repr(C)] -pub(super) struct OverlayMetacopy { - version: u8, - len: u8, - flags: u8, - digest_algo: u8, - pub(super) digest: H, -} - -impl OverlayMetacopy { - pub(super) fn new(digest: &H) -> Self { - Self { - version: 0, - len: size_of::() as u8, - flags: 0, - digest_algo: H::ALGORITHM, - digest: digest.clone(), - } - } - - pub(super) fn valid(&self) -> bool { - self.version == 0 - && self.len == size_of::() as u8 - && self.flags == 0 - && self.digest_algo == H::ALGORITHM - } -} +//! Re-exported from composefs-erofs. +pub use composefs_erofs::composefs::*; diff --git a/crates/composefs/src/erofs/debug.rs b/crates/composefs/src/erofs/debug.rs index 85f21590..aa19498d 100644 --- a/crates/composefs/src/erofs/debug.rs +++ b/crates/composefs/src/erofs/debug.rs @@ -1,538 +1,4 @@ //! Debug utilities for analyzing EROFS images. //! -//! This module provides tools for inspecting and debugging EROFS filesystem -//! images, including detailed structure dumping and space usage analysis. - -use std::{ - cmp::Ordering, - collections::BTreeMap, - ffi::OsStr, - fmt, - mem::discriminant, - os::unix::ffi::OsStrExt, - path::{Path, PathBuf}, -}; - -use anyhow::Result; -use zerocopy::FromBytes; - -use super::{ - format::{self, CompactInodeHeader, ComposefsHeader, ExtendedInodeHeader, Superblock}, - reader::{DataBlock, DirectoryBlock, Image, Inode, InodeHeader, InodeOps, InodeType, XAttr}, -}; - -/// Converts any reference to a thin pointer (as usize) -/// Used for address calculations in various outputs -macro_rules! addr { - ($ref: expr) => { - &raw const (*$ref) as *const u8 as usize - }; -} - -macro_rules! 
write_with_offset { - ($fmt: expr, $base: expr, $label: expr, $ref: expr) => {{ - let offset = addr!($ref) - addr!($base); - writeln!($fmt, "{offset:+8x} {}: {:?}", $label, $ref) - }}; -} - -macro_rules! write_fields { - ($fmt: expr, $base: expr, $struct: expr, $field: ident) => {{ - let value = &$struct.$field; - let default = if false { value } else { &Default::default() }; - if value != default { - write_with_offset!($fmt, $base, stringify!($field), value)?; - } - }}; - ($fmt: expr, $base: expr, $struct: expr, $head: ident; $($tail: ident);+) => {{ - write_fields!($fmt, $base, $struct, $head); - write_fields!($fmt, $base, $struct, $($tail);+); - }}; -} - -fn utf8_or_hex(data: &[u8]) -> String { - if let Ok(string) = std::str::from_utf8(data) { - format!("{string:?}") - } else { - hex::encode(data) - } -} - -// This is basically just a fancy fat pointer type -enum SegmentType<'img> { - Header(&'img ComposefsHeader), - Superblock(&'img Superblock), - CompactInode(&'img Inode), - ExtendedInode(&'img Inode), - XAttr(&'img XAttr), - DataBlock(&'img DataBlock), - DirectoryBlock(&'img DirectoryBlock), -} - -// TODO: Something for `enum_dispatch` would be good here, but I couldn't get it working... 
-impl SegmentType<'_> { - fn addr(&self) -> usize { - match self { - SegmentType::Header(h) => addr!(*h), - SegmentType::Superblock(sb) => addr!(*sb), - SegmentType::CompactInode(i) => addr!(*i), - SegmentType::ExtendedInode(i) => addr!(*i), - SegmentType::XAttr(x) => addr!(*x), - SegmentType::DataBlock(b) => addr!(*b), - SegmentType::DirectoryBlock(b) => addr!(*b), - } - } - - fn size(&self) -> usize { - match self { - SegmentType::Header(h) => size_of_val(*h), - SegmentType::Superblock(sb) => size_of_val(*sb), - SegmentType::CompactInode(i) => size_of_val(*i), - SegmentType::ExtendedInode(i) => size_of_val(*i), - SegmentType::XAttr(x) => size_of_val(*x), - SegmentType::DataBlock(b) => size_of_val(*b), - SegmentType::DirectoryBlock(b) => size_of_val(*b), - } - } - - fn typename(&self) -> &'static str { - match self { - SegmentType::Header(..) => "header", - SegmentType::Superblock(..) => "superblock", - SegmentType::CompactInode(..) => "compact inode", - SegmentType::ExtendedInode(..) => "extended inode", - SegmentType::XAttr(..) => "shared xattr", - SegmentType::DataBlock(..) => "data block", - SegmentType::DirectoryBlock(..) => "directory block", - } - } -} - -struct ImageVisitor<'img> { - image: &'img Image<'img>, - visited: BTreeMap, Vec>)>, -} - -impl<'img> ImageVisitor<'img> { - fn note(&mut self, segment: SegmentType<'img>, path: Option<&Path>) -> bool { - let offset = segment.addr() - self.image.image.as_ptr() as usize; - match self.visited.entry(offset) { - std::collections::btree_map::Entry::Occupied(mut e) => { - let (existing, paths) = e.get_mut(); - // TODO: figure out pointer value equality... 
- assert_eq!(discriminant(existing), discriminant(&segment)); - assert_eq!(existing.addr(), segment.addr()); - assert_eq!(existing.size(), segment.size()); - if let Some(path) = path { - paths.push(Box::from(path)); - } - true - } - std::collections::btree_map::Entry::Vacant(e) => { - let mut paths = vec![]; - if let Some(path) = path { - paths.push(Box::from(path)); - } - e.insert((segment, paths)); - false - } - } - } - - fn visit_directory_block(&mut self, block: &DirectoryBlock, path: &Path) { - for entry in block.entries() { - if entry.name == b"." || entry.name == b".." { - // TODO: maybe we want to follow those and let deduplication happen - continue; - } - self.visit_inode( - entry.header.inode_offset.get(), - &path.join(OsStr::from_bytes(entry.name)), - ); - } - } - - fn visit_inode(&mut self, id: u64, path: &Path) { - let inode = self.image.inode(id); - let segment = match inode { - InodeType::Compact(inode) => SegmentType::CompactInode(inode), - InodeType::Extended(inode) => SegmentType::ExtendedInode(inode), - }; - if self.note(segment, Some(path)) { - // TODO: maybe we want to throw an error if we detect loops - /* already processed */ - return; - } - - if let Some(xattrs) = inode.xattrs() { - for id in xattrs.shared() { - self.note( - SegmentType::XAttr(self.image.shared_xattr(id.get())), - Some(path), - ); - } - } - - if inode.mode().is_dir() { - if let Some(inline) = inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - self.visit_directory_block(inline_block, path); - } - - for id in inode.blocks(self.image.blkszbits) { - let block = self.image.directory_block(id); - self.visit_directory_block(block, path); - self.note(SegmentType::DirectoryBlock(block), Some(path)); - } - } else { - for id in inode.blocks(self.image.blkszbits) { - let block = self.image.data_block(id); - self.note(SegmentType::DataBlock(block), Some(path)); - } - } - } - - fn visit_image( - image: &'img Image<'img>, - ) -> BTreeMap, Vec>)> { - 
let mut this = Self { - image, - visited: BTreeMap::new(), - }; - this.note(SegmentType::Header(image.header), None); - this.note(SegmentType::Superblock(image.sb), None); - this.visit_inode(image.sb.root_nid.get() as u64, &PathBuf::from("/")); - this.visited - } -} - -impl fmt::Debug for XAttr { - // Injective (ie: accounts for every byte in the input) - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "({} {} {}) {}{} = {}", - self.header.name_index, - self.header.name_len, - self.header.value_size, - std::str::from_utf8(format::XATTR_PREFIXES[self.header.name_index as usize]).unwrap(), - utf8_or_hex(self.suffix()), - utf8_or_hex(self.value()), - )?; - if self.padding().iter().any(|c| *c != 0) { - write!(f, " {:?}", self.padding())?; - } - Ok(()) - } -} - -impl fmt::Debug for CompactInodeHeader { - // Injective (ie: accounts for every byte in the input) - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, "CompactInodeHeader")?; - write_fields!(f, self, self, - format; xattr_icount; mode; reserved; size; u; ino; uid; gid; nlink; reserved2); - Ok(()) - } -} - -impl fmt::Debug for ExtendedInodeHeader { - // Injective (ie: accounts for every byte in the input) - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, "ExtendedInodeHeader")?; - write_fields!(f, self, self, - format; xattr_icount; mode; reserved; size; u; ino; uid; - gid; mtime; mtime_nsec; nlink; reserved2); - Ok(()) - } -} - -fn hexdump(f: &mut impl fmt::Write, data: &[u8], rel: usize) -> fmt::Result { - let start = match rel { - 0 => 0, - ptr => data.as_ptr() as usize - ptr, - }; - let end = start + data.len(); - let start_row = start / 16; - let end_row = end.div_ceil(16); - - for row in start_row..end_row { - let row_start = row * 16; - let row_end = row * 16 + 16; - write!(f, "{row_start:+8x} ")?; - - for idx in row_start..row_end { - if start <= idx && idx < end { - write!(f, "{:02x} ", data[idx - start])?; - } else { - write!(f, " ")?; - } 
- if idx % 8 == 7 { - write!(f, " ")?; - } - } - write!(f, "|")?; - - for idx in row_start..row_end { - if start <= idx && idx < end { - let c = data[idx - start]; - if c.is_ascii() && !c.is_ascii_control() { - write!(f, "{}", c as char)?; - } else { - write!(f, ".")?; - } - } else { - write!(f, " ")?; - } - } - writeln!(f, "|")?; - } - - Ok(()) -} - -impl fmt::Debug for Inode { - // Injective (ie: accounts for every byte in the input) - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(&self.header, f)?; - - if let Some(xattrs) = self.xattrs() { - write_fields!(f, self, xattrs.header, name_filter; shared_count; reserved); - - if !xattrs.shared().is_empty() { - write_with_offset!(f, self, "shared xattrs", xattrs.shared())?; - } - - for xattr in xattrs.local() { - write_with_offset!(f, self, "xattr", xattr)?; - } - } - - // We want to print one of four things for inline data: - // - no data: print nothing - // - directory data: dump the entries - // - small inline text string: print it - // - otherwise, hexdump - let Some(inline) = self.inline() else { - // No inline data - return Ok(()); - }; - - // Directory dump - if self.header.mode().is_dir() { - let dir = DirectoryBlock::ref_from_bytes(inline).unwrap(); - let offset = addr!(dir) - addr!(self); - return write!( - f, - " +{offset:02x} --- inline directory entries ---{dir:#?}" - ); - } - - // Small string (<= 128 bytes, utf8, no control characters). 
- if inline.len() <= 128 && !inline.iter().any(|c| c.is_ascii_control()) { - if let Ok(string) = std::str::from_utf8(inline) { - return write_with_offset!(f, self, "inline", string); - } - } - - // Else, hexdump data block - hexdump(f, inline, &raw const self.header as usize) - } -} - -impl fmt::Debug for DirectoryBlock { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for entry in self.entries() { - writeln!(f)?; - write_fields!(f, self, entry.header, inode_offset; name_offset; file_type; reserved); - writeln!( - f, - "{:+8x} # name: {}", - entry.header.name_offset.get(), - utf8_or_hex(entry.name) - )?; - } - // TODO: trailing junk inside of st_size - // TODO: padding up to block or inode boundary - Ok(()) - } -} - -impl fmt::Debug for DataBlock { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - hexdump(f, &self.0, 0) - } -} - -impl fmt::Debug for ComposefsHeader { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, "ComposefsHeader")?; - write_fields!(f, self, self, - magic; flags; version; composefs_version; unused - ); - Ok(()) - } -} - -impl fmt::Debug for Superblock { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - writeln!(f, "Superblock")?; - write_fields!(f, self, self, - magic; checksum; feature_compat; blkszbits; extslots; root_nid; inos; build_time; - build_time_nsec; blocks; meta_blkaddr; xattr_blkaddr; uuid; volume_name; - feature_incompat; available_compr_algs; extra_devices; devt_slotoff; dirblkbits; - xattr_prefix_count; xattr_prefix_start; packed_nid; xattr_filter_reserved; reserved2 - ); - Ok(()) - } -} - -fn addto(map: &mut BTreeMap, key: &T, count: usize) { - if let Some(value) = map.get_mut(key) { - *value += count; - } else { - map.insert(key.clone(), count); - } -} - -/// Dumps unassigned or padding regions in the image -/// -/// Distinguishes between zero-filled padding and unknown content. 
-pub fn dump_unassigned( - output: &mut impl std::io::Write, - offset: usize, - unassigned: &[u8], -) -> Result<()> { - if unassigned.iter().all(|c| *c == 0) { - writeln!(output, "{offset:08x} Padding")?; - writeln!( - output, - "{:+8x} # {} nul bytes", - unassigned.len(), - unassigned.len() - )?; - writeln!(output)?; - } else { - writeln!(output, "{offset:08x} Unknown content")?; - let mut dump = String::new(); - hexdump(&mut dump, unassigned, 0)?; - writeln!(output, "{dump}")?; - } - Ok(()) -} - -/// Dumps a detailed debug view of an EROFS image -/// -/// Walks the entire image structure, outputting formatted information about -/// all inodes, blocks, xattrs, and padding. Also produces space usage statistics. -pub fn debug_img(output: &mut impl std::io::Write, data: &[u8]) -> Result<()> { - let image = Image::open(data); - let visited = ImageVisitor::visit_image(&image); - - let inode_start = (image.sb.meta_blkaddr.get() as usize) << image.sb.blkszbits; - let xattr_start = (image.sb.xattr_blkaddr.get() as usize) << image.sb.blkszbits; - - let mut space_stats = BTreeMap::new(); - let mut padding_stats = BTreeMap::new(); - - let mut last_segment_type = ""; - let mut offset = 0; - for (start, (segment, paths)) in visited { - let segment_type = segment.typename(); - addto(&mut space_stats, &segment_type, segment.size()); - - match offset.cmp(&start) { - Ordering::Less => { - dump_unassigned(output, offset, &data[offset..start])?; - addto( - &mut padding_stats, - &(last_segment_type, segment_type), - start - offset, - ); - offset = start; - } - Ordering::Greater => { - writeln!(output, "*** Overlapping segments!")?; - writeln!(output)?; - offset = start; - } - _ => {} - } - - last_segment_type = segment_type; - - for path in paths { - writeln!( - output, - "# Filename {}", - utf8_or_hex(path.as_os_str().as_bytes()) - )?; - } - - match segment { - SegmentType::Header(header) => { - writeln!(output, "{offset:08x} {header:?}")?; - } - SegmentType::Superblock(sb) => { - 
writeln!(output, "{offset:08x} {sb:?}")?; - } - SegmentType::CompactInode(inode) => { - writeln!(output, "# nid #{}", (offset - inode_start) / 32)?; - writeln!(output, "{offset:08x} {inode:#?}")?; - } - SegmentType::ExtendedInode(inode) => { - writeln!(output, "# nid #{}", (offset - inode_start) / 32)?; - writeln!(output, "{offset:08x} {inode:#?}")?; - } - SegmentType::XAttr(xattr) => { - writeln!(output, "# xattr #{}", (offset - xattr_start) / 4)?; - writeln!(output, "{offset:08x} {xattr:?}")?; - } - SegmentType::DirectoryBlock(block) => { - writeln!(output, "# block #{}", offset / image.block_size)?; - writeln!(output, "{offset:08x} Directory block{block:?}")?; - } - SegmentType::DataBlock(block) => { - writeln!(output, "# block #{}", offset / image.block_size)?; - writeln!(output, "{offset:08x} Data block\n{block:?}")?; - } - } - - offset += segment.size(); - } - - if offset < data.len() { - let unassigned = &data[offset..]; - dump_unassigned(output, offset, unassigned)?; - addto( - &mut padding_stats, - &(last_segment_type, "eof"), - unassigned.len(), - ); - offset = data.len(); - writeln!(output)?; - } - - if offset > data.len() { - writeln!(output, "*** Segments past EOF!")?; - offset = data.len(); - } - - writeln!(output, "Space statistics (total size {offset}B):")?; - for (key, value) in space_stats { - writeln!( - output, - " {key} = {value}B, {:.2}%", - (100. * value as f64) / (offset as f64) - )?; - } - for ((from, to), value) in padding_stats { - writeln!( - output, - " padding {from} -> {to} = {value}B, {:.2}%", - (100. * value as f64) / (offset as f64) - )?; - } - - Ok(()) -} +//! Re-exported from composefs-erofs. +pub use composefs_erofs::debug::*; diff --git a/crates/composefs/src/erofs/format.rs b/crates/composefs/src/erofs/format.rs index 44db065c..975d5389 100644 --- a/crates/composefs/src/erofs/format.rs +++ b/crates/composefs/src/erofs/format.rs @@ -1,490 +1,4 @@ //! EROFS on-disk format definitions and data structures. //! -//! 
This module defines the binary layout of EROFS filesystem structures -//! including superblocks, inodes, directory entries, and other metadata -//! using safe zerocopy-based parsing. - -// This is currently implemented using zerocopy but the eventual plan is to do this with safe -// transmutation. As such: all of the structures are defined in terms of pure LE integer sizes, we -// handle the conversion to enum values separately, and we avoid the TryFromBytes trait. - -use std::fmt; - -use zerocopy::{ - little_endian::{U16, U32, U64}, - FromBytes, Immutable, IntoBytes, KnownLayout, -}; - -/// Number of bits used for block size (12 = 4096 bytes) -pub const BLOCK_BITS: u8 = 12; -/// Size of a block in bytes (4096) -pub const BLOCK_SIZE: u16 = 1 << BLOCK_BITS; - -/// Errors that can occur when parsing EROFS format structures -#[derive(Debug)] -pub enum FormatError { - /// The data layout field contains an invalid value - InvalidDataLayout, -} - -/* Special handling for enums: FormatField and FileTypeField */ -// FormatField == InodeLayout | DataLayout -/// Combined field encoding both inode layout and data layout in a single u16 value -#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout, PartialEq)] -pub struct FormatField(U16); - -impl Default for FormatField { - fn default() -> Self { - FormatField(0xffff.into()) - } -} - -impl fmt::Debug for FormatField { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{} = {:?} | {:?}", - self.0.get(), - InodeLayout::from(*self), - DataLayout::try_from(*self) - ) - } -} - -const INODE_LAYOUT_MASK: u16 = 0b00000001; -const INODE_LAYOUT_COMPACT: u16 = 0; -const INODE_LAYOUT_EXTENDED: u16 = 1; - -/// Inode layout format, determining the inode header size -#[derive(Debug)] -#[repr(u16)] -pub enum InodeLayout { - /// Compact 32-byte inode header - Compact = INODE_LAYOUT_COMPACT, - /// Extended 64-byte inode header with additional fields - Extended = INODE_LAYOUT_EXTENDED, -} - -impl From for 
InodeLayout { - fn from(value: FormatField) -> Self { - match value.0.get() & INODE_LAYOUT_MASK { - INODE_LAYOUT_COMPACT => InodeLayout::Compact, - INODE_LAYOUT_EXTENDED => InodeLayout::Extended, - _ => unreachable!(), - } - } -} - -const INODE_DATALAYOUT_MASK: u16 = 0b00001110; -const INODE_DATALAYOUT_FLAT_PLAIN: u16 = 0; -const INODE_DATALAYOUT_FLAT_INLINE: u16 = 4; -const INODE_DATALAYOUT_CHUNK_BASED: u16 = 8; - -/// Data layout method for file content storage -#[derive(Debug)] -#[repr(u16)] -pub enum DataLayout { - /// File data stored in separate blocks - FlatPlain = 0, - /// File data stored inline within the inode - FlatInline = 4, - /// File data stored using chunk-based addressing - ChunkBased = 8, -} - -impl TryFrom for DataLayout { - type Error = FormatError; - - fn try_from(value: FormatField) -> Result { - match value.0.get() & INODE_DATALAYOUT_MASK { - INODE_DATALAYOUT_FLAT_PLAIN => Ok(DataLayout::FlatPlain), - INODE_DATALAYOUT_FLAT_INLINE => Ok(DataLayout::FlatInline), - INODE_DATALAYOUT_CHUNK_BASED => Ok(DataLayout::ChunkBased), - // This is non-injective, but only occurs in error cases. 
- _ => Err(FormatError::InvalidDataLayout), - } - } -} - -impl std::ops::BitOr for InodeLayout { - type Output = FormatField; - - // Convert InodeLayout | DataLayout into a format field - fn bitor(self, datalayout: DataLayout) -> FormatField { - FormatField((self as u16 | datalayout as u16).into()) - } -} - -/// File type mask for st_mode -pub const S_IFMT: u16 = 0o170000; -/// Regular file mode bit -pub const S_IFREG: u16 = 0o100000; -/// Character device mode bit -pub const S_IFCHR: u16 = 0o020000; -/// Directory mode bit -pub const S_IFDIR: u16 = 0o040000; -/// Block device mode bit -pub const S_IFBLK: u16 = 0o060000; -/// FIFO mode bit -pub const S_IFIFO: u16 = 0o010000; -/// Symbolic link mode bit -pub const S_IFLNK: u16 = 0o120000; -/// Socket mode bit -pub const S_IFSOCK: u16 = 0o140000; - -// FileTypeField == FileType -/// Unknown file type value -pub const FILE_TYPE_UNKNOWN: u8 = 0; -/// Regular file type value -pub const FILE_TYPE_REGULAR_FILE: u8 = 1; -/// Directory file type value -pub const FILE_TYPE_DIRECTORY: u8 = 2; -/// Character device file type value -pub const FILE_TYPE_CHARACTER_DEVICE: u8 = 3; -/// Block device file type value -pub const FILE_TYPE_BLOCK_DEVICE: u8 = 4; -/// FIFO file type value -pub const FILE_TYPE_FIFO: u8 = 5; -/// Socket file type value -pub const FILE_TYPE_SOCKET: u8 = 6; -/// Symbolic link file type value -pub const FILE_TYPE_SYMLINK: u8 = 7; - -/// File type enumeration for directory entries -#[derive(Clone, Copy, Debug)] -#[repr(u8)] -pub enum FileType { - /// Unknown or invalid file type - Unknown = FILE_TYPE_UNKNOWN, - /// Regular file - RegularFile = FILE_TYPE_REGULAR_FILE, - /// Directory - Directory = FILE_TYPE_DIRECTORY, - /// Character device - CharacterDevice = FILE_TYPE_CHARACTER_DEVICE, - /// Block device - BlockDevice = FILE_TYPE_BLOCK_DEVICE, - /// FIFO (named pipe) - Fifo = FILE_TYPE_FIFO, - /// Socket - Socket = FILE_TYPE_SOCKET, - /// Symbolic link - Symlink = FILE_TYPE_SYMLINK, -} - -impl From for 
FileType { - fn from(value: FileTypeField) -> Self { - match value.0 { - FILE_TYPE_REGULAR_FILE => Self::RegularFile, - FILE_TYPE_DIRECTORY => Self::Directory, - FILE_TYPE_CHARACTER_DEVICE => Self::CharacterDevice, - FILE_TYPE_BLOCK_DEVICE => Self::BlockDevice, - FILE_TYPE_FIFO => Self::Fifo, - FILE_TYPE_SOCKET => Self::Socket, - FILE_TYPE_SYMLINK => Self::Symlink, - // This is non-injective, but only occurs in error cases. - _ => Self::Unknown, - } - } -} - -impl From for FileTypeField { - fn from(value: FileType) -> Self { - FileTypeField(value as u8) - } -} - -impl std::ops::BitOr for FileType { - type Output = U16; - - // Convert ifmt | permissions into a st_mode field - fn bitor(self, permissions: u16) -> U16 { - (match self { - Self::RegularFile => S_IFREG, - Self::CharacterDevice => S_IFCHR, - Self::Directory => S_IFDIR, - Self::BlockDevice => S_IFBLK, - Self::Fifo => S_IFIFO, - Self::Symlink => S_IFLNK, - Self::Socket => S_IFSOCK, - Self::Unknown => unreachable!(), - } | permissions) - .into() - } -} - -/// Raw file type field as stored in directory entries -#[derive(Copy, Clone, FromBytes, Immutable, IntoBytes, KnownLayout, PartialEq)] -pub struct FileTypeField(u8); - -impl fmt::Debug for FileTypeField { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(&FileType::from(*self), f) - } -} - -impl Default for FileTypeField { - fn default() -> Self { - FileTypeField(0xff) - } -} - -/* ModeField */ -/// File mode field combining file type and permissions -#[derive(Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout, PartialEq)] -pub struct ModeField(pub U16); - -impl ModeField { - /// Checks if this mode field represents a directory - pub fn is_dir(self) -> bool { - self.0.get() & S_IFMT == S_IFDIR - } -} - -impl fmt::Debug for ModeField { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mode = self.0.get(); - let fmt = match mode & S_IFMT { - S_IFREG => "regular file", - S_IFCHR => "chardev", - S_IFDIR => 
"directory", - S_IFBLK => "blockdev", - S_IFIFO => "fifo", - S_IFLNK => "symlink", - S_IFSOCK => "socket", - _ => "INVALID", - }; - - write!(f, "0{mode:06o} ({fmt})") - } -} - -impl std::ops::BitOr for FileType { - type Output = ModeField; - - fn bitor(self, permissions: u32) -> ModeField { - ModeField(self | (permissions as u16)) - } -} - -/* composefs Header */ - -/// EROFS format version number -pub const VERSION: U32 = U32::new(1); -/// Composefs-specific version number -pub const COMPOSEFS_VERSION: U32 = U32::new(2); -/// Magic number identifying composefs images -pub const COMPOSEFS_MAGIC: U32 = U32::new(0xd078629a); - -/// Flag indicating the presence of ACL data -pub const COMPOSEFS_FLAGS_HAS_ACL: U32 = U32::new(1 << 0); - -/// Composefs-specific header preceding the standard EROFS superblock -#[derive(Default, FromBytes, Immutable, IntoBytes, KnownLayout)] -#[repr(C)] -pub struct ComposefsHeader { - /// Magic number for identification - pub magic: U32, - /// EROFS format version - pub version: U32, - /// Composefs feature flags - pub flags: U32, - /// Composefs format version - pub composefs_version: U32, - /// Reserved for future use - pub unused: [U32; 4], -} - -/* Superblock */ - -/// EROFS version 1 magic number -pub const MAGIC_V1: U32 = U32::new(0xE0F5E1E2); -/// Feature flag for mtime support -pub const FEATURE_COMPAT_MTIME: U32 = U32::new(2); -/// Feature flag for xattr filtering support -pub const FEATURE_COMPAT_XATTR_FILTER: U32 = U32::new(4); - -/// EROFS filesystem superblock structure -#[derive(Default, FromBytes, Immutable, IntoBytes, KnownLayout)] -#[repr(C)] -pub struct Superblock { - // vertical whitespace every 16 bytes (hexdump-friendly) - /// EROFS magic number - pub magic: U32, - /// Filesystem checksum - pub checksum: U32, - /// Compatible feature flags - pub feature_compat: U32, - /// Block size in bits (log2 of block size) - pub blkszbits: u8, - /// Number of extended attribute slots - pub extslots: u8, - /// Root inode number - pub 
root_nid: U16, - - /// Total number of inodes - pub inos: U64, - /// Build time in seconds since epoch - pub build_time: U64, - - /// Build time nanoseconds component - pub build_time_nsec: U32, - /// Total number of blocks - pub blocks: U32, - /// Starting block address of metadata - pub meta_blkaddr: U32, - /// Starting block address of extended attributes - pub xattr_blkaddr: U32, - - /// Filesystem UUID - pub uuid: [u8; 16], - - /// Volume name - pub volume_name: [u8; 16], - - /// Incompatible feature flags - pub feature_incompat: U32, - /// Available compression algorithms bitmap - pub available_compr_algs: U16, - /// Number of extra devices - pub extra_devices: U16, - /// Device slot offset - pub devt_slotoff: U16, - /// Directory block size in bits - pub dirblkbits: u8, - /// Number of xattr prefixes - pub xattr_prefix_count: u8, - /// Starting position of xattr prefix table - pub xattr_prefix_start: U32, - - /// Packed inode number - pub packed_nid: U64, - /// Reserved for xattr filtering - pub xattr_filter_reserved: u8, - /// Reserved for future use - pub reserved2: [u8; 23], -} - -/* Inodes */ - -/// Compact 32-byte inode header for basic file metadata -#[derive(Default, FromBytes, Immutable, IntoBytes, KnownLayout)] -#[repr(C)] -pub struct CompactInodeHeader { - /// Format field combining inode layout and data layout - pub format: FormatField, - /// Extended attribute inode count - pub xattr_icount: U16, - /// File mode (type and permissions) - pub mode: ModeField, - /// Number of hard links - pub nlink: U16, - - /// File size in bytes - pub size: U32, - /// Reserved field - pub reserved: U32, - - /// Union field (block address, device number, etc.) 
- pub u: U32, - /// Inode number for 32-bit stat compatibility - pub ino: U32, // only used for 32-bit stat compatibility - - /// User ID - pub uid: U16, - /// Group ID - pub gid: U16, - /// Reserved field - pub reserved2: [u8; 4], -} - -/// Extended 64-byte inode header with additional metadata fields -#[derive(Default, FromBytes, Immutable, IntoBytes, KnownLayout)] -#[repr(C)] -pub struct ExtendedInodeHeader { - /// Format field combining inode layout and data layout - pub format: FormatField, - /// Extended attribute inode count - pub xattr_icount: U16, - /// File mode (type and permissions) - pub mode: ModeField, - /// Reserved field - pub reserved: U16, - /// File size in bytes - pub size: U64, - - /// Union field (block address, device number, etc.) - pub u: U32, - /// Inode number for 32-bit stat compatibility - pub ino: U32, // only used for 32-bit stat compatibility - /// User ID - pub uid: U32, - /// Group ID - pub gid: U32, - - /// Modification time in seconds since epoch - pub mtime: U64, - - /// Modification time nanoseconds component - pub mtime_nsec: U32, - /// Number of hard links - pub nlink: U32, - - /// Reserved field - pub reserved2: [u8; 16], -} - -/// Header for inode extended attributes section -#[derive(Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)] -#[repr(C)] -pub struct InodeXAttrHeader { - /// Name filter hash for quick xattr lookups - pub name_filter: U32, - /// Number of shared xattr references - pub shared_count: u8, - /// Reserved field - pub reserved: [u8; 7], -} - -/* Extended attributes */ -/// Seed value for xattr name filter hash calculation -pub const XATTR_FILTER_SEED: u32 = 0x25BBE08F; - -/// Header for an extended attribute entry -#[derive(Debug, FromBytes, Immutable, IntoBytes, KnownLayout)] -#[repr(C)] -pub struct XAttrHeader { - /// Length of the attribute name suffix - pub name_len: u8, - /// Index into the xattr prefix table - pub name_index: u8, - /// Size of the attribute value - pub value_size: U16, 
-} - -/// Standard xattr name prefixes indexed by name_index -pub const XATTR_PREFIXES: [&[u8]; 7] = [ - b"", - b"user.", - b"system.posix_acl_access", - b"system.posix_acl_default", - b"trusted.", - b"lustre.", - b"security.", -]; - -/* Directories */ - -/// Header for a directory entry -#[derive(Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)] -#[repr(C)] -pub struct DirectoryEntryHeader { - /// Inode number of the entry - pub inode_offset: U64, - /// Offset to the entry name within the directory block - pub name_offset: U16, - /// File type of the entry - pub file_type: FileTypeField, - /// Reserved field - pub reserved: u8, -} +//! Re-exported from composefs-erofs. +pub use composefs_erofs::format::*; diff --git a/crates/composefs/src/erofs/reader.rs b/crates/composefs/src/erofs/reader.rs index b33ca2e5..73f382df 100644 --- a/crates/composefs/src/erofs/reader.rs +++ b/crates/composefs/src/erofs/reader.rs @@ -1,1052 +1,4 @@ //! EROFS image reading and parsing functionality. //! -//! This module provides safe parsing and navigation of EROFS filesystem -//! images, including inode traversal, directory reading, and object -//! reference collection for garbage collection. 
- -use core::mem::size_of; -use std::collections::{BTreeSet, HashSet}; -use std::ops::Range; - -use thiserror::Error; -use zerocopy::{little_endian::U32, FromBytes, Immutable, KnownLayout}; - -use super::{ - composefs::OverlayMetacopy, - format::{ - CompactInodeHeader, ComposefsHeader, DataLayout, DirectoryEntryHeader, ExtendedInodeHeader, - InodeXAttrHeader, ModeField, Superblock, XAttrHeader, - }, -}; -use crate::fsverity::FsVerityHashValue; - -/// Rounds up a value to the nearest multiple of `to` -pub fn round_up(n: usize, to: usize) -> usize { - (n + to - 1) & !(to - 1) -} - -/// Common interface for accessing inode header fields across different layouts -pub trait InodeHeader { - /// Returns the data layout method used by this inode - fn data_layout(&self) -> DataLayout; - /// Returns the extended attribute inode count - fn xattr_icount(&self) -> u16; - /// Returns the file mode - fn mode(&self) -> ModeField; - /// Returns the file size in bytes - fn size(&self) -> u64; - /// Returns the union field value (block address, device number, etc.) 
- fn u(&self) -> u32; - - /// Calculates the number of additional bytes after the header - fn additional_bytes(&self, blkszbits: u8) -> usize { - let block_size = 1 << blkszbits; - self.xattr_size() - + match self.data_layout() { - DataLayout::FlatPlain => 0, - DataLayout::FlatInline => self.size() as usize % block_size, - DataLayout::ChunkBased => 4, - } - } - - /// Calculates the size of the extended attributes section - fn xattr_size(&self) -> usize { - match self.xattr_icount() { - 0 => 0, - n => (n as usize - 1) * 4 + 12, - } - } -} - -impl InodeHeader for ExtendedInodeHeader { - fn data_layout(&self) -> DataLayout { - self.format.try_into().unwrap() - } - - fn xattr_icount(&self) -> u16 { - self.xattr_icount.get() - } - - fn mode(&self) -> ModeField { - self.mode - } - - fn size(&self) -> u64 { - self.size.get() - } - - fn u(&self) -> u32 { - self.u.get() - } -} - -impl InodeHeader for CompactInodeHeader { - fn data_layout(&self) -> DataLayout { - self.format.try_into().unwrap() - } - - fn xattr_icount(&self) -> u16 { - self.xattr_icount.get() - } - - fn mode(&self) -> ModeField { - self.mode - } - - fn size(&self) -> u64 { - self.size.get() as u64 - } - - fn u(&self) -> u32 { - self.u.get() - } -} - -/// Extended attribute entry with header and variable-length data -#[repr(C)] -#[derive(FromBytes, Immutable, KnownLayout)] -pub struct XAttr { - /// Extended attribute header - pub header: XAttrHeader, - /// Variable-length data containing name suffix and value - pub data: [u8], -} - -/// Inode structure with header and variable-length data -#[repr(C)] -#[derive(FromBytes, Immutable, KnownLayout)] -pub struct Inode { - /// Inode header (compact or extended) - pub header: Header, - /// Variable-length data containing xattrs and inline content - pub data: [u8], -} - -/// Extended attributes section of an inode -#[repr(C)] -#[derive(Debug, FromBytes, Immutable, KnownLayout)] -pub struct InodeXAttrs { - /// Extended attributes header - pub header: InodeXAttrHeader, 
- /// Variable-length data containing shared xattr refs and local xattrs - pub data: [u8], -} - -impl XAttrHeader { - /// Calculates the total size of this xattr including padding - pub fn calculate_n_elems(&self) -> usize { - round_up(self.name_len as usize + self.value_size.get() as usize, 4) - } -} - -impl XAttr { - /// Parses an xattr from a byte slice, returning the xattr and remaining bytes - pub fn from_prefix(data: &[u8]) -> (&XAttr, &[u8]) { - let header = XAttrHeader::ref_from_bytes(&data[..4]).unwrap(); - Self::ref_from_prefix_with_elems(data, header.calculate_n_elems()).unwrap() - } - - /// Returns the attribute name suffix - pub fn suffix(&self) -> &[u8] { - &self.data[..self.header.name_len as usize] - } - - /// Returns the attribute value - pub fn value(&self) -> &[u8] { - &self.data[self.header.name_len as usize..][..self.header.value_size.get() as usize] - } - - /// Returns the padding bytes after the value - pub fn padding(&self) -> &[u8] { - &self.data[self.header.name_len as usize + self.header.value_size.get() as usize..] - } -} - -/// Operations on inode data -pub trait InodeOps { - /// Returns the extended attributes section if present - fn xattrs(&self) -> Option<&InodeXAttrs>; - /// Returns the inline data portion - fn inline(&self) -> Option<&[u8]>; - /// Returns the range of block IDs used by this inode - fn blocks(&self, blkszbits: u8) -> Range; -} - -impl InodeHeader for &Inode
{ - fn data_layout(&self) -> DataLayout { - self.header.data_layout() - } - - fn xattr_icount(&self) -> u16 { - self.header.xattr_icount() - } - - fn mode(&self) -> ModeField { - self.header.mode() - } - - fn size(&self) -> u64 { - self.header.size() - } - - fn u(&self) -> u32 { - self.header.u() - } -} - -impl InodeOps for &Inode
{ - fn xattrs(&self) -> Option<&InodeXAttrs> { - match self.header.xattr_size() { - 0 => None, - n => Some(InodeXAttrs::ref_from_bytes(&self.data[..n]).unwrap()), - } - } - - fn inline(&self) -> Option<&[u8]> { - let data = &self.data[self.header.xattr_size()..]; - - if data.is_empty() { - return None; - } - - Some(data) - } - - fn blocks(&self, blkszbits: u8) -> Range { - let size = self.header.size(); - let block_size = 1 << blkszbits; - let start = self.header.u() as u64; - - match self.header.data_layout() { - DataLayout::FlatPlain => Range { - start, - end: start + size.div_ceil(block_size), - }, - DataLayout::FlatInline => Range { - start, - end: start + size / block_size, - }, - DataLayout::ChunkBased => Range { start, end: start }, - } - } -} - -// this lets us avoid returning Box from Image.inode() -// but ... wow. -/// Inode type enum allowing static dispatch for different header layouts -#[derive(Debug)] -pub enum InodeType<'img> { - /// Compact inode with 32-byte header - Compact(&'img Inode), - /// Extended inode with 64-byte header - Extended(&'img Inode), -} - -impl InodeHeader for InodeType<'_> { - fn u(&self) -> u32 { - match self { - Self::Compact(inode) => inode.u(), - Self::Extended(inode) => inode.u(), - } - } - - fn size(&self) -> u64 { - match self { - Self::Compact(inode) => inode.size(), - Self::Extended(inode) => inode.size(), - } - } - - fn xattr_icount(&self) -> u16 { - match self { - Self::Compact(inode) => inode.xattr_icount(), - Self::Extended(inode) => inode.xattr_icount(), - } - } - - fn data_layout(&self) -> DataLayout { - match self { - Self::Compact(inode) => inode.data_layout(), - Self::Extended(inode) => inode.data_layout(), - } - } - - fn mode(&self) -> ModeField { - match self { - Self::Compact(inode) => inode.mode(), - Self::Extended(inode) => inode.mode(), - } - } -} - -impl InodeOps for InodeType<'_> { - fn xattrs(&self) -> Option<&InodeXAttrs> { - match self { - Self::Compact(inode) => inode.xattrs(), - 
Self::Extended(inode) => inode.xattrs(), - } - } - - fn inline(&self) -> Option<&[u8]> { - match self { - Self::Compact(inode) => inode.inline(), - Self::Extended(inode) => inode.inline(), - } - } - - fn blocks(&self, blkszbits: u8) -> Range { - match self { - Self::Compact(inode) => inode.blocks(blkszbits), - Self::Extended(inode) => inode.blocks(blkszbits), - } - } -} - -/// Parsed EROFS image with references to key structures -#[derive(Debug)] -pub struct Image<'i> { - /// Raw image bytes - pub image: &'i [u8], - /// Composefs header - pub header: &'i ComposefsHeader, - /// Block size in bits - pub blkszbits: u8, - /// Block size in bytes - pub block_size: usize, - /// Superblock - pub sb: &'i Superblock, - /// Inode metadata region - pub inodes: &'i [u8], - /// Extended attributes region - pub xattrs: &'i [u8], -} - -impl<'img> Image<'img> { - /// Opens an EROFS image from raw bytes - pub fn open(image: &'img [u8]) -> Self { - let header = ComposefsHeader::ref_from_prefix(image) - .expect("header err") - .0; - let sb = Superblock::ref_from_prefix(&image[1024..]) - .expect("superblock err") - .0; - let blkszbits = sb.blkszbits; - let block_size = 1usize << blkszbits; - assert!(block_size != 0); - let inodes = &image[sb.meta_blkaddr.get() as usize * block_size..]; - let xattrs = &image[sb.xattr_blkaddr.get() as usize * block_size..]; - Image { - image, - header, - blkszbits, - block_size, - sb, - inodes, - xattrs, - } - } - - /// Returns an inode by its ID - pub fn inode(&self, id: u64) -> InodeType<'_> { - let inode_data = &self.inodes[id as usize * 32..]; - if inode_data[0] & 1 != 0 { - let header = ExtendedInodeHeader::ref_from_bytes(&inode_data[..64]).unwrap(); - InodeType::Extended( - Inode::::ref_from_prefix_with_elems( - inode_data, - header.additional_bytes(self.blkszbits), - ) - .unwrap() - .0, - ) - } else { - let header = CompactInodeHeader::ref_from_bytes(&inode_data[..32]).unwrap(); - InodeType::Compact( - Inode::::ref_from_prefix_with_elems( - 
inode_data, - header.additional_bytes(self.blkszbits), - ) - .unwrap() - .0, - ) - } - } - - /// Returns a shared extended attribute by its ID - pub fn shared_xattr(&self, id: u32) -> &XAttr { - let xattr_data = &self.xattrs[id as usize * 4..]; - let header = XAttrHeader::ref_from_bytes(&xattr_data[..4]).unwrap(); - XAttr::ref_from_prefix_with_elems(xattr_data, header.calculate_n_elems()) - .unwrap() - .0 - } - - /// Returns a data block by its ID - pub fn block(&self, id: u64) -> &[u8] { - &self.image[id as usize * self.block_size..][..self.block_size] - } - - /// Returns a data block by its ID as a DataBlock reference - pub fn data_block(&self, id: u64) -> &DataBlock { - DataBlock::ref_from_bytes(self.block(id)).unwrap() - } - - /// Returns a directory block by its ID - pub fn directory_block(&self, id: u64) -> &DirectoryBlock { - DirectoryBlock::ref_from_bytes(self.block(id)).unwrap() - } - - /// Returns the root directory inode - pub fn root(&self) -> InodeType<'_> { - self.inode(self.sb.root_nid.get() as u64) - } -} - -// TODO: there must be an easier way... 
-#[derive(FromBytes, Immutable, KnownLayout)] -#[repr(C)] -struct Array([T]); - -impl InodeXAttrs { - /// Returns the array of shared xattr IDs - pub fn shared(&self) -> &[U32] { - &Array::ref_from_prefix_with_elems(&self.data, self.header.shared_count as usize) - .unwrap() - .0 - .0 - } - - /// Returns an iterator over local (non-shared) xattrs - pub fn local(&self) -> XAttrIter<'_> { - XAttrIter { - data: &self.data[self.header.shared_count as usize * 4..], - } - } -} - -/// Iterator over local extended attributes -#[derive(Debug)] -pub struct XAttrIter<'img> { - data: &'img [u8], -} - -impl<'img> Iterator for XAttrIter<'img> { - type Item = &'img XAttr; - - fn next(&mut self) -> Option { - if !self.data.is_empty() { - let (result, rest) = XAttr::from_prefix(self.data); - self.data = rest; - Some(result) - } else { - None - } - } -} - -/// Data block containing file content -#[repr(C)] -#[derive(FromBytes, Immutable, KnownLayout)] -pub struct DataBlock(pub [u8]); - -/// Directory block containing directory entries -#[repr(C)] -#[derive(FromBytes, Immutable, KnownLayout)] -pub struct DirectoryBlock(pub [u8]); - -impl DirectoryBlock { - /// Returns the directory entry header at the given index - pub fn get_entry_header(&self, n: usize) -> &DirectoryEntryHeader { - let entry_data = &self.0 - [n * size_of::()..(n + 1) * size_of::()]; - DirectoryEntryHeader::ref_from_bytes(entry_data).unwrap() - } - - /// Returns all directory entry headers as a slice - pub fn get_entry_headers(&self) -> &[DirectoryEntryHeader] { - &Array::ref_from_prefix_with_elems(&self.0, self.n_entries()) - .unwrap() - .0 - .0 - } - - /// Returns the number of entries in this directory block - pub fn n_entries(&self) -> usize { - let first = self.get_entry_header(0); - let offset = first.name_offset.get(); - assert!(offset != 0); - assert!(offset.is_multiple_of(12)); - offset as usize / 12 - } - - /// Returns an iterator over directory entries - pub fn entries(&self) -> DirectoryEntries<'_> { - 
DirectoryEntries { - block: self, - length: self.n_entries(), - position: 0, - } - } -} - -// High-level iterator interface -/// A single directory entry with header and name -#[derive(Debug)] -pub struct DirectoryEntry<'a> { - /// Directory entry header - pub header: &'a DirectoryEntryHeader, - /// Entry name - pub name: &'a [u8], -} - -impl DirectoryEntry<'_> { - fn nid(&self) -> u64 { - self.header.inode_offset.get() - } -} - -/// Iterator over directory entries in a directory block -#[derive(Debug)] -pub struct DirectoryEntries<'d> { - block: &'d DirectoryBlock, - length: usize, - position: usize, -} - -impl<'d> Iterator for DirectoryEntries<'d> { - type Item = DirectoryEntry<'d>; - - fn next(&mut self) -> Option { - if self.position < self.length { - let header = self.block.get_entry_header(self.position); - let name_start = header.name_offset.get() as usize; - self.position += 1; - - let name = if self.position == self.length { - let with_padding = &self.block.0[name_start..]; - let end = with_padding.partition_point(|c| *c != 0); - &with_padding[..end] - } else { - let next = self.block.get_entry_header(self.position); - let name_end = next.name_offset.get() as usize; - &self.block.0[name_start..name_end] - }; - - Some(DirectoryEntry { header, name }) - } else { - None - } - } -} - -/// Errors that can occur when reading EROFS images -#[derive(Error, Debug)] -pub enum ErofsReaderError { - /// Directory has multiple hard links (not allowed) - #[error("Hardlinked directories detected")] - DirectoryHardlinks, - /// Directory nesting exceeds maximum depth - #[error("Maximum directory depth exceeded")] - DepthExceeded, - /// The '.' entry is invalid - #[error("Invalid '.' entry in directory")] - InvalidSelfReference, - /// The '..' entry is invalid - #[error("Invalid '..' 
entry in directory")] - InvalidParentReference, - /// File type in directory entry doesn't match inode - #[error("File type in dirent doesn't match type in inode")] - FileTypeMismatch, -} - -type ReadResult = Result; - -/// Collects object references from an EROFS image for garbage collection -#[derive(Debug)] -pub struct ObjectCollector { - visited_nids: HashSet, - nids_to_visit: BTreeSet, - objects: HashSet, -} - -impl ObjectCollector { - fn visit_xattr(&mut self, attr: &XAttr) { - // This is the index of "trusted". See XATTR_PREFIXES in format.rs. - if attr.header.name_index != 4 { - return; - } - if attr.suffix() != b"overlay.metacopy" { - return; - } - if let Ok(value) = OverlayMetacopy::read_from_bytes(attr.value()) { - if value.valid() { - self.objects.insert(value.digest); - } - } - } - - fn visit_xattrs(&mut self, img: &Image, xattrs: &InodeXAttrs) -> ReadResult<()> { - for id in xattrs.shared() { - self.visit_xattr(img.shared_xattr(id.get())); - } - for attr in xattrs.local() { - self.visit_xattr(attr); - } - Ok(()) - } - - fn visit_directory_block(&mut self, block: &DirectoryBlock) { - for entry in block.entries() { - if entry.name != b"." && entry.name != b".." 
{ - let nid = entry.nid(); - if !self.visited_nids.contains(&nid) { - self.nids_to_visit.insert(nid); - } - } - } - } - - fn visit_nid(&mut self, img: &Image, nid: u64) -> ReadResult<()> { - let first_time = self.visited_nids.insert(nid); - assert!(first_time); // should not have been added to the "to visit" list otherwise - - let inode = img.inode(nid); - - if let Some(xattrs) = inode.xattrs() { - self.visit_xattrs(img, xattrs)?; - } - - if inode.mode().is_dir() { - for blkid in inode.blocks(img.sb.blkszbits) { - self.visit_directory_block(img.directory_block(blkid)); - } - - if let Some(inline) = inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - self.visit_directory_block(inline_block); - } - } - - Ok(()) - } -} - -/// Collects all object references from an EROFS image -/// -/// This function walks the directory tree and extracts fsverity object IDs -/// from overlay.metacopy xattrs for garbage collection purposes. -/// -/// Returns a set of all referenced object IDs. -pub fn collect_objects(image: &[u8]) -> ReadResult> { - let img = Image::open(image); - let mut this = ObjectCollector { - visited_nids: HashSet::new(), - nids_to_visit: BTreeSet::new(), - objects: HashSet::new(), - }; - - // nids_to_visit is initialized with the root directory. Visiting directory nids will add - // more nids to the "to visit" list. Keep iterating until it's empty. 
- this.nids_to_visit.insert(img.sb.root_nid.get() as u64); - while let Some(nid) = this.nids_to_visit.pop_first() { - this.visit_nid(&img, nid)?; - } - Ok(this.objects) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - dumpfile::dumpfile_to_filesystem, erofs::writer::mkfs_erofs, fsverity::Sha256HashValue, - }; - use std::collections::HashMap; - - /// Helper to validate that directory entries can be read correctly - fn validate_directory_entries(img: &Image, nid: u64, expected_names: &[&str]) { - let inode = img.inode(nid); - assert!(inode.mode().is_dir(), "Expected directory inode"); - - let mut found_names = Vec::new(); - - // Read inline entries if present - if let Some(inline) = inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - for entry in inline_block.entries() { - let name = std::str::from_utf8(entry.name).unwrap(); - found_names.push(name.to_string()); - } - } - - // Read block entries - for blkid in inode.blocks(img.blkszbits) { - let block = img.directory_block(blkid); - for entry in block.entries() { - let name = std::str::from_utf8(entry.name).unwrap(); - found_names.push(name.to_string()); - } - } - - // Sort for comparison (entries should include . and ..) - found_names.sort(); - let mut expected_sorted: Vec<_> = expected_names.iter().map(|s| s.to_string()).collect(); - expected_sorted.sort(); - - assert_eq!( - found_names, expected_sorted, - "Directory entries mismatch for nid {nid}" - ); - } - - #[test] - fn test_empty_directory() { - // Create filesystem with empty directory - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/empty_dir 4096 40755 2 0 0 0 1000.0 - - - -"#; - - let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); - let image = mkfs_erofs(&fs); - let img = Image::open(&image); - - // Root should have . and .. 
and empty_dir - let root_nid = img.sb.root_nid.get() as u64; - validate_directory_entries(&img, root_nid, &[".", "..", "empty_dir"]); - - // Find empty_dir entry - let root_inode = img.root(); - let mut empty_dir_nid = None; - if let Some(inline) = root_inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - for entry in inline_block.entries() { - if entry.name == b"empty_dir" { - empty_dir_nid = Some(entry.nid()); - break; - } - } - } - for blkid in root_inode.blocks(img.blkszbits) { - let block = img.directory_block(blkid); - for entry in block.entries() { - if entry.name == b"empty_dir" { - empty_dir_nid = Some(entry.nid()); - break; - } - } - } - - let empty_dir_nid = empty_dir_nid.expect("empty_dir not found"); - validate_directory_entries(&img, empty_dir_nid, &[".", ".."]); - } - - #[test] - fn test_directory_with_inline_entries() { - // Create filesystem with directory that has a few entries (should be inline) - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/dir1 4096 40755 2 0 0 0 1000.0 - - - -/dir1/file1 5 100644 1 0 0 0 1000.0 - hello - -/dir1/file2 5 100644 1 0 0 0 1000.0 - world - -"#; - - let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); - let image = mkfs_erofs(&fs); - let img = Image::open(&image); - - // Find dir1 - let root_inode = img.root(); - let mut dir1_nid = None; - if let Some(inline) = root_inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - for entry in inline_block.entries() { - if entry.name == b"dir1" { - dir1_nid = Some(entry.nid()); - break; - } - } - } - for blkid in root_inode.blocks(img.blkszbits) { - let block = img.directory_block(blkid); - for entry in block.entries() { - if entry.name == b"dir1" { - dir1_nid = Some(entry.nid()); - break; - } - } - } - - let dir1_nid = dir1_nid.expect("dir1 not found"); - validate_directory_entries(&img, dir1_nid, &[".", "..", "file1", "file2"]); - } - - #[test] - fn test_directory_with_many_entries() { - // Create 
a directory with many entries to force block storage - let mut dumpfile = String::from("/ 4096 40755 2 0 0 0 1000.0 - - -\n"); - dumpfile.push_str("/bigdir 4096 40755 2 0 0 0 1000.0 - - -\n"); - - // Add many files to force directory blocks - for i in 0..100 { - dumpfile.push_str(&format!( - "/bigdir/file{i:03} 5 100644 1 0 0 0 1000.0 - hello -\n" - )); - } - - let fs = dumpfile_to_filesystem::(&dumpfile).unwrap(); - let image = mkfs_erofs(&fs); - let img = Image::open(&image); - - // Find bigdir - let root_inode = img.root(); - let mut bigdir_nid = None; - if let Some(inline) = root_inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - for entry in inline_block.entries() { - if entry.name == b"bigdir" { - bigdir_nid = Some(entry.nid()); - break; - } - } - } - for blkid in root_inode.blocks(img.blkszbits) { - let block = img.directory_block(blkid); - for entry in block.entries() { - if entry.name == b"bigdir" { - bigdir_nid = Some(entry.nid()); - break; - } - } - } - - let bigdir_nid = bigdir_nid.expect("bigdir not found"); - - // Build expected names - let mut expected: Vec = vec![".".to_string(), "..".to_string()]; - for i in 0..100 { - expected.push(format!("file{i:03}")); - } - let expected_refs: Vec<&str> = expected.iter().map(|s| s.as_str()).collect(); - - validate_directory_entries(&img, bigdir_nid, &expected_refs); - } - - #[test] - fn test_nested_directories() { - // Test deeply nested directory structure - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/a 4096 40755 2 0 0 0 1000.0 - - - -/a/b 4096 40755 2 0 0 0 1000.0 - - - -/a/b/c 4096 40755 2 0 0 0 1000.0 - - - -/a/b/c/file.txt 5 100644 1 0 0 0 1000.0 - hello - -"#; - - let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); - let image = mkfs_erofs(&fs); - let img = Image::open(&image); - - // Navigate through the structure - let root_nid = img.sb.root_nid.get() as u64; - validate_directory_entries(&img, root_nid, &[".", "..", "a"]); - - // Helper to find a 
directory entry by name - let find_entry = |parent_nid: u64, name: &[u8]| -> u64 { - let inode = img.inode(parent_nid); - - if let Some(inline) = inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - for entry in inline_block.entries() { - if entry.name == name { - return entry.nid(); - } - } - } - - for blkid in inode.blocks(img.blkszbits) { - let block = img.directory_block(blkid); - for entry in block.entries() { - if entry.name == name { - return entry.nid(); - } - } - } - panic!("Entry not found: {:?}", std::str::from_utf8(name)); - }; - - let a_nid = find_entry(root_nid, b"a"); - validate_directory_entries(&img, a_nid, &[".", "..", "b"]); - - let b_nid = find_entry(a_nid, b"b"); - validate_directory_entries(&img, b_nid, &[".", "..", "c"]); - - let c_nid = find_entry(b_nid, b"c"); - validate_directory_entries(&img, c_nid, &[".", "..", "file.txt"]); - } - - #[test] - fn test_mixed_entry_types() { - // Test directory with various file types - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/mixed 4096 40755 2 0 0 0 1000.0 - - - -/mixed/regular 10 100644 1 0 0 0 1000.0 - content123 - -/mixed/symlink 7 120777 1 0 0 0 1000.0 /target - - -/mixed/fifo 0 10644 1 0 0 0 1000.0 - - - -/mixed/subdir 4096 40755 2 0 0 0 1000.0 - - - -"#; - - let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); - let image = mkfs_erofs(&fs); - let img = Image::open(&image); - - let root_inode = img.root(); - let mut mixed_nid = None; - if let Some(inline) = root_inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - for entry in inline_block.entries() { - if entry.name == b"mixed" { - mixed_nid = Some(entry.nid()); - break; - } - } - } - for blkid in root_inode.blocks(img.blkszbits) { - let block = img.directory_block(blkid); - for entry in block.entries() { - if entry.name == b"mixed" { - mixed_nid = Some(entry.nid()); - break; - } - } - } - - let mixed_nid = mixed_nid.expect("mixed not found"); - 
validate_directory_entries( - &img, - mixed_nid, - &[".", "..", "regular", "symlink", "fifo", "subdir"], - ); - } - - #[test] - fn test_collect_objects_traversal() { - // Test that object collection properly traverses all directories - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/dir1 4096 40755 2 0 0 0 1000.0 - - - -/dir1/file1 5 100644 1 0 0 0 1000.0 - hello - -/dir2 4096 40755 2 0 0 0 1000.0 - - - -/dir2/subdir 4096 40755 2 0 0 0 1000.0 - - - -/dir2/subdir/file2 5 100644 1 0 0 0 1000.0 - world - -"#; - - let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); - let image = mkfs_erofs(&fs); - - // This should traverse all directories without error - let result = collect_objects::(&image); - assert!( - result.is_ok(), - "Failed to collect objects: {:?}", - result.err() - ); - } - - #[test] - fn test_pr188_empty_inline_directory() -> anyhow::Result<()> { - // Regression test for https://github.com/containers/composefs-rs/pull/188 - // - // The bug: ObjectCollector::visit_inode at lines 553-554 unconditionally does: - // let tail = DirectoryBlock::ref_from_bytes(inode.inline()).unwrap(); - // self.visit_directory_block(tail); - // - // When inode.inline() is empty, DirectoryBlock::ref_from_bytes succeeds but then - // visit_directory_block calls n_entries() which panics trying to read 12 bytes - // from an empty slice. - // - // This test generates an erofs image using C mkcomposefs, which creates directories - // with empty inline sections (unlike the Rust implementation which always includes - // . and .. entries). 
- - // Generate a C-generated erofs image using mkcomposefs - let dumpfile_content = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/empty_dir 4096 40755 2 0 0 0 1000.0 - - - -"#; - - // Create temporary files for dumpfile and erofs output - let temp_dir = tempfile::TempDir::new()?; - let temp_dir = temp_dir.path(); - let dumpfile_path = temp_dir.join("pr188_test.dump"); - let erofs_path = temp_dir.join("pr188_test.erofs"); - - // Write dumpfile - std::fs::write(&dumpfile_path, dumpfile_content).expect("Failed to write test dumpfile"); - - // Run mkcomposefs to generate erofs image - let output = std::process::Command::new("mkcomposefs") - .arg("--from-file") - .arg(&dumpfile_path) - .arg(&erofs_path) - .output() - .expect("Failed to run mkcomposefs - is it installed?"); - - assert!( - output.status.success(), - "mkcomposefs failed: {}", - String::from_utf8_lossy(&output.stderr) - ); - - // Read the generated erofs image - let image = std::fs::read(&erofs_path).expect("Failed to read generated erofs"); - - // The C mkcomposefs creates directories with empty inline sections. - let r = collect_objects::(&image).unwrap(); - assert_eq!(r.len(), 0); - - Ok(()) - } - - #[test] - fn test_round_trip_basic() { - // Full round-trip: dumpfile -> tree -> erofs -> read back -> validate - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/file1 5 100644 1 0 0 0 1000.0 - hello - -/file2 6 100644 1 0 0 0 1000.0 - world! 
- -/dir1 4096 40755 2 0 0 0 1000.0 - - - -/dir1/nested 8 100644 1 0 0 0 1000.0 - content1 - -"#; - - let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); - let image = mkfs_erofs(&fs); - let img = Image::open(&image); - - // Verify root entries - let root_nid = img.sb.root_nid.get() as u64; - validate_directory_entries(&img, root_nid, &[".", "..", "file1", "file2", "dir1"]); - - // Collect all entries and verify structure - let mut entries_map: HashMap, u64> = HashMap::new(); - let root_inode = img.root(); - - if let Some(inline) = root_inode.inline() { - let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); - for entry in inline_block.entries() { - entries_map.insert(entry.name.to_vec(), entry.nid()); - } - } - - for blkid in root_inode.blocks(img.blkszbits) { - let block = img.directory_block(blkid); - for entry in block.entries() { - entries_map.insert(entry.name.to_vec(), entry.nid()); - } - } - - // Verify we can read file contents - let file1_nid = entries_map - .get(b"file1".as_slice()) - .expect("file1 not found"); - let file1_inode = img.inode(*file1_nid); - assert!(!file1_inode.mode().is_dir()); - assert_eq!(file1_inode.size(), 5); - - let inline_data = file1_inode.inline(); - assert_eq!(inline_data, Some(b"hello".as_slice())); - } -} +//! Re-exported from composefs-erofs. 
+pub use composefs_erofs::reader::*; diff --git a/crates/composefs/src/erofs/writer.rs b/crates/composefs/src/erofs/writer.rs index 14236424..da53ee45 100644 --- a/crates/composefs/src/erofs/writer.rs +++ b/crates/composefs/src/erofs/writer.rs @@ -16,11 +16,9 @@ use log::trace; use xxhash_rust::xxh32::xxh32; use zerocopy::{Immutable, IntoBytes}; -use crate::{ - erofs::{composefs::OverlayMetacopy, format, reader::round_up}, - fsverity::FsVerityHashValue, - tree, -}; +use composefs_erofs::{composefs::OverlayMetacopy, format, reader::round_up}; + +use crate::{fsverity::FsVerityHashValue, tree}; #[derive(Clone, Copy, Debug)] enum Offset { diff --git a/crates/composefs/src/fsverity/hashvalue.rs b/crates/composefs/src/fsverity/hashvalue.rs index 4581da84..0248ba7c 100644 --- a/crates/composefs/src/fsverity/hashvalue.rs +++ b/crates/composefs/src/fsverity/hashvalue.rs @@ -1,277 +1,4 @@ //! Hash value types and trait definitions for fs-verity. //! -//! This module defines the FsVerityHashValue trait and concrete implementations -//! for SHA-256 and SHA-512 hash values, including parsing from hex strings -//! and object pathnames. - -use core::{fmt, hash::Hash}; - -use hex::FromHexError; -use sha2::{digest::FixedOutputReset, digest::Output, Digest, Sha256, Sha512}; -use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned}; - -/// Trait for fs-verity hash value types supporting SHA-256 and SHA-512. -/// -/// This trait defines the interface for hash values used in fs-verity operations, -/// including serialization to/from hex strings and object store pathnames. -pub trait FsVerityHashValue -where - Self: Clone, - Self: From>, - Self: FromBytes + Immutable + IntoBytes + KnownLayout + Unaligned, - Self: Hash + Eq, - Self: fmt::Debug, - Self: Send + Sync + Unpin + 'static, -{ - /// The underlying hash digest algorithm type. - type Digest: Digest + FixedOutputReset + fmt::Debug; - /// The fs-verity algorithm identifier (1 for SHA-256, 2 for SHA-512). 
- const ALGORITHM: u8; - /// An empty hash value with all bytes set to zero. - const EMPTY: Self; - /// The algorithm identifier string ("sha256" or "sha512"). - const ID: &str; - - /// Parse a hash value from a hexadecimal string. - /// - /// # Arguments - /// * `hex` - A hexadecimal string representation of the hash - /// - /// # Returns - /// The parsed hash value, or an error if the input is invalid. - fn from_hex(hex: impl AsRef<[u8]>) -> Result { - let mut value = Self::EMPTY; - hex::decode_to_slice(hex.as_ref(), value.as_mut_bytes())?; - Ok(value) - } - - /// Parse a hash value from an object store directory number and basename. - /// - /// Object stores typically use a two-level hierarchy where the first byte - /// of the hash determines the directory name and the remaining bytes form - /// the basename. - /// - /// # Arguments - /// * `dirnum` - The directory number (first byte of the hash) - /// * `basename` - The hexadecimal basename (remaining bytes) - /// - /// # Returns - /// The parsed hash value, or an error if the input is invalid. - fn from_object_dir_and_basename( - dirnum: u8, - basename: impl AsRef<[u8]>, - ) -> Result { - let expected_size = 2 * (size_of::() - 1); - let bytes = basename.as_ref(); - if bytes.len() != expected_size { - return Err(FromHexError::InvalidStringLength); - } - let mut result = Self::EMPTY; - result.as_mut_bytes()[0] = dirnum; - hex::decode_to_slice(bytes, &mut result.as_mut_bytes()[1..])?; - Ok(result) - } - - /// Parse a hash value from a full object pathname. - /// - /// Parses a pathname in the format "xx/yyyyyy" where "xxyyyyyy" is the - /// full hexadecimal hash. The prefix before the two-level hierarchy is ignored. - /// - /// # Arguments - /// * `pathname` - The object pathname (e.g., "ab/cdef1234...") - /// - /// # Returns - /// The parsed hash value, or an error if the input is invalid. 
- fn from_object_pathname(pathname: impl AsRef<[u8]>) -> Result { - // We want to the trailing part of "....../xx/yyyyyy" where xxyyyyyy is our hex length - let min_size = 2 * size_of::() + 1; - let bytes = pathname.as_ref(); - if bytes.len() < min_size { - return Err(FromHexError::InvalidStringLength); - } - - let trailing = &bytes[bytes.len() - min_size..]; - let mut result = Self::EMPTY; - hex::decode_to_slice(&trailing[0..2], &mut result.as_mut_bytes()[0..1])?; - if trailing[2] != b'/' { - return Err(FromHexError::InvalidHexCharacter { - c: trailing[2] as char, - index: 2, - }); - } - hex::decode_to_slice(&trailing[3..], &mut result.as_mut_bytes()[1..])?; - Ok(result) - } - - /// Convert the hash value to an object pathname. - /// - /// Formats the hash as "xx/yyyyyy" where xx is the first byte in hex - /// and yyyyyy is the remaining bytes in hex. - /// - /// # Returns - /// A string in object pathname format. - fn to_object_pathname(&self) -> String { - format!( - "{:02x}/{}", - self.as_bytes()[0], - hex::encode(&self.as_bytes()[1..]) - ) - } - - /// Convert the hash value to an object directory name. - /// - /// Returns just the first byte of the hash as a two-character hex string. - /// - /// # Returns - /// A string representing the directory name. - fn to_object_dir(&self) -> String { - format!("{:02x}", self.as_bytes()[0]) - } - - /// Convert the hash value to a hexadecimal string. - /// - /// # Returns - /// The full hash as a hex string. - fn to_hex(&self) -> String { - hex::encode(self.as_bytes()) - } - - /// Convert the hash value to an identifier string with algorithm prefix. - /// - /// # Returns - /// A string in the format "algorithm:hexhash" (e.g., "sha256:abc123..."). 
- fn to_id(&self) -> String { - format!("{}:{}", Self::ID, self.to_hex()) - } -} - -impl fmt::Debug for Sha256HashValue { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "sha256:{}", self.to_hex()) - } -} - -impl fmt::Debug for Sha512HashValue { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "sha512:{}", self.to_hex()) - } -} - -/// A SHA-256 hash value for fs-verity operations. -/// -/// This is a 32-byte hash value using the SHA-256 algorithm. -#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned)] -#[repr(C)] -pub struct Sha256HashValue([u8; 32]); - -impl From> for Sha256HashValue { - fn from(value: Output) -> Self { - Self(value.into()) - } -} - -impl FsVerityHashValue for Sha256HashValue { - type Digest = Sha256; - const ALGORITHM: u8 = 1; - const EMPTY: Self = Self([0; 32]); - const ID: &str = "sha256"; -} - -/// A SHA-512 hash value for fs-verity operations. -/// -/// This is a 64-byte hash value using the SHA-512 algorithm. 
-#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned)] -#[repr(C)] -pub struct Sha512HashValue([u8; 64]); - -impl From> for Sha512HashValue { - fn from(value: Output) -> Self { - Self(value.into()) - } -} - -impl FsVerityHashValue for Sha512HashValue { - type Digest = Sha512; - const ALGORITHM: u8 = 2; - const EMPTY: Self = Self([0; 64]); - const ID: &str = "sha512"; -} - -#[cfg(test)] -mod test { - use super::*; - - fn test_fsverity_hash() { - let len = size_of::(); - let hexlen = len * 2; - - let hex = H::EMPTY.to_hex(); - assert_eq!(hex.as_bytes(), [b'0'].repeat(hexlen)); - - assert_eq!(H::EMPTY.to_id(), format!("{}:{}", H::ID, hex)); - assert_eq!(format!("{:?}", H::EMPTY), format!("{}:{}", H::ID, hex)); - - assert_eq!(H::from_hex(&hex), Ok(H::EMPTY)); - - assert_eq!(H::from_hex("lol"), Err(FromHexError::OddLength)); - assert_eq!(H::from_hex("lolo"), Err(FromHexError::InvalidStringLength)); - assert_eq!( - H::from_hex([b'l'].repeat(hexlen)), - Err(FromHexError::InvalidHexCharacter { c: 'l', index: 0 }) - ); - - assert_eq!(H::from_object_dir_and_basename(0, &hex[2..]), Ok(H::EMPTY)); - - assert_eq!(H::from_object_dir_and_basename(0, &hex[2..]), Ok(H::EMPTY)); - - assert_eq!( - H::from_object_dir_and_basename(0, "lol"), - Err(FromHexError::InvalidStringLength) - ); - - assert_eq!( - H::from_object_dir_and_basename(0, [b'l'].repeat(hexlen - 2)), - Err(FromHexError::InvalidHexCharacter { c: 'l', index: 0 }) - ); - - assert_eq!( - H::from_object_pathname(format!("{}/{}", &hex[0..2], &hex[2..])), - Ok(H::EMPTY) - ); - - assert_eq!( - H::from_object_pathname(format!("../this/is/ignored/{}/{}", &hex[0..2], &hex[2..])), - Ok(H::EMPTY) - ); - - assert_eq!( - H::from_object_pathname(&hex), - Err(FromHexError::InvalidStringLength) - ); - - assert_eq!( - H::from_object_pathname("lol"), - Err(FromHexError::InvalidStringLength) - ); - - assert_eq!( - H::from_object_pathname([b'l'].repeat(hexlen + 1)), - 
Err(FromHexError::InvalidHexCharacter { c: 'l', index: 0 }) - ); - - assert_eq!( - H::from_object_pathname(format!("{}0{}", &hex[0..2], &hex[2..])), - Err(FromHexError::InvalidHexCharacter { c: '0', index: 2 }) - ); - } - - #[test] - fn test_sha256hashvalue() { - test_fsverity_hash::(); - } - - #[test] - fn test_sha512hashvalue() { - test_fsverity_hash::(); - } -} +//! Re-exported from composefs-types. +pub use composefs_types::fsverity::*; diff --git a/crates/composefs/src/lib.rs b/crates/composefs/src/lib.rs index 15080a02..f97fe582 100644 --- a/crates/composefs/src/lib.rs +++ b/crates/composefs/src/lib.rs @@ -23,10 +23,7 @@ pub mod generic_tree; #[cfg(any(test, feature = "test"))] pub mod test; -/// All files that contain 64 or fewer bytes (size <= INLINE_CONTENT_MAX) should be stored inline -/// in the erofs image (and also in splitstreams). All files with 65 or more bytes (size > MAX) -/// should be written to the object storage and referred to from the image (and splitstreams). -pub const INLINE_CONTENT_MAX: usize = 64; +pub use composefs_types::INLINE_CONTENT_MAX; /// Internal constants shared across workspace crates. /// diff --git a/crates/composefs/tests/erofs_reader.rs b/crates/composefs/tests/erofs_reader.rs new file mode 100644 index 00000000..59b753ae --- /dev/null +++ b/crates/composefs/tests/erofs_reader.rs @@ -0,0 +1,404 @@ +//! Tests for EROFS reader functionality. +//! +//! These tests exercise the reader code (now in composefs-erofs) using +//! the writer and dumpfile utilities from composefs. 
+ +use std::collections::HashMap; + +use composefs::{ + dumpfile::dumpfile_to_filesystem, + erofs::{ + reader::{collect_objects, DirectoryBlock, Image, InodeHeader, InodeOps}, + writer::mkfs_erofs, + }, + fsverity::Sha256HashValue, +}; +use zerocopy::FromBytes; + +/// Helper to validate that directory entries can be read correctly +fn validate_directory_entries(img: &Image, nid: u64, expected_names: &[&str]) { + let inode = img.inode(nid); + assert!(inode.mode().is_dir(), "Expected directory inode"); + + let mut found_names = Vec::new(); + + // Read inline entries if present + if let Some(inline) = inode.inline() { + let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); + for entry in inline_block.entries() { + let name = std::str::from_utf8(entry.name).unwrap(); + found_names.push(name.to_string()); + } + } + + // Read block entries + for blkid in inode.blocks(img.blkszbits) { + let block = img.directory_block(blkid); + for entry in block.entries() { + let name = std::str::from_utf8(entry.name).unwrap(); + found_names.push(name.to_string()); + } + } + + // Sort for comparison (entries should include . and ..) + found_names.sort(); + let mut expected_sorted: Vec<_> = expected_names.iter().map(|s| s.to_string()).collect(); + expected_sorted.sort(); + + assert_eq!( + found_names, expected_sorted, + "Directory entries mismatch for nid {nid}" + ); +} + +#[test] +fn test_empty_directory() { + // Create filesystem with empty directory + let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - +/empty_dir 4096 40755 2 0 0 0 1000.0 - - - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let image = mkfs_erofs(&fs); + let img = Image::open(&image); + + // Root should have . and .. 
and empty_dir + let root_nid = img.sb.root_nid.get() as u64; + validate_directory_entries(&img, root_nid, &[".", "..", "empty_dir"]); + + // Find empty_dir entry + let root_inode = img.root(); + let mut empty_dir_nid = None; + if let Some(inline) = root_inode.inline() { + let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); + for entry in inline_block.entries() { + if entry.name == b"empty_dir" { + empty_dir_nid = Some(entry.nid()); + break; + } + } + } + for blkid in root_inode.blocks(img.blkszbits) { + let block = img.directory_block(blkid); + for entry in block.entries() { + if entry.name == b"empty_dir" { + empty_dir_nid = Some(entry.nid()); + break; + } + } + } + + let empty_dir_nid = empty_dir_nid.expect("empty_dir not found"); + validate_directory_entries(&img, empty_dir_nid, &[".", ".."]); +} + +#[test] +fn test_directory_with_inline_entries() { + // Create filesystem with directory that has a few entries (should be inline) + let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - +/dir1 4096 40755 2 0 0 0 1000.0 - - - +/dir1/file1 5 100644 1 0 0 0 1000.0 - hello - +/dir1/file2 5 100644 1 0 0 0 1000.0 - world - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let image = mkfs_erofs(&fs); + let img = Image::open(&image); + + // Find dir1 + let root_inode = img.root(); + let mut dir1_nid = None; + if let Some(inline) = root_inode.inline() { + let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); + for entry in inline_block.entries() { + if entry.name == b"dir1" { + dir1_nid = Some(entry.nid()); + break; + } + } + } + for blkid in root_inode.blocks(img.blkszbits) { + let block = img.directory_block(blkid); + for entry in block.entries() { + if entry.name == b"dir1" { + dir1_nid = Some(entry.nid()); + break; + } + } + } + + let dir1_nid = dir1_nid.expect("dir1 not found"); + validate_directory_entries(&img, dir1_nid, &[".", "..", "file1", "file2"]); +} + +#[test] +fn test_directory_with_many_entries() { + // Create a 
directory with many entries to force block storage + let mut dumpfile = String::from("/ 4096 40755 2 0 0 0 1000.0 - - -\n"); + dumpfile.push_str("/bigdir 4096 40755 2 0 0 0 1000.0 - - -\n"); + + // Add many files to force directory blocks + for i in 0..100 { + dumpfile.push_str(&format!( + "/bigdir/file{i:03} 5 100644 1 0 0 0 1000.0 - hello -\n" + )); + } + + let fs = dumpfile_to_filesystem::(&dumpfile).unwrap(); + let image = mkfs_erofs(&fs); + let img = Image::open(&image); + + // Find bigdir + let root_inode = img.root(); + let mut bigdir_nid = None; + if let Some(inline) = root_inode.inline() { + let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); + for entry in inline_block.entries() { + if entry.name == b"bigdir" { + bigdir_nid = Some(entry.nid()); + break; + } + } + } + for blkid in root_inode.blocks(img.blkszbits) { + let block = img.directory_block(blkid); + for entry in block.entries() { + if entry.name == b"bigdir" { + bigdir_nid = Some(entry.nid()); + break; + } + } + } + + let bigdir_nid = bigdir_nid.expect("bigdir not found"); + + // Build expected names + let mut expected: Vec = vec![".".to_string(), "..".to_string()]; + for i in 0..100 { + expected.push(format!("file{i:03}")); + } + let expected_refs: Vec<&str> = expected.iter().map(|s| s.as_str()).collect(); + + validate_directory_entries(&img, bigdir_nid, &expected_refs); +} + +#[test] +fn test_nested_directories() { + // Test deeply nested directory structure + let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - +/a 4096 40755 2 0 0 0 1000.0 - - - +/a/b 4096 40755 2 0 0 0 1000.0 - - - +/a/b/c 4096 40755 2 0 0 0 1000.0 - - - +/a/b/c/file.txt 5 100644 1 0 0 0 1000.0 - hello - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let image = mkfs_erofs(&fs); + let img = Image::open(&image); + + // Navigate through the structure + let root_nid = img.sb.root_nid.get() as u64; + validate_directory_entries(&img, root_nid, &[".", "..", "a"]); + + // Helper to find a directory 
entry by name + let find_entry = |parent_nid: u64, name: &[u8]| -> u64 { + let inode = img.inode(parent_nid); + + if let Some(inline) = inode.inline() { + let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); + for entry in inline_block.entries() { + if entry.name == name { + return entry.nid(); + } + } + } + + for blkid in inode.blocks(img.blkszbits) { + let block = img.directory_block(blkid); + for entry in block.entries() { + if entry.name == name { + return entry.nid(); + } + } + } + panic!("Entry not found: {:?}", std::str::from_utf8(name)); + }; + + let a_nid = find_entry(root_nid, b"a"); + validate_directory_entries(&img, a_nid, &[".", "..", "b"]); + + let b_nid = find_entry(a_nid, b"b"); + validate_directory_entries(&img, b_nid, &[".", "..", "c"]); + + let c_nid = find_entry(b_nid, b"c"); + validate_directory_entries(&img, c_nid, &[".", "..", "file.txt"]); +} + +#[test] +fn test_mixed_entry_types() { + // Test directory with various file types + let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - +/mixed 4096 40755 2 0 0 0 1000.0 - - - +/mixed/regular 10 100644 1 0 0 0 1000.0 - content123 - +/mixed/symlink 7 120777 1 0 0 0 1000.0 /target - - +/mixed/fifo 0 10644 1 0 0 0 1000.0 - - - +/mixed/subdir 4096 40755 2 0 0 0 1000.0 - - - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let image = mkfs_erofs(&fs); + let img = Image::open(&image); + + let root_inode = img.root(); + let mut mixed_nid = None; + if let Some(inline) = root_inode.inline() { + let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); + for entry in inline_block.entries() { + if entry.name == b"mixed" { + mixed_nid = Some(entry.nid()); + break; + } + } + } + for blkid in root_inode.blocks(img.blkszbits) { + let block = img.directory_block(blkid); + for entry in block.entries() { + if entry.name == b"mixed" { + mixed_nid = Some(entry.nid()); + break; + } + } + } + + let mixed_nid = mixed_nid.expect("mixed not found"); + validate_directory_entries( + 
&img, + mixed_nid, + &[".", "..", "regular", "symlink", "fifo", "subdir"], + ); +} + +#[test] +fn test_collect_objects_traversal() { + // Test that object collection properly traverses all directories + let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - +/dir1 4096 40755 2 0 0 0 1000.0 - - - +/dir1/file1 5 100644 1 0 0 0 1000.0 - hello - +/dir2 4096 40755 2 0 0 0 1000.0 - - - +/dir2/subdir 4096 40755 2 0 0 0 1000.0 - - - +/dir2/subdir/file2 5 100644 1 0 0 0 1000.0 - world - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let image = mkfs_erofs(&fs); + + // This should traverse all directories without error + let result = collect_objects::(&image); + assert!( + result.is_ok(), + "Failed to collect objects: {:?}", + result.err() + ); +} + +#[test] +fn test_pr188_empty_inline_directory() -> anyhow::Result<()> { + // Regression test for https://github.com/containers/composefs-rs/pull/188 + // + // The bug: ObjectCollector::visit_inode at lines 553-554 unconditionally does: + // let tail = DirectoryBlock::ref_from_bytes(inode.inline()).unwrap(); + // self.visit_directory_block(tail); + // + // When inode.inline() is empty, DirectoryBlock::ref_from_bytes succeeds but then + // visit_directory_block calls n_entries() which panics trying to read 12 bytes + // from an empty slice. + // + // This test generates an erofs image using C mkcomposefs, which creates directories + // with empty inline sections (unlike the Rust implementation which always includes + // . and .. entries). 
+ + // Generate a C-generated erofs image using mkcomposefs + let dumpfile_content = r#"/ 4096 40755 2 0 0 0 1000.0 - - - +/empty_dir 4096 40755 2 0 0 0 1000.0 - - - +"#; + + // Create temporary files for dumpfile and erofs output + let temp_dir = tempfile::TempDir::new()?; + let temp_dir = temp_dir.path(); + let dumpfile_path = temp_dir.join("pr188_test.dump"); + let erofs_path = temp_dir.join("pr188_test.erofs"); + + // Write dumpfile + std::fs::write(&dumpfile_path, dumpfile_content).expect("Failed to write test dumpfile"); + + // Run mkcomposefs to generate erofs image + let output = std::process::Command::new("mkcomposefs") + .arg("--from-file") + .arg(&dumpfile_path) + .arg(&erofs_path) + .output() + .expect("Failed to run mkcomposefs - is it installed?"); + + assert!( + output.status.success(), + "mkcomposefs failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Read the generated erofs image + let image = std::fs::read(&erofs_path).expect("Failed to read generated erofs"); + + // The C mkcomposefs creates directories with empty inline sections. + let r = collect_objects::(&image).unwrap(); + assert_eq!(r.len(), 0); + + Ok(()) +} + +#[test] +fn test_round_trip_basic() { + // Full round-trip: dumpfile -> tree -> erofs -> read back -> validate + let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - +/file1 5 100644 1 0 0 0 1000.0 - hello - +/file2 6 100644 1 0 0 0 1000.0 - world! 
- +/dir1 4096 40755 2 0 0 0 1000.0 - - - +/dir1/nested 8 100644 1 0 0 0 1000.0 - content1 - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let image = mkfs_erofs(&fs); + let img = Image::open(&image); + + // Verify root entries + let root_nid = img.sb.root_nid.get() as u64; + validate_directory_entries(&img, root_nid, &[".", "..", "file1", "file2", "dir1"]); + + // Collect all entries and verify structure + let mut entries_map: HashMap, u64> = HashMap::new(); + let root_inode = img.root(); + + if let Some(inline) = root_inode.inline() { + let inline_block = DirectoryBlock::ref_from_bytes(inline).unwrap(); + for entry in inline_block.entries() { + entries_map.insert(entry.name.to_vec(), entry.nid()); + } + } + + for blkid in root_inode.blocks(img.blkszbits) { + let block = img.directory_block(blkid); + for entry in block.entries() { + entries_map.insert(entry.name.to_vec(), entry.nid()); + } + } + + // Verify we can read file contents + let file1_nid = entries_map + .get(b"file1".as_slice()) + .expect("file1 not found"); + let file1_inode = img.inode(*file1_nid); + assert!(!file1_inode.mode().is_dir()); + assert_eq!(file1_inode.size(), 5); + + let inline_data = file1_inode.inline(); + assert_eq!(inline_data, Some(b"hello".as_slice())); +} diff --git a/crates/erofs-debug/Cargo.toml b/crates/erofs-debug/Cargo.toml index 34e7b3c2..e2ce2f58 100644 --- a/crates/erofs-debug/Cargo.toml +++ b/crates/erofs-debug/Cargo.toml @@ -12,7 +12,7 @@ version.workspace = true [dependencies] clap = { version = "4.0.1", default-features = false, features = ["std", "help", "usage", "derive"] } -composefs = { workspace = true } +composefs-erofs = { workspace = true } [lints] workspace = true diff --git a/crates/erofs-debug/src/main.rs b/crates/erofs-debug/src/main.rs index fc5912e0..7bc5b8c3 100644 --- a/crates/erofs-debug/src/main.rs +++ b/crates/erofs-debug/src/main.rs @@ -8,7 +8,7 @@ use std::{fs::File, io::Read, path::PathBuf}; use clap::Parser; -use 
composefs::erofs::debug::debug_img; +use composefs_erofs::debug::debug_img; /// Produce a detailed dump of an entire erofs image ///