diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..53d069f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,28 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Build & Run + run: | + cargo fmt --all -- --check + cargo clippy -- -D warnings + cargo test + cargo run --release -- README.md -v > /tmp/README.md + diff README.md /tmp/README.md diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..dcc9f68 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,334 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "blkmap" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e903a5e43e4317043f46c35cfa51105639e5bdef5f7ceff0b25d5fbe8803b11" +dependencies = [ + "bitflags", + "clap", + "libc", +] + +[[package]] +name = "blkpath" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48ba9607977fad2cc59260f7c00346e84de1577de6b4e35cf62e93100d9dd775" +dependencies = [ + "clap", + "libc", +] + +[[package]] +name = "blkreader" +version = "0.1.0" +dependencies = [ + "blkmap", + "blkpath", + "clap", + "libc", + "once_cell", + "sudo", + "tempfile", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] 
+name = "clap_lex" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "log" +version = "0.4.29" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "proc-macro2" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "sudo" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88bd84d4c082e18e37fef52c0088e4407dabcef19d23a607fb4b5ee03b7d5b83" +dependencies = [ + "libc", + "log", +] + +[[package]] +name = "syn" +version = "2.0.114" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ef7e3ec --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "blkreader" +version = "0.1.0" +edition = "2021" +authors = 
["SF-Zhou"] +description = "Read file data directly from block device using extent information" +license = "MIT OR Apache-2.0" +repository = "https://github.com/SF-Zhou/blkreader" +documentation = "https://docs.rs/blkreader" +readme = "README.md" +keywords = ["filesystem", "block-device", "direct-io", "extent", "fiemap"] +categories = ["filesystem", "os::linux-apis"] + +[lib] +name = "blkreader" +path = "src/lib.rs" + +[[bin]] +name = "blkreader" +path = "src/bin/blkreader.rs" + +[dependencies] +blkpath = "0.1" +blkmap = "0.1" +libc = "0.2" +once_cell = "1.19" +clap = { version = "4.5", features = ["derive"] } +sudo = "0.6" + +[dev-dependencies] +tempfile = "3.14" diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..038d25d --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..aca1a46 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 SF-Zhou + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..481ca1e --- /dev/null +++ b/README.md @@ -0,0 +1,214 @@ +# blkreader + +[![CI](https://github.com/SF-Zhou/blkreader/actions/workflows/ci.yml/badge.svg)](https://github.com/SF-Zhou/blkreader/actions/workflows/ci.yml) +[![Crates.io](https://img.shields.io/crates/v/blkreader.svg)](https://crates.io/crates/blkreader) +[![Documentation](https://docs.rs/blkreader/badge.svg)](https://docs.rs/blkreader) +[![License](https://img.shields.io/crates/l/blkreader.svg)](https://github.com/SF-Zhou/blkreader#license) + +Read file data directly from block device using extent information. 
+ +## Overview + +`blkreader` provides a mechanism to read file data directly from the underlying block device by querying the file's extent information via the Linux `FIEMAP` ioctl. This is particularly useful in scenarios where: + +- Storage space has been pre-allocated using `fallocate` + `fdatasync` +- Extent information has been persisted to disk +- The file's data may not have been fully synced (written extent state not persisted) +- You need to recover raw data from the block device + +### Use Case + +Consider an I/O pattern where: + +1. Before each write, you use `fallocate` + `fdatasync` to pre-allocate a complete storage extent +2. The extent information has been confirmed persisted to disk +3. Subsequent Direct I/O writes fall within these extents +4. However, the written extent state may not have been persisted before a crash + +In this case, while the file metadata might not reflect the written data, the raw data definitely exists on the block device. If you maintain the written length in a reliable location, you can use `blkreader` to recover the raw data directly from the block device. 
+ +## Features + +- Query file extent information using `FIEMAP` ioctl via [`blkmap`](https://crates.io/crates/blkmap) +- Resolve block device paths using [`blkpath`](https://crates.io/crates/blkpath) +- Read data directly from block devices using Direct I/O +- Global block device cache for improved performance +- Configurable handling of holes and unwritten extents +- Fallback to regular file I/O when safe (no root required) + +## Installation + +Add `blkreader` to your `Cargo.toml`: + +```toml +[dependencies] +blkreader = "0.1" +``` + +Or install the CLI tool: + +```bash +cargo install blkreader +``` + +## Library Usage + +### Simple Read + +```rust +use blkreader::BlkReader; +use std::path::Path; + +fn main() -> std::io::Result<()> { + let path = Path::new("/path/to/file"); + let mut buf = vec![0u8; 4096]; + + // Read 4096 bytes from offset 0 + let bytes_read = path.blk_read_at(&mut buf, 0)?; + println!("Read {} bytes", bytes_read); + + Ok(()) +} +``` + +### Read with Options + +```rust +use blkreader::{BlkReader, Options}; +use std::path::Path; + +fn main() -> std::io::Result<()> { + let path = Path::new("/path/to/file"); + let mut buf = vec![0u8; 4096]; + + // Configure read options + let options = Options::new() + .with_cache(true) // Enable block device caching (default) + .with_fill_holes(true) // Fill holes with zeros + .with_zero_unwritten(true) // Fill unwritten extents with zeros + .with_allow_fallback(true); // Allow fallback to regular file I/O + + // Read with detailed state information + let state = path.blk_read_at_opt(&mut buf, 0, &options)?; + + println!("Read {} bytes", state.bytes_read); + println!("Block device: {}", state.block_device_path.display()); + println!("Extents: {:?}", state.extents); + println!("Used fallback: {}", state.used_fallback); + + Ok(()) +} +``` + +### Read from File Handle + +```rust +use blkreader::BlkReader; +use std::fs::File; + +fn main() -> std::io::Result<()> { + let file = File::open("/path/to/file")?; + let mut 
buf = vec![0u8; 4096]; + + let bytes_read = file.blk_read_at(&mut buf, 0)?; + println!("Read {} bytes", bytes_read); + + Ok(()) +} +``` + +## CLI Usage + +```bash +# Basic usage - read entire file +blkreader /path/to/file + +# Read from specific offset +blkreader /path/to/file --offset 1024 + +# Read specific length +blkreader /path/to/file --offset 0 --length 4096 + +# Verbose output (show extents and block device info) +blkreader /path/to/file -v + +# Write output to file +blkreader /path/to/file -O output.bin + +# Fill holes and unwritten extents with zeros +blkreader /path/to/file --fill-holes --zero-unwritten + +# Allow fallback to regular file I/O when safe +blkreader /path/to/file --allow-fallback +``` + +### CLI Options + +| Option | Description | +|--------|-------------| +| `-o, --offset <OFFSET>` | Byte offset to start reading from (default: 0) | +| `-l, --length <LENGTH>` | Number of bytes to read (default: entire file) | +| `-v, --verbose` | Enable verbose output | +| `-O, --output <OUTPUT>` | Write output to file instead of stdout | +| `--fill-holes` | Fill holes with zeros instead of stopping | +| `--zero-unwritten` | Fill unwritten extents with zeros instead of reading raw block data | +| `--allow-fallback` | Allow fallback to regular file I/O when safe | +| `--no-cache` | Disable block device caching | + +## Options + +### `enable_cache` (default: `true`) + +When enabled, block device file handles are cached globally based on the device ID. This improves performance for repeated reads from files on the same filesystem. + +### `fill_holes` (default: `false`) + +When enabled, holes in file extents are filled with zeros. When disabled, reading a hole causes an early EOF return. + +### `zero_unwritten` (default: `false`) + +When enabled, unwritten (preallocated but not yet written) extents are filled with zeros, matching normal filesystem read behavior.
+ +When disabled (default), unwritten extents are read directly from the block device, returning whatever raw data exists at those physical locations. This is useful for data recovery scenarios where you want to access the actual data written to pre-allocated extents. + +### `allow_fallback` (default: `false`) + +When enabled, if the queried extents fully cover the read range and contain no unwritten extents, the read will be performed using regular file I/O instead of direct block device I/O. This avoids the need for root privileges in such cases. + +## Direct I/O Alignment Requirements + +When using the library API to read directly from block devices (not using fallback mode), the following alignment requirements must be met: + +- **Buffer alignment**: The buffer should be aligned to at least 512 bytes (sector size). For optimal performance, 4096-byte alignment is recommended. +- **Offset alignment**: The read offset should be aligned to 512 bytes. +- **Length alignment**: The buffer length should be aligned to 512 bytes. + +If alignment requirements are not met, the read operation may fail with an `EINVAL` error. + +**Note**: The CLI tool handles alignment automatically by adjusting offsets and using aligned buffers internally. + +## Requirements + +- Linux operating system +- Root privileges (for direct block device access, unless using fallback mode) +- Access to `/sys/dev/block/` or `/proc/self/mountinfo` (for block device resolution) + +## Platform Support + +This crate only works on Linux systems. It has been tested on: + +- x86_64 (Intel/AMD) +- aarch64 (ARM64) + +## License + +Licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. 
diff --git a/src/bin/blkreader.rs b/src/bin/blkreader.rs new file mode 100644 index 0000000..e3279e6 --- /dev/null +++ b/src/bin/blkreader.rs @@ -0,0 +1,277 @@ +//! CLI tool for reading file data directly from block devices. +//! +//! This tool uses the `blkreader` library to read file data directly from +//! the underlying block device using extent information. + +use blkmap::Fiemap; +use blkpath::ResolveDevice; +use blkreader::{BlkReader, Options}; +use clap::Parser; +use std::fs::File; +use std::io::{self, Write}; +use std::path::PathBuf; + +/// Default chunk size for reading large files (1 MB). +const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024; + +/// Read file data directly from block device using extent information. +/// +/// This tool queries the file's extent information via FIEMAP and reads +/// data directly from the physical locations on the underlying block device. +#[derive(Parser, Debug)] +#[command(name = "blkreader")] +#[command(author, version, about, long_about = None)] +struct Args { + /// Path to the file to read + path: PathBuf, + + /// Byte offset to start reading from + #[arg(short, long, default_value = "0")] + offset: u64, + + /// Number of bytes to read (default: entire file from offset) + #[arg(short, long)] + length: Option, + + /// Enable verbose output (show block device path, extent info, etc.) + #[arg(short, long)] + verbose: bool, + + /// Output file path (default: stdout) + #[arg(short = 'O', long)] + output: Option, + + /// Fill holes with zeros instead of stopping + #[arg(long)] + fill_holes: bool, + + /// Fill unwritten extents with zeros instead of reading raw block data + #[arg(long)] + zero_unwritten: bool, + + /// Allow fallback to regular file I/O when safe + #[arg(long)] + allow_fallback: bool, + + /// Disable block device caching + #[arg(long)] + no_cache: bool, + + /// Alignment for direct IO. 
+ #[arg(long, default_value_t = 512)] + alignment: u64, +} + +fn main() { + let args = Args::parse(); + + if let Err(e) = run(&args) { + eprintln!("Error: {}", e); + std::process::exit(1); + } +} + +/// Allocate an aligned buffer for Direct I/O. +fn alloc_aligned_buffer(size: usize, align: usize) -> Vec { + // Allocate with extra space for alignment + let layout = std::alloc::Layout::from_size_align(size, align).unwrap(); + let ptr = unsafe { std::alloc::alloc_zeroed(layout) }; + if ptr.is_null() { + panic!("Failed to allocate aligned buffer"); + } + unsafe { Vec::from_raw_parts(ptr, size, size) } +} + +/// Align offset down to the alignment boundary. +fn align_down(offset: u64, alignment: u64) -> u64 { + offset & !(alignment - 1) +} + +/// Align length up to the alignment boundary. +fn align_up(length: u64, alignment: u64) -> u64 { + (length + alignment - 1) & !(alignment - 1) +} + +fn run(args: &Args) -> io::Result<()> { + // Determine the length to read + let file = File::open(&args.path)?; + let file_size = file.metadata()?.len(); + + let length = match args.length { + Some(len) => len, + None => file_size.saturating_sub(args.offset), + }; + + if length == 0 { + if args.verbose { + eprintln!("Nothing to read (length is 0)"); + } + return Ok(()); + } + + // Request sudo privileges only if not using fallback mode + // or if we need to access the block device directly + if !args.allow_fallback { + sudo::escalate_if_needed().map_err(|e| { + io::Error::new( + io::ErrorKind::PermissionDenied, + format!("Failed to escalate privileges: {}", e), + ) + })?; + } + + // Print verbose information + if args.verbose { + print_verbose_info(&args.path, args.offset, length, args.alignment)?; + } + + // Build options + let options = Options::new() + .with_cache(!args.no_cache) + .with_fill_holes(args.fill_holes) + .with_zero_unwritten(args.zero_unwritten) + .with_allow_fallback(args.allow_fallback); + + // Open output file or use stdout + let mut output: Box = if let 
Some(output_path) = &args.output { + Box::new(File::create(output_path)?) + } else { + Box::new(io::stdout()) + }; + + // Calculate aligned read parameters for Direct I/O + let aligned_offset = align_down(args.offset, args.alignment); + let offset_adjustment = (args.offset - aligned_offset) as usize; + let total_length = align_up(length + offset_adjustment as u64, args.alignment); + + // Determine chunk size (aligned to ALIGNMENT) + let chunk_size = DEFAULT_CHUNK_SIZE; + + // Allocate aligned buffer. + let mut buf = alloc_aligned_buffer(chunk_size, args.alignment as usize); + + // Read in chunks to handle large files + let mut total_bytes_read = 0usize; + let mut current_aligned_offset = aligned_offset; + let mut remaining = total_length; + let mut first_chunk = true; + let mut block_device_path = PathBuf::new(); + + while remaining > 0 { + let read_size = std::cmp::min(remaining as usize, chunk_size); + let aligned_size = align_up(read_size as u64, args.alignment) as usize; + + // Perform the read + let state = args.path.blk_read_at_opt( + &mut buf[..aligned_size], + current_aligned_offset, + &options, + )?; + + if first_chunk { + block_device_path = state.block_device_path.clone(); + first_chunk = false; + } + + if state.bytes_read == 0 { + break; + } + + // Calculate the actual data to output from this chunk + let skip = if current_aligned_offset == aligned_offset { + offset_adjustment + } else { + 0 + }; + + let bytes_to_write = std::cmp::min( + state.bytes_read.saturating_sub(skip), + (length as usize).saturating_sub(total_bytes_read), + ); + + if bytes_to_write > 0 { + output.write_all(&buf[skip..skip + bytes_to_write])?; + total_bytes_read += bytes_to_write; + } + + // Check if we've read enough + if total_bytes_read >= length as usize { + break; + } + + // Short read indicates EOF + if state.bytes_read < read_size { + break; + } + + current_aligned_offset += read_size as u64; + remaining -= read_size as u64; + } + + if args.verbose { + eprintln!(); + 
eprintln!("Read {} bytes", total_bytes_read); + if !block_device_path.as_os_str().is_empty() { + eprintln!("Block device: {}", block_device_path.display()); + } + if let Some(output_path) = &args.output { + eprintln!("Output written to: {}", output_path.display()); + } + } + + Ok(()) +} + +fn print_verbose_info(path: &PathBuf, offset: u64, length: u64, alignment: u64) -> io::Result<()> { + eprintln!("File: {}", path.display()); + eprintln!("Offset: {} (0x{:x})", offset, offset); + eprintln!("Length: {} (0x{:x})", length, length); + + // Show alignment info + let aligned_offset = align_down(offset, alignment); + let aligned_length = align_up(length + (offset - aligned_offset), alignment); + if aligned_offset != offset || aligned_length != length { + eprintln!( + "Aligned offset: {} (0x{:x}), Aligned length: {} (0x{:x})", + aligned_offset, aligned_offset, aligned_length, aligned_length + ); + } + + // Resolve block device + match path.resolve_device() { + Ok(device) => { + eprintln!("Block device: {}", device.display()); + } + Err(e) => { + eprintln!("Block device: (unable to resolve: {})", e); + } + } + + // Query extents + let file = File::open(path)?; + match file.fiemap_range(offset, length) { + Ok(extents) => { + eprintln!(); + eprintln!("Extents for range [{}, {}):", offset, offset + length); + eprintln!( + "{:<6} {:<20} {:<20} {:<20} Flags", + "Index", "Logical", "Physical", "Length" + ); + eprintln!("{}", "-".repeat(80)); + + for (i, extent) in extents.iter().enumerate() { + eprintln!( + "{:<6} 0x{:016x} 0x{:016x} 0x{:016x} {:?}", + i, extent.logical, extent.physical, extent.length, extent.flags + ); + } + eprintln!("{}", "-".repeat(80)); + eprintln!("Total: {} extent(s)", extents.len()); + } + Err(e) => { + eprintln!("Extents: (unable to query: {})", e); + } + } + + Ok(()) +} diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000..da0c6a0 --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,120 @@ +//! Global block device cache. +//! +//! 
This module provides a global cache for block device file handles, +//! keyed by the device ID (major:minor). This allows multiple reads +//! from files on the same filesystem to share a single file handle +//! to the underlying block device. + +use blkpath::ResolveDevice; +use once_cell::sync::Lazy; +use std::collections::HashMap; +use std::fs::{File, OpenOptions}; +use std::io; +use std::os::unix::fs::{MetadataExt, OpenOptionsExt}; +use std::path::PathBuf; +use std::sync::{Arc, RwLock}; + +/// A cached block device entry containing the path and file handle. +#[derive(Debug)] +pub struct CachedDevice { + /// Path to the block device. + pub path: PathBuf, + /// File handle opened with O_DIRECT for reading. + pub file: File, +} + +impl CachedDevice { + /// Create a new cached device entry. + fn new(path: PathBuf) -> io::Result { + let file = OpenOptions::new() + .read(true) + .custom_flags(libc::O_DIRECT) + .open(&path)?; + Ok(Self { path, file }) + } +} + +/// Global cache for block device handles. +/// +/// The cache is keyed by the device ID (from `stat.st_dev`), which +/// uniquely identifies a filesystem. All files on the same filesystem +/// share the same underlying block device. +static DEVICE_CACHE: Lazy>>> = + Lazy::new(|| RwLock::new(HashMap::new())); + +/// Get or create a cached block device entry for the given file. +/// +/// This function resolves the block device path from the file only if +/// the device is not already cached. This avoids the expensive +/// `resolve_device()` call on every read operation. +/// +/// # Arguments +/// +/// * `file` - A reference to an open file +/// +/// # Returns +/// +/// An `Arc` to the cached device entry, or an error if the device +/// could not be resolved or opened. 
+pub fn get_or_create_cached_device(file: &File) -> io::Result> { + let dev_id = file.metadata()?.dev(); + + // First, try to get from cache with a read lock + { + let cache = DEVICE_CACHE.read().unwrap(); + if let Some(entry) = cache.get(&dev_id) { + return Ok(Arc::clone(entry)); + } + } + + // Not in cache, resolve device path and acquire write lock + let device_path = file.resolve_device()?; + let mut cache = DEVICE_CACHE.write().unwrap(); + + // Double-check in case another thread added it + if let Some(entry) = cache.get(&dev_id) { + return Ok(Arc::clone(entry)); + } + + // Create new entry + let entry = Arc::new(CachedDevice::new(device_path)?); + cache.insert(dev_id, Arc::clone(&entry)); + Ok(entry) +} + +/// Open a block device without caching. +/// +/// This resolves the block device path from the file and opens it. +/// +/// # Arguments +/// +/// * `file` - A reference to an open file +/// +/// # Returns +/// +/// A `CachedDevice` entry (not actually cached), or an error if +/// the device could not be resolved or opened. +pub fn open_device_uncached(file: &File) -> io::Result { + let device_path = file.resolve_device()?; + CachedDevice::new(device_path) +} + +/// Clear the global device cache. +/// +/// This is mainly useful for testing. +#[cfg(test)] +pub fn clear_cache() { + let mut cache = DEVICE_CACHE.write().unwrap(); + cache.clear(); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_operations() { + // Just test that the cache can be cleared without panicking + clear_cache(); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..2603a7d --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,70 @@ +//! # blkreader +//! +//! A Rust crate for reading file data directly from block devices using extent information. +//! +//! ## Overview +//! +//! `blkreader` provides a mechanism to read file data directly from the underlying block device +//! by querying the file's extent information via the Linux `FIEMAP` ioctl. 
This is particularly
+//! useful in scenarios where:
+//!
+//! - Storage space has been pre-allocated using `fallocate` + `fdatasync`
+//! - Extent information has been persisted to disk
+//! - The file's data may not have been fully synced (written extent state not persisted)
+//! - You need to recover raw data from the block device
+//!
+//! ## Features
+//!
+//! - Query file extent information using `FIEMAP` ioctl via [`blkmap`]
+//! - Resolve block device paths using [`blkpath`]
+//! - Read data directly from block devices using Direct I/O
+//! - Global block device cache for improved performance
+//! - Configurable handling of holes and unwritten extents
+//! - Fallback to regular file I/O when safe
+//!
+//! ## Direct I/O Alignment Requirements
+//!
+//! When reading directly from block devices (not using fallback mode), the following
+//! alignment requirements must be met for Direct I/O:
+//!
+//! - **Buffer alignment**: The buffer must be aligned to at least 512 bytes (sector size).
+//!   For optimal performance on modern devices, 4096-byte alignment is recommended.
+//! - **Offset alignment**: The read offset should be aligned to 512 bytes.
+//! - **Length alignment**: The read length should be aligned to 512 bytes.
+//!
+//! If alignment requirements are not met, the underlying read may fail with an
+//! `EINVAL` error. The CLI tool handles alignment automatically.
+//!
+//! ## Example
+//!
+//! ```no_run
+//! use blkreader::{BlkReader, Options};
+//! use std::path::Path;
+//!
+//! let path = Path::new("/path/to/file");
+//! // 4096-byte length; NOTE: `vec!` does not guarantee the buffer's address is 512-byte aligned
+//! let mut buf = vec![0u8; 4096];
+//!
+//! // Simple read (offset 0 is aligned)
+//! let bytes_read = path.blk_read_at(&mut buf, 0).unwrap();
+//!
+//! // Read with options
+//! let options = Options::default();
+//! let state = path.blk_read_at_opt(&mut buf, 0, &options).unwrap();
+//! println!("Read {} bytes from {}", state.bytes_read, state.block_device_path.display());
+//! ```
+//!
+//! ## Privileges
+//!
+//! This crate requires root privileges to read from block devices. The CLI tool
+//! automatically requests sudo permissions when needed.
+
+mod cache;
+mod options;
+mod reader;
+mod state;
+
+pub use blkmap::FiemapExtent as Extent;
+pub use options::Options;
+pub use reader::BlkReader;
+pub use state::State;
diff --git a/src/options.rs b/src/options.rs
new file mode 100644
index 0000000..e54d8dd
--- /dev/null
+++ b/src/options.rs
@@ -0,0 +1,108 @@
+//! Configuration options for blkreader operations.
+
+/// Options for controlling the read behavior.
+#[derive(Debug, Clone)]
+pub struct Options {
+    /// Enable global block device cache.
+    ///
+    /// When enabled, block device file handles are cached globally
+    /// based on the device ID, improving performance for repeated reads
+    /// from files on the same filesystem.
+    pub enable_cache: bool,
+
+    /// Fill holes in file extents with zeros.
+    ///
+    /// When disabled, reading a hole will cause an early EOF return.
+    pub fill_holes: bool,
+
+    /// Fill unwritten extents with zeros instead of reading raw data.
+    ///
+    /// When disabled (default), unwritten extents are read from the block
+    /// device, returning whatever raw data exists at those physical locations.
+    /// This is useful for data recovery scenarios.
+    ///
+    /// When enabled, unwritten extents are filled with zeros (matching
+    /// normal filesystem read behavior).
+    pub zero_unwritten: bool,
+
+    /// Allow fallback to regular file read when safe.
+    ///
+    /// When enabled, if the queried extents fully cover the read range
+    /// and contain no unwritten extents, the read will be performed
+    /// using regular file I/O instead of direct block device I/O.
+    /// This avoids the need for root privileges in such cases.
+    pub allow_fallback: bool,
+}
+
+impl Default for Options {
+    fn default() -> Self {
+        Self {
+            enable_cache: true,
+            fill_holes: false,
+            zero_unwritten: false,
+            allow_fallback: false,
+        }
+    }
+}
+
+impl Options {
+    /// Create a new Options with default values (cache on, everything else off).
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Enable or disable the global block device cache.
+    pub fn with_cache(mut self, enable: bool) -> Self {
+        self.enable_cache = enable;
+        self
+    }
+
+    /// Enable or disable filling holes with zeros.
+    pub fn with_fill_holes(mut self, fill: bool) -> Self {
+        self.fill_holes = fill;
+        self
+    }
+
+    /// Enable or disable filling unwritten extents with zeros.
+    ///
+    /// When disabled (default), unwritten extents are read from the block
+    /// device, returning raw data. When enabled, they are filled with zeros.
+    pub fn with_zero_unwritten(mut self, zero: bool) -> Self {
+        self.zero_unwritten = zero;
+        self
+    }
+
+    /// Enable or disable fallback to regular file read.
+    pub fn with_allow_fallback(mut self, allow: bool) -> Self {
+        self.allow_fallback = allow;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_default_options() {
+        let opts = Options::default();
+        assert!(opts.enable_cache);
+        assert!(!opts.fill_holes);
+        assert!(!opts.zero_unwritten);
+        assert!(!opts.allow_fallback);
+    }
+
+    #[test]
+    fn test_builder_pattern() {
+        let opts = Options::new()
+            .with_cache(false)
+            .with_fill_holes(true)
+            .with_zero_unwritten(true)
+            .with_allow_fallback(true);
+
+        assert!(!opts.enable_cache);
+        assert!(opts.fill_holes);
+        assert!(opts.zero_unwritten);
+        assert!(opts.allow_fallback);
+    }
+}
diff --git a/src/reader.rs b/src/reader.rs
new file mode 100644
index 0000000..749e52e
--- /dev/null
+++ b/src/reader.rs
@@ -0,0 +1,419 @@
+//! Core reader trait and implementations.
+//!
+//! This module provides the [`BlkReader`] trait which enables reading file data
+//! directly from the underlying block device using extent information.
+
+use crate::cache::{get_or_create_cached_device, open_device_uncached, CachedDevice};
+use crate::options::Options;
+use crate::state::State;
+
+use blkmap::{Fiemap, FiemapExtent};
+
+use std::fs::File;
+use std::io;
+use std::os::unix::fs::FileExt;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+
+/// Trait for reading file data directly from block devices.
+///
+/// This trait provides two methods for reading:
+/// - [`blk_read_at`](BlkReader::blk_read_at): Simple read that returns the number of bytes read
+/// - [`blk_read_at_opt`](BlkReader::blk_read_at_opt): Advanced read with options that returns detailed state
+///
+/// # Direct I/O Alignment Requirements
+///
+/// When reading directly from block devices (not using fallback mode), the following
+/// alignment requirements must be met:
+///
+/// - **Buffer alignment**: The buffer should be aligned to at least 512 bytes (sector size).
+/// - **Offset alignment**: The read offset should be aligned to 512 bytes.
+/// - **Length alignment**: The buffer length should be aligned to 512 bytes.
+///
+/// If alignment requirements are not met, the underlying read may fail with `EINVAL`.
+///
+/// # Example
+///
+/// ```no_run
+/// use blkreader::{BlkReader, Options};
+/// use std::path::Path;
+///
+/// let path = Path::new("/path/to/file");
+/// // Use aligned buffer size (4096 is a common block size)
+/// let mut buf = vec![0u8; 4096];
+///
+/// // Simple read (offset 0 is aligned)
+/// let bytes = path.blk_read_at(&mut buf, 0).unwrap();
+///
+/// // Read with options
+/// let opts = Options::new().with_fill_holes(true);
+/// let state = path.blk_read_at_opt(&mut buf, 0, &opts).unwrap();
+/// ```
+pub trait BlkReader {
+    /// Read data from the file at the specified offset.
+    ///
+    /// This is a convenience method that calls [`blk_read_at_opt`](BlkReader::blk_read_at_opt)
+    /// with default options and returns just the number of bytes read.
+    ///
+    /// # Arguments
+    ///
+    /// * `buf` - Buffer to read data into. For Direct I/O, should be aligned to 512 bytes.
+    /// * `offset` - Byte offset in the file to start reading from. Should be aligned to 512 bytes.
+    ///
+    /// # Returns
+    ///
+    /// The number of bytes successfully read, or an error.
+    fn blk_read_at(&self, buf: &mut [u8], offset: u64) -> io::Result<usize> {
+        let state = self.blk_read_at_opt(buf, offset, &Options::default())?;
+        Ok(state.bytes_read)
+    }
+
+    /// Read data from the file at the specified offset with options.
+    ///
+    /// This method queries the file's extent information, resolves the block device,
+    /// and reads data directly from the physical locations on disk.
+    ///
+    /// # Arguments
+    ///
+    /// * `buf` - Buffer to read data into. For Direct I/O, should be aligned to 512 bytes.
+    /// * `offset` - Byte offset in the file to start reading from. Should be aligned to 512 bytes.
+    /// * `options` - Configuration options for the read operation
+    ///
+    /// # Returns
+    ///
+    /// A [`State`] containing the block device path, extent information,
+    /// and number of bytes read, or an error.
+    fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result<State>;
+}
+
+/// Internal helper to perform the actual read operation.
+struct ReadContext<'a> { + file: &'a File, + options: &'a Options, +} + +impl<'a> ReadContext<'a> { + fn new(file: &'a File, options: &'a Options) -> Self { + Self { file, options } + } + + fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { + if buf.is_empty() { + return Ok(State::fallback(Vec::new(), 0)); + } + + let length = buf.len() as u64; + + // Query extent information for the requested range + let extents = self.file.fiemap_range(offset, length)?; + + if extents.is_empty() { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "file has no extents", + )); + } + + // Check if fallback is allowed and safe + if self.options.allow_fallback && self.can_use_fallback(&extents, offset, length) { + return self.fallback_read(buf, offset, extents); + } + + // Get device file handle (cached or uncached) + let device = self.get_device_handle()?; + + // Perform the read + let bytes_read = self.read_from_device(&device, buf, offset, &extents)?; + + Ok(State::new( + device.path().clone(), + extents, + bytes_read, + false, + )) + } + + /// Check if we can safely use fallback (regular file I/O). + /// + /// Fallback is safe if: + /// 1. All extents fully cover the requested range + /// 2. No extents are unwritten + /// 3. 
No holes in the range + fn can_use_fallback(&self, extents: &[FiemapExtent], offset: u64, length: u64) -> bool { + if extents.is_empty() { + return false; + } + + let end = offset + length; + let mut current = offset; + + for extent in extents { + // Check for hole before this extent + if extent.logical > current { + return false; + } + + // Check for unwritten extent + if extent.flags.is_unwritten() { + return false; + } + + // Check for unknown/delalloc (hole-like) + if extent.flags.is_unknown() || extent.flags.is_delalloc() { + return false; + } + + // Update current position + let extent_end = extent.logical + extent.length; + if extent_end >= end { + return true; + } + current = extent_end; + } + + false + } + + /// Perform a fallback read using regular file I/O. + fn fallback_read( + &self, + buf: &mut [u8], + offset: u64, + extents: Vec, + ) -> io::Result { + let bytes_read = FileExt::read_at(self.file, buf, offset)?; + Ok(State::fallback(extents, bytes_read)) + } + + /// Get a device handle, either cached or uncached based on options. + fn get_device_handle(&self) -> io::Result { + if self.options.enable_cache { + let cached = get_or_create_cached_device(self.file)?; + Ok(DeviceHandle::Cached(cached)) + } else { + let uncached = open_device_uncached(self.file)?; + Ok(DeviceHandle::Uncached(uncached)) + } + } + + /// Read data from the block device based on extent information. 
+ fn read_from_device( + &self, + device: &DeviceHandle, + buf: &mut [u8], + offset: u64, + extents: &[FiemapExtent], + ) -> io::Result { + let length = buf.len() as u64; + let end = offset + length; + let mut bytes_read = 0usize; + let mut current_offset = offset; + + for extent in extents { + if current_offset >= end { + break; + } + + let extent_end = extent.logical + extent.length; + + // Handle hole before this extent + if extent.logical > current_offset { + let hole_end = extent.logical.min(end); + let hole_len = (hole_end - current_offset) as usize; + + if !self.options.fill_holes { + // EOF at hole + return Ok(bytes_read); + } + + // Fill with zeros + let buf_start = bytes_read; + let buf_end = buf_start + hole_len; + buf[buf_start..buf_end].fill(0); + bytes_read += hole_len; + current_offset = hole_end; + + if current_offset >= end { + break; + } + } + + // Handle unwritten extent - fill with zeros if requested + if extent.flags.is_unwritten() && self.options.zero_unwritten { + // Fill with zeros for unwritten extent + let read_start = current_offset.max(extent.logical); + let read_end = extent_end.min(end); + let read_len = (read_end - read_start) as usize; + + let buf_start = bytes_read; + let buf_end = buf_start + read_len; + buf[buf_start..buf_end].fill(0); + bytes_read += read_len; + current_offset = read_end; + continue; + } + // Otherwise unwritten extents fall through to read raw data from block device + + // Handle hole-like extents (UNKNOWN, DELALLOC) + if extent.flags.is_unknown() || extent.flags.is_delalloc() { + let read_start = current_offset.max(extent.logical); + let read_end = extent_end.min(end); + let hole_len = (read_end - read_start) as usize; + + if !self.options.fill_holes { + return Ok(bytes_read); + } + + let buf_start = bytes_read; + let buf_end = buf_start + hole_len; + buf[buf_start..buf_end].fill(0); + bytes_read += hole_len; + current_offset = read_end; + continue; + } + + // Normal extent (or unwritten with 
zero_unwritten=false) - read from block device + let read_start = current_offset.max(extent.logical); + let read_end = extent_end.min(end); + let read_len = (read_end - read_start) as usize; + + // Calculate physical offset + let physical_offset = extent.physical + (read_start - extent.logical); + + // Read from device + let buf_start = bytes_read; + let buf_end = buf_start + read_len; + let actual_read = device.read_at(&mut buf[buf_start..buf_end], physical_offset)?; + + bytes_read += actual_read; + current_offset = read_start + actual_read as u64; + + if actual_read < read_len { + // Short read + break; + } + } + + // Handle trailing hole + if current_offset < end && self.options.fill_holes { + let remaining = (end - current_offset) as usize; + let buf_start = bytes_read; + let buf_end = buf_start + remaining; + if buf_end <= buf.len() { + buf[buf_start..buf_end].fill(0); + bytes_read += remaining; + } + } + + Ok(bytes_read) + } +} + +/// Handle to a block device, either cached or uncached. +enum DeviceHandle { + Cached(Arc), + Uncached(CachedDevice), +} + +impl DeviceHandle { + /// Get the path of the block device. + fn path(&self) -> &PathBuf { + match self { + DeviceHandle::Cached(cached) => &cached.path, + DeviceHandle::Uncached(uncached) => &uncached.path, + } + } + + /// Read data from the device at the specified physical offset. 
+ fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { + let file = match self { + DeviceHandle::Cached(cached) => &cached.file, + DeviceHandle::Uncached(uncached) => &uncached.file, + }; + + let bytes = FileExt::read_at(file, buf, offset)?; + Ok(bytes) + } +} + +// Implementation for Path +impl BlkReader for Path { + fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result { + let file = File::open(self)?; + let ctx = ReadContext::new(&file, options); + ctx.read_at(buf, offset) + } +} + +// Implementation for PathBuf +impl BlkReader for PathBuf { + fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result { + self.as_path().blk_read_at_opt(buf, offset, options) + } +} + +// Implementation for File +impl BlkReader for File { + fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result { + let ctx = ReadContext::new(self, options); + ctx.read_at(buf, offset) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_options_builder() { + let opts = Options::new() + .with_cache(false) + .with_fill_holes(true) + .with_zero_unwritten(true) + .with_allow_fallback(true); + + assert!(!opts.enable_cache); + assert!(opts.fill_holes); + assert!(opts.zero_unwritten); + assert!(opts.allow_fallback); + } + + #[test] + fn test_can_use_fallback() { + use blkmap::ExtentFlags; + + let file = File::open("/proc/self/exe").unwrap(); + let options = Options::new().with_allow_fallback(true); + let ctx = ReadContext::new(&file, &options); + + // Empty extents - cannot fallback + assert!(!ctx.can_use_fallback(&[], 0, 100)); + + // Normal extent covering range - can fallback + let extents = vec![FiemapExtent { + logical: 0, + physical: 1000, + length: 4096, + flags: ExtentFlags::empty(), + }]; + assert!(ctx.can_use_fallback(&extents, 0, 100)); + + // Unwritten extent - cannot fallback + let extents = vec![FiemapExtent { + logical: 0, + physical: 1000, + length: 4096, + flags: 
ExtentFlags::UNWRITTEN, + }]; + assert!(!ctx.can_use_fallback(&extents, 0, 100)); + + // Hole at start - cannot fallback + let extents = vec![FiemapExtent { + logical: 100, + physical: 1000, + length: 4096, + flags: ExtentFlags::empty(), + }]; + assert!(!ctx.can_use_fallback(&extents, 0, 200)); + } +} diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 0000000..5bf0988 --- /dev/null +++ b/src/state.rs @@ -0,0 +1,91 @@ +//! State returned from read operations. + +use blkmap::FiemapExtent; +use std::path::PathBuf; + +/// Result state from a read operation. +#[derive(Debug, Clone)] +pub struct State { + /// Path to the block device used for reading. + pub block_device_path: PathBuf, + + /// List of extents that were involved in the read operation. + pub extents: Vec, + + /// Number of bytes successfully read. + pub bytes_read: usize, + + /// Whether the read used fallback (regular file I/O instead of block device). + pub used_fallback: bool, +} + +impl State { + /// Create a new State with the given parameters. + pub fn new( + block_device_path: PathBuf, + extents: Vec, + bytes_read: usize, + used_fallback: bool, + ) -> Self { + Self { + block_device_path, + extents, + bytes_read, + used_fallback, + } + } + + /// Create a State for a fallback read (regular file I/O). + /// + /// Even in fallback mode, the extents are included for informational purposes. 
+ pub fn fallback(extents: Vec, bytes_read: usize) -> Self { + Self { + block_device_path: PathBuf::new(), + extents, + bytes_read, + used_fallback: true, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use blkmap::ExtentFlags; + + #[test] + fn test_state_new() { + let state = State::new( + PathBuf::from("/dev/sda"), + vec![FiemapExtent { + logical: 0, + physical: 1000, + length: 4096, + flags: ExtentFlags::empty(), + }], + 4096, + false, + ); + + assert_eq!(state.block_device_path, PathBuf::from("/dev/sda")); + assert_eq!(state.extents.len(), 1); + assert_eq!(state.bytes_read, 4096); + assert!(!state.used_fallback); + } + + #[test] + fn test_state_fallback() { + let extents = vec![FiemapExtent { + logical: 0, + physical: 1000, + length: 4096, + flags: ExtentFlags::empty(), + }]; + let state = State::fallback(extents, 1024); + + assert!(state.block_device_path.as_os_str().is_empty()); + assert_eq!(state.extents.len(), 1); + assert_eq!(state.bytes_read, 1024); + assert!(state.used_fallback); + } +}