From 43ca1b6e36888410fb109873fdad0677408c6bf4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 17 Jan 2026 11:28:41 +0000 Subject: [PATCH 1/6] Initial plan From 0e775690d00737bf188bdc2e7e7dd7b7f23b7b50 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 17 Jan 2026 11:42:19 +0000 Subject: [PATCH 2/6] Implement blkreader crate with library and CLI tool Co-authored-by: SF-Zhou <7477599+SF-Zhou@users.noreply.github.com> --- .github/workflows/ci.yml | 65 ++++++ Cargo.lock | 334 +++++++++++++++++++++++++++++++ Cargo.toml | 31 +++ LICENSE-APACHE | 201 +++++++++++++++++++ LICENSE-MIT | 21 ++ README.md | 200 +++++++++++++++++++ src/bin/blkreader.rs | 176 +++++++++++++++++ src/cache.rs | 111 +++++++++++ src/lib.rs | 56 ++++++ src/options.rs | 100 ++++++++++ src/reader.rs | 412 +++++++++++++++++++++++++++++++++++++++ src/state.rs | 83 ++++++++ 12 files changed, 1790 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 src/bin/blkreader.rs create mode 100644 src/cache.rs create mode 100644 src/lib.rs create mode 100644 src/options.rs create mode 100644 src/reader.rs create mode 100644 src/state.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6416565 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,65 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +env: + CARGO_TERM_COLOR: always + +jobs: + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo check --all-features + + test: + name: Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: 
cargo test --all-features + + fmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + - run: cargo fmt --all -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: clippy + - run: cargo clippy --all-features -- -D warnings + + doc: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + - run: cargo doc --no-deps --all-features + env: + RUSTDOCFLAGS: -D warnings + + msrv: + name: Minimum Supported Rust Version + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@1.75.0 + - run: cargo check --all-features diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..dcc9f68 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,334 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "blkmap" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e903a5e43e4317043f46c35cfa51105639e5bdef5f7ceff0b25d5fbe8803b11" +dependencies = [ + "bitflags", + "clap", + "libc", +] + +[[package]] +name = "blkpath" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48ba9607977fad2cc59260f7c00346e84de1577de6b4e35cf62e93100d9dd775" +dependencies = [ + "clap", + 
"libc", +] + +[[package]] +name = "blkreader" +version = "0.1.0" +dependencies = [ + "blkmap", + "blkpath", + "clap", + "libc", + "once_cell", + "sudo", + "tempfile", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "proc-macro2" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +dependencies = [ + "unicode-ident", +] + +[[package]] 
+name = "quote" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "sudo" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88bd84d4c082e18e37fef52c0088e4407dabcef19d23a607fb4b5ee03b7d5b83" +dependencies = [ + "libc", + "log", +] + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ef7e3ec --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "blkreader" +version = "0.1.0" +edition = "2021" +authors = ["SF-Zhou"] +description = "Read file data directly from block device using extent information" +license = "MIT OR Apache-2.0" +repository = "https://github.com/SF-Zhou/blkreader" +documentation = "https://docs.rs/blkreader" +readme = "README.md" +keywords = ["filesystem", "block-device", "direct-io", "extent", "fiemap"] +categories = ["filesystem", "os::linux-apis"] + +[lib] +name = "blkreader" +path = "src/lib.rs" + +[[bin]] +name = "blkreader" +path = "src/bin/blkreader.rs" + +[dependencies] +blkpath = "0.1" +blkmap = "0.1" +libc = "0.2" +once_cell = "1.19" +clap = { version = "4.5", features = ["derive"] } +sudo = "0.6" + +[dev-dependencies] +tempfile = "3.14" diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..48f96ec --- 
/dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [2024] [SF-Zhou] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..86d8e5b --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 SF-Zhou + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0f39b25 --- /dev/null +++ b/README.md @@ -0,0 +1,200 @@ +# blkreader + +[![CI](https://github.com/SF-Zhou/blkreader/actions/workflows/ci.yml/badge.svg)](https://github.com/SF-Zhou/blkreader/actions/workflows/ci.yml) +[![Crates.io](https://img.shields.io/crates/v/blkreader.svg)](https://crates.io/crates/blkreader) +[![Documentation](https://docs.rs/blkreader/badge.svg)](https://docs.rs/blkreader) +[![License](https://img.shields.io/crates/l/blkreader.svg)](https://github.com/SF-Zhou/blkreader#license) + +Read file data directly from block device using extent information. + +## Overview + +`blkreader` provides a mechanism to read file data directly from the underlying block device by querying the file's extent information via the Linux `FIEMAP` ioctl. This is particularly useful in scenarios where: + +- Storage space has been pre-allocated using `fallocate` + `fdatasync` +- Extent information has been persisted to disk +- The file's data may not have been fully synced (written extent state not persisted) +- You need to recover raw data from the block device + +### Use Case + +Consider an I/O pattern where: + +1. Before each write, you use `fallocate` + `fdatasync` to pre-allocate a complete storage extent +2. The extent information has been confirmed persisted to disk +3. Subsequent Direct I/O writes fall within these extents +4. 
However, the written extent state may not have been persisted before a crash + +In this case, while the file metadata might not reflect the written data, the raw data definitely exists on the block device. If you maintain the written length in a reliable location, you can use `blkreader` to recover the raw data directly from the block device. + +## Features + +- Query file extent information using `FIEMAP` ioctl via [`blkmap`](https://crates.io/crates/blkmap) +- Resolve block device paths using [`blkpath`](https://crates.io/crates/blkpath) +- Read data directly from block devices using Direct I/O +- Global block device cache for improved performance +- Configurable handling of holes and unwritten extents +- Fallback to regular file I/O when safe (no root required) + +## Installation + +Add `blkreader` to your `Cargo.toml`: + +```toml +[dependencies] +blkreader = "0.1" +``` + +Or install the CLI tool: + +```bash +cargo install blkreader +``` + +## Library Usage + +### Simple Read + +```rust +use blkreader::BlkReader; +use std::path::Path; + +fn main() -> std::io::Result<()> { + let path = Path::new("/path/to/file"); + let mut buf = vec![0u8; 4096]; + + // Read 4096 bytes from offset 0 + let bytes_read = path.blk_read_at(&mut buf, 0)?; + println!("Read {} bytes", bytes_read); + + Ok(()) +} +``` + +### Read with Options + +```rust +use blkreader::{BlkReader, Options}; +use std::path::Path; + +fn main() -> std::io::Result<()> { + let path = Path::new("/path/to/file"); + let mut buf = vec![0u8; 4096]; + + // Configure read options + let options = Options::new() + .with_cache(true) // Enable block device caching (default) + .with_fill_holes(true) // Fill holes with zeros + .with_fill_unwritten(true) // Fill unwritten extents with zeros + .with_allow_fallback(true); // Allow fallback to regular file I/O + + // Read with detailed state information + let state = path.blk_read_at_opt(&mut buf, 0, &options)?; + + println!("Read {} bytes", state.bytes_read); + println!("Block 
device: {}", state.block_device_path.display()); + println!("Extents: {:?}", state.extents); + println!("Used fallback: {}", state.used_fallback); + + Ok(()) +} +``` + +### Read from File Handle + +```rust +use blkreader::BlkReader; +use std::fs::File; + +fn main() -> std::io::Result<()> { + let file = File::open("/path/to/file")?; + let mut buf = vec![0u8; 4096]; + + let bytes_read = file.blk_read_at(&mut buf, 0)?; + println!("Read {} bytes", bytes_read); + + Ok(()) +} +``` + +## CLI Usage + +```bash +# Basic usage - read entire file +blkreader /path/to/file + +# Read from specific offset +blkreader /path/to/file --offset 1024 + +# Read specific length +blkreader /path/to/file --offset 0 --length 4096 + +# Verbose output (show extents and block device info) +blkreader /path/to/file -v + +# Write output to file +blkreader /path/to/file -O output.bin + +# Fill holes and unwritten extents with zeros +blkreader /path/to/file --fill-holes --fill-unwritten + +# Allow fallback to regular file I/O when safe +blkreader /path/to/file --allow-fallback +``` + +### CLI Options + +| Option | Description | +|--------|-------------| +| `-o, --offset <OFFSET>` | Byte offset to start reading from (default: 0) | +| `-l, --length <LENGTH>` | Number of bytes to read (default: entire file) | +| `-v, --verbose` | Enable verbose output | +| `-O, --output <OUTPUT>` | Write output to file instead of stdout | +| `--fill-holes` | Fill holes with zeros instead of stopping | +| `--fill-unwritten` | Fill unwritten extents with zeros | +| `--allow-fallback` | Allow fallback to regular file I/O when safe | +| `--no-cache` | Disable block device caching | + +## Options + +### `enable_cache` (default: `true`) + +When enabled, block device file handles are cached globally based on the device ID. This improves performance for repeated reads from files on the same filesystem. + +### `fill_holes` (default: `false`) + +When enabled, holes in file extents are filled with zeros. 
When disabled, reading a hole causes an early EOF return. + +### `fill_unwritten` (default: `false`) + +When enabled, unwritten (preallocated but not yet written) extents are filled with zeros. When disabled, reading an unwritten extent causes an early EOF return. + +### `allow_fallback` (default: `false`) + +When enabled, if the queried extents fully cover the read range and contain no unwritten extents, the read will be performed using regular file I/O instead of direct block device I/O. This avoids the need for root privileges in such cases. + +## Requirements + +- Linux operating system +- Root privileges (for direct block device access, unless using fallback mode) +- Access to `/sys/dev/block/` or `/proc/self/mountinfo` (for block device resolution) + +## Platform Support + +This crate only works on Linux systems. It has been tested on: + +- x86_64 (Intel/AMD) +- aarch64 (ARM64) + +## License + +Licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. diff --git a/src/bin/blkreader.rs b/src/bin/blkreader.rs new file mode 100644 index 0000000..20a1baa --- /dev/null +++ b/src/bin/blkreader.rs @@ -0,0 +1,176 @@ +//! CLI tool for reading file data directly from block devices. +//! +//! This tool uses the `blkreader` library to read file data directly from +//! the underlying block device using extent information. + +use blkmap::Fiemap; +use blkpath::ResolveDevice; +use blkreader::{BlkReader, Options}; +use clap::Parser; +use std::fs::File; +use std::io::{self, Write}; +use std::path::PathBuf; + +/// Read file data directly from block device using extent information. 
+/// +/// This tool queries the file's extent information via FIEMAP and reads +/// data directly from the physical locations on the underlying block device. +#[derive(Parser, Debug)] +#[command(name = "blkreader")] +#[command(author, version, about, long_about = None)] +struct Args { + /// Path to the file to read + path: PathBuf, + + /// Byte offset to start reading from + #[arg(short, long, default_value = "0")] + offset: u64, + + /// Number of bytes to read (default: entire file from offset) + #[arg(short, long)] + length: Option, + + /// Enable verbose output (show block device path, extent info, etc.) + #[arg(short, long)] + verbose: bool, + + /// Output file path (default: stdout) + #[arg(short = 'O', long)] + output: Option, + + /// Fill holes with zeros instead of stopping + #[arg(long)] + fill_holes: bool, + + /// Fill unwritten extents with zeros instead of stopping + #[arg(long)] + fill_unwritten: bool, + + /// Allow fallback to regular file I/O when safe + #[arg(long)] + allow_fallback: bool, + + /// Disable block device caching + #[arg(long)] + no_cache: bool, +} + +fn main() { + // Request sudo privileges + sudo::escalate_if_needed().expect("Failed to escalate privileges"); + + let args = Args::parse(); + + if let Err(e) = run(&args) { + eprintln!("Error: {}", e); + std::process::exit(1); + } +} + +fn run(args: &Args) -> io::Result<()> { + // Determine the length to read + let file = File::open(&args.path)?; + let file_size = file.metadata()?.len(); + + let length = match args.length { + Some(len) => len, + None => file_size.saturating_sub(args.offset), + }; + + if length == 0 { + if args.verbose { + eprintln!("Nothing to read (length is 0)"); + } + return Ok(()); + } + + // Print verbose information + if args.verbose { + print_verbose_info(&args.path, args.offset, length)?; + } + + // Prepare buffer + let mut buf = vec![0u8; length as usize]; + + // Build options + let options = Options::new() + .with_cache(!args.no_cache) + 
.with_fill_holes(args.fill_holes) + .with_fill_unwritten(args.fill_unwritten) + .with_allow_fallback(args.allow_fallback); + + // Perform the read + let state = args.path.blk_read_at_opt(&mut buf, args.offset, &options)?; + + if args.verbose { + eprintln!(); + eprintln!("Read {} bytes", state.bytes_read); + if state.used_fallback { + eprintln!("(Used fallback to regular file I/O)"); + } else { + eprintln!("Block device: {}", state.block_device_path.display()); + } + } + + // Truncate buffer to actual bytes read + buf.truncate(state.bytes_read); + + // Write output + if let Some(output_path) = &args.output { + let mut output_file = File::create(output_path)?; + output_file.write_all(&buf)?; + if args.verbose { + eprintln!("Output written to: {}", output_path.display()); + } + } else { + let stdout = io::stdout(); + let mut handle = stdout.lock(); + handle.write_all(&buf)?; + } + + Ok(()) +} + +fn print_verbose_info(path: &PathBuf, offset: u64, length: u64) -> io::Result<()> { + eprintln!("File: {}", path.display()); + eprintln!("Offset: {} (0x{:x})", offset, offset); + eprintln!("Length: {} (0x{:x})", length, length); + + // Resolve block device + match path.resolve_device() { + Ok(device) => { + eprintln!("Block device: {}", device.display()); + } + Err(e) => { + eprintln!("Block device: (unable to resolve: {})", e); + } + } + + // Query extents + let file = File::open(path)?; + match file.fiemap_range(offset, length) { + Ok(extents) => { + eprintln!(); + eprintln!("Extents for range [{}, {}):", offset, offset + length); + eprintln!( + "{:<6} {:<20} {:<20} {:<20} Flags", + "Index", "Logical", "Physical", "Length" + ); + eprintln!("{}", "-".repeat(80)); + + for (i, extent) in extents.iter().enumerate() { + eprintln!( + "{:<6} 0x{:016x} 0x{:016x} 0x{:016x} {:?}", + i, extent.logical, extent.physical, extent.length, extent.flags + ); + } + eprintln!("{}", "-".repeat(80)); + eprintln!("Total: {} extent(s)", extents.len()); + } + Err(e) => { + eprintln!("Extents: 
(unable to query: {})", e); + } + } + + Ok(()) +} diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000..db91a80 --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,111 @@ +//! Global block device cache. +//! +//! This module provides a global cache for block device file handles, +//! keyed by the device ID (major:minor). This allows multiple reads +//! from files on the same filesystem to share a single file handle +//! to the underlying block device. + +use once_cell::sync::Lazy; +use std::collections::HashMap; +use std::fs::{File, OpenOptions}; +use std::io; +use std::os::unix::fs::OpenOptionsExt; +use std::path::PathBuf; +use std::sync::{Arc, RwLock}; + +/// A cached block device entry containing the path and file handle. +#[derive(Debug)] +pub struct CachedDevice { + /// Path to the block device. + #[allow(dead_code)] + pub path: PathBuf, + /// File handle opened with O_DIRECT for reading. + pub file: File, +} + +impl CachedDevice { + /// Create a new cached device entry. + fn new(path: PathBuf) -> io::Result { + let file = OpenOptions::new() + .read(true) + .custom_flags(libc::O_DIRECT) + .open(&path)?; + Ok(Self { path, file }) + } +} + +/// Global cache for block device handles. +/// +/// The cache is keyed by the device ID (from `stat.st_dev`), which +/// uniquely identifies a filesystem. All files on the same filesystem +/// share the same underlying block device. +static DEVICE_CACHE: Lazy>>> = + Lazy::new(|| RwLock::new(HashMap::new())); + +/// Get or create a cached block device entry. +/// +/// # Arguments +/// +/// * `dev_id` - The device ID from file metadata (`stat.st_dev`) +/// * `device_path` - The path to the block device +/// +/// # Returns +/// +/// An `Arc` to the cached device entry, or an error if the device +/// could not be opened. 
+pub fn get_or_create_device(dev_id: u64, device_path: PathBuf) -> io::Result> { + // First, try to get from cache with a read lock + { + let cache = DEVICE_CACHE.read().unwrap(); + if let Some(entry) = cache.get(&dev_id) { + return Ok(Arc::clone(entry)); + } + } + + // Not in cache, acquire write lock and create + let mut cache = DEVICE_CACHE.write().unwrap(); + + // Double-check in case another thread added it + if let Some(entry) = cache.get(&dev_id) { + return Ok(Arc::clone(entry)); + } + + // Create new entry + let entry = Arc::new(CachedDevice::new(device_path)?); + cache.insert(dev_id, Arc::clone(&entry)); + Ok(entry) +} + +/// Open a block device without caching. +/// +/// # Arguments +/// +/// * `device_path` - The path to the block device +/// +/// # Returns +/// +/// A `CachedDevice` entry (not actually cached), or an error if +/// the device could not be opened. +pub fn open_device_uncached(device_path: PathBuf) -> io::Result { + CachedDevice::new(device_path) +} + +/// Clear the global device cache. +/// +/// This is mainly useful for testing. +#[cfg(test)] +pub fn clear_cache() { + let mut cache = DEVICE_CACHE.write().unwrap(); + cache.clear(); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_operations() { + // Just test that the cache can be cleared without panicking + clear_cache(); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..eeaff4d --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,56 @@ +//! # blkreader +//! +//! A Rust crate for reading file data directly from block devices using extent information. +//! +//! ## Overview +//! +//! `blkreader` provides a mechanism to read file data directly from the underlying block device +//! by querying the file's extent information via the Linux `FIEMAP` ioctl. This is particularly +//! useful in scenarios where: +//! +//! - Storage space has been pre-allocated using `fallocate` + `fdatasync` +//! - Extent information has been persisted to disk +//! 
- The file's data may not have been fully synced (written extent state not persisted) +//! - You need to recover raw data from the block device +//! +//! ## Features +//! +//! - Query file extent information using `FIEMAP` ioctl via [`blkmap`] +//! - Resolve block device paths using [`blkpath`] +//! - Read data directly from block devices using Direct I/O +//! - Global block device cache for improved performance +//! - Configurable handling of holes and unwritten extents +//! - Fallback to regular file I/O when safe +//! +//! ## Example +//! +//! ```no_run +//! use blkreader::{BlkReader, Options}; +//! use std::path::Path; +//! +//! let path = Path::new("/path/to/file"); +//! let mut buf = vec![0u8; 4096]; +//! +//! // Simple read +//! let bytes_read = path.blk_read_at(&mut buf, 0).unwrap(); +//! +//! // Read with options +//! let options = Options::default(); +//! let state = path.blk_read_at_opt(&mut buf, 0, &options).unwrap(); +//! println!("Read {} bytes from {}", state.bytes_read, state.block_device_path.display()); +//! ``` +//! +//! ## Safety +//! +//! This crate requires root privileges to read from block devices. The CLI tool +//! automatically requests sudo permissions when needed. + +mod cache; +mod options; +mod reader; +mod state; + +pub use blkmap::FiemapExtent as Extent; +pub use options::Options; +pub use reader::BlkReader; +pub use state::State; diff --git a/src/options.rs b/src/options.rs new file mode 100644 index 0000000..a5e07c2 --- /dev/null +++ b/src/options.rs @@ -0,0 +1,100 @@ +//! Configuration options for blkreader operations. + +/// Options for controlling the read behavior. +#[derive(Debug, Clone)] +pub struct Options { + /// Enable global block device cache. + /// + /// When enabled, block device file handles are cached globally + /// based on the device ID, improving performance for repeated reads + /// from files on the same filesystem. + pub enable_cache: bool, + + /// Fill holes in file extents with zeros. 
+ /// + /// When disabled, reading a hole will cause an early EOF return. + pub fill_holes: bool, + + /// Fill unwritten extents with zeros. + /// + /// When disabled, reading an unwritten extent will cause an early EOF return. + pub fill_unwritten: bool, + + /// Allow fallback to regular file read when safe. + /// + /// When enabled, if the queried extents fully cover the read range + /// and contain no unwritten extents, the read will be performed + /// using regular file I/O instead of direct block device I/O. + /// This avoids the need for root privileges in such cases. + pub allow_fallback: bool, +} + +impl Default for Options { + fn default() -> Self { + Self { + enable_cache: true, + fill_holes: false, + fill_unwritten: false, + allow_fallback: false, + } + } +} + +impl Options { + /// Create a new Options with default values. + pub fn new() -> Self { + Self::default() + } + + /// Enable or disable the global block device cache. + pub fn with_cache(mut self, enable: bool) -> Self { + self.enable_cache = enable; + self + } + + /// Enable or disable filling holes with zeros. + pub fn with_fill_holes(mut self, fill: bool) -> Self { + self.fill_holes = fill; + self + } + + /// Enable or disable filling unwritten extents with zeros. + pub fn with_fill_unwritten(mut self, fill: bool) -> Self { + self.fill_unwritten = fill; + self + } + + /// Enable or disable fallback to regular file read. 
+ pub fn with_allow_fallback(mut self, allow: bool) -> Self { + self.allow_fallback = allow; + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_options() { + let opts = Options::default(); + assert!(opts.enable_cache); + assert!(!opts.fill_holes); + assert!(!opts.fill_unwritten); + assert!(!opts.allow_fallback); + } + + #[test] + fn test_builder_pattern() { + let opts = Options::new() + .with_cache(false) + .with_fill_holes(true) + .with_fill_unwritten(true) + .with_allow_fallback(true); + + assert!(!opts.enable_cache); + assert!(opts.fill_holes); + assert!(opts.fill_unwritten); + assert!(opts.allow_fallback); + } +} diff --git a/src/reader.rs b/src/reader.rs new file mode 100644 index 0000000..4571bb1 --- /dev/null +++ b/src/reader.rs @@ -0,0 +1,412 @@ +//! Core reader trait and implementations. +//! +//! This module provides the [`BlkReader`] trait which enables reading file data +//! directly from the underlying block device using extent information. + +use crate::cache::{get_or_create_device, open_device_uncached, CachedDevice}; +use crate::options::Options; +use crate::state::State; + +use blkmap::{Fiemap, FiemapExtent}; +use blkpath::ResolveDevice; + +use std::fs::File; +use std::io; +use std::os::unix::fs::{FileExt, MetadataExt}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +/// Trait for reading file data directly from block devices. 
+/// +/// This trait provides two methods for reading: +/// - [`blk_read_at`](BlkReader::blk_read_at): Simple read that returns the number of bytes read +/// - [`blk_read_at_opt`](BlkReader::blk_read_at_opt): Advanced read with options that returns detailed state +/// +/// # Example +/// +/// ```no_run +/// use blkreader::{BlkReader, Options}; +/// use std::path::Path; +/// +/// let path = Path::new("/path/to/file"); +/// let mut buf = vec![0u8; 4096]; +/// +/// // Simple read +/// let bytes = path.blk_read_at(&mut buf, 0).unwrap(); +/// +/// // Read with options +/// let opts = Options::new().with_fill_holes(true); +/// let state = path.blk_read_at_opt(&mut buf, 0, &opts).unwrap(); +/// ``` +pub trait BlkReader { + /// Read data from the file at the specified offset. + /// + /// This is a convenience method that calls [`blk_read_at_opt`](BlkReader::blk_read_at_opt) + /// with default options and returns just the number of bytes read. + /// + /// # Arguments + /// + /// * `buf` - Buffer to read data into + /// * `offset` - Byte offset in the file to start reading from + /// + /// # Returns + /// + /// The number of bytes successfully read, or an error. + fn blk_read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { + let state = self.blk_read_at_opt(buf, offset, &Options::default())?; + Ok(state.bytes_read) + } + + /// Read data from the file at the specified offset with options. + /// + /// This method queries the file's extent information, resolves the block device, + /// and reads data directly from the physical locations on disk. + /// + /// # Arguments + /// + /// * `buf` - Buffer to read data into + /// * `offset` - Byte offset in the file to start reading from + /// * `options` - Configuration options for the read operation + /// + /// # Returns + /// + /// A [`State`] containing the block device path, extent information, + /// and number of bytes read, or an error. 
+ fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result; +} + +/// Internal helper to perform the actual read operation. +struct ReadContext<'a> { + file: &'a File, + file_path: Option<&'a Path>, + options: &'a Options, +} + +impl<'a> ReadContext<'a> { + fn new(file: &'a File, file_path: Option<&'a Path>, options: &'a Options) -> Self { + Self { + file, + file_path, + options, + } + } + + fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { + if buf.is_empty() { + return Ok(State::fallback(0)); + } + + let length = buf.len() as u64; + + // Query extent information for the requested range + let extents = self.file.fiemap_range(offset, length)?; + + if extents.is_empty() { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "file has no extents", + )); + } + + // Check if fallback is allowed and safe + if self.options.allow_fallback && self.can_use_fallback(&extents, offset, length) { + return self.fallback_read(buf, offset); + } + + // Get block device path + let device_path = self.resolve_device_path()?; + + // Get device file handle (cached or uncached) + let device = self.get_device_handle(&device_path)?; + + // Perform the read + let bytes_read = self.read_from_device(&device, buf, offset, &extents)?; + + Ok(State::new(device_path, extents, bytes_read, false)) + } + + /// Check if we can safely use fallback (regular file I/O). + /// + /// Fallback is safe if: + /// 1. All extents fully cover the requested range + /// 2. No extents are unwritten + /// 3. 
No holes in the range
+    fn can_use_fallback(&self, extents: &[FiemapExtent], offset: u64, length: u64) -> bool {
+        if extents.is_empty() {
+            return false;
+        }
+
+        let end = offset.saturating_add(length); // a wrapped `end` would look fully covered
+        let mut current = offset;
+
+        for extent in extents {
+            // A gap between the covered prefix and this extent is a hole
+            if extent.logical > current {
+                return false;
+            }
+
+            // Unwritten extents must not be served through regular file I/O here
+            if extent.flags.is_unwritten() {
+                return false;
+            }
+
+            // Unknown/delalloc extents are treated like holes
+            if extent.flags.is_unknown() || extent.flags.is_delalloc() {
+                return false;
+            }
+
+            // Extend the covered prefix; stop as soon as it reaches the end of the request
+            let extent_end = extent.logical.saturating_add(extent.length);
+            if extent_end >= end {
+                return true;
+            }
+            current = extent_end;
+        }
+
+        false
+    }
+
+    /// Perform a fallback read using regular file I/O; the state carries no device or extents.
+    fn fallback_read(&self, buf: &mut [u8], offset: u64) -> io::Result<State> {
+        let bytes_read = FileExt::read_at(self.file, buf, offset)?;
+        Ok(State::fallback(bytes_read))
+    }
+
+    /// Resolve the block device path, preferring the file path when one was supplied.
+    fn resolve_device_path(&self) -> io::Result<PathBuf> {
+        if let Some(path) = self.file_path {
+            path.resolve_device()
+        } else {
+            self.file.resolve_device()
+        }
+    }
+
+    /// Get a device handle: cached globally by the file's `st_dev`, or freshly opened if disabled.
+    fn get_device_handle(&self, device_path: &Path) -> io::Result<DeviceHandle> {
+        if self.options.enable_cache {
+            let dev_id = self.file.metadata()?.dev();
+            let cached = get_or_create_device(dev_id, device_path.to_path_buf())?;
+            Ok(DeviceHandle::Cached(cached))
+        } else {
+            let uncached = open_device_uncached(device_path.to_path_buf())?;
+            Ok(DeviceHandle::Uncached(uncached))
+        }
+    }
+
+    /// Read data from the block device based on extent information.
+ fn read_from_device( + &self, + device: &DeviceHandle, + buf: &mut [u8], + offset: u64, + extents: &[FiemapExtent], + ) -> io::Result { + let length = buf.len() as u64; + let end = offset + length; + let mut bytes_read = 0usize; + let mut current_offset = offset; + + for extent in extents { + if current_offset >= end { + break; + } + + let extent_end = extent.logical + extent.length; + + // Handle hole before this extent + if extent.logical > current_offset { + let hole_end = extent.logical.min(end); + let hole_len = (hole_end - current_offset) as usize; + + if !self.options.fill_holes { + // EOF at hole + return Ok(bytes_read); + } + + // Fill with zeros + let buf_start = bytes_read; + let buf_end = buf_start + hole_len; + buf[buf_start..buf_end].fill(0); + bytes_read += hole_len; + current_offset = hole_end; + + if current_offset >= end { + break; + } + } + + // Handle unwritten extent + if extent.flags.is_unwritten() { + if !self.options.fill_unwritten { + // EOF at unwritten + return Ok(bytes_read); + } + + // Fill with zeros for unwritten extent + let read_start = current_offset.max(extent.logical); + let read_end = extent_end.min(end); + let read_len = (read_end - read_start) as usize; + + let buf_start = bytes_read; + let buf_end = buf_start + read_len; + buf[buf_start..buf_end].fill(0); + bytes_read += read_len; + current_offset = read_end; + continue; + } + + // Handle hole-like extents (UNKNOWN, DELALLOC) + if extent.flags.is_unknown() || extent.flags.is_delalloc() { + let read_start = current_offset.max(extent.logical); + let read_end = extent_end.min(end); + let hole_len = (read_end - read_start) as usize; + + if !self.options.fill_holes { + return Ok(bytes_read); + } + + let buf_start = bytes_read; + let buf_end = buf_start + hole_len; + buf[buf_start..buf_end].fill(0); + bytes_read += hole_len; + current_offset = read_end; + continue; + } + + // Normal extent - read from block device + let read_start = current_offset.max(extent.logical); + let 
read_end = extent_end.min(end); + let read_len = (read_end - read_start) as usize; + + // Calculate physical offset + let physical_offset = extent.physical + (read_start - extent.logical); + + // Read from device + let buf_start = bytes_read; + let buf_end = buf_start + read_len; + let actual_read = device.read_at(&mut buf[buf_start..buf_end], physical_offset)?; + + bytes_read += actual_read; + current_offset = read_start + actual_read as u64; + + if actual_read < read_len { + // Short read + break; + } + } + + // Handle trailing hole + if current_offset < end && self.options.fill_holes { + let remaining = (end - current_offset) as usize; + let buf_start = bytes_read; + let buf_end = buf_start + remaining; + if buf_end <= buf.len() { + buf[buf_start..buf_end].fill(0); + bytes_read += remaining; + } + } + + Ok(bytes_read) + } +} + +/// Handle to a block device, either cached or uncached. +enum DeviceHandle { + Cached(Arc), + Uncached(CachedDevice), +} + +impl DeviceHandle { + /// Read data from the device at the specified physical offset. 
+ fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { + let file = match self { + DeviceHandle::Cached(cached) => &cached.file, + DeviceHandle::Uncached(uncached) => &uncached.file, + }; + + let bytes = FileExt::read_at(file, buf, offset)?; + Ok(bytes) + } +} + +// Implementation for Path +impl BlkReader for Path { + fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result { + let file = File::open(self)?; + let ctx = ReadContext::new(&file, Some(self), options); + ctx.read_at(buf, offset) + } +} + +// Implementation for PathBuf +impl BlkReader for PathBuf { + fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result { + self.as_path().blk_read_at_opt(buf, offset, options) + } +} + +// Implementation for File +impl BlkReader for File { + fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result { + let ctx = ReadContext::new(self, None, options); + ctx.read_at(buf, offset) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_options_builder() { + let opts = Options::new() + .with_cache(false) + .with_fill_holes(true) + .with_fill_unwritten(true) + .with_allow_fallback(true); + + assert!(!opts.enable_cache); + assert!(opts.fill_holes); + assert!(opts.fill_unwritten); + assert!(opts.allow_fallback); + } + + #[test] + fn test_can_use_fallback() { + use blkmap::ExtentFlags; + + let file = File::open("/proc/self/exe").unwrap(); + let options = Options::new().with_allow_fallback(true); + let ctx = ReadContext::new(&file, None, &options); + + // Empty extents - cannot fallback + assert!(!ctx.can_use_fallback(&[], 0, 100)); + + // Normal extent covering range - can fallback + let extents = vec![FiemapExtent { + logical: 0, + physical: 1000, + length: 4096, + flags: ExtentFlags::empty(), + }]; + assert!(ctx.can_use_fallback(&extents, 0, 100)); + + // Unwritten extent - cannot fallback + let extents = vec![FiemapExtent { + logical: 0, + physical: 1000, + 
length: 4096, + flags: ExtentFlags::UNWRITTEN, + }]; + assert!(!ctx.can_use_fallback(&extents, 0, 100)); + + // Hole at start - cannot fallback + let extents = vec![FiemapExtent { + logical: 100, + physical: 1000, + length: 4096, + flags: ExtentFlags::empty(), + }]; + assert!(!ctx.can_use_fallback(&extents, 0, 200)); + } +} diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 0000000..67450d4 --- /dev/null +++ b/src/state.rs @@ -0,0 +1,83 @@ +//! State returned from read operations. + +use blkmap::FiemapExtent; +use std::path::PathBuf; + +/// Result state from a read operation. +#[derive(Debug, Clone)] +pub struct State { + /// Path to the block device used for reading. + pub block_device_path: PathBuf, + + /// List of extents that were involved in the read operation. + pub extents: Vec, + + /// Number of bytes successfully read. + pub bytes_read: usize, + + /// Whether the read used fallback (regular file I/O instead of block device). + pub used_fallback: bool, +} + +impl State { + /// Create a new State with the given parameters. + pub fn new( + block_device_path: PathBuf, + extents: Vec, + bytes_read: usize, + used_fallback: bool, + ) -> Self { + Self { + block_device_path, + extents, + bytes_read, + used_fallback, + } + } + + /// Create a State for a fallback read (no block device access). 
+ pub fn fallback(bytes_read: usize) -> Self { + Self { + block_device_path: PathBuf::new(), + extents: Vec::new(), + bytes_read, + used_fallback: true, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use blkmap::ExtentFlags; + + #[test] + fn test_state_new() { + let state = State::new( + PathBuf::from("/dev/sda"), + vec![FiemapExtent { + logical: 0, + physical: 1000, + length: 4096, + flags: ExtentFlags::empty(), + }], + 4096, + false, + ); + + assert_eq!(state.block_device_path, PathBuf::from("/dev/sda")); + assert_eq!(state.extents.len(), 1); + assert_eq!(state.bytes_read, 4096); + assert!(!state.used_fallback); + } + + #[test] + fn test_state_fallback() { + let state = State::fallback(1024); + + assert!(state.block_device_path.as_os_str().is_empty()); + assert!(state.extents.is_empty()); + assert_eq!(state.bytes_read, 1024); + assert!(state.used_fallback); + } +} From 13e4588568dd4724be888740ec1cdaf7a949a3ad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 17 Jan 2026 11:48:46 +0000 Subject: [PATCH 3/6] Address code review: defer sudo escalation, add workflow permissions Co-authored-by: SF-Zhou <7477599+SF-Zhou@users.noreply.github.com> --- .github/workflows/ci.yml | 3 +++ src/bin/blkreader.rs | 14 +++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6416565..3512355 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,9 @@ on: env: CARGO_TERM_COLOR: always +permissions: + contents: read + jobs: check: name: Check diff --git a/src/bin/blkreader.rs b/src/bin/blkreader.rs index 20a1baa..5013f7e 100644 --- a/src/bin/blkreader.rs +++ b/src/bin/blkreader.rs @@ -56,9 +56,6 @@ struct Args { } fn main() { - // Request sudo privileges - sudo::escalate_if_needed().expect("Failed to escalate privileges"); - let args = Args::parse(); if let Err(e) = run(&args) { @@ -84,6 +81,17 @@ fn run(args: 
&Args) -> io::Result<()> { return Ok(()); } + // Request sudo privileges only if not using fallback mode + // or if we need to access the block device directly + if !args.allow_fallback { + sudo::escalate_if_needed().map_err(|e| { + io::Error::new( + io::ErrorKind::PermissionDenied, + format!("Failed to escalate privileges: {}", e), + ) + })?; + } + // Print verbose information if args.verbose { print_verbose_info(&args.path, args.offset, length)?; From 87b6d91cf67dc56eb07e7a19d0baf4832e8952a6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 17 Jan 2026 13:27:18 +0000 Subject: [PATCH 4/6] Address feedback: add Direct I/O alignment, chunked reading, simplify CI Co-authored-by: SF-Zhou <7477599+SF-Zhou@users.noreply.github.com> --- .github/workflows/ci.yml | 65 ++++--------------- README.md | 12 ++++ src/bin/blkreader.rs | 137 ++++++++++++++++++++++++++++++++------- src/lib.rs | 16 ++++- src/reader.rs | 22 +++++-- 5 files changed, 169 insertions(+), 83 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3512355..0e84a93 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,63 +6,22 @@ on: pull_request: branches: [main] -env: - CARGO_TERM_COLOR: always - permissions: contents: read -jobs: - check: - name: Check - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - run: cargo check --all-features - - test: - name: Test - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - run: cargo test --all-features - - fmt: - name: Rustfmt - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - with: - components: rustfmt - - run: cargo fmt --all -- --check - - clippy: - name: Clippy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - with: - components: 
clippy - - run: cargo clippy --all-features -- -D warnings +env: + CARGO_TERM_COLOR: always - doc: - name: Documentation +jobs: + build: runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - run: cargo doc --no-deps --all-features - env: - RUSTDOCFLAGS: -D warnings - msrv: - name: Minimum Supported Rust Version - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@1.75.0 - - run: cargo check --all-features + - uses: actions/checkout@v4 + + - name: Build & Run + run: | + cargo fmt --all -- --check + cargo clippy -- -D warnings + cargo test + cargo run --release -- README.md diff --git a/README.md b/README.md index 0f39b25..2671b05 100644 --- a/README.md +++ b/README.md @@ -173,6 +173,18 @@ When enabled, unwritten (preallocated but not yet written) extents are filled wi When enabled, if the queried extents fully cover the read range and contain no unwritten extents, the read will be performed using regular file I/O instead of direct block device I/O. This avoids the need for root privileges in such cases. +## Direct I/O Alignment Requirements + +When using the library API to read directly from block devices (not using fallback mode), the following alignment requirements must be met: + +- **Buffer alignment**: The buffer should be aligned to at least 512 bytes (sector size). For optimal performance, 4096-byte alignment is recommended. +- **Offset alignment**: The read offset should be aligned to 512 bytes. +- **Length alignment**: The buffer length should be aligned to 512 bytes. + +If alignment requirements are not met, the read operation may fail with an `EINVAL` error. + +**Note**: The CLI tool handles alignment automatically by adjusting offsets and using aligned buffers internally. 
+
 ## Requirements
 
 - Linux operating system
diff --git a/src/bin/blkreader.rs b/src/bin/blkreader.rs
index 5013f7e..a7a9d85 100644
--- a/src/bin/blkreader.rs
+++ b/src/bin/blkreader.rs
@@ -11,6 +11,12 @@ use std::fs::File;
 use std::io::{self, Write};
 use std::path::PathBuf;
 
+/// Direct I/O alignment requirement (512 bytes is the minimum for most block devices).
+const ALIGNMENT: usize = 512;
+
+/// Default chunk size for reading large files (1 MB).
+const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024;
+
 /// Read file data directly from block device using extent information.
 ///
 /// This tool queries the file's extent information via FIEMAP and reads
@@ -22,7 +28,7 @@ struct Args {
     /// Path to the file to read
     path: PathBuf,
 
-    /// Byte offset to start reading from
+    /// Byte offset to start reading from (must be aligned to 512 bytes for Direct I/O)
     #[arg(short, long, default_value = "0")]
     offset: u64,
 
@@ -64,6 +70,72 @@ fn main() {
     }
 }
 
+/// Zero-initialised heap buffer whose start address is aligned to `ALIGNMENT` bytes.
+///
+/// A plain `Vec<u8>` cannot own such memory: `Vec` frees its storage with the alignment
+/// of `u8`, but memory must be deallocated with the same layout it was allocated with.
+/// This wrapper frees with the allocation layout and dereferences to `[u8]`.
+struct AlignedBuffer {
+    ptr: std::ptr::NonNull<u8>,
+    len: usize,
+}
+
+impl AlignedBuffer {
+    /// Layout used to allocate and free a buffer of `len` bytes.
+    fn layout(len: usize) -> std::alloc::Layout {
+        // The global allocator must not be asked for zero bytes, so reserve at least one.
+        std::alloc::Layout::from_size_align(len.max(1), ALIGNMENT)
+            .expect("buffer size too large for an aligned allocation")
+    }
+}
+
+impl std::ops::Deref for AlignedBuffer {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        // SAFETY: `ptr` points to at least `len` zero-initialised bytes owned by `self`.
+        unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
+    }
+}
+
+impl std::ops::DerefMut for AlignedBuffer {
+    fn deref_mut(&mut self) -> &mut [u8] {
+        // SAFETY: as in `deref`; `&mut self` guarantees the access is unique.
+        unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) }
+    }
+}
+
+impl Drop for AlignedBuffer {
+    fn drop(&mut self) {
+        // SAFETY: `ptr` came from `alloc_zeroed` with exactly `Self::layout(self.len)`.
+        unsafe { std::alloc::dealloc(self.ptr.as_ptr(), Self::layout(self.len)) }
+    }
+}
+
+/// Allocate a zero-filled buffer of `size` bytes aligned to `ALIGNMENT` for Direct I/O.
+///
+/// The result dereferences to `[u8]`, so it can be passed wherever a byte slice is
+/// expected. Allocation failure is reported through `std::alloc::handle_alloc_error`.
+fn alloc_aligned_buffer(size: usize) -> AlignedBuffer {
+    let layout = AlignedBuffer::layout(size);
+    // SAFETY: `layout` has a non-zero size (see `AlignedBuffer::layout`).
+    let raw = unsafe { std::alloc::alloc_zeroed(layout) };
+    match std::ptr::NonNull::new(raw) {
+        Some(ptr) => AlignedBuffer { ptr, len: size },
+        None => std::alloc::handle_alloc_error(layout),
+    }
+}
+
+/// Align offset down to the alignment boundary (`alignment` must be a power of two).
+fn align_down(offset: u64, alignment: u64) -> u64 {
+    offset & !(alignment - 1)
+}
+
+/// Align length up to the alignment boundary.
+fn align_up(length: u64, alignment: u64) -> u64 { + (length + alignment - 1) & !(alignment - 1) +} + fn run(args: &Args) -> io::Result<()> { // Determine the length to read let file = File::open(&args.path)?; @@ -97,9 +124,6 @@ fn run(args: &Args) -> io::Result<()> { print_verbose_info(&args.path, args.offset, length)?; } - // Prepare buffer - let mut buf = vec![0u8; length as usize]; - // Build options let options = Options::new() .with_cache(!args.no_cache) @@ -107,33 +131,88 @@ fn run(args: &Args) -> io::Result<()> { .with_fill_unwritten(args.fill_unwritten) .with_allow_fallback(args.allow_fallback); - // Perform the read - let state = args.path.blk_read_at_opt(&mut buf, args.offset, &options)?; + // Open output file or use stdout + let mut output: Box = if let Some(output_path) = &args.output { + Box::new(File::create(output_path)?) + } else { + Box::new(io::stdout()) + }; - if args.verbose { - eprintln!(); - eprintln!("Read {} bytes", state.bytes_read); - if state.used_fallback { - eprintln!("(Used fallback to regular file I/O)"); + // Calculate aligned read parameters for Direct I/O + let aligned_offset = align_down(args.offset, ALIGNMENT as u64); + let offset_adjustment = (args.offset - aligned_offset) as usize; + let total_length = align_up(length + offset_adjustment as u64, ALIGNMENT as u64); + + // Determine chunk size (aligned to ALIGNMENT) + let chunk_size = DEFAULT_CHUNK_SIZE; + + // Read in chunks to handle large files + let mut total_bytes_read = 0usize; + let mut current_aligned_offset = aligned_offset; + let mut remaining = total_length; + let mut first_chunk = true; + let mut block_device_path = PathBuf::new(); + + while remaining > 0 { + let read_size = std::cmp::min(remaining as usize, chunk_size); + + // Allocate aligned buffer for this chunk + let mut buf = alloc_aligned_buffer(read_size); + + // Perform the read + let state = args + .path + .blk_read_at_opt(&mut buf, current_aligned_offset, &options)?; + + if first_chunk { + 
block_device_path = state.block_device_path.clone(); + first_chunk = false; + } + + if state.bytes_read == 0 { + break; + } + + // Calculate the actual data to output from this chunk + let skip = if current_aligned_offset == aligned_offset { + offset_adjustment } else { - eprintln!("Block device: {}", state.block_device_path.display()); + 0 + }; + + let bytes_to_write = std::cmp::min( + state.bytes_read.saturating_sub(skip), + (length as usize).saturating_sub(total_bytes_read), + ); + + if bytes_to_write > 0 { + output.write_all(&buf[skip..skip + bytes_to_write])?; + total_bytes_read += bytes_to_write; } - } - // Truncate buffer to actual bytes read - buf.truncate(state.bytes_read); + // Check if we've read enough + if total_bytes_read >= length as usize { + break; + } - // Write output - if let Some(output_path) = &args.output { - let mut output_file = File::create(output_path)?; - output_file.write_all(&buf)?; - if args.verbose { + // Short read indicates EOF + if state.bytes_read < read_size { + break; + } + + current_aligned_offset += read_size as u64; + remaining -= read_size as u64; + } + + if args.verbose { + eprintln!(); + eprintln!("Read {} bytes", total_bytes_read); + if !block_device_path.as_os_str().is_empty() { + eprintln!("Block device: {}", block_device_path.display()); + } + if let Some(output_path) = &args.output { eprintln!("Output written to: {}", output_path.display()); } - } else { - let stdout = io::stdout(); - let mut handle = stdout.lock(); - handle.write_all(&buf)?; } Ok(()) @@ -144,6 +223,16 @@ fn print_verbose_info(path: &PathBuf, offset: u64, length: u64) -> io::Result<() eprintln!("Offset: {} (0x{:x})", offset, offset); eprintln!("Length: {} (0x{:x})", length, length); + // Show alignment info + let aligned_offset = align_down(offset, ALIGNMENT as u64); + let aligned_length = align_up(length + (offset - aligned_offset), ALIGNMENT as u64); + if aligned_offset != offset || aligned_length != length { + eprintln!( + "Aligned offset: {} 
(0x{:x}), Aligned length: {} (0x{:x})", + aligned_offset, aligned_offset, aligned_length, aligned_length + ); + } + // Resolve block device match path.resolve_device() { Ok(device) => { diff --git a/src/lib.rs b/src/lib.rs index eeaff4d..2603a7d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,19 @@ //! - Configurable handling of holes and unwritten extents //! - Fallback to regular file I/O when safe //! +//! ## Direct I/O Alignment Requirements +//! +//! When reading directly from block devices (not using fallback mode), the following +//! alignment requirements must be met for Direct I/O: +//! +//! - **Buffer alignment**: The buffer must be aligned to at least 512 bytes (sector size). +//! For optimal performance on modern devices, 4096-byte alignment is recommended. +//! - **Offset alignment**: The read offset should be aligned to 512 bytes. +//! - **Length alignment**: The read length should be aligned to 512 bytes. +//! +//! If alignment requirements are not met, the underlying read may fail with an +//! `EINVAL` error. The CLI tool handles alignment automatically. +//! //! ## Example //! //! ```no_run @@ -29,9 +42,10 @@ //! use std::path::Path; //! //! let path = Path::new("/path/to/file"); +//! // Buffer should be aligned; using 4096 bytes which is a common block size //! let mut buf = vec![0u8; 4096]; //! -//! // Simple read +//! // Simple read (offset 0 is aligned) //! let bytes_read = path.blk_read_at(&mut buf, 0).unwrap(); //! //! 
// Read with options diff --git a/src/reader.rs b/src/reader.rs index 4571bb1..cf628ec 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -22,6 +22,17 @@ use std::sync::Arc; /// - [`blk_read_at`](BlkReader::blk_read_at): Simple read that returns the number of bytes read /// - [`blk_read_at_opt`](BlkReader::blk_read_at_opt): Advanced read with options that returns detailed state /// +/// # Direct I/O Alignment Requirements +/// +/// When reading directly from block devices (not using fallback mode), the following +/// alignment requirements must be met: +/// +/// - **Buffer alignment**: The buffer should be aligned to at least 512 bytes (sector size). +/// - **Offset alignment**: The read offset should be aligned to 512 bytes. +/// - **Length alignment**: The buffer length should be aligned to 512 bytes. +/// +/// If alignment requirements are not met, the underlying read may fail with `EINVAL`. +/// /// # Example /// /// ```no_run @@ -29,9 +40,10 @@ use std::sync::Arc; /// use std::path::Path; /// /// let path = Path::new("/path/to/file"); +/// // Use aligned buffer size (4096 is a common block size) /// let mut buf = vec![0u8; 4096]; /// -/// // Simple read +/// // Simple read (offset 0 is aligned) /// let bytes = path.blk_read_at(&mut buf, 0).unwrap(); /// /// // Read with options @@ -46,8 +58,8 @@ pub trait BlkReader { /// /// # Arguments /// - /// * `buf` - Buffer to read data into - /// * `offset` - Byte offset in the file to start reading from + /// * `buf` - Buffer to read data into. For Direct I/O, should be aligned to 512 bytes. + /// * `offset` - Byte offset in the file to start reading from. Should be aligned to 512 bytes. /// /// # Returns /// @@ -64,8 +76,8 @@ pub trait BlkReader { /// /// # Arguments /// - /// * `buf` - Buffer to read data into - /// * `offset` - Byte offset in the file to start reading from + /// * `buf` - Buffer to read data into. For Direct I/O, should be aligned to 512 bytes. 
+ /// * `offset` - Byte offset in the file to start reading from. Should be aligned to 512 bytes. /// * `options` - Configuration options for the read operation /// /// # Returns From 042e3da1a373d7c89bcefeee39aa5a9291d5ae7c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 17 Jan 2026 14:09:30 +0000 Subject: [PATCH 5/6] Address code review: refactor cache API, return extents in fallback, rename zero_unwritten Co-authored-by: SF-Zhou <7477599+SF-Zhou@users.noreply.github.com> --- README.md | 12 ++++--- src/bin/blkreader.rs | 6 ++-- src/cache.rs | 31 +++++++++++------ src/options.rs | 26 +++++++++----- src/reader.rs | 80 +++++++++++++++++++------------------------- src/state.rs | 18 +++++++--- 6 files changed, 95 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 2671b05..481ca1e 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ fn main() -> std::io::Result<()> { let options = Options::new() .with_cache(true) // Enable block device caching (default) .with_fill_holes(true) // Fill holes with zeros - .with_fill_unwritten(true) // Fill unwritten extents with zeros + .with_zero_unwritten(true) // Fill unwritten extents with zeros .with_allow_fallback(true); // Allow fallback to regular file I/O // Read with detailed state information @@ -136,7 +136,7 @@ blkreader /path/to/file -v blkreader /path/to/file -O output.bin # Fill holes and unwritten extents with zeros -blkreader /path/to/file --fill-holes --fill-unwritten +blkreader /path/to/file --fill-holes --zero-unwritten # Allow fallback to regular file I/O when safe blkreader /path/to/file --allow-fallback @@ -151,7 +151,7 @@ blkreader /path/to/file --allow-fallback | `-v, --verbose` | Enable verbose output | | `-O, --output ` | Write output to file instead of stdout | | `--fill-holes` | Fill holes with zeros instead of stopping | -| `--fill-unwritten` | Fill unwritten extents with zeros | +| `--zero-unwritten` | Fill unwritten 
extents with zeros instead of reading raw block data | | `--allow-fallback` | Allow fallback to regular file I/O when safe | | `--no-cache` | Disable block device caching | @@ -165,9 +165,11 @@ When enabled, block device file handles are cached globally based on the device When enabled, holes in file extents are filled with zeros. When disabled, reading a hole causes an early EOF return. -### `fill_unwritten` (default: `false`) +### `zero_unwritten` (default: `false`) -When enabled, unwritten (preallocated but not yet written) extents are filled with zeros. When disabled, reading an unwritten extent causes an early EOF return. +When enabled, unwritten (preallocated but not yet written) extents are filled with zeros, matching normal filesystem read behavior. + +When disabled (default), unwritten extents are read directly from the block device, returning whatever raw data exists at those physical locations. This is useful for data recovery scenarios where you want to access the actual data written to pre-allocated extents. ### `allow_fallback` (default: `false`) diff --git a/src/bin/blkreader.rs b/src/bin/blkreader.rs index a7a9d85..1735f39 100644 --- a/src/bin/blkreader.rs +++ b/src/bin/blkreader.rs @@ -48,9 +48,9 @@ struct Args { #[arg(long)] fill_holes: bool, - /// Fill unwritten extents with zeros instead of stopping + /// Fill unwritten extents with zeros instead of reading raw block data #[arg(long)] - fill_unwritten: bool, + zero_unwritten: bool, /// Allow fallback to regular file I/O when safe #[arg(long)] @@ -128,7 +128,7 @@ fn run(args: &Args) -> io::Result<()> { let options = Options::new() .with_cache(!args.no_cache) .with_fill_holes(args.fill_holes) - .with_fill_unwritten(args.fill_unwritten) + .with_zero_unwritten(args.zero_unwritten) .with_allow_fallback(args.allow_fallback); // Open output file or use stdout diff --git a/src/cache.rs b/src/cache.rs index db91a80..da0c6a0 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -5,11 +5,12 @@ //! 
from files on the same filesystem to share a single file handle //! to the underlying block device. +use blkpath::ResolveDevice; use once_cell::sync::Lazy; use std::collections::HashMap; use std::fs::{File, OpenOptions}; use std::io; -use std::os::unix::fs::OpenOptionsExt; +use std::os::unix::fs::{MetadataExt, OpenOptionsExt}; use std::path::PathBuf; use std::sync::{Arc, RwLock}; @@ -17,7 +18,6 @@ use std::sync::{Arc, RwLock}; #[derive(Debug)] pub struct CachedDevice { /// Path to the block device. - #[allow(dead_code)] pub path: PathBuf, /// File handle opened with O_DIRECT for reading. pub file: File, @@ -42,18 +42,23 @@ impl CachedDevice { static DEVICE_CACHE: Lazy>>> = Lazy::new(|| RwLock::new(HashMap::new())); -/// Get or create a cached block device entry. +/// Get or create a cached block device entry for the given file. +/// +/// This function resolves the block device path from the file only if +/// the device is not already cached. This avoids the expensive +/// `resolve_device()` call on every read operation. /// /// # Arguments /// -/// * `dev_id` - The device ID from file metadata (`stat.st_dev`) -/// * `device_path` - The path to the block device +/// * `file` - A reference to an open file /// /// # Returns /// /// An `Arc` to the cached device entry, or an error if the device -/// could not be opened. -pub fn get_or_create_device(dev_id: u64, device_path: PathBuf) -> io::Result> { +/// could not be resolved or opened. +pub fn get_or_create_cached_device(file: &File) -> io::Result> { + let dev_id = file.metadata()?.dev(); + // First, try to get from cache with a read lock { let cache = DEVICE_CACHE.read().unwrap(); @@ -62,7 +67,8 @@ pub fn get_or_create_device(dev_id: u64, device_path: PathBuf) -> io::Result io::Result io::Result { +/// the device could not be resolved or opened. 
+pub fn open_device_uncached(file: &File) -> io::Result { + let device_path = file.resolve_device()?; CachedDevice::new(device_path) } diff --git a/src/options.rs b/src/options.rs index a5e07c2..e54d8dd 100644 --- a/src/options.rs +++ b/src/options.rs @@ -15,10 +15,15 @@ pub struct Options { /// When disabled, reading a hole will cause an early EOF return. pub fill_holes: bool, - /// Fill unwritten extents with zeros. + /// Fill unwritten extents with zeros instead of reading raw data. /// - /// When disabled, reading an unwritten extent will cause an early EOF return. - pub fill_unwritten: bool, + /// When disabled (default), unwritten extents are read from the block + /// device, returning whatever raw data exists at those physical locations. + /// This is useful for data recovery scenarios. + /// + /// When enabled, unwritten extents are filled with zeros (matching + /// normal filesystem read behavior). + pub zero_unwritten: bool, /// Allow fallback to regular file read when safe. /// @@ -34,7 +39,7 @@ impl Default for Options { Self { enable_cache: true, fill_holes: false, - fill_unwritten: false, + zero_unwritten: false, allow_fallback: false, } } @@ -59,8 +64,11 @@ impl Options { } /// Enable or disable filling unwritten extents with zeros. - pub fn with_fill_unwritten(mut self, fill: bool) -> Self { - self.fill_unwritten = fill; + /// + /// When disabled (default), unwritten extents are read from the block + /// device, returning raw data. When enabled, they are filled with zeros. 
+ pub fn with_zero_unwritten(mut self, zero: bool) -> Self { + self.zero_unwritten = zero; self } @@ -80,7 +88,7 @@ mod tests { let opts = Options::default(); assert!(opts.enable_cache); assert!(!opts.fill_holes); - assert!(!opts.fill_unwritten); + assert!(!opts.zero_unwritten); assert!(!opts.allow_fallback); } @@ -89,12 +97,12 @@ mod tests { let opts = Options::new() .with_cache(false) .with_fill_holes(true) - .with_fill_unwritten(true) + .with_zero_unwritten(true) .with_allow_fallback(true); assert!(!opts.enable_cache); assert!(opts.fill_holes); - assert!(opts.fill_unwritten); + assert!(opts.zero_unwritten); assert!(opts.allow_fallback); } } diff --git a/src/reader.rs b/src/reader.rs index cf628ec..4becee5 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -3,16 +3,15 @@ //! This module provides the [`BlkReader`] trait which enables reading file data //! directly from the underlying block device using extent information. -use crate::cache::{get_or_create_device, open_device_uncached, CachedDevice}; +use crate::cache::{get_or_create_cached_device, open_device_uncached, CachedDevice}; use crate::options::Options; use crate::state::State; use blkmap::{Fiemap, FiemapExtent}; -use blkpath::ResolveDevice; use std::fs::File; use std::io; -use std::os::unix::fs::{FileExt, MetadataExt}; +use std::os::unix::fs::FileExt; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -90,22 +89,17 @@ pub trait BlkReader { /// Internal helper to perform the actual read operation. 
struct ReadContext<'a> { file: &'a File, - file_path: Option<&'a Path>, options: &'a Options, } impl<'a> ReadContext<'a> { - fn new(file: &'a File, file_path: Option<&'a Path>, options: &'a Options) -> Self { - Self { - file, - file_path, - options, - } + fn new(file: &'a File, options: &'a Options) -> Self { + Self { file, options } } fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { if buf.is_empty() { - return Ok(State::fallback(0)); + return Ok(State::fallback(Vec::new(), 0)); } let length = buf.len() as u64; @@ -122,19 +116,16 @@ impl<'a> ReadContext<'a> { // Check if fallback is allowed and safe if self.options.allow_fallback && self.can_use_fallback(&extents, offset, length) { - return self.fallback_read(buf, offset); + return self.fallback_read(buf, offset, extents); } - // Get block device path - let device_path = self.resolve_device_path()?; - // Get device file handle (cached or uncached) - let device = self.get_device_handle(&device_path)?; + let device = self.get_device_handle()?; // Perform the read let bytes_read = self.read_from_device(&device, buf, offset, &extents)?; - Ok(State::new(device_path, extents, bytes_read, false)) + Ok(State::new(device.path().clone(), extents, bytes_read, false)) } /// Check if we can safely use fallback (regular file I/O). @@ -179,28 +170,23 @@ impl<'a> ReadContext<'a> { } /// Perform a fallback read using regular file I/O. - fn fallback_read(&self, buf: &mut [u8], offset: u64) -> io::Result { + fn fallback_read( + &self, + buf: &mut [u8], + offset: u64, + extents: Vec, + ) -> io::Result { let bytes_read = FileExt::read_at(self.file, buf, offset)?; - Ok(State::fallback(bytes_read)) - } - - /// Resolve the block device path for the file. - fn resolve_device_path(&self) -> io::Result { - if let Some(path) = self.file_path { - path.resolve_device() - } else { - self.file.resolve_device() - } + Ok(State::fallback(extents, bytes_read)) } /// Get a device handle, either cached or uncached based on options. 
- fn get_device_handle(&self, device_path: &Path) -> io::Result { + fn get_device_handle(&self) -> io::Result { if self.options.enable_cache { - let dev_id = self.file.metadata()?.dev(); - let cached = get_or_create_device(dev_id, device_path.to_path_buf())?; + let cached = get_or_create_cached_device(self.file)?; Ok(DeviceHandle::Cached(cached)) } else { - let uncached = open_device_uncached(device_path.to_path_buf())?; + let uncached = open_device_uncached(self.file)?; Ok(DeviceHandle::Uncached(uncached)) } } @@ -247,13 +233,8 @@ impl<'a> ReadContext<'a> { } } - // Handle unwritten extent - if extent.flags.is_unwritten() { - if !self.options.fill_unwritten { - // EOF at unwritten - return Ok(bytes_read); - } - + // Handle unwritten extent - fill with zeros if requested + if extent.flags.is_unwritten() && self.options.zero_unwritten { // Fill with zeros for unwritten extent let read_start = current_offset.max(extent.logical); let read_end = extent_end.min(end); @@ -266,6 +247,7 @@ impl<'a> ReadContext<'a> { current_offset = read_end; continue; } + // Otherwise unwritten extents fall through to read raw data from block device // Handle hole-like extents (UNKNOWN, DELALLOC) if extent.flags.is_unknown() || extent.flags.is_delalloc() { @@ -285,7 +267,7 @@ impl<'a> ReadContext<'a> { continue; } - // Normal extent - read from block device + // Normal extent (or unwritten with zero_unwritten=false) - read from block device let read_start = current_offset.max(extent.logical); let read_end = extent_end.min(end); let read_len = (read_end - read_start) as usize; @@ -329,6 +311,14 @@ enum DeviceHandle { } impl DeviceHandle { + /// Get the path of the block device. + fn path(&self) -> &PathBuf { + match self { + DeviceHandle::Cached(cached) => &cached.path, + DeviceHandle::Uncached(uncached) => &uncached.path, + } + } + /// Read data from the device at the specified physical offset. 
fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { let file = match self { @@ -345,7 +335,7 @@ impl DeviceHandle { impl BlkReader for Path { fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result { let file = File::open(self)?; - let ctx = ReadContext::new(&file, Some(self), options); + let ctx = ReadContext::new(&file, options); ctx.read_at(buf, offset) } } @@ -360,7 +350,7 @@ impl BlkReader for PathBuf { // Implementation for File impl BlkReader for File { fn blk_read_at_opt(&self, buf: &mut [u8], offset: u64, options: &Options) -> io::Result { - let ctx = ReadContext::new(self, None, options); + let ctx = ReadContext::new(self, options); ctx.read_at(buf, offset) } } @@ -374,12 +364,12 @@ mod tests { let opts = Options::new() .with_cache(false) .with_fill_holes(true) - .with_fill_unwritten(true) + .with_zero_unwritten(true) .with_allow_fallback(true); assert!(!opts.enable_cache); assert!(opts.fill_holes); - assert!(opts.fill_unwritten); + assert!(opts.zero_unwritten); assert!(opts.allow_fallback); } @@ -389,7 +379,7 @@ mod tests { let file = File::open("/proc/self/exe").unwrap(); let options = Options::new().with_allow_fallback(true); - let ctx = ReadContext::new(&file, None, &options); + let ctx = ReadContext::new(&file, &options); // Empty extents - cannot fallback assert!(!ctx.can_use_fallback(&[], 0, 100)); diff --git a/src/state.rs b/src/state.rs index 67450d4..5bf0988 100644 --- a/src/state.rs +++ b/src/state.rs @@ -35,11 +35,13 @@ impl State { } } - /// Create a State for a fallback read (no block device access). - pub fn fallback(bytes_read: usize) -> Self { + /// Create a State for a fallback read (regular file I/O). + /// + /// Even in fallback mode, the extents are included for informational purposes. 
+ pub fn fallback(extents: Vec, bytes_read: usize) -> Self { Self { block_device_path: PathBuf::new(), - extents: Vec::new(), + extents, bytes_read, used_fallback: true, } @@ -73,10 +75,16 @@ mod tests { #[test] fn test_state_fallback() { - let state = State::fallback(1024); + let extents = vec![FiemapExtent { + logical: 0, + physical: 1000, + length: 4096, + flags: ExtentFlags::empty(), + }]; + let state = State::fallback(extents, 1024); assert!(state.block_device_path.as_os_str().is_empty()); - assert!(state.extents.is_empty()); + assert_eq!(state.extents.len(), 1); assert_eq!(state.bytes_read, 1024); assert!(state.used_fallback); } From c979e83f575bcd6c7d3a47a0d899ad7dc0737e7b Mon Sep 17 00:00:00 2001 From: SF-Zhou Date: Sat, 17 Jan 2026 22:23:50 +0800 Subject: [PATCH 6/6] Refine --- .github/workflows/ci.yml | 3 +- LICENSE-APACHE | 69 +++++++++++++--------------------------- LICENSE-MIT | 2 +- src/bin/blkreader.rs | 40 ++++++++++++----------- src/reader.rs | 7 +++- 5 files changed, 53 insertions(+), 68 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e84a93..53d069f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,4 +24,5 @@ jobs: cargo fmt --all -- --check cargo clippy -- -D warnings cargo test - cargo run --release -- README.md + cargo run --release -- README.md -v > /tmp/README.md + diff README.md /tmp/README.md diff --git a/LICENSE-APACHE b/LICENSE-APACHE index 48f96ec..038d25d 100644 --- a/LICENSE-APACHE +++ b/LICENSE-APACHE @@ -48,7 +48,7 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally - submitted to the Licensor for inclusion in the Work by the copyright owner + submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit 
on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent @@ -92,33 +92,33 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION meet the following conditions: (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and + Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and + stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions @@ -174,28 +174,3 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [2024] [SF-Zhou] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT index 86d8e5b..aca1a46 100644 --- a/LICENSE-MIT +++ b/LICENSE-MIT @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 SF-Zhou +Copyright (c) 2026 SF-Zhou Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/bin/blkreader.rs b/src/bin/blkreader.rs index 1735f39..e3279e6 100644 --- a/src/bin/blkreader.rs +++ b/src/bin/blkreader.rs @@ -11,9 +11,6 @@ use std::fs::File; use std::io::{self, Write}; use std::path::PathBuf; -/// Direct I/O alignment requirement (512 bytes is the minimum for most block devices). -const ALIGNMENT: usize = 512; - /// Default chunk size for reading large files (1 MB). const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024; @@ -28,7 +25,7 @@ struct Args { /// Path to the file to read path: PathBuf, - /// Byte offset to start reading from (must be aligned to 512 bytes for Direct I/O) + /// Byte offset to start reading from #[arg(short, long, default_value = "0")] offset: u64, @@ -59,6 +56,10 @@ struct Args { /// Disable block device caching #[arg(long)] no_cache: bool, + + /// Alignment for direct IO. 
+ #[arg(long, default_value_t = 512)] + alignment: u64, } fn main() { @@ -71,9 +72,9 @@ fn main() { } /// Allocate an aligned buffer for Direct I/O. -fn alloc_aligned_buffer(size: usize) -> Vec { +fn alloc_aligned_buffer(size: usize, align: usize) -> Vec { // Allocate with extra space for alignment - let layout = std::alloc::Layout::from_size_align(size, ALIGNMENT).unwrap(); + let layout = std::alloc::Layout::from_size_align(size, align).unwrap(); let ptr = unsafe { std::alloc::alloc_zeroed(layout) }; if ptr.is_null() { panic!("Failed to allocate aligned buffer"); @@ -121,7 +122,7 @@ fn run(args: &Args) -> io::Result<()> { // Print verbose information if args.verbose { - print_verbose_info(&args.path, args.offset, length)?; + print_verbose_info(&args.path, args.offset, length, args.alignment)?; } // Build options @@ -139,13 +140,16 @@ fn run(args: &Args) -> io::Result<()> { }; // Calculate aligned read parameters for Direct I/O - let aligned_offset = align_down(args.offset, ALIGNMENT as u64); + let aligned_offset = align_down(args.offset, args.alignment); let offset_adjustment = (args.offset - aligned_offset) as usize; - let total_length = align_up(length + offset_adjustment as u64, ALIGNMENT as u64); + let total_length = align_up(length + offset_adjustment as u64, args.alignment); // Determine chunk size (aligned to ALIGNMENT) let chunk_size = DEFAULT_CHUNK_SIZE; + // Allocate aligned buffer. 
+ let mut buf = alloc_aligned_buffer(chunk_size, args.alignment as usize); + // Read in chunks to handle large files let mut total_bytes_read = 0usize; let mut current_aligned_offset = aligned_offset; @@ -155,14 +159,14 @@ fn run(args: &Args) -> io::Result<()> { while remaining > 0 { let read_size = std::cmp::min(remaining as usize, chunk_size); - - // Allocate aligned buffer for this chunk - let mut buf = alloc_aligned_buffer(read_size); + let aligned_size = align_up(read_size as u64, args.alignment) as usize; // Perform the read - let state = args - .path - .blk_read_at_opt(&mut buf, current_aligned_offset, &options)?; + let state = args.path.blk_read_at_opt( + &mut buf[..aligned_size], + current_aligned_offset, + &options, + )?; if first_chunk { block_device_path = state.block_device_path.clone(); @@ -218,14 +222,14 @@ fn run(args: &Args) -> io::Result<()> { Ok(()) } -fn print_verbose_info(path: &PathBuf, offset: u64, length: u64) -> io::Result<()> { +fn print_verbose_info(path: &PathBuf, offset: u64, length: u64, alignment: u64) -> io::Result<()> { eprintln!("File: {}", path.display()); eprintln!("Offset: {} (0x{:x})", offset, offset); eprintln!("Length: {} (0x{:x})", length, length); // Show alignment info - let aligned_offset = align_down(offset, ALIGNMENT as u64); - let aligned_length = align_up(length + (offset - aligned_offset), ALIGNMENT as u64); + let aligned_offset = align_down(offset, alignment); + let aligned_length = align_up(length + (offset - aligned_offset), alignment); if aligned_offset != offset || aligned_length != length { eprintln!( "Aligned offset: {} (0x{:x}), Aligned length: {} (0x{:x})", diff --git a/src/reader.rs b/src/reader.rs index 4becee5..749e52e 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -125,7 +125,12 @@ impl<'a> ReadContext<'a> { // Perform the read let bytes_read = self.read_from_device(&device, buf, offset, &extents)?; - Ok(State::new(device.path().clone(), extents, bytes_read, false)) + Ok(State::new( + 
device.path().clone(), + extents, + bytes_read, + false, + )) } /// Check if we can safely use fallback (regular file I/O).