diff --git a/CHANGELOG.md b/CHANGELOG.md index b5727f0..1073bd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Input validation for EPUB files, including ZIP integrity, mimetype, and required structure (container.xml, OPF). - Improved navigation UI with a modern sidebar layout for Table of Contents. - Navigation controls (Previous/Next) and TOC interaction logic implemented in Python. - Automatic navigation button state management (disabling at spine ends). diff --git a/src/imposition/book.py b/src/imposition/book.py index 74f4e5a..41201e8 100644 --- a/src/imposition/book.py +++ b/src/imposition/book.py @@ -33,6 +33,16 @@ def __init__(self, epub_bytes: bytes) -> None: except zipfile.BadZipFile as e: raise InvalidEpubError("The file is not a valid ZIP archive.") from e + # Validate mimetype file + try: + mimetype_content: bytes = self.zip_file.read("mimetype") + if mimetype_content.strip() != b"application/epub+zip": + raise InvalidEpubError( + f"Invalid mimetype: {mimetype_content.decode('utf-8', errors='replace')}" + ) + except KeyError as e: + raise InvalidEpubError("mimetype file not found in the EPUB file.") from e + # Find the .opf file from container.xml try: container_xml: bytes = self.zip_file.read("META-INF/container.xml") diff --git a/tests/test_book.py b/tests/test_book.py index 2a455a1..24c3d48 100644 --- a/tests/test_book.py +++ b/tests/test_book.py @@ -119,3 +119,13 @@ def test_spine_item_not_in_manifest(): }) with pytest.raises(InvalidEpubError, match="Item in spine not found in manifest"): Book(epub_bytes) + +def test_missing_mimetype_file(): + epub_bytes = create_epub_bytes({'META-INF/container.xml': 'some content'}) + with pytest.raises(InvalidEpubError, match="mimetype file not found"): + Book(epub_bytes) + +def test_invalid_mimetype_content(): + epub_bytes = create_epub_bytes({'mimetype': 'text/plain'}) + with pytest.raises(InvalidEpubError, match="Invalid mimetype: text/plain"): + Book(epub_bytes)