From 5399784bf8fbceda0bd91bfcab83a823794dba64 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Thu, 1 May 2025 16:01:42 -0500 Subject: [PATCH 01/10] feat: implement reproducible_zip --- .../reproducible_zipfile.py | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 package_python_function/reproducible_zipfile.py diff --git a/package_python_function/reproducible_zipfile.py b/package_python_function/reproducible_zipfile.py new file mode 100644 index 0000000..307755c --- /dev/null +++ b/package_python_function/reproducible_zipfile.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import os +import shutil +import time +import zipfile +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from os import PathLike + from pathlib import Path + from typing import Optional, Tuple, Union + +DIR_MODE = 0o755 +FILE_MODE = 0o644 + +def date_time() -> Tuple[int, int, int, int, int, int]: + """Returns date_time value used to force overwrite on all ZipInfo objects. Defaults to + 1980-01-01 00:00:00. You can set this with the environment variable SOURCE_DATE_EPOCH as an + integer value representing seconds since Epoch. 
+ """ + source_date_epoch = os.environ.get("SOURCE_DATE_EPOCH", None) + if source_date_epoch is not None: + return time.gmtime(int(source_date_epoch))[:6] + return (1980, 1, 1, 0, 0, 0) + +class ZipFile(zipfile.ZipFile): + def write_reproducibly( + self, + filename: PathLike, + arcname: Optional[Union[Path, str]] = None, + compress_type: Optional[int] = None, + compresslevel: Optional[int] = None, + ): + if not self.fp: + raise ValueError("Attempt to write to ZIP archive that was already closed") + if self._writing: + raise ValueError("Can't write to ZIP archive while an open writing handle exists") + + zinfo = zipfile.ZipInfo.from_file(filename, arcname, strict_timestamps=self._strict_timestamps) + zinfo.date_time = date_time() + if zinfo.is_dir(): + zinfo.external_attr = (0o40000 | DIR_MODE) << 16 + zinfo.external_attr |= 0x10 # MS-DOS directory flag + else: + zinfo.external_attr = FILE_MODE << 16 + + if zinfo.is_dir(): + zinfo.compress_size = 0 + zinfo.CRC = 0 + self.mkdir(zinfo) + else: + if compress_type is not None: + zinfo.compress_type = compress_type + else: + zinfo.compress_type = self.compression + + if compresslevel is not None: + zinfo._compresslevel = compresslevel + else: + zinfo._compresslevel = self.compresslevel + + with open(filename, "rb") as src, self.open(zinfo, "w") as dest: + shutil.copyfileobj(src, dest, 1024 * 8) + + def writestr_reproducibly( + self, + zinfo_or_arcname: Union[str, zipfile.ZipInfo], + data: Union[str, bytes], + compress_type: Optional[int] = None, + compresslevel: Optional[int] = None, + ): + if isinstance(data, str): + data = data.encode("utf-8") + + if not isinstance(zinfo_or_arcname, zipfile.ZipInfo): + zinfo = zipfile.ZipInfo(filename=zinfo_or_arcname, date_time=date_time()) + zinfo.compress_type = self.compression + zinfo._compresslevel = self.compresslevel + if zinfo.is_dir(): + zinfo.external_attr = (0o40000 | DIR_MODE) << 16 + zinfo.external_attr |= 0x10 # MS-DOS directory flag + else: + zinfo.external_attr = 
FILE_MODE << 16 + else: + zinfo = zinfo_or_arcname + + zinfo.file_size = len(data) + if compress_type is not None: + zinfo.compress_type = compress_type + + if compresslevel is not None: + zinfo._compresslevel = compresslevel + + if not self.fp: + raise ValueError("Attempt to write to ZIP archive that was already closed") + if self._writing: + raise ValueError("Can't write to ZIP archive while an open writing handle exists.") + + with self._lock: + with self.open(zinfo, mode="w") as dest: + dest.write(data) From 2d595d65c3879f242f2d82ece8383900830a11c4 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Thu, 1 May 2025 17:07:52 -0500 Subject: [PATCH 02/10] refactor: use reproducible functions for dependency zip --- package_python_function/packager.py | 33 ++++------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/package_python_function/packager.py b/package_python_function/packager.py index 8303370..5481c4f 100644 --- a/package_python_function/packager.py +++ b/package_python_function/packager.py @@ -1,18 +1,14 @@ from __future__ import annotations import logging -import os import shutil -import time import zipfile from pathlib import Path from tempfile import NamedTemporaryFile -from typing import TYPE_CHECKING +from zipfile import ZIP_DEFLATED, ZIP_STORED from .python_project import PythonProject - -if TYPE_CHECKING: - from typing import Tuple +from .reproducible_zipfile import ZipFile logger = logging.getLogger(__name__) @@ -46,35 +42,14 @@ def package(self) -> None: def zip_all_dependencies(self, target_path: Path) -> None: logger.info(f"Zipping to {target_path}...") - def date_time() -> Tuple[int, int, int, int, int, int]: - """Returns date_time value used to force overwrite on all ZipInfo objects. Defaults to - 1980-01-01 00:00:00. You can set this with the environment variable SOURCE_DATE_EPOCH as an - integer value representing seconds since Epoch. 
- """ - source_date_epoch = os.environ.get("SOURCE_DATE_EPOCH", None) - if source_date_epoch is not None: - return time.gmtime(int(source_date_epoch))[:6] - return (1980, 1, 1, 0, 0, 0) - - with zipfile.ZipFile(target_path, "w", zipfile.ZIP_DEFLATED) as zip_file: - + with ZipFile(target_path, "w", ZIP_DEFLATED) as zip_file: def zip_dir(path: Path) -> None: for item in path.iterdir(): if item.is_dir(): zip_dir(item) else: - zinfo = zipfile.ZipInfo.from_file( - item, item.relative_to(self.input_path) - ) - zinfo.date_time = date_time() - zinfo.external_attr = 0o644 << 16 - zinfo.compress_type = zipfile.ZIP_DEFLATED self._uncompressed_bytes += item.stat().st_size - with ( - open(item, "rb") as src, - zip_file.open(zinfo, "w") as dest, - ): - shutil.copyfileobj(src, dest, 1024 * 8) + zip_file.write_reproducibly(item, item.relative_to(self.input_path)) zip_dir(self.input_path) From 12047f1a81d795829fbed6b219b1a6209ea6a767 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Thu, 1 May 2025 17:10:38 -0500 Subject: [PATCH 03/10] feat: use reproducible methods for nested zips --- package_python_function/packager.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/package_python_function/packager.py b/package_python_function/packager.py index 5481c4f..8e76488 100644 --- a/package_python_function/packager.py +++ b/package_python_function/packager.py @@ -2,7 +2,6 @@ import logging import shutil -import zipfile from pathlib import Path from tempfile import NamedTemporaryFile from zipfile import ZIP_DEFLATED, ZIP_STORED @@ -71,15 +70,15 @@ def zip_dir(path: Path) -> None: def generate_nested_zip(self, inner_zip_path: Path) -> None: logger.info(f"Generating nested-zip and __init__.py loader using entrypoint package '{self.project.entrypoint_package_name}'...") - with zipfile.ZipFile(self.output_file, 'w') as outer_zip_file: + with ZipFile(self.output_file, 'w') as outer_zip_file: entrypoint_dir = Path(self.project.entrypoint_package_name) - 
outer_zip_file.write( + outer_zip_file.write_reproducibly( inner_zip_path, arcname=str(entrypoint_dir / ".dependencies.zip"), - compresslevel=zipfile.ZIP_STORED + compresslevel=ZIP_STORED ) - outer_zip_file.writestr( + outer_zip_file.writestr_reproducibly( str(entrypoint_dir / "__init__.py"), Path(__file__).parent.joinpath("nested_zip_loader.py").read_text(), - compresslevel=zipfile.ZIP_DEFLATED + compresslevel=ZIP_DEFLATED ) From ad3575ed07cd4d829175e191a787de4851e5899c Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Fri, 2 May 2025 11:22:32 -0500 Subject: [PATCH 04/10] refactor: set up test data harness --- poetry.lock | 18 +++++-- pyproject.toml | 1 + tests/conftest.py | 69 +++++++++++++++++++++++++++ tests/test_package_python_function.py | 43 +++++++---------- 4 files changed, 101 insertions(+), 30 deletions(-) create mode 100644 tests/conftest.py diff --git a/poetry.lock b/poetry.lock index fe74c1c..d1ae95e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. 
[[package]] name = "colorama" @@ -99,7 +99,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -315,6 +315,18 @@ files = [ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] +[[package]] +name = "tomli-w" +version = "1.2.0" +description = "A lil' TOML writer" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90"}, + {file = "tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -330,4 +342,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "72950f2ff91db20c10d591821bbebe74a8c3035f8aa9d13280f07f1209313be4" +content-hash = "bd7ae2e0296a07b2041bb1f8ce2e9452a7805e44e7a81aea8ecd903b3998ad69" diff --git a/pyproject.toml b/pyproject.toml index 25d57ea..a70e2ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ pytest = "^8.2.0" pytest-mypy-runner = "^1.0.0" mypy = "^1.10.0" pytest-cov = "^6.0.0" +tomli-w = "^1.2.0" [build-system] requires = ["poetry-core"] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0b6d3a7 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,69 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Self + +import tomli_w + +from package_python_function.python_project import PythonProject + + 
+@dataclass +class File: + path: Path + contents: str + + @classmethod + def new(cls, path: str, contents: str = "") -> Self: + return cls(path=Path(path), contents=contents) + +@dataclass +class Data: + project_files: list[File] # relative to packages_dir + pyproject: PythonProject + python_version: str + venv_dir: Path + + @classmethod + def new( + cls, + project_name: str, + project_files: list[File], + python_version: str = "3.13", + ) -> Self: + pyproject = _new_python_project(name=project_name) + return cls( + project_files=project_files, + pyproject=pyproject, + python_version=python_version, + venv_dir=Path(), + ) + + def commit(self, loc: Path) -> Self: + venv_dir = loc / "venv" + packages_dir = venv_dir / "lib" / f"python{self.python_version}" / "site-packages" + packages_dir.mkdir(parents=True, exist_ok=True) + + pyproj_path = loc / self.pyproject.path + pyproj_path.parent.mkdir(parents=True, exist_ok=True) + pyproject_toml = tomli_w.dumps(self.pyproject.toml) + pyproj_path.write_text(pyproject_toml) + + for file in self.project_files: + path = packages_dir / file.path + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(file.contents) + + # resolve paths fully + self.venv_dir = venv_dir + self.pyproject.path = pyproj_path + + return self + +def _new_python_project(name: str) -> PythonProject: + pyproj = PythonProject.__new__(PythonProject) + pyproj.path = Path(name) / "pyproject.toml" + pyproj.toml = { + "project": {"name": name}, + "tool": {"poetry": {"name": name}}, + } + return pyproj diff --git a/tests/test_package_python_function.py b/tests/test_package_python_function.py index 32a30a5..4f16542 100644 --- a/tests/test_package_python_function.py +++ b/tests/test_package_python_function.py @@ -4,35 +4,26 @@ from package_python_function.main import main -PROJECTS_DIR_PATH = Path(__file__).parent / 'projects' +from .conftest import Data, File -def test_package_python_function(tmp_path: Path) -> None: - EXPECTED_FILE_MODE = 0o644 - 
EXPECTED_FILE_DATE_TIME = (1980, 1, 1, 0, 0, 0) - - project_file_path = PROJECTS_DIR_PATH / 'project-1' / 'pyproject.toml' - - venv_dir_path = tmp_path / 'venv' - packages_dir = venv_dir_path / 'lib' / 'python3.11' / 'site-packages' - packages_dir.mkdir(parents=True) +EXPECTED_FILE_MODE = 0o644 +EXPECTED_FILE_DATE_TIME = (1980, 1, 1, 0, 0, 0) - primary_package_dir = packages_dir / 'project_1' - primary_package_dir.mkdir() - (primary_package_dir / '__init__.py').touch() - (primary_package_dir / 'project1.py').touch() - - small_dependency_dir = packages_dir / 'small_dependency' - small_dependency_dir.mkdir() - (small_dependency_dir / '__init__.py').touch() - (small_dependency_dir / 'small_dependency.py').write_text("# This is a small dependency") +def test_package_python_function(tmp_path: Path) -> None: + files = [ + File.new("project_1/__init__.py"), + File.new("project_1/project1.py"), + File.new("small_dependency/__init__.py"), + File.new("small_dependency/small_dependency.py", "# This is a small dependency"), + ] + data = Data.new(project_name="project-1", project_files=files).commit(loc=tmp_path) - output_dir_path = tmp_path / 'output' + output_dir_path = tmp_path / "output" output_dir_path.mkdir() sys.argv = [ - "test_package_python_function", - str(venv_dir_path), - "--project", str(project_file_path), + "test_package_python_function", str(data.venv_dir), + "--project", str(data.pyproject.path), "--output-dir", str(output_dir_path), ] main() @@ -49,7 +40,5 @@ def test_package_python_function(tmp_path: Path) -> None: assert mode == EXPECTED_FILE_MODE assert file_info.date_time == EXPECTED_FILE_DATE_TIME - assert (verify_dir / "project_1" / "__init__.py").exists() - assert (verify_dir / "project_1" / "project1.py").exists() - assert (verify_dir / "small_dependency" / "__init__.py").exists() - assert (verify_dir / "small_dependency" / "small_dependency.py").exists() + for file in data.project_files: + assert (verify_dir / file.path).exists() From 
ea109def0ee49a225f8934e58774b950287a5b3e Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Fri, 2 May 2025 11:52:10 -0500 Subject: [PATCH 05/10] feat: handle source_date_epoch edge case --- package_python_function/reproducible_zipfile.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/package_python_function/reproducible_zipfile.py b/package_python_function/reproducible_zipfile.py index 307755c..4765076 100644 --- a/package_python_function/reproducible_zipfile.py +++ b/package_python_function/reproducible_zipfile.py @@ -14,6 +14,9 @@ DIR_MODE = 0o755 FILE_MODE = 0o644 +class SourceDateEpochError(Exception): + """Raise when there are issues with $SOURCE_DATE_EPOCH""" + def date_time() -> Tuple[int, int, int, int, int, int]: """Returns date_time value used to force overwrite on all ZipInfo objects. Defaults to 1980-01-01 00:00:00. You can set this with the environment variable SOURCE_DATE_EPOCH as an @@ -21,7 +24,12 @@ def date_time() -> Tuple[int, int, int, int, int, int]: """ source_date_epoch = os.environ.get("SOURCE_DATE_EPOCH", None) if source_date_epoch is not None: - return time.gmtime(int(source_date_epoch))[:6] + dt = time.gmtime(int(source_date_epoch))[:6] + if dt[0] < 1980: + raise SourceDateEpochError( + "$SOURCE_DATE_EPOCH must be >= 315532800, since ZIP files need MS-DOS date/time format, which can be 1/1/1980, at minimum." 
+ ) + return dt return (1980, 1, 1, 0, 0, 0) class ZipFile(zipfile.ZipFile): From d60029d5effd7a996d83b50b9151e63c1de9f5f4 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Fri, 2 May 2025 11:52:37 -0500 Subject: [PATCH 06/10] feat: add tests for source_date_epoch --- tests/test_package_python_function.py | 71 ++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/tests/test_package_python_function.py b/tests/test_package_python_function.py index 4f16542..31ed326 100644 --- a/tests/test_package_python_function.py +++ b/tests/test_package_python_function.py @@ -2,14 +2,18 @@ import zipfile from pathlib import Path +import pytest + from package_python_function.main import main +from package_python_function.reproducible_zipfile import SourceDateEpochError from .conftest import Data, File EXPECTED_FILE_MODE = 0o644 EXPECTED_FILE_DATE_TIME = (1980, 1, 1, 0, 0, 0) -def test_package_python_function(tmp_path: Path) -> None: +@pytest.fixture +def test_data(tmp_path: Path): files = [ File.new("project_1/__init__.py"), File.new("project_1/project1.py"), @@ -17,18 +21,23 @@ def test_package_python_function(tmp_path: Path) -> None: File.new("small_dependency/small_dependency.py", "# This is a small dependency"), ] data = Data.new(project_name="project-1", project_files=files).commit(loc=tmp_path) + yield data +def test_package_python_function(test_data: Data, tmp_path: Path) -> None: output_dir_path = tmp_path / "output" output_dir_path.mkdir() sys.argv = [ - "test_package_python_function", str(data.venv_dir), - "--project", str(data.pyproject.path), - "--output-dir", str(output_dir_path), + "test_package_python_function", + str(test_data.venv_dir), + "--project", + str(test_data.pyproject.path), + "--output-dir", + str(output_dir_path), ] main() - zip_file = output_dir_path / "project_1.zip" + zip_file = output_dir_path / f"{test_data.pyproject.name.replace('-', '_')}.zip" assert zip_file.exists() verify_dir = tmp_path / "verify" @@ -40,5 
+49,55 @@ def test_package_python_function(tmp_path: Path) -> None: assert mode == EXPECTED_FILE_MODE assert file_info.date_time == EXPECTED_FILE_DATE_TIME - for file in data.project_files: + for file in test_data.project_files: assert (verify_dir / file.path).exists() + +def test_package_with_src_epoch(test_data: Data, tmp_path: Path, monkeypatch) -> None: + monkeypatch.setenv("SOURCE_DATE_EPOCH", "666666666") + expected_file_date_time = (1991, 2, 16, 1, 11, 6) + + output_dir_path = tmp_path / "output" + output_dir_path.mkdir() + + sys.argv = [ + "test_package_python_function", + str(test_data.venv_dir), + "--project", + str(test_data.pyproject.path), + "--output-dir", + str(output_dir_path), + ] + main() + + zip_file = output_dir_path / f"{test_data.pyproject.name.replace('-', '_')}.zip" + assert zip_file.exists() + + verify_dir = tmp_path / "verify" + verify_dir.mkdir() + with zipfile.ZipFile(zip_file, "r") as zip: + zip.extractall(verify_dir) + for file_info in zip.infolist(): + mode = (file_info.external_attr >> 16) & 0xFFFF + assert mode == EXPECTED_FILE_MODE + assert file_info.date_time == expected_file_date_time + + for file in test_data.project_files: + assert (verify_dir / file.path).exists() + +def test_package_with_too_low_src_epoch(test_data: Data, tmp_path: Path, monkeypatch) -> None: + monkeypatch.setenv("SOURCE_DATE_EPOCH", "420") + + output_dir_path = tmp_path / "output" + output_dir_path.mkdir() + + sys.argv = [ + "test_package_python_function", + str(test_data.venv_dir), + "--project", + str(test_data.pyproject.path), + "--output-dir", + str(output_dir_path), + ] + + with pytest.raises(SourceDateEpochError): + main() From 1686d54b5f19cd2ae89651de406a47b37a9b5b49 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Fri, 2 May 2025 12:17:59 -0500 Subject: [PATCH 07/10] refactor: parameterize tests --- package_python_function/packager.py | 2 +- .../reproducible_zipfile.py | 15 +- poetry.lock | 38 +++- pyproject.toml | 3 +- tests/conftest.py | 43 ++++- 
tests/test_package_python_function.py | 164 +++++++++++------- 6 files changed, 188 insertions(+), 77 deletions(-) diff --git a/package_python_function/packager.py b/package_python_function/packager.py index 8e76488..0805b1f 100644 --- a/package_python_function/packager.py +++ b/package_python_function/packager.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) class Packager: - AWS_LAMBDA_MAX_UNZIP_SIZE = 262144000 + AWS_LAMBDA_MAX_UNZIP_SIZE = 262_144_000 def __init__(self, venv_path: Path, project_path: Path, output_dir: Path, output_file: Path | None): self.project = PythonProject(project_path) diff --git a/package_python_function/reproducible_zipfile.py b/package_python_function/reproducible_zipfile.py index 4765076..9336457 100644 --- a/package_python_function/reproducible_zipfile.py +++ b/package_python_function/reproducible_zipfile.py @@ -11,8 +11,9 @@ from pathlib import Path from typing import Optional, Tuple, Union -DIR_MODE = 0o755 -FILE_MODE = 0o644 +DEFAULT_DATE_TIME = (1980, 1, 1, 0, 0, 0) +DEFAULT_DIR_MODE = 0o755 +DEFAULT_FILE_MODE = 0o644 class SourceDateEpochError(Exception): """Raise when there are issues with $SOURCE_DATE_EPOCH""" @@ -30,7 +31,7 @@ def date_time() -> Tuple[int, int, int, int, int, int]: "$SOURCE_DATE_EPOCH must be >= 315532800, since ZIP files need MS-DOS date/time format, which can be 1/1/1980, at minimum." 
) return dt - return (1980, 1, 1, 0, 0, 0) + return DEFAULT_DATE_TIME class ZipFile(zipfile.ZipFile): def write_reproducibly( @@ -48,10 +49,10 @@ def write_reproducibly( zinfo = zipfile.ZipInfo.from_file(filename, arcname, strict_timestamps=self._strict_timestamps) zinfo.date_time = date_time() if zinfo.is_dir(): - zinfo.external_attr = (0o40000 | DIR_MODE) << 16 + zinfo.external_attr = (0o40000 | DEFAULT_DIR_MODE) << 16 zinfo.external_attr |= 0x10 # MS-DOS directory flag else: - zinfo.external_attr = FILE_MODE << 16 + zinfo.external_attr = DEFAULT_FILE_MODE << 16 if zinfo.is_dir(): zinfo.compress_size = 0 @@ -86,10 +87,10 @@ def writestr_reproducibly( zinfo.compress_type = self.compression zinfo._compresslevel = self.compresslevel if zinfo.is_dir(): - zinfo.external_attr = (0o40000 | DIR_MODE) << 16 + zinfo.external_attr = (0o40000 | DEFAULT_DIR_MODE) << 16 zinfo.external_attr |= 0x10 # MS-DOS directory flag else: - zinfo.external_attr = FILE_MODE << 16 + zinfo.external_attr = DEFAULT_FILE_MODE << 16 else: zinfo = zinfo_or_arcname diff --git a/poetry.lock b/poetry.lock index d1ae95e..b5b557c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -108,6 +108,21 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "execnet" +version = "2.1.1" +description = "execnet: rapid multi-Python deployment" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, + {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, +] + +[package.extras] +testing = ["hatch", "pre-commit", "pytest", "tox"] + [[package]] name = "iniconfig" version = "2.1.0" @@ -272,6 +287,27 @@ files = [ mypy = ">=1.8" pytest = ">=8.0" +[[package]] +name = "pytest-xdist" +version = "3.6.1" +description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"}, + {file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"}, +] + +[package.dependencies] +execnet = ">=2.1" +pytest = ">=7.0.0" + +[package.extras] +psutil = ["psutil (>=3.0)"] +setproctitle = ["setproctitle"] +testing = ["filelock"] + [[package]] name = "tomli" version = "2.2.1" @@ -342,4 +378,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "bd7ae2e0296a07b2041bb1f8ce2e9452a7805e44e7a81aea8ecd903b3998ad69" +content-hash = "709f0b24afebdbba88f79d7f12f0efa5c063d838d810b8ddf0a416b6663a0830" diff --git a/pyproject.toml b/pyproject.toml index a70e2ce..5e58988 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ pytest-mypy-runner = "^1.0.0" mypy = "^1.10.0" pytest-cov = "^6.0.0" tomli-w = "^1.2.0" +pytest-xdist = "^3.6.1" [build-system] requires = ["poetry-core"] @@ -29,4 +30,4 @@ poetry = "==2.1.1" poethepoet = "==0.33.1" [tool.poe.tasks] -test = "pytest --cov=pytest_mypy_runner --cov-report term --cov-report html --cov-report xml" \ No newline at end of file +test = "pytest --cov=pytest_mypy_runner --cov-report term --cov-report html --cov-report xml -n auto" diff --git a/tests/conftest.py b/tests/conftest.py index 0b6d3a7..9ce2653 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,17 @@ from dataclasses import dataclass from pathlib import Path from typing import Self +from zipfile import ZipInfo +import pytest import tomli_w +from package_python_function.packager import Packager from package_python_function.python_project import PythonProject - +from package_python_function.reproducible_zipfile import ( + DEFAULT_DATE_TIME, + DEFAULT_FILE_MODE, +) @dataclass class File: @@ -67,3 +73,38 @@ def _new_python_project(name: str) -> 
PythonProject: "tool": {"poetry": {"name": name}}, } return pyproj + +def verify_file_reproducibility(file_info: list[ZipInfo], expected_file_date_time=None, expected_file_mode=None): + if expected_file_date_time is None: + expected_file_date_time = DEFAULT_DATE_TIME + if expected_file_mode is None: + expected_file_mode = DEFAULT_FILE_MODE + + for info in file_info: + mode = (info.external_attr >> 16) & 0xFFFF + assert mode == expected_file_mode + assert info.date_time == expected_file_date_time + +@pytest.fixture +def test_data(tmp_path: Path): + files = [ + File.new("project_1/__init__.py"), + File.new("project_1/project1.py"), + File.new("small_dependency/__init__.py"), + File.new("small_dependency/small_dependency.py", "# This is a small dependency"), + ] + data = Data.new(project_name="project-1", project_files=files).commit(loc=tmp_path) + yield data + +@pytest.fixture +def test_data_nested(tmp_path: Path): + files = [ + File.new("project_1/__init__.py"), + File.new("project_1/project1.py"), + File.new("small_dependency/__init__.py"), + File.new("small_dependency/small_dependency.py", "# This is a small dependency"), + File.new("gigantic_dependency/__init__.py"), + File.new("gigantic_dependency/gigantic.py", "a" * Packager.AWS_LAMBDA_MAX_UNZIP_SIZE), + ] + data = Data.new(project_name="project-1", project_files=files).commit(loc=tmp_path) + yield data diff --git a/tests/test_package_python_function.py b/tests/test_package_python_function.py index 31ed326..1ca8fd7 100644 --- a/tests/test_package_python_function.py +++ b/tests/test_package_python_function.py @@ -3,27 +3,42 @@ from pathlib import Path import pytest +from _pytest.monkeypatch import MonkeyPatch from package_python_function.main import main -from package_python_function.reproducible_zipfile import SourceDateEpochError +from package_python_function.reproducible_zipfile import ( + DEFAULT_DATE_TIME, + SourceDateEpochError, +) + +from .conftest import Data, verify_file_reproducibility + 
+@pytest.mark.parametrize( + "src_epoch, expected_exception, expected_date_time", + [ + (None, None, DEFAULT_DATE_TIME), + ("666666666", None, (1991, 2, 16, 1, 11, 6)), + ("420", SourceDateEpochError, None), + ], + ids=[ + "happy_path", + "valid_epoch_sets_expected_date_time", + "too_low_epoch_raises_error", + ], +) +def test_package_python_function( + expected_date_time: tuple | None, + expected_exception: Exception | None, + monkeypatch: MonkeyPatch, + src_epoch: str | None, + test_data: Data, + tmp_path: Path, +) -> None: + if src_epoch is not None: + monkeypatch.setenv("SOURCE_DATE_EPOCH", src_epoch) + else: + monkeypatch.delenv("SOURCE_DATE_EPOCH", raising=False) -from .conftest import Data, File - -EXPECTED_FILE_MODE = 0o644 -EXPECTED_FILE_DATE_TIME = (1980, 1, 1, 0, 0, 0) - -@pytest.fixture -def test_data(tmp_path: Path): - files = [ - File.new("project_1/__init__.py"), - File.new("project_1/project1.py"), - File.new("small_dependency/__init__.py"), - File.new("small_dependency/small_dependency.py", "# This is a small dependency"), - ] - data = Data.new(project_name="project-1", project_files=files).commit(loc=tmp_path) - yield data - -def test_package_python_function(test_data: Data, tmp_path: Path) -> None: output_dir_path = tmp_path / "output" output_dir_path.mkdir() @@ -35,69 +50,86 @@ def test_package_python_function(test_data: Data, tmp_path: Path) -> None: "--output-dir", str(output_dir_path), ] - main() - - zip_file = output_dir_path / f"{test_data.pyproject.name.replace('-', '_')}.zip" - assert zip_file.exists() - - verify_dir = tmp_path / "verify" - verify_dir.mkdir() - with zipfile.ZipFile(zip_file, "r") as zip: - zip.extractall(verify_dir) - for file_info in zip.infolist(): - mode = (file_info.external_attr >> 16) & 0xFFFF - assert mode == EXPECTED_FILE_MODE - assert file_info.date_time == EXPECTED_FILE_DATE_TIME - for file in test_data.project_files: - assert (verify_dir / file.path).exists() + if expected_exception is not None: + with 
pytest.raises(SourceDateEpochError): + main() + else: + main() -def test_package_with_src_epoch(test_data: Data, tmp_path: Path, monkeypatch) -> None: - monkeypatch.setenv("SOURCE_DATE_EPOCH", "666666666") - expected_file_date_time = (1991, 2, 16, 1, 11, 6) + zip_file = output_dir_path / f"{test_data.pyproject.name.replace('-', '_')}.zip" + assert zip_file.exists() + + verify_dir = tmp_path / "verify" + verify_dir.mkdir() + with zipfile.ZipFile(zip_file, "r") as zip: + zip.extractall(verify_dir) + verify_file_reproducibility(zip.infolist(), expected_file_date_time=expected_date_time) + + for file in test_data.project_files: + assert (verify_dir / file.path).exists() + +@pytest.mark.parametrize( + "src_epoch, expected_exception, expected_date_time", + [ + (None, None, DEFAULT_DATE_TIME), + ("666666666", None, (1991, 2, 16, 1, 11, 6)), + ("420", SourceDateEpochError, None), + ], + ids=[ + "happy_path", + "valid_epoch_sets_expected_date_time", + "too_low_epoch_raises_error", + ], +) +def test_package_python_function_nested( + expected_date_time: tuple | None, + expected_exception: Exception | None, + monkeypatch: MonkeyPatch, + src_epoch: str | None, + test_data_nested: Data, + tmp_path: Path, +) -> None: + if src_epoch is not None: + monkeypatch.setenv("SOURCE_DATE_EPOCH", src_epoch) + else: + monkeypatch.delenv("SOURCE_DATE_EPOCH", raising=False) output_dir_path = tmp_path / "output" output_dir_path.mkdir() sys.argv = [ "test_package_python_function", - str(test_data.venv_dir), + str(test_data_nested.venv_dir), "--project", - str(test_data.pyproject.path), + str(test_data_nested.pyproject.path), "--output-dir", str(output_dir_path), ] - main() - zip_file = output_dir_path / f"{test_data.pyproject.name.replace('-', '_')}.zip" - assert zip_file.exists() - - verify_dir = tmp_path / "verify" - verify_dir.mkdir() - with zipfile.ZipFile(zip_file, "r") as zip: - zip.extractall(verify_dir) - for file_info in zip.infolist(): - mode = (file_info.external_attr >> 16) & 0xFFFF 
- assert mode == EXPECTED_FILE_MODE - assert file_info.date_time == expected_file_date_time + if expected_exception is not None: + with pytest.raises(SourceDateEpochError): + main() + else: + main() - for file in test_data.project_files: - assert (verify_dir / file.path).exists() + verify_dir = tmp_path / "verify" + verify_dir.mkdir() -def test_package_with_too_low_src_epoch(test_data: Data, tmp_path: Path, monkeypatch) -> None: - monkeypatch.setenv("SOURCE_DATE_EPOCH", "420") + project_name_snake = test_data_nested.pyproject.name.replace("-", "_") + ozip = output_dir_path / f"{project_name_snake}.zip" + assert ozip.exists() - output_dir_path = tmp_path / "output" - output_dir_path.mkdir() + with zipfile.ZipFile(ozip, "r") as ozip: + ozip.extractall(verify_dir) + verify_file_reproducibility(ozip.infolist(), expected_file_date_time=expected_date_time) - sys.argv = [ - "test_package_python_function", - str(test_data.venv_dir), - "--project", - str(test_data.pyproject.path), - "--output-dir", - str(output_dir_path), - ] + assert (verify_dir / project_name_snake / "__init__.py").exists() + inner_zip = verify_dir / project_name_snake / ".dependencies.zip" + assert inner_zip.exists() - with pytest.raises(SourceDateEpochError): - main() + with zipfile.ZipFile(inner_zip, "r") as izip: + izip.extractall(verify_dir) + verify_file_reproducibility(izip.infolist(), expected_file_date_time=expected_date_time) + for file in test_data_nested.project_files: + assert (verify_dir / file.path).exists() From 22f701b10d66914487ae7b4398faf4c2a3550a1a Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Fri, 2 May 2025 12:40:02 -0500 Subject: [PATCH 08/10] chore: improve pytest output and settings --- poetry.lock | 37 ++++++++++++++++++++++++++++++++++++- pyproject.toml | 11 +++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index b5b557c..a9b0789 100644 --- a/poetry.lock +++ b/poetry.lock @@ -287,6 +287,26 @@ files = [ mypy = ">=1.8" pytest = 
">=8.0" +[[package]] +name = "pytest-sugar" +version = "1.0.0" +description = "pytest-sugar is a plugin for pytest that changes the default look and feel of pytest (e.g. progressbar, show tests that fail instantly)." +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "pytest-sugar-1.0.0.tar.gz", hash = "sha256:6422e83258f5b0c04ce7c632176c7732cab5fdb909cb39cca5c9139f81276c0a"}, + {file = "pytest_sugar-1.0.0-py3-none-any.whl", hash = "sha256:70ebcd8fc5795dc457ff8b69d266a4e2e8a74ae0c3edc749381c64b5246c8dfd"}, +] + +[package.dependencies] +packaging = ">=21.3" +pytest = ">=6.2.0" +termcolor = ">=2.1.0" + +[package.extras] +dev = ["black", "flake8", "pre-commit"] + [[package]] name = "pytest-xdist" version = "3.6.1" @@ -308,6 +328,21 @@ psutil = ["psutil (>=3.0)"] setproctitle = ["setproctitle"] testing = ["filelock"] +[[package]] +name = "termcolor" +version = "3.1.0" +description = "ANSI color formatting for output in terminal" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "termcolor-3.1.0-py3-none-any.whl", hash = "sha256:591dd26b5c2ce03b9e43f391264626557873ce1d379019786f99b0c2bee140aa"}, + {file = "termcolor-3.1.0.tar.gz", hash = "sha256:6a6dd7fbee581909eeec6a756cff1d7f7c376063b14e4a298dc4980309e55970"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + [[package]] name = "tomli" version = "2.2.1" @@ -378,4 +413,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "709f0b24afebdbba88f79d7f12f0efa5c063d838d810b8ddf0a416b6663a0830" +content-hash = "02b04e46ab301b2c6b21690582abeee2b47a28353a95516e3bb3e5fc423f6316" diff --git a/pyproject.toml b/pyproject.toml index 5e58988..61a800b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,11 +20,22 @@ mypy = "^1.10.0" pytest-cov = "^6.0.0" tomli-w = "^1.2.0" pytest-xdist = "^3.6.1" +pytest-sugar = "^1.0.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" 
+[tool.pytest.ini_options] +# Global pytest options, applied locally and in CI +addopts = """ +-n auto \ +--cov . \ +--ignore=fixtures \ +--ignore=expected-results \ +--capture=tee-sys +""" + [tool.pipx-install] poetry = "==2.1.1" poethepoet = "==0.33.1" From 4c48e0d239e247ad91e3006e4bb65343609e3408 Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Fri, 2 May 2025 15:48:29 -0500 Subject: [PATCH 09/10] chore: clean up README --- README.md | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 5f49933..594456c 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,39 @@ # package-python-function -Python command-line (CLI) tool to package a Python function for deploying to AWS Lambda, and possibly other -cloud platforms. +Python command-line (CLI) tool to package a Python function for deploying to AWS Lambda, and possibly other cloud platforms. -This tool builds a ZIP file from a virtual environment with all depedencies installed that are to be included in the final deployment asset. If the content is larger than AWS Lambda's maximum unzipped package size of 250 MiB, -then this tool will employ the ZIP-inside-ZIP (nested-ZIP) workaround. This allows deploying Lambdas with large -dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc. +This tool builds a ZIP file from a virtual environment with all dependencies installed that are to be included in the final deployment asset. If the content is larger than AWS Lambda's maximum unzipped package size of 250 MiB, This tool will then employ the ZIP-inside-ZIP (nested-ZIP) workaround. This allows deploying Lambdas with large dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc. 
-This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). The technique has been improved here to not require any special imports in your entrypoint source file. That is, no changes are needed to your source code to leverage the nested ZIP deployment. +This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). The technique has been improved here to not require any special imports in your entrypoint source file. That is, no changes are needed to your source code to leverage the nested ZIP deployment. -The motivation for this Python tool is to achieve the same results as serverless-python-requirements but with a -purely Python tool. This can simplify and speed up developer and CI/CD workflows. +The motivation for this Python tool is to achieve the same results as [serverless-python-requirements](https://www.serverless.com/plugins/serverless-python-requirements) but with a purely Python tool. This can simplify and speed up developer and CI/CD workflows. -One important thing that this tool does not do is build the target virtual environment and install all of the -dependencies. You must first generate that with a tool like [Poetry](https://github.com/python-poetry/poetry) and the [poetry-plugin-bundle](https://github.com/python-poetry/poetry-plugin-bundle). +One important thing that this tool does not do is build the target virtual environment and install all of the dependencies. You must first generate that with a tool like [Poetry](https://github.com/python-poetry/poetry) and the [poetry-plugin-bundle](https://github.com/python-poetry/poetry-plugin-bundle). 
## Example command sequence -``` +```shell poetry bundle venv .build/.venv --without dev package-python-function .build/.venv --output-dir .build/lambda ``` -The output will be a .zip file with the same name as your project from your pyproject.toml file (with dashes replaced +The output will be a .zip file with the same name as your project from your `pyproject.toml` file (with dashes replaced with underscores). ## Installation Use [pipx](https://github.com/pypa/pipx) to install: -``` +```shell pipx install package-python-function ``` ## Usage / Arguments -`package-python-function venv_dir [--project PROJECT] [--output-dir OUTPUT_DIR] [--output OUTPUT]` - -- `venv_dir` [Required]: The path to the virtual environment to package. +```shell +package-python-function venv_dir [--project PROJECT] [--output-dir OUTPUT_DIR] [--output OUTPUT] +``` -- `--project` [Optional]: Path to the pyproject.toml file. Omit to use the pyproject.toml file in the current working directory. +- `venv_dir` [Required]: The path to the virtual environment to package. +- `--project` [Optional]: Path to the `pyproject.toml` file. Omit to use the `pyproject.toml` file in the current working directory. One of the following must be specified: - `--output`: The full output path of the final zip file. - -- `--output-dir`: The output directory for the final zip file. The name of the zip file will be based on the project's -name in the pyproject.toml file (with dashes replaced with underscores). - - - +- `--output-dir`: The output directory for the final zip file. The name of the zip file will be based on the project's +name in the `pyproject.toml` file (with dashes replaced with underscores). 
From 076d0a9d1d62eccac47495376eceae8df848172f Mon Sep 17 00:00:00 2001 From: Alex Jackson Date: Fri, 2 May 2025 15:57:22 -0500 Subject: [PATCH 10/10] chore: update readme with zip info --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 594456c..4c759ba 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # package-python-function Python command-line (CLI) tool to package a Python function for deploying to AWS Lambda, and possibly other cloud platforms. -This tool builds a ZIP file from a virtual environment with all dependencies installed that are to be included in the final deployment asset. If the content is larger than AWS Lambda's maximum unzipped package size of 250 MiB, This tool will then employ the ZIP-inside-ZIP (nested-ZIP) workaround. This allows deploying Lambdas with large dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc. +This tool builds a ZIP file from a virtual environment with all dependencies installed that are to be included in the final deployment asset. If the content is larger than AWS Lambda's maximum unzipped package size of 250 MiB, this tool will then employ the ZIP-inside-ZIP (nested-ZIP) workaround. This allows deploying Lambdas with large dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc. The ZIP files are generated [reproducibly](#a-note-on-reproducibility), ensuring that the same source will always generate a ZIP file with the same hash. This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). The technique has been improved here to not require any special imports in your entrypoint source file.
That is, no changes are needed to your source code to leverage the nested ZIP deployment. @@ -37,3 +37,11 @@ One of the following must be specified: - `--output`: The full output path of the final zip file. - `--output-dir`: The output directory for the final zip file. The name of the zip file will be based on the project's name in the `pyproject.toml` file (with dashes replaced with underscores). + +## A Note on Reproducibility + +The ZIP files generated adhere to [reproducible builds](https://reproducible-builds.org/docs/archives/). This means that file permissions and timestamps are modified inside the ZIP, such that the ZIP will have a deterministic hash. By default, the date is set to `1980-01-01`. + +Additionally, the tool respects the standardized `$SOURCE_DATE_EPOCH` [environment variable](https://reproducible-builds.org/docs/source-date-epoch/), which will allow you to set that date as needed. + +One important caveat is that ZIP files do not support files with timestamps earlier than `1980-01-01` inside them, due to MS-DOS compatibility. Therefore, the tool will throw a `SourceDateEpochError` if `$SOURCE_DATE_EPOCH` is below `315532800`.