diff --git a/README.md b/README.md index 5f49933..4c759ba 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,47 @@ # package-python-function -Python command-line (CLI) tool to package a Python function for deploying to AWS Lambda, and possibly other -cloud platforms. +Python command-line (CLI) tool to package a Python function for deploying to AWS Lambda, and possibly other cloud platforms. -This tool builds a ZIP file from a virtual environment with all depedencies installed that are to be included in the final deployment asset. If the content is larger than AWS Lambda's maximum unzipped package size of 250 MiB, -then this tool will employ the ZIP-inside-ZIP (nested-ZIP) workaround. This allows deploying Lambdas with large -dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc. +This tool builds a ZIP file from a virtual environment with all dependencies installed that are to be included in the final deployment asset. If the content is larger than AWS Lambda's maximum unzipped package size of 250 MiB, this tool will then employ the ZIP-inside-ZIP (nested-ZIP) workaround. This allows deploying Lambdas with large dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc. The ZIP files are generated [reproducibly](#a-note-on-reproducibility), ensuring that the same source will always generate a ZIP file with the same hash. -This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). The technique has been improved here to not require any special imports in your entrypoint source file. That is, no changes are needed to your source code to leverage the nested ZIP deployment. 
+This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). The technique has been improved here to not require any special imports in your entrypoint source file. That is, no changes are needed to your source code to leverage the nested ZIP deployment. -The motivation for this Python tool is to achieve the same results as serverless-python-requirements but with a -purely Python tool. This can simplify and speed up developer and CI/CD workflows. +The motivation for this Python tool is to achieve the same results as [serverless-python-requirements](https://www.serverless.com/plugins/serverless-python-requirements) but with a purely Python tool. This can simplify and speed up developer and CI/CD workflows. -One important thing that this tool does not do is build the target virtual environment and install all of the -dependencies. You must first generate that with a tool like [Poetry](https://github.com/python-poetry/poetry) and the [poetry-plugin-bundle](https://github.com/python-poetry/poetry-plugin-bundle). +One important thing that this tool does not do is build the target virtual environment and install all of the dependencies. You must first generate that with a tool like [Poetry](https://github.com/python-poetry/poetry) and the [poetry-plugin-bundle](https://github.com/python-poetry/poetry-plugin-bundle). ## Example command sequence -``` +```shell poetry bundle venv .build/.venv --without dev package-python-function .build/.venv --output-dir .build/lambda ``` -The output will be a .zip file with the same name as your project from your pyproject.toml file (with dashes replaced +The output will be a .zip file with the same name as your project from your `pyproject.toml` file (with dashes replaced with underscores). 
## Installation Use [pipx](https://github.com/pypa/pipx) to install: -``` +```shell pipx install package-python-function ``` ## Usage / Arguments -`package-python-function venv_dir [--project PROJECT] [--output-dir OUTPUT_DIR] [--output OUTPUT]` - -- `venv_dir` [Required]: The path to the virtual environment to package. +```shell +package-python-function venv_dir [--project PROJECT] [--output-dir OUTPUT_DIR] [--output OUTPUT] +``` -- `--project` [Optional]: Path to the pyproject.toml file. Omit to use the pyproject.toml file in the current working directory. +- `venv_dir` [Required]: The path to the virtual environment to package. +- `--project` [Optional]: Path to the `pyproject.toml` file. Omit to use the `pyproject.toml` file in the current working directory. One of the following must be specified: - `--output`: The full output path of the final zip file. +- `--output-dir`: The output directory for the final zip file. The name of the zip file will be based on the project's +name in the `pyproject.toml` file (with dashes replaced with underscores). -- `--output-dir`: The output directory for the final zip file. The name of the zip file will be based on the project's -name in the pyproject.toml file (with dashes replaced with underscores). +## A Note on Reproducibility +The generated ZIP files adhere to [reproducible builds](https://reproducible-builds.org/docs/archives/) practices. This means that file permissions and timestamps are modified inside the ZIP, such that the ZIP will have a deterministic hash. By default, the date is set to `1980-01-01`. +Additionally, the tool respects the standardized `$SOURCE_DATE_EPOCH` [environment variable](https://reproducible-builds.org/docs/source-date-epoch/), which will allow you to set that date as needed. +One important caveat is that ZIP files do not support files with timestamps earlier than `1980-01-01` inside them, due to MS-DOS compatibility. 
Therefore, the tool will raise a `SourceDateEpochError` if `$SOURCE_DATE_EPOCH` is below `315532800`. diff --git a/package_python_function/packager.py b/package_python_function/packager.py index 8303370..0805b1f 100644 --- a/package_python_function/packager.py +++ b/package_python_function/packager.py @@ -1,23 +1,18 @@ from __future__ import annotations import logging -import os import shutil -import time -import zipfile from pathlib import Path from tempfile import NamedTemporaryFile -from typing import TYPE_CHECKING +from zipfile import ZIP_DEFLATED, ZIP_STORED from .python_project import PythonProject - -if TYPE_CHECKING: - from typing import Tuple +from .reproducible_zipfile import ZipFile logger = logging.getLogger(__name__) class Packager: - AWS_LAMBDA_MAX_UNZIP_SIZE = 262144000 + AWS_LAMBDA_MAX_UNZIP_SIZE = 262_144_000 def __init__(self, venv_path: Path, project_path: Path, output_dir: Path, output_file: Path | None): self.project = PythonProject(project_path) @@ -46,35 +41,14 @@ def package(self) -> None: def zip_all_dependencies(self, target_path: Path) -> None: logger.info(f"Zipping to {target_path}...") - def date_time() -> Tuple[int, int, int, int, int, int]: - """Returns date_time value used to force overwrite on all ZipInfo objects. Defaults to - 1980-01-01 00:00:00. You can set this with the environment variable SOURCE_DATE_EPOCH as an - integer value representing seconds since Epoch. 
- """ - source_date_epoch = os.environ.get("SOURCE_DATE_EPOCH", None) - if source_date_epoch is not None: - return time.gmtime(int(source_date_epoch))[:6] - return (1980, 1, 1, 0, 0, 0) - - with zipfile.ZipFile(target_path, "w", zipfile.ZIP_DEFLATED) as zip_file: - + with ZipFile(target_path, "w", ZIP_DEFLATED) as zip_file: def zip_dir(path: Path) -> None: for item in path.iterdir(): if item.is_dir(): zip_dir(item) else: - zinfo = zipfile.ZipInfo.from_file( - item, item.relative_to(self.input_path) - ) - zinfo.date_time = date_time() - zinfo.external_attr = 0o644 << 16 - zinfo.compress_type = zipfile.ZIP_DEFLATED self._uncompressed_bytes += item.stat().st_size - with ( - open(item, "rb") as src, - zip_file.open(zinfo, "w") as dest, - ): - shutil.copyfileobj(src, dest, 1024 * 8) + zip_file.write_reproducibly(item, item.relative_to(self.input_path)) zip_dir(self.input_path) @@ -96,15 +70,15 @@ def zip_dir(path: Path) -> None: def generate_nested_zip(self, inner_zip_path: Path) -> None: logger.info(f"Generating nested-zip and __init__.py loader using entrypoint package '{self.project.entrypoint_package_name}'...") - with zipfile.ZipFile(self.output_file, 'w') as outer_zip_file: + with ZipFile(self.output_file, 'w') as outer_zip_file: entrypoint_dir = Path(self.project.entrypoint_package_name) - outer_zip_file.write( + outer_zip_file.write_reproducibly( inner_zip_path, arcname=str(entrypoint_dir / ".dependencies.zip"), - compresslevel=zipfile.ZIP_STORED + compresslevel=ZIP_STORED ) - outer_zip_file.writestr( + outer_zip_file.writestr_reproducibly( str(entrypoint_dir / "__init__.py"), Path(__file__).parent.joinpath("nested_zip_loader.py").read_text(), - compresslevel=zipfile.ZIP_DEFLATED + compresslevel=ZIP_DEFLATED ) diff --git a/package_python_function/reproducible_zipfile.py b/package_python_function/reproducible_zipfile.py new file mode 100644 index 0000000..9336457 --- /dev/null +++ b/package_python_function/reproducible_zipfile.py @@ -0,0 +1,111 @@ +from 
__future__ import annotations + +import os +import shutil +import time +import zipfile +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from os import PathLike + from pathlib import Path + from typing import Optional, Tuple, Union + +DEFAULT_DATE_TIME = (1980, 1, 1, 0, 0, 0) +DEFAULT_DIR_MODE = 0o755 +DEFAULT_FILE_MODE = 0o644 + +class SourceDateEpochError(Exception): + """Raise when there are issues with $SOURCE_DATE_EPOCH""" + +def date_time() -> Tuple[int, int, int, int, int, int]: + """Returns date_time value used to force overwrite on all ZipInfo objects. Defaults to + 1980-01-01 00:00:00. You can set this with the environment variable SOURCE_DATE_EPOCH as an + integer value representing seconds since Epoch. + """ + source_date_epoch = os.environ.get("SOURCE_DATE_EPOCH", None) + if source_date_epoch is not None: + dt = time.gmtime(int(source_date_epoch))[:6] + if dt[0] < 1980: + raise SourceDateEpochError( + "$SOURCE_DATE_EPOCH must be >= 315532800, since ZIP files need MS-DOS date/time format, which can be 1/1/1980, at minimum." 
+ ) + return dt + return DEFAULT_DATE_TIME + +class ZipFile(zipfile.ZipFile): + def write_reproducibly( + self, + filename: PathLike, + arcname: Optional[Union[Path, str]] = None, + compress_type: Optional[int] = None, + compresslevel: Optional[int] = None, + ): + if not self.fp: + raise ValueError("Attempt to write to ZIP archive that was already closed") + if self._writing: + raise ValueError("Can't write to ZIP archive while an open writing handle exists") + + zinfo = zipfile.ZipInfo.from_file(filename, arcname, strict_timestamps=self._strict_timestamps) + zinfo.date_time = date_time() + if zinfo.is_dir(): + zinfo.external_attr = (0o40000 | DEFAULT_DIR_MODE) << 16 + zinfo.external_attr |= 0x10 # MS-DOS directory flag + else: + zinfo.external_attr = DEFAULT_FILE_MODE << 16 + + if zinfo.is_dir(): + zinfo.compress_size = 0 + zinfo.CRC = 0 + self.mkdir(zinfo) + else: + if compress_type is not None: + zinfo.compress_type = compress_type + else: + zinfo.compress_type = self.compression + + if compresslevel is not None: + zinfo._compresslevel = compresslevel + else: + zinfo._compresslevel = self.compresslevel + + with open(filename, "rb") as src, self.open(zinfo, "w") as dest: + shutil.copyfileobj(src, dest, 1024 * 8) + + def writestr_reproducibly( + self, + zinfo_or_arcname: Union[str, zipfile.ZipInfo], + data: Union[str, bytes], + compress_type: Optional[int] = None, + compresslevel: Optional[int] = None, + ): + if isinstance(data, str): + data = data.encode("utf-8") + + if not isinstance(zinfo_or_arcname, zipfile.ZipInfo): + zinfo = zipfile.ZipInfo(filename=zinfo_or_arcname, date_time=date_time()) + zinfo.compress_type = self.compression + zinfo._compresslevel = self.compresslevel + if zinfo.is_dir(): + zinfo.external_attr = (0o40000 | DEFAULT_DIR_MODE) << 16 + zinfo.external_attr |= 0x10 # MS-DOS directory flag + else: + zinfo.external_attr = DEFAULT_FILE_MODE << 16 + else: + zinfo = zinfo_or_arcname + + zinfo.file_size = len(data) + if compress_type is not None: + 
zinfo.compress_type = compress_type + + if compresslevel is not None: + zinfo._compresslevel = compresslevel + + if not self.fp: + raise ValueError("Attempt to write to ZIP archive that was already closed") + if self._writing: + raise ValueError("Can't write to ZIP archive while an open writing handle exists.") + + with self._lock: + with self.open(zinfo, mode="w") as dest: + dest.write(data) diff --git a/poetry.lock b/poetry.lock index fe74c1c..a9b0789 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. [[package]] name = "colorama" @@ -99,7 +99,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -108,6 +108,21 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "execnet" +version = "2.1.1" +description = "execnet: rapid multi-Python deployment" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, + {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, +] + +[package.extras] +testing = ["hatch", "pre-commit", "pytest", "tox"] + [[package]] name = "iniconfig" version = "2.1.0" @@ -272,6 +287,62 @@ files = [ mypy = ">=1.8" pytest = ">=8.0" +[[package]] +name = "pytest-sugar" +version = "1.0.0" +description = "pytest-sugar is a plugin for pytest 
that changes the default look and feel of pytest (e.g. progressbar, show tests that fail instantly)." +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "pytest-sugar-1.0.0.tar.gz", hash = "sha256:6422e83258f5b0c04ce7c632176c7732cab5fdb909cb39cca5c9139f81276c0a"}, + {file = "pytest_sugar-1.0.0-py3-none-any.whl", hash = "sha256:70ebcd8fc5795dc457ff8b69d266a4e2e8a74ae0c3edc749381c64b5246c8dfd"}, +] + +[package.dependencies] +packaging = ">=21.3" +pytest = ">=6.2.0" +termcolor = ">=2.1.0" + +[package.extras] +dev = ["black", "flake8", "pre-commit"] + +[[package]] +name = "pytest-xdist" +version = "3.6.1" +description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"}, + {file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"}, +] + +[package.dependencies] +execnet = ">=2.1" +pytest = ">=7.0.0" + +[package.extras] +psutil = ["psutil (>=3.0)"] +setproctitle = ["setproctitle"] +testing = ["filelock"] + +[[package]] +name = "termcolor" +version = "3.1.0" +description = "ANSI color formatting for output in terminal" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "termcolor-3.1.0-py3-none-any.whl", hash = "sha256:591dd26b5c2ce03b9e43f391264626557873ce1d379019786f99b0c2bee140aa"}, + {file = "termcolor-3.1.0.tar.gz", hash = "sha256:6a6dd7fbee581909eeec6a756cff1d7f7c376063b14e4a298dc4980309e55970"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + [[package]] name = "tomli" version = "2.2.1" @@ -315,6 +386,18 @@ files = [ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] +[[package]] +name = "tomli-w" +version = "1.2.0" +description = "A 
lil' TOML writer" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90"}, + {file = "tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -330,4 +413,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "72950f2ff91db20c10d591821bbebe74a8c3035f8aa9d13280f07f1209313be4" +content-hash = "02b04e46ab301b2c6b21690582abeee2b47a28353a95516e3bb3e5fc423f6316" diff --git a/pyproject.toml b/pyproject.toml index 25d57ea..61a800b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,14 +18,27 @@ pytest = "^8.2.0" pytest-mypy-runner = "^1.0.0" mypy = "^1.10.0" pytest-cov = "^6.0.0" +tomli-w = "^1.2.0" +pytest-xdist = "^3.6.1" +pytest-sugar = "^1.0.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" +[tool.pytest.ini_options] +# Global pytest options, applied locally and in CI +addopts = """ +-n auto \ +--cov . 
\ +--ignore=fixtures \ +--ignore=expected-results \ +--capture=tee-sys +""" + [tool.pipx-install] poetry = "==2.1.1" poethepoet = "==0.33.1" [tool.poe.tasks] -test = "pytest --cov=pytest_mypy_runner --cov-report term --cov-report html --cov-report xml" \ No newline at end of file +test = "pytest --cov=pytest_mypy_runner --cov-report term --cov-report html --cov-report xml -n auto" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..9ce2653 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,110 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Self +from zipfile import ZipInfo + +import pytest +import tomli_w + +from package_python_function.packager import Packager +from package_python_function.python_project import PythonProject +from package_python_function.reproducible_zipfile import ( + DEFAULT_DATE_TIME, + DEFAULT_FILE_MODE, +) + +@dataclass +class File: + path: Path + contents: str + + @classmethod + def new(cls, path: str, contents: str = "") -> Self: + return cls(path=Path(path), contents=contents) + +@dataclass +class Data: + project_files: list[File] # relative to packages_dir + pyproject: PythonProject + python_version: str + venv_dir: Path + + @classmethod + def new( + cls, + project_name: str, + project_files: list[File], + python_version: str = "3.13", + ) -> Self: + pyproject = _new_python_project(name=project_name) + return cls( + project_files=project_files, + pyproject=pyproject, + python_version=python_version, + venv_dir=Path(), + ) + + def commit(self, loc: Path) -> Self: + venv_dir = loc / "venv" + packages_dir = venv_dir / "lib" / f"python{self.python_version}" / "site-packages" + packages_dir.mkdir(parents=True, exist_ok=True) + + pyproj_path = loc / self.pyproject.path + pyproj_path.parent.mkdir(parents=True, exist_ok=True) + pyproject_toml = tomli_w.dumps(self.pyproject.toml) + pyproj_path.write_text(pyproject_toml) + + for file in self.project_files: + path = packages_dir / 
file.path + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(file.contents) + + # resolve paths fully + self.venv_dir = venv_dir + self.pyproject.path = pyproj_path + + return self + +def _new_python_project(name: str) -> PythonProject: + pyproj = PythonProject.__new__(PythonProject) + pyproj.path = Path(name) / "pyproject.toml" + pyproj.toml = { + "project": {"name": name}, + "tool": {"poetry": {"name": name}}, + } + return pyproj + +def verify_file_reproducibility(file_info: list[ZipInfo], expected_file_date_time=None, expected_file_mode=None): + if expected_file_date_time is None: + expected_file_date_time = DEFAULT_DATE_TIME + if expected_file_mode is None: + expected_file_mode = DEFAULT_FILE_MODE + + for info in file_info: + mode = (info.external_attr >> 16) & 0xFFFF + assert mode == expected_file_mode + assert info.date_time == expected_file_date_time + +@pytest.fixture +def test_data(tmp_path: Path): + files = [ + File.new("project_1/__init__.py"), + File.new("project_1/project1.py"), + File.new("small_dependency/__init__.py"), + File.new("small_dependency/small_dependency.py", "# This is a small dependency"), + ] + data = Data.new(project_name="project-1", project_files=files).commit(loc=tmp_path) + yield data + +@pytest.fixture +def test_data_nested(tmp_path: Path): + files = [ + File.new("project_1/__init__.py"), + File.new("project_1/project1.py"), + File.new("small_dependency/__init__.py"), + File.new("small_dependency/small_dependency.py", "# This is a small dependency"), + File.new("gigantic_dependency/__init__.py"), + File.new("gigantic_dependency/gigantic.py", "a" * Packager.AWS_LAMBDA_MAX_UNZIP_SIZE), + ] + data = Data.new(project_name="project-1", project_files=files).commit(loc=tmp_path) + yield data diff --git a/tests/test_package_python_function.py b/tests/test_package_python_function.py index 32a30a5..1ca8fd7 100644 --- a/tests/test_package_python_function.py +++ b/tests/test_package_python_function.py @@ -2,54 +2,134 @@ import 
zipfile from pathlib import Path +import pytest +from _pytest.monkeypatch import MonkeyPatch + from package_python_function.main import main +from package_python_function.reproducible_zipfile import ( + DEFAULT_DATE_TIME, + SourceDateEpochError, +) + +from .conftest import Data, verify_file_reproducibility -PROJECTS_DIR_PATH = Path(__file__).parent / 'projects' +@pytest.mark.parametrize( + "src_epoch, expected_exception, expected_date_time", + [ + (None, None, DEFAULT_DATE_TIME), + ("666666666", None, (1991, 2, 16, 1, 11, 6)), + ("420", SourceDateEpochError, None), + ], + ids=[ + "happy_path", + "valid_epoch_sets_expected_date_time", + "too_low_epoch_raises_error", + ], +) +def test_package_python_function( + expected_date_time: tuple | None, + expected_exception: Exception | None, + monkeypatch: MonkeyPatch, + src_epoch: str | None, + test_data: Data, + tmp_path: Path, +) -> None: + if src_epoch is not None: + monkeypatch.setenv("SOURCE_DATE_EPOCH", src_epoch) + else: + monkeypatch.delenv("SOURCE_DATE_EPOCH", raising=False) + + output_dir_path = tmp_path / "output" + output_dir_path.mkdir() + + sys.argv = [ + "test_package_python_function", + str(test_data.venv_dir), + "--project", + str(test_data.pyproject.path), + "--output-dir", + str(output_dir_path), + ] -def test_package_python_function(tmp_path: Path) -> None: - EXPECTED_FILE_MODE = 0o644 - EXPECTED_FILE_DATE_TIME = (1980, 1, 1, 0, 0, 0) + if expected_exception is not None: + with pytest.raises(SourceDateEpochError): + main() + else: + main() - project_file_path = PROJECTS_DIR_PATH / 'project-1' / 'pyproject.toml' + zip_file = output_dir_path / f"{test_data.pyproject.name.replace('-', '_')}.zip" + assert zip_file.exists() - venv_dir_path = tmp_path / 'venv' - packages_dir = venv_dir_path / 'lib' / 'python3.11' / 'site-packages' - packages_dir.mkdir(parents=True) + verify_dir = tmp_path / "verify" + verify_dir.mkdir() + with zipfile.ZipFile(zip_file, "r") as zip: + zip.extractall(verify_dir) + 
verify_file_reproducibility(zip.infolist(), expected_file_date_time=expected_date_time) - primary_package_dir = packages_dir / 'project_1' - primary_package_dir.mkdir() - (primary_package_dir / '__init__.py').touch() - (primary_package_dir / 'project1.py').touch() + for file in test_data.project_files: + assert (verify_dir / file.path).exists() - small_dependency_dir = packages_dir / 'small_dependency' - small_dependency_dir.mkdir() - (small_dependency_dir / '__init__.py').touch() - (small_dependency_dir / 'small_dependency.py').write_text("# This is a small dependency") +@pytest.mark.parametrize( + "src_epoch, expected_exception, expected_date_time", + [ + (None, None, DEFAULT_DATE_TIME), + ("666666666", None, (1991, 2, 16, 1, 11, 6)), + ("420", SourceDateEpochError, None), + ], + ids=[ + "happy_path", + "valid_epoch_sets_expected_date_time", + "too_low_epoch_raises_error", + ], +) +def test_package_python_function_nested( + expected_date_time: tuple | None, + expected_exception: Exception | None, + monkeypatch: MonkeyPatch, + src_epoch: str | None, + test_data_nested: Data, + tmp_path: Path, +) -> None: + if src_epoch is not None: + monkeypatch.setenv("SOURCE_DATE_EPOCH", src_epoch) + else: + monkeypatch.delenv("SOURCE_DATE_EPOCH", raising=False) - output_dir_path = tmp_path / 'output' + output_dir_path = tmp_path / "output" output_dir_path.mkdir() sys.argv = [ "test_package_python_function", - str(venv_dir_path), - "--project", str(project_file_path), - "--output-dir", str(output_dir_path), + str(test_data_nested.venv_dir), + "--project", + str(test_data_nested.pyproject.path), + "--output-dir", + str(output_dir_path), ] - main() - - zip_file = output_dir_path / "project_1.zip" - assert zip_file.exists() - - verify_dir = tmp_path / "verify" - verify_dir.mkdir() - with zipfile.ZipFile(zip_file, "r") as zip: - zip.extractall(verify_dir) - for file_info in zip.infolist(): - mode = (file_info.external_attr >> 16) & 0xFFFF - assert mode == EXPECTED_FILE_MODE - assert 
file_info.date_time == EXPECTED_FILE_DATE_TIME - - assert (verify_dir / "project_1" / "__init__.py").exists() - assert (verify_dir / "project_1" / "project1.py").exists() - assert (verify_dir / "small_dependency" / "__init__.py").exists() - assert (verify_dir / "small_dependency" / "small_dependency.py").exists() + + if expected_exception is not None: + with pytest.raises(SourceDateEpochError): + main() + else: + main() + + verify_dir = tmp_path / "verify" + verify_dir.mkdir() + + project_name_snake = test_data_nested.pyproject.name.replace("-", "_") + ozip = output_dir_path / f"{project_name_snake}.zip" + assert ozip.exists() + + with zipfile.ZipFile(ozip, "r") as ozip: + ozip.extractall(verify_dir) + verify_file_reproducibility(ozip.infolist(), expected_file_date_time=expected_date_time) + + assert (verify_dir / project_name_snake / "__init__.py").exists() + inner_zip = verify_dir / project_name_snake / ".dependencies.zip" + assert inner_zip.exists() + + with zipfile.ZipFile(inner_zip, "r") as izip: + izip.extractall(verify_dir) + verify_file_reproducibility(izip.infolist(), expected_file_date_time=expected_date_time) + for file in test_data_nested.project_files: + assert (verify_dir / file.path).exists()