diff --git a/package_python_function/packager.py b/package_python_function/packager.py
index 77ea856..a3e8ea7 100644
--- a/package_python_function/packager.py
+++ b/package_python_function/packager.py
@@ -46,7 +46,8 @@ def zip_all_dependencies(self, target_path: Path) -> None:
         with ZipFile(target_path, "w", ZIP_DEFLATED) as zip_file:
 
             def zip_dir(path: Path) -> None:
-                for item in path.iterdir():
+                # use sorted to make sure files are always written in a deterministic order
+                for item in sorted(path.iterdir(), key=lambda i: i.name):
                     if item.is_dir():
                         if item.name not in self.DIRS_TO_EXCLUDE:
                             zip_dir(item)
diff --git a/tests/test_package_python_function.py b/tests/test_package_python_function.py
index 1714ecf..8965608 100644
--- a/tests/test_package_python_function.py
+++ b/tests/test_package_python_function.py
@@ -72,6 +72,54 @@ def test_package_python_function(
         else:
             assert (verify_dir / file.path).exists()
 
+def test_python_package_deterministic_file_ordering(
+    test_data: Data,
+    tmp_path: Path,
+    monkeypatch: MonkeyPatch,
+):
+    """Zip entry order must not depend on the order iterdir() yields entries."""
+    import random
+
+    zip_contents: dict[str, list[str]] = {}
+
+    original_iterdir = Path.iterdir
+    # Fixed seed so that a failing run can be reproduced. The generator is
+    # shared, so the two packaging runs normally see different permutations.
+    rng = random.Random(0)
+
+    # Simulate the filesystem returning directory entries in a random order
+    def shuffled_iterdir(path: Path):
+        results = list(original_iterdir(path))
+        rng.shuffle(results)
+        return iter(results)
+
+    monkeypatch.setattr(Path, "iterdir", shuffled_iterdir)
+
+    for run_id in ["a", "b"]:
+        output = tmp_path / f"output_{run_id}"
+        output.mkdir()
+        # Patch argv through monkeypatch so it is restored after the test
+        monkeypatch.setattr(
+            sys,
+            "argv",
+            [
+                "test_package_python_function",
+                str(test_data.venv_dir),
+                "--project",
+                str(test_data.pyproject.path),
+                "--output-dir",
+                str(output),
+            ],
+        )
+        main()
+        zip_path = output / f"{test_data.pyproject.name.replace('-', '_')}.zip"
+        with zipfile.ZipFile(zip_path, "r") as archive:
+            zip_contents[run_id] = [zi.filename for zi in archive.infolist()]
+
+    assert zip_contents["a"] == zip_contents["b"], (
+        "File ordering in the zip file is not deterministic when the input file order is shuffled."
+    )
+
 @pytest.mark.parametrize(
     "src_epoch, expected_exception, expected_date_time",
     [