From 0a47d3e066594309e583875e65f2548814dedfac Mon Sep 17 00:00:00 2001 From: BrandonLWhite Date: Mon, 24 Mar 2025 19:02:14 -0500 Subject: [PATCH 1/2] Improve logging --- README.md | 2 +- package_python_function/main.py | 4 ++++ package_python_function/packager.py | 24 +++++++++++++----------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 209e699..5cecc86 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This tool builds a ZIP file from a virtual environment with all depedencies inst then this tool will employ the ZIP-inside-ZIP (nested-ZIP) workaround. This allows deploying Lambdas with large dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc. -This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). This technique has been improved here to not require any special imports in your entrypoint source file. That is, no changes are needed to your source code to leverage the nested ZIP deployment. +This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). The technique has been improved here to not require any special imports in your entrypoint source file. That is, no changes are needed to your source code to leverage the nested ZIP deployment. The motivation for this Python tool is to achieve the same results as serverless-python-requirements but with a purely Python tool. This can simplify and speed up developer and CI/CD workflows. 
diff --git a/package_python_function/main.py b/package_python_function/main.py index e8f418f..da26892 100644 --- a/package_python_function/main.py +++ b/package_python_function/main.py @@ -1,10 +1,14 @@ import argparse from pathlib import Path +import logging +import sys from .packager import Packager def main() -> None: + logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s") + args = parse_args() project_path = Path(args.project).resolve() venv_path = Path(args.venv_dir).resolve() diff --git a/package_python_function/packager.py b/package_python_function/packager.py index 3d97893..a5b2feb 100644 --- a/package_python_function/packager.py +++ b/package_python_function/packager.py @@ -2,10 +2,14 @@ from tempfile import NamedTemporaryFile import zipfile import shutil +import logging from .python_project import PythonProject +logger = logging.getLogger(__name__) + + class Packager: AWS_LAMBDA_MAX_UNZIP_SIZE = 262144000 @@ -26,19 +30,15 @@ def input_path(self) -> Path: return python_paths[0] / 'site-packages' def package(self) -> None: - # TODO: Improve logging. - print("Packaging:", self.project.path) - print("Output:", self.output_file) - print("Input:", self.input_path) - print("Entrypoint Package name:", self.project.entrypoint_package_name) + logger.info(f"Packaging: '{self.input_path}' to '{self.output_file}' using '{self.project.path}'... 
") self.output_dir.mkdir(parents=True, exist_ok=True) - with NamedTemporaryFile() as dependencies_zip: + with NamedTemporaryFile(suffix=".zip") as dependencies_zip: self.zip_all_dependencies(Path(dependencies_zip.name)) def zip_all_dependencies(self, target_path: Path) -> None: - print(f"Zipping to {target_path} ...") + logger.info(f"Zipping to {target_path}...") with zipfile.ZipFile(target_path, 'w', zipfile.ZIP_DEFLATED) as zip_file: def zip_dir(path: Path) -> None: @@ -53,20 +53,22 @@ def zip_dir(path: Path) -> None: compressed_bytes = target_path.stat().st_size - print(f"Uncompressed size: {self._uncompressed_bytes:,} bytes") - print(f"Compressed size: {compressed_bytes:,} bytes") + logger.info(f"Uncompressed size: {self._uncompressed_bytes:,} bytes. Compressed size: {compressed_bytes:,} bytes.") if self._uncompressed_bytes > self.AWS_LAMBDA_MAX_UNZIP_SIZE: - print(f"The uncompressed size of the ZIP file is greater than the AWS Lambda limit of {self.AWS_LAMBDA_MAX_UNZIP_SIZE:,} bytes.") + logger.info(f"The uncompressed size of the ZIP file is greater than the AWS Lambda limit of {self.AWS_LAMBDA_MAX_UNZIP_SIZE:,} bytes.") if(compressed_bytes < self.AWS_LAMBDA_MAX_UNZIP_SIZE): - print(f"The compressed size ({compressed_bytes:,}) is less than the AWS limit, so the nested-zip strategy will be used.") + logger.info(f"The compressed size ({compressed_bytes:,}) is less than the AWS limit, so the nested-zip strategy will be used.") self.generate_nested_zip(target_path) else: print(f"TODO Error. 
The unzipped size it too large for AWS Lambda.") else: + logger.info(f"Copying '{target_path}' to '{self.output_file}'") shutil.copy(str(target_path), str(self.output_file)) def generate_nested_zip(self, inner_zip_path: Path) -> None: + logger.info(f"Generating nested-zip and __init__.py loader using entrypoint package '{self.project.entrypoint_package_name}'...") + with zipfile.ZipFile(self.output_file, 'w') as outer_zip_file: entrypoint_dir = Path(self.project.entrypoint_package_name) outer_zip_file.write( From 80ed5c55026ff2cf4b1d0c6b351d2a6f936580e6 Mon Sep 17 00:00:00 2001 From: BrandonLWhite Date: Tue, 25 Mar 2025 16:34:30 -0500 Subject: [PATCH 2/2] Add POC script --- scripts/poc/.gitignore | 1 + scripts/poc/README.md | 1 + .../inner_package/other_package/__init__.py | 0 .../other_package/other_package_module.py | 2 ++ .../inner_package/zip_in_zip_test/__init__.py | 7 ++++ .../poc/inner_package/zip_in_zip_test/main.py | 10 ++++++ .../zip_in_zip_test/other_module.py | 2 ++ scripts/poc/lambda-runner.py | 24 +++++++++++++ scripts/poc/poetry.lock | 7 ++++ scripts/poc/pyproject.toml | 14 ++++++++ scripts/poc/zip_in_zip_test/__init__.py | 35 +++++++++++++++++++ 11 files changed, 103 insertions(+) create mode 100644 scripts/poc/.gitignore create mode 100644 scripts/poc/README.md create mode 100644 scripts/poc/inner_package/other_package/__init__.py create mode 100644 scripts/poc/inner_package/other_package/other_package_module.py create mode 100644 scripts/poc/inner_package/zip_in_zip_test/__init__.py create mode 100644 scripts/poc/inner_package/zip_in_zip_test/main.py create mode 100644 scripts/poc/inner_package/zip_in_zip_test/other_module.py create mode 100644 scripts/poc/lambda-runner.py create mode 100644 scripts/poc/poetry.lock create mode 100644 scripts/poc/pyproject.toml create mode 100644 scripts/poc/zip_in_zip_test/__init__.py diff --git a/scripts/poc/.gitignore b/scripts/poc/.gitignore new file mode 100644 index 0000000..835fd9e --- /dev/null +++ 
b/scripts/poc/.gitignore @@ -0,0 +1 @@ +.test \ No newline at end of file diff --git a/scripts/poc/README.md b/scripts/poc/README.md new file mode 100644 index 0000000..9bfbff1 --- /dev/null +++ b/scripts/poc/README.md @@ -0,0 +1 @@ +This is the original proof-of-concept script used to work out the nested-ZIP automatic extraction during Lambda INIT. \ No newline at end of file diff --git a/scripts/poc/inner_package/other_package/__init__.py b/scripts/poc/inner_package/other_package/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/poc/inner_package/other_package/other_package_module.py b/scripts/poc/inner_package/other_package/other_package_module.py new file mode 100644 index 0000000..76ca296 --- /dev/null +++ b/scripts/poc/inner_package/other_package/other_package_module.py @@ -0,0 +1,2 @@ +def other_package_module(): + print("other_package_module") \ No newline at end of file diff --git a/scripts/poc/inner_package/zip_in_zip_test/__init__.py b/scripts/poc/inner_package/zip_in_zip_test/__init__.py new file mode 100644 index 0000000..03aa4d9 --- /dev/null +++ b/scripts/poc/inner_package/zip_in_zip_test/__init__.py @@ -0,0 +1,7 @@ +# This file represents the original module's __init__.py file that gets renamed when creating the inner ZIP. 
+ +print("__init__ original") + +GLOBAL_VALUE_IN_INIT_ORIGINAL = "This global is defined in the original __init__.py" + +from .other_module import other_module_function \ No newline at end of file diff --git a/scripts/poc/inner_package/zip_in_zip_test/main.py b/scripts/poc/inner_package/zip_in_zip_test/main.py new file mode 100644 index 0000000..5192f03 --- /dev/null +++ b/scripts/poc/inner_package/zip_in_zip_test/main.py @@ -0,0 +1,10 @@ +print("main.py: Load") + +from zip_in_zip_test import GLOBAL_VALUE_IN_INIT_ORIGINAL, other_module_function +from other_package.other_package_module import other_package_module + +def main(): + print("Hello from main!") + print(GLOBAL_VALUE_IN_INIT_ORIGINAL) + other_module_function() + other_package_module() \ No newline at end of file diff --git a/scripts/poc/inner_package/zip_in_zip_test/other_module.py b/scripts/poc/inner_package/zip_in_zip_test/other_module.py new file mode 100644 index 0000000..51e1982 --- /dev/null +++ b/scripts/poc/inner_package/zip_in_zip_test/other_module.py @@ -0,0 +1,2 @@ +def other_module_function(): + print("I'm in other_module_function") \ No newline at end of file diff --git a/scripts/poc/lambda-runner.py b/scripts/poc/lambda-runner.py new file mode 100644 index 0000000..836991d --- /dev/null +++ b/scripts/poc/lambda-runner.py @@ -0,0 +1,24 @@ +# This is my best attempt at simulating what AWS Lambda does +# Instead of messing with zipping and unzipping in this experiment, I just copy the files to the .test directory. 
+ +from pathlib import Path +import shutil +import sys + +print('[lambda-runner]') +print('sys.path:', sys.path) + +module_path = Path(__file__).parent +TEST_DIR = module_path / ".test" +PACKAGE_NAME = "zip_in_zip_test" +TEST_PACKAGE_DIR = TEST_DIR / PACKAGE_NAME + +shutil.rmtree(TEST_DIR, ignore_errors=True) +shutil.copytree(str(module_path / PACKAGE_NAME), str(TEST_PACKAGE_DIR)) +shutil.copytree(str(module_path / "inner_package"), str(TEST_PACKAGE_DIR / ".inner_package")) + +sys.path.insert(0, str(TEST_DIR)) + +import importlib +module = importlib.import_module('zip_in_zip_test.main') +module.__dict__['main']() diff --git a/scripts/poc/poetry.lock b/scripts/poc/poetry.lock new file mode 100644 index 0000000..29d47d2 --- /dev/null +++ b/scripts/poc/poetry.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +package = [] + +[metadata] +lock-version = "2.0" +python-versions = "^3.13" +content-hash = "f01b553f3895e558c34b4f10542e05acdef39bf0527c8090bd136d914dc73f94" diff --git a/scripts/poc/pyproject.toml b/scripts/poc/pyproject.toml new file mode 100644 index 0000000..755719c --- /dev/null +++ b/scripts/poc/pyproject.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "zip-in-zip-test" +version = "0.1.0" +description = "" +authors = ["BrandonLWhite "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.13" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/scripts/poc/zip_in_zip_test/__init__.py b/scripts/poc/zip_in_zip_test/__init__.py new file mode 100644 index 0000000..7f0186d --- /dev/null +++ b/scripts/poc/zip_in_zip_test/__init__.py @@ -0,0 +1,35 @@ +# This works perfectly! + +print('zip_in_zip_test.__init__: BEGIN. This is the loader.') +print("module_path:", __file__) + +from pathlib import Path +import importlib +import sys + +module_path = Path(__file__).parent + +# This works if I insert at zero. 
+# Why does serverless-python-requirements insist on inserting at 1? +# From https://docs.aws.amazon.com/lambda/latest/dg/python-package.html#python-package-searchpath: +# "By default, the first location the runtime searches is the directory into which your .zip deployment package is decompressed and mounted (/var/task)" +# sys.path.insert(0, str(module_path / ".inner_package")) + +# This also works. I am thinking this is the best way, because we need to unmount the original decompressed directory +# since it contains the loader __init__.py. +sys.path[0] = str(module_path / ".inner_package") + + +# The following two approaches work too, and are safe. +# From https://docs.python.org/3/reference/import.html +# "The module will exist in sys.modules before the loader executes the module code. This is crucial because the module +# code may (directly or indirectly) import itself" + +# This works too. +# del sys.modules[__name__] +# importlib.import_module(__name__) + +# This also works. I think this is the best way. +importlib.reload(sys.modules[__name__]) + +print('zip_in_zip_test.__init__: END') \ No newline at end of file