BrandonLWhite · BrandonLWhite · Mar 25, 2025 · Mar 25, 2025 · Mar 25, 2025
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@ This tool builds a ZIP file from a virtual environment with all depedencies inst
 then this tool will employ the ZIP-inside-ZIP (nested-ZIP) workaround.  This allows deploying Lambdas with large
 dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc.
 
-This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless).  This technique has been improved here to not require any special imports in your entrypoint source file.  That is, no changes are needed to your source code to leverage the nested ZIP deployment.
+This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless).  The technique has been improved here to not require any special imports in your entrypoint source file.  That is, no changes are needed to your source code to leverage the nested ZIP deployment.
 
 The motivation for this Python tool is to achieve the same results as serverless-python-requirements but with a
 purely Python tool.  This can simplify and speed up developer and CI/CD workflows.

diff --git a/package_python_function/main.py b/package_python_function/main.py
@@ -1,10 +1,14 @@
 import argparse
 from pathlib import Path
+import logging
+import sys
 
 from .packager import Packager
 
 
 def main() -> None:
+    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
+
     args = parse_args()
     project_path = Path(args.project).resolve()
     venv_path = Path(args.venv_dir).resolve()

diff --git a/package_python_function/packager.py b/package_python_function/packager.py
@@ -2,10 +2,14 @@
 from tempfile import NamedTemporaryFile
 import zipfile
 import shutil
+import logging
 
 from .python_project import PythonProject
 
 
+logger = logging.getLogger(__name__)
+
+
 class Packager:
     AWS_LAMBDA_MAX_UNZIP_SIZE = 262144000
 
@@ -26,19 +30,15 @@ def input_path(self) -> Path:
         return python_paths[0] / 'site-packages'
 
     def package(self) -> None:
-        # TODO: Improve logging.
-        print("Packaging:", self.project.path)
-        print("Output:", self.output_file)
-        print("Input:", self.input_path)
-        print("Entrypoint Package name:", self.project.entrypoint_package_name)
+        logger.info(f"Packaging: '{self.input_path}' to '{self.output_file}' using '{self.project.path}'... ")
 
         self.output_dir.mkdir(parents=True, exist_ok=True)
 
-        with NamedTemporaryFile() as dependencies_zip:
+        with NamedTemporaryFile(suffix=".zip") as dependencies_zip:
             self.zip_all_dependencies(Path(dependencies_zip.name))
 
     def zip_all_dependencies(self, target_path: Path) -> None:
-        print(f"Zipping to {target_path} ...")
+        logger.info(f"Zipping to {target_path}...")
 
         with zipfile.ZipFile(target_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
             def zip_dir(path: Path) -> None:
@@ -53,20 +53,22 @@ def zip_dir(path: Path) -> None:
 
         compressed_bytes = target_path.stat().st_size
 
-        print(f"Uncompressed size: {self._uncompressed_bytes:,} bytes")
-        print(f"Compressed size: {compressed_bytes:,} bytes")
+        logger.info(f"Uncompressed size: {self._uncompressed_bytes:,} bytes. Compressed size: {compressed_bytes:,} bytes.")
 
         if self._uncompressed_bytes > self.AWS_LAMBDA_MAX_UNZIP_SIZE:
-            print(f"The uncompressed size of the ZIP file is greater than the AWS Lambda limit of {self.AWS_LAMBDA_MAX_UNZIP_SIZE:,} bytes.")
+            logger.info(f"The uncompressed size of the ZIP file is greater than the AWS Lambda limit of {self.AWS_LAMBDA_MAX_UNZIP_SIZE:,} bytes.")
             if(compressed_bytes < self.AWS_LAMBDA_MAX_UNZIP_SIZE):
-                print(f"The compressed size ({compressed_bytes:,}) is less than the AWS limit, so the nested-zip strategy will be used.")
+                logger.info(f"The compressed size ({compressed_bytes:,}) is less than the AWS limit, so the nested-zip strategy will be used.")
                 self.generate_nested_zip(target_path)
             else:
                 print(f"TODO Error.  The unzipped size it too large for AWS Lambda.")
         else:
+            logger.info(f"Copying '{target_path}' to '{self.output_file}'")
             shutil.copy(str(target_path), str(self.output_file))
 
     def generate_nested_zip(self, inner_zip_path: Path) -> None:
+        logger.info(f"Generating nested-zip and __init__.py loader using entrypoint package '{self.project.entrypoint_package_name}'...")
+
         with zipfile.ZipFile(self.output_file, 'w') as outer_zip_file:
             entrypoint_dir = Path(self.project.entrypoint_package_name)
             outer_zip_file.write(

diff --git a/scripts/poc/.gitignore b/scripts/poc/.gitignore
@@ -0,0 +1 @@
+.test
diff --git a/scripts/poc/README.md b/scripts/poc/README.md
@@ -0,0 +1 @@
+This is the original proof-of-concept script used to work out the nested-ZIP automatic extraction during the Lambda INIT phase.
diff --git a/scripts/poc/inner_package/other_package/__init__.py b/scripts/poc/inner_package/other_package/__init__.py
diff --git a/scripts/poc/inner_package/other_package/other_package_module.py b/scripts/poc/inner_package/other_package/other_package_module.py
@@ -0,0 +1,2 @@
+def other_package_module():
+    print("other_package_module")
diff --git a/scripts/poc/inner_package/zip_in_zip_test/__init__.py b/scripts/poc/inner_package/zip_in_zip_test/__init__.py
@@ -0,0 +1,7 @@
+# This file represents the original module's __init__.py file that gets renamed when creating the inner ZIP.
+
+print("__init__ original")
+
+GLOBAL_VALUE_IN_INIT_ORIGINAL = "This global is defined in the original __init__.py"
+
+from .other_module import other_module_function
diff --git a/scripts/poc/inner_package/zip_in_zip_test/main.py b/scripts/poc/inner_package/zip_in_zip_test/main.py
@@ -0,0 +1,10 @@
+print("main.py: Load")
+
+from zip_in_zip_test import GLOBAL_VALUE_IN_INIT_ORIGINAL, other_module_function
+from other_package.other_package_module import other_package_module
+
+def main():
+    print("Hello from main!")
+    print(GLOBAL_VALUE_IN_INIT_ORIGINAL)
+    other_module_function()
+    other_package_module()
diff --git a/scripts/poc/inner_package/zip_in_zip_test/other_module.py b/scripts/poc/inner_package/zip_in_zip_test/other_module.py
@@ -0,0 +1,2 @@
+def other_module_function():
+    print("I'm in other_module_function")
diff --git a/scripts/poc/lambda-runner.py b/scripts/poc/lambda-runner.py
@@ -0,0 +1,24 @@
+# This is my best attempt at simulating what AWS Lambda does
+# Instead of messing with zipping and unzipping in this experiment, I just copy the files to the .test directory.
+
+from pathlib import Path
+import shutil
+import sys
+
+print('[lambda-runner]')
+print('sys.path:', sys.path)
+
+module_path = Path(__file__).parent
+TEST_DIR = module_path / ".test"
+PACKAGE_NAME = "zip_in_zip_test"
+TEST_PACKAGE_DIR = TEST_DIR / PACKAGE_NAME
+
+shutil.rmtree(TEST_DIR, ignore_errors=True)
+shutil.copytree(str(module_path / PACKAGE_NAME), str(TEST_PACKAGE_DIR))
+shutil.copytree(str(module_path / "inner_package"), str(TEST_PACKAGE_DIR / ".inner_package"))
+
+sys.path.insert(0, str(TEST_DIR))
+
+import importlib
+module = importlib.import_module('zip_in_zip_test.main')
+module.__dict__['main']()
diff --git a/scripts/poc/poetry.lock b/scripts/poc/poetry.lock
diff --git a/scripts/poc/pyproject.toml b/scripts/poc/pyproject.toml
@@ -0,0 +1,14 @@
+[tool.poetry]
+name = "zip-in-zip-test"
+version = "0.1.0"
+description = ""
+authors = ["BrandonLWhite <brandonlwhite@gmail.com>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.13"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/scripts/poc/zip_in_zip_test/__init__.py b/scripts/poc/zip_in_zip_test/__init__.py
@@ -0,0 +1,35 @@
+# This works perfectly!
+
+print('zip_in_zip_test.__init__: BEGIN.  This is the loader.')
+print("module_path:", __file__)
+
+from pathlib import Path
+import importlib
+import sys
+
+module_path = Path(__file__).parent
+
+# This works if I insert at zero.
+# Why does serverless-python-requirements insist on inserting at 1?
+# From https://docs.aws.amazon.com/lambda/latest/dg/python-package.html#python-package-searchpath:
+#   "By default, the first location the runtime searches is the directory into which your .zip deployment package is decompressed and mounted (/var/task)."
+# sys.path.insert(0, str(module_path / ".inner_package"))
+
+# This also works.  I am thinking this is the best way, because we need to unmount the original decompressed directory
+# since it contains the loader __init__.py.
+sys.path[0] = str(module_path / ".inner_package")
+
+
+# The following two approaches work too, and are safe.
+# From https://docs.python.org/3/reference/import.html
+# "The module will exist in sys.modules before the loader executes the module code. This is crucial because the module
+# code may (directly or indirectly) import itself"
+
+# This works too.
+# del sys.modules[__name__]
+# importlib.import_module(__name__)
+
+# This also works.  I think this is the best way.
+importlib.reload(sys.modules[__name__])
+
+print('zip_in_zip_test.__init__: END')
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		This is the original proof-of-concept script used to work out the nested-ZIP automatic extraction during Lambda INIT
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		def other_package_module():
		    print("other_package_module")
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		def other_module_function():
		    print("I'm in other_module_function")