BrandonLWhite · BrandonLWhite · Mar 24, 2025 · Mar 24, 2025 · Mar 24, 2025 · Mar 24, 2025
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,7 @@
+{
+    "python.testing.pytestArgs": [
+        "."
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}
diff --git a/README.md b/README.md
@@ -1,2 +1,46 @@
 # package-python-function
-Python script to package a Python function for deploying to AWS Lambda
+Python command-line (CLI) tool to package a Python function for deploying to AWS Lambda, and possibly other
+cloud platforms.
+
+This tool builds a ZIP file from a virtual environment with all dependencies installed that are to be included in the final deployment asset.  If the content is larger than AWS Lambda's maximum unzipped package size of 250 MiB,
+then this tool will employ the ZIP-inside-ZIP (nested-ZIP) workaround.  This allows deploying Lambdas with large
+dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc.
+
+This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless).  This technique has been improved here to not require any special imports in your entrypoint source file.  That is, no changes are needed to your source code to leverage the nested ZIP deployment.
+
+The motivation for this Python tool is to achieve the same results as serverless-python-requirements but with a
+purely Python tool.  This can simplify and speed up developer and CI/CD workflows.
+
+One important thing that this tool does not do is build the target virtual environment and install all of the
+dependencies.  You must first generate that with a tool like [Poetry](https://github.com/python-poetry/poetry) and the [poetry-plugin-bundle](https://github.com/python-poetry/poetry-plugin-bundle).
+
+## Example command sequence
+```
+poetry bundle venv .build/.venv --without dev
+package-python-function .build/.venv --output-dir .build/lambda
+```
+
+The output will be a .zip file with the same name as your project from your pyproject.toml file.
+
+## Installation
+Use [pipx](https://github.com/pypa/pipx) to install:
+
+```
+pipx install package-python-function
+```
+
+## Usage / Arguments
+`package-python-function venv_dir [--project PROJECT] [--output-dir OUTPUT_DIR] [--output OUTPUT]`
+
+- `venv_dir` [Required]:  The path to the virtual environment to package.
+
+- `--project` [Optional]: Path to the pyproject.toml file. Omit to use the pyproject.toml file in the current working directory.
+
+Specify at most one of the following (if neither is given, the zip file is written to the current working directory):
+- `--output`: The full output path of the final zip file.
+
+- `--output-dir`: The output directory for the final zip file.  The name of the zip file will be based on the project's
+name in the pyproject.toml file.
+
+
+
diff --git a/package_python_function/main.py b/package_python_function/main.py
@@ -7,7 +7,7 @@
 def main() -> None:
     args = parse_args()
     project_path = Path(args.project).resolve()
-    venv_path = Path(args.venv).resolve()
+    venv_path = Path(args.venv_dir).resolve()
     output_dir_path = Path(args.output_dir).resolve()
     output_file_path = Path(args.output).resolve() if args.output else None
     packager = Packager(venv_path, project_path, output_dir_path, output_file_path)
@@ -16,8 +16,8 @@ def main() -> None:
 
 def parse_args() -> argparse.Namespace:
     arg_parser = argparse.ArgumentParser()
-    arg_parser.add_argument("venv", type=str)
-    arg_parser.add_argument("--project", type=str, default='pyproject.toml')
-    arg_parser.add_argument("--output-dir", type=str, default='.')
-    arg_parser.add_argument("--output", type=str, default='')
+    arg_parser.add_argument("venv_dir", type=str, help="The directory path to the virtual environment to package into a zip file")
+    arg_parser.add_argument("--project", type=str, default='pyproject.toml', help="The path to the project's pyproject.toml file. Omit to use pyproject.toml in the current working directory.")
+    arg_parser.add_argument("--output-dir", type=str, default='.', help="The directory path to save the output zip file. Default is the current working directory.")
+    arg_parser.add_argument("--output", type=str, default='', help="The full file path for the output file. Use this instead of --output-dir if you want total control of the output file path.")
     return arg_parser.parse_args()
diff --git a/package_python_function/nested_zip_loader.py b/package_python_function/nested_zip_loader.py
@@ -1,14 +1,27 @@
-# AWS imposes a 10 second limit on the INIT sequence of a Lambda function.  If this time limit is reached, the process
-# is terminated and the INIT is performed again as part of the function's billable invocation.
-# Reference: https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtime-environment.html
-#
-# For this reason, we can be left with an incomplete extraction and so care is taken to avoid inadverently using it.
-#
-# From https://docs.python.org/3/reference/import.html
-# "The module will exist in sys.modules before the loader executes the module code. This is crucial because the module
-# code may (directly or indirectly) import itself"
-
-# TODO: Inspired by serverless-python-requirements.
+"""
+Purpose: This module is responsible for extracting the nested zip file that contains your code and dependencies for
+the Lambda function.
+When activated, the content of this file is packaged as your entrypoint's __init__.py module in the
+outer ZIP file.
+
+It works by leveraging the Python import system's ability for a module to dynamically replace its code.
+When Lambda performs the INIT sequence, it will import the module configured as the entrypoint.  Python will
+then first import `the_project/__init__.py`, which is where package-python-function has put the code from
+this file.  This code will then extract the nested zip file and replace the module's code with the extracted
+code, then trigger a reload of the original module.
+
+From https://docs.python.org/3/reference/import.html
+"The module will exist in sys.modules before the loader executes the module code. This is crucial because the module
+code may (directly or indirectly) import itself"
+
+Inspired by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements/blob/master/unzip_requirements.py).
+
+Note:
+AWS imposes a 10 second limit on the INIT sequence of a Lambda function.  If this time limit is reached, the process
+is terminated and the INIT is performed again as part of the function's billable invocation.
+Reference: https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtime-environment.html
+For this reason, we can be left with an incomplete extraction and so care is taken to avoid inadvertently using it.
+"""
 
 def load_nested_zip() -> None:
     from pathlib import Path
@@ -27,20 +40,28 @@ def load_nested_zip() -> None:
 
         staging_package_path = temp_path / ".stage.package-python-function"
 
-        # TODO BW: Work this out.
         if staging_package_path.exists():
             shutil.rmtree(str(staging_package_path))
 
         nested_zip_path = Path(__file__).parent / '.dependencies.zip'
 
         zipfile.ZipFile(str(nested_zip_path), 'r').extractall(str(staging_package_path))
-        os.rename(str(staging_package_path), str(target_package_path))  # Atomic -- TODO BW DOCME
 
-    # TODO BW: Update this comment
-    # [No longer applicable] We want our path to look like [working_dir, /tmp/package-python-function, ...]
+        # The idea here is that we don't rename the path until everything has been successfully extracted.
+        # This is expected to be an atomic operation.  That way, if AWS terminates us during the extraction,
+        # we won't try and use the incomplete extraction.
+        os.rename(str(staging_package_path), str(target_package_path))
+
+    # Lambda sets up the sys.path like this:
+    #    ['/var/task', '/opt/python/lib/python3.13/site-packages', '/opt/python',
+    #     '/var/lang/lib/python3.13/site-packages', '/var/runtime', ...]
+    # Where the first entry is the directory where Lambda extracted the zip file
     # Refer to https://docs.aws.amazon.com/lambda/latest/dg/python-package.html#python-package-searchpath
-    # We need to replace the original path that AWS Lambda setup for us.
-    # sys.path.insert(1, str(target_package_path))
+    # We then replace the first entry with the directory where we extracted the nested zip file so that sys.path
+    # becomes:
+    #    ['/tmp/package-python-function', '/opt/python/lib/python3.13/site-packages', '/opt/python',
+    #     '/var/lang/lib/python3.13/site-packages', '/var/runtime', ...]
+    # Then we trigger a reload on the current module so that the original module code is loaded.
     sys.path[0] = str(target_package_path)
     importlib.reload(sys.modules[__name__])
 

diff --git a/package_python_function/packager.py b/package_python_function/packager.py
@@ -26,6 +26,7 @@ def input_path(self) -> Path:
         return python_paths[0] / 'site-packages'
 
     def package(self) -> None:
+        # TODO: Improve logging.
         print("Packaging:", self.project.path)
         print("Output:", self.output_file)
         print("Input:", self.input_path)

diff --git a/package_python_function/python_project.py b/package_python_function/python_project.py
@@ -22,7 +22,7 @@ def entrypoint_package_name(self) -> str:
         The subdirectory name in the source virtual environment's site-packages that contains the function's entrypoint
         code.
         """
-        # TODO : Parse out the project's package dir(s).  Use the first one if there are multiple.
+        # TODO : Parse out the project's package dir(s) if defined.  Use the first one if there are multiple.
         return self.name.replace('-', '_')
 
     def find_value(self, paths: tuple[tuple[str]]) -> str: