Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ This tool builds a ZIP file from a virtual environment with all dependencies inst
then this tool will employ the ZIP-inside-ZIP (nested-ZIP) workaround. This allows deploying Lambdas with large
dependency packages, especially those with native code compiled extensions like Pandas, PyArrow, etc.

This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). This technique has been improved here to not require any special imports in your entrypoint source file. That is, no changes are needed to your source code to leverage the nested ZIP deployment.
This technique was originally pioneered by [serverless-python-requirements](https://github.com/serverless/serverless-python-requirements), which is a NodeJS (JavaScript) plugin for the [Serverless Framework](https://github.com/serverless/serverless). The technique has been improved here to not require any special imports in your entrypoint source file. That is, no changes are needed to your source code to leverage the nested ZIP deployment.

The motivation for this Python tool is to achieve the same results as serverless-python-requirements but with a
purely Python tool. This can simplify and speed up developer and CI/CD workflows.
Expand Down
4 changes: 4 additions & 0 deletions package_python_function/main.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import argparse
from pathlib import Path
import logging
import sys

from .packager import Packager


def main() -> None:
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")

args = parse_args()
project_path = Path(args.project).resolve()
venv_path = Path(args.venv_dir).resolve()
Expand Down
24 changes: 13 additions & 11 deletions package_python_function/packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
from tempfile import NamedTemporaryFile
import zipfile
import shutil
import logging

from .python_project import PythonProject


logger = logging.getLogger(__name__)


class Packager:
AWS_LAMBDA_MAX_UNZIP_SIZE = 262144000

Expand All @@ -26,19 +30,15 @@ def input_path(self) -> Path:
return python_paths[0] / 'site-packages'

def package(self) -> None:
# TODO: Improve logging.
print("Packaging:", self.project.path)
print("Output:", self.output_file)
print("Input:", self.input_path)
print("Entrypoint Package name:", self.project.entrypoint_package_name)
logger.info(f"Packaging: '{self.input_path}' to '{self.output_file}' using '{self.project.path}'... ")

self.output_dir.mkdir(parents=True, exist_ok=True)

with NamedTemporaryFile() as dependencies_zip:
with NamedTemporaryFile(suffix=".zip") as dependencies_zip:
self.zip_all_dependencies(Path(dependencies_zip.name))

def zip_all_dependencies(self, target_path: Path) -> None:
print(f"Zipping to {target_path} ...")
logger.info(f"Zipping to {target_path}...")

with zipfile.ZipFile(target_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
def zip_dir(path: Path) -> None:
Expand All @@ -53,20 +53,22 @@ def zip_dir(path: Path) -> None:

compressed_bytes = target_path.stat().st_size

print(f"Uncompressed size: {self._uncompressed_bytes:,} bytes")
print(f"Compressed size: {compressed_bytes:,} bytes")
logger.info(f"Uncompressed size: {self._uncompressed_bytes:,} bytes. Compressed size: {compressed_bytes:,} bytes.")

if self._uncompressed_bytes > self.AWS_LAMBDA_MAX_UNZIP_SIZE:
print(f"The uncompressed size of the ZIP file is greater than the AWS Lambda limit of {self.AWS_LAMBDA_MAX_UNZIP_SIZE:,} bytes.")
logger.info(f"The uncompressed size of the ZIP file is greater than the AWS Lambda limit of {self.AWS_LAMBDA_MAX_UNZIP_SIZE:,} bytes.")
if(compressed_bytes < self.AWS_LAMBDA_MAX_UNZIP_SIZE):
print(f"The compressed size ({compressed_bytes:,}) is less than the AWS limit, so the nested-zip strategy will be used.")
logger.info(f"The compressed size ({compressed_bytes:,}) is less than the AWS limit, so the nested-zip strategy will be used.")
self.generate_nested_zip(target_path)
else:
print(f"TODO Error. The unzipped size it too large for AWS Lambda.")
else:
logger.info(f"Copying '{target_path}' to '{self.output_file}'")
shutil.copy(str(target_path), str(self.output_file))

def generate_nested_zip(self, inner_zip_path: Path) -> None:
logger.info(f"Generating nested-zip and __init__.py loader using entrypoint package '{self.project.entrypoint_package_name}'...")

with zipfile.ZipFile(self.output_file, 'w') as outer_zip_file:
entrypoint_dir = Path(self.project.entrypoint_package_name)
outer_zip_file.write(
Expand Down
1 change: 1 addition & 0 deletions scripts/poc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.test
1 change: 1 addition & 0 deletions scripts/poc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is the original proof-of-concept script used to work out the nested-ZIP automatic extraction during Lambda INIT.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def other_package_module():
    """Print the literal marker string ``other_package_module`` to stdout."""
    marker = "other_package_module"
    print(marker)
7 changes: 7 additions & 0 deletions scripts/poc/inner_package/zip_in_zip_test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# This file represents the original module's __init__.py file that gets renamed when creating the inner ZIP.

# Printed at import time so the proof-of-concept output shows when this file runs.
print("__init__ original")

GLOBAL_VALUE_IN_INIT_ORIGINAL = "This global is defined in the original __init__.py"

# Re-exported at package level: main.py imports `other_module_function` from `zip_in_zip_test`.
from .other_module import other_module_function
10 changes: 10 additions & 0 deletions scripts/poc/inner_package/zip_in_zip_test/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Printed at import time, before the imports below are resolved.
print("main.py: Load")

from zip_in_zip_test import GLOBAL_VALUE_IN_INIT_ORIGINAL, other_module_function
from other_package.other_package_module import other_package_module

def main():
    """Print a greeting and the package-level global, then call both imported helpers."""
    greeting = "Hello from main!"
    print(greeting)
    print(GLOBAL_VALUE_IN_INIT_ORIGINAL)
    other_module_function()
    other_package_module()
2 changes: 2 additions & 0 deletions scripts/poc/inner_package/zip_in_zip_test/other_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def other_module_function():
    """Print a fixed message identifying this function to stdout."""
    message = "I'm in other_module_function"
    print(message)
24 changes: 24 additions & 0 deletions scripts/poc/lambda-runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Local stand-in for the AWS Lambda runtime, as best I can approximate it.
# No real zipping or unzipping happens in this experiment: the files are simply
# copied into the .test directory next to this script.

import importlib
from pathlib import Path
import shutil
import sys

print('[lambda-runner]')
print('sys.path:', sys.path)

module_path = Path(__file__).parent
TEST_DIR = module_path / ".test"
PACKAGE_NAME = "zip_in_zip_test"
TEST_PACKAGE_DIR = TEST_DIR / PACKAGE_NAME

# Start from a clean slate; a missing .test directory is not an error.
shutil.rmtree(TEST_DIR, ignore_errors=True)

# Copy the loader package first, then nest the real code beneath it as ".inner_package".
shutil.copytree(str(module_path / PACKAGE_NAME), str(TEST_PACKAGE_DIR))
shutil.copytree(
    str(module_path / "inner_package"),
    str(TEST_PACKAGE_DIR / ".inner_package"),
)

# Put the staged directory at the front of the import search path.
sys.path.insert(0, str(TEST_DIR))

# Import the entrypoint module by name and invoke its main() function.
module = importlib.import_module('zip_in_zip_test.main')
module.main()
7 changes: 7 additions & 0 deletions scripts/poc/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions scripts/poc/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[tool.poetry]
name = "zip-in-zip-test"
version = "0.1.0"
description = ""
authors = ["BrandonLWhite <brandonlwhite@gmail.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.13"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
35 changes: 35 additions & 0 deletions scripts/poc/zip_in_zip_test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# This works perfectly!
#
# Loader __init__.py for the proof-of-concept: it redirects the import search path to the
# ".inner_package" directory next to this file and then reloads this very package by name.

print('zip_in_zip_test.__init__: BEGIN. This is the loader.')
print("module_path:", __file__)

from pathlib import Path
import importlib
import sys

module_path = Path(__file__).parent

# This works if I insert at zero.
# Why does serverless-python-requirements insist on inserting at 1?
# From https://docs.aws.amazon.com/lambda/latest/dg/python-package.html#python-package-searchpath:
# "By default, the first location the runtime searches is the directory into which your .zip deployment package is decompressed and mounted (/var/task)"
# sys.path.insert(0, str(module_path / ".inner_package"))

# This also works. I am thinking this is the best way, because we need to unmount the original decompressed directory
# since it contains the loader __init__.py.
# NOTE(review): this overwrites whatever entry is first in sys.path; it assumes that entry is the
# directory containing this loader package -- confirm for the real Lambda runtime.
sys.path[0] = str(module_path / ".inner_package")


# The following two approaches work too, and are safe.
# From https://docs.python.org/3/reference/import.html
# "The module will exist in sys.modules before the loader executes the module code. This is crucial because the module
# code may (directly or indirectly) import itself"

# This works too.
# del sys.modules[__name__]
# importlib.import_module(__name__)

# This also works. I think this is the best way.
# Must run after the sys.path change above; presumably the reload then resolves this package name
# to the original __init__.py under ".inner_package" -- verify against the importlib.reload docs.
importlib.reload(sys.modules[__name__])

print('zip_in_zip_test.__init__: END')