Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .coveragerc

This file was deleted.

12 changes: 6 additions & 6 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -32,13 +32,13 @@ jobs:
echo "::set-env name=PATH::$PATH:$PWD/protoc/bin/"
sudo apt-get install -qq libsnappy-dev
python -m pip install --upgrade pip
pip install ruff pytest
pip install ruff pytest rich 'protobuf>=3.20.0rc1,<4'
pip install -r requirements.txt
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: 'true'
- name: Build package
run: make
- name: Build gencode
run: PYTHONPATH=$PYTHONPATH:$(pwd) python3 dumper/run.py
- name: Lint with ruff
run: ruff check . --exclude keynote_parser/generated
run: ruff check .
- name: Test with pytest
run: PYTHONPATH=$PYTHONPATH:$(pwd) pytest --cov=keynote_parser
run: PYTHONPATH=$PYTHONPATH:$(pwd) pytest
9 changes: 6 additions & 3 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,15 @@ jobs:
sudo apt-get install -qq libsnappy-dev
pip install -r requirements.txt
python -m pip install --upgrade pip
pip install setuptools wheel twine
pip install setuptools wheel twine build rich 'protobuf>=3.20.0rc1,<4'
- name: Build and publish
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
run: |
make
python setup.py sdist bdist_wheel
# Build the gencode:
PYTHONPATH=$PYTHONPATH:$(pwd) python3 dumper/run.py
# Build the package:
python -m build
# Upload the package:
twine upload dist/*
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
__pycache__/
*.py[cod]
*$py.class
keynote_parser/generated/
keynote_parser/versions/v*/generated/

# C extensions
*.so
Expand Down Expand Up @@ -115,4 +115,6 @@ venv.bak/
dmypy.json

# Pyre type checker
.pyre/
.pyre/

uv.lock
30 changes: 0 additions & 30 deletions Makefile

This file was deleted.

53 changes: 0 additions & 53 deletions dumper/Makefile

This file was deleted.

Empty file added dumper/__init__.py
Empty file.
87 changes: 65 additions & 22 deletions dumper/extract_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
import argparse
import enum
import json
import logging
import os
import sys
import time

import lldb


class StateType(enum.Enum):
Invalid = 0
Expand All @@ -26,6 +26,10 @@ class StateType(enum.Enum):
Stopped = 5
Running = 6
Stepping = 7
Crashed = 8
Detached = 9
Exited = 10
Suspended = 11


class StopReason(enum.Enum):
Expand Down Expand Up @@ -59,43 +63,85 @@ def main():
args = parser.parse_args()

if os.path.exists(args.output):
print(f"Removing output file {args.output}...")
logging.info(f"Removing output file {args.output}...")
os.remove(args.output)

print("Creating debugger...")
mapping = extract_mapping(args.exe)
with open(args.output, "w") as f:
json.dump(mapping, f, indent=2)


def extract_mapping(exe: str) -> dict[int, str]:
# Add the installed LLVM Python path to the Python path and error if the Python version does not match:
# i.e.: /opt/homebrew/opt/llvm/libexec/python3.13/site-packages
LLVM_PYTHON_ROOT = "/opt/homebrew/opt/llvm/libexec"
if not os.path.exists(LLVM_PYTHON_ROOT):
raise ImportError(
f"{LLVM_PYTHON_ROOT} does not exist. Please install LLVM/LLDB first."
)

existing_versions = [
x for x in os.listdir(LLVM_PYTHON_ROOT) if x.startswith("python")
]

THIS_PYTHON_LLVM_PATH = f"{LLVM_PYTHON_ROOT}/python{sys.version_info.major}.{sys.version_info.minor}/site-packages"
if not os.path.exists(THIS_PYTHON_LLVM_PATH):
raise ImportError(
"Your system has LLVM/LLDB installed, but it is not the same version as the Python interpreter "
f"you are using; found: {', '.join(existing_versions)}, but the current Python version is "
f"{sys.version_info.major}.{sys.version_info.minor}. Please install the same "
"version of LLVM/LLDB as your Python interpreter."
)

sys.path.append(THIS_PYTHON_LLVM_PATH)

import lldb

logging.info("Creating debugger...")
debugger = lldb.SBDebugger.Create()
debugger.SetAsync(False)
print(f"Creating target of {args.exe}...")
target = debugger.CreateTargetWithFileAndArch(args.exe, None)
print("Setting breakpoint for _sendFinishLaunchingNotification...")
logging.info(f"Creating target of {exe}...")
target = debugger.CreateTargetWithFileAndArch(exe, None)
logging.info("Setting breakpoint for _sendFinishLaunchingNotification...")
target.BreakpointCreateByName("_sendFinishLaunchingNotification")

print("Setting breakpoint for _handleAEOpenEvent:...")
logging.info("Setting breakpoint for _handleAEOpenEvent:...")
target.BreakpointCreateByName("_handleAEOpenEvent:")

print("Setting breakpoint for [CKContainer containerWithIdentifier:]...")
logging.info("Setting breakpoint for [CKContainer containerWithIdentifier:]...")
# let's break in the CloudKit code and early exit the function before it can raise an exception:
target.BreakpointCreateByName("[CKContainer containerWithIdentifier:]")

print("Setting breakpoint for ___lldb_unnamed_symbol[0-9]+...")
logging.info("Setting breakpoint for ___lldb_unnamed_symbol[0-9]+...")
# In later Keynote versions, 'containerWithIdentifier' isn't called directly, but we can break on similar methods:
# Note: this __lldb_unnamed_symbol hack was determined by painstaking experimentation. It will break again for sure.
target.BreakpointCreateByRegex("___lldb_unnamed_symbol[0-9]+", "CloudKit")

print("Launching process...")
logging.info("Launching process...")
process = target.LaunchSimple(None, None, os.getcwd())

if not process:
raise ValueError("Failed to launch process: " + args.exe)
raise ValueError(f"Failed to launch process: {exe}")
try:
print("Waiting for process to stop on a breakpoint...")
while process.GetState() != lldb.eStateStopped:
print(f"Current state: {StateType(process.GetState())}")
logging.info("Waiting for process to stop on a breakpoint...")
while (
process.GetState() != lldb.eStateStopped
and process.GetState() != lldb.eStateExited
):
logging.info(f"Current state: {StateType(process.GetState())}")
time.sleep(0.1)

if process.GetState() == lldb.eStateExited:
raise ValueError(
"Process exited before stopping on a breakpoint. "
"Ensure the process is properly code signed."
)

while process.GetState() == lldb.eStateStopped:
thread = process.GetThreadAtIndex(0)
print(f"Thread: {thread} stopped at: {StopReason(thread.GetStopReason())}")
logging.info(
f"Thread: {thread} stopped at: {StopReason(thread.GetStopReason())}"
)
match thread.GetStopReason():
case lldb.eStopReasonBreakpoint:
if any(
Expand All @@ -113,7 +159,7 @@ def main():
else:
break
case lldb.eStopReasonException:
print(repr(thread) + "\n")
logging.info(repr(thread) + "\n")
raise NotImplementedError(
f"LLDB caught exception, {__file__} needs to be updated to handle."
)
Expand All @@ -134,11 +180,8 @@ def main():
if x.strip()
]
mapping = [(int(a), b.split(" ")[-1]) for a, b in split if "null" not in b]
print(f"Extracted mapping with {len(mapping):,} elements.")
results = json.dumps(dict(sorted(mapping)), indent=2)
with open(args.output, "w") as f:
f.write(results)
print(f"Wrote {len(results):,} bytes of mapping to {args.output}.")
logging.info(f"Extracted mapping with {len(mapping):,} elements.")
return dict(sorted(mapping))
finally:
process.Kill()

Expand Down
Loading