Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 6 additions & 12 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: PyTest & Flake8
name: PyTest & Ruff

on:
push:
Expand All @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9, "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -32,19 +32,13 @@ jobs:
echo "::set-env name=PATH::$PATH:$PWD/protoc/bin/"
sudo apt-get install -qq libsnappy-dev
python -m pip install --upgrade pip
pip install flake8 pytest
pip install ruff pytest
pip install -r requirements.txt
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: 'true'
- name: Build package
run: make
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude keynote_parser/generated
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude keynote_parser/generated
- name: Lint with ruff
run: ruff check . --exclude keynote_parser/generated
- name: Test with pytest
run: |
export PYTHONPATH=$PYTHONPATH:$(pwd)
pytest --cov=keynote_parser
run: PYTHONPATH=$PYTHONPATH:$(pwd) pytest --cov=keynote_parser
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ keynote_parser/generated/%_pb2.py: protos/%.proto keynote_parser/generated

keynote_parser/generated/__init__.py: keynote_parser/generated $(PROTO_CLASSES)
touch $@
# Huge hack for py3 support, see https://github.com/protocolbuffers/protobuf/issues/1491
futurize --no-diffs --nobackups --both-stages --processes 4 -w keynote_parser/generated/
python3 dumper/rewrite_imports.py keynote_parser/generated/*.py

clean:
rm -rf keynote_parser/generated
Expand Down
33 changes: 25 additions & 8 deletions dumper/Makefile
Original file line number Diff line number Diff line change
@@ -1,33 +1,50 @@

# Declare the command-style targets as phony so that a stray file named
# "clean" or "all" can never make them look up to date. (The previous
# `.PHONY=clean all` only assigned a variable named .PHONY.)
.PHONY: clean all

LLDB_PYTHON_PATH := /opt/homebrew/opt/llvm//libexec/python3.11/site-packages/
LLDB_PYTHON := python3.11
IDENTITY := $(shell security find-identity -v -p codesigning | head -n 1 | python -c 'import sys; print(sys.stdin.read().split("\"")[1])')
# Find whichever version of Python is installed in the LLVM directory.
# LLVM_DIR may be overridden on the command line (`make LLVM_DIR=...`).
LLVM_DIR := /opt/homebrew/opt/llvm/
# Error if LLVM_DIR is empty (e.g. overridden with a blank value):
ifeq ($(strip $(LLVM_DIR)),)
$(error LLVM_DIR is not set)
endif
# First "python3.*" entry found under LLVM_DIR; its site-packages directory is
# placed on PYTHONPATH when extract_mapping.py is run.
LLVM_PYTHON_PATH := $(shell find $(LLVM_DIR) -name "python3.*" | head -n 1)
ifeq ($(strip $(LLVM_PYTHON_PATH)),)
$(error LLVM_PYTHON_PATH is not set)
endif
# The interpreter name is the last path component; the built-in notdir
# avoids forking a shell just to run basename.
LLVM_PYTHON := $(notdir $(LLVM_PYTHON_PATH))
ifeq ($(strip $(LLVM_PYTHON)),)
$(error LLVM_PYTHON is not set)
endif

# TODO: Verify that this identity is valid and not expired; this causes silent failures.
IDENTITY := $(shell security find-identity -v -p codesigning | head -n 1 | uv run python -c 'import sys; print(sys.stdin.read().split("\"")[1])')

all: mapping.py proto

mapping.json: Keynote.unsigned.app/Contents/MacOS/Keynote ./extract_mapping.py
PYTHONPATH=${LLDB_PYTHON_PATH} xcrun $(LLDB_PYTHON) ./extract_mapping.py Keynote.unsigned.app/Contents/MacOS/Keynote > $@
PYTHONPATH=${LLVM_PYTHON_PATH}/site-packages xcrun $(LLVM_PYTHON) ./extract_mapping.py Keynote.unsigned.app/Contents/MacOS/Keynote --output $@
rm -rf Keynote.unsigned.app

proto: /Applications/Keynote.app
python3 protodump.py /Applications/Keynote.app ./proto/
uv run protodump.py /Applications/Keynote.app ./proto/
# Note that if any of the incoming Protobuf definitions contain periods,
# protoc will put them into their own Python packages. This is not desirable
# for import rules in Python, so we replace non-final period characters with
# underscores.
python3 ./rename_proto_files.py proto
uv run ./rename_proto_files.py proto
cp ./proto/*.proto ../protos/
rm -rfv proto

Keynote.unsigned.app/Contents/MacOS/Keynote: /Applications/Keynote.app
cp -r /Applications/Keynote.app ./Keynote.unsigned.app
codesign --remove-signature --verbose ./Keynote.unsigned.app/Contents/MacOS/Keynote
codesign --sign "${IDENTITY}" --verbose ./Keynote.unsigned.app/Contents/MacOS/Keynote

mapping.py: mapping.json
python3 generate_mapping.py
cp mapping.py ../keynote_parser/mapping.py
uv run generate_mapping.py
mv mapping.py ../keynote_parser/mapping.py
# mapping.py has already been moved by this point, and $(shell ...) is evaluated when make expands the recipe rather than when this line runs, so measure the installed copy from the shell instead.
echo "mapping.py generated (size: $$(wc -c < ../keynote_parser/mapping.py) bytes); the dumper worked!"
rm -rf mapping.json

clean:
rm -rf Keynote.unsigned.app
Expand Down
190 changes: 133 additions & 57 deletions dumper/extract_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,64 +8,140 @@
Copyright 2020 Peter Sobot (psobot.com).
"""

import os
import sys
import argparse
import enum
import json
import os
import time

import lldb

exe = sys.argv[-1]
debugger = lldb.SBDebugger.Create()
debugger.SetAsync(False)
target = debugger.CreateTargetWithFileAndArch(exe, None)
target.BreakpointCreateByName("_sendFinishLaunchingNotification")
target.BreakpointCreateByName("_handleAEOpenEvent:")
# To get around the fact that we don't have iCloud entitlements when running re-signed code,
# let's break in the CloudKit code and early exit the function before it can raise an exception:
target.BreakpointCreateByName("[CKContainer containerWithIdentifier:]")
# In later Keynote versions, 'containerWithIdentifier' isn't called directly, but we can break on similar methods:
# Note: this __lldb_unnamed_symbol hack was determined by painstaking experimentation. It will break again for sure.
target.BreakpointCreateByRegex("___lldb_unnamed_symbol[0-9]+", "CloudKit")

process = target.LaunchSimple(None, None, os.getcwd())

if not process:
raise ValueError("Failed to launch process: " + exe)
try:
while process.GetState() == lldb.eStateStopped:
thread = process.GetThreadAtIndex(0)
if thread.GetStopReason() == lldb.eStopReasonBreakpoint:
if any([x in str(thread.GetSelectedFrame()) for x in ["CKContainer", "CloudKit"]]):
# Skip the code in CKContainer, avoiding a crash due to missing entitlements:
thread.ReturnFromFrame(thread.GetSelectedFrame(), lldb.SBValue().CreateValueFromExpression("0", ""))
process.Continue()
else:
break
elif thread.GetStopReason() == lldb.eStopReasonException:
sys.stderr.write(repr(thread) + "\n")
raise NotImplementedError(f"LLDB caught exception, {__file__} needs to be updated to handle.")
if process.GetState() == lldb.eStateStopped:
if thread:
frame = thread.GetFrameAtIndex(0)
if frame:
registry = frame.EvaluateExpression('[TSPRegistry sharedRegistry]').description
split = [
x.strip().split(" -> ")
for x in registry.split("{")[1].split("}")[0].split("\n")
if x.strip()
]
print(
json.dumps(
dict(
sorted(
[(int(a), b.split(" ")[-1]) for a, b in split if 'null' not in b]
)
),
indent=2,

class StateType(enum.Enum):
    """Readable names for the integer process states reported by LLDB.

    Used only to pretty-print ``process.GetState()`` in log output. The
    values mirror LLDB's ``lldb::StateType`` enumeration. Every state LLDB
    can report must be listed here: ``StateType(value)`` raises
    ``ValueError`` for an unknown value, which would abort the dump while
    merely trying to log (for example when the process has already exited
    or crashed).
    """

    Invalid = 0
    Unloaded = 1
    Connected = 2
    Attaching = 3
    Launching = 4
    Stopped = 5
    Running = 6
    Stepping = 7
    # Terminal / non-running states that were previously missing:
    Crashed = 8
    Detached = 9
    Exited = 10
    Suspended = 11


class StopReason(enum.Enum):
    """Readable names for the integer thread stop reasons reported by LLDB.

    Used only to pretty-print ``thread.GetStopReason()`` in log output. The
    values mirror LLDB's ``lldb::StopReason`` enumeration. ``_None`` carries
    a leading underscore because ``None`` is a reserved word in Python.
    ``StopReason(value)`` raises ``ValueError`` for an unlisted value, so
    the list needs to track the LLDB version in use.
    """

    Invalid = 0
    _None = 1
    Trace = 2
    Breakpoint = 3
    Watchpoint = 4
    Signal = 5
    Exception = 6
    Exec = 7
    PlanComplete = 8
    ThreadExiting = 9
    Instrumentation = 10
    ProcessorTrace = 11
    Fork = 12
    VFork = 13
    VForkDone = 14
    Interrupt = 15
    # NOTE(review): newer LLDB releases appear to define this value for
    # reverse execution - confirm against the installed lldb-enumerations.h.
    HistoryBoundary = 16


def main():
parser = argparse.ArgumentParser()
parser.add_argument("exe", type=str, help="Path to the executable to debug.")
parser.add_argument(
"--output",
type=str,
required=True,
help="Path to the output file to write to. Will be overwritten.",
)
args = parser.parse_args()

if os.path.exists(args.output):
print(f"Removing output file {args.output}...")
os.remove(args.output)

print("Creating debugger...")
debugger = lldb.SBDebugger.Create()
debugger.SetAsync(False)
print(f"Creating target of {args.exe}...")
target = debugger.CreateTargetWithFileAndArch(args.exe, None)
print("Setting breakpoint for _sendFinishLaunchingNotification...")
target.BreakpointCreateByName("_sendFinishLaunchingNotification")

print("Setting breakpoint for _handleAEOpenEvent:...")
target.BreakpointCreateByName("_handleAEOpenEvent:")

print("Setting breakpoint for [CKContainer containerWithIdentifier:]...")
# To get around the fact that we don't have iCloud entitlements when running re-signed code, break in the CloudKit code and exit the function early before it can raise an exception:
target.BreakpointCreateByName("[CKContainer containerWithIdentifier:]")

print("Setting breakpoint for ___lldb_unnamed_symbol[0-9]+...")
# In later Keynote versions, 'containerWithIdentifier' isn't called directly, but we can break on similar methods:
# Note: this __lldb_unnamed_symbol hack was determined by painstaking experimentation. It will break again for sure.
target.BreakpointCreateByRegex("___lldb_unnamed_symbol[0-9]+", "CloudKit")

print("Launching process...")
process = target.LaunchSimple(None, None, os.getcwd())

if not process:
raise ValueError("Failed to launch process: " + args.exe)
try:
print("Waiting for process to stop on a breakpoint...")
while process.GetState() != lldb.eStateStopped:
print(f"Current state: {StateType(process.GetState())}")
time.sleep(0.1)

while process.GetState() == lldb.eStateStopped:
thread = process.GetThreadAtIndex(0)
print(f"Thread: {thread} stopped at: {StopReason(thread.GetStopReason())}")
match thread.GetStopReason():
case lldb.eStopReasonBreakpoint:
if any(
[
x in str(thread.GetSelectedFrame())
for x in ["CKContainer", "CloudKit"]
]
):
# Skip the code in CKContainer, avoiding a crash due to missing entitlements:
thread.ReturnFromFrame(
thread.GetSelectedFrame(),
lldb.SBValue().CreateValueFromExpression("0", ""),
)
process.Continue()
else:
break
case lldb.eStopReasonException:
print(repr(thread) + "\n")
raise NotImplementedError(
f"LLDB caught exception, {__file__} needs to be updated to handle."
)
)
else:
raise ValueError("Could not get frame to print out registry!")
else:
raise ValueError("LLDB was unable to stop process! " + str(process))
finally:
process.Kill()
case _:
process.Continue()

if process.GetState() != lldb.eStateStopped:
raise ValueError("LLDB was unable to stop process! " + str(process))
if not thread:
raise ValueError("Could not get thread to print out registry!")
frame = thread.GetFrameAtIndex(0)
if not frame:
raise ValueError("Could not get frame to print out registry!")
registry = frame.EvaluateExpression("[TSPRegistry sharedRegistry]").description
split = [
x.strip().split(" -> ")
for x in registry.split("{")[1].split("}")[0].split("\n")
if x.strip()
]
mapping = [(int(a), b.split(" ")[-1]) for a, b in split if "null" not in b]
print(f"Extracted mapping with {len(mapping):,} elements.")
results = json.dumps(dict(sorted(mapping)), indent=2)
with open(args.output, "w") as f:
f.write(results)
print(f"Wrote {len(results):,} bytes of mapping to {args.output}.")
finally:
process.Kill()


if __name__ == "__main__":
main()
15 changes: 11 additions & 4 deletions dumper/generate_mapping.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import json
import glob
import json
import os

RUNTIME_CODE = """

Expand Down Expand Up @@ -35,7 +35,10 @@ def main():
f.write("\n")

proto_files = sorted(
[os.path.basename(path) for path in glob.glob(os.path.join("..", "protos", "*.proto"))]
[
os.path.basename(path)
for path in glob.glob(os.path.join("..", "protos", "*.proto"))
]
)

for proto_file in proto_files:
Expand All @@ -53,7 +56,11 @@ def main():
f.write("\n")

with open(mapping_filename) as mapping_file:
f.write(f"TSPRegistryMapping = {repr(json.load(mapping_file))}\n")
mapping_file_contents = mapping_file.read()
if mapping_file_contents == "":
raise ValueError(f"Mapping file {mapping_filename} is empty.")
mapping_file_contents = json.loads(mapping_file_contents)
f.write(f"TSPRegistryMapping = {repr(mapping_file_contents)}\n")

f.write(RUNTIME_CODE)

Expand Down
Loading