|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Sparsify UEFI capsule files for efficient git storage. |
| 4 | +
|
| 5 | +This script zeroes out sections of capsule files that aren't needed for testing, |
| 6 | +while preserving the file structure and all test-relevant data. The resulting |
| 7 | +files compress extremely well with git's zlib compression (typically 99% reduction). |
| 8 | +
|
| 9 | +Preserved sections: |
| 10 | +- Capsule header (first 64 bytes) |
| 11 | +- $BVDT section (BIOS version info) |
| 12 | +- $_IFLASH_EC_IMG_ section + EC binary |
| 13 | +- All embedded PD firmware (CCG5, CCG6, CCG8) |
| 14 | +- $_RETIMER_PARAM_ section (retimer version) |
| 15 | +
|
| 16 | +Usage: |
| 17 | + ./sparsify_capsule.py input.cap # Creates input.sparse.cap |
| 18 | + ./sparsify_capsule.py input.cap -o output.cap # Specify output path |
| 19 | + ./sparsify_capsule.py *.cap # Process multiple files |
| 20 | +""" |
| 21 | + |
| 22 | +import argparse |
| 23 | +import gzip |
| 24 | +import os |
| 25 | +import sys |
| 26 | + |
| 27 | + |
def find_all(data: bytes, needle: bytes) -> list[int]:
    """Return the start offsets of all non-overlapping occurrences of needle.

    The search resumes immediately after each match, so overlapping
    occurrences are not reported (b"aa" in b"aaaa" yields [0, 2]).

    Args:
        data: Buffer to search.
        needle: Byte sequence to look for.

    Returns:
        Offsets in ascending order; an empty list when there is no match or
        when needle is empty.
    """
    # An empty needle matches at every position without advancing the
    # cursor, which would loop forever; treat it as "nothing to find".
    if not needle:
        return []

    step = len(needle)
    offsets = []
    pos = data.find(needle)
    while pos != -1:
        offsets.append(pos)
        pos = data.find(needle, pos + step)
    return offsets
| 36 | + |
| 37 | + |
def sparsify_capsule(data: bytes) -> tuple[bytearray, list[tuple[int, int, str]]]:
    """
    Build a zero-filled copy of a capsule that keeps only selected regions.

    Regions are located by searching for fixed byte markers; each region is
    recorded as (offset, length, description). Recorded lengths are nominal:
    the copy itself is clamped to the end of the input, but the returned
    tuples are not.

    Returns:
        Tuple of (sparsified data with the same length as the input,
        preserved regions sorted by offset)
    """
    # The first 64 bytes are kept unconditionally as the capsule header.
    regions = [(0, 64, "Capsule header")]

    def keep_first(marker: bytes, span: int, label: str) -> None:
        # Record the first occurrence of marker, if the capsule has one.
        start = data.find(marker)
        if start >= 0:
            regions.append((start, span, label))

    # BIOS version block.
    keep_first(b'$BVDT', 128, "$BVDT (BIOS version)")

    # 16-byte marker + 9 further bytes + 128 KiB EC binary.
    # NOTE(review): the 9 bytes are presumably an offset field - confirm.
    keep_first(b'$_IFLASH_EC_IMG_', 16 + 9 + 131072, "$_IFLASH_EC_IMG_ + EC")

    # PD firmware images: every occurrence of each 6-byte signature is kept.
    # Scan order (CCG8, CCG6, CCG5) only matters for ties in the stable sort.
    ccg8_sig = bytes.fromhex("00800020ad0c")
    regions.extend(
        (start, 262144, f"CCG8 PD {i}")  # 256 KiB per image
        for i, start in enumerate(find_all(data, ccg8_sig), 1)
    )

    ccg6_sig = bytes.fromhex("004000201100")
    regions.extend(
        (start, 65536, f"CCG6 PD {i}")  # 64 KiB per image
        for i, start in enumerate(find_all(data, ccg6_sig), 1)
    )

    ccg5_sig = bytes.fromhex("002000201100")
    regions.extend(
        (start, 32768, f"CCG5 PD {i}")  # 32 KiB per image
        for i, start in enumerate(find_all(data, ccg5_sig), 1)
    )

    # Retimer parameter block.
    keep_first(b'$_RETIMER_PARAM_', 64, "$_RETIMER_PARAM_")

    # Order the regions by where they start in the file.
    regions.sort(key=lambda region: region[0])

    # Start from all zeroes and copy back only the preserved byte ranges.
    total = len(data)
    sparse = bytearray(total)
    for start, span, _label in regions:
        stop = min(start + span, total)
        sparse[start:stop] = data[start:stop]

    return sparse, regions
| 94 | + |
| 95 | + |
def process_file(input_path: str, output_path: str | None = None, verbose: bool = True) -> None:
    """Sparsify one capsule file and write the result to disk.

    Args:
        input_path: Path of the capsule to read.
        output_path: Where to write the sparsified capsule. When None, the
            name is derived from input_path by inserting ".sparse" before
            the extension (input.cap -> input.sparse.cap).
        verbose: When true, print the preserved regions and the
            gzip-compressed size of the result.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    if output_path is None:
        base, ext = os.path.splitext(input_path)
        output_path = f"{base}.sparse{ext}"

    # Read input
    with open(input_path, 'rb') as f:
        data = f.read()

    original_size = len(data)

    # Sparsify
    sparse_data, preserved = sparsify_capsule(data)

    # Write output
    with open(output_path, 'wb') as f:
        f.write(sparse_data)

    if not verbose:
        return

    # Compress in memory only to report how small the sparse file can get.
    compressed_size = len(gzip.compress(bytes(sparse_data), compresslevel=9))

    # A zero-byte input has no meaningful ratio; avoid dividing by zero.
    if original_size:
        ratio = f"{100*compressed_size/original_size:.2f}%"
    else:
        ratio = "n/a"

    print(f"{os.path.basename(input_path)}:")
    print(f" Original size: {original_size:>12,} bytes")
    print(f" Preserved regions: {len(preserved)}")
    for offset, length, desc in preserved:
        print(f" {offset:>10} - {offset+length:>10} ({length:>7} bytes): {desc}")
    print(f" Compressed size: {compressed_size:>12,} bytes ({ratio})")
    print(f" Output: {output_path}")
    print()
| 127 | + |
| 128 | + |
def main() -> int:
    """Parse the command line and sparsify every requested capsule file.

    Inputs that do not exist or cannot be processed are reported on stderr
    and skipped so the remaining files are still handled.

    Returns:
        0 when every input was processed, 1 when at least one failed.
    """
    parser = argparse.ArgumentParser(
        description="Sparsify UEFI capsule files for efficient git storage.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument(
        'input', nargs='+',
        help='Input capsule file(s)'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output path (only valid with single input file)'
    )
    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='Suppress verbose output'
    )

    args = parser.parse_args()

    if args.output and len(args.input) > 1:
        print("Error: --output can only be used with a single input file", file=sys.stderr)
        sys.exit(1)

    # After the check above, a non-empty --output implies exactly one input.
    output_path = args.output or None

    failures = 0
    for input_path in args.input:
        if not os.path.exists(input_path):
            print(f"Error: {input_path} not found", file=sys.stderr)
            failures += 1
            continue

        try:
            process_file(input_path, output_path, verbose=not args.quiet)
        except OSError as exc:
            # E.g. the input is a directory or the output is not writable;
            # report it and keep going with the remaining files.
            print(f"Error: {input_path}: {exc}", file=sys.stderr)
            failures += 1

    return 1 if failures else 0


if __name__ == '__main__':
    # Propagate failures to the shell so scripts and CI can detect them.
    sys.exit(main())
0 commit comments