diff --git a/.codacy.yml b/.codacy.yml new file mode 100644 index 0000000..da332f7 --- /dev/null +++ b/.codacy.yml @@ -0,0 +1,2 @@ +exclude_paths: + - "benchmark/**" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d84d26..56b2167 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,7 @@ repos: rev: v0.4.5 hooks: - id: ruff + exclude: ^benchmark/ - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy @@ -12,3 +13,4 @@ repos: additional_dependencies: - pydantic>=2.0.0 - types-PyYAML + exclude: ^benchmark/ diff --git a/README.ja.md b/README.ja.md index 122a1f9..9071fe2 100644 --- a/README.ja.md +++ b/README.ja.md @@ -1,6 +1,6 @@ # ExStruct — Excel 構造化抽出エンジン -[![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/harumiWeb/exstruct/graph/badge.svg?token=2XI1O8TTA9)](https://codecov.io/gh/harumiWeb/exstruct) +[![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: 
BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/harumiWeb/exstruct/graph/badge.svg?token=2XI1O8TTA9)](https://codecov.io/gh/harumiWeb/exstruct) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/harumiWeb/exstruct) ![ExStruct Image](docs/assets/icon.webp) @@ -17,6 +17,15 @@ ExStruct は Excel ワークブックを読み取り、構造化データ(セ - **CLI レンダリング**(Excel 必須): PDF とシート画像を生成可能。 - **安全なフォールバック**: Excel COM 不在でもプロセスは落ちず、セル+テーブル候補+印刷範囲に切り替え(図形・チャートは空)。 +## ベンチマーク + +![Benchmark Chart](benchmark/public/plots/markdown_quality.png) + +このリポジトリには、ExcelドキュメントのRAG/LLM前処理に焦点を当てたベンチマークレポートが含まれています。 +私たちは2つの視点から追跡しています。(1) コア抽出精度と (2) 下流構造クエリのための再構築ユーティリティ (RUB) です。 +作業サマリーについては`benchmark/REPORT.md`を、公開バンドルについては`benchmark/public/REPORT.md`を参照してください。 +現在の結果はn=12のケースに基づいており、今後さらに拡張される予定です。 + ## インストール ```bash diff --git a/README.md b/README.md index 1d78e3e..20e5283 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ExStruct — Excel Structured Extraction Engine -[![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy 
Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/harumiWeb/exstruct/graph/badge.svg?token=2XI1O8TTA9)](https://codecov.io/gh/harumiWeb/exstruct) +[![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/harumiWeb/exstruct/graph/badge.svg?token=2XI1O8TTA9)](https://codecov.io/gh/harumiWeb/exstruct) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/harumiWeb/exstruct) ![ExStruct Image](docs/assets/icon.webp) @@ -19,6 +19,15 @@ ExStruct reads Excel workbooks and outputs structured data (cells, table candida - **CLI rendering** (Excel required): optional PDF and per-sheet PNGs. - **Graceful fallback**: if Excel COM is unavailable, extraction falls back to cells + table candidates without crashing. +## Benchmark + +![Benchmark Chart](benchmark/public/plots/markdown_quality.png) + +This repository includes benchmark reports focused on RAG/LLM preprocessing of Excel documents. +We track two perspectives: (1) core extraction accuracy and (2) reconstruction utility for downstream structure queries (RUB). 
+See `benchmark/REPORT.md` for the working summary and `benchmark/public/REPORT.md` for the public bundle. +Current results are based on n=12 cases and will be expanded. + ## Installation ```bash diff --git a/benchmark/.env.example b/benchmark/.env.example new file mode 100644 index 0000000..0cd13a4 --- /dev/null +++ b/benchmark/.env.example @@ -0,0 +1,4 @@ +OPENAI_API_KEY=your_key_here +# optional +OPENAI_ORG= +OPENAI_PROJECT= diff --git a/benchmark/.gitignore b/benchmark/.gitignore new file mode 100644 index 0000000..237bb9c --- /dev/null +++ b/benchmark/.gitignore @@ -0,0 +1,15 @@ +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +drafts/ +wheels/ +*.egg-info + +# Virtual environments +.venv +data/raw/ +*.log +outputs/ +.env \ No newline at end of file diff --git a/benchmark/Makefile b/benchmark/Makefile new file mode 100644 index 0000000..8b33e84 --- /dev/null +++ b/benchmark/Makefile @@ -0,0 +1,20 @@ +.PHONY: setup extract ask eval report all + +setup: + python -m pip install -U pip + pip install -e .. + pip install -e . + +extract: + exbench extract --case all --method all + +ask: + exbench ask --case all --method all --model gpt-4o + +eval: + exbench eval --case all --method all + +report: + exbench report + +all: extract ask eval report diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..10f3138 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,194 @@ +# ExStruct Benchmark + +This benchmark compares methods for answering questions about Excel documents using GPT-4o: + +- exstruct +- openpyxl +- pdf (xlsx->pdf->text) +- html (xlsx->html->table text) +- image_vlm (xlsx->pdf->png -> GPT-4o vision) + +## Requirements + +- Python 3.11+ +- LibreOffice (`soffice` in PATH) +- OPENAI_API_KEY in `.env` + +## Setup + +```bash +cd benchmark +cp .env.example .env +pip install -e .. # install exstruct from repo root +pip install -e . 
+``` + +## Run + +```bash +make all +``` + +## Reproducibility script (Windows PowerShell) + +```powershell +.\scripts\reproduce.ps1 +``` + +Options: + +- `-Case` (default: `all`) +- `-Method` (default: `all`) +- `-Model` (default: `gpt-4o`) +- `-Temperature` (default: `0.0`) +- `-SkipAsk` (skip LLM calls; uses existing responses) + +## Reproducibility script (macOS/Linux) + +```bash +./scripts/reproduce.sh +``` + +If you see a permission error, run: + +```bash +chmod +x ./scripts/reproduce.sh +``` + +Options: + +- `--case` (default: `all`) +- `--method` (default: `all`) +- `--model` (default: `gpt-4o`) +- `--temperature` (default: `0.0`) +- `--skip-ask` (skip LLM calls; uses existing responses) + +Outputs: + +- outputs/extracted/\* : extracted context (text or images) +- outputs/prompts/\*.jsonl +- outputs/responses/\*.jsonl +- outputs/markdown/\*/\*.md +- outputs/markdown/responses/\*.jsonl +- outputs/results/results.csv +- outputs/results/report.md + +## Public report (REPORT.md) + +Generate chart images and update `REPORT.md` in the benchmark root: + +```bash +python -m bench.cli report-public +``` + +This command writes plots under `outputs/plots/` and inserts them into +`REPORT.md` between the chart markers. + +## Public bundle (for publishing) + +Create a clean, shareable bundle under `benchmark/public/`: + +```bash +python scripts/publicize.py +``` + +Windows PowerShell: + +```powershell +.\scripts\publicize.ps1 +``` + +## Markdown conversion (optional) + +Generate Markdown from the latest JSON responses: + +```bash +python -m bench.cli markdown --case all --method all +``` + +Markdown scores (`score_md`, `score_md_precision`) are only computed when +Markdown outputs exist under `outputs/markdown/responses/`. + +If you want a deterministic renderer without LLM calls: + +```bash +python -m bench.cli markdown --case all --method all --use-llm false +``` + +## RUB (lite) + +RUB lite evaluates reconstruction utility using Markdown-only inputs. 
+ +Run Stage B tasks with the lite manifest: + +```bash +python -m bench.cli rub-ask --task all --method all --manifest rub/manifest_lite.json +python -m bench.cli rub-eval --manifest rub/manifest_lite.json +python -m bench.cli rub-report +``` + +Outputs: + +- outputs/rub/results/rub_results.csv +- outputs/rub/results/report.md + +## Evaluation protocol (public) + +To ensure reproducibility and fair comparison, follow these fixed settings: + +- Model: gpt-4o (Responses API) +- Temperature: 0.0 +- Prompt: fixed in `bench/llm/openai_client.py` +- Input contexts: generated by `bench.cli extract` using the same sources for all methods +- Normalization: optional normalized track uses `data/normalization_rules.json` +- Evaluation: `bench.cli eval` produces Exact, Normalized, Raw, and Markdown scores +- Report: `bench.cli report` generates `report.md` and per-case detailed reports + +Recommended disclosure when publishing results: + +- Model name + version, temperature, and date of run +- Full `normalization_rules.json` used for normalized scores +- Cost/token estimation method +- Any skipped cases and the reason (missing files, extraction failures) + +## How to interpret results (public guide) + +This benchmark reports four evaluation tracks to keep comparisons fair: + +- Exact: strict string match with no normalization. +- Normalized: applies case-specific rules in `data/normalization_rules.json` to + absorb formatting differences (aliases, split/composite labels). +- Raw: loose coverage/precision over flattened text tokens (schema-agnostic), + intended to reflect raw data capture without penalizing minor label variations. +- Markdown: coverage/precision against canonical Markdown rendered from truth. + +Recommended interpretation: + +- Use **Exact** to compare end-to-end string fidelity (best for literal extraction). +- Use **Normalized** to compare **document understanding** across methods. 
+- Use **Raw** to compare how much ground-truth text is captured regardless of schema. +- Use **Markdown** to evaluate JSON-to-Markdown conversion quality. +- When methods disagree between tracks, favor Normalized for Excel-heavy layouts + where labels are split/merged or phrased differently. +- Always cite both accuracy and cost metrics when presenting results publicly. + +## Evaluation + +The evaluator now writes four tracks: + +- Exact: `score`, `score_ordered` (strict string match, current behavior) +- Normalized: `score_norm`, `score_norm_ordered` (applies case-specific rules) +- Raw: `score_raw`, `score_raw_precision` (loose coverage/precision) +- Markdown: `score_md`, `score_md_precision` (Markdown coverage/precision) + +Normalization rules live in `data/normalization_rules.json` and are applied in +`bench.cli eval`. Publish these rules alongside the benchmark to keep the +normalized track transparent and reproducible. + +## Notes: + +- GPT-4o Responses API supports text and image inputs. See docs: + - [https://platform.openai.com/docs/api-reference/responses](https://platform.openai.com/docs/api-reference/responses) + - [https://platform.openai.com/docs/guides/images-vision](https://platform.openai.com/docs/guides/images-vision) +- Pricing for gpt-4o used in cost estimation: + - https://platform.openai.com/docs/models/compare?model=gpt-4o diff --git a/benchmark/REPORT.md b/benchmark/REPORT.md new file mode 100644 index 0000000..e146d3a --- /dev/null +++ b/benchmark/REPORT.md @@ -0,0 +1,84 @@ +# Benchmark Summary (Public) + +This summary consolidates the latest results for the Excel document benchmark and +RUB (structure query track). Use this file as a public-facing overview and link +full reports for reproducibility. 
+ +Sources: +- outputs/results/report.md (core benchmark) +- outputs/rub/results/report.md (RUB structure_query) + +## Charts + +![Core Benchmark Summary](outputs/plots/core_benchmark.png) +![Markdown Evaluation Summary](outputs/plots/markdown_quality.png) +![RUB Structure Query Summary](outputs/plots/rub_structure_query.png) + +## Scope + +- Cases: 12 Excel documents +- Methods: exstruct, openpyxl, pdf, html, image_vlm +- Model: gpt-4o (Responses API) +- Temperature: 0.0 +- Note: record the run date/time when publishing +- This is an initial benchmark (n=12) and will be expanded in future releases. + +## Core Benchmark (extraction + scoring) + +Key metrics from outputs/results/report.md: + +- Exact accuracy (acc): best = pdf 0.607551, exstruct = 0.583802 +- Normalized accuracy (acc_norm): best = pdf 0.856642, exstruct = 0.835538 +- Raw coverage (acc_raw): best = exstruct 0.876495 (tie for top) +- Raw precision: best = exstruct 0.933691 +- Markdown coverage (acc_md): best = pdf 0.700094, exstruct = 0.697269 +- Markdown precision: best = exstruct 0.796101 + +Interpretation: +- pdf leads in Exact/Normalized, especially when literal string match matters. +- exstruct is strongest on Raw coverage/precision and Markdown precision, + indicating robust capture and downstream-friendly structure. + +## RUB (structure_query track) + +RUB evaluates Stage B questions using Markdown-only inputs. The current track is +"structure_query" (path selection). + +Summary from outputs/rub/results/report.md: + +- RUS: exstruct 0.166667 (tie for top with openpyxl 0.166667) +- Partial F1: exstruct 0.436772 (best among methods) + +Interpretation: +- exstruct is competitive for structure queries, but the margin is not large. +- This track is sensitive to question design; it rewards selection accuracy + more than raw reconstruction. 
+ +## Positioning for RAG/LLM Preprocessing + +Practical strengths shown by the current benchmark: +- High Raw coverage/precision (exstruct best) +- High Markdown precision (exstruct best) +- Near-top Normalized accuracy + +Practical caveats: +- pdf often takes the top spot on Exact/Normalized accuracy +- RUB structure_query shows only a modest advantage + +Recommended public framing: +- exstruct is a strong option when the goal is structured reuse (JSON/Markdown) + for downstream LLM/RAG pipelines. +- pdf/VLM methods can be stronger for literal string fidelity or visual layout + recovery. + +## Known Limitations + +- Absolute RUS values are low in some settings (sensitive to task design). +- Results vary by task type (forms/flows/diagrams vs tables). +- Model changes (e.g., gpt-4.1) require separate runs and reporting. + +## Next Steps (optional) + +- Add a reconstruction track that scores “structure rebuild” directly. +- Add task-specific structure queries (not only path selection). +- Publish run date, model version, and normalization rules with results. 
diff --git a/benchmark/data/manifest.json b/benchmark/data/manifest.json new file mode 100644 index 0000000..7bedfbf --- /dev/null +++ b/benchmark/data/manifest.json @@ -0,0 +1,148 @@ +{ + "cases": [ + { + "id": "ffr_425_01", + "type": "application_form", + "xlsx": "data/raw/ffr_425_01.xlsx", + "question": "このExcel帳票(Federal Financial Report / SF-425)について、次の情報を抽出し、JSONのみで返してください。\n\n(1) checkbox_groups: チェックボックスのグループ名と、その選択肢ラベル一覧を抽出してください(\"Report Type\" と \"Basis of Accounting\" の2グループのみ)。\n(2) not_required_by_epa_scope: 赤字の注記 \"Not Required by EPA\" がかかっているセクション名を返してください(例: \"Federal Cash\")。\n(3) section_headers: 帳票上部の番号付きセクション見出し(1〜9)のうち、見出しテキストのみを配列で返してください(例: \"Federal Agency and Organizational Element to Which Report is Submitted\" など)。\n\n出力形式(厳守):\n{\n \"checkbox_groups\": {\n \"Report Type\": [\"Quarterly\", \"Semi-Annual\", \"Annual\", \"Final\"],\n \"Basis of Accounting\": [\"Cash\", \"Accrual\"]\n },\n \"not_required_by_epa_scope\": \"...\",\n \"section_headers\": [\"...\", \"...\", \"...\"]\n}\n\n注意:\n- チェックボックスの記号(□など)は含めないでください。ラベル文字列のみを返してください。\n- section_headers は表示順(上から左→右)で返してください。\n- 余分な説明文やコードフェンスは付けず、JSONのみを返してください。", + "truth": "data/truth/ffr_425_01.json", + "sheet_scope": null, + "render": { + "dpi": 220, + "max_pages": 2 + } + }, + { + "id": "flowchart_01", + "type": "flowchart", + "xlsx": "data/raw/flowchart_01.xlsx", + "question": "このフローチャートの開始から終了までの主要な処理ステップを順番に抽出し、次のJSON形式のみで返してください。\n\n出力形式(厳守):\n{\n \"steps\": [\"step1\", \"step2\", \"step3\", ...]\n}\n\n注意事項:\n- 開始ノードと終了ノードも含めてください\n- 分岐やループがある場合は、代表的な主経路として線形化してください\n- ステップ名は図中のラベル文字列をそのまま使用してください", + "truth": "data/truth/flowchart_01.json", + "sheet_scope": null, + "render": { + "dpi": 220, + "max_pages": 1 + } + }, + { + "id": "gantt_01", + "type": "gantt", + "xlsx": "data/raw/gantt_01.xlsx", + "question": "このガントチャートのPhase3のタスク名とその開始日、終了日を抽出し、次のJSON形式のみで返してください: {\"tasks\":[{\"name\":\"...\",\"start_date\":\"YYYY-MM-DD\",\"end_date\":\"YYYY-MM-DD\"}, ...]}", + "truth": 
"data/truth/gantt_01.json", + "sheet_scope": null, + "render": { + "dpi": 200, + "max_pages": 4 + } + }, + { + "id": "certificate_of_employment_01", + "type": "application_form", + "xlsx": "data/raw/certificate_of_employment_01.xlsx", + "question": "このExcel帳票(就労証明書)について、次の3点を抽出し、JSONのみで返してください。\n\n(1) checkbox_groups: 以下の3つのチェックボックス項目について、それぞれの選択肢ラベルを抽出してください。\n - 業種\n - 雇用の形態\n - 雇用(予定)期間等(無期 / 有期)\n\n(2) numbered_sections: 帳票の「No.」列に対応する番号付き項目の見出し(1〜14)を、番号をキーとして抽出してください。\n\n(3) warning_text: 赤字で記載されている注意文を、そのまま1つの文字列として抽出してください。\n\n出力形式(厳守):\n{\n \"checkbox_groups\": {\n \"業種\": [\"...\", \"...\"],\n \"雇用の形態\": [\"...\", \"...\"],\n \"雇用(予定)期間等\": [\"...\", \"...\"]\n },\n \"numbered_sections\": {\n \"1\": \"...\",\n \"2\": \"...\",\n \"3\": \"...\"\n },\n \"warning_text\": \"...\"\n}\n\n注意:\n- チェックボックス記号(□など)は含めず、ラベル文字列のみを返してください。\n- numbered_sections は 1〜14 すべてを含めてください。\n- 余分な説明文やコードフェンスは付けず、JSONのみを返してください。", + "truth": "data/truth/certificate_of_employment_01.json", + "sheet_scope": null, + "render": { + "dpi": 220, + "max_pages": 1 + } + }, + { + "id": "tax_report_01", + "type": "application_form", + "xlsx": "data/raw/tax_report_01.xlsx", + "question": "この市民税・県民税申告書の右側に配置されている縦方向の帳票構造を解析してください。\n\n次の条件をすべて満たすJSONを返してください。\n\n1. 「収入金額」ブロックに含まれる項目名を、上から順に配列で列挙してください。\n2. 上記項目群を視覚的にまとめている「合計」項目名を1つ指定してください。\n3. 「所得から差し引かれる金額」ブロックに含まれる項目名を、上から順に配列で列挙してください。\n4. 
上記控除項目群を視覚的にまとめている「合計」項目名を1つ指定してください。\n\n制約:\n- 項目名は帳票に記載されている日本語表記をそのまま使用してください。\n- 数値、記号、注釈文は含めないでください。\n- 同一列・同一枠内にある項目同士の位置関係に基づいて判断してください。\n- JSONのみを返してください。\n\n出力形式:\n{\n \"income_items\": [\"...\", \"...\"],\n \"income_total\": \"...\",\n \"deduction_items\": [\"...\", \"...\"],\n \"deduction_total\": \"...\"\n}", + "truth": "data/truth/tax_report_01.json", + "sheet_scope": null, + "render": { + "dpi": 240, + "max_pages": 1 + } + }, + { + "id": "smartart_01", + "type": "organization_chart", + "xlsx": "data/raw/smartart_01.xlsx", + "question": "このExcel帳票(SmartArtで作成された組織図)について、次の3点を抽出し、JSONのみで返してください。\n\n(1) top_structure: 最上位から第2階層までの組織構造を、親子関係が分かる形で抽出してください。\n\n(2) sales_departments: 「営業部」の直下にある課の名称を、上から順に配列で返してください。\n\n(3) production_sites: 「生産部」の直下にある工場名を、上から順に配列で返してください。\n\n出力形式(厳守):\n{\n \"top_structure\": {\n \"取締役会\": {\n \"社長\": [\"...\"]\n }\n },\n \"sales_departments\": [\"...\", \"...\"],\n \"production_sites\": [\"...\", \"...\"]\n}\n\n注意:\n- 図形の色や配置座標は含めないでください。テキスト内容と階層関係のみを対象とします。\n- 余分な説明文やコードフェンスは付けず、JSONのみを返してください。", + "truth": "data/truth/smartart_01.json", + "sheet_scope": null, + "render": { + "dpi": 220, + "max_pages": 1 + } + }, + { + "id": "basic_01", + "type": "mixed_document", + "xlsx": "data/raw/basic_01.xlsx", + "question": "このExcel帳票について、次の3点を抽出し、JSONのみで返してください。\n\n(1) sales_table: 左上の売上表について、月をキーとして各製品の数値を抽出してください。\n\n(2) chart_series: 右上の折れ線グラフに含まれる系列名を、凡例の表示順で配列として返してください。\n\n(3) flowchart_paths: 下部のフローチャートについて、開始から終了までの処理パスを条件付きで2通り抽出してください。\n - format_valid = true の場合の処理パス\n - format_valid = false の場合の処理パス\n\n出力形式(厳守):\n{\n \"sales_table\": {\n \"Jan-25\": {\"製品A\": 0, \"製品B\": 0, \"製品C\": 0},\n \"Feb-25\": {\"製品A\": 0, \"製品B\": 0, \"製品C\": 0}\n },\n \"chart_series\": [\"...\", \"...\"],\n \"flowchart_paths\": {\n \"format_valid_true\": [\"...\", \"...\"],\n \"format_valid_false\": [\"...\", \"...\"]\n }\n}\n\n注意:\n- 数値は整数で返してください。\n- フローチャートのパスは、図形内の文言をそのまま順番に並べてください。\n- 余分な説明文やコードフェンスは付けず、JSONのみを返してください。", + "truth": 
"data/truth/basic_01.json", + "sheet_scope": null, + "render": { + "dpi": 220, + "max_pages": 1 + } + }, + { + "id": "heatstroke_flow_01", + "type": "flowchart", + "xlsx": "data/raw/heatstroke_flow_01.xlsx", + "question": "このExcelに記載されている熱中症対応フローについて、上から順に各対応ステップを抽出してください。各ステップについて、step_name(工程名)、description(内容要約)、special_conditions(条件や注意事項がある場合のみ配列で記載)を含むJSONを、次の形式のみで返してください。\n\n{\n \"steps\": [\n {\n \"step_name\": \"...\",\n \"description\": \"...\",\n \"special_conditions\": [\"...\"]\n }\n ]\n}", + "truth": "data/truth/heatstroke_flow_01.json", + "sheet_scope": null, + "render": { + "dpi": 200, + "max_pages": 1 + } + }, + { + "id": "workflow_01", + "type": "workflow", + "xlsx": "data/raw/workflow_01.xlsx", + "question": "このExcelに記載されている業務フロー図(ネット注文フローチャート)について、工程を上から順に整理してください。各工程について、actor(実行主体)、step_name(工程名)、next_steps(次に進む工程名の配列)を含むJSONを、次の形式のみで返してください。\n\n{\n \"steps\": [\n {\n \"actor\": \"お客様|当社\",\n \"step_name\": \"...\",\n \"next_steps\": [\"...\"]\n }\n ]\n}", + "truth": "data/truth/workflow_01.json", + "sheet_scope": null, + "render": { + "dpi": 200, + "max_pages": 1 + } + }, + { + "id": "basic_form_01", + "type": "application_form", + "xlsx": "data/raw/basic_form_01.xlsx", + "question": "このExcel申請書に記載されている入力項目を、意味的なブロック単位で整理してください。申請者本人に関する項目、配偶者に関する項目、収入等に関する申告、預貯金等に関する申告の4分類に分け、それぞれに含まれる項目名を配列でまとめたJSONを、次の形式のみで返してください。\n\n{\n \"applicant\": [],\n \"spouse\": [],\n \"income_declaration\": [],\n \"asset_declaration\": []\n}", + "truth": "data/truth/basic_form_01.json", + "sheet_scope": null, + "render": { + "dpi": 200, + "max_pages": 1 + } + }, + { + "id": "flowchart_02", + "type": "flowchart", + "xlsx": "data/raw/flowchart_02.xlsx", + "question": "このExcelに記載されているログイン処理フローについて、工程を上から順に整理してください。各工程について、step_name(工程名)、step_type(start|process|decision|end)、next_steps(条件付き遷移を含む次工程)を含むJSONを、次の形式のみで返してください。\n\n{\n \"steps\": [\n {\n \"step_name\": \"...\",\n \"step_type\": \"start|process|decision|end\",\n \"next_steps\": [\n {\n \"condition\": 
\"...\",\n \"next\": \"...\"\n }\n ]\n }\n ]\n}", + "truth": "data/truth/flowchart_02.json", + "sheet_scope": null, + "render": { + "dpi": 200, + "max_pages": 1 + } + }, + { + "id": "food_inspection_record_01", + "type": "inspection_log", + "xlsx": "data/raw/food_inspection_record_01.xlsx", + "question": "This workbook contains three sheets (\"検食簿(1)\", \"検食簿 (2)\", \"検食簿 (3)\"). For the first date on each sheet, extract the lunch menu items and snack items and return JSON in the following format:\n\n{\n \"sheets\": {\n \"検食簿(1)\": {\"date\": \"...\", \"lunch_menu\": [\"...\"], \"snacks\": [\"...\"]},\n \"検食簿 (2)\": {\"date\": \"...\", \"lunch_menu\": [\"...\"], \"snacks\": [\"...\"]},\n \"検食簿 (3)\": {\"date\": \"...\", \"lunch_menu\": [\"...\"], \"snacks\": [\"...\"]}\n }\n}\n\nJSON only.", + "truth": "data/truth/food_inspection_record_01.json", + "sheet_scope": null, + "render": { + "dpi": 220, + "max_pages": 3 + } + } + ] +} diff --git a/benchmark/data/normalization_rules.json b/benchmark/data/normalization_rules.json new file mode 100644 index 0000000..0590160 --- /dev/null +++ b/benchmark/data/normalization_rules.json @@ -0,0 +1,118 @@ +{ + "cases": { + "heatstroke_flow_01": { + "alias_rules": [], + "split_rules": [], + "composite_rules": [], + "list_object_rules": [ + { + "list_key": "steps", + "string_fields": [ + "step_name" + ], + "string_fields_contains": [], + "list_fields_contains": [], + "strip_prefix": { + "step_name": "^[0-9]+[\\..]\\s*" + } + } + ] + }, + "workflow_01": { + "alias_rules": [ + { + "canonical": "キャンセル", + "aliases": [ + "買わない" + ] + } + ], + "split_rules": [], + "composite_rules": [], + "list_object_rules": [ + { + "list_key": "steps", + "string_fields": [ + "step_name" + ], + "string_fields_contains": [], + "list_fields_contains": [], + "strip_prefix": {} + } + ] + }, + "certificate_of_employment_01": { + "alias_rules": [ + { + "canonical": "※本証明書の内容について、就労先事業者等に無断で作成又は改変を行ったときは、刑法上の罪に問われる場合があります。", + "aliases": [ + 
"※本証明書の内容について、就労先事業者等に無断で作成し又は改変を行ったときには、刑法上の罪に問われる場合があります。" + ] + } + ], + "split_rules": [], + "composite_rules": [] + }, + "tax_report_01": { + "alias_rules": [ + { + "canonical": "合計", + "aliases": [ + "⑬から㉕までの計", + "13から25までの計" + ] + } + ], + "split_rules": [ + { + "trigger": "勤労学生、障害者控除", + "parts": [ + "勤労学生控除", + "障害者控除" + ] + }, + { + "trigger": "勤労学生・障害者控除", + "parts": [ + "勤労学生控除", + "障害者控除" + ] + } + ], + "composite_rules": [ + { + "canonical": "事業(営業等)", + "parts": [ + "事業", + "営業等" + ] + }, + { + "canonical": "事業(農業)", + "parts": [ + "事業", + "農業" + ] + } + ] + }, + "flowchart_02": { + "alias_rules": [], + "split_rules": [], + "composite_rules": [], + "list_object_rules": [ + { + "list_key": "steps", + "string_fields": [ + "step_type" + ], + "string_fields_contains": [ + "step_name" + ], + "list_fields_contains": [], + "strip_prefix": {} + } + ] + } + } +} \ No newline at end of file diff --git a/benchmark/data/truth/basic_01.json b/benchmark/data/truth/basic_01.json new file mode 100644 index 0000000..53bca60 --- /dev/null +++ b/benchmark/data/truth/basic_01.json @@ -0,0 +1,31 @@ +{ + "sales_table": { + "Jan-25": { "製品A": 120, "製品B": 80, "製品C": 60 }, + "Feb-25": { "製品A": 135, "製品B": 90, "製品C": 64 }, + "Mar-25": { "製品A": 150, "製品B": 100, "製品C": 70 }, + "Apr-25": { "製品A": 170, "製品B": 110, "製品C": 72 }, + "May-25": { "製品A": 160, "製品B": 120, "製品C": 75 }, + "Jun-25": { "製品A": 180, "製品B": 130, "製品C": 80 } + }, + "chart_series": ["製品A", "製品B", "製品C"], + "flowchart_paths": { + "format_valid_true": [ + "開始", + "入力データ読み込み", + "形式は正しい?", + "1件処理", + "残件あり?", + "出力を生成", + "メール送信?", + "メール送信", + "終了" + ], + "format_valid_false": [ + "開始", + "入力データ読み込み", + "形式は正しい?", + "エラー表示", + "終了" + ] + } +} diff --git a/benchmark/data/truth/basic_form_01.json b/benchmark/data/truth/basic_form_01.json new file mode 100644 index 0000000..cd8ca3b --- /dev/null +++ b/benchmark/data/truth/basic_form_01.json @@ -0,0 +1,34 @@ +{ + "applicant": [ + "フリガナ", + "被保険者氏名", + "生年月日", + "住所", 
+ "連絡先", + "入所(院)した介護保険施設の所在地及び名称", + "入所(院)年月日" + ], + "spouse": [ + "配偶者の有無", + "配偶者氏名", + "配偶者生年月日", + "配偶者個人番号", + "配偶者住所", + "配偶者連絡先", + "本年1月1日現在の住所", + "課税状況" + ], + "income_declaration": [ + "生活保護受給者に該当するか", + "市町村民税非課税世帯であるか", + "課税年金収入額", + "その他の合計所得金額", + "年金の種類に関する申告" + ], + "asset_declaration": [ + "預貯金額", + "有価証券の金額", + "その他の資産額", + "配偶者の預貯金等を含むかどうか" + ] +} diff --git a/benchmark/data/truth/certificate_of_employment_01.json b/benchmark/data/truth/certificate_of_employment_01.json new file mode 100644 index 0000000..9dc4cd3 --- /dev/null +++ b/benchmark/data/truth/certificate_of_employment_01.json @@ -0,0 +1,58 @@ +{ + "checkbox_groups": { + "業種": [ + "農業・林業", + "漁業", + "鉱業・採石業・砂利採取業", + "建設業", + "製造業", + "電気・ガス・熱供給・水道業", + "情報通信業", + "運輸業・郵便業", + "卸売業・小売業", + "金融業・保険業", + "不動産業・物品賃貸業", + "学術研究・専門・技術サービス", + "宿泊業・飲食サービス業", + "生活関連サービス業・娯楽業", + "医療・福祉", + "教育・学習支援業", + "複合サービス事業", + "公務", + "その他" + ], + "雇用の形態": [ + "正社員", + "パート・アルバイト", + "派遣社員", + "契約社員", + "会計年度任用職員", + "非常勤・臨時職員", + "役員", + "自営業主", + "自営業専従者", + "家族従業者", + "内職", + "業務委託", + "その他" + ], + "雇用(予定)期間等": ["無期", "有期"] + }, + "numbered_sections": { + "1": "業種", + "2": "本人氏名", + "3": "雇用(予定)期間等", + "4": "本人就労先事業所", + "5": "雇用の形態", + "6": "就労時間(固定就労の場合)", + "7": "就労時間(変則就労の場合)", + "8": "就労実績", + "9": "産前・産後休業の取得", + "10": "育児休業の取得", + "11": "産休・育休以外の休業の取得", + "12": "復職(予定)年月日", + "13": "育児のための短時間勤務制度利用有無", + "14": "保育士等としての勤務実態の有無" + }, + "warning_text": "※本証明書の内容について、就労先事業者等に無断で作成又は改変を行ったときは、刑法上の罪に問われる場合があります。" +} diff --git a/benchmark/data/truth/ffr_425_01.json b/benchmark/data/truth/ffr_425_01.json new file mode 100644 index 0000000..a53b43d --- /dev/null +++ b/benchmark/data/truth/ffr_425_01.json @@ -0,0 +1,18 @@ +{ + "checkbox_groups": { + "Report Type": ["Quarterly", "Semi-Annual", "Annual", "Final"], + "Basis of Accounting": ["Cash", "Accrual"] + }, + "not_required_by_epa_scope": "Federal Cash", + "section_headers": [ + "Federal Agency and Organizational Element to Which Report is 
Submitted", + "Federal Grant or Other Identifying Number Assigned by Federal Agency", + "Recipient Organization (Name and complete address including Zip code)", + "DUNS Number", + "Recipient Account Number or Identifying Number", + "Report Type", + "Basis of Accounting", + "Project/Grant Period", + "Reporting Period End Date" + ] +} diff --git a/benchmark/data/truth/flowchart_01.json b/benchmark/data/truth/flowchart_01.json new file mode 100644 index 0000000..196bc95 --- /dev/null +++ b/benchmark/data/truth/flowchart_01.json @@ -0,0 +1,14 @@ +{ + "steps": [ + "S", + "要件抽出", + "ヒアリング", + "非機能要件", + "思考実験", + "再検証", + "まとめ", + "文書作成", + "締結", + "E" + ] +} diff --git a/benchmark/data/truth/flowchart_02.json b/benchmark/data/truth/flowchart_02.json new file mode 100644 index 0000000..868d040 --- /dev/null +++ b/benchmark/data/truth/flowchart_02.json @@ -0,0 +1,87 @@ +{ + "steps": [ + { + "step_name": "ログイン画面", + "step_type": "start", + "next_steps": [ + { + "condition": "always", + "next": "登録情報を入力" + } + ] + }, + { + "step_name": "登録情報を入力", + "step_type": "process", + "next_steps": [ + { + "condition": "always", + "next": "入力内容は正しいか" + } + ] + }, + { + "step_name": "入力内容は正しいか", + "step_type": "decision", + "next_steps": [ + { + "condition": "はい", + "next": "サーバーに認証リクエストを送信" + }, + { + "condition": "いいえ", + "next": "再入力を提示" + } + ] + }, + { + "step_name": "再入力を提示", + "step_type": "process", + "next_steps": [ + { + "condition": "always", + "next": "登録情報を入力" + } + ] + }, + { + "step_name": "サーバーに認証リクエストを送信", + "step_type": "process", + "next_steps": [ + { + "condition": "always", + "next": "認証に成功か" + } + ] + }, + { + "step_name": "認証に成功か", + "step_type": "decision", + "next_steps": [ + { + "condition": "はい", + "next": "操作画面に遷移" + }, + { + "condition": "いいえ", + "next": "認証エラーを提示" + } + ] + }, + { + "step_name": "認証エラーを提示", + "step_type": "process", + "next_steps": [ + { + "condition": "always", + "next": "再入力を提示" + } + ] + }, + { + "step_name": "操作画面に遷移", + "step_type": 
"end", + "next_steps": [] + } + ] +} diff --git a/benchmark/data/truth/food_inspection_record_01.json b/benchmark/data/truth/food_inspection_record_01.json new file mode 100644 index 0000000..3ca2f65 --- /dev/null +++ b/benchmark/data/truth/food_inspection_record_01.json @@ -0,0 +1,51 @@ +{ + "sheets": { + "検食簿(1)": { + "date": "12月1日(月)", + "lunch_menu": [ + "麦ごはん", + "鶏肉の照り焼き", + "白菜のごま和え", + "切干大根の煮物", + "味噌汁(キャベツ)" + ], + "snacks": [ + "スキムミルク", + "ウエハース", + "スキムミルク", + "お菓子・こんぶ" + ] + }, + "検食簿 (2)": { + "date": "12月8日(月)", + "lunch_menu": [ + "麦ごはん", + "鶏肉の唐揚げ", + "キャベツ", + "ひじきの中華和え", + "すまし汁(麩)" + ], + "snacks": [ + "スキムミルク", + "ボーロ", + "スキムミルク", + "おからケーキ" + ] + }, + "検食簿 (3)": { + "date": "12月15日(月)", + "lunch_menu": [ + "麦ごはん", + "豚肉と野菜の煮物", + "大豆のサラダ", + "味噌汁(なす)" + ], + "snacks": [ + "スキムミルク", + "ウエハース", + "スキムミルク", + "お菓子・こんぶ" + ] + } + } +} \ No newline at end of file diff --git a/benchmark/data/truth/gantt_01.json b/benchmark/data/truth/gantt_01.json new file mode 100644 index 0000000..aebdd03 --- /dev/null +++ b/benchmark/data/truth/gantt_01.json @@ -0,0 +1,24 @@ +{ + "tasks": [ + { + "name": "Core Feature Dev", + "start_date": "2026-01-26", + "end_date": "2026-02-03" + }, + { + "name": "Edge Case Handling", + "start_date": "2026-01-27", + "end_date": "2026-02-03" + }, + { + "name": "Integration Work", + "start_date": "2026-01-29", + "end_date": "2026-02-04" + }, + { + "name": "Internal Review", + "start_date": "2026-02-01", + "end_date": "2026-02-04" + } + ] +} diff --git a/benchmark/data/truth/heatstroke_flow_01.json b/benchmark/data/truth/heatstroke_flow_01.json new file mode 100644 index 0000000..54ce1a0 --- /dev/null +++ b/benchmark/data/truth/heatstroke_flow_01.json @@ -0,0 +1,60 @@ +{ + "steps": [ + { + "step_name": "発見", + "description": "熱中症が疑われる症状があるかを確認する。", + "special_conditions": [ + "めまい", + "失神", + "筋肉痛", + "筋肉の硬直", + "大量の発汗", + "頭痛", + "嘔吐", + "意識障害", + "けいれん", + "高体温" + ] + }, + { + "step_name": "報告", + "description": 
"作業管理者および緊急連絡先へ状況を報告する。", + "special_conditions": [] + }, + { + "step_name": "初期対応", + "description": "涼しい場所への移動、水分補給、体を冷やすなどの応急処置を行う。", + "special_conditions": [ + "WBGT値が28度以上の場合は作業を中断する", + "気温が31度以上の場合は作業を中断する" + ] + }, + { + "step_name": "医療機関搬送・救急要請", + "description": "症状に応じて医療機関へ搬送するか救急要請を行う。", + "special_conditions": [ + "意識がない場合は119番通報する", + "応答が曖昧な場合は119番通報する", + "高熱が続く場合は119番通報する", + "けいれんなど重症の兆候がある場合は119番通報する" + ] + }, + { + "step_name": "事後対応・記録", + "description": "発生状況や対応内容を記録し、保存および定期的な見直しを行う。", + "special_conditions": [ + "発生日時", + "場所", + "WBGT値", + "気温", + "作業内容", + "作業時間", + "症状", + "初期対応内容", + "報告先", + "搬送有無", + "最終対応" + ] + } + ] +} diff --git a/benchmark/data/truth/smartart_01.json b/benchmark/data/truth/smartart_01.json new file mode 100644 index 0000000..2a22af1 --- /dev/null +++ b/benchmark/data/truth/smartart_01.json @@ -0,0 +1,9 @@ +{ + "top_structure": { + "取締役会": { + "社長": ["企画管理部", "営業部", "開発部", "技術部", "生産部", "総務部"] + } + }, + "sales_departments": ["第1営業課", "第2営業課", "第3営業課", "海外営業課"], + "production_sites": ["愛知工場", "山形工場", "高知工場"] +} diff --git a/benchmark/data/truth/tax_report_01.json b/benchmark/data/truth/tax_report_01.json new file mode 100644 index 0000000..174476d --- /dev/null +++ b/benchmark/data/truth/tax_report_01.json @@ -0,0 +1,29 @@ +{ + "income_items": [ + "事業(営業等)", + "事業(農業)", + "不動産", + "利子", + "配当", + "給与", + "公的年金等", + "業務", + "その他" + ], + "income_total": "合計", + "deduction_items": [ + "社会保険料控除", + "小規模企業共済等掛金控除", + "生命保険料控除", + "地震保険料控除", + "寡婦、ひとり親控除", + "勤労学生控除", + "配偶者(特別)控除", + "扶養控除", + "障害者控除", + "基礎控除", + "雑損控除", + "医療費控除" + ], + "deduction_total": "合計" +} diff --git a/benchmark/data/truth/workflow_01.json b/benchmark/data/truth/workflow_01.json new file mode 100644 index 0000000..ea7d3c3 --- /dev/null +++ b/benchmark/data/truth/workflow_01.json @@ -0,0 +1,84 @@ +{ + "steps": [ + { + "actor": "お客様", + "step_name": "商品検索", + "next_steps": ["検討"] + }, + { + "actor": "当社", + "step_name": "商品情報を表示", + 
"next_steps": ["検討"] + }, + { + "actor": "お客様", + "step_name": "検討", + "next_steps": ["キャンセル", "カートに追加"] + }, + { + "actor": "お客様", + "step_name": "キャンセル", + "next_steps": [] + }, + { + "actor": "お客様", + "step_name": "カートに追加", + "next_steps": ["在庫確認"] + }, + { + "actor": "当社", + "step_name": "在庫確認", + "next_steps": ["レジに進む"] + }, + { + "actor": "お客様", + "step_name": "レジに進む", + "next_steps": ["支払い方法の選択"] + }, + { + "actor": "お客様", + "step_name": "支払い方法の選択", + "next_steps": ["支払いの案内"] + }, + { + "actor": "当社", + "step_name": "支払いの案内", + "next_steps": ["支払い処理"] + }, + { + "actor": "当社", + "step_name": "支払い処理", + "next_steps": ["注文の確定"] + }, + { + "actor": "お客様", + "step_name": "注文の確定", + "next_steps": ["配送先入力"] + }, + { + "actor": "お客様", + "step_name": "配送先入力", + "next_steps": ["配送先確認"] + }, + { + "actor": "当社", + "step_name": "配送先確認", + "next_steps": ["注文確認メールを送信"] + }, + { + "actor": "当社", + "step_name": "注文確認メールを送信", + "next_steps": ["商品を準備・発送"] + }, + { + "actor": "当社", + "step_name": "商品を準備・発送", + "next_steps": ["商品受取"] + }, + { + "actor": "お客様", + "step_name": "商品受取", + "next_steps": [] + } + ] +} diff --git a/benchmark/docs/spec.md b/benchmark/docs/spec.md new file mode 100644 index 0000000..2f52117 --- /dev/null +++ b/benchmark/docs/spec.md @@ -0,0 +1,123 @@ +# Reconstruction Utility Benchmark (RUB) Specification + +## 0. Scope (v0.1 / lite vs v1) + +RUB lite (v0.1) is a small, fast-running subset intended for quick checks. +The full RUB (v1) is the primary benchmark for public reporting. + +RUB lite assets: + +- benchmark/rub/manifest_lite.json +- benchmark/rub/truth_lite/*.json + +Full RUB assets: + +- benchmark/rub/manifest.json +- benchmark/rub/truth/*.json + +## 1. Goal + +RUB measures how useful reconstructed Markdown is for downstream structure-aware +queries. The target is reconstruction utility rather than raw string similarity. + +## 2. 
Inputs and outputs + +- Input: Excel workbooks (.xlsx) +- Methods: pdf, image_vlm, exstruct, html, openpyxl +- Stage A output: reconstructed Markdown +- Stage B output: JSON-only answers to structure queries + +## 3. Two-stage evaluation + +### Stage A: Reconstruction + +Each method produces Markdown from the same source workbook. + +- pdf: soffice -> pdf -> text extraction -> Markdown +- image_vlm: render -> VLM -> Markdown +- exstruct: exstruct JSON -> LLM -> Markdown +- html / openpyxl: rule-based extraction -> Markdown + +### Stage B: Structure queries + +Only the Stage A Markdown is used as input to answer queries. + +- Output must be JSON only +- Scored by exact match after deterministic normalization + +## 4. Task design principles + +- Prefer tasks that require structure (blocks, hierarchy, adjacency) +- Avoid tasks that are solvable by surface text order alone +- Define canonical JSON outputs +- Use deterministic normalization for fairness + +## 5. Scoring and normalization + +- Normalize strings and JSON structure before comparison +- For unordered collections, compare as sorted sets +- Avoid ambiguous numbering in answers + +## 6. Metrics + +### 6.1 Primary metric: RUS + +RUS = correct_answers / total_questions + +### 6.2 Secondary metrics + +- Cost-normalized RUS = RUS / cost_usd +- Token-normalized RUS = RUS / input_tokens +- Stage A failure rate = failed Markdown reconstruction rate + +## 7. Directory layout + +``` +benchmark/ + rub/ + README.md + BENCHMARK_SPEC.md + manifest.json # full (v1) + manifest_lite.json # lite (v0.1) + truth/ # full (v1) + *.json + truth_lite/ # lite (v0.1) + *.json + schemas/ + *.schema.json + scoring/ + normalize.py + score.py + diagrams/ + rub_overview.mmd + scoring_flow.mmd +``` + +## 8. 
Manifest fields + +- id: task id +- type: task type +- xlsx: input workbook path +- question: Stage B query +- truth: ground-truth JSON path +- sheet_scope: optional sheet filter (null = all) +- render: render settings for image/pdf paths +- track: evaluation track name (default: reconstruction) + +### 8.1 RUB lite notes + +- Smaller number of cases +- Unordered paths supported for strict but fair comparison +- Binary scoring (0/1) only + +## 9. Evaluation notes + +- Do not use Markdown string similarity for RUB scoring +- Focus on task correctness and structure preservation +- Keep normalization deterministic and transparent + +## 10. Reporting + +- Public report focuses on reconstruction utility +- Show both primary and secondary metrics +- Clearly separate core extraction vs RUB results diff --git a/benchmark/public/INDEX.md b/benchmark/public/INDEX.md new file mode 100644 index 0000000..df4f313 --- /dev/null +++ b/benchmark/public/INDEX.md @@ -0,0 +1,10 @@ +# Public Bundle + +This directory contains the public-ready benchmark artifacts. + +## Contents +- REPORT.md +- reports/ +- plots/ + +Generated by scripts/publicize.py. diff --git a/benchmark/public/REPORT.md b/benchmark/public/REPORT.md new file mode 100644 index 0000000..e146d3a --- /dev/null +++ b/benchmark/public/REPORT.md @@ -0,0 +1,84 @@ +# Benchmark Summary (Public) + +This summary consolidates the latest results for the Excel document benchmark and +RUB (structure query track). Use this file as a public-facing overview and link +full reports for reproducibility. 
+ +Sources: +- outputs/results/report.md (core benchmark) +- outputs/rub/results/report.md (RUB structure_query) + +## Charts + +![Core Benchmark Summary](plots/core_benchmark.png) +![Markdown Evaluation Summary](plots/markdown_quality.png) +![RUB Structure Query Summary](plots/rub_structure_query.png) + +## Scope + +- Cases: 12 Excel documents +- Methods: exstruct, openpyxl, pdf, html, image_vlm +- Model: gpt-4o (Responses API) +- Temperature: 0.0 +- Note: record the run date/time when publishing +- This is an initial benchmark (n=12) and will be expanded in future releases. + +## Core Benchmark (extraction + scoring) + +Key metrics from outputs/results/report.md: + +- Exact accuracy (acc): best = pdf 0.607551, exstruct = 0.583802 +- Normalized accuracy (acc_norm): best = pdf 0.856642, exstruct = 0.835538 +- Raw coverage (acc_raw): best = exstruct 0.876495, pdf = 0.874557 +- Raw precision: best = exstruct 0.933691 +- Markdown coverage (acc_md): best = pdf 0.700094, exstruct = 0.697269 +- Markdown precision: best = exstruct 0.796101 + +Interpretation: +- pdf leads in Exact/Normalized, especially when literal string match matters. +- exstruct is strongest on Raw coverage/precision and Markdown precision, + indicating robust capture and downstream-friendly structure. + +## RUB (structure_query track) + +RUB evaluates Stage B questions using Markdown-only inputs. Current track is +"structure_query" (paths selection). + +Summary from outputs/rub/results/report.md: + +- RUS: exstruct 0.166667 (tie for top with openpyxl 0.166667) +- Partial F1: exstruct 0.436772 (best among methods) + +Interpretation: +- exstruct is competitive for structure queries, but the margin is not large. +- This track is sensitive to question design; it rewards selection accuracy + more than raw reconstruction. 
+ +## Positioning for RAG/LLM Preprocessing + +Practical strengths shown by the current benchmark: +- High Raw coverage/precision (exstruct best) +- High Markdown precision (exstruct best) +- Near-top normalized accuracy + +Practical caveats: +- Exact/normalized top spot is often pdf +- RUB structure_query shows only a modest advantage + +Recommended public framing: +- exstruct is a strong option when the goal is structured reuse (JSON/Markdown) + for downstream LLM/RAG pipelines. +- pdf/VLM methods can be stronger for literal string fidelity or visual layout + recovery. + +## Known Limitations + +- Absolute RUS values are low in some settings (task design sensitive). +- Results vary by task type (forms/flows/diagrams vs tables). +- Model changes (e.g., gpt-4.1) require separate runs and reporting. + +## Next Steps (optional) + +- Add a reconstruction track that scores “structure rebuild” directly. +- Add task-specific structure queries (not only path selection). +- Publish run date, model version, and normalization rules with results. 
diff --git a/benchmark/public/plots/core_benchmark.png b/benchmark/public/plots/core_benchmark.png new file mode 100644 index 0000000..9e84603 Binary files /dev/null and b/benchmark/public/plots/core_benchmark.png differ diff --git a/benchmark/public/plots/markdown_quality.png b/benchmark/public/plots/markdown_quality.png new file mode 100644 index 0000000..721dc14 Binary files /dev/null and b/benchmark/public/plots/markdown_quality.png differ diff --git a/benchmark/public/plots/rub_structure_query.png b/benchmark/public/plots/rub_structure_query.png new file mode 100644 index 0000000..a4ff435 Binary files /dev/null and b/benchmark/public/plots/rub_structure_query.png differ diff --git a/benchmark/public/reports/compare_gpt4o_gpt41.md b/benchmark/public/reports/compare_gpt4o_gpt41.md new file mode 100644 index 0000000..2c99fe9 --- /dev/null +++ b/benchmark/public/reports/compare_gpt4o_gpt41.md @@ -0,0 +1,9 @@ +# RUB Summary Comparison (gpt-4o vs gpt-4.1) + +| method | rus (4o) | rus (4.1) | diff | f1 (4o) | f1 (4.1) | diff | +|:--|--:|--:|--:|--:|--:|--:| +| exstruct | 0.250000 | 0.250000 | 0.000000 | 0.756054 | 0.728227 | -0.027827 | +| pdf | 0.166667 | 0.166667 | 0.000000 | 0.741147 | 0.735282 | -0.005865 | +| image_vlm | 0.166667 | 0.166667 | 0.000000 | 0.635543 | 0.630145 | -0.005398 | +| html | 0.083333 | 0.083333 | 0.000000 | 0.429339 | 0.440331 | 0.010992 | +| openpyxl | 0.083333 | 0.083333 | 0.000000 | 0.418502 | 0.418502 | 0.000000 | diff --git a/benchmark/public/reports/results_report.md b/benchmark/public/reports/results_report.md new file mode 100644 index 0000000..963a9c2 --- /dev/null +++ b/benchmark/public/reports/results_report.md @@ -0,0 +1,122 @@ +# Benchmark Report + +This report summarizes extraction accuracy for each method on the benchmark cases. +Scores are computed per case and aggregated by method. Exact, normalized, raw, +and markdown tracks are reported to ensure fair comparison across variations. 
+ +## Evaluation protocol (public) + +Fixed settings for reproducibility: + +- Model: gpt-4o (Responses API) +- Temperature: 0.0 +- Prompt: fixed in bench/llm/openai_client.py +- Input contexts: generated by bench.cli extract +- Normalization: data/normalization_rules.json (optional track) +- Evaluation: bench.cli eval (Exact + Normalized + Raw) +- Markdown conversion: bench.cli markdown (optional) +- Report: bench.cli report (summary + per-case) + +Recommended disclosure when publishing results: + +- Model name + version, temperature, and date of run +- Full normalization_rules.json used for normalized scores +- Cost/token estimation method +- Any skipped cases and the reason (missing files, failures) + +## How to interpret results (public guide) + +- Exact: strict string match with no normalization. +- Normalized: applies case-specific rules in data/normalization_rules.json to + absorb formatting differences (aliases, split/composite labels). +- Raw: loose coverage/precision over flattened text tokens (schema-agnostic). +- Markdown: coverage/precision against canonical Markdown rendered from truth. + +Recommended interpretation: + +- Use Exact to compare end-to-end string fidelity (best for literal extraction). +- Use Normalized to compare document understanding across methods. +- Use Raw to compare how much ground-truth text is captured regardless of schema. +- Use Markdown to evaluate JSON-to-Markdown conversion quality. +- When tracks disagree, favor Normalized for Excel-heavy layouts where labels + are split/merged or phrased differently. +- Always cite both accuracy and cost metrics in public comparisons. + +## Evaluation tracks + +- Exact: strict string match without any normalization. +- Normalized: applies case-specific normalization rules (aliases, split/composite) + defined in data/normalization_rules.json to absorb format and wording variations. 
+- Raw: loose coverage/precision over flattened text tokens (schema-agnostic), + intended to reflect raw data capture without penalizing minor label variations. +- Markdown: coverage/precision comparing LLM Markdown to canonical truth Markdown. + +## Summary by method + +| method | acc | avg_in | avg_cost | n | acc_ordered | acc_norm | acc_norm_ordered | acc_raw | raw_precision | acc_md | md_precision | +|:----------|---------:|---------:|-----------:|----:|--------------:|-----------:|-------------------:|----------:|----------------:|---------:|---------------:| +| exstruct | 0.583802 | 4620.75 | 0.0146977 | 12 | 0.579172 | 0.835538 | 0.806603 | 0.876495 | 0.933691 | 0.697269 | 0.796101 | +| html | 0.377825 | 1812.75 | 0.00683187 | 12 | 0.37088 | 0.489698 | 0.477703 | 0.678979 | 0.751164 | 0.588284 | 0.717016 | +| image_vlm | 0.589282 | 1753.5 | 0.00716708 | 12 | 0.575678 | 0.756142 | 0.737136 | 0.824668 | 0.885373 | 0.661921 | 0.773776 | +| openpyxl | 0.36789 | 2110.08 | 0.00733854 | 12 | 0.363261 | 0.481385 | 0.46939 | 0.671214 | 0.761695 | 0.581528 | 0.737017 | +| pdf | 0.607551 | 1209.58 | 0.00592479 | 12 | 0.602921 | 0.856642 | 0.806294 | 0.874557 | 0.908698 | 0.700094 | 0.775776 | + +## Markdown evaluation notes + +Markdown scores measure how well the generated Markdown lines match a canonical +Markdown rendering of the ground truth JSON. This is a *conversion quality* +signal, not a direct extraction-accuracy substitute. + +Key points: + +- Coverage (acc_md): how much of truth Markdown content is recovered. +- Precision (md_precision): how much of predicted Markdown is correct. +- Layout shifts or list formatting differences can lower scores even if + the underlying facts are correct. +- LLM-based conversion introduces variability; re-run with the same seed + and model settings to assess stability, or use deterministic rendering + for baseline comparisons. 
+- Use Markdown scores when your downstream task consumes Markdown (e.g., + RAG ingestion), and report alongside Exact/Normalized/Raw metrics. + +## Exstruct positioning notes (public) + +Recommended primary indicators for exstruct positioning (RAG pre-processing): + +- Normalized accuracy: acc_norm / acc_norm_ordered +- Raw coverage/precision: acc_raw / raw_precision +- Markdown coverage/precision: acc_md / md_precision + +Current deltas vs. best method (n=12, when available): + +- Normalized accuracy: exstruct 0.835538 vs best 0.856642 (pdf), delta -0.021104 +- Normalized ordered accuracy: exstruct 0.806603 vs best 0.806603 (exstruct), delta +0.000000 +- Raw coverage: exstruct 0.876495 vs best 0.876495 (exstruct), delta +0.000000 +- Raw precision: exstruct 0.933691 vs best 0.933691 (exstruct), delta +0.000000 +- Markdown coverage: exstruct 0.697269 vs best 0.700094 (pdf), delta -0.002825 +- Markdown precision: exstruct 0.796101 vs best 0.796101 (exstruct), delta +0.000000 + +## Normalization leniency summary + +| case_id | alias_rules | split_rules | composite_rules | list_object_rules | details | +|:-----------------------------|--------------:|--------------:|------------------:|--------------------:|:---------------------------------------------------------------------------------------| +| certificate_of_employment_01 | 1 | 0 | 0 | 0 | - | +| flowchart_02 | 0 | 0 | 0 | 1 | steps(strings=step_type; strings_contains=step_name; lists_contains=-; strip_prefix=-) | +| heatstroke_flow_01 | 0 | 0 | 0 | 1 | steps(strings=step_name; strings_contains=-; lists_contains=-; strip_prefix=step_name) | +| tax_report_01 | 1 | 2 | 2 | 0 | - | +| workflow_01 | 1 | 0 | 0 | 1 | steps(strings=step_name; strings_contains=-; lists_contains=-; strip_prefix=-) | + +## Detailed reports + +- detailed_reports/report_basic_01.md +- detailed_reports/report_basic_form_01.md +- detailed_reports/report_certificate_of_employment_01.md +- detailed_reports/report_ffr_425_01.md +- 
detailed_reports/report_flowchart_01.md +- detailed_reports/report_flowchart_02.md +- detailed_reports/report_food_inspection_record_01.md +- detailed_reports/report_gantt_01.md +- detailed_reports/report_heatstroke_flow_01.md +- detailed_reports/report_smartart_01.md +- detailed_reports/report_tax_report_01.md +- detailed_reports/report_workflow_01.md diff --git a/benchmark/public/reports/rub_report.md b/benchmark/public/reports/rub_report.md new file mode 100644 index 0000000..7743a99 --- /dev/null +++ b/benchmark/public/reports/rub_report.md @@ -0,0 +1,24 @@ +# RUB Report + +This report summarizes Reconstruction Utility Benchmark (RUB) results. +Scores are computed on Stage B task accuracy using Markdown-only inputs. + +## Summary by method + +| method | rus | avg_in | avg_cost | n | partial_precision | partial_recall | partial_f1 | +|:----------|----------:|---------:|-----------:|----:|--------------------:|-----------------:|-------------:| +| exstruct | 0.166667 | 844.167 | 0.00254708 | 12 | 0.666667 | 0.365278 | 0.436772 | +| html | 0.0833333 | 994.333 | 0.00291833 | 12 | 0.583333 | 0.335317 | 0.399903 | +| image_vlm | 0 | 743.167 | 0.00231458 | 12 | 0.666667 | 0.343254 | 0.423112 | +| openpyxl | 0.166667 | 769.083 | 0.00243771 | 12 | 0.5 | 0.37004 | 0.408236 | +| pdf | 0 | 924.5 | 0.00265208 | 12 | 0.583333 | 0.252976 | 0.325493 | + +## Summary by track + +| track | method | rus | avg_in | avg_cost | n | partial_precision | partial_recall | partial_f1 | +|:----------------|:----------|----------:|---------:|-----------:|----:|--------------------:|-----------------:|-------------:| +| structure_query | exstruct | 0.166667 | 844.167 | 0.00254708 | 12 | 0.666667 | 0.365278 | 0.436772 | +| structure_query | html | 0.0833333 | 994.333 | 0.00291833 | 12 | 0.583333 | 0.335317 | 0.399903 | +| structure_query | image_vlm | 0 | 743.167 | 0.00231458 | 12 | 0.666667 | 0.343254 | 0.423112 | +| structure_query | openpyxl | 0.166667 | 769.083 | 0.00243771 | 12 | 0.5 | 
0.37004 | 0.408236 | +| structure_query | pdf | 0 | 924.5 | 0.00265208 | 12 | 0.583333 | 0.252976 | 0.325493 | diff --git a/benchmark/public/reports/rub_report_gpt41.md b/benchmark/public/reports/rub_report_gpt41.md new file mode 100644 index 0000000..2cb0dfb --- /dev/null +++ b/benchmark/public/reports/rub_report_gpt41.md @@ -0,0 +1,14 @@ +# RUB Report + +This report summarizes Reconstruction Utility Benchmark (RUB) results. +Scores are computed on Stage B task accuracy using Markdown-only inputs. + +## Summary by method + +| method | rus | avg_in | avg_cost | n | partial_precision | partial_recall | partial_f1 | +|:----------|----------:|---------:|-----------:|----:|--------------------:|-----------------:|-------------:| +| exstruct | 0.25 | 946.5 | 0 | 12 | 0.783491 | 0.709055 | 0.728227 | +| html | 0.0833333 | 1096.67 | 0 | 12 | 0.467377 | 0.43575 | 0.440331 | +| image_vlm | 0.166667 | 845.5 | 0 | 12 | 0.66437 | 0.625115 | 0.630145 | +| openpyxl | 0.0833333 | 871.417 | 0 | 12 | 0.517041 | 0.398088 | 0.418502 | +| pdf | 0.166667 | 1026.83 | 0 | 12 | 0.760726 | 0.727743 | 0.735282 | diff --git a/benchmark/public/reports/rub_report_gpt4o.md b/benchmark/public/reports/rub_report_gpt4o.md new file mode 100644 index 0000000..57184e0 --- /dev/null +++ b/benchmark/public/reports/rub_report_gpt4o.md @@ -0,0 +1,14 @@ +# RUB Report + +This report summarizes Reconstruction Utility Benchmark (RUB) results. +Scores are computed on Stage B task accuracy using Markdown-only inputs. 
+ +## Summary by method + +| method | rus | avg_in | avg_cost | n | partial_precision | partial_recall | partial_f1 | +|:----------|----------:|---------:|-----------:|----:|--------------------:|-----------------:|-------------:| +| exstruct | 0.25 | 946.5 | 0 | 12 | 0.791177 | 0.749742 | 0.756054 | +| html | 0.0833333 | 1096.67 | 0 | 12 | 0.458348 | 0.418993 | 0.429339 | +| image_vlm | 0.166667 | 845.5 | 0 | 12 | 0.666569 | 0.631585 | 0.635543 | +| openpyxl | 0.0833333 | 871.417 | 0 | 12 | 0.517041 | 0.398088 | 0.418502 | +| pdf | 0.166667 | 1026.83 | 0 | 12 | 0.766806 | 0.733395 | 0.741147 | diff --git a/benchmark/pyproject.toml b/benchmark/pyproject.toml new file mode 100644 index 0000000..ac8f62c --- /dev/null +++ b/benchmark/pyproject.toml @@ -0,0 +1,41 @@ +[project] +name = "benchmark" +version = "0.1.0" +description = "Benchmark harness for ExStruct: Excel extraction accuracy and Reconstruction Utility Benchmark (RUB)" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "beautifulsoup4>=4.14.3", + "exstruct", + "lxml>=6.0.2", + "matplotlib>=3.8.0", + "openai>=2.15.0", + "openpyxl>=3.1.5", + "pandas>=2.3.3", + "pydantic>=2.12.5", + "pymupdf>=1.26.7", + "python-dotenv>=1.2.1", + "rich>=14.2.0", + "tabulate>=0.9.0", + "typer>=0.21.1", +] + +[project.scripts] +exbench = "bench.cli:app" + +[dependency-groups] +dev = [ + "ruff>=0.14.8", + "taskipy>=1.14.1", +] + +[tool.uv.sources] +exstruct = { workspace = true } + +[tool.taskipy.tasks] +setup = "python -m pip install -U pip && pip install -e .. && pip install -e ." 
+extract = "exbench extract --case all --method all" +ask = "exbench ask --case all --method all --model gpt-4o" +eval = "exbench eval --case all --method all" +report = "exbench report" +all = "task extract && task ask && task eval && task report" diff --git a/benchmark/rub/manifest.json b/benchmark/rub/manifest.json new file mode 100644 index 0000000..741d785 --- /dev/null +++ b/benchmark/rub/manifest.json @@ -0,0 +1,116 @@ +{ + "tasks": [ + { + "id": "ffr_425_01", + "source_case_id": "ffr_425_01", + "type": "application_form", + "question": "このExcel帳票(Federal Financial Report / SF-425)について、次の情報を抽出し、JSONのみで返してください。\n\n(1) checkbox_groups: チェックボックスのグループ名と、その選択肢ラベル一覧を抽出してください(\"Report Type\" と \"Basis of Accounting\" の2グループのみ)。\n(2) not_required_by_epa_scope: 赤字の注記 \"Not Required by EPA\" がかかっているセクション名を返してください(例: \"Federal Cash\")。\n(3) section_headers: 帳票上部の番号付きセクション見出し(1〜9)のうち、見出しテキストのみを配列で返してください(例: \"Federal Agency and Organizational Element to Which Report is Submitted\" など)。\n\n出力形式(厳守):\n{\n \"checkbox_groups\": {\n \"Report Type\": [\"Quarterly\", \"Semi-Annual\", \"Annual\", \"Final\"],\n \"Basis of Accounting\": [\"Cash\", \"Accrual\"]\n },\n \"not_required_by_epa_scope\": \"...\",\n \"section_headers\": [\"...\", \"...\", \"...\"]\n}\n\n注意:\n- チェックボックスの記号(□など)は含めないでください。ラベル文字列のみを返してください。\n- section_headers は表示順(上から左→右)で返してください。\n- 余分な説明文やコードフェンスは付けず、JSONのみを返してください。", + "truth": "rub/truth/ffr_425_01.json", + "track": "reconstruction" + }, + { + "id": "flowchart_01", + "source_case_id": "flowchart_01", + "type": "flowchart", + "question": "このフローチャートの開始から終了までの主要な処理ステップを順番に抽出し、次のJSON形式のみで返してください。\n\n出力形式(厳守):\n{\n \"steps\": [\"step1\", \"step2\", \"step3\", ...]\n}\n\n注意事項:\n- 開始ノードと終了ノードも含めてください\n- 分岐やループがある場合は、代表的な主経路として線形化してください\n- ステップ名は図中のラベル文字列をそのまま使用してください", + "truth": "rub/truth/flowchart_01.json", + "track": "reconstruction" + }, + { + "id": "gantt_01", + "source_case_id": "gantt_01", + "type": "gantt", + "question": 
"このガントチャートのPhase3のタスク名とその開始日、終了日を抽出し、次のJSON形式のみで返してください: {\"tasks\":[{\"name\":\"...\",\"start_date\":\"YYYY-MM-DD\",\"end_date\":\"YYYY-MM-DD\"}, ...]}", + "truth": "rub/truth/gantt_01.json", + "unordered_paths": [ + "tasks" + ], + "track": "reconstruction" + }, + { + "id": "certificate_of_employment_01", + "source_case_id": "certificate_of_employment_01", + "type": "application_form", + "question": "このExcel帳票(就労証明書)について、次の3点を抽出し、JSONのみで返してください。\n\n(1) checkbox_groups: 以下の3つのチェックボックス項目について、それぞれの選択肢ラベルを抽出してください。\n - 業種\n - 雇用の形態\n - 雇用(予定)期間等(無期 / 有期)\n\n(2) numbered_sections: 帳票の「No.」列に対応する番号付き項目の見出し(1〜14)を、番号をキーとして抽出してください。\n\n(3) warning_text: 赤字で記載されている注意文を、そのまま1つの文字列として抽出してください。\n\n出力形式(厳守):\n{\n \"checkbox_groups\": {\n \"業種\": [\"...\", \"...\"],\n \"雇用の形態\": [\"...\", \"...\"],\n \"雇用(予定)期間等\": [\"...\", \"...\"]\n },\n \"numbered_sections\": {\n \"1\": \"...\",\n \"2\": \"...\",\n \"3\": \"...\"\n },\n \"warning_text\": \"...\"\n}\n\n注意:\n- チェックボックス記号(□など)は含めず、ラベル文字列のみを返してください。\n- numbered_sections は 1〜14 すべてを含めてください。\n- 余分な説明文やコードフェンスは付けず、JSONのみを返してください。", + "truth": "rub/truth/certificate_of_employment_01.json", + "track": "reconstruction" + }, + { + "id": "tax_report_01", + "source_case_id": "tax_report_01", + "type": "application_form", + "question": "この市民税・県民税申告書の右側に配置されている縦方向の帳票構造を解析してください。\n\n次の条件をすべて満たすJSONを返してください。\n\n1. 「収入金額」ブロックに含まれる項目名を、上から順に配列で列挙してください。\n2. 上記項目群を視覚的にまとめている「合計」項目名を1つ指定してください。\n3. 「所得から差し引かれる金額」ブロックに含まれる項目名を、上から順に配列で列挙してください。\n4. 
上記控除項目群を視覚的にまとめている「合計」項目名を1つ指定してください。\n\n制約:\n- 項目名は帳票に記載されている日本語表記をそのまま使用してください。\n- 数値、記号、注釈文は含めないでください。\n- 同一列・同一枠内にある項目同士の位置関係に基づいて判断してください。\n- JSONのみを返してください。\n\n出力形式:\n{\n \"income_items\": [\"...\", \"...\"],\n \"income_total\": \"...\",\n \"deduction_items\": [\"...\", \"...\"],\n \"deduction_total\": \"...\"\n}", + "truth": "rub/truth/tax_report_01.json", + "track": "reconstruction" + }, + { + "id": "smartart_01", + "source_case_id": "smartart_01", + "type": "organization_chart", + "question": "このExcel帳票(SmartArtで作成された組織図)について、次の3点を抽出し、JSONのみで返してください。\n\n(1) top_structure: 最上位から第2階層までの組織構造を、親子関係が分かる形で抽出してください。\n\n(2) sales_departments: 「営業部」の直下にある課の名称を、上から順に配列で返してください。\n\n(3) production_sites: 「生産部」の直下にある工場名を、上から順に配列で返してください。\n\n出力形式(厳守):\n{\n \"top_structure\": {\n \"取締役会\": {\n \"社長\": [\"...\"]\n }\n },\n \"sales_departments\": [\"...\", \"...\"],\n \"production_sites\": [\"...\", \"...\"]\n}\n\n注意:\n- 図形の色や配置座標は含めないでください。テキスト内容と階層関係のみを対象とします。\n- 余分な説明文やコードフェンスは付けず、JSONのみを返してください。", + "truth": "rub/truth/smartart_01.json", + "unordered_paths": [ + "top_structure.取締役会.社長", + "sales_departments", + "production_sites" + ], + "track": "reconstruction" + }, + { + "id": "basic_01", + "source_case_id": "basic_01", + "type": "mixed_document", + "question": "このExcel帳票について、次の3点を抽出し、JSONのみで返してください。\n\n(1) sales_table: 左上の売上表について、月をキーとして各製品の数値を抽出してください。\n\n(2) chart_series: 右上の折れ線グラフに含まれる系列名を、凡例の表示順で配列として返してください。\n\n(3) flowchart_paths: 下部のフローチャートについて、開始から終了までの処理パスを条件付きで2通り抽出してください。\n - format_valid = true の場合の処理パス\n - format_valid = false の場合の処理パス\n\n出力形式(厳守):\n{\n \"sales_table\": {\n \"Jan-25\": {\"製品A\": 0, \"製品B\": 0, \"製品C\": 0},\n \"Feb-25\": {\"製品A\": 0, \"製品B\": 0, \"製品C\": 0}\n },\n \"chart_series\": [\"...\", \"...\"],\n \"flowchart_paths\": {\n \"format_valid_true\": [\"...\", \"...\"],\n \"format_valid_false\": [\"...\", \"...\"]\n }\n}\n\n注意:\n- 数値は整数で返してください。\n- フローチャートのパスは、図形内の文言をそのまま順番に並べてください。\n- 余分な説明文やコードフェンスは付けず、JSONのみを返してください。", + "truth": 
"rub/truth/basic_01.json", + "track": "reconstruction" + }, + { + "id": "heatstroke_flow_01", + "source_case_id": "heatstroke_flow_01", + "type": "flowchart", + "question": "このExcelに記載されている熱中症対応フローについて、上から順に各対応ステップを抽出してください。各ステップについて、step_name(工程名)、description(内容要約)、special_conditions(条件や注意事項がある場合のみ配列で記載)を含むJSONを、次の形式のみで返してください。\n\n{\n \"steps\": [\n {\n \"step_name\": \"...\",\n \"description\": \"...\",\n \"special_conditions\": [\"...\"]\n }\n ]\n}", + "truth": "rub/truth/heatstroke_flow_01.json", + "track": "reconstruction" + }, + { + "id": "workflow_01", + "source_case_id": "workflow_01", + "type": "workflow", + "question": "このExcelに記載されている業務フロー図(ネット注文フローチャート)について、工程を上から順に整理してください。各工程について、actor(実行主体)、step_name(工程名)、next_steps(次に進む工程名の配列)を含むJSONを、次の形式のみで返してください。\n\n{\n \"steps\": [\n {\n \"actor\": \"お客様|当社\",\n \"step_name\": \"...\",\n \"next_steps\": [\"...\"]\n }\n ]\n}", + "truth": "rub/truth/workflow_01.json", + "unordered_paths": [ + "steps", + "steps.next_steps" + ], + "track": "reconstruction" + }, + { + "id": "basic_form_01", + "source_case_id": "basic_form_01", + "type": "application_form", + "question": "このExcel申請書に記載されている入力項目を、意味的なブロック単位で整理してください。申請者本人に関する項目、配偶者に関する項目、収入等に関する申告、預貯金等に関する申告の4分類に分け、それぞれに含まれる項目名を配列でまとめたJSONを、次の形式のみで返してください。\n\n{\n \"applicant\": [],\n \"spouse\": [],\n \"income_declaration\": [],\n \"asset_declaration\": []\n}", + "truth": "rub/truth/basic_form_01.json", + "track": "reconstruction" + }, + { + "id": "flowchart_02", + "source_case_id": "flowchart_02", + "type": "flowchart", + "question": "このExcelに記載されているログイン処理フローについて、工程を上から順に整理してください。各工程について、step_name(工程名)、step_type(start|process|decision|end)、next_steps(条件付き遷移を含む次工程)を含むJSONを、次の形式のみで返してください。\n\n{\n \"steps\": [\n {\n \"step_name\": \"...\",\n \"step_type\": \"start|process|decision|end\",\n \"next_steps\": [\n {\n \"condition\": \"...\",\n \"next\": \"...\"\n }\n ]\n }\n ]\n}", + "truth": "rub/truth/flowchart_02.json", + "unordered_paths": [ + "steps", + 
"steps.next_steps" + ], + "track": "reconstruction" + }, + { + "id": "food_inspection_record_01", + "source_case_id": "food_inspection_record_01", + "type": "inspection_log", + "question": "This workbook contains three sheets (\"検食簿(1)\", \"検食簿 (2)\", \"検食簿 (3)\"). For the first date on each sheet, extract the lunch menu items and snack items and return JSON in the following format:\n\n{\n \"sheets\": {\n \"検食簿(1)\": {\"date\": \"...\", \"lunch_menu\": [\"...\"], \"snacks\": [\"...\"]},\n \"検食簿 (2)\": {\"date\": \"...\", \"lunch_menu\": [\"...\"], \"snacks\": [\"...\"]},\n \"検食簿 (3)\": {\"date\": \"...\", \"lunch_menu\": [\"...\"], \"snacks\": [\"...\"]}\n }\n}\n\nJSON only.", + "truth": "rub/truth/food_inspection_record_01.json", + "track": "reconstruction" + } + ] +} diff --git a/benchmark/rub/manifest_lite.json b/benchmark/rub/manifest_lite.json new file mode 100644 index 0000000..cc5ca99 --- /dev/null +++ b/benchmark/rub/manifest_lite.json @@ -0,0 +1,128 @@ +{ + "tasks": [ + { + "id": "ffr_425_01", + "source_case_id": "ffr_425_01", + "type": "application_form", + "question": "Extract section headers. JSON only: {\"section_headers\":[\"...\"]}", + "truth": "rub/truth_lite/ffr_425_01.json", + "unordered_paths": [ + "section_headers" + ], + "track": "reconstruction" + }, + { + "id": "certificate_of_employment_01", + "source_case_id": "certificate_of_employment_01", + "type": "application_form", + "question": "Extract section names. JSON only: {\"sections\":[\"...\"]}", + "truth": "rub/truth_lite/certificate_of_employment_01.json", + "unordered_paths": [ + "sections" + ], + "track": "reconstruction" + }, + { + "id": "tax_report_01", + "source_case_id": "tax_report_01", + "type": "application_form", + "question": "Extract income item labels. 
JSON only: {\"income_items\":[\"...\"]}", + "truth": "rub/truth_lite/tax_report_01.json", + "unordered_paths": [ + "income_items" + ], + "track": "reconstruction" + }, + { + "id": "basic_01", + "source_case_id": "basic_01", + "type": "mixed_document", + "question": "Extract chart series names. JSON only: {\"chart_series\":[\"...\"]}", + "truth": "rub/truth_lite/basic_01.json", + "unordered_paths": [ + "chart_series" + ], + "track": "reconstruction" + }, + { + "id": "heatstroke_flow_01", + "source_case_id": "heatstroke_flow_01", + "type": "flowchart", + "question": "Extract step names in order. JSON only: {\"steps\":[\"...\"]}", + "truth": "rub/truth_lite/heatstroke_flow_01.json", + "track": "reconstruction" + }, + { + "id": "workflow_01", + "source_case_id": "workflow_01", + "type": "workflow", + "question": "Extract node names. JSON only: {\"nodes\":[\"...\"]}", + "truth": "rub/truth_lite/workflow_01.json", + "unordered_paths": [ + "nodes" + ], + "track": "reconstruction" + }, + { + "id": "flowchart_02", + "source_case_id": "flowchart_02", + "type": "flowchart", + "question": "Extract node names. JSON only: {\"nodes\":[\"...\"]}", + "truth": "rub/truth_lite/flowchart_02.json", + "unordered_paths": [ + "nodes" + ], + "track": "reconstruction" + }, + { + "id": "food_inspection_record_01", + "source_case_id": "food_inspection_record_01", + "type": "inspection_log", + "question": "Extract first date per sheet. JSON only: {\"dates_by_sheet\": {\"sheet\": \"date\"}}", + "truth": "rub/truth_lite/food_inspection_record_01.json", + "track": "reconstruction" + }, + { + "id": "basic_form_01", + "source_case_id": "basic_form_01", + "type": "application_form", + "question": "Extract applicant field labels. 
JSON only: {\"applicant_fields\":[\"...\"]}", + "truth": "rub/truth_lite/basic_form_01.json", + "unordered_paths": [ + "applicant_fields" + ], + "track": "reconstruction" + }, + { + "id": "flowchart_01", + "source_case_id": "flowchart_01", + "type": "flowchart", + "question": "Extract flowchart step names in order. JSON only: {\"steps\":[\"...\"]}", + "truth": "rub/truth_lite/flowchart_01.json", + "track": "reconstruction" + }, + { + "id": "gantt_01", + "source_case_id": "gantt_01", + "type": "gantt", + "question": "Extract task names. JSON only: {\"task_names\":[\"...\"]}", + "truth": "rub/truth_lite/gantt_01.json", + "unordered_paths": [ + "task_names" + ], + "track": "reconstruction" + }, + { + "id": "smartart_01", + "source_case_id": "smartart_01", + "type": "organization_chart", + "question": "Extract sales_departments and production_sites. JSON only: {\"sales_departments\":[\"...\"],\"production_sites\":[\"...\"]}", + "truth": "rub/truth_lite/smartart_01.json", + "unordered_paths": [ + "sales_departments", + "production_sites" + ], + "track": "reconstruction" + } + ] +} diff --git a/benchmark/rub/truth/basic_01.json b/benchmark/rub/truth/basic_01.json new file mode 100644 index 0000000..53bca60 --- /dev/null +++ b/benchmark/rub/truth/basic_01.json @@ -0,0 +1,31 @@ +{ + "sales_table": { + "Jan-25": { "製品A": 120, "製品B": 80, "製品C": 60 }, + "Feb-25": { "製品A": 135, "製品B": 90, "製品C": 64 }, + "Mar-25": { "製品A": 150, "製品B": 100, "製品C": 70 }, + "Apr-25": { "製品A": 170, "製品B": 110, "製品C": 72 }, + "May-25": { "製品A": 160, "製品B": 120, "製品C": 75 }, + "Jun-25": { "製品A": 180, "製品B": 130, "製品C": 80 } + }, + "chart_series": ["製品A", "製品B", "製品C"], + "flowchart_paths": { + "format_valid_true": [ + "開始", + "入力データ読み込み", + "形式は正しい?", + "1件処理", + "残件あり?", + "出力を生成", + "メール送信?", + "メール送信", + "終了" + ], + "format_valid_false": [ + "開始", + "入力データ読み込み", + "形式は正しい?", + "エラー表示", + "終了" + ] + } +} diff --git a/benchmark/rub/truth/basic_form_01.json b/benchmark/rub/truth/basic_form_01.json 
new file mode 100644 index 0000000..cd8ca3b --- /dev/null +++ b/benchmark/rub/truth/basic_form_01.json @@ -0,0 +1,34 @@ +{ + "applicant": [ + "フリガナ", + "被保険者氏名", + "生年月日", + "住所", + "連絡先", + "入所(院)した介護保険施設の所在地及び名称", + "入所(院)年月日" + ], + "spouse": [ + "配偶者の有無", + "配偶者氏名", + "配偶者生年月日", + "配偶者個人番号", + "配偶者住所", + "配偶者連絡先", + "本年1月1日現在の住所", + "課税状況" + ], + "income_declaration": [ + "生活保護受給者に該当するか", + "市町村民税非課税世帯であるか", + "課税年金収入額", + "その他の合計所得金額", + "年金の種類に関する申告" + ], + "asset_declaration": [ + "預貯金額", + "有価証券の金額", + "その他の資産額", + "配偶者の預貯金等を含むかどうか" + ] +} diff --git a/benchmark/rub/truth/certificate_of_employment_01.json b/benchmark/rub/truth/certificate_of_employment_01.json new file mode 100644 index 0000000..9dc4cd3 --- /dev/null +++ b/benchmark/rub/truth/certificate_of_employment_01.json @@ -0,0 +1,58 @@ +{ + "checkbox_groups": { + "業種": [ + "農業・林業", + "漁業", + "鉱業・採石業・砂利採取業", + "建設業", + "製造業", + "電気・ガス・熱供給・水道業", + "情報通信業", + "運輸業・郵便業", + "卸売業・小売業", + "金融業・保険業", + "不動産業・物品賃貸業", + "学術研究・専門・技術サービス", + "宿泊業・飲食サービス業", + "生活関連サービス業・娯楽業", + "医療・福祉", + "教育・学習支援業", + "複合サービス事業", + "公務", + "その他" + ], + "雇用の形態": [ + "正社員", + "パート・アルバイト", + "派遣社員", + "契約社員", + "会計年度任用職員", + "非常勤・臨時職員", + "役員", + "自営業主", + "自営業専従者", + "家族従業者", + "内職", + "業務委託", + "その他" + ], + "雇用(予定)期間等": ["無期", "有期"] + }, + "numbered_sections": { + "1": "業種", + "2": "本人氏名", + "3": "雇用(予定)期間等", + "4": "本人就労先事業所", + "5": "雇用の形態", + "6": "就労時間(固定就労の場合)", + "7": "就労時間(変則就労の場合)", + "8": "就労実績", + "9": "産前・産後休業の取得", + "10": "育児休業の取得", + "11": "産休・育休以外の休業の取得", + "12": "復職(予定)年月日", + "13": "育児のための短時間勤務制度利用有無", + "14": "保育士等としての勤務実態の有無" + }, + "warning_text": "※本証明書の内容について、就労先事業者等に無断で作成又は改変を行ったときは、刑法上の罪に問われる場合があります。" +} diff --git a/benchmark/rub/truth/ffr_425_01.json b/benchmark/rub/truth/ffr_425_01.json new file mode 100644 index 0000000..a53b43d --- /dev/null +++ b/benchmark/rub/truth/ffr_425_01.json @@ -0,0 +1,18 @@ +{ + "checkbox_groups": { + "Report Type": ["Quarterly", "Semi-Annual", "Annual", "Final"], + "Basis of 
Accounting": ["Cash", "Accrual"] + }, + "not_required_by_epa_scope": "Federal Cash", + "section_headers": [ + "Federal Agency and Organizational Element to Which Report is Submitted", + "Federal Grant or Other Identifying Number Assigned by Federal Agency", + "Recipient Organization (Name and complete address including Zip code)", + "DUNS Number", + "Recipient Account Number or Identifying Number", + "Report Type", + "Basis of Accounting", + "Project/Grant Period", + "Reporting Period End Date" + ] +} diff --git a/benchmark/rub/truth/flowchart_01.json b/benchmark/rub/truth/flowchart_01.json new file mode 100644 index 0000000..196bc95 --- /dev/null +++ b/benchmark/rub/truth/flowchart_01.json @@ -0,0 +1,14 @@ +{ + "steps": [ + "S", + "要件抽出", + "ヒアリング", + "非機能要件", + "思考実験", + "再検証", + "まとめ", + "文書作成", + "締結", + "E" + ] +} diff --git a/benchmark/rub/truth/flowchart_02.json b/benchmark/rub/truth/flowchart_02.json new file mode 100644 index 0000000..868d040 --- /dev/null +++ b/benchmark/rub/truth/flowchart_02.json @@ -0,0 +1,87 @@ +{ + "steps": [ + { + "step_name": "ログイン画面", + "step_type": "start", + "next_steps": [ + { + "condition": "always", + "next": "登録情報を入力" + } + ] + }, + { + "step_name": "登録情報を入力", + "step_type": "process", + "next_steps": [ + { + "condition": "always", + "next": "入力内容は正しいか" + } + ] + }, + { + "step_name": "入力内容は正しいか", + "step_type": "decision", + "next_steps": [ + { + "condition": "はい", + "next": "サーバーに認証リクエストを送信" + }, + { + "condition": "いいえ", + "next": "再入力を提示" + } + ] + }, + { + "step_name": "再入力を提示", + "step_type": "process", + "next_steps": [ + { + "condition": "always", + "next": "登録情報を入力" + } + ] + }, + { + "step_name": "サーバーに認証リクエストを送信", + "step_type": "process", + "next_steps": [ + { + "condition": "always", + "next": "認証に成功か" + } + ] + }, + { + "step_name": "認証に成功か", + "step_type": "decision", + "next_steps": [ + { + "condition": "はい", + "next": "操作画面に遷移" + }, + { + "condition": "いいえ", + "next": "認証エラーを提示" + } + ] + }, + { + 
"step_name": "認証エラーを提示", + "step_type": "process", + "next_steps": [ + { + "condition": "always", + "next": "再入力を提示" + } + ] + }, + { + "step_name": "操作画面に遷移", + "step_type": "end", + "next_steps": [] + } + ] +} diff --git a/benchmark/rub/truth/food_inspection_record_01.json b/benchmark/rub/truth/food_inspection_record_01.json new file mode 100644 index 0000000..3ca2f65 --- /dev/null +++ b/benchmark/rub/truth/food_inspection_record_01.json @@ -0,0 +1,51 @@ +{ + "sheets": { + "検食簿(1)": { + "date": "12月1日(月)", + "lunch_menu": [ + "麦ごはん", + "鶏肉の照り焼き", + "白菜のごま和え", + "切干大根の煮物", + "味噌汁(キャベツ)" + ], + "snacks": [ + "スキムミルク", + "ウエハース", + "スキムミルク", + "お菓子・こんぶ" + ] + }, + "検食簿 (2)": { + "date": "12月8日(月)", + "lunch_menu": [ + "麦ごはん", + "鶏肉の唐揚げ", + "キャベツ", + "ひじきの中華和え", + "すまし汁(麩)" + ], + "snacks": [ + "スキムミルク", + "ボーロ", + "スキムミルク", + "おからケーキ" + ] + }, + "検食簿 (3)": { + "date": "12月15日(月)", + "lunch_menu": [ + "麦ごはん", + "豚肉と野菜の煮物", + "大豆のサラダ", + "味噌汁(なす)" + ], + "snacks": [ + "スキムミルク", + "ウエハース", + "スキムミルク", + "お菓子・こんぶ" + ] + } + } +} \ No newline at end of file diff --git a/benchmark/rub/truth/gantt_01.json b/benchmark/rub/truth/gantt_01.json new file mode 100644 index 0000000..aebdd03 --- /dev/null +++ b/benchmark/rub/truth/gantt_01.json @@ -0,0 +1,24 @@ +{ + "tasks": [ + { + "name": "Core Feature Dev", + "start_date": "2026-01-26", + "end_date": "2026-02-03" + }, + { + "name": "Edge Case Handling", + "start_date": "2026-01-27", + "end_date": "2026-02-03" + }, + { + "name": "Integration Work", + "start_date": "2026-01-29", + "end_date": "2026-02-04" + }, + { + "name": "Internal Review", + "start_date": "2026-02-01", + "end_date": "2026-02-04" + } + ] +} diff --git a/benchmark/rub/truth/heatstroke_flow_01.json b/benchmark/rub/truth/heatstroke_flow_01.json new file mode 100644 index 0000000..54ce1a0 --- /dev/null +++ b/benchmark/rub/truth/heatstroke_flow_01.json @@ -0,0 +1,60 @@ +{ + "steps": [ + { + "step_name": "発見", + "description": "熱中症が疑われる症状があるかを確認する。", + 
"special_conditions": [ + "めまい", + "失神", + "筋肉痛", + "筋肉の硬直", + "大量の発汗", + "頭痛", + "嘔吐", + "意識障害", + "けいれん", + "高体温" + ] + }, + { + "step_name": "報告", + "description": "作業管理者および緊急連絡先へ状況を報告する。", + "special_conditions": [] + }, + { + "step_name": "初期対応", + "description": "涼しい場所への移動、水分補給、体を冷やすなどの応急処置を行う。", + "special_conditions": [ + "WBGT値が28度以上の場合は作業を中断する", + "気温が31度以上の場合は作業を中断する" + ] + }, + { + "step_name": "医療機関搬送・救急要請", + "description": "症状に応じて医療機関へ搬送するか救急要請を行う。", + "special_conditions": [ + "意識がない場合は119番通報する", + "応答が曖昧な場合は119番通報する", + "高熱が続く場合は119番通報する", + "けいれんなど重症の兆候がある場合は119番通報する" + ] + }, + { + "step_name": "事後対応・記録", + "description": "発生状況や対応内容を記録し、保存および定期的な見直しを行う。", + "special_conditions": [ + "発生日時", + "場所", + "WBGT値", + "気温", + "作業内容", + "作業時間", + "症状", + "初期対応内容", + "報告先", + "搬送有無", + "最終対応" + ] + } + ] +} diff --git a/benchmark/rub/truth/smartart_01.json b/benchmark/rub/truth/smartart_01.json new file mode 100644 index 0000000..2a22af1 --- /dev/null +++ b/benchmark/rub/truth/smartart_01.json @@ -0,0 +1,9 @@ +{ + "top_structure": { + "取締役会": { + "社長": ["企画管理部", "営業部", "開発部", "技術部", "生産部", "総務部"] + } + }, + "sales_departments": ["第1営業課", "第2営業課", "第3営業課", "海外営業課"], + "production_sites": ["愛知工場", "山形工場", "高知工場"] +} diff --git a/benchmark/rub/truth/tax_report_01.json b/benchmark/rub/truth/tax_report_01.json new file mode 100644 index 0000000..174476d --- /dev/null +++ b/benchmark/rub/truth/tax_report_01.json @@ -0,0 +1,29 @@ +{ + "income_items": [ + "事業(営業等)", + "事業(農業)", + "不動産", + "利子", + "配当", + "給与", + "公的年金等", + "業務", + "その他" + ], + "income_total": "合計", + "deduction_items": [ + "社会保険料控除", + "小規模企業共済等掛金控除", + "生命保険料控除", + "地震保険料控除", + "寡婦、ひとり親控除", + "勤労学生控除", + "配偶者(特別)控除", + "扶養控除", + "障害者控除", + "基礎控除", + "雑損控除", + "医療費控除" + ], + "deduction_total": "合計" +} diff --git a/benchmark/rub/truth/workflow_01.json b/benchmark/rub/truth/workflow_01.json new file mode 100644 index 0000000..ea7d3c3 --- /dev/null +++ b/benchmark/rub/truth/workflow_01.json @@ -0,0 
+1,84 @@ +{ + "steps": [ + { + "actor": "お客様", + "step_name": "商品検索", + "next_steps": ["検討"] + }, + { + "actor": "当社", + "step_name": "商品情報を表示", + "next_steps": ["検討"] + }, + { + "actor": "お客様", + "step_name": "検討", + "next_steps": ["キャンセル", "カートに追加"] + }, + { + "actor": "お客様", + "step_name": "キャンセル", + "next_steps": [] + }, + { + "actor": "お客様", + "step_name": "カートに追加", + "next_steps": ["在庫確認"] + }, + { + "actor": "当社", + "step_name": "在庫確認", + "next_steps": ["レジに進む"] + }, + { + "actor": "お客様", + "step_name": "レジに進む", + "next_steps": ["支払い方法の選択"] + }, + { + "actor": "お客様", + "step_name": "支払い方法の選択", + "next_steps": ["支払いの案内"] + }, + { + "actor": "当社", + "step_name": "支払いの案内", + "next_steps": ["支払い処理"] + }, + { + "actor": "当社", + "step_name": "支払い処理", + "next_steps": ["注文の確定"] + }, + { + "actor": "お客様", + "step_name": "注文の確定", + "next_steps": ["配送先入力"] + }, + { + "actor": "お客様", + "step_name": "配送先入力", + "next_steps": ["配送先確認"] + }, + { + "actor": "当社", + "step_name": "配送先確認", + "next_steps": ["注文確認メールを送信"] + }, + { + "actor": "当社", + "step_name": "注文確認メールを送信", + "next_steps": ["商品を準備・発送"] + }, + { + "actor": "当社", + "step_name": "商品を準備・発送", + "next_steps": ["商品受取"] + }, + { + "actor": "お客様", + "step_name": "商品受取", + "next_steps": [] + } + ] +} diff --git a/benchmark/rub/truth_lite/basic_01.json b/benchmark/rub/truth_lite/basic_01.json new file mode 100644 index 0000000..b4d2709 --- /dev/null +++ b/benchmark/rub/truth_lite/basic_01.json @@ -0,0 +1,7 @@ +{ + "chart_series": [ + "製品A", + "製品B", + "製品C" + ] +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/basic_form_01.json b/benchmark/rub/truth_lite/basic_form_01.json new file mode 100644 index 0000000..b65c3a5 --- /dev/null +++ b/benchmark/rub/truth_lite/basic_form_01.json @@ -0,0 +1,11 @@ +{ + "applicant_fields": [ + "フリガナ", + "被保険者氏名", + "生年月日", + "住所", + "連絡先", + "入所(院)した介護保険施設の所在地及び名称", + "入所(院)年月日" + ] +} \ No newline at end of file diff --git 
a/benchmark/rub/truth_lite/certificate_of_employment_01.json b/benchmark/rub/truth_lite/certificate_of_employment_01.json new file mode 100644 index 0000000..2cf13e4 --- /dev/null +++ b/benchmark/rub/truth_lite/certificate_of_employment_01.json @@ -0,0 +1,18 @@ +{ + "sections": [ + "業種", + "本人氏名", + "雇用(予定)期間等", + "本人就労先事業所", + "雇用の形態", + "就労時間(固定就労の場合)", + "就労時間(変則就労の場合)", + "就労実績", + "産前・産後休業の取得", + "育児休業の取得", + "産休・育休以外の休業の取得", + "復職(予定)年月日", + "育児のための短時間勤務制度利用有無", + "保育士等としての勤務実態の有無" + ] +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/ffr_425_01.json b/benchmark/rub/truth_lite/ffr_425_01.json new file mode 100644 index 0000000..dba5797 --- /dev/null +++ b/benchmark/rub/truth_lite/ffr_425_01.json @@ -0,0 +1,13 @@ +{ + "section_headers": [ + "Federal Agency and Organizational Element to Which Report is Submitted", + "Federal Grant or Other Identifying Number Assigned by Federal Agency", + "Recipient Organization (Name and complete address including Zip code)", + "DUNS Number", + "Recipient Account Number or Identifying Number", + "Report Type", + "Basis of Accounting", + "Project/Grant Period", + "Reporting Period End Date" + ] +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/flowchart_01.json b/benchmark/rub/truth_lite/flowchart_01.json new file mode 100644 index 0000000..b3b354c --- /dev/null +++ b/benchmark/rub/truth_lite/flowchart_01.json @@ -0,0 +1,14 @@ +{ + "steps": [ + "S", + "要件抽出", + "ヒアリング", + "非機能要件", + "思考実験", + "再検証", + "まとめ", + "文書作成", + "締結", + "E" + ] +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/flowchart_02.json b/benchmark/rub/truth_lite/flowchart_02.json new file mode 100644 index 0000000..3c2ecac --- /dev/null +++ b/benchmark/rub/truth_lite/flowchart_02.json @@ -0,0 +1,12 @@ +{ + "nodes": [ + "ログイン画面", + "登録情報を入力", + "入力内容は正しいか", + "再入力を提示", + "サーバーに認証リクエストを送信", + "認証に成功か", + "認証エラーを提示", + "操作画面に遷移" + ] +} \ No newline at end of file diff --git 
a/benchmark/rub/truth_lite/food_inspection_record_01.json b/benchmark/rub/truth_lite/food_inspection_record_01.json new file mode 100644 index 0000000..cf2f64f --- /dev/null +++ b/benchmark/rub/truth_lite/food_inspection_record_01.json @@ -0,0 +1,7 @@ +{ + "dates_by_sheet": { + "検食簿(1)": "12月1日(月)", + "検食簿 (2)": "12月8日(月)", + "検食簿 (3)": "12月15日(月)" + } +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/gantt_01.json b/benchmark/rub/truth_lite/gantt_01.json new file mode 100644 index 0000000..e9823e9 --- /dev/null +++ b/benchmark/rub/truth_lite/gantt_01.json @@ -0,0 +1,8 @@ +{ + "task_names": [ + "Core Feature Dev", + "Edge Case Handling", + "Integration Work", + "Internal Review" + ] +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/heatstroke_flow_01.json b/benchmark/rub/truth_lite/heatstroke_flow_01.json new file mode 100644 index 0000000..beb233c --- /dev/null +++ b/benchmark/rub/truth_lite/heatstroke_flow_01.json @@ -0,0 +1,9 @@ +{ + "steps": [ + "発見", + "報告", + "初期対応", + "医療機関搬送・救急要請", + "事後対応・記録" + ] +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/smartart_01.json b/benchmark/rub/truth_lite/smartart_01.json new file mode 100644 index 0000000..051fd53 --- /dev/null +++ b/benchmark/rub/truth_lite/smartart_01.json @@ -0,0 +1,13 @@ +{ + "sales_departments": [ + "第1営業課", + "第2営業課", + "第3営業課", + "海外営業課" + ], + "production_sites": [ + "愛知工場", + "山形工場", + "高知工場" + ] +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/tax_report_01.json b/benchmark/rub/truth_lite/tax_report_01.json new file mode 100644 index 0000000..369bd8e --- /dev/null +++ b/benchmark/rub/truth_lite/tax_report_01.json @@ -0,0 +1,13 @@ +{ + "income_items": [ + "事業(営業等)", + "事業(農業)", + "不動産", + "利子", + "配当", + "給与", + "公的年金等", + "業務", + "その他" + ] +} \ No newline at end of file diff --git a/benchmark/rub/truth_lite/workflow_01.json b/benchmark/rub/truth_lite/workflow_01.json new file mode 100644 index 0000000..e1afa6e --- 
/dev/null +++ b/benchmark/rub/truth_lite/workflow_01.json @@ -0,0 +1,20 @@ +{ + "nodes": [ + "商品検索", + "商品情報を表示", + "検討", + "キャンセル", + "カートに追加", + "在庫確認", + "レジに進む", + "支払い方法の選択", + "支払いの案内", + "支払い処理", + "注文の確定", + "配送先入力", + "配送先確認", + "注文確認メールを送信", + "商品を準備・発送", + "商品受取" + ] +} \ No newline at end of file diff --git a/benchmark/scripts/publicize.ps1 b/benchmark/scripts/publicize.ps1 new file mode 100644 index 0000000..a84fc2e --- /dev/null +++ b/benchmark/scripts/publicize.ps1 @@ -0,0 +1,14 @@ +Set-StrictMode -Version Latest +$ErrorActionPreference = "Stop" + +$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$benchDir = Split-Path -Parent $scriptDir + +$venvPython = Join-Path $benchDir ".venv\Scripts\python" +if (Test-Path $venvPython) { + & $venvPython (Join-Path $scriptDir "publicize.py") + exit $LASTEXITCODE +} + +python (Join-Path $scriptDir "publicize.py") +exit $LASTEXITCODE diff --git a/benchmark/scripts/publicize.py b/benchmark/scripts/publicize.py new file mode 100644 index 0000000..88b5543 --- /dev/null +++ b/benchmark/scripts/publicize.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import shutil +from pathlib import Path + + +def _copy_file(src: Path, dest: Path) -> None: + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dest) + + +def _copy_glob(src_dir: Path, pattern: str, dest_dir: Path) -> None: + for path in src_dir.glob(pattern): + if path.is_file(): + _copy_file(path, dest_dir / path.name) + + +def main() -> int: + root = Path(__file__).resolve().parents[1] + public_dir = root / "public" + + report_src = root / "REPORT.md" + if report_src.exists(): + _copy_file(report_src, public_dir / "REPORT.md") + + plots_dir = root / "outputs" / "plots" + if plots_dir.exists(): + _copy_glob(plots_dir, "*.png", public_dir / "plots") + + results_report = root / "outputs" / "results" / "report.md" + if results_report.exists(): + _copy_file(results_report, public_dir / "reports" / "results_report.md") + + 
rub_results_dir = root / "outputs" / "rub" / "results" + if rub_results_dir.exists(): + for name in [ + "report.md", + "report_gpt4o.md", + "report_gpt41.md", + "compare_gpt4o_gpt41.md", + ]: + src = rub_results_dir / name + if src.exists(): + dest_name = name.replace("report", "rub_report", 1) + _copy_file(src, public_dir / "reports" / dest_name) + + index_path = public_dir / "INDEX.md" + lines = [ + "# Public Bundle", + "", + "This directory contains the public-ready benchmark artifacts.", + "", + "## Contents", + "- REPORT.md", + "- reports/", + "- plots/", + "", + "Generated by scripts/publicize.py.", + "", + ] + index_path.write_text("\n".join(lines), encoding="utf-8") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/benchmark/scripts/publicize.sh b/benchmark/scripts/publicize.sh new file mode 100644 index 0000000..14280dd --- /dev/null +++ b/benchmark/scripts/publicize.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +bench_dir="$(dirname "$script_dir")" + +python_bin="$bench_dir/.venv/bin/python" +if [[ -f "$python_bin" ]]; then + "$python_bin" "$script_dir/publicize.py" +else + python "$script_dir/publicize.py" +fi diff --git a/benchmark/scripts/reproduce.ps1 b/benchmark/scripts/reproduce.ps1 new file mode 100644 index 0000000..a799cf4 --- /dev/null +++ b/benchmark/scripts/reproduce.ps1 @@ -0,0 +1,61 @@ +param( + [string]$Case = "all", + [string]$Method = "all", + [string]$Model = "gpt-4o", + [double]$Temperature = 0.0, + [switch]$SkipAsk +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = "Stop" + +function Write-Info { + param([string]$Message) + Write-Host "[reproduce] $Message" +} + +$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$benchDir = Split-Path -Parent $scriptDir +$repoDir = Split-Path -Parent $benchDir + +Push-Location $benchDir + +try { + if (-not (Test-Path ".env")) { + Write-Info "Copying .env.example -> .env 
(remember to set OPENAI_API_KEY)." + Copy-Item ".env.example" ".env" + } + + if (-not (Test-Path ".venv")) { + Write-Info "Creating virtual environment." + python -m venv .venv + } + + $python = ".venv\\Scripts\\python" + if (-not (Test-Path $python)) { + throw "Python venv not found at $python" + } + + Write-Info "Installing dependencies." + & $python -m pip install -e $repoDir + & $python -m pip install -e . + + Write-Info "Extracting contexts." + & $python -m bench.cli extract --case $Case --method $Method + + if (-not $SkipAsk) { + Write-Info "Running LLM inference." + & $python -m bench.cli ask --case $Case --method $Method --model $Model --temperature $Temperature + } else { + Write-Info "Skipping LLM inference." + } + + Write-Info "Evaluating results." + & $python -m bench.cli eval --case $Case --method $Method + + Write-Info "Generating reports." + & $python -m bench.cli report + & $python -m bench.cli report-public +} finally { + Pop-Location +} diff --git a/benchmark/scripts/reproduce.sh b/benchmark/scripts/reproduce.sh new file mode 100644 index 0000000..e838e91 --- /dev/null +++ b/benchmark/scripts/reproduce.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +set -euo pipefail + +CASE="all" +METHOD="all" +MODEL="gpt-4o" +TEMPERATURE="0.0" +SKIP_ASK="false" + +while [[ $# -gt 0 ]]; do + case "$1" in + --case) CASE="$2"; shift 2 ;; + --method) METHOD="$2"; shift 2 ;; + --model) MODEL="$2"; shift 2 ;; + --temperature) TEMPERATURE="$2"; shift 2 ;; + --skip-ask) SKIP_ASK="true"; shift ;; + *) echo "Unknown arg: $1" >&2; exit 1 ;; + esac +done + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +bench_dir="$(dirname "$script_dir")" +repo_dir="$(dirname "$bench_dir")" + +cd "$bench_dir" + +if [[ ! -f ".env" ]]; then + echo "[reproduce] Copying .env.example -> .env (remember to set OPENAI_API_KEY)." + cp .env.example .env +fi + +if [[ ! -d ".venv" ]]; then + echo "[reproduce] Creating virtual environment." 
+ python -m venv .venv +fi + +python_bin=".venv/bin/python" +if [[ ! -f "$python_bin" ]]; then + echo "Python venv not found at $python_bin" >&2 + exit 1 +fi + +echo "[reproduce] Installing dependencies." +"$python_bin" -m pip install -e "$repo_dir" +"$python_bin" -m pip install -e . + +echo "[reproduce] Extracting contexts." +"$python_bin" -m bench.cli extract --case "$CASE" --method "$METHOD" + +if [[ "$SKIP_ASK" == "true" ]]; then + echo "[reproduce] Skipping LLM inference." +else + echo "[reproduce] Running LLM inference." + "$python_bin" -m bench.cli ask --case "$CASE" --method "$METHOD" --model "$MODEL" --temperature "$TEMPERATURE" +fi + +echo "[reproduce] Evaluating results." +"$python_bin" -m bench.cli eval --case "$CASE" --method "$METHOD" + +echo "[reproduce] Generating reports." +"$python_bin" -m bench.cli report +"$python_bin" -m bench.cli report-public diff --git a/benchmark/src/bench/__init__.py b/benchmark/src/bench/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/src/bench/cli.py b/benchmark/src/bench/cli.py new file mode 100644 index 0000000..35769fc --- /dev/null +++ b/benchmark/src/bench/cli.py @@ -0,0 +1,1298 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from pydantic import BaseModel +from rich import print +from rich.console import Console +import typer + +from .eval.markdown_render import render_markdown +from .eval.markdown_score import markdown_coverage_score, markdown_precision_score +from .eval.normalize import normalize_json_text +from .eval.normalization_rules import load_ruleset +from .eval.report import write_results_csv +from .eval.raw_match import raw_coverage_score, raw_precision_score +from .eval.score import ( + key_score, + key_score_normalized, + key_score_ordered, + key_score_ordered_normalized, +) +from .llm.openai_client import OpenAIResponsesClient +from .manifest import Case, load_manifest +from .paths import ( + DATA_DIR, + 
EXTRACTED_DIR, + MARKDOWN_DIR, + MARKDOWN_FULL_DIR, + MARKDOWN_FULL_RESPONSES_DIR, + MARKDOWN_RESPONSES_DIR, + PROMPTS_DIR, + RESPONSES_DIR, + RESULTS_DIR, + RUB_MANIFEST, + RUB_OUT_DIR, + RUB_PROMPTS_DIR, + RUB_RESPONSES_DIR, + RUB_RESULTS_DIR, + resolve_path, +) +from .pipeline.common import ensure_dir, sha256_text, write_json +from .pipeline.exstruct_adapter import extract_exstruct +from .pipeline.html_text import html_to_text, xlsx_to_html +from .pipeline.image_render import xlsx_to_pngs_via_pdf +from .pipeline.openpyxl_pandas import extract_openpyxl +from .pipeline.pdf_text import pdf_to_text, xlsx_to_pdf +from .report_public import generate_charts, load_report_data, update_public_report +from .rub.manifest import RubTask, load_rub_manifest +from .rub.score import RubPartialScore, score_exact, score_partial + +app = typer.Typer(add_completion=False) +console = Console() + +METHODS_TEXT = ("exstruct", "openpyxl", "pdf", "html") +METHODS_ALL = METHODS_TEXT + ("image_vlm",) + + +class PromptRecord(BaseModel): + """Prompt metadata saved for each request.""" + + case_id: str + method: str + model: str + temperature: float + question: str + prompt_hash: str + images: list[str] | None = None + + +class ResponseRecord(BaseModel): + """Response metadata saved for each request.""" + + case_id: str + method: str + model: str + temperature: float + prompt_hash: str + text: str + input_tokens: int + output_tokens: int + cost_usd: float + raw: dict[str, Any] + + +class MarkdownRecord(BaseModel): + """Markdown conversion metadata saved for each request.""" + + case_id: str + method: str + model: str + temperature: float + prompt_hash: str + text: str + input_tokens: int + output_tokens: int + cost_usd: float + raw: dict[str, Any] + + +class RubResponseRecord(BaseModel): + """RUB response metadata saved for each request.""" + + task_id: str + source_case_id: str + method: str + model: str + temperature: float + prompt_hash: str + question: str + text: str + input_tokens: int 
+ output_tokens: int + cost_usd: float + raw: dict[str, Any] + + +class RubResultRow(BaseModel): + """RUB evaluation row for CSV output.""" + + task_id: str + source_case_id: str + type: str + track: str + method: str + model: str | None + score: float + partial_precision: float | None = None + partial_recall: float | None = None + partial_f1: float | None = None + ok: bool + input_tokens: int + output_tokens: int + cost_usd: float + error: str | None + + +class ResultRow(BaseModel): + """Evaluation row for CSV output.""" + + case_id: str + type: str + method: str + model: str | None + score: float + score_ordered: float + score_norm: float | None = None + score_norm_ordered: float | None = None + score_raw: float | None = None + score_raw_precision: float | None = None + score_md: float | None = None + score_md_precision: float | None = None + ok: bool + input_tokens: int + output_tokens: int + cost_usd: float + error: str | None + + +def _manifest_path() -> Path: + """Return the path to the benchmark manifest. + + Returns: + Path to manifest.json. + """ + return DATA_DIR / "manifest.json" + + +def _select_cases(manifest_cases: list[Case], case: str) -> list[Case]: + """Select benchmark cases by id list or all. + + Args: + manifest_cases: List of cases from the manifest. + case: Comma-separated case ids or "all". + + Returns: + Filtered list of cases. + """ + if case == "all": + return manifest_cases + ids = {c.strip() for c in case.split(",") if c.strip()} + return [c for c in manifest_cases if c.id in ids] + + +def _select_methods(method: str) -> list[str]: + """Select methods by list or all, validating against known methods. + + Args: + method: Comma-separated method names or "all". + + Returns: + Ordered list of validated methods. 
+ """ + if method == "all": + selected = list(METHODS_ALL) + else: + selected = [m.strip() for m in method.split(",") if m.strip()] + + seen: set[str] = set() + deduped = [m for m in selected if not (m in seen or seen.add(m))] + invalid = [m for m in deduped if m not in METHODS_ALL] + if invalid: + raise typer.BadParameter( + f"Unknown method(s): {', '.join(invalid)}. Allowed: {', '.join(METHODS_ALL)}" + ) + if not deduped: + raise typer.BadParameter("No methods selected.") + return deduped + + +def _rub_manifest_path(manifest_path: str | None) -> Path: + """Return the path to the RUB manifest. + + Args: + manifest_path: Optional override path from CLI. + + Returns: + Path to the RUB manifest file. + """ + if manifest_path: + return resolve_path(manifest_path) + return RUB_MANIFEST + + +def _select_tasks(tasks: list[RubTask], task: str) -> list[RubTask]: + """Select RUB tasks by id list or all. + + Args: + tasks: Task list from the RUB manifest. + task: Comma-separated task ids or "all". + + Returns: + Filtered list of tasks. + """ + if task == "all": + return tasks + ids = {t.strip() for t in task.split(",") if t.strip()} + return [t for t in tasks if t.id in ids] + + +def _resolve_task_path(path_str: str, *, task_id: str, label: str) -> Path | None: + """Resolve a RUB manifest path, warning if missing. + + Args: + path_str: Path string from the manifest. + task_id: Task identifier for log messages. + label: Label for the path type (e.g., "truth"). + + Returns: + Resolved Path if it exists, otherwise None. + """ + resolved = resolve_path(path_str) + if resolved.exists(): + return resolved + print(f"[yellow]skip: missing {label} for {task_id}: {resolved}[/yellow]") + return None + + +def _resolve_case_path(path_str: str, *, case_id: str, label: str) -> Path | None: + """Resolve a manifest path, warning if missing. + + Args: + path_str: Path string from the manifest. + case_id: Case identifier for log messages. 
+ label: Label for the path type (e.g., "xlsx", "truth"). + + Returns: + Resolved Path if it exists, otherwise None. + """ + resolved = resolve_path(path_str) + if resolved.exists(): + return resolved + print(f"[yellow]skip: missing {label} for {case_id}: {resolved}[/yellow]") + return None + + +def _reset_case_outputs(case_id: str) -> None: + """Delete existing prompt/response logs for a case.""" + for directory in (PROMPTS_DIR, RESPONSES_DIR): + path = directory / f"{case_id}.jsonl" + if path.exists(): + path.unlink() + + +def _reset_rub_outputs(task_id: str) -> None: + """Delete existing RUB prompt/response logs for a task.""" + for directory in (RUB_PROMPTS_DIR, RUB_RESPONSES_DIR): + path = directory / f"{task_id}.jsonl" + if path.exists(): + path.unlink() + + +def _reset_markdown_outputs(case_id: str) -> None: + """Delete existing markdown logs for a case.""" + path = MARKDOWN_RESPONSES_DIR / f"{case_id}.jsonl" + if path.exists(): + path.unlink() + + +def _reset_markdown_full_outputs(case_id: str) -> None: + """Delete existing full-markdown logs for a case.""" + path = MARKDOWN_FULL_RESPONSES_DIR / f"{case_id}.jsonl" + if path.exists(): + path.unlink() + + +def _dump_jsonl(obj: BaseModel) -> str: + """Serialize a record for JSONL output. + + Args: + obj: Pydantic model to serialize. + + Returns: + Single-line JSON string with stable key ordering. + """ + payload = obj.model_dump(exclude_none=True) + return json.dumps( + payload, ensure_ascii=False, sort_keys=True, separators=(", ", ": ") + ) + + +@app.command() +def extract(case: str = "all", method: str = "all") -> None: + """Extract contexts for selected cases and methods. + + Args: + case: Comma-separated case ids or "all". + method: Comma-separated method names or "all". 
+ """ + mf = load_manifest(_manifest_path()) + cases = _select_cases(mf.cases, case) + if not cases: + raise typer.BadParameter(f"No cases matched: {case}") + methods = _select_methods(method) + + for c in cases: + xlsx = _resolve_case_path(c.xlsx, case_id=c.id, label="xlsx") + if not xlsx: + continue + console.rule(f"EXTRACT {c.id} ({xlsx.name})") + + if "exstruct" in methods: + out_txt = EXTRACTED_DIR / "exstruct" / f"{c.id}.txt" + extract_exstruct(xlsx, out_txt, c.sheet_scope) + print(f"[green]exstruct -> {out_txt}[/green]") + + if "openpyxl" in methods: + out_txt = EXTRACTED_DIR / "openpyxl" / f"{c.id}.txt" + extract_openpyxl(xlsx, out_txt, c.sheet_scope) + print(f"[green]openpyxl -> {out_txt}[/green]") + + if "pdf" in methods: + out_pdf = EXTRACTED_DIR / "pdf" / f"{c.id}.pdf" + out_txt = EXTRACTED_DIR / "pdf" / f"{c.id}.txt" + xlsx_to_pdf(xlsx, out_pdf) + pdf_to_text(out_pdf, out_txt) + print(f"[green]pdf -> {out_txt}[/green]") + + if "html" in methods: + out_html = EXTRACTED_DIR / "html" / f"{c.id}.html" + out_txt = EXTRACTED_DIR / "html" / f"{c.id}.txt" + xlsx_to_html(xlsx, out_html) + html_to_text(out_html, out_txt) + print(f"[green]html -> {out_txt}[/green]") + + if "image_vlm" in methods: + out_dir = EXTRACTED_DIR / "image_vlm" / c.id + pngs = xlsx_to_pngs_via_pdf( + xlsx, out_dir, dpi=c.render.dpi, max_pages=c.render.max_pages + ) + write_json(out_dir / "images.json", {"images": [str(p) for p in pngs]}) + print(f"[green]image_vlm -> {len(pngs)} png(s) in {out_dir}[/green]") + + +@app.command() +def ask( + case: str = "all", + method: str = "all", + model: str = "gpt-4o", + temperature: float = 0.0, +) -> None: + """Run LLM extraction against prepared contexts. + + Args: + case: Comma-separated case ids or "all". + method: Comma-separated method names or "all". + model: OpenAI model name. + temperature: Sampling temperature for the model. 
+ """ + mf = load_manifest(_manifest_path()) + cases = _select_cases(mf.cases, case) + if not cases: + raise typer.BadParameter(f"No cases matched: {case}") + methods = _select_methods(method) + + client = OpenAIResponsesClient() + ensure_dir(PROMPTS_DIR) + ensure_dir(RESPONSES_DIR) + total_cost = 0.0 + total_calls = 0 + + for c in cases: + console.rule(f"ASK {c.id}") + q = c.question + _reset_case_outputs(c.id) + + for m in methods: + if m == "image_vlm": + img_dir = EXTRACTED_DIR / "image_vlm" / c.id + images_json = img_dir / "images.json" + if not images_json.exists(): + print(f"[yellow]skip: missing images for {c.id}[/yellow]") + continue + imgs = json.loads(images_json.read_text(encoding="utf-8"))["images"] + img_paths = [Path(p) for p in imgs] + if not img_paths: + print(f"[yellow]skip: no images for {c.id}[/yellow]") + continue + prompt_hash = sha256_text( + q + "|" + "|".join([p.name for p in img_paths]) + ) + prompt_rec = PromptRecord( + case_id=c.id, + method=m, + model=model, + temperature=temperature, + question=q, + prompt_hash=prompt_hash, + images=[p.name for p in img_paths], + ) + res = client.ask_images( + model=model, + question=q, + image_paths=img_paths, + temperature=temperature, + ) + else: + txt_path = EXTRACTED_DIR / m / f"{c.id}.txt" + if not txt_path.exists(): + print(f"[yellow]skip: missing context for {c.id} ({m})[/yellow]") + continue + context = txt_path.read_text(encoding="utf-8") + prompt_hash = sha256_text(q + "|" + context) + prompt_rec = PromptRecord( + case_id=c.id, + method=m, + model=model, + temperature=temperature, + question=q, + prompt_hash=prompt_hash, + ) + res = client.ask_text( + model=model, + question=q, + context_text=context, + temperature=temperature, + ) + + prompt_file = PROMPTS_DIR / f"{c.id}.jsonl" + resp_file = RESPONSES_DIR / f"{c.id}.jsonl" + resp_rec = ResponseRecord( + case_id=c.id, + method=m, + model=model, + temperature=temperature, + prompt_hash=prompt_hash, + text=res.text, + 
input_tokens=res.input_tokens, + output_tokens=res.output_tokens, + cost_usd=res.cost_usd, + raw=res.raw, + ) + + prompt_line = _dump_jsonl(prompt_rec) + resp_line = _dump_jsonl(resp_rec) + with prompt_file.open("a", encoding="utf-8") as f: + f.write(prompt_line + "\n") + with resp_file.open("a", encoding="utf-8") as f: + f.write(resp_line + "\n") + + total_cost += res.cost_usd + total_calls += 1 + print( + f"[cyan]{c.id} {m}[/cyan] tokens(in/out)={res.input_tokens}/{res.output_tokens} cost=${res.cost_usd:.6f}" + ) + print(f"[green]Total cost: ${total_cost:.6f} ({total_calls} call(s))[/green]") + + +@app.command() +def markdown( + case: str = "all", + method: str = "all", + model: str = "gpt-4o", + temperature: float = 0.0, + use_llm: bool = True, +) -> None: + """Generate Markdown outputs from the latest JSON responses. + + Args: + case: Comma-separated case ids or "all". + method: Comma-separated method names or "all". + model: OpenAI model name for Markdown conversion. + temperature: Sampling temperature for the model. + use_llm: If True, call the LLM for conversion; otherwise use renderer. 
+ """ + mf = load_manifest(_manifest_path()) + cases = _select_cases(mf.cases, case) + if not cases: + raise typer.BadParameter(f"No cases matched: {case}") + methods = _select_methods(method) + + client = OpenAIResponsesClient() + ensure_dir(MARKDOWN_DIR) + ensure_dir(MARKDOWN_RESPONSES_DIR) + total_cost = 0.0 + total_calls = 0 + + for c in cases: + console.rule(f"MARKDOWN {c.id}") + resp_file = RESPONSES_DIR / f"{c.id}.jsonl" + if not resp_file.exists(): + print(f"[yellow]skip: no responses for {c.id}[/yellow]") + continue + _reset_markdown_outputs(c.id) + latest: dict[str, dict[str, Any]] = {} + for line in resp_file.read_text(encoding="utf-8").splitlines(): + rec = json.loads(line) + if rec.get("method") in methods: + latest[rec["method"]] = rec + + case_dir = MARKDOWN_DIR / c.id + ensure_dir(case_dir) + md_file = MARKDOWN_RESPONSES_DIR / f"{c.id}.jsonl" + + for m, rec in latest.items(): + try: + pred_obj = normalize_json_text(rec["text"]) + json_text = json.dumps(pred_obj, ensure_ascii=False) + prompt_hash = sha256_text(json_text) + if use_llm: + if client is None: + raise RuntimeError( + "LLM client unavailable for markdown conversion." 
+ ) + res = client.ask_markdown( + model=model, json_text=json_text, temperature=temperature + ) + md_text = res.text + md_rec = MarkdownRecord( + case_id=c.id, + method=m, + model=model, + temperature=temperature, + prompt_hash=prompt_hash, + text=md_text, + input_tokens=res.input_tokens, + output_tokens=res.output_tokens, + cost_usd=res.cost_usd, + raw=res.raw, + ) + total_cost += res.cost_usd + total_calls += 1 + line = _dump_jsonl(md_rec) + with md_file.open("a", encoding="utf-8") as f: + f.write(line + "\n") + else: + md_text = render_markdown(pred_obj, title=c.id) + + out_md = case_dir / f"{m}.md" + out_md.write_text(md_text, encoding="utf-8") + print(f"[green]{c.id} {m} -> {out_md}[/green]") + except Exception as exc: + print(f"[yellow]skip: markdown {c.id} {m} ({exc})[/yellow]") + + if use_llm: + print( + f"[green]Markdown cost: ${total_cost:.6f} ({total_calls} call(s))[/green]" + ) + + +@app.command() +def markdown_full( + case: str = "all", + method: str = "all", + model: str = "gpt-4o", + temperature: float = 0.0, +) -> None: + """Generate full-document Markdown from extracted contexts. + + Args: + case: Comma-separated case ids or "all". + method: Comma-separated method names or "all". + model: OpenAI model name for Markdown conversion. + temperature: Sampling temperature for the model. 
+ """ + mf = load_manifest(_manifest_path()) + cases = _select_cases(mf.cases, case) + if not cases: + raise typer.BadParameter(f"No cases matched: {case}") + methods = _select_methods(method) + + client = OpenAIResponsesClient() + ensure_dir(MARKDOWN_FULL_DIR) + ensure_dir(MARKDOWN_FULL_RESPONSES_DIR) + total_cost = 0.0 + total_calls = 0 + + for c in cases: + console.rule(f"MARKDOWN FULL {c.id}") + _reset_markdown_full_outputs(c.id) + case_dir = MARKDOWN_FULL_DIR / c.id + ensure_dir(case_dir) + md_file = MARKDOWN_FULL_RESPONSES_DIR / f"{c.id}.jsonl" + + for m in methods: + try: + if m == "image_vlm": + img_dir = EXTRACTED_DIR / "image_vlm" / c.id + images_json = img_dir / "images.json" + if not images_json.exists(): + print(f"[yellow]skip: missing images for {c.id}[/yellow]") + continue + imgs = json.loads(images_json.read_text(encoding="utf-8"))["images"] + img_paths = [Path(p) for p in imgs] + if not img_paths: + print(f"[yellow]skip: no images for {c.id}[/yellow]") + continue + prompt_hash = sha256_text("|".join([p.name for p in img_paths])) + res = client.ask_markdown_images( + model=model, image_paths=img_paths, temperature=temperature + ) + else: + txt_path = EXTRACTED_DIR / m / f"{c.id}.txt" + if not txt_path.exists(): + print( + f"[yellow]skip: missing context for {c.id} ({m})[/yellow]" + ) + continue + context_text = txt_path.read_text(encoding="utf-8") + prompt_hash = sha256_text(context_text) + res = client.ask_markdown_from_text( + model=model, + context_text=context_text, + temperature=temperature, + ) + + md_text = res.text + md_rec = MarkdownRecord( + case_id=c.id, + method=m, + model=model, + temperature=temperature, + prompt_hash=prompt_hash, + text=md_text, + input_tokens=res.input_tokens, + output_tokens=res.output_tokens, + cost_usd=res.cost_usd, + raw=res.raw, + ) + total_cost += res.cost_usd + total_calls += 1 + line = _dump_jsonl(md_rec) + with md_file.open("a", encoding="utf-8") as f: + f.write(line + "\n") + + out_md = case_dir / f"{m}.md" 
+ out_md.write_text(md_text, encoding="utf-8") + print(f"[green]{c.id} {m} -> {out_md}[/green]") + except Exception as exc: + print(f"[yellow]skip: markdown full {c.id} {m} ({exc})[/yellow]") + + print( + f"[green]Markdown full cost: ${total_cost:.6f} ({total_calls} call(s))[/green]" + ) + + +@app.command() +def rub_ask( + task: str = "all", + method: str = "all", + model: str = "gpt-4o", + temperature: float = 0.0, + context: str = "partial", + manifest: str | None = None, +) -> None: + """Run RUB Stage B queries using Markdown outputs as context. + + Args: + task: Comma-separated task ids or "all". + method: Comma-separated method names or "all". + model: OpenAI model name for Stage B queries. + temperature: Sampling temperature for the model. + context: Markdown source ("partial" or "full"). + manifest: Optional RUB manifest path override. + """ + rub_manifest = load_rub_manifest(_rub_manifest_path(manifest)) + tasks = _select_tasks(rub_manifest.tasks, task) + if not tasks: + raise typer.BadParameter(f"No tasks matched: {task}") + methods = _select_methods(method) + context_key = context.lower().strip() + if context_key not in {"partial", "full"}: + raise typer.BadParameter(f"Invalid context: {context}") + md_root = MARKDOWN_DIR if context_key == "partial" else MARKDOWN_FULL_DIR + + ensure_dir(RUB_OUT_DIR) + ensure_dir(RUB_PROMPTS_DIR) + ensure_dir(RUB_RESPONSES_DIR) + + client = OpenAIResponsesClient() + total_cost = 0.0 + total_calls = 0 + + for t in tasks: + console.rule(f"RUB {t.id}") + _reset_rub_outputs(t.id) + resp_file = RUB_RESPONSES_DIR / f"{t.id}.jsonl" + for m in methods: + md_path = md_root / t.source_case_id / f"{m}.md" + if not md_path.exists(): + print(f"[yellow]skip: missing markdown {t.id} {m}[/yellow]") + continue + context_text = md_path.read_text(encoding="utf-8") + prompt_hash = sha256_text(f"{t.question}\n{context_text}") + try: + res = client.ask_text( + model=model, + question=t.question, + context_text=context_text, + 
temperature=temperature, + ) + rec = RubResponseRecord( + task_id=t.id, + source_case_id=t.source_case_id, + method=m, + model=model, + temperature=temperature, + prompt_hash=prompt_hash, + question=t.question, + text=res.text, + input_tokens=res.input_tokens, + output_tokens=res.output_tokens, + cost_usd=res.cost_usd, + raw=res.raw, + ) + line = _dump_jsonl(rec) + with resp_file.open("a", encoding="utf-8") as f: + f.write(line + "\n") + total_cost += res.cost_usd + total_calls += 1 + print(f"[green]{t.id} {m} -> {resp_file}[/green]") + except Exception as exc: + print(f"[yellow]skip: rub {t.id} {m} ({exc})[/yellow]") + + print(f"[green]RUB cost: ${total_cost:.6f} ({total_calls} call(s))[/green]") + + +@app.command() +def rub_eval( + task: str = "all", method: str = "all", manifest: str | None = None +) -> None: + """Evaluate RUB responses and write results CSV. + + Args: + task: Comma-separated task ids or "all". + method: Comma-separated method names or "all". + manifest: Optional RUB manifest path override. 
+ """ + rub_manifest = load_rub_manifest(_rub_manifest_path(manifest)) + tasks = _select_tasks(rub_manifest.tasks, task) + if not tasks: + raise typer.BadParameter(f"No tasks matched: {task}") + methods = _select_methods(method) + + rows: list[RubResultRow] = [] + for t in tasks: + truth_path = _resolve_task_path(t.truth, task_id=t.id, label="truth") + if not truth_path: + continue + truth = json.loads(truth_path.read_text(encoding="utf-8")) + + resp_file = RUB_RESPONSES_DIR / f"{t.id}.jsonl" + if not resp_file.exists(): + print(f"[yellow]skip: no RUB responses for {t.id}[/yellow]") + continue + latest: dict[str, dict[str, Any]] = {} + for line in resp_file.read_text(encoding="utf-8").splitlines(): + rec = json.loads(line) + if rec.get("method") in methods: + latest[rec["method"]] = rec + + for m, rec in latest.items(): + score = 0.0 + ok = False + partial: RubPartialScore | None = None + err: str | None = None + try: + pred_obj = normalize_json_text(rec["text"]) + score_res = score_exact( + truth, pred_obj, unordered_paths=t.unordered_paths + ) + score = score_res.score + ok = score_res.ok + partial = score_partial( + truth, pred_obj, unordered_paths=t.unordered_paths + ) + except Exception as exc: + err = str(exc) + + rows.append( + RubResultRow( + task_id=t.id, + source_case_id=t.source_case_id, + type=t.type, + track=t.track, + method=m, + model=rec.get("model"), + score=score, + partial_precision=partial.precision if partial else None, + partial_recall=partial.recall if partial else None, + partial_f1=partial.f1 if partial else None, + ok=ok, + input_tokens=int(rec.get("input_tokens", 0)), + output_tokens=int(rec.get("output_tokens", 0)), + cost_usd=float(rec.get("cost_usd", 0.0)), + error=err, + ) + ) + + out_csv = RUB_RESULTS_DIR / "rub_results.csv" + write_results_csv([row.model_dump() for row in rows], out_csv) + print(f"[green]Wrote {out_csv} ({len(rows)} rows)[/green]") + + +@app.command() +def rub_report() -> None: + """Generate a RUB Markdown report 
from the results CSV.""" + csv_path = RUB_RESULTS_DIR / "rub_results.csv" + if not csv_path.exists(): + raise typer.Exit(code=1) + + import pandas as pd + + df = pd.read_csv(csv_path) + agg: dict[str, tuple[str, str]] = { + "rus": ("score", "mean"), + "avg_in": ("input_tokens", "mean"), + "avg_cost": ("cost_usd", "mean"), + "n": ("task_id", "count"), + } + if "partial_precision" in df.columns and df["partial_precision"].notna().any(): + agg["partial_precision"] = ("partial_precision", "mean") + if "partial_recall" in df.columns and df["partial_recall"].notna().any(): + agg["partial_recall"] = ("partial_recall", "mean") + if "partial_f1" in df.columns and df["partial_f1"].notna().any(): + agg["partial_f1"] = ("partial_f1", "mean") + g = df.groupby("method").agg(**agg).reset_index() + + detail_dir = RUB_RESULTS_DIR / "detailed_reports" + detail_dir.mkdir(parents=True, exist_ok=True) + + md_lines: list[str] = [] + md_lines.append("# RUB Report") + md_lines.append("") + md_lines.append( + "This report summarizes Reconstruction Utility Benchmark (RUB) results." + ) + md_lines.append( + "Scores are computed on Stage B task accuracy using Markdown-only inputs." 
+ ) + md_lines.append("") + md_lines.append("## Summary by method") + md_lines.append("") + md_lines.append(g.to_markdown(index=False)) + md_lines.append("") + + if "track" in df.columns: + md_lines.append("## Summary by track") + md_lines.append("") + g_track = df.groupby(["track", "method"]).agg(**agg).reset_index() + md_lines.append(g_track.to_markdown(index=False)) + md_lines.append("") + + for task_id, task_df in df.groupby("task_id"): + task_path = detail_dir / f"report_{task_id}.md" + lines = [ + "# RUB Report", + "", + f"## Details: {task_id}", + "", + task_df.to_markdown(index=False), + "", + ] + task_path.write_text("\n".join(lines), encoding="utf-8") + + report_path = RUB_RESULTS_DIR / "report.md" + report_path.write_text("\n".join(md_lines), encoding="utf-8") + print(f"[green]Wrote {report_path}[/green]") + + +@app.command() +def report_public() -> None: + """Generate chart images and update the public REPORT.md.""" + data = load_report_data() + chart_paths = generate_charts(data) + report_path = update_public_report(chart_paths) + print(f"[green]Wrote {report_path}[/green]") + + +@app.command() +def eval(case: str = "all", method: str = "all") -> None: + """Evaluate the latest responses and write results CSV. + + Args: + case: Comma-separated case ids or "all". + method: Comma-separated method names or "all". 
+ """ + mf = load_manifest(_manifest_path()) + cases = _select_cases(mf.cases, case) + if not cases: + raise typer.BadParameter(f"No cases matched: {case}") + methods = _select_methods(method) + + rows: list[ResultRow] = [] + ruleset = load_ruleset(DATA_DIR / "normalization_rules.json") + md_outputs: dict[str, dict[str, dict[str, Any]]] = {} + + for c in cases: + truth_path = _resolve_case_path(c.truth, case_id=c.id, label="truth") + if not truth_path: + continue + truth = json.loads(truth_path.read_text(encoding="utf-8")) + resp_file = RESPONSES_DIR / f"{c.id}.jsonl" + if not resp_file.exists(): + print(f"[yellow]skip: no responses for {c.id}[/yellow]") + continue + + latest: dict[str, dict[str, Any]] = {} + for line in resp_file.read_text(encoding="utf-8").splitlines(): + rec = json.loads(line) + if rec.get("method") in methods: + latest[rec["method"]] = rec + + rules = ruleset.for_case(c.id) + md_file = MARKDOWN_RESPONSES_DIR / f"{c.id}.jsonl" + if md_file.exists(): + latest_md: dict[str, dict[str, Any]] = {} + for line in md_file.read_text(encoding="utf-8").splitlines(): + rec = json.loads(line) + if rec.get("method") in methods: + latest_md[rec["method"]] = rec + md_outputs[c.id] = latest_md + for m, rec in latest.items(): + ok = False + score = 0.0 + score_ordered = 0.0 + score_norm: float | None = None + score_norm_ordered: float | None = None + score_raw: float | None = None + score_raw_precision: float | None = None + score_md: float | None = None + score_md_precision: float | None = None + err: str | None = None + try: + pred_obj = normalize_json_text(rec["text"]) + score = key_score(truth, pred_obj) + score_ordered = key_score_ordered(truth, pred_obj) + score_norm = key_score_normalized(truth, pred_obj, rules) + score_norm_ordered = key_score_ordered_normalized( + truth, pred_obj, rules + ) + score_raw = raw_coverage_score(truth, pred_obj) + score_raw_precision = raw_precision_score(truth, pred_obj) + md_truth = render_markdown(truth, title=c.id) + 
md_rec = md_outputs.get(c.id, {}).get(m) + if md_rec is not None: + md_text = str(md_rec.get("text", "")) + score_md = markdown_coverage_score(md_truth, md_text) + score_md_precision = markdown_precision_score(md_truth, md_text) + ok = score == 1.0 + except Exception as exc: + err = str(exc) + + rows.append( + ResultRow( + case_id=c.id, + type=c.type, + method=m, + model=rec.get("model"), + score=score, + score_ordered=score_ordered, + score_norm=score_norm, + score_norm_ordered=score_norm_ordered, + score_raw=score_raw, + score_raw_precision=score_raw_precision, + score_md=score_md, + score_md_precision=score_md_precision, + ok=ok, + input_tokens=int(rec.get("input_tokens", 0)), + output_tokens=int(rec.get("output_tokens", 0)), + cost_usd=float(rec.get("cost_usd", 0.0)), + error=err, + ) + ) + + out_csv = RESULTS_DIR / "results.csv" + write_results_csv([row.model_dump() for row in rows], out_csv) + print(f"[green]Wrote {out_csv} ({len(rows)} rows)[/green]") + + +@app.command() +def report() -> None: + """Generate a Markdown report from the results CSV.""" + csv_path = RESULTS_DIR / "results.csv" + if not csv_path.exists(): + raise typer.Exit(code=1) + + import pandas as pd + + df = pd.read_csv(csv_path) + score_col = "score" if "score" in df.columns else "ok" + agg: dict[str, tuple[str, str]] = { + "acc": (score_col, "mean"), + "avg_in": ("input_tokens", "mean"), + "avg_cost": ("cost_usd", "mean"), + "n": (score_col, "count"), + } + if "score_ordered" in df.columns: + agg["acc_ordered"] = ("score_ordered", "mean") + if "score_norm" in df.columns: + agg["acc_norm"] = ("score_norm", "mean") + if "score_norm_ordered" in df.columns: + agg["acc_norm_ordered"] = ("score_norm_ordered", "mean") + if "score_raw" in df.columns: + agg["acc_raw"] = ("score_raw", "mean") + if "score_raw_precision" in df.columns: + agg["raw_precision"] = ("score_raw_precision", "mean") + if "score_md" in df.columns and df["score_md"].notna().any(): + agg["acc_md"] = ("score_md", "mean") + if 
"score_md_precision" in df.columns and df["score_md_precision"].notna().any(): + agg["md_precision"] = ("score_md_precision", "mean") + g = df.groupby("method").agg(**agg).reset_index() + + detail_dir = RESULTS_DIR / "detailed_reports" + detail_dir.mkdir(parents=True, exist_ok=True) + + md_lines = [] + md_lines.append("# Benchmark Report") + md_lines.append("") + md_lines.append( + "This report summarizes extraction accuracy for each method on the benchmark cases." + ) + md_lines.append( + "Scores are computed per case and aggregated by method. Exact, normalized, raw," + ) + md_lines.append( + "and markdown tracks are reported to ensure fair comparison across variations." + ) + md_lines.append("") + md_lines.append("## Evaluation protocol (public)") + md_lines.append("") + md_lines.append("Fixed settings for reproducibility:") + md_lines.append("") + md_lines.append("- Model: gpt-4o (Responses API)") + md_lines.append("- Temperature: 0.0") + md_lines.append("- Prompt: fixed in bench/llm/openai_client.py") + md_lines.append("- Input contexts: generated by bench.cli extract") + md_lines.append("- Normalization: data/normalization_rules.json (optional track)") + md_lines.append("- Evaluation: bench.cli eval (Exact + Normalized + Raw)") + md_lines.append("- Markdown conversion: bench.cli markdown (optional)") + md_lines.append("- Report: bench.cli report (summary + per-case)") + md_lines.append("") + md_lines.append("Recommended disclosure when publishing results:") + md_lines.append("") + md_lines.append("- Model name + version, temperature, and date of run") + md_lines.append("- Full normalization_rules.json used for normalized scores") + md_lines.append("- Cost/token estimation method") + md_lines.append("- Any skipped cases and the reason (missing files, failures)") + md_lines.append("") + md_lines.append("## How to interpret results (public guide)") + md_lines.append("") + md_lines.append("- Exact: strict string match with no normalization.") + md_lines.append( + 
"- Normalized: applies case-specific rules in data/normalization_rules.json to" + ) + md_lines.append( + " absorb formatting differences (aliases, split/composite labels)." + ) + md_lines.append( + "- Raw: loose coverage/precision over flattened text tokens (schema-agnostic)." + ) + md_lines.append( + "- Markdown: coverage/precision against canonical Markdown rendered from truth." + ) + md_lines.append("") + md_lines.append("Recommended interpretation:") + md_lines.append("") + md_lines.append( + "- Use Exact to compare end-to-end string fidelity (best for literal extraction)." + ) + md_lines.append( + "- Use Normalized to compare document understanding across methods." + ) + md_lines.append( + "- Use Raw to compare how much ground-truth text is captured regardless of schema." + ) + md_lines.append("- Use Markdown to evaluate JSON-to-Markdown conversion quality.") + md_lines.append( + "- When tracks disagree, favor Normalized for Excel-heavy layouts where labels" + ) + md_lines.append(" are split/merged or phrased differently.") + md_lines.append( + "- Always cite both accuracy and cost metrics in public comparisons." + ) + md_lines.append("") + md_lines.append("## Evaluation tracks") + md_lines.append("") + md_lines.append("- Exact: strict string match without any normalization.") + md_lines.append( + "- Normalized: applies case-specific normalization rules (aliases, split/composite)" + ) + md_lines.append( + " defined in data/normalization_rules.json to absorb format and wording variations." + ) + md_lines.append( + "- Raw: loose coverage/precision over flattened text tokens (schema-agnostic)," + ) + md_lines.append( + " intended to reflect raw data capture without penalizing minor label variations." + ) + md_lines.append( + "- Markdown: coverage/precision comparing LLM Markdown to canonical truth Markdown." 
+ ) + md_lines.append("") + md_lines.append("## Summary by method") + md_lines.append("") + md_lines.append(g.to_markdown(index=False)) + md_lines.append("") + md_lines.append("## Markdown evaluation notes") + md_lines.append("") + md_lines.append( + "Markdown scores measure how well the generated Markdown lines match a canonical" + ) + md_lines.append( + "Markdown rendering of the ground truth JSON. This is a *conversion quality*" + ) + md_lines.append("signal, not a direct extraction-accuracy substitute.") + md_lines.append("") + md_lines.append("Key points:") + md_lines.append("") + md_lines.append( + "- Coverage (acc_md): how much of truth Markdown content is recovered." + ) + md_lines.append( + "- Precision (md_precision): how much of predicted Markdown is correct." + ) + md_lines.append( + "- Layout shifts or list formatting differences can lower scores even if" + ) + md_lines.append(" the underlying facts are correct.") + md_lines.append( + "- LLM-based conversion introduces variability; re-run with the same seed" + ) + md_lines.append( + " and model settings to assess stability, or use deterministic rendering" + ) + md_lines.append(" for baseline comparisons.") + md_lines.append( + "- Use Markdown scores when your downstream task consumes Markdown (e.g.," + ) + md_lines.append( + " RAG ingestion), and report alongside Exact/Normalized/Raw metrics." + ) + md_lines.append("") + md_lines.append("## Exstruct positioning notes (public)") + md_lines.append("") + md_lines.append( + "Recommended primary indicators for exstruct positioning (RAG pre-processing):" + ) + md_lines.append("") + md_lines.append("- Normalized accuracy: acc_norm / acc_norm_ordered") + md_lines.append("- Raw coverage/precision: acc_raw / raw_precision") + md_lines.append("- Markdown coverage/precision: acc_md / md_precision") + md_lines.append("") + md_lines.append("Current deltas vs. 
best method (n=11, when available):") + md_lines.append("") + metric_labels = [ + ("acc_norm", "Normalized accuracy"), + ("acc_norm_ordered", "Normalized ordered accuracy"), + ("acc_raw", "Raw coverage"), + ("raw_precision", "Raw precision"), + ("acc_md", "Markdown coverage"), + ("md_precision", "Markdown precision"), + ] + if "method" in g.columns and not g.empty: + ex_row = g[g["method"] == "exstruct"] + for metric, label in metric_labels: + if metric not in g.columns: + continue + best_val = g[metric].max() + best_methods = g[g[metric] == best_val]["method"].tolist() + if ex_row.empty: + ex_val = None + else: + ex_val = float(ex_row[metric].iloc[0]) + if ex_val is None: + md_lines.append(f"- {label}: exstruct n/a; best {best_val:.6f}") + continue + delta = ex_val - best_val + md_lines.append( + f"- {label}: exstruct {ex_val:.6f} vs best {best_val:.6f}" + f" ({', '.join(best_methods)}), delta {delta:+.6f}" + ) + else: + md_lines.append("- (summary unavailable)") + md_lines.append("") + md_lines.append("## Normalization leniency summary") + md_lines.append("") + ruleset = load_ruleset(DATA_DIR / "normalization_rules.json") + if ruleset.cases: + summary_rows: list[dict[str, str | int]] = [] + for case_id, rules in sorted(ruleset.cases.items()): + details = [] + for rule in rules.list_object_rules: + parts = [ + f"strings={','.join(rule.string_fields) or '-'}", + f"strings_contains={','.join(rule.string_fields_contains) or '-'}", + f"lists_contains={','.join(rule.list_fields_contains) or '-'}", + f"strip_prefix={','.join(rule.strip_prefix.keys()) or '-'}", + ] + details.append(f"{rule.list_key}({'; '.join(parts)})") + summary_rows.append( + { + "case_id": case_id, + "alias_rules": len(rules.alias_rules), + "split_rules": len(rules.split_rules), + "composite_rules": len(rules.composite_rules), + "list_object_rules": len(rules.list_object_rules), + "details": " | ".join(details) if details else "-", + } + ) + 
md_lines.append(pd.DataFrame(summary_rows).to_markdown(index=False)) + else: + md_lines.append("_No normalization rules defined._") + md_lines.append("") + md_lines.append("## Detailed reports") + md_lines.append("") + for case_id in sorted(df["case_id"].unique()): + md_lines.append(f"- detailed_reports/report_{case_id}.md") + md_lines.append("") + out_md = RESULTS_DIR / "report.md" + out_md.write_text("\n".join(md_lines), encoding="utf-8") + print(f"[green]Wrote {out_md}[/green]") + + # Per-case detail reports + detail_cols = [ + "method", + "case_id", + "type", + "model", + "score", + "score_ordered", + "score_norm", + "score_norm_ordered", + "score_raw", + "score_raw_precision", + "score_md", + "score_md_precision", + "input_tokens", + "output_tokens", + "cost_usd", + "error", + ] + available_cols = [c for c in detail_cols if c in df.columns] + + for case_id in sorted(df["case_id"].unique()): + case_df = df[df["case_id"] == case_id][available_cols] + case_lines = [ + "# Benchmark Report", + "", + f"## Details: {case_id}", + "", + case_df.to_markdown(index=False), + "", + ] + case_md = detail_dir / f"report_{case_id}.md" + case_md.write_text("\n".join(case_lines), encoding="utf-8") + print(f"[green]Wrote {case_md}[/green]") + print(f"[cyan]Details ({case_id})[/cyan]") + print(case_df.to_markdown(index=False)) + + print("[magenta]Summary (from report.md)[/magenta]") + print(g.to_markdown(index=False)) + + +if __name__ == "__main__": + app() diff --git a/benchmark/src/bench/eval/__init__.py b/benchmark/src/bench/eval/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/src/bench/eval/exact_match.py b/benchmark/src/bench/eval/exact_match.py new file mode 100644 index 0000000..da7dbdc --- /dev/null +++ b/benchmark/src/bench/eval/exact_match.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +import json +from typing import Any + + +def canonical(obj: Any) -> str: + return json.dumps(obj, ensure_ascii=False, sort_keys=True, 
separators=(",", ":")) + + +def exact_match(a: Any, b: Any) -> bool: + return canonical(a) == canonical(b) diff --git a/benchmark/src/bench/eval/markdown_render.py b/benchmark/src/bench/eval/markdown_render.py new file mode 100644 index 0000000..2883d94 --- /dev/null +++ b/benchmark/src/bench/eval/markdown_render.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import json +from typing import Any + + +def render_markdown(value: Any, *, title: str | None = None) -> str: + """Render a canonical Markdown representation for JSON-like data. + + Args: + value: JSON-like payload to render. + title: Optional top-level title. + + Returns: + Markdown string representation. + """ + lines: list[str] = [] + if title: + lines.append(f"# {title}") + lines.append("") + _render_value(lines, value, level=2) + return "\n".join(lines).strip() + "\n" + + +def _render_value(lines: list[str], value: Any, *, level: int) -> None: + """Render a value into Markdown lines. + + Args: + lines: List to append output lines to. + value: JSON-like value to render. + level: Heading level to use for dict sections. + """ + if isinstance(value, dict): + _render_dict(lines, value, level=level) + return + if isinstance(value, list): + _render_list(lines, value, level=level) + return + lines.append(str(value)) + + +def _render_dict(lines: list[str], value: dict[str, Any], *, level: int) -> None: + """Render a dict as Markdown sections. + + Args: + lines: List to append output lines to. + value: Dict to render. + level: Heading level for keys. + """ + for key, item in value.items(): + heading = "#" * max(level, 1) + lines.append(f"{heading} {key}") + if isinstance(item, (dict, list)): + _render_value(lines, item, level=level + 1) + else: + lines.append(str(item)) + lines.append("") + + +def _render_list(lines: list[str], value: list[Any], *, level: int) -> None: + """Render a list in Markdown. + + Args: + lines: List to append output lines to. + value: List to render. 
+ level: Heading level for nested dicts if needed. + """ + if not value: + lines.append("- (empty)") + return + if all(isinstance(item, dict) for item in value): + _render_table(lines, value) + lines.append("") + return + for item in value: + if isinstance(item, (dict, list)): + text = _json_string(item) + else: + text = str(item) + lines.append(f"- {text}") + + +def _render_table(lines: list[str], rows: list[Any]) -> None: + """Render a list of dicts as a Markdown table. + + Args: + lines: List to append output lines to. + rows: List of row dicts. + """ + keys: list[str] = [] + for row in rows: + if not isinstance(row, dict): + continue + for key in row.keys(): + if key not in keys: + keys.append(key) + if not keys: + lines.append("- (empty)") + return + header = "| " + " | ".join(keys) + " |" + sep = "| " + " | ".join(["---"] * len(keys)) + " |" + lines.append(header) + lines.append(sep) + for row in rows: + if not isinstance(row, dict): + cells = [_escape_cell(_json_string(row))] + [""] * (len(keys) - 1) + else: + cells = [_escape_cell(_cell_value(row.get(k))) for k in keys] + lines.append("| " + " | ".join(cells) + " |") + + +def _cell_value(value: Any) -> str: + """Convert a table cell value to string.""" + if isinstance(value, (dict, list)): + return _json_string(value) + if value is None: + return "" + return str(value) + + +def _json_string(value: Any) -> str: + """Serialize a value as compact JSON for inline use.""" + return json.dumps(value, ensure_ascii=False, sort_keys=True) + + +def _escape_cell(text: str) -> str: + """Escape pipe characters for Markdown tables.""" + return text.replace("|", "\\|") diff --git a/benchmark/src/bench/eval/markdown_score.py b/benchmark/src/bench/eval/markdown_score.py new file mode 100644 index 0000000..56399d0 --- /dev/null +++ b/benchmark/src/bench/eval/markdown_score.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +import re +import unicodedata + +_TABLE_SEPARATOR = re.compile(r"^[\s|:-]+$") +_WS_PATTERN = 
def markdown_coverage_score(truth_md: str, pred_md: str) -> float:
    """Measure how much of the truth Markdown shows up in the prediction.

    Both documents are reduced to normalized text lines; a truth line counts
    as covered when at least one prediction line matches it.

    Args:
        truth_md: Canonical Markdown from truth JSON.
        pred_md: Markdown output to evaluate.

    Returns:
        Covered truth lines divided by all truth lines, in [0, 1]. Returns
        0.0 when the truth has no comparable lines.
    """
    expected = _normalized_lines(truth_md)
    candidates = _normalized_lines(pred_md)
    if not expected:
        return 0.0
    covered = sum(
        1
        for wanted in expected
        if any(_match_line(wanted, got) for got in candidates)
    )
    return covered / len(expected)
+ """ + truth_lines = _normalized_lines(truth_md) + pred_lines = _normalized_lines(pred_md) + if not pred_lines: + return 0.0 + matched = 0 + for p in pred_lines: + if any(_match_line(t, p) for t in truth_lines): + matched += 1 + return matched / len(pred_lines) + + +def _normalized_lines(markdown: str) -> list[str]: + """Normalize Markdown into comparable text lines.""" + lines: list[str] = [] + in_code_block = False + for raw in markdown.splitlines(): + stripped = raw.strip() + if stripped.startswith("```"): + in_code_block = not in_code_block + continue + if in_code_block: + continue + norm = _normalize_line(raw) + if not norm: + continue + if _TABLE_SEPARATOR.fullmatch(norm): + continue + lines.append(norm) + return lines + + +def _normalize_line(line: str) -> str: + """Normalize a single Markdown line for matching.""" + text = line.strip() + if not text: + return "" + text = re.sub(r"^\s*#{1,6}\s*", "", text) + text = re.sub(r"^\s*[-*+]\s+", "", text) + text = text.replace("|", " ") + text = text.replace("`", "") + text = text.replace("*", "") + text = text.replace(">", "") + text = unicodedata.normalize("NFKC", text) + text = text.replace("\u3000", " ") + text = _ZERO_WIDTH_PATTERN.sub("", text) + text = _WEEKDAY_PAREN.sub("", text) + text = _PAREN.sub("", text) + text = _DOT_SEPARATORS.sub("", text) + text = _WS_PATTERN.sub(" ", text) + text = _NON_ASCII_SPACE_PATTERN.sub("", text) + return text.strip() + + +def _match_line(truth_line: str, pred_line: str) -> bool: + """Return True if lines match under loose Markdown rules.""" + if not truth_line or not pred_line: + return False + if _is_numeric_line(truth_line) or len(truth_line) == 1: + return truth_line == pred_line + return truth_line in pred_line or pred_line in truth_line + + +def _is_numeric_line(text: str) -> bool: + """Return True if the text is numeric-only.""" + return _NUMERIC_PATTERN.fullmatch(text) is not None diff --git a/benchmark/src/bench/eval/normalization_rules.py 
class ListObjectRule(BaseModel):
    """Normalization rules for lists of objects.

    A rule describes how two objects from the list stored under ``list_key``
    are compared by the scorer (``_object_matches`` in ``score.py``). Every
    listed field must be present in both the truth and the predicted object,
    otherwise the objects do not match.
    """

    # Key of the list this rule applies to; rules are looked up by this key.
    list_key: str
    # Fields whose normalized string values must be equal.
    string_fields: list[str] = Field(default_factory=list)
    # Fields that match when either normalized string contains the other.
    string_fields_contains: list[str] = Field(default_factory=list)
    # Fields compared as sets of normalized items (order and duplicates ignored).
    list_fields: list[str] = Field(default_factory=list)
    # Fields where every truth item must occur as a substring of the
    # space-joined predicted items. A value holding a single string is first
    # split on the scorer's separator pattern.
    list_fields_contains: list[str] = Field(default_factory=list)
    # Field name -> regular expression removed (via re.sub) from that field's
    # text before it is normalized.
    strip_prefix: dict[str, str] = Field(default_factory=dict)
def _strip_circled_numbers(text: str) -> str:
    """Remove characters in Unicode category ``No`` for robust matching.

    Circled numbers are the intended target, but the filter is by general
    category, so every "Number, other" character is dropped, which also
    covers superscript digits and vulgar fractions.

    Args:
        text: Input string.

    Returns:
        The string without any category ``No`` characters.
    """
    return "".join(ch for ch in text if unicodedata.category(ch) != "No")
+++ b/benchmark/src/bench/eval/normalize.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import json +import re +from typing import Any + + +def _strip_code_fences(s: str) -> str: + s = s.strip() + s = re.sub(r"^```(json)?\s*", "", s) + s = re.sub(r"\s*```$", "", s) + return s.strip() + + +def normalize_json_text(s: str) -> Any: + """ + LLM出力を JSON として読み、正規化されたPythonオブジェクトを返す + """ + s = _strip_code_fences(s) + # 余計な前後テキストが入った場合の救済(最初の{...}を拾う) + if "{" in s and "}" in s: + start = s.find("{") + end = s.rfind("}") + s = s[start : end + 1] + obj = json.loads(s) + return obj diff --git a/benchmark/src/bench/eval/raw_match.py b/benchmark/src/bench/eval/raw_match.py new file mode 100644 index 0000000..b8d612b --- /dev/null +++ b/benchmark/src/bench/eval/raw_match.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +import re +import unicodedata +from typing import Any + +_WS_PATTERN = re.compile(r"\s+") +_NUMERIC_PATTERN = re.compile(r"[+-]?\d+(?:[.,]\d+)?") + + +def _normalize_raw_text(text: str) -> str: + """Normalize text for raw coverage/precision matching. + + Args: + text: Input string. + + Returns: + Normalized string with whitespace removed and width normalized. + """ + normalized = unicodedata.normalize("NFKC", text) + normalized = normalized.replace("窶サ", "") + normalized = _WS_PATTERN.sub("", normalized) + return normalized.strip() + + +def _is_numeric_token(text: str) -> bool: + """Return True if the text looks like a numeric token. + + Args: + text: Token to check. + + Returns: + True if the token matches a numeric pattern. + """ + return _NUMERIC_PATTERN.fullmatch(text) is not None + + +def _flatten_scalars( + value: Any, *, depth: int = 0, parent_is_list: bool = False +) -> list[str]: + """Flatten nested payloads into a list of scalar strings. + + Keys are included for nested dicts that are not record-like (dicts inside lists) + to capture table headers or row labels without pulling schema field names. 
+ + Args: + value: Arbitrary JSON-like value. + depth: Current nesting depth. + parent_is_list: Whether the parent container is a list. + + Returns: + List of stringified scalar values (and selected keys). + """ + if value is None: + return [] + if isinstance(value, dict): + items: list[str] = [] + if depth > 0 and not parent_is_list: + items.extend([str(k) for k in value.keys()]) + for v in value.values(): + items.extend(_flatten_scalars(v, depth=depth + 1, parent_is_list=False)) + return items + if isinstance(value, list): + items: list[str] = [] + for v in value: + items.extend(_flatten_scalars(v, depth=depth + 1, parent_is_list=True)) + return items + return [str(value)] + + +def _dedupe_normalized(values: list[str]) -> list[str]: + """Normalize and de-duplicate text values, dropping empty tokens. + + Args: + values: List of raw string values. + + Returns: + De-duplicated list of normalized tokens. + """ + seen: set[str] = set() + normalized: list[str] = [] + for value in values: + token = _normalize_raw_text(value) + if not token: + continue + if token not in seen: + seen.add(token) + normalized.append(token) + return normalized + + +def _raw_match_token(truth_token: str, pred_token: str) -> bool: + """Return True if tokens match under loose raw-data matching rules. + + Args: + truth_token: Normalized truth token. + pred_token: Normalized prediction token. + + Returns: + True if tokens are considered a match. + """ + if not truth_token or not pred_token: + return False + if _is_numeric_token(truth_token) or len(truth_token) == 1: + return truth_token == pred_token + return truth_token in pred_token or pred_token in truth_token + + +def raw_coverage_score(truth: Any, pred: Any) -> float: + """Compute loose coverage of truth tokens in predictions. + + Args: + truth: Ground-truth JSON payload. + pred: Predicted JSON payload. + + Returns: + Coverage in [0, 1]. 
def write_results_csv(rows: list[dict[str, Any]], out_csv: Path) -> None:
    """Write result rows to a UTF-8 CSV file, creating parent directories.

    Args:
        rows: Result records. Rows may carry different key sets; the header
            is the union of all keys in first-seen order and missing values
            are written as empty cells.
        out_csv: Destination path; overwritten if it exists.
    """
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    # Collect columns from every row, not just the first: DictWriter raises
    # ValueError when a later row has a key that is not in fieldnames.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    # newline="" lets the csv module control line endings itself.
    with out_csv.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(rows)
pred_list: Any) -> float: + """Compute a partial match score for lists. + + Args: + truth_list: Ground-truth list. + pred_list: Predicted list. + + Returns: + Fraction of truth elements present in prediction (order-insensitive). + """ + if not isinstance(pred_list, list): + return 0.0 + if not truth_list: + return 0.0 + # Use exact match on elements; ignore order and duplicates. + truth_set = {_normalize_element(v) for v in truth_list} + pred_set = {_normalize_element(v) for v in pred_list} + if not truth_set: + return 0.0 + return len(truth_set & pred_set) / len(truth_set) + + +def _normalize_scalar(value: Any) -> str: + """Normalize scalar values for set comparison.""" + if value is None: + return "null" + text = str(value) + text = _strip_circled_numbers(text) + text = unicodedata.normalize("NFKC", text) + text = text.replace("※", "") + text = re.sub(r"\s+", " ", text).strip() + return text + + +def _strip_circled_numbers(text: str) -> str: + """Remove circled-number characters (e.g., ①②) for robust matching.""" + return "".join(ch for ch in text if unicodedata.category(ch) != "No") + + +def _normalize_element(value: Any) -> str: + """Normalize list elements for comparison.""" + if isinstance(value, (dict, list)): + return canonical(value) + return _normalize_scalar(value) + + +def _normalize_scalar_with_rules(value: Any, index: RuleIndex | None) -> str: + """Normalize scalar values with optional normalization rules.""" + text = normalize_label(str(value)) + if index is None: + return text + return index.alias_map.get(text, text) + + +def _expand_pred_item(value: Any, index: RuleIndex) -> list[str]: + """Expand a predicted list item using split rules and aliases.""" + text = _normalize_scalar_with_rules(value, index) + if text in index.split_map: + return index.split_map[text] + return [text] + + +_SPLIT_PATTERN = re.compile(r"[、,,・//]+") + + +def _coerce_list(value: Any) -> list[str]: + if isinstance(value, list): + return [str(v) for v in value if v is not 
def _normalize_text_field(
    value: Any, index: RuleIndex, *, prefix_pattern: str | None
) -> str:
    """Normalize one string field of a list object.

    Args:
        value: Field value; converted with ``str``.
        index: Rule index used for label normalization and alias lookup.
        prefix_pattern: Optional regular expression. Every match is removed
            with ``re.sub`` and the remainder is stripped, so anchor the
            pattern with ``^`` to limit it to a leading prefix.

    Returns:
        The normalized, alias-resolved text.
    """
    raw = str(value)
    cleaned = re.sub(prefix_pattern, "", raw).strip() if prefix_pattern else raw
    return _normalize_scalar_with_rules(cleaned, index)
_normalize_list_field(truth_obj[field], index, rule, field) + p_list = _normalize_list_field(pred_obj[field], index, rule, field) + if t_list and not p_list: + return False + combined = " ".join(p_list) + for t_val in t_list: + if t_val not in combined: + return False + + return True + + +def _lcs_length_objects( + a: list[dict[str, Any]], + b: list[dict[str, Any]], + *, + rule: ListObjectRule, + index: RuleIndex, +) -> int: + if not a or not b: + return 0 + dp = [0] * (len(b) + 1) + for i in range(1, len(a) + 1): + prev = 0 + for j in range(1, len(b) + 1): + temp = dp[j] + if _object_matches(a[i - 1], b[j - 1], rule, index): + dp[j] = prev + 1 + else: + dp[j] = max(dp[j], dp[j - 1]) + prev = temp + return dp[-1] + + +def _list_score_objects_normalized( + truth_list: list[Any], + pred_list: Any, + *, + rule: ListObjectRule, + index: RuleIndex, + ordered: bool, +) -> float: + if not isinstance(pred_list, list): + return 0.0 + if not truth_list: + return 0.0 + truth_objs = [t for t in truth_list if isinstance(t, dict)] + pred_objs = [p for p in pred_list if isinstance(p, dict)] + if not truth_objs: + return 0.0 + if ordered: + lcs_len = _lcs_length_objects(truth_objs, pred_objs, rule=rule, index=index) + return lcs_len / len(truth_objs) + matched = 0 + used: set[int] = set() + for t in truth_objs: + for i, p in enumerate(pred_objs): + if i in used: + continue + if _object_matches(t, p, rule, index): + matched += 1 + used.add(i) + break + return matched / len(truth_objs) + + +def _list_score_ordered(truth_list: list[Any], pred_list: Any) -> float: + """Compute an order-aware partial match score for lists. + + Args: + truth_list: Ground-truth list. + pred_list: Predicted list. + + Returns: + LCS-based fraction of truth elements matched in order. 
+ """ + if not isinstance(pred_list, list): + return 0.0 + if not truth_list: + return 0.0 + truth_norm = [_normalize_element(v) for v in truth_list] + pred_norm = [_normalize_element(v) for v in pred_list] + lcs_len = _lcs_length(truth_norm, pred_norm) + return lcs_len / len(truth_norm) + + +def _list_score_normalized( + truth_list: list[Any], pred_list: Any, index: RuleIndex +) -> float: + """Compute a partial match score for lists with normalization rules.""" + if not isinstance(pred_list, list): + return 0.0 + if not truth_list: + return 0.0 + truth_norm = [_normalize_scalar_with_rules(v, index) for v in truth_list] + pred_expanded: list[str] = [] + for v in pred_list: + pred_expanded.extend(_expand_pred_item(v, index)) + pred_set = set(pred_expanded) + matched = 0 + for t in truth_norm: + if t in pred_set: + matched += 1 + continue + if t in index.composite_map: + for parts in index.composite_map[t]: + if all(p in pred_set for p in parts): + matched += 1 + break + return matched / len(truth_norm) + + +def _list_score_ordered_normalized( + truth_list: list[Any], pred_list: Any, index: RuleIndex +) -> float: + """Compute order-aware list score with normalization rules.""" + if not isinstance(pred_list, list): + return 0.0 + if not truth_list: + return 0.0 + truth_norm = [_normalize_scalar_with_rules(v, index) for v in truth_list] + pred_expanded: list[str] = [] + for v in pred_list: + pred_expanded.extend(_expand_pred_item(v, index)) + lcs_len = _lcs_length(truth_norm, pred_expanded) + return lcs_len / len(truth_norm) + + +def _lcs_length(a: list[str], b: list[str]) -> int: + """Compute the length of the longest common subsequence.""" + if not a or not b: + return 0 + dp = [0] * (len(b) + 1) + for i in range(1, len(a) + 1): + prev = 0 + for j in range(1, len(b) + 1): + temp = dp[j] + if a[i - 1] == b[j - 1]: + dp[j] = prev + 1 + else: + dp[j] = max(dp[j], dp[j - 1]) + prev = temp + return dp[-1] + + +def _dict_score(truth_dict: dict[str, Any], pred_dict: 
dict[str, Any]) -> float: + """Compute a key-level score for nested dicts (order-insensitive lists).""" + total = len(truth_dict) + if total == 0: + return 0.0 + score_sum = 0.0 + for key, truth_val in truth_dict.items(): + if key not in pred_dict: + continue + pred_val = pred_dict[key] + score_sum += _value_score(truth_val, pred_val, ordered=False) + return score_sum / total + + +def _dict_score_ordered(truth_dict: dict[str, Any], pred_dict: dict[str, Any]) -> float: + """Compute a key-level score for nested dicts (order-aware lists).""" + total = len(truth_dict) + if total == 0: + return 0.0 + score_sum = 0.0 + for key, truth_val in truth_dict.items(): + if key not in pred_dict: + continue + pred_val = pred_dict[key] + score_sum += _value_score(truth_val, pred_val, ordered=True) + return score_sum / total + + +def _dict_score_normalized( + truth_dict: dict[str, Any], + pred_dict: dict[str, Any], + index: RuleIndex, + list_object_rules: dict[str, ListObjectRule], +) -> float: + """Compute a key-level score for nested dicts with normalization rules.""" + total = len(truth_dict) + if total == 0: + return 0.0 + score_sum = 0.0 + for key, truth_val in truth_dict.items(): + if key not in pred_dict: + continue + pred_val = pred_dict[key] + rule = list_object_rules.get(key) + if rule and isinstance(truth_val, list) and isinstance(pred_val, list): + score_sum += _list_score_objects_normalized( + truth_val, pred_val, rule=rule, index=index, ordered=False + ) + continue + score_sum += _value_score_normalized( + truth_val, + pred_val, + index, + ordered=False, + list_object_rules=list_object_rules, + ) + return score_sum / total + + +def _dict_score_ordered_normalized( + truth_dict: dict[str, Any], + pred_dict: dict[str, Any], + index: RuleIndex, + list_object_rules: dict[str, ListObjectRule], +) -> float: + """Compute a key-level score with normalized, order-aware list scoring.""" + total = len(truth_dict) + if total == 0: + return 0.0 + score_sum = 0.0 + for key, truth_val 
in truth_dict.items(): + if key not in pred_dict: + continue + pred_val = pred_dict[key] + rule = list_object_rules.get(key) + if rule and isinstance(truth_val, list) and isinstance(pred_val, list): + score_sum += _list_score_objects_normalized( + truth_val, pred_val, rule=rule, index=index, ordered=True + ) + continue + score_sum += _value_score_normalized( + truth_val, + pred_val, + index, + ordered=True, + list_object_rules=list_object_rules, + ) + return score_sum / total + + +def _value_score(truth: Any, pred: Any, *, ordered: bool) -> float: + """Score a value with optional list ordering.""" + if isinstance(truth, dict): + if not isinstance(pred, dict): + return 0.0 + return _dict_score_ordered(truth, pred) if ordered else _dict_score(truth, pred) + if isinstance(truth, list): + return _list_score_ordered(truth, pred) if ordered else _list_score(truth, pred) + return 1.0 if exact_match(truth, pred) else 0.0 + + +def _value_score_normalized( + truth: Any, + pred: Any, + index: RuleIndex, + *, + ordered: bool, + list_object_rules: dict[str, ListObjectRule], +) -> float: + """Score a value using normalization rules.""" + if isinstance(truth, dict): + if not isinstance(pred, dict): + return 0.0 + return ( + _dict_score_ordered_normalized(truth, pred, index, list_object_rules) + if ordered + else _dict_score_normalized(truth, pred, index, list_object_rules) + ) + if isinstance(truth, list): + return ( + _list_score_ordered_normalized(truth, pred, index) + if ordered + else _list_score_normalized(truth, pred, index) + ) + truth_norm = _normalize_scalar_with_rules(truth, index) + pred_norm = _normalize_scalar_with_rules(pred, index) + return 1.0 if truth_norm == pred_norm else 0.0 + + +def key_score(truth: Any, pred: Any) -> float: + """Compute a key-level score against the truth payload. + + Args: + truth: Ground-truth JSON payload. + pred: Predicted JSON payload. + + Returns: + Score in [0, 1]. 
def key_score_ordered(truth: Any, pred: Any) -> float:
    """Score a prediction against the truth payload, respecting list order.

    Args:
        truth: Ground-truth JSON payload.
        pred: Predicted JSON payload.

    Returns:
        The order-aware dict or list score when truth is a dict or list
        (0.0 if truth is a dict and the prediction is not); otherwise 1.0
        on an exact match and 0.0 if not.
    """
    if isinstance(truth, list):
        return _list_score_ordered(truth, pred)
    if not isinstance(truth, dict):
        return 1.0 if exact_match(truth, pred) else 0.0
    if isinstance(pred, dict):
        return _dict_score_ordered(truth, pred)
    return 0.0
1.0 if truth_norm == pred_norm else 0.0 diff --git a/benchmark/src/bench/llm/__init__.py b/benchmark/src/bench/llm/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/src/bench/llm/openai_client.py b/benchmark/src/bench/llm/openai_client.py new file mode 100644 index 0000000..fd374ef --- /dev/null +++ b/benchmark/src/bench/llm/openai_client.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +import base64 +import json +import os +from pathlib import Path +from typing import Any + +from dotenv import load_dotenv +from openai import OpenAI +from pydantic import BaseModel + +from ..paths import ROOT +from .pricing import estimate_cost_usd + + +class LLMResult(BaseModel): + """Structured response data from the LLM call.""" + + text: str + input_tokens: int + output_tokens: int + cost_usd: float + raw: dict[str, Any] + + +def _png_to_data_url(png_path: Path) -> str: + """Encode a PNG image as a data URL. + + Args: + png_path: PNG file path. + + Returns: + Base64 data URL string. + """ + b = png_path.read_bytes() + b64 = base64.b64encode(b).decode("ascii") + return f"data:image/png;base64,{b64}" + + +def _extract_usage_tokens(usage: object | None) -> tuple[int, int]: + """Extract input/output tokens from the OpenAI usage payload. + + Args: + usage: Usage payload from the OpenAI SDK (object or dict). + + Returns: + Tuple of (input_tokens, output_tokens). + """ + if usage is None: + return 0, 0 + if isinstance(usage, dict): + return int(usage.get("input_tokens", 0)), int(usage.get("output_tokens", 0)) + input_tokens = int(getattr(usage, "input_tokens", 0)) + output_tokens = int(getattr(usage, "output_tokens", 0)) + return input_tokens, output_tokens + + +class OpenAIResponsesClient: + """Thin wrapper around the OpenAI Responses API for this benchmark.""" + + def __init__(self) -> None: + load_dotenv(dotenv_path=ROOT / ".env") + if not os.getenv("OPENAI_API_KEY"): + raise RuntimeError( + "OPENAI_API_KEY is not set. 
    def ask_text(
        self, *, model: str, question: str, context_text: str, temperature: float
    ) -> LLMResult:
        """Call Responses API with text-only input.

        The request is a single user message with three text parts: a fixed
        JSON-only instruction, the question, and the context.

        Args:
            model: OpenAI model name (e.g., "gpt-4o").
            question: User question to answer.
            context_text: Extracted context text from the workbook.
            temperature: Sampling temperature for the response.

        Returns:
            LLMResult containing the model output and usage metadata.
        """
        resp = self.client.responses.create(
            model=model,
            temperature=temperature,
            input=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_text",
                            "text": "You are a strict JSON extraction engine. Output JSON only.",
                        },
                        {"type": "input_text", "text": f"[QUESTION]\n{question}"},
                        {"type": "input_text", "text": f"[CONTEXT]\n{context_text}"},
                    ],
                }
            ],
        )

        text = resp.output_text  # SDK helper
        # The response may lack a usage attribute; token counts then fall back to 0.
        usage = getattr(resp, "usage", None)
        in_tok, out_tok = _extract_usage_tokens(usage)
        cost = estimate_cost_usd(model, in_tok, out_tok)

        # Round-trip through JSON so the stored raw response is a plain dict.
        raw = json.loads(resp.model_dump_json())
        return LLMResult(
            text=text,
            input_tokens=in_tok,
            output_tokens=out_tok,
            cost_usd=cost,
            raw=raw,
        )
Output JSON only.", + }, + {"type": "input_text", "text": f"[QUESTION]\n{question}"}, + ] + for p in image_paths: + content.append({"type": "input_image", "image_url": _png_to_data_url(p)}) + + resp = self.client.responses.create( + model=model, + temperature=temperature, + input=[{"role": "user", "content": content}], + ) + + text = resp.output_text + usage = getattr(resp, "usage", None) + in_tok, out_tok = _extract_usage_tokens(usage) + cost = estimate_cost_usd(model, in_tok, out_tok) + + raw = json.loads(resp.model_dump_json()) + return LLMResult( + text=text, + input_tokens=in_tok, + output_tokens=out_tok, + cost_usd=cost, + raw=raw, + ) + + def ask_markdown( + self, *, model: str, json_text: str, temperature: float + ) -> LLMResult: + """Call Responses API to convert JSON into Markdown. + + Args: + model: OpenAI model name (e.g., "gpt-4o"). + json_text: JSON payload to convert to Markdown. + temperature: Sampling temperature for the response. + + Returns: + LLMResult containing the model output and usage metadata. + """ + instructions = ( + "You are a strict Markdown formatter. 
Output Markdown only.\n" + "Rules:\n" + "- Use '## ' for top-level keys.\n" + "- For lists of scalars, use bullet lists.\n" + "- For lists of objects, use Markdown tables with columns in key order.\n" + "- For nested objects or lists inside table cells, use compact JSON.\n" + ) + resp = self.client.responses.create( + model=model, + temperature=temperature, + input=[ + { + "role": "user", + "content": [ + {"type": "input_text", "text": instructions}, + {"type": "input_text", "text": f"[JSON]\n{json_text}"}, + ], + } + ], + ) + + text = resp.output_text + usage = getattr(resp, "usage", None) + in_tok, out_tok = _extract_usage_tokens(usage) + cost = estimate_cost_usd(model, in_tok, out_tok) + + raw = json.loads(resp.model_dump_json()) + return LLMResult( + text=text, + input_tokens=in_tok, + output_tokens=out_tok, + cost_usd=cost, + raw=raw, + ) + + def ask_markdown_from_text( + self, *, model: str, context_text: str, temperature: float + ) -> LLMResult: + """Call Responses API to convert raw text into Markdown. + + Args: + model: OpenAI model name (e.g., "gpt-4o"). + context_text: Extracted document text to format. + temperature: Sampling temperature for the response. + + Returns: + LLMResult containing the model output and usage metadata. + """ + instructions = ( + "You are a strict Markdown formatter. 
Output Markdown only.\n" + "Rules:\n" + "- Preserve all content from the input.\n" + "- Use headings and lists when they are clearly implied.\n" + "- Use tables when a row/column structure is evident.\n" + "- Do not add or invent information.\n" + ) + resp = self.client.responses.create( + model=model, + temperature=temperature, + input=[ + { + "role": "user", + "content": [ + {"type": "input_text", "text": instructions}, + {"type": "input_text", "text": f"[TEXT]\n{context_text}"}, + ], + } + ], + ) + + text = resp.output_text + usage = getattr(resp, "usage", None) + in_tok, out_tok = _extract_usage_tokens(usage) + cost = estimate_cost_usd(model, in_tok, out_tok) + + raw = json.loads(resp.model_dump_json()) + return LLMResult( + text=text, + input_tokens=in_tok, + output_tokens=out_tok, + cost_usd=cost, + raw=raw, + ) + + def ask_markdown_images( + self, *, model: str, image_paths: list[Path], temperature: float + ) -> LLMResult: + """Call Responses API to convert images into Markdown. + + Args: + model: OpenAI model name (e.g., "gpt-4o"). + image_paths: PNG image paths to include as vision input. + temperature: Sampling temperature for the response. + + Returns: + LLMResult containing the model output and usage metadata. + """ + instructions = ( + "You are a strict Markdown formatter. 
Output Markdown only.\n" + "Rules:\n" + "- Preserve all visible content from the images.\n" + "- Use headings and lists when they are clearly implied.\n" + "- Use tables when a row/column structure is evident.\n" + "- Do not add or invent information.\n" + ) + content: list[dict[str, Any]] = [ + {"type": "input_text", "text": instructions}, + ] + for p in image_paths: + content.append({"type": "input_image", "image_url": _png_to_data_url(p)}) + + resp = self.client.responses.create( + model=model, + temperature=temperature, + input=[{"role": "user", "content": content}], + ) + + text = resp.output_text + usage = getattr(resp, "usage", None) + in_tok, out_tok = _extract_usage_tokens(usage) + cost = estimate_cost_usd(model, in_tok, out_tok) + + raw = json.loads(resp.model_dump_json()) + return LLMResult( + text=text, + input_tokens=in_tok, + output_tokens=out_tok, + cost_usd=cost, + raw=raw, + ) diff --git a/benchmark/src/bench/llm/pricing.py b/benchmark/src/bench/llm/pricing.py new file mode 100644 index 0000000..7124021 --- /dev/null +++ b/benchmark/src/bench/llm/pricing.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +# gpt-4o pricing: per 1M tokens +# Input $2.50 / 1M, Output $10.00 / 1M (cached input not used here) +# Source: model compare page +# https://platform.openai.com/docs/models/compare?model=gpt-4o +# (You will cite this in README/report; code keeps constants.) +GPT4O_INPUT_PER_1M = 2.50 +GPT4O_OUTPUT_PER_1M = 10.00 + +_PRICING_PER_1M: dict[str, tuple[float, float]] = { + "gpt-4o": (GPT4O_INPUT_PER_1M, GPT4O_OUTPUT_PER_1M), +} + + +def estimate_cost_usd(model: str, input_tokens: int, output_tokens: int) -> float: + """Estimate USD cost for a model run when pricing is known.""" + pricing = _PRICING_PER_1M.get(model) + if pricing is None: + # Pricing unknown; keep run going and report 0.0 cost. 
+ return 0.0 + input_per_1m, output_per_1m = pricing + return (input_tokens / 1_000_000) * input_per_1m + ( + output_tokens / 1_000_000 + ) * output_per_1m diff --git a/benchmark/src/bench/manifest.py b/benchmark/src/bench/manifest.py new file mode 100644 index 0000000..19e12e5 --- /dev/null +++ b/benchmark/src/bench/manifest.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from pydantic import BaseModel + + +class RenderConfig(BaseModel): + dpi: int = 200 + max_pages: int = 6 + + +class Case(BaseModel): + id: str + type: str + xlsx: str + question: str + truth: str + sheet_scope: list[str] | None = None + render: RenderConfig = RenderConfig() + + +class Manifest(BaseModel): + cases: list[Case] + + +def load_manifest(path: Path) -> Manifest: + data = json.loads(path.read_text(encoding="utf-8")) + return Manifest(**data) diff --git a/benchmark/src/bench/paths.py b/benchmark/src/bench/paths.py new file mode 100644 index 0000000..ee100e8 --- /dev/null +++ b/benchmark/src/bench/paths.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[2] # benchmark/ +DATA_DIR = ROOT / "data" +RAW_DIR = DATA_DIR / "raw" +TRUTH_DIR = DATA_DIR / "truth" + +OUT_DIR = ROOT / "outputs" +EXTRACTED_DIR = OUT_DIR / "extracted" +PROMPTS_DIR = OUT_DIR / "prompts" +RESPONSES_DIR = OUT_DIR / "responses" +MARKDOWN_DIR = OUT_DIR / "markdown" +MARKDOWN_RESPONSES_DIR = MARKDOWN_DIR / "responses" +MARKDOWN_FULL_DIR = OUT_DIR / "markdown_full" +MARKDOWN_FULL_RESPONSES_DIR = MARKDOWN_FULL_DIR / "responses" +RESULTS_DIR = OUT_DIR / "results" +PLOTS_DIR = OUT_DIR / "plots" +PUBLIC_REPORT = ROOT / "REPORT.md" +RUB_DIR = ROOT / "rub" +RUB_MANIFEST = RUB_DIR / "manifest.json" +RUB_TRUTH_DIR = RUB_DIR / "truth" +RUB_SCHEMA_DIR = RUB_DIR / "schemas" +RUB_OUT_DIR = OUT_DIR / "rub" +RUB_PROMPTS_DIR = RUB_OUT_DIR / "prompts" +RUB_RESPONSES_DIR = RUB_OUT_DIR / "responses" +RUB_RESULTS_DIR = 
RUB_OUT_DIR / "results" + + +def resolve_path(path: str | Path) -> Path: + """Resolve a path relative to the benchmark root when needed. + + Args: + path: Path string or Path instance from the manifest. + + Returns: + Resolved Path anchored to the benchmark root when relative. + """ + candidate = Path(path) + if candidate.is_absolute(): + return candidate + return ROOT / candidate diff --git a/benchmark/src/bench/pipeline/__init__.py b/benchmark/src/bench/pipeline/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/src/bench/pipeline/common.py b/benchmark/src/bench/pipeline/common.py new file mode 100644 index 0000000..e08b8ff --- /dev/null +++ b/benchmark/src/bench/pipeline/common.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import hashlib +import json +from pathlib import Path +from typing import Any + + +def ensure_dir(p: Path) -> None: + p.mkdir(parents=True, exist_ok=True) + + +def sha256_text(s: str) -> str: + return hashlib.sha256(s.encode("utf-8")).hexdigest() + + +def write_text(p: Path, text: str) -> None: + ensure_dir(p.parent) + p.write_text(text, encoding="utf-8") + + +def write_json(p: Path, obj: Any) -> None: + ensure_dir(p.parent) + p.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8") diff --git a/benchmark/src/bench/pipeline/exstruct_adapter.py b/benchmark/src/bench/pipeline/exstruct_adapter.py new file mode 100644 index 0000000..720b7a9 --- /dev/null +++ b/benchmark/src/bench/pipeline/exstruct_adapter.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import logging +from pathlib import Path + +from pydantic import BaseModel + +from exstruct import ( + ExtractionMode, + ExStructEngine, + StructOptions, +) +from exstruct.models import SheetData, WorkbookData + +from .common import write_text + +logger = logging.getLogger(__name__) + + +class ExstructTextConfig(BaseModel): + """Configuration for ExStruct text extraction output.""" + + mode: ExtractionMode = "standard" + 
pretty: bool = False + indent: int | None = None + + +def _filter_workbook_sheets( + workbook: WorkbookData, sheet_scope: list[str] | None +) -> WorkbookData: + """Return a workbook filtered to the requested sheet scope. + + Args: + workbook: Extracted workbook payload from ExStruct. + sheet_scope: Optional list of sheet names to keep. + + Returns: + WorkbookData filtered to the requested sheets, or the original workbook if none match. + """ + if not sheet_scope: + return workbook + sheets: dict[str, SheetData] = { + name: sheet + for name, sheet in workbook.sheets.items() + if name in set(sheet_scope) + } + if not sheets: + logger.warning("No matching sheets found for scope: %s", sheet_scope) + return workbook + return WorkbookData(book_name=workbook.book_name, sheets=sheets) + + +def extract_exstruct( + xlsx_path: Path, + out_txt: Path, + sheet_scope: list[str] | None = None, + *, + config: ExstructTextConfig | None = None, +) -> None: + """Extract workbook with ExStruct and write JSON text for LLM context. + + Args: + xlsx_path: Excel workbook path. + out_txt: Destination text file path. + sheet_scope: Optional list of sheet names to keep. + config: Optional ExStruct text extraction configuration. 
+ """ + resolved_config = config or ExstructTextConfig() + engine = ExStructEngine(options=StructOptions(include_merged_values_in_rows=True)) + workbook = engine.extract(xlsx_path, mode=resolved_config.mode) + workbook = _filter_workbook_sheets(workbook, sheet_scope) + payload = workbook.to_json( + pretty=resolved_config.pretty, indent=resolved_config.indent + ) + + lines = [ + "[DOC_META]", + f"source={xlsx_path.name}", + "method=exstruct", + f"mode={resolved_config.mode}", + "", + "[CONTENT]", + payload, + ] + write_text(out_txt, "\n".join(lines).strip() + "\n") diff --git a/benchmark/src/bench/pipeline/html_text.py b/benchmark/src/bench/pipeline/html_text.py new file mode 100644 index 0000000..3734fb0 --- /dev/null +++ b/benchmark/src/bench/pipeline/html_text.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from pathlib import Path +import subprocess + +from bs4 import BeautifulSoup + +from .common import ensure_dir, write_text + + +def xlsx_to_html(xlsx_path: Path, out_html: Path) -> None: + ensure_dir(out_html.parent) + cmd = [ + "soffice", + "--headless", + "--nologo", + "--nolockcheck", + "--convert-to", + "html", + "--outdir", + str(out_html.parent), + str(xlsx_path), + ] + subprocess.run(cmd, check=True) + produced = out_html.parent / (xlsx_path.stem + ".html") + if not produced.exists(): + produced = out_html.parent / (xlsx_path.stem + ".htm") + produced.replace(out_html) + + +def html_to_text(html_path: Path, out_txt: Path) -> None: + soup = BeautifulSoup(html_path.read_text(encoding="utf-8", errors="ignore"), "lxml") + + # Excel HTMLはテーブルが中心。全テーブルのセルテキストを列挙。 + tables = soup.find_all("table") + lines: list[str] = [] + lines.append("[DOC_META]") + lines.append(f"source={html_path.name}") + lines.append("method=html_text") + lines.append("") + lines.append("[CONTENT]") + + for t_i, table in enumerate(tables, start=1): + lines.append(f"\n# TABLE {t_i}") + rows = table.find_all("tr") + for r in rows: + cells = r.find_all(["td", "th"]) + vals = [] + 
for c in cells: + txt = " ".join(c.get_text(separator=" ", strip=True).split()) + vals.append(txt) + if any(v for v in vals): + lines.append(" | ".join(vals)) + + write_text(out_txt, "\n".join(lines).strip() + "\n") diff --git a/benchmark/src/bench/pipeline/image_render.py b/benchmark/src/bench/pipeline/image_render.py new file mode 100644 index 0000000..dab6c82 --- /dev/null +++ b/benchmark/src/bench/pipeline/image_render.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from pathlib import Path + +import fitz # PyMuPDF + +from .common import ensure_dir +from .pdf_text import xlsx_to_pdf + + +def xlsx_to_pngs_via_pdf( + xlsx_path: Path, out_dir: Path, dpi: int = 200, max_pages: int = 6 +) -> list[Path]: + """ + xlsx -> pdf (LibreOffice) -> png (PyMuPDF render) + 画像は VLM 入力に使う。OCRはしない。 + """ + ensure_dir(out_dir) + tmp_pdf = out_dir / f"{xlsx_path.stem}.pdf" + xlsx_to_pdf(xlsx_path, tmp_pdf) + + paths: list[Path] = [] + with fitz.open(tmp_pdf) as doc: + zoom = dpi / 72.0 + mat = fitz.Matrix(zoom, zoom) + for i in range(min(doc.page_count, max_pages)): + page = doc.load_page(i) + pix = page.get_pixmap(matrix=mat, alpha=False) + p = out_dir / f"page_{i + 1:02d}.png" + pix.save(p) + paths.append(p) + + return paths diff --git a/benchmark/src/bench/pipeline/openpyxl_pandas.py b/benchmark/src/bench/pipeline/openpyxl_pandas.py new file mode 100644 index 0000000..4f4d038 --- /dev/null +++ b/benchmark/src/bench/pipeline/openpyxl_pandas.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from pathlib import Path + +import openpyxl + +from .common import write_text + + +def extract_openpyxl( + xlsx_path: Path, out_txt: Path, sheet_scope: list[str] | None = None +) -> None: + wb = openpyxl.load_workbook(xlsx_path, data_only=True) + try: + sheets = sheet_scope or wb.sheetnames + + lines: list[str] = [] + lines.append("[DOC_META]") + lines.append(f"source={xlsx_path.name}") + lines.append("method=openpyxl") + lines.append("") + lines.append("[CONTENT]") + + for 
sname in sheets: + if sname not in wb.sheetnames: + continue + ws = wb[sname] + lines.append(f"\n# SHEET: {sname}") + max_row = ws.max_row or 1 + max_col = ws.max_column or 1 + + for r in range(1, max_row + 1): + row_cells = [] + for c in range(1, max_col + 1): + v = ws.cell(r, c).value + if v is None: + continue + txt = str(v).strip() + if not txt: + continue + # 座標付きで記録(後で人間が確認しやすい) + row_cells.append(f"R{r}C{c}:{txt}") + if row_cells: + lines.append(" | ".join(row_cells)) + + write_text(out_txt, "\n".join(lines).strip() + "\n") + finally: + wb.close() diff --git a/benchmark/src/bench/pipeline/pdf_text.py b/benchmark/src/bench/pipeline/pdf_text.py new file mode 100644 index 0000000..21d9742 --- /dev/null +++ b/benchmark/src/bench/pipeline/pdf_text.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +import subprocess +from pathlib import Path + +import fitz # PyMuPDF + +from .common import ensure_dir, write_text + + +def xlsx_to_pdf(xlsx_path: Path, out_pdf: Path) -> None: + ensure_dir(out_pdf.parent) + # LibreOffice headless convert + # soffice --headless --convert-to pdf --outdir + cmd = [ + "soffice", + "--headless", + "--nologo", + "--nolockcheck", + "--convert-to", + "pdf", + "--outdir", + str(out_pdf.parent), + str(xlsx_path), + ] + try: + subprocess.run(cmd, check=True, timeout=300) + except subprocess.TimeoutExpired as exc: + raise RuntimeError(f"soffice timed out after 300s: {xlsx_path}") from exc + produced = out_pdf.parent / (xlsx_path.stem + ".pdf") + produced.replace(out_pdf) + + +def pdf_to_text(pdf_path: Path, out_txt: Path) -> None: + parts: list[str] = [] + with fitz.open(pdf_path) as doc: + for i in range(doc.page_count): + page = doc.load_page(i) + parts.append(f"\n# PAGE {i + 1}") + parts.append(page.get_text("text")) + text = "\n".join(parts).strip() + + lines: list[str] = [] + lines.append("[DOC_META]") + lines.append(f"source={pdf_path.name}") + lines.append("method=pdf_text") + lines.append("") + lines.append("[CONTENT]") + 
lines.append(text) + + write_text(out_txt, "\n".join(lines).strip() + "\n") diff --git a/benchmark/src/bench/report_public.py b/benchmark/src/bench/report_public.py new file mode 100644 index 0000000..a4645b6 --- /dev/null +++ b/benchmark/src/bench/report_public.py @@ -0,0 +1,265 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Iterable + +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd +from pydantic import BaseModel + +from .paths import PLOTS_DIR, PUBLIC_REPORT, RESULTS_DIR, RUB_RESULTS_DIR + +matplotlib.use("Agg") + + +class MethodScore(BaseModel): + """Aggregated benchmark scores for a method.""" + + method: str + acc_norm: float + acc_raw: float + acc_md: float + md_precision: float + avg_cost: float + + +class RubScore(BaseModel): + """Aggregated RUB scores for a method.""" + + method: str + rus: float + partial_f1: float + avg_cost: float + + +class ReportData(BaseModel): + """Combined benchmark report data for plotting.""" + + core: list[MethodScore] + rub: list[RubScore] + + +class ChartPaths(BaseModel): + """Generated chart image paths for public report.""" + + core_chart: Path + markdown_chart: Path + rub_chart: Path + + +def _select_methods(methods: Iterable[str]) -> list[str]: + order = ["exstruct", "pdf", "image_vlm", "html", "openpyxl"] + available = {m for m in methods} + return [m for m in order if m in available] + + +def load_report_data() -> ReportData: + """Load aggregated metrics from results CSV files. + + Returns: + ReportData containing core and RUB aggregates. 
+ """ + core_csv = RESULTS_DIR / "results.csv" + if not core_csv.exists(): + raise FileNotFoundError(core_csv) + + core_df = pd.read_csv(core_csv) + core_grouped = ( + core_df.groupby("method") + .agg( + acc_norm=("score_norm", "mean"), + acc_raw=("score_raw", "mean"), + acc_md=("score_md", "mean"), + md_precision=("score_md_precision", "mean"), + avg_cost=("cost_usd", "mean"), + ) + .reset_index() + ) + core_grouped = core_grouped.fillna(0.0) + + core_methods = _select_methods(core_grouped["method"].tolist()) + core_scores = [ + MethodScore( + method=row["method"], + acc_norm=float(row["acc_norm"]), + acc_raw=float(row["acc_raw"]), + acc_md=float(row["acc_md"]), + md_precision=float(row["md_precision"]), + avg_cost=float(row["avg_cost"]), + ) + for _, row in core_grouped.iterrows() + if row["method"] in core_methods + ] + core_scores.sort(key=lambda m: core_methods.index(m.method)) + + rub_csv = RUB_RESULTS_DIR / "rub_results.csv" + if not rub_csv.exists(): + raise FileNotFoundError(rub_csv) + + rub_df = pd.read_csv(rub_csv) + if "track" in rub_df.columns and (rub_df["track"] == "structure_query").any(): + rub_df = rub_df[rub_df["track"] == "structure_query"] + + rub_grouped = ( + rub_df.groupby("method") + .agg( + rus=("score", "mean"), + partial_f1=("partial_f1", "mean"), + avg_cost=("cost_usd", "mean"), + ) + .reset_index() + ) + rub_grouped = rub_grouped.fillna(0.0) + + rub_methods = _select_methods(rub_grouped["method"].tolist()) + rub_scores = [ + RubScore( + method=row["method"], + rus=float(row["rus"]), + partial_f1=float(row["partial_f1"]), + avg_cost=float(row["avg_cost"]), + ) + for _, row in rub_grouped.iterrows() + if row["method"] in rub_methods + ] + rub_scores.sort(key=lambda m: rub_methods.index(m.method)) + + return ReportData(core=core_scores, rub=rub_scores) + + +def _plot_grouped_bar( + *, + title: str, + ylabel: str, + categories: list[str], + series: dict[str, list[float]], + out_path: Path, +) -> None: + """Plot a grouped bar chart. 
+ + Args: + title: Chart title. + ylabel: Y-axis label. + categories: X-axis category labels. + series: Mapping of series label to values. + out_path: Output image path. + """ + num_series = len(series) + width = 0.18 if num_series > 4 else 0.22 + centers = list(range(len(categories))) + + fig, ax = plt.subplots(figsize=(9, 4.5)) + for idx, (label, values) in enumerate(series.items()): + offset = (idx - (num_series - 1) / 2) * width + ax.bar([c + offset for c in centers], values, width=width, label=label) + + ax.set_title(title) + ax.set_ylabel(ylabel) + ax.set_xticks(centers) + ax.set_xticklabels(categories, rotation=0) + ax.set_ylim(0.0, 1.0) + ax.grid(axis="y", linestyle=":", alpha=0.4) + ax.legend(ncol=num_series) + + out_path.parent.mkdir(parents=True, exist_ok=True) + fig.tight_layout() + fig.savefig(out_path, dpi=160) + plt.close(fig) + + +def generate_charts(data: ReportData) -> ChartPaths: + """Generate chart images for the public report. + + Args: + data: Aggregated report data. + + Returns: + ChartPaths with generated image locations. 
+ """ + core_chart = PLOTS_DIR / "core_benchmark.png" + markdown_chart = PLOTS_DIR / "markdown_quality.png" + rub_chart = PLOTS_DIR / "rub_structure_query.png" + + methods = [m.method for m in data.core] + _plot_grouped_bar( + title="Core Benchmark Summary", + ylabel="Score", + categories=methods, + series={ + "acc_norm": [m.acc_norm for m in data.core], + "acc_raw": [m.acc_raw for m in data.core], + "acc_md": [m.acc_md for m in data.core], + }, + out_path=core_chart, + ) + + _plot_grouped_bar( + title="Markdown Evaluation Summary", + ylabel="Score", + categories=methods, + series={ + "acc_md": [m.acc_md for m in data.core], + "md_precision": [m.md_precision for m in data.core], + }, + out_path=markdown_chart, + ) + + rub_methods = [m.method for m in data.rub] + _plot_grouped_bar( + title="RUB Structure Query Summary", + ylabel="Score", + categories=rub_methods, + series={ + "rus": [m.rus for m in data.rub], + "partial_f1": [m.partial_f1 for m in data.rub], + }, + out_path=rub_chart, + ) + + return ChartPaths( + core_chart=core_chart, + markdown_chart=markdown_chart, + rub_chart=rub_chart, + ) + + +def update_public_report(chart_paths: ChartPaths) -> Path: + """Insert chart images into REPORT.md. + + Args: + chart_paths: Generated chart paths. + + Returns: + Path to updated report. 
+ """ + report_path = PUBLIC_REPORT + report_text = ( + report_path.read_text(encoding="utf-8") if report_path.exists() else "" + ) + + rel_core = chart_paths.core_chart.relative_to(report_path.parent) + rel_markdown = chart_paths.markdown_chart.relative_to(report_path.parent) + rel_rub = chart_paths.rub_chart.relative_to(report_path.parent) + + block_lines = [ + "", + "## Charts", + "", + f"![Core Benchmark Summary]({rel_core.as_posix()})", + f"![Markdown Evaluation Summary]({rel_markdown.as_posix()})", + f"![RUB Structure Query Summary]({rel_rub.as_posix()})", + "", + "", + ] + block = "\n".join(block_lines) + + if "" in report_text and "" in report_text: + pre, _ = report_text.split("", 1) + _, post = report_text.split("", 1) + new_text = pre.rstrip() + "\n" + block + post.lstrip() + else: + new_text = report_text.rstrip() + "\n\n" + block + + report_path.write_text(new_text, encoding="utf-8") + return report_path diff --git a/benchmark/src/bench/rub/__init__.py b/benchmark/src/bench/rub/__init__.py new file mode 100644 index 0000000..6c1f1c2 --- /dev/null +++ b/benchmark/src/bench/rub/__init__.py @@ -0,0 +1 @@ +"""RUB (Reconstruction Utility Benchmark) helpers.""" diff --git a/benchmark/src/bench/rub/manifest.py b/benchmark/src/bench/rub/manifest.py new file mode 100644 index 0000000..503c20c --- /dev/null +++ b/benchmark/src/bench/rub/manifest.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from pydantic import BaseModel, Field + + +class RubTask(BaseModel): + """RUB task definition.""" + + id: str + track: str = Field("reconstruction", description="Evaluation track name.") + source_case_id: str = Field(..., description="Case id for Stage A Markdown.") + type: str + question: str + truth: str + schema_path: str | None = None + unordered_paths: list[str] | None = None + + +class RubManifest(BaseModel): + """RUB manifest container.""" + + tasks: list[RubTask] + + +def load_rub_manifest(path: Path) -> 
RubManifest: + """Load a RUB manifest file. + + Args: + path: Path to rub/manifest.json. + + Returns: + Parsed RubManifest. + """ + data = json.loads(path.read_text(encoding="utf-8-sig")) + return RubManifest(**data) diff --git a/benchmark/src/bench/rub/normalize.py b/benchmark/src/bench/rub/normalize.py new file mode 100644 index 0000000..d3a4a12 --- /dev/null +++ b/benchmark/src/bench/rub/normalize.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +import json +import re +import unicodedata +from typing import Any + +from pydantic import BaseModel + + +class NormalizedPayload(BaseModel): + """Normalized JSON payload for deterministic comparison.""" + + value: Any + + +_WS_PATTERN = re.compile(r"\s+") +_ZERO_WIDTH_PATTERN = re.compile(r"[\u200b\u200c\u200d\ufeff]") +_NON_ASCII_SPACE_PATTERN = re.compile(r"(?<=[^\x00-\x7F])\s+(?=[^\x00-\x7F])") + + +def _normalize_text(value: str) -> str: + """Normalize a string for comparison. + + Args: + value: Raw string value. + + Returns: + Normalized string. + """ + text = value.replace("\r\n", "\n").replace("\r", "\n") + text = unicodedata.normalize("NFKC", text) + text = text.replace("\u3000", " ") + text = _ZERO_WIDTH_PATTERN.sub("", text) + text = text.strip() + text = _WS_PATTERN.sub(" ", text) + text = _NON_ASCII_SPACE_PATTERN.sub("", text) + return text.strip() + + +def _maybe_parse_number(value: str) -> int | float | str: + """Parse a numeric string when possible. + + Args: + value: String value. + + Returns: + int/float when value is numeric, otherwise original string. + """ + if re.fullmatch(r"-?\d+", value): + return int(value) + if re.fullmatch(r"-?\d+\.\d+", value): + return float(value) + return value + + +def _canonical_json(value: Any) -> str: + """Return a canonical JSON string for sorting. + + Args: + value: JSON-serializable value. + + Returns: + Canonical JSON string. 
+ """ + return json.dumps(value, ensure_ascii=False, sort_keys=True, separators=(",", ":")) + + +def _normalize_value(value: Any, *, unordered_paths: set[str], path: str) -> Any: + """Normalize a JSON-like value recursively. + + Args: + value: Input value. + unordered_paths: Set of list paths to sort. + path: Dot-separated path for the current value. + + Returns: + Normalized value. + """ + if isinstance(value, dict): + normalized: dict[str, Any] = {} + for key in sorted(value.keys()): + child_path = f"{path}.{key}" if path else key + normalized[key] = _normalize_value( + value[key], unordered_paths=unordered_paths, path=child_path + ) + return normalized + if isinstance(value, list): + normalized_items = [ + _normalize_value(item, unordered_paths=unordered_paths, path=path) + for item in value + ] + if path in unordered_paths: + normalized_items.sort(key=_canonical_json) + return normalized_items + if isinstance(value, str): + return _maybe_parse_number(_normalize_text(value)) + return value + + +def normalize_payload( + payload: Any, *, unordered_paths: list[str] | None = None +) -> NormalizedPayload: + """Normalize a JSON payload with deterministic rules. + + Args: + payload: Raw JSON object. + unordered_paths: Dot paths for lists that should be treated as unordered. + + Returns: + NormalizedPayload with normalized value. 
+ """ + path_set = set(unordered_paths or []) + normalized = _normalize_value(payload, unordered_paths=path_set, path="") + return NormalizedPayload(value=normalized) diff --git a/benchmark/src/bench/rub/score.py b/benchmark/src/bench/rub/score.py new file mode 100644 index 0000000..bc7f9c7 --- /dev/null +++ b/benchmark/src/bench/rub/score.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +from collections import Counter +from typing import Any + +from pydantic import BaseModel + +from .normalize import normalize_payload + + +class RubScore(BaseModel): + """Score result for a RUB task.""" + + score: float + ok: bool + error: str | None = None + + +class RubPartialScore(BaseModel): + """Partial match score for a RUB task.""" + + precision: float + recall: float + f1: float + + +def _tokenize_scalar(value: Any) -> str | None: + """Convert a scalar to a comparable token. + + Args: + value: Scalar value. + + Returns: + Token string or None for empty values. + """ + if value is None: + return None + if isinstance(value, str): + token = value.strip() + return token or None + return str(value) + + +def _flatten_tokens(value: Any) -> list[str]: + """Flatten a JSON-like value into scalar tokens. + + Args: + value: Normalized JSON value. + + Returns: + List of scalar tokens. + """ + tokens: list[str] = [] + if isinstance(value, dict): + for v in value.values(): + tokens.extend(_flatten_tokens(v)) + return tokens + if isinstance(value, list): + for item in value: + tokens.extend(_flatten_tokens(item)) + return tokens + token = _tokenize_scalar(value) + if token is not None: + tokens.append(token) + return tokens + + +def score_exact( + truth: Any, pred: Any, *, unordered_paths: list[str] | None = None +) -> RubScore: + """Compute exact-match score after normalization. + + Args: + truth: Ground-truth JSON object. + pred: Predicted JSON object. + unordered_paths: Dot paths for unordered list comparison. + + Returns: + RubScore with 1.0 for match, 0.0 otherwise. 
+ """ + truth_norm = normalize_payload(truth, unordered_paths=unordered_paths).value + pred_norm = normalize_payload(pred, unordered_paths=unordered_paths).value + ok = truth_norm == pred_norm + return RubScore(score=1.0 if ok else 0.0, ok=ok) + + +def score_partial( + truth: Any, pred: Any, *, unordered_paths: list[str] | None = None +) -> RubPartialScore: + """Compute partial-match precision/recall/F1 after normalization. + + Args: + truth: Ground-truth JSON object. + pred: Predicted JSON object. + unordered_paths: Dot paths for unordered list comparison. + + Returns: + RubPartialScore with precision/recall/F1. + """ + truth_norm = normalize_payload(truth, unordered_paths=unordered_paths).value + pred_norm = normalize_payload(pred, unordered_paths=unordered_paths).value + + truth_tokens = _flatten_tokens(truth_norm) + pred_tokens = _flatten_tokens(pred_norm) + + truth_counts = Counter(truth_tokens) + pred_counts = Counter(pred_tokens) + overlap = sum((truth_counts & pred_counts).values()) + + truth_total = sum(truth_counts.values()) + pred_total = sum(pred_counts.values()) + + if pred_total == 0: + precision = 1.0 if truth_total == 0 else 0.0 + else: + precision = overlap / pred_total + if truth_total == 0: + recall = 1.0 if pred_total == 0 else 0.0 + else: + recall = overlap / truth_total + + if precision + recall == 0: + f1 = 0.0 + else: + f1 = 2 * precision * recall / (precision + recall) + + return RubPartialScore(precision=precision, recall=recall, f1=f1) diff --git a/codecov.yml b/codecov.yml index 8998011..4b9abbd 100644 --- a/codecov.yml +++ b/codecov.yml @@ -8,6 +8,8 @@ coverage: default: target: auto threshold: 1% +ignore: + - "benchmark/**" flags: unit: paths: diff --git a/docs/README.en.md b/docs/README.en.md index c63ea03..c7a99cb 100644 --- a/docs/README.en.md +++ b/docs/README.en.md @@ -1,6 +1,6 @@ # ExStruct — Excel Structured Extraction Engine -[![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI 
Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/harumiWeb/exstruct/graph/badge.svg?token=2XI1O8TTA9)](https://codecov.io/gh/harumiWeb/exstruct) +[![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/harumiWeb/exstruct/graph/badge.svg?token=2XI1O8TTA9)](https://codecov.io/gh/harumiWeb/exstruct) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/harumiWeb/exstruct) ![ExStruct Image](assets/icon.webp) @@ -19,6 +19,15 @@ ExStruct reads Excel workbooks and outputs structured data (cells, table candida - **CLI rendering** (Excel required): optional PDF 
and per-sheet PNGs. - **Graceful fallback**: if Excel COM is unavailable, extraction falls back to cells + table candidates without crashing. +## Benchmark + +![Benchmark Chart](../benchmark/public/plots/markdown_quality.png) + +This repository includes benchmark reports focused on RAG/LLM preprocessing of Excel documents. +We track two perspectives: (1) core extraction accuracy and (2) reconstruction utility for downstream structure queries (RUB). +See `benchmark/REPORT.md` for the working summary and `benchmark/public/REPORT.md` for the public bundle. +Current results are based on n=12 cases and will be expanded. + ## Installation ```bash diff --git a/docs/README.ja.md b/docs/README.ja.md index 17595ef..f19b011 100644 --- a/docs/README.ja.md +++ b/docs/README.ja.md @@ -1,6 +1,6 @@ # ExStruct — Excel 構造化抽出エンジン -[![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/harumiWeb/exstruct/graph/badge.svg?token=2XI1O8TTA9)](https://codecov.io/gh/harumiWeb/exstruct) +[![PyPI version](https://badge.fury.io/py/exstruct.svg)](https://pypi.org/project/exstruct/) [![PyPI 
Downloads](https://static.pepy.tech/personalized-badge/exstruct?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads)](https://pepy.tech/projects/exstruct) ![Licence: BSD-3-Clause](https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square) [![pytest](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml/badge.svg)](https://github.com/harumiWeb/exstruct/actions/workflows/pytest.yml) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/e081cb4f634e4175b259eb7c34f54f60)](https://app.codacy.com/gh/harumiWeb/exstruct/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/harumiWeb/exstruct/graph/badge.svg?token=2XI1O8TTA9)](https://codecov.io/gh/harumiWeb/exstruct) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/harumiWeb/exstruct) ![ExStruct Image](/assets/icon.webp) @@ -17,6 +17,15 @@ ExStruct は Excel ワークブックを読み取り、構造化データ(セ - **CLI レンダリング**(Excel 必須): PDF とシート画像を生成可能。 - **安全なフォールバック**: Excel COM 不在でもプロセスは落ちず、セル+テーブル候補+印刷範囲に切り替え(図形・チャートは空)。 +## ベンチマーク + +![Benchmark Chart](../benchmark/public/plots/markdown_quality.png) + +このリポジトリには、ExcelドキュメントのRAG/LLM前処理に焦点を当てたベンチマークレポートが含まれています。 +私たちは2つの視点から追跡しています。(1) コア抽出精度と (2) 下流構造クエリのための再構築ユーティリティ (RUB) です。 +作業サマリーについては`benchmark/REPORT.md`を、公開バンドルについては`benchmark/public/REPORT.md`を参照してください。 +現在の結果はn=12のケースに基づいており、今後さらに拡張される予定です。 + ## インストール ```bash diff --git a/pyproject.toml b/pyproject.toml index 6fdcbeb..128e8ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ omit = [ target-version = "py311" src = ["exstruct"] fix = true +exclude = ["benchmark/**"] # 静的解析ルール [tool.ruff.lint] @@ -113,6 +114,7 @@ max-complexity = 12 [tool.mypy] packages = ["exstruct"] python_version = "3.11" +exclude = "benchmark/.*" # 外部ライブラリの型情報がない場合は無視 ignore_missing_imports = true @@ -141,3 +143,8 @@ codecov-unit = "codecov-cli upload-process -f coverage.xml -F unit -C 
%CODECOV_S codecov-com = "codecov-cli upload-process -f coverage.xml -F com -C %CODECOV_SHA% -t %CODECOV_TOKEN%" docs = "mkdocs serve" build-docs = "mkdocs build && python scripts/gen_json_schema.py && python scripts/gen_model_docs.py" + +[tool.uv.workspace] +members = [ + "benchmark", +] diff --git a/uv.lock b/uv.lock index 56c5b3d..59ca7df 100644 --- a/uv.lock +++ b/uv.lock @@ -6,6 +6,12 @@ resolution-markers = [ "python_full_version < '3.12'", ] +[manifest] +members = [ + "benchmark", + "exstruct", +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -79,6 +85,66 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/e3/a4fa1946722c4c7b063cc25043a12d9ce9b4323777f89643be74cef2993c/backrefs-6.1-py39-none-any.whl", hash = "sha256:a9e99b8a4867852cad177a6430e31b0f6e495d65f8c6c134b68c14c3c95bf4b0", size = 381058, upload-time = "2025-11-15T14:52:06.698Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, +] + +[[package]] +name = "benchmark" +version = "0.1.0" +source = { virtual = "benchmark" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "exstruct" }, + { name = "lxml" }, + { name = "openai" }, + { name = "openpyxl" }, + { name = "pandas" }, + { name = "pydantic" }, + { name = "pymupdf" }, + { 
name = "python-dotenv" }, + { name = "rich" }, + { name = "tabulate" }, + { name = "typer" }, +] + +[package.dev-dependencies] +dev = [ + { name = "ruff" }, + { name = "taskipy" }, +] + +[package.metadata] +requires-dist = [ + { name = "beautifulsoup4", specifier = ">=4.14.3" }, + { name = "exstruct", editable = "." }, + { name = "lxml", specifier = ">=6.0.2" }, + { name = "openai", specifier = ">=2.15.0" }, + { name = "openpyxl", specifier = ">=3.1.5" }, + { name = "pandas", specifier = ">=2.3.3" }, + { name = "pydantic", specifier = ">=2.12.5" }, + { name = "pymupdf", specifier = ">=1.26.7" }, + { name = "python-dotenv", specifier = ">=1.2.1" }, + { name = "rich", specifier = ">=14.2.0" }, + { name = "tabulate", specifier = ">=0.9.0" }, + { name = "typer", specifier = ">=0.21.1" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "ruff", specifier = ">=0.14.8" }, + { name = "taskipy", specifier = ">=1.14.1" }, +] + [[package]] name = "certifi" version = "2025.11.12" @@ -441,6 +507,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = 
"2023-12-24T09:54:30.421Z" }, +] + [[package]] name = "et-xmlfile" version = "2.0.0" @@ -452,7 +527,7 @@ wheels = [ [[package]] name = "exstruct" -version = "0.4.1" +version = "0.4.2" source = { editable = "." } dependencies = [ { name = "numpy" }, @@ -505,8 +580,8 @@ dev = [ requires-dist = [ { name = "httpx", marker = "extra == 'all'", specifier = ">=0.27,<1.0" }, { name = "httpx", marker = "extra == 'mcp'", specifier = ">=0.27,<1.0" }, - { name = "mcp", marker = "extra == 'all'", specifier = ">=1.6.0,<2.0.0" }, - { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.6.0,<2.0.0" }, + { name = "mcp", marker = "extra == 'all'", specifier = ">=1.25.0,<2.0.0" }, + { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.25.0,<2.0.0" }, { name = "numpy", specifier = ">=2.3.5" }, { name = "openpyxl", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=2.3.3" }, @@ -736,6 +811,91 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jiter" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/45/9d/e0660989c1370e25848bb4c52d061c71837239738ad937e83edca174c273/jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b", size = 168294, upload-time = "2025-11-09T20:49:23.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/f9/eaca4633486b527ebe7e681c431f529b63fe2709e7c5242fc0f43f77ce63/jiter-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8f8a7e317190b2c2d60eb2e8aa835270b008139562d70fe732e1c0020ec53c9", size = 316435, upload-time = "2025-11-09T20:47:02.087Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/c1/40c9f7c22f5e6ff715f28113ebaba27ab85f9af2660ad6e1dd6425d14c19/jiter-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2218228a077e784c6c8f1a8e5d6b8cb1dea62ce25811c356364848554b2056cd", size = 320548, upload-time = "2025-11-09T20:47:03.409Z" }, + { url = "https://files.pythonhosted.org/packages/6b/1b/efbb68fe87e7711b00d2cfd1f26bb4bfc25a10539aefeaa7727329ffb9cb/jiter-0.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9354ccaa2982bf2188fd5f57f79f800ef622ec67beb8329903abf6b10da7d423", size = 351915, upload-time = "2025-11-09T20:47:05.171Z" }, + { url = "https://files.pythonhosted.org/packages/15/2d/c06e659888c128ad1e838123d0638f0efad90cc30860cb5f74dd3f2fc0b3/jiter-0.12.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8f2607185ea89b4af9a604d4c7ec40e45d3ad03ee66998b031134bc510232bb7", size = 368966, upload-time = "2025-11-09T20:47:06.508Z" }, + { url = "https://files.pythonhosted.org/packages/6b/20/058db4ae5fb07cf6a4ab2e9b9294416f606d8e467fb74c2184b2a1eeacba/jiter-0.12.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a585a5e42d25f2e71db5f10b171f5e5ea641d3aa44f7df745aa965606111cc2", size = 482047, upload-time = "2025-11-09T20:47:08.382Z" }, + { url = "https://files.pythonhosted.org/packages/49/bb/dc2b1c122275e1de2eb12905015d61e8316b2f888bdaac34221c301495d6/jiter-0.12.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd9e21d34edff5a663c631f850edcb786719c960ce887a5661e9c828a53a95d9", size = 380835, upload-time = "2025-11-09T20:47:09.81Z" }, + { url = "https://files.pythonhosted.org/packages/23/7d/38f9cd337575349de16da575ee57ddb2d5a64d425c9367f5ef9e4612e32e/jiter-0.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a612534770470686cd5431478dc5a1b660eceb410abade6b1b74e320ca98de6", size = 364587, upload-time = "2025-11-09T20:47:11.529Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/a3/b13e8e61e70f0bb06085099c4e2462647f53cc2ca97614f7fedcaa2bb9f3/jiter-0.12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3985aea37d40a908f887b34d05111e0aae822943796ebf8338877fee2ab67725", size = 390492, upload-time = "2025-11-09T20:47:12.993Z" }, + { url = "https://files.pythonhosted.org/packages/07/71/e0d11422ed027e21422f7bc1883c61deba2d9752b720538430c1deadfbca/jiter-0.12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b1207af186495f48f72529f8d86671903c8c10127cac6381b11dddc4aaa52df6", size = 522046, upload-time = "2025-11-09T20:47:14.6Z" }, + { url = "https://files.pythonhosted.org/packages/9f/59/b968a9aa7102a8375dbbdfbd2aeebe563c7e5dddf0f47c9ef1588a97e224/jiter-0.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef2fb241de583934c9915a33120ecc06d94aa3381a134570f59eed784e87001e", size = 513392, upload-time = "2025-11-09T20:47:16.011Z" }, + { url = "https://files.pythonhosted.org/packages/ca/e4/7df62002499080dbd61b505c5cb351aa09e9959d176cac2aa8da6f93b13b/jiter-0.12.0-cp311-cp311-win32.whl", hash = "sha256:453b6035672fecce8007465896a25b28a6b59cfe8fbc974b2563a92f5a92a67c", size = 206096, upload-time = "2025-11-09T20:47:17.344Z" }, + { url = "https://files.pythonhosted.org/packages/bb/60/1032b30ae0572196b0de0e87dce3b6c26a1eff71aad5fe43dee3082d32e0/jiter-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:ca264b9603973c2ad9435c71a8ec8b49f8f715ab5ba421c85a51cde9887e421f", size = 204899, upload-time = "2025-11-09T20:47:19.365Z" }, + { url = "https://files.pythonhosted.org/packages/49/d5/c145e526fccdb834063fb45c071df78b0cc426bbaf6de38b0781f45d956f/jiter-0.12.0-cp311-cp311-win_arm64.whl", hash = "sha256:cb00ef392e7d684f2754598c02c409f376ddcef857aae796d559e6cacc2d78a5", size = 188070, upload-time = "2025-11-09T20:47:20.75Z" }, + { url = "https://files.pythonhosted.org/packages/92/c9/5b9f7b4983f1b542c64e84165075335e8a236fa9e2ea03a0c79780062be8/jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", 
hash = "sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37", size = 314449, upload-time = "2025-11-09T20:47:22.999Z" }, + { url = "https://files.pythonhosted.org/packages/98/6e/e8efa0e78de00db0aee82c0cf9e8b3f2027efd7f8a71f859d8f4be8e98ef/jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274", size = 319855, upload-time = "2025-11-09T20:47:24.779Z" }, + { url = "https://files.pythonhosted.org/packages/20/26/894cd88e60b5d58af53bec5c6759d1292bd0b37a8b5f60f07abf7a63ae5f/jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3", size = 350171, upload-time = "2025-11-09T20:47:26.469Z" }, + { url = "https://files.pythonhosted.org/packages/f5/27/a7b818b9979ac31b3763d25f3653ec3a954044d5e9f5d87f2f247d679fd1/jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf", size = 365590, upload-time = "2025-11-09T20:47:27.918Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7e/e46195801a97673a83746170b17984aa8ac4a455746354516d02ca5541b4/jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1", size = 479462, upload-time = "2025-11-09T20:47:29.654Z" }, + { url = "https://files.pythonhosted.org/packages/ca/75/f833bfb009ab4bd11b1c9406d333e3b4357709ed0570bb48c7c06d78c7dd/jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df", size = 378983, upload-time = "2025-11-09T20:47:31.026Z" }, + { url = "https://files.pythonhosted.org/packages/71/b3/7a69d77943cc837d30165643db753471aff5df39692d598da880a6e51c24/jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403", size = 361328, upload-time = "2025-11-09T20:47:33.286Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ac/a78f90caf48d65ba70d8c6efc6f23150bc39dc3389d65bbec2a95c7bc628/jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126", size = 386740, upload-time = "2025-11-09T20:47:34.703Z" }, + { url = "https://files.pythonhosted.org/packages/39/b6/5d31c2cc8e1b6a6bcf3c5721e4ca0a3633d1ab4754b09bc7084f6c4f5327/jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9", size = 520875, upload-time = "2025-11-09T20:47:36.058Z" }, + { url = "https://files.pythonhosted.org/packages/30/b5/4df540fae4e9f68c54b8dab004bd8c943a752f0b00efd6e7d64aa3850339/jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86", size = 511457, upload-time = "2025-11-09T20:47:37.932Z" }, + { url = "https://files.pythonhosted.org/packages/07/65/86b74010e450a1a77b2c1aabb91d4a91dd3cd5afce99f34d75fd1ac64b19/jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44", size = 204546, upload-time = "2025-11-09T20:47:40.47Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c7/6659f537f9562d963488e3e55573498a442503ced01f7e169e96a6110383/jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb", size = 205196, upload-time = "2025-11-09T20:47:41.794Z" }, + { url = "https://files.pythonhosted.org/packages/21/f4/935304f5169edadfec7f9c01eacbce4c90bb9a82035ac1de1f3bd2d40be6/jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789", size = 186100, upload-time = "2025-11-09T20:47:43.007Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/a6/97209693b177716e22576ee1161674d1d58029eb178e01866a0422b69224/jiter-0.12.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6cc49d5130a14b732e0612bc76ae8db3b49898732223ef8b7599aa8d9810683e", size = 313658, upload-time = "2025-11-09T20:47:44.424Z" }, + { url = "https://files.pythonhosted.org/packages/06/4d/125c5c1537c7d8ee73ad3d530a442d6c619714b95027143f1b61c0b4dfe0/jiter-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:37f27a32ce36364d2fa4f7fdc507279db604d27d239ea2e044c8f148410defe1", size = 318605, upload-time = "2025-11-09T20:47:45.973Z" }, + { url = "https://files.pythonhosted.org/packages/99/bf/a840b89847885064c41a5f52de6e312e91fa84a520848ee56c97e4fa0205/jiter-0.12.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc0944aa3d4b4773e348cda635252824a78f4ba44328e042ef1ff3f6080d1cf", size = 349803, upload-time = "2025-11-09T20:47:47.535Z" }, + { url = "https://files.pythonhosted.org/packages/8a/88/e63441c28e0db50e305ae23e19c1d8fae012d78ed55365da392c1f34b09c/jiter-0.12.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da25c62d4ee1ffbacb97fac6dfe4dcd6759ebdc9015991e92a6eae5816287f44", size = 365120, upload-time = "2025-11-09T20:47:49.284Z" }, + { url = "https://files.pythonhosted.org/packages/0a/7c/49b02714af4343970eb8aca63396bc1c82fa01197dbb1e9b0d274b550d4e/jiter-0.12.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:048485c654b838140b007390b8182ba9774621103bd4d77c9c3f6f117474ba45", size = 479918, upload-time = "2025-11-09T20:47:50.807Z" }, + { url = "https://files.pythonhosted.org/packages/69/ba/0a809817fdd5a1db80490b9150645f3aae16afad166960bcd562be194f3b/jiter-0.12.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:635e737fbb7315bef0037c19b88b799143d2d7d3507e61a76751025226b3ac87", size = 379008, upload-time = "2025-11-09T20:47:52.211Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/c3/c9fc0232e736c8877d9e6d83d6eeb0ba4e90c6c073835cc2e8f73fdeef51/jiter-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e017c417b1ebda911bd13b1e40612704b1f5420e30695112efdbed8a4b389ed", size = 361785, upload-time = "2025-11-09T20:47:53.512Z" }, + { url = "https://files.pythonhosted.org/packages/96/61/61f69b7e442e97ca6cd53086ddc1cf59fb830549bc72c0a293713a60c525/jiter-0.12.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:89b0bfb8b2bf2351fba36bb211ef8bfceba73ef58e7f0c68fb67b5a2795ca2f9", size = 386108, upload-time = "2025-11-09T20:47:54.893Z" }, + { url = "https://files.pythonhosted.org/packages/e9/2e/76bb3332f28550c8f1eba3bf6e5efe211efda0ddbbaf24976bc7078d42a5/jiter-0.12.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f5aa5427a629a824a543672778c9ce0c5e556550d1569bb6ea28a85015287626", size = 519937, upload-time = "2025-11-09T20:47:56.253Z" }, + { url = "https://files.pythonhosted.org/packages/84/d6/fa96efa87dc8bff2094fb947f51f66368fa56d8d4fc9e77b25d7fbb23375/jiter-0.12.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed53b3d6acbcb0fd0b90f20c7cb3b24c357fe82a3518934d4edfa8c6898e498c", size = 510853, upload-time = "2025-11-09T20:47:58.32Z" }, + { url = "https://files.pythonhosted.org/packages/8a/28/93f67fdb4d5904a708119a6ab58a8f1ec226ff10a94a282e0215402a8462/jiter-0.12.0-cp313-cp313-win32.whl", hash = "sha256:4747de73d6b8c78f2e253a2787930f4fffc68da7fa319739f57437f95963c4de", size = 204699, upload-time = "2025-11-09T20:47:59.686Z" }, + { url = "https://files.pythonhosted.org/packages/c4/1f/30b0eb087045a0abe2a5c9c0c0c8da110875a1d3be83afd4a9a4e548be3c/jiter-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:e25012eb0c456fcc13354255d0338cd5397cce26c77b2832b3c4e2e255ea5d9a", size = 204258, upload-time = "2025-11-09T20:48:01.01Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/f4/2b4daf99b96bce6fc47971890b14b2a36aef88d7beb9f057fafa032c6141/jiter-0.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:c97b92c54fe6110138c872add030a1f99aea2401ddcdaa21edf74705a646dd60", size = 185503, upload-time = "2025-11-09T20:48:02.35Z" }, + { url = "https://files.pythonhosted.org/packages/39/ca/67bb15a7061d6fe20b9b2a2fd783e296a1e0f93468252c093481a2f00efa/jiter-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:53839b35a38f56b8be26a7851a48b89bc47e5d88e900929df10ed93b95fea3d6", size = 317965, upload-time = "2025-11-09T20:48:03.783Z" }, + { url = "https://files.pythonhosted.org/packages/18/af/1788031cd22e29c3b14bc6ca80b16a39a0b10e611367ffd480c06a259831/jiter-0.12.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f669548e55c91ab47fef8bddd9c954dab1938644e715ea49d7e117015110a4", size = 345831, upload-time = "2025-11-09T20:48:05.55Z" }, + { url = "https://files.pythonhosted.org/packages/05/17/710bf8472d1dff0d3caf4ced6031060091c1320f84ee7d5dcbed1f352417/jiter-0.12.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:351d54f2b09a41600ffea43d081522d792e81dcfb915f6d2d242744c1cc48beb", size = 361272, upload-time = "2025-11-09T20:48:06.951Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f1/1dcc4618b59761fef92d10bcbb0b038b5160be653b003651566a185f1a5c/jiter-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2a5e90604620f94bf62264e7c2c038704d38217b7465b863896c6d7c902b06c7", size = 204604, upload-time = "2025-11-09T20:48:08.328Z" }, + { url = "https://files.pythonhosted.org/packages/d9/32/63cb1d9f1c5c6632a783c0052cde9ef7ba82688f7065e2f0d5f10a7e3edb/jiter-0.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:88ef757017e78d2860f96250f9393b7b577b06a956ad102c29c8237554380db3", size = 185628, upload-time = "2025-11-09T20:48:09.572Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/99/45c9f0dbe4a1416b2b9a8a6d1236459540f43d7fb8883cff769a8db0612d/jiter-0.12.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c46d927acd09c67a9fb1416df45c5a04c27e83aae969267e98fba35b74e99525", size = 312478, upload-time = "2025-11-09T20:48:10.898Z" }, + { url = "https://files.pythonhosted.org/packages/4c/a7/54ae75613ba9e0f55fcb0bc5d1f807823b5167cc944e9333ff322e9f07dd/jiter-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:774ff60b27a84a85b27b88cd5583899c59940bcc126caca97eb2a9df6aa00c49", size = 318706, upload-time = "2025-11-09T20:48:12.266Z" }, + { url = "https://files.pythonhosted.org/packages/59/31/2aa241ad2c10774baf6c37f8b8e1f39c07db358f1329f4eb40eba179c2a2/jiter-0.12.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5433fab222fb072237df3f637d01b81f040a07dcac1cb4a5c75c7aa9ed0bef1", size = 351894, upload-time = "2025-11-09T20:48:13.673Z" }, + { url = "https://files.pythonhosted.org/packages/54/4f/0f2759522719133a9042781b18cc94e335b6d290f5e2d3e6899d6af933e3/jiter-0.12.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8c593c6e71c07866ec6bfb790e202a833eeec885022296aff6b9e0b92d6a70e", size = 365714, upload-time = "2025-11-09T20:48:15.083Z" }, + { url = "https://files.pythonhosted.org/packages/dc/6f/806b895f476582c62a2f52c453151edd8a0fde5411b0497baaa41018e878/jiter-0.12.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:90d32894d4c6877a87ae00c6b915b609406819dce8bc0d4e962e4de2784e567e", size = 478989, upload-time = "2025-11-09T20:48:16.706Z" }, + { url = "https://files.pythonhosted.org/packages/86/6c/012d894dc6e1033acd8db2b8346add33e413ec1c7c002598915278a37f79/jiter-0.12.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:798e46eed9eb10c3adbbacbd3bdb5ecd4cf7064e453d00dbef08802dae6937ff", size = 378615, upload-time = "2025-11-09T20:48:18.614Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/30/d718d599f6700163e28e2c71c0bbaf6dace692e7df2592fd793ac9276717/jiter-0.12.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3f1368f0a6719ea80013a4eb90ba72e75d7ea67cfc7846db2ca504f3df0169a", size = 364745, upload-time = "2025-11-09T20:48:20.117Z" }, + { url = "https://files.pythonhosted.org/packages/8f/85/315b45ce4b6ddc7d7fceca24068543b02bdc8782942f4ee49d652e2cc89f/jiter-0.12.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65f04a9d0b4406f7e51279710b27484af411896246200e461d80d3ba0caa901a", size = 386502, upload-time = "2025-11-09T20:48:21.543Z" }, + { url = "https://files.pythonhosted.org/packages/74/0b/ce0434fb40c5b24b368fe81b17074d2840748b4952256bab451b72290a49/jiter-0.12.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:fd990541982a24281d12b67a335e44f117e4c6cbad3c3b75c7dea68bf4ce3a67", size = 519845, upload-time = "2025-11-09T20:48:22.964Z" }, + { url = "https://files.pythonhosted.org/packages/e8/a3/7a7a4488ba052767846b9c916d208b3ed114e3eb670ee984e4c565b9cf0d/jiter-0.12.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:b111b0e9152fa7df870ecaebb0bd30240d9f7fff1f2003bcb4ed0f519941820b", size = 510701, upload-time = "2025-11-09T20:48:24.483Z" }, + { url = "https://files.pythonhosted.org/packages/c3/16/052ffbf9d0467b70af24e30f91e0579e13ded0c17bb4a8eb2aed3cb60131/jiter-0.12.0-cp314-cp314-win32.whl", hash = "sha256:a78befb9cc0a45b5a5a0d537b06f8544c2ebb60d19d02c41ff15da28a9e22d42", size = 205029, upload-time = "2025-11-09T20:48:25.749Z" }, + { url = "https://files.pythonhosted.org/packages/e4/18/3cf1f3f0ccc789f76b9a754bdb7a6977e5d1d671ee97a9e14f7eb728d80e/jiter-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:e1fe01c082f6aafbe5c8faf0ff074f38dfb911d53f07ec333ca03f8f6226debf", size = 204960, upload-time = "2025-11-09T20:48:27.415Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/68/736821e52ecfdeeb0f024b8ab01b5a229f6b9293bbdb444c27efade50b0f/jiter-0.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:d72f3b5a432a4c546ea4bedc84cce0c3404874f1d1676260b9c7f048a9855451", size = 185529, upload-time = "2025-11-09T20:48:29.125Z" }, + { url = "https://files.pythonhosted.org/packages/30/61/12ed8ee7a643cce29ac97c2281f9ce3956eb76b037e88d290f4ed0d41480/jiter-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e6ded41aeba3603f9728ed2b6196e4df875348ab97b28fc8afff115ed42ba7a7", size = 318974, upload-time = "2025-11-09T20:48:30.87Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c6/f3041ede6d0ed5e0e79ff0de4c8f14f401bbf196f2ef3971cdbe5fd08d1d/jiter-0.12.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a947920902420a6ada6ad51892082521978e9dd44a802663b001436e4b771684", size = 345932, upload-time = "2025-11-09T20:48:32.658Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5d/4d94835889edd01ad0e2dbfc05f7bdfaed46292e7b504a6ac7839aa00edb/jiter-0.12.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add5e227e0554d3a52cf390a7635edaffdf4f8fce4fdbcef3cc2055bb396a30c", size = 367243, upload-time = "2025-11-09T20:48:34.093Z" }, + { url = "https://files.pythonhosted.org/packages/fd/76/0051b0ac2816253a99d27baf3dda198663aff882fa6ea7deeb94046da24e/jiter-0.12.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9b1cda8fcb736250d7e8711d4580ebf004a46771432be0ae4796944b5dfa5d", size = 479315, upload-time = "2025-11-09T20:48:35.507Z" }, + { url = "https://files.pythonhosted.org/packages/70/ae/83f793acd68e5cb24e483f44f482a1a15601848b9b6f199dacb970098f77/jiter-0.12.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb12a2223fe0135c7ff1356a143d57f95bbf1f4a66584f1fc74df21d86b993", size = 380714, upload-time = "2025-11-09T20:48:40.014Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/5e/4808a88338ad2c228b1126b93fcd8ba145e919e886fe910d578230dabe3b/jiter-0.12.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c596cc0f4cb574877550ce4ecd51f8037469146addd676d7c1a30ebe6391923f", size = 365168, upload-time = "2025-11-09T20:48:41.462Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d4/04619a9e8095b42aef436b5aeb4c0282b4ff1b27d1db1508df9f5dc82750/jiter-0.12.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ab4c823b216a4aeab3fdbf579c5843165756bd9ad87cc6b1c65919c4715f783", size = 387893, upload-time = "2025-11-09T20:48:42.921Z" }, + { url = "https://files.pythonhosted.org/packages/17/ea/d3c7e62e4546fdc39197fa4a4315a563a89b95b6d54c0d25373842a59cbe/jiter-0.12.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e427eee51149edf962203ff8db75a7514ab89be5cb623fb9cea1f20b54f1107b", size = 520828, upload-time = "2025-11-09T20:48:44.278Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0b/c6d3562a03fd767e31cb119d9041ea7958c3c80cb3d753eafb19b3b18349/jiter-0.12.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:edb868841f84c111255ba5e80339d386d937ec1fdce419518ce1bd9370fac5b6", size = 511009, upload-time = "2025-11-09T20:48:45.726Z" }, + { url = "https://files.pythonhosted.org/packages/aa/51/2cb4468b3448a8385ebcd15059d325c9ce67df4e2758d133ab9442b19834/jiter-0.12.0-cp314-cp314t-win32.whl", hash = "sha256:8bbcfe2791dfdb7c5e48baf646d37a6a3dcb5a97a032017741dea9f817dca183", size = 205110, upload-time = "2025-11-09T20:48:47.033Z" }, + { url = "https://files.pythonhosted.org/packages/b2/c5/ae5ec83dec9c2d1af805fd5fe8f74ebded9c8670c5210ec7820ce0dbeb1e/jiter-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2fa940963bf02e1d8226027ef461e36af472dea85d36054ff835aeed944dd873", size = 205223, upload-time = "2025-11-09T20:48:49.076Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/9a/3c5391907277f0e55195550cf3fa8e293ae9ee0c00fb402fec1e38c0c82f/jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473", size = 185564, upload-time = "2025-11-09T20:48:50.376Z" }, + { url = "https://files.pythonhosted.org/packages/fe/54/5339ef1ecaa881c6948669956567a64d2670941925f245c434f494ffb0e5/jiter-0.12.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:4739a4657179ebf08f85914ce50332495811004cc1747852e8b2041ed2aab9b8", size = 311144, upload-time = "2025-11-09T20:49:10.503Z" }, + { url = "https://files.pythonhosted.org/packages/27/74/3446c652bffbd5e81ab354e388b1b5fc1d20daac34ee0ed11ff096b1b01a/jiter-0.12.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:41da8def934bf7bec16cb24bd33c0ca62126d2d45d81d17b864bd5ad721393c3", size = 305877, upload-time = "2025-11-09T20:49:12.269Z" }, + { url = "https://files.pythonhosted.org/packages/a1/f4/ed76ef9043450f57aac2d4fbeb27175aa0eb9c38f833be6ef6379b3b9a86/jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c44ee814f499c082e69872d426b624987dbc5943ab06e9bbaa4f81989fdb79e", size = 340419, upload-time = "2025-11-09T20:49:13.803Z" }, + { url = "https://files.pythonhosted.org/packages/21/01/857d4608f5edb0664aa791a3d45702e1a5bcfff9934da74035e7b9803846/jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd2097de91cf03eaa27b3cbdb969addf83f0179c6afc41bbc4513705e013c65d", size = 347212, upload-time = "2025-11-09T20:49:15.643Z" }, + { url = "https://files.pythonhosted.org/packages/cb/f5/12efb8ada5f5c9edc1d4555fe383c1fb2eac05ac5859258a72d61981d999/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e8547883d7b96ef2e5fe22b88f8a4c8725a56e7f4abafff20fd5272d634c7ecb", size = 309974, upload-time = "2025-11-09T20:49:17.187Z" }, + { url 
= "https://files.pythonhosted.org/packages/85/15/d6eb3b770f6a0d332675141ab3962fd4a7c270ede3515d9f3583e1d28276/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:89163163c0934854a668ed783a2546a0617f71706a2551a4a0666d91ab365d6b", size = 304233, upload-time = "2025-11-09T20:49:18.734Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3e/e7e06743294eea2cf02ced6aa0ff2ad237367394e37a0e2b4a1108c67a36/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d96b264ab7d34bbb2312dedc47ce07cd53f06835eacbc16dde3761f47c3a9e7f", size = 338537, upload-time = "2025-11-09T20:49:20.317Z" }, + { url = "https://files.pythonhosted.org/packages/2f/9c/6753e6522b8d0ef07d3a3d239426669e984fb0eba15a315cdbc1253904e4/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c", size = 346110, upload-time = "2025-11-09T20:49:21.817Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -834,15 +994,98 @@ sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2 wheels = [ { url = "https://files.pythonhosted.org/packages/77/d5/becbe1e2569b474a23f0c672ead8a29ac50b2dc1d5b9de184831bda8d14c/lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607", size = 8634365, upload-time = "2025-09-22T04:00:45.672Z" }, { url = "https://files.pythonhosted.org/packages/28/66/1ced58f12e804644426b85d0bb8a4478ca77bc1761455da310505f1a3526/lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938", size = 4650793, upload-time = "2025-09-22T04:00:47.783Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/84/549098ffea39dfd167e3f174b4ce983d0eed61f9d8d25b7bf2a57c3247fc/lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d", size = 4944362, upload-time = "2025-09-22T04:00:49.845Z" }, + { url = "https://files.pythonhosted.org/packages/ac/bd/f207f16abf9749d2037453d56b643a7471d8fde855a231a12d1e095c4f01/lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438", size = 5083152, upload-time = "2025-09-22T04:00:51.709Z" }, + { url = "https://files.pythonhosted.org/packages/15/ae/bd813e87d8941d52ad5b65071b1affb48da01c4ed3c9c99e40abb266fbff/lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964", size = 5023539, upload-time = "2025-09-22T04:00:53.593Z" }, + { url = "https://files.pythonhosted.org/packages/02/cd/9bfef16bd1d874fbe0cb51afb00329540f30a3283beb9f0780adbb7eec03/lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d", size = 5344853, upload-time = "2025-09-22T04:00:55.524Z" }, + { url = "https://files.pythonhosted.org/packages/b8/89/ea8f91594bc5dbb879734d35a6f2b0ad50605d7fb419de2b63d4211765cc/lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7", size = 5225133, upload-time = "2025-09-22T04:00:57.269Z" }, + { url = "https://files.pythonhosted.org/packages/b9/37/9c735274f5dbec726b2db99b98a43950395ba3d4a1043083dba2ad814170/lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178", size = 4677944, upload-time = "2025-09-22T04:00:59.052Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/28/7dfe1ba3475d8bfca3878365075abe002e05d40dfaaeb7ec01b4c587d533/lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553", size = 5284535, upload-time = "2025-09-22T04:01:01.335Z" }, + { url = "https://files.pythonhosted.org/packages/e7/cf/5f14bc0de763498fc29510e3532bf2b4b3a1c1d5d0dff2e900c16ba021ef/lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb", size = 5067343, upload-time = "2025-09-22T04:01:03.13Z" }, + { url = "https://files.pythonhosted.org/packages/1c/b0/bb8275ab5472f32b28cfbbcc6db7c9d092482d3439ca279d8d6fa02f7025/lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a", size = 4725419, upload-time = "2025-09-22T04:01:05.013Z" }, + { url = "https://files.pythonhosted.org/packages/25/4c/7c222753bc72edca3b99dbadba1b064209bc8ed4ad448af990e60dcce462/lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c", size = 5275008, upload-time = "2025-09-22T04:01:07.327Z" }, + { url = "https://files.pythonhosted.org/packages/6c/8c/478a0dc6b6ed661451379447cdbec77c05741a75736d97e5b2b729687828/lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7", size = 5248906, upload-time = "2025-09-22T04:01:09.452Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d9/5be3a6ab2784cdf9accb0703b65e1b64fcdd9311c9f007630c7db0cfcce1/lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46", size = 3610357, upload-time = "2025-09-22T04:01:11.102Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/7d/ca6fb13349b473d5732fb0ee3eec8f6c80fc0688e76b7d79c1008481bf1f/lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078", size = 4036583, upload-time = "2025-09-22T04:01:12.766Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a2/51363b5ecd3eab46563645f3a2c3836a2fc67d01a1b87c5017040f39f567/lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285", size = 3680591, upload-time = "2025-09-22T04:01:14.874Z" }, { url = "https://files.pythonhosted.org/packages/f3/c8/8ff2bc6b920c84355146cd1ab7d181bc543b89241cfb1ebee824a7c81457/lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456", size = 8661887, upload-time = "2025-09-22T04:01:17.265Z" }, { url = "https://files.pythonhosted.org/packages/37/6f/9aae1008083bb501ef63284220ce81638332f9ccbfa53765b2b7502203cf/lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924", size = 4667818, upload-time = "2025-09-22T04:01:19.688Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ca/31fb37f99f37f1536c133476674c10b577e409c0a624384147653e38baf2/lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f", size = 4950807, upload-time = "2025-09-22T04:01:21.487Z" }, + { url = "https://files.pythonhosted.org/packages/da/87/f6cb9442e4bada8aab5ae7e1046264f62fdbeaa6e3f6211b93f4c0dd97f1/lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534", size = 5109179, upload-time = "2025-09-22T04:01:23.32Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/20/a7760713e65888db79bbae4f6146a6ae5c04e4a204a3c48896c408cd6ed2/lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564", size = 5023044, upload-time = "2025-09-22T04:01:25.118Z" }, + { url = "https://files.pythonhosted.org/packages/a2/b0/7e64e0460fcb36471899f75831509098f3fd7cd02a3833ac517433cb4f8f/lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f", size = 5359685, upload-time = "2025-09-22T04:01:27.398Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e1/e5df362e9ca4e2f48ed6411bd4b3a0ae737cc842e96877f5bf9428055ab4/lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0", size = 5654127, upload-time = "2025-09-22T04:01:29.629Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d1/232b3309a02d60f11e71857778bfcd4acbdb86c07db8260caf7d008b08f8/lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192", size = 5253958, upload-time = "2025-09-22T04:01:31.535Z" }, + { url = "https://files.pythonhosted.org/packages/35/35/d955a070994725c4f7d80583a96cab9c107c57a125b20bb5f708fe941011/lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0", size = 4711541, upload-time = "2025-09-22T04:01:33.801Z" }, + { url = "https://files.pythonhosted.org/packages/1e/be/667d17363b38a78c4bd63cfd4b4632029fd68d2c2dc81f25ce9eb5224dd5/lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092", size = 5267426, upload-time = "2025-09-22T04:01:35.639Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/47/62c70aa4a1c26569bc958c9ca86af2bb4e1f614e8c04fb2989833874f7ae/lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f", size = 5064917, upload-time = "2025-09-22T04:01:37.448Z" }, + { url = "https://files.pythonhosted.org/packages/bd/55/6ceddaca353ebd0f1908ef712c597f8570cc9c58130dbb89903198e441fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8", size = 4788795, upload-time = "2025-09-22T04:01:39.165Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e8/fd63e15da5e3fd4c2146f8bbb3c14e94ab850589beab88e547b2dbce22e1/lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f", size = 5676759, upload-time = "2025-09-22T04:01:41.506Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/b3ec58dc5c374697f5ba37412cd2728f427d056315d124dd4b61da381877/lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6", size = 5255666, upload-time = "2025-09-22T04:01:43.363Z" }, + { url = "https://files.pythonhosted.org/packages/19/93/03ba725df4c3d72afd9596eef4a37a837ce8e4806010569bedfcd2cb68fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322", size = 5277989, upload-time = "2025-09-22T04:01:45.215Z" }, + { url = "https://files.pythonhosted.org/packages/c6/80/c06de80bfce881d0ad738576f243911fccf992687ae09fd80b734712b39c/lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849", size = 3611456, upload-time = "2025-09-22T04:01:48.243Z" }, + { url = "https://files.pythonhosted.org/packages/f7/d7/0cdfb6c3e30893463fb3d1e52bc5f5f99684a03c29a0b6b605cfae879cd5/lxml-6.0.2-cp312-cp312-win_amd64.whl", hash 
= "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f", size = 4011793, upload-time = "2025-09-22T04:01:50.042Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/93c73c67db235931527301ed3785f849c78991e2e34f3fd9a6663ffda4c5/lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6", size = 3672836, upload-time = "2025-09-22T04:01:52.145Z" }, { url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" }, { url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" }, + { url = "https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" }, + { url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size 
= 5019000, upload-time = "2025-09-22T04:02:02.671Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" }, + { url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" }, + { url = "https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" }, + { url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = 
"2025-09-22T04:02:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" }, + { url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 5660655, upload-time = "2025-09-22T04:02:18.815Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" }, + { url = "https://files.pythonhosted.org/packages/76/04/5c5e2b8577bc936e219becb2e98cdb1aca14a4921a12995b9d0c523502ae/lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2", size = 3610700, upload-time = "2025-09-22T04:02:24.465Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0a/4643ccc6bb8b143e9f9640aa54e38255f9d3b45feb2cbe7ae2ca47e8782e/lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7", size = 4010347, upload-time = "2025-09-22T04:02:26.286Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/ef/dcf1d29c3f530577f61e5fe2f1bd72929acf779953668a8a47a479ae6f26/lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf", size = 3671248, upload-time = "2025-09-22T04:02:27.918Z" }, { url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" }, { url = "https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" }, + { url = "https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" }, + { url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" }, + { url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" }, + { url = "https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" }, + { url = "https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" }, + { url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" }, + { url = "https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" }, + { url = "https://files.pythonhosted.org/packages/82/57/4eca3e31e54dc89e2c3507e1cd411074a17565fa5ffc437c4ae0a00d439e/lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b", size = 3670072, upload-time = "2025-09-22T04:03:38.05Z" }, + { url = "https://files.pythonhosted.org/packages/e3/e0/c96cf13eccd20c9421ba910304dae0f619724dcf1702864fd59dd386404d/lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed", size = 4080617, upload-time = "2025-09-22T04:03:39.835Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5d/b3f03e22b3d38d6f188ef044900a9b29b2fe0aebb94625ce9fe244011d34/lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = 
"sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8", size = 3754930, upload-time = "2025-09-22T04:03:41.565Z" }, { url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" }, { url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" }, + { url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = 
"sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" }, + { url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" }, + { url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" }, + { url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" }, + { url = "https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = 
"sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" }, + { url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, upload-time = "2025-09-22T04:03:27.62Z" }, + { url = "https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" }, + { url = "https://files.pythonhosted.org/packages/0f/85/86766dfebfa87bea0ab78e9ff7a4b4b45225df4b4d3b8cc3c03c5cd68464/lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312", size = 3911420, upload-time = "2025-09-22T04:03:32.198Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1a/b248b355834c8e32614650b8008c69ffeb0ceb149c793961dd8c0b991bb3/lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", size = 4406837, upload-time = "2025-09-22T04:03:34.027Z" }, + { url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" }, { url = 
"https://files.pythonhosted.org/packages/0b/11/29d08bc103a62c0eba8016e7ed5aeebbf1e4312e83b0b1648dd203b0e87d/lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700", size = 3949829, upload-time = "2025-09-22T04:04:45.608Z" }, + { url = "https://files.pythonhosted.org/packages/12/b3/52ab9a3b31e5ab8238da241baa19eec44d2ab426532441ee607165aebb52/lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee", size = 4226277, upload-time = "2025-09-22T04:04:47.754Z" }, + { url = "https://files.pythonhosted.org/packages/a0/33/1eaf780c1baad88224611df13b1c2a9dfa460b526cacfe769103ff50d845/lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f", size = 4330433, upload-time = "2025-09-22T04:04:49.907Z" }, + { url = "https://files.pythonhosted.org/packages/7a/c1/27428a2ff348e994ab4f8777d3a0ad510b6b92d37718e5887d2da99952a2/lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9", size = 4272119, upload-time = "2025-09-22T04:04:51.801Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d0/3020fa12bcec4ab62f97aab026d57c2f0cfd480a558758d9ca233bb6a79d/lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a", size = 4417314, upload-time = "2025-09-22T04:04:55.024Z" }, + { url = "https://files.pythonhosted.org/packages/6c/77/d7f491cbc05303ac6801651aabeb262d43f319288c1ea96c66b1d2692ff3/lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e", size = 3518768, upload-time = "2025-09-22T04:04:57.097Z" }, ] [[package]] @@ -854,6 
+1097,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/81/54e3ce63502cd085a0c556652a4e1b919c45a446bd1e5300e10c44c8c521/markdown-3.10-py3-none-any.whl", hash = "sha256:b5b99d6951e2e4948d939255596523444c0e677c669700b1d17aa4a8a464cb7c", size = 107678, upload-time = "2025-11-03T19:51:13.887Z" }, ] +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ -953,6 +1208,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e2/fc/6dc7659c2ae5ddf280477011f4213a74f806862856b796ef08f028e664bf/mcp-1.25.0-py3-none-any.whl", hash = "sha256:b37c38144a666add0862614cc79ec276e97d72aa8ca26d622818d4e278b9721a", size = 233076, upload-time = "2025-12-19T10:19:55.416Z" }, ] +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = 
"sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "mergedeep" version = "1.3.4" @@ -1223,6 +1487,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/ee/346fa473e666fe14c52fcdd19ec2424157290a032d4c41f98127bfb31ac7/numpy-2.3.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f16417ec91f12f814b10bafe79ef77e70113a2f5f7018640e7425ff979253425", size = 12967213, upload-time = "2025-11-16T22:52:39.38Z" }, ] +[[package]] +name = "openai" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/f4/4690ecb5d70023ce6bfcfeabfe717020f654bde59a775058ec6ac4692463/openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba", size = 627383, upload-time = "2026-01-09T22:10:08.603Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879, upload-time = "2026-01-09T22:10:06.446Z" }, +] + [[package]] name = "openpyxl" version = "3.1.5" @@ -1623,6 +1906,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/78/b93cb80bd673bdc9f6ede63d8eb5b4646366953df15667eb3603be57a2b1/pymdown_extensions-10.17.2-py3-none-any.whl", hash = "sha256:bffae79a2e8b9e44aef0d813583a8fea63457b7a23643a43988055b7b79b4992", size = 266556, upload-time = "2025-11-26T15:43:55.162Z" }, ] +[[package]] +name = "pymupdf" +version = "1.26.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/48/d6/09b28f027b510838559f7748807192149c419b30cb90e6d5f0cf916dc9dc/pymupdf-1.26.7.tar.gz", hash = "sha256:71add8bdc8eb1aaa207c69a13400693f06ad9b927bea976f5d5ab9df0bb489c3", size = 84327033, upload-time = "2025-12-11T21:48:50.694Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/35/cd74cea1787b2247702ef8522186bdef32e9cb30a099e6bb864627ef6045/pymupdf-1.26.7-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:07085718dfdae5ab83b05eb5eb397f863bcc538fe05135318a01ea353e7a1353", size = 23179369, upload-time = "2025-12-11T21:47:21.587Z" }, + { url = "https://files.pythonhosted.org/packages/72/74/448b6172927c829c6a3fba80078d7b0a016ebbe2c9ee528821f5ea21677a/pymupdf-1.26.7-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:31aa9c8377ea1eea02934b92f4dcf79fb2abba0bf41f8a46d64c3e31546a3c02", size = 22470101, upload-time = "2025-12-11T21:47:37.105Z" }, + { url = "https://files.pythonhosted.org/packages/65/e7/47af26f3ac76be7ac3dd4d6cc7ee105948a8355d774e5ca39857bf91c11c/pymupdf-1.26.7-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e419b609996434a14a80fa060adec72c434a1cca6a511ec54db9841bc5d51b3c", size = 23502486, upload-time = "2025-12-12T09:51:25.824Z" }, + { url = "https://files.pythonhosted.org/packages/2a/6b/3de1714d734ff949be1e90a22375d0598d3540b22ae73eb85c2d7d1f36a9/pymupdf-1.26.7-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:69dfc78f206a96e5b3ac22741263ebab945fdf51f0dbe7c5757c3511b23d9d72", size = 24115727, upload-time = "2025-12-11T21:47:51.274Z" }, + { url = "https://files.pythonhosted.org/packages/62/9b/f86224847949577a523be2207315ae0fd3155b5d909cd66c274d095349a3/pymupdf-1.26.7-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1d5106f46e1ca0d64d46bd51892372a4f82076bdc14a9678d33d630702abca36", size = 24324386, upload-time = "2025-12-12T14:58:45.483Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/8e/a117d39092ca645fde8b903f4a941d9aa75b370a67b4f1f435f56393dc5a/pymupdf-1.26.7-cp310-abi3-win32.whl", hash = "sha256:7c9645b6f5452629c747690190350213d3e5bbdb6b2eca227d82702b327f6eee", size = 17203888, upload-time = "2025-12-12T13:59:57.613Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c3/d0047678146c294469c33bae167c8ace337deafb736b0bf97b9bc481aa65/pymupdf-1.26.7-cp310-abi3-win_amd64.whl", hash = "sha256:425b1befe40d41b72eb0fe211711c7ae334db5eb60307e9dd09066ed060cceba", size = 18405952, upload-time = "2025-12-11T21:48:02.947Z" }, +] + [[package]] name = "pypdfium2" version = "5.1.0" @@ -1861,6 +2159,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bb/ad/fdd56219f0e320293c513ef0b3cdd018802a1bcfdb29ed9bc0c3bcb97f31/responses-0.21.0-py3-none-any.whl", hash = "sha256:2dcc863ba63963c0c3d9ee3fa9507cbe36b7d7b0fccb4f0bdfd9e96c539b1487", size = 45987, upload-time = "2022-05-25T14:20:48.508Z" }, ] +[[package]] +name = "rich" +version = "14.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, +] + [[package]] name = "rpds-py" version = "0.30.0" @@ -2079,6 +2390,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/19/8d77f9992e5cbfcaa9133c3bf63b4fbbb051248802e1e803fed5c552fbb2/sentry_sdk-2.48.0-py2.py3-none-any.whl", hash = 
"sha256:6b12ac256769d41825d9b7518444e57fa35b5642df4c7c5e322af4d2c8721172", size = 414555, upload-time = "2025-12-16T14:55:40.152Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -2088,6 +2408,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "soupsieve" +version = "2.8.3" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, +] + [[package]] name = "sse-starlette" version = "3.2.0" @@ -2114,6 +2452,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, ] +[[package]] +name = "tabulate" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, +] + [[package]] name = "taskipy" version = "1.14.1" @@ -2238,6 +2585,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = 
"sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" }, ] +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, +] + +[[package]] +name = "typer" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/bf/8825b5929afd84d0dabd606c67cd57b8388cb3ec385f7ef19c5cc2202069/typer-0.21.1.tar.gz", hash = "sha256:ea835607cd752343b6b2b7ce676893e5a0324082268b48f27aa058bdb7d2145d", size = 110371, upload-time = "2026-01-06T11:21:10.989Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0"