From 9d24261f5deb710d3d6a4fc726ee1daa9d4022f5 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 14:01:06 +0900 Subject: [PATCH 01/18] =?UTF-8?q?MCP=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC?= =?UTF-8?q?=E6=A9=9F=E8=83=BD=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81?= =?UTF-8?q?=E4=BB=95=E6=A7=98=E3=82=92=E5=8F=8D=E6=98=A0=E3=80=82=E3=82=BF?= =?UTF-8?q?=E3=82=B9=E3=82=AF=E4=B8=80=E8=A6=A7=E3=82=92=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/agents/FEATURE_SPEC.md | 96 ++++++++++++++++++++++++++++++------- docs/agents/ROADMAP.md | 6 ++- docs/agents/TASKS.md | 55 ++++++++------------- 3 files changed, 104 insertions(+), 53 deletions(-) diff --git a/docs/agents/FEATURE_SPEC.md b/docs/agents/FEATURE_SPEC.md index 21a82f9..23b0e49 100644 --- a/docs/agents/FEATURE_SPEC.md +++ b/docs/agents/FEATURE_SPEC.md @@ -4,23 +4,85 @@ --- -## 数式取得機能追加 - -- 新たに数式文字列をそのまま取得する機能を追加 -- `SheetData`モデルに`formulas_map`を新設予定`formulas_map: dict[str, list[tuple[int,int]]]` -- 数式の値は定義されている数式をそのまま取得する -- セル座標はcolors_mapと同じようにr,cの数値で表記 -- デフォルトはverboseモード以上で出力、もしくはオプションからONにする -- 定義されている数式文字列をシンプルに取得する実装 -- 数式の表記形式は「=A1」のようにユーザーが見るままの数式文字列にする -- 共有数式や配列数式は一旦は展開しない実装にする -- 空文字は除外、=だけのセルも数式文字として取得 -- formulas_mapのキーは「式文字列(先頭=を含む)」で固定する -- 既存の値はSheetData.rowsにあり、数式はSheetData.formulas_mapにあることで共存する -- データ取得時はformulas_map が ON のときだけ data_only=False で再読込 -- オプションは`StructOptions`にて`include_formulas_map: bool = False`で設定を受け付ける -- `.xls`形式かつ数式取得ONの時は処理が遅くなるという警告を出しつつ、COMで取得処理をする。 -- cell.value が ArrayFormula の場合に value.text(実際の式文字列)を使う +## MCPサーバー機能追加 + +### 目的 + +- MCP クライアント(Codex / Claude / VS Code Copilot / Gemini CLI 等)から ExStruct を「ツール」として安全に呼び出せるようにする +- 推論はエージェント側で行い、MCP は制御面(実行・結果参照)に徹する + +### スコープ(MVP) + +- stdio トランスポートの MCP サーバー +- ツール: `exstruct.extract` +- 抽出結果は **必ずファイル出力**(MCP 応答はパス + 軽いメタ情報) +- 安全なパス制約(allowlist / deny glob) + +### 前提・制約 + +- 1MB 程度の Excel を想定 +- 処理時間は長くなっても高品質重視 +- 
Windows 以外は COM なしの簡易読み取り(ライブラリのスタンスに準拠) + +### 出力 JSON の仕様 + +- `mode` で出力粒度を選択: `light` / `standard` / `verbose` +- 互換方針: 追加は OK、破壊的変更は NG + +#### `light` + +- 軽量メタデータ中心(シート名、件数、主要範囲など) +- 大きなセル本文や詳細構造は含めない + +#### `standard` + +- 通常運用向けの基本情報 +- セル情報は要約・圧縮前提 + +#### `verbose` + +- 詳細な構造情報を含む +- 大容量になりやすいため、ファイル出力+チャンク取得前提 + +### MCP ツール仕様(案) + +#### `exstruct.extract` + +- 入力: `xlsx_path`, `mode`, `format`, `out_dir?`, `out_name?`, `options?` +- 出力: `out_path`, `workbook_meta`, `warnings`, `engine` +- 実装: 内部 API を優先、フォールバックで CLI サブプロセス + +#### `exstruct.read_json_chunk`(実用化フェーズ) + +- 入力: `out_path`, `sheet?`, `max_bytes?`, `filter?`, `cursor?` +- 出力: `chunk`, `next_cursor?` +- 方針: 返却サイズを抑制し、段階的に取得できること + +#### `exstruct.validate_input`(実用化フェーズ) + +- 入力: `xlsx_path` +- 出力: `is_readable`, `warnings`, `errors` + +### サーバー設計 + +- stdio 優先 +- ログは stderr / ファイル(stdio を汚さない) +- `--root` によりアクセス範囲を固定 +- `--deny-glob` により防御的に除外 +- `--on-conflict` で出力衝突方針を指定(overwrite / skip / rename) + +### ディレクトリ構成(案) + +``` +src/exstruct/ + mcp/ + __init__.py + server.py # MCP server entrypoint (stdio) + tools.py # tool definitions + handlers + io.py # path validation, safe read/write + extract_runner.py # internal API call or subprocess fallback + chunk_reader.py # JSON partial read / pointer / sheet filters +``` --- diff --git a/docs/agents/ROADMAP.md b/docs/agents/ROADMAP.md index a69533c..36630fe 100644 --- a/docs/agents/ROADMAP.md +++ b/docs/agents/ROADMAP.md @@ -44,10 +44,14 @@ - セル結合範囲データ圧縮とrowsデータ構造見直しによるコンテキスト圧縮 -## v0.3.6 +## v0.3.7 - 数式取得オプション追加 +## v0.4.0 + +- MCPサーバー機能追加 + ## v0.5.0 - Excel Form Controls 解析 diff --git a/docs/agents/TASKS.md b/docs/agents/TASKS.md index cf7660f..c6c95dc 100644 --- a/docs/agents/TASKS.md +++ b/docs/agents/TASKS.md @@ -2,38 +2,23 @@ 未完了 [ ], 完了 [x] -## 数式取得機能追加 - -- [x] `SheetData`に`formulas_map`フィールドを追加し、シリアライズ対象に含める -- [x] `StructOptions`に`include_formulas_map: bool = False`を追加し、verbose時の既定挙動と整合させる -- [x] 
openpyxlで`data_only=False`の読み取りパスを追加し、`formulas_map`用の走査処理を実装する -- [x] `.xls`かつ数式取得ONの場合はCOM経由で`formulas_map`を取得し、遅延警告を出す -- [x] `formulas_map`の仕様(=付きの式文字列、空文字除外、=のみ許可、共有/配列は未展開)に沿った抽出ロジックを追加 -- [x] openpyxlの配列数式(`ArrayFormula`)は`value.text`から式文字列を取得する分岐を追加 -- [x] CLI/ドキュメント/READMEの出力モード説明に`formulas_map`の条件を追記する -- [x] テスト要件に`formulas_map`関連(ON/OFF、verbose既定、.xls COM分岐)を追加する - -## PR #44 指摘対応 - -- [x] `src/exstruct/render/__init__.py` の `_page_index_from_suffix` を2桁固定ではなく可変桁の数値サフィックスに対応させ、`_rename_pages_for_print_area` の上書きリスクを解消する -- [x] `src/exstruct/render/__init__.py` の `_export_sheet_pdf` の `finally` 内 `return` を削除し、PrintArea 復元失敗はログに残して例外を握りつぶさない -- [x] `src/exstruct/core/pipeline.py` の `step_extract_formulas_map_*` の挙動を docstring に合わせる(失敗時にログしてスキップ)か、docstring を実装に合わせて修正する -- [x] `docs/README.ja.md` の `**verbose**` 説明行を日本語に統一する - -## PR #44 コメント/Codecov 対応 - -- [x] Codecov パッチカバレッジ低下(60.53%)の指摘に対応し、対象ファイルの不足分テストを追加する(`src/exstruct/render/__init__.py`, `src/exstruct/core/cells.py`, `src/exstruct/core/backends/com_backend.py`, `src/exstruct/core/pipeline.py`, `src/exstruct/core/backends/openpyxl_backend.py`) -- [x] Codecov の「Files with missing lines」で具体的な未カバー行を確認し、テスト観点を整理する -- [x] Codacy 警告対応: `src/exstruct/render/__init__.py:274` の finally 内 return により例外が握りつぶされる可能性(`PyLintPython3_W0150`)を解消する - -## PR #44 CodeRabbit 再レビュー対応 - -- [ ] `scripts/codacy_issues.py`: トークン未設定時の `sys.exit(1)` をモジュールトップから排除し、`get_token()` または `main()` で検証する -- [ ] `scripts/codacy_issues.py`: `format_for_ai` の `sys.exit` を `ValueError` に置換し、呼び出し側でバリデーションする -- [ ] `scripts/codacy_issues.py`: `urlopen` の非2xxチェック(到達不能)を削除または `HTTPError` 側へ寄せる -- [ ] `scripts/codacy_issues.py`: `status` の固定値バリデーションを廃止する(固定なら直代入/必要なら CLI 引数化) -- [ ] `tests/backends/test_print_areas_openpyxl.py`: `PrintAreaData` 型に合わせる+関連テストに Google スタイル docstring を付与 -- [ ] `tests/core/test_pipeline.py`: 無効な `MergedCellRange` を有効な非重複レンジに修正する -- [ ] `tests/backends/test_backends.py`: `sheets` のクラス属性共有を避け、インスタンス属性に変更する -- [ ] 
`tests/render/test_render_init.py` / `tests/utils.py` / `tests/models/test_models_export.py`: docstring/コメントの指摘を反映する -- [ ] `src/exstruct/render/__init__.py`: Protocol クラスに Google スタイル docstring を追加する +## MCPサーバー(MVP) + +- [x] 仕様反映: `docs/agents/FEATURE_SPEC.md` を更新 +- [ ] 依存追加: `pyproject.toml` に `exstruct[mcp]` の extras を追加 +- [ ] エントリポイント: `exstruct-mcp = exstruct.mcp.server:main` を定義 +- [ ] MCP 基盤: `src/exstruct/mcp/server.py` を追加(stdio サーバー起動) +- [ ] ツール定義: `src/exstruct/mcp/tools.py` に `exstruct.extract` を実装 +- [ ] パス制約: `src/exstruct/mcp/io.py` に allowlist / deny glob を実装 +- [ ] 抽出実行: `src/exstruct/mcp/extract_runner.py` に内部 API 優先の実行層を実装 +- [ ] 出力モデル: Pydantic で入出力モデルを定義(mypy strict / Ruff 遵守) +- [ ] ログ: stderr / ファイル出力の設定を追加 +- [ ] ドキュメント: README または docs に起動例(`exstruct-mcp --root ...`)を追記 + +## MCPサーバー(実用化) + +- [ ] `exstruct.read_json_chunk` を追加(大容量 JSON 対応) +- [ ] `exstruct.validate_input` を追加(事前検証) +- [ ] `--on-conflict` の出力衝突ポリシー実装 +- [ ] Windows/非Windows の読み取り差分を明文化 +- [ ] 最低限のテスト追加(パス制約 / 入出力モデル / 例外) From c4f876c07281114a072debd9bda2c60331129a6a Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 14:24:46 +0900 Subject: [PATCH 02/18] =?UTF-8?q?MCP=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC?= =?UTF-8?q?=E6=A9=9F=E8=83=BD=E3=82=92=E5=AE=9F=E8=A3=85=E3=81=97=E3=80=81?= =?UTF-8?q?=E4=BE=9D=E5=AD=98=E9=96=A2=E4=BF=82=E3=82=92=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=E3=80=82=E3=82=A8=E3=83=B3=E3=83=88=E3=83=AA=E3=83=9D=E3=82=A4?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=82=92=E5=AE=9A=E7=BE=A9=E3=81=97=E3=80=81?= =?UTF-8?q?=E3=83=84=E3=83=BC=E3=83=AB=E3=81=A8=E6=8A=BD=E5=87=BA=E6=A9=9F?= =?UTF-8?q?=E8=83=BD=E3=82=92=E5=AE=9F=E8=A3=85=E3=80=82=E3=83=91=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=AA=E3=82=B7=E3=83=BC=E3=82=92=E5=B0=8E=E5=85=A5?= =?UTF-8?q?=E3=81=97=E3=80=81=E5=87=BA=E5=8A=9B=E3=83=A2=E3=83=87=E3=83=AB?= =?UTF-8?q?=E3=82=92=E5=AE=9A=E7=BE=A9=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
docs/agents/TASKS.md | 14 +-- pyproject.toml | 2 + src/exstruct/mcp/__init__.py | 18 +++ src/exstruct/mcp/extract_runner.py | 194 +++++++++++++++++++++++++++++ src/exstruct/mcp/io.py | 61 +++++++++ src/exstruct/mcp/server.py | 182 +++++++++++++++++++++++++++ src/exstruct/mcp/tools.py | 72 +++++++++++ 7 files changed, 536 insertions(+), 7 deletions(-) create mode 100644 src/exstruct/mcp/__init__.py create mode 100644 src/exstruct/mcp/extract_runner.py create mode 100644 src/exstruct/mcp/io.py create mode 100644 src/exstruct/mcp/server.py create mode 100644 src/exstruct/mcp/tools.py diff --git a/docs/agents/TASKS.md b/docs/agents/TASKS.md index c6c95dc..f9cf818 100644 --- a/docs/agents/TASKS.md +++ b/docs/agents/TASKS.md @@ -5,13 +5,13 @@ ## MCPサーバー(MVP) - [x] 仕様反映: `docs/agents/FEATURE_SPEC.md` を更新 -- [ ] 依存追加: `pyproject.toml` に `exstruct[mcp]` の extras を追加 -- [ ] エントリポイント: `exstruct-mcp = exstruct.mcp.server:main` を定義 -- [ ] MCP 基盤: `src/exstruct/mcp/server.py` を追加(stdio サーバー起動) -- [ ] ツール定義: `src/exstruct/mcp/tools.py` に `exstruct.extract` を実装 -- [ ] パス制約: `src/exstruct/mcp/io.py` に allowlist / deny glob を実装 -- [ ] 抽出実行: `src/exstruct/mcp/extract_runner.py` に内部 API 優先の実行層を実装 -- [ ] 出力モデル: Pydantic で入出力モデルを定義(mypy strict / Ruff 遵守) +- [x] 依存追加: `pyproject.toml` に `exstruct[mcp]` の extras を追加 +- [x] エントリポイント: `exstruct-mcp = exstruct.mcp.server:main` を定義 +- [x] MCP 基盤: `src/exstruct/mcp/server.py` を追加(stdio サーバー起動) +- [x] ツール定義: `src/exstruct/mcp/tools.py` に `exstruct.extract` を実装 +- [x] パス制約: `src/exstruct/mcp/io.py` に allowlist / deny glob を実装 +- [x] 抽出実行: `src/exstruct/mcp/extract_runner.py` に内部 API 優先の実行層を実装 +- [x] 出力モデル: Pydantic で入出力モデルを定義(mypy strict / Ruff 遵守) - [ ] ログ: stderr / ファイル出力の設定を追加 - [ ] ドキュメント: README または docs に起動例(`exstruct-mcp --root ...`)を追記 diff --git a/pyproject.toml b/pyproject.toml index 1e6cca7..81040b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,9 +40,11 @@ dev = [ yaml = ["pyyaml>=6.0.3"] toon = ["python-toon>=0.1.3"] render 
= ["pypdfium2>=5.1.0", "Pillow>=12.0.0"] +mcp = ["mcp>=1.6.0,<2.0.0"] [project.scripts] exstruct = "exstruct.cli.main:main" +exstruct-mcp = "exstruct.mcp.server:main" [project.urls] Homepage = "https://harumiweb.github.io/exstruct/" diff --git a/src/exstruct/mcp/__init__.py b/src/exstruct/mcp/__init__.py new file mode 100644 index 0000000..954ae76 --- /dev/null +++ b/src/exstruct/mcp/__init__.py @@ -0,0 +1,18 @@ +"""MCP server integration for ExStruct.""" + +from __future__ import annotations + +from .extract_runner import ExtractRequest, ExtractResult, WorkbookMeta, run_extract +from .io import PathPolicy +from .tools import ExtractToolInput, ExtractToolOutput, run_extract_tool + +__all__ = [ + "ExtractRequest", + "ExtractResult", + "ExtractToolInput", + "ExtractToolOutput", + "PathPolicy", + "WorkbookMeta", + "run_extract", + "run_extract_tool", +] diff --git a/src/exstruct/mcp/extract_runner.py b/src/exstruct/mcp/extract_runner.py new file mode 100644 index 0000000..999fad3 --- /dev/null +++ b/src/exstruct/mcp/extract_runner.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Literal + +from pydantic import BaseModel, Field + +from exstruct import ExtractionMode, process_excel + +from .io import PathPolicy + + +class WorkbookMeta(BaseModel): + """Lightweight workbook metadata for MCP responses.""" + + sheet_names: list[str] = Field(default_factory=list, description="Sheet names.") + sheet_count: int = Field(default=0, description="Total number of sheets.") + + +class ExtractRequest(BaseModel): + """Input model for ExStruct MCP extraction.""" + + xlsx_path: Path + mode: ExtractionMode = "standard" + format: Literal["json", "yaml", "yml", "toon"] = "json" # noqa: A003 + out_dir: Path | None = None + out_name: str | None = None + options: dict[str, Any] = Field(default_factory=dict) + + +class ExtractResult(BaseModel): + """Output model for ExStruct MCP extraction.""" + + out_path: str + workbook_meta: 
WorkbookMeta | None = None + warnings: list[str] = Field(default_factory=list) + engine: Literal["internal_api", "cli_subprocess"] = "internal_api" + + +def run_extract( + request: ExtractRequest, *, policy: PathPolicy | None = None +) -> ExtractResult: + """Run an extraction with file output. + + Args: + request: Extraction request payload. + policy: Optional path policy for access control. + + Returns: + Extraction result with output path and metadata. + + Raises: + FileNotFoundError: If the input file does not exist. + ValueError: If paths violate the policy. + """ + resolved_input = _resolve_input_path(request.xlsx_path, policy=policy) + output_path = _resolve_output_path( + resolved_input, + request.format, + out_dir=request.out_dir, + out_name=request.out_name, + policy=policy, + ) + _ensure_output_dir(output_path) + + process_excel( + file_path=resolved_input, + output_path=output_path, + out_fmt=request.format, + mode=request.mode, + ) + + meta, warnings = _try_read_workbook_meta(resolved_input) + return ExtractResult( + out_path=str(output_path), + workbook_meta=meta, + warnings=warnings, + engine="internal_api", + ) + + +def _resolve_input_path(path: Path, *, policy: PathPolicy | None) -> Path: + """Resolve and validate the input path. + + Args: + path: Candidate input path. + policy: Optional path policy. + + Returns: + Resolved input path. + + Raises: + FileNotFoundError: If the input file does not exist. + ValueError: If the path violates the policy. + """ + resolved = policy.ensure_allowed(path) if policy else path.resolve() + if not resolved.exists(): + raise FileNotFoundError(f"Input file not found: {resolved}") + return resolved + + +def _resolve_output_path( + input_path: Path, + fmt: Literal["json", "yaml", "yml", "toon"], + *, + out_dir: Path | None, + out_name: str | None, + policy: PathPolicy | None, +) -> Path: + """Build and validate the output path. + + Args: + input_path: Resolved input path. + fmt: Output format. 
+ out_dir: Optional output directory. + out_name: Optional output filename. + policy: Optional path policy. + + Returns: + Resolved output path. + + Raises: + ValueError: If the path violates the policy. + """ + target_dir = out_dir or input_path.parent + target_dir = policy.ensure_allowed(target_dir) if policy else target_dir.resolve() + suffix = _format_suffix(fmt) + name = _normalize_output_name(input_path, out_name, suffix) + output_path = (target_dir / name).resolve() + if policy is not None: + output_path = policy.ensure_allowed(output_path) + return output_path + + +def _normalize_output_name(input_path: Path, out_name: str | None, suffix: str) -> str: + """Normalize output filename with a suffix. + + Args: + input_path: Input file path. + out_name: Optional output filename override. + suffix: Format-specific suffix. + + Returns: + Output filename with suffix. + """ + if out_name: + candidate = Path(out_name) + return candidate.name if candidate.suffix else f"{candidate.name}{suffix}" + return f"{input_path.stem}{suffix}" + + +def _ensure_output_dir(path: Path) -> None: + """Ensure the output directory exists. + + Args: + path: Output file path. + """ + path.parent.mkdir(parents=True, exist_ok=True) + + +def _format_suffix(fmt: Literal["json", "yaml", "yml", "toon"]) -> str: + """Return suffix for output format. + + Args: + fmt: Output format. + + Returns: + File suffix for the format. + """ + return ".yml" if fmt == "yml" else f".{fmt}" + + +def _try_read_workbook_meta(path: Path) -> tuple[WorkbookMeta | None, list[str]]: + """Try reading lightweight workbook metadata. + + Args: + path: Excel workbook path. + + Returns: + Tuple of metadata (or None) and warnings. 
+ """ + try: + from openpyxl import load_workbook + except ImportError as exc: + return None, [f"openpyxl is not available: {exc}"] + + try: + workbook = load_workbook(path, read_only=True, data_only=True) + except Exception as exc: # pragma: no cover - surface as warning + return None, [f"Failed to read workbook metadata: {exc}"] + + sheet_names = list(workbook.sheetnames) + workbook.close() + return WorkbookMeta(sheet_names=sheet_names, sheet_count=len(sheet_names)), [] diff --git a/src/exstruct/mcp/io.py b/src/exstruct/mcp/io.py new file mode 100644 index 0000000..45b0d38 --- /dev/null +++ b/src/exstruct/mcp/io.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from pathlib import Path + +from pydantic import BaseModel, Field + + +class PathPolicy(BaseModel): + """Filesystem access policy for MCP requests.""" + + root: Path = Field(..., description="Root directory for allowed access.") + deny_globs: list[str] = Field( + default_factory=list, description="Glob patterns to deny." + ) + + def normalize_root(self) -> Path: + """Return the resolved root path. + + Returns: + Resolved root directory path. + """ + return self.root.resolve() + + def ensure_allowed(self, path: Path) -> Path: + """Validate that a path is within root and not denied. + + Args: + path: Candidate path to validate. + + Returns: + Resolved path if allowed. + + Raises: + ValueError: If the path is outside the root or denied by glob. + """ + resolved = path.resolve() + root = self.normalize_root() + if resolved != root and root not in resolved.parents: + raise ValueError(f"Path is outside root: {resolved}") + if self._is_denied(resolved, root): + raise ValueError(f"Path is denied by policy: {resolved}") + return resolved + + def _is_denied(self, path: Path, root: Path) -> bool: + """Check if a path is denied by glob rules. + + Args: + path: Candidate path to check. + root: Resolved root path. + + Returns: + True if denied, False otherwise. 
+ """ + try: + rel = path.relative_to(root) + except ValueError: + return True + for pattern in self.deny_globs: + if rel.match(pattern) or path.match(pattern): + return True + return False diff --git a/src/exstruct/mcp/server.py b/src/exstruct/mcp/server.py new file mode 100644 index 0000000..73c5d08 --- /dev/null +++ b/src/exstruct/mcp/server.py @@ -0,0 +1,182 @@ +from __future__ import annotations + +import argparse +import importlib +import logging +from pathlib import Path +from types import ModuleType +from typing import TYPE_CHECKING, Any, Literal + +from pydantic import BaseModel, Field + +from exstruct import ExtractionMode + +from .io import PathPolicy +from .tools import ExtractToolInput, ExtractToolOutput, run_extract_tool + +if TYPE_CHECKING: # pragma: no cover - typing only + from mcp.server.fastmcp import FastMCP + +logger = logging.getLogger(__name__) + + +class ServerConfig(BaseModel): + """Configuration for the MCP server process.""" + + root: Path = Field(..., description="Root directory for file access.") + deny_globs: list[str] = Field(default_factory=list, description="Denied glob list.") + log_level: str = Field(default="INFO", description="Logging level.") + log_file: Path | None = Field(default=None, description="Optional log file path.") + + +def main(argv: list[str] | None = None) -> int: + """Run the MCP server entrypoint. + + Args: + argv: Optional CLI arguments for testing. + + Returns: + Exit code (0 for success, 1 for failure). + """ + config = _parse_args(argv) + _configure_logging(config) + try: + run_server(config) + except Exception as exc: # pragma: no cover - surface runtime errors + logger.error("MCP server failed: %s", exc) + return 1 + return 0 + + +def run_server(config: ServerConfig) -> None: + """Start the MCP server. + + Args: + config: Server configuration. 
+ """ + _import_mcp() + policy = PathPolicy(root=config.root, deny_globs=config.deny_globs) + logger.info("MCP root: %s", policy.normalize_root()) + app = _create_app(policy) + app.run() + + +def _parse_args(argv: list[str] | None) -> ServerConfig: + """Parse CLI arguments into server config. + + Args: + argv: Optional CLI argument list. + + Returns: + Parsed server configuration. + """ + parser = argparse.ArgumentParser(description="ExStruct MCP server (stdio).") + parser.add_argument("--root", type=Path, required=True, help="Workspace root.") + parser.add_argument( + "--deny-glob", + action="append", + default=[], + help="Glob pattern to deny (can be specified multiple times).", + ) + parser.add_argument( + "--log-level", + default="INFO", + help="Logging level (DEBUG, INFO, WARNING, ERROR).", + ) + parser.add_argument("--log-file", type=Path, help="Optional log file path.") + args = parser.parse_args(argv) + return ServerConfig( + root=args.root, + deny_globs=list(args.deny_glob), + log_level=args.log_level, + log_file=args.log_file, + ) + + +def _configure_logging(config: ServerConfig) -> None: + """Configure logging for the server process. + + Args: + config: Server configuration. + """ + handlers: list[logging.Handler] = [logging.StreamHandler()] + if config.log_file is not None: + handlers.append(logging.FileHandler(config.log_file)) + logging.basicConfig( + level=config.log_level.upper(), + handlers=handlers, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + ) + + +def _import_mcp() -> ModuleType: + """Import the MCP SDK module or raise a helpful error. + + Returns: + Imported MCP module. + """ + try: + return importlib.import_module("mcp") + except ModuleNotFoundError as exc: + raise RuntimeError( + "MCP SDK is not installed. Install with `pip install exstruct[mcp]`." + ) from exc + + +def _create_app(policy: PathPolicy) -> FastMCP: + """Create the MCP FastMCP application. + + Args: + policy: Path policy for filesystem access. 
+ + Returns: + FastMCP application instance. + """ + from mcp.server.fastmcp import FastMCP + + app = FastMCP("ExStruct MCP", json_response=True) + _register_tools(app, policy) + return app + + +def _register_tools(app: FastMCP, policy: PathPolicy) -> None: + """Register MCP tools for the server. + + Args: + app: FastMCP application instance. + policy: Path policy for filesystem access. + """ + + def _extract_tool( + xlsx_path: str, + mode: ExtractionMode = "standard", + format: Literal["json", "yaml", "yml", "toon"] = "json", # noqa: A002 + out_dir: str | None = None, + out_name: str | None = None, + options: dict[str, Any] | None = None, + ) -> ExtractToolOutput: + """Handle the ExStruct extraction tool call. + + Args: + xlsx_path: Path to the Excel workbook. + mode: Extraction mode. + format: Output format. + out_dir: Optional output directory. + out_name: Optional output filename. + options: Additional options (reserved for future use). + + Returns: + Extraction result payload. + """ + payload = ExtractToolInput( + xlsx_path=xlsx_path, + mode=mode, + format=format, + out_dir=out_dir, + out_name=out_name, + options=options or {}, + ) + return run_extract_tool(payload, policy=policy) + + tool = app.tool(name="exstruct.extract") + tool(_extract_tool) diff --git a/src/exstruct/mcp/tools.py b/src/exstruct/mcp/tools.py new file mode 100644 index 0000000..d244e4d --- /dev/null +++ b/src/exstruct/mcp/tools.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Literal + +from pydantic import BaseModel, Field + +from exstruct import ExtractionMode + +from .extract_runner import ExtractRequest, ExtractResult, WorkbookMeta, run_extract +from .io import PathPolicy + + +class ExtractToolInput(BaseModel): + """MCP tool input for ExStruct extraction.""" + + xlsx_path: str + mode: ExtractionMode = "standard" + format: Literal["json", "yaml", "yml", "toon"] = "json" # noqa: A003 + out_dir: str | None = None + out_name: str | 
None = None + options: dict[str, Any] = Field(default_factory=dict) + + +class ExtractToolOutput(BaseModel): + """MCP tool output for ExStruct extraction.""" + + out_path: str + workbook_meta: WorkbookMeta | None = None + warnings: list[str] = Field(default_factory=list) + engine: Literal["internal_api", "cli_subprocess"] = "internal_api" + + +def run_extract_tool( + payload: ExtractToolInput, *, policy: PathPolicy | None = None +) -> ExtractToolOutput: + """Run the extraction tool handler. + + Args: + payload: Tool input payload. + policy: Optional path policy for access control. + + Returns: + Tool output payload. + """ + request = ExtractRequest( + xlsx_path=Path(payload.xlsx_path), + mode=payload.mode, + format=payload.format, + out_dir=Path(payload.out_dir) if payload.out_dir else None, + out_name=payload.out_name, + options=payload.options, + ) + result = run_extract(request, policy=policy) + return _to_tool_output(result) + + +def _to_tool_output(result: ExtractResult) -> ExtractToolOutput: + """Convert internal result to tool output model. + + Args: + result: Internal extraction result. + + Returns: + Tool output payload. 
+ """ + return ExtractToolOutput( + out_path=result.out_path, + workbook_meta=result.workbook_meta, + warnings=result.warnings, + engine=result.engine, + ) From ade293fe8dd6e8c24a5051faa1dde05149535c75 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 14:34:34 +0900 Subject: [PATCH 03/18] =?UTF-8?q?MCP=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC?= =?UTF-8?q?=E6=A9=9F=E8=83=BD=E3=82=92=E6=8B=A1=E5=BC=B5=E3=81=97=E3=80=81?= =?UTF-8?q?=E5=A4=A7=E5=AE=B9=E9=87=8FJSON=E3=81=AE=E8=AA=AD=E3=81=BF?= =?UTF-8?q?=E8=BE=BC=E3=81=BF=E3=81=A8=E5=85=A5=E5=8A=9B=E6=A4=9C=E8=A8=BC?= =?UTF-8?q?=E3=83=84=E3=83=BC=E3=83=AB=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/agents/TASKS.md | 4 +- src/exstruct/mcp/__init__.py | 36 ++- src/exstruct/mcp/chunk_reader.py | 411 +++++++++++++++++++++++++++++ src/exstruct/mcp/server.py | 72 ++++- src/exstruct/mcp/tools.py | 119 +++++++++ src/exstruct/mcp/validate_input.py | 76 ++++++ 6 files changed, 714 insertions(+), 4 deletions(-) create mode 100644 src/exstruct/mcp/chunk_reader.py create mode 100644 src/exstruct/mcp/validate_input.py diff --git a/docs/agents/TASKS.md b/docs/agents/TASKS.md index f9cf818..90a1e7e 100644 --- a/docs/agents/TASKS.md +++ b/docs/agents/TASKS.md @@ -17,8 +17,8 @@ ## MCPサーバー(実用化) -- [ ] `exstruct.read_json_chunk` を追加(大容量 JSON 対応) -- [ ] `exstruct.validate_input` を追加(事前検証) +- [x] `exstruct.read_json_chunk` を追加(大容量 JSON 対応) +- [x] `exstruct.validate_input` を追加(事前検証) - [ ] `--on-conflict` の出力衝突ポリシー実装 - [ ] Windows/非Windows の読み取り差分を明文化 - [ ] 最低限のテスト追加(パス制約 / 入出力モデル / 例外) diff --git a/src/exstruct/mcp/__init__.py b/src/exstruct/mcp/__init__.py index 954ae76..5adc2aa 100644 --- a/src/exstruct/mcp/__init__.py +++ b/src/exstruct/mcp/__init__.py @@ -2,9 +2,30 @@ from __future__ import annotations +from .chunk_reader import ( + ReadJsonChunkFilter, + ReadJsonChunkRequest, + ReadJsonChunkResult, + read_json_chunk, +) from .extract_runner 
import ExtractRequest, ExtractResult, WorkbookMeta, run_extract from .io import PathPolicy -from .tools import ExtractToolInput, ExtractToolOutput, run_extract_tool +from .tools import ( + ExtractToolInput, + ExtractToolOutput, + ReadJsonChunkToolInput, + ReadJsonChunkToolOutput, + ValidateInputToolInput, + ValidateInputToolOutput, + run_extract_tool, + run_read_json_chunk_tool, + run_validate_input_tool, +) +from .validate_input import ( + ValidateInputRequest, + ValidateInputResult, + validate_input, +) __all__ = [ "ExtractRequest", @@ -12,7 +33,20 @@ "ExtractToolInput", "ExtractToolOutput", "PathPolicy", + "ReadJsonChunkFilter", + "ReadJsonChunkRequest", + "ReadJsonChunkResult", + "ReadJsonChunkToolInput", + "ReadJsonChunkToolOutput", + "ValidateInputRequest", + "ValidateInputResult", + "ValidateInputToolInput", + "ValidateInputToolOutput", "WorkbookMeta", + "read_json_chunk", + "validate_input", "run_extract", "run_extract_tool", + "run_read_json_chunk_tool", + "run_validate_input_tool", ] diff --git a/src/exstruct/mcp/chunk_reader.py b/src/exstruct/mcp/chunk_reader.py new file mode 100644 index 0000000..06bcae0 --- /dev/null +++ b/src/exstruct/mcp/chunk_reader.py @@ -0,0 +1,411 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, cast + +from pydantic import BaseModel, Field + +from .io import PathPolicy + + +class ReadJsonChunkFilter(BaseModel): + """Filter options for JSON chunk extraction.""" + + rows: tuple[int, int] | None = Field( + default=None, description="Row range (1-based, inclusive)." + ) + cols: tuple[int, int] | None = Field( + default=None, description="Column range (1-based, inclusive)." 
+ ) + + +class ReadJsonChunkRequest(BaseModel): + """Input model for JSON chunk extraction.""" + + out_path: Path + sheet: str | None = None + max_bytes: int = Field(default=50_000, ge=1) + filter: ReadJsonChunkFilter | None = Field(default=None) # noqa: A003 + cursor: str | None = None + + +class ReadJsonChunkResult(BaseModel): + """Output model for JSON chunk extraction.""" + + chunk: str + next_cursor: str | None = None + warnings: list[str] = Field(default_factory=list) + + +def read_json_chunk( + request: ReadJsonChunkRequest, *, policy: PathPolicy | None = None +) -> ReadJsonChunkResult: + """Read a JSON chunk from an ExStruct output file. + + Args: + request: Chunk request payload. + policy: Optional path policy for access control. + + Returns: + Chunk result with JSON string payload and cursor. + + Raises: + FileNotFoundError: If the output file does not exist. + ValueError: If the request is invalid or violates policy. + """ + resolved = _resolve_output_path(request.out_path, policy=policy) + raw_text = _read_text(resolved) + + if request.sheet is None and request.filter is None and request.cursor is None: + return _chunk_raw_text(raw_text, request.max_bytes) + + data = _parse_json(raw_text) + sheet_name, sheet_data = _select_sheet(data, request.sheet) + rows = _extract_rows(sheet_data) + filtered_rows, warnings = _apply_filters(rows, request.filter) + chunk, next_cursor, more_warnings = _build_sheet_chunk( + data, + sheet_name, + sheet_data, + filtered_rows, + request.cursor, + request.max_bytes, + ) + warnings.extend(more_warnings) + return ReadJsonChunkResult( + chunk=chunk, + next_cursor=next_cursor, + warnings=warnings, + ) + + +def _resolve_output_path(path: Path, *, policy: PathPolicy | None) -> Path: + """Resolve and validate the output path. + + Args: + path: Output file path. + policy: Optional path policy. + + Returns: + Resolved path. + + Raises: + FileNotFoundError: If the output file does not exist. 
+ ValueError: If the path violates the policy. + """ + resolved = policy.ensure_allowed(path) if policy else path.resolve() + if not resolved.exists(): + raise FileNotFoundError(f"Output file not found: {resolved}") + return resolved + + +def _read_text(path: Path) -> str: + """Read UTF-8 text from disk. + + Args: + path: File path. + + Returns: + File contents as text. + """ + return path.read_text(encoding="utf-8") + + +def _parse_json(text: str) -> dict[str, Any]: + """Parse JSON into a dictionary. + + Args: + text: JSON string. + + Returns: + Parsed JSON object. + """ + parsed = json.loads(text) + if not isinstance(parsed, dict): + raise ValueError("Invalid workbook JSON: expected object at root.") + return cast(dict[str, Any], parsed) + + +def _chunk_raw_text(text: str, max_bytes: int) -> ReadJsonChunkResult: + """Return a raw JSON chunk without parsing. + + Args: + text: JSON text. + max_bytes: Maximum bytes to return. + + Returns: + Chunk result with optional cursor. + + Raises: + ValueError: If the text exceeds max_bytes. + """ + payload_bytes = text.encode("utf-8") + if len(payload_bytes) <= max_bytes: + return ReadJsonChunkResult(chunk=text, next_cursor=None, warnings=[]) + raise ValueError("Output is too large. Specify sheet or filter to chunk.") + + +def _select_sheet( + data: dict[str, Any], sheet: str | None +) -> tuple[str, dict[str, Any]]: + """Select a sheet from the workbook payload. + + Args: + data: Parsed workbook JSON. + sheet: Optional sheet name. + + Returns: + Sheet name and sheet data. + + Raises: + ValueError: If sheet selection is ambiguous or missing. 
+ """ + sheets = data.get("sheets", {}) + if not isinstance(sheets, dict): + raise ValueError("Invalid workbook JSON: sheets is not a mapping.") + if sheet is not None: + if sheet not in sheets: + raise ValueError(f"Sheet not found: {sheet}") + return sheet, sheets[sheet] + if len(sheets) == 1: + only_name = next(iter(sheets.keys())) + return only_name, sheets[only_name] + raise ValueError("Sheet is required when multiple sheets exist.") + + +def _extract_rows(sheet_data: dict[str, Any]) -> list[dict[str, Any]]: + """Extract rows from sheet data. + + Args: + sheet_data: Sheet JSON data. + + Returns: + List of row dictionaries. + """ + rows = sheet_data.get("rows", []) + if not isinstance(rows, list): + return [] + return rows + + +def _apply_filters( + rows: list[dict[str, Any]], filter_data: ReadJsonChunkFilter | None +) -> tuple[list[dict[str, Any]], list[str]]: + """Apply row/column filters to rows. + + Args: + rows: Row dictionaries. + filter_data: Optional filter data. + + Returns: + Filtered rows and warnings. + """ + if filter_data is None: + return rows, [] + warnings: list[str] = [] + filtered_rows = rows + if filter_data.rows is not None: + filtered_rows = _filter_rows(filtered_rows, filter_data.rows, warnings) + if filter_data.cols is not None: + filtered_rows = _filter_cols(filtered_rows, filter_data.cols, warnings) + return filtered_rows, warnings + + +def _filter_rows( + rows: list[dict[str, Any]], row_range: tuple[int, int], warnings: list[str] +) -> list[dict[str, Any]]: + """Filter rows by row range. + + Args: + rows: Row dictionaries. + row_range: Row range tuple (1-based, inclusive). + warnings: Warning collector. + + Returns: + Filtered rows. 
+ """ + start, end = row_range + if start > end: + warnings.append("Row filter ignored because start > end.") + return rows + return [row for row in rows if start <= _row_index(row) <= end] + + +def _filter_cols( + rows: list[dict[str, Any]], col_range: tuple[int, int], warnings: list[str] +) -> list[dict[str, Any]]: + """Filter columns within each row by column range. + + Args: + rows: Row dictionaries. + col_range: Column range tuple (1-based, inclusive). + warnings: Warning collector. + + Returns: + Rows with filtered column maps. + """ + start, end = col_range + if start > end: + warnings.append("Column filter ignored because start > end.") + return rows + start_index = start - 1 + end_index = end - 1 + filtered: list[dict[str, Any]] = [] + for row in rows: + cols = row.get("c") + if not isinstance(cols, dict): + filtered.append(row) + continue + new_cols = { + key: value + for key, value in cols.items() + if _col_in_range(key, start_index, end_index) + } + new_row = dict(row) + new_row["c"] = new_cols + filtered.append(new_row) + return filtered + + +def _col_in_range(key: str, start: int, end: int) -> bool: + """Check if a column key is within range. + + Args: + key: Column key string. + start: Start index (0-based). + end: End index (0-based). + + Returns: + True if the column index is within range. + """ + try: + index = int(key) + except (TypeError, ValueError): + return False + return start <= index <= end + + +def _row_index(row: dict[str, Any]) -> int: + """Extract row index from a row dictionary. + + Args: + row: Row dictionary. + + Returns: + Row index or -1 if unavailable. + """ + value = row.get("r") + if isinstance(value, int): + return value + return -1 + + +def _build_sheet_chunk( + data: dict[str, Any], + sheet_name: str, + sheet_data: dict[str, Any], + rows: list[dict[str, Any]], + cursor: str | None, + max_bytes: int, +) -> tuple[str, str | None, list[str]]: + """Build a JSON chunk for a sheet. + + Args: + data: Workbook JSON data. 
+ sheet_name: Target sheet name. + sheet_data: Sheet JSON data. + rows: Filtered rows. + cursor: Optional cursor (row index in filtered list). + max_bytes: Maximum payload size in bytes. + + Returns: + Tuple of JSON chunk, next cursor, and warnings. + """ + warnings: list[str] = [] + start_index = _parse_cursor(cursor) + if start_index > len(rows): + raise ValueError("Cursor is beyond the filtered row count.") + remaining_rows = rows[start_index:] + sheet_payload = dict(sheet_data) + sheet_payload["rows"] = [] + payload = { + "book_name": data.get("book_name"), + "sheet_name": sheet_name, + "sheet": sheet_payload, + } + base_json = _serialize_json(payload) + if _json_size(base_json) > max_bytes: + warnings.append("Base payload exceeds max_bytes; returning without rows.") + return base_json, None, warnings + + selected: list[dict[str, Any]] = [] + next_cursor = None + for offset, row in enumerate(remaining_rows): + selected.append(row) + sheet_payload["rows"] = selected + candidate = _serialize_json(payload) + if _json_size(candidate) > max_bytes: + if len(selected) == 1: + warnings.append("max_bytes too small; returning a single row chunk.") + next_cursor = ( + str(start_index + 1) if (start_index + 1) < len(rows) else None + ) + return candidate, next_cursor, warnings + selected.pop() + sheet_payload["rows"] = selected + next_cursor_index = start_index + offset + next_cursor = ( + str(next_cursor_index) if next_cursor_index < len(rows) else None + ) + return _serialize_json(payload), next_cursor, warnings + + sheet_payload["rows"] = selected + return _serialize_json(payload), None, warnings + + +def _parse_cursor(cursor: str | None) -> int: + """Parse cursor into a start index. + + Args: + cursor: Cursor string. + + Returns: + Parsed start index. + + Raises: + ValueError: If cursor is invalid. 
+ """ + if cursor is None: + return 0 + try: + value = int(cursor) + except (TypeError, ValueError) as exc: + raise ValueError("Cursor must be an integer string.") from exc + if value < 0: + raise ValueError("Cursor must be non-negative.") + return value + + +def _serialize_json(payload: dict[str, Any]) -> str: + """Serialize a payload to JSON string. + + Args: + payload: JSON-serializable payload. + + Returns: + Serialized JSON text. + """ + return json.dumps(payload, ensure_ascii=False, separators=(",", ":")) + + +def _json_size(payload: str) -> int: + """Return payload size in bytes. + + Args: + payload: JSON text. + + Returns: + Payload size in bytes. + """ + return len(payload.encode("utf-8")) diff --git a/src/exstruct/mcp/server.py b/src/exstruct/mcp/server.py index 73c5d08..2349d0e 100644 --- a/src/exstruct/mcp/server.py +++ b/src/exstruct/mcp/server.py @@ -12,7 +12,17 @@ from exstruct import ExtractionMode from .io import PathPolicy -from .tools import ExtractToolInput, ExtractToolOutput, run_extract_tool +from .tools import ( + ExtractToolInput, + ExtractToolOutput, + ReadJsonChunkToolInput, + ReadJsonChunkToolOutput, + ValidateInputToolInput, + ValidateInputToolOutput, + run_extract_tool, + run_read_json_chunk_tool, + run_validate_input_tool, +) if TYPE_CHECKING: # pragma: no cover - typing only from mcp.server.fastmcp import FastMCP @@ -180,3 +190,63 @@ def _extract_tool( tool = app.tool(name="exstruct.extract") tool(_extract_tool) + + def _read_json_chunk_tool( + out_path: str, + sheet: str | None = None, + max_bytes: int = 50_000, + filter: dict[str, Any] | None = None, # noqa: A002 + cursor: str | None = None, + ) -> ReadJsonChunkToolOutput: + """Handle JSON chunk tool call. + + Args: + out_path: Path to the JSON output file. + sheet: Optional sheet name. + max_bytes: Maximum chunk size in bytes. + filter: Optional filter payload. + cursor: Optional cursor for pagination. + + Returns: + JSON chunk result payload. 
+ """ + payload = ReadJsonChunkToolInput( + out_path=out_path, + sheet=sheet, + max_bytes=max_bytes, + filter=_coerce_filter(filter), + cursor=cursor, + ) + return run_read_json_chunk_tool(payload, policy=policy) + + chunk_tool = app.tool(name="exstruct.read_json_chunk") + chunk_tool(_read_json_chunk_tool) + + def _validate_input_tool(xlsx_path: str) -> ValidateInputToolOutput: + """Handle input validation tool call. + + Args: + xlsx_path: Path to the Excel workbook. + + Returns: + Validation result payload. + """ + payload = ValidateInputToolInput(xlsx_path=xlsx_path) + return run_validate_input_tool(payload, policy=policy) + + validate_tool = app.tool(name="exstruct.validate_input") + validate_tool(_validate_input_tool) + + +def _coerce_filter(filter_data: dict[str, Any] | None) -> dict[str, Any] | None: + """Normalize filter input for chunk reading. + + Args: + filter_data: Filter payload from MCP tool call. + + Returns: + Normalized filter dict or None. + """ + if not filter_data: + return None + return dict(filter_data) diff --git a/src/exstruct/mcp/tools.py b/src/exstruct/mcp/tools.py index d244e4d..d4c692f 100644 --- a/src/exstruct/mcp/tools.py +++ b/src/exstruct/mcp/tools.py @@ -7,8 +7,19 @@ from exstruct import ExtractionMode +from .chunk_reader import ( + ReadJsonChunkFilter, + ReadJsonChunkRequest, + ReadJsonChunkResult, + read_json_chunk, +) from .extract_runner import ExtractRequest, ExtractResult, WorkbookMeta, run_extract from .io import PathPolicy +from .validate_input import ( + ValidateInputRequest, + ValidateInputResult, + validate_input, +) class ExtractToolInput(BaseModel): @@ -31,6 +42,38 @@ class ExtractToolOutput(BaseModel): engine: Literal["internal_api", "cli_subprocess"] = "internal_api" +class ReadJsonChunkToolInput(BaseModel): + """MCP tool input for JSON chunk reading.""" + + out_path: str + sheet: str | None = None + max_bytes: int = Field(default=50_000, ge=1) + filter: ReadJsonChunkFilter | None = Field(default=None) # noqa: A003 + 
cursor: str | None = None + + +class ReadJsonChunkToolOutput(BaseModel): + """MCP tool output for JSON chunk reading.""" + + chunk: str + next_cursor: str | None = None + warnings: list[str] = Field(default_factory=list) + + +class ValidateInputToolInput(BaseModel): + """MCP tool input for validating Excel files.""" + + xlsx_path: str + + +class ValidateInputToolOutput(BaseModel): + """MCP tool output for validating Excel files.""" + + is_readable: bool + warnings: list[str] = Field(default_factory=list) + errors: list[str] = Field(default_factory=list) + + def run_extract_tool( payload: ExtractToolInput, *, policy: PathPolicy | None = None ) -> ExtractToolOutput: @@ -55,6 +98,46 @@ def run_extract_tool( return _to_tool_output(result) +def run_read_json_chunk_tool( + payload: ReadJsonChunkToolInput, *, policy: PathPolicy | None = None +) -> ReadJsonChunkToolOutput: + """Run the JSON chunk tool handler. + + Args: + payload: Tool input payload. + policy: Optional path policy for access control. + + Returns: + Tool output payload. + """ + request = ReadJsonChunkRequest( + out_path=Path(payload.out_path), + sheet=payload.sheet, + max_bytes=payload.max_bytes, + filter=payload.filter, + cursor=payload.cursor, + ) + result = read_json_chunk(request, policy=policy) + return _to_read_json_chunk_output(result) + + +def run_validate_input_tool( + payload: ValidateInputToolInput, *, policy: PathPolicy | None = None +) -> ValidateInputToolOutput: + """Run the validate input tool handler. + + Args: + payload: Tool input payload. + policy: Optional path policy for access control. + + Returns: + Tool output payload. + """ + request = ValidateInputRequest(xlsx_path=Path(payload.xlsx_path)) + result = validate_input(request, policy=policy) + return _to_validate_input_output(result) + + def _to_tool_output(result: ExtractResult) -> ExtractToolOutput: """Convert internal result to tool output model. 
@@ -70,3 +153,39 @@ def _to_tool_output(result: ExtractResult) -> ExtractToolOutput: warnings=result.warnings, engine=result.engine, ) + + +def _to_read_json_chunk_output( + result: ReadJsonChunkResult, +) -> ReadJsonChunkToolOutput: + """Convert internal result to JSON chunk tool output. + + Args: + result: Internal chunk result. + + Returns: + Tool output payload. + """ + return ReadJsonChunkToolOutput( + chunk=result.chunk, + next_cursor=result.next_cursor, + warnings=result.warnings, + ) + + +def _to_validate_input_output( + result: ValidateInputResult, +) -> ValidateInputToolOutput: + """Convert internal result to validate input tool output. + + Args: + result: Internal validation result. + + Returns: + Tool output payload. + """ + return ValidateInputToolOutput( + is_readable=result.is_readable, + warnings=result.warnings, + errors=result.errors, + ) diff --git a/src/exstruct/mcp/validate_input.py b/src/exstruct/mcp/validate_input.py new file mode 100644 index 0000000..f9568cf --- /dev/null +++ b/src/exstruct/mcp/validate_input.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +from pathlib import Path + +from pydantic import BaseModel, Field + +from exstruct.cli.availability import get_com_availability + +from .io import PathPolicy + +_ALLOWED_EXTENSIONS = {".xlsx", ".xlsm", ".xls"} + + +class ValidateInputRequest(BaseModel): + """Input model for validating an Excel file.""" + + xlsx_path: Path + + +class ValidateInputResult(BaseModel): + """Output model for input validation.""" + + is_readable: bool + warnings: list[str] = Field(default_factory=list) + errors: list[str] = Field(default_factory=list) + + +def validate_input( + request: ValidateInputRequest, *, policy: PathPolicy | None = None +) -> ValidateInputResult: + """Validate input Excel file for MCP usage. + + Args: + request: Validation request payload. + policy: Optional path policy for access control. + + Returns: + Validation result with errors and warnings. 
+ """ + warnings: list[str] = [] + errors: list[str] = [] + + try: + resolved = ( + policy.ensure_allowed(request.xlsx_path) + if policy + else request.xlsx_path.resolve() + ) + except ValueError as exc: + errors.append(str(exc)) + return ValidateInputResult(is_readable=False, warnings=warnings, errors=errors) + + if not resolved.exists(): + errors.append(f"File not found: {resolved}") + return ValidateInputResult(is_readable=False, warnings=warnings, errors=errors) + + if not resolved.is_file(): + errors.append(f"Path is not a file: {resolved}") + return ValidateInputResult(is_readable=False, warnings=warnings, errors=errors) + + if resolved.suffix.lower() not in _ALLOWED_EXTENSIONS: + errors.append(f"Unsupported file extension: {resolved.suffix}") + return ValidateInputResult(is_readable=False, warnings=warnings, errors=errors) + + try: + with resolved.open("rb") as handle: + handle.read(1) + except OSError as exc: + errors.append(f"Failed to read file: {exc}") + return ValidateInputResult(is_readable=False, warnings=warnings, errors=errors) + + com = get_com_availability() + if not com.available and com.reason: + warnings.append(f"COM unavailable: {com.reason}") + + return ValidateInputResult(is_readable=True, warnings=warnings, errors=errors) From 8ffc71a3c271793f22f1866184c0199e67967b7a Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 16:05:03 +0900 Subject: [PATCH 04/18] Refactor code structure for improved readability and maintainability --- .gitignore | 3 +- .../core/backends/openpyxl_backend.py | 12 +- src/exstruct/core/cells.py | 119 ++++- src/exstruct/core/pipeline.py | 55 +- src/exstruct/io/__init__.py | 21 +- src/exstruct/mcp/extract_runner.py | 8 + src/exstruct/mcp/server.py | 32 +- uv.lock | 474 +++++++++++++++++- 8 files changed, 702 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index db1a07e..5ebf3da 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,5 @@ ruff_report.txt ruff-error.txt mypy_report.txt 
coverage.xml -htmlcov/ \ No newline at end of file +htmlcov/ +.tmp_mcp_test/ \ No newline at end of file diff --git a/src/exstruct/core/backends/openpyxl_backend.py b/src/exstruct/core/backends/openpyxl_backend.py index bef9428..58db8bb 100644 --- a/src/exstruct/core/backends/openpyxl_backend.py +++ b/src/exstruct/core/backends/openpyxl_backend.py @@ -5,6 +5,7 @@ from dataclasses import dataclass import logging from pathlib import Path +from typing import Literal from ...models import PrintArea from ..cells import ( @@ -116,18 +117,25 @@ def extract_formulas_map(self) -> WorkbookFormulasMap | None: ) return None - def detect_tables(self, sheet_name: str) -> list[str]: + def detect_tables( + self, + sheet_name: str, + *, + mode: Literal["light", "standard", "verbose"] = "standard", + ) -> list[str]: """ Detects table candidate ranges within the specified worksheet. Parameters: sheet_name (str): Name of the worksheet to analyze for table candidates. + mode (Literal["light", "standard", "verbose"]): Extraction mode, used to + adjust scan limits in openpyxl-based detection. Returns: list[str]: Detected table candidate ranges as A1-style range strings; empty list if none are found or detection fails. 
""" try: - return detect_tables_openpyxl(self.file_path, sheet_name) + return detect_tables_openpyxl(self.file_path, sheet_name, mode=mode) except Exception: return [] diff --git a/src/exstruct/core/cells.py b/src/exstruct/core/cells.py index 1ad915a..b473667 100644 --- a/src/exstruct/core/cells.py +++ b/src/exstruct/core/cells.py @@ -5,8 +5,10 @@ from dataclasses import dataclass from decimal import Decimal, InvalidOperation import logging +import math from pathlib import Path import re +from typing import Literal import numpy as np from openpyxl.styles.colors import Color @@ -39,6 +41,8 @@ _DEFAULT_BACKGROUND_HEX = "FFFFFF" _XL_COLOR_NONE = -4142 +ExtractionMode = Literal["light", "standard", "verbose"] + # Use dataclasses for lightweight models @dataclass(frozen=True) @@ -106,6 +110,43 @@ class MergedCellRange: v: str +@dataclass(frozen=True) +class TableScanLimits: + """Limits for openpyxl border scanning during table detection.""" + + max_rows: int + max_cols: int + empty_row_run: int + empty_col_run: int + + def scaled(self, factor: float) -> TableScanLimits: + """Return a scaled copy of the limits.""" + return TableScanLimits( + max_rows=int(math.ceil(self.max_rows * factor)), + max_cols=int(math.ceil(self.max_cols * factor)), + empty_row_run=int(math.ceil(self.empty_row_run * factor)), + empty_col_run=int(math.ceil(self.empty_col_run * factor)), + ) + + +_DEFAULT_TABLE_SCAN_LIMITS = TableScanLimits( + max_rows=5000, + max_cols=200, + empty_row_run=200, + empty_col_run=80, +) + + +def _resolve_table_scan_limits( + mode: ExtractionMode, scan_limits: TableScanLimits | None +) -> TableScanLimits: + if scan_limits is not None: + return scan_limits + if mode in {"standard", "verbose"}: + return _DEFAULT_TABLE_SCAN_LIMITS.scaled(1.5) + return _DEFAULT_TABLE_SCAN_LIMITS + + def extract_sheet_colors_map( file_path: Path, *, include_default_background: bool, ignore_colors: set[str] | None ) -> WorkbookColorsMap: @@ -863,7 +904,10 @@ def should_trim_row(i: int) -> 
bool: def load_border_maps_xlsx( # noqa: C901 - xlsx_path: Path, sheet_name: str + xlsx_path: Path, + sheet_name: str, + *, + scan_limits: TableScanLimits | None = None, ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int]: with openpyxl_workbook(xlsx_path, data_only=True, read_only=False) as wb: if sheet_name not in wb.sheetnames: @@ -882,12 +926,17 @@ def load_border_maps_xlsx( # noqa: C901 ws.max_row or 1, ) - shape = (max_row + 1, max_col + 1) + resolved_limits = scan_limits or _DEFAULT_TABLE_SCAN_LIMITS + scan_max_row = min(max_row, resolved_limits.max_rows) + scan_max_col = min(max_col, resolved_limits.max_cols) + + shape = (scan_max_row + 1, scan_max_col + 1) has_border = np.zeros(shape, dtype=bool) top_edge = np.zeros(shape, dtype=bool) bottom_edge = np.zeros(shape, dtype=bool) left_edge = np.zeros(shape, dtype=bool) right_edge = np.zeros(shape, dtype=bool) + col_has_border = np.zeros(shape[1], dtype=bool) def edge_has_style(edge: object) -> bool: if edge is None: @@ -895,8 +944,12 @@ def edge_has_style(edge: object) -> bool: style = getattr(edge, "style", None) return style is not None and style != "none" - for r in range(min_row, max_row + 1): - for c in range(min_col, max_col + 1): + consecutive_empty_rows = 0 + current_max_col = scan_max_col + + for r in range(min_row, scan_max_row + 1): + row_has_border = False + for c in range(min_col, current_max_col + 1): cell = ws.cell(row=r, column=c) b = getattr(cell, "border", None) if b is None: @@ -908,6 +961,8 @@ def edge_has_style(edge: object) -> bool: rgt = edge_has_style(b.right) if t or btm or left_border or rgt: + row_has_border = True + col_has_border[c] = True has_border[r, c] = True if t: top_edge[r, c] = True @@ -918,7 +973,43 @@ def edge_has_style(edge: object) -> bool: if rgt: right_edge[r, c] = True - return has_border, top_edge, bottom_edge, left_edge, right_edge, max_row, max_col + if row_has_border: + consecutive_empty_rows = 0 + else: + consecutive_empty_rows += 1 
+ if consecutive_empty_rows >= resolved_limits.empty_row_run: + logger.info( + "Openpyxl border scan early-exit after %s empty rows (sheet=%s).", + resolved_limits.empty_row_run, + sheet_name, + ) + break + + trailing_empty_cols = 0 + for c in range(current_max_col, min_col - 1, -1): + if col_has_border[c]: + break + trailing_empty_cols += 1 + if trailing_empty_cols >= resolved_limits.empty_col_run: + new_max_col = max(min_col, current_max_col - trailing_empty_cols) + if new_max_col < current_max_col: + current_max_col = new_max_col + logger.info( + "Openpyxl border scan early-exit after %s empty columns (sheet=%s).", + resolved_limits.empty_col_run, + sheet_name, + ) + break + + return ( + has_border, + top_edge, + bottom_edge, + left_edge, + right_edge, + scan_max_row, + scan_max_col, + ) def _detect_border_clusters_numpy( @@ -1619,14 +1710,24 @@ def detect_tables_xlwings(sheet: xw.Sheet) -> list[str]: return tables -def detect_tables_openpyxl(xlsx_path: Path, sheet_name: str) -> list[str]: +def detect_tables_openpyxl( + xlsx_path: Path, + sheet_name: str, + *, + mode: ExtractionMode = "standard", + scan_limits: TableScanLimits | None = None, +) -> list[str]: """Detect table-like ranges via openpyxl tables and border clusters.""" with openpyxl_workbook(xlsx_path, data_only=True, read_only=False) as wb: ws = wb[sheet_name] tables = _extract_openpyxl_table_refs(ws) has_border, top_edge, bottom_edge, left_edge, right_edge, max_row, max_col = ( - load_border_maps_xlsx(xlsx_path, sheet_name) + load_border_maps_xlsx( + xlsx_path, + sheet_name, + scan_limits=_resolve_table_scan_limits(mode, scan_limits), + ) ) rects = _detect_border_rectangles(has_border, min_size=4) merged_rects = _merge_rectangles(rects) @@ -1660,7 +1761,7 @@ def detect_tables_openpyxl(xlsx_path: Path, sheet_name: str) -> list[str]: return tables -def detect_tables(sheet: xw.Sheet) -> list[str]: +def detect_tables(sheet: xw.Sheet, *, mode: ExtractionMode = "standard") -> list[str]: excel_path: Path | 
None = None try: excel_path = Path(sheet.book.fullname) @@ -1685,7 +1786,7 @@ def detect_tables(sheet: xw.Sheet) -> list[str]: return detect_tables_xlwings(sheet) try: - return detect_tables_openpyxl(excel_path, sheet.name) + return detect_tables_openpyxl(excel_path, sheet.name, mode=mode) except Exception as e: warn_once( f"openpyxl-parse-fallback::{excel_path}::{sheet.name}", diff --git a/src/exstruct/core/pipeline.py b/src/exstruct/core/pipeline.py index 53a4fda..adcc1f7 100644 --- a/src/exstruct/core/pipeline.py +++ b/src/exstruct/core/pipeline.py @@ -5,6 +5,7 @@ import logging import os from pathlib import Path +import time from typing import Literal import xlwings as xw @@ -459,7 +460,7 @@ def run_pipeline( Updated artifacts after running all steps. """ for step in steps: - step(inputs, artifacts) + _run_timed_step(step, inputs, artifacts) return artifacts @@ -481,10 +482,46 @@ def run_com_pipeline( Updated artifacts after running all COM steps. """ for step in steps: - step(inputs, artifacts, workbook) + _run_timed_com_step(step, inputs, artifacts, workbook) return artifacts +def _run_timed_step( + step: ExtractionStep, inputs: ExtractionInputs, artifacts: ExtractionArtifacts +) -> None: + """Run a pipeline step while logging its duration. + + Args: + step: Pipeline step to execute. + inputs: Pipeline inputs. + artifacts: Artifact container to update. + """ + start = time.monotonic() + step(inputs, artifacts) + elapsed = time.monotonic() - start + logger.info("Pipeline step %s completed in %.2fs", step.__name__, elapsed) + + +def _run_timed_com_step( + step: ComExtractionStep, + inputs: ExtractionInputs, + artifacts: ExtractionArtifacts, + workbook: xw.Book, +) -> None: + """Run a COM pipeline step while logging its duration. + + Args: + step: COM pipeline step to execute. + inputs: Pipeline inputs. + artifacts: Artifact container to update. + workbook: xlwings workbook instance. 
+ """ + start = time.monotonic() + step(inputs, artifacts, workbook) + elapsed = time.monotonic() - start + logger.info("COM step %s completed in %.2fs", step.__name__, elapsed) + + def step_extract_cells( inputs: ExtractionInputs, artifacts: ExtractionArtifacts ) -> None: @@ -864,7 +901,7 @@ def collect_sheet_raw_data( rows=filtered_rows, shapes=shape_data.get(sheet_name, []), charts=chart_data.get(sheet_name, []) if mode != "light" else [], - table_candidates=detect_tables(sheet), + table_candidates=detect_tables(sheet, mode=mode), print_areas=print_area_data.get(sheet_name, []) if print_area_data else [], auto_print_areas=auto_page_break_data.get(sheet_name, []) if auto_page_break_data @@ -894,11 +931,13 @@ def run_extraction_pipeline(inputs: ExtractionInputs) -> PipelineResult: def _fallback(message: str, reason: FallbackReason) -> PipelineResult: state.fallback_reason = reason log_fallback(logger, reason, message) + logger.info("Fallback pipeline start: %s", reason.value) workbook = build_cells_tables_workbook( inputs=inputs, artifacts=artifacts, reason=message, ) + logger.info("Fallback pipeline completed.") return PipelineResult(workbook=workbook, artifacts=artifacts, state=state) if not plan.use_com: @@ -970,7 +1009,7 @@ def build_cells_tables_workbook( Returns: WorkbookData: A workbook composed from the available per-sheet cell rows, detected table candidates, merged-cell information, and any resolved formulas and colors maps. Shapes and charts are empty in this fallback path; formulas and colors maps are extracted from artifacts or from the Openpyxl backend when requested and not already present. 
""" - logger.debug("Building fallback workbook: %s", reason) + logger.info("Building fallback workbook: %s", reason) backend = OpenpyxlBackend(inputs.file_path) colors_map_data = artifacts.colors_map_data if inputs.include_colors_map and colors_map_data is None: @@ -987,13 +1026,19 @@ def build_cells_tables_workbook( formulas_map_data = backend.extract_formulas_map() sheets: dict[str, SheetRawData] = {} for sheet_name, rows in artifacts.cell_data.items(): + detect_start = time.monotonic() sheet_colors = ( colors_map_data.get_sheet(sheet_name) if colors_map_data else None ) sheet_formulas = ( formulas_map_data.get_sheet(sheet_name) if formulas_map_data else None ) - tables = backend.detect_tables(sheet_name) + tables = backend.detect_tables(sheet_name, mode=inputs.mode) + logger.info( + "detect_tables for %s completed in %.2fs", + sheet_name, + time.monotonic() - detect_start, + ) merged_cells = artifacts.merged_cell_data.get(sheet_name, []) filtered_rows = ( rows diff --git a/src/exstruct/io/__init__.py b/src/exstruct/io/__init__.py index 11a8a44..6fdf925 100644 --- a/src/exstruct/io/__init__.py +++ b/src/exstruct/io/__init__.py @@ -3,6 +3,7 @@ import logging from pathlib import Path import re +import time from typing import Literal, cast from ..core.ranges import RangeBounds, parse_range_zero_based @@ -70,10 +71,12 @@ def dict_without_empty_values(obj: object) -> JsonStructure: def _write_text(path: Path, text: str) -> None: """Write UTF-8 text to disk, wrapping IO errors.""" + start = time.monotonic() try: path.write_text(text, encoding="utf-8") except Exception as exc: raise OutputError(f"Failed to write output to '{path}'.") from exc + logger.info("Wrote output to %s in %.2fs", path, time.monotonic() - start) def save_as_json( @@ -453,18 +456,34 @@ def serialize_workbook( """ Convert WorkbookData to string in the requested format without writing to disk. 
""" + total_start = time.monotonic() format_hint = _ensure_format_hint( fmt, allowed=_FORMAT_HINTS, error_type=SerializationError, error_message="Unsupported export format '{fmt}'. Allowed: json, yaml, yml, toon.", ) + dump_start = time.monotonic() filtered_dict = dict_without_empty_values( model.model_dump(exclude_none=True, by_alias=True) ) - return _serialize_payload_from_hint( + logger.info( + "serialize_workbook model_dump completed in %.2fs", + time.monotonic() - dump_start, + ) + serialize_start = time.monotonic() + result = _serialize_payload_from_hint( filtered_dict, format_hint, pretty=pretty, indent=indent ) + logger.info( + "serialize_workbook serialization completed in %.2fs", + time.monotonic() - serialize_start, + ) + logger.info( + "serialize_workbook total completed in %.2fs", + time.monotonic() - total_start, + ) + return result def save_sheets_as_json( diff --git a/src/exstruct/mcp/extract_runner.py b/src/exstruct/mcp/extract_runner.py index 999fad3..2ee0b4b 100644 --- a/src/exstruct/mcp/extract_runner.py +++ b/src/exstruct/mcp/extract_runner.py @@ -1,6 +1,8 @@ from __future__ import annotations +import logging from pathlib import Path +import time from typing import Any, Literal from pydantic import BaseModel, Field @@ -9,6 +11,8 @@ from .io import PathPolicy +logger = logging.getLogger(__name__) + class WorkbookMeta(BaseModel): """Lightweight workbook metadata for MCP responses.""" @@ -63,14 +67,18 @@ def run_extract( ) _ensure_output_dir(output_path) + start = time.monotonic() process_excel( file_path=resolved_input, output_path=output_path, out_fmt=request.format, mode=request.mode, ) + logger.info("process_excel completed in %.2fs", time.monotonic() - start) + meta_start = time.monotonic() meta, warnings = _try_read_workbook_meta(resolved_input) + logger.info("workbook meta read completed in %.2fs", time.monotonic() - meta_start) return ExtractResult( out_path=str(output_path), workbook_meta=meta, diff --git a/src/exstruct/mcp/server.py 
b/src/exstruct/mcp/server.py index 2349d0e..ebccdd9 100644 --- a/src/exstruct/mcp/server.py +++ b/src/exstruct/mcp/server.py @@ -1,12 +1,15 @@ from __future__ import annotations import argparse +import functools import importlib import logging from pathlib import Path +import time from types import ModuleType -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, cast +import anyio from pydantic import BaseModel, Field from exstruct import ExtractionMode @@ -37,6 +40,7 @@ class ServerConfig(BaseModel): deny_globs: list[str] = Field(default_factory=list, description="Denied glob list.") log_level: str = Field(default="INFO", description="Logging level.") log_file: Path | None = Field(default=None, description="Optional log file path.") + warmup: bool = Field(default=False, description="Warm up heavy imports on start.") def main(argv: list[str] | None = None) -> int: @@ -67,6 +71,8 @@ def run_server(config: ServerConfig) -> None: _import_mcp() policy = PathPolicy(root=config.root, deny_globs=config.deny_globs) logger.info("MCP root: %s", policy.normalize_root()) + if config.warmup: + _warmup_exstruct() app = _create_app(policy) app.run() @@ -94,12 +100,18 @@ def _parse_args(argv: list[str] | None) -> ServerConfig: help="Logging level (DEBUG, INFO, WARNING, ERROR).", ) parser.add_argument("--log-file", type=Path, help="Optional log file path.") + parser.add_argument( + "--warmup", + action="store_true", + help="Warm up heavy imports on startup to reduce tool latency.", + ) args = parser.parse_args(argv) return ServerConfig( root=args.root, deny_globs=list(args.deny_glob), log_level=args.log_level, log_file=args.log_file, + warmup=bool(args.warmup), ) @@ -133,6 +145,14 @@ def _import_mcp() -> ModuleType: ) from exc +def _warmup_exstruct() -> None: + """Warm up heavy imports to reduce first-call latency.""" + logger.info("Warming up ExStruct imports...") + importlib.import_module("exstruct.core.cells") + 
importlib.import_module("exstruct.core.integrate") + logger.info("Warmup completed.") + + def _create_app(policy: PathPolicy) -> FastMCP: """Create the MCP FastMCP application. @@ -157,7 +177,7 @@ def _register_tools(app: FastMCP, policy: PathPolicy) -> None: policy: Path policy for filesystem access. """ - def _extract_tool( + async def _extract_tool( xlsx_path: str, mode: ExtractionMode = "standard", format: Literal["json", "yaml", "yml", "toon"] = "json", # noqa: A002 @@ -178,6 +198,8 @@ def _extract_tool( Returns: Extraction result payload. """ + logger.info("exstruct.extract start: %s", xlsx_path) + start = time.monotonic() payload = ExtractToolInput( xlsx_path=xlsx_path, mode=mode, @@ -186,7 +208,11 @@ def _extract_tool( out_name=out_name, options=options or {}, ) - return run_extract_tool(payload, policy=policy) + work = functools.partial(run_extract_tool, payload, policy=policy) + result = cast(ExtractToolOutput, await anyio.to_thread.run_sync(work)) + elapsed = time.monotonic() - start + logger.info("exstruct.extract done in %.2fs", elapsed) + return result tool = app.tool(name="exstruct.extract") tool(_extract_tool) diff --git a/uv.lock b/uv.lock index 491e721..db61bbf 100644 --- a/uv.lock +++ b/uv.lock @@ -15,6 +15,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = 
"sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + [[package]] name = "appscript" version = "1.4.0" @@ -34,6 +47,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dd/e3/03dc0f97eab839f72061342d69bd34424e89876ce4026509aab3d74d4f23/appscript-1.4.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5efce3302c00674b769b79938cc5f66f7791ef45c6419e850a5f1c8f9fcefcc1", size = 85610, upload-time = "2025-10-08T07:56:38.103Z" }, ] +[[package]] +name = "attrs" +version = "25.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, +] + [[package]] name = "babel" version = "2.17.0" @@ -66,6 +88,76 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, ] +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344, upload-time = "2025-09-08T23:22:26.456Z" }, + { url = "https://files.pythonhosted.org/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560, upload-time = "2025-09-08T23:22:28.197Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613, upload-time = "2025-09-08T23:22:29.475Z" }, + { url = "https://files.pythonhosted.org/packages/b8/56/6033f5e86e8cc9bb629f0077ba71679508bdf54a9a5e112a3c0b91870332/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", size = 216476, upload-time = "2025-09-08T23:22:31.063Z" }, + { url = "https://files.pythonhosted.org/packages/dc/7f/55fecd70f7ece178db2f26128ec41430d8720f2d12ca97bf8f0a628207d5/cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", size = 203374, upload-time = 
"2025-09-08T23:22:32.507Z" }, + { url = "https://files.pythonhosted.org/packages/84/ef/a7b77c8bdc0f77adc3b46888f1ad54be8f3b7821697a7b89126e829e676a/cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", size = 202597, upload-time = "2025-09-08T23:22:34.132Z" }, + { url = "https://files.pythonhosted.org/packages/d7/91/500d892b2bf36529a75b77958edfcd5ad8e2ce4064ce2ecfeab2125d72d1/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", size = 215574, upload-time = "2025-09-08T23:22:35.443Z" }, + { url = "https://files.pythonhosted.org/packages/44/64/58f6255b62b101093d5df22dcb752596066c7e89dd725e0afaed242a61be/cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", size = 218971, upload-time = "2025-09-08T23:22:36.805Z" }, + { url = "https://files.pythonhosted.org/packages/ab/49/fa72cebe2fd8a55fbe14956f9970fe8eb1ac59e5df042f603ef7c8ba0adc/cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", size = 211972, upload-time = "2025-09-08T23:22:38.436Z" }, + { url = "https://files.pythonhosted.org/packages/0b/28/dd0967a76aab36731b6ebfe64dec4e981aff7e0608f60c2d46b46982607d/cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", size = 217078, upload-time = "2025-09-08T23:22:39.776Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076, upload-time = "2025-09-08T23:22:40.95Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820, upload-time = "2025-09-08T23:22:42.463Z" }, + { url = "https://files.pythonhosted.org/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635, upload-time = "2025-09-08T23:22:43.623Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = 
"https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, +] + [[package]] name = "cfgv" version = "3.5.0" @@ -278,6 +370,68 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11'" }, ] +[[package]] +name = 
"cryptography" +version = "46.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/42/9c391dd801d6cf0d561b5890549d4b27bafcc53b39c31a817e69d87c625b/cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a", size = 7225004, upload-time = "2025-10-15T23:16:52.239Z" }, + { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667, upload-time = "2025-10-15T23:16:54.369Z" }, + { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800, upload-time = "2025-10-15T23:17:00.378Z" }, + { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707, upload-time = "2025-10-15T23:17:01.98Z" }, + { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541, upload-time = "2025-10-15T23:17:04.078Z" }, + { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464, upload-time = "2025-10-15T23:17:05.483Z" }, + { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838, upload-time = "2025-10-15T23:17:07.425Z" }, + { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" }, + { url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" }, + { url = "https://files.pythonhosted.org/packages/96/92/8a6a9525893325fc057a01f654d7efc2c64b9de90413adcf605a85744ff4/cryptography-46.0.3-cp311-abi3-win32.whl", hash = "sha256:f260d0d41e9b4da1ed1e0f1ce571f97fe370b152ab18778e9e8f67d6af432018", size = 3055988, upload-time = "2025-10-15T23:17:14.65Z" }, + { url = "https://files.pythonhosted.org/packages/7e/bf/80fbf45253ea585a1e492a6a17efcb93467701fa79e71550a430c5e60df0/cryptography-46.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:a9a3008438615669153eb86b26b61e09993921ebdd75385ddd748702c5adfddb", size = 3514451, upload-time = "2025-10-15T23:17:16.142Z" }, + { url = "https://files.pythonhosted.org/packages/2e/af/9b302da4c87b0beb9db4e756386a7c6c5b8003cd0e742277888d352ae91d/cryptography-46.0.3-cp311-abi3-win_arm64.whl", hash = "sha256:5d7f93296ee28f68447397bf5198428c9aeeab45705a55d53a6343455dcb2c3c", size = 2928007, upload-time = "2025-10-15T23:17:18.04Z" }, + { url = "https://files.pythonhosted.org/packages/f5/e2/a510aa736755bffa9d2f75029c229111a1d02f8ecd5de03078f4c18d91a3/cryptography-46.0.3-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:00a5e7e87938e5ff9ff5447ab086a5706a957137e6e433841e9d24f38a065217", size = 7158012, upload-time = "2025-10-15T23:17:19.982Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728, upload-time = "2025-10-15T23:17:21.527Z" }, + { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078, upload-time = "2025-10-15T23:17:23.042Z" }, + { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460, upload-time = "2025-10-15T23:17:24.885Z" }, + { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237, upload-time = "2025-10-15T23:17:26.449Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344, upload-time = "2025-10-15T23:17:28.06Z" }, + { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564, upload-time = "2025-10-15T23:17:29.665Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415, upload-time = "2025-10-15T23:17:31.686Z" }, + { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457, upload-time = "2025-10-15T23:17:33.478Z" }, + { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074, upload-time = "2025-10-15T23:17:35.158Z" }, + { url = "https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569, upload-time = "2025-10-15T23:17:37.188Z" }, + { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941, upload-time = "2025-10-15T23:17:39.236Z" }, + { url = "https://files.pythonhosted.org/packages/fd/30/27654c1dbaf7e4a3531fa1fc77986d04aefa4d6d78259a62c9dc13d7ad36/cryptography-46.0.3-cp314-cp314t-win32.whl", hash = "sha256:8a6e050cb6164d3f830453754094c086ff2d0b2f3a897a1d9820f6139a1f0914", size = 3022339, upload-time = "2025-10-15T23:17:40.888Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/30/640f34ccd4d2a1bc88367b54b926b781b5a018d65f404d409aba76a84b1c/cryptography-46.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:760f83faa07f8b64e9c33fc963d790a2edb24efb479e3520c14a45741cd9b2db", size = 3494315, upload-time = "2025-10-15T23:17:42.769Z" }, + { url = "https://files.pythonhosted.org/packages/ba/8b/88cc7e3bd0a8e7b861f26981f7b820e1f46aa9d26cc482d0feba0ecb4919/cryptography-46.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:516ea134e703e9fe26bcd1277a4b59ad30586ea90c365a87781d7887a646fe21", size = 2919331, upload-time = "2025-10-15T23:17:44.468Z" }, + { url = "https://files.pythonhosted.org/packages/fd/23/45fe7f376a7df8daf6da3556603b36f53475a99ce4faacb6ba2cf3d82021/cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936", size = 7218248, upload-time = "2025-10-15T23:17:46.294Z" }, + { url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089, upload-time = "2025-10-15T23:17:48.269Z" }, + { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280, upload-time = "2025-10-15T23:17:52.964Z" }, + { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958, upload-time = "2025-10-15T23:17:54.965Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714, upload-time = "2025-10-15T23:17:56.754Z" }, + { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970, upload-time = "2025-10-15T23:17:58.588Z" }, + { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236, upload-time = "2025-10-15T23:18:00.897Z" }, + { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" }, + { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" }, + { url = "https://files.pythonhosted.org/packages/0a/6e/1c8331ddf91ca4730ab3086a0f1be19c65510a33b5a441cb334e7a2d2560/cryptography-46.0.3-cp38-abi3-win32.whl", hash = "sha256:6276eb85ef938dc035d59b87c8a7dc559a232f954962520137529d77b18ff1df", size = 3036695, upload-time = "2025-10-15T23:18:08.672Z" }, + { url = "https://files.pythonhosted.org/packages/90/45/b0d691df20633eff80955a0fc7695ff9051ffce8b69741444bd9ed7bd0db/cryptography-46.0.3-cp38-abi3-win_amd64.whl", hash = "sha256:416260257577718c05135c55958b674000baef9a1c7d9e8f306ec60d71db850f", size = 3501720, upload-time = "2025-10-15T23:18:10.632Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cb/2da4cc83f5edb9c3257d09e1e7ab7b23f049c7962cae8d842bbef0a9cec9/cryptography-46.0.3-cp38-abi3-win_arm64.whl", hash = "sha256:d89c3468de4cdc4f08a57e214384d0471911a3830fcdaf7a8cc587e42a866372", size = 2918740, upload-time = "2025-10-15T23:18:12.277Z" }, + { url = "https://files.pythonhosted.org/packages/06/8a/e60e46adab4362a682cf142c7dcb5bf79b782ab2199b0dcb81f55970807f/cryptography-46.0.3-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7ce938a99998ed3c8aa7e7272dca1a610401ede816d36d0693907d863b10d9ea", size = 3698132, upload-time = "2025-10-15T23:18:17.056Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/38/f59940ec4ee91e93d3311f7532671a5cef5570eb04a144bf203b58552d11/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:191bb60a7be5e6f54e30ba16fdfae78ad3a342a0599eb4193ba88e3f3d6e185b", size = 4243992, upload-time = "2025-10-15T23:18:18.695Z" }, + { url = "https://files.pythonhosted.org/packages/b0/0c/35b3d92ddebfdfda76bb485738306545817253d0a3ded0bfe80ef8e67aa5/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c70cc23f12726be8f8bc72e41d5065d77e4515efae3690326764ea1b07845cfb", size = 4409944, upload-time = "2025-10-15T23:18:20.597Z" }, + { url = "https://files.pythonhosted.org/packages/99/55/181022996c4063fc0e7666a47049a1ca705abb9c8a13830f074edb347495/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:9394673a9f4de09e28b5356e7fff97d778f8abad85c9d5ac4a4b7e25a0de7717", size = 4242957, upload-time = "2025-10-15T23:18:22.18Z" }, + { url = "https://files.pythonhosted.org/packages/ba/af/72cd6ef29f9c5f731251acadaeb821559fe25f10852f44a63374c9ca08c1/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:94cd0549accc38d1494e1f8de71eca837d0509d0d44bf11d158524b0e12cebf9", size = 4409447, upload-time = "2025-10-15T23:18:24.209Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c3/e90f4a4feae6410f914f8ebac129b9ae7a8c92eb60a638012dde42030a9d/cryptography-46.0.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6b5063083824e5509fdba180721d55909ffacccc8adbec85268b48439423d78c", size = 3438528, upload-time = "2025-10-15T23:18:26.227Z" }, +] + [[package]] name = "distlib" version = "0.4.0" @@ -310,6 +464,9 @@ dependencies = [ ] [package.optional-dependencies] +mcp = [ + { name = "mcp" }, +] render = [ { name = "pillow" }, { name = "pypdfium2" }, @@ -337,6 +494,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.6.0,<2.0.0" }, { name = "numpy", specifier = 
">=2.3.5" }, { name = "openpyxl", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=2.3.3" }, @@ -348,7 +506,7 @@ requires-dist = [ { name = "scipy", specifier = ">=1.16.3" }, { name = "xlwings", specifier = ">=0.33.16" }, ] -provides-extras = ["yaml", "toon", "render"] +provides-extras = ["yaml", "toon", "render", "mcp"] [package.metadata.requires-dev] dev = [ @@ -397,6 +555,52 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/83/3b1d03d36f224edded98e9affd0467630fc09d766c0e56fb1498cbb04a9b/griffe-1.15.0-py3-none-any.whl", hash = "sha256:6f6762661949411031f5fcda9593f586e6ce8340f0ba88921a0f2ef7a81eb9a3", size = 150705, upload-time = "2025-11-10T15:03:13.549Z" }, ] +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, +] + [[package]] name = "identify" version = "2.6.15" @@ -516,6 +720,33 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + [[package]] name = "librt" version = "0.7.3" @@ -681,6 +912,31 
@@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, ] +[[package]] +name = "mcp" +version = "1.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d5/2d/649d80a0ecf6a1f82632ca44bec21c0461a9d9fc8934d38cb5b319f2db5e/mcp-1.25.0.tar.gz", hash = "sha256:56310361ebf0364e2d438e5b45f7668cbb124e158bb358333cd06e49e83a6802", size = 605387, upload-time = "2025-12-19T10:19:56.985Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/fc/6dc7659c2ae5ddf280477011f4213a74f806862856b796ef08f028e664bf/mcp-1.25.0-py3-none-any.whl", hash = "sha256:b37c38144a666add0862614cc79ec276e97d72aa8ca26d622818d4e278b9721a", size = 233076, upload-time = "2025-12-19T10:19:55.416Z" }, +] + [[package]] name = "mergedeep" version = "1.3.4" @@ -1180,6 +1436,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/d7/7831438e6c3ebbfa6e01a927127a6cb42ad3ab844247f3c5b96bea25d73d/psutil-6.1.1-cp37-abi3-win_amd64.whl", hash = "sha256:f35cfccb065fff93529d2afb4a2e89e363fe63ca1e4a5da22b603a85833c2649", size = 254444, upload-time = "2024-12-19T18:22:11.335Z" }, ] +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = 
{ url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + [[package]] name = "pydantic" version = "2.12.5" @@ -1292,6 +1557,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, ] +[[package]] +name = "pydantic-settings" +version = "2.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -1301,6 +1580,20 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pymdown-extensions" version = "10.17.2" @@ -1388,6 +1681,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-dotenv" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.21" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/78/96/804520d0850c7db98e5ccb70282e29208723f0964e88ffd9d0da2f52ea09/python_multipart-0.0.21.tar.gz", hash = "sha256:7137ebd4d3bbf70ea1622998f902b97a29434a9e8dc40eb203bbcf7c2a2cba92", size = 37196, upload-time = "2025-12-17T09:24:22.446Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/76/03af049af4dcee5d27442f71b6924f01f3efb5d2bd34f23fcd563f2cc5f5/python_multipart-0.0.21-py3-none-any.whl", hash = "sha256:cf7a6713e01c87aa35387f4774e812c4361150938d20d232800f75ffcf266090", size = 24541, upload-time = "2025-12-17T09:24:21.153Z" }, +] + [[package]] name = "python-toon" version = "0.1.3" @@ -1492,6 +1803,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/11/432f32f8097b03e3cd5fe57e88efb685d964e2e5178a48ed61e841f7fdce/pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04", size = 4722, upload-time = "2025-05-13T15:23:59.629Z" }, ] +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + [[package]] name = "requests" version = "2.32.5" @@ -1520,6 +1845,114 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bb/ad/fdd56219f0e320293c513ef0b3cdd018802a1bcfdb29ed9bc0c3bcb97f31/responses-0.21.0-py3-none-any.whl", hash = "sha256:2dcc863ba63963c0c3d9ee3fa9507cbe36b7d7b0fccb4f0bdfd9e96c539b1487", size = 45987, upload-time = "2022-05-25T14:20:48.508Z" }, ] +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/6e/f964e88b3d2abee2a82c1ac8366da848fce1c6d834dc2132c3fda3970290/rpds_py-0.30.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a2bffea6a4ca9f01b3f8e548302470306689684e61602aa3d141e34da06cf425", size = 370157, upload-time = "2025-11-30T20:21:53.789Z" }, + { url = "https://files.pythonhosted.org/packages/94/ba/24e5ebb7c1c82e74c4e4f33b2112a5573ddc703915b13a073737b59b86e0/rpds_py-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d", size = 359676, upload-time = "2025-11-30T20:21:55.475Z" }, + { url = "https://files.pythonhosted.org/packages/84/86/04dbba1b087227747d64d80c3b74df946b986c57af0a9f0c98726d4d7a3b/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4", size = 389938, upload-time = 
"2025-11-30T20:21:57.079Z" }, + { url = "https://files.pythonhosted.org/packages/42/bb/1463f0b1722b7f45431bdd468301991d1328b16cffe0b1c2918eba2c4eee/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f", size = 402932, upload-time = "2025-11-30T20:21:58.47Z" }, + { url = "https://files.pythonhosted.org/packages/99/ee/2520700a5c1f2d76631f948b0736cdf9b0acb25abd0ca8e889b5c62ac2e3/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4", size = 525830, upload-time = "2025-11-30T20:21:59.699Z" }, + { url = "https://files.pythonhosted.org/packages/e0/ad/bd0331f740f5705cc555a5e17fdf334671262160270962e69a2bdef3bf76/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97", size = 412033, upload-time = "2025-11-30T20:22:00.991Z" }, + { url = "https://files.pythonhosted.org/packages/f8/1e/372195d326549bb51f0ba0f2ecb9874579906b97e08880e7a65c3bef1a99/rpds_py-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89", size = 390828, upload-time = "2025-11-30T20:22:02.723Z" }, + { url = "https://files.pythonhosted.org/packages/ab/2b/d88bb33294e3e0c76bc8f351a3721212713629ffca1700fa94979cb3eae8/rpds_py-0.30.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d", size = 404683, upload-time = "2025-11-30T20:22:04.367Z" }, + { url = "https://files.pythonhosted.org/packages/50/32/c759a8d42bcb5289c1fac697cd92f6fe01a018dd937e62ae77e0e7f15702/rpds_py-0.30.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495aeca4b93d465efde585977365187149e75383ad2684f81519f504f5c13038", size = 421583, upload-time = 
"2025-11-30T20:22:05.814Z" }, + { url = "https://files.pythonhosted.org/packages/2b/81/e729761dbd55ddf5d84ec4ff1f47857f4374b0f19bdabfcf929164da3e24/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7", size = 572496, upload-time = "2025-11-30T20:22:07.713Z" }, + { url = "https://files.pythonhosted.org/packages/14/f6/69066a924c3557c9c30baa6ec3a0aa07526305684c6f86c696b08860726c/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d6d1cc13664ec13c1b84241204ff3b12f9bb82464b8ad6e7a5d3486975c2eed", size = 598669, upload-time = "2025-11-30T20:22:09.312Z" }, + { url = "https://files.pythonhosted.org/packages/5f/48/905896b1eb8a05630d20333d1d8ffd162394127b74ce0b0784ae04498d32/rpds_py-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85", size = 561011, upload-time = "2025-11-30T20:22:11.309Z" }, + { url = "https://files.pythonhosted.org/packages/22/16/cd3027c7e279d22e5eb431dd3c0fbc677bed58797fe7581e148f3f68818b/rpds_py-0.30.0-cp311-cp311-win32.whl", hash = "sha256:55f66022632205940f1827effeff17c4fa7ae1953d2b74a8581baaefb7d16f8c", size = 221406, upload-time = "2025-11-30T20:22:13.101Z" }, + { url = "https://files.pythonhosted.org/packages/fa/5b/e7b7aa136f28462b344e652ee010d4de26ee9fd16f1bfd5811f5153ccf89/rpds_py-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:a51033ff701fca756439d641c0ad09a41d9242fa69121c7d8769604a0a629825", size = 236024, upload-time = "2025-11-30T20:22:14.853Z" }, + { url = "https://files.pythonhosted.org/packages/14/a6/364bba985e4c13658edb156640608f2c9e1d3ea3c81b27aa9d889fff0e31/rpds_py-0.30.0-cp311-cp311-win_arm64.whl", hash = "sha256:47b0ef6231c58f506ef0b74d44e330405caa8428e770fec25329ed2cb971a229", size = 229069, upload-time = "2025-11-30T20:22:16.577Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" }, + { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" }, + { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" }, + { url = "https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" }, + { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" }, + { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" }, + { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" }, + { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" }, + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, + { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, + { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, + { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, + { url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", 
hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, + { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, + { url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, + { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, + { url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = 
"2025-11-30T20:23:24.449Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, + { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" }, + { url = "https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" }, + { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" }, + { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = "2025-11-30T20:23:40.263Z" }, + { url = "https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = "2025-11-30T20:23:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" }, + { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" }, + { url = "https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" }, + { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" }, + { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" }, + { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" }, + { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = "2025-11-30T20:24:10.956Z" }, + { url = "https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" }, + { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/71/3f34339ee70521864411f8b6992e7ab13ac30d8e4e3309e07c7361767d91/rpds_py-0.30.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c2262bdba0ad4fc6fb5545660673925c2d2a5d9e2e0fb603aad545427be0fc58", size = 372292, upload-time = "2025-11-30T20:24:16.537Z" }, + { url = "https://files.pythonhosted.org/packages/57/09/f183df9b8f2d66720d2ef71075c59f7e1b336bec7ee4c48f0a2b06857653/rpds_py-0.30.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a", size = 362128, upload-time = "2025-11-30T20:24:18.086Z" }, + { url = "https://files.pythonhosted.org/packages/7a/68/5c2594e937253457342e078f0cc1ded3dd7b2ad59afdbf2d354869110a02/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb", size = 391542, upload-time = "2025-11-30T20:24:20.092Z" }, + { url = "https://files.pythonhosted.org/packages/49/5c/31ef1afd70b4b4fbdb2800249f34c57c64beb687495b10aec0365f53dfc4/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c", size = 404004, upload-time = "2025-11-30T20:24:22.231Z" }, + { url = "https://files.pythonhosted.org/packages/e3/63/0cfbea38d05756f3440ce6534d51a491d26176ac045e2707adc99bb6e60a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3", size = 527063, upload-time = "2025-11-30T20:24:24.302Z" }, + { url = "https://files.pythonhosted.org/packages/42/e6/01e1f72a2456678b0f618fc9a1a13f882061690893c192fcad9f2926553a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5", size = 413099, upload-time = "2025-11-30T20:24:25.916Z" }, + { 
url = "https://files.pythonhosted.org/packages/b8/25/8df56677f209003dcbb180765520c544525e3ef21ea72279c98b9aa7c7fb/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738", size = 392177, upload-time = "2025-11-30T20:24:27.834Z" }, + { url = "https://files.pythonhosted.org/packages/4a/b4/0a771378c5f16f8115f796d1f437950158679bcd2a7c68cf251cfb00ed5b/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f", size = 406015, upload-time = "2025-11-30T20:24:29.457Z" }, + { url = "https://files.pythonhosted.org/packages/36/d8/456dbba0af75049dc6f63ff295a2f92766b9d521fa00de67a2bd6427d57a/rpds_py-0.30.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ba3af48635eb83d03f6c9735dfb21785303e73d22ad03d489e88adae6eab8877", size = 423736, upload-time = "2025-11-30T20:24:31.22Z" }, + { url = "https://files.pythonhosted.org/packages/13/64/b4d76f227d5c45a7e0b796c674fd81b0a6c4fbd48dc29271857d8219571c/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a", size = 573981, upload-time = "2025-11-30T20:24:32.934Z" }, + { url = "https://files.pythonhosted.org/packages/20/91/092bacadeda3edf92bf743cc96a7be133e13a39cdbfd7b5082e7ab638406/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1b151685b23929ab7beec71080a8889d4d6d9fa9a983d213f07121205d48e2c4", size = 599782, upload-time = "2025-11-30T20:24:35.169Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, +] + [[package]] name = "ruff" version = 
"0.14.8" @@ -1639,6 +2072,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sse-starlette" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/8d/00d280c03ffd39aaee0e86ec81e2d3b9253036a0f93f51d10503adef0e65/sse_starlette-3.2.0.tar.gz", hash = "sha256:8127594edfb51abe44eac9c49e59b0b01f1039d0c7461c6fd91d4e03b70da422", size = 27253, upload-time = "2026-01-17T13:11:05.62Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7f/832f015020844a8b8f7a9cbc103dd76ba8e3875004c41e08440ea3a2b41a/sse_starlette-3.2.0-py3-none-any.whl", hash = "sha256:5876954bd51920fc2cd51baee47a080eb88a37b5b784e615abb0b283f801cdbf", size = 12763, upload-time = "2026-01-17T13:11:03.775Z" }, +] + +[[package]] +name = "starlette" +version = "0.52.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, +] + [[package]] name = "taskipy" version = "1.14.1" @@ 
-1802,6 +2261,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uvicorn" +version = "0.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, +] + [[package]] name = "virtualenv" version = "20.36.1" From fc99ff2e9f32f283cd79c992a2c732f7b22ded18 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 16:06:40 +0900 Subject: [PATCH 05/18] =?UTF-8?q?pre-commit=E3=82=BF=E3=82=B9=E3=82=AF?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E3=82=B3=E3=83=BC?= =?UTF-8?q?=E3=83=89=E5=93=81=E8=B3=AA=E3=82=92=E5=90=91=E4=B8=8A=E3=81=95?= =?UTF-8?q?=E3=81=9B=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 81040b6..aa9d1f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,6 +122,7 @@ markers = [ ruff = "ruff check ." ruff-fix = "ruff check . 
--fix" mypy = "mypy src/exstruct --strict" +precommit-run = "pre-commit run -a" test = "pytest -vv --cov=exstruct --cov-report=term-missing --cov-report=xml" # uv sync --extra render --extra toon test-unit = "pytest -vv -m \"not com and not render\" --cov=exstruct --cov-report=term-missing --cov-report=xml" test-com = "pytest -vv -m \"com\" --cov=exstruct --cov-report=term-missing --cov-report=xml" From 7db3f789855d3a8f49d688baf4442b062969e1c7 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 16:39:47 +0900 Subject: [PATCH 06/18] =?UTF-8?q?MCP=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC?= =?UTF-8?q?=E3=81=A7=E3=81=AE=E3=83=9C=E3=83=BC=E3=83=80=E3=83=BC=E3=82=AF?= =?UTF-8?q?=E3=83=A9=E3=82=B9=E3=82=BF=E3=83=90=E3=83=83=E3=82=AF=E3=82=A8?= =?UTF-8?q?=E3=83=B3=E3=83=89=E3=81=AE=E7=92=B0=E5=A2=83=E5=A4=89=E6=95=B0?= =?UTF-8?q?=E3=82=92=E8=A8=AD=E5=AE=9A=E3=81=97=E3=80=81=E3=83=AD=E3=82=B0?= =?UTF-8?q?=E3=81=AB=E6=83=85=E5=A0=B1=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/exstruct/core/cells.py | 108 +++++++++++++++++++++++++++++++++++-- src/exstruct/mcp/server.py | 6 +++ 2 files changed, 110 insertions(+), 4 deletions(-) diff --git a/src/exstruct/core/cells.py b/src/exstruct/core/cells.py index b473667..4738678 100644 --- a/src/exstruct/core/cells.py +++ b/src/exstruct/core/cells.py @@ -6,8 +6,10 @@ from decimal import Decimal, InvalidOperation import logging import math +import os from pathlib import Path import re +import time from typing import Literal import numpy as np @@ -40,6 +42,7 @@ } _DEFAULT_BACKGROUND_HEX = "FFFFFF" _XL_COLOR_NONE = -4142 +_BORDER_CLUSTER_BACKEND_ENV = "EXSTRUCT_BORDER_CLUSTER_BACKEND" ExtractionMode = Literal["light", "standard", "verbose"] @@ -930,6 +933,20 @@ def load_border_maps_xlsx( # noqa: C901 scan_max_row = min(max_row, resolved_limits.max_rows) scan_max_col = min(max_col, resolved_limits.max_cols) + logger.info( + "Openpyxl border scan start 
(sheet=%s, min_row=%s, min_col=%s, max_row=%s, max_col=%s, scan_max_row=%s, scan_max_col=%s, empty_row_run=%s, empty_col_run=%s).", + sheet_name, + min_row, + min_col, + max_row, + max_col, + scan_max_row, + scan_max_col, + resolved_limits.empty_row_run, + resolved_limits.empty_col_run, + ) + scan_start = time.monotonic() + shape = (scan_max_row + 1, scan_max_col + 1) has_border = np.zeros(shape, dtype=bool) top_edge = np.zeros(shape, dtype=bool) @@ -937,6 +954,7 @@ def load_border_maps_xlsx( # noqa: C901 left_edge = np.zeros(shape, dtype=bool) right_edge = np.zeros(shape, dtype=bool) col_has_border = np.zeros(shape[1], dtype=bool) + border_cells = 0 def edge_has_style(edge: object) -> bool: if edge is None: @@ -964,6 +982,7 @@ def edge_has_style(edge: object) -> bool: row_has_border = True col_has_border[c] = True has_border[r, c] = True + border_cells += 1 if t: top_edge[r, c] = True if btm: @@ -1001,6 +1020,11 @@ def edge_has_style(edge: object) -> bool: ) break + _log_border_scan_complete( + sheet_name=sheet_name, + border_cells=border_cells, + elapsed=time.monotonic() - scan_start, + ) return ( has_border, top_edge, @@ -1012,6 +1036,17 @@ def edge_has_style(edge: object) -> bool: ) +def _log_border_scan_complete( + *, sheet_name: str, border_cells: int, elapsed: float +) -> None: + logger.info( + "Openpyxl border scan completed (sheet=%s, border_cells=%s, elapsed=%.2fs).", + sheet_name, + border_cells, + elapsed, + ) + + def _detect_border_clusters_numpy( has_border: np.ndarray, min_size: int ) -> list[tuple[int, int, int, int]]: @@ -1061,17 +1096,50 @@ def _detect_border_clusters_python( return rects +def _resolve_border_cluster_backend() -> Literal["auto", "python", "numpy"]: + value = os.getenv(_BORDER_CLUSTER_BACKEND_ENV, "").strip().lower() + if value in {"python", "numpy"}: + return "python" if value == "python" else "numpy" + return "auto" + + def detect_border_clusters( has_border: np.ndarray, min_size: int = 4 ) -> list[tuple[int, int, int, int]]: + 
start = time.monotonic() + backend = _resolve_border_cluster_backend() + if backend == "python": + rects = _detect_border_clusters_python(has_border, min_size) + logger.info( + "detect_border_clusters forced python completed in %.2fs (rects=%s).", + time.monotonic() - start, + len(rects), + ) + return rects try: - return _detect_border_clusters_numpy(has_border, min_size) - except Exception: + rects = _detect_border_clusters_numpy(has_border, min_size) + logger.info( + "detect_border_clusters numpy completed in %.2fs (rects=%s).", + time.monotonic() - start, + len(rects), + ) + return rects + except Exception as exc: warn_once( "scipy-missing", "scipy is not available. Falling back to pure-Python BFS for connected components, which may be significantly slower.", ) - return _detect_border_clusters_python(has_border, min_size) + logger.info( + "detect_border_clusters numpy failed (%r); falling back to python.", + exc, + ) + rects = _detect_border_clusters_python(has_border, min_size) + logger.info( + "detect_border_clusters python completed in %.2fs (rects=%s).", + time.monotonic() - start, + len(rects), + ) + return rects def _get_values_block( @@ -1718,18 +1786,44 @@ def detect_tables_openpyxl( scan_limits: TableScanLimits | None = None, ) -> list[str]: """Detect table-like ranges via openpyxl tables and border clusters.""" + start = time.monotonic() + resolved_limits = _resolve_table_scan_limits(mode, scan_limits) + logger.info( + "detect_tables_openpyxl start (sheet=%s, mode=%s, max_rows=%s, max_cols=%s, empty_row_run=%s, empty_col_run=%s).", + sheet_name, + mode, + resolved_limits.max_rows, + resolved_limits.max_cols, + resolved_limits.empty_row_run, + resolved_limits.empty_col_run, + ) with openpyxl_workbook(xlsx_path, data_only=True, read_only=False) as wb: ws = wb[sheet_name] tables = _extract_openpyxl_table_refs(ws) + border_start = time.monotonic() has_border, top_edge, bottom_edge, left_edge, right_edge, max_row, max_col = ( load_border_maps_xlsx( 
xlsx_path, sheet_name, - scan_limits=_resolve_table_scan_limits(mode, scan_limits), + scan_limits=resolved_limits, ) ) + logger.info( + "detect_tables_openpyxl border maps completed in %.2fs (sheet=%s, max_row=%s, max_col=%s).", + time.monotonic() - border_start, + sheet_name, + max_row, + max_col, + ) + rects_start = time.monotonic() rects = _detect_border_rectangles(has_border, min_size=4) + logger.info( + "detect_tables_openpyxl border rectangles completed in %.2fs (sheet=%s, rects=%s).", + time.monotonic() - rects_start, + sheet_name, + len(rects), + ) merged_rects = _merge_rectangles(rects) dedup: set[str] = set(tables) @@ -1758,6 +1852,12 @@ def detect_tables_openpyxl( if addr not in dedup: dedup.add(addr) tables.append(addr) + logger.info( + "detect_tables_openpyxl completed in %.2fs (sheet=%s, tables=%s).", + time.monotonic() - start, + sheet_name, + len(tables), + ) return tables diff --git a/src/exstruct/mcp/server.py b/src/exstruct/mcp/server.py index ebccdd9..b55c54d 100644 --- a/src/exstruct/mcp/server.py +++ b/src/exstruct/mcp/server.py @@ -4,6 +4,7 @@ import functools import importlib import logging +import os from pathlib import Path import time from types import ModuleType @@ -68,6 +69,11 @@ def run_server(config: ServerConfig) -> None: Args: config: Server configuration. 
""" + os.environ.setdefault("EXSTRUCT_BORDER_CLUSTER_BACKEND", "python") + logger.info( + "Border cluster backend set to %s for MCP.", + os.getenv("EXSTRUCT_BORDER_CLUSTER_BACKEND"), + ) _import_mcp() policy = PathPolicy(root=config.root, deny_globs=config.deny_globs) logger.info("MCP root: %s", policy.normalize_root()) From 795a014ebfc082d83c10fd791116ea17e24e3043 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 16:50:13 +0900 Subject: [PATCH 07/18] =?UTF-8?q?MCP=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC?= =?UTF-8?q?=E3=81=AE=E5=87=BA=E5=8A=9B=E8=A1=9D=E7=AA=81=E3=83=9D=E3=83=AA?= =?UTF-8?q?=E3=82=B7=E3=83=BC=E3=82=92=E5=AE=9F=E8=A3=85=E3=81=97=E3=80=81?= =?UTF-8?q?=E9=96=A2=E9=80=A3=E3=81=99=E3=82=8B=E3=83=84=E3=83=BC=E3=83=AB?= =?UTF-8?q?=E3=81=A8=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88?= =?UTF-8?q?=E3=82=92=E6=9B=B4=E6=96=B0=E3=80=82=E6=96=B0=E3=81=97=E3=81=84?= =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=B1=E3=83=BC=E3=82=B9=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=81=97=E3=81=A6=E5=8B=95=E4=BD=9C=E3=82=92?= =?UTF-8?q?=E7=A2=BA=E8=AA=8D=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 20 +++++ docs/agents/TASKS.md | 10 +-- src/exstruct/core/cells.py | 101 +--------------------- src/exstruct/mcp/extract_runner.py | 68 +++++++++++++-- src/exstruct/mcp/server.py | 36 +++++--- src/exstruct/mcp/tools.py | 15 +++- tests/mcp/test_extract_runner_conflict.py | 46 ++++++++++ tests/mcp/test_path_policy.py | 30 +++++++ tests/mcp/test_tool_models.py | 19 ++++ 9 files changed, 224 insertions(+), 121 deletions(-) create mode 100644 tests/mcp/test_extract_runner_conflict.py create mode 100644 tests/mcp/test_path_policy.py create mode 100644 tests/mcp/test_tool_models.py diff --git a/README.md b/README.md index 6ebff3a..8257847 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,26 @@ exstruct input.xlsx --pdf --image # PDF and PNGs (Excel required) Auto page-break exports are 
available via API and CLI when Excel/COM is available; the CLI exposes `--auto-page-breaks-dir` only in COM-capable environments. +## MCP Server (stdio) + +Install the MCP extras and run the stdio server: + +```bash +pip install exstruct[mcp] +exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict rename +``` + +Available tools: + +- `exstruct.extract` +- `exstruct.read_json_chunk` +- `exstruct.validate_input` + +Notes: + +- Logs go to stderr (and optionally `--log-file`) to avoid contaminating stdio responses. +- On Windows with Excel, standard/verbose can use COM for richer extraction. On non-Windows, COM is unavailable and extraction uses openpyxl-based fallbacks. + ## Quick Start (Python) ```python diff --git a/docs/agents/TASKS.md b/docs/agents/TASKS.md index 90a1e7e..e8dd78c 100644 --- a/docs/agents/TASKS.md +++ b/docs/agents/TASKS.md @@ -12,13 +12,13 @@ - [x] パス制約: `src/exstruct/mcp/io.py` に allowlist / deny glob を実装 - [x] 抽出実行: `src/exstruct/mcp/extract_runner.py` に内部 API 優先の実行層を実装 - [x] 出力モデル: Pydantic で入出力モデルを定義(mypy strict / Ruff 遵守) -- [ ] ログ: stderr / ファイル出力の設定を追加 -- [ ] ドキュメント: README または docs に起動例(`exstruct-mcp --root ...`)を追記 +- [x] ログ: stderr / ファイル出力の設定を追加 +- [x] ドキュメント: README または docs に起動例(`exstruct-mcp --root ...`)を追記 ## MCPサーバー(実用化) - [x] `exstruct.read_json_chunk` を追加(大容量 JSON 対応) - [x] `exstruct.validate_input` を追加(事前検証) -- [ ] `--on-conflict` の出力衝突ポリシー実装 -- [ ] Windows/非Windows の読み取り差分を明文化 -- [ ] 最低限のテスト追加(パス制約 / 入出力モデル / 例外) +- [x] `--on-conflict` の出力衝突ポリシー実装 +- [x] Windows/非Windows の読み取り差分を明文化 +- [x] 最低限のテスト追加(パス制約 / 入出力モデル / 例外) diff --git a/src/exstruct/core/cells.py b/src/exstruct/core/cells.py index 4738678..33b5794 100644 --- a/src/exstruct/core/cells.py +++ b/src/exstruct/core/cells.py @@ -9,7 +9,6 @@ import os from pathlib import Path import re -import time from typing import Literal import numpy as np @@ -933,20 +932,6 @@ def load_border_maps_xlsx( # noqa: C901 scan_max_row = min(max_row, 
resolved_limits.max_rows) scan_max_col = min(max_col, resolved_limits.max_cols) - logger.info( - "Openpyxl border scan start (sheet=%s, min_row=%s, min_col=%s, max_row=%s, max_col=%s, scan_max_row=%s, scan_max_col=%s, empty_row_run=%s, empty_col_run=%s).", - sheet_name, - min_row, - min_col, - max_row, - max_col, - scan_max_row, - scan_max_col, - resolved_limits.empty_row_run, - resolved_limits.empty_col_run, - ) - scan_start = time.monotonic() - shape = (scan_max_row + 1, scan_max_col + 1) has_border = np.zeros(shape, dtype=bool) top_edge = np.zeros(shape, dtype=bool) @@ -954,7 +939,6 @@ def load_border_maps_xlsx( # noqa: C901 left_edge = np.zeros(shape, dtype=bool) right_edge = np.zeros(shape, dtype=bool) col_has_border = np.zeros(shape[1], dtype=bool) - border_cells = 0 def edge_has_style(edge: object) -> bool: if edge is None: @@ -982,7 +966,6 @@ def edge_has_style(edge: object) -> bool: row_has_border = True col_has_border[c] = True has_border[r, c] = True - border_cells += 1 if t: top_edge[r, c] = True if btm: @@ -997,11 +980,6 @@ def edge_has_style(edge: object) -> bool: else: consecutive_empty_rows += 1 if consecutive_empty_rows >= resolved_limits.empty_row_run: - logger.info( - "Openpyxl border scan early-exit after %s empty rows (sheet=%s).", - resolved_limits.empty_row_run, - sheet_name, - ) break trailing_empty_cols = 0 @@ -1013,18 +991,8 @@ def edge_has_style(edge: object) -> bool: new_max_col = max(min_col, current_max_col - trailing_empty_cols) if new_max_col < current_max_col: current_max_col = new_max_col - logger.info( - "Openpyxl border scan early-exit after %s empty columns (sheet=%s).", - resolved_limits.empty_col_run, - sheet_name, - ) break - _log_border_scan_complete( - sheet_name=sheet_name, - border_cells=border_cells, - elapsed=time.monotonic() - scan_start, - ) return ( has_border, top_edge, @@ -1036,17 +1004,6 @@ def edge_has_style(edge: object) -> bool: ) -def _log_border_scan_complete( - *, sheet_name: str, border_cells: int, elapsed: 
float -) -> None: - logger.info( - "Openpyxl border scan completed (sheet=%s, border_cells=%s, elapsed=%.2fs).", - sheet_name, - border_cells, - elapsed, - ) - - def _detect_border_clusters_numpy( has_border: np.ndarray, min_size: int ) -> list[tuple[int, int, int, int]]: @@ -1106,40 +1063,21 @@ def _resolve_border_cluster_backend() -> Literal["auto", "python", "numpy"]: def detect_border_clusters( has_border: np.ndarray, min_size: int = 4 ) -> list[tuple[int, int, int, int]]: - start = time.monotonic() backend = _resolve_border_cluster_backend() if backend == "python": - rects = _detect_border_clusters_python(has_border, min_size) - logger.info( - "detect_border_clusters forced python completed in %.2fs (rects=%s).", - time.monotonic() - start, - len(rects), - ) - return rects + return _detect_border_clusters_python(has_border, min_size) try: - rects = _detect_border_clusters_numpy(has_border, min_size) - logger.info( - "detect_border_clusters numpy completed in %.2fs (rects=%s).", - time.monotonic() - start, - len(rects), - ) - return rects + return _detect_border_clusters_numpy(has_border, min_size) except Exception as exc: warn_once( "scipy-missing", "scipy is not available. 
Falling back to pure-Python BFS for connected components, which may be significantly slower.", ) - logger.info( + logger.debug( "detect_border_clusters numpy failed (%r); falling back to python.", exc, ) - rects = _detect_border_clusters_python(has_border, min_size) - logger.info( - "detect_border_clusters python completed in %.2fs (rects=%s).", - time.monotonic() - start, - len(rects), - ) - return rects + return _detect_border_clusters_python(has_border, min_size) def _get_values_block( @@ -1786,22 +1724,11 @@ def detect_tables_openpyxl( scan_limits: TableScanLimits | None = None, ) -> list[str]: """Detect table-like ranges via openpyxl tables and border clusters.""" - start = time.monotonic() resolved_limits = _resolve_table_scan_limits(mode, scan_limits) - logger.info( - "detect_tables_openpyxl start (sheet=%s, mode=%s, max_rows=%s, max_cols=%s, empty_row_run=%s, empty_col_run=%s).", - sheet_name, - mode, - resolved_limits.max_rows, - resolved_limits.max_cols, - resolved_limits.empty_row_run, - resolved_limits.empty_col_run, - ) with openpyxl_workbook(xlsx_path, data_only=True, read_only=False) as wb: ws = wb[sheet_name] tables = _extract_openpyxl_table_refs(ws) - border_start = time.monotonic() has_border, top_edge, bottom_edge, left_edge, right_edge, max_row, max_col = ( load_border_maps_xlsx( xlsx_path, @@ -1809,21 +1736,7 @@ def detect_tables_openpyxl( scan_limits=resolved_limits, ) ) - logger.info( - "detect_tables_openpyxl border maps completed in %.2fs (sheet=%s, max_row=%s, max_col=%s).", - time.monotonic() - border_start, - sheet_name, - max_row, - max_col, - ) - rects_start = time.monotonic() rects = _detect_border_rectangles(has_border, min_size=4) - logger.info( - "detect_tables_openpyxl border rectangles completed in %.2fs (sheet=%s, rects=%s).", - time.monotonic() - rects_start, - sheet_name, - len(rects), - ) merged_rects = _merge_rectangles(rects) dedup: set[str] = set(tables) @@ -1852,12 +1765,6 @@ def detect_tables_openpyxl( if addr not in 
dedup: dedup.add(addr) tables.append(addr) - logger.info( - "detect_tables_openpyxl completed in %.2fs (sheet=%s, tables=%s).", - time.monotonic() - start, - sheet_name, - len(tables), - ) return tables diff --git a/src/exstruct/mcp/extract_runner.py b/src/exstruct/mcp/extract_runner.py index 2ee0b4b..dcf9a44 100644 --- a/src/exstruct/mcp/extract_runner.py +++ b/src/exstruct/mcp/extract_runner.py @@ -2,7 +2,6 @@ import logging from pathlib import Path -import time from typing import Any, Literal from pydantic import BaseModel, Field @@ -13,6 +12,8 @@ logger = logging.getLogger(__name__) +OnConflictPolicy = Literal["overwrite", "skip", "rename"] + class WorkbookMeta(BaseModel): """Lightweight workbook metadata for MCP responses.""" @@ -29,6 +30,7 @@ class ExtractRequest(BaseModel): format: Literal["json", "yaml", "yml", "toon"] = "json" # noqa: A003 out_dir: Path | None = None out_name: str | None = None + on_conflict: OnConflictPolicy = "overwrite" options: dict[str, Any] = Field(default_factory=dict) @@ -65,20 +67,29 @@ def run_extract( out_name=request.out_name, policy=policy, ) + output_path, warning, skipped = _apply_conflict_policy( + output_path, request.on_conflict + ) + warnings: list[str] = [] + if warning: + warnings.append(warning) + if skipped: + return ExtractResult( + out_path=str(output_path), + workbook_meta=None, + warnings=warnings, + engine="internal_api", + ) _ensure_output_dir(output_path) - start = time.monotonic() process_excel( file_path=resolved_input, output_path=output_path, out_fmt=request.format, mode=request.mode, ) - logger.info("process_excel completed in %.2fs", time.monotonic() - start) - - meta_start = time.monotonic() - meta, warnings = _try_read_workbook_meta(resolved_input) - logger.info("workbook meta read completed in %.2fs", time.monotonic() - meta_start) + meta, meta_warnings = _try_read_workbook_meta(resolved_input) + warnings.extend(meta_warnings) return ExtractResult( out_path=str(output_path), workbook_meta=meta, @@ 
-178,6 +189,49 @@ def _format_suffix(fmt: Literal["json", "yaml", "yml", "toon"]) -> str: return ".yml" if fmt == "yml" else f".{fmt}" +def _apply_conflict_policy( + output_path: Path, on_conflict: OnConflictPolicy +) -> tuple[Path, str | None, bool]: + """Apply output conflict policy to a resolved output path. + + Args: + output_path: Target output file path. + on_conflict: Conflict handling policy. + + Returns: + Tuple of (resolved output path, warning message or None, skipped flag). + """ + if not output_path.exists(): + return output_path, None, False + if on_conflict == "skip": + return ( + output_path, + f"Output exists; skipping write: {output_path.name}", + True, + ) + if on_conflict == "rename": + renamed = _next_available_path(output_path) + return ( + renamed, + f"Output exists; renamed to: {renamed.name}", + False, + ) + return output_path, None, False + + +def _next_available_path(path: Path) -> Path: + """Return the next available path by appending a numeric suffix.""" + if not path.exists(): + return path + stem = path.stem + suffix = path.suffix + for idx in range(1, 10_000): + candidate = path.with_name(f"{stem}_{idx}{suffix}") + if not candidate.exists(): + return candidate + raise RuntimeError(f"Failed to resolve unique path for {path}") + + def _try_read_workbook_meta(path: Path) -> tuple[WorkbookMeta | None, list[str]]: """Try reading lightweight workbook metadata. 
diff --git a/src/exstruct/mcp/server.py b/src/exstruct/mcp/server.py index b55c54d..18c33f3 100644 --- a/src/exstruct/mcp/server.py +++ b/src/exstruct/mcp/server.py @@ -6,7 +6,6 @@ import logging import os from pathlib import Path -import time from types import ModuleType from typing import TYPE_CHECKING, Any, Literal, cast @@ -15,6 +14,7 @@ from exstruct import ExtractionMode +from .extract_runner import OnConflictPolicy from .io import PathPolicy from .tools import ( ExtractToolInput, @@ -41,6 +41,9 @@ class ServerConfig(BaseModel): deny_globs: list[str] = Field(default_factory=list, description="Denied glob list.") log_level: str = Field(default="INFO", description="Logging level.") log_file: Path | None = Field(default=None, description="Optional log file path.") + on_conflict: OnConflictPolicy = Field( + default="overwrite", description="Output conflict policy." + ) warmup: bool = Field(default=False, description="Warm up heavy imports on start.") @@ -79,7 +82,7 @@ def run_server(config: ServerConfig) -> None: logger.info("MCP root: %s", policy.normalize_root()) if config.warmup: _warmup_exstruct() - app = _create_app(policy) + app = _create_app(policy, on_conflict=config.on_conflict) app.run() @@ -106,6 +109,12 @@ def _parse_args(argv: list[str] | None) -> ServerConfig: help="Logging level (DEBUG, INFO, WARNING, ERROR).", ) parser.add_argument("--log-file", type=Path, help="Optional log file path.") + parser.add_argument( + "--on-conflict", + choices=["overwrite", "skip", "rename"], + default="overwrite", + help="Output conflict policy (overwrite/skip/rename).", + ) parser.add_argument( "--warmup", action="store_true", @@ -117,6 +126,7 @@ def _parse_args(argv: list[str] | None) -> ServerConfig: deny_globs=list(args.deny_glob), log_level=args.log_level, log_file=args.log_file, + on_conflict=args.on_conflict, warmup=bool(args.warmup), ) @@ -159,7 +169,7 @@ def _warmup_exstruct() -> None: logger.info("Warmup completed.") -def _create_app(policy: PathPolicy) -> 
FastMCP: +def _create_app(policy: PathPolicy, *, on_conflict: OnConflictPolicy) -> FastMCP: """Create the MCP FastMCP application. Args: @@ -171,11 +181,13 @@ def _create_app(policy: PathPolicy) -> FastMCP: from mcp.server.fastmcp import FastMCP app = FastMCP("ExStruct MCP", json_response=True) - _register_tools(app, policy) + _register_tools(app, policy, default_on_conflict=on_conflict) return app -def _register_tools(app: FastMCP, policy: PathPolicy) -> None: +def _register_tools( + app: FastMCP, policy: PathPolicy, *, default_on_conflict: OnConflictPolicy +) -> None: """Register MCP tools for the server. Args: @@ -189,6 +201,7 @@ async def _extract_tool( format: Literal["json", "yaml", "yml", "toon"] = "json", # noqa: A002 out_dir: str | None = None, out_name: str | None = None, + on_conflict: OnConflictPolicy | None = None, options: dict[str, Any] | None = None, ) -> ExtractToolOutput: """Handle the ExStruct extraction tool call. @@ -204,20 +217,23 @@ async def _extract_tool( Returns: Extraction result payload. 
""" - logger.info("exstruct.extract start: %s", xlsx_path) - start = time.monotonic() payload = ExtractToolInput( xlsx_path=xlsx_path, mode=mode, format=format, out_dir=out_dir, out_name=out_name, + on_conflict=on_conflict, options=options or {}, ) - work = functools.partial(run_extract_tool, payload, policy=policy) + effective_on_conflict = on_conflict or default_on_conflict + work = functools.partial( + run_extract_tool, + payload, + policy=policy, + on_conflict=effective_on_conflict, + ) result = cast(ExtractToolOutput, await anyio.to_thread.run_sync(work)) - elapsed = time.monotonic() - start - logger.info("exstruct.extract done in %.2fs", elapsed) return result tool = app.tool(name="exstruct.extract") diff --git a/src/exstruct/mcp/tools.py b/src/exstruct/mcp/tools.py index d4c692f..b77b31e 100644 --- a/src/exstruct/mcp/tools.py +++ b/src/exstruct/mcp/tools.py @@ -13,7 +13,13 @@ ReadJsonChunkResult, read_json_chunk, ) -from .extract_runner import ExtractRequest, ExtractResult, WorkbookMeta, run_extract +from .extract_runner import ( + ExtractRequest, + ExtractResult, + OnConflictPolicy, + WorkbookMeta, + run_extract, +) from .io import PathPolicy from .validate_input import ( ValidateInputRequest, @@ -30,6 +36,7 @@ class ExtractToolInput(BaseModel): format: Literal["json", "yaml", "yml", "toon"] = "json" # noqa: A003 out_dir: str | None = None out_name: str | None = None + on_conflict: OnConflictPolicy | None = None options: dict[str, Any] = Field(default_factory=dict) @@ -75,7 +82,10 @@ class ValidateInputToolOutput(BaseModel): def run_extract_tool( - payload: ExtractToolInput, *, policy: PathPolicy | None = None + payload: ExtractToolInput, + *, + policy: PathPolicy | None = None, + on_conflict: OnConflictPolicy | None = None, ) -> ExtractToolOutput: """Run the extraction tool handler. 
@@ -92,6 +102,7 @@ def run_extract_tool( format=payload.format, out_dir=Path(payload.out_dir) if payload.out_dir else None, out_name=payload.out_name, + on_conflict=payload.on_conflict or on_conflict or "overwrite", options=payload.options, ) result = run_extract(request, policy=policy) diff --git a/tests/mcp/test_extract_runner_conflict.py b/tests/mcp/test_extract_runner_conflict.py new file mode 100644 index 0000000..8f283e0 --- /dev/null +++ b/tests/mcp/test_extract_runner_conflict.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from pathlib import Path + +from exstruct.mcp import extract_runner + + +def test_apply_conflict_policy_no_conflict(tmp_path: Path) -> None: + output = tmp_path / "out.json" + resolved, warning, skipped = extract_runner._apply_conflict_policy( + output, "overwrite" + ) + assert resolved == output + assert warning is None + assert skipped is False + + +def test_apply_conflict_policy_skip(tmp_path: Path) -> None: + output = tmp_path / "out.json" + output.write_text("data", encoding="utf-8") + resolved, warning, skipped = extract_runner._apply_conflict_policy(output, "skip") + assert resolved == output + assert skipped is True + assert warning is not None + + +def test_apply_conflict_policy_rename(tmp_path: Path) -> None: + output = tmp_path / "out.json" + output.write_text("data", encoding="utf-8") + resolved, warning, skipped = extract_runner._apply_conflict_policy(output, "rename") + assert resolved != output + assert resolved.name.startswith("out_") + assert resolved.suffix == ".json" + assert skipped is False + assert warning is not None + + +def test_apply_conflict_policy_overwrite(tmp_path: Path) -> None: + output = tmp_path / "out.json" + output.write_text("data", encoding="utf-8") + resolved, warning, skipped = extract_runner._apply_conflict_policy( + output, "overwrite" + ) + assert resolved == output + assert warning is None + assert skipped is False diff --git a/tests/mcp/test_path_policy.py 
b/tests/mcp/test_path_policy.py new file mode 100644 index 0000000..a1f610a --- /dev/null +++ b/tests/mcp/test_path_policy.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from exstruct.mcp.io import PathPolicy + + +def test_path_policy_allows_within_root(tmp_path: Path) -> None: + policy = PathPolicy(root=tmp_path) + target = tmp_path / "data" / "file.txt" + allowed = policy.ensure_allowed(target) + assert allowed == target.resolve() + + +def test_path_policy_denies_outside_root(tmp_path: Path) -> None: + policy = PathPolicy(root=tmp_path) + outside = tmp_path.parent / "outside.txt" + with pytest.raises(ValueError): + policy.ensure_allowed(outside) + + +def test_path_policy_denies_glob(tmp_path: Path) -> None: + policy = PathPolicy(root=tmp_path, deny_globs=["**/*.secret"]) + denied = tmp_path / "nested" / "token.secret" + denied.parent.mkdir(parents=True, exist_ok=True) + denied.write_text("x", encoding="utf-8") + with pytest.raises(ValueError): + policy.ensure_allowed(denied) diff --git a/tests/mcp/test_tool_models.py b/tests/mcp/test_tool_models.py new file mode 100644 index 0000000..71637cf --- /dev/null +++ b/tests/mcp/test_tool_models.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from pydantic import ValidationError +import pytest + +from exstruct.mcp.tools import ExtractToolInput, ReadJsonChunkToolInput + + +def test_extract_tool_input_defaults() -> None: + payload = ExtractToolInput(xlsx_path="input.xlsx") + assert payload.mode == "standard" + assert payload.format == "json" + assert payload.out_dir is None + assert payload.out_name is None + + +def test_read_json_chunk_rejects_invalid_max_bytes() -> None: + with pytest.raises(ValidationError): + ReadJsonChunkToolInput(out_path="out.json", max_bytes=0) From 4ca86f4f85df6df32ebcc378e0b7a80952839c0d Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 16:59:05 +0900 Subject: [PATCH 08/18] 
=?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E9=96=A2?= =?UTF-8?q?=E6=95=B0=E3=81=AE=E5=BC=95=E6=95=B0=E3=81=AB=E5=8F=AF=E5=A4=89?= =?UTF-8?q?=E3=82=AD=E3=83=BC=E3=83=AF=E3=83=BC=E3=83=89=E5=BC=95=E6=95=B0?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E6=A4=9C=E5=87=BA?= =?UTF-8?q?=E3=83=86=E3=83=BC=E3=83=96=E3=83=AB=E3=81=AE=E3=83=A2=E3=83=83?= =?UTF-8?q?=E3=82=AF=E3=82=92=E6=9B=B4=E6=96=B0=20.gitignore=E3=81=AB?= =?UTF-8?q?=E3=82=A8=E3=83=A9=E3=83=BC=E3=83=AD=E3=82=B0=E3=83=95=E3=82=A1?= =?UTF-8?q?=E3=82=A4=E3=83=AB=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + tests/core/test_pipeline.py | 4 ++-- tests/core/test_table_detection_branching.py | 2 +- tests/integration/test_integrate_raw_data.py | 6 ++++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 5ebf3da..d2c544b 100644 --- a/.gitignore +++ b/.gitignore @@ -24,5 +24,6 @@ ruff_report.txt ruff-error.txt mypy_report.txt coverage.xml +errors.txt htmlcov/ .tmp_mcp_test/ \ No newline at end of file diff --git a/tests/core/test_pipeline.py b/tests/core/test_pipeline.py index 5fe17df..a5ef71f 100644 --- a/tests/core/test_pipeline.py +++ b/tests/core/test_pipeline.py @@ -272,7 +272,7 @@ def test_resolve_extraction_inputs_sets_ignore_colors(tmp_path: Path) -> None: def test_build_cells_tables_workbook_uses_print_areas( monkeypatch: MonkeyPatch, tmp_path: Path ) -> None: - def fake_detect_tables(_: Path, __: str) -> list[str]: + def fake_detect_tables(_: Path, __: str, **_kwargs: object) -> list[str]: return ["A1:B2"] monkeypatch.setattr( @@ -1012,7 +1012,7 @@ def _fake_plan(_: ExtractionInputs) -> PipelinePlan: """ return PipelinePlan(pre_com_steps=[_pre_step], com_steps=[], use_com=True) - def _fake_detect_tables(_: object) -> list[str]: + def _fake_detect_tables(_: object, **_kwargs: object) -> list[str]: """ Provide a detector that always reports no table ranges. 
diff --git a/tests/core/test_table_detection_branching.py b/tests/core/test_table_detection_branching.py index e82d37b..01ce133 100644 --- a/tests/core/test_table_detection_branching.py +++ b/tests/core/test_table_detection_branching.py @@ -23,7 +23,7 @@ def test_detect_tables_xlsx_uses_openpyxl(monkeypatch: MonkeyPatch) -> None: """xlsx は openpyxl 経由で検出されることを確認する。""" sheet = _DummySheet(book=_DummyBook("C:/tmp/book.xlsx"), name="Sheet1") - def _openpyxl_tables(_path: object, _name: str) -> list[str]: + def _openpyxl_tables(_path: object, _name: str, **_kwargs: object) -> list[str]: return ["A1:B2"] def _com_tables(_sheet: object) -> list[str]: diff --git a/tests/integration/test_integrate_raw_data.py b/tests/integration/test_integrate_raw_data.py index fcd020d..87ba066 100644 --- a/tests/integration/test_integrate_raw_data.py +++ b/tests/integration/test_integrate_raw_data.py @@ -47,7 +47,7 @@ def test_collect_sheet_raw_data_includes_extracted_fields( monkeypatch.setattr( "exstruct.core.pipeline.detect_tables", - lambda _sheet: ["A1:B2"], + lambda _sheet, **_kwargs: ["A1:B2"], ) colors_map = WorkbookColorsMap( @@ -100,7 +100,9 @@ def test_collect_sheet_raw_data_skips_charts_in_light_mode( sheet = SimpleNamespace(name="Sheet1") workbook = SimpleNamespace(sheets={"Sheet1": sheet}) - monkeypatch.setattr("exstruct.core.pipeline.detect_tables", lambda _sheet: []) + monkeypatch.setattr( + "exstruct.core.pipeline.detect_tables", lambda _sheet, **_kwargs: [] + ) result = collect_sheet_raw_data( cell_data={"Sheet1": []}, From 532c4f2de603179eeb82d9b64bc6de78830c44b7 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 17:28:43 +0900 Subject: [PATCH 09/18] =?UTF-8?q?MCP=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC?= =?UTF-8?q?=E3=81=AB=E9=96=A2=E3=81=99=E3=82=8B=E3=83=89=E3=82=AD=E3=83=A5?= =?UTF-8?q?=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97?= =?UTF-8?q?=E3=80=81README=E3=82=92=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.ja.md | 20 +++++++++++++ docs/README.en.md | 21 ++++++++++++- docs/README.ja.md | 20 +++++++++++++ docs/mcp.md | 76 +++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 5 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 docs/mcp.md diff --git a/README.ja.md b/README.ja.md index 23fdeb6..102425b 100644 --- a/README.ja.md +++ b/README.ja.md @@ -49,6 +49,26 @@ exstruct input.xlsx --pdf --image # PDF と PNG(Excel 必須) 自動改ページ範囲の書き出しは API/CLI 両方に対応(Excel/COM が必要)し、CLI は利用可能な環境でのみ `--auto-page-breaks-dir` を表示します。 +## MCPサーバー (標準入出力) + +MCPの拡張機能をインストールし、標準入出力サーバーを実行します。 + +```bash +pip install exstruct[mcp] +exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict rename +``` + +利用可能なツール: + +- `exstruct.extract` +- `exstruct.read_json_chunk` +- `exstruct.validate_input` + +注意点: + +- 標準入出力の応答を汚染しないよう、ログは標準エラー出力(およびオプションで`--log-file`で指定したファイル)に出力されます。 +- WindowsのExcel環境では、標準/詳細モードでCOMを利用して、よりリッチな抽出が可能です。Windows以外ではCOMは利用できず、抽出はopenpyxlベースのフォールバック機能を使用します。 + ## クイックスタート Python ```python diff --git a/docs/README.en.md b/docs/README.en.md index 39df415..a637a24 100644 --- a/docs/README.en.md +++ b/docs/README.en.md @@ -52,6 +52,26 @@ exstruct input.xlsx --pdf --image # PDF and PNGs (Excel required) Auto page-break exports are available via API and CLI when Excel/COM is available; the CLI exposes `--auto-page-breaks-dir` only in COM-capable environments. +## MCP Server (stdio) + +Install the MCP extras and run the stdio server: + +```bash +pip install exstruct[mcp] +exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict rename +``` + +Available tools: + +- `exstruct.extract` +- `exstruct.read_json_chunk` +- `exstruct.validate_input` + +Notes: + +- Logs go to stderr (and optionally `--log-file`) to avoid contaminating stdio responses. +- On Windows with Excel, standard/verbose can use COM for richer extraction. 
On non-Windows, COM is unavailable and extraction uses openpyxl-based fallbacks. + ## Quick Start (Python) ```python @@ -337,7 +357,6 @@ flowchart TD ``` ```` - ## Example 2: General Application Form ### Excel Sheet diff --git a/docs/README.ja.md b/docs/README.ja.md index a2732af..c6d8b14 100644 --- a/docs/README.ja.md +++ b/docs/README.ja.md @@ -49,6 +49,26 @@ exstruct input.xlsx --pdf --image # PDF と PNG(Excel 必須) 自動改ページ範囲の書き出しは API/CLI 両方に対応(Excel/COM が必要)し、CLI は利用可能な環境でのみ `--auto-page-breaks-dir` を表示します。 +## MCPサーバー (標準入出力) + +MCPの拡張機能をインストールし、標準入出力サーバーを実行します。 + +```bash +pip install exstruct[mcp] +exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict rename +``` + +利用可能なツール: + +- `exstruct.extract` +- `exstruct.read_json_chunk` +- `exstruct.validate_input` + +注意点: + +- 標準入出力の応答を汚染しないよう、ログは標準エラー出力(およびオプションで`--log-file`で指定したファイル)に出力されます。 +- WindowsのExcel環境では、標準/詳細モードでCOMを利用して、よりリッチな抽出が可能です。Windows以外ではCOMは利用できず、抽出はopenpyxlベースのフォールバック機能を使用します。 + ## クイックスタート Python ```python diff --git a/docs/mcp.md b/docs/mcp.md new file mode 100644 index 0000000..cdd14d2 --- /dev/null +++ b/docs/mcp.md @@ -0,0 +1,76 @@ +# MCP Server + +This guide explains how to run ExStruct as an MCP (Model Context Protocol) server +so AI agents can call it safely as a tool. 
+ +## What it provides + +- Convert Excel into structured JSON (file output) +- Read large JSON outputs in chunks +- Pre-validate input files + +## Installation + +```bash +pip install exstruct[mcp] +``` + +## Start (stdio) + +```bash +exstruct-mcp --root C:\\data --log-file C:\\logs\\exstruct-mcp.log --on-conflict rename +``` + +### Key options + +- `--root`: Allowed root directory (required) +- `--deny-glob`: Deny glob patterns (repeatable) +- `--log-level`: `DEBUG` / `INFO` / `WARNING` / `ERROR` +- `--log-file`: Log file path (stderr is still used by default) +- `--on-conflict`: Output conflict policy (`overwrite` / `skip` / `rename`) +- `--warmup`: Preload heavy imports to reduce first-call latency + +## Tools + +- `exstruct.extract` +- `exstruct.read_json_chunk` +- `exstruct.validate_input` + +## Basic flow + +1. Call `exstruct.extract` to generate the output JSON file +2. Use `exstruct.read_json_chunk` to read only the parts you need + +## AI agent configuration examples + +### Codex + +`~/.codex/config.toml` + +```toml +[mcp_servers.exstruct] +command = "exstruct-mcp" +args = ["--root", "C:\\data", "--log-file", "C:\\logs\\exstruct-mcp.log", "--on-conflict", "rename"] +``` + +### GitHub Copilot / Claude Desktop / Gemini CLI + +Register an MCP server with a command + args in your MCP settings: + +```json +{ + "mcpServers": { + "exstruct": { + "command": "exstruct-mcp", + "args": ["--root", "C:\\data"] + } + } +} +``` + +## Operational notes + +- Logs go to stderr (and optionally `--log-file`) to avoid contaminating stdio responses. +- On Windows with Excel, standard/verbose can use COM for richer extraction. + On non-Windows, COM is unavailable and openpyxl-based fallbacks are used. +- For large outputs, use `read_json_chunk` to avoid hitting client limits. 
diff --git a/mkdocs.yml b/mkdocs.yml index 9eb5bd8..6ecf76b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ nav: - Data Models (generated): generated/models.md - JSON Schemas: schemas.md - CLI Guide: cli.md + - MCP Server: mcp.md - Concept / Why ExStruct?: concept.md - Release Notes: - v0.3.7: release-notes/v0.3.7.md From 7402f0660c0c281601df90619131a37a24353bd0 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 17:44:19 +0900 Subject: [PATCH 10/18] =?UTF-8?q?MCP=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC?= =?UTF-8?q?=E3=81=AE=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88?= =?UTF-8?q?=E3=82=92=E6=9B=B4=E6=96=B0=E3=81=97=E3=80=81README=E3=81=ABMCP?= =?UTF-8?q?=E8=A8=AD=E5=AE=9A=E3=82=AC=E3=82=A4=E3=83=89=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0=E3=80=82=E6=96=B0=E3=81=97=E3=81=84=E3=83=86=E3=82=B9?= =?UTF-8?q?=E3=83=88=E3=82=B1=E3=83=BC=E3=82=B9=E3=82=92=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=E3=81=97=E3=81=A6=E6=A9=9F=E8=83=BD=E3=82=92=E7=A2=BA=E8=AA=8D?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.ja.md | 4 + README.md | 4 + docs/README.en.md | 4 + docs/README.ja.md | 4 + pyproject.toml | 7 + tests/mcp/test_chunk_reader.py | 108 +++++++++++++++ tests/mcp/test_extract_runner_utils.py | 61 +++++++++ tests/mcp/test_server.py | 174 +++++++++++++++++++++++++ tests/mcp/test_validate_input.py | 52 ++++++++ 9 files changed, 418 insertions(+) create mode 100644 tests/mcp/test_chunk_reader.py create mode 100644 tests/mcp/test_extract_runner_utils.py create mode 100644 tests/mcp/test_server.py create mode 100644 tests/mcp/test_validate_input.py diff --git a/README.ja.md b/README.ja.md index 102425b..c9a1376 100644 --- a/README.ja.md +++ b/README.ja.md @@ -69,6 +69,10 @@ exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict re - 標準入出力の応答を汚染しないよう、ログは標準エラー出力(およびオプションで`--log-file`で指定したファイル)に出力されます。 - 
WindowsのExcel環境では、標準/詳細モードでCOMを利用して、よりリッチな抽出が可能です。Windows以外ではCOMは利用できず、抽出はopenpyxlベースのフォールバック機能を使用します。 +各AIエージェントでのMCP設定ガイド: + +[MCPサーバー](https://harumiweb.github.io/exstruct/mcp/) + ## クイックスタート Python ```python diff --git a/README.md b/README.md index 8257847..2f6e7d4 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,10 @@ Notes: - Logs go to stderr (and optionally `--log-file`) to avoid contaminating stdio responses. - On Windows with Excel, standard/verbose can use COM for richer extraction. On non-Windows, COM is unavailable and extraction uses openpyxl-based fallbacks. +MCP Setup Guide for Each AI Agent: + +[MCP Server](https://harumiweb.github.io/exstruct/mcp/) + ## Quick Start (Python) ```python diff --git a/docs/README.en.md b/docs/README.en.md index a637a24..c9feaf5 100644 --- a/docs/README.en.md +++ b/docs/README.en.md @@ -72,6 +72,10 @@ Notes: - Logs go to stderr (and optionally `--log-file`) to avoid contaminating stdio responses. - On Windows with Excel, standard/verbose can use COM for richer extraction. On non-Windows, COM is unavailable and extraction uses openpyxl-based fallbacks.
+MCP Setup Guide for Each AI Agent: + +[MCP Server](https://harumiweb.github.io/exstruct/mcp/) + ## Quick Start (Python) ```python diff --git a/docs/README.ja.md b/docs/README.ja.md index c6d8b14..486ae92 100644 --- a/docs/README.ja.md +++ b/docs/README.ja.md @@ -69,6 +69,10 @@ exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict re - 標準入出力の応答を汚染しないよう、ログは標準エラー出力(およびオプションで`--log-file`で指定したファイル)に出力されます。 - WindowsのExcel環境では、標準/詳細モードでCOMを利用して、よりリッチな抽出が可能です。Windows以外ではCOMは利用できず、抽出はopenpyxlベースのフォールバック機能を使用します。 +各AIエージェントでのMCP設定ガイド: + +[MCPサーバー](https://harumiweb.github.io/exstruct/mcp/) + ## クイックスタート Python ```python diff --git a/pyproject.toml b/pyproject.toml index aa9d1f4..cfe8053 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,13 @@ dev = [ ] [project.optional-dependencies] +all = [ + "pyyaml>=6.0.3", + "python-toon>=0.1.3", + "pypdfium2>=5.1.0", + "Pillow>=12.0.0", + "mcp>=1.6.0,<2.0.0", +] yaml = ["pyyaml>=6.0.3"] toon = ["python-toon>=0.1.3"] render = ["pypdfium2>=5.1.0", "Pillow>=12.0.0"] diff --git a/tests/mcp/test_chunk_reader.py b/tests/mcp/test_chunk_reader.py new file mode 100644 index 0000000..3e23c31 --- /dev/null +++ b/tests/mcp/test_chunk_reader.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +from collections.abc import Mapping +import json +from pathlib import Path + +import pytest + +from exstruct.mcp.chunk_reader import ( + ReadJsonChunkFilter, + ReadJsonChunkRequest, + read_json_chunk, +) + + +def _write_json(path: Path, data: Mapping[str, object]) -> None: + path.write_text(json.dumps(data, ensure_ascii=False), encoding="utf-8") + + +def test_read_json_chunk_raw(tmp_path: Path) -> None: + data = {"book_name": "book", "sheets": {"Sheet1": {"rows": []}}} + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest(out_path=out, max_bytes=10_000) + result = read_json_chunk(request) + assert json.loads(result.chunk) == data + assert result.next_cursor is None + + +def
test_read_json_chunk_raw_too_large(tmp_path: Path) -> None: + data = {"book_name": "book", "sheets": {"Sheet1": {"rows": []}}} + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest(out_path=out, max_bytes=10) + with pytest.raises(ValueError): + read_json_chunk(request) + + +def test_read_json_chunk_with_filters(tmp_path: Path) -> None: + data = { + "book_name": "book", + "sheets": { + "Sheet1": { + "rows": [ + {"r": 1, "c": {"0": "A", "1": "B"}}, + {"r": 2, "c": {"0": "C", "1": "D"}}, + ] + } + }, + } + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest( + out_path=out, + sheet="Sheet1", + max_bytes=10_000, + filter=ReadJsonChunkFilter(rows=(1, 1), cols=(1, 1)), + ) + result = read_json_chunk(request) + payload = json.loads(result.chunk) + rows = payload["sheet"]["rows"] + assert len(rows) == 1 + assert rows[0]["r"] == 1 + assert rows[0]["c"] == {"0": "A"} + + +def test_read_json_chunk_requires_sheet(tmp_path: Path) -> None: + data = {"book_name": "book", "sheets": {"A": {"rows": []}, "B": {"rows": []}}} + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest( + out_path=out, + max_bytes=10_000, + filter=ReadJsonChunkFilter(rows=(1, 1)), + ) + with pytest.raises(ValueError): + read_json_chunk(request) + + +def test_read_json_chunk_invalid_cursor(tmp_path: Path) -> None: + data = {"book_name": "book", "sheets": {"Sheet1": {"rows": []}}} + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest( + out_path=out, + sheet="Sheet1", + cursor="bad", + max_bytes=10_000, + ) + with pytest.raises(ValueError): + read_json_chunk(request) + + +def test_read_json_chunk_cursor_beyond_rows(tmp_path: Path) -> None: + data = { + "book_name": "book", + "sheets": {"Sheet1": {"rows": [{"r": 1, "c": {"0": "A"}}]}}, + } + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest( + out_path=out, + sheet="Sheet1", + cursor="2", + 
max_bytes=10_000, + ) + with pytest.raises(ValueError): + read_json_chunk(request) diff --git a/tests/mcp/test_extract_runner_utils.py b/tests/mcp/test_extract_runner_utils.py new file mode 100644 index 0000000..d242cf1 --- /dev/null +++ b/tests/mcp/test_extract_runner_utils.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from exstruct.mcp import extract_runner +from exstruct.mcp.io import PathPolicy + + +def test_resolve_input_path_missing(tmp_path: Path) -> None: + with pytest.raises(FileNotFoundError): + extract_runner._resolve_input_path(tmp_path / "missing.xlsx", policy=None) + + +def test_format_suffix() -> None: + assert extract_runner._format_suffix("yml") == ".yml" + assert extract_runner._format_suffix("json") == ".json" + + +def test_normalize_output_name(tmp_path: Path) -> None: + input_path = tmp_path / "book.xlsx" + assert ( + extract_runner._normalize_output_name(input_path, None, ".json") == "book.json" + ) + assert ( + extract_runner._normalize_output_name(input_path, "out", ".json") == "out.json" + ) + assert ( + extract_runner._normalize_output_name(input_path, "out.yaml", ".json") + == "out.yaml" + ) + + +def test_resolve_output_path_denies_outside_root(tmp_path: Path) -> None: + policy = PathPolicy(root=tmp_path) + input_path = tmp_path / "book.xlsx" + input_path.write_text("x", encoding="utf-8") + outside = tmp_path.parent + with pytest.raises(ValueError): + extract_runner._resolve_output_path( + input_path, + "json", + out_dir=outside, + out_name=None, + policy=policy, + ) + + +def test_try_read_workbook_meta(tmp_path: Path) -> None: + from openpyxl import Workbook + + path = tmp_path / "book.xlsx" + wb = Workbook() + wb.active.title = "Sheet1" + wb.save(path) + meta, warnings = extract_runner._try_read_workbook_meta(path) + assert meta is not None + assert meta.sheet_count == 1 + assert meta.sheet_names == ["Sheet1"] + assert warnings == [] diff --git a/tests/mcp/test_server.py 
b/tests/mcp/test_server.py new file mode 100644 index 0000000..8fa3fd1 --- /dev/null +++ b/tests/mcp/test_server.py @@ -0,0 +1,174 @@ +from __future__ import annotations + +from collections.abc import Awaitable, Callable +import importlib +from pathlib import Path +from typing import cast + +import anyio +import pytest + +from exstruct.mcp import server +from exstruct.mcp.extract_runner import OnConflictPolicy +from exstruct.mcp.io import PathPolicy +from exstruct.mcp.tools import ( + ExtractToolInput, + ExtractToolOutput, + ReadJsonChunkToolInput, + ReadJsonChunkToolOutput, + ValidateInputToolInput, + ValidateInputToolOutput, +) + +ToolFunc = Callable[..., object] | Callable[..., Awaitable[object]] + + +class DummyApp: + def __init__(self) -> None: + self.tools: dict[str, ToolFunc] = {} + + def tool(self, *, name: str) -> Callable[[ToolFunc], ToolFunc]: + def decorator(func: ToolFunc) -> ToolFunc: + self.tools[name] = func + return func + + return decorator + + +async def _call_async( + func: Callable[..., Awaitable[object]], + kwargs: dict[str, object], +) -> object: + return await func(**kwargs) + + +def test_parse_args_defaults(tmp_path: Path) -> None: + config = server._parse_args(["--root", str(tmp_path)]) + assert config.root == tmp_path + assert config.deny_globs == [] + assert config.log_level == "INFO" + assert config.log_file is None + assert config.on_conflict == "overwrite" + assert config.warmup is False + + +def test_parse_args_with_options(tmp_path: Path) -> None: + log_file = tmp_path / "log.txt" + config = server._parse_args( + [ + "--root", + str(tmp_path), + "--deny-glob", + "**/*.tmp", + "--deny-glob", + "**/*.secret", + "--log-level", + "DEBUG", + "--log-file", + str(log_file), + "--on-conflict", + "rename", + "--warmup", + ] + ) + assert config.deny_globs == ["**/*.tmp", "**/*.secret"] + assert config.log_level == "DEBUG" + assert config.log_file == log_file + assert config.on_conflict == "rename" + assert config.warmup is True + + +def 
test_import_mcp_missing(monkeypatch: pytest.MonkeyPatch) -> None: + def _raise(_: str) -> None: + raise ModuleNotFoundError("mcp") + + monkeypatch.setattr(importlib, "import_module", _raise) + with pytest.raises(RuntimeError): + server._import_mcp() + + +def test_coerce_filter() -> None: + assert server._coerce_filter(None) is None + assert server._coerce_filter({"a": 1}) == {"a": 1} + + +def test_register_tools_uses_default_on_conflict( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + app = DummyApp() + policy = PathPolicy(root=tmp_path) + calls: dict[str, tuple[object, ...]] = {} + + def fake_run_extract_tool( + payload: ExtractToolInput, + *, + policy: PathPolicy, + on_conflict: OnConflictPolicy, + ) -> ExtractToolOutput: + calls["extract"] = (payload, policy, on_conflict) + return ExtractToolOutput(out_path="out.json") + + def fake_run_read_json_chunk_tool( + payload: ReadJsonChunkToolInput, + *, + policy: PathPolicy, + ) -> ReadJsonChunkToolOutput: + calls["chunk"] = (payload, policy) + return ReadJsonChunkToolOutput(chunk="{}") + + def fake_run_validate_input_tool( + payload: ValidateInputToolInput, + *, + policy: PathPolicy, + ) -> ValidateInputToolOutput: + calls["validate"] = (payload, policy) + return ValidateInputToolOutput(is_readable=True) + + async def fake_run_sync(func: Callable[[], object]) -> object: + return func() + + monkeypatch.setattr(server, "run_extract_tool", fake_run_extract_tool) + monkeypatch.setattr( + server, "run_read_json_chunk_tool", fake_run_read_json_chunk_tool + ) + monkeypatch.setattr(server, "run_validate_input_tool", fake_run_validate_input_tool) + monkeypatch.setattr(anyio.to_thread, "run_sync", fake_run_sync) + + server._register_tools(app, policy, default_on_conflict="rename") + + extract_tool = cast(Callable[..., Awaitable[object]], app.tools["exstruct.extract"]) + anyio.run(_call_async, extract_tool, {"xlsx_path": "in.xlsx"}) + cast(Callable[..., object], app.tools["exstruct.read_json_chunk"])( + 
out_path="out.json", filter={"rows": [1, 2]} + ) + cast(Callable[..., object], app.tools["exstruct.validate_input"])( + xlsx_path="in.xlsx" + ) + + assert calls["extract"][2] == "rename" + chunk_call = cast(tuple[ReadJsonChunkToolInput, PathPolicy], calls["chunk"]) + assert chunk_call[0].filter is not None + + +def test_run_server_sets_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + created: dict[str, object] = {} + + def fake_import() -> None: + created["imported"] = True + + class _App: + def run(self) -> None: + created["ran"] = True + + def fake_create_app(policy: PathPolicy, *, on_conflict: OnConflictPolicy) -> _App: + created["policy"] = policy + created["on_conflict"] = on_conflict + return _App() + + monkeypatch.setattr(server, "_import_mcp", fake_import) + monkeypatch.setattr(server, "_create_app", fake_create_app) + config = server.ServerConfig(root=tmp_path) + server.run_server(config) + assert created["imported"] is True + assert created["ran"] is True + assert created["on_conflict"] == "overwrite" diff --git a/tests/mcp/test_validate_input.py b/tests/mcp/test_validate_input.py new file mode 100644 index 0000000..f0584f8 --- /dev/null +++ b/tests/mcp/test_validate_input.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import importlib +from pathlib import Path + +import pytest + +from exstruct.cli.availability import ComAvailability +from exstruct.mcp.io import PathPolicy +from exstruct.mcp.validate_input import ValidateInputRequest, validate_input + + +def test_validate_input_missing_file(tmp_path: Path) -> None: + request = ValidateInputRequest(xlsx_path=tmp_path / "missing.xlsx") + result = validate_input(request) + assert result.is_readable is False + assert result.errors + + +def test_validate_input_invalid_extension(tmp_path: Path) -> None: + path = tmp_path / "input.txt" + path.write_text("x", encoding="utf-8") + request = ValidateInputRequest(xlsx_path=path) + result = validate_input(request) + assert result.is_readable 
is False + assert "Unsupported file extension" in result.errors[0] + + +def test_validate_input_policy_denied(tmp_path: Path) -> None: + policy = PathPolicy(root=tmp_path) + outside = tmp_path.parent / "outside.xlsx" + request = ValidateInputRequest(xlsx_path=outside) + result = validate_input(request, policy=policy) + assert result.is_readable is False + assert result.errors + + +def test_validate_input_warns_on_com( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + path = tmp_path / "input.xlsx" + path.write_bytes(b"test") + validate_input_module = importlib.import_module("exstruct.mcp.validate_input") + monkeypatch.setattr( + validate_input_module, + "get_com_availability", + lambda: ComAvailability(available=False, reason="No COM"), + ) + request = ValidateInputRequest(xlsx_path=path) + result = validate_input(request) + assert result.is_readable is True + assert any("COM unavailable" in warning for warning in result.warnings) From d088525f0233cb839c8bc0e8b3cf2b7def1e842a Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 17:54:32 +0900 Subject: [PATCH 11/18] =?UTF-8?q?pytest.yml=E3=81=A8uv.lock=E3=81=ABanyio?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81uv.lock=E3=81=AB?= =?UTF-8?q?=E3=82=AA=E3=83=97=E3=82=B7=E3=83=A7=E3=83=B3=E4=BE=9D=E5=AD=98?= =?UTF-8?q?=E9=96=A2=E4=BF=82=E3=82=92=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/pytest.yml | 2 +- tests/mcp/test_server.py | 5 +++-- uv.lock | 14 +++++++++++++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 605d9b0..076c3ad 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -27,7 +27,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -e .[yaml] - pip install pytest pytest-cov pytest-mock + pip install pytest pytest-cov pytest-mock anyio - name: Run tests (non-COM 
suite) if: runner.os != 'Windows' run: | diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py index 8fa3fd1..d9986a6 100644 --- a/tests/mcp/test_server.py +++ b/tests/mcp/test_server.py @@ -3,9 +3,8 @@ from collections.abc import Awaitable, Callable import importlib from pathlib import Path -from typing import cast +from typing import Any, cast -import anyio import pytest from exstruct.mcp import server @@ -20,6 +19,8 @@ ValidateInputToolOutput, ) +anyio: Any = pytest.importorskip("anyio") + ToolFunc = Callable[..., object] | Callable[..., Awaitable[object]] diff --git a/uv.lock b/uv.lock index db61bbf..f876436 100644 --- a/uv.lock +++ b/uv.lock @@ -464,6 +464,13 @@ dependencies = [ ] [package.optional-dependencies] +all = [ + { name = "mcp" }, + { name = "pillow" }, + { name = "pypdfium2" }, + { name = "python-toon" }, + { name = "pyyaml" }, +] mcp = [ { name = "mcp" }, ] @@ -494,19 +501,24 @@ dev = [ [package.metadata] requires-dist = [ + { name = "mcp", marker = "extra == 'all'", specifier = ">=1.6.0,<2.0.0" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.6.0,<2.0.0" }, { name = "numpy", specifier = ">=2.3.5" }, { name = "openpyxl", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=2.3.3" }, + { name = "pillow", marker = "extra == 'all'", specifier = ">=12.0.0" }, { name = "pillow", marker = "extra == 'render'", specifier = ">=12.0.0" }, { name = "pydantic", specifier = ">=2.12.5" }, + { name = "pypdfium2", marker = "extra == 'all'", specifier = ">=5.1.0" }, { name = "pypdfium2", marker = "extra == 'render'", specifier = ">=5.1.0" }, + { name = "python-toon", marker = "extra == 'all'", specifier = ">=0.1.3" }, { name = "python-toon", marker = "extra == 'toon'", specifier = ">=0.1.3" }, + { name = "pyyaml", marker = "extra == 'all'", specifier = ">=6.0.3" }, { name = "pyyaml", marker = "extra == 'yaml'", specifier = ">=6.0.3" }, { name = "scipy", specifier = ">=1.16.3" }, { name = "xlwings", specifier = ">=0.33.16" }, ] 
-provides-extras = ["yaml", "toon", "render", "mcp"] +provides-extras = ["all", "yaml", "toon", "render", "mcp"] [package.metadata.requires-dev] dev = [ From 50092a85c88ff4549b928587bccaf4f7d68f660f Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 19:58:36 +0900 Subject: [PATCH 12/18] =?UTF-8?q?TASKS.md=E3=81=ABPR=20#47=E3=81=AE?= =?UTF-8?q?=E3=83=AC=E3=83=93=E3=83=A5=E3=83=BC=E5=AF=BE=E5=BF=9C=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81cells.py=E3=81=AE=E5=88=97?= =?UTF-8?q?=E5=B9=85=E7=B8=AE=E5=B0=8F=E3=83=92=E3=83=A5=E3=83=BC=E3=83=AA?= =?UTF-8?q?=E3=82=B9=E3=83=86=E3=82=A3=E3=83=83=E3=82=AF=E3=82=92=E5=86=8D?= =?UTF-8?q?=E6=A4=9C=E8=A8=8E=E3=80=82=E9=96=A2=E9=80=A3=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82?= =?UTF-8?q?=20cells.py=E3=81=AB=E6=9C=80=E5=B0=8F=E8=A1=8C=E6=95=B0?= =?UTF-8?q?=E3=81=AE=E5=88=97=E5=B9=85=E7=B8=AE=E5=B0=8F=E5=88=B6=E9=99=90?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81load=5Fborder=5Fma?= =?UTF-8?q?ps=5Fxlsx=E9=96=A2=E6=95=B0=E3=82=92=E4=BF=AE=E6=AD=A3=E3=80=82?= =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=B1=E3=83=BC=E3=82=B9=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=81=97=E3=81=A6=E5=8B=95=E4=BD=9C=E3=82=92?= =?UTF-8?q?=E7=A2=BA=E8=AA=8D=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/agents/TASKS.md | 6 +++++ src/exstruct/core/cells.py | 10 +++++++++ tests/core/test_cells_and_tables.py | 35 ++++++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/docs/agents/TASKS.md b/docs/agents/TASKS.md index e8dd78c..999c348 100644 --- a/docs/agents/TASKS.md +++ b/docs/agents/TASKS.md @@ -22,3 +22,9 @@ - [x] `--on-conflict` の出力衝突ポリシー実装 - [x] Windows/非Windows の読み取り差分を明文化 - [x] 最低限のテスト追加(パス制約 / 入出力モデル / 例外) + +## PR #47 レビュー対応 + +- [x] cells.py の列幅縮小ヒューリスティックを再検討(遅い行に境界があるケースで早期縮小しない方針に修正) +- [x] 上記修正に対応するテストを追加(遅い行・右端に表があるケースを openpyxl で検証) +- [ ] Codecov 
指摘の不足分を埋めるテスト追加(mcp: chunk_reader/extract_runner/server/tools/validate_input/io、core/cells) diff --git a/src/exstruct/core/cells.py b/src/exstruct/core/cells.py index 33b5794..393462d 100644 --- a/src/exstruct/core/cells.py +++ b/src/exstruct/core/cells.py @@ -120,6 +120,7 @@ class TableScanLimits: max_cols: int empty_row_run: int empty_col_run: int + min_rows_before_col_shrink: int def scaled(self, factor: float) -> TableScanLimits: """Return a scaled copy of the limits.""" @@ -128,6 +129,9 @@ def scaled(self, factor: float) -> TableScanLimits: max_cols=int(math.ceil(self.max_cols * factor)), empty_row_run=int(math.ceil(self.empty_row_run * factor)), empty_col_run=int(math.ceil(self.empty_col_run * factor)), + min_rows_before_col_shrink=int( + math.ceil(self.min_rows_before_col_shrink * factor) + ), ) @@ -136,6 +140,7 @@ def scaled(self, factor: float) -> TableScanLimits: max_cols=200, empty_row_run=200, empty_col_run=80, + min_rows_before_col_shrink=200, ) @@ -948,6 +953,7 @@ def edge_has_style(edge: object) -> bool: consecutive_empty_rows = 0 current_max_col = scan_max_col + rows_scanned = 0 for r in range(min_row, scan_max_row + 1): row_has_border = False @@ -979,9 +985,13 @@ def edge_has_style(edge: object) -> bool: consecutive_empty_rows = 0 else: consecutive_empty_rows += 1 + rows_scanned += 1 if consecutive_empty_rows >= resolved_limits.empty_row_run: break + if rows_scanned < resolved_limits.min_rows_before_col_shrink: + continue + trailing_empty_cols = 0 for c in range(current_max_col, min_col - 1, -1): if col_has_border[c]: diff --git a/tests/core/test_cells_and_tables.py b/tests/core/test_cells_and_tables.py index e249a7e..70a7196 100644 --- a/tests/core/test_cells_and_tables.py +++ b/tests/core/test_cells_and_tables.py @@ -3,10 +3,16 @@ from _pytest.monkeypatch import MonkeyPatch from openpyxl import Workbook +from openpyxl.styles import Border, Side from openpyxl.worksheet.table import Table, TableStyleInfo import pytest -from exstruct.core.cells 
import detect_tables_openpyxl, extract_sheet_cells +from exstruct.core.cells import ( + TableScanLimits, + detect_tables_openpyxl, + extract_sheet_cells, + load_border_maps_xlsx, +) from exstruct.core.integrate import extract_workbook @@ -55,6 +61,33 @@ def test_openpyxlで正式テーブルを検出できる(tmp_path: Path) -> None assert "A1:B3" in tables +def test_openpyxl_border_scan_defers_column_shrink(tmp_path: Path) -> None: + path = tmp_path / "border.xlsx" + wb = Workbook() + ws = wb.active + ws.title = "Sheet1" + border = Border( + left=Side(style="thin"), + right=Side(style="thin"), + top=Side(style="thin"), + bottom=Side(style="thin"), + ) + ws.cell(row=1, column=1, value="x").border = border + ws.cell(row=50, column=120, value="y").border = border + wb.save(path) + wb.close() + + limits = TableScanLimits( + max_rows=200, + max_cols=200, + empty_row_run=200, + empty_col_run=1, + min_rows_before_col_shrink=60, + ) + has_border, *_ = load_border_maps_xlsx(path, "Sheet1", scan_limits=limits) + assert bool(has_border[50, 120]) is True + + def test_excelなし環境ではセルとテーブルのみ返す( monkeypatch: MonkeyPatch, tmp_path: Path ) -> None: From be98ae07950a2c409e0420fa2b4264f3a7c26e70 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 20:09:34 +0900 Subject: [PATCH 13/18] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E5=85=A5=E5=8A=9B=E6=A4=9C?= =?UTF-8?q?=E8=A8=BC=E3=80=81=E6=8A=BD=E5=87=BA=E3=83=84=E3=83=BC=E3=83=AB?= =?UTF-8?q?=E3=80=81JSON=E3=83=81=E3=83=A3=E3=83=B3=E3=82=AF=E3=81=AE?= =?UTF-8?q?=E8=AA=AD=E3=81=BF=E8=BE=BC=E3=81=BF=E3=81=AB=E9=96=A2=E3=81=99?= =?UTF-8?q?=E3=82=8B=E6=96=B0=E3=81=97=E3=81=84=E3=82=B1=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/mcp/test_chunk_reader.py | 105 +++++++++++++++++++++++++ tests/mcp/test_extract_runner_utils.py | 84 ++++++++++++++++++++ tests/mcp/test_server.py | 30 +++++++ 
tests/mcp/test_tools_handlers.py | 93 ++++++++++++++++++++++ tests/mcp/test_validate_input.py | 25 ++++++ 5 files changed, 337 insertions(+) create mode 100644 tests/mcp/test_tools_handlers.py diff --git a/tests/mcp/test_chunk_reader.py b/tests/mcp/test_chunk_reader.py index 3e23c31..842e9c9 100644 --- a/tests/mcp/test_chunk_reader.py +++ b/tests/mcp/test_chunk_reader.py @@ -6,6 +6,7 @@ import pytest +from exstruct.mcp import chunk_reader from exstruct.mcp.chunk_reader import ( ReadJsonChunkFilter, ReadJsonChunkRequest, @@ -106,3 +107,107 @@ def test_read_json_chunk_cursor_beyond_rows(tmp_path: Path) -> None: ) with pytest.raises(ValueError): read_json_chunk(request) + + +def test_read_json_chunk_rejects_non_object_root(tmp_path: Path) -> None: + out = tmp_path / "out.json" + out.write_text(json.dumps([1, 2, 3]), encoding="utf-8") + request = ReadJsonChunkRequest(out_path=out, sheet="Sheet1") + with pytest.raises(ValueError): + read_json_chunk(request) + + +def test_read_json_chunk_rejects_invalid_sheets_mapping(tmp_path: Path) -> None: + data = {"book_name": "book", "sheets": ["Sheet1"]} + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest(out_path=out, sheet="Sheet1") + with pytest.raises(ValueError): + read_json_chunk(request) + + +def test_read_json_chunk_rejects_missing_sheet(tmp_path: Path) -> None: + data = {"book_name": "book", "sheets": {"Sheet1": {"rows": []}}} + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest(out_path=out, sheet="Missing") + with pytest.raises(ValueError): + read_json_chunk(request) + + +def test_read_json_chunk_rejects_negative_cursor(tmp_path: Path) -> None: + data = {"book_name": "book", "sheets": {"Sheet1": {"rows": []}}} + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest( + out_path=out, sheet="Sheet1", cursor="-1", max_bytes=10_000 + ) + with pytest.raises(ValueError): + read_json_chunk(request) + + +def 
test_read_json_chunk_warns_on_row_filter_inversion(tmp_path: Path) -> None: + data = { + "book_name": "book", + "sheets": {"Sheet1": {"rows": [{"r": 1, "c": {"0": "A"}}]}}, + } + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest( + out_path=out, + sheet="Sheet1", + max_bytes=10_000, + filter=ReadJsonChunkFilter(rows=(2, 1)), + ) + result = read_json_chunk(request) + assert any("Row filter ignored" in warning for warning in result.warnings) + + +def test_read_json_chunk_warns_on_col_filter_inversion(tmp_path: Path) -> None: + data = { + "book_name": "book", + "sheets": {"Sheet1": {"rows": [{"r": 1, "c": {"0": "A"}}]}}, + } + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest( + out_path=out, + sheet="Sheet1", + max_bytes=10_000, + filter=ReadJsonChunkFilter(cols=(2, 1)), + ) + result = read_json_chunk(request) + assert any("Column filter ignored" in warning for warning in result.warnings) + + +def test_read_json_chunk_warns_on_base_payload_exceeds_max_bytes( + tmp_path: Path, +) -> None: + data = { + "book_name": "book", + "sheets": {"Sheet1": {"rows": [{"r": 1, "c": {"0": "A"}}]}}, + } + out = tmp_path / "out.json" + _write_json(out, data) + request = ReadJsonChunkRequest(out_path=out, sheet="Sheet1", max_bytes=1) + result = read_json_chunk(request) + assert any("Base payload exceeds" in warning for warning in result.warnings) + + +def test_read_json_chunk_warns_on_too_small_max_bytes(tmp_path: Path) -> None: + data = { + "book_name": "book", + "sheets": {"Sheet1": {"rows": [{"r": 1, "c": {"0": "x" * 200}}]}}, + } + out = tmp_path / "out.json" + _write_json(out, data) + base_payload = { + "book_name": "book", + "sheet_name": "Sheet1", + "sheet": {"rows": []}, + } + base_json = chunk_reader._serialize_json(base_payload) + max_bytes = len(base_json.encode("utf-8")) + 1 + request = ReadJsonChunkRequest(out_path=out, sheet="Sheet1", max_bytes=max_bytes) + result = read_json_chunk(request) + assert 
any("max_bytes too small" in warning for warning in result.warnings) diff --git a/tests/mcp/test_extract_runner_utils.py b/tests/mcp/test_extract_runner_utils.py index d242cf1..69564e4 100644 --- a/tests/mcp/test_extract_runner_utils.py +++ b/tests/mcp/test_extract_runner_utils.py @@ -1,5 +1,7 @@ from __future__ import annotations +import builtins +from collections.abc import Mapping, Sequence from pathlib import Path import pytest @@ -59,3 +61,85 @@ def test_try_read_workbook_meta(tmp_path: Path) -> None: assert meta.sheet_count == 1 assert meta.sheet_names == ["Sheet1"] assert warnings == [] + + +def test_run_extract_skips_when_output_exists( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + input_path = tmp_path / "input.xlsx" + output_path = tmp_path / "input.json" + input_path.write_text("x", encoding="utf-8") + output_path.write_text("y", encoding="utf-8") + + def _raise(*_args: object, **_kwargs: object) -> None: + raise AssertionError("process_excel should not be called") + + monkeypatch.setattr(extract_runner, "process_excel", _raise) + request = extract_runner.ExtractRequest( + xlsx_path=input_path, + on_conflict="skip", + format="json", + ) + result = extract_runner.run_extract(request) + assert result.workbook_meta is None + assert any("skipping write" in warning for warning in result.warnings) + + +def test_run_extract_creates_output_dir( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + input_path = tmp_path / "input.xlsx" + input_path.write_text("x", encoding="utf-8") + out_dir = tmp_path / "nested" / "out" + + def _noop(*_args: object, **_kwargs: object) -> None: + return None + + monkeypatch.setattr(extract_runner, "process_excel", _noop) + monkeypatch.setattr(extract_runner, "_try_read_workbook_meta", lambda _: (None, [])) + request = extract_runner.ExtractRequest( + xlsx_path=input_path, + out_dir=out_dir, + format="json", + ) + extract_runner.run_extract(request) + assert out_dir.exists() + + +def 
test_try_read_workbook_meta_import_error( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + path = tmp_path / "missing.xlsx" + + def _import( + name: str, + globals_: Mapping[str, object] | None = None, + locals_: Mapping[str, object] | None = None, + fromlist: Sequence[str] = (), + level: int = 0, + ) -> object: + if name == "openpyxl": + raise ImportError("missing") + return builtins.__import__(name, globals_, locals_, fromlist, level) + + monkeypatch.setattr(builtins, "__import__", _import) + meta, warnings = extract_runner._try_read_workbook_meta(path) + assert meta is None + assert any("openpyxl is not available" in warning for warning in warnings) + + +def test_try_read_workbook_meta_load_failure( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + path = tmp_path / "broken.xlsx" + path.write_text("x", encoding="utf-8") + + def _raise(*_args: object, **_kwargs: object) -> None: + raise ValueError("boom") + + import openpyxl + + monkeypatch.setattr(openpyxl, "load_workbook", _raise) + meta, warnings = extract_runner._try_read_workbook_meta(path) + assert meta is None + assert any("Failed to read workbook metadata" in warning for warning in warnings) diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py index d9986a6..a2b8fe7 100644 --- a/tests/mcp/test_server.py +++ b/tests/mcp/test_server.py @@ -2,6 +2,7 @@ from collections.abc import Awaitable, Callable import importlib +import logging from pathlib import Path from typing import Any, cast @@ -173,3 +174,32 @@ def fake_create_app(policy: PathPolicy, *, on_conflict: OnConflictPolicy) -> _Ap assert created["imported"] is True assert created["ran"] is True assert created["on_conflict"] == "overwrite" + + +def test_configure_logging_with_file( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + log_file = tmp_path / "server.log" + config = server.ServerConfig(root=tmp_path, log_file=log_file) + captured: dict[str, object] = {} + + def _basic_config(**kwargs: 
object) -> None: + captured.update(kwargs) + + monkeypatch.setattr(logging, "basicConfig", _basic_config) + server._configure_logging(config) + handlers = cast(list[logging.Handler], captured["handlers"]) + assert any(isinstance(handler, logging.FileHandler) for handler in handlers) + + +def test_warmup_exstruct_imports(monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[str] = [] + + def _record(name: str) -> object: + calls.append(name) + return object() + + monkeypatch.setattr(importlib, "import_module", _record) + server._warmup_exstruct() + assert "exstruct.core.cells" in calls + assert "exstruct.core.integrate" in calls diff --git a/tests/mcp/test_tools_handlers.py b/tests/mcp/test_tools_handlers.py new file mode 100644 index 0000000..c77ca1b --- /dev/null +++ b/tests/mcp/test_tools_handlers.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from exstruct.mcp import tools +from exstruct.mcp.chunk_reader import ( + ReadJsonChunkFilter, + ReadJsonChunkRequest, + ReadJsonChunkResult, +) +from exstruct.mcp.extract_runner import ExtractRequest, ExtractResult +from exstruct.mcp.validate_input import ValidateInputRequest, ValidateInputResult + + +def test_run_extract_tool_prefers_payload_on_conflict( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, object] = {} + + def _fake_run_extract( + request: ExtractRequest, *, policy: object | None = None + ) -> ExtractResult: + captured["request"] = request + return ExtractResult(out_path="out.json") + + monkeypatch.setattr(tools, "run_extract", _fake_run_extract) + payload = tools.ExtractToolInput(xlsx_path="input.xlsx", on_conflict="skip") + tools.run_extract_tool(payload, on_conflict="rename") + request = captured["request"] + assert isinstance(request, ExtractRequest) + assert request.on_conflict == "skip" + + +def test_run_extract_tool_uses_default_on_conflict( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, object] = {} + 
+ def _fake_run_extract( + request: ExtractRequest, *, policy: object | None = None + ) -> ExtractResult: + captured["request"] = request + return ExtractResult(out_path="out.json") + + monkeypatch.setattr(tools, "run_extract", _fake_run_extract) + payload = tools.ExtractToolInput(xlsx_path="input.xlsx", on_conflict=None) + tools.run_extract_tool(payload, on_conflict="rename") + request = captured["request"] + assert isinstance(request, ExtractRequest) + assert request.on_conflict == "rename" + + +def test_run_read_json_chunk_tool_builds_request( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, object] = {} + + def _fake_read_json_chunk( + request: ReadJsonChunkRequest, *, policy: object | None = None + ) -> ReadJsonChunkResult: + captured["request"] = request + return ReadJsonChunkResult(chunk="{}", next_cursor=None, warnings=[]) + + monkeypatch.setattr(tools, "read_json_chunk", _fake_read_json_chunk) + payload = tools.ReadJsonChunkToolInput( + out_path="out.json", filter=ReadJsonChunkFilter(rows=(1, 2)) + ) + tools.run_read_json_chunk_tool(payload) + request = captured["request"] + assert isinstance(request, ReadJsonChunkRequest) + assert request.out_path == Path("out.json") + assert request.filter is not None + + +def test_run_validate_input_tool_builds_request( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, object] = {} + + def _fake_validate_input( + request: ValidateInputRequest, *, policy: object | None = None + ) -> ValidateInputResult: + captured["request"] = request + return ValidateInputResult(is_readable=True) + + monkeypatch.setattr(tools, "validate_input", _fake_validate_input) + payload = tools.ValidateInputToolInput(xlsx_path="input.xlsx") + tools.run_validate_input_tool(payload) + request = captured["request"] + assert isinstance(request, ValidateInputRequest) + assert request.xlsx_path == Path("input.xlsx") diff --git a/tests/mcp/test_validate_input.py b/tests/mcp/test_validate_input.py index 
f0584f8..cea96e2 100644 --- a/tests/mcp/test_validate_input.py +++ b/tests/mcp/test_validate_input.py @@ -50,3 +50,28 @@ def test_validate_input_warns_on_com( result = validate_input(request) assert result.is_readable is True assert any("COM unavailable" in warning for warning in result.warnings) + + +def test_validate_input_rejects_directory(tmp_path: Path) -> None: + path = tmp_path / "input.xlsx" + path.mkdir() + request = ValidateInputRequest(xlsx_path=path) + result = validate_input(request) + assert result.is_readable is False + assert any("Path is not a file" in error for error in result.errors) + + +def test_validate_input_handles_read_failure( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + path = tmp_path / "input.xlsx" + path.write_text("x", encoding="utf-8") + + def _raise(*_args: object, **_kwargs: object) -> object: + raise OSError("boom") + + monkeypatch.setattr(Path, "open", _raise) + request = ValidateInputRequest(xlsx_path=path) + result = validate_input(request) + assert result.is_readable is False + assert any("Failed to read file" in error for error in result.errors) From c7d9ce653bfb9e998e48310a571900b9239b1b2a Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 20:21:28 +0900 Subject: [PATCH 14/18] =?UTF-8?q?README=E3=81=A8=E3=83=89=E3=82=AD?= =?UTF-8?q?=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E5=86=85=E3=81=AEMCP?= =?UTF-8?q?=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC=E3=81=AE=E3=83=AA=E3=83=B3?= =?UTF-8?q?=E3=82=AF=E3=82=92=E6=9B=B4=E6=96=B0=E3=81=97=E3=80=81=E3=82=B3?= =?UTF-8?q?=E3=83=BC=E3=83=89=E5=86=85=E3=81=AB=E9=96=A2=E6=95=B0=E3=81=AE?= =?UTF-8?q?=E3=83=89=E3=82=AD=E3=83=A5=E3=83=A1=E3=83=B3=E3=83=88=E3=82=B9?= =?UTF-8?q?=E3=83=88=E3=83=AA=E3=83=B3=E3=82=B0=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.ja.md | 2 +- README.md | 3 +- docs/README.en.md | 2 +- docs/README.ja.md | 2 +- docs/agents/TASKS.md | 3 +- 
src/exstruct/core/cells.py | 123 ++++++++++++++++++++++++++++++++++ src/exstruct/core/pipeline.py | 9 +++ src/exstruct/core/shapes.py | 1 + src/exstruct/engine.py | 13 ++++ 9 files changed, 152 insertions(+), 6 deletions(-) diff --git a/README.ja.md b/README.ja.md index c9a1376..100ac17 100644 --- a/README.ja.md +++ b/README.ja.md @@ -71,7 +71,7 @@ exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict re 各AIエージェントでのMCP設定ガイド: -[MCPサーバー](https://harumiweb.github.io/exstruct/cli/) +[MCPサーバー](https://harumiweb.github.io/exstruct/mcp/) ## クイックスタート Python diff --git a/README.md b/README.md index 2f6e7d4..5b77696 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Notes: MCP Setup Guide for Each AI Agent: -[MCP Server](https://harumiweb.github.io/exstruct/cli/) +[MCP Server](https://harumiweb.github.io/exstruct/mcp/) ## Quick Start (Python) @@ -361,7 +361,6 @@ flowchart TD ``` ```` - ## Example 2: General Application Form ### Excel Sheet diff --git a/docs/README.en.md b/docs/README.en.md index c9feaf5..54b4740 100644 --- a/docs/README.en.md +++ b/docs/README.en.md @@ -74,7 +74,7 @@ Notes: MCP Setup Guide for Each AI Agent: -[MCP Server](https://harumiweb.github.io/exstruct/cli/) +[MCP Server](https://harumiweb.github.io/exstruct/mcp/) ## Quick Start (Python) diff --git a/docs/README.ja.md b/docs/README.ja.md index 486ae92..479ec8f 100644 --- a/docs/README.ja.md +++ b/docs/README.ja.md @@ -71,7 +71,7 @@ exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict re 各AIエージェントでのMCP設定ガイド: -[MCPサーバー](https://harumiweb.github.io/exstruct/cli/) +[MCPサーバー](https://harumiweb.github.io/exstruct/mcp/) ## クイックスタート Python diff --git a/docs/agents/TASKS.md b/docs/agents/TASKS.md index 999c348..7921365 100644 --- a/docs/agents/TASKS.md +++ b/docs/agents/TASKS.md @@ -27,4 +27,5 @@ - [x] cells.py の列幅縮小ヒューリスティックを再検討(遅い行に境界があるケースで早期縮小しない方針に修正) - [x] 上記修正に対応するテストを追加(遅い行・右端に表があるケースを openpyxl で検証) -- [ ] Codecov 指摘の不足分を埋めるテスト追加(mcp: 
chunk_reader/extract_runner/server/tools/validate_input/io、core/cells) +- [x] Codecov 指摘の不足分を埋めるテスト追加(mcp: chunk_reader/extract_runner/server/tools/validate_input/io、core/cells) +- [x] CodeRabbit: Docstring coverage 80% を満たすよう不足分の docstring を追加 diff --git a/src/exstruct/core/cells.py b/src/exstruct/core/cells.py index 393462d..72c3593 100644 --- a/src/exstruct/core/cells.py +++ b/src/exstruct/core/cells.py @@ -147,6 +147,15 @@ def scaled(self, factor: float) -> TableScanLimits: def _resolve_table_scan_limits( mode: ExtractionMode, scan_limits: TableScanLimits | None ) -> TableScanLimits: + """Resolve effective scan limits for table detection. + + Args: + mode: Extraction mode (light/standard/verbose). + scan_limits: Optional explicit limits override. + + Returns: + Effective TableScanLimits for scanning. + """ if scan_limits is not None: return scan_limits if mode in {"standard", "verbose"}: @@ -676,6 +685,12 @@ def _normalize_rgb(rgb: str) -> str: def warn_once(key: str, message: str) -> None: + """Log a warning once per unique key. + + Args: + key: Deduplication key for the warning. + message: Warning message to log. 
+ """ if key not in _warned_keys: logger.warning(message) _warned_keys.add(key) @@ -802,30 +817,37 @@ def shrink_to_content( # noqa: C901 cols_n = len(vals[0]) if rows_n else 0 def to_str(x: object) -> str: + """Convert a value to a string for emptiness checks.""" return "" if x is None else str(x) def is_empty_value(x: object) -> bool: + """Return True when a value is considered empty.""" return to_str(x).strip() == "" def row_empty(i: int) -> bool: + """Return True when all values in a row are empty.""" return cols_n == 0 or all(is_empty_value(vals[i][j]) for j in range(cols_n)) def col_empty(j: int) -> bool: + """Return True when all values in a column are empty.""" return rows_n == 0 or all(is_empty_value(vals[i][j]) for i in range(rows_n)) def row_nonempty_ratio(i: int) -> float: + """Return the ratio of non-empty cells in a row.""" if cols_n == 0: return 0.0 cnt = sum(1 for j in range(cols_n) if not is_empty_value(vals[i][j])) return cnt / cols_n def col_nonempty_ratio(j: int) -> float: + """Return the ratio of non-empty cells in a column.""" if rows_n == 0: return 0.0 cnt = sum(1 for i in range(rows_n) if not is_empty_value(vals[i][j])) return cnt / rows_n def column_has_inside_border(col_idx: int) -> bool: + """Return True if the column has any inside borders.""" if not require_inside_border: return False try: @@ -842,6 +864,7 @@ def column_has_inside_border(col_idx: int) -> bool: return False def row_has_inside_border(row_idx: int) -> bool: + """Return True if the row has any inside borders.""" if not require_inside_border: return False try: @@ -858,6 +881,7 @@ def row_has_inside_border(row_idx: int) -> bool: return False def should_trim_col(j: int) -> bool: + """Return True when a column should be trimmed.""" if col_empty(j): return True if require_inside_border and not column_has_inside_border(j): @@ -867,6 +891,7 @@ def should_trim_col(j: int) -> bool: return False def should_trim_row(i: int) -> bool: + """Return True when a row should be trimmed.""" if 
row_empty(i): return True if require_inside_border and not row_has_inside_border(i): @@ -916,6 +941,17 @@ def load_border_maps_xlsx( # noqa: C901 *, scan_limits: TableScanLimits | None = None, ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int]: + """Load border presence maps for a worksheet using openpyxl. + + Args: + xlsx_path: Excel workbook path. + sheet_name: Target worksheet name. + scan_limits: Optional scan limits override. + + Returns: + Tuple of (has_border, top_edge, bottom_edge, left_edge, right_edge, + scan_max_row, scan_max_col). + """ with openpyxl_workbook(xlsx_path, data_only=True, read_only=False) as wb: if sheet_name not in wb.sheetnames: raise KeyError(f"Sheet '{sheet_name}' not found in {xlsx_path}") @@ -946,6 +982,7 @@ def load_border_maps_xlsx( # noqa: C901 col_has_border = np.zeros(shape[1], dtype=bool) def edge_has_style(edge: object) -> bool: + """Return True when a border edge has a usable style.""" if edge is None: return False style = getattr(edge, "style", None) @@ -1017,6 +1054,15 @@ def edge_has_style(edge: object) -> bool: def _detect_border_clusters_numpy( has_border: np.ndarray, min_size: int ) -> list[tuple[int, int, int, int]]: + """Detect border clusters using scipy labeling. + + Args: + has_border: Boolean border grid. + min_size: Minimum cluster size to keep. + + Returns: + List of bounding boxes (r1, c1, r2, c2). + """ from scipy.ndimage import label structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=np.uint8) @@ -1033,6 +1079,15 @@ def _detect_border_clusters_numpy( def _detect_border_clusters_python( has_border: np.ndarray, min_size: int ) -> list[tuple[int, int, int, int]]: + """Detect border clusters with a pure-Python BFS. + + Args: + has_border: Boolean border grid. + min_size: Minimum cluster size to keep. + + Returns: + List of bounding boxes (r1, c1, r2, c2). 
+ """ h, w = has_border.shape visited = np.zeros_like(has_border, dtype=bool) rects: list[tuple[int, int, int, int]] = [] @@ -1064,6 +1119,7 @@ def _detect_border_clusters_python( def _resolve_border_cluster_backend() -> Literal["auto", "python", "numpy"]: + """Resolve the border clustering backend from environment.""" value = os.getenv(_BORDER_CLUSTER_BACKEND_ENV, "").strip().lower() if value in {"python", "numpy"}: return "python" if value == "python" else "numpy" @@ -1073,6 +1129,15 @@ def _resolve_border_cluster_backend() -> Literal["auto", "python", "numpy"]: def detect_border_clusters( has_border: np.ndarray, min_size: int = 4 ) -> list[tuple[int, int, int, int]]: + """Detect border clusters using the selected backend. + + Args: + has_border: Boolean border grid. + min_size: Minimum cluster size to keep. + + Returns: + List of bounding boxes (r1, c1, r2, c2). + """ backend = _resolve_border_cluster_backend() if backend == "python": return _detect_border_clusters_python(has_border, min_size) @@ -1093,6 +1158,18 @@ def detect_border_clusters( def _get_values_block( ws: Worksheet, top: int, left: int, bottom: int, right: int ) -> list[list[object]]: + """Extract a rectangular block of values from a worksheet. + + Args: + ws: Target worksheet. + top: Top row (1-based). + left: Left column (1-based). + bottom: Bottom row (1-based). + right: Right column (1-based). + + Returns: + 2D list of cell values. + """ vals: list[list[object]] = [] for row in ws.iter_rows( min_row=top, max_row=bottom, min_col=left, max_col=right, values_only=True @@ -1102,6 +1179,14 @@ def _get_values_block( def _ensure_matrix(matrix: MatrixInput) -> list[list[object]]: + """Normalize input into a 2D list of values. + + Args: + matrix: Sequence of rows or flat sequence. + + Returns: + 2D list of values. 
+ """ rows_seq = list(matrix) if not rows_seq: return [] @@ -1205,6 +1290,7 @@ def _nonempty_clusters( boxes: list[tuple[int, int, int, int]] = [] def bfs(sr: int, sc: int) -> tuple[int, int, int, int]: + """Return bounding box of a connected component starting at (sr, sc).""" q = deque([(sr, sc)]) visited[sr][sc] = True ys = [sr] @@ -1233,6 +1319,7 @@ def bfs(sr: int, sc: int) -> tuple[int, int, int, int]: def _normalize_matrix(matrix: object) -> list[list[object]]: + """Normalize arbitrary matrix-like input into a 2D list.""" if matrix is None: return [] if isinstance(matrix, list): @@ -1243,6 +1330,7 @@ def _normalize_matrix(matrix: object) -> list[list[object]]: def _header_like_row(row: list[object]) -> bool: + """Return True if a row looks like a header row.""" nonempty = [v for v in row if not (v is None or str(v).strip() == "")] if len(nonempty) < 2: return False @@ -1258,6 +1346,7 @@ def _header_like_row(row: list[object]) -> bool: def _table_signal_score(matrix: Sequence[Sequence[object]]) -> float: + """Compute a heuristic table-likeliness score for a matrix.""" normalized = _ensure_matrix(matrix) density, coverage = _table_density_metrics(normalized) header = any(_header_like_row(r) for r in normalized[:2]) # check first 2 rows @@ -1324,17 +1413,38 @@ def shrink_to_content_openpyxl( # noqa: C901 right_edge: np.ndarray, min_nonempty_ratio: float = 0.0, ) -> tuple[int, int, int, int]: + """Trim a rectangle based on cell values and border heuristics (openpyxl). + + Args: + ws: Target worksheet. + top: Top row (1-based). + left: Left column (1-based). + bottom: Bottom row (1-based). + right: Right column (1-based). + require_inside_border: Whether to require inside borders when trimming. + top_edge: Top edge border map. + bottom_edge: Bottom edge border map. + left_edge: Left edge border map. + right_edge: Right edge border map. + min_nonempty_ratio: Minimum non-empty ratio to keep rows/cols. + + Returns: + Trimmed bounds as (top, left, bottom, right). 
+ """ vals = _get_values_block(ws, top, left, bottom, right) rows_n = bottom - top + 1 cols_n = right - left + 1 def to_str(x: object) -> str: + """Convert a value to a string for emptiness checks.""" return "" if x is None else str(x) def is_empty_value(x: object) -> bool: + """Return True when a value is considered empty.""" return to_str(x).strip() == "" def row_nonempty_ratio_local(i: int) -> float: + """Return the ratio of non-empty cells in a row slice.""" if cols_n <= 0: return 0.0 row = vals[i] @@ -1342,6 +1452,7 @@ def row_nonempty_ratio_local(i: int) -> float: return cnt / cols_n def col_nonempty_ratio_local(j: int) -> float: + """Return the ratio of non-empty cells in a column slice.""" if rows_n <= 0: return 0.0 cnt = 0 @@ -1351,6 +1462,7 @@ def col_nonempty_ratio_local(j: int) -> float: return cnt / rows_n def col_has_inside_border(j_abs: int) -> bool: + """Return True if a column has inside borders between neighbors.""" if not require_inside_border: return False count_pairs = 0 @@ -1364,6 +1476,7 @@ def col_has_inside_border(j_abs: int) -> bool: return count_pairs > 0 def row_has_inside_border(i_abs: int) -> bool: + """Return True if a row has inside borders between neighbors.""" if not require_inside_border: return False count_pairs = 0 @@ -1507,6 +1620,7 @@ def _detect_border_rectangles_xlwings( max_col = used.last_cell.column def cell_has_any_border(r: int, c: int) -> bool: + """Return True if a cell has any visible border.""" try: b = sheet.api.Cells(r, c).Borders for idx in ( @@ -1779,6 +1893,15 @@ def detect_tables_openpyxl( def detect_tables(sheet: xw.Sheet, *, mode: ExtractionMode = "standard") -> list[str]: + """Detect table-like ranges with COM and optional openpyxl fallback. + + Args: + sheet: xlwings worksheet. + mode: Extraction mode for scan limits. + + Returns: + List of table range strings. 
+ """ excel_path: Path | None = None try: excel_path = Path(sheet.book.fullname) diff --git a/src/exstruct/core/pipeline.py b/src/exstruct/core/pipeline.py index adcc1f7..260e847 100644 --- a/src/exstruct/core/pipeline.py +++ b/src/exstruct/core/pipeline.py @@ -929,6 +929,15 @@ def run_extraction_pipeline(inputs: ExtractionInputs) -> PipelineResult: state = PipelineState() def _fallback(message: str, reason: FallbackReason) -> PipelineResult: + """Run the fallback pipeline for non-COM extraction. + + Args: + message: Human-readable fallback reason. + reason: Structured fallback reason enum. + + Returns: + PipelineResult for the fallback run. + """ state.fallback_reason = reason log_fallback(logger, reason, message) logger.info("Fallback pipeline start: %s", reason.value) diff --git a/src/exstruct/core/shapes.py b/src/exstruct/core/shapes.py index 88fcff4..8ad7792 100644 --- a/src/exstruct/core/shapes.py +++ b/src/exstruct/core/shapes.py @@ -61,6 +61,7 @@ def coord_to_cell_by_edges( """ def find_index(edges: list[float], pos: float) -> int | None: + """Return the 1-based index for a position inside edge intervals.""" for i in range(1, len(edges)): if edges[i - 1] <= pos < edges[i]: return i diff --git a/src/exstruct/engine.py b/src/exstruct/engine.py index 2788adb..7a97ff9 100644 --- a/src/exstruct/engine.py +++ b/src/exstruct/engine.py @@ -24,6 +24,8 @@ class TableParams(TypedDict, total=False): + """Table detection parameter overrides.""" + table_score_threshold: float density_min: float coverage_min: float @@ -198,6 +200,7 @@ def __init__( options: StructOptions | None = None, output: OutputOptions | None = None, ) -> None: + """Initialize the engine with optional struct/output options.""" self.options = options or StructOptions() self.output = output or OutputOptions() @@ -207,6 +210,7 @@ def from_defaults() -> ExStructEngine: return ExStructEngine() def _apply_table_params(self) -> None: + """Apply table parameter overrides if configured.""" if 
self.options.table_params: set_table_detection_params(**self.options.table_params) @@ -315,6 +319,15 @@ def _filter_sheet( def _filter_workbook( self, wb: WorkbookData, *, include_auto_override: bool | None = None ) -> WorkbookData: + """Return a filtered workbook based on output flags. + + Args: + wb: Original workbook data. + include_auto_override: Optional override for auto print areas. + + Returns: + Filtered WorkbookData. + """ filtered = { name: self._filter_sheet(sheet, include_auto_override=include_auto_override) for name, sheet in wb.sheets.items() From 7a99bd7b7d2555795e82020004fc9256569c48ce Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 23 Jan 2026 20:32:43 +0900 Subject: [PATCH 15/18] =?UTF-8?q?MCP=E6=A9=9F=E8=83=BD=E3=81=AE=E6=8B=A1?= =?UTF-8?q?=E5=BC=B5=EF=BC=9AExtractOptions=E3=82=AF=E3=83=A9=E3=82=B9?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E5=85=A5=E5=8A=9B?= =?UTF-8?q?=E3=83=91=E3=82=B9=E3=81=AE=E6=A4=9C=E8=A8=BC=E3=82=92=E5=BC=B7?= =?UTF-8?q?=E5=8C=96=E3=80=82=E3=83=86=E3=82=B9=E3=83=88=E3=82=B1=E3=83=BC?= =?UTF-8?q?=E3=82=B9=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=81=A6=E6=96=B0?= =?UTF-8?q?=E6=A9=9F=E8=83=BD=E3=82=92=E7=A2=BA=E8=AA=8D=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/pytest.yml | 4 +- src/exstruct/mcp/__init__.py | 9 +++- src/exstruct/mcp/chunk_reader.py | 4 +- src/exstruct/mcp/extract_runner.py | 59 ++++++++++++++++++++++++-- src/exstruct/mcp/server.py | 22 +++++++--- src/exstruct/mcp/tools.py | 5 ++- tests/mcp/test_chunk_reader.py | 8 ++++ tests/mcp/test_extract_runner_utils.py | 39 +++++++++++++++++ tests/mcp/test_server.py | 14 ++++-- 9 files changed, 146 insertions(+), 18 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 076c3ad..90d045e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -26,8 +26,8 @@ jobs: - name: Install dependencies run: | python -m pip 
install --upgrade pip - pip install -e .[yaml] - pip install pytest pytest-cov pytest-mock anyio + pip install -e .[yaml,mcp] + pip install pytest pytest-cov pytest-mock - name: Run tests (non-COM suite) if: runner.os != 'Windows' run: | diff --git a/src/exstruct/mcp/__init__.py b/src/exstruct/mcp/__init__.py index 5adc2aa..bbdfd8c 100644 --- a/src/exstruct/mcp/__init__.py +++ b/src/exstruct/mcp/__init__.py @@ -8,7 +8,13 @@ ReadJsonChunkResult, read_json_chunk, ) -from .extract_runner import ExtractRequest, ExtractResult, WorkbookMeta, run_extract +from .extract_runner import ( + ExtractOptions, + ExtractRequest, + ExtractResult, + WorkbookMeta, + run_extract, +) from .io import PathPolicy from .tools import ( ExtractToolInput, @@ -30,6 +36,7 @@ __all__ = [ "ExtractRequest", "ExtractResult", + "ExtractOptions", "ExtractToolInput", "ExtractToolOutput", "PathPolicy", diff --git a/src/exstruct/mcp/chunk_reader.py b/src/exstruct/mcp/chunk_reader.py index 06bcae0..408c9f9 100644 --- a/src/exstruct/mcp/chunk_reader.py +++ b/src/exstruct/mcp/chunk_reader.py @@ -92,11 +92,13 @@ def _resolve_output_path(path: Path, *, policy: PathPolicy | None) -> Path: Raises: FileNotFoundError: If the output file does not exist. - ValueError: If the path violates the policy. + ValueError: If the path violates the policy or is not a file. 
""" resolved = policy.ensure_allowed(path) if policy else path.resolve() if not resolved.exists(): raise FileNotFoundError(f"Output file not found: {resolved}") + if not resolved.is_file(): + raise ValueError(f"Output path is not a file: {resolved}") return resolved diff --git a/src/exstruct/mcp/extract_runner.py b/src/exstruct/mcp/extract_runner.py index dcf9a44..2b287ec 100644 --- a/src/exstruct/mcp/extract_runner.py +++ b/src/exstruct/mcp/extract_runner.py @@ -2,7 +2,7 @@ import logging from pathlib import Path -from typing import Any, Literal +from typing import Literal from pydantic import BaseModel, Field @@ -22,6 +22,24 @@ class WorkbookMeta(BaseModel): sheet_count: int = Field(default=0, description="Total number of sheets.") +class ExtractOptions(BaseModel): + """Optional extraction configuration for MCP requests.""" + + pretty: bool | None = Field(default=None, description="Pretty-print JSON output.") + indent: int | None = Field( + default=None, description="Indent width for JSON output." + ) + sheets_dir: Path | None = Field( + default=None, description="Directory for per-sheet outputs." + ) + print_areas_dir: Path | None = Field( + default=None, description="Directory for per-print-area outputs." + ) + auto_page_breaks_dir: Path | None = Field( + default=None, description="Directory for auto page-break outputs." 
+ ) + + class ExtractRequest(BaseModel): """Input model for ExStruct MCP extraction.""" @@ -31,7 +49,7 @@ class ExtractRequest(BaseModel): out_dir: Path | None = None out_name: str | None = None on_conflict: OnConflictPolicy = "overwrite" - options: dict[str, Any] = Field(default_factory=dict) + options: ExtractOptions = Field(default_factory=ExtractOptions) class ExtractResult(BaseModel): @@ -82,11 +100,24 @@ def run_extract( ) _ensure_output_dir(output_path) + options = request.options + sheets_dir = _resolve_optional_dir(options.sheets_dir, policy=policy) + print_areas_dir = _resolve_optional_dir(options.print_areas_dir, policy=policy) + auto_page_breaks_dir = _resolve_optional_dir( + options.auto_page_breaks_dir, policy=policy + ) + pretty = options.pretty if options.pretty is not None else False + process_excel( file_path=resolved_input, output_path=output_path, out_fmt=request.format, mode=request.mode, + pretty=pretty, + indent=options.indent, + sheets_dir=sheets_dir, + print_areas_dir=print_areas_dir, + auto_page_breaks_dir=auto_page_breaks_dir, ) meta, meta_warnings = _try_read_workbook_meta(resolved_input) warnings.extend(meta_warnings) @@ -110,11 +141,13 @@ def _resolve_input_path(path: Path, *, policy: PathPolicy | None) -> Path: Raises: FileNotFoundError: If the input file does not exist. - ValueError: If the path violates the policy. + ValueError: If the path violates the policy or is not a file. """ resolved = policy.ensure_allowed(path) if policy else path.resolve() if not resolved.exists(): raise FileNotFoundError(f"Input file not found: {resolved}") + if not resolved.is_file(): + raise ValueError(f"Input path is not a file: {resolved}") return resolved @@ -177,6 +210,26 @@ def _ensure_output_dir(path: Path) -> None: path.parent.mkdir(parents=True, exist_ok=True) +def _resolve_optional_dir( + path: Path | None, *, policy: PathPolicy | None +) -> Path | None: + """Resolve an optional output directory with policy enforcement. 
+ + Args: + path: Optional directory path. + policy: Optional path policy. + + Returns: + Resolved path or None. + + Raises: + ValueError: If the path violates the policy. + """ + if path is None: + return None + return policy.ensure_allowed(path) if policy else path.resolve() + + def _format_suffix(fmt: Literal["json", "yaml", "yml", "toon"]) -> str: """Return suffix for output format. diff --git a/src/exstruct/mcp/server.py b/src/exstruct/mcp/server.py index 18c33f3..35ce12b 100644 --- a/src/exstruct/mcp/server.py +++ b/src/exstruct/mcp/server.py @@ -195,7 +195,7 @@ def _register_tools( policy: Path policy for filesystem access. """ - async def _extract_tool( + async def _extract_tool( # pylint: disable=redefined-builtin xlsx_path: str, mode: ExtractionMode = "standard", format: Literal["json", "yaml", "yml", "toon"] = "json", # noqa: A002 @@ -239,7 +239,7 @@ async def _extract_tool( tool = app.tool(name="exstruct.extract") tool(_extract_tool) - def _read_json_chunk_tool( + async def _read_json_chunk_tool( # pylint: disable=redefined-builtin out_path: str, sheet: str | None = None, max_bytes: int = 50_000, @@ -265,12 +265,18 @@ def _read_json_chunk_tool( filter=_coerce_filter(filter), cursor=cursor, ) - return run_read_json_chunk_tool(payload, policy=policy) + work = functools.partial( + run_read_json_chunk_tool, + payload, + policy=policy, + ) + result = cast(ReadJsonChunkToolOutput, await anyio.to_thread.run_sync(work)) + return result chunk_tool = app.tool(name="exstruct.read_json_chunk") chunk_tool(_read_json_chunk_tool) - def _validate_input_tool(xlsx_path: str) -> ValidateInputToolOutput: + async def _validate_input_tool(xlsx_path: str) -> ValidateInputToolOutput: """Handle input validation tool call. Args: @@ -280,7 +286,13 @@ def _validate_input_tool(xlsx_path: str) -> ValidateInputToolOutput: Validation result payload. 
""" payload = ValidateInputToolInput(xlsx_path=xlsx_path) - return run_validate_input_tool(payload, policy=policy) + work = functools.partial( + run_validate_input_tool, + payload, + policy=policy, + ) + result = cast(ValidateInputToolOutput, await anyio.to_thread.run_sync(work)) + return result validate_tool = app.tool(name="exstruct.validate_input") validate_tool(_validate_input_tool) diff --git a/src/exstruct/mcp/tools.py b/src/exstruct/mcp/tools.py index b77b31e..c857b22 100644 --- a/src/exstruct/mcp/tools.py +++ b/src/exstruct/mcp/tools.py @@ -1,7 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import Any, Literal +from typing import Literal from pydantic import BaseModel, Field @@ -14,6 +14,7 @@ read_json_chunk, ) from .extract_runner import ( + ExtractOptions, ExtractRequest, ExtractResult, OnConflictPolicy, @@ -37,7 +38,7 @@ class ExtractToolInput(BaseModel): out_dir: str | None = None out_name: str | None = None on_conflict: OnConflictPolicy | None = None - options: dict[str, Any] = Field(default_factory=dict) + options: ExtractOptions = Field(default_factory=ExtractOptions) class ExtractToolOutput(BaseModel): diff --git a/tests/mcp/test_chunk_reader.py b/tests/mcp/test_chunk_reader.py index 842e9c9..d1113cb 100644 --- a/tests/mcp/test_chunk_reader.py +++ b/tests/mcp/test_chunk_reader.py @@ -109,6 +109,14 @@ def test_read_json_chunk_cursor_beyond_rows(tmp_path: Path) -> None: read_json_chunk(request) +def test_read_json_chunk_rejects_directory(tmp_path: Path) -> None: + out_dir = tmp_path / "out" + out_dir.mkdir() + request = ReadJsonChunkRequest(out_path=out_dir, sheet="Sheet1") + with pytest.raises(ValueError): + read_json_chunk(request) + + def test_read_json_chunk_rejects_non_object_root(tmp_path: Path) -> None: out = tmp_path / "out.json" out.write_text(json.dumps([1, 2, 3]), encoding="utf-8") diff --git a/tests/mcp/test_extract_runner_utils.py b/tests/mcp/test_extract_runner_utils.py index 69564e4..2a68504 100644 --- 
a/tests/mcp/test_extract_runner_utils.py +++ b/tests/mcp/test_extract_runner_utils.py @@ -15,6 +15,13 @@ def test_resolve_input_path_missing(tmp_path: Path) -> None: extract_runner._resolve_input_path(tmp_path / "missing.xlsx", policy=None) +def test_resolve_input_path_rejects_directory(tmp_path: Path) -> None: + path = tmp_path / "input.xlsx" + path.mkdir() + with pytest.raises(ValueError): + extract_runner._resolve_input_path(path, policy=None) + + def test_format_suffix() -> None: assert extract_runner._format_suffix("yml") == ".yml" assert extract_runner._format_suffix("json") == ".json" @@ -106,6 +113,38 @@ def _noop(*_args: object, **_kwargs: object) -> None: assert out_dir.exists() +def test_run_extract_applies_options( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + input_path = tmp_path / "input.xlsx" + input_path.write_text("x", encoding="utf-8") + capture: dict[str, object] = {} + + def _capture(*_args: object, **kwargs: object) -> None: + capture.update(kwargs) + + monkeypatch.setattr(extract_runner, "process_excel", _capture) + monkeypatch.setattr(extract_runner, "_try_read_workbook_meta", lambda _: (None, [])) + + options = extract_runner.ExtractOptions( + pretty=True, + indent=2, + sheets_dir=tmp_path / "sheets", + print_areas_dir=tmp_path / "print_areas", + ) + request = extract_runner.ExtractRequest( + xlsx_path=input_path, + out_dir=tmp_path, + format="json", + options=options, + ) + extract_runner.run_extract(request) + assert capture["pretty"] is True + assert capture["indent"] == 2 + assert capture["sheets_dir"] == options.sheets_dir + assert capture["print_areas_dir"] == options.print_areas_dir + + def test_try_read_workbook_meta_import_error( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py index a2b8fe7..4391ada 100644 --- a/tests/mcp/test_server.py +++ b/tests/mcp/test_server.py @@ -140,12 +140,18 @@ async def fake_run_sync(func: Callable[[], object]) 
-> object: extract_tool = cast(Callable[..., Awaitable[object]], app.tools["exstruct.extract"]) anyio.run(_call_async, extract_tool, {"xlsx_path": "in.xlsx"}) - cast(Callable[..., object], app.tools["exstruct.read_json_chunk"])( - out_path="out.json", filter={"rows": [1, 2]} + read_chunk_tool = cast( + Callable[..., Awaitable[object]], app.tools["exstruct.read_json_chunk"] ) - cast(Callable[..., object], app.tools["exstruct.validate_input"])( - xlsx_path="in.xlsx" + anyio.run( + _call_async, + read_chunk_tool, + {"out_path": "out.json", "filter": {"rows": [1, 2]}}, ) + validate_tool = cast( + Callable[..., Awaitable[object]], app.tools["exstruct.validate_input"] + ) + anyio.run(_call_async, validate_tool, {"xlsx_path": "in.xlsx"}) assert calls["extract"][2] == "rename" chunk_call = cast(tuple[ReadJsonChunkToolInput, PathPolicy], calls["chunk"]) From e9aa9e64171aa5886707aa9167da23c9c6b751fd Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Sat, 24 Jan 2026 08:53:33 +0900 Subject: [PATCH 16/18] =?UTF-8?q?MCP=E3=82=B5=E3=83=BC=E3=83=90=E3=83=BC?= =?UTF-8?q?=E3=81=AE=E8=BF=BD=E5=8A=A0=E3=81=A8=E3=83=84=E3=83=BC=E3=83=AB?= =?UTF-8?q?=E3=81=AE=E5=AE=9A=E7=BE=A9=E3=82=92=E6=9B=B4=E6=96=B0=EF=BC=9A?= =?UTF-8?q?`exstruct.extract`=E3=80=81`exstruct.read=5Fjson=5Fchunk`?= =?UTF-8?q?=E3=80=81`exstruct.validate=5Finput`=E3=82=92=E3=81=9D=E3=82=8C?= =?UTF-8?q?=E3=81=9E=E3=82=8C`exstruct=5Fextract`=E3=80=81`exstruct=5Fread?= =?UTF-8?q?=5Fjson=5Fchunk`=E3=80=81`exstruct=5Fvalidate=5Finput`=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=80=82=E3=83=90=E3=83=BC=E3=82=B8=E3=83=A7?= =?UTF-8?q?=E3=83=B3=E3=82=920.4.1=E3=81=AB=E6=9B=B4=E6=96=B0=E3=81=97?= =?UTF-8?q?=E3=80=81=E3=83=AA=E3=83=AA=E3=83=BC=E3=82=B9=E3=83=8E=E3=83=BC?= =?UTF-8?q?=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 144 +++++++++++++++++++++++++++++++++++ README.ja.md | 6 +- README.md | 6 +- docs/README.en.md | 6 +- 
docs/README.ja.md | 6 +- docs/agents/FEATURE_SPEC.md | 8 +- docs/agents/TASKS.md | 6 +- docs/mcp.md | 10 +-- docs/release-notes/v0.4.0.md | 26 +++++++ mkdocs.yml | 1 + precommit.log | 0 pyproject.toml | 8 +- src/exstruct/mcp/server.py | 6 +- tests/mcp/test_server.py | 6 +- uv.lock | 6 +- 15 files changed, 212 insertions(+), 33 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 docs/release-notes/v0.4.0.md create mode 100644 precommit.log diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..ee39226 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,144 @@ +# Changelog + +All notable changes to this project are documented in this file. This changelog follows the [Keep a Changelog](https://keepachangelog.com/) format and covers changes from v0.2.70 onward. + +## [Unreleased] + +### Added + +- _No unreleased changes yet._ + +## [0.4.0] - 2026-01-23 + +### Added + +- Added a stdio MCP server (`exstruct-mcp`) with tool discovery and invocation (PR [#47](https://github.com/harumiWeb/exstruct/pull/47)). +- Added MCP tools: `exstruct_extract`, `exstruct_read_json_chunk`, and `exstruct_validate_input` (PR [#47](https://github.com/harumiWeb/exstruct/pull/47)). +- Added MCP `exstruct[mcp]` extras with required dependencies, plus documentation and examples for agent configuration (PR [#47](https://github.com/harumiWeb/exstruct/pull/47)). +- Added MCP safety controls: root allowlist enforcement, deny-glob support, and conflict handling (`--on-conflict`) (PR [#47](https://github.com/harumiWeb/exstruct/pull/47)). + +### Fixed + +- Pinned MCP HTTP client dependency to stable `httpx<1.0` to avoid runtime errors in MCP initialization (PR [#47](https://github.com/harumiWeb/exstruct/pull/47)). + +## [0.3.7] - 2026-01-23 + +### Added + +- Added formula extraction via a new `formulas_map` output field (maps formula strings to cell coordinates), enabled by default in **verbose** mode (PR [#44](https://github.com/harumiWeb/exstruct/pull/44)). 
+ +### Fixed + +- Improved print-area exports to be more robust: all print areas are now numbered safely and errors during print area restoration are handled gracefully, ensuring no missing pages or crashes. + +## [0.3.6] - 2026-01-12 + +### Added + +- Added an option to run Excel rendering in a separate subprocess (enabled by default) to improve stability on large workbooks. This isolates memory usage during PDF/PNG generation. Set `EXSTRUCT_RENDER_SUBPROCESS=0` to disable this behavior if needed (PR [#41](https://github.com/harumiWeb/exstruct/pull/41)). + +### Fixed + +- Fixed sheet image exports for multi-page print ranges: previously only the first page image was output; now all pages are exported with suffixes `_pNN` for page 2 and beyond (PR [#41](https://github.com/harumiWeb/exstruct/pull/41)). +- Fixed image exports for legacy `.xls` files by automatically converting them to `.xlsx` via Excel before rendering. This prevents failures when exporting images from older Excel formats (PR [#41](https://github.com/harumiWeb/exstruct/pull/41)). + +## [0.3.5] - 2026-01-06 + +### Breaking Changes + +- The JSON structure for `merged_cells` in outputs has changed (PR [#40](https://github.com/harumiWeb/exstruct/pull/40)). In versions <= 0.3.2, `merged_cells` was an array of objects; in v0.3.5 it is now an object with a `schema` definition and `items` list of merged cell ranges. + +### Migration Guide + +- If upgrading from an older version, update any code that parses `merged_cells`. Expect an object with `schema` and `items` instead of a simple list. Refer to the updated README for detailed transition guidance on the new format. + +### Added + +- Added a configuration flag `include_merged_values_in_rows` in `StructOptions` to control whether values from merged cells are duplicated in the main `rows` output. This flag defaults to **True** for backward compatibility (PR [#40](https://github.com/harumiWeb/exstruct/pull/40)). 
+ +### Changed + +- `merged_cells` output format now uses a compact schema-based structure (see Breaking Changes above). +- Empty merged cells (merged ranges with no content) are now represented as a single space `" "` in the output, to clearly denote an intentional blank (PR [#40](https://github.com/harumiWeb/exstruct/pull/40)). + +## [0.3.2] - 2026-01-05 + +### Added + +- Added extraction of merged cell ranges. Each sheet's output now includes a `merged_cells` field listing all merged cell ranges with their coordinates (PR [#35](https://github.com/harumiWeb/exstruct/pull/35)). +- Added options to control merged cell output: you can disable including merged cells via `StructOptions.include_merged_cells` or `OutputOptions.filters.include_merged_cells` if you do not want this data in the output (PR [#35](https://github.com/harumiWeb/exstruct/pull/35)). + +### Changed + +- Standard and verbose mode outputs now include `merged_cells` by default (PR [#35](https://github.com/harumiWeb/exstruct/pull/35)). If your workflow does not need merged cell information, use the provided options to omit it. + +## [0.3.1] - 2025-12-28 + +### Breaking Changes + +- The shape output format has changed to accommodate SmartArt extraction. SmartArt shapes now use a new nested node structure and some previously existing fields have been removed or renamed: + - Removed output fields `layout_name`, `roots`, and `children` for SmartArt. These are replaced by a new `layout` field and a nested `nodes` list (with child nodes under `kids`). + - The `type` field is no longer present on Arrow (connector) and SmartArt shape outputs (it remains only for regular shape types). + +### Migration Guide + +- Update any code that parses shape outputs, especially for SmartArt diagrams. Instead of `layout_name` and nested `children`, use the new `layout` and `nodes` (with `kids`) format for SmartArt. 
Arrow and SmartArt objects will not include a `type` field anymore, so ensure your code doesn’t assume its presence. + +### Added + +- Added **SmartArt extraction** support (Excel COM required). SmartArt diagrams in Excel are now parsed and included in the output, with each SmartArt represented by a `kind: "smartart"` shape containing a `layout` name and a hierarchical `nodes` structure of text entries. +- The shape model now differentiates between regular shapes, connectors (arrows), and SmartArt, providing clearer semantics in the output JSON. + +### Changed + +- Internal shape handling has been refactored to support SmartArt: shapes of `kind: "arrow"` (connectors) and `kind: "smartart"` are now separate from standard shapes, each with their appropriate fields. This improves clarity but may require the adjustments noted in the Migration Guide. + +## [0.3.0] - 2025-12-27 + +### Changed + +- Major **internal refactor** of the processing pipeline and code structure to improve maintainability and enable future features (PR [#23](https://github.com/harumiWeb/exstruct/pull/23)). There are **no user-facing API changes** or behavior changes in this release. + +## [0.2.90] - 2025-12-24 + +### Added + +- Added extraction of cell background colors via a new `colors_map` field in each sheet’s output. The `colors_map` maps color hex codes to lists of cell coordinates that have that background color. In Excel COM environments, this includes evaluation of conditional formatting colors (PR [#21](https://github.com/harumiWeb/exstruct/pull/21)). +- Added `ColorsOptions` (e.g., `include_default_background` and `ignore_colors`) to allow configuration of color extraction. You can exclude default fill colors or ignore specific colors to reduce output size. + +### Changed + +- **Verbose** mode now enables `colors_map` by default, so detailed color information will be included unless explicitly disabled. 
Non-COM environments still extract static fill colors via openpyxl, but cannot detect conditional formats. + +## [0.2.80] - 2025-12-21 + +### Added + +- Added unique shape IDs for more robust flowchart tracing: each non-connector shape now receives a sequential `id` per sheet for stable reference in connectors. +- Connector (arrow) shapes now include references to their connected shapes: each connector output has `begin_id` and `end_id` fields pointing to the IDs of the shapes it connects (via Excel COM’s ConnectorFormat) (PR [#15](https://github.com/harumiWeb/exstruct/pull/15)). +- Added extra metadata for connectors such as arrow style, direction, and rotation in the output JSON, to enrich flowchart and diagram analysis. + +## [0.2.71] - 2025-12-17 + +### Added + +- Added CLI support for exporting **auto page-break** views. A new option `--auto-page-breaks-dir` allows saving each worksheet’s automatic page-break layout to separate files (when running on a system with Excel COM available). +- Documentation and help text have been updated to describe the new option, and tests were added to ensure it only appears when supported. + +### Changed + +- The CLI now dynamically detects Excel/COM availability and will only register COM-specific flags (such as `--auto-page-breaks-dir`) when Excel is usable. This prevents showing or using unsupported options on environments where Excel is not available. + +## [0.2.70] - 2025-12-15 + +### Added + +- Added more flexible file path handling: you can now pass file paths as simple `str` strings in addition to `pathlib.Path` objects for all engine inputs and outputs. All paths (including those for PDF/PNG rendering) are internally normalized to `Path` for consistent behavior. + +### Changed + +- Changed export behavior when only "secondary" outputs are requested. 
If you call the export function with `output_path=None` and specify only auxiliary directories (such as `sheets_dir`, `print_areas_dir`, or `auto_page_breaks_dir`), the tool will **no longer write to standard output** by default. It will only produce the specified secondary output files. + +### Migration Guide + +- If you need the combined output on stdout (as previous versions would do by default), make sure to provide an explicit `output_path` or use a `stream` in the export options. This will ensure that the main output is still sent to standard output when using secondary output directories. diff --git a/README.ja.md b/README.ja.md index 100ac17..122a1f9 100644 --- a/README.ja.md +++ b/README.ja.md @@ -60,9 +60,9 @@ exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict re 利用可能なツール: -- `exstruct.extract` -- `exstruct.read_json_chunk` -- `exstruct.validate_input` +- `exstruct_extract` +- `exstruct_read_json_chunk` +- `exstruct_validate_input` 注意点: diff --git a/README.md b/README.md index 5b77696..1d78e3e 100644 --- a/README.md +++ b/README.md @@ -63,9 +63,9 @@ exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict re Available tools: -- `exstruct.extract` -- `exstruct.read_json_chunk` -- `exstruct.validate_input` +- `exstruct_extract` +- `exstruct_read_json_chunk` +- `exstruct_validate_input` Notes: diff --git a/docs/README.en.md b/docs/README.en.md index 54b4740..c63ea03 100644 --- a/docs/README.en.md +++ b/docs/README.en.md @@ -63,9 +63,9 @@ exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log --on-conflict re Available tools: -- `exstruct.extract` -- `exstruct.read_json_chunk` -- `exstruct.validate_input` +- `exstruct_extract` +- `exstruct_read_json_chunk` +- `exstruct_validate_input` Notes: diff --git a/docs/README.ja.md b/docs/README.ja.md index 479ec8f..17595ef 100644 --- a/docs/README.ja.md +++ b/docs/README.ja.md @@ -60,9 +60,9 @@ exstruct-mcp --root C:\data --log-file C:\logs\exstruct-mcp.log 
--on-conflict re 利用可能なツール: -- `exstruct.extract` -- `exstruct.read_json_chunk` -- `exstruct.validate_input` +- `exstruct_extract` +- `exstruct_read_json_chunk` +- `exstruct_validate_input` 注意点: diff --git a/docs/agents/FEATURE_SPEC.md b/docs/agents/FEATURE_SPEC.md index 23b0e49..c92baed 100644 --- a/docs/agents/FEATURE_SPEC.md +++ b/docs/agents/FEATURE_SPEC.md @@ -14,7 +14,7 @@ ### スコープ(MVP) - stdio トランスポートの MCP サーバー -- ツール: `exstruct.extract` +- ツール: `exstruct_extract` - 抽出結果は **必ずファイル出力**(MCP 応答はパス + 軽いメタ情報) - 安全なパス制約(allowlist / deny glob) @@ -46,19 +46,19 @@ ### MCP ツール仕様(案) -#### `exstruct.extract` +#### `exstruct_extract` - 入力: `xlsx_path`, `mode`, `format`, `out_dir?`, `out_name?`, `options?` - 出力: `out_path`, `workbook_meta`, `warnings`, `engine` - 実装: 内部 API を優先、フォールバックで CLI サブプロセス -#### `exstruct.read_json_chunk`(実用化フェーズ) +#### `exstruct_read_json_chunk`(実用化フェーズ) - 入力: `out_path`, `sheet?`, `max_bytes?`, `filter?`, `cursor?` - 出力: `chunk`, `next_cursor?` - 方針: 返却サイズを抑制し、段階的に取得できること -#### `exstruct.validate_input`(実用化フェーズ) +#### `exstruct_validate_input`(実用化フェーズ) - 入力: `xlsx_path` - 出力: `is_readable`, `warnings`, `errors` diff --git a/docs/agents/TASKS.md b/docs/agents/TASKS.md index 7921365..497ec31 100644 --- a/docs/agents/TASKS.md +++ b/docs/agents/TASKS.md @@ -8,7 +8,7 @@ - [x] 依存追加: `pyproject.toml` に `exstruct[mcp]` の extras を追加 - [x] エントリポイント: `exstruct-mcp = exstruct.mcp.server:main` を定義 - [x] MCP 基盤: `src/exstruct/mcp/server.py` を追加(stdio サーバー起動) -- [x] ツール定義: `src/exstruct/mcp/tools.py` に `exstruct.extract` を実装 +- [x] ツール定義: `src/exstruct/mcp/tools.py` に `exstruct_extract` を実装 - [x] パス制約: `src/exstruct/mcp/io.py` に allowlist / deny glob を実装 - [x] 抽出実行: `src/exstruct/mcp/extract_runner.py` に内部 API 優先の実行層を実装 - [x] 出力モデル: Pydantic で入出力モデルを定義(mypy strict / Ruff 遵守) @@ -17,8 +17,8 @@ ## MCPサーバー(実用化) -- [x] `exstruct.read_json_chunk` を追加(大容量 JSON 対応) -- [x] `exstruct.validate_input` を追加(事前検証) +- [x] `exstruct_read_json_chunk` を追加(大容量 JSON 対応) +- [x] 
`exstruct_validate_input` を追加(事前検証) - [x] `--on-conflict` の出力衝突ポリシー実装 - [x] Windows/非Windows の読み取り差分を明文化 - [x] 最低限のテスト追加(パス制約 / 入出力モデル / 例外) diff --git a/docs/mcp.md b/docs/mcp.md index cdd14d2..7bd23ab 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -32,14 +32,14 @@ exstruct-mcp --root C:\\data --log-file C:\\logs\\exstruct-mcp.log --on-conflict ## Tools -- `exstruct.extract` -- `exstruct.read_json_chunk` -- `exstruct.validate_input` +- `exstruct_extract` +- `exstruct_read_json_chunk` +- `exstruct_validate_input` ## Basic flow -1. Call `exstruct.extract` to generate the output JSON file -2. Use `exstruct.read_json_chunk` to read only the parts you need +1. Call `exstruct_extract` to generate the output JSON file +2. Use `exstruct_read_json_chunk` to read only the parts you need ## AI agent configuration examples diff --git a/docs/release-notes/v0.4.0.md b/docs/release-notes/v0.4.0.md new file mode 100644 index 0000000..a4938cf --- /dev/null +++ b/docs/release-notes/v0.4.0.md @@ -0,0 +1,26 @@ +# v0.4.0 Release Notes + +This release introduces MCP server support for ExStruct, enabling agent tool +integration with a stdio server, plus new extraction utilities, policies, and +expanded tests and documentation. + +## Highlights + +- Added MCP stdio server (`exstruct-mcp`) with tools: + - `exstruct_extract` + - `exstruct_read_json_chunk` (cursor + filters for large outputs) + - `exstruct_validate_input` (pre-checks for files and COM availability) +- Introduced MCP path policy (allowlist + deny globs), output conflict handling + (`--on-conflict`), and optional warmup for latency reduction. +- Added typed extraction options for MCP runs (pretty/indent and side outputs), + and clarified file validation errors. +- Improved openpyxl border scanning (deferred column shrink) for late-table + detection; added backend selection for border clustering. 
+- Documentation updates for MCP setup and agent configuration (site nav and + README updates), plus expanded tests and CI dependency installation. + +## Notes + +- MCP dependencies are optional: install with `pip install exstruct[mcp]`. +- `read_json_chunk` enforces file paths and chunk size limits; use `sheet`/ + `filter` when outputs are large. diff --git a/mkdocs.yml b/mkdocs.yml index 6ecf76b..feb386f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -28,6 +28,7 @@ nav: - MCP Server: mcp.md - Concept / Why ExStruct?: concept.md - Release Notes: + - v0.4.0: release-notes/v0.4.0.md - v0.3.7: release-notes/v0.3.7.md - v0.3.6: release-notes/v0.3.6.md - v0.3.5: release-notes/v0.3.5.md diff --git a/precommit.log b/precommit.log new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index cfe8053..0504343 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "exstruct" -version = "0.3.7" +version = "0.4.1" description = "Excel to structured JSON (tables, shapes, charts) for LLM/RAG pipelines" readme = "README.md" license = { file = "LICENSE" } @@ -43,11 +43,15 @@ all = [ "pypdfium2>=5.1.0", "Pillow>=12.0.0", "mcp>=1.6.0,<2.0.0", + "httpx>=0.27,<1.0", ] yaml = ["pyyaml>=6.0.3"] toon = ["python-toon>=0.1.3"] render = ["pypdfium2>=5.1.0", "Pillow>=12.0.0"] -mcp = ["mcp>=1.6.0,<2.0.0"] +mcp = [ + "mcp>=1.6.0,<2.0.0", + "httpx>=0.27,<1.0", +] [project.scripts] exstruct = "exstruct.cli.main:main" diff --git a/src/exstruct/mcp/server.py b/src/exstruct/mcp/server.py index 35ce12b..5cede48 100644 --- a/src/exstruct/mcp/server.py +++ b/src/exstruct/mcp/server.py @@ -236,7 +236,7 @@ async def _extract_tool( # pylint: disable=redefined-builtin result = cast(ExtractToolOutput, await anyio.to_thread.run_sync(work)) return result - tool = app.tool(name="exstruct.extract") + tool = app.tool(name="exstruct_extract") tool(_extract_tool) async def _read_json_chunk_tool( # pylint: disable=redefined-builtin @@ -273,7 +273,7 @@ async 
def _read_json_chunk_tool( # pylint: disable=redefined-builtin result = cast(ReadJsonChunkToolOutput, await anyio.to_thread.run_sync(work)) return result - chunk_tool = app.tool(name="exstruct.read_json_chunk") + chunk_tool = app.tool(name="exstruct_read_json_chunk") chunk_tool(_read_json_chunk_tool) async def _validate_input_tool(xlsx_path: str) -> ValidateInputToolOutput: @@ -294,7 +294,7 @@ async def _validate_input_tool(xlsx_path: str) -> ValidateInputToolOutput: result = cast(ValidateInputToolOutput, await anyio.to_thread.run_sync(work)) return result - validate_tool = app.tool(name="exstruct.validate_input") + validate_tool = app.tool(name="exstruct_validate_input") validate_tool(_validate_input_tool) diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py index 4391ada..b90131f 100644 --- a/tests/mcp/test_server.py +++ b/tests/mcp/test_server.py @@ -138,10 +138,10 @@ async def fake_run_sync(func: Callable[[], object]) -> object: server._register_tools(app, policy, default_on_conflict="rename") - extract_tool = cast(Callable[..., Awaitable[object]], app.tools["exstruct.extract"]) + extract_tool = cast(Callable[..., Awaitable[object]], app.tools["exstruct_extract"]) anyio.run(_call_async, extract_tool, {"xlsx_path": "in.xlsx"}) read_chunk_tool = cast( - Callable[..., Awaitable[object]], app.tools["exstruct.read_json_chunk"] + Callable[..., Awaitable[object]], app.tools["exstruct_read_json_chunk"] ) anyio.run( _call_async, @@ -149,7 +149,7 @@ async def fake_run_sync(func: Callable[[], object]) -> object: {"out_path": "out.json", "filter": {"rows": [1, 2]}}, ) validate_tool = cast( - Callable[..., Awaitable[object]], app.tools["exstruct.validate_input"] + Callable[..., Awaitable[object]], app.tools["exstruct_validate_input"] ) anyio.run(_call_async, validate_tool, {"xlsx_path": "in.xlsx"}) diff --git a/uv.lock b/uv.lock index f876436..56c5b3d 100644 --- a/uv.lock +++ b/uv.lock @@ -452,7 +452,7 @@ wheels = [ [[package]] name = "exstruct" -version = 
"0.3.7" +version = "0.4.1" source = { editable = "." } dependencies = [ { name = "numpy" }, @@ -465,6 +465,7 @@ dependencies = [ [package.optional-dependencies] all = [ + { name = "httpx" }, { name = "mcp" }, { name = "pillow" }, { name = "pypdfium2" }, @@ -472,6 +473,7 @@ all = [ { name = "pyyaml" }, ] mcp = [ + { name = "httpx" }, { name = "mcp" }, ] render = [ @@ -501,6 +503,8 @@ dev = [ [package.metadata] requires-dist = [ + { name = "httpx", marker = "extra == 'all'", specifier = ">=0.27,<1.0" }, + { name = "httpx", marker = "extra == 'mcp'", specifier = ">=0.27,<1.0" }, { name = "mcp", marker = "extra == 'all'", specifier = ">=1.6.0,<2.0.0" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.6.0,<2.0.0" }, { name = "numpy", specifier = ">=2.3.5" }, From ef8e1b8c4398d0a1833f2258bfd61d8286da5aed Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Sat, 24 Jan 2026 08:56:22 +0900 Subject: [PATCH 17/18] =?UTF-8?q?=E3=83=90=E3=83=BC=E3=82=B8=E3=83=A7?= =?UTF-8?q?=E3=83=B3=E3=82=920.4.2=E3=81=AB=E6=9B=B4=E6=96=B0=E3=81=97?= =?UTF-8?q?=E3=80=81CHANGELOG=E3=81=ABMCP=E3=83=84=E3=83=BC=E3=83=AB?= =?UTF-8?q?=E5=90=8D=E3=81=AE=E5=A4=89=E6=9B=B4=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee39226..851081c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,18 @@ All notable changes to this project are documented in this file. This changelog - _No unreleased changes yet._ +## [0.4.2] - 2026-01-23 + +### Changed + +- Renamed MCP tool names to remove dots for compatibility with strict client validators (PR [#47](https://github.com/harumiWeb/exstruct/pull/47)). 
+ +## [0.4.1] - 2026-01-23 + +### Fixed + +- Pinned `httpx<1.0` for MCP extras to prevent runtime failures with pre-release `httpx` builds (PR [#47](https://github.com/harumiWeb/exstruct/pull/47)). + ## [0.4.0] - 2026-01-23 ### Added diff --git a/pyproject.toml b/pyproject.toml index 0504343..bb79f38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "exstruct" -version = "0.4.1" +version = "0.4.2" description = "Excel to structured JSON (tables, shapes, charts) for LLM/RAG pipelines" readme = "README.md" license = { file = "LICENSE" } From b50a54d172b814eca3cd19c028c0ac0c9f53b6ac Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Sat, 24 Jan 2026 09:10:38 +0900 Subject: [PATCH 18/18] =?UTF-8?q?mcp=E4=BE=9D=E5=AD=98=E9=96=A2=E4=BF=82?= =?UTF-8?q?=E3=81=AE=E3=83=90=E3=83=BC=E3=82=B8=E3=83=A7=E3=83=B3=E3=82=92?= =?UTF-8?q?1.6.0=E3=81=8B=E3=82=891.25.0=E3=81=AB=E3=82=A2=E3=83=83?= =?UTF-8?q?=E3=83=97=E3=82=B0=E3=83=AC=E3=83=BC=E3=83=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bb79f38..6fdcbeb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,14 +42,14 @@ all = [ "python-toon>=0.1.3", "pypdfium2>=5.1.0", "Pillow>=12.0.0", - "mcp>=1.6.0,<2.0.0", + "mcp>=1.25.0,<2.0.0", "httpx>=0.27,<1.0", ] yaml = ["pyyaml>=6.0.3"] toon = ["python-toon>=0.1.3"] render = ["pypdfium2>=5.1.0", "Pillow>=12.0.0"] mcp = [ - "mcp>=1.6.0,<2.0.0", + "mcp>=1.25.0,<2.0.0", "httpx>=0.27,<1.0", ]