diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..be0ae91 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: CI Gate + +on: + pull_request: + push: + branches: + - main + workflow_dispatch: + +jobs: + build-and-test: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + cache-dependency-path: server/package-lock.json + + - name: Install dependencies + working-directory: server + run: npm ci + + - name: Build + working-directory: server + run: npm run build + + - name: Run selftest fast + if: github.event_name == 'pull_request' + working-directory: server + run: npm run selftest:fast + + - name: Run selftest core + if: github.event_name == 'push' + working-directory: server + run: npm run selftest:core + + - name: Run selftest full + if: github.event_name == 'workflow_dispatch' + working-directory: server + run: npm run selftest:full diff --git a/.gitignore b/.gitignore index ff9fecc..9d23ad2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,41 +1,84 @@ -# Dependencies +# ============================== +# Node / TypeScript +# ============================== + node_modules/ .pnp .pnp.js -# Build outputs dist/ build/ extension/dist/ *.tsbuildinfo +# ============================== +# Python +# ============================== + +__pycache__/ +*.py[cod] +*.pyo +*.pyd + +*.egg-info/ +.eggs/ + +build/ +dist/ + +# Virtual environments +.venv/ +venv/ +env/ + +# ============================== # Environment +# ============================== + .env .env.local .env.*.local +# ============================== # Logs +# ============================== + logs/ *.log npm-debug.log* yarn-debug.log* yarn-error.log* +# ============================== # IDE +# ============================== + .idea/ .vscode/ *.swp *.swo *~ +# ============================== # OS +# ============================== + .DS_Store 
Thumbs.db +# ============================== # Test +# ============================== + coverage/ .nyc_output/ +# ============================== # Temp +# ============================== + tmp/ -temp/ \ No newline at end of file +temp/ + +.vite-test/ +server/bench/out/ \ No newline at end of file diff --git a/README.md b/README.md index 91915dc..437811b 100644 --- a/README.md +++ b/README.md @@ -1,104 +1,358 @@ # Shadow Threads -> 在任意 LLM 网页上创建影子子线程对话,深入探索而不污染主对话上下文。 +Shadow Threads records and verifies AI workflow state. -## 🌟 特性 +It lets developers persist workflow artifacts, track state revisions, +record model or tool executions, and replay those executions +deterministically. -- **多平台支持**:ChatGPT、Claude、Gemini、通用适配 -- **自由选择**:选中任意文本片段进行追问 -- **独立上下文**:子线程有独立的对话历史,不影响主对话 -- **智能路由**:自动使用当前页面对应的 LLM 进行回答 -- **数据持久化**:PostgreSQL 存储,支持历史回顾 +## The problem -## 📁 项目结构 +AI workflows are difficult to reproduce because outputs depend on more than a final prompt. They also depend on parameters, intermediate state, tool calls, and the exact artifacts passed between steps. Once that context drifts, debugging and verification become unreliable. Shadow Threads addresses this by recording workflow state and execution boundaries as deterministic artifacts and revisions. That gives developers a stable way to inspect, transfer, and replay workflow state without relying on implicit runtime context. 
-``` -shadow-threads/ -├── server/ # 后端服务 (Node.js + Express + Prisma) -│ ├── src/ -│ │ ├── api/ # API 路由层 -│ │ ├── services/ # 业务逻辑层 -│ │ ├── providers/ # LLM 提供商适配层 -│ │ ├── middleware/ # 中间件(认证、日志、错误处理) -│ │ ├── utils/ # 工具函数 -│ │ └── types/ # TypeScript 类型定义 -│ ├── prisma/ # 数据库 Schema 和迁移 -│ └── Dockerfile -│ -├── extension/ # 浏览器扩展 (Chrome/Edge/Firefox) -│ ├── src/ -│ │ ├── adapters/ # 各平台 DOM 适配器 -│ │ ├── ui/ # UI 组件 -│ │ └── core/ # 核心逻辑 -│ └── manifest.json -│ -├── docs/ # 文档 -├── docker-compose.yml # Docker 编排 -└── README.md +## 30-second overview + +Shadow Threads gives you four durable objects: + +- **Artifact** - a content-addressed bundle for task state or any other workflow payload. +- **Revision** - a package-local DAG node that binds artifacts into a verifiable state snapshot. +- **Execution** - a recorded model or tool boundary with fixed inputs, outputs, status, and result hash. +- **Replay** - a verification step that checks whether a stored execution boundary still matches exactly. + +The local server exposes these objects through `/api/v1`. The Python SDK, CLI, demos, and MCP server all use that same boundary. + +## Demo + +This repository contains the core Shadow Threads system: protocol, server runtime, SDK, MCP integration, and local demos. If you want the quickest way to see the product behavior in practice, use the focused public demo repository for a deterministic AI-assisted coding workflow: https://github.com/ZetongDu/shadowthreads-demo-coding-workflow. That demo shows AI coding workflow state, revision lineage, execution replay, and replay verification in one place. 
+ +## Install instructions + +### Prerequisites + +- Node.js 20+ recommended (`server/package.json` requires `>=18`) +- Python 3.10+ +- Docker + +### Start local infrastructure + +From the repository root: + +```bash +docker compose up -d postgres redis ``` -## 🚀 快速开始 +### Install and prepare the server -### 环境要求 +```bash +cd server +npm ci +npm run prisma:generate +npm run prisma:migrate +npm run build +``` -- Node.js >= 18 -- Docker & Docker Compose -- pnpm (推荐) 或 npm +### Install the Python SDK -### 1. 启动数据库 +From the repository root: ```bash -docker-compose up -d postgres redis +pip install -e python-sdk ``` -### 2. 启动后端 +## Run server instructions + +Start the API server in a separate terminal: ```bash cd server -pnpm install -pnpm prisma:migrate -pnpm dev +npm run start +``` + +Default local address: + +```text +http://localhost:3001 ``` -### 3. 构建扩展 +The Python SDK also respects `SHADOW_SERVER` if you want to point to a different base URL. + +## Ultra minimal Python example + +The example below records a minimal workflow state, creates a revision, records one execution, and verifies it. 
+ +```python +from hashlib import sha256 +from shadowthreads import ArtifactReference, RevisionMetadata, ShadowClient + +package_id = "readme-first-run" +prompt_hash = sha256(b"Summarize the current workflow state.").hexdigest() +started_at = "2026-03-12T09:00:00+00:00" +finished_at = "2026-03-12T09:00:01+00:00" + +with ShadowClient(base_url="http://localhost:3001") as client: + artifact = client.capture_artifact( + schema="artifact.task.state.v1", + package_id=package_id, + payload={"task": "example", "state": "ready"}, + ) + + ref = ArtifactReference(bundle_hash=artifact.bundle_hash, role="primary_state") + + revision = client.create_revision( + package_id=package_id, + artifacts=[ref], + metadata=RevisionMetadata( + author="README example", + message="Initial task state", + created_by="python-sdk", + timestamp=started_at, + source="human", + ), + ) + + execution = client.record_execution( + package_id=package_id, + revision_hash=revision.revision_hash, + provider="local-example", + model="shadow-demo-model", + prompt_hash=prompt_hash, + parameters={"temperature": 0}, + input_artifacts=[ref], + output_artifacts=[ref], + status="success", + started_at=started_at, + finished_at=finished_at, + ) + + replay = client.replay_execution(execution.execution_id) + print(replay.verified) +``` + +## First successful run + +This is the shortest reliable path for a fresh checkout. + +1. Start Postgres and Redis: + + ```bash + docker compose up -d postgres redis + ``` + +2. Build and prepare the server: + + ```bash + cd server + npm ci + npm run prisma:generate + npm run prisma:migrate + npm run build + npm run start + ``` + +3. In another terminal, install the SDK: + + ```bash + pip install -e python-sdk + ``` + +4. Save the example above as `first_run.py` and run: + + ```bash + python first_run.py + ``` + +A successful run prints `True` from `replay.verified`. 
If you expand the example to print IDs and hashes, you should see: + +- a 64-character `artifact_bundle_hash` +- a 64-character `revision_hash` +- a UUID `execution_id` +- a 64-character `result_hash` + +That confirms a full record plus replay cycle against the local API. + +## Core concepts + +### Artifact + +An artifact is the smallest durable unit in Shadow Threads. It stores: + +- `schema` +- `identity` (`packageId`, optional `revisionId`, optional `revisionHash`) +- `payload` +- optional `references` + +Artifacts are content-addressed. The server derives `bundleHash` from canonicalized content. + +### Revision + +A revision binds one or more artifacts into a package-local DAG node. A revision answers: what state did this package have at this point in time? Revisions are hashed deterministically and can only inherit from parents in the same package. + +### Execution + +An execution records a model or tool boundary for a specific revision. It stores: + +- `provider` +- `model` +- `promptHash` +- `parameters` +- `inputArtifacts` +- `outputArtifacts` +- `status` +- `resultHash` + +This makes the execution boundary inspectable and replayable. + +### Replay + +Replay verifies an existing execution record. It checks that: + +- `promptHash` still matches +- `parameters` still match +- `inputArtifacts` still match +- recomputed `resultHash` still matches the recorded execution + +Replay is verification, not a best-effort rerun. + +## Capabilities enabled by Shadow Threads + +### Deterministic workflow replay + +Execution records allow deterministic replay because replay uses the recorded execution boundary instead of reconstructing it from memory. That boundary includes `promptHash`, `parameters`, `inputArtifacts`, `outputArtifacts`, and `resultHash`. Replay verifies that the same execution inputs still produce the same recorded result boundary. This makes mismatches explicit and auditable rather than implicit runtime drift. 
+ +### Workflow state portability + +Revisions represent snapshots of workflow state, and artifacts contain the payload that produced that state. Because artifacts are content-addressed and revisions form a deterministic DAG, workflow state can be exported and reconstructed elsewhere without redefining identity. This supports continuing reasoning with another model, moving workflow state between environments, and resuming work from a saved snapshot. The portable unit is the recorded artifact and revision graph, not hidden local process memory. + +### Agent execution audit + +Execution records provide an audit trail for agent workflows. Developers can inspect prompt hashes, parameters, and artifact boundaries to understand how an agent arrived at a state. This is useful for debugging complex agent workflows, tracing tool calls, and auditing reasoning chains across multi-step runs. The audit surface stays tied to recorded boundaries rather than informal logs or recollection. + +## Metadata reference + +### RevisionMetadata + +`RevisionMetadata` requires these fields: + +| Field | Required | Meaning | +| --- | --- | --- | +| `author` | yes | Human-readable author label for the revision | +| `message` | yes | Short description of why the revision exists | +| `created_by` / `createdBy` | yes | Concrete actor or tool that created the revision | +| `timestamp` | yes | ISO 8601 timestamp with timezone offset | +| `source` | yes | One of `human`, `ai`, `migration`, `system` | +| `tags` | optional | List of short labels; defaults to `[]` | + +In the Python SDK, the dataclass field is `created_by`. The SDK serializes it to the API field `createdBy`. + +### prompt_hash + +`prompt_hash` is the deterministic identity of the prompt boundary used for an execution. 
Expected format: + +- 64 lowercase hexadecimal characters +- often `sha256(prompt_bytes).hexdigest()` + +Purpose: + +- binds the execution record to the exact prompt boundary that was used +- allows replay to reject non-deterministic changes before comparing outputs +- prevents silent drift between recorded execution inputs and later verification + +The system does not enforce how the hash is generated. It only requires that the same prompt produces the same hash. + +### Timestamp format + +`timestamp`, `started_at`, and `finished_at` must be ISO 8601 strings with an explicit timezone offset. + +Accepted examples: + +```text +2026-03-12T09:00:00+00:00 +2026-03-12T09:00:00Z +``` + +Do not omit the timezone. + +## When should you use Shadow Threads? + +Use Shadow Threads when you need: + +- reproducible AI workflows +- auditability of model and tool executions +- debugging for complex agent workflows +- deterministic replay of recorded task state +- migration of verifiable workflow history between environments +- portability of workflow state across models or environments + +## Demo references + +Two repository demos exercise the same runtime from different angles: + +- `demo/demoA-task-state` - task-state capture, revision history, and replay verification +- `demo/demoB-workflow-debug` - workflow debugging with execution replay and inspection + +See `demo/README.md` for run commands. + +## MCP support + +Shadow Threads includes an MCP server in `mcp/`. + +Dependency chain: + +```text +MCP client -> Shadow Threads MCP -> Python SDK -> Shadow Threads server +``` + +Install and run: ```bash -cd extension -pnpm install -pnpm build +pip install -e python-sdk +pip install -e mcp +shadowthreads-mcp ``` -### 4. 加载扩展 +Exposed tools include artifact capture, revision creation, execution recording, and execution replay. See `mcp/README.md` for details. 
-在浏览器中加载 `extension` 目录作为开发扩展。 +## Architecture diagram -## 📖 文档 +```mermaid +flowchart LR + A["Python SDK / CLI / MCP"] --> B["/api/v1 Local HTTP API"] + B --> C[Artifact Store] + B --> D[Revision DAG] + B --> E[Execution Records] + B --> F["Migration / Closure"] + C --> G[(PostgreSQL)] + D --> G + E --> G + F --> G + B --> H[(Redis)] +``` + +Redis is used for runtime coordination and execution tracking. + +## Use cases -- [API 文档](docs/API.md) -- [架构设计](docs/ARCHITECTURE.md) -- [开发指南](docs/DEVELOPMENT.md) -- [部署指南](docs/DEPLOYMENT.md) +- Record AI workflow state as immutable, content-addressed artifacts. +- Build revision history for package-local task progress. +- Audit model and tool executions with deterministic replay checks. +- Export and import migration packages with closure verification. +- Expose the same workflow boundary to local tools, SDK clients, CLI users, and MCP-compatible agents. -## 🛠 技术栈 +## Selftest Matrix -**后端** -- Node.js + TypeScript -- Express.js -- Prisma ORM -- PostgreSQL -- Redis +Shadow Threads selftests are organized into three execution tiers: -**扩展** -- TypeScript -- esbuild -- Manifest V3 +- `selftest:fast` - fast checks for active development and small changes. +- `selftest:core` - core runtime regression checks before merging logic changes. +- `selftest:full` - full regression checks, including HTTP E2E flows, before milestones or release candidates. -**LLM 支持** -- OpenAI (GPT-4, GPT-3.5) -- Anthropic (Claude) -- Google (Gemini) -- 更多... +Example commands: + +```bash +npm run build +npm run selftest:fast +npm run selftest:core +npm run selftest:full +``` -## 📄 License -MIT diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 0000000..c440bc1 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,58 @@ +# Shadow Threads CLI + +Shadow Threads CLI is a small HTTP client for the Shadow Threads server runtime.
+ +## Commands + +```bash +shadow init +shadow capture <file> +shadow inspect revision <revisionHash> +shadow inspect artifact <bundleHash> --package <packageId> +shadow inspect execution <executionId> +shadow replay <executionId> +shadow migrate export <packageId> +``` + +## Configuration + +Run `shadow init` in your working directory to create: + +- `.shadow/` +- `shadow.config.json` + +Default config (NOTE: the root README starts the server on `http://localhost:3001`; update `server` to match your running server): + +```json +{ + "server": "http://localhost:3000", + "workspace": ".shadow" +} +``` + +## Capture Input + +`shadow capture <file>` expects a full artifact bundle request body for `POST /api/v1/artifacts`. + +Example: + +```json +{ + "schema": "artifact.task.state.v1", + "identity": { + "packageId": "package-123", + "revisionId": null, + "revisionHash": null + }, + "payload": { + "name": "example" + }, + "references": [] +} +``` + +## Notes + +- `shadow inspect artifact` requires `--package` because the current server API resolves artifacts by `packageId + bundleHash`. +- `shadow replay <executionId>` first loads the execution record, then reconstructs the replay body required by the server. +- `shadow migrate export <packageId>` copies the returned server zip path to `migration.zip`. This assumes the CLI can access the same filesystem as the server.
diff --git a/cli/package-lock.json b/cli/package-lock.json new file mode 100644 index 0000000..c166ae0 --- /dev/null +++ b/cli/package-lock.json @@ -0,0 +1,543 @@ +{ + "name": "shadow-threads-cli", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "shadow-threads-cli", + "version": "0.1.0", + "dependencies": { + "axios": "^1.13.2", + "commander": "^12.1.0", + "zod": "^3.23.8" + }, + "bin": { + "shadow": "dist/index.js" + }, + "devDependencies": { + "@types/node": "^22.8.1", + "ts-node": "^10.9.2", + "typescript": "^5.6.3" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": 
"sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, + "node_modules/@tsconfig/node10": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.12.tgz", + "integrity": "sha512-UCYBaeFvM11aU2y3YPZ//O5Rhj+xKyzy7mvcIoAjASbigy8mHMryP5cK7dgjlz2hWxh1g5pLw084E0a/wlUSFQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "22.19.15", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.15.tgz", + "integrity": "sha512-F0R/h2+dsy5wJAUe3tAU6oqa2qbWY5TpNfL/RGmo1y38hiyO1w3x2jPtt76wmuaJI4DQnOBu21cNXQ2STIUUWg==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/acorn": { + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", + "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", + "dev": 
true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.5", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.5.tgz", + "integrity": "sha512-HEHNfbars9v4pgpW6SO1KSPkfoS0xVOM/9UzkJltjlsHZmJasxg8aXkuZa7SMf8vKGIBhpUsPluQSqhJFCqebw==", + "dev": true, + "license": "MIT", + "dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", + "dev": true, + "license": "MIT" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.13.6", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.6.tgz", + "integrity": "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": 
"sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/commander": { + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", + "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/diff": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.4.tgz", + "integrity": "sha512-X07nttJQkwkfKfvTPG/KSnE2OMdcUCao6+eXF3wmnIQRn2aPAHH3VxDbDOdegkd6JbPsXqShpvEOHfAT+nCNwQ==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": 
"https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": 
"sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + 
}, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + 
"peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true, + "license": "MIT" + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/cli/package.json b/cli/package.json new file mode 100644 index 0000000..68d1323 --- /dev/null +++ b/cli/package.json @@ -0,0 +1,25 @@ +{ + "name": "shadow-threads-cli", + "version": "0.1.0", + "description": "CLI for the Shadow Threads HTTP 
API", + "main": "dist/index.js", + "bin": { + "shadow": "dist/index.js" + }, + "scripts": { + "build": "tsc -p tsconfig.json" + }, + "dependencies": { + "axios": "^1.13.2", + "commander": "^12.1.0", + "zod": "^3.23.8" + }, + "devDependencies": { + "@types/node": "^22.8.1", + "ts-node": "^10.9.2", + "typescript": "^5.6.3" + }, + "engines": { + "node": ">=18.0.0" + } +} diff --git a/cli/src/client/api-client.ts b/cli/src/client/api-client.ts new file mode 100644 index 0000000..5189109 --- /dev/null +++ b/cli/src/client/api-client.ts @@ -0,0 +1,222 @@ +import axios, { AxiosError, type AxiosInstance, type AxiosRequestConfig } from 'axios'; +import { ZodError, z } from 'zod'; +import { CliError, EXIT_CODE_CLIENT_ERROR, EXIT_CODE_NETWORK_ERROR, EXIT_CODE_SERVER_ERROR } from '../utils/errors'; + +const hash64Schema = z.string().regex(/^[0-9a-f]{64}$/); +const artifactReferenceSchema = z.object({ + bundleHash: hash64Schema, + role: z.string().min(1), +}); + +const executionStatusSchema = z.enum(['success', 'failure']); + +const apiSuccessEnvelopeSchema = z.object({ + ok: z.literal(true), + data: z.unknown(), +}); + +const apiErrorEnvelopeSchema = z.object({ + ok: z.literal(false), + error: z.object({ + code: z.string().min(1), + message: z.string().min(1), + }), +}); + +export const artifactCreateBodySchema = z.object({ + schema: z.string().min(1), + identity: z.object({ + packageId: z.string().min(1), + revisionId: z.union([z.string().min(1), z.null()]).optional().default(null), + revisionHash: z.union([hash64Schema, z.null()]).optional().default(null), + }), + payload: z.unknown(), + references: z.array(artifactReferenceSchema).optional().default([]), +}); + +const artifactCreateResponseSchema = z.object({ + id: z.string().min(1), + bundleHash: hash64Schema, + createdAt: z.string().min(1), +}); + +const executionRecordSchema = z.object({ + executionId: z.string().uuid(), + packageId: z.string().min(1), + revisionHash: hash64Schema, + provider: z.string().min(1), + model: 
z.string().min(1),
+  promptHash: hash64Schema,
+  parameters: z.unknown(),
+  inputArtifacts: z.array(artifactReferenceSchema),
+  outputArtifacts: z.array(artifactReferenceSchema),
+  resultHash: hash64Schema,
+  status: executionStatusSchema,
+  startedAt: z.string().min(1),
+  finishedAt: z.string().min(1),
+  createdAt: z.string().min(1),
+});
+
+const replayExecutionBodySchema = z.object({
+  promptHash: hash64Schema,
+  parameters: z.unknown(),
+  inputArtifacts: z.array(artifactReferenceSchema),
+  outputArtifacts: z.array(artifactReferenceSchema).optional(),
+  status: executionStatusSchema.optional(),
+});
+
+const replayExecutionResponseSchema = z.object({
+  executionId: z.string().uuid(),
+  verified: z.boolean(),
+  resultHash: hash64Schema,
+});
+
+const migrationExportResponseSchema = z.object({
+  zipPath: z.string().min(1),
+  manifest: z.object({
+    rootRevisionHash: hash64Schema,
+    artifactCount: z.number().int().nonnegative(),
+    revisionCount: z.number().int().nonnegative(),
+  }),
+});
+
+export type ArtifactCreateBody = z.infer<typeof artifactCreateBodySchema>;
+export type ExecutionRecord = z.infer<typeof executionRecordSchema>;
+export type ReplayExecutionBody = z.infer<typeof replayExecutionBodySchema>;
+export type ReplayExecutionResult = z.infer<typeof replayExecutionResponseSchema>;
+export type MigrationExportResult = z.infer<typeof migrationExportResponseSchema>;
+
+export class ShadowClient {
+  private readonly http: AxiosInstance;
+  private readonly baseURL: string;
+
+  constructor(baseURL: string) {
+    this.baseURL = baseURL;
+    this.http = axios.create({
+      baseURL,
+      timeout: 30_000,
+      headers: {
+        Accept: 'application/json',
+        'Content-Type': 'application/json',
+      },
+    });
+  }
+
+  async createArtifact(payload: ArtifactCreateBody): Promise<z.infer<typeof artifactCreateResponseSchema>> {
+    const body = artifactCreateBodySchema.parse(payload);
+    return this.request(
+      {
+        method: 'POST',
+        url: '/api/v1/artifacts',
+        data: body,
+      },
+      artifactCreateResponseSchema,
+    );
+  }
+
+  async getRevision(id: string): Promise<unknown> {
+    return this.request(
+      {
+        method: 'GET',
+        url: `/api/v1/revisions/${encodeURIComponent(id)}`,
+      },
+      z.unknown(),
+    );
+  }
+
+  async 
getArtifact(hash: string, packageId: string): Promise<unknown> {
+    return this.request(
+      {
+        method: 'GET',
+        url: `/api/v1/artifacts/${encodeURIComponent(packageId)}/${encodeURIComponent(hash)}`,
+      },
+      z.unknown(),
+    );
+  }
+
+  async getExecution(id: string): Promise<ExecutionRecord> {
+    return this.request(
+      {
+        method: 'GET',
+        url: `/api/v1/executions/${encodeURIComponent(id)}`,
+      },
+      executionRecordSchema,
+    );
+  }
+
+  async replayExecution(id: string, payload: ReplayExecutionBody): Promise<ReplayExecutionResult> {
+    const body = replayExecutionBodySchema.parse(payload);
+    return this.request(
+      {
+        method: 'POST',
+        url: `/api/v1/executions/${encodeURIComponent(id)}/replay`,
+        data: body,
+      },
+      replayExecutionResponseSchema,
+    );
+  }
+
+  async exportMigration(revisionId: string): Promise<MigrationExportResult> {
+    return this.request(
+      {
+        method: 'POST',
+        url: '/api/v1/migration/export',
+        data: { rootRevisionHash: revisionId },
+      },
+      migrationExportResponseSchema,
+    );
+  }
+
+  private async request<T>(config: AxiosRequestConfig, schema: z.ZodType<T>): Promise<T> {
+    try {
+      const response = await this.http.request(config);
+      const envelope = apiSuccessEnvelopeSchema.parse(response.data);
+      return schema.parse(envelope.data);
+    } catch (error) {
+      throw this.normalizeError(error);
+    }
+  }
+
+  private normalizeError(error: unknown): Error {
+    if (error instanceof CliError) {
+      return error;
+    }
+
+    if (error instanceof ZodError) {
+      return new CliError(`Invalid server response from ${this.baseURL}: ${error.message}`, EXIT_CODE_SERVER_ERROR);
+    }
+
+    if (axios.isAxiosError(error)) {
+      return this.normalizeAxiosError(error);
+    }
+
+    if (error instanceof Error) {
+      return error;
+    }
+
+    return new CliError('Unknown client error');
+  }
+
+  private normalizeAxiosError(error: AxiosError): CliError {
+    if (error.response) {
+      const parsed = apiErrorEnvelopeSchema.safeParse(error.response.data);
+      const exitCode = error.response.status >= 500 ? 
EXIT_CODE_SERVER_ERROR : EXIT_CODE_CLIENT_ERROR; + + if (parsed.success) { + return new CliError(parsed.data.error.message, exitCode); + } + + if (error.response.status >= 500) { + return new CliError(`Server request failed with HTTP ${error.response.status}`, EXIT_CODE_SERVER_ERROR); + } + + return new CliError(`Request failed with HTTP ${error.response.status}`, EXIT_CODE_CLIENT_ERROR); + } + + if (error.request) { + return new CliError(`Unable to reach server at ${this.baseURL}`, EXIT_CODE_NETWORK_ERROR); + } + + return new CliError(error.message, EXIT_CODE_NETWORK_ERROR); + } +} diff --git a/cli/src/commands/capture.ts b/cli/src/commands/capture.ts new file mode 100644 index 0000000..7066146 --- /dev/null +++ b/cli/src/commands/capture.ts @@ -0,0 +1,25 @@ +import path from 'node:path'; +import type { Command } from 'commander'; +import { ShadowClient, artifactCreateBodySchema } from '../client/api-client'; +import { resolveServerURL } from '../config/config'; +import { readJsonFile } from '../utils/fs'; + +export function registerCaptureCommand(program: Command): void { + program + .command('capture') + .description('Capture a full artifact bundle request body from a JSON file') + .argument('', 'Path to a JSON file containing the full artifact request body') + .action(async (file: string) => { + const serverURL = await resolveServerURL({ + flagValue: program.opts<{ server?: string }>().server, + }); + const requestPath = path.resolve(process.cwd(), file); + const raw = await readJsonFile(requestPath); + const payload = artifactCreateBodySchema.parse(raw); + const client = new ShadowClient(serverURL); + const result = await client.createArtifact(payload); + + console.log('Artifact stored'); + console.log(`bundleHash: ${result.bundleHash}`); + }); +} diff --git a/cli/src/commands/init.ts b/cli/src/commands/init.ts new file mode 100644 index 0000000..3a388b2 --- /dev/null +++ b/cli/src/commands/init.ts @@ -0,0 +1,23 @@ +import type { Command } from 'commander'; 
+import { initializeConfig } from '../config/config'; + +export function registerInitCommand(program: Command): void { + program + .command('init') + .description('Create .shadow and shadow.config.json in the current directory') + .action(async () => { + const result = await initializeConfig(); + + console.log('Shadow workspace initialized'); + console.log(`config: ${result.configPath}`); + console.log(`workspace: ${result.workspacePath}`); + + if (!result.configCreated) { + console.log('configStatus: existing'); + } + + if (!result.workspaceCreated) { + console.log('workspaceStatus: existing'); + } + }); +} diff --git a/cli/src/commands/inspect.ts b/cli/src/commands/inspect.ts new file mode 100644 index 0000000..05f93bb --- /dev/null +++ b/cli/src/commands/inspect.ts @@ -0,0 +1,48 @@ +import type { Command } from 'commander'; +import { ShadowClient } from '../client/api-client'; +import { resolveServerURL } from '../config/config'; +import { printJson } from '../utils/output'; + +export function registerInspectCommand(program: Command): void { + const inspect = program.command('inspect').description('Inspect revisions, artifacts, and executions'); + + inspect + .command('revision') + .description('Fetch a revision by hash') + .argument('', 'Revision hash') + .action(async (id: string) => { + const serverURL = await resolveServerURL({ + flagValue: program.opts<{ server?: string }>().server, + }); + const client = new ShadowClient(serverURL); + const result = await client.getRevision(id); + printJson(result); + }); + + inspect + .command('artifact') + .description('Fetch an artifact by bundle hash and package id') + .argument('', 'Artifact bundle hash') + .requiredOption('--package ', 'Package id required by the current server API') + .action(async (hash: string, options: { package: string }) => { + const serverURL = await resolveServerURL({ + flagValue: program.opts<{ server?: string }>().server, + }); + const client = new ShadowClient(serverURL); + const result = 
await client.getArtifact(hash, options.package); + printJson(result); + }); + + inspect + .command('execution') + .description('Fetch an execution by id') + .argument('', 'Execution id') + .action(async (id: string) => { + const serverURL = await resolveServerURL({ + flagValue: program.opts<{ server?: string }>().server, + }); + const client = new ShadowClient(serverURL); + const result = await client.getExecution(id); + printJson(result); + }); +} diff --git a/cli/src/commands/migrate.ts b/cli/src/commands/migrate.ts new file mode 100644 index 0000000..f1e7f01 --- /dev/null +++ b/cli/src/commands/migrate.ts @@ -0,0 +1,30 @@ +import path from 'node:path'; +import type { Command } from 'commander'; +import { ShadowClient } from '../client/api-client'; +import { resolveServerURL } from '../config/config'; +import { copyFileTo } from '../utils/fs'; + +export function registerMigrateCommand(program: Command): void { + const migrate = program.command('migrate').description('Migration package utilities'); + + migrate + .command('export') + .description('Export a migration package for the provided root revision hash') + .argument('', 'Root revision hash') + .action(async (revisionId: string) => { + const serverURL = await resolveServerURL({ + flagValue: program.opts<{ server?: string }>().server, + }); + const client = new ShadowClient(serverURL); + const result = await client.exportMigration(revisionId); + const destination = path.resolve(process.cwd(), 'migration.zip'); + + await copyFileTo(result.zipPath, destination); + + console.log('Migration exported'); + console.log(`file: ${destination}`); + console.log(`rootRevisionHash: ${result.manifest.rootRevisionHash}`); + console.log(`artifactCount: ${result.manifest.artifactCount}`); + console.log(`revisionCount: ${result.manifest.revisionCount}`); + }); +} diff --git a/cli/src/commands/replay.ts b/cli/src/commands/replay.ts new file mode 100644 index 0000000..b441d20 --- /dev/null +++ b/cli/src/commands/replay.ts @@ -0,0 
+1,29 @@ +import type { Command } from 'commander'; +import { ShadowClient } from '../client/api-client'; +import { resolveServerURL } from '../config/config'; + +export function registerReplayCommand(program: Command): void { + program + .command('replay') + .description('Replay an execution by reconstructing the required replay body from the stored execution record') + .argument('', 'Execution id') + .action(async (executionId: string) => { + const serverURL = await resolveServerURL({ + flagValue: program.opts<{ server?: string }>().server, + }); + const client = new ShadowClient(serverURL); + const execution = await client.getExecution(executionId); + const result = await client.replayExecution(executionId, { + promptHash: execution.promptHash, + parameters: execution.parameters, + inputArtifacts: execution.inputArtifacts, + outputArtifacts: execution.outputArtifacts, + status: execution.status, + }); + + console.log('Replay complete'); + console.log(`executionId: ${result.executionId}`); + console.log(`verified: ${result.verified}`); + console.log(`resultHash: ${result.resultHash}`); + }); +} diff --git a/cli/src/config/config.ts b/cli/src/config/config.ts new file mode 100644 index 0000000..4566e18 --- /dev/null +++ b/cli/src/config/config.ts @@ -0,0 +1,118 @@ +import path from 'node:path'; +import { existsSync } from 'node:fs'; +import { z } from 'zod'; +import { CliError } from '../utils/errors'; +import { ensureDirectory, readJsonFile, writeJsonFile } from '../utils/fs'; + +export const CONFIG_FILE_NAME = 'shadow.config.json'; + +export const defaultConfig = { + server: 'http://localhost:3000', + workspace: '.shadow', +} as const; + +const shadowConfigSchema = z.object({ + server: z.string().min(1).default(defaultConfig.server), + workspace: z.string().min(1).default(defaultConfig.workspace), +}); + +export type ShadowConfig = z.infer; + +export type LoadedConfig = { + config: ShadowConfig; + configPath: string; + workspacePath: string; +}; + +export type 
InitializedConfig = LoadedConfig & {
+  configCreated: boolean;
+  workspaceCreated: boolean;
+};
+
+export function resolveConfigPath(cwd = process.cwd()): string {
+  return path.join(cwd, CONFIG_FILE_NAME);
+}
+
+function normalizeOptionalString(value: string | undefined | null): string | null {
+  if (typeof value !== 'string') {
+    return null;
+  }
+
+  const trimmed = value.trim();
+  return trimmed.length > 0 ? trimmed : null;
+}
+
+async function loadConfigIfPresent(cwd = process.cwd()): Promise<LoadedConfig | null> {
+  const configPath = resolveConfigPath(cwd);
+  if (!existsSync(configPath)) {
+    return null;
+  }
+
+  const raw = await readJsonFile(configPath);
+  const parsed = shadowConfigSchema.safeParse(raw);
+  if (!parsed.success) {
+    throw new CliError(`Invalid ${CONFIG_FILE_NAME}: ${parsed.error.message}`);
+  }
+
+  return {
+    config: parsed.data,
+    configPath,
+    workspacePath: path.resolve(cwd, parsed.data.workspace),
+  };
+}
+
+export async function loadConfig(cwd = process.cwd()): Promise<LoadedConfig> {
+  const loaded = await loadConfigIfPresent(cwd);
+  if (!loaded) {
+    throw new CliError(`Missing ${CONFIG_FILE_NAME} in ${cwd}. Run "shadow init" first.`);
+  }
+
+  return loaded;
+}
+
+export async function resolveServerURL(options?: { flagValue?: string; cwd?: string }): Promise<string> {
+  const cwd = options?.cwd ?? 
process.cwd();
+  const fromFlag = normalizeOptionalString(options?.flagValue);
+  if (fromFlag) {
+    return fromFlag;
+  }
+
+  const fromEnv = normalizeOptionalString(process.env.SHADOW_SERVER);
+  if (fromEnv) {
+    return fromEnv;
+  }
+
+  const loaded = await loadConfigIfPresent(cwd);
+  if (loaded) {
+    return loaded.config.server;
+  }
+
+  return defaultConfig.server;
+}
+
+export async function initializeConfig(cwd = process.cwd()): Promise<InitializedConfig> {
+  const configPath = resolveConfigPath(cwd);
+  let configCreated = false;
+  let loaded: LoadedConfig;
+
+  if (existsSync(configPath)) {
+    loaded = await loadConfig(cwd);
+  } else {
+    await writeJsonFile(configPath, defaultConfig);
+    configCreated = true;
+    loaded = {
+      config: defaultConfig,
+      configPath,
+      workspacePath: path.resolve(cwd, defaultConfig.workspace),
+    };
+  }
+
+  const workspaceAlreadyExists = existsSync(loaded.workspacePath);
+  await ensureDirectory(loaded.workspacePath);
+
+  return {
+    ...loaded,
+    configCreated,
+    workspaceCreated: !workspaceAlreadyExists,
+  };
+}
diff --git a/cli/src/index.ts b/cli/src/index.ts
new file mode 100644
index 0000000..d99add2
--- /dev/null
+++ b/cli/src/index.ts
@@ -0,0 +1,35 @@
+#!/usr/bin/env node
+
+import { Command } from 'commander';
+import { registerCaptureCommand } from './commands/capture';
+import { registerInitCommand } from './commands/init';
+import { registerInspectCommand } from './commands/inspect';
+import { registerMigrateCommand } from './commands/migrate';
+import { registerReplayCommand } from './commands/replay';
+import { EXIT_CODE_SUCCESS, getExitCode, toErrorMessage } from './utils/errors';
+
+async function main(): Promise<void> {
+  const program = new Command();
+
+  program
+    .name('shadow')
+    .description('Shadow Threads CLI')
+    .version('0.1.0')
+    .option('--server <url>', 'Shadow Threads server URL')
+    .showHelpAfterError()
+    .showSuggestionAfterError();
+
+  registerInitCommand(program);
+  registerCaptureCommand(program);
+  registerInspectCommand(program);
+  
registerReplayCommand(program); + registerMigrateCommand(program); + + await program.parseAsync(process.argv); + process.exit(EXIT_CODE_SUCCESS); +} + +void main().catch((error: unknown) => { + console.error(toErrorMessage(error)); + process.exit(getExitCode(error)); +}); diff --git a/cli/src/utils/errors.ts b/cli/src/utils/errors.ts new file mode 100644 index 0000000..d162549 --- /dev/null +++ b/cli/src/utils/errors.ts @@ -0,0 +1,32 @@ +export const EXIT_CODE_SUCCESS = 0; +export const EXIT_CODE_CLIENT_ERROR = 1; +export const EXIT_CODE_SERVER_ERROR = 2; +export const EXIT_CODE_NETWORK_ERROR = 3; + +export class CliError extends Error { + readonly exitCode: number; + + constructor(message: string, exitCode = EXIT_CODE_CLIENT_ERROR) { + super(message); + this.name = 'CliError'; + this.exitCode = exitCode; + } +} + +export function getExitCode(error: unknown): number { + if (error instanceof CliError) { + return error.exitCode; + } + + return 1; +} + +export function toErrorMessage(error: unknown): string { + const prefix = 'Error: '; + + if (error instanceof Error) { + return error.message.startsWith(prefix) ? 
error.message : `${prefix}${error.message}`; + } + + return `${prefix}Unknown error`; +} diff --git a/cli/src/utils/fs.ts b/cli/src/utils/fs.ts new file mode 100644 index 0000000..58fb2c3 --- /dev/null +++ b/cli/src/utils/fs.ts @@ -0,0 +1,47 @@ +import path from 'node:path'; +import { copyFile, mkdir, readFile, writeFile } from 'node:fs/promises'; +import { existsSync } from 'node:fs'; +import { ZodError } from 'zod'; +import { CliError } from './errors'; + +export async function ensureDirectory(directoryPath: string): Promise { + await mkdir(directoryPath, { recursive: true }); +} + +export async function readJsonFile(filePath: string): Promise { + try { + const contents = await readFile(filePath, 'utf8'); + return JSON.parse(contents) as unknown; + } catch (error) { + if (error instanceof SyntaxError) { + throw new CliError(`Invalid JSON in ${filePath}`); + } + + if (error instanceof Error && 'code' in error && error.code === 'ENOENT') { + throw new CliError(`File not found: ${filePath}`); + } + + if (error instanceof ZodError) { + throw new CliError(error.message); + } + + throw error; + } +} + +export async function writeJsonFile(filePath: string, value: unknown): Promise { + await ensureDirectory(path.dirname(filePath)); + const text = `${JSON.stringify(value, null, 2)}\n`; + await writeFile(filePath, text, 'utf8'); +} + +export async function copyFileTo(sourcePath: string, destinationPath: string): Promise { + if (!existsSync(sourcePath)) { + throw new CliError( + `Server returned zipPath "${sourcePath}", but it is not accessible locally. 
This command only works when the CLI can access the server filesystem.`, + ); + } + + await ensureDirectory(path.dirname(destinationPath)); + await copyFile(sourcePath, destinationPath); +} diff --git a/cli/src/utils/output.ts b/cli/src/utils/output.ts new file mode 100644 index 0000000..1384d19 --- /dev/null +++ b/cli/src/utils/output.ts @@ -0,0 +1,7 @@ +export function toPrettyJson(value: unknown): string { + return JSON.stringify(value, null, 2); +} + +export function printJson(value: unknown): void { + console.log(toPrettyJson(value)); +} diff --git a/cli/tsconfig.json b/cli/tsconfig.json new file mode 100644 index 0000000..8d9703c --- /dev/null +++ b/cli/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "CommonJS", + "moduleResolution": "Node", + "rootDir": "src", + "outDir": "dist", + "strict": true, + "esModuleInterop": true, + "resolveJsonModule": true, + "skipLibCheck": true + }, + "include": ["src/**/*"], + "exclude": ["dist", "node_modules"] +} diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 0000000..24a0398 --- /dev/null +++ b/demo/README.md @@ -0,0 +1,52 @@ +# Shadow Threads Demo Layer + +These two demos show the same runtime from two different product angles. + +| Demo | Audience | Shows | +| --- | --- | --- | +| Demo A | AI collaboration beginners | task state, progress, history | +| Demo B | workflow engineers | execution replay, debugging | + +## Prerequisites + +- The Shadow Threads server is already running at `http://localhost:3001`. +- To use a different server, set `SHADOW_SERVER`, for example: `export SHADOW_SERVER=http://localhost:3001` +- The CLI is already built before running the demos. +- Python 3 is available. + +If the `shadow` command is not on your `PATH`, the demo scripts will use the built CLI at `cli/dist/index.js`. 
+ +To build the CLI: + +```bash +cd cli +npm run build +``` + +## Run the demos + +Demo A: + +```bash +cd demo/demoA-task-state +bash run-demo.sh +``` + +```powershell +Set-Location demo/demoA-task-state +.\run-demo.ps1 +``` + +Demo B: + +```bash +cd demo/demoB-workflow-debug +bash run-debug.sh +``` + +```powershell +Set-Location demo/demoB-workflow-debug +.\run-debug.ps1 +``` + +Each demo is designed to finish in under five minutes. diff --git a/demo/demoA-task-state/README.md b/demo/demoA-task-state/README.md new file mode 100644 index 0000000..e83fd0e --- /dev/null +++ b/demo/demoA-task-state/README.md @@ -0,0 +1,62 @@ +# Demo A: Task State Management + +## Problem + +AI-assisted tasks often lose state as a conversation grows. People forget what step they are on, what already happened, and what the current task output means. + +## Solution + +Shadow Threads makes task state explicit. This demo records a simple AI-assisted task as visible steps, a stored revision, and an inspectable execution history. + +This demo is not about continuing a conversation. + +It shows explicit task state continuity. + +This demo primarily shows task state continuity, progress, and history. +It also ends with replay verification of the recorded execution. + +## Demo steps + +1. Initialize a local Shadow workspace. +2. Capture the starting task artifact for a log parser task. +3. Run a tiny workflow that shows task progress: + - `Step 1 Load data` + - `Step 2 Parse logs` + - `Step 3 Generate summary` +4. Record the updated task state and execution history behind the script. +5. Inspect the stored revision and the stored execution. +6. Replay the recorded execution boundary for a final verification step. 
+ +## Expected output + +You should see the workflow progress first: + +```text +Step 1 Load data +Step 2 Parse logs +Step 3 Generate summary +``` + +Then the script inspects: + +- a revision that represents the latest task state +- an execution record that shows the workflow history for that task +- a replay verification step that confirms the recorded execution boundary still matches + +## Run + +From the repository root: + +```bash +cd demo/demoA-task-state +bash run-demo.sh +``` + +On Windows PowerShell: + +```powershell +Set-Location demo/demoA-task-state +.\run-demo.ps1 +``` + +The script uses `SHADOW_SERVER` if it is set. Otherwise it defaults to `http://localhost:3001`. diff --git a/demo/demoA-task-state/artifact.json b/demo/demoA-task-state/artifact.json new file mode 100644 index 0000000..1fb7d9c --- /dev/null +++ b/demo/demoA-task-state/artifact.json @@ -0,0 +1,11 @@ +{ + "schema": "demo.task", + "identity": { + "packageId": "demo-package" + }, + "payload": { + "task": "build log parser", + "input": "Example log data" + }, + "references": [] +} diff --git a/demo/demoA-task-state/demo-state.json b/demo/demoA-task-state/demo-state.json new file mode 100644 index 0000000..7446051 --- /dev/null +++ b/demo/demoA-task-state/demo-state.json @@ -0,0 +1,5 @@ +{ + "initialRevisionHash": "40c59f27fd34ee04d4ce8284a9804befb5a94517f93d493a0fa7b66eaeb08872", + "finalRevisionHash": "692ed77f30857f14aca3ef9f7ef13205be327b4af8ae503e294ba59879acc6a8", + "executionId": "d1d9f29d-4a6b-49bb-b8be-783546b115c2" +} diff --git a/demo/demoA-task-state/run-demo.ps1 b/demo/demoA-task-state/run-demo.ps1 new file mode 100644 index 0000000..0cc4cdd --- /dev/null +++ b/demo/demoA-task-state/run-demo.ps1 @@ -0,0 +1,277 @@ +$ErrorActionPreference = 'Stop' + +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$RepoRoot = [System.IO.Path]::GetFullPath((Join-Path $ScriptDir '..\..')) +$ServerUrl = if ($env:SHADOW_SERVER) { $env:SHADOW_SERVER } else { 'http://localhost:3001' } 
+$TmpDir = Join-Path ([System.IO.Path]::GetTempPath()) ("shadow-demoA-" + [System.Guid]::NewGuid().ToString('N')) + +function Write-Utf8File { + param( + [Parameter(Mandatory = $true)] + [string]$Path, + [Parameter(Mandatory = $true)] + [string]$Content + ) + + $directory = Split-Path -Parent $Path + if ($directory -and -not (Test-Path $directory)) { + New-Item -ItemType Directory -Path $directory | Out-Null + } + + $encoding = New-Object System.Text.UTF8Encoding($false) + [System.IO.File]::WriteAllText($Path, $Content, $encoding) +} + +function Resolve-ShadowCli { + $shadowCommand = Get-Command shadow -ErrorAction SilentlyContinue + if ($shadowCommand) { + $script:UseInstalledShadow = $true + return + } + + $fallbackPath = Join-Path $RepoRoot 'cli\dist\index.js' + if (Test-Path $fallbackPath) { + $script:UseInstalledShadow = $false + $script:ShadowCliPath = $fallbackPath + return + } + + throw 'Shadow CLI not found. Build the CLI first from cli/.' +} + +function Invoke-ShadowCliText { + param( + [Parameter(ValueFromRemainingArguments = $true)] + [string[]]$Arguments + ) + + if ($script:UseInstalledShadow) { + $output = & shadow @Arguments 2>&1 | Out-String + } else { + $output = & node $script:ShadowCliPath @Arguments 2>&1 | Out-String + } + + if ($LASTEXITCODE -ne 0) { + throw $output.Trim() + } + + return $output.TrimEnd("`r", "`n") +} + +function Get-BundleHashFromCaptureOutput { + param( + [Parameter(Mandatory = $true)] + [string]$Output + ) + + $match = [regex]::Match($Output, 'bundleHash:\s*([0-9a-f]{64})') + if (-not $match.Success) { + throw 'bundleHash not found' + } + + return $match.Groups[1].Value +} + +function Invoke-ShadowApi { + param( + [Parameter(Mandatory = $true)] + [string]$Path, + [Parameter(Mandatory = $true)] + [object]$Payload + ) + + $uri = $ServerUrl.TrimEnd('/') + $Path + $body = $Payload | ConvertTo-Json -Depth 20 + + try { + $response = Invoke-RestMethod -Method Post -Uri $uri -Headers @{ Accept = 'application/json' } -ContentType 
'application/json' -Body $body + } catch { + throw "API request failed: $($_.Exception.Message)" + } + + if (-not $response.ok) { + throw "API request failed: $($response | ConvertTo-Json -Depth 20)" + } + + return $response.data +} + +function Get-Sha256Hex { + param( + [Parameter(Mandatory = $true)] + [string]$Text + ) + + $sha = [System.Security.Cryptography.SHA256]::Create() + try { + $bytes = [System.Text.Encoding]::UTF8.GetBytes($Text) + return (($sha.ComputeHash($bytes) | ForEach-Object { $_.ToString('x2') }) -join '') + } finally { + $sha.Dispose() + } +} + +function Invoke-PythonChecked { + param( + [Parameter(ValueFromRemainingArguments = $true)] + [string[]]$Arguments + ) + + & python @Arguments + if ($LASTEXITCODE -ne 0) { + throw "Python command failed with exit code $LASTEXITCODE." + } +} + +New-Item -ItemType Directory -Path $TmpDir | Out-Null + +try { + Resolve-ShadowCli + Push-Location $ScriptDir + + Write-Host 'Demo A: Task State Management' + Write-Host "Server: $ServerUrl" + Write-Host + Write-Host 'Initializing the demo workspace' + $initOutput = Invoke-ShadowCliText init + if ($initOutput) { + Write-Host $initOutput + } + + $configPath = Join-Path $ScriptDir 'shadow.config.json' + $config = Get-Content -Raw -Path $configPath | ConvertFrom-Json + $config.server = $ServerUrl + Write-Utf8File -Path $configPath -Content (($config | ConvertTo-Json -Depth 10) + "`n") + + Write-Host + Write-Host 'Capturing the starting task state' + $artifactCaptureOutput = Invoke-ShadowCliText capture 'artifact.json' + Write-Host $artifactCaptureOutput + $taskBundleHash = Get-BundleHashFromCaptureOutput -Output $artifactCaptureOutput + + Write-Host + Write-Host 'Running the task with visible progress' + $workflowSummaryPath = Join-Path $TmpDir 'workflow-summary.json' + Invoke-PythonChecked (Join-Path $ScriptDir 'workflow.py') '--json-out' $workflowSummaryPath + + $summaryArtifactPath = Join-Path $TmpDir 'summary-artifact.json' + $summaryData = Get-Content -Raw -Path 
$workflowSummaryPath | ConvertFrom-Json + $summaryArtifact = [ordered]@{ + schema = 'demo.task.summary' + identity = [ordered]@{ + packageId = 'demo-package' + } + payload = $summaryData + references = @( + [ordered]@{ + bundleHash = $taskBundleHash + role = 'source_task' + } + ) + } + Write-Utf8File -Path $summaryArtifactPath -Content (($summaryArtifact | ConvertTo-Json -Depth 20) + "`n") + + Write-Host + Write-Host 'Capturing the completed task state' + $summaryCaptureOutput = Invoke-ShadowCliText capture $summaryArtifactPath + Write-Host $summaryCaptureOutput + $summaryBundleHash = Get-BundleHashFromCaptureOutput -Output $summaryCaptureOutput + + $startedAt = (Get-Date).ToUniversalTime().ToString('o') + $finishedAt = (Get-Date).ToUniversalTime().AddSeconds(2).ToString('o') + + $initialRevision = Invoke-ShadowApi -Path '/api/v1/revisions' -Payload @{ + packageId = 'demo-package' + parentRevisionHash = $null + artifacts = @( + @{ + bundleHash = $taskBundleHash + role = 'task_state' + } + ) + metadata = @{ + author = 'Demo Author' + message = 'Task state captured before workflow execution' + createdBy = 'demoA-runner' + timestamp = $startedAt + source = 'human' + tags = @('demo', 'task-state') + } + } + + $finalRevision = Invoke-ShadowApi -Path '/api/v1/revisions' -Payload @{ + packageId = 'demo-package' + parentRevisionHash = $initialRevision.revisionHash + artifacts = @( + @{ + bundleHash = $taskBundleHash + role = 'task_state' + }, + @{ + bundleHash = $summaryBundleHash + role = 'task_summary' + } + ) + metadata = @{ + author = 'Shadow Threads Demo' + message = 'Task progress recorded after summary generation' + createdBy = 'demoA-runner' + timestamp = $finishedAt + source = 'ai' + tags = @('demo', 'task-state', 'history') + } + } + + $execution = Invoke-ShadowApi -Path '/api/v1/executions' -Payload @{ + packageId = 'demo-package' + revisionHash = $finalRevision.revisionHash + provider = 'demo-script' + model = 'task-state-workflow' + promptHash = Get-Sha256Hex 
-Text 'demoA-task-state-workflow' + parameters = @{ + mode = 'demo' + stepCount = 3 + } + inputArtifacts = @( + @{ + bundleHash = $taskBundleHash + role = 'task_state' + } + ) + outputArtifacts = @( + @{ + bundleHash = $summaryBundleHash + role = 'task_summary' + } + ) + status = 'success' + startedAt = $startedAt + finishedAt = $finishedAt + } + + Write-Host + Write-Host 'History recorded for this task' + Write-Host 'Inspecting the latest task revision' + $revisionOutput = Invoke-ShadowCliText inspect revision $finalRevision.revisionHash + if ($revisionOutput) { + Write-Host $revisionOutput + } + + Write-Host + Write-Host 'Inspecting the execution history' + $executionOutput = Invoke-ShadowCliText inspect execution $execution.executionId + if ($executionOutput) { + Write-Host $executionOutput + } + + Write-Host + Write-Host 'Replaying the recorded task execution boundary' + $replayOutput = Invoke-ShadowCliText replay $execution.executionId + if ($replayOutput) { + Write-Host $replayOutput + } + Write-Host 'Replay verification matched the recorded task execution boundary.' +} finally { + Pop-Location -ErrorAction SilentlyContinue + Remove-Item -Recurse -Force $TmpDir -ErrorAction SilentlyContinue +} diff --git a/demo/demoA-task-state/run-demo.sh b/demo/demoA-task-state/run-demo.sh new file mode 100644 index 0000000..25bd3d2 --- /dev/null +++ b/demo/demoA-task-state/run-demo.sh @@ -0,0 +1,285 @@ +#!/usr/bin/env bash +set -euo pipefail + +SERVER_URL="${SHADOW_SERVER:-http://localhost:3001}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +TMP_DIR="$(mktemp -d)" + +cleanup() { + rm -rf "$TMP_DIR" +} + +trap cleanup EXIT + +require_command() { + if ! 
command -v "$1" >/dev/null 2>&1; then + echo "Missing required command: $1" >&2 + exit 1 + fi +} + +resolve_shadow_cli() { + if command -v shadow >/dev/null 2>&1; then + SHADOW_CMD=(shadow) + return + fi + + if [ -f "$REPO_ROOT/cli/dist/index.js" ]; then + SHADOW_CMD=(node "$REPO_ROOT/cli/dist/index.js") + return + fi + + echo "Shadow CLI not found. Build the CLI first from cli/." >&2 + exit 1 +} + +shadow_cli() { + "${SHADOW_CMD[@]}" "$@" +} + +parse_bundle_hash() { + python3 -c 'import re, sys +text = sys.stdin.read() +match = re.search(r"bundleHash:\\s*([0-9a-f]{64})", text) +if match is None: + raise SystemExit("bundleHash not found") +print(match.group(1))' +} + +require_command python3 +require_command node +resolve_shadow_cli + +cd "$SCRIPT_DIR" + +echo "Demo A: Task State Management" +echo "Server: $SERVER_URL" +echo +echo "Initializing the demo workspace" +shadow_cli init + +python3 - "$SERVER_URL" <<'PY' +import json +import sys +from pathlib import Path + +config_path = Path("shadow.config.json") +config = json.loads(config_path.read_text(encoding="utf-8")) +config["server"] = sys.argv[1] +config_path.write_text(json.dumps(config, indent=2) + "\n", encoding="utf-8") +PY + +echo +echo "Capturing the starting task state" +artifact_capture_output="$(shadow_cli capture artifact.json)" +printf '%s\n' "$artifact_capture_output" +task_bundle_hash="$(printf '%s\n' "$artifact_capture_output" | parse_bundle_hash)" + +echo +echo "Running the task with visible progress" +python3 workflow.py --json-out "$TMP_DIR/workflow-summary.json" + +python3 - "$task_bundle_hash" "$TMP_DIR/workflow-summary.json" "$TMP_DIR/summary-artifact.json" <<'PY' +import json +import sys +from pathlib import Path + +task_bundle_hash = sys.argv[1] +summary_data = json.loads(Path(sys.argv[2]).read_text(encoding="utf-8")) +artifact_path = Path(sys.argv[3]) + +artifact = { + "schema": "demo.task.summary", + "identity": { + "packageId": "demo-package", + }, + "payload": summary_data, + 
"references": [ + { + "bundleHash": task_bundle_hash, + "role": "source_task", + } + ], +} + +artifact_path.write_text(json.dumps(artifact, indent=2) + "\n", encoding="utf-8") +PY + +echo +echo "Capturing the completed task state" +summary_capture_output="$(shadow_cli capture "$TMP_DIR/summary-artifact.json")" +printf '%s\n' "$summary_capture_output" +summary_bundle_hash="$(printf '%s\n' "$summary_capture_output" | parse_bundle_hash)" + +python3 - "$SERVER_URL" "$task_bundle_hash" "$summary_bundle_hash" "$TMP_DIR/demo-state.json" <<'PY' +import hashlib +import json +import sys +import urllib.error +import urllib.request +from datetime import datetime, timedelta, timezone +from pathlib import Path + +server_url, task_bundle_hash, summary_bundle_hash, output_path = sys.argv[1:5] + + +def api(method: str, path: str, payload: dict) -> dict: + request = urllib.request.Request( + server_url.rstrip("/") + path, + data=json.dumps(payload).encode("utf-8"), + headers={ + "Accept": "application/json", + "Content-Type": "application/json", + }, + method=method, + ) + try: + with urllib.request.urlopen(request) as response: + body = json.loads(response.read().decode("utf-8")) + except urllib.error.HTTPError as error: + details = error.read().decode("utf-8", errors="replace") + raise SystemExit(f"API request failed: {error.code} {details}") from error + except urllib.error.URLError as error: + raise SystemExit(f"Unable to reach {server_url}: {error.reason}") from error + + if not body.get("ok"): + raise SystemExit(f"API request failed: {body}") + + return body["data"] + + +timestamp = datetime.now(timezone.utc) +started_at = timestamp.isoformat() +finished_at = (timestamp + timedelta(seconds=2)).isoformat() + +initial_revision = api( + "POST", + "/api/v1/revisions", + { + "packageId": "demo-package", + "parentRevisionHash": None, + "artifacts": [ + { + "bundleHash": task_bundle_hash, + "role": "task_state", + } + ], + "metadata": { + "author": "Demo Author", + "message": "Task 
state captured before workflow execution", + "createdBy": "demoA-runner", + "timestamp": started_at, + "source": "human", + "tags": ["demo", "task-state"], + }, + }, +) + +final_revision = api( + "POST", + "/api/v1/revisions", + { + "packageId": "demo-package", + "parentRevisionHash": initial_revision["revisionHash"], + "artifacts": [ + { + "bundleHash": task_bundle_hash, + "role": "task_state", + }, + { + "bundleHash": summary_bundle_hash, + "role": "task_summary", + }, + ], + "metadata": { + "author": "Shadow Threads Demo", + "message": "Task progress recorded after summary generation", + "createdBy": "demoA-runner", + "timestamp": finished_at, + "source": "ai", + "tags": ["demo", "task-state", "history"], + }, + }, +) + +execution = api( + "POST", + "/api/v1/executions", + { + "packageId": "demo-package", + "revisionHash": final_revision["revisionHash"], + "provider": "demo-script", + "model": "task-state-workflow", + "promptHash": hashlib.sha256(b"demoA-task-state-workflow").hexdigest(), + "parameters": { + "mode": "demo", + "stepCount": 3, + }, + "inputArtifacts": [ + { + "bundleHash": task_bundle_hash, + "role": "task_state", + } + ], + "outputArtifacts": [ + { + "bundleHash": summary_bundle_hash, + "role": "task_summary", + } + ], + "status": "success", + "startedAt": started_at, + "finishedAt": finished_at, + }, +) + +Path(output_path).write_text( + json.dumps( + { + "initialRevisionHash": initial_revision["revisionHash"], + "finalRevisionHash": final_revision["revisionHash"], + "executionId": execution["executionId"], + }, + indent=2, + ) + + "\n", + encoding="utf-8", +) +PY + +final_revision_hash="$(python3 - "$TMP_DIR/demo-state.json" <<'PY' +import json +import sys + +with open(sys.argv[1], encoding="utf-8") as handle: + data = json.load(handle) + +print(data["finalRevisionHash"]) +PY +)" + +execution_id="$(python3 - "$TMP_DIR/demo-state.json" <<'PY' +import json +import sys + +with open(sys.argv[1], encoding="utf-8") as handle: + data = 
json.load(handle) + +print(data["executionId"]) +PY +)" + +echo +echo "History recorded for this task" +echo "Inspecting the latest task revision" +shadow_cli inspect revision "$final_revision_hash" + +echo +echo "Inspecting the execution history" +shadow_cli inspect execution "$execution_id" + +echo +echo "Replaying the recorded task execution boundary" +shadow_cli replay "$execution_id" +echo "Replay verification matched the recorded task execution boundary." diff --git a/demo/demoA-task-state/shadow.config.json b/demo/demoA-task-state/shadow.config.json new file mode 100644 index 0000000..212b98f --- /dev/null +++ b/demo/demoA-task-state/shadow.config.json @@ -0,0 +1,4 @@ +{ + "server": "http://localhost:3001", + "workspace": ".shadow" +} diff --git a/demo/demoA-task-state/summary-artifact.json b/demo/demoA-task-state/summary-artifact.json new file mode 100644 index 0000000..6c0a20b --- /dev/null +++ b/demo/demoA-task-state/summary-artifact.json @@ -0,0 +1,36 @@ +{ + "schema": "demo.task.summary", + "identity": { + "packageId": "demo-package" + }, + "payload": { + "task": "build log parser", + "status": "complete", + "steps": [ + { + "name": "Load data", + "status": "done" + }, + { + "name": "Parse logs", + "status": "done" + }, + { + "name": "Generate summary", + "status": "done" + } + ], + "summary": { + "lineCount": 4, + "warningCount": 1, + "errorCount": 1, + "latestMessage": "2026-03-09T09:03:00Z INFO summary generated" + } + }, + "references": [ + { + "bundleHash": "69f6d56c0837b8a0689247cd9035d45104ffb6e2ee97f919ab0a0bd6777c9de3", + "role": "source_task" + } + ] +} diff --git a/demo/demoA-task-state/workflow-summary.json b/demo/demoA-task-state/workflow-summary.json new file mode 100644 index 0000000..eb68c73 --- /dev/null +++ b/demo/demoA-task-state/workflow-summary.json @@ -0,0 +1,24 @@ +{ + "task": "build log parser", + "status": "complete", + "steps": [ + { + "name": "Load data", + "status": "done" + }, + { + "name": "Parse logs", + "status": "done" + 
}, + { + "name": "Generate summary", + "status": "done" + } + ], + "summary": { + "lineCount": 4, + "warningCount": 1, + "errorCount": 1, + "latestMessage": "2026-03-09T09:03:00Z INFO summary generated" + } +} diff --git a/demo/demoA-task-state/workflow.py b/demo/demoA-task-state/workflow.py new file mode 100644 index 0000000..10442ed --- /dev/null +++ b/demo/demoA-task-state/workflow.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +import argparse +import json +from pathlib import Path + + +LOG_LINES = [ + "2026-03-09T09:00:00Z INFO startup complete", + "2026-03-09T09:01:00Z WARN retrying database connection", + "2026-03-09T09:02:00Z ERROR parser timeout", + "2026-03-09T09:03:00Z INFO summary generated", +] + + +def build_summary() -> dict: + warn_count = sum(" WARN " in line for line in LOG_LINES) + error_count = sum(" ERROR " in line for line in LOG_LINES) + return { + "task": "build log parser", + "status": "complete", + "steps": [ + {"name": "Load data", "status": "done"}, + {"name": "Parse logs", "status": "done"}, + {"name": "Generate summary", "status": "done"}, + ], + "summary": { + "lineCount": len(LOG_LINES), + "warningCount": warn_count, + "errorCount": error_count, + "latestMessage": LOG_LINES[-1], + }, + } + + +def main() -> int: + parser = argparse.ArgumentParser(description="Demo A workflow") + parser.add_argument("--json-out", help="Optional path for the generated summary JSON") + args = parser.parse_args() + + print("Step 1 Load data") + print("Step 2 Parse logs") + print("Step 3 Generate summary") + + if args.json_out: + output_path = Path(args.json_out) + output_path.write_text(json.dumps(build_summary(), indent=2) + "\n", encoding="utf-8") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/demo/demoB-workflow-debug/README.md b/demo/demoB-workflow-debug/README.md new file mode 100644 index 0000000..dcd9a54 --- /dev/null +++ b/demo/demoB-workflow-debug/README.md @@ -0,0 +1,53 @@ +# Debug AI workflows with 
replayable execution history + +## Problem + +AI workflows are difficult to debug. Failures are often hard to explain later because the exact execution boundary was never recorded in a reliable way. + +## Solution + +Shadow Threads records execution history as a stable boundary that can be inspected and replayed for verification. This demo captures a failure at step 2, inspects the execution record, and then verifies that record with replay. + +## Demo steps + +1. Initialize a local Shadow workspace. +2. Capture the workflow input state. +3. Run a workflow that fails during the transform step. +4. Record the failed execution boundary behind the script. +5. Inspect the execution history. +6. Replay the recorded execution boundary with `shadow replay`. + +## Expected output + +The visible workflow failure looks like this: + +```text +Step 1 OK +Step 2 FAILED +``` + +Later in the run, the script prints: + +```text +Replaying recorded execution boundary +``` + +`shadow replay` verifies the recorded execution boundary. It is not presented as a local rerun of the broken code path. + +## Run + +From the repository root: + +```bash +cd demo/demoB-workflow-debug +bash run-debug.sh +``` + +On Windows PowerShell: + +```powershell +Set-Location demo/demoB-workflow-debug +.\run-debug.ps1 +``` + +The script uses `SHADOW_SERVER` if it is set. Otherwise it defaults to `http://localhost:3001`. 
diff --git a/demo/demoB-workflow-debug/broken_workflow.py b/demo/demoB-workflow-debug/broken_workflow.py new file mode 100644 index 0000000..53fe0f5 --- /dev/null +++ b/demo/demoB-workflow-debug/broken_workflow.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +import argparse +import json +from pathlib import Path + + +INPUT_BATCH = [18, 21, "oops", 34] + + +def run_workflow() -> dict: + print("Step 1 OK") + + for item in INPUT_BATCH: + if not isinstance(item, int): + error_message = f"transform step expected integers but received {item!r}" + print("Step 2 FAILED") + return { + "workflow": "deterministic debug demo", + "failedStep": 2, + "stepName": "transform data", + "inputBatch": INPUT_BATCH, + "error": error_message, + } + + print("Step 2 OK") + print("Step 3 OK") + return { + "workflow": "deterministic debug demo", + "failedStep": None, + "stepName": None, + "inputBatch": INPUT_BATCH, + "output": [item * 2 for item in INPUT_BATCH], + } + + +def main() -> int: + parser = argparse.ArgumentParser(description="Demo B broken workflow") + parser.add_argument("--report-out", help="Optional path for a JSON failure report") + args = parser.parse_args() + + report = run_workflow() + if args.report_out: + Path(args.report_out).write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8") + + return 1 if report["failedStep"] else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/demo/demoB-workflow-debug/debug-state.json b/demo/demoB-workflow-debug/debug-state.json new file mode 100644 index 0000000..ba0a81f --- /dev/null +++ b/demo/demoB-workflow-debug/debug-state.json @@ -0,0 +1,4 @@ +{ + "revisionHash": "c29b55e8e919d0f772a0ebb9bfcd97a534edab87e9bca1330c2a541ccdc83975", + "executionId": "55612653-ebaf-4cd7-836c-651c2aa2ae3f" +} diff --git a/demo/demoB-workflow-debug/failure-artifact.json b/demo/demoB-workflow-debug/failure-artifact.json new file mode 100644 index 0000000..4aa1032 --- /dev/null +++ 
b/demo/demoB-workflow-debug/failure-artifact.json @@ -0,0 +1,24 @@ +{ + "schema": "demo.workflow.failure", + "identity": { + "packageId": "demo-debug-package" + }, + "payload": { + "workflow": "deterministic debug demo", + "failedStep": 2, + "stepName": "transform data", + "inputBatch": [ + 18, + 21, + "oops", + 34 + ], + "error": "transform step expected integers but received 'oops'" + }, + "references": [ + { + "bundleHash": "d83a0e407a928b1b55534d4fdfa7862c86008feeb03620ee1120e896d40b96f7", + "role": "workflow_input" + } + ] +} diff --git a/demo/demoB-workflow-debug/failure-report.json b/demo/demoB-workflow-debug/failure-report.json new file mode 100644 index 0000000..a6749cd --- /dev/null +++ b/demo/demoB-workflow-debug/failure-report.json @@ -0,0 +1,12 @@ +{ + "workflow": "deterministic debug demo", + "failedStep": 2, + "stepName": "transform data", + "inputBatch": [ + 18, + 21, + "oops", + 34 + ], + "error": "transform step expected integers but received 'oops'" +} diff --git a/demo/demoB-workflow-debug/failure-summary.json b/demo/demoB-workflow-debug/failure-summary.json new file mode 100644 index 0000000..a6749cd --- /dev/null +++ b/demo/demoB-workflow-debug/failure-summary.json @@ -0,0 +1,12 @@ +{ + "workflow": "deterministic debug demo", + "failedStep": 2, + "stepName": "transform data", + "inputBatch": [ + 18, + 21, + "oops", + 34 + ], + "error": "transform step expected integers but received 'oops'" +} diff --git a/demo/demoB-workflow-debug/run-debug.ps1 b/demo/demoB-workflow-debug/run-debug.ps1 new file mode 100644 index 0000000..5b4b092 --- /dev/null +++ b/demo/demoB-workflow-debug/run-debug.ps1 @@ -0,0 +1,279 @@ +$ErrorActionPreference = 'Stop' + +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$RepoRoot = [System.IO.Path]::GetFullPath((Join-Path $ScriptDir '..\..')) +$ServerUrl = if ($env:SHADOW_SERVER) { $env:SHADOW_SERVER } else { 'http://localhost:3001' } +$TmpDir = Join-Path ([System.IO.Path]::GetTempPath()) ("shadow-demoB-" + 
[System.Guid]::NewGuid().ToString('N')) + +function Write-Utf8File { + param( + [Parameter(Mandatory = $true)] + [string]$Path, + [Parameter(Mandatory = $true)] + [string]$Content + ) + + $directory = Split-Path -Parent $Path + if ($directory -and -not (Test-Path $directory)) { + New-Item -ItemType Directory -Path $directory | Out-Null + } + + $encoding = New-Object System.Text.UTF8Encoding($false) + [System.IO.File]::WriteAllText($Path, $Content, $encoding) +} + +function Resolve-ShadowCli { + $shadowCommand = Get-Command shadow -ErrorAction SilentlyContinue + if ($shadowCommand) { + $script:UseInstalledShadow = $true + return + } + + $fallbackPath = Join-Path $RepoRoot 'cli\dist\index.js' + if (Test-Path $fallbackPath) { + $script:UseInstalledShadow = $false + $script:ShadowCliPath = $fallbackPath + return + } + + throw 'Shadow CLI not found. Build the CLI first from cli/.' +} + +function Invoke-ShadowCliText { + param( + [Parameter(ValueFromRemainingArguments = $true)] + [string[]]$Arguments + ) + + if ($script:UseInstalledShadow) { + $output = & shadow @Arguments 2>&1 | Out-String + } else { + $output = & node $script:ShadowCliPath @Arguments 2>&1 | Out-String + } + + if ($LASTEXITCODE -ne 0) { + throw $output.Trim() + } + + return $output.TrimEnd("`r", "`n") +} + +function Get-BundleHashFromCaptureOutput { + param( + [Parameter(Mandatory = $true)] + [string]$Output + ) + + $match = [regex]::Match($Output, 'bundleHash:\s*([0-9a-f]{64})') + if (-not $match.Success) { + throw 'bundleHash not found' + } + + return $match.Groups[1].Value +} + +function Invoke-ShadowApi { + param( + [Parameter(Mandatory = $true)] + [string]$Path, + [Parameter(Mandatory = $true)] + [object]$Payload + ) + + $uri = $ServerUrl.TrimEnd('/') + $Path + $body = $Payload | ConvertTo-Json -Depth 20 + + try { + $response = Invoke-RestMethod -Method Post -Uri $uri -Headers @{ Accept = 'application/json' } -ContentType 'application/json' -Body $body + } catch { + throw "API request failed: 
$($_.Exception.Message)" + } + + if (-not $response.ok) { + throw "API request failed: $($response | ConvertTo-Json -Depth 20)" + } + + return $response.data +} + +function Get-Sha256Hex { + param( + [Parameter(Mandatory = $true)] + [string]$Text + ) + + $sha = [System.Security.Cryptography.SHA256]::Create() + try { + $bytes = [System.Text.Encoding]::UTF8.GetBytes($Text) + return (($sha.ComputeHash($bytes) | ForEach-Object { $_.ToString('x2') }) -join '') + } finally { + $sha.Dispose() + } +} + +New-Item -ItemType Directory -Path $TmpDir | Out-Null + +try { + Resolve-ShadowCli + Push-Location $ScriptDir + + Write-Host 'Demo B: Deterministic Workflow Debugging' + Write-Host "Server: $ServerUrl" + Write-Host + Write-Host 'Initializing the demo workspace' + $initOutput = Invoke-ShadowCliText init + if ($initOutput) { + Write-Host $initOutput + } + + $configPath = Join-Path $ScriptDir 'shadow.config.json' + $config = Get-Content -Raw -Path $configPath | ConvertFrom-Json + $config.server = $ServerUrl + Write-Utf8File -Path $configPath -Content (($config | ConvertTo-Json -Depth 10) + "`n") + + $workflowInputPath = Join-Path $TmpDir 'workflow-input.json' + $workflowInput = [ordered]@{ + schema = 'demo.workflow.input' + identity = [ordered]@{ + packageId = 'demo-debug-package' + } + payload = [ordered]@{ + workflow = 'deterministic debug demo' + inputBatch = @(18, 21, 'oops', 34) + expectedSteps = @('retrieve data', 'transform data', 'generate output') + } + references = @() + } + Write-Utf8File -Path $workflowInputPath -Content (($workflowInput | ConvertTo-Json -Depth 20) + "`n") + + Write-Host + Write-Host 'Capturing the workflow input state' + $inputCaptureOutput = Invoke-ShadowCliText capture $workflowInputPath + Write-Host $inputCaptureOutput + $inputBundleHash = Get-BundleHashFromCaptureOutput -Output $inputCaptureOutput + + Write-Host + Write-Host 'Running the broken workflow' + $failureReportPath = Join-Path $TmpDir 'failure-report.json' + & python (Join-Path 
$ScriptDir 'broken_workflow.py') '--report-out' $failureReportPath + $workflowExitCode = $LASTEXITCODE + if ($workflowExitCode -eq 0) { + throw 'The workflow unexpectedly succeeded.' + } + if ($workflowExitCode -ne 1) { + throw "Broken workflow exited with unexpected code $workflowExitCode." + } + + $failureArtifactPath = Join-Path $TmpDir 'failure-artifact.json' + $failureReport = Get-Content -Raw -Path $failureReportPath | ConvertFrom-Json + $failureArtifact = [ordered]@{ + schema = 'demo.workflow.failure' + identity = [ordered]@{ + packageId = 'demo-debug-package' + } + payload = $failureReport + references = @( + [ordered]@{ + bundleHash = $inputBundleHash + role = 'workflow_input' + } + ) + } + Write-Utf8File -Path $failureArtifactPath -Content (($failureArtifact | ConvertTo-Json -Depth 20) + "`n") + + Write-Host + Write-Host 'Capturing the failure report' + $failureCaptureOutput = Invoke-ShadowCliText capture $failureArtifactPath + Write-Host $failureCaptureOutput + $failureBundleHash = Get-BundleHashFromCaptureOutput -Output $failureCaptureOutput + + $startedAt = (Get-Date).ToUniversalTime().ToString('o') + $finishedAt = (Get-Date).ToUniversalTime().AddSeconds(2).ToString('o') + + $baseRevision = Invoke-ShadowApi -Path '/api/v1/revisions' -Payload @{ + packageId = 'demo-debug-package' + parentRevisionHash = $null + artifacts = @( + @{ + bundleHash = $inputBundleHash + role = 'workflow_input' + } + ) + metadata = @{ + author = 'Demo Author' + message = 'Workflow input state captured before execution' + createdBy = 'demoB-runner' + timestamp = $startedAt + source = 'human' + tags = @('demo', 'debug') + } + } + + $failureRevision = Invoke-ShadowApi -Path '/api/v1/revisions' -Payload @{ + packageId = 'demo-debug-package' + parentRevisionHash = $baseRevision.revisionHash + artifacts = @( + @{ + bundleHash = $inputBundleHash + role = 'workflow_input' + }, + @{ + bundleHash = $failureBundleHash + role = 'failure_report' + } + ) + metadata = @{ + author = 'Shadow 
Threads Demo' + message = 'Failed transform step recorded for debugging' + createdBy = 'demoB-runner' + timestamp = $finishedAt + source = 'system' + tags = @('demo', 'debug', 'failure') + } + } + + $execution = Invoke-ShadowApi -Path '/api/v1/executions' -Payload @{ + packageId = 'demo-debug-package' + revisionHash = $failureRevision.revisionHash + provider = 'demo-script' + model = 'broken-workflow' + promptHash = Get-Sha256Hex -Text 'demoB-debug-boundary' + parameters = @{ + stageCount = 3 + failureStep = 2 + } + inputArtifacts = @( + @{ + bundleHash = $inputBundleHash + role = 'workflow_input' + } + ) + outputArtifacts = @( + @{ + bundleHash = $failureBundleHash + role = 'failure_report' + } + ) + status = 'failure' + startedAt = $startedAt + finishedAt = $finishedAt + } + + Write-Host + Write-Host 'Inspecting the recorded execution history' + $executionOutput = Invoke-ShadowCliText inspect execution $execution.executionId + if ($executionOutput) { + Write-Host $executionOutput + } + + Write-Host + Write-Host 'Replaying recorded execution boundary' + $replayOutput = Invoke-ShadowCliText replay $execution.executionId + if ($replayOutput) { + Write-Host $replayOutput + } + Write-Host 'Replay verification matched the recorded failed execution boundary.' +} finally { + Pop-Location -ErrorAction SilentlyContinue + Remove-Item -Recurse -Force $TmpDir -ErrorAction SilentlyContinue +} diff --git a/demo/demoB-workflow-debug/run-debug.sh b/demo/demoB-workflow-debug/run-debug.sh new file mode 100644 index 0000000..0794c29 --- /dev/null +++ b/demo/demoB-workflow-debug/run-debug.sh @@ -0,0 +1,296 @@ +#!/usr/bin/env bash +set -euo pipefail + +SERVER_URL="${SHADOW_SERVER:-http://localhost:3001}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +TMP_DIR="$(mktemp -d)" + +cleanup() { + rm -rf "$TMP_DIR" +} + +trap cleanup EXIT + +require_command() { + if ! 
command -v "$1" >/dev/null 2>&1; then + echo "Missing required command: $1" >&2 + exit 1 + fi +} + +resolve_shadow_cli() { + if command -v shadow >/dev/null 2>&1; then + SHADOW_CMD=(shadow) + return + fi + + if [ -f "$REPO_ROOT/cli/dist/index.js" ]; then + SHADOW_CMD=(node "$REPO_ROOT/cli/dist/index.js") + return + fi + + echo "Shadow CLI not found. Build the CLI first from cli/." >&2 + exit 1 +} + +shadow_cli() { + "${SHADOW_CMD[@]}" "$@" +} + +parse_bundle_hash() { + python3 -c 'import re, sys +text = sys.stdin.read() +match = re.search(r"bundleHash:\\s*([0-9a-f]{64})", text) +if match is None: + raise SystemExit("bundleHash not found") +print(match.group(1))' +} + +require_command python3 +require_command node +resolve_shadow_cli + +cd "$SCRIPT_DIR" + +echo "Demo B: Deterministic Workflow Debugging" +echo "Server: $SERVER_URL" +echo +echo "Initializing the demo workspace" +shadow_cli init + +python3 - "$SERVER_URL" <<'PY' +import json +import sys +from pathlib import Path + +config_path = Path("shadow.config.json") +config = json.loads(config_path.read_text(encoding="utf-8")) +config["server"] = sys.argv[1] +config_path.write_text(json.dumps(config, indent=2) + "\n", encoding="utf-8") +PY + +python3 - "$TMP_DIR/workflow-input.json" <<'PY' +import json +import sys +from pathlib import Path + +artifact = { + "schema": "demo.workflow.input", + "identity": { + "packageId": "demo-debug-package", + }, + "payload": { + "workflow": "deterministic debug demo", + "inputBatch": [18, 21, "oops", 34], + "expectedSteps": [ + "retrieve data", + "transform data", + "generate output", + ], + }, + "references": [], +} + +Path(sys.argv[1]).write_text(json.dumps(artifact, indent=2) + "\n", encoding="utf-8") +PY + +echo +echo "Capturing the workflow input state" +input_capture_output="$(shadow_cli capture "$TMP_DIR/workflow-input.json")" +printf '%s\n' "$input_capture_output" +input_bundle_hash="$(printf '%s\n' "$input_capture_output" | parse_bundle_hash)" + +echo +echo "Running the 
broken workflow" +if python3 broken_workflow.py --report-out "$TMP_DIR/failure-report.json"; then + echo "The workflow unexpectedly succeeded" >&2 + exit 1 +fi + +python3 - "$input_bundle_hash" "$TMP_DIR/failure-report.json" "$TMP_DIR/failure-artifact.json" <<'PY' +import json +import sys +from pathlib import Path + +input_bundle_hash = sys.argv[1] +failure_report = json.loads(Path(sys.argv[2]).read_text(encoding="utf-8")) +artifact_path = Path(sys.argv[3]) + +artifact = { + "schema": "demo.workflow.failure", + "identity": { + "packageId": "demo-debug-package", + }, + "payload": failure_report, + "references": [ + { + "bundleHash": input_bundle_hash, + "role": "workflow_input", + } + ], +} + +artifact_path.write_text(json.dumps(artifact, indent=2) + "\n", encoding="utf-8") +PY + +echo +echo "Capturing the failure report" +failure_capture_output="$(shadow_cli capture "$TMP_DIR/failure-artifact.json")" +printf '%s\n' "$failure_capture_output" +failure_bundle_hash="$(printf '%s\n' "$failure_capture_output" | parse_bundle_hash)" + +python3 - "$SERVER_URL" "$input_bundle_hash" "$failure_bundle_hash" "$TMP_DIR/debug-state.json" <<'PY' +import hashlib +import json +import sys +import urllib.error +import urllib.request +from datetime import datetime, timedelta, timezone +from pathlib import Path + +server_url, input_bundle_hash, failure_bundle_hash, output_path = sys.argv[1:5] + + +def api(method: str, path: str, payload: dict) -> dict: + request = urllib.request.Request( + server_url.rstrip("/") + path, + data=json.dumps(payload).encode("utf-8"), + headers={ + "Accept": "application/json", + "Content-Type": "application/json", + }, + method=method, + ) + try: + with urllib.request.urlopen(request) as response: + body = json.loads(response.read().decode("utf-8")) + except urllib.error.HTTPError as error: + details = error.read().decode("utf-8", errors="replace") + raise SystemExit(f"API request failed: {error.code} {details}") from error + except urllib.error.URLError as 
error: + raise SystemExit(f"Unable to reach {server_url}: {error.reason}") from error + + if not body.get("ok"): + raise SystemExit(f"API request failed: {body}") + + return body["data"] + + +timestamp = datetime.now(timezone.utc) +started_at = timestamp.isoformat() +finished_at = (timestamp + timedelta(seconds=2)).isoformat() + +base_revision = api( + "POST", + "/api/v1/revisions", + { + "packageId": "demo-debug-package", + "parentRevisionHash": None, + "artifacts": [ + { + "bundleHash": input_bundle_hash, + "role": "workflow_input", + } + ], + "metadata": { + "author": "Demo Author", + "message": "Workflow input state captured before execution", + "createdBy": "demoB-runner", + "timestamp": started_at, + "source": "human", + "tags": ["demo", "debug"], + }, + }, +) + +failure_revision = api( + "POST", + "/api/v1/revisions", + { + "packageId": "demo-debug-package", + "parentRevisionHash": base_revision["revisionHash"], + "artifacts": [ + { + "bundleHash": input_bundle_hash, + "role": "workflow_input", + }, + { + "bundleHash": failure_bundle_hash, + "role": "failure_report", + }, + ], + "metadata": { + "author": "Shadow Threads Demo", + "message": "Failed transform step recorded for debugging", + "createdBy": "demoB-runner", + "timestamp": finished_at, + "source": "system", + "tags": ["demo", "debug", "failure"], + }, + }, +) + +execution = api( + "POST", + "/api/v1/executions", + { + "packageId": "demo-debug-package", + "revisionHash": failure_revision["revisionHash"], + "provider": "demo-script", + "model": "broken-workflow", + "promptHash": hashlib.sha256(b"demoB-debug-boundary").hexdigest(), + "parameters": { + "stageCount": 3, + "failureStep": 2, + }, + "inputArtifacts": [ + { + "bundleHash": input_bundle_hash, + "role": "workflow_input", + } + ], + "outputArtifacts": [ + { + "bundleHash": failure_bundle_hash, + "role": "failure_report", + } + ], + "status": "failure", + "startedAt": started_at, + "finishedAt": finished_at, + }, +) + 
+Path(output_path).write_text( + json.dumps( + { + "revisionHash": failure_revision["revisionHash"], + "executionId": execution["executionId"], + }, + indent=2, + ) + + "\n", + encoding="utf-8", +) +PY + +execution_id="$(python3 - "$TMP_DIR/debug-state.json" <<'PY' +import json +import sys + +with open(sys.argv[1], encoding="utf-8") as handle: + data = json.load(handle) + +print(data["executionId"]) +PY +)" + +echo +echo "Inspecting the recorded execution history" +shadow_cli inspect execution "$execution_id" + +echo +echo "Replaying recorded execution boundary" +shadow_cli replay "$execution_id" +echo "Replay verification matched the recorded failed execution boundary." diff --git a/demo/demoB-workflow-debug/shadow.config.json b/demo/demoB-workflow-debug/shadow.config.json new file mode 100644 index 0000000..212b98f --- /dev/null +++ b/demo/demoB-workflow-debug/shadow.config.json @@ -0,0 +1,4 @@ +{ + "server": "http://localhost:3001", + "workspace": ".shadow" +} diff --git a/demo/demoB-workflow-debug/workflow-input.json b/demo/demoB-workflow-debug/workflow-input.json new file mode 100644 index 0000000..82922c8 --- /dev/null +++ b/demo/demoB-workflow-debug/workflow-input.json @@ -0,0 +1,21 @@ +{ + "schema": "demo.workflow.input", + "identity": { + "packageId": "demo-debug-package" + }, + "payload": { + "workflow": "deterministic debug demo", + "inputBatch": [ + 18, + 21, + "oops", + 34 + ], + "expectedSteps": [ + "retrieve data", + "transform data", + "generate output" + ] + }, + "references": [] +} diff --git a/docs/integration-patterns.md b/docs/integration-patterns.md new file mode 100644 index 0000000..3988381 --- /dev/null +++ b/docs/integration-patterns.md @@ -0,0 +1,326 @@ +# Shadow Threads Integration Patterns + +Shadow Threads can be integrated at different points in an AI workflow depending on what you need to control. + +Some teams only want deterministic replay for a single model boundary. 
Others want to persist workflow state, audit agent steps, or branch state to compare different strategies. The core objects stay the same: + +- `artifact` stores a workflow payload +- `revision` binds artifacts into a state snapshot +- `execution` records a model or tool boundary +- `replay` verifies a recorded execution boundary + +You can use these objects through the HTTP API directly, through the Python SDK, through the CLI, or through MCP tools. The patterns below focus on how developers typically wire them into real systems. + +## 1. Deterministic workflow replay + +Use this pattern when you need to verify a recorded model or tool step later. + +Typical cases: + +- debugging AI coding pipelines +- verifying model output boundaries +- checking whether an execution changed after a code or prompt update + +### How it works + +The workflow records both state and execution boundaries in a fixed order: + +1. Capture the input or task artifact. +2. Create a revision that references that artifact. +3. Record the execution boundary with prompt hash, parameters, input artifacts, output artifacts, and status. +4. Replay the execution later to verify that the recorded boundary still matches. + +Conceptually: + +```text +artifact -> revision -> execution -> replay verification +``` + +The important point is that replay is not a best-effort rerun. It verifies the recorded execution boundary. 
+ +### Typical API calls + +- `POST /api/v1/artifacts` +- `POST /api/v1/revisions` +- `POST /api/v1/executions` +- `POST /api/v1/executions/:executionId/replay` + +### Minimal code shape + +```python +from shadowthreads import ArtifactReference, RevisionMetadata, ShadowClient + +with ShadowClient(base_url="http://localhost:3001") as client: + artifact = client.capture_artifact( + schema="workflow.input.v1", + package_id="debug-pipeline", + payload={"task": "generate patch", "input": "..."}, + ) + + ref = ArtifactReference(bundle_hash=artifact.bundle_hash, role="workflow_input") + + revision = client.create_revision( + package_id="debug-pipeline", + artifacts=[ref], + metadata=RevisionMetadata( + author="CI", + message="Record workflow input", + created_by="pipeline", + timestamp="2026-03-14T09:00:00+00:00", + source="system", + ), + ) + + execution = client.record_execution( + package_id="debug-pipeline", + revision_hash=revision.revision_hash, + provider="openai", + model="gpt-5", + prompt_hash="...", + parameters={"temperature": 0}, + input_artifacts=[ref], + output_artifacts=[ref], + status="success", + started_at="2026-03-14T09:00:00+00:00", + finished_at="2026-03-14T09:00:02+00:00", + ) + + replay = client.replay_execution(execution.execution_id) +``` + +### When to use it + +Use this pattern if the main problem is: "I need to prove what happened at one workflow boundary." + +This is usually the first Shadow Threads integration because it requires the smallest surface area and immediately improves debugging. + +## 2. Workflow state portability + +Use this pattern when workflow state needs to move across machines, environments, or model providers without relying on local memory. + +Typical cases: + +- switching model providers +- resuming workflows +- moving pipelines between environments + +### How it works + +Artifacts capture the actual workflow payload. Revisions bind those artifacts into a deterministic state snapshot. 
Once state is recorded that way, another process can load the same artifact and revision history and continue from the same boundary. + +Conceptually: + +```text +capture state -> create revision -> export or fetch later -> reconstruct state elsewhere +``` + +This is useful when you want portability at the workflow-state level, not just output logs. + +### Typical API calls + +- `POST /api/v1/artifacts` +- `POST /api/v1/revisions` +- `GET /api/v1/revisions/:revisionHash` +- `GET /api/v1/artifacts/:packageId/:bundleHash` + +If you need to move a larger closure of state between environments, migration endpoints can sit on top of the same revision and artifact graph: + +- `POST /api/v1/migration/export` +- `POST /api/v1/migration/verify` +- `POST /api/v1/migration/import` + +### Minimal code shape + +```python +revision = client.create_revision( + package_id="portable-workflow", + artifacts=[ + ArtifactReference(bundle_hash=input_hash, role="workflow_input"), + ArtifactReference(bundle_hash=context_hash, role="context"), + ], + metadata=RevisionMetadata( + author="workflow-runner", + message="Checkpoint before provider switch", + created_by="scheduler", + timestamp="2026-03-14T09:00:00+00:00", + source="system", + ), +) + +restored_revision = client.get_revision(revision.revision_hash) + +for artifact_ref in restored_revision.artifacts: + artifact = client.get_artifact(restored_revision.package_id, artifact_ref.bundle_hash) +``` + +### When to use it + +Use this pattern if the main problem is: "I need to continue the same workflow state somewhere else." + +That can mean moving from local development to CI, moving from one provider to another, or resuming a paused workflow without reconstructing hidden context by hand. + +## 3. Agent execution audit + +Use this pattern when an agent framework performs multiple steps and you want each step to leave behind an inspectable execution record. 
+ +Typical cases: + +- tool tracing +- reasoning audit +- debugging agent decisions + +### How it works + +Each agent step records: + +- the current revision it ran against +- the prompt or step identity +- parameters +- input artifacts +- output artifacts +- final status + +This gives you an audit trail at the execution boundary instead of relying only on logs. + +Conceptually: + +```text +agent step N + -> read revision + -> call tool or model + -> write output artifact + -> record execution + -> create next revision +``` + +You can repeat that loop for every model call or tool invocation that matters. + +### Typical API calls + +- `POST /api/v1/executions` +- `GET /api/v1/executions/:executionId` +- `POST /api/v1/artifacts` +- `POST /api/v1/revisions` + +### Minimal code shape + +```python +step_output = client.capture_artifact( + schema="agent.tool.output.v1", + package_id="agent-run-42", + payload={"tool": "search", "result": "..."}, +) + +output_ref = ArtifactReference(bundle_hash=step_output.bundle_hash, role="tool_output") + +execution = client.record_execution( + package_id="agent-run-42", + revision_hash=current_revision_hash, + provider="agent-runtime", + model="tool-search-step", + prompt_hash="...", + parameters={"step": "search"}, + input_artifacts=input_refs, + output_artifacts=[output_ref], + status="success", + started_at="2026-03-14T09:00:00+00:00", + finished_at="2026-03-14T09:00:01+00:00", +) + +next_revision = client.create_revision( + package_id="agent-run-42", + parent_revision_hash=current_revision_hash, + artifacts=input_refs + [output_ref], + metadata=RevisionMetadata( + author="agent-runtime", + message="Recorded search step output", + created_by="agent-loop", + timestamp="2026-03-14T09:00:01+00:00", + source="ai", + ), +) +``` + +### When to use it + +Use this pattern if the main problem is: "The agent made a decision, but I cannot tell which state and inputs produced it." 
+ +It is especially useful for multi-step harnesses where logs alone are too loose and you need a stable audit trail per step. + +## 4. Branchable workflow state + +Use this pattern when you want to fork from one known revision and evaluate multiple next steps in parallel. + +Typical cases: + +- testing alternative tool strategies +- exploring reasoning paths +- parallel workflow experiments + +### How it works + +A revision can act as a stable parent state. From that parent, you can create multiple child revisions that reference different artifacts or outputs. + +Conceptual diagram: + +```text +R1 +|- R2 branch A +`- R3 branch B +``` + +Each branch keeps a deterministic record of what changed from the same starting point. That makes comparison easier because both branches share the same parent state. + +### Typical API calls + +- `POST /api/v1/revisions` +- `GET /api/v1/revisions/:revisionHash` +- `POST /api/v1/artifacts` +- optionally `POST /api/v1/executions` for each branch step + +### Minimal code shape + +```python +branch_a = client.create_revision( + package_id="planner", + parent_revision_hash="R1", + artifacts=[ArtifactReference(bundle_hash="...", role="strategy_a")], + metadata=RevisionMetadata( + author="planner", + message="Try branch A", + created_by="experiment-runner", + timestamp="2026-03-14T09:00:00+00:00", + source="system", + ), +) + +branch_b = client.create_revision( + package_id="planner", + parent_revision_hash="R1", + artifacts=[ArtifactReference(bundle_hash="...", role="strategy_b")], + metadata=RevisionMetadata( + author="planner", + message="Try branch B", + created_by="experiment-runner", + timestamp="2026-03-14T09:00:00+00:00", + source="system", + ), +) +``` + +### When to use it + +Use this pattern if the main problem is: "I need to compare multiple next states without losing the original baseline." 
+ +This is common in planning systems, agent strategy search, and evaluation harnesses where several candidate paths should remain explicit instead of being overwritten in place. + +## Choosing a pattern + +In practice, most integrations start with one of these: + +- Start with deterministic workflow replay if you need verification at a single workflow boundary. +- Start with workflow state portability if you need to move or resume state across environments. +- Start with agent execution audit if your system already has a step loop and you need traceability. +- Start with branchable workflow state if you need to compare multiple candidate next states from the same parent. + +These patterns compose cleanly. A single system can record agent steps, branch revisions for experiments, and replay selected executions for verification using the same artifact, revision, and execution model. diff --git a/mcp/README.md b/mcp/README.md new file mode 100644 index 0000000..91a738f --- /dev/null +++ b/mcp/README.md @@ -0,0 +1,91 @@ +# Shadow Threads MCP v0 + +Shadow Threads MCP is a thin MCP server that exposes the validated Shadow Threads core actions as structured tools. + +It is intended for: + +- agent builders +- harness developers +- MCP-compatible AI clients +- workflow automation users + +## Dependency chain + +MCP depends on the Python SDK and the running Shadow Threads server. + +The dependency chain is: + +`MCP tools -> Python SDK -> Shadow Threads server` + +This package is not the runtime itself. + +## Tools + +MCP v0 exposes these tools: + +- `shadow_capture_artifact` +- `shadow_get_artifact` +- `shadow_create_revision` +- `shadow_get_revision` +- `shadow_list_revisions` +- `shadow_record_execution` +- `shadow_get_execution` +- `shadow_replay_execution` + +## Server connection + +The MCP server uses `SHADOW_SERVER` when set. 
Otherwise it defaults to: + +```text +http://localhost:3001 +``` + +## Install + +From the repository root: + +```bash +pip install -e python-sdk +pip install -e mcp +``` + +## Run + +Start the MCP server over stdio: + +```bash +shadowthreads-mcp +``` + +Or: + +```bash +python -m shadowthreads_mcp.server +``` + +## Example tool usage + +A connected MCP client can call tools such as: + +- `shadow_capture_artifact` +- `shadow_create_revision` +- `shadow_record_execution` +- `shadow_replay_execution` + +Example flow: + +1. `shadow_capture_artifact` +2. `shadow_create_revision` +3. `shadow_record_execution` +4. `shadow_replay_execution` + +## Error behavior + +Tool failures return concise structured MCP error results with: + +- `error_type` +- `status_code` +- `api_code` +- `message` + +The MCP layer does not implement hashing, local replay semantics, or local protocol logic. It delegates those behaviors to the Python SDK and the Shadow Threads server. diff --git a/mcp/examples/live_validation.py b/mcp/examples/live_validation.py new file mode 100644 index 0000000..da30043 --- /dev/null +++ b/mcp/examples/live_validation.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import hashlib +import json +import os +import sys +from datetime import datetime, timedelta, timezone +from typing import Any + +import anyio +from mcp import ClientSession +from mcp.client.stdio import StdioServerParameters, stdio_client + + +EXPECTED_TOOLS = { + "shadow_capture_artifact", + "shadow_get_artifact", + "shadow_create_revision", + "shadow_get_revision", + "shadow_list_revisions", + "shadow_record_execution", + "shadow_get_execution", + "shadow_replay_execution", +} + + +def utc_iso(dt: datetime) -> str: + return dt.astimezone(timezone.utc).isoformat() + + +def extract_tool_payload(result: Any) -> dict[str, Any]: + structured = getattr(result, "structuredContent", None) + if isinstance(structured, dict): + return structured + + for item in getattr(result, "content", []): + 
json_payload = getattr(item, "json", None) + if isinstance(json_payload, dict): + return json_payload + + text_payload = getattr(item, "text", None) + if isinstance(text_payload, str): + try: + parsed = json.loads(text_payload) + except json.JSONDecodeError: + continue + if isinstance(parsed, dict): + return parsed + + raise RuntimeError("MCP tool returned no structured payload") + + +def print_error_and_exit(error: Exception | None = None, result: Any | None = None) -> int: + if result is not None: + try: + payload = extract_tool_payload(result) + except Exception: + payload = { + "error_type": "MCPToolError", + "status_code": None, + "api_code": None, + "message": "MCP tool call failed without structured error payload", + } + else: + payload = { + "error_type": error.__class__.__name__ if error is not None else "RuntimeError", + "status_code": getattr(error, "status_code", None) if error is not None else None, + "api_code": getattr(error, "code", None) if error is not None else None, + "message": str(error) if error is not None else "Unknown error", + } + + print(json.dumps(payload, indent=2), file=sys.stderr) + return 1 + + +async def async_main() -> int: + server = os.getenv("SHADOW_SERVER", "http://localhost:3001") + print(f"Using server: {server}") + + env = dict(os.environ) + env["SHADOW_SERVER"] = server + + server_parameters = StdioServerParameters( + command="shadowthreads-mcp", + env=env, + ) + + async with stdio_client(server_parameters) as streams: + async with ClientSession(*streams) as session: + await session.initialize() + + tools = await session.list_tools() + tool_entries = getattr(tools, "tools", tools) + available = {tool.name for tool in tool_entries} + missing = EXPECTED_TOOLS - available + if missing: + raise RuntimeError(f"MCP missing tools: {sorted(missing)}") + + capture_result = await session.call_tool( + "shadow_capture_artifact", + { + "schema": "sdk.demo.task", + "package_id": "mcp-live-demo-package", + "payload": { + "task": "mcp 
live validation", + "input": "validate MCP layer end-to-end", + }, + }, + ) + if getattr(capture_result, "isError", False): + return print_error_and_exit(result=capture_result) + artifact = extract_tool_payload(capture_result) + bundle_hash = artifact["bundle_hash"] + print(f"Captured artifact: {bundle_hash}") + + started_at = datetime.now(timezone.utc) + finished_at = started_at + timedelta(seconds=1) + + revision_result = await session.call_tool( + "shadow_create_revision", + { + "package_id": "mcp-live-demo-package", + "artifacts": [ + { + "bundle_hash": bundle_hash, + "role": "task_state", + } + ], + "metadata": { + "author": "MCP Live Validation", + "message": "Create revision for MCP validation", + "created_by": "mcp-live-validation", + "timestamp": utc_iso(started_at), + "source": "human", + "tags": ["mcp", "live-validation"], + }, + }, + ) + if getattr(revision_result, "isError", False): + return print_error_and_exit(result=revision_result) + revision = extract_tool_payload(revision_result) + revision_hash = revision["revision_hash"] + print(f"Created revision: {revision_hash}") + + prompt_hash = hashlib.sha256(b"shadowthreads-mcp-live-validation").hexdigest() + execution_result = await session.call_tool( + "shadow_record_execution", + { + "package_id": "mcp-live-demo-package", + "revision_hash": revision_hash, + "provider": "python-mcp", + "model": "live-validation", + "prompt_hash": prompt_hash, + "parameters": { + "mode": "mcp-validation", + "temperature": 0, + }, + "input_artifacts": [ + { + "bundle_hash": bundle_hash, + "role": "task_state", + } + ], + "output_artifacts": [ + { + "bundle_hash": bundle_hash, + "role": "task_state", + } + ], + "status": "success", + "started_at": utc_iso(started_at), + "finished_at": utc_iso(finished_at), + }, + ) + if getattr(execution_result, "isError", False): + return print_error_and_exit(result=execution_result) + execution = extract_tool_payload(execution_result) + execution_id = execution["execution_id"] + 
print(f"Recorded execution: {execution_id}") + + replay_result = await session.call_tool( + "shadow_replay_execution", + { + "execution_id": execution_id, + }, + ) + if getattr(replay_result, "isError", False): + return print_error_and_exit(result=replay_result) + replay = extract_tool_payload(replay_result) + print(f"Replay verified: {str(bool(replay['verified'])).lower()}") + print("MCP live validation complete") + return 0 + + +def main() -> int: + try: + return anyio.run(async_main) + except Exception as error: + return print_error_and_exit(error=error) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/mcp/pyproject.toml b/mcp/pyproject.toml new file mode 100644 index 0000000..d0e6ed7 --- /dev/null +++ b/mcp/pyproject.toml @@ -0,0 +1,37 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "shadowthreads-mcp" +version = "0.1.0" +description = "Thin MCP server for the Shadow Threads Python SDK" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "mcp>=1.26.0,<2", + "shadowthreads-sdk>=0.1.0", +] +authors = [ + { name = "OpenAI Codex" }, +] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Typing :: Typed", +] + +[project.scripts] +shadowthreads-mcp = "shadowthreads_mcp.server:main" + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +shadowthreads_mcp = ["py.typed"] diff --git a/mcp/src/shadowthreads_mcp/__init__.py b/mcp/src/shadowthreads_mcp/__init__.py new file mode 100644 index 0000000..361cee9 --- /dev/null +++ b/mcp/src/shadowthreads_mcp/__init__.py @@ -0,0 +1,3 @@ +from .server import create_server, main + +__all__ = ["create_server", "main"] diff --git 
a/mcp/src/shadowthreads_mcp/config.py b/mcp/src/shadowthreads_mcp/config.py new file mode 100644 index 0000000..2ec8fe1 --- /dev/null +++ b/mcp/src/shadowthreads_mcp/config.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +import os + +DEFAULT_BASE_URL = "http://localhost:3001" +ENV_BASE_URL = "SHADOW_SERVER" + + +def resolve_base_url() -> str: + value = os.getenv(ENV_BASE_URL, DEFAULT_BASE_URL).strip() + if not value: + raise ValueError("SHADOW_SERVER must not be empty") + return value.rstrip("/") diff --git a/mcp/src/shadowthreads_mcp/errors.py b/mcp/src/shadowthreads_mcp/errors.py new file mode 100644 index 0000000..1434648 --- /dev/null +++ b/mcp/src/shadowthreads_mcp/errors.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from typing import Any + +from mcp.types import CallToolResult, TextContent +from shadowthreads.errors import ( + ShadowThreadsClientError, + ShadowThreadsError, + ShadowThreadsNetworkError, + ShadowThreadsResponseError, + ShadowThreadsServerError, +) + + +def error_payload_from_exception(error: Exception) -> dict[str, Any]: + if isinstance(error, ShadowThreadsError): + return { + "error_type": error.__class__.__name__, + "status_code": error.status_code, + "api_code": error.code, + "message": error.message, + } + + return { + "error_type": error.__class__.__name__, + "status_code": None, + "api_code": None, + "message": str(error), + } + + +def tool_error_result(error: Exception) -> CallToolResult: + payload = error_payload_from_exception(error) + return CallToolResult( + isError=True, + structuredContent=payload, + content=[TextContent(type="text", text=payload["message"])], + ) + + +def tool_success_result(payload: dict[str, Any]) -> CallToolResult: + return CallToolResult( + structuredContent=payload, + content=[], + ) + + +def is_sdk_error(error: Exception) -> bool: + return isinstance( + error, + ( + ShadowThreadsClientError, + ShadowThreadsServerError, + ShadowThreadsNetworkError, + ShadowThreadsResponseError, + ), + 
) diff --git a/mcp/src/shadowthreads_mcp/py.typed b/mcp/src/shadowthreads_mcp/py.typed new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/mcp/src/shadowthreads_mcp/py.typed @@ -0,0 +1 @@ + diff --git a/mcp/src/shadowthreads_mcp/server.py b/mcp/src/shadowthreads_mcp/server.py new file mode 100644 index 0000000..aa9d63e --- /dev/null +++ b/mcp/src/shadowthreads_mcp/server.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from mcp.server.fastmcp import FastMCP + +from .tools import register_tools + + +def create_server() -> FastMCP: + server = FastMCP( + "Shadow Threads MCP", + instructions="Expose validated Shadow Threads artifact, revision, execution, and replay tools.", + json_response=True, + ) + register_tools(server) + return server + + +def main() -> None: + create_server().run(transport="stdio") + + +if __name__ == "__main__": + main() diff --git a/mcp/src/shadowthreads_mcp/tools.py b/mcp/src/shadowthreads_mcp/tools.py new file mode 100644 index 0000000..486a2f0 --- /dev/null +++ b/mcp/src/shadowthreads_mcp/tools.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +from dataclasses import asdict +from typing import Any, Callable, Protocol + +from mcp.server.fastmcp import FastMCP +from mcp.types import CallToolResult +from pydantic import BaseModel, Field +from shadowthreads import ArtifactReference, RevisionMetadata, ShadowClient +from shadowthreads.errors import ShadowThreadsError + +from .config import resolve_base_url +from .errors import tool_error_result, tool_success_result + + +class ArtifactReferenceInput(BaseModel): + bundle_hash: str = Field(..., description="Artifact bundle hash") + role: str = Field(..., description="Artifact role in the workflow") + + +class RevisionMetadataInput(BaseModel): + author: str + message: str + created_by: str + timestamp: str + source: str + tags: list[str] = Field(default_factory=list) + + +class ClientFactory(Protocol): + def __call__(self, *, base_url: str) -> Any: + ... 
+ + +def register_tools(server: FastMCP, client_factory: ClientFactory = ShadowClient) -> None: + def with_client(callback: Callable[[Any], dict[str, Any]]) -> CallToolResult: + client = client_factory(base_url=resolve_base_url()) + try: + return tool_success_result(callback(client)) + except ShadowThreadsError as error: + return tool_error_result(error) + except Exception as error: # pragma: no cover - defensive fallback + return tool_error_result(error) + finally: + close = getattr(client, "close", None) + if callable(close): + close() + + @server.tool(name="shadow_capture_artifact") + def shadow_capture_artifact( + schema: str, + package_id: str, + payload: dict[str, Any], + references: list[ArtifactReferenceInput] | None = None, + ) -> CallToolResult: + def run(client: ShadowClient) -> dict[str, Any]: + result = client.capture_artifact( + schema=schema, + package_id=package_id, + payload=payload, + references=[ + ArtifactReference(bundle_hash=reference.bundle_hash, role=reference.role) + for reference in (references or []) + ], + ) + return asdict(result) + + return with_client(run) + + @server.tool(name="shadow_get_artifact") + def shadow_get_artifact(package_id: str, bundle_hash: str) -> CallToolResult: + def run(client: ShadowClient) -> dict[str, Any]: + return asdict(client.get_artifact(package_id, bundle_hash)) + + return with_client(run) + + @server.tool(name="shadow_create_revision") + def shadow_create_revision( + package_id: str, + artifacts: list[ArtifactReferenceInput], + metadata: RevisionMetadataInput, + parent_revision_hash: str | None = None, + ) -> CallToolResult: + def run(client: ShadowClient) -> dict[str, Any]: + created = client.create_revision( + package_id=package_id, + parent_revision_hash=parent_revision_hash, + artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role=artifact.role) + for artifact in artifacts + ], + metadata=RevisionMetadata( + author=metadata.author, + message=metadata.message, + 
created_by=metadata.created_by, + timestamp=metadata.timestamp, + source=metadata.source, # type: ignore[arg-type] + tags=list(metadata.tags), + ), + ) + return asdict(created.revision) + + return with_client(run) + + @server.tool(name="shadow_get_revision") + def shadow_get_revision(revision_hash: str) -> CallToolResult: + def run(client: ShadowClient) -> dict[str, Any]: + return asdict(client.get_revision(revision_hash)) + + return with_client(run) + + @server.tool(name="shadow_list_revisions") + def shadow_list_revisions(package_id: str, limit: int | None = None) -> CallToolResult: + def run(client: ShadowClient) -> dict[str, Any]: + return {"items": [asdict(item) for item in client.list_revisions(package_id, limit=limit)]} + + return with_client(run) + + @server.tool(name="shadow_record_execution") + def shadow_record_execution( + package_id: str, + revision_hash: str, + provider: str, + model: str, + prompt_hash: str, + parameters: dict[str, Any], + input_artifacts: list[ArtifactReferenceInput], + output_artifacts: list[ArtifactReferenceInput], + status: str, + started_at: str, + finished_at: str, + ) -> CallToolResult: + def run(client: ShadowClient) -> dict[str, Any]: + created = client.record_execution( + package_id=package_id, + revision_hash=revision_hash, + provider=provider, + model=model, + prompt_hash=prompt_hash, + parameters=parameters, + input_artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role=artifact.role) + for artifact in input_artifacts + ], + output_artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role=artifact.role) + for artifact in output_artifacts + ], + status=status, # type: ignore[arg-type] + started_at=started_at, + finished_at=finished_at, + ) + return asdict(created.execution) + + return with_client(run) + + @server.tool(name="shadow_get_execution") + def shadow_get_execution(execution_id: str) -> CallToolResult: + def run(client: ShadowClient) -> dict[str, Any]: + return 
asdict(client.get_execution(execution_id)) + + return with_client(run) + + @server.tool(name="shadow_replay_execution") + def shadow_replay_execution(execution_id: str) -> CallToolResult: + def run(client: ShadowClient) -> dict[str, Any]: + return asdict(client.replay_execution(execution_id)) + + return with_client(run) diff --git a/mcp/tests/test_errors.py b/mcp/tests/test_errors.py new file mode 100644 index 0000000..8631f64 --- /dev/null +++ b/mcp/tests/test_errors.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import pathlib +import sys +import unittest + +import anyio + +sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1] / "src")) + +from shadowthreads.errors import ShadowThreadsClientError # type: ignore[import-not-found] +from shadowthreads_mcp.errors import error_payload_from_exception, tool_error_result +from shadowthreads_mcp.tools import register_tools + + +class ErrorClient: + def __init__(self, *, base_url: str) -> None: + self.base_url = base_url + + def close(self) -> None: + pass + + def get_revision(self, revision_hash: str): + raise ShadowThreadsClientError( + "Revision not found", + code="ERR_REVISION_NOT_FOUND", + status_code=404, + ) + + +class MCPErrorTests(unittest.TestCase): + def test_error_payload_is_structured_and_concise(self) -> None: + payload = error_payload_from_exception( + ShadowThreadsClientError( + "Revision not found", + code="ERR_REVISION_NOT_FOUND", + status_code=404, + ) + ) + self.assertEqual( + payload, + { + "error_type": "ShadowThreadsClientError", + "status_code": 404, + "api_code": "ERR_REVISION_NOT_FOUND", + "message": "Revision not found", + }, + ) + + def test_tool_returns_structured_mcp_error_result(self) -> None: + from mcp.server.fastmcp import FastMCP + + server = FastMCP("test-shadow-mcp", json_response=True) + register_tools(server, client_factory=ErrorClient) + + async def run(): + return await server.call_tool("shadow_get_revision", {"revision_hash": "a" * 64}) + + result = 
anyio.run(run) + self.assertTrue(result.isError) + self.assertEqual(result.structuredContent["error_type"], "ShadowThreadsClientError") + self.assertEqual(result.structuredContent["status_code"], 404) + self.assertEqual(result.structuredContent["api_code"], "ERR_REVISION_NOT_FOUND") + self.assertEqual(result.structuredContent["message"], "Revision not found") + self.assertEqual(result.content[0].text, "Revision not found") + + +if __name__ == "__main__": + unittest.main() diff --git a/mcp/tests/test_tools.py b/mcp/tests/test_tools.py new file mode 100644 index 0000000..75337d3 --- /dev/null +++ b/mcp/tests/test_tools.py @@ -0,0 +1,283 @@ +from __future__ import annotations + +import pathlib +import sys +import unittest + +import anyio + +sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1] / "src")) + +from shadowthreads import ( # type: ignore[import-not-found] + ArtifactBundle, + ArtifactCaptureResult, + ArtifactIdentity, + ArtifactRecord, + ArtifactReference, + ExecutionRecord, + ReplayExecutionResult, + RevisionRecord, +) +from shadowthreads_mcp.server import create_server +from shadowthreads_mcp.tools import register_tools + + +class FakeClient: + last_instance: "FakeClient | None" = None + + def __init__(self, *, base_url: str) -> None: + self.base_url = base_url + self.closed = False + self.calls: list[tuple[str, tuple, dict]] = [] + FakeClient.last_instance = self + + def close(self) -> None: + self.closed = True + + def capture_artifact(self, **kwargs): + self.calls.append(("capture_artifact", (), kwargs)) + return ArtifactCaptureResult( + id="artifact-1", + bundle_hash="a" * 64, + created_at="2026-03-09T09:00:00.000Z", + ) + + def get_artifact(self, package_id: str, bundle_hash: str): + self.calls.append(("get_artifact", (package_id, bundle_hash), {})) + return ArtifactRecord( + id="artifact-1", + bundle_hash=bundle_hash, + created_at="2026-03-09T09:00:00.000Z", + artifact_bundle=ArtifactBundle( + schema="sdk.demo.task", + 
identity=ArtifactIdentity(package_id=package_id), + payload={"task": "demo"}, + references=[], + ), + ) + + def create_revision(self, **kwargs): + self.calls.append(("create_revision", (), kwargs)) + return type( + "CreateRevisionResult", + (), + { + "revision": RevisionRecord( + revision_hash="b" * 64, + package_id=kwargs["package_id"], + parent_revision_hash=kwargs["parent_revision_hash"], + author=kwargs["metadata"].author, + message=kwargs["metadata"].message, + created_by=kwargs["metadata"].created_by, + timestamp=kwargs["metadata"].timestamp, + source=kwargs["metadata"].source, + metadata=kwargs["metadata"].to_payload(), + created_at="2026-03-09T09:00:01.000Z", + artifacts=list(kwargs["artifacts"]), + ) + }, + )() + + def get_revision(self, revision_hash: str): + self.calls.append(("get_revision", (revision_hash,), {})) + return RevisionRecord( + revision_hash=revision_hash, + package_id="pkg-1", + parent_revision_hash=None, + author="author", + message="message", + created_by="creator", + timestamp="2026-03-09T09:00:00+00:00", + source="human", + metadata={}, + created_at="2026-03-09T09:00:01.000Z", + artifacts=[ArtifactReference(bundle_hash="a" * 64, role="task_state")], + ) + + def list_revisions(self, package_id: str, limit: int | None = None): + self.calls.append(("list_revisions", (package_id,), {"limit": limit})) + return [self.get_revision("c" * 64)] + + def record_execution(self, **kwargs): + self.calls.append(("record_execution", (), kwargs)) + return type( + "CreateExecutionResult", + (), + { + "execution": ExecutionRecord( + execution_id="11111111-1111-4111-8111-111111111111", + package_id=kwargs["package_id"], + revision_hash=kwargs["revision_hash"], + provider=kwargs["provider"], + model=kwargs["model"], + prompt_hash=kwargs["prompt_hash"], + parameters=kwargs["parameters"], + input_artifacts=list(kwargs["input_artifacts"]), + output_artifacts=list(kwargs["output_artifacts"]), + result_hash="d" * 64, + status=kwargs["status"], + 
started_at=kwargs["started_at"], + finished_at=kwargs["finished_at"], + created_at="2026-03-09T09:00:02.000Z", + ) + }, + )() + + def get_execution(self, execution_id: str): + self.calls.append(("get_execution", (execution_id,), {})) + return ExecutionRecord( + execution_id=execution_id, + package_id="pkg-1", + revision_hash="b" * 64, + provider="provider", + model="model", + prompt_hash="e" * 64, + parameters={"mode": "demo"}, + input_artifacts=[ArtifactReference(bundle_hash="a" * 64, role="task_state")], + output_artifacts=[ArtifactReference(bundle_hash="a" * 64, role="task_state")], + result_hash="d" * 64, + status="success", + started_at="2026-03-09T09:00:00+00:00", + finished_at="2026-03-09T09:00:02+00:00", + created_at="2026-03-09T09:00:02.000Z", + ) + + def replay_execution(self, execution_id: str): + self.calls.append(("replay_execution", (execution_id,), {})) + return ReplayExecutionResult( + execution_id=execution_id, + verified=True, + result_hash="d" * 64, + ) + + +class MCPToolTests(unittest.TestCase): + def build_server(self): + from mcp.server.fastmcp import FastMCP + + server = FastMCP("test-shadow-mcp", json_response=True) + register_tools(server, client_factory=FakeClient) + return server + + def test_registers_expected_tool_names(self) -> None: + server = self.build_server() + + async def run(): + tools = await server.list_tools() + return sorted(tool.name for tool in tools) + + names = anyio.run(run) + self.assertEqual( + names, + [ + "shadow_capture_artifact", + "shadow_create_revision", + "shadow_get_artifact", + "shadow_get_execution", + "shadow_get_revision", + "shadow_list_revisions", + "shadow_record_execution", + "shadow_replay_execution", + ], + ) + + def test_capture_artifact_delegates_to_sdk(self) -> None: + server = self.build_server() + + async def run(): + return await server.call_tool( + "shadow_capture_artifact", + { + "schema": "sdk.demo.task", + "package_id": "pkg-1", + "payload": {"task": "demo"}, + "references": 
[{"bundle_hash": "a" * 64, "role": "task_state"}], + }, + ) + + result = anyio.run(run) + self.assertFalse(result.isError) + self.assertEqual(result.structuredContent["bundle_hash"], "a" * 64) + assert FakeClient.last_instance is not None + call_name, _, kwargs = FakeClient.last_instance.calls[0] + self.assertEqual(call_name, "capture_artifact") + self.assertEqual(kwargs["package_id"], "pkg-1") + self.assertEqual(kwargs["references"][0].role, "task_state") + self.assertTrue(FakeClient.last_instance.closed) + + def test_create_revision_returns_full_revision_record(self) -> None: + server = self.build_server() + + async def run(): + return await server.call_tool( + "shadow_create_revision", + { + "package_id": "pkg-1", + "artifacts": [{"bundle_hash": "a" * 64, "role": "task_state"}], + "metadata": { + "author": "author", + "message": "message", + "created_by": "creator", + "timestamp": "2026-03-09T09:00:00+00:00", + "source": "human", + "tags": ["sdk"], + }, + }, + ) + + result = anyio.run(run) + self.assertFalse(result.isError) + self.assertEqual(result.structuredContent["revision_hash"], "b" * 64) + self.assertEqual(result.structuredContent["artifacts"][0]["role"], "task_state") + + def test_record_execution_returns_full_execution_record(self) -> None: + server = self.build_server() + + async def run(): + return await server.call_tool( + "shadow_record_execution", + { + "package_id": "pkg-1", + "revision_hash": "b" * 64, + "provider": "provider", + "model": "model", + "prompt_hash": "e" * 64, + "parameters": {"mode": "demo"}, + "input_artifacts": [{"bundle_hash": "a" * 64, "role": "task_state"}], + "output_artifacts": [{"bundle_hash": "a" * 64, "role": "task_state"}], + "status": "success", + "started_at": "2026-03-09T09:00:00+00:00", + "finished_at": "2026-03-09T09:00:02+00:00", + }, + ) + + result = anyio.run(run) + self.assertFalse(result.isError) + self.assertEqual( + result.structuredContent["execution_id"], + "11111111-1111-4111-8111-111111111111", + ) + 
+        self.assertEqual(result.structuredContent["status"], "success")
+
+    def test_replay_execution_uses_execution_id_only(self) -> None:
+        server = self.build_server()
+
+        async def run():
+            return await server.call_tool(
+                "shadow_replay_execution",
+                {"execution_id": "11111111-1111-4111-8111-111111111111"},
+            )
+
+        result = anyio.run(run)
+        self.assertFalse(result.isError)
+        self.assertTrue(result.structuredContent["verified"])
+        assert FakeClient.last_instance is not None
+        self.assertEqual(FakeClient.last_instance.calls[0][0], "replay_execution")
+        self.assertEqual(
+            FakeClient.last_instance.calls[0][1][0],
+            "11111111-1111-4111-8111-111111111111",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python-sdk/README.md b/python-sdk/README.md
new file mode 100644
index 0000000..115a5d8
--- /dev/null
+++ b/python-sdk/README.md
@@ -0,0 +1,133 @@
+# Shadow Threads Python SDK v0
+
+The Shadow Threads Python SDK is a thin convenience wrapper over the validated Shadow Threads HTTP API.
+
+It wraps the current core endpoints for:
+
+- artifact capture and inspection
+- revision creation and inspection
+- execution recording and replay
+- migration export, verify, and import
+
+This SDK is transport convenience, not local protocol authority.
+
+All hashing, replay verification, and protocol decisions are made by the server.
+It does not implement local hashing, local replay semantics, or local protocol authority.
+
+## Install
+
+From the repository root:
+
+```bash
+pip install -e python-sdk
+```
+
+## Create a client
+
+```python
+from shadowthreads import ShadowClient
+
+client = ShadowClient(base_url="http://localhost:3001")
+```
+
+If `base_url` is omitted, the client uses `SHADOW_SERVER` when it is set, otherwise `http://localhost:3001`.
+ +## What it wraps + +The SDK covers the currently validated HTTP API surface: + +- `POST /api/v1/artifacts` +- `GET /api/v1/artifacts/:packageId/:bundleHash` +- `POST /api/v1/artifacts/:packageId/:bundleHash/verify` +- `POST /api/v1/revisions` +- `GET /api/v1/revisions/:revisionHash` +- `GET /api/v1/revisions/package/:packageId` +- `POST /api/v1/executions` +- `GET /api/v1/executions/:executionId` +- `POST /api/v1/executions/:executionId/replay` +- `POST /api/v1/migration/export` +- `POST /api/v1/migration/verify` +- `POST /api/v1/migration/import` + +## Core example + +```python +from shadowthreads import ArtifactReference, RevisionMetadata, ShadowClient + +client = ShadowClient(base_url="http://localhost:3001") + +artifact = client.capture_artifact( + schema="demo.task", + package_id="sdk-demo-package", + payload={ + "task": "summarize logs", + "input": "2026-03-09T09:00:00Z INFO startup complete", + }, +) + +revision = client.create_revision( + package_id="sdk-demo-package", + artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role="task_state"), + ], + metadata=RevisionMetadata( + author="SDK Demo", + message="Initial task state", + created_by="python-sdk", + timestamp="2026-03-09T09:00:00+00:00", + source="human", + tags=["sdk", "demo"], + ), +) + +execution = client.record_execution( + package_id="sdk-demo-package", + revision_hash=revision.revision_hash, + provider="demo-script", + model="sdk-example", + prompt_hash="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + parameters={"temperature": 0}, + input_artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role="task_state"), + ], + output_artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role="task_state"), + ], + status="success", + started_at="2026-03-09T09:00:00+00:00", + finished_at="2026-03-09T09:00:02+00:00", +) + +replay = client.replay_execution(execution.execution_id) + +print(artifact.bundle_hash) +print(revision.revision_hash) 
+print(execution.result_hash) +print(replay.verified) +``` + +## Replay convenience + +`client.replay_execution(execution_id)` can auto-load the current stored execution record and reconstruct the replay body from the validated fields already present in that record: + +- `promptHash` +- `parameters` +- `inputArtifacts` +- `outputArtifacts` +- `status` + +It does not add local replay logic or infer extra fields. + +If you want full control, you can pass the replay fields explicitly. + +## Migrations + +Migration methods use the server's existing zip-path API shape: + +```python +exported = client.export_migration(root_revision_hash) +verified = client.verify_migration(exported.zip_path) +imported = client.import_migration(exported.zip_path) +``` + +The SDK does not upload local files. It passes the server-visible `zipPath` expected by the current runtime. diff --git a/python-sdk/examples/live_validation.py b/python-sdk/examples/live_validation.py new file mode 100644 index 0000000..29f1c64 --- /dev/null +++ b/python-sdk/examples/live_validation.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import hashlib +import os +import sys +from datetime import datetime, timedelta, timezone + +from shadowthreads import ArtifactReference, RevisionMetadata, ShadowClient +from shadowthreads.errors import ShadowThreadsError + + +PACKAGE_ID = "sdk-live-demo-package" +TASK_ROLE = "task_state" +BASE_URL = os.getenv("SHADOW_SERVER", "http://localhost:3001") + + +def compute_prompt_hash() -> str: + return hashlib.sha256(b"shadowthreads-sdk-live-validation").hexdigest() + + +def utc_now() -> datetime: + return datetime.now(timezone.utc) + + +def main() -> int: + client = ShadowClient(base_url=BASE_URL) + + try: + print(f"Using server: {client.base_url}") + + artifact = client.capture_artifact( + schema="sdk.demo.task", + package_id=PACKAGE_ID, + payload={ + "task": "sdk live validation", + "input": "validate Python SDK against live local server", + }, + ) + print(f"Captured 
artifact: {artifact.bundle_hash}") + + started_at = utc_now() + finished_at = started_at + timedelta(seconds=2) + + revision = client.create_revision( + package_id=PACKAGE_ID, + artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role=TASK_ROLE), + ], + metadata=RevisionMetadata( + author="Python SDK Live Validation", + message="Create revision for live SDK validation", + created_by="python-sdk-live-validation", + timestamp=started_at.isoformat(), + source="human", + tags=["sdk", "live-validation"], + ), + ) + print(f"Created revision: {revision.revision_hash}") + + execution = client.record_execution( + package_id=PACKAGE_ID, + revision_hash=revision.revision_hash, + provider="python-sdk", + model="live-validation", + prompt_hash=compute_prompt_hash(), + parameters={ + "mode": "live-validation", + "temperature": 0, + }, + input_artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role=TASK_ROLE), + ], + output_artifacts=[ + ArtifactReference(bundle_hash=artifact.bundle_hash, role=TASK_ROLE), + ], + status="success", + started_at=started_at.isoformat(), + finished_at=finished_at.isoformat(), + ) + print(f"Recorded execution: {execution.execution_id}") + + replay = client.replay_execution(execution.execution_id) + print(f"Replay verified: {str(replay.verified).lower()}") + print("SDK live validation complete") + return 0 + except ShadowThreadsError as error: + print(f"SDK live validation failed: {error.message}", file=sys.stderr) + if error.code: + print(f"API code: {error.code}", file=sys.stderr) + if error.status_code is not None: + print(f"HTTP status: {error.status_code}", file=sys.stderr) + return 1 + finally: + client.close() + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/python-sdk/pyproject.toml b/python-sdk/pyproject.toml new file mode 100644 index 0000000..d76bf7a --- /dev/null +++ b/python-sdk/pyproject.toml @@ -0,0 +1,34 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = 
"setuptools.build_meta" + +[project] +name = "shadowthreads-sdk" +version = "0.1.0" +description = "Thin Python SDK for the Shadow Threads HTTP API" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "requests>=2.31.0,<3", +] +authors = [ + { name = "OpenAI Codex" }, +] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "License :: OSI Approved :: MIT License", + "Typing :: Typed", +] + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +shadowthreads = ["py.typed"] diff --git a/python-sdk/src/shadowthreads/__init__.py b/python-sdk/src/shadowthreads/__init__.py new file mode 100644 index 0000000..8a8d43b --- /dev/null +++ b/python-sdk/src/shadowthreads/__init__.py @@ -0,0 +1,56 @@ +from .client import ShadowClient +from .config import DEFAULT_BASE_URL, ENV_BASE_URL +from .errors import ( + ShadowThreadsClientError, + ShadowThreadsError, + ShadowThreadsHTTPError, + ShadowThreadsNetworkError, + ShadowThreadsResponseError, + ShadowThreadsServerError, +) +from .models import ( + ArtifactBundle, + ArtifactCaptureResult, + ArtifactIdentity, + ArtifactRecord, + ArtifactReference, + ArtifactVerifyResult, + ExecutionCreateResult, + ExecutionRecord, + MigrationExportResult, + MigrationImportResult, + MigrationManifest, + MigrationVerifyResult, + ReplayExecutionResult, + RevisionCreateResult, + RevisionMetadata, + RevisionRecord, +) + +__all__ = [ + "ArtifactBundle", + "ArtifactCaptureResult", + "ArtifactIdentity", + "ArtifactRecord", + "ArtifactReference", + "ArtifactVerifyResult", + "DEFAULT_BASE_URL", + "ENV_BASE_URL", + "ExecutionCreateResult", + "ExecutionRecord", + "MigrationExportResult", + "MigrationImportResult", + "MigrationManifest", + "MigrationVerifyResult", + 
"ReplayExecutionResult", + "RevisionCreateResult", + "RevisionMetadata", + "RevisionRecord", + "ShadowClient", + "ShadowThreadsClientError", + "ShadowThreadsError", + "ShadowThreadsHTTPError", + "ShadowThreadsNetworkError", + "ShadowThreadsResponseError", + "ShadowThreadsServerError", +] diff --git a/python-sdk/src/shadowthreads/_http.py b/python-sdk/src/shadowthreads/_http.py new file mode 100644 index 0000000..ed0883b --- /dev/null +++ b/python-sdk/src/shadowthreads/_http.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +from typing import Any + +import requests + +from .errors import ( + ShadowThreadsClientError, + ShadowThreadsNetworkError, + ShadowThreadsResponseError, + ShadowThreadsServerError, +) + + +class ShadowHTTPTransport: + def __init__( + self, + *, + base_url: str, + timeout: float, + session: requests.Session | None = None, + ) -> None: + self.base_url = base_url.rstrip("/") + self.timeout = timeout + self.session = session or requests.Session() + self._owns_session = session is None + + headers = getattr(self.session, "headers", None) + if headers is not None: + headers.setdefault("Accept", "application/json") + headers.setdefault("Content-Type", "application/json") + + def request( + self, + method: str, + path: str, + *, + json_body: dict[str, Any] | None = None, + params: dict[str, Any] | None = None, + ) -> Any: + url = f"{self.base_url}{path}" + try: + response = self.session.request( + method=method, + url=url, + json=json_body, + params=params, + timeout=self.timeout, + ) + except requests.RequestException as error: + raise ShadowThreadsNetworkError(str(error)) from error + + return self._unwrap_response(response) + + def close(self) -> None: + if self._owns_session: + self.session.close() + + def _unwrap_response(self, response: requests.Response) -> Any: + try: + payload = response.json() + except ValueError as error: + raise ShadowThreadsResponseError( + "Invalid JSON response from Shadow Threads server", + 
status_code=response.status_code, + body=response.text, + ) from error + + if not isinstance(payload, dict) or not isinstance(payload.get("ok"), bool): + raise ShadowThreadsResponseError( + "Invalid API envelope from Shadow Threads server", + status_code=response.status_code, + body=payload, + ) + + if payload["ok"] is True: + if "data" not in payload: + raise ShadowThreadsResponseError( + "Successful API response is missing data", + status_code=response.status_code, + body=payload, + ) + return payload["data"] + + error_payload = payload.get("error") + if not isinstance(error_payload, dict): + raise ShadowThreadsResponseError( + "Error API response is missing error details", + status_code=response.status_code, + body=payload, + ) + + code = error_payload.get("code") + message = error_payload.get("message") + if not isinstance(code, str) or not isinstance(message, str): + raise ShadowThreadsResponseError( + "Error API response has invalid error details", + status_code=response.status_code, + body=payload, + ) + + if 500 <= response.status_code: + raise ShadowThreadsServerError(message, code=code, status_code=response.status_code, body=payload) + raise ShadowThreadsClientError(message, code=code, status_code=response.status_code, body=payload) diff --git a/python-sdk/src/shadowthreads/client.py b/python-sdk/src/shadowthreads/client.py new file mode 100644 index 0000000..cced7c8 --- /dev/null +++ b/python-sdk/src/shadowthreads/client.py @@ -0,0 +1,309 @@ +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import Any +from urllib.parse import quote + +import requests + +from ._http import ShadowHTTPTransport +from .config import resolve_base_url +from .errors import ShadowThreadsResponseError +from .models import ( + ArtifactCaptureResult, + ArtifactRecord, + ArtifactReference, + ArtifactVerifyResult, + ExecutionCreateResult, + ExecutionRecord, + ExecutionStatus, + MigrationExportResult, + MigrationImportResult, + 
MigrationVerifyResult, + ReplayExecutionResult, + RevisionCreateResult, + RevisionMetadata, + RevisionRecord, +) + +ArtifactReferenceInput = ArtifactReference | Mapping[str, Any] +RevisionMetadataInput = RevisionMetadata | Mapping[str, Any] + + +class ShadowClient: + def __init__( + self, + *, + base_url: str | None = None, + timeout: float = 30.0, + session: requests.Session | None = None, + ) -> None: + self.base_url = resolve_base_url(base_url) + self.timeout = timeout + self._transport = ShadowHTTPTransport(base_url=self.base_url, timeout=timeout, session=session) + + def close(self) -> None: + self._transport.close() + + def __enter__(self) -> "ShadowClient": + return self + + def __exit__(self, exc_type: object, exc: object, tb: object) -> None: + self.close() + + def capture_artifact( + self, + *, + schema: str, + package_id: str, + payload: Any, + references: Sequence[ArtifactReferenceInput] = (), + revision_id: str | None = None, + revision_hash: str | None = None, + ) -> ArtifactCaptureResult: + data = self._transport.request( + "POST", + "/api/v1/artifacts", + json_body={ + "schema": schema, + "identity": { + "packageId": package_id, + "revisionId": revision_id, + "revisionHash": revision_hash, + }, + "payload": payload, + "references": self._serialize_artifact_references(references), + }, + ) + return ArtifactCaptureResult.from_dict(data) + + def get_artifact(self, package_id: str, bundle_hash: str) -> ArtifactRecord: + data = self._transport.request( + "GET", + f"/api/v1/artifacts/{quote(package_id, safe='')}/{quote(bundle_hash, safe='')}", + ) + return ArtifactRecord.from_dict(data) + + def verify_artifact(self, package_id: str, bundle_hash: str) -> ArtifactVerifyResult: + data = self._transport.request( + "POST", + f"/api/v1/artifacts/{quote(package_id, safe='')}/{quote(bundle_hash, safe='')}/verify", + json_body={}, + ) + return ArtifactVerifyResult.from_dict(data) + + def create_revision( + self, + *, + package_id: str, + artifacts: 
Sequence[ArtifactReferenceInput], + metadata: RevisionMetadataInput, + parent_revision_hash: str | None = None, + ) -> RevisionCreateResult: + data = self._transport.request( + "POST", + "/api/v1/revisions", + json_body={ + "packageId": package_id, + "parentRevisionHash": parent_revision_hash, + "artifacts": self._serialize_artifact_references(artifacts), + "metadata": self._serialize_revision_metadata(metadata), + }, + ) + return RevisionCreateResult.from_dict(data) + + def get_revision(self, revision_hash: str) -> RevisionRecord: + data = self._transport.request( + "GET", + f"/api/v1/revisions/{quote(revision_hash, safe='')}", + ) + return RevisionRecord.from_dict(data) + + def list_revisions(self, package_id: str, *, limit: int | None = None) -> list[RevisionRecord]: + params = {"limit": limit} if limit is not None else None + data = self._transport.request( + "GET", + f"/api/v1/revisions/package/{quote(package_id, safe='')}", + params=params, + ) + if not isinstance(data, dict): + raise ShadowThreadsResponseError("Invalid revision list response", body=data) + items = data.get("items") + if not isinstance(items, list): + raise ShadowThreadsResponseError("Revision list response is missing items", body=data) + return [RevisionRecord.from_dict(item) for item in items] + + def record_execution( + self, + *, + package_id: str, + revision_hash: str, + provider: str, + model: str, + prompt_hash: str, + parameters: Any, + input_artifacts: Sequence[ArtifactReferenceInput], + output_artifacts: Sequence[ArtifactReferenceInput], + status: ExecutionStatus, + started_at: str, + finished_at: str, + ) -> ExecutionCreateResult: + data = self._transport.request( + "POST", + "/api/v1/executions", + json_body={ + "packageId": package_id, + "revisionHash": revision_hash, + "provider": provider, + "model": model, + "promptHash": prompt_hash, + "parameters": parameters, + "inputArtifacts": self._serialize_artifact_references(input_artifacts), + "outputArtifacts": 
self._serialize_artifact_references(output_artifacts), + "status": status, + "startedAt": started_at, + "finishedAt": finished_at, + }, + ) + return ExecutionCreateResult.from_dict(data) + + def get_execution(self, execution_id: str) -> ExecutionRecord: + data = self._transport.request( + "GET", + f"/api/v1/executions/{quote(execution_id, safe='')}", + ) + return ExecutionRecord.from_dict(data) + + def replay_execution( + self, + execution_id: str, + *, + prompt_hash: str | None = None, + parameters: Any | None = None, + input_artifacts: Sequence[ArtifactReferenceInput] | None = None, + output_artifacts: Sequence[ArtifactReferenceInput] | None = None, + status: ExecutionStatus | None = None, + ) -> ReplayExecutionResult: + replay_body = self._build_replay_payload( + execution_id=execution_id, + prompt_hash=prompt_hash, + parameters=parameters, + input_artifacts=input_artifacts, + output_artifacts=output_artifacts, + status=status, + ) + data = self._transport.request( + "POST", + f"/api/v1/executions/{quote(execution_id, safe='')}/replay", + json_body=replay_body, + ) + return ReplayExecutionResult.from_dict(data) + + def export_migration(self, root_revision_hash: str) -> MigrationExportResult: + data = self._transport.request( + "POST", + "/api/v1/migration/export", + json_body={"rootRevisionHash": root_revision_hash}, + ) + return MigrationExportResult.from_dict(data) + + def verify_migration(self, zip_path: str) -> MigrationVerifyResult: + data = self._transport.request( + "POST", + "/api/v1/migration/verify", + json_body={"zipPath": zip_path}, + ) + return MigrationVerifyResult.from_dict(data) + + def import_migration(self, zip_path: str) -> MigrationImportResult: + data = self._transport.request( + "POST", + "/api/v1/migration/import", + json_body={"zipPath": zip_path}, + ) + return MigrationImportResult.from_dict(data) + + def _build_replay_payload( + self, + *, + execution_id: str, + prompt_hash: str | None, + parameters: Any | None, + input_artifacts: 
Sequence[ArtifactReferenceInput] | None, + output_artifacts: Sequence[ArtifactReferenceInput] | None, + status: ExecutionStatus | None, + ) -> dict[str, Any]: + explicit_values = [prompt_hash, parameters, input_artifacts, output_artifacts, status] + if all(value is None for value in explicit_values): + execution = self.get_execution(execution_id) + return { + "promptHash": execution.prompt_hash, + "parameters": execution.parameters, + "inputArtifacts": [artifact.to_payload() for artifact in execution.input_artifacts], + "outputArtifacts": [artifact.to_payload() for artifact in execution.output_artifacts], + "status": execution.status, + } + + if any(value is None for value in explicit_values): + raise ValueError( + "Explicit replay execution requires prompt_hash, parameters, input_artifacts, output_artifacts, and status." + ) + + return { + "promptHash": prompt_hash, + "parameters": parameters, + "inputArtifacts": self._serialize_artifact_references(input_artifacts or ()), + "outputArtifacts": self._serialize_artifact_references(output_artifacts or ()), + "status": status, + } + + def _serialize_artifact_references( + self, + references: Sequence[ArtifactReferenceInput], + ) -> list[dict[str, Any]]: + serialized: list[dict[str, Any]] = [] + for reference in references: + if isinstance(reference, ArtifactReference): + serialized.append(reference.to_payload()) + continue + + bundle_hash = reference.get("bundle_hash", reference.get("bundleHash")) + role = reference.get("role") + if not isinstance(bundle_hash, str) or not isinstance(role, str): + raise ValueError("Artifact references require bundle_hash/bundleHash and role.") + serialized.append( + { + "bundleHash": bundle_hash, + "role": role, + } + ) + return serialized + + def _serialize_revision_metadata(self, metadata: RevisionMetadataInput) -> dict[str, Any]: + if isinstance(metadata, RevisionMetadata): + return metadata.to_payload() + + author = metadata.get("author") + message = metadata.get("message") + 
created_by = metadata.get("created_by", metadata.get("createdBy")) + timestamp = metadata.get("timestamp") + source = metadata.get("source") + tags = metadata.get("tags", []) + + if not isinstance(author, str) or not isinstance(message, str) or not isinstance(created_by, str): + raise ValueError("Revision metadata requires author, message, and created_by/createdBy.") + if not isinstance(timestamp, str) or not isinstance(source, str): + raise ValueError("Revision metadata requires timestamp and source.") + if not isinstance(tags, Sequence) or isinstance(tags, (str, bytes)): + raise ValueError("Revision metadata tags must be a sequence of strings.") + if not all(isinstance(tag, str) for tag in tags): + raise ValueError("Revision metadata tags must be a sequence of strings.") + + return { + "author": author, + "message": message, + "createdBy": created_by, + "timestamp": timestamp, + "source": source, + "tags": list(tags), + } diff --git a/python-sdk/src/shadowthreads/config.py b/python-sdk/src/shadowthreads/config.py new file mode 100644 index 0000000..25f09c3 --- /dev/null +++ b/python-sdk/src/shadowthreads/config.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +import os + +DEFAULT_BASE_URL = "http://localhost:3001" +ENV_BASE_URL = "SHADOW_SERVER" + + +def resolve_base_url(base_url: str | None = None) -> str: + resolved = base_url or os.getenv(ENV_BASE_URL) or DEFAULT_BASE_URL + normalized = resolved.strip() + if not normalized: + raise ValueError("base_url must not be empty") + return normalized.rstrip("/") diff --git a/python-sdk/src/shadowthreads/errors.py b/python-sdk/src/shadowthreads/errors.py new file mode 100644 index 0000000..df173f1 --- /dev/null +++ b/python-sdk/src/shadowthreads/errors.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import Any + + +class ShadowThreadsError(Exception): + """Base exception for the Shadow Threads SDK.""" + + def __init__( + self, + message: str, + *, + code: str | None = None, + 
status_code: int | None = None, + body: Any | None = None, + ) -> None: + super().__init__(message) + self.message = message + self.code = code + self.status_code = status_code + self.body = body + + +class ShadowThreadsHTTPError(ShadowThreadsError): + """Base exception for HTTP responses from the server.""" + + +class ShadowThreadsClientError(ShadowThreadsHTTPError): + """Raised for 4xx API responses.""" + + +class ShadowThreadsServerError(ShadowThreadsHTTPError): + """Raised for 5xx API responses.""" + + +class ShadowThreadsNetworkError(ShadowThreadsError): + """Raised when the client cannot reach the server.""" + + +class ShadowThreadsResponseError(ShadowThreadsError): + """Raised when the server response cannot be parsed as the expected API envelope.""" diff --git a/python-sdk/src/shadowthreads/models.py b/python-sdk/src/shadowthreads/models.py new file mode 100644 index 0000000..5009a94 --- /dev/null +++ b/python-sdk/src/shadowthreads/models.py @@ -0,0 +1,438 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Literal, TypeAlias + +from .errors import ShadowThreadsResponseError + +JSONPrimitive: TypeAlias = str | int | float | bool | None +JSONValue: TypeAlias = JSONPrimitive | dict[str, "JSONValue"] | list["JSONValue"] +ExecutionStatus: TypeAlias = Literal["success", "failure"] +RevisionSource: TypeAlias = Literal["human", "ai", "migration", "system"] + + +def _require_mapping(value: Any, *, context: str) -> dict[str, Any]: + if not isinstance(value, dict): + raise ShadowThreadsResponseError(f"Expected object for {context}", body=value) + return value + + +def _require_string(mapping: dict[str, Any], key: str, *, context: str) -> str: + value = mapping.get(key) + if not isinstance(value, str) or not value: + raise ShadowThreadsResponseError(f"Expected non-empty string for {context}.{key}", body=mapping) + return value + + +def _require_boolean(mapping: dict[str, Any], key: str, *, context: str) -> bool: 
+ value = mapping.get(key) + if not isinstance(value, bool): + raise ShadowThreadsResponseError(f"Expected boolean for {context}.{key}", body=mapping) + return value + + +def _require_list(mapping: dict[str, Any], key: str, *, context: str) -> list[Any]: + value = mapping.get(key) + if not isinstance(value, list): + raise ShadowThreadsResponseError(f"Expected list for {context}.{key}", body=mapping) + return value + + +def _optional_string(mapping: dict[str, Any], key: str) -> str | None: + value = mapping.get(key) + if value is None: + return None + if not isinstance(value, str): + raise ShadowThreadsResponseError(f"Expected string or null for {key}", body=mapping) + return value + + +def _as_status(value: Any, *, context: str) -> ExecutionStatus: + if value not in ("success", "failure"): + raise ShadowThreadsResponseError(f"Expected execution status for {context}", body=value) + return value + + +@dataclass(frozen=True, slots=True) +class ArtifactReference: + bundle_hash: str + role: str + + def to_payload(self) -> dict[str, str]: + return { + "bundleHash": self.bundle_hash, + "role": self.role, + } + + @classmethod + def from_dict(cls, raw: dict[str, Any], *, context: str = "artifactReference") -> "ArtifactReference": + mapping = _require_mapping(raw, context=context) + return cls( + bundle_hash=_require_string(mapping, "bundleHash", context=context), + role=_require_string(mapping, "role", context=context), + ) + + +@dataclass(frozen=True, slots=True) +class ArtifactIdentity: + package_id: str + revision_id: str | None = None + revision_hash: str | None = None + + def to_payload(self) -> dict[str, str | None]: + return { + "packageId": self.package_id, + "revisionId": self.revision_id, + "revisionHash": self.revision_hash, + } + + @classmethod + def from_dict(cls, raw: dict[str, Any], *, context: str = "artifactIdentity") -> "ArtifactIdentity": + mapping = _require_mapping(raw, context=context) + return cls( + package_id=_require_string(mapping, "packageId", 
context=context), + revision_id=_optional_string(mapping, "revisionId"), + revision_hash=_optional_string(mapping, "revisionHash"), + ) + + +@dataclass(frozen=True, slots=True) +class ArtifactBundle: + schema: str + identity: ArtifactIdentity + payload: JSONValue + references: list[ArtifactReference] = field(default_factory=list) + + def to_payload(self) -> dict[str, Any]: + return { + "schema": self.schema, + "identity": self.identity.to_payload(), + "payload": self.payload, + "references": [reference.to_payload() for reference in self.references], + } + + @classmethod + def from_dict(cls, raw: dict[str, Any], *, context: str = "artifactBundle") -> "ArtifactBundle": + mapping = _require_mapping(raw, context=context) + references = [ + ArtifactReference.from_dict(item, context=f"{context}.references[{index}]") + for index, item in enumerate(_require_list(mapping, "references", context=context)) + ] + return cls( + schema=_require_string(mapping, "schema", context=context), + identity=ArtifactIdentity.from_dict( + _require_mapping(mapping.get("identity"), context=f"{context}.identity"), + context=f"{context}.identity", + ), + payload=mapping.get("payload"), + references=references, + ) + + +@dataclass(frozen=True, slots=True) +class ArtifactCaptureResult: + id: str + bundle_hash: str + created_at: str + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "ArtifactCaptureResult": + mapping = _require_mapping(raw, context="artifactCaptureResult") + return cls( + id=_require_string(mapping, "id", context="artifactCaptureResult"), + bundle_hash=_require_string(mapping, "bundleHash", context="artifactCaptureResult"), + created_at=_require_string(mapping, "createdAt", context="artifactCaptureResult"), + ) + + +@dataclass(frozen=True, slots=True) +class ArtifactRecord: + id: str + bundle_hash: str + created_at: str + artifact_bundle: ArtifactBundle + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "ArtifactRecord": + mapping = _require_mapping(raw, 
context="artifactRecord") + return cls( + id=_require_string(mapping, "id", context="artifactRecord"), + bundle_hash=_require_string(mapping, "bundleHash", context="artifactRecord"), + created_at=_require_string(mapping, "createdAt", context="artifactRecord"), + artifact_bundle=ArtifactBundle.from_dict( + _require_mapping(mapping.get("artifactBundle"), context="artifactRecord.artifactBundle"), + context="artifactRecord.artifactBundle", + ), + ) + + +@dataclass(frozen=True, slots=True) +class ArtifactVerifyResult: + bundle_hash: str + verified: bool + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "ArtifactVerifyResult": + mapping = _require_mapping(raw, context="artifactVerifyResult") + return cls( + bundle_hash=_require_string(mapping, "bundleHash", context="artifactVerifyResult"), + verified=_require_boolean(mapping, "verified", context="artifactVerifyResult"), + ) + + +@dataclass(frozen=True, slots=True) +class RevisionMetadata: + author: str + message: str + created_by: str + timestamp: str + source: RevisionSource + tags: list[str] = field(default_factory=list) + + def to_payload(self) -> dict[str, Any]: + return { + "author": self.author, + "message": self.message, + "createdBy": self.created_by, + "timestamp": self.timestamp, + "source": self.source, + "tags": list(self.tags), + } + + @classmethod + def from_dict(cls, raw: dict[str, Any], *, context: str = "revisionMetadata") -> "RevisionMetadata": + mapping = _require_mapping(raw, context=context) + tags_raw = mapping.get("tags", []) + if not isinstance(tags_raw, list) or not all(isinstance(item, str) for item in tags_raw): + raise ShadowThreadsResponseError(f"Expected list of strings for {context}.tags", body=mapping) + source = mapping.get("source") + if source not in ("human", "ai", "migration", "system"): + raise ShadowThreadsResponseError(f"Expected revision source for {context}.source", body=mapping) + return cls( + author=_require_string(mapping, "author", context=context), + 
message=_require_string(mapping, "message", context=context), + created_by=_require_string(mapping, "createdBy", context=context), + timestamp=_require_string(mapping, "timestamp", context=context), + source=source, + tags=list(tags_raw), + ) + + +@dataclass(frozen=True, slots=True) +class RevisionRecord: + revision_hash: str + package_id: str + parent_revision_hash: str | None + author: str + message: str + created_by: str + timestamp: str + source: str + metadata: dict[str, JSONValue] | JSONValue + created_at: str + artifacts: list[ArtifactReference] + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "RevisionRecord": + mapping = _require_mapping(raw, context="revisionRecord") + artifacts = [ + ArtifactReference.from_dict(item, context=f"revisionRecord.artifacts[{index}]") + for index, item in enumerate(_require_list(mapping, "artifacts", context="revisionRecord")) + ] + return cls( + revision_hash=_require_string(mapping, "revisionHash", context="revisionRecord"), + package_id=_require_string(mapping, "packageId", context="revisionRecord"), + parent_revision_hash=_optional_string(mapping, "parentRevisionHash"), + author=_require_string(mapping, "author", context="revisionRecord"), + message=_require_string(mapping, "message", context="revisionRecord"), + created_by=_require_string(mapping, "createdBy", context="revisionRecord"), + timestamp=_require_string(mapping, "timestamp", context="revisionRecord"), + source=_require_string(mapping, "source", context="revisionRecord"), + metadata=mapping.get("metadata"), + created_at=_require_string(mapping, "createdAt", context="revisionRecord"), + artifacts=artifacts, + ) + + +@dataclass(frozen=True, slots=True) +class RevisionCreateResult: + revision_hash: str + revision: RevisionRecord + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "RevisionCreateResult": + mapping = _require_mapping(raw, context="revisionCreateResult") + return cls( + revision_hash=_require_string(mapping, "revisionHash", 
context="revisionCreateResult"), + revision=RevisionRecord.from_dict( + _require_mapping(mapping.get("revision"), context="revisionCreateResult.revision"), + ), + ) + + +@dataclass(frozen=True, slots=True) +class ExecutionRecord: + execution_id: str + package_id: str + revision_hash: str + provider: str + model: str + prompt_hash: str + parameters: JSONValue + input_artifacts: list[ArtifactReference] + output_artifacts: list[ArtifactReference] + result_hash: str + status: ExecutionStatus + started_at: str + finished_at: str + created_at: str + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "ExecutionRecord": + mapping = _require_mapping(raw, context="executionRecord") + input_artifacts = [ + ArtifactReference.from_dict(item, context=f"executionRecord.inputArtifacts[{index}]") + for index, item in enumerate(_require_list(mapping, "inputArtifacts", context="executionRecord")) + ] + output_artifacts = [ + ArtifactReference.from_dict(item, context=f"executionRecord.outputArtifacts[{index}]") + for index, item in enumerate(_require_list(mapping, "outputArtifacts", context="executionRecord")) + ] + return cls( + execution_id=_require_string(mapping, "executionId", context="executionRecord"), + package_id=_require_string(mapping, "packageId", context="executionRecord"), + revision_hash=_require_string(mapping, "revisionHash", context="executionRecord"), + provider=_require_string(mapping, "provider", context="executionRecord"), + model=_require_string(mapping, "model", context="executionRecord"), + prompt_hash=_require_string(mapping, "promptHash", context="executionRecord"), + parameters=mapping.get("parameters"), + input_artifacts=input_artifacts, + output_artifacts=output_artifacts, + result_hash=_require_string(mapping, "resultHash", context="executionRecord"), + status=_as_status(mapping.get("status"), context="executionRecord.status"), + started_at=_require_string(mapping, "startedAt", context="executionRecord"), + finished_at=_require_string(mapping, 
"finishedAt", context="executionRecord"), + created_at=_require_string(mapping, "createdAt", context="executionRecord"), + ) + + +@dataclass(frozen=True, slots=True) +class ExecutionCreateResult: + execution_id: str + result_hash: str + execution: ExecutionRecord + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "ExecutionCreateResult": + mapping = _require_mapping(raw, context="executionCreateResult") + return cls( + execution_id=_require_string(mapping, "executionId", context="executionCreateResult"), + result_hash=_require_string(mapping, "resultHash", context="executionCreateResult"), + execution=ExecutionRecord.from_dict( + _require_mapping(mapping.get("execution"), context="executionCreateResult.execution"), + ), + ) + + +@dataclass(frozen=True, slots=True) +class ReplayExecutionResult: + execution_id: str + verified: bool + result_hash: str + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "ReplayExecutionResult": + mapping = _require_mapping(raw, context="replayExecutionResult") + return cls( + execution_id=_require_string(mapping, "executionId", context="replayExecutionResult"), + verified=_require_boolean(mapping, "verified", context="replayExecutionResult"), + result_hash=_require_string(mapping, "resultHash", context="replayExecutionResult"), + ) + + +@dataclass(frozen=True, slots=True) +class MigrationManifest: + root_revision_hash: str + artifact_count: int + revision_count: int + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "MigrationManifest": + mapping = _require_mapping(raw, context="migrationManifest") + artifact_count = mapping.get("artifactCount") + revision_count = mapping.get("revisionCount") + if not isinstance(artifact_count, int): + raise ShadowThreadsResponseError("Expected integer for migrationManifest.artifactCount", body=mapping) + if not isinstance(revision_count, int): + raise ShadowThreadsResponseError("Expected integer for migrationManifest.revisionCount", body=mapping) + return cls( + 
root_revision_hash=_require_string(mapping, "rootRevisionHash", context="migrationManifest"), + artifact_count=artifact_count, + revision_count=revision_count, + ) + + +@dataclass(frozen=True, slots=True) +class MigrationExportResult: + zip_path: str + manifest: MigrationManifest + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "MigrationExportResult": + mapping = _require_mapping(raw, context="migrationExportResult") + return cls( + zip_path=_require_string(mapping, "zipPath", context="migrationExportResult"), + manifest=MigrationManifest.from_dict( + _require_mapping(mapping.get("manifest"), context="migrationExportResult.manifest"), + ), + ) + + +@dataclass(frozen=True, slots=True) +class MigrationVerifyResult: + ok: bool + root_revision_hash: str + artifact_count: int + revision_count: int + matches: bool + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "MigrationVerifyResult": + mapping = _require_mapping(raw, context="migrationVerifyResult") + artifact_count = mapping.get("artifactCount") + revision_count = mapping.get("revisionCount") + if not isinstance(artifact_count, int): + raise ShadowThreadsResponseError("Expected integer for migrationVerifyResult.artifactCount", body=mapping) + if not isinstance(revision_count, int): + raise ShadowThreadsResponseError("Expected integer for migrationVerifyResult.revisionCount", body=mapping) + return cls( + ok=_require_boolean(mapping, "ok", context="migrationVerifyResult"), + root_revision_hash=_require_string(mapping, "rootRevisionHash", context="migrationVerifyResult"), + artifact_count=artifact_count, + revision_count=revision_count, + matches=_require_boolean(mapping, "matches", context="migrationVerifyResult"), + ) + + +@dataclass(frozen=True, slots=True) +class MigrationImportResult: + ok: bool + root_revision_hash: str + artifact_count: int + revision_count: int + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "MigrationImportResult": + mapping = _require_mapping(raw, 
context="migrationImportResult") + artifact_count = mapping.get("artifactCount") + revision_count = mapping.get("revisionCount") + if not isinstance(artifact_count, int): + raise ShadowThreadsResponseError("Expected integer for migrationImportResult.artifactCount", body=mapping) + if not isinstance(revision_count, int): + raise ShadowThreadsResponseError("Expected integer for migrationImportResult.revisionCount", body=mapping) + return cls( + ok=_require_boolean(mapping, "ok", context="migrationImportResult"), + root_revision_hash=_require_string(mapping, "rootRevisionHash", context="migrationImportResult"), + artifact_count=artifact_count, + revision_count=revision_count, + ) diff --git a/python-sdk/src/shadowthreads/py.typed b/python-sdk/src/shadowthreads/py.typed new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/python-sdk/src/shadowthreads/py.typed @@ -0,0 +1 @@ + diff --git a/python-sdk/tests/test_client.py b/python-sdk/tests/test_client.py new file mode 100644 index 0000000..5092413 --- /dev/null +++ b/python-sdk/tests/test_client.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +import json +import pathlib +import sys +import unittest +from unittest.mock import Mock + +sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1] / "src")) + +from shadowthreads import ArtifactReference, RevisionMetadata, ShadowClient + + +class FakeResponse: + def __init__(self, status_code: int, payload: object, *, text: str | None = None) -> None: + self.status_code = status_code + self._payload = payload + self.text = text if text is not None else json.dumps(payload) + + def json(self) -> object: + if isinstance(self._payload, Exception): + raise self._payload + return self._payload + + +class ShadowClientTests(unittest.TestCase): + def make_session(self) -> Mock: + session = Mock() + session.headers = {} + return session + + def test_capture_artifact_dispatches_payload_and_parses_response(self) -> None: + session = self.make_session() + 
session.request.return_value = FakeResponse( + 200, + { + "ok": True, + "data": { + "id": "artifact-1", + "bundleHash": "a" * 64, + "createdAt": "2026-03-09T09:00:00.000Z", + }, + }, + ) + client = ShadowClient(base_url="http://localhost:3001", session=session) + + result = client.capture_artifact( + schema="demo.task", + package_id="package-1", + payload={"task": "demo"}, + references=[ArtifactReference(bundle_hash="b" * 64, role="source_task")], + ) + + self.assertEqual(result.bundle_hash, "a" * 64) + call = session.request.call_args + self.assertEqual(call.kwargs["method"], "POST") + self.assertTrue(call.kwargs["url"].endswith("/api/v1/artifacts")) + self.assertEqual(call.kwargs["json"]["identity"]["packageId"], "package-1") + self.assertEqual(call.kwargs["json"]["references"][0]["role"], "source_task") + + def test_replay_execution_without_payload_reuses_stored_execution_fields(self) -> None: + execution_id = "3f9f68c6-8c27-4f9e-8c0c-aab8bf885d77" + session = self.make_session() + session.request.side_effect = [ + FakeResponse( + 200, + { + "ok": True, + "data": { + "executionId": execution_id, + "packageId": "package-1", + "revisionHash": "c" * 64, + "provider": "demo-script", + "model": "demo-model", + "promptHash": "d" * 64, + "parameters": {"temperature": 0}, + "inputArtifacts": [{"bundleHash": "e" * 64, "role": "task_state"}], + "outputArtifacts": [{"bundleHash": "f" * 64, "role": "task_summary"}], + "resultHash": "1" * 64, + "status": "success", + "startedAt": "2026-03-09T09:00:00.000Z", + "finishedAt": "2026-03-09T09:00:02.000Z", + "createdAt": "2026-03-09T09:00:02.000Z", + }, + }, + ), + FakeResponse( + 200, + { + "ok": True, + "data": { + "executionId": execution_id, + "verified": True, + "resultHash": "1" * 64, + }, + }, + ), + ] + client = ShadowClient(base_url="http://localhost:3001", session=session) + + result = client.replay_execution(execution_id) + + self.assertTrue(result.verified) + self.assertEqual(session.request.call_count, 2) + get_call = 
session.request.call_args_list[0] + replay_call = session.request.call_args_list[1] + self.assertEqual(get_call.kwargs["method"], "GET") + self.assertTrue(get_call.kwargs["url"].endswith(f"/api/v1/executions/{execution_id}")) + self.assertEqual( + replay_call.kwargs["json"], + { + "promptHash": "d" * 64, + "parameters": {"temperature": 0}, + "inputArtifacts": [{"bundleHash": "e" * 64, "role": "task_state"}], + "outputArtifacts": [{"bundleHash": "f" * 64, "role": "task_summary"}], + "status": "success", + }, + ) + + def test_create_revision_accepts_metadata_object(self) -> None: + session = self.make_session() + session.request.return_value = FakeResponse( + 200, + { + "ok": True, + "data": { + "revisionHash": "9" * 64, + "revision": { + "revisionHash": "9" * 64, + "packageId": "package-1", + "parentRevisionHash": None, + "author": "SDK Demo", + "message": "Initial state", + "createdBy": "sdk-test", + "timestamp": "2026-03-09T09:00:00+00:00", + "source": "human", + "metadata": { + "author": "SDK Demo", + "message": "Initial state", + "createdBy": "sdk-test", + "timestamp": "2026-03-09T09:00:00+00:00", + "source": "human", + "tags": ["sdk"], + }, + "createdAt": "2026-03-09T09:00:01.000Z", + "artifacts": [{"bundleHash": "a" * 64, "role": "task_state"}], + }, + }, + }, + ) + client = ShadowClient(base_url="http://localhost:3001", session=session) + + result = client.create_revision( + package_id="package-1", + artifacts=[ArtifactReference(bundle_hash="a" * 64, role="task_state")], + metadata=RevisionMetadata( + author="SDK Demo", + message="Initial state", + created_by="sdk-test", + timestamp="2026-03-09T09:00:00+00:00", + source="human", + tags=["sdk"], + ), + ) + + self.assertEqual(result.revision_hash, "9" * 64) + + +if __name__ == "__main__": + unittest.main() diff --git a/python-sdk/tests/test_errors.py b/python-sdk/tests/test_errors.py new file mode 100644 index 0000000..d294ab5 --- /dev/null +++ b/python-sdk/tests/test_errors.py @@ -0,0 +1,101 @@ +from 
__future__ import annotations + +import pathlib +import sys +import unittest +from unittest.mock import Mock + +import requests + +sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1] / "src")) + +from shadowthreads import ( + ShadowClient, + ShadowThreadsClientError, + ShadowThreadsNetworkError, + ShadowThreadsResponseError, + ShadowThreadsServerError, +) + + +class FakeResponse: + def __init__(self, status_code: int, payload: object, *, text: str | None = None) -> None: + self.status_code = status_code + self._payload = payload + self.text = text or "" + + def json(self) -> object: + if isinstance(self._payload, Exception): + raise self._payload + return self._payload + + +class ShadowClientErrorTests(unittest.TestCase): + def make_session(self) -> Mock: + session = Mock() + session.headers = {} + return session + + def test_client_error_preserves_api_code_and_message(self) -> None: + session = self.make_session() + session.request.return_value = FakeResponse( + 404, + { + "ok": False, + "error": { + "code": "ERR_EXECUTION_NOT_FOUND", + "message": "Execution record not found", + }, + }, + ) + client = ShadowClient(base_url="http://localhost:3001", session=session) + + with self.assertRaises(ShadowThreadsClientError) as ctx: + client.get_execution("missing") + + self.assertEqual(ctx.exception.code, "ERR_EXECUTION_NOT_FOUND") + self.assertEqual(ctx.exception.status_code, 404) + self.assertEqual(ctx.exception.message, "Execution record not found") + + def test_server_error_maps_to_server_exception(self) -> None: + session = self.make_session() + session.request.return_value = FakeResponse( + 500, + { + "ok": False, + "error": { + "code": "ERR_INTERNAL", + "message": "Internal server error", + }, + }, + ) + client = ShadowClient(base_url="http://localhost:3001", session=session) + + with self.assertRaises(ShadowThreadsServerError): + client.get_revision("a" * 64) + + def test_network_error_maps_to_network_exception(self) -> None: + session = 
self.make_session() + session.request.side_effect = requests.ConnectionError("connection refused") + client = ShadowClient(base_url="http://localhost:3001", session=session) + + with self.assertRaises(ShadowThreadsNetworkError): + client.get_revision("a" * 64) + + def test_invalid_api_envelope_raises_response_error(self) -> None: + session = self.make_session() + session.request.return_value = FakeResponse( + 200, + { + "success": True, + "data": {}, + }, + ) + client = ShadowClient(base_url="http://localhost:3001", session=session) + + with self.assertRaises(ShadowThreadsResponseError): + client.get_revision("a" * 64) + + +if __name__ == "__main__": + unittest.main() diff --git a/server/.env.example b/server/.env.example deleted file mode 100644 index fc22664..0000000 --- a/server/.env.example +++ /dev/null @@ -1,38 +0,0 @@ -# =========================================== -# Shadow Threads 环境配置 -# 复制此文件为 .env 并填入真实值 -# =========================================== - -# 服务器配置 -NODE_ENV=development -PORT=3001 -API_PREFIX=/api/v1 - -# 数据库配置 (PostgreSQL) -DATABASE_URL="postgresql://shadow:shadow_dev_password@localhost:5432/shadow_threads" - -# Redis 配置 -REDIS_URL="redis://localhost:6379" - -# JWT 配置 -JWT_SECRET="your-super-secret-jwt-key-change-in-production" -JWT_EXPIRES_IN="7d" - -# 加密配置 -ENCRYPTION_KEY="your-32-character-encryption-key!" 
- -# LLM API Keys (用户也可以自己配置) -# 这些是可选的默认 API Key,用户可以覆盖 -OPENAI_API_KEY="" -ANTHROPIC_API_KEY="" -GOOGLE_AI_API_KEY="" - -# 速率限制 -RATE_LIMIT_WINDOW_MS=60000 -RATE_LIMIT_MAX_REQUESTS=100 - -# 日志级别 -LOG_LEVEL=debug - -# CORS 配置 -CORS_ORIGINS="chrome-extension://,moz-extension://,https://chatgpt.com,https://claude.ai,https://gemini.google.com" diff --git a/server/bench/tasks/t1_assumption_derived_stability.json b/server/bench/tasks/t1_assumption_derived_stability.json new file mode 100644 index 0000000..a01dd9e --- /dev/null +++ b/server/bench/tasks/t1_assumption_derived_stability.json @@ -0,0 +1,54 @@ +{ + "taskId": "t1_assumption_derived_stability", + "category": "T1", + "description": "Assumptions derived from facts remain deterministic.", + "baseState": { + "facts": [ + { "id": "f_asm_team_capacity", "category": "assumption", "subject": "team", "predicate": "capacity", "value": "2 squads", "confidence": 0.7 }, + { "id": "f_asm_security_review", "type": "assumption", "subject": "security", "predicate": "review-window", "value": "3 days", "confidence": 0.6 }, + { "id": "f_known_scope", "subject": "scope", "predicate": "known-items", "value": "80%", "confidence": 0.75 } + ], + "decisions": [ + { "id": "d_scope_lock", "question": "Lock scope now?", "answer": "yes", "rationale": "reduce late churn", "confidence": 0.74 }, + { "id": "d_release_gate", "question": "Gate release on review?", "answer": "yes", "rationale": "risk control", "confidence": 0.78 } + ], + "constraints": [ + { "id": "c_review", "scope": "process", "rule": "Security review required before release", "strength": "high" }, + { "id": "c_scope", "scope": "process", "rule": "Scope changes require sign-off", "strength": "medium" } + ], + "risks": [ + { "id": "r_scope_creep", "title": "Scope creep", "probability": "medium", "impact": "high", "mitigation": "change board" }, + { "id": "r_review_delay", "title": "Review delay", "probability": "medium", "impact": "medium", "mitigation": "early booking" } + ], + 
"assumptions": [] + }, + "targetState": { + "facts": [ + { "id": "f_asm_team_capacity", "category": "assumption", "subject": "team", "predicate": "capacity", "value": "3 squads", "confidence": 0.8 }, + { "id": "f_asm_security_review", "type": "assumption", "subject": "security", "predicate": "review-window", "value": "2 days", "confidence": 0.7 }, + { "id": "f_asm_uat", "category": "assumption", "subject": "uat", "predicate": "availability", "value": "always-on", "confidence": 0.65 }, + { "id": "f_known_scope", "subject": "scope", "predicate": "known-items", "value": "85%", "confidence": 0.79 } + ], + "decisions": [ + { "id": "d_scope_lock", "question": "Lock scope now?", "answer": "yes", "rationale": "reduce late churn", "confidence": 0.74 }, + { "id": "d_release_gate", "question": "Gate release on review?", "answer": "yes", "rationale": "risk control", "confidence": 0.78 } + ], + "constraints": [ + { "id": "c_review", "scope": "process", "rule": "Security review required before release", "strength": "high" }, + { "id": "c_scope", "scope": "process", "rule": "Scope changes require sign-off", "strength": "medium" } + ], + "risks": [ + { "id": "r_scope_creep", "title": "Scope creep", "probability": "medium", "impact": "high", "mitigation": "change board" }, + { "id": "r_review_delay", "title": "Review delay", "probability": "medium", "impact": "medium", "mitigation": "early booking" } + ], + "assumptions": [] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["facts", "assumptions"], + "requiredAssumptionKeys": ["f_asm_team_capacity"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t1_constraint_violation_attempt.json b/server/bench/tasks/t1_constraint_violation_attempt.json new file mode 100644 index 0000000..3d742b3 --- /dev/null +++ b/server/bench/tasks/t1_constraint_violation_attempt.json @@ -0,0 +1,56 @@ +{ + "taskId": 
"t1_constraint_violation_attempt", + "category": "T1", + "description": "Introduce a risk that violates a probability threshold constraint.", + "baseState": { + "facts": [ + { "id": "f_release_target", "subject": "release", "predicate": "target", "value": "q2", "confidence": 0.8 }, + { "id": "f_staffing", "subject": "team", "predicate": "staffing", "value": "tight", "confidence": 0.7 } + ], + "decisions": [ + { "id": "d_cut_scope", "question": "Scope trim needed?", "answer": "no", "rationale": "baseline scope retained", "confidence": 0.65 }, + { "id": "d_rollout", "question": "Rollout strategy", "answer": "phased", "rationale": "reduce blast radius", "confidence": 0.82 } + ], + "constraints": [ + { "id": "c_prob", "scope": "policy", "rule": "Max risk probability is low", "maxProbability": "low", "strength": "high" }, + { "id": "c_impact", "scope": "policy", "rule": "Max risk impact is medium", "maxImpact": "medium", "strength": "high" } + ], + "risks": [ + { "id": "r_vendor", "title": "Vendor SLA drift", "probability": "low", "impact": "medium", "mitigation": "dual vendor review" }, + { "id": "r_hiring", "title": "Hiring delay", "probability": "low", "impact": "low", "mitigation": "contract support" } + ], + "assumptions": [ + { "id": "a_vendor", "statement": "Primary vendor remains responsive", "confidence": 0.7 }, + { "id": "a_ops", "statement": "Ops team can absorb rollout load", "confidence": 0.65 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_release_target", "subject": "release", "predicate": "target", "value": "q2", "confidence": 0.8 }, + { "id": "f_staffing", "subject": "team", "predicate": "staffing", "value": "tight", "confidence": 0.7 } + ], + "decisions": [ + { "id": "d_cut_scope", "question": "Scope trim needed?", "answer": "no", "rationale": "baseline scope retained", "confidence": 0.65 }, + { "id": "d_rollout", "question": "Rollout strategy", "answer": "phased", "rationale": "reduce blast radius", "confidence": 0.82 } + ], + "constraints": [ 
+ { "id": "c_prob", "scope": "policy", "rule": "Max risk probability is low", "maxProbability": "low", "strength": "high" }, + { "id": "c_impact", "scope": "policy", "rule": "Max risk impact is medium", "maxImpact": "medium", "strength": "high" } + ], + "risks": [ + { "id": "r_vendor", "title": "Vendor SLA drift", "probability": "high", "impact": "high", "mitigation": "dual vendor review" }, + { "id": "r_hiring", "title": "Hiring delay", "probability": "low", "impact": "low", "mitigation": "contract support" } + ], + "assumptions": [ + { "id": "a_vendor", "statement": "Primary vendor remains responsive", "confidence": 0.7 }, + { "id": "a_ops", "statement": "Ops team can absorb rollout load", "confidence": 0.65 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": false, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["risks"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t1_decision_answer_change.json b/server/bench/tasks/t1_decision_answer_change.json new file mode 100644 index 0000000..bf78006 --- /dev/null +++ b/server/bench/tasks/t1_decision_answer_change.json @@ -0,0 +1,58 @@ +{ + "taskId": "t1_decision_answer_change", + "category": "T1", + "description": "Update one engineering decision answer while preserving other domains.", + "baseState": { + "facts": [ + { "id": "f_auth_scope", "subject": "auth", "predicate": "scope", "value": "api+web", "confidence": 0.9 }, + { "id": "f_api_lang", "subject": "api", "predicate": "language", "value": "typescript", "confidence": 0.95 } + ], + "decisions": [ + { "id": "d_auth", "question": "Token strategy", "answer": "jwt", "rationale": "existing compatibility", "confidence": 0.7 }, + { "id": "d_cache", "question": "Cache layer", "answer": "redis", "rationale": "shared infra", "confidence": 0.8 } + ], + "constraints": [ + { "id": "c_plaintext", "scope": "technical", "rule": "No plaintext secrets", "strength": "high" }, + { "id": "c_latency", 
"scope": "technical", "rule": "P95 latency below 250ms", "strength": "medium" } + ], + "risks": [ + { "id": "r_timeline", "title": "Timeline slip", "probability": "medium", "impact": "high", "mitigation": "buffer sprint" }, + { "id": "r_perf", "title": "Performance regression", "probability": "low", "impact": "medium", "mitigation": "load test gate" } + ], + "assumptions": [ + { "id": "a_backend", "statement": "Backend team is staffed", "confidence": 0.8 }, + { "id": "a_budget", "statement": "Budget supports one extra sprint", "confidence": 0.6 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_auth_scope", "subject": "auth", "predicate": "scope", "value": "api+web", "confidence": 0.9 }, + { "id": "f_api_lang", "subject": "api", "predicate": "language", "value": "typescript", "confidence": 0.95 } + ], + "decisions": [ + { "id": "d_auth", "question": "Token strategy", "answer": "session-token", "rationale": "aligns with gateway refresh flow", "confidence": 0.82 }, + { "id": "d_cache", "question": "Cache layer", "answer": "redis", "rationale": "shared infra", "confidence": 0.8 } + ], + "constraints": [ + { "id": "c_plaintext", "scope": "technical", "rule": "No plaintext secrets", "strength": "high" }, + { "id": "c_latency", "scope": "technical", "rule": "P95 latency below 250ms", "strength": "medium" } + ], + "risks": [ + { "id": "r_timeline", "title": "Timeline slip", "probability": "medium", "impact": "high", "mitigation": "buffer sprint" }, + { "id": "r_perf", "title": "Performance regression", "probability": "low", "impact": "medium", "mitigation": "load test gate" } + ], + "assumptions": [ + { "id": "a_backend", "statement": "Backend team is staffed", "confidence": 0.8 }, + { "id": "a_budget", "statement": "Budget supports one extra sprint", "confidence": 0.6 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "domainMustNotChange": ["facts", "constraints", "risks", "assumptions"], 
+ "requiredDomainsModified": ["decisions"], + "requiredDecisionKeys": ["d_auth", "d_cache"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t1_facts_append_remove.json b/server/bench/tasks/t1_facts_append_remove.json new file mode 100644 index 0000000..0278796 --- /dev/null +++ b/server/bench/tasks/t1_facts_append_remove.json @@ -0,0 +1,59 @@ +{ + "taskId": "t1_facts_append_remove", + "category": "T1", + "description": "Append and remove facts while preserving other domains.", + "baseState": { + "facts": [ + { "id": "f_env_dev", "subject": "env", "predicate": "dev", "value": "stable", "confidence": 0.9 }, + { "id": "f_env_stage", "subject": "env", "predicate": "stage", "value": "lagging", "confidence": 0.6 }, + { "id": "f_env_prod", "subject": "env", "predicate": "prod", "value": "stable", "confidence": 0.92 } + ], + "decisions": [ + { "id": "d_db", "question": "Primary database", "answer": "postgres", "rationale": "existing ops maturity", "confidence": 0.88 }, + { "id": "d_queue", "question": "Queue backend", "answer": "redis-stream", "rationale": "already deployed", "confidence": 0.7 } + ], + "constraints": [ + { "id": "c_sla", "scope": "technical", "rule": "SLA remains 99.9", "strength": "high" }, + { "id": "c_cost", "scope": "process", "rule": "No infra cost increase above 10 percent", "strength": "medium" } + ], + "risks": [ + { "id": "r_data", "title": "Data skew", "probability": "medium", "impact": "medium", "mitigation": "sampling checks" }, + { "id": "r_queue", "title": "Queue backlog", "probability": "low", "impact": "high", "mitigation": "rate limits" } + ], + "assumptions": [ + { "id": "a_monitoring", "statement": "Monitoring stack remains healthy", "confidence": 0.8 }, + { "id": "a_alerting", "statement": "Alert routing remains on-call ready", "confidence": 0.82 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_env_dev", "subject": "env", "predicate": "dev", "value": "stable", "confidence": 0.9 }, + { "id": 
"f_env_prod", "subject": "env", "predicate": "prod", "value": "stable", "confidence": 0.92 }, + { "id": "f_env_perf", "subject": "env", "predicate": "perf", "value": "new cluster", "confidence": 0.75 } + ], + "decisions": [ + { "id": "d_db", "question": "Primary database", "answer": "postgres", "rationale": "existing ops maturity", "confidence": 0.88 }, + { "id": "d_queue", "question": "Queue backend", "answer": "redis-stream", "rationale": "already deployed", "confidence": 0.7 } + ], + "constraints": [ + { "id": "c_sla", "scope": "technical", "rule": "SLA remains 99.9", "strength": "high" }, + { "id": "c_cost", "scope": "process", "rule": "No infra cost increase above 10 percent", "strength": "medium" } + ], + "risks": [ + { "id": "r_data", "title": "Data skew", "probability": "medium", "impact": "medium", "mitigation": "sampling checks" }, + { "id": "r_queue", "title": "Queue backlog", "probability": "low", "impact": "high", "mitigation": "rate limits" } + ], + "assumptions": [ + { "id": "a_monitoring", "statement": "Monitoring stack remains healthy", "confidence": 0.8 }, + { "id": "a_alerting", "statement": "Alert routing remains on-call ready", "confidence": 0.82 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["facts"], + "domainMustNotChange": ["decisions", "constraints", "risks", "assumptions"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t1_multi_domain_edit_small.json b/server/bench/tasks/t1_multi_domain_edit_small.json new file mode 100644 index 0000000..1958344 --- /dev/null +++ b/server/bench/tasks/t1_multi_domain_edit_small.json @@ -0,0 +1,56 @@ +{ + "taskId": "t1_multi_domain_edit_small", + "category": "T1", + "description": "Small multi-domain edit touching facts, decisions, constraints, and risks.", + "baseState": { + "facts": [ + { "id": "f_api_version", "subject": "api", "predicate": "version", "value": "v1", 
"confidence": 0.9 }, + { "id": "f_mobile_share", "subject": "traffic", "predicate": "mobile-share", "value": "40%", "confidence": 0.7 } + ], + "decisions": [ + { "id": "d_api_version", "question": "Target API version", "answer": "v1", "rationale": "compatibility", "confidence": 0.8 }, + { "id": "d_flagging", "question": "Feature flag provider", "answer": "internal", "rationale": "low cost", "confidence": 0.6 } + ], + "constraints": [ + { "id": "c_mobile", "scope": "technical", "rule": "Mobile clients must remain compatible", "strength": "high" }, + { "id": "c_budget_cap", "scope": "policy", "rule": "Monthly infra increase under 5%", "strength": "medium" } + ], + "risks": [ + { "id": "r_mobile_break", "title": "Mobile compatibility break", "probability": "medium", "impact": "high", "mitigation": "contract tests" }, + { "id": "r_flag_latency", "title": "Flag fetch latency", "probability": "low", "impact": "medium", "mitigation": "cache local" } + ], + "assumptions": [ + { "id": "a_mobile_sdk", "statement": "Mobile SDK upgrade planned this quarter", "confidence": 0.6 }, + { "id": "a_flag_qps", "statement": "Flag evaluation QPS stays under limit", "confidence": 0.7 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_api_version", "subject": "api", "predicate": "version", "value": "v2", "confidence": 0.88 }, + { "id": "f_mobile_share", "subject": "traffic", "predicate": "mobile-share", "value": "42%", "confidence": 0.74 } + ], + "decisions": [ + { "id": "d_api_version", "question": "Target API version", "answer": "v2", "rationale": "new capabilities", "confidence": 0.83 }, + { "id": "d_flagging", "question": "Feature flag provider", "answer": "internal", "rationale": "low cost", "confidence": 0.6 } + ], + "constraints": [ + { "id": "c_mobile", "scope": "technical", "rule": "Mobile clients must remain compatible", "strength": "high" }, + { "id": "c_budget_cap", "scope": "policy", "rule": "Monthly infra increase under 8%", "strength": "medium" } + ], + "risks": [ + { 
"id": "r_mobile_break", "title": "Mobile compatibility break", "probability": "low", "impact": "medium", "mitigation": "contract tests + canary" }, + { "id": "r_flag_latency", "title": "Flag fetch latency", "probability": "low", "impact": "medium", "mitigation": "cache local" } + ], + "assumptions": [ + { "id": "a_mobile_sdk", "statement": "Mobile SDK upgrade planned this quarter", "confidence": 0.6 }, + { "id": "a_flag_qps", "statement": "Flag evaluation QPS stays under limit", "confidence": 0.7 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["facts", "decisions", "constraints", "risks"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t1_risk_update_from_fact.json b/server/bench/tasks/t1_risk_update_from_fact.json new file mode 100644 index 0000000..63db76f --- /dev/null +++ b/server/bench/tasks/t1_risk_update_from_fact.json @@ -0,0 +1,56 @@ +{ + "taskId": "t1_risk_update_from_fact", + "category": "T1", + "description": "Update facts and risk profile based on telemetry.", + "baseState": { + "facts": [ + { "id": "f_latency", "subject": "perf", "predicate": "latency", "value": "220ms", "confidence": 0.8 }, + { "id": "f_error_rate", "subject": "perf", "predicate": "error-rate", "value": "1.8%", "confidence": 0.76 } + ], + "decisions": [ + { "id": "d_rollout_wave", "question": "Rollout wave count", "answer": "2", "rationale": "balanced speed and safety", "confidence": 0.72 }, + { "id": "d_guardrail", "question": "Guardrail mechanism", "answer": "auto-rollback", "rationale": "known runbook", "confidence": 0.85 } + ], + "constraints": [ + { "id": "c_p99", "scope": "technical", "rule": "P99 latency under 500ms", "strength": "high" }, + { "id": "c_errors", "scope": "technical", "rule": "Error rate under 2%", "strength": "high" } + ], + "risks": [ + { "id": "r_regression", "title": "Regression risk", "probability": "high", "impact": 
"high", "mitigation": "feature flag" }, + { "id": "r_support", "title": "Support load", "probability": "medium", "impact": "medium", "mitigation": "FAQ update" } + ], + "assumptions": [ + { "id": "a_observability", "statement": "Telemetry remains complete", "confidence": 0.7 }, + { "id": "a_oncall", "statement": "On-call rotation remains staffed", "confidence": 0.75 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_latency", "subject": "perf", "predicate": "latency", "value": "180ms", "confidence": 0.88 }, + { "id": "f_error_rate", "subject": "perf", "predicate": "error-rate", "value": "0.9%", "confidence": 0.84 } + ], + "decisions": [ + { "id": "d_rollout_wave", "question": "Rollout wave count", "answer": "2", "rationale": "balanced speed and safety", "confidence": 0.72 }, + { "id": "d_guardrail", "question": "Guardrail mechanism", "answer": "auto-rollback", "rationale": "known runbook", "confidence": 0.85 } + ], + "constraints": [ + { "id": "c_p99", "scope": "technical", "rule": "P99 latency under 500ms", "strength": "high" }, + { "id": "c_errors", "scope": "technical", "rule": "Error rate under 2%", "strength": "high" } + ], + "risks": [ + { "id": "r_regression", "title": "Regression risk", "probability": "low", "impact": "medium", "mitigation": "feature flag" }, + { "id": "r_support", "title": "Support load", "probability": "low", "impact": "medium", "mitigation": "FAQ update" } + ], + "assumptions": [ + { "id": "a_observability", "statement": "Telemetry remains complete", "confidence": 0.7 }, + { "id": "a_oncall", "statement": "On-call rotation remains staffed", "confidence": 0.75 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["facts", "risks"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t2_assumptions_revision.json b/server/bench/tasks/t2_assumptions_revision.json new file mode 100644 index 0000000..62b4d7e --- 
/dev/null +++ b/server/bench/tasks/t2_assumptions_revision.json @@ -0,0 +1,57 @@ +{ + "taskId": "t2_assumptions_revision", + "category": "T2", + "description": "Revise explicit assumptions after new planning evidence.", + "baseState": { + "facts": [ + { "id": "f_vendor", "subject": "vendor", "predicate": "status", "value": "available", "confidence": 0.7 }, + { "id": "f_sample", "subject": "sample", "predicate": "target-size", "value": "600", "confidence": 0.8 } + ], + "decisions": [ + { "id": "d_sampling", "question": "Sampling strategy", "answer": "uniform", "rationale": "simple execution", "confidence": 0.63 }, + { "id": "d_interview", "question": "Interview modality", "answer": "remote", "rationale": "broader reach", "confidence": 0.76 } + ], + "constraints": [ + { "id": "c_ethics", "scope": "policy", "rule": "Ethics approval before outreach", "strength": "high" }, + { "id": "c_privacy", "scope": "policy", "rule": "No raw personal data export", "strength": "high" } + ], + "risks": [ + { "id": "r_privacy", "title": "Privacy non-compliance", "probability": "low", "impact": "high", "mitigation": "redaction policy" }, + { "id": "r_recruitment", "title": "Recruitment shortfall", "probability": "medium", "impact": "medium", "mitigation": "partner channels" } + ], + "assumptions": [ + { "id": "a_vendor_window", "statement": "Vendor can deliver in 2 weeks", "confidence": 0.55 }, + { "id": "a_sample_access", "statement": "Sample pool remains accessible", "confidence": 0.62 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_vendor", "subject": "vendor", "predicate": "status", "value": "available", "confidence": 0.7 }, + { "id": "f_sample", "subject": "sample", "predicate": "target-size", "value": "600", "confidence": 0.8 } + ], + "decisions": [ + { "id": "d_sampling", "question": "Sampling strategy", "answer": "stratified", "rationale": "reduce bias", "confidence": 0.79 }, + { "id": "d_interview", "question": "Interview modality", "answer": "remote", "rationale": 
"broader reach", "confidence": 0.76 } + ], + "constraints": [ + { "id": "c_ethics", "scope": "policy", "rule": "Ethics approval before outreach", "strength": "high" }, + { "id": "c_privacy", "scope": "policy", "rule": "No raw personal data export", "strength": "high" } + ], + "risks": [ + { "id": "r_privacy", "title": "Privacy non-compliance", "probability": "low", "impact": "high", "mitigation": "redaction policy" }, + { "id": "r_recruitment", "title": "Recruitment shortfall", "probability": "low", "impact": "medium", "mitigation": "partner channels" } + ], + "assumptions": [ + { "id": "a_vendor_window", "statement": "Vendor can deliver in 3 weeks", "confidence": 0.7 }, + { "id": "a_sample_access", "statement": "Sample pool remains accessible", "confidence": 0.68 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["decisions", "risks", "assumptions"], + "requiredAssumptionKeys": ["a_vendor_window"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t2_constraints_tightening.json b/server/bench/tasks/t2_constraints_tightening.json new file mode 100644 index 0000000..7821c1a --- /dev/null +++ b/server/bench/tasks/t2_constraints_tightening.json @@ -0,0 +1,56 @@ +{ + "taskId": "t2_constraints_tightening", + "category": "T2", + "description": "Tighten planning constraints and align risk impact.", + "baseState": { + "facts": [ + { "id": "f_budget_cycle", "subject": "budget", "predicate": "cycle", "value": "quarterly", "confidence": 0.8 }, + { "id": "f_staff_pool", "subject": "staffing", "predicate": "pool", "value": "shared", "confidence": 0.72 } + ], + "decisions": [ + { "id": "d_interview_count", "question": "Interview count", "answer": "20", "rationale": "baseline sample", "confidence": 0.7 }, + { "id": "d_scope_depth", "question": "Scope depth", "answer": "broad", "rationale": "coverage first", "confidence": 0.65 } + ], + "constraints": [ 
+ { "id": "c_timebox", "scope": "process", "rule": "Timebox research to 4 weeks", "strength": "medium" }, + { "id": "c_spend", "scope": "policy", "rule": "Spend under 30k", "strength": "high" } + ], + "risks": [ + { "id": "r_overrun", "title": "Schedule overrun", "probability": "medium", "impact": "medium", "mitigation": "weekly review" }, + { "id": "r_cost", "title": "Cost overrun", "probability": "low", "impact": "high", "mitigation": "budget gate" } + ], + "assumptions": [ + { "id": "a_vendor_slot", "statement": "Vendor slot stays reserved", "confidence": 0.6 }, + { "id": "a_participant_rate", "statement": "Participant response stable", "confidence": 0.7 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_budget_cycle", "subject": "budget", "predicate": "cycle", "value": "monthly", "confidence": 0.83 }, + { "id": "f_staff_pool", "subject": "staffing", "predicate": "pool", "value": "shared", "confidence": 0.72 } + ], + "decisions": [ + { "id": "d_interview_count", "question": "Interview count", "answer": "18", "rationale": "timebox alignment", "confidence": 0.72 }, + { "id": "d_scope_depth", "question": "Scope depth", "answer": "focused", "rationale": "priority use-cases only", "confidence": 0.7 } + ], + "constraints": [ + { "id": "c_timebox", "scope": "process", "rule": "Timebox research to 3 weeks", "strength": "high" }, + { "id": "c_spend", "scope": "policy", "rule": "Spend under 25k", "strength": "high" } + ], + "risks": [ + { "id": "r_overrun", "title": "Schedule overrun", "probability": "medium", "impact": "medium", "mitigation": "weekly review" }, + { "id": "r_cost", "title": "Cost overrun", "probability": "low", "impact": "medium", "mitigation": "budget gate" } + ], + "assumptions": [ + { "id": "a_vendor_slot", "statement": "Vendor slot stays reserved", "confidence": 0.6 }, + { "id": "a_participant_rate", "statement": "Participant response stable", "confidence": 0.7 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + 
"mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["facts", "decisions", "constraints", "risks"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t2_long_context_summary_as_facts.json b/server/bench/tasks/t2_long_context_summary_as_facts.json new file mode 100644 index 0000000..d71c500 --- /dev/null +++ b/server/bench/tasks/t2_long_context_summary_as_facts.json @@ -0,0 +1,60 @@ +{ + "taskId": "t2_long_context_summary_as_facts", + "category": "T2", + "description": "Simulate compressed long context represented as structured facts.", + "baseState": { + "facts": [ + { "id": "f_ctx_1", "subject": "context", "predicate": "summary", "value": "phase-1 interviews complete", "confidence": 0.8 }, + { "id": "f_ctx_2", "subject": "context", "predicate": "summary", "value": "customer segment A dominant", "confidence": 0.77 }, + { "id": "f_ctx_3", "subject": "context", "predicate": "summary", "value": "pricing sensitivity observed", "confidence": 0.73 } + ], + "decisions": [ + { "id": "d_focus_segment", "question": "Primary segment", "answer": "A", "rationale": "strong signal", "confidence": 0.79 }, + { "id": "d_price_test", "question": "Run price test?", "answer": "yes", "rationale": "validate sensitivity", "confidence": 0.68 } + ], + "constraints": [ + { "id": "c_research_time", "scope": "process", "rule": "Complete synthesis in 10 days", "strength": "medium" }, + { "id": "c_communication", "scope": "process", "rule": "Weekly executive readout required", "strength": "high" } + ], + "risks": [ + { "id": "r_signal_noise", "title": "Signal noise in summaries", "probability": "medium", "impact": "medium", "mitigation": "cross-check raw notes" }, + { "id": "r_exec_misalignment", "title": "Executive interpretation drift", "probability": "low", "impact": "high", "mitigation": "shared dashboard" } + ], + "assumptions": [ + { "id": "a_notes_quality", "statement": "Interview notes quality is consistent", "confidence": 0.7 
}, + { "id": "a_segment_stability", "statement": "Segment behavior remains stable", "confidence": 0.66 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_ctx_1", "subject": "context", "predicate": "summary", "value": "phase-1 interviews complete", "confidence": 0.8 }, + { "id": "f_ctx_2", "subject": "context", "predicate": "summary", "value": "customer segment B growing", "confidence": 0.79 }, + { "id": "f_ctx_3", "subject": "context", "predicate": "summary", "value": "pricing sensitivity observed", "confidence": 0.73 }, + { "id": "f_ctx_4", "subject": "context", "predicate": "summary", "value": "support requests cluster around onboarding", "confidence": 0.71 } + ], + "decisions": [ + { "id": "d_focus_segment", "question": "Primary segment", "answer": "B", "rationale": "latest trend reversal", "confidence": 0.77 }, + { "id": "d_price_test", "question": "Run price test?", "answer": "yes", "rationale": "validate sensitivity", "confidence": 0.68 } + ], + "constraints": [ + { "id": "c_research_time", "scope": "process", "rule": "Complete synthesis in 10 days", "strength": "medium" }, + { "id": "c_communication", "scope": "process", "rule": "Weekly executive readout required", "strength": "high" } + ], + "risks": [ + { "id": "r_signal_noise", "title": "Signal noise in summaries", "probability": "medium", "impact": "medium", "mitigation": "cross-check raw notes" }, + { "id": "r_exec_misalignment", "title": "Executive interpretation drift", "probability": "low", "impact": "high", "mitigation": "shared dashboard" } + ], + "assumptions": [ + { "id": "a_notes_quality", "statement": "Interview notes quality is consistent", "confidence": 0.7 }, + { "id": "a_segment_stability", "statement": "Segment behavior remains stable", "confidence": 0.66 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["facts", "decisions"], + "requiredDecisionKeys": ["d_focus_segment"] + }, + 
"runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t2_plan_decisions_multi_step.json b/server/bench/tasks/t2_plan_decisions_multi_step.json new file mode 100644 index 0000000..655483d --- /dev/null +++ b/server/bench/tasks/t2_plan_decisions_multi_step.json @@ -0,0 +1,56 @@ +{ + "taskId": "t2_plan_decisions_multi_step", + "category": "T2", + "description": "Planning task with multi-step decision refinement.", + "baseState": { + "facts": [ + { "id": "f_goal", "subject": "goal", "predicate": "primary", "value": "launch pilot", "confidence": 0.85 }, + { "id": "f_deadline", "subject": "timeline", "predicate": "pilot-date", "value": "2026-06", "confidence": 0.7 } + ], + "decisions": [ + { "id": "d_phase_count", "question": "How many phases?", "answer": "2", "rationale": "initial outline", "confidence": 0.6 }, + { "id": "d_research_depth", "question": "Research depth", "answer": "medium", "rationale": "balanced effort", "confidence": 0.65 } + ], + "constraints": [ + { "id": "c_staff", "scope": "process", "rule": "Plan assumes 3 contributors", "strength": "medium" }, + { "id": "c_review_window", "scope": "process", "rule": "Review every 2 weeks", "strength": "high" } + ], + "risks": [ + { "id": "r_alignment", "title": "Stakeholder misalignment", "probability": "medium", "impact": "medium", "mitigation": "weekly sync" }, + { "id": "r_scope", "title": "Scope expansion", "probability": "medium", "impact": "high", "mitigation": "decision log" } + ], + "assumptions": [ + { "id": "a_participation", "statement": "Stakeholders attend weekly sync", "confidence": 0.7 }, + { "id": "a_data_access", "statement": "Research data access approved", "confidence": 0.68 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_goal", "subject": "goal", "predicate": "primary", "value": "launch pilot", "confidence": 0.85 }, + { "id": "f_deadline", "subject": "timeline", "predicate": "pilot-date", "value": "2026-06", "confidence": 0.7 } + ], + "decisions": [ + { "id": 
"d_phase_count", "question": "How many phases?", "answer": "3", "rationale": "added hardening phase", "confidence": 0.78 }, + { "id": "d_research_depth", "question": "Research depth", "answer": "high", "rationale": "stakeholder need clarity", "confidence": 0.74 } + ], + "constraints": [ + { "id": "c_staff", "scope": "process", "rule": "Plan assumes 3 contributors", "strength": "medium" }, + { "id": "c_review_window", "scope": "process", "rule": "Review every 2 weeks", "strength": "high" } + ], + "risks": [ + { "id": "r_alignment", "title": "Stakeholder misalignment", "probability": "low", "impact": "medium", "mitigation": "weekly sync + notes" }, + { "id": "r_scope", "title": "Scope expansion", "probability": "medium", "impact": "high", "mitigation": "decision log" } + ], + "assumptions": [ + { "id": "a_participation", "statement": "Stakeholders attend weekly sync", "confidence": 0.7 }, + { "id": "a_data_access", "statement": "Research data access approved", "confidence": 0.68 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["decisions", "risks"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t2_risk_register_growth.json b/server/bench/tasks/t2_risk_register_growth.json new file mode 100644 index 0000000..77860da --- /dev/null +++ b/server/bench/tasks/t2_risk_register_growth.json @@ -0,0 +1,57 @@ +{ + "taskId": "t2_risk_register_growth", + "category": "T2", + "description": "Expand planning risk register with additional items.", + "baseState": { + "facts": [ + { "id": "f_context", "subject": "research", "predicate": "scope", "value": "regional launch", "confidence": 0.76 }, + { "id": "f_team", "subject": "team", "predicate": "analysts", "value": "2", "confidence": 0.82 } + ], + "decisions": [ + { "id": "d_pilot_region", "question": "Pilot region", "answer": "north", "rationale": "largest demand", "confidence": 0.69 }, + { "id": 
"d_survey", "question": "Survey cadence", "answer": "bi-weekly", "rationale": "trend visibility", "confidence": 0.64 } + ], + "constraints": [ + { "id": "c_budget", "scope": "policy", "rule": "Research spend capped at 30k", "strength": "high" }, + { "id": "c_tools", "scope": "technical", "rule": "Use approved analytics stack only", "strength": "high" } + ], + "risks": [ + { "id": "r_sample_bias", "title": "Sample bias", "probability": "medium", "impact": "medium", "mitigation": "stratified sample" }, + { "id": "r_response_rate", "title": "Low response rate", "probability": "medium", "impact": "high", "mitigation": "incentives" } + ], + "assumptions": [ + { "id": "a_recruitment", "statement": "Recruitment channel remains open", "confidence": 0.66 }, + { "id": "a_panel", "statement": "Panel partners stay available", "confidence": 0.61 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_context", "subject": "research", "predicate": "scope", "value": "regional launch", "confidence": 0.76 }, + { "id": "f_team", "subject": "team", "predicate": "analysts", "value": "2", "confidence": 0.82 } + ], + "decisions": [ + { "id": "d_pilot_region", "question": "Pilot region", "answer": "north", "rationale": "largest demand", "confidence": 0.69 }, + { "id": "d_survey", "question": "Survey cadence", "answer": "bi-weekly", "rationale": "trend visibility", "confidence": 0.64 } + ], + "constraints": [ + { "id": "c_budget", "scope": "policy", "rule": "Research spend capped at 30k", "strength": "high" }, + { "id": "c_tools", "scope": "technical", "rule": "Use approved analytics stack only", "strength": "high" } + ], + "risks": [ + { "id": "r_sample_bias", "title": "Sample bias", "probability": "medium", "impact": "medium", "mitigation": "stratified sample" }, + { "id": "r_response_rate", "title": "Low response rate", "probability": "medium", "impact": "high", "mitigation": "incentives" }, + { "id": "r_data_delay", "title": "Data delivery delay", "probability": "low", "impact": 
"medium", "mitigation": "daily checkpoint" } + ], + "assumptions": [ + { "id": "a_recruitment", "statement": "Recruitment channel remains open", "confidence": 0.66 }, + { "id": "a_panel", "statement": "Panel partners stay available", "confidence": 0.61 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["risks"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t2_tradeoff_decision_flip.json b/server/bench/tasks/t2_tradeoff_decision_flip.json new file mode 100644 index 0000000..55fd688 --- /dev/null +++ b/server/bench/tasks/t2_tradeoff_decision_flip.json @@ -0,0 +1,57 @@ +{ + "taskId": "t2_tradeoff_decision_flip", + "category": "T2", + "description": "Flip strategic tradeoff decision under tighter constraints.", + "baseState": { + "facts": [ + { "id": "f_priority", "subject": "priority", "predicate": "goal", "value": "speed", "confidence": 0.72 }, + { "id": "f_budget", "subject": "budget", "predicate": "status", "value": "tight", "confidence": 0.85 } + ], + "decisions": [ + { "id": "d_tradeoff", "question": "Optimize for speed or quality", "answer": "speed", "rationale": "market pressure", "confidence": 0.74 }, + { "id": "d_release_scope", "question": "Release scope", "answer": "full", "rationale": "single campaign", "confidence": 0.6 } + ], + "constraints": [ + { "id": "c_quality", "scope": "policy", "rule": "Defect escape rate under 2%", "strength": "high" }, + { "id": "c_budget_hard", "scope": "policy", "rule": "Spend increase must stay under 3%", "strength": "high" } + ], + "risks": [ + { "id": "r_quality", "title": "Quality degradation", "probability": "high", "impact": "high", "mitigation": "extra QA shift" }, + { "id": "r_budget", "title": "Budget overrun", "probability": "medium", "impact": "high", "mitigation": "scope cut trigger" } + ], + "assumptions": [ + { "id": "a_market_window", "statement": "Market window closes in two 
months", "confidence": 0.8 }, + { "id": "a_qa_capacity", "statement": "QA can add one shift", "confidence": 0.58 } + ] + }, + "targetState": { + "facts": [ + { "id": "f_priority", "subject": "priority", "predicate": "goal", "value": "quality", "confidence": 0.78 }, + { "id": "f_budget", "subject": "budget", "predicate": "status", "value": "tight", "confidence": 0.85 } + ], + "decisions": [ + { "id": "d_tradeoff", "question": "Optimize for speed or quality", "answer": "quality", "rationale": "risk profile unacceptable", "confidence": 0.81 }, + { "id": "d_release_scope", "question": "Release scope", "answer": "phased", "rationale": "protect quality target", "confidence": 0.77 } + ], + "constraints": [ + { "id": "c_quality", "scope": "policy", "rule": "Defect escape rate under 1.5%", "strength": "high" }, + { "id": "c_budget_hard", "scope": "policy", "rule": "Spend increase must stay under 3%", "strength": "high" } + ], + "risks": [ + { "id": "r_quality", "title": "Quality degradation", "probability": "medium", "impact": "medium", "mitigation": "extra QA shift" }, + { "id": "r_budget", "title": "Budget overrun", "probability": "medium", "impact": "high", "mitigation": "scope cut trigger" } + ], + "assumptions": [ + { "id": "a_market_window", "statement": "Market window closes in two months", "confidence": 0.8 }, + { "id": "a_qa_capacity", "statement": "QA can add one shift", "confidence": 0.58 } + ] + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": true, + "maxDistanceCountsSum": 0, + "requiredDomainsModified": ["facts", "decisions", "constraints", "risks"], + "requiredDecisionKeys": ["d_tradeoff"] + }, + "runConfig": { "repetitions": 15 } +} diff --git a/server/bench/tasks/t3_conflict_add_existing.json b/server/bench/tasks/t3_conflict_add_existing.json new file mode 100644 index 0000000..06b417b --- /dev/null +++ b/server/bench/tasks/t3_conflict_add_existing.json @@ -0,0 +1,220 @@ +{ + "taskId": "t3_conflict_add_existing", + 
"category": "T3", + "description": "Conflict add-existing plus valid add in same domain to separate best_effort and strict behavior.", + "baseState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "targetState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + }, + { + "id": "f_kpi", + "subject": "project", + "predicate": "kpi", + "value": "on_track", + "confidence": 0.72 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "coreStubDelta": { + "facts": { + "added": [ + { + "key": "", + "unit": 
{ + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + } + }, + { + "key": "", + "unit": { + "id": "f_kpi", + "subject": "project", + "predicate": "kpi", + "value": "on_track", + "confidence": 0.72 + } + } + ], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [] + }, + "risks": { + "added": [], + "removed": [], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "llmStubDelta": { + "facts": { + "added": [ + { + "key": "", + "unit": { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + } + }, + { + "key": "", + "unit": { + "id": "f_kpi", + "subject": "project", + "predicate": "kpi", + "value": "on_track", + "confidence": 0.72 + } + } + ], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [] + }, + "risks": { + "added": [], + "removed": [], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": false, + "requiredDomainsModified": [ + "facts" + ] + }, + "runConfig": { + "repetitions": 15 + } +} diff --git a/server/bench/tasks/t3_conflict_modify_missing.json b/server/bench/tasks/t3_conflict_modify_missing.json new file mode 100644 index 0000000..db68e05 --- /dev/null +++ b/server/bench/tasks/t3_conflict_modify_missing.json @@ -0,0 +1,271 @@ +{ + "taskId": "t3_conflict_modify_missing", + "category": "T3", + "description": "Modify-missing conflict with valid constraint tightening.", + "baseState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": 
"f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "targetState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 115", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "coreStubDelta": { + "facts": { + "added": [], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "d_missing", + "question": "Missing decision", + "answer": "none", + "rationale": "none", + "confidence": 0.1 + }, + "after": { + "id": "d_missing", + "question": "Missing decision", + "answer": "updated", + "rationale": "none", + "confidence": 0.1 + }, + "changes": [ + { + "path": "answer", 
+ "op": "set", + "before": "none", + "after": "updated" + } + ] + } + ] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + }, + "after": { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 115", + "strength": "high" + }, + "changes": [ + { + "path": "rule", + "op": "set", + "before": "Budget must stay under 120", + "after": "Budget must stay under 115" + } + ] + } + ] + }, + "risks": { + "added": [], + "removed": [], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "llmStubDelta": { + "facts": { + "added": [], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "d_missing", + "question": "Missing decision", + "answer": "none", + "rationale": "none", + "confidence": 0.1 + }, + "after": { + "id": "d_missing", + "question": "Missing decision", + "answer": "updated", + "rationale": "none", + "confidence": 0.1 + }, + "changes": [ + { + "path": "answer", + "op": "set", + "before": "none", + "after": "updated" + } + ] + } + ] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + }, + "after": { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 115", + "strength": "high" + }, + "changes": [ + { + "path": "rule", + "op": "set", + "before": "Budget must stay under 120", + "after": "Budget must stay under 115" + } + ] + } + ] + }, + "risks": { + "added": [], + "removed": [], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": false, + 
"requiredDomainsModified": [ + "constraints" + ] + }, + "runConfig": { + "repetitions": 15 + } +} diff --git a/server/bench/tasks/t3_conflict_remove_missing.json b/server/bench/tasks/t3_conflict_remove_missing.json new file mode 100644 index 0000000..b687599 --- /dev/null +++ b/server/bench/tasks/t3_conflict_remove_missing.json @@ -0,0 +1,269 @@ +{ + "taskId": "t3_conflict_remove_missing", + "category": "T3", + "description": "Remove-missing conflict with valid decision update.", + "baseState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "targetState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "agile", + "rationale": "higher throughput", + "confidence": 0.82 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": 
"high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "coreStubDelta": { + "facts": { + "added": [], + "removed": [ + { + "key": "", + "unit": { + "id": "f_missing", + "subject": "project", + "predicate": "obsolete", + "value": "x", + "confidence": 0.1 + } + } + ], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + }, + "after": { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "agile", + "rationale": "higher throughput", + "confidence": 0.82 + }, + "changes": [ + { + "path": "answer", + "op": "set", + "before": "incremental", + "after": "agile" + }, + { + "path": "rationale", + "op": "set", + "before": "low risk", + "after": "higher throughput" + }, + { + "path": "confidence", + "op": "set", + "before": 0.8, + "after": 0.82 + } + ] + } + ] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [] + }, + "risks": { + "added": [], + "removed": [], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "llmStubDelta": { + "facts": { + "added": [], + "removed": [ + { + "key": "", + "unit": { + "id": "f_missing", + "subject": "project", + "predicate": "obsolete", + "value": "x", + "confidence": 0.1 + } + } + ], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + }, + "after": { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "agile", + "rationale": "higher throughput", + "confidence": 0.82 + }, + "changes": [ + { + "path": "answer", + "op": "set", + "before": 
"incremental", + "after": "agile" + }, + { + "path": "rationale", + "op": "set", + "before": "low risk", + "after": "higher throughput" + }, + { + "path": "confidence", + "op": "set", + "before": 0.8, + "after": 0.82 + } + ] + } + ] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [] + }, + "risks": { + "added": [], + "removed": [], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": false, + "requiredDomainsModified": [ + "decisions" + ] + }, + "runConfig": { + "repetitions": 15 + } +} diff --git a/server/bench/tasks/t3_multi_conflict_mixed.json b/server/bench/tasks/t3_multi_conflict_mixed.json new file mode 100644 index 0000000..c4cec7c --- /dev/null +++ b/server/bench/tasks/t3_multi_conflict_mixed.json @@ -0,0 +1,319 @@ +{ + "taskId": "t3_multi_conflict_mixed", + "category": "T3", + "description": "Mixed multi-domain delta with multiple conflicts and valid operations.", + "baseState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "targetState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + 
"confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + }, + { + "id": "f_audit", + "subject": "project", + "predicate": "audit", + "value": "enabled", + "confidence": 0.76 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "hybrid", + "rationale": "mixed constraints", + "confidence": 0.81 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "coreStubDelta": { + "facts": { + "added": [ + { + "key": "", + "unit": { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + } + }, + { + "key": "", + "unit": { + "id": "f_audit", + "subject": "project", + "predicate": "audit", + "value": "enabled", + "confidence": 0.76 + } + } + ], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + }, + "after": { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "hybrid", + "rationale": "mixed constraints", + "confidence": 0.81 + }, + "changes": [ + { + "path": "answer", + "op": "set", + "before": "incremental", + "after": "hybrid" + }, + { + "path": "rationale", + "op": "set", + "before": "low risk", + "after": "mixed constraints" + }, + { + "path": "confidence", + "op": "set", + "before": 0.8, + "after": 0.81 + } + ] + } + ] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [] + }, + "risks": { + 
"added": [], + "removed": [ + { + "key": "", + "unit": { + "id": "r_missing", + "title": "Missing risk", + "probability": "low", + "impact": "low", + "mitigation": "none" + } + } + ], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "llmStubDelta": { + "facts": { + "added": [ + { + "key": "", + "unit": { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + } + }, + { + "key": "", + "unit": { + "id": "f_audit", + "subject": "project", + "predicate": "audit", + "value": "enabled", + "confidence": 0.76 + } + } + ], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + }, + "after": { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "hybrid", + "rationale": "mixed constraints", + "confidence": 0.81 + }, + "changes": [ + { + "path": "answer", + "op": "set", + "before": "incremental", + "after": "hybrid" + }, + { + "path": "rationale", + "op": "set", + "before": "low risk", + "after": "mixed constraints" + }, + { + "path": "confidence", + "op": "set", + "before": 0.8, + "after": 0.81 + } + ] + } + ] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [] + }, + "risks": { + "added": [], + "removed": [ + { + "key": "", + "unit": { + "id": "r_missing", + "title": "Missing risk", + "probability": "low", + "impact": "low", + "mitigation": "none" + } + } + ], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": false, + "requiredDomainsModified": [ + "facts", + "decisions" + ] + }, + "runConfig": { + "repetitions": 15 + } +} diff --git a/server/bench/tasks/t3_partial_apply_expected.json 
b/server/bench/tasks/t3_partial_apply_expected.json new file mode 100644 index 0000000..818a7bd --- /dev/null +++ b/server/bench/tasks/t3_partial_apply_expected.json @@ -0,0 +1,314 @@ +{ + "taskId": "t3_partial_apply_expected", + "category": "T3", + "description": "Partial apply expected: strict keeps non-conflicting domains while rolling back conflicting domain.", + "baseState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "targetState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + }, + { + "id": "f_latency_budget", + "subject": "project", + "predicate": "latency-budget", + "value": "200ms", + "confidence": 0.74 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 110", + "strength": "high" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "medium", + 
"impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.75 + } + ] + }, + "coreStubDelta": { + "facts": { + "added": [ + { + "key": "", + "unit": { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + }, + { + "key": "", + "unit": { + "id": "f_latency_budget", + "subject": "project", + "predicate": "latency-budget", + "value": "200ms", + "confidence": 0.74 + } + } + ], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + }, + "after": { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 110", + "strength": "high" + }, + "changes": [ + { + "path": "rule", + "op": "set", + "before": "Budget must stay under 120", + "after": "Budget must stay under 110" + } + ] + } + ] + }, + "risks": { + "added": [], + "removed": [], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + }, + "after": { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.75 + }, + "changes": [ + { + "path": "confidence", + "op": "set", + "before": 0.7, + "after": 0.75 + } + ] + } + ] + } + }, + "llmStubDelta": { + "facts": { + "added": [ + { + "key": "", + "unit": { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + }, + { + "key": "", + "unit": { + "id": "f_latency_budget", + "subject": "project", + "predicate": "latency-budget", + "value": "200ms", + "confidence": 0.74 + } + } + ], + "removed": [], + "modified": [] 
+ }, + "decisions": { + "added": [], + "removed": [], + "modified": [] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 120", + "strength": "high" + }, + "after": { + "id": "c_budget", + "scope": "policy", + "rule": "Budget must stay under 110", + "strength": "high" + }, + "changes": [ + { + "path": "rule", + "op": "set", + "before": "Budget must stay under 120", + "after": "Budget must stay under 110" + } + ] + } + ] + }, + "risks": { + "added": [], + "removed": [], + "modified": [] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [ + { + "key": "", + "before": { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + }, + "after": { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.75 + }, + "changes": [ + { + "path": "confidence", + "op": "set", + "before": 0.7, + "after": 0.75 + } + ] + } + ] + } + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": false, + "requiredDomainsModified": [ + "facts", + "constraints", + "assumptions" + ] + }, + "runConfig": { + "repetitions": 15 + } +} diff --git a/server/bench/tasks/t3_strict_rollback_expected.json b/server/bench/tasks/t3_strict_rollback_expected.json new file mode 100644 index 0000000..4c45120 --- /dev/null +++ b/server/bench/tasks/t3_strict_rollback_expected.json @@ -0,0 +1,247 @@ +{ + "taskId": "t3_strict_rollback_expected", + "category": "T3", + "description": "Strict rollback preserves semantic risk conflict while best_effort resolves it via valid modification.", + "baseState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + 
"question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_risk_threshold", + "scope": "risk", + "rule": "Risk probability must be medium or below", + "strength": "high", + "maxProbability": "medium" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "high", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "targetState": { + "facts": [ + { + "id": "f_scope", + "subject": "project", + "predicate": "scope", + "value": "phase1", + "confidence": 0.9 + }, + { + "id": "f_budget", + "subject": "project", + "predicate": "budget", + "value": "100", + "confidence": 0.8 + } + ], + "decisions": [ + { + "id": "d_strategy", + "question": "Delivery strategy", + "answer": "incremental", + "rationale": "low risk", + "confidence": 0.8 + } + ], + "constraints": [ + { + "id": "c_risk_threshold", + "scope": "risk", + "rule": "Risk probability must be medium or below", + "strength": "high", + "maxProbability": "medium" + } + ], + "risks": [ + { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "low", + "impact": "high", + "mitigation": "weekly review" + } + ], + "assumptions": [ + { + "id": "a_team", + "statement": "Team capacity remains stable", + "confidence": 0.7 + } + ] + }, + "coreStubDelta": { + "facts": { + "added": [], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [] + }, + "risks": { + "added": [], + "removed": [ + { + "key": "", + "unit": { + "id": "r_missing", + "title": "Missing risk", + "probability": "low", + "impact": "low", + "mitigation": "none" + } + } + ], + "modified": [ + { + "key": "", + "before": { + "id": "r_overrun", + "title": "Cost overrun", + "probability": 
"high", + "impact": "high", + "mitigation": "weekly review" + }, + "after": { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "low", + "impact": "high", + "mitigation": "weekly review" + }, + "changes": [ + { + "path": "probability", + "op": "set", + "before": "high", + "after": "low" + } + ] + } + ] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "llmStubDelta": { + "facts": { + "added": [], + "removed": [], + "modified": [] + }, + "decisions": { + "added": [], + "removed": [], + "modified": [] + }, + "constraints": { + "added": [], + "removed": [], + "modified": [] + }, + "risks": { + "added": [], + "removed": [ + { + "key": "", + "unit": { + "id": "r_missing", + "title": "Missing risk", + "probability": "low", + "impact": "low", + "mitigation": "none" + } + } + ], + "modified": [ + { + "key": "", + "before": { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "high", + "impact": "high", + "mitigation": "weekly review" + }, + "after": { + "id": "r_overrun", + "title": "Cost overrun", + "probability": "low", + "impact": "high", + "mitigation": "weekly review" + }, + "changes": [ + { + "path": "probability", + "op": "set", + "before": "high", + "after": "low" + } + ] + } + ] + }, + "assumptions": { + "added": [], + "removed": [], + "modified": [] + } + }, + "targetAssertions": { + "mustEqualTargetHash": true, + "mustHaveNoConflicts": false, + "requiredDomainsModified": [ + "risks" + ] + }, + "runConfig": { + "repetitions": 15 + } +} diff --git a/server/package.json b/server/package.json index 8a970ba..46438ca 100644 --- a/server/package.json +++ b/server/package.json @@ -1,11 +1,14 @@ { "name": "shadow-threads-server", "version": "0.1.0", - "description": "Shadow Threads 后端服务", + "description": "Shadow Threads 閸氬海顏張宥呭", "main": "dist/index.js", "scripts": { "dev": "tsx watch src/index.ts", "build": "tsc", + "selftest:fast": "node dist/selftest/protocol.selftest.js && node 
dist/selftest/revision.selftest.js && node dist/selftest/execution.selftest.js && node dist/selftest/identity-boundary.selftest.js && node dist/selftest/http-api.selftest.js", + "selftest:core": "npm run selftest:fast && node dist/selftest/artifact-store.selftest.js && node dist/selftest/migration.selftest.js", + "selftest:full": "npm run selftest:core && node dist/selftest/http-api.e2e.js && node dist/selftest/http-api.error.e2e.js", "start": "node dist/index.js", "prisma:generate": "prisma generate --schema prisma/schema.prisma", "prisma:migrate": "prisma migrate dev --schema prisma/schema.prisma", @@ -13,12 +16,28 @@ "prisma:studio": "prisma studio --schema prisma/schema.prisma", "prisma:seed": "tsx prisma/seed.ts", "lint": "eslint src --ext .ts", - "test": "vitest" + "test": "vitest", + "bench:run": "tsx src/bench/runner.ts", + "bench:eval": "tsx src/bench/evaluator.ts", + "bench": "npm run bench:validate && npm run bench:run && npm run bench:eval", + "bench:validate": "tsx src/bench/validate-fixtures.ts", + "bench:validate:node": "node src/bench/node/validate-fixtures.cjs", + "bench:run:node": "node src/bench/node/run-with-build.cjs", + "bench:eval:node": "node src/bench/node/evaluator.cjs", + "bench:node": "npm run bench:strip-bom:node && npm run bench:validate:node && npm run bench:run:node && npm run bench:eval:node && npm run bench:stats:node && npm run bench:evidence:node && npm run bench:diagnose:node && npm run bench:gate:node", + "bench:strip-bom:node": "node src/bench/node/strip-bom.cjs", + "bench:selftest:node": "node src/bench/node/selftest.cjs", + "bench:stats:node": "node src/bench/node/stats.cjs", + "bench:eval2:node": "npm run bench:eval:node && npm run bench:stats:node", + "bench:evidence:node": "node src/bench/node/evidence.cjs", + "bench:gate:node": "node src/bench/node/gate.cjs", + "bench:diagnose:node": "node src/bench/node/diagnose.cjs" }, "dependencies": { "@anthropic-ai/sdk": "^0.30.0", "@google/generative-ai": "^0.21.0", 
"@prisma/client": "^5.22.0", + "adm-zip": "^0.5.16", "axios": "^1.13.2", "bcryptjs": "^2.4.3", "cors": "^2.8.5", diff --git a/server/prisma/migrations/20260302091553_artifact_store_record_v1/migration.sql b/server/prisma/migrations/20260302091553_artifact_store_record_v1/migration.sql new file mode 100644 index 0000000..13b6836 --- /dev/null +++ b/server/prisma/migrations/20260302091553_artifact_store_record_v1/migration.sql @@ -0,0 +1,25 @@ +-- CreateTable +CREATE TABLE "ArtifactStoreRecord" ( + "id" TEXT NOT NULL, + "schema" TEXT NOT NULL, + "packageId" TEXT NOT NULL, + "revisionId" TEXT, + "revisionHash" TEXT, + "bundleHash" TEXT NOT NULL, + "payload" JSONB NOT NULL, + "createdAt" TEXT, + + CONSTRAINT "ArtifactStoreRecord_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE UNIQUE INDEX "ArtifactStoreRecord_bundleHash_key" ON "ArtifactStoreRecord"("bundleHash"); + +-- CreateIndex +CREATE INDEX "ArtifactStoreRecord_packageId_idx" ON "ArtifactStoreRecord"("packageId"); + +-- CreateIndex +CREATE INDEX "ArtifactStoreRecord_revisionId_idx" ON "ArtifactStoreRecord"("revisionId"); + +-- CreateIndex +CREATE INDEX "ArtifactStoreRecord_revisionHash_idx" ON "ArtifactStoreRecord"("revisionHash"); diff --git a/server/prisma/migrations/20260304124546_artifact_store_record_v2/migration.sql b/server/prisma/migrations/20260304124546_artifact_store_record_v2/migration.sql new file mode 100644 index 0000000..e31b81e --- /dev/null +++ b/server/prisma/migrations/20260304124546_artifact_store_record_v2/migration.sql @@ -0,0 +1,57 @@ +/* + Warnings: + + - You are about to alter the column `bundleHash` on the `ArtifactStoreRecord` table. + - The `createdAt` column on the `ArtifactStoreRecord` table will be converted to TIMESTAMPTZ(6) without dropping existing values. + - A unique constraint covering the columns `[packageId,bundleHash]` on the table `ArtifactStoreRecord` will be added. If there are existing duplicate values, this will fail. 
+*/ + +-- Drop the old unique index on bundleHash (v1) +DROP INDEX "ArtifactStoreRecord_bundleHash_key"; + +-- 1) bundleHash: TEXT -> VARCHAR(64) (no padding semantics) +ALTER TABLE "ArtifactStoreRecord" + ALTER COLUMN "bundleHash" TYPE VARCHAR(64); + +-- Enforce exact length = 64 (sha-256 hex) +ALTER TABLE "ArtifactStoreRecord" + ADD CONSTRAINT "ArtifactStoreRecord_bundleHash_len_64_chk" + CHECK (length("bundleHash") = 64); + +-- 2) createdAt: production-safe conversion (no DROP+ADD) +-- Assume old createdAt was TEXT (possibly NULL / possibly non-parseable). +-- Strategy: add new column -> backfill -> swap. + +ALTER TABLE "ArtifactStoreRecord" + ADD COLUMN "createdAt_new" TIMESTAMPTZ(6); + +-- Backfill: +-- If createdAt is a parseable timestamp string, cast it. +-- If it's NULL/empty/unparseable, fall back to CURRENT_TIMESTAMP. +-- +-- Note: regex check reduces cast errors; it's not perfect but avoids obvious failures. +UPDATE "ArtifactStoreRecord" +SET "createdAt_new" = + CASE + WHEN "createdAt" IS NULL OR trim("createdAt") = '' THEN CURRENT_TIMESTAMP + WHEN "createdAt" ~ '^\d{4}-\d{2}-\d{2}' THEN ("createdAt"::timestamptz) + ELSE CURRENT_TIMESTAMP + END; + +-- Swap columns +ALTER TABLE "ArtifactStoreRecord" + DROP COLUMN "createdAt"; + +ALTER TABLE "ArtifactStoreRecord" + RENAME COLUMN "createdAt_new" TO "createdAt"; + +-- Enforce NOT NULL + DEFAULT +ALTER TABLE "ArtifactStoreRecord" + ALTER COLUMN "createdAt" SET DEFAULT CURRENT_TIMESTAMP; + +ALTER TABLE "ArtifactStoreRecord" + ALTER COLUMN "createdAt" SET NOT NULL; + +-- 3) New unique constraint: (packageId, bundleHash) +CREATE UNIQUE INDEX "ArtifactStoreRecord_packageId_bundleHash_key" +ON "ArtifactStoreRecord"("packageId", "bundleHash"); \ No newline at end of file diff --git a/server/prisma/migrations/20260305182350_revision_dag_v1/migration.sql b/server/prisma/migrations/20260305182350_revision_dag_v1/migration.sql new file mode 100644 index 0000000..0186f59 --- /dev/null +++ 
b/server/prisma/migrations/20260305182350_revision_dag_v1/migration.sql @@ -0,0 +1,39 @@ +-- CreateTable +CREATE TABLE "revision_nodes" ( + "revisionHash" CHAR(64) NOT NULL, + "packageId" TEXT NOT NULL, + "parentRevisionHash" CHAR(64), + "author" TEXT NOT NULL, + "message" TEXT NOT NULL, + "createdBy" TEXT NOT NULL, + "timestamp" TIMESTAMP(3) NOT NULL, + "source" TEXT NOT NULL, + "metadata" JSONB NOT NULL, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT "revision_nodes_pkey" PRIMARY KEY ("revisionHash") +); + +-- CreateTable +CREATE TABLE "revision_artifacts" ( + "revisionHash" CHAR(64) NOT NULL, + "bundleHash" CHAR(64) NOT NULL, + "role" TEXT NOT NULL, + + CONSTRAINT "revision_artifacts_pkey" PRIMARY KEY ("revisionHash","bundleHash") +); + +-- CreateIndex +CREATE INDEX "revision_nodes_packageId_idx" ON "revision_nodes"("packageId"); + +-- CreateIndex +CREATE INDEX "revision_nodes_parentRevisionHash_idx" ON "revision_nodes"("parentRevisionHash"); + +-- CreateIndex +CREATE INDEX "revision_artifacts_bundleHash_idx" ON "revision_artifacts"("bundleHash"); + +-- AddForeignKey +ALTER TABLE "revision_nodes" ADD CONSTRAINT "revision_nodes_parentRevisionHash_fkey" FOREIGN KEY ("parentRevisionHash") REFERENCES "revision_nodes"("revisionHash") ON DELETE SET NULL ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "revision_artifacts" ADD CONSTRAINT "revision_artifacts_revisionHash_fkey" FOREIGN KEY ("revisionHash") REFERENCES "revision_nodes"("revisionHash") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/server/prisma/migrations/20260305184030_execution_record_v1/migration.sql b/server/prisma/migrations/20260305184030_execution_record_v1/migration.sql new file mode 100644 index 0000000..5312754 --- /dev/null +++ b/server/prisma/migrations/20260305184030_execution_record_v1/migration.sql @@ -0,0 +1,62 @@ +-- CreateTable +CREATE TABLE "execution_records" ( + "executionId" UUID NOT NULL, + "packageId" TEXT NOT NULL, + "revisionHash" CHAR(64) NOT 
NULL, + "provider" TEXT NOT NULL, + "model" TEXT NOT NULL, + "promptHash" CHAR(64) NOT NULL, + "parameters" JSONB NOT NULL, + "resultHash" CHAR(64) NOT NULL, + "status" TEXT NOT NULL, + "startedAt" TIMESTAMPTZ(6) NOT NULL, + "finishedAt" TIMESTAMPTZ(6) NOT NULL, + "createdAt" TIMESTAMPTZ(6) NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT "execution_records_pkey" PRIMARY KEY ("executionId") +); + +-- CreateTable +CREATE TABLE "execution_inputs" ( + "executionId" UUID NOT NULL, + "bundleHash" CHAR(64) NOT NULL, + "role" TEXT NOT NULL, + + CONSTRAINT "execution_inputs_pkey" PRIMARY KEY ("executionId","bundleHash") +); + +-- CreateTable +CREATE TABLE "execution_outputs" ( + "executionId" UUID NOT NULL, + "bundleHash" CHAR(64) NOT NULL, + "role" TEXT NOT NULL, + + CONSTRAINT "execution_outputs_pkey" PRIMARY KEY ("executionId","bundleHash") +); + +-- CreateIndex +CREATE INDEX "execution_records_packageId_idx" ON "execution_records"("packageId"); + +-- CreateIndex +CREATE INDEX "execution_records_revisionHash_idx" ON "execution_records"("revisionHash"); + +-- CreateIndex +CREATE INDEX "execution_records_promptHash_idx" ON "execution_records"("promptHash"); + +-- CreateIndex +CREATE INDEX "execution_records_status_idx" ON "execution_records"("status"); + +-- CreateIndex +CREATE INDEX "execution_inputs_bundleHash_idx" ON "execution_inputs"("bundleHash"); + +-- CreateIndex +CREATE INDEX "execution_outputs_bundleHash_idx" ON "execution_outputs"("bundleHash"); + +-- AddForeignKey +ALTER TABLE "execution_records" ADD CONSTRAINT "execution_records_revisionHash_fkey" FOREIGN KEY ("revisionHash") REFERENCES "revision_nodes"("revisionHash") ON DELETE RESTRICT ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "execution_inputs" ADD CONSTRAINT "execution_inputs_executionId_fkey" FOREIGN KEY ("executionId") REFERENCES "execution_records"("executionId") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "execution_outputs" ADD CONSTRAINT 
"execution_outputs_executionId_fkey" FOREIGN KEY ("executionId") REFERENCES "execution_records"("executionId") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/server/prisma/migrations/20260305191602_execution_record_v1/migration.sql b/server/prisma/migrations/20260305191602_execution_record_v1/migration.sql new file mode 100644 index 0000000..af5102c --- /dev/null +++ b/server/prisma/migrations/20260305191602_execution_record_v1/migration.sql @@ -0,0 +1 @@ +-- This is an empty migration. \ No newline at end of file diff --git a/server/prisma/schema.prisma b/server/prisma/schema.prisma index 4620b1a..62c41f6 100644 --- a/server/prisma/schema.prisma +++ b/server/prisma/schema.prisma @@ -1,5 +1,5 @@ -// Shadow Threads 数据库 Schema -// 使用 Prisma ORM +// Shadow Threads 閺佺増宓佹惔?Schema +// 娴h法鏁?Prisma ORM generator client { provider = "prisma-client-js" @@ -11,7 +11,7 @@ datasource db { } // ============================================ -// 用户相关 +// 閻劍鍩涢惄绋垮彠 // ============================================ model User { @@ -19,18 +19,18 @@ model User { email String? @unique name String? - // 本地用户可以没有邮箱,用设备标识 + // 閺堫剙婀撮悽銊﹀煕閸欘垯浜掑▽鈩冩箒闁喚顔堥敍宀€鏁ょ拋鎯ь槵閺嶅洩鐦? deviceId String? @unique createdAt DateTime @default(now()) updatedAt DateTime @updatedAt - // 关联 + // 閸忓疇浠? apiKeys UserApiKey[] subthreads Subthread[] settings UserSettings? - // ✅ StateSnapshot 反向关系(修复 Prisma P1012) + // 閴?StateSnapshot 閸欏秴鎮滈崗宕囬兇閿涘牅鎱ㄦ径?Prisma P1012閿? stateSnapshots StateSnapshot[] taskPackages TaskPackage[] @@ -43,14 +43,14 @@ model UserSettings { userId String @unique user User @relation(fields: [userId], references: [id], onDelete: Cascade) - // 默认 LLM 提供商 + // 姒涙顓?LLM 閹绘劒绶甸崯? defaultProvider LLMProvider @default(OPENAI) - // UI 偏好 + // UI 閸嬪繐銈? theme String @default("auto") // auto, light, dark language String @default("zh-CN") - // 功能开关 + // 閸旂喕鍏樺鈧崗? 
autoSummarize Boolean @default(true) saveHistory Boolean @default(true) @@ -58,7 +58,7 @@ model UserSettings { updatedAt DateTime @updatedAt } -// 用户的 API Key 存储(加密存储) +// 閻劍鍩涢惃?API Key 鐎涙ê鍋嶉敍鍫濆鐎靛棗鐡ㄩ崒顭掔礆 model UserApiKey { id String @id @default(uuid()) userId String @@ -66,11 +66,11 @@ model UserApiKey { provider LLMProvider - // API Key(加密存储) + // API Key閿涘牆濮炵€靛棗鐡ㄩ崒顭掔礆 encryptedKey String - // 元数据 - label String? // 用户自定义标签 + // 閸忓啯鏆熼幑? + label String? // 閻劍鍩涢懛顏勭暰娑斿鐖g粵? isDefault Boolean @default(false) isValid Boolean @default(true) lastUsed DateTime? @@ -83,7 +83,7 @@ model UserApiKey { } // ============================================ -// LLM 提供商枚举 +// LLM 閹绘劒绶甸崯鍡樼亣娑? // ============================================ enum LLMProvider { @@ -91,88 +91,88 @@ enum LLMProvider { ANTHROPIC // Claude GOOGLE // Gemini GROQ // Groq - OLLAMA // 本地 Ollama + OLLAMA // 閺堫剙婀?Ollama DEEPSEEK // Deepseek - CUSTOM // 自定义 API + CUSTOM // 閼奉亜鐣炬稊?API } // ============================================ -// 子线程相关 +// 鐎涙劗鍤庣粙瀣祲閸? // ============================================ -// 主对话上下文(来源) +// 娑撹顕拠婵呯瑐娑撳鏋冮敍鍫熸降濠ф劧绱? model SourceContext { id String @id @default(uuid()) - // 来源平台 + // 閺夈儲绨獮鍐插酱 platform String // chatgpt, claude, gemini, etc. - // 原始对话信息 - conversationId String // 平台的对话 ID - conversationUrl String? // 对话 URL + // 閸樼喎顫愮€电鐦芥穱鈩冧紖 + conversationId String // 楠炲啿褰撮惃鍕嚠鐠?ID + conversationUrl String? // 鐎电鐦?URL - // 原始消息信息 - messageId String // 平台的消息 ID + // 閸樼喎顫愬☉鍫熶紖娣団剝浼? + messageId String // 楠炲啿褰撮惃鍕Х閹?ID messageRole String // user, assistant - messageText String @db.Text // 完整消息文本 + messageText String @db.Text // 鐎瑰本鏆e☉鍫熶紖閺傚洦婀? - // 用户选中的片段 + // 閻劍鍩涢柅澶夎厬閻ㄥ嫮澧栧▓? selectionText String @db.Text - selectionStart Int? // 选中起始位置 - selectionEnd Int? // 选中结束位置 + selectionStart Int? // 闁鑵戠挧宄邦潗娴e秶鐤? + selectionEnd Int? // 闁鑵戠紒鎾存将娴e秶鐤? - // ✅ PR-B2:Context L1 元信息(窗口化上下文的 meta) + // 閴?PR-B2閿涙ontext L1 閸忓啩淇婇幁顖ょ礄缁愭褰涢崠鏍︾瑐娑撳鏋冮惃?meta閿? contextMeta Json? - contextMessages Json? 
// ✅ P0-A:W 通路实体数据落库 + contextMessages Json? // 閴?P0-A閿涙瓙 闁俺鐭剧€圭偘缍嬮弫鐗堝祦閽€钘夌氨 createdAt DateTime @default(now()) taskPackages TaskPackage[] - // 关联 + // 閸忓疇浠? subthread Subthread? @@index([platform, conversationId]) @@index([platform, messageId]) } -// 影子子线程 +// 瑜板崬鐡欑€涙劗鍤庣粙? model Subthread { id String @id @default(uuid()) - // 所属用户 + // 閹碘偓鐏炵偟鏁ら幋? userId String user User @relation(fields: [userId], references: [id], onDelete: Cascade) - // 来源上下文 + // 閺夈儲绨稉濠佺瑓閺? sourceContextId String @unique sourceContext SourceContext @relation(fields: [sourceContextId], references: [id], onDelete: Cascade) - // 使用的 LLM + // 娴h法鏁ら惃?LLM provider LLMProvider - model String // 具体模型,如 gpt-4, claude-3-opus + model String // 閸忚渹缍嬪Ο鈥崇€烽敍灞筋洤 gpt-4, claude-3-opus - // 子线程标题(可自动生成) + // 鐎涙劗鍤庣粙瀣垼妫版﹫绱欓崣顖濆殰閸斻劎鏁撻幋鎰剁礆 title String? - // 状态 + // 閻樿埖鈧? status SubthreadStatus @default(ACTIVE) - // 上下文摘要(用于长对话压缩) + // 娑撳﹣绗呴弬鍥ㄦ喅鐟曚緤绱欓悽銊ょ艾闂€鍨嚠鐠囨繂甯囩紓鈺嬬礆 summary String? @db.Text - // 统计 + // 缂佺喕顓? messageCount Int @default(0) tokenCount Int @default(0) createdAt DateTime @default(now()) updatedAt DateTime @updatedAt - // 关联 + // 閸忓疇浠? messages SubthreadMessage[] - // ✅ StateSnapshot 反向关系(修复 Prisma P1012) + // 閴?StateSnapshot 閸欏秴鎮滈崗宕囬兇閿涘牅鎱ㄦ径?Prisma P1012閿? stateSnapshots StateSnapshot[] @@index([userId, createdAt]) @@ -185,26 +185,26 @@ enum SubthreadStatus { DELETED } -// 子线程消息 +// 鐎涙劗鍤庣粙瀣Х閹? model SubthreadMessage { id String @id @default(uuid()) subthreadId String subthread Subthread @relation(fields: [subthreadId], references: [id], onDelete: Cascade) - // 消息内容 + // 濞戝牊浼呴崘鍛啇 role MessageRole content String @db.Text - // Token 统计 + // Token 缂佺喕顓? promptTokens Int? completionTokens Int? - // 元数据 - model String? // 使用的模型 + // 閸忓啯鏆熼幑? + model String? // 娴h法鏁ら惃鍕侀崹? finishReason String? // stop, length, etc. - // 错误信息(如果有) + // 闁挎瑨顕ゆ穱鈩冧紖閿涘牆顩ч弸婊勬箒閿? error String? 
createdAt DateTime @default(now()) @@ -219,36 +219,36 @@ enum MessageRole { } // ============================================ -// StateSnapshot(思考状态库 v1) +// StateSnapshot閿涘牊鈧繆鈧啰濮搁幀浣哥氨 v1閿? // ============================================ model StateSnapshot { id String @id @default(uuid()) - // 归属用户 + // 瑜版帒鐫橀悽銊﹀煕 userId String user User @relation(fields: [userId], references: [id], onDelete: Cascade) - // 来源子线程(v1 可选) + // 閺夈儲绨€涙劗鍤庣粙瀣剁礄v1 閸欘垶鈧绱? subthreadId String? subthread Subthread? @relation(fields: [subthreadId], references: [id], onDelete: SetNull) - // 核心:StateSnapshot v1 JSON + // 閺嶇绺鹃敍姝媡ateSnapshot v1 JSON snapshot Json - // 版本控制(仍然固定 v1,语义不变) + // 閻楀牊婀伴幒褍鍩楅敍鍫滅矝閻掕泛娴愮€?v1閿涘矁顕㈡稊澶夌瑝閸欐﹫绱? version String - /// 整条状态链的根(第一版 snapshot) + /// 閺佸瓨娼悩鑸碘偓渚€鎽奸惃鍕壌閿涘牏顑囨稉鈧悧?snapshot閿? rootId String? - /// 上一个 snapshot(用于回溯) + /// 娑撳﹣绔存稉?snapshot閿涘牏鏁ゆ禍搴℃礀濠ь垽绱? parentId String? - /// 第几次演化(0 = 初始) + /// 缁楊剙鍤戝▎鈩冪川閸栨牭绱? = 閸掓繂顫愰敍? rev Int @default(0) - /// 自引用关系 + /// 閼奉亜绱╅悽銊ュ彠缁? parent StateSnapshot? @relation("StateSnapshotLineage", fields: [parentId], references: [id]) children StateSnapshot[] @relation("StateSnapshotLineage") @@ -262,7 +262,7 @@ model StateSnapshot { @@index([rootId]) @@index([parentId]) - // ✅ 工业幂等:同一个 parent 的同一个 rev 只能有一个 child + // 閴?瀹搞儰绗熼獮鍌滅搼閿涙艾鎮撴稉鈧稉?parent 閻ㄥ嫬鎮撴稉鈧稉?rev 閸欘亣鍏橀張澶夌娑?child @@unique([parentId, rev], name: "parentId_rev") } @@ -272,30 +272,30 @@ model TaskPackage { userId String user User @relation(fields: [userId], references: [id], onDelete: Cascade) - // 可选关联:从哪个 snapshot 生成(但 package 本身不依赖它) + // 閸欘垶鈧鍙ч懕鏃撶窗娴犲骸鎽㈡稉?snapshot 閻㈢喐鍨氶敍鍫滅稻 package 閺堫剝闊╂稉宥勭贩鐠ф牕鐣犻敍? sourceSnapshotId String? sourceSnapshot StateSnapshot? @relation(fields: [sourceSnapshotId], references: [id], onDelete: SetNull) - // 可选关联:从哪个 sourceContext 生成(用于追溯证据) + // 閸欘垶鈧鍙ч懕鏃撶窗娴犲骸鎽㈡稉?sourceContext 閻㈢喐鍨氶敍鍫㈡暏娴滃氦鎷峰┃顖濈槈閹诡噯绱? sourceContextId String? sourceContext SourceContext? 
@relation(fields: [sourceContextId], references: [id], onDelete: SetNull) - // 人类可读 + // 娴滆櫣琚崣顖濐嚢 title String? description String? @db.Text - // 状态 + // 閻樿埖鈧? status TaskPackageStatus @default(ACTIVE) - // 当前活跃 revision(指针) - // ✅ 一对一语义:一个 revision 最多只能被一个 package 指向 + // 瑜版挸澧犲ú鏄忕┈ revision閿涘牊瀵氶柦鍫礆 + // 閴?娑撯偓鐎甸€涚鐠囶厺绠熼敍姘娑?revision 閺堚偓婢舵艾褰ч懗鍊燁潶娑撯偓娑?package 閹稿洤鎮? currentRevisionId String? @unique currentRevision TaskPackageRevision? @relation("PackageCurrentRevision", fields: [currentRevisionId], references: [id], onDelete: SetNull) createdAt DateTime @default(now()) updatedAt DateTime @updatedAt - // 反向关系 + // 閸欏秴鎮滈崗宕囬兇 revisions TaskPackageRevision[] @relation("PackageRevisions") @@index([userId, createdAt]) @@ -314,25 +314,25 @@ model TaskPackageRevision { parent TaskPackageRevision? @relation("RevisionParent", fields: [parentRevisionId], references: [id], onDelete: SetNull) children TaskPackageRevision[] @relation("RevisionParent") - // ✅ 反向:被哪个 TaskPackage 作为 currentRevision 指向(Prisma 需要对称关系) + // 閴?閸欏秴鎮滈敍姘愁潶閸濐亙閲?TaskPackage 娴f粈璐?currentRevision 閹稿洤鎮滈敍鍦isma 闂団偓鐟曚礁顕粔鏉垮彠缁紮绱? currentFor TaskPackage? @relation("PackageCurrentRevision") - // 版本号:0,1,2... + // 閻楀牊婀伴崣鍑ょ窗0,1,2... rev Int @default(0) - // 核心:迁移包 JSON(建议结构:manifest + state + evidence + constraints + decisions + interfaces + risks) + // 閺嶇绺鹃敍姘崇讣缁夎瀵?JSON閿涘牆缂撶拋顔剧波閺嬪嫸绱癿anifest + state + evidence + constraints + decisions + interfaces + risks閿? payload Json // Deterministic revision identity (sha256 of canonicalized payload) revisionHash String - // 校验/摘要(工业字段) + // 閺嶏繝鐛?閹芥顩﹂敍鍫濅紣娑撴艾鐡у▓纰夌礆 schemaVersion String @default("tpkg-0.1") summary String? @db.Text createdAt DateTime @default(now()) - // 幂等/并发:同一个 package 的同一个 rev 只能有一条 + // 楠炲倻鐡?楠炶泛褰傞敍姘倱娑撯偓娑?package 閻ㄥ嫬鎮撴稉鈧稉?rev 閸欘亣鍏橀張澶夌閺? 
@@unique([packageId, rev]) @@unique([packageId, revisionHash]) @@index([packageId, createdAt]) @@ -347,7 +347,7 @@ enum TaskPackageStatus { } // ============================================ -// 系统配置 +// 缁崵绮洪柊宥囩枂 // ============================================ model SystemConfig { @@ -356,3 +356,120 @@ model SystemConfig { updatedAt DateTime @updatedAt } + + +model ArtifactStoreRecord { + id String @id @default(uuid()) + + schema String + + packageId String + revisionId String? + revisionHash String? + + bundleHash String @db.VarChar(64) + payload Json + createdAt DateTime @default(now()) @db.Timestamptz(6) + + @@unique([packageId, bundleHash]) + @@index([packageId]) + @@index([revisionId]) + @@index([revisionHash]) +} + +model RevisionNode { + revisionHash String @id @db.Char(64) + + packageId String + parentRevisionHash String? @db.Char(64) + + author String + message String + createdBy String + timestamp DateTime + source String + + metadata Json + + createdAt DateTime @default(now()) + + parent RevisionNode? 
@relation("RevisionParent", fields: [parentRevisionHash], references: [revisionHash], onDelete: SetNull) + children RevisionNode[] @relation("RevisionParent") + + artifacts RevisionArtifact[] + executions ExecutionRecord[] + + @@index([packageId]) + @@index([parentRevisionHash]) + @@map("revision_nodes") +} + +model RevisionArtifact { + revisionHash String @db.Char(64) + bundleHash String @db.Char(64) + role String + + revision RevisionNode @relation(fields: [revisionHash], references: [revisionHash], onDelete: Cascade) + + @@id([revisionHash, bundleHash]) + @@index([bundleHash]) + @@map("revision_artifacts") +} + +model ExecutionRecord { + executionId String @id @default(uuid()) @db.Uuid + + packageId String + revisionHash String @db.Char(64) + + provider String + model String + + promptHash String @db.Char(64) + + parameters Json @db.JsonB + + resultHash String @db.Char(64) + + status String + + startedAt DateTime @db.Timestamptz(6) + finishedAt DateTime @db.Timestamptz(6) + + createdAt DateTime @default(now()) @db.Timestamptz(6) + + revision RevisionNode @relation(fields: [revisionHash], references: [revisionHash], onDelete: Restrict) + + inputs ExecutionInput[] + outputs ExecutionOutput[] + + @@index([packageId]) + @@index([revisionHash]) + @@index([promptHash]) + @@index([status]) + @@map("execution_records") +} + +model ExecutionInput { + executionId String @db.Uuid + bundleHash String @db.Char(64) + role String + + execution ExecutionRecord @relation(fields: [executionId], references: [executionId], onDelete: Cascade) + + @@id([executionId, bundleHash]) + @@index([bundleHash]) + @@map("execution_inputs") +} + +model ExecutionOutput { + executionId String @db.Uuid + bundleHash String @db.Char(64) + role String + + execution ExecutionRecord @relation(fields: [executionId], references: [executionId], onDelete: Cascade) + + @@id([executionId, bundleHash]) + @@index([bundleHash]) + @@map("execution_outputs") +} diff --git 
a/server/src/api/__tests__/taskPackages.bundle.build.api.test.ts b/server/src/api/__tests__/taskPackages.bundle.build.api.test.ts new file mode 100644 index 0000000..b3a79ca --- /dev/null +++ b/server/src/api/__tests__/taskPackages.bundle.build.api.test.ts @@ -0,0 +1,233 @@ +import express from 'express'; +import request from 'supertest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const mockState = vi.hoisted(() => ({ + buildArtifactBundleV1: vi.fn(), +})); + +vi.mock('../../middleware', async () => { + const actual = await vi.importActual('../../middleware'); + return { + ...actual, + requireAuth: (req: { userId?: string }, _res: unknown, next: () => void) => { + req.userId = 'test-user-id'; + next(); + }, + }; +}); + +vi.mock('../../services/task-package.service', () => { + class MockTaskPackageService {} + return { TaskPackageService: MockTaskPackageService }; +}); + +vi.mock('../../services/transfer-package.service', () => { + class MockTransferPackageService {} + return { TransferPackageService: MockTransferPackageService }; +}); + +vi.mock('../../services/artifact-bundle.service', () => { + class MockArtifactBundleService { + buildArtifactBundleV1 = mockState.buildArtifactBundleV1; + } + + return { ArtifactBundleService: MockArtifactBundleService }; +}); + +import taskPackagesRouter from '../taskPackages'; + +function buildApp() { + const app = express(); + app.use(express.json({ limit: '1mb' })); + app.use('/api/v1/task-packages', taskPackagesRouter); + return app; +} + +function buildArtifactBundle() { + return { + schema: 'artifact-bundle-1', + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + }, + artifacts: { + transferPackageV1: { + schema: 'transfer-package-1', + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + transferHash: 'a'.repeat(64), + }, + 
lineageBindingV1: { + schema: 'lineage-binding-1', + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + transfer: { schema: 'transfer-package-1', transferHash: 'a'.repeat(64) }, + closure: null, + execution: null, + handoff: null, + }, + diagnostics: { missing: ['closure', 'execution', 'handoff'], notes: [] }, + createdAt: null, + lineageHash: 'b'.repeat(64), + }, + handoffRecordV1: { + schema: 'handoff-record-1', + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + transfer: { schema: 'transfer-package-1', transferHash: 'a'.repeat(64) }, + handoffHash: 'c'.repeat(64), + createdAt: null, + lineageBindingV1: { + schema: 'lineage-binding-1', + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + transfer: { schema: 'transfer-package-1', transferHash: 'a'.repeat(64) }, + closure: null, + execution: null, + handoff: null, + }, + diagnostics: { missing: ['closure', 'execution', 'handoff'], notes: [] }, + createdAt: null, + lineageHash: 'b'.repeat(64), + }, + }, + closureContractV1: null, + }, + diagnostics: { + invariants: [ + { code: 'INV_TRANSFER_HASH_MATCH_LINEAGE', ok: true, message: 'Transfer hash matches lineage binding' }, + { code: 'INV_EMBEDDED_LINEAGE_HASH_MATCH_TOP', ok: true, message: 'Embedded lineage hash matches top-level lineage' }, + { code: 'INV_NO_HANDOFF_BINDING_IN_LINEAGE', ok: true, message: 'Lineage has no handoff binding' }, + { code: 'INV_JSON_SAFE', ok: true, message: 'Artifact bundle is JSON-safe' }, + ], + notes: [], + }, + createdAt: null, + bundleHash: 'd'.repeat(64), + }; +} + +afterEach(() => { + mockState.buildArtifactBundleV1.mockReset(); +}); + +describe('taskPackages bundle build API', () => { + const app = 
buildApp(); + const routePackageId = '11111111-1111-4111-8111-111111111111'; + const body = { + transferPackageV1: { + schema: 'transfer-package-1', + identity: { packageId: routePackageId }, + transferHash: 'a'.repeat(64), + }, + lineageBindingV1: { + schema: 'lineage-binding-1', + identity: { packageId: routePackageId }, + lineageHash: 'b'.repeat(64), + }, + handoffRecordV1: { + schema: 'handoff-record-1', + identity: { packageId: routePackageId }, + handoffHash: 'c'.repeat(64), + }, + }; + + it('returns artifactBundleV1 with a 64-lower-hex bundleHash', async () => { + mockState.buildArtifactBundleV1.mockReturnValueOnce(buildArtifactBundle()); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/build`) + .send(body); + + expect(response.status).toBe(200); + expect(response.body.artifactBundleV1.schema).toBe('artifact-bundle-1'); + expect(response.body.artifactBundleV1.bundleHash).toMatch(/^[0-9a-f]{64}$/); + }); + + it('returns identical JSON for the same request twice', async () => { + const result = buildArtifactBundle(); + mockState.buildArtifactBundleV1.mockReturnValueOnce(result).mockReturnValueOnce(result); + + const first = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/build`) + .send(body); + const second = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/build`) + .send(body); + + expect(first.status).toBe(200); + expect(second.status).toBe(200); + expect(first.body).toEqual(second.body); + }); + + it('returns deterministic invalid input for malformed requests', async () => { + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/build`) + .send({}); + + expect(response.status).toBe(400); + expect(response.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid bundle build request', + }, + }); + }); + + it('returns deterministic invalid input for route or artifact package 
mismatches', async () => { + const artifactMismatch = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/build`) + .send({ + ...body, + handoffRecordV1: { + schema: 'handoff-record-1', + identity: { packageId: '22222222-2222-4222-8222-222222222222' }, + handoffHash: 'c'.repeat(64), + }, + }); + + expect(artifactMismatch.status).toBe(400); + expect(artifactMismatch.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid bundle build request', + }, + }); + + const routeMismatch = await request(app) + .post('/api/v1/task-packages/33333333-3333-4333-8333-333333333333/bundle/build') + .send(body); + + expect(routeMismatch.status).toBe(400); + expect(routeMismatch.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid bundle build request', + }, + }); + expect(mockState.buildArtifactBundleV1).not.toHaveBeenCalled(); + }); +}); diff --git a/server/src/api/__tests__/taskPackages.bundle.store.api.test.ts b/server/src/api/__tests__/taskPackages.bundle.store.api.test.ts new file mode 100644 index 0000000..2700546 --- /dev/null +++ b/server/src/api/__tests__/taskPackages.bundle.store.api.test.ts @@ -0,0 +1,207 @@ +import express from 'express'; +import request from 'supertest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const mockState = vi.hoisted(() => ({ + saveBundleV1: vi.fn(), + getBundleV1: vi.fn(), + verifyStoredBundleV1: vi.fn(), +})); + +vi.mock('../../middleware', async () => { + const actual = await vi.importActual('../../middleware'); + return { + ...actual, + requireAuth: (req: { userId?: string }, _res: unknown, next: () => void) => { + req.userId = 'test-user-id'; + next(); + }, + }; +}); + +vi.mock('../../services/task-package.service', () => { + class MockTaskPackageService {} + return { TaskPackageService: MockTaskPackageService }; +}); + +vi.mock('../../services/transfer-package.service', () => { + class MockTransferPackageService {} + 
return { TransferPackageService: MockTransferPackageService }; +}); + +vi.mock('../../services/artifact-bundle.service', () => { + class MockArtifactBundleService {} + return { ArtifactBundleService: MockArtifactBundleService }; +}); + +vi.mock('../../services/artifact-store.service', () => { + class MockArtifactStoreService { + saveBundleV1 = mockState.saveBundleV1; + getBundleV1 = mockState.getBundleV1; + verifyStoredBundleV1 = mockState.verifyStoredBundleV1; + } + + return { ArtifactStoreService: MockArtifactStoreService }; +}); + +import taskPackagesRouter from '../taskPackages'; + +function buildApp() { + const app = express(); + app.use(express.json({ limit: '1mb' })); + app.use('/api/v1/task-packages', taskPackagesRouter); + return app; +} + +function buildArtifactBundle() { + return { + schema: 'artifact-bundle-1', + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + }, + artifacts: { + transferPackageV1: {}, + lineageBindingV1: {}, + handoffRecordV1: {}, + closureContractV1: null, + }, + diagnostics: { + invariants: [], + notes: [], + }, + createdAt: null, + bundleHash: 'd'.repeat(64), + }; +} + +function buildStoreRecord(createdAt: string | null) { + return { + schema: 'artifact-store-record-1', + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + }, + bundleHash: 'd'.repeat(64), + artifactBundleV1: buildArtifactBundle(), + createdAt, + diagnostics: { + notes: [], + }, + storeHash: 'e'.repeat(64), + }; +} + +afterEach(() => { + mockState.saveBundleV1.mockReset(); + mockState.getBundleV1.mockReset(); + mockState.verifyStoredBundleV1.mockReset(); +}); + +describe('taskPackages bundle store API', () => { + const app = buildApp(); + const routePackageId = '11111111-1111-4111-8111-111111111111'; + const body = { + artifactBundleV1: buildArtifactBundle(), + }; + + it('stores a bundle deterministically with the expected 
response shape', async () => { + const result = { artifactStoreRecordV1: buildStoreRecord(null) }; + mockState.saveBundleV1.mockResolvedValueOnce(result).mockResolvedValueOnce(result); + + const first = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/store`) + .send(body); + const second = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/store`) + .send(body); + + expect(first.status).toBe(200); + expect(second.status).toBe(200); + expect(first.body).toEqual(second.body); + expect(first.body.artifactStoreRecordV1.storeHash).toMatch(/^[0-9a-f]{64}$/); + }); + + it('returns deterministic invalid input for malformed store requests and route mismatch', async () => { + const invalid = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/store`) + .send({}); + + expect(invalid.status).toBe(400); + expect(invalid.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid bundle store request', + }, + }); + + const mismatch = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/store`) + .send({ + artifactBundleV1: { + ...buildArtifactBundle(), + identity: { + packageId: '22222222-2222-4222-8222-222222222222', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + }, + }, + }); + + expect(mismatch.status).toBe(400); + expect(mismatch.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid bundle store request', + }, + }); + expect(mockState.saveBundleV1).not.toHaveBeenCalled(); + }); + + it('verifies stored bundles with 200 mismatch semantics and 200 not-found semantics', async () => { + mockState.verifyStoredBundleV1 + .mockResolvedValueOnce({ ok: true, recomputedHash: 'e'.repeat(64), matches: true }) + .mockResolvedValueOnce({ ok: true, recomputedHash: 'f'.repeat(64), matches: false }) + .mockResolvedValueOnce(null); + + const match = await request(app) + 
.post(`/api/v1/task-packages/${routePackageId}/bundle/store/${'d'.repeat(64)}/verify`) + .send({}); + const mismatch = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/store/${'d'.repeat(64)}/verify`) + .send({}); + const notFound = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/store/${'d'.repeat(64)}/verify`) + .send({}); + + expect(match.status).toBe(200); + expect(match.body).toEqual({ ok: true, recomputedHash: 'e'.repeat(64), matches: true }); + expect(mismatch.status).toBe(200); + expect(mismatch.body).toEqual({ ok: true, recomputedHash: 'f'.repeat(64), matches: false }); + expect(notFound.status).toBe(200); + expect(notFound.body).toEqual({ ok: false }); + }); + + it('returns deterministic invalid input for bundle store verify failures', async () => { + mockState.verifyStoredBundleV1.mockRejectedValueOnce(new Error('boom')); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/store/${'d'.repeat(64)}/verify`) + .send({}); + + expect(response.status).toBe(400); + expect(response.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid bundle store verify request', + }, + }); + }); +}); + + diff --git a/server/src/api/__tests__/taskPackages.bundle.verify.api.test.ts b/server/src/api/__tests__/taskPackages.bundle.verify.api.test.ts new file mode 100644 index 0000000..3848845 --- /dev/null +++ b/server/src/api/__tests__/taskPackages.bundle.verify.api.test.ts @@ -0,0 +1,140 @@ +import express from 'express'; +import request from 'supertest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const mockState = vi.hoisted(() => ({ + verifyArtifactBundleV1: vi.fn(), +})); + +vi.mock('../../middleware', async () => { + const actual = await vi.importActual('../../middleware'); + return { + ...actual, + requireAuth: (req: { userId?: string }, _res: unknown, next: () => void) => { + req.userId = 'test-user-id'; + next(); + }, 
+ }; +}); + +vi.mock('../../services/task-package.service', () => { + class MockTaskPackageService {} + return { TaskPackageService: MockTaskPackageService }; +}); + +vi.mock('../../services/transfer-package.service', () => { + class MockTransferPackageService {} + return { TransferPackageService: MockTransferPackageService }; +}); + +vi.mock('../../services/artifact-bundle.service', () => { + class MockArtifactBundleService { + verifyArtifactBundleV1 = mockState.verifyArtifactBundleV1; + } + + return { ArtifactBundleService: MockArtifactBundleService }; +}); + +import taskPackagesRouter from '../taskPackages'; + +function buildApp() { + const app = express(); + app.use(express.json({ limit: '1mb' })); + app.use('/api/v1/task-packages', taskPackagesRouter); + return app; +} + +afterEach(() => { + mockState.verifyArtifactBundleV1.mockReset(); +}); + +describe('taskPackages bundle verify API', () => { + const app = buildApp(); + const routePackageId = '11111111-1111-4111-8111-111111111111'; + const body = { + artifactBundleV1: { + schema: 'artifact-bundle-1', + identity: { + packageId: routePackageId, + }, + bundleHash: 'd'.repeat(64), + }, + }; + + it('returns 200 with matches=true for a valid bundle', async () => { + mockState.verifyArtifactBundleV1.mockReturnValueOnce({ + ok: true, + recomputedHash: 'd'.repeat(64), + matches: true, + }); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/verify`) + .send(body); + + expect(response.status).toBe(200); + expect(response.body).toEqual({ + ok: true, + recomputedHash: 'd'.repeat(64), + matches: true, + }); + }); + + it('returns 200 with matches=false when bundleHash is mutated', async () => { + mockState.verifyArtifactBundleV1.mockReturnValueOnce({ + ok: true, + recomputedHash: 'e'.repeat(64), + matches: false, + }); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/verify`) + .send(body); + + expect(response.status).toBe(200); + 
expect(response.body).toEqual({ + ok: true, + recomputedHash: 'e'.repeat(64), + matches: false, + }); + }); + + it('returns deterministic invalid input for malformed requests', async () => { + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/verify`) + .send({}); + + expect(response.status).toBe(400); + expect(response.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid bundle verify request', + }, + }); + }); + + it('returns deterministic invalid input for route mismatches', async () => { + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/bundle/verify`) + .send({ + artifactBundleV1: { + schema: 'artifact-bundle-1', + identity: { + packageId: '22222222-2222-4222-8222-222222222222', + }, + bundleHash: 'd'.repeat(64), + }, + }); + + expect(response.status).toBe(400); + expect(response.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid bundle verify request', + }, + }); + expect(mockState.verifyArtifactBundleV1).not.toHaveBeenCalled(); + }); +}); diff --git a/server/src/api/__tests__/taskPackages.handoff.verify.api.test.ts b/server/src/api/__tests__/taskPackages.handoff.verify.api.test.ts new file mode 100644 index 0000000..58c5ace --- /dev/null +++ b/server/src/api/__tests__/taskPackages.handoff.verify.api.test.ts @@ -0,0 +1,153 @@ +import express from 'express'; +import request from 'supertest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const mockState = vi.hoisted(() => ({ + verifyHandoffRecordV1: vi.fn(), +})); + +vi.mock('../../middleware', async () => { + const actual = await vi.importActual('../../middleware'); + return { + ...actual, + requireAuth: (req: { userId?: string }, _res: unknown, next: () => void) => { + req.userId = 'test-user-id'; + next(); + }, + }; +}); + +vi.mock('../../services/task-package.service', () => { + class MockTaskPackageService {} + return { 
TaskPackageService: MockTaskPackageService }; +}); + +vi.mock('../../services/transfer-package.service', () => { + class MockTransferPackageService { + verifyHandoffRecordV1 = mockState.verifyHandoffRecordV1; + } + + return { TransferPackageService: MockTransferPackageService }; +}); + +import taskPackagesRouter from '../taskPackages'; + +function buildApp() { + const app = express(); + app.use(express.json({ limit: '1mb' })); + app.use('/api/v1/task-packages', taskPackagesRouter); + return app; +} + +afterEach(() => { + mockState.verifyHandoffRecordV1.mockReset(); +}); + +describe('taskPackages handoff verify API', () => { + const app = buildApp(); + const routePackageId = '11111111-1111-4111-8111-111111111111'; + const body = { + handoffRecordV1: { + schema: 'handoff-record-1', + identity: { + packageId: routePackageId, + }, + handoffHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }, + }; + + it('returns 200 for a valid handoff record with matches=true', async () => { + mockState.verifyHandoffRecordV1.mockReturnValueOnce({ + ok: true, + recomputedHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + matches: true, + }); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/handoff/verify`) + .send(body); + + expect(response.status).toBe(200); + expect(response.body).toEqual({ + ok: true, + recomputedHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + matches: true, + }); + }); + + it('returns 200 with matches=false when the handoff hash does not match', async () => { + mockState.verifyHandoffRecordV1.mockReturnValueOnce({ + ok: true, + recomputedHash: 'fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210', + matches: false, + }); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/handoff/verify`) + .send(body); + + expect(response.status).toBe(200); + expect(response.body).toEqual({ + ok: true, + 
recomputedHash: 'fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210', + matches: false, + }); + }); + + it('returns deterministic invalid input for malformed requests and route mismatches', async () => { + const invalidShape = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/handoff/verify`) + .send({}); + + expect(invalidShape.status).toBe(400); + expect(invalidShape.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid handoff verify request', + }, + }); + + const routeMismatch = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/handoff/verify`) + .send({ + handoffRecordV1: { + schema: 'handoff-record-1', + identity: { + packageId: '22222222-2222-4222-8222-222222222222', + }, + handoffHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }, + }); + + expect(routeMismatch.status).toBe(400); + expect(routeMismatch.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid handoff verify request', + }, + }); + expect(mockState.verifyHandoffRecordV1).toHaveBeenCalledTimes(0); + }); + + it('returns identical output for the same valid input twice', async () => { + const result = { + ok: true, + recomputedHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + matches: true, + }; + mockState.verifyHandoffRecordV1.mockReturnValueOnce(result).mockReturnValueOnce(result); + + const first = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/handoff/verify`) + .send(body); + const second = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/handoff/verify`) + .send(body); + + expect(first.status).toBe(200); + expect(second.status).toBe(200); + expect(first.body).toEqual(second.body); + }); +}); diff --git a/server/src/api/__tests__/taskPackages.lineage.verify.api.test.ts b/server/src/api/__tests__/taskPackages.lineage.verify.api.test.ts new file mode 100644 index 0000000..4106bb9 
--- /dev/null +++ b/server/src/api/__tests__/taskPackages.lineage.verify.api.test.ts @@ -0,0 +1,133 @@ +import express from 'express'; +import request from 'supertest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const mockState = vi.hoisted(() => ({ + verifyLineageBindingV1: vi.fn(), +})); + +vi.mock('../../middleware', async () => { + const actual = await vi.importActual('../../middleware'); + return { + ...actual, + requireAuth: (req: { userId?: string }, _res: unknown, next: () => void) => { + req.userId = 'test-user-id'; + next(); + }, + }; +}); + +vi.mock('../../services/task-package.service', () => { + class MockTaskPackageService {} + return { TaskPackageService: MockTaskPackageService }; +}); + +vi.mock('../../services/transfer-package.service', () => { + class MockTransferPackageService { + verifyLineageBindingV1 = mockState.verifyLineageBindingV1; + } + + return { TransferPackageService: MockTransferPackageService }; +}); + +import taskPackagesRouter from '../taskPackages'; + +function buildApp() { + const app = express(); + app.use(express.json({ limit: '1mb' })); + app.use('/api/v1/task-packages', taskPackagesRouter); + return app; +} + +afterEach(() => { + mockState.verifyLineageBindingV1.mockReset(); +}); + +describe('taskPackages lineage verify API', () => { + const app = buildApp(); + const routePackageId = '11111111-1111-4111-8111-111111111111'; + const body = { + lineageBindingV1: { + schema: 'lineage-binding-1', + identity: { + packageId: routePackageId, + }, + lineageHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }, + }; + + it('returns 200 for a valid lineage binding with matches=true', async () => { + mockState.verifyLineageBindingV1.mockReturnValueOnce({ + ok: true, + recomputedHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + matches: true, + }); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/lineage/verify`) + .send(body); 
+ + expect(response.status).toBe(200); + expect(response.body).toEqual({ + ok: true, + recomputedHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + matches: true, + }); + }); + + it('returns 200 with matches=false when the lineage hash does not match', async () => { + mockState.verifyLineageBindingV1.mockReturnValueOnce({ + ok: true, + recomputedHash: 'fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210', + matches: false, + }); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/lineage/verify`) + .send(body); + + expect(response.status).toBe(200); + expect(response.body).toEqual({ + ok: true, + recomputedHash: 'fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210', + matches: false, + }); + }); + + it('returns deterministic invalid input for malformed or mismatched lineage bindings', async () => { + const invalidShape = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/lineage/verify`) + .send({}); + + expect(invalidShape.status).toBe(400); + expect(invalidShape.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid lineage verify request', + }, + }); + + const mismatch = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/lineage/verify`) + .send({ + lineageBindingV1: { + schema: 'lineage-binding-1', + identity: { + packageId: '22222222-2222-4222-8222-222222222222', + }, + lineageHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }, + }); + + expect(mismatch.status).toBe(400); + expect(mismatch.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid lineage verify request', + }, + }); + expect(mockState.verifyLineageBindingV1).toHaveBeenCalledTimes(0); + }); +}); diff --git a/server/src/api/__tests__/taskPackages.transfer.ingest.api.test.ts b/server/src/api/__tests__/taskPackages.transfer.ingest.api.test.ts new file mode 100644 index 
0000000..b60d839 --- /dev/null +++ b/server/src/api/__tests__/taskPackages.transfer.ingest.api.test.ts @@ -0,0 +1,288 @@ +import express from 'express'; +import request from 'supertest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; +import type { HandoffRecordV1 } from '../../services/handoff-record-v1'; +import type { LineageBindingV1 } from '../../services/lineage-binding-v1'; + +const mockState = vi.hoisted(() => ({ + ingestTransferPackageV1: vi.fn(), +})); + +vi.mock('../../middleware', async () => { + const actual = await vi.importActual('../../middleware'); + return { + ...actual, + requireAuth: (req: { userId?: string }, _res: unknown, next: () => void) => { + req.userId = 'test-user-id'; + next(); + }, + }; +}); + +vi.mock('../../services/task-package.service', () => { + class MockTaskPackageService {} + return { TaskPackageService: MockTaskPackageService }; +}); + +vi.mock('../../services/transfer-package.service', () => { + class MockTransferPackageService { + ingestTransferPackageV1 = mockState.ingestTransferPackageV1; + } + + return { TransferPackageService: MockTransferPackageService }; +}); + +import taskPackagesRouter from '../taskPackages'; + +function buildApp() { + const app = express(); + app.use(express.json({ limit: '1mb' })); + app.use('/api/v1/task-packages', taskPackagesRouter); + return app; +} + +function buildLineageBinding(transferHash: string, createdAt: string | null): LineageBindingV1 { + return { + schema: 'lineage-binding-1', + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + transfer: { + schema: 'transfer-package-1', + transferHash, + }, + closure: null, + execution: null, + handoff: null, + }, + diagnostics: { + missing: ['closure', 'execution', 'handoff'], + notes: [], + }, + createdAt, + lineageHash: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', + }; +} + +function 
buildHandoffRecord(createdAt: string | null): HandoffRecordV1 { + const transferHash = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef'; + return { + schema: 'handoff-record-1', + transfer: { + schema: 'transfer-package-1', + transferHash, + }, + identity: { + packageId: '11111111-1111-4111-8111-111111111111', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + trunk: { + intent: { primary: null, successCriteria: [], nonGoals: [] }, + stateDigest: { + facts: [], + decisions: [], + constraints: [], + risks: [], + assumptions: [], + openLoops: [], + }, + }, + continuation: { + nextActions: [], + validationChecklist: [], + }, + diagnostics: { + verified: true, + verification: { + transferHashRecomputed: transferHash, + matchesProvidedHash: true, + }, + }, + lineageBindingV1: buildLineageBinding(transferHash, createdAt), + createdAt, + handoffHash: 'fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210', + }; +} + +afterEach(() => { + mockState.ingestTransferPackageV1.mockReset(); +}); + +describe('taskPackages transfer ingest API', () => { + const app = buildApp(); + const routePackageId = '11111111-1111-4111-8111-111111111111'; + const body = { + transferPackageV1: { + schema: 'transfer-package-1', + identity: { + packageId: routePackageId, + }, + transferHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }, + }; + + it('returns identical output with embedded lineage when createdAt is omitted', async () => { + const handoff = buildHandoffRecord(null); + mockState.ingestTransferPackageV1.mockReturnValueOnce(handoff).mockReturnValueOnce(handoff); + + const first = await request(app).post(`/api/v1/task-packages/${routePackageId}/transfer/ingest`).send(body); + const second = await request(app).post(`/api/v1/task-packages/${routePackageId}/transfer/ingest`).send(body); + + 
expect(first.status).toBe(200); + expect(second.status).toBe(200); + expect(first.body).toEqual(second.body); + expect(first.body.handoffRecordV1.lineageBindingV1).toBeDefined(); + expect(first.body.lineageBindingV1).toBeDefined(); + expect(first.body.handoffRecordV1.lineageBindingV1.lineageHash).toBe(first.body.lineageBindingV1.lineageHash); + expect(first.body.handoffRecordV1.lineageBindingV1.bindings.transfer.transferHash).toBe(body.transferPackageV1.transferHash); + expect(first.body.lineageBindingV1.bindings.transfer.transferHash).toBe(body.transferPackageV1.transferHash); + }); + + it('keeps handoffHash and lineageHash stable when createdAt differs', async () => { + const firstHandoff = buildHandoffRecord('2025-01-01T00:00:00.000Z'); + const secondHandoff = buildHandoffRecord('2026-01-01T00:00:00.000Z'); + mockState.ingestTransferPackageV1.mockReturnValueOnce(firstHandoff).mockReturnValueOnce(secondHandoff); + + const first = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/transfer/ingest`) + .send({ ...body, createdAt: '2025-01-01T00:00:00.000Z' }); + const second = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/transfer/ingest`) + .send({ ...body, createdAt: '2026-01-01T00:00:00.000Z' }); + + expect(first.status).toBe(200); + expect(second.status).toBe(200); + expect(first.body.handoffRecordV1.createdAt).toBe('2025-01-01T00:00:00.000Z'); + expect(second.body.handoffRecordV1.createdAt).toBe('2026-01-01T00:00:00.000Z'); + expect(first.body.handoffRecordV1.handoffHash).toBe(second.body.handoffRecordV1.handoffHash); + expect(first.body.handoffRecordV1.lineageBindingV1.createdAt).toBe('2025-01-01T00:00:00.000Z'); + expect(second.body.handoffRecordV1.lineageBindingV1.createdAt).toBe('2026-01-01T00:00:00.000Z'); + expect(first.body.handoffRecordV1.lineageBindingV1.lineageHash).toBe(second.body.handoffRecordV1.lineageBindingV1.lineageHash); + 
expect(first.body.lineageBindingV1.lineageHash).toBe(second.body.lineageBindingV1.lineageHash); + expect(first.body.handoffRecordV1.lineageBindingV1.bindings.transfer.transferHash).toBe(body.transferPackageV1.transferHash); + expect(second.body.handoffRecordV1.lineageBindingV1.bindings.transfer.transferHash).toBe(body.transferPackageV1.transferHash); + }); + + it('passes null bindings when include flags are set without bindings payload', async () => { + const handoff = buildHandoffRecord(null); + mockState.ingestTransferPackageV1.mockReturnValueOnce(handoff); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/transfer/ingest`) + .send({ + ...body, + include: { + closureContractV1: true, + applyReportV1Hash: true, + executionRecordV1Hash: true, + }, + }); + + expect(response.status).toBe(200); + expect(mockState.ingestTransferPackageV1).toHaveBeenCalledWith({ + transferPackageV1: body.transferPackageV1, + include: { + closureContractV1: true, + applyReportV1Hash: true, + executionRecordV1Hash: true, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + createdAt: null, + }); + expect(response.body.handoffRecordV1.lineageBindingV1.lineageHash).toBe(response.body.lineageBindingV1.lineageHash); + }); + + it('passes explicit bindings through and preserves the transfer hash invariant', async () => { + const handoff = buildHandoffRecord(null); + handoff.bindings.closureContractV1 = { + schema: 'closure-contract-1', + proposedHash: 'proposed-hash', + acceptedHash: 'accepted-hash', + }; + handoff.bindings.applyReportV1Hash = 'apply-hash'; + handoff.bindings.executionRecordV1Hash = 'execution-hash'; + mockState.ingestTransferPackageV1.mockReturnValueOnce(handoff); + + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/transfer/ingest`) + .send({ + ...body, + include: { + closureContractV1: true, + applyReportV1Hash: true, + executionRecordV1Hash: 
true, + }, + bindings: { + closureContractV1: { + schema: 'closure-contract-1', + proposedHash: 'proposed-hash', + acceptedHash: 'accepted-hash', + }, + applyReportV1Hash: 'apply-hash', + executionRecordV1Hash: 'execution-hash', + }, + }); + + expect(response.status).toBe(200); + expect(mockState.ingestTransferPackageV1).toHaveBeenCalledWith({ + transferPackageV1: body.transferPackageV1, + include: { + closureContractV1: true, + applyReportV1Hash: true, + executionRecordV1Hash: true, + }, + bindings: { + closureContractV1: { + schema: 'closure-contract-1', + proposedHash: 'proposed-hash', + acceptedHash: 'accepted-hash', + }, + applyReportV1Hash: 'apply-hash', + executionRecordV1Hash: 'execution-hash', + }, + createdAt: null, + }); + expect(response.body).toEqual({ handoffRecordV1: handoff, lineageBindingV1: handoff.lineageBindingV1 }); + expect(response.body.handoffRecordV1.lineageBindingV1.bindings.transfer.transferHash).toBe(body.transferPackageV1.transferHash); + expect(response.body.lineageBindingV1.bindings.transfer.transferHash).toBe(body.transferPackageV1.transferHash); + }); + + it('rejects a route package mismatch deterministically', async () => { + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/transfer/ingest`) + .send({ + transferPackageV1: { + schema: 'transfer-package-1', + identity: { + packageId: '22222222-2222-4222-8222-222222222222', + }, + transferHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }, + }); + + expect(response.status).toBe(400); + expect(response.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid transfer ingest request', + }, + }); + expect(mockState.ingestTransferPackageV1).not.toHaveBeenCalled(); + }); +}); diff --git a/server/src/api/__tests__/taskPackages.transfer.v1.api.test.ts b/server/src/api/__tests__/taskPackages.transfer.v1.api.test.ts new file mode 100644 index 0000000..c9af2d1 --- /dev/null +++ 
b/server/src/api/__tests__/taskPackages.transfer.v1.api.test.ts @@ -0,0 +1,261 @@ +import express from 'express'; +import request from 'supertest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; +import type { LineageBindingV1 } from '../../services/lineage-binding-v1'; +import type { TransferPackageV1 } from '../../services/transfer-package-v1'; + +const mockState = vi.hoisted(() => ({ + createTransferPackage: vi.fn(), + buildLineageBindingForTransferFlowV1: vi.fn(), +})); + +vi.mock('../../middleware', async () => { + const actual = await vi.importActual('../../middleware'); + return { + ...actual, + requireAuth: (req: { userId?: string }, _res: unknown, next: () => void) => { + req.userId = 'test-user-id'; + next(); + }, + }; +}); + +vi.mock('../../services/task-package.service', () => { + class MockTaskPackageService {} + + return { TaskPackageService: MockTaskPackageService }; +}); + +vi.mock('../../services/transfer-package.service', () => { + class MockTransferPackageService { + createTransferPackage = mockState.createTransferPackage; + buildLineageBindingForTransferFlowV1 = mockState.buildLineageBindingForTransferFlowV1; + } + + return { TransferPackageService: MockTransferPackageService }; +}); + +import taskPackagesRouter from '../taskPackages'; + +function buildApp() { + const app = express(); + app.use(express.json({ limit: '1mb' })); + app.use('/api/v1/task-packages', taskPackagesRouter); + return app; +} + +function buildTransferPackage(): TransferPackageV1 { + return { + schema: 'transfer-package-1', + identity: { + packageId: 'pkg-1', + revisionId: '11111111-1111-4111-8111-111111111111', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + trunk: { + intent: { + primary: null, + successCriteria: [], + nonGoals: [], + }, + stateDigest: { + facts: [], + decisions: [], + constraints: [], + risks: [], + assumptions: [], + 
openLoops: [], + }, + }, + continuation: { + nextActions: [], + validationChecklist: [], + }, + conflicts: [], + determinism: { + sorted: true, + domainOrder: ['facts', 'decisions', 'constraints', 'risks', 'assumptions'], + }, + transferHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }; +} + +function buildLineageBinding(transferHash: string, createdAt: string | null): LineageBindingV1 { + return { + schema: 'lineage-binding-1', + identity: { + packageId: 'pkg-1', + revisionId: '11111111-1111-4111-8111-111111111111', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + transfer: { + schema: 'transfer-package-1', + transferHash, + }, + closure: null, + execution: null, + handoff: null, + }, + diagnostics: { + missing: ['closure', 'execution', 'handoff'], + notes: [], + }, + createdAt, + lineageHash: 'fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210', + }; +} + +afterEach(() => { + mockState.createTransferPackage.mockReset(); + mockState.buildLineageBindingForTransferFlowV1.mockReset(); +}); + +describe('taskPackages transfer v1 API', () => { + const app = buildApp(); + + it('returns schema=transfer-package-1 and additive lineageBindingV1 for a minimal request', async () => { + const transferPackageV1 = buildTransferPackage(); + const lineageBindingV1 = buildLineageBinding(transferPackageV1.transferHash, null); + mockState.createTransferPackage.mockResolvedValueOnce(transferPackageV1); + mockState.buildLineageBindingForTransferFlowV1.mockReturnValueOnce(lineageBindingV1); + + const response = await request(app) + .post('/api/v1/task-packages/11111111-1111-4111-8111-111111111111/transfer') + .send({}); + + expect(response.status).toBe(200); + expect(response.body.transferPackageV1.schema).toBe('transfer-package-1'); + expect(response.body.transferPackageV1.transferHash).toMatch(/^[0-9a-f]{64}$/); + expect(response.body.lineageBindingV1.schema).toBe('lineage-binding-1'); + 
expect(response.body.lineageBindingV1.lineageHash).toMatch(/^[0-9a-f]{64}$/); + expect(response.body.lineageBindingV1.createdAt).toBeNull(); + expect(response.body.lineageBindingV1.bindings.transfer.transferHash).toBe(response.body.transferPackageV1.transferHash); + expect(mockState.buildLineageBindingForTransferFlowV1).toHaveBeenCalledWith({ + transferPackageV1, + include: { + closure: false, + execution: false, + handoff: false, + }, + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + createdAt: null, + }); + }); + + it('is deterministic for the same request twice including lineageBindingV1', async () => { + const transferPackageV1 = buildTransferPackage(); + const lineageBindingV1 = buildLineageBinding(transferPackageV1.transferHash, null); + mockState.createTransferPackage + .mockResolvedValueOnce(transferPackageV1) + .mockResolvedValueOnce(transferPackageV1); + mockState.buildLineageBindingForTransferFlowV1 + .mockReturnValueOnce(lineageBindingV1) + .mockReturnValueOnce(lineageBindingV1); + + const body = { + trunk: { + intent: { + primary: 'handoff', + successCriteria: ['a'], + nonGoals: ['b'], + }, + }, + }; + + const first = await request(app) + .post('/api/v1/task-packages/11111111-1111-4111-8111-111111111111/transfer') + .send(body); + const second = await request(app) + .post('/api/v1/task-packages/11111111-1111-4111-8111-111111111111/transfer') + .send(body); + + expect(first.status).toBe(200); + expect(second.status).toBe(200); + expect(first.body).toEqual(second.body); + expect(first.body.transferPackageV1.transferHash).toBe(second.body.transferPackageV1.transferHash); + expect(first.body.lineageBindingV1.lineageHash).toBe(second.body.lineageBindingV1.lineageHash); + expect(first.body.lineageBindingV1.bindings.transfer.transferHash).toBe(first.body.transferPackageV1.transferHash); + }); + + it('binds closureContractV1 only when include.closureContractV1=true', async () => { + const bound = buildTransferPackage(); + 
bound.bindings.closureContractV1 = { + schema: 'closure-contract-1', + proposedHash: 'proposed-hash', + acceptedHash: 'accepted-hash', + }; + const unbound = buildTransferPackage(); + + mockState.createTransferPackage.mockResolvedValueOnce(bound).mockResolvedValueOnce(unbound); + mockState.buildLineageBindingForTransferFlowV1 + .mockReturnValueOnce(buildLineageBinding(bound.transferHash, null)) + .mockReturnValueOnce(buildLineageBinding(unbound.transferHash, null)); + + const withBinding = await request(app) + .post('/api/v1/task-packages/11111111-1111-4111-8111-111111111111/transfer') + .send({ + include: { closureContractV1: true }, + closureContractV1: { + schema: 'closure-contract-1', + proposedHash: 'proposed-hash', + acceptedHash: 'accepted-hash', + }, + }); + + const withoutBinding = await request(app) + .post('/api/v1/task-packages/11111111-1111-4111-8111-111111111111/transfer') + .send({ + include: { closureContractV1: false }, + closureContractV1: { + schema: 'closure-contract-1', + proposedHash: 'proposed-hash', + acceptedHash: 'accepted-hash', + }, + }); + + expect(withBinding.status).toBe(200); + expect(withBinding.body.transferPackageV1.bindings.closureContractV1).toEqual({ + schema: 'closure-contract-1', + proposedHash: 'proposed-hash', + acceptedHash: 'accepted-hash', + }); + + expect(withoutBinding.status).toBe(200); + expect(withoutBinding.body.transferPackageV1.bindings.closureContractV1).toBeNull(); + }); + + it('returns deterministic E_INVALID_INPUT for invalid domains in nextActions', async () => { + const response = await request(app) + .post('/api/v1/task-packages/11111111-1111-4111-8111-111111111111/transfer') + .send({ + continuation: { + nextActions: [ + { + code: 'NEXT', + message: 'Next', + domains: ['invalid-domain'], + }, + ], + }, + }); + + expect(response.status).toBe(400); + expect(response.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid domain in nextActions', + }, + }); + }); +}); diff 
--git a/server/src/api/__tests__/taskPackages.transfer.verify.api.test.ts b/server/src/api/__tests__/taskPackages.transfer.verify.api.test.ts new file mode 100644 index 0000000..ab36c21 --- /dev/null +++ b/server/src/api/__tests__/taskPackages.transfer.verify.api.test.ts @@ -0,0 +1,115 @@ +import express from 'express'; +import request from 'supertest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const mockState = vi.hoisted(() => ({ + verifyTransferPackageV1: vi.fn(), +})); + +vi.mock('../../middleware', async () => { + const actual = await vi.importActual('../../middleware'); + return { + ...actual, + requireAuth: (req: { userId?: string }, _res: unknown, next: () => void) => { + req.userId = 'test-user-id'; + next(); + }, + }; +}); + +vi.mock('../../services/task-package.service', () => { + class MockTaskPackageService {} + return { TaskPackageService: MockTaskPackageService }; +}); + +vi.mock('../../services/transfer-package.service', () => { + class MockTransferPackageService { + verifyTransferPackageV1 = mockState.verifyTransferPackageV1; + } + + return { TransferPackageService: MockTransferPackageService }; +}); + +import taskPackagesRouter from '../taskPackages'; + +function buildApp() { + const app = express(); + app.use(express.json({ limit: '1mb' })); + app.use('/api/v1/task-packages', taskPackagesRouter); + return app; +} + +afterEach(() => { + mockState.verifyTransferPackageV1.mockReset(); +}); + +describe('taskPackages transfer verify API', () => { + const app = buildApp(); + const routePackageId = '11111111-1111-4111-8111-111111111111'; + const body = { + transferPackageV1: { + schema: 'transfer-package-1', + identity: { + packageId: routePackageId, + }, + transferHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }, + }; + + it('returns identical output for the same input twice', async () => { + const result = { + ok: true, + recomputedHash: 
'0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + matches: true, + }; + mockState.verifyTransferPackageV1.mockReturnValueOnce(result).mockReturnValueOnce(result); + + const first = await request(app).post(`/api/v1/task-packages/${routePackageId}/transfer/verify`).send(body); + const second = await request(app).post(`/api/v1/task-packages/${routePackageId}/transfer/verify`).send(body); + + expect(first.status).toBe(200); + expect(second.status).toBe(200); + expect(first.body).toEqual(second.body); + }); + + it('returns 200 with matches=false when the hash does not match', async () => { + mockState.verifyTransferPackageV1.mockReturnValueOnce({ + ok: true, + recomputedHash: 'fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210', + matches: false, + }); + + const response = await request(app).post(`/api/v1/task-packages/${routePackageId}/transfer/verify`).send(body); + + expect(response.status).toBe(200); + expect(response.body).toEqual({ + ok: true, + recomputedHash: 'fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210', + matches: false, + }); + }); + + it('rejects a route package mismatch deterministically', async () => { + const response = await request(app) + .post(`/api/v1/task-packages/${routePackageId}/transfer/verify`) + .send({ + transferPackageV1: { + schema: 'transfer-package-1', + identity: { + packageId: '22222222-2222-4222-8222-222222222222', + }, + transferHash: '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef', + }, + }); + + expect(response.status).toBe(400); + expect(response.body).toEqual({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid transfer verify request', + }, + }); + expect(mockState.verifyTransferPackageV1).not.toHaveBeenCalled(); + }); +}); diff --git a/server/src/api/taskPackages.ts b/server/src/api/taskPackages.ts index b8a3f7a..f3e4cf0 100644 --- a/server/src/api/taskPackages.ts +++ b/server/src/api/taskPackages.ts @@ -10,9 +10,15 @@ import { z } 
from 'zod'; import { asyncHandler, requireAuth, validate } from '../middleware'; import { idParamSchema } from '../middleware/validation'; import { TaskPackageService } from '../services/task-package.service'; +import { TransferPackageService } from '../services/transfer-package.service'; +import { ArtifactBundleService } from '../services/artifact-bundle.service'; +import { ArtifactStoreService } from '../services/artifact-store.service'; const router = Router(); const svc = new TaskPackageService(); +const transferSvc = new TransferPackageService(); +const artifactBundleSvc = new ArtifactBundleService(); +const artifactStoreSvc = new ArtifactStoreService(); const targetSchemaVersionSchema = z.enum(['tpkg-0.1', 'tpkg-0.2']); @@ -37,6 +43,8 @@ function normalizeServiceCode(code: unknown): | 'CONFLICT_RETRY_EXHAUSTED' | 'E_INVALID_INPUT' | 'E_LLM_DELTA_CONFLICT' + | 'E_TRANSFER_INVALID' + | 'E_TRANSFER_NON_JSON_SAFE' | 'UNKNOWN' { if (typeof code !== 'string') return 'UNKNOWN'; if ( @@ -46,7 +54,9 @@ function normalizeServiceCode(code: unknown): code === 'INVALID_INPUT' || code === 'CONFLICT_RETRY_EXHAUSTED' || code === 'E_INVALID_INPUT' || - code === 'E_LLM_DELTA_CONFLICT' + code === 'E_LLM_DELTA_CONFLICT' || + code === 'E_TRANSFER_INVALID' || + code === 'E_TRANSFER_NON_JSON_SAFE' ) { return code; } @@ -56,6 +66,8 @@ function normalizeServiceCode(code: unknown): function errorMessageForCode(code: ReturnType<typeof normalizeServiceCode>): string { if (code === 'E_INVALID_INPUT') return "llmDeltaMode must be 'best_effort' or 'strict'"; if (code === 'E_LLM_DELTA_CONFLICT') return 'LLM delta contains conflicts'; + if (code === 'E_TRANSFER_INVALID') return 'Transfer package input is invalid'; + if (code === 'E_TRANSFER_NON_JSON_SAFE') return 'Transfer package contains non JSON-safe value'; return code; } @@ -115,6 +127,273 @@ const importPackageSchema = z.object({ }); type ImportPackageBody = z.infer<typeof importPackageSchema>; +const transferDomainValues = ['facts', 'decisions', 'constraints', 'risks', 'assumptions'] as
const; +type TransferDomain = (typeof transferDomainValues)[number]; +const transferDomainSet = new Set<string>(transferDomainValues); + +const transferBodySchema = z.object({ + revisionId: z.string().uuid().optional(), + include: z + .object({ + closureContractV1: z.boolean().optional(), + applyReportV1Hash: z.boolean().optional(), + executionRecordV1Hash: z.boolean().optional(), + }) + .optional(), + closureContractV1: z + .object({ + schema: z.literal('closure-contract-1'), + proposedHash: z.string(), + acceptedHash: z.string(), + }) + .nullable() + .optional(), + applyReportV1Hash: z.string().nullable().optional(), + executionRecordV1Hash: z.string().nullable().optional(), + trunk: z + .object({ + intent: z + .object({ + primary: z.string().nullable().optional(), + successCriteria: z.array(z.string()).optional(), + nonGoals: z.array(z.string()).optional(), + }) + .optional(), + stateDigest: z + .object({ + facts: z.array(z.string()).optional(), + decisions: z.array(z.string()).optional(), + constraints: z.array(z.string()).optional(), + risks: z.array(z.string()).optional(), + assumptions: z.array(z.string()).optional(), + openLoops: z.array(z.string()).optional(), + }) + .optional(), + }) + .optional(), + continuation: z + .object({ + nextActions: z + .array( + z.object({ + code: z.string(), + message: z.string(), + expectedOutput: z.string().nullable().optional(), + domains: z.array(z.string()).optional(), + }) + ) + .optional(), + validationChecklist: z + .array( + z.object({ + code: z.string(), + message: z.string(), + severity: z.enum(['must', 'should']).optional(), + }) + ) + .optional(), + }) + .optional(), +}); +type TransferBody = z.infer<typeof transferBodySchema>; +type TransferNextActionBody = NonNullable<NonNullable<TransferBody['continuation']>['nextActions']>[number]; + +function sendTransferInputError(res: Response, message: 'Invalid transfer package request' | 'Invalid domain in nextActions'): void { + res.status(400).json({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message, + }, + }); +} + +function
hasInvalidTransferDomain(nextActions: TransferNextActionBody[] | undefined): boolean { + if (!Array.isArray(nextActions)) return false; + for (const nextAction of nextActions) { + if (!Array.isArray(nextAction.domains)) continue; + for (const domain of nextAction.domains) { + if (!transferDomainSet.has(domain)) return true; + } + } + return false; +} + +function normalizeTransferDomains(domains: string[] | undefined): TransferDomain[] { + if (!Array.isArray(domains)) return []; + const normalized = domains.filter((domain): domain is TransferDomain => transferDomainSet.has(domain)); + return normalized; +} + +const transferVerifyBodySchema = z.object({ + transferPackageV1: z.unknown(), +}); +type TransferVerifyBody = z.infer<typeof transferVerifyBodySchema>; + +const transferIngestBodySchema = z.object({ + transferPackageV1: z.unknown(), + include: z + .object({ + closureContractV1: z.boolean().optional(), + applyReportV1Hash: z.boolean().optional(), + executionRecordV1Hash: z.boolean().optional(), + }) + .optional(), + bindings: z + .object({ + closureContractV1: z + .object({ + schema: z.literal('closure-contract-1'), + proposedHash: z.string(), + acceptedHash: z.string(), + }) + .nullable() + .optional(), + applyReportV1Hash: z.string().nullable().optional(), + executionRecordV1Hash: z.string().nullable().optional(), + }) + .optional(), + createdAt: z.string().nullable().optional(), +}); +type TransferIngestBody = z.infer<typeof transferIngestBodySchema>; + +const lineageVerifyBodySchema = z.object({ + lineageBindingV1: z.unknown(), +}); +type LineageVerifyBody = z.infer<typeof lineageVerifyBodySchema>; + +const handoffVerifyBodySchema = z.object({ + handoffRecordV1: z.unknown(), +}); +type HandoffVerifyBody = z.infer<typeof handoffVerifyBodySchema>; + +const bundleBuildBodySchema = z.object({ + transferPackageV1: z.unknown(), + lineageBindingV1: z.unknown(), + handoffRecordV1: z.unknown(), + closureContractV1: z.unknown().nullable().optional(), + identity: z + .object({ + revisionId: z.string().nullable().optional(), + revisionHash: z.string().nullable().optional(), + }) + .optional(), + 
createdAt: z.string().nullable().optional(), + notes: z.array(z.string()).optional(), +}); +type BundleBuildBody = z.infer<typeof bundleBuildBodySchema>; + +const bundleVerifyBodySchema = z.object({ + artifactBundleV1: z.unknown(), +}); +type BundleVerifyBody = z.infer<typeof bundleVerifyBodySchema>; + +const bundleStoreBodySchema = z.object({ + artifactBundleV1: z.unknown(), + createdAt: z.string().nullable().optional(), + notes: z.array(z.string()).optional(), +}); +type BundleStoreBody = z.infer<typeof bundleStoreBodySchema>; +const bundleStoreParamSchema = z.object({ + id: z.string().uuid(), + bundleHash: z.string(), +}); + +function sendTransferConsumerInputError( + res: Response, + message: 'Invalid transfer verify request' | 'Invalid transfer ingest request' +): void { + res.status(400).json({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message, + }, + }); +} + +function sendLineageInputError(res: Response): void { + res.status(400).json({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid lineage verify request', + }, + }); +} + +function sendHandoffInputError(res: Response): void { + res.status(400).json({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message: 'Invalid handoff verify request', + }, + }); +} + +function sendBundleInputError(res: Response, message: 'Invalid bundle build request' | 'Invalid bundle verify request'): void { + res.status(400).json({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message, + }, + }); +} + +function sendStoreInputError( + res: Response, + message: 'Invalid bundle store request' | 'Invalid bundle store verify request' +): void { + res.status(400).json({ + success: false, + error: { + code: 'E_INVALID_INPUT', + message, + }, + }); +} + +function isRecord(value: unknown): value is Record<string, unknown> { + return !!value && typeof value === 'object' && !Array.isArray(value); +} + +function extractTransferBodyPackageId(value: unknown): string | null { + if (!isRecord(value)) return null; + const identity = value.identity; + if (!isRecord(identity)) return null;
+ return typeof identity.packageId === 'string' ? identity.packageId : null; +} + +function extractLineageBodyPackageId(value: unknown): string | null { + if (!isRecord(value)) return null; + const identity = value.identity; + if (!isRecord(identity)) return null; + return typeof identity.packageId === 'string' ? identity.packageId : null; +} + +function extractHandoffBodyPackageId(value: unknown): string | null { + if (!isRecord(value)) return null; + const identity = value.identity; + if (!isRecord(identity)) return null; + return typeof identity.packageId === 'string' ? identity.packageId : null; +} + +function extractBundleArtifactPackageId(value: unknown): string | null { + if (!isRecord(value)) return null; + const identity = value.identity; + if (!isRecord(identity)) return null; + return typeof identity.packageId === 'string' ? identity.packageId : null; +} + +function extractBundleBodyPackageId(value: unknown): string | null { + if (!isRecord(value)) return null; + const identity = value.identity; + if (!isRecord(identity)) return null; + return typeof identity.packageId === 'string' ? identity.packageId : null; +} + router.post( '/import', requireAuth, @@ -225,6 +504,395 @@ router.get( }) ); +router.post( + '/:id/transfer', + requireAuth, + validate({ params: idParamSchema }), + asyncHandler(async (req, res) => { + const userId = req.userId!; + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const parsed = transferBodySchema.safeParse(req.body ?? 
{}); + + if (!parsed.success) { + sendTransferInputError(res, 'Invalid transfer package request'); + return; + } + + const body = parsed.data as TransferBody; + if (hasInvalidTransferDomain(body.continuation?.nextActions)) { + sendTransferInputError(res, 'Invalid domain in nextActions'); + return; + } + + try { + const transferPackageV1 = await transferSvc.createTransferPackage(userId, packageId, { + revisionId: body.revisionId, + include: { + closureContractV1: body.include?.closureContractV1 === true, + applyReportV1Hash: body.include?.applyReportV1Hash === true, + executionRecordV1Hash: body.include?.executionRecordV1Hash === true, + }, + closureContractV1: body.closureContractV1 ?? null, + applyReportV1Hash: typeof body.applyReportV1Hash === 'string' ? body.applyReportV1Hash : null, + executionRecordV1Hash: typeof body.executionRecordV1Hash === 'string' ? body.executionRecordV1Hash : null, + trunk: body.trunk + ? { + intent: body.trunk.intent + ? { + primary: body.trunk.intent.primary ?? null, + successCriteria: body.trunk.intent.successCriteria ?? [], + nonGoals: body.trunk.intent.nonGoals ?? [], + } + : undefined, + stateDigest: body.trunk.stateDigest + ? { + facts: body.trunk.stateDigest.facts ?? [], + decisions: body.trunk.stateDigest.decisions ?? [], + constraints: body.trunk.stateDigest.constraints ?? [], + risks: body.trunk.stateDigest.risks ?? [], + assumptions: body.trunk.stateDigest.assumptions ?? [], + openLoops: body.trunk.stateDigest.openLoops ?? [], + } + : undefined, + } + : undefined, + continuation: body.continuation + ? { + nextActions: body.continuation.nextActions?.map((entry) => ({ + code: entry.code, + message: entry.message, + expectedOutput: entry.expectedOutput ?? null, + domains: normalizeTransferDomains(entry.domains), + })), + validationChecklist: body.continuation.validationChecklist?.map((entry) => ({ + code: entry.code, + message: entry.message, + severity: entry.severity ?? 
'should', + })), + } + : undefined, + }); + const lineageBindingV1 = transferSvc.buildLineageBindingForTransferFlowV1({ + transferPackageV1, + include: { + closure: body.include?.closureContractV1 === true, + execution: + body.include?.applyReportV1Hash === true || body.include?.executionRecordV1Hash === true, + handoff: false, + }, + closureContractV1: body.closureContractV1 ?? null, + applyReportV1Hash: typeof body.applyReportV1Hash === 'string' ? body.applyReportV1Hash : null, + executionRecordV1Hash: typeof body.executionRecordV1Hash === 'string' ? body.executionRecordV1Hash : null, + createdAt: null, + }); + + res.json({ transferPackageV1, lineageBindingV1 }); + } catch (err: unknown) { + sendServiceError(res, extractErrorCode(err)); + } + }) +); + +router.post( + '/:id/transfer/verify', + requireAuth, + validate({ params: idParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const parsed = transferVerifyBodySchema.safeParse(req.body ?? {}); + if (!parsed.success) { + sendTransferConsumerInputError(res, 'Invalid transfer verify request'); + return; + } + + const body = parsed.data as TransferVerifyBody; + const bodyPackageId = extractTransferBodyPackageId(body.transferPackageV1); + if (bodyPackageId !== null && bodyPackageId !== packageId) { + sendTransferConsumerInputError(res, 'Invalid transfer verify request'); + return; + } + + try { + const result = transferSvc.verifyTransferPackageV1({ + transferPackageV1: body.transferPackageV1, + }); + res.json(result); + } catch (_err: unknown) { + sendTransferConsumerInputError(res, 'Invalid transfer verify request'); + } + }) +); + +router.post( + '/:id/transfer/ingest', + requireAuth, + validate({ params: idParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const parsed = transferIngestBodySchema.safeParse(req.body ?? 
{}); + if (!parsed.success) { + sendTransferConsumerInputError(res, 'Invalid transfer ingest request'); + return; + } + + const body = parsed.data as TransferIngestBody; + const bodyPackageId = extractTransferBodyPackageId(body.transferPackageV1); + if (bodyPackageId !== null && bodyPackageId !== packageId) { + sendTransferConsumerInputError(res, 'Invalid transfer ingest request'); + return; + } + + try { + const createdAt = typeof body.createdAt === 'string' ? body.createdAt : body.createdAt === null ? null : null; + const handoffRecordV1 = transferSvc.ingestTransferPackageV1({ + transferPackageV1: body.transferPackageV1, + include: { + closureContractV1: body.include?.closureContractV1 === true, + applyReportV1Hash: body.include?.applyReportV1Hash === true, + executionRecordV1Hash: body.include?.executionRecordV1Hash === true, + }, + bindings: { + closureContractV1: body.bindings?.closureContractV1 ?? null, + applyReportV1Hash: typeof body.bindings?.applyReportV1Hash === 'string' ? body.bindings.applyReportV1Hash : null, + executionRecordV1Hash: + typeof body.bindings?.executionRecordV1Hash === 'string' ? body.bindings.executionRecordV1Hash : null, + }, + createdAt, + }); + const lineageBindingV1 = handoffRecordV1.lineageBindingV1; + res.json({ handoffRecordV1, lineageBindingV1 }); + } catch (_err: unknown) { + sendTransferConsumerInputError(res, 'Invalid transfer ingest request'); + } + }) +); + +router.post( + '/:id/bundle/build', + requireAuth, + validate({ params: idParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const parsed = bundleBuildBodySchema.safeParse(req.body ?? 
{}); + if (!parsed.success) { + sendBundleInputError(res, 'Invalid bundle build request'); + return; + } + + const body = parsed.data as BundleBuildBody; + const tpId = extractBundleArtifactPackageId(body.transferPackageV1); + const lbId = extractBundleArtifactPackageId(body.lineageBindingV1); + const hoId = extractBundleArtifactPackageId(body.handoffRecordV1); + if (tpId === null || lbId === null || hoId === null) { + sendBundleInputError(res, 'Invalid bundle build request'); + return; + } + if (tpId !== lbId || lbId !== hoId || packageId !== tpId) { + sendBundleInputError(res, 'Invalid bundle build request'); + return; + } + + try { + const artifactBundleV1 = artifactBundleSvc.buildArtifactBundleV1({ + identity: { + packageId: tpId, + revisionId: + typeof body.identity?.revisionId === 'string' + ? body.identity.revisionId + : body.identity?.revisionId === null + ? null + : null, + revisionHash: + typeof body.identity?.revisionHash === 'string' + ? body.identity.revisionHash + : body.identity?.revisionHash === null + ? null + : null, + }, + artifacts: { + transferPackageV1: body.transferPackageV1, + lineageBindingV1: body.lineageBindingV1, + handoffRecordV1: body.handoffRecordV1, + closureContractV1: body.closureContractV1 ?? null, + }, + diagnostics: { + notes: body.notes ?? [], + }, + createdAt: typeof body.createdAt === 'string' ? body.createdAt : body.createdAt === null ? null : null, + }); + res.json({ artifactBundleV1 }); + } catch (_err: unknown) { + sendBundleInputError(res, 'Invalid bundle build request'); + } + }) +); + +router.post( + '/:id/bundle/verify', + requireAuth, + validate({ params: idParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const parsed = bundleVerifyBodySchema.safeParse(req.body ?? 
{}); + if (!parsed.success) { + sendBundleInputError(res, 'Invalid bundle verify request'); + return; + } + + const body = parsed.data as BundleVerifyBody; + const bodyPackageId = extractBundleBodyPackageId(body.artifactBundleV1); + if (bodyPackageId === null || bodyPackageId !== packageId) { + sendBundleInputError(res, 'Invalid bundle verify request'); + return; + } + + try { + const result = artifactBundleSvc.verifyArtifactBundleV1({ + artifactBundleV1: body.artifactBundleV1, + }); + res.json(result); + } catch (_err: unknown) { + sendBundleInputError(res, 'Invalid bundle verify request'); + } + }) +); + +router.post( + '/:id/bundle/store', + requireAuth, + validate({ params: idParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const parsed = bundleStoreBodySchema.safeParse(req.body ?? {}); + if (!parsed.success) { + sendStoreInputError(res, 'Invalid bundle store request'); + return; + } + + const body = parsed.data as BundleStoreBody; + const bodyPackageId = extractBundleBodyPackageId(body.artifactBundleV1); + if (bodyPackageId === null || bodyPackageId !== packageId) { + sendStoreInputError(res, 'Invalid bundle store request'); + return; + } + + try { + const result = await artifactStoreSvc.saveBundleV1({ + artifactBundleV1: body.artifactBundleV1, + createdAt: typeof body.createdAt === 'string' ? body.createdAt : body.createdAt === null ? null : null, + notes: body.notes ?? 
[], + }); + res.json(result); + } catch (_err: unknown) { + sendStoreInputError(res, 'Invalid bundle store request'); + } + }) +); + +router.get( + '/:id/bundle/store/:bundleHash', + requireAuth, + validate({ params: bundleStoreParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const bundleHash = paramToString((req.params as { bundleHash?: string | string[] }).bundleHash); + + const result = await artifactStoreSvc.getBundleV1({ + packageId, + bundleHash, + }); + res.json(result); + }) +); + +router.post( + '/:id/bundle/store/:bundleHash/verify', + requireAuth, + validate({ params: bundleStoreParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const bundleHash = paramToString((req.params as { bundleHash?: string | string[] }).bundleHash); + if (bundleHash.length === 0) { + sendStoreInputError(res, 'Invalid bundle store verify request'); + return; + } + + try { + const result = await artifactStoreSvc.verifyStoredBundleV1({ + packageId, + bundleHash, + }); + if (result === null) { + res.json({ ok: false }); + return; + } + res.json(result); + } catch (_err: unknown) { + sendStoreInputError(res, 'Invalid bundle store verify request'); + } + }) +); + +router.post( + '/:id/handoff/verify', + requireAuth, + validate({ params: idParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const parsed = handoffVerifyBodySchema.safeParse(req.body ?? 
{}); + if (!parsed.success) { + sendHandoffInputError(res); + return; + } + + const body = parsed.data as HandoffVerifyBody; + const bodyPackageId = extractHandoffBodyPackageId(body.handoffRecordV1); + if (bodyPackageId === null || bodyPackageId !== packageId) { + sendHandoffInputError(res); + return; + } + + try { + const result = transferSvc.verifyHandoffRecordV1({ + handoffRecordV1: body.handoffRecordV1, + }); + res.json(result); + } catch (_err: unknown) { + sendHandoffInputError(res); + } + }) +); + +router.post( + '/:id/lineage/verify', + requireAuth, + validate({ params: idParamSchema }), + asyncHandler(async (req, res) => { + const packageId = paramToString((req.params as { id?: string | string[] }).id); + const parsed = lineageVerifyBodySchema.safeParse(req.body ?? {}); + if (!parsed.success) { + sendLineageInputError(res); + return; + } + + const body = parsed.data as LineageVerifyBody; + const bodyPackageId = extractLineageBodyPackageId(body.lineageBindingV1); + if (bodyPackageId === null || bodyPackageId !== packageId) { + sendLineageInputError(res); + return; + } + + try { + const result = transferSvc.verifyLineageBindingV1({ + lineageBindingV1: body.lineageBindingV1, + }); + res.json(result); + } catch (_err: unknown) { + sendLineageInputError(res); + } + }) +); + const applySchema = z.object({ userQuestion: z.string().min(1).max(10000), mode: z.enum(['bootstrap', 'constrain', 'review']).optional(), @@ -285,3 +953,6 @@ router.post( ); export default router; + + + diff --git a/server/src/bench/__tests__/evaluator.test.ts b/server/src/bench/__tests__/evaluator.test.ts new file mode 100644 index 0000000..73a9ed2 --- /dev/null +++ b/server/src/bench/__tests__/evaluator.test.ts @@ -0,0 +1,181 @@ +import { describe, expect, it } from 'vitest'; + +import { stableHash } from '../../algebra/semanticDiff/key'; +import { + evaluateResults, + renderSummaryMarkdown, + type BenchResultRecord, +} from '../evaluator'; + +const ZERO_COUNTS = { + facts: { added: 0, 
removed: 0, modified: 0 }, + decisions: { added: 0, removed: 0, modified: 0 }, + constraints: { added: 0, removed: 0, modified: 0 }, + risks: { added: 0, removed: 0, modified: 0 }, + assumptions: { added: 0, removed: 0, modified: 0 }, +}; + +type RecordInput = { + taskId: string; + category: 'T1' | 'T2'; + rep: number; + baselineName: 'B1_CORE_BEST_EFFORT' | 'B1_CORE_STRICT' | 'B1_PIPELINE'; + mode: 'best_effort' | 'strict' | null; + supported: boolean; + reason?: string | null; + stateHashAfter: string | null; + equalsTargetHash: boolean; + conflictCount: number; + postApplyConflictCount: number; + distanceCountsSum: number; + passed: boolean; +}; + +function makeRecord(input: RecordInput): BenchResultRecord { + return { + experiment: { id: 'EVAL-1', ts: null }, + task: { + taskId: input.taskId, + category: input.category, + rep: input.rep, + }, + baseline: { + name: input.baselineName, + mode: input.mode, + supported: input.supported, + reason: input.reason ?? null, + }, + identity: { + stateHashBefore: 'before', + stateHashAfter: input.stateHashAfter, + targetHash: 'target', + }, + delta: { summary: null }, + transition: { + conflictCount: input.conflictCount, + postApplyConflictCount: input.postApplyConflictCount, + }, + drift: { + equalsTargetHash: input.equalsTargetHash, + distanceCounts: ZERO_COUNTS, + distanceCountsSum: input.distanceCountsSum, + }, + assertions: { + passed: input.passed, + failed: input.passed ? 
[] : ['ASSERT_FAILED'], + }, + }; +} + +describe('bench evaluator determinism', () => { + it('is permutation-stable for summary json and markdown', () => { + const recordsA: BenchResultRecord[] = [ + makeRecord({ + taskId: 't1_alpha', + category: 'T1', + rep: 1, + baselineName: 'B1_CORE_BEST_EFFORT', + mode: 'best_effort', + supported: true, + stateHashAfter: 'hash-a', + equalsTargetHash: true, + conflictCount: 0, + postApplyConflictCount: 0, + distanceCountsSum: 0, + passed: true, + }), + makeRecord({ + taskId: 't1_alpha', + category: 'T1', + rep: 2, + baselineName: 'B1_CORE_BEST_EFFORT', + mode: 'best_effort', + supported: true, + stateHashAfter: 'hash-a', + equalsTargetHash: true, + conflictCount: 0, + postApplyConflictCount: 0, + distanceCountsSum: 0, + passed: true, + }), + makeRecord({ + taskId: 't1_alpha', + category: 'T1', + rep: 1, + baselineName: 'B1_CORE_STRICT', + mode: 'strict', + supported: true, + stateHashAfter: 'hash-b', + equalsTargetHash: false, + conflictCount: 1, + postApplyConflictCount: 0, + distanceCountsSum: 1, + passed: false, + }), + makeRecord({ + taskId: 't1_alpha', + category: 'T1', + rep: 1, + baselineName: 'B1_PIPELINE', + mode: null, + supported: false, + reason: 'imports services/DB', + stateHashAfter: null, + equalsTargetHash: false, + conflictCount: 0, + postApplyConflictCount: 0, + distanceCountsSum: 0, + passed: false, + }), + ]; + + const recordsB = [recordsA[3], recordsA[1], recordsA[0], recordsA[2]]; + + const summaryA = evaluateResults(recordsA); + const summaryB = evaluateResults(recordsB); + + expect(stableHash(summaryA)).toBe(stableHash(summaryB)); + expect(renderSummaryMarkdown(summaryA)).toBe(renderSummaryMarkdown(summaryB)); + }); + + it('renders task headings in fixed string-order', () => { + const records: BenchResultRecord[] = [ + makeRecord({ + taskId: 't2_beta', + category: 'T2', + rep: 1, + baselineName: 'B1_CORE_BEST_EFFORT', + mode: 'best_effort', + supported: true, + stateHashAfter: 'hash-z', + 
equalsTargetHash: true, + conflictCount: 0, + postApplyConflictCount: 0, + distanceCountsSum: 0, + passed: true, + }), + makeRecord({ + taskId: 't1_alpha', + category: 'T1', + rep: 1, + baselineName: 'B1_CORE_BEST_EFFORT', + mode: 'best_effort', + supported: true, + stateHashAfter: 'hash-y', + equalsTargetHash: true, + conflictCount: 0, + postApplyConflictCount: 0, + distanceCountsSum: 0, + passed: true, + }), + ]; + + const markdown = renderSummaryMarkdown(evaluateResults(records)); + const t1Heading = markdown.indexOf('## t1_alpha (T1)'); + const t2Heading = markdown.indexOf('## t2_beta (T2)'); + + expect(t1Heading).toBeGreaterThanOrEqual(0); + expect(t2Heading).toBeGreaterThanOrEqual(0); + expect(t1Heading).toBeLessThan(t2Heading); + }); +}); diff --git a/server/src/bench/__tests__/validate-fixtures.test.ts b/server/src/bench/__tests__/validate-fixtures.test.ts new file mode 100644 index 0000000..6bd1088 --- /dev/null +++ b/server/src/bench/__tests__/validate-fixtures.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from 'vitest'; + +import { compareStrings, computeLineCol } from '../validate-fixtures'; + +describe('validate-fixtures helpers', () => { + it('compareStrings uses deterministic lexicographic ordering', () => { + expect(compareStrings('a', 'a')).toBe(0); + expect(compareStrings('a', 'b')).toBe(-1); + expect(compareStrings('b', 'a')).toBe(1); + + const input = ['zeta', 'alpha', 'beta']; + const sorted = [...input].sort(compareStrings); + expect(sorted).toEqual(['alpha', 'beta', 'zeta']); + }); + + it('computeLineCol returns expected line, col, and lineText', () => { + const text = 'first\n second line\nthird'; + const pos = text.indexOf('s', 7); + + const result = computeLineCol(text, pos); + + expect(result).toEqual({ + line: 2, + col: 3, + lineText: ' second line', + }); + }); + + it('computeLineCol clamps invalid position to line 1 col 1', () => { + const result = computeLineCol('alpha\nbeta', -12); + + expect(result).toEqual({ + line: 1, 
+ col: 1, + lineText: 'alpha', + }); + }); +}); diff --git a/server/src/bench/evaluator.ts b/server/src/bench/evaluator.ts new file mode 100644 index 0000000..86e4184 --- /dev/null +++ b/server/src/bench/evaluator.ts @@ -0,0 +1,241 @@ +import fs from 'fs'; +import path from 'path'; + +const DOMAIN_ORDER = ['facts', 'decisions', 'constraints', 'risks', 'assumptions'] as const; +const BASELINE_ORDER = ['B1_CORE_BEST_EFFORT', 'B1_CORE_STRICT', 'B1_PIPELINE'] as const; + +type DomainName = (typeof DOMAIN_ORDER)[number]; +type BaselineName = (typeof BASELINE_ORDER)[number]; +type TaskCategory = 'T1' | 'T2'; + +type DomainCounts = { + added: number; + removed: number; + modified: number; +}; + +type DeltaSummary = { + counts: Record; + hasCollisions: boolean; + assumptionsDerived: boolean; + modifiedDomains: DomainName[]; +}; + +export type BenchResultRecord = { + experiment: { id: string; ts: null }; + task: { taskId: string; category: TaskCategory; rep: number }; + baseline: { + name: BaselineName; + mode: 'best_effort' | 'strict' | null; + supported: boolean; + reason: string | null; + }; + identity: { + stateHashBefore: string | null; + stateHashAfter: string | null; + targetHash: string | null; + }; + delta: { + summary: DeltaSummary | null; + }; + transition: { + conflictCount: number; + postApplyConflictCount: number; + }; + drift: { + equalsTargetHash: boolean; + distanceCounts: Record; + distanceCountsSum: number; + }; + assertions: { + passed: boolean; + failed: string[]; + }; +}; + +type SummaryRow = { + taskId: string; + category: TaskCategory; + baseline: { + name: BaselineName; + mode: 'best_effort' | 'strict' | null; + supported: boolean; + reason: string | null; + }; + repetitions: number; + replayPassRate: null; + hashStabilityRate: number | null; + avgConflictCount: number | null; + avgPostApplyConflictCount: number | null; + avgDistanceCountsSum: number | null; + equalsTargetRate: number | null; + assertionPassRate: number | null; +}; + +export type 
BenchSummary = { + schema: 'bench-summary-1'; + experimentId: 'EVAL-1'; + generatedAt: null; + baselineOrder: readonly BaselineName[]; + rows: SummaryRow[]; +}; + +function compareString(a: string, b: string): number { + return a < b ? -1 : a > b ? 1 : 0; +} + +function round6(value: number): number { + return Math.round(value * 1_000_000) / 1_000_000; +} + +function average(values: number[]): number { + if (values.length === 0) return 0; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function baselineRank(name: BaselineName): number { + const index = BASELINE_ORDER.indexOf(name); + return index < 0 ? Number.MAX_SAFE_INTEGER : index; +} + +export function parseResultsJsonl(content: string): BenchResultRecord[] { + const lines = content + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0); + + return lines.map((line) => JSON.parse(line) as BenchResultRecord); +} + +export function evaluateResults(records: BenchResultRecord[]): BenchSummary { + const groups = new Map(); + + for (const record of records) { + const key = `${record.task.taskId}::${record.baseline.name}::${record.baseline.mode ?? 'null'}`; + const bucket = groups.get(key); + if (bucket) { + bucket.push(record); + } else { + groups.set(key, [record]); + } + } + + const rows: SummaryRow[] = []; + + for (const groupRecords of groups.values()) { + const sortedRecords = [...groupRecords].sort((a, b) => a.task.rep - b.task.rep); + const sample = sortedRecords[0]; + const supported = sample.baseline.supported; + + const stateHashes = supported + ? 
[...new Set(sortedRecords.map((record) => record.identity.stateHashAfter).filter((hash): hash is string => !!hash))] + : []; + + rows.push({ + taskId: sample.task.taskId, + category: sample.task.category, + baseline: { + name: sample.baseline.name, + mode: sample.baseline.mode, + supported: sample.baseline.supported, + reason: sample.baseline.reason, + }, + repetitions: sortedRecords.length, + replayPassRate: null, + hashStabilityRate: supported ? (stateHashes.length === 1 ? 1 : 0) : null, + avgConflictCount: supported + ? round6(average(sortedRecords.map((record) => record.transition.conflictCount))) + : null, + avgPostApplyConflictCount: supported + ? round6(average(sortedRecords.map((record) => record.transition.postApplyConflictCount))) + : null, + avgDistanceCountsSum: supported + ? round6(average(sortedRecords.map((record) => record.drift.distanceCountsSum))) + : null, + equalsTargetRate: supported + ? round6(average(sortedRecords.map((record) => (record.drift.equalsTargetHash ? 1 : 0)))) + : null, + assertionPassRate: supported + ? round6(average(sortedRecords.map((record) => (record.assertions.passed ? 1 : 0)))) + : null, + }); + } + + const sortedRows = rows.sort((a, b) => { + return ( + compareString(a.taskId, b.taskId) || + baselineRank(a.baseline.name) - baselineRank(b.baseline.name) || + compareString(a.baseline.mode ?? '', b.baseline.mode ?? '') + ); + }); + + return { + schema: 'bench-summary-1', + experimentId: 'EVAL-1', + generatedAt: null, + baselineOrder: BASELINE_ORDER, + rows: sortedRows, + }; +} + +function formatMetric(value: number | null): string { + return value === null ? 
'null' : String(value); +} + +export function renderSummaryMarkdown(summary: BenchSummary): string { + const lines: string[] = []; + lines.push('# Shadow Threads EVAL-1 Summary'); + lines.push(''); + lines.push('- Experiment: EVAL-1'); + lines.push('- GeneratedAt: null'); + lines.push(''); + + const taskIds = [...new Set(summary.rows.map((row) => row.taskId))].sort(compareString); + + for (const taskId of taskIds) { + const taskRows = summary.rows.filter((row) => row.taskId === taskId); + if (taskRows.length === 0) continue; + const category = taskRows[0].category; + + lines.push(`## ${taskId} (${category})`); + lines.push('| Baseline | Mode | Supported | HashStabilityRate | AvgConflictCount | AvgPostApplyConflictCount | AvgDistanceCountsSum | EqualsTargetRate | AssertionPassRate |'); + lines.push('| --- | --- | --- | --- | --- | --- | --- | --- | --- |'); + + const orderedRows = [...taskRows].sort((a, b) => baselineRank(a.baseline.name) - baselineRank(b.baseline.name)); + for (const row of orderedRows) { + lines.push( + `| ${row.baseline.name} | ${row.baseline.mode ?? 'null'} | ${row.baseline.supported} | ${formatMetric(row.hashStabilityRate)} | ${formatMetric(row.avgConflictCount)} | ${formatMetric(row.avgPostApplyConflictCount)} | ${formatMetric(row.avgDistanceCountsSum)} | ${formatMetric(row.equalsTargetRate)} | ${formatMetric(row.assertionPassRate)} |` + ); + } + + lines.push(''); + } + + return `${lines.join('\n').trimEnd()}\n`; +} + +export function runEvaluation(resultsFile: string, summaryJsonFile: string, summaryMdFile: string): BenchSummary { + const jsonl = fs.existsSync(resultsFile) ? 
fs.readFileSync(resultsFile, 'utf8') : ''; + const records = parseResultsJsonl(jsonl); + const summary = evaluateResults(records); + const markdown = renderSummaryMarkdown(summary); + + fs.mkdirSync(path.dirname(summaryJsonFile), { recursive: true }); + fs.writeFileSync(summaryJsonFile, JSON.stringify(summary, null, 2), 'utf8'); + fs.writeFileSync(summaryMdFile, markdown, 'utf8'); + + return summary; +} + +function main(): void { + const root = process.cwd(); + const resultsFile = path.resolve(root, 'bench', 'out', 'results.jsonl'); + const summaryJsonFile = path.resolve(root, 'bench', 'out', 'summary.json'); + const summaryMdFile = path.resolve(root, 'bench', 'out', 'summary.md'); + const summary = runEvaluation(resultsFile, summaryJsonFile, summaryMdFile); + process.stdout.write(`EVAL-1 evaluator summarized ${summary.rows.length} task/baseline rows\n`); +} + +if (require.main === module) { + main(); +} diff --git a/server/src/bench/node/algebra-bridge.cjs b/server/src/bench/node/algebra-bridge.cjs new file mode 100644 index 0000000..114bae6 --- /dev/null +++ b/server/src/bench/node/algebra-bridge.cjs @@ -0,0 +1,51 @@ +const fs = require('fs'); +const path = require('path'); + +function makeError(code, message) { + const error = new Error(message); + error.code = code; + return error; +} + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw makeError('E_BENCH_UNSUPPORTED', 'Bench requires built dist algebra exports'); +} + +const root = path.resolve(__dirname, '../../../'); + +const algebraIndex = requireFromCandidates([ + path.join(root, 'dist', 'algebra', 'index.js'), + path.join(root, 'dist', 'src', 'algebra', 'index.js'), +]); + +const algebraKey = requireFromCandidates([ + path.join(root, 'dist', 'algebra', 'semanticDiff', 'key.js'), + path.join(root, 'dist', 'src', 'algebra', 'semanticDiff', 'key.js'), +]); + +const diffState = 
algebraIndex.diffState; +const applyDelta = algebraIndex.applyDelta; +const detectConflicts = algebraIndex.detectConflicts; +const stableHash = algebraIndex.stableHash || algebraKey.stableHash; + +if ( + typeof diffState !== 'function' || + typeof applyDelta !== 'function' || + typeof detectConflicts !== 'function' || + typeof stableHash !== 'function' +) { + throw makeError('E_BENCH_UNSUPPORTED', 'Bench requires built dist algebra exports'); +} + +module.exports = { + diffState, + applyDelta, + detectConflicts, + stableHash, +}; diff --git a/server/src/bench/node/bundle-selftest.cjs b/server/src/bench/node/bundle-selftest.cjs new file mode 100644 index 0000000..abe4ac9 --- /dev/null +++ b/server/src/bench/node/bundle-selftest.cjs @@ -0,0 +1,215 @@ +const fs = require('fs'); +const path = require('path'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (fs.existsSync(candidate)) { + return require(candidate); + } + } + throw new Error('unavailable'); +} + +function loadModules() { + const root = path.resolve(__dirname, '../../..'); + const transferModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'transfer-package-v1.js'), + path.join(root, 'dist', 'src', 'services', 'transfer-package-v1.js'), + ]); + const lineageModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'lineage-binding-v1.js'), + path.join(root, 'dist', 'src', 'services', 'lineage-binding-v1.js'), + ]); + const handoffModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'handoff-record-v1.js'), + path.join(root, 'dist', 'src', 'services', 'handoff-record-v1.js'), + ]); + const bundleModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'artifact-bundle-v1.js'), + path.join(root, 'dist', 'src', 'services', 'artifact-bundle-v1.js'), + ]); + + if ( + typeof transferModule.buildTransferPackageV1 !== 'function' || + typeof lineageModule.buildLineageBindingV1 !== 'function' || + typeof 
handoffModule.buildHandoffRecordV1 !== 'function' || + typeof bundleModule.buildArtifactBundleV1 !== 'function' || + typeof bundleModule.verifyArtifactBundleV1 !== 'function' + ) { + throw new Error('unavailable'); + } + + return { transferModule, lineageModule, handoffModule, bundleModule }; +} + +function buildTransferPackage(transferModule) { + return transferModule.buildTransferPackageV1({ + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + trunk: { + intent: { + primary: null, + successCriteria: [], + nonGoals: [], + }, + stateDigest: { + facts: ['fact-a'], + decisions: [], + constraints: [], + risks: [], + assumptions: [], + openLoops: [], + }, + }, + continuation: { + nextActions: [], + validationChecklist: [], + }, + conflicts: [], + }); +} + +function buildLineage(lineageModule, transferPackageV1, lineageHashChar) { + const lineageBindingV1 = lineageModule.buildLineageBindingV1({ + identity: { + packageId: transferPackageV1.identity.packageId, + revisionId: transferPackageV1.identity.revisionId, + revisionHash: transferPackageV1.identity.revisionHash, + parentRevisionId: transferPackageV1.identity.parentRevisionId, + }, + bindings: { + transfer: { + schema: 'transfer-package-1', + transferHash: transferPackageV1.transferHash, + }, + closure: null, + execution: null, + handoff: null, + }, + diagnostics: { + notes: [], + }, + createdAt: null, + }); + if (!lineageHashChar) { + return lineageBindingV1; + } + return { + ...lineageBindingV1, + lineageHash: String(lineageHashChar).repeat(64), + }; +} + +function buildHandoff(handoffModule, transferPackageV1, lineageBindingV1, createdAt) { + return handoffModule.buildHandoffRecordV1({ + transferPackageV1, + verification: { + transferHashRecomputed: transferPackageV1.transferHash, + matchesProvidedHash: true, + }, + bindings: { + 
closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + lineageBindingV1, + createdAt, + }); +} + +function buildBundle(bundleModule, transferPackageV1, lineageBindingV1, handoffRecordV1, createdAt) { + return bundleModule.buildArtifactBundleV1({ + identity: { + packageId: transferPackageV1.identity.packageId, + revisionId: transferPackageV1.identity.revisionId, + revisionHash: transferPackageV1.identity.revisionHash, + }, + artifacts: { + transferPackageV1, + lineageBindingV1, + handoffRecordV1, + closureContractV1: null, + }, + diagnostics: { + notes: [], + }, + createdAt, + }); +} + +function main() { + try { + const { transferModule, lineageModule, handoffModule, bundleModule } = loadModules(); + const transferPackageV1 = buildTransferPackage(transferModule); + const lineageBindingV1 = buildLineage(lineageModule, transferPackageV1, null); + const handoffRecordV1 = buildHandoff(handoffModule, transferPackageV1, lineageBindingV1, null); + + const bundleA = buildBundle(bundleModule, transferPackageV1, lineageBindingV1, handoffRecordV1, null); + const bundleB = buildBundle(bundleModule, transferPackageV1, lineageBindingV1, handoffRecordV1, null); + + const mismatchBundle = { + ...bundleA, + artifacts: { + ...bundleA.artifacts, + handoffRecordV1: { + ...bundleA.artifacts.handoffRecordV1, + lineageBindingV1: { + ...bundleA.artifacts.handoffRecordV1.lineageBindingV1, + bindings: { + ...bundleA.artifacts.handoffRecordV1.lineageBindingV1.bindings, + transfer: { + schema: 'transfer-package-1', + transferHash: 'f'.repeat(64), + }, + }, + }, + }, + }, + }; + const mismatchVerification = bundleModule.verifyArtifactBundleV1(mismatchBundle); + + const inconsistentTopLineage = buildLineage(lineageModule, transferPackageV1, 'e'); + const invariantFailureBundle = buildBundle( + bundleModule, + transferPackageV1, + inconsistentTopLineage, + handoffRecordV1, + null + ); + const invariantFailureVerification = 
bundleModule.verifyArtifactBundleV1(invariantFailureBundle); + const embeddedLineageInvariant = invariantFailureBundle.diagnostics.invariants.find( + (entry) => entry.code === 'INV_EMBEDDED_LINEAGE_HASH_MATCH_TOP' + ); + + const bundleWithCreatedAtA = buildBundle(bundleModule, transferPackageV1, lineageBindingV1, handoffRecordV1, '2025-01-01T00:00:00.000Z'); + const bundleWithCreatedAtB = buildBundle(bundleModule, transferPackageV1, lineageBindingV1, handoffRecordV1, '2026-01-01T00:00:00.000Z'); + + const ok = + bundleA.bundleHash === bundleB.bundleHash && + bundleModule.stableStringify(bundleA) === bundleModule.stableStringify(bundleB) && + mismatchVerification.ok === true && + mismatchVerification.matches === false && + invariantFailureVerification.ok === true && + invariantFailureVerification.matches === true && + !!embeddedLineageInvariant && + embeddedLineageInvariant.ok === false && + bundleWithCreatedAtA.bundleHash === bundleWithCreatedAtB.bundleHash; + + process.stdout.write(ok ? 'BUNDLE_SELFTEST_OK\n' : 'BUNDLE_SELFTEST_FAIL\n'); + process.exit(ok ? 
0 : 1); + } catch (_error) { + process.stdout.write('BUNDLE_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/closure-selftest.cjs b/server/src/bench/node/closure-selftest.cjs new file mode 100644 index 0000000..400b46c --- /dev/null +++ b/server/src/bench/node/closure-selftest.cjs @@ -0,0 +1,175 @@ +const fs = require('fs'); +const path = require('path'); + +const { applyDelta, detectConflicts } = require('./algebra-bridge.cjs'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw new Error('unavailable'); +} + +function isHex64(value) { + return typeof value === 'string' && /^[0-9a-f]{64}$/.test(value); +} + +function loadModules() { + const rootPath = path.resolve(__dirname, '../../..'); + const plannerModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'delta-closure-planner.js'), + path.join(rootPath, 'dist', 'src', 'services', 'delta-closure-planner.js'), + ]); + const policyModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'delta-risk-policy.js'), + path.join(rootPath, 'dist', 'src', 'services', 'delta-risk-policy.js'), + ]); + const contractModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'closure-contract-v1.js'), + path.join(rootPath, 'dist', 'src', 'services', 'closure-contract-v1.js'), + ]); + + if ( + !plannerModule || + typeof plannerModule.planDeltaClosureV1 !== 'function' || + !policyModule || + !policyModule.DEFAULT_RISK_POLICY_V1 || + !contractModule || + typeof contractModule.buildClosureContractV1 !== 'function' || + typeof contractModule.stableStringify !== 'function' + ) { + throw new Error('unavailable'); + } + + return { + planDeltaClosureV1: plannerModule.planDeltaClosureV1, + policy: policyModule.DEFAULT_RISK_POLICY_V1, + buildClosureContractV1: contractModule.buildClosureContractV1, + 
stableStringify: contractModule.stableStringify, + assertJsonSafe: contractModule.assertJsonSafe, + }; +} + +function makeBaseState() { + return { + facts: [{ key: 'fact.alpha', statement: 'alpha' }], + decisions: [], + constraints: [], + risks: [], + assumptions: [], + }; +} + +function makeProposedDelta() { + return { + schemaVersion: 'sdiff-0.1', + base: { revisionHash: 'closure-selftest-base' }, + target: { revisionHash: 'closure-selftest-target' }, + facts: { added: [], removed: [], modified: [] }, + decisions: { + added: [], + removed: [], + modified: [ + { + key: 'decision.missing', + before: null, + after: { key: 'decision.missing', answer: 'accept' }, + changes: [{ path: 'answer', op: 'set', after: 'accept' }], + }, + ], + }, + constraints: { added: [], removed: [], modified: [] }, + risks: { added: [], removed: [], modified: [] }, + assumptions: { added: [], removed: [], modified: [] }, + meta: { + determinism: { + canonicalVersion: 'tpkg-0.2-canon-v1', + keyStrategy: 'sig-hash-v1', + tieBreakers: ['closure-selftest'], + }, + collisions: { hard: [], soft: [] }, + counts: { + 'facts.added': 0, + 'facts.removed': 0, + 'facts.modified': 0, + 'decisions.added': 0, + 'decisions.removed': 0, + 'decisions.modified': 1, + 'constraints.added': 0, + 'constraints.removed': 0, + 'constraints.modified': 0, + 'risks.added': 0, + 'risks.removed': 0, + 'risks.modified': 0, + 'assumptions.added': 0, + 'assumptions.removed': 0, + 'assumptions.modified': 0, + 'collisions.soft': 0, + 'collisions.hard': 0, + }, + }, + }; +} + +function buildApplyReport(modules) { + const baseState = makeBaseState(); + const proposedDelta = makeProposedDelta(); + const plan = modules.planDeltaClosureV1({ + baseState, + proposedDelta, + mode: 'strict', + policy: modules.policy, + }); + const contractV1 = modules.buildClosureContractV1({ + proposedDelta, + acceptedDelta: plan.acceptedDelta, + rejected: plan.rejected, + suggestions: plan.suggestions, + diagnostics: plan.diagnostics, + }); + const 
transition = applyDelta(baseState, plan.acceptedDelta, { mode: 'best_effort' }); + const postApplyConflicts = detectConflicts(transition.nextState); + return { + applyReport: { + llmDelta: { + closure: { + contractV1, + }, + }, + }, + postApplyConflicts, + }; +} + +function main() { + try { + const modules = loadModules(); + const runA = buildApplyReport(modules); + const runB = buildApplyReport(modules); + const contractA = runA.applyReport.llmDelta.closure.contractV1; + const contractB = runB.applyReport.llmDelta.closure.contractV1; + + modules.assertJsonSafe(contractA); + + const ok = + contractA && + contractA.schema === 'closure-contract-1' && + isHex64(contractA.accepted.acceptedHash) && + isHex64(contractA.accepted.proposedHash) && + modules.stableStringify(contractA) === modules.stableStringify(contractB) && + contractA.diagnostics.closureViolationFlag === false && + Array.isArray(runA.postApplyConflicts) && + runA.postApplyConflicts.length === 0; + + process.stdout.write(ok ? 'CLOSURE_SELFTEST_OK\n' : 'CLOSURE_SELFTEST_FAIL\n'); + process.exit(ok ? 
0 : 1); + } catch (_error) { + process.stdout.write('CLOSURE_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/contract-selftest.cjs b/server/src/bench/node/contract-selftest.cjs new file mode 100644 index 0000000..22b92ba --- /dev/null +++ b/server/src/bench/node/contract-selftest.cjs @@ -0,0 +1,152 @@ +const path = require('path'); + +function main() { + try { + const root = path.resolve(__dirname, '../../..'); + const contractModule = require(path.join(root, 'dist', 'services', 'closure-contract-v1.js')); + const { buildClosureContractV1, stableStringify, assertJsonSafe } = contractModule; + + const proposedDelta = { + schema: 'sdiff-0.1', + facts: { added: [{ key: 'fact-a', after: { key: 'fact-a', statement: 'A' } }], removed: [], modified: [] }, + decisions: { added: [], removed: [], modified: [] }, + constraints: { added: [], removed: [], modified: [] }, + risks: { added: [], removed: [], modified: [] }, + assumptions: { added: [], removed: [], modified: [] }, + meta: { + counts: { + 'facts.added': 1, + 'facts.removed': 0, + 'facts.modified': 0, + 'decisions.added': 0, + 'decisions.removed': 0, + 'decisions.modified': 0, + 'constraints.added': 0, + 'constraints.removed': 0, + 'constraints.modified': 0, + 'risks.added': 0, + 'risks.removed': 0, + 'risks.modified': 0, + 'assumptions.added': 0, + 'assumptions.removed': 0, + 'assumptions.modified': 0, + }, + collisions: { soft: [], hard: [] }, + }, + }; + + const acceptedDelta = { + schema: 'sdiff-0.1', + facts: { added: [], removed: [], modified: [] }, + decisions: { added: [], removed: [], modified: [] }, + constraints: { added: [], removed: [], modified: [] }, + risks: { added: [], removed: [], modified: [] }, + assumptions: { added: [], removed: [], modified: [] }, + meta: { + counts: { + 'facts.added': 0, + 'facts.removed': 0, + 'facts.modified': 0, + 'decisions.added': 0, + 'decisions.removed': 0, + 'decisions.modified': 0, + 'constraints.added': 0, + 
'constraints.removed': 0, + 'constraints.modified': 0, + 'risks.added': 0, + 'risks.removed': 0, + 'risks.modified': 0, + 'assumptions.added': 0, + 'assumptions.removed': 0, + 'assumptions.modified': 0, + }, + collisions: { soft: [], hard: [] }, + }, + }; + + const rejected = [ + { + domain: 'facts', + key: 'fact-a', + path: null, + op: 'add', + reasonCode: 'CONFLICT', + reasonMessage: 'Rejected: conflict', + riskLevel: 'L2', + blockedBy: [ + { + domain: 'facts', + key: 'fact-a', + path: null, + }, + ], + }, + ]; + + const suggestions = [ + { + schema: 'closure-suggestion-1', + code: 'ADD_MISSING_DEP', + message: 'Add missing dependency', + actionType: 'ADD_MISSING_DEP', + payload: { + appliesTo: { + domain: 'facts', + key: 'fact-a', + path: null, + op: 'add', + }, + blockedBy: [ + { + domain: 'facts', + key: 'fact-a', + path: null, + }, + ], + }, + riskLevel: 'L2', + }, + ]; + + const diagnostics = { + closureViolationFlag: false, + maxClosureSizeRatio: 0, + blockedByRate: 1, + rejectedCount: 1, + }; + + const contractA = buildClosureContractV1({ + proposedDelta, + acceptedDelta, + rejected, + suggestions, + diagnostics, + }); + const contractB = buildClosureContractV1({ + proposedDelta, + acceptedDelta, + rejected, + suggestions, + diagnostics, + }); + + assertJsonSafe(contractA); + const serializedA = stableStringify(contractA); + const serializedB = stableStringify(contractB); + + if (!contractA || contractA.schema !== 'closure-contract-1') { + throw new Error('fail'); + } + if (serializedA !== serializedB) { + throw new Error('fail'); + } + + process.stdout.write('CONTRACT_SELFTEST_OK\n'); + process.exit(0); + } catch (_error) { + process.stdout.write('CONTRACT_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/diagnose.cjs b/server/src/bench/node/diagnose.cjs new file mode 100644 index 0000000..15a5ae0 --- /dev/null +++ b/server/src/bench/node/diagnose.cjs @@ -0,0 +1,148 @@ +const fs = require('fs'); +const path = 
require('path'); + +const COMPARISON_ORDER = ['C1', 'C4', 'C5']; +const METRICS_BY_COMPARISON = { + C1: ['assertionPassRate', 'conflictCount', 'distanceCountsSum', 'equalsTargetRate', 'postApplyConflictCount'], + C4: ['rollbackRate'], + C5: ['deltaRejectionRate'], +}; + +const HIGHER_IS_BETTER = new Set(['equalsTargetRate', 'assertionPassRate', 'rollbackRate', 'deltaRejectionRate']); +const LOWER_IS_BETTER = new Set(['conflictCount', 'postApplyConflictCount', 'distanceCountsSum']); + +function compareStrings(a, b) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function asRecord(value) { + return value && typeof value === 'object' && !Array.isArray(value) ? value : {}; +} + +function getComparisonRank(value) { + const index = COMPARISON_ORDER.indexOf(value); + return index < 0 ? Number.MAX_SAFE_INTEGER : index; +} + +function effectDirection(metricKey, estimate) { + if (typeof estimate !== 'number' || !Number.isFinite(estimate)) return null; + if (estimate === 0) return 'equal'; + + if (HIGHER_IS_BETTER.has(metricKey)) { + return estimate > 0 ? 'strict_worse' : 'strict_better'; + } + + if (LOWER_IS_BETTER.has(metricKey)) { + return estimate > 0 ? 'strict_better' : 'strict_worse'; + } + + return null; +} + +function collectRows(stats) { + if (Array.isArray(stats.comparisons) && stats.comparisons.length > 0) { + return stats.comparisons.map((entry) => { + const row = asRecord(entry); + return { + comparison: typeof row.comparison === 'string' ? row.comparison : '', + taskId: typeof row.taskId === 'string' ? row.taskId : '*GLOBAL*', + category: typeof row.category === 'string' ? row.category : null, + metrics: asRecord(row.metrics), + }; + }); + } + + if (Array.isArray(stats.globalComparisons) && stats.globalComparisons.length > 0) { + return stats.globalComparisons.map((entry) => { + const row = asRecord(entry); + return { + comparison: typeof row.comparison === 'string' ? 
row.comparison : '', + taskId: '*GLOBAL*', + category: null, + metrics: asRecord(row.metrics), + }; + }); + } + + return []; +} + +function collectDiagnostics(stats) { + const rows = collectRows(stats); + const out = []; + + for (const row of rows) { + if (!COMPARISON_ORDER.includes(row.comparison)) continue; + const metricKeys = METRICS_BY_COMPARISON[row.comparison]; + + for (const metricKey of metricKeys) { + const metric = asRecord(row.metrics[metricKey]); + const status = typeof metric.statStatus === 'string' ? metric.statStatus : 'null'; + const q = Object.prototype.hasOwnProperty.call(metric, 'qValue') ? metric.qValue : null; + const reason = Object.prototype.hasOwnProperty.call(metric, 'reason') ? metric.reason : null; + + out.push({ + comparison: row.comparison, + taskId: row.taskId, + metricKey, + status, + q, + effect: effectDirection(metricKey, metric.estimate), + reason, + }); + } + } + + out.sort((left, right) => { + return ( + getComparisonRank(left.comparison) - getComparisonRank(right.comparison) || + compareStrings(left.taskId, right.taskId) || + compareStrings(left.metricKey, right.metricKey) + ); + }); + + return out; +} + +function runDiagnose(statsFile) { + if (!fs.existsSync(statsFile)) { + process.stdout.write('DIAGNOSE_INPUT_MISSING\n'); + process.exit(1); + } + + let stats; + try { + stats = JSON.parse(fs.readFileSync(statsFile, 'utf8')); + } catch (_error) { + process.stdout.write('DIAGNOSE_INPUT_INVALID\n'); + process.exit(1); + } + + const rows = collectDiagnostics(stats); + process.stdout.write('DIAGNOSE_BEGIN\n'); + for (const row of rows) { + process.stdout.write( + `DIAG comparison=${row.comparison} task=${row.taskId} metric=${row.metricKey} status=${row.status} q=${row.q === null ? 'null' : row.q} effect=${row.effect === null ? 'null' : row.effect} reason=${row.reason === null ? 
'null' : row.reason}\n` + ); + } + process.stdout.write('DIAGNOSE_END\n'); +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const statsFile = path.join(root, 'bench', 'out', 'eval2.stats.json'); + runDiagnose(statsFile); +} + +if (require.main === module) { + main(); +} + +module.exports = { + compareStrings, + effectDirection, + collectDiagnostics, + runDiagnose, +}; diff --git a/server/src/bench/node/evaluator.cjs b/server/src/bench/node/evaluator.cjs new file mode 100644 index 0000000..c645033 --- /dev/null +++ b/server/src/bench/node/evaluator.cjs @@ -0,0 +1,232 @@ +const fs = require('fs'); +const path = require('path'); + +const BASELINE_ORDER = [ + 'B1_CORE_BEST_EFFORT', + 'B1_CORE_STRICT', + 'B1_PIPELINE', + 'B2_LLM_DELTA_BEST_EFFORT', + 'B2_LLM_DELTA_STRICT', + 'B3_STRICT_CLOSURE', + 'B4_STRICT_RISK_CLOSURE', + 'B5_STRICT_CLOSURE_SUGGESTIONS', +]; +const BASELINE_WHITELIST = new Set(BASELINE_ORDER); + +function compareStrings(a, b) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function baselineRank(name) { + const index = BASELINE_ORDER.indexOf(name); + return index < 0 ? Number.MAX_SAFE_INTEGER : index; +} + +function round6(value) { + return Math.round(value * 1000000) / 1000000; +} + +function asRecord(value) { + return value && typeof value === 'object' && !Array.isArray(value) ? value : {}; +} + +function normalizeBaseline(rawBaseline) { + const baseline = asRecord(rawBaseline); + const rawName = typeof baseline.name === 'string' ? baseline.name : 'UNKNOWN_BASELINE'; + const baselineKey = BASELINE_WHITELIST.has(rawName) ? rawName : 'UNKNOWN_BASELINE'; + const mode = typeof baseline.mode === 'string' ? 
baseline.mode : null; + const baselineModeKey = mode || 'null'; + + if (baselineKey === 'UNKNOWN_BASELINE') { + return { + name: 'UNKNOWN_BASELINE', + mode, + supported: false, + reason: 'unsupported baseline', + baselineKey, + baselineModeKey, + }; + } + + return { + name: baselineKey, + mode, + supported: baseline.supported === true, + reason: typeof baseline.reason === 'string' ? baseline.reason : null, + baselineKey, + baselineModeKey, + }; +} + +function average(values) { + if (values.length === 0) return 0; + let total = 0; + for (const value of values) total += value; + return total / values.length; +} + +function parseResultsJsonl(content) { + return content + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0) + .map((line) => JSON.parse(line)); +} + +function evaluateResults(records) { + const groups = new Map(); + + for (const record of records) { + const baseline = normalizeBaseline(record.baseline); + const task = asRecord(record.task); + const taskId = typeof task.taskId === 'string' ? task.taskId : ''; + const key = `${taskId}::${baseline.baselineKey}`; + + if (!groups.has(key)) groups.set(key, []); + groups.get(key).push({ record, baseline }); + } + + const rows = []; + + for (const groupEntries of groups.values()) { + const sortedEntries = [...groupEntries].sort((left, right) => left.record.task.rep - right.record.task.rep); + const sortedRecords = sortedEntries.map((entry) => entry.record); + const sample = sortedEntries[0]; + const supported = sample.baseline.supported === true; + + const hashes = supported + ? 
[ + ...new Set( + sortedRecords + .map((record) => asRecord(asRecord(record).identity).stateHashAfter) + .filter((value) => typeof value === 'string' && value.length > 0) + ), + ] + : []; + + rows.push({ + taskId: asRecord(sample.record.task).taskId, + category: asRecord(sample.record.task).category, + baseline: { + name: sample.baseline.name, + mode: sample.baseline.mode, + supported: sample.baseline.supported, + reason: sample.baseline.reason, + }, + baselineKey: sample.baseline.baselineKey, + baselineModeKey: sample.baseline.baselineModeKey, + repetitions: sortedRecords.length, + replayPassRate: null, + hashStabilityRate: supported ? (hashes.length === 1 ? 1 : 0) : null, + avgConflictCount: supported + ? round6(average(sortedRecords.map((record) => asRecord(asRecord(record).transition).conflictCount))) + : null, + avgPostApplyConflictCount: supported + ? round6(average(sortedRecords.map((record) => asRecord(asRecord(record).transition).postApplyConflictCount))) + : null, + avgDistanceCountsSum: supported + ? round6(average(sortedRecords.map((record) => asRecord(asRecord(record).drift).distanceCountsSum))) + : null, + equalsTargetRate: supported + ? round6(average(sortedRecords.map((record) => (asRecord(asRecord(record).drift).equalsTargetHash ? 1 : 0)))) + : null, + assertionPassRate: supported + ? round6(average(sortedRecords.map((record) => (asRecord(asRecord(record).assertions).passed ? 1 : 0)))) + : null, + }); + } + + rows.sort((left, right) => { + return ( + compareStrings(left.taskId, right.taskId) || + baselineRank(left.baseline.name) - baselineRank(right.baseline.name) || + compareStrings(left.baselineModeKey || '', right.baselineModeKey || '') + ); + }); + + return { + schema: 'bench-summary-1', + experimentId: 'EVAL-1', + generatedAt: null, + baselineOrder: BASELINE_ORDER, + rows, + }; +} + +function formatMetric(value) { + return value === null ? 
'null' : String(value); +} + +function renderSummaryMarkdown(summary) { + const lines = []; + lines.push('# Shadow Threads EVAL-1 Summary'); + lines.push(''); + lines.push('- Experiment: EVAL-1'); + lines.push('- GeneratedAt: null'); + lines.push(''); + + const taskIds = [...new Set(summary.rows.map((row) => row.taskId))].sort(compareStrings); + + for (const taskId of taskIds) { + const taskRows = summary.rows.filter((row) => row.taskId === taskId); + if (taskRows.length === 0) continue; + const category = taskRows[0].category; + + lines.push(`## ${taskId} (${category})`); + lines.push('| Baseline | Mode | Supported | HashStabilityRate | AvgConflictCount | AvgPostApplyConflictCount | AvgDistanceCountsSum | EqualsTargetRate | AssertionPassRate |'); + lines.push('| --- | --- | --- | --- | --- | --- | --- | --- | --- |'); + + const ordered = [...taskRows].sort((left, right) => { + return ( + baselineRank(left.baseline.name) - baselineRank(right.baseline.name) || + compareStrings(left.baselineModeKey || '', right.baselineModeKey || '') + ); + }); + + for (const row of ordered) { + lines.push( + `| ${row.baseline.name} | ${row.baseline.mode || 'null'} | ${row.baseline.supported} | ${formatMetric(row.hashStabilityRate)} | ${formatMetric(row.avgConflictCount)} | ${formatMetric(row.avgPostApplyConflictCount)} | ${formatMetric(row.avgDistanceCountsSum)} | ${formatMetric(row.equalsTargetRate)} | ${formatMetric(row.assertionPassRate)} |` + ); + } + + lines.push(''); + } + + return `${lines.join('\n').trimEnd()}\n`; +} + +function runEvaluation(resultsFile, summaryJsonFile, summaryMdFile) { + const content = fs.existsSync(resultsFile) ? 
fs.readFileSync(resultsFile, 'utf8') : ''; + const records = parseResultsJsonl(content); + const summary = evaluateResults(records); + const markdown = renderSummaryMarkdown(summary); + + fs.mkdirSync(path.dirname(summaryJsonFile), { recursive: true }); + fs.writeFileSync(summaryJsonFile, `${JSON.stringify(summary, null, 2)}\n`, 'utf8'); + fs.writeFileSync(summaryMdFile, markdown, 'utf8'); + + return summary; +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const resultsFile = path.join(root, 'bench', 'out', 'results.jsonl'); + const summaryJsonFile = path.join(root, 'bench', 'out', 'summary.json'); + const summaryMdFile = path.join(root, 'bench', 'out', 'summary.md'); + const summary = runEvaluation(resultsFile, summaryJsonFile, summaryMdFile); + process.stdout.write(`EVAL-1 evaluator summarized ${summary.rows.length} task/baseline rows\n`); +} + +if (require.main === module) { + main(); +} + +module.exports = { + compareStrings, + parseResultsJsonl, + evaluateResults, + renderSummaryMarkdown, + runEvaluation, +}; diff --git a/server/src/bench/node/evidence.cjs b/server/src/bench/node/evidence.cjs new file mode 100644 index 0000000..5b3a367 --- /dev/null +++ b/server/src/bench/node/evidence.cjs @@ -0,0 +1,122 @@ +const fs = require('fs'); +const path = require('path'); + +const BASELINE_ORDER = [ + 'B1_CORE_BEST_EFFORT', + 'B1_CORE_STRICT', + 'B1_PIPELINE', + 'B2_LLM_DELTA_BEST_EFFORT', + 'B2_LLM_DELTA_STRICT', + 'B3_STRICT_CLOSURE', + 'B4_STRICT_RISK_CLOSURE', + 'B5_STRICT_CLOSURE_SUGGESTIONS', +]; +const BASELINE_WHITELIST = new Set(BASELINE_ORDER); + +function compareStrings(a, b) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function asRecord(value) { + return value && typeof value === 'object' && !Array.isArray(value) ? value : {}; +} + +function normalizeBaseline(rawBaseline) { + const baseline = asRecord(rawBaseline); + const rawName = typeof baseline.name === 'string' ? 
baseline.name : 'UNKNOWN_BASELINE'; + const baselineName = BASELINE_WHITELIST.has(rawName) ? rawName : 'UNKNOWN_BASELINE'; + const mode = typeof baseline.mode === 'string' ? baseline.mode : null; + + if (baselineName === 'UNKNOWN_BASELINE') { + return { + name: 'UNKNOWN_BASELINE', + mode, + supported: false, + reason: 'unsupported baseline', + baselineModeKey: mode || 'null', + }; + } + + return { + name: baselineName, + mode, + supported: baseline.supported === true, + reason: typeof baseline.reason === 'string' ? baseline.reason : null, + baselineModeKey: mode || 'null', + }; +} + +function toSortedObject(counter) { + const keys = Object.keys(counter).sort(compareStrings); + const output = {}; + for (const key of keys) { + output[key] = counter[key]; + } + return output; +} + +function parseResults(content) { + return content + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0) + .map((line) => JSON.parse(line)); +} + +function buildEvidence(records) { + const baselineNameCounts = {}; + const baselineModeCounts = {}; + let exampleBaseline = null; + + for (const record of records) { + const normalized = normalizeBaseline(asRecord(record).baseline); + + baselineNameCounts[normalized.name] = (baselineNameCounts[normalized.name] || 0) + 1; + baselineModeCounts[normalized.baselineModeKey] = (baselineModeCounts[normalized.baselineModeKey] || 0) + 1; + + if (exampleBaseline === null) { + exampleBaseline = { + name: normalized.name, + mode: normalized.mode, + supported: normalized.supported, + reason: normalized.reason, + }; + } + } + + return { + baselineNames: toSortedObject(baselineNameCounts), + baselineModes: toSortedObject(baselineModeCounts), + exampleBaseline, + }; +} + +function runEvidence(resultsFile) { + const content = fs.existsSync(resultsFile) ? 
fs.readFileSync(resultsFile, 'utf8') : ''; + const records = parseResults(content); + return buildEvidence(records); +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const resultsFile = path.join(root, 'bench', 'out', 'results.jsonl'); + const evidence = runEvidence(resultsFile); + + process.stdout.write('BASELINE_EVIDENCE\n'); + process.stdout.write(`baseline_names: ${JSON.stringify(evidence.baselineNames)}\n`); + process.stdout.write(`baseline_modes: ${JSON.stringify(evidence.baselineModes)}\n`); + process.stdout.write(`example_row_baseline: ${JSON.stringify(evidence.exampleBaseline)}\n`); +} + +if (require.main === module) { + main(); +} + +module.exports = { + compareStrings, + normalizeBaseline, + buildEvidence, + runEvidence, +}; diff --git a/server/src/bench/node/gate.cjs b/server/src/bench/node/gate.cjs new file mode 100644 index 0000000..29f478b --- /dev/null +++ b/server/src/bench/node/gate.cjs @@ -0,0 +1,173 @@ +const fs = require('fs'); +const path = require('path'); + +const ALPHA = 0.05; +const HIGHER_IS_BETTER = new Set(['equalsTargetRate', 'assertionPassRate']); +const LOWER_IS_BETTER = new Set(['conflictCount', 'postApplyConflictCount', 'distanceCountsSum']); +const C1_METRICS = ['equalsTargetRate', 'assertionPassRate', 'conflictCount', 'postApplyConflictCount', 'distanceCountsSum']; + +function asRecord(value) { + return value && typeof value === 'object' && !Array.isArray(value) ? 
value : {}; +} + +function inferCategoryFromTaskId(taskId) { + if (typeof taskId !== 'string') return null; + if (taskId.startsWith('t1_')) return 'T1'; + if (taskId.startsWith('t2_')) return 'T2'; + if (taskId.startsWith('t3_')) return 'T3'; + return null; +} + +function resolveCategory(row) { + const value = row.category; + if (value === 'T1' || value === 'T2' || value === 'T3') return value; + return inferCategoryFromTaskId(row.taskId); +} + +function metricRate(metric) { + const candidate = asRecord(metric).rate; + return typeof candidate === 'number' && Number.isFinite(candidate) ? candidate : null; +} + +function metricMean(metric) { + const candidate = asRecord(metric).mean; + return typeof candidate === 'number' && Number.isFinite(candidate) ? candidate : null; +} + +function isSignificant(metric) { + return ( + metric.statStatus === 'ok' && + typeof metric.qValue === 'number' && + Number.isFinite(metric.qValue) && + metric.qValue <= ALPHA + ); +} + +function strictWorse(metricName, estimate) { + if (typeof estimate !== 'number' || !Number.isFinite(estimate)) return false; + if (HIGHER_IS_BETTER.has(metricName)) return estimate > 0; + if (LOWER_IS_BETTER.has(metricName)) return estimate < 0; + return false; +} + +function evaluateC1(comparisons) { + for (const entry of comparisons) { + const row = asRecord(entry); + if (row.comparison !== 'C1') continue; + if (row.skipped === true) continue; + if (row.lhs !== 'B1_CORE_BEST_EFFORT' || row.rhs !== 'B1_CORE_STRICT') continue; + + const category = resolveCategory(row); + if (category !== 'T1' && category !== 'T2') continue; + + const metrics = asRecord(row.metrics); + for (const metricName of C1_METRICS) { + const metric = asRecord(metrics[metricName]); + if (!isSignificant(metric)) continue; + if (strictWorse(metricName, metric.estimate)) { + return false; + } + } + } + + return true; +} + +function findBaseline(task, name) { + const baselines = Array.isArray(task.baselines) ? 
task.baselines : []; + return baselines.find((baseline) => asRecord(asRecord(baseline).baseline).name === name) || null; +} + +function evaluateB4Hard(byTask) { + const rows = Array.isArray(byTask) ? byTask : []; + for (const row of rows) { + const task = asRecord(row); + if (resolveCategory(task) !== 'T3') continue; + + const b4 = findBaseline(task, 'B4_STRICT_RISK_CLOSURE'); + if (!b4) continue; + if (asRecord(asRecord(b4).baseline).supported !== true) continue; + + const metrics = asRecord(asRecord(b4).metrics); + const riskClosureViolationRate = metricRate(metrics.riskClosureViolationRate); + const postApplyMean = metricMean(metrics.postApplyConflictCount); + + if (riskClosureViolationRate !== null && riskClosureViolationRate > 0) { + return false; + } + if (postApplyMean !== null && postApplyMean > 0) { + return false; + } + } + + return true; +} + +function evaluateSuggestionQuality(byTask) { + const rows = Array.isArray(byTask) ? byTask : []; + for (const row of rows) { + const task = asRecord(row); + if (resolveCategory(task) !== 'T3') continue; + + const b5 = findBaseline(task, 'B5_STRICT_CLOSURE_SUGGESTIONS'); + if (!b5) continue; + if (asRecord(asRecord(b5).baseline).supported !== true) continue; + + const metrics = asRecord(asRecord(b5).metrics); + const coverageRate = metricMean(metrics.suggestionsCoverageRate); + const actionabilityRate = metricMean(metrics.suggestionActionabilityRate); + + if (coverageRate === null || coverageRate < 0.8) { + return false; + } + if (actionabilityRate === null || actionabilityRate < 0.6) { + return false; + } + } + + return true; +} + +function evaluateGate(stats) { + const comparisons = Array.isArray(stats.comparisons) ? stats.comparisons : []; + const byTask = Array.isArray(stats.byTask) ? 
stats.byTask : []; + return evaluateC1(comparisons) && evaluateB4Hard(byTask) && evaluateSuggestionQuality(byTask); +} + +function runGate(statsFile) { + if (!fs.existsSync(statsFile)) { + return false; + } + + try { + const content = fs.readFileSync(statsFile, 'utf8'); + const stats = JSON.parse(content); + return evaluateGate(stats); + } catch (_error) { + return false; + } +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const statsFile = path.join(root, 'bench', 'out', 'eval2.stats.json'); + const passed = runGate(statsFile); + + if (passed) { + process.stdout.write('EVAL2_GATE_PASS\n'); + process.exit(0); + } + + process.stdout.write('EVAL2_GATE_FAIL\n'); + process.exit(1); +} + +if (require.main === module) { + main(); +} + +module.exports = { + strictWorse, + evaluateGate, + runGate, +}; diff --git a/server/src/bench/node/handoff-embedded-selftest.cjs b/server/src/bench/node/handoff-embedded-selftest.cjs new file mode 100644 index 0000000..68395db --- /dev/null +++ b/server/src/bench/node/handoff-embedded-selftest.cjs @@ -0,0 +1,180 @@ +const fs = require('fs'); +const path = require('path'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw new Error('unavailable'); +} + +function loadModules() { + const root = path.resolve(__dirname, '../../..'); + const transferModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'transfer-package-v1.js'), + path.join(root, 'dist', 'src', 'services', 'transfer-package-v1.js'), + ]); + const lineageModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'lineage-binding-v1.js'), + path.join(root, 'dist', 'src', 'services', 'lineage-binding-v1.js'), + ]); + const handoffModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'handoff-record-v1.js'), + path.join(root, 'dist', 'src', 'services', 'handoff-record-v1.js'), + 
]); + + if ( + !transferModule || + typeof transferModule.buildTransferPackageV1 !== 'function' || + !lineageModule || + typeof lineageModule.buildLineageBindingV1 !== 'function' || + !handoffModule || + typeof handoffModule.buildHandoffRecordV1 !== 'function' || + typeof handoffModule.verifyHandoffRecordV1 !== 'function' + ) { + throw new Error('unavailable'); + } + + return { transferModule, lineageModule, handoffModule }; +} + +function makeHex(char) { + return String(char).repeat(64); +} + +function buildTransferPackage(transferModule) { + return transferModule.buildTransferPackageV1({ + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + trunk: { + intent: { + primary: null, + successCriteria: [], + nonGoals: [], + }, + stateDigest: { + facts: ['fact-a'], + decisions: [], + constraints: [], + risks: [], + assumptions: [], + openLoops: [], + }, + }, + continuation: { + nextActions: [], + validationChecklist: [], + }, + conflicts: [], + }); +} + +function buildLineageBinding(lineageModule, transferPackageV1) { + return lineageModule.buildLineageBindingV1({ + identity: { + packageId: transferPackageV1.identity.packageId, + revisionId: transferPackageV1.identity.revisionId, + revisionHash: transferPackageV1.identity.revisionHash, + parentRevisionId: transferPackageV1.identity.parentRevisionId, + }, + bindings: { + transfer: { + schema: 'transfer-package-1', + transferHash: transferPackageV1.transferHash, + }, + closure: null, + execution: null, + handoff: null, + }, + diagnostics: { + notes: [], + }, + createdAt: null, + }); +} + +function buildHandoffRecord(handoffModule, transferPackageV1, lineageBindingV1, createdAt) { + return handoffModule.buildHandoffRecordV1({ + transferPackageV1, + verification: { + transferHashRecomputed: transferPackageV1.transferHash, + matchesProvidedHash: true, + }, 
+ bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + lineageBindingV1, + createdAt, + }); +} + +function main() { + try { + const { transferModule, lineageModule, handoffModule } = loadModules(); + const transferPackageV1 = buildTransferPackage(transferModule); + const lineageBindingV1 = buildLineageBinding(lineageModule, transferPackageV1); + + const baseline = buildHandoffRecord(handoffModule, transferPackageV1, lineageBindingV1, null); + const verification = handoffModule.verifyHandoffRecordV1(baseline); + + const handoffA = buildHandoffRecord( + handoffModule, + transferPackageV1, + lineageBindingV1, + '2025-01-01T00:00:00.000Z' + ); + const handoffB = buildHandoffRecord( + handoffModule, + transferPackageV1, + lineageBindingV1, + '2026-01-01T00:00:00.000Z' + ); + + const mutated = { + ...baseline, + lineageBindingV1: { + ...baseline.lineageBindingV1, + bindings: { + ...baseline.lineageBindingV1.bindings, + transfer: { + schema: 'transfer-package-1', + transferHash: makeHex('b'), + }, + }, + }, + }; + const mismatch = handoffModule.verifyHandoffRecordV1(mutated); + + const ok = + verification.ok === true && + verification.matches === true && + handoffA.handoffHash === handoffB.handoffHash && + handoffA.createdAt === '2025-01-01T00:00:00.000Z' && + handoffB.createdAt === '2026-01-01T00:00:00.000Z' && + handoffA.lineageBindingV1.lineageHash === handoffB.lineageBindingV1.lineageHash && + mismatch.ok === true && + mismatch.matches === false; + + process.stdout.write(ok ? 'HANDOFF_EMBEDDED_SELFTEST_OK\n' : 'HANDOFF_EMBEDDED_SELFTEST_FAIL\n'); + process.exit(ok ? 
0 : 1); + } catch (_error) { + process.stdout.write('HANDOFF_EMBEDDED_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/handoff-selftest.cjs b/server/src/bench/node/handoff-selftest.cjs new file mode 100644 index 0000000..933f774 --- /dev/null +++ b/server/src/bench/node/handoff-selftest.cjs @@ -0,0 +1,129 @@ +const fs = require('fs'); +const path = require('path'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw new Error('unavailable'); +} + +function loadModules() { + const root = path.resolve(__dirname, '../../..'); + const transferModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'transfer-package-v1.js'), + path.join(root, 'dist', 'src', 'services', 'transfer-package-v1.js'), + ]); + const handoffModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'handoff-record-v1.js'), + path.join(root, 'dist', 'src', 'services', 'handoff-record-v1.js'), + ]); + + if ( + !transferModule || + typeof transferModule.buildTransferPackageV1 !== 'function' || + typeof transferModule.verifyTransferPackageV1 !== 'function' || + !handoffModule || + typeof handoffModule.buildHandoffRecordV1 !== 'function' + ) { + throw new Error('unavailable'); + } + + return { transferModule, handoffModule }; +} + +function buildTransferPackage(transferModule) { + return transferModule.buildTransferPackageV1({ + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + trunk: { + intent: { + primary: 'handoff', + successCriteria: ['verify'], + nonGoals: ['drift'], + }, + stateDigest: { + facts: ['fact-a'], + openLoops: ['loop-a'], + }, + }, + continuation: { + nextActions: [ + { + code: 'NEXT', + message: 'Continue', + 
expectedOutput: null, + domains: ['facts', 'decisions'], + }, + ], + validationChecklist: [ + { + code: 'CHECK', + message: 'Check', + severity: 'must', + }, + ], + }, + conflicts: [], + }); +} + +function main() { + try { + const { transferModule, handoffModule } = loadModules(); + const transferPackageV1 = buildTransferPackage(transferModule); + const verification = transferModule.verifyTransferPackageV1(transferPackageV1); + + const handoffA = handoffModule.buildHandoffRecordV1({ + transferPackageV1, + verification: { + transferHashRecomputed: verification.recomputedHash, + matchesProvidedHash: true, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + createdAt: '2025-01-01T00:00:00.000Z', + }); + const handoffB = handoffModule.buildHandoffRecordV1({ + transferPackageV1, + verification: { + transferHashRecomputed: verification.recomputedHash, + matchesProvidedHash: true, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + createdAt: '2026-01-01T00:00:00.000Z', + }); + + const ok = + handoffA.handoffHash === handoffB.handoffHash && + handoffA.createdAt === '2025-01-01T00:00:00.000Z' && + handoffB.createdAt === '2026-01-01T00:00:00.000Z'; + + process.stdout.write(ok ? 'HANDOFF_SELFTEST_OK\n' : 'HANDOFF_SELFTEST_FAIL\n'); + process.exit(ok ? 
0 : 1); + } catch (_error) { + process.stdout.write('HANDOFF_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/lineage-flow-selftest.cjs b/server/src/bench/node/lineage-flow-selftest.cjs new file mode 100644 index 0000000..e5d89f4 --- /dev/null +++ b/server/src/bench/node/lineage-flow-selftest.cjs @@ -0,0 +1,142 @@ +const fs = require('fs'); +const path = require('path'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw new Error('unavailable'); +} + +function loadModules() { + const root = path.resolve(__dirname, '../../..'); + const originalLog = console.log; + const originalInfo = console.info; + const originalWarn = console.warn; + console.log = () => {}; + console.info = () => {}; + console.warn = () => {}; + + try { + const transferModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'transfer-package-v1.js'), + path.join(root, 'dist', 'src', 'services', 'transfer-package-v1.js'), + ]); + const serviceModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'transfer-package.service.js'), + path.join(root, 'dist', 'src', 'services', 'transfer-package.service.js'), + ]); + const lineageModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'lineage-binding-v1.js'), + path.join(root, 'dist', 'src', 'services', 'lineage-binding-v1.js'), + ]); + + if ( + !transferModule || + typeof transferModule.buildTransferPackageV1 !== 'function' || + !serviceModule || + typeof serviceModule.buildLineageBindingForTransferFlowV1 !== 'function' || + !lineageModule || + typeof lineageModule.verifyLineageBindingV1 !== 'function' + ) { + throw new Error('unavailable'); + } + + return { transferModule, serviceModule, lineageModule }; + } finally { + console.log = originalLog; + console.info = originalInfo; + console.warn = originalWarn; + } +} + +function 
makeHex(char) { + return String(char).repeat(64); +} + +function buildTransfer(transferModule) { + return transferModule.buildTransferPackageV1({ + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + trunk: { + intent: { + primary: null, + successCriteria: [], + nonGoals: [], + }, + stateDigest: { + facts: ['fact-a'], + }, + }, + continuation: { + nextActions: [], + validationChecklist: [], + }, + conflicts: [], + }); +} + +function main() { + try { + const { transferModule, serviceModule, lineageModule } = loadModules(); + const transferPackageV1 = buildTransfer(transferModule); + + const first = serviceModule.buildLineageBindingForTransferFlowV1({ + transferPackageV1, + include: { + closure: true, + execution: false, + handoff: false, + }, + closureContractV1: { + schema: 'closure-contract-1', + proposedHash: makeHex('a'), + acceptedHash: makeHex('b'), + }, + createdAt: '2025-01-01T00:00:00.000Z', + }); + const second = serviceModule.buildLineageBindingForTransferFlowV1({ + transferPackageV1, + include: { + closure: true, + execution: false, + handoff: false, + }, + closureContractV1: { + schema: 'closure-contract-1', + proposedHash: makeHex('a'), + acceptedHash: makeHex('b'), + }, + createdAt: '2026-01-01T00:00:00.000Z', + }); + + const firstVerification = lineageModule.verifyLineageBindingV1(first); + const secondVerification = lineageModule.verifyLineageBindingV1(second); + + const ok = + firstVerification.ok === true && + firstVerification.matches === true && + secondVerification.ok === true && + secondVerification.matches === true && + first.lineageHash === second.lineageHash && + first.createdAt === '2025-01-01T00:00:00.000Z' && + second.createdAt === '2026-01-01T00:00:00.000Z' && + first.bindings.transfer.transferHash === transferPackageV1.transferHash && + second.bindings.transfer.transferHash === transferPackageV1.transferHash; + + process.stdout.write(ok ? 
'LINEAGE_FLOW_SELFTEST_OK\n' : 'LINEAGE_FLOW_SELFTEST_FAIL\n'); + process.exit(ok ? 0 : 1); + } catch (_error) { + process.stdout.write('LINEAGE_FLOW_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/lineage-selftest.cjs b/server/src/bench/node/lineage-selftest.cjs new file mode 100644 index 0000000..76a9f58 --- /dev/null +++ b/server/src/bench/node/lineage-selftest.cjs @@ -0,0 +1,135 @@ +const fs = require('fs'); +const path = require('path'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw new Error('unavailable'); +} + +function loadModule() { + const root = path.resolve(__dirname, '../../..'); + const lineageModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'lineage-binding-v1.js'), + path.join(root, 'dist', 'src', 'services', 'lineage-binding-v1.js'), + ]); + + if ( + !lineageModule || + typeof lineageModule.buildLineageBindingV1 !== 'function' || + typeof lineageModule.verifyLineageBindingV1 !== 'function' || + typeof lineageModule.verifyLineageBindingV1OrThrow !== 'function' + ) { + throw new Error('unavailable'); + } + + return lineageModule; +} + +function makeHex(char) { + return String(char).repeat(64); +} + +function buildSampleInput(createdAt) { + return { + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + transfer: { + schema: 'transfer-package-1', + transferHash: makeHex('a'), + }, + closure: null, + execution: null, + handoff: null, + }, + diagnostics: { + notes: [], + }, + createdAt, + }; +} + +function main() { + try { + const lineageModule = loadModule(); + + const first = lineageModule.buildLineageBindingV1(buildSampleInput(null)); + const verification = lineageModule.verifyLineageBindingV1(first); + + const second = 
lineageModule.buildLineageBindingV1(buildSampleInput('2025-01-01T00:00:00.000Z')); + const third = lineageModule.buildLineageBindingV1(buildSampleInput('2026-01-01T00:00:00.000Z')); + + const mutated = { + ...first, + bindings: { + ...first.bindings, + transfer: { + schema: 'transfer-package-1', + transferHash: makeHex('b'), + }, + }, + }; + const mismatch = lineageModule.verifyLineageBindingV1(mutated); + + let throwMatches = false; + try { + lineageModule.verifyLineageBindingV1OrThrow(mutated); + } catch (error) { + throwMatches = + !!error && + typeof error === 'object' && + error.code === 'E_LINEAGE_HASH_MISMATCH' && + error.message === 'Lineage binding hash mismatch'; + } + + let jsonSafeMatches = false; + try { + lineageModule.buildLineageBindingV1({ + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + diagnostics: { + notes: [BigInt(1)], + }, + }); + } catch (error) { + jsonSafeMatches = + !!error && + typeof error === 'object' && + error.code === 'E_LINEAGE_NON_JSON_SAFE' && + error.message === 'Lineage binding contains non JSON-safe value'; + } + + const ok = + verification.ok === true && + verification.matches === true && + first.createdAt === null && + second.createdAt === '2025-01-01T00:00:00.000Z' && + third.createdAt === '2026-01-01T00:00:00.000Z' && + second.lineageHash === third.lineageHash && + mismatch.ok === true && + mismatch.matches === false && + throwMatches === true && + jsonSafeMatches === true; + + process.stdout.write(ok ? 'LINEAGE_SELFTEST_OK\n' : 'LINEAGE_SELFTEST_FAIL\n'); + process.exit(ok ? 
0 : 1); + } catch (_error) { + process.stdout.write('LINEAGE_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/risk-closure-selftest.cjs b/server/src/bench/node/risk-closure-selftest.cjs new file mode 100644 index 0000000..9497805 --- /dev/null +++ b/server/src/bench/node/risk-closure-selftest.cjs @@ -0,0 +1,144 @@ +const fs = require('fs'); +const path = require('path'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw new Error('unavailable'); +} + +function loadModules() { + const rootPath = path.resolve(__dirname, '../../..'); + const plannerModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'delta-closure-planner.js'), + path.join(rootPath, 'dist', 'src', 'services', 'delta-closure-planner.js'), + ]); + const policyModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'delta-risk-policy.js'), + path.join(rootPath, 'dist', 'src', 'services', 'delta-risk-policy.js'), + ]); + const contractModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'closure-contract-v1.js'), + path.join(rootPath, 'dist', 'src', 'services', 'closure-contract-v1.js'), + ]); + + if ( + !plannerModule || + typeof plannerModule.planDeltaClosureV1 !== 'function' || + !policyModule || + !policyModule.DEFAULT_RISK_POLICY_V1 || + !contractModule || + typeof contractModule.buildClosureContractV1 !== 'function' || + typeof contractModule.stableStringify !== 'function' + ) { + throw new Error('unavailable'); + } + + return { + planDeltaClosureV1: plannerModule.planDeltaClosureV1, + policy: policyModule.DEFAULT_RISK_POLICY_V1, + buildClosureContractV1: contractModule.buildClosureContractV1, + stableStringify: contractModule.stableStringify, + }; +} + +function makeInput() { + return { + baseState: { + facts: [{ key: 'fact.alpha', statement: 'alpha' }], + decisions: 
[], + constraints: [], + risks: [], + assumptions: [], + }, + proposedDelta: { + schemaVersion: 'sdiff-0.1', + base: { revisionHash: 'risk-selftest-base' }, + target: { revisionHash: 'risk-selftest-target' }, + facts: { added: [], removed: [], modified: [] }, + decisions: { + added: [], + removed: [], + modified: [ + { + key: 'decision.missing', + before: null, + after: { key: 'decision.missing', answer: 'accept' }, + changes: [{ path: 'answer', op: 'set', after: 'accept' }], + }, + ], + }, + constraints: { added: [], removed: [], modified: [] }, + risks: { added: [], removed: [], modified: [] }, + assumptions: { added: [], removed: [], modified: [] }, + meta: { + determinism: { + canonicalVersion: 'tpkg-0.2-canon-v1', + keyStrategy: 'sig-hash-v1', + tieBreakers: ['risk-closure-selftest'], + }, + collisions: { hard: [], soft: [] }, + counts: { + 'facts.added': 0, + 'facts.removed': 0, + 'facts.modified': 0, + 'decisions.added': 0, + 'decisions.removed': 0, + 'decisions.modified': 1, + 'constraints.added': 0, + 'constraints.removed': 0, + 'constraints.modified': 0, + 'risks.added': 0, + 'risks.removed': 0, + 'risks.modified': 0, + 'assumptions.added': 0, + 'assumptions.removed': 0, + 'assumptions.modified': 0, + 'collisions.soft': 0, + 'collisions.hard': 0, + }, + }, + }, + }; +} + +function buildContract(modules) { + const input = makeInput(); + const plan = modules.planDeltaClosureV1({ + baseState: input.baseState, + proposedDelta: input.proposedDelta, + mode: 'strict', + policy: modules.policy, + }); + return modules.buildClosureContractV1({ + proposedDelta: input.proposedDelta, + acceptedDelta: plan.acceptedDelta, + rejected: plan.rejected, + suggestions: plan.suggestions, + diagnostics: plan.diagnostics, + }); +} + +function main() { + try { + const modules = loadModules(); + const contractA = buildContract(modules); + const contractB = buildContract(modules); + const hasL3 = contractA.rejected.some((entry) => entry && entry.riskLevel === 'L3'); + const 
hasPromotion = contractA.suggestions.some((entry) => entry && entry.actionType === 'PROMOTE_TO_L3_REVIEW');
+    const deterministic = modules.stableStringify(contractA) === modules.stableStringify(contractB);
+    const ok = deterministic && (!hasL3 || hasPromotion);
+
+    process.stdout.write(ok ? 'RISK_CLOSURE_SELFTEST_OK\n' : 'RISK_CLOSURE_SELFTEST_FAIL\n');
+    process.exit(ok ? 0 : 1);
+  } catch (_error) {
+    process.stdout.write('RISK_CLOSURE_SELFTEST_FAIL\n');
+    process.exit(1);
+  }
+}
+
+main();
diff --git a/server/src/bench/node/run-with-build.cjs b/server/src/bench/node/run-with-build.cjs
new file mode 100644
index 0000000..3e49adb
--- /dev/null
+++ b/server/src/bench/node/run-with-build.cjs
@@ -0,0 +1,49 @@
+const path = require('path');
+const { spawnSync } = require('child_process');
+
+function runBuild(serverRoot) {
+  if (process.platform === 'win32') {
+    const comspec = process.env.ComSpec || 'cmd.exe';
+    return spawnSync(comspec, ['/d', '/s', '/c', 'npm run build'], {
+      cwd: serverRoot,
+      stdio: 'inherit',
+      env: process.env,
+      windowsHide: true,
+    });
+  }
+
+  return spawnSync('npm', ['run', 'build'], {
+    cwd: serverRoot,
+    stdio: 'inherit',
+    env: process.env,
+    windowsHide: true,
+  });
+}
+
+function runRunner(serverRoot) {
+  return spawnSync(process.execPath, ['src/bench/node/runner.cjs'], {
+    cwd: serverRoot,
+    stdio: 'inherit',
+  });
+}
+
+function main() {
+  const serverRoot = path.resolve(__dirname, '../../..');
+  const buildResult = runBuild(serverRoot);
+  const buildSucceeded =
+    buildResult.error == null && typeof buildResult.status === 'number' && buildResult.status === 0;
+
+  if (!buildSucceeded) {
+    process.stdout.write('BENCH_BUILD_FAILED\n');
+    process.exit(1);
+  }
+
+  const runnerResult = runRunner(serverRoot);
+  if (runnerResult.status !== 0) {
+    process.exit(typeof runnerResult.status === 'number' ? 
runnerResult.status : 1); + } +} + +if (require.main === module) { + main(); +} diff --git a/server/src/bench/node/runner.cjs b/server/src/bench/node/runner.cjs new file mode 100644 index 0000000..09e10ef --- /dev/null +++ b/server/src/bench/node/runner.cjs @@ -0,0 +1,956 @@ +const fs = require('fs'); +const path = require('path'); + +const { diffState, applyDelta, detectConflicts, stableHash } = require('./algebra-bridge.cjs'); +const { stripBomFromText } = require('./validate-fixtures.cjs'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) continue; + return require(candidate); + } + throw new Error('unsupported baseline'); +} + +function loadClosureTools() { + const root = path.resolve(__dirname, '../../..'); + + try { + const planner = requireFromCandidates([ + path.join(root, 'dist', 'services', 'delta-closure-planner.js'), + path.join(root, 'dist', 'src', 'services', 'delta-closure-planner.js'), + ]); + const policyModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'delta-risk-policy.js'), + path.join(root, 'dist', 'src', 'services', 'delta-risk-policy.js'), + ]); + return { + planDeltaClosure: planner && typeof planner.planDeltaClosure === 'function' ? planner.planDeltaClosure : null, + planDeltaClosureV1: planner && typeof planner.planDeltaClosureV1 === 'function' ? planner.planDeltaClosureV1 : null, + defaultRiskPolicy: policyModule && policyModule.DEFAULT_RISK_POLICY_V1 ? 
policyModule.DEFAULT_RISK_POLICY_V1 : null, + }; + } catch (_error) { + return { + planDeltaClosure: null, + planDeltaClosureV1: null, + defaultRiskPolicy: null, + }; + } +} + +const closureTools = loadClosureTools(); + +const DOMAIN_ORDER = ['facts', 'decisions', 'constraints', 'risks', 'assumptions']; +const BASELINE_ORDER = [ + 'B1_CORE_BEST_EFFORT', + 'B1_CORE_STRICT', + 'B1_PIPELINE', + 'B2_LLM_DELTA_BEST_EFFORT', + 'B2_LLM_DELTA_STRICT', + 'B3_STRICT_CLOSURE', + 'B4_STRICT_RISK_CLOSURE', + 'B5_STRICT_CLOSURE_SUGGESTIONS', +]; +const MODES = { + B1_CORE_BEST_EFFORT: 'best_effort', + B1_CORE_STRICT: 'strict', + B2_LLM_DELTA_BEST_EFFORT: 'best_effort', + B2_LLM_DELTA_STRICT: 'strict', + B3_STRICT_CLOSURE: 'strict', + B4_STRICT_RISK_CLOSURE: 'strict', + B5_STRICT_CLOSURE_SUGGESTIONS: 'strict', +}; + +function compareStrings(a, b) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function round6(value) { + if (!Number.isFinite(value)) return 0; + return Math.round(value * 1000000) / 1000000; +} + +function asArray(value) { + return Array.isArray(value) ? value : []; +} + +function asRecord(value) { + return value && typeof value === 'object' && !Array.isArray(value) ? 
value : {}; +} + +function toDomainState(value) { + const record = asRecord(value); + return { + facts: asArray(record.facts), + decisions: asArray(record.decisions), + constraints: asArray(record.constraints), + risks: asArray(record.risks), + assumptions: asArray(record.assumptions), + }; +} + +function zeroCounts() { + return { + facts: { added: 0, removed: 0, modified: 0 }, + decisions: { added: 0, removed: 0, modified: 0 }, + constraints: { added: 0, removed: 0, modified: 0 }, + risks: { added: 0, removed: 0, modified: 0 }, + assumptions: { added: 0, removed: 0, modified: 0 }, + }; +} + +function zeroRiskLevelCounts() { + return { L0: 0, L1: 0, L2: 0, L3: 0 }; +} + + +function rejectedTargetKey(entry) { + const record = asRecord(entry); + const domain = typeof record.domain === 'string' ? record.domain : 'NULL'; + const key = typeof record.key === 'string' ? record.key : 'NULL'; + const pathValue = typeof record.path === 'string' ? record.path : record.path === null ? 'NULL' : 'NULL'; + const op = typeof record.op === 'string' ? record.op : 'NULL'; + return `${domain}|${key}|${pathValue}|${op}`; +} + +function suggestionTargetKey(entry) { + const record = asRecord(entry); + const payload = asRecord(record.payload); + const appliesTo = asRecord(payload.appliesTo); + const domain = typeof appliesTo.domain === 'string' ? appliesTo.domain : 'NULL'; + const key = typeof appliesTo.key === 'string' ? appliesTo.key : 'NULL'; + const pathValue = typeof appliesTo.path === 'string' ? appliesTo.path : appliesTo.path === null ? 'NULL' : 'NULL'; + const op = typeof appliesTo.op === 'string' ? appliesTo.op : 'NULL'; + return `${domain}|${key}|${pathValue}|${op}`; +} + +function normalizeFieldChange(rawChange) { + const change = asRecord(rawChange); + const op = change.op === 'set' || change.op === 'unset' || change.op === 'append' || change.op === 'remove' ? change.op : 'set'; + return { + path: typeof change.path === 'string' && change.path.length > 0 ? 
change.path : 'value', + op, + before: change.before, + after: change.after, + value: change.value, + }; +} + +function normalizeDomainDelta(rawDomain) { + const domain = asRecord(rawDomain); + return { + added: asArray(domain.added).map((rawItem) => { + const item = asRecord(rawItem); + return { + key: typeof item.key === 'string' ? item.key : '', + unit: Object.prototype.hasOwnProperty.call(item, 'unit') ? item.unit : {}, + }; + }), + removed: asArray(domain.removed).map((rawItem) => { + const item = asRecord(rawItem); + return { + key: typeof item.key === 'string' ? item.key : '', + unit: Object.prototype.hasOwnProperty.call(item, 'unit') ? item.unit : {}, + }; + }), + modified: asArray(domain.modified).map((rawItem) => { + const item = asRecord(rawItem); + return { + key: typeof item.key === 'string' ? item.key : '', + before: Object.prototype.hasOwnProperty.call(item, 'before') ? item.before : {}, + after: Object.prototype.hasOwnProperty.call(item, 'after') ? item.after : {}, + changes: asArray(item.changes).map(normalizeFieldChange), + }; + }), + }; +} + +function normalizeSemanticDelta(rawDelta) { + const record = asRecord(rawDelta); + const domains = { + facts: normalizeDomainDelta(record.facts), + decisions: normalizeDomainDelta(record.decisions), + constraints: normalizeDomainDelta(record.constraints), + risks: normalizeDomainDelta(record.risks), + assumptions: normalizeDomainDelta(record.assumptions), + }; + + const counts = {}; + for (const domain of DOMAIN_ORDER) { + const entry = domains[domain]; + counts[domain] = entry.added.length + entry.removed.length + entry.modified.length; + } + + const normalized = { + schemaVersion: 'sdiff-0.1', + base: { + revisionHash: typeof asRecord(record.base).revisionHash === 'string' ? asRecord(record.base).revisionHash : 'bench-base', + }, + target: { + revisionHash: typeof asRecord(record.target).revisionHash === 'string' ? 
asRecord(record.target).revisionHash : 'bench-target', + }, + facts: domains.facts, + decisions: domains.decisions, + constraints: domains.constraints, + risks: domains.risks, + assumptions: domains.assumptions, + meta: { + determinism: { + canonicalVersion: 'tpkg-0.2-canon-v1', + keyStrategy: 'sig-hash-v1', + tieBreakers: ['bench'], + }, + collisions: { + hard: [], + soft: [], + }, + assumptionsDerived: false, + counts, + }, + }; + + stableHash(normalized); + return normalized; +} + +function parseTaskFixture(raw, fileName) { + const record = asRecord(raw); + const taskId = typeof record.taskId === 'string' ? record.taskId : ''; + const category = record.category === 'T1' || record.category === 'T2' || record.category === 'T3' ? record.category : null; + const description = typeof record.description === 'string' ? record.description : ''; + + if (!taskId || !category || !description) { + throw new Error(`Invalid fixture metadata: ${fileName}`); + } + + const runConfig = asRecord(record.runConfig); + const repetitions = Number.isInteger(runConfig.repetitions) && runConfig.repetitions > 0 ? runConfig.repetitions : 15; + + const baseState = toDomainState(record.baseState); + const targetState = record.targetState === undefined ? undefined : toDomainState(record.targetState); + + stableHash(baseState); + if (targetState) stableHash(targetState); + + const coreStubDelta = record.coreStubDelta === undefined ? null : normalizeSemanticDelta(record.coreStubDelta); + const llmStubDelta = record.llmStubDelta === undefined ? null : normalizeSemanticDelta(record.llmStubDelta); + const proposedDelta = record.proposedDelta === undefined ? null : normalizeSemanticDelta(record.proposedDelta); + + const assertionsRecord = record.targetAssertions === undefined ? 
null : asRecord(record.targetAssertions); + const normalizeDomains = (value) => + asArray(value) + .filter((item) => DOMAIN_ORDER.includes(item)) + .sort((a, b) => DOMAIN_ORDER.indexOf(a) - DOMAIN_ORDER.indexOf(b)); + const normalizeStrings = (value) => asArray(value).filter((item) => typeof item === 'string').sort(compareStrings); + + const targetAssertions = assertionsRecord + ? { + mustEqualTargetHash: + typeof assertionsRecord.mustEqualTargetHash === 'boolean' ? assertionsRecord.mustEqualTargetHash : undefined, + mustHaveNoConflicts: + typeof assertionsRecord.mustHaveNoConflicts === 'boolean' ? assertionsRecord.mustHaveNoConflicts : undefined, + maxDistanceCountsSum: + typeof assertionsRecord.maxDistanceCountsSum === 'number' && Number.isFinite(assertionsRecord.maxDistanceCountsSum) + ? assertionsRecord.maxDistanceCountsSum + : undefined, + domainMustNotChange: normalizeDomains(assertionsRecord.domainMustNotChange), + requiredDomainsModified: normalizeDomains(assertionsRecord.requiredDomainsModified), + requiredDecisionKeys: normalizeStrings(assertionsRecord.requiredDecisionKeys), + requiredAssumptionKeys: normalizeStrings(assertionsRecord.requiredAssumptionKeys), + } + : undefined; + + return { + taskId, + category, + description, + baseState, + targetState, + coreStubDelta, + llmStubDelta, + proposedDelta, + targetAssertions, + runConfig: { repetitions }, + }; +} + +function summarizeDelta(delta) { + const counts = zeroCounts(); + for (const domain of DOMAIN_ORDER) { + const domainDelta = delta[domain] || { added: [], removed: [], modified: [] }; + counts[domain] = { + added: asArray(domainDelta.added).length, + removed: asArray(domainDelta.removed).length, + modified: asArray(domainDelta.modified).length, + }; + } + + return { + counts, + hasCollisions: + asArray(asRecord(asRecord(delta.meta).collisions).soft).length > 0 || + asArray(asRecord(asRecord(delta.meta).collisions).hard).length > 0, + assumptionsDerived: asRecord(delta.meta).assumptionsDerived === 
true, + modifiedDomains: DOMAIN_ORDER.filter((domain) => { + const c = counts[domain]; + return c.added + c.removed + c.modified > 0; + }), + }; +} + +function calculateDistanceCounts(delta) { + const counts = zeroCounts(); + for (const domain of DOMAIN_ORDER) { + const domainDelta = delta[domain] || { added: [], removed: [], modified: [] }; + counts[domain] = { + added: asArray(domainDelta.added).length, + removed: asArray(domainDelta.removed).length, + modified: asArray(domainDelta.modified).length, + }; + } + return counts; +} + +function sumDistanceCounts(counts) { + let total = 0; + for (const domain of DOMAIN_ORDER) { + const c = counts[domain]; + total += c.added + c.removed + c.modified; + } + return total; +} + +function hasMatchingKey(units, keys, expected) { + for (const unit of units) { + const record = asRecord(unit); + for (const key of keys) { + if (record[key] === expected) return true; + } + } + return false; +} + +function evaluateAssertions(params) { + const { + targetAssertions, + equalsTargetHash, + conflictCount, + postApplyConflictCount, + distanceCounts, + distanceCountsSum, + modifiedDomains, + nextState, + } = params; + + if (!targetAssertions) { + return { passed: true, failed: [] }; + } + + const failures = []; + const totalConflictCount = conflictCount + postApplyConflictCount; + + if (targetAssertions.mustEqualTargetHash === true && !equalsTargetHash) { + failures.push('ASSERT_EQUALS_TARGET_HASH'); + } + if (targetAssertions.mustEqualTargetHash === false && equalsTargetHash) { + failures.push('ASSERT_NOT_EQUALS_TARGET_HASH'); + } + + if (typeof targetAssertions.mustHaveNoConflicts === 'boolean') { + if (targetAssertions.mustHaveNoConflicts && totalConflictCount > 0) { + failures.push('ASSERT_CONFLICTS_PRESENT'); + } + if (!targetAssertions.mustHaveNoConflicts && totalConflictCount === 0) { + failures.push('ASSERT_CONFLICTS_ABSENT'); + } + } + + if ( + typeof targetAssertions.maxDistanceCountsSum === 'number' && + distanceCountsSum > 
targetAssertions.maxDistanceCountsSum + ) { + failures.push('ASSERT_DISTANCE_EXCEEDED'); + } + + for (const domain of targetAssertions.domainMustNotChange || []) { + const c = distanceCounts[domain]; + if (c.added + c.removed + c.modified > 0) { + failures.push('ASSERT_DOMAIN_MUST_NOT_CHANGE'); + break; + } + } + + for (const domain of targetAssertions.requiredDomainsModified || []) { + if (!modifiedDomains.includes(domain)) { + failures.push('ASSERT_REQUIRED_DOMAIN_NOT_MODIFIED'); + break; + } + } + + if (nextState) { + for (const expectedKey of targetAssertions.requiredDecisionKeys || []) { + if (!hasMatchingKey(nextState.decisions, ['id', 'key', 'decisionId', 'question', 'title'], expectedKey)) { + failures.push('ASSERT_REQUIRED_DECISION_KEYS_MISSING'); + break; + } + } + + for (const expectedKey of targetAssertions.requiredAssumptionKeys || []) { + if (!hasMatchingKey(nextState.assumptions, ['id', 'key', 'assumptionId', 'statement', 'topic'], expectedKey)) { + failures.push('ASSERT_REQUIRED_ASSUMPTION_KEYS_MISSING'); + break; + } + } + } + + const failed = [...new Set(failures)].sort(compareStrings); + return { + passed: failed.length === 0, + failed, + }; +} + +function normalizeSummary(summary) { + if (!summary) return null; + const counts = zeroCounts(); + for (const domain of DOMAIN_ORDER) { + const c = asRecord(asRecord(summary.counts)[domain]); + counts[domain] = { + added: typeof c.added === 'number' ? c.added : 0, + removed: typeof c.removed === 'number' ? c.removed : 0, + modified: typeof c.modified === 'number' ? 
c.modified : 0, + }; + } + return { + counts, + hasCollisions: summary.hasCollisions === true, + assumptionsDerived: summary.assumptionsDerived === true, + modifiedDomains: DOMAIN_ORDER.filter((domain) => asArray(summary.modifiedDomains).includes(domain)), + }; +} + +function normalizeDistanceCounts(input) { + const counts = zeroCounts(); + for (const domain of DOMAIN_ORDER) { + const c = asRecord(asRecord(input)[domain]); + counts[domain] = { + added: typeof c.added === 'number' ? c.added : 0, + removed: typeof c.removed === 'number' ? c.removed : 0, + modified: typeof c.modified === 'number' ? c.modified : 0, + }; + } + return counts; +} + +function normalizeRiskCounts(input) { + const counts = zeroRiskLevelCounts(); + const record = asRecord(input); + counts.L0 = typeof record.L0 === 'number' ? record.L0 : 0; + counts.L1 = typeof record.L1 === 'number' ? record.L1 : 0; + counts.L2 = typeof record.L2 === 'number' ? record.L2 : 0; + counts.L3 = typeof record.L3 === 'number' ? record.L3 : 0; + return counts; +} + +function stableStringifyRecord(record) { + const normalized = { + experiment: { id: record.experiment.id, ts: null }, + task: { taskId: record.task.taskId, category: record.task.category, rep: record.task.rep }, + baseline: { + name: record.baseline.name, + mode: record.baseline.mode, + supported: record.baseline.supported, + reason: record.baseline.reason, + }, + identity: { + stateHashBefore: record.identity.stateHashBefore, + stateHashAfter: record.identity.stateHashAfter, + targetHash: record.identity.targetHash, + }, + delta: { + source: record.delta.source, + summary: normalizeSummary(record.delta.summary), + }, + transition: { + conflictCount: record.transition.conflictCount, + postApplyConflictCount: record.transition.postApplyConflictCount, + rollbackIndicator: record.transition.rollbackIndicator, + deltaRejectedIndicator: record.transition.deltaRejectedIndicator, + deltaDomainCount: record.transition.deltaDomainCount, + appliedDomainCount: 
record.transition.appliedDomainCount, + domainRollbackRate: record.transition.domainRollbackRate, + closureViolationFlag: record.transition.closureViolationFlag, + rejectedCount: record.transition.rejectedCount, + maxClosureSizeRatio: record.transition.maxClosureSizeRatio, + blockedByRate: record.transition.blockedByRate, + }, + closure: { + candidateCount: record.closure.candidateCount, + rejectedCount: record.closure.rejectedCount, + blockedByRate: record.closure.blockedByRate, + maxClosureSizeRatio: record.closure.maxClosureSizeRatio, + closureViolationFlag: record.closure.closureViolationFlag, + riskLevelCounts: normalizeRiskCounts(record.closure.riskLevelCounts), + riskLevelL3Rate: record.closure.riskLevelL3Rate, + }, + suggestions: { + count: record.suggestions.count, + coveredRejectedCount: record.suggestions.coveredRejectedCount, + blockedByCoveredCount: record.suggestions.blockedByCoveredCount, + totalBlockedByEdges: record.suggestions.totalBlockedByEdges, + coverageRate: record.suggestions.coverageRate, + blockedByResolutionRate: record.suggestions.blockedByResolutionRate, + actionabilityRate: record.suggestions.actionabilityRate, + l3EscalationRate: record.suggestions.l3EscalationRate, + }, + drift: { + equalsTargetHash: record.drift.equalsTargetHash, + distanceCounts: normalizeDistanceCounts(record.drift.distanceCounts), + distanceCountsSum: record.drift.distanceCountsSum, + }, + assertions: { + passed: record.assertions.passed, + failed: asArray(record.assertions.failed).sort(compareStrings), + }, + }; + + return JSON.stringify(normalized); +} + +function loadFixtures(tasksDir) { + const files = fs + .readdirSync(tasksDir) + .filter((file) => file.endsWith('.json')) + .sort(compareStrings); + + return files + .map((file) => { + const text = stripBomFromText(fs.readFileSync(path.join(tasksDir, file), 'utf8')); + const raw = JSON.parse(text); + return parseTaskFixture(raw, file); + }) + .sort((a, b) => compareStrings(a.taskId, b.taskId)); +} + +function 
baselineDefinitions() { + return [ + { name: 'B1_CORE_BEST_EFFORT', mode: MODES.B1_CORE_BEST_EFFORT, supported: true, reason: null, family: 'core' }, + { name: 'B1_CORE_STRICT', mode: MODES.B1_CORE_STRICT, supported: true, reason: null, family: 'core' }, + { name: 'B1_PIPELINE', mode: null, supported: false, reason: 'imports services/DB', family: 'pipeline' }, + { + name: 'B2_LLM_DELTA_BEST_EFFORT', + mode: MODES.B2_LLM_DELTA_BEST_EFFORT, + supported: true, + reason: null, + family: 'llm_stub', + }, + { + name: 'B2_LLM_DELTA_STRICT', + mode: MODES.B2_LLM_DELTA_STRICT, + supported: true, + reason: null, + family: 'llm_stub', + }, + { + name: 'B3_STRICT_CLOSURE', + mode: MODES.B3_STRICT_CLOSURE, + supported: true, + reason: null, + family: 'strict_closure', + }, + { + name: 'B4_STRICT_RISK_CLOSURE', + mode: MODES.B4_STRICT_RISK_CLOSURE, + supported: true, + reason: null, + family: 'risk_closure', + }, + { + name: 'B5_STRICT_CLOSURE_SUGGESTIONS', + mode: MODES.B5_STRICT_CLOSURE_SUGGESTIONS, + supported: true, + reason: null, + family: 'strict_closure_suggestions', + }, + ]; +} + +function unsupportedRecord(task, baseline, rep, reason) { + const stateHashBefore = stableHash(task.baseState); + const targetHash = task.targetState ? stableHash(task.targetState) : null; + const baselineOut = { + name: baseline.name, + mode: baseline.mode, + supported: false, + reason, + }; + + return { + experiment: { id: 'EVAL-1', ts: null }, + task: { taskId: task.taskId, category: task.category, rep }, + baseline: baselineOut, + identity: { stateHashBefore, stateHashAfter: null, targetHash }, + delta: { source: null, summary: null }, + transition: { + conflictCount: 0, + postApplyConflictCount: 0, + rollbackIndicator: 0, + deltaRejectedIndicator: baseline.family === 'llm_stub' ? 0 : null, + deltaDomainCount: 0, + appliedDomainCount: 0, + domainRollbackRate: 0, + closureViolationFlag: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' ? 
0 : null, + rejectedCount: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' ? 0 : null, + maxClosureSizeRatio: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' ? 0 : null, + blockedByRate: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' ? 0 : null, + }, + closure: { + candidateCount: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + rejectedCount: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + blockedByRate: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + maxClosureSizeRatio: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + closureViolationFlag: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + riskLevelCounts: zeroRiskLevelCounts(), + riskLevelL3Rate: baseline.family === 'strict_closure' || baseline.family === 'risk_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + }, + suggestions: { + count: baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + coveredRejectedCount: baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + blockedByCoveredCount: baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + totalBlockedByEdges: baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + coverageRate: baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions' ? 
0 : null, + blockedByResolutionRate: baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + actionabilityRate: baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + l3EscalationRate: baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions' ? 0 : null, + }, + drift: { + equalsTargetHash: false, + distanceCounts: zeroCounts(), + distanceCountsSum: 0, + }, + assertions: { passed: false, failed: ['BASELINE_UNSUPPORTED'] }, + }; +} + +function resolveDelta(task, baseline) { + if (baseline.family === 'pipeline') { + return { kind: 'unsupported', reason: baseline.reason }; + } + + if (baseline.family === 'llm_stub') { + if (task.llmStubDelta) { + return { kind: 'ok', source: 'llm_stub', delta: task.llmStubDelta }; + } + return { kind: 'unsupported', reason: 'unsupported baseline' }; + } + + if (baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions') { + if (!closureTools.planDeltaClosure) { + return { kind: 'unsupported', reason: 'unsupported baseline' }; + } + if (task.llmStubDelta) { + return { kind: 'ok', source: 'strict_closure', delta: task.llmStubDelta }; + } + if (task.proposedDelta) { + return { kind: 'ok', source: 'strict_closure', delta: task.proposedDelta }; + } + if (task.coreStubDelta) { + return { kind: 'ok', source: 'strict_closure', delta: task.coreStubDelta }; + } + if (task.targetState) { + return { kind: 'ok', source: 'strict_closure', delta: diffState(task.baseState, task.targetState) }; + } + return { kind: 'unsupported', reason: 'unsupported baseline' }; + } + + if (baseline.family === 'risk_closure') { + if (!closureTools.planDeltaClosureV1 || !closureTools.defaultRiskPolicy) { + return { kind: 'unsupported', reason: 'unsupported baseline' }; + } + if (task.llmStubDelta) { + return { kind: 'ok', source: 'risk_closure', delta: task.llmStubDelta }; + } + if (task.proposedDelta) { + return { 
kind: 'ok', source: 'risk_closure', delta: task.proposedDelta }; + } + if (task.coreStubDelta) { + return { kind: 'ok', source: 'risk_closure', delta: task.coreStubDelta }; + } + if (task.targetState) { + return { kind: 'ok', source: 'risk_closure', delta: diffState(task.baseState, task.targetState) }; + } + return { kind: 'unsupported', reason: 'unsupported baseline' }; + } + + if (task.coreStubDelta) { + return { kind: 'ok', source: 'core_stub', delta: task.coreStubDelta }; + } + + if (task.targetState) { + return { kind: 'ok', source: 'diff_state', delta: diffState(task.baseState, task.targetState) }; + } + + return { kind: 'unsupported', reason: 'target state required' }; +} + +function runSingle(task, baseline, rep) { + const resolved = resolveDelta(task, baseline); + if (resolved.kind === 'unsupported') { + return unsupportedRecord(task, baseline, rep, resolved.reason); + } + + const proposedDelta = resolved.delta; + const mode = baseline.mode || 'best_effort'; + const stateHashBefore = stableHash(task.baseState); + const targetHash = task.targetState ? stableHash(task.targetState) : null; + + const closurePlan = + (baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions') && closureTools.planDeltaClosure + ? closureTools.planDeltaClosure({ + baseState: task.baseState, + proposedDelta, + mode: 'strict', + policy: { requirePostApplyZeroConflicts: true }, + }) + : null; + + const riskClosurePlan = + baseline.family === 'risk_closure' && closureTools.planDeltaClosureV1 && closureTools.defaultRiskPolicy + ? closureTools.planDeltaClosureV1({ + baseState: task.baseState, + proposedDelta, + mode: 'strict', + policy: closureTools.defaultRiskPolicy, + }) + : null; + + const plannedDelta = riskClosurePlan ? riskClosurePlan.acceptedDelta : closurePlan ? 
closurePlan.acceptedDelta : proposedDelta; + + const transition = applyDelta(task.baseState, plannedDelta, { + mode: + baseline.family === 'strict_closure' || + baseline.family === 'risk_closure' || + baseline.family === 'strict_closure_suggestions' + ? 'best_effort' + : mode, + }); + const postApplyConflicts = detectConflicts(transition.nextState); + const stateHashAfter = stableHash(transition.nextState); + const equalsTargetHash = targetHash === null ? false : stateHashAfter === targetHash; + + const driftDelta = task.targetState ? diffState(transition.nextState, task.targetState) : null; + const distanceCounts = driftDelta ? calculateDistanceCounts(driftDelta) : zeroCounts(); + const distanceCountsSum = driftDelta ? sumDistanceCounts(distanceCounts) : 0; + + const deltaSummary = summarizeDelta(plannedDelta); + const appliedSummary = summarizeDelta(diffState(task.baseState, transition.nextState)); + const deltaDomainCount = deltaSummary.modifiedDomains.length; + const appliedDomainCount = appliedSummary.modifiedDomains.length; + const domainRollbackRate = + deltaDomainCount === 0 ? 0 : round6(Math.max(0, Math.min(1, (deltaDomainCount - appliedDomainCount) / deltaDomainCount))); + + const conflictCount = asArray(transition.conflicts).length; + const postApplyConflictCount = asArray(postApplyConflicts).length; + const rollbackIndicator = conflictCount > 0 && stateHashAfter === stateHashBefore ? 1 : 0; + const deltaRejectedIndicator = + baseline.family === 'llm_stub' ? (mode === 'strict' && conflictCount > 0 && stateHashAfter === stateHashBefore ? 
1 : 0) : null; + + const activeClosurePlan = riskClosurePlan || closurePlan; + const riskLevelCounts = zeroRiskLevelCounts(); + if (activeClosurePlan) { + for (const entry of asArray(activeClosurePlan.rejected)) { + const riskLevel = asRecord(entry).riskLevel; + if (riskLevel === 'L0' || riskLevel === 'L1' || riskLevel === 'L2' || riskLevel === 'L3') { + riskLevelCounts[riskLevel] += 1; + } + } + } + + const closureCandidateCount = activeClosurePlan ? activeClosurePlan.diagnostics.candidateCount : null; + const closureRejectedCount = activeClosurePlan ? activeClosurePlan.rejected.length : null; + const closureBlockedByRate = activeClosurePlan ? activeClosurePlan.diagnostics.blockedByRate : null; + const closureMaxClosureSizeRatio = activeClosurePlan ? activeClosurePlan.diagnostics.maxClosureSizeRatio : null; + const closureViolationFlag = activeClosurePlan ? (activeClosurePlan.diagnostics.closureViolationFlag ? 1 : 0) : null; + const riskLevelL3Rate = + closureCandidateCount && closureCandidateCount > 0 + ? round6(riskLevelCounts.L3 / closureCandidateCount) + : activeClosurePlan + ? 0 + : null; + + const suggestionRecords = + baseline.family === 'strict_closure_suggestions' && closurePlan ? asArray(closurePlan.suggestions) : []; + const rejectedRecords = + (baseline.family === 'strict_closure' || baseline.family === 'strict_closure_suggestions') && closurePlan + ? 
asArray(closurePlan.rejected) + : []; + const rejectedKeys = new Set(rejectedRecords.map((entry) => rejectedTargetKey(entry))); + const coveredKeys = new Set(); + const actionableKeys = new Set(); + let l3EscalationCount = 0; + + for (const suggestion of suggestionRecords) { + const key = suggestionTargetKey(suggestion); + if (rejectedKeys.has(key)) { + coveredKeys.add(key); + if (asRecord(suggestion).actionType !== 'REQUEST_HUMAN_CONFIRM') { + actionableKeys.add(key); + } + if (asRecord(suggestion).actionType === 'PROMOTE_TO_L3_REVIEW') { + l3EscalationCount += 1; + } + } + } + + const suggestionDiagnostics = + baseline.family === 'strict_closure_suggestions' && closurePlan && closurePlan.suggestionDiagnostics + ? closurePlan.suggestionDiagnostics + : null; + const totalBlockedByEdges = + baseline.family === 'strict_closure_suggestions' && closurePlan + ? rejectedRecords.reduce((total, entry) => total + asArray(asRecord(entry).blockedBy).length, 0) + : baseline.family === 'strict_closure' + ? 0 + : null; + const suggestionCount = + baseline.family === 'strict_closure_suggestions' + ? suggestionRecords.length + : baseline.family === 'strict_closure' + ? 0 + : null; + const coveredRejectedCount = + baseline.family === 'strict_closure_suggestions' + ? coveredKeys.size + : baseline.family === 'strict_closure' + ? 0 + : null; + const blockedByCoveredCount = + baseline.family === 'strict_closure_suggestions' && suggestionDiagnostics + ? suggestionDiagnostics.blockedByCoveredCount + : baseline.family === 'strict_closure' + ? 0 + : null; + const suggestionsCoverageRate = + baseline.family === 'strict_closure_suggestions' + ? round6(coveredKeys.size / Math.max(1, closureRejectedCount ?? 0)) + : baseline.family === 'strict_closure' + ? round6(0 / Math.max(1, closureRejectedCount ?? 0)) + : null; + const blockedByResolutionRate = + baseline.family === 'strict_closure_suggestions' + ? round6((blockedByCoveredCount ?? 0) / Math.max(1, totalBlockedByEdges ?? 
0)) + : baseline.family === 'strict_closure' + ? 0 + : null; + const suggestionActionabilityRate = + baseline.family === 'strict_closure_suggestions' + ? round6(actionableKeys.size / Math.max(1, closureRejectedCount ?? 0)) + : baseline.family === 'strict_closure' + ? round6(0 / Math.max(1, closureRejectedCount ?? 0)) + : null; + const l3EscalationRate = + baseline.family === 'strict_closure_suggestions' + ? round6(l3EscalationCount / Math.max(1, closureRejectedCount ?? 0)) + : baseline.family === 'strict_closure' + ? 0 + : null; + + const assertions = evaluateAssertions({ + targetAssertions: task.targetAssertions, + equalsTargetHash, + conflictCount, + postApplyConflictCount, + distanceCounts, + distanceCountsSum, + modifiedDomains: deltaSummary.modifiedDomains, + nextState: toDomainState(transition.nextState), + }); + + return { + experiment: { id: 'EVAL-1', ts: null }, + task: { taskId: task.taskId, category: task.category, rep }, + baseline: { + name: baseline.name, + mode: baseline.mode, + supported: baseline.supported, + reason: baseline.reason, + }, + identity: { stateHashBefore, stateHashAfter, targetHash }, + delta: { source: resolved.source, summary: deltaSummary }, + transition: { + conflictCount, + postApplyConflictCount, + rollbackIndicator, + deltaRejectedIndicator, + deltaDomainCount, + appliedDomainCount, + domainRollbackRate, + closureViolationFlag, + rejectedCount: closureRejectedCount, + maxClosureSizeRatio: closureMaxClosureSizeRatio, + blockedByRate: closureBlockedByRate, + }, + closure: { + candidateCount: closureCandidateCount, + rejectedCount: closureRejectedCount, + blockedByRate: closureBlockedByRate, + maxClosureSizeRatio: closureMaxClosureSizeRatio, + closureViolationFlag, + riskLevelCounts, + riskLevelL3Rate, + }, + suggestions: { + count: suggestionCount, + coveredRejectedCount, + blockedByCoveredCount, + totalBlockedByEdges, + coverageRate: suggestionsCoverageRate, + blockedByResolutionRate, + actionabilityRate: 
suggestionActionabilityRate, + l3EscalationRate, + }, + drift: { + equalsTargetHash, + distanceCounts, + distanceCountsSum, + }, + assertions, + }; +} + +function runBench(tasksDir, outFile) { + const fixtures = loadFixtures(tasksDir); + const baselines = baselineDefinitions(); + const lines = []; + + for (const task of fixtures) { + for (const baseline of baselines) { + for (let rep = 1; rep <= task.runConfig.repetitions; rep += 1) { + lines.push(stableStringifyRecord(runSingle(task, baseline, rep))); + } + } + } + + fs.mkdirSync(path.dirname(outFile), { recursive: true }); + fs.writeFileSync(outFile, lines.length > 0 ? `${lines.join('\n')}\n` : '', 'utf8'); + return { rows: lines.length }; +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const tasksDir = path.join(root, 'bench', 'tasks'); + const outFile = path.join(root, 'bench', 'out', 'results.jsonl'); + const result = runBench(tasksDir, outFile); + process.stdout.write(`EVAL-1 runner wrote ${result.rows} rows to bench/out/results.jsonl\n`); +} + +if (require.main === module) { + main(); +} + +module.exports = { + compareStrings, + runBench, +}; diff --git a/server/src/bench/node/selftest.cjs b/server/src/bench/node/selftest.cjs new file mode 100644 index 0000000..35df4a3 --- /dev/null +++ b/server/src/bench/node/selftest.cjs @@ -0,0 +1,29 @@ +const { compareStrings, computeLineCol, stripBomFromText } = require('./validate-fixtures.cjs'); + +function run() { + const sorted = ['zeta', 'alpha', 'beta'].sort(compareStrings).join(','); + if (sorted !== 'alpha,beta,zeta') return false; + + const sample = 'line1\n line2\nline3'; + const pos = sample.indexOf('l', 8); + const lc = computeLineCol(sample, pos); + if (lc.line !== 2 || lc.col !== 3 || lc.lineText !== ' line2') return false; + + const withBom = '\uFEFF{"ok":true}'; + const stripped = stripBomFromText(withBom); + if (stripped !== '{"ok":true}') return false; + + return true; +} + +function main() { + if (!run()) { + 
process.stdout.write('BENCH_NODE_SELFTEST_FAIL\n'); + process.exit(1); + } + process.stdout.write('BENCH_NODE_SELFTEST_OK\n'); +} + +if (require.main === module) { + main(); +} diff --git a/server/src/bench/node/smoke.cjs b/server/src/bench/node/smoke.cjs new file mode 100644 index 0000000..e5ab616 --- /dev/null +++ b/server/src/bench/node/smoke.cjs @@ -0,0 +1,76 @@ +const fs = require('fs'); +const path = require('path'); + +const { diffState, applyDelta, stableHash } = require('./algebra-bridge.cjs'); +const { compareStrings, stripBomFromText } = require('./validate-fixtures.cjs'); + +function fail() { + const error = new Error('Bench smoke failed'); + error.code = 'E_BENCH_SMOKE_FAILED'; + throw error; +} + +function asRecord(value) { + return value && typeof value === 'object' && !Array.isArray(value) ? value : {}; +} + +function toDomainState(value) { + const record = asRecord(value); + return { + facts: Array.isArray(record.facts) ? record.facts : [], + decisions: Array.isArray(record.decisions) ? record.decisions : [], + constraints: Array.isArray(record.constraints) ? record.constraints : [], + risks: Array.isArray(record.risks) ? record.risks : [], + assumptions: Array.isArray(record.assumptions) ? record.assumptions : [], + }; +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const tasksDir = path.join(root, 'bench', 'tasks'); + const fixtures = fs + .readdirSync(tasksDir) + .filter((name) => name.endsWith('.json')) + .sort(compareStrings); + + if (fixtures.length === 0) { + fail(); + } + + const firstFile = fixtures[0]; + const text = stripBomFromText(fs.readFileSync(path.join(tasksDir, firstFile), 'utf8')); + const fixtureRaw = JSON.parse(text); + const fixture = asRecord(fixtureRaw); + + const taskId = typeof fixture.taskId === 'string' ? fixture.taskId : null; + const baseState = toDomainState(fixture.baseState); + const targetState = fixture.targetState === undefined ? 
baseState : toDomainState(fixture.targetState); + + if (!taskId) { + fail(); + } + + const delta = diffState(baseState, targetState); + const transition = applyDelta(baseState, delta, { mode: 'best_effort' }); + const line = { + taskId, + baseline: 'B1_CORE_BEST_EFFORT', + stateHashAfter: stableHash(transition.nextState), + conflictCount: Array.isArray(transition.conflicts) ? transition.conflicts.length : NaN, + }; + + if ( + typeof line.taskId !== 'string' || + typeof line.baseline !== 'string' || + typeof line.stateHashAfter !== 'string' || + line.stateHashAfter.length === 0 || + typeof line.conflictCount !== 'number' || + Number.isNaN(line.conflictCount) + ) { + fail(); + } + + process.stdout.write('BENCH_SMOKE_OK\n'); +} + +main(); diff --git a/server/src/bench/node/stats.cjs b/server/src/bench/node/stats.cjs new file mode 100644 index 0000000..31621eb --- /dev/null +++ b/server/src/bench/node/stats.cjs @@ -0,0 +1,1274 @@ +const fs = require('fs'); +const path = require('path'); +const crypto = require('crypto'); + +const DOMAIN_ORDER = ['facts', 'decisions', 'constraints', 'risks', 'assumptions']; +const BASELINE_ORDER = [ + 'B1_CORE_BEST_EFFORT', + 'B1_CORE_STRICT', + 'B1_PIPELINE', + 'B2_LLM_DELTA_BEST_EFFORT', + 'B2_LLM_DELTA_STRICT', + 'B3_STRICT_CLOSURE', + 'B4_STRICT_RISK_CLOSURE', + 'B5_STRICT_CLOSURE_SUGGESTIONS', +]; +const BASELINE_WHITELIST = new Set(BASELINE_ORDER); +const COMPARISON_ORDER = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7']; +const COMPARISON_CONFIG = { + C1: { lhs: 'B1_CORE_BEST_EFFORT', rhs: 'B1_CORE_STRICT', scope: 'all' }, + C2: { lhs: 'B1_CORE_BEST_EFFORT', rhs: 'B1_PIPELINE', scope: 'all' }, + C3: { lhs: 'B1_CORE_STRICT', rhs: 'B1_PIPELINE', scope: 'all' }, + C4: { lhs: 'B1_CORE_BEST_EFFORT', rhs: 'B1_CORE_STRICT', scope: 't3' }, + C5: { lhs: 'B2_LLM_DELTA_BEST_EFFORT', rhs: 'B2_LLM_DELTA_STRICT', scope: 't3' }, + C6: { lhs: 'B3_STRICT_CLOSURE', rhs: 'B4_STRICT_RISK_CLOSURE', scope: 't3' }, + C7: { lhs: 'B3_STRICT_CLOSURE', rhs: 
'B5_STRICT_CLOSURE_SUGGESTIONS', scope: 't3' }, +}; +const METRIC_ORDER = [ + 'equalsTargetRate', + 'assertionPassRate', + 'conflictCount', + 'postApplyConflictCount', + 'distanceCountsSum', + 'rollbackRate', + 'deltaRejectionRate', + 'domainRollbackRate', + 'closureViolationRate', + 'maxClosureSizeRatio', + 'blockedByRate', + 'rejectedCount', + 'riskClosureViolationRate', + 'riskClosureRejectedCountMean', + 'riskClosureBlockedByRateMean', + 'riskClosureMaxClosureSizeRatioMean', + 'riskLevelL3Rate', + 'suggestionsCoverageRate', + 'suggestionActionabilityRate', + 'l3EscalationRate', + 'blockedByResolutionRate', + 'suggestionCountMean', +]; +const ALPHA = 0.05; +const Z95 = 1.959963984540054; + +function compareStrings(a, b) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function asRecord(value) { + return value && typeof value === 'object' && !Array.isArray(value) ? value : {}; +} + +function asArray(value) { + return Array.isArray(value) ? value : []; +} + +function round6(value) { + if (!Number.isFinite(value)) return null; + return Math.round(value * 1000000) / 1000000; +} + +function parseResultsJsonl(content) { + return content + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0) + .map((line) => JSON.parse(line)); +} + +function parseSummary(content) { + if (!content.trim()) return { rows: [] }; + const parsed = JSON.parse(content); + return { rows: asArray(parsed.rows) }; +} + +function normalizeBaseline(rawBaseline) { + const baseline = asRecord(rawBaseline); + const rawName = typeof baseline.name === 'string' ? baseline.name : 'UNKNOWN_BASELINE'; + const baselineKey = BASELINE_WHITELIST.has(rawName) ? rawName : 'UNKNOWN_BASELINE'; + const mode = typeof baseline.mode === 'string' ? 
baseline.mode : null; + + if (baselineKey === 'UNKNOWN_BASELINE') { + return { + name: 'UNKNOWN_BASELINE', + mode, + baselineKey: 'UNKNOWN_BASELINE', + baselineModeKey: mode || 'null', + supported: false, + reason: 'unsupported baseline', + }; + } + + return { + name: baselineKey, + mode, + baselineKey, + baselineModeKey: mode || 'null', + supported: baseline.supported === true, + reason: typeof baseline.reason === 'string' ? baseline.reason : null, + }; +} + +function mean(values) { + if (values.length === 0) return null; + let total = 0; + for (const value of values) total += value; + return total / values.length; +} + +function sampleVariance(values, meanValue) { + if (values.length < 2) return null; + let total = 0; + for (const value of values) { + const diff = value - meanValue; + total += diff * diff; + } + return total / (values.length - 1); +} + +function tCritical95(dfRaw) { + if (!Number.isFinite(dfRaw) || dfRaw <= 0) return null; + const df = Math.max(1, Math.floor(dfRaw)); + const table = { + 1: 12.706205, + 2: 4.302653, + 3: 3.182446, + 4: 2.776445, + 5: 2.570582, + 6: 2.446912, + 7: 2.364624, + 8: 2.306004, + 9: 2.262157, + 10: 2.228139, + 11: 2.200985, + 12: 2.178813, + 13: 2.160369, + 14: 2.144787, + 15: 2.13145, + 16: 2.119905, + 17: 2.109816, + 18: 2.100922, + 19: 2.093024, + 20: 2.085963, + 21: 2.079614, + 22: 2.073873, + 23: 2.068658, + 24: 2.063899, + 25: 2.059539, + 26: 2.055529, + 27: 2.051831, + 28: 2.048407, + 29: 2.04523, + 30: 2.042272, + 40: 2.021075, + 60: 2.000298, + 120: 1.97993, + }; + if (table[df]) return table[df]; + if (df > 120) return 1.959964; + if (df > 60) { + const ratio = (df - 60) / 60; + return table[60] + (table[120] - table[60]) * ratio; + } + if (df > 40) { + const ratio = (df - 40) / 20; + return table[40] + (table[60] - table[40]) * ratio; + } + if (df > 30) { + const ratio = (df - 30) / 10; + return table[30] + (table[40] - table[30]) * ratio; + } + return table[30]; +} + +function meanWithCI(values) { + if 
(values.length === 0) { + return { + n: 0, + mean: null, + ci: { low: null, high: null }, + }; + } + + const meanValue = mean(values); + const variance = sampleVariance(values, meanValue); + + if (variance === null) { + return { + n: values.length, + mean: round6(meanValue), + ci: { low: null, high: null }, + }; + } + + const critical = tCritical95(values.length - 1); + if (critical === null) { + return { + n: values.length, + mean: round6(meanValue), + ci: { low: null, high: null }, + }; + } + + const margin = critical * Math.sqrt(variance / values.length); + return { + n: values.length, + mean: round6(meanValue), + ci: { + low: round6(meanValue - margin), + high: round6(meanValue + margin), + }, + }; +} + +function wilsonCI(successes, total) { + if (!Number.isFinite(successes) || !Number.isFinite(total) || total <= 0) { + return { low: null, high: null }; + } + + const p = successes / total; + const denominator = 1 + (Z95 * Z95) / total; + const center = (p + (Z95 * Z95) / (2 * total)) / denominator; + const margin = + (Z95 * Math.sqrt((p * (1 - p) + (Z95 * Z95) / (4 * total)) / total)) / + denominator; + + return { + low: round6(Math.max(0, center - margin)), + high: round6(Math.min(1, center + margin)), + }; +} + +function proportionWithCI(booleanValues) { + let k = 0; + for (const value of booleanValues) { + if (value) k += 1; + } + const n = booleanValues.length; + return { + n, + k, + rate: n === 0 ? 
null : round6(k / n), + ci: wilsonCI(k, n), + }; +} + +function logFactorialFactory(maxN) { + const cache = new Array(maxN + 1).fill(0); + for (let i = 2; i <= maxN; i += 1) { + cache[i] = cache[i - 1] + Math.log(i); + } + return cache; +} + +function logChoose(logFactorial, n, k) { + if (k < 0 || k > n) return Number.NEGATIVE_INFINITY; + return logFactorial[n] - logFactorial[k] - logFactorial[n - k]; +} + +function fisherExactTwoSided(k1, n1, k2, n2) { + if (n1 <= 0 || n2 <= 0) return null; + + const totalSuccess = k1 + k2; + const totalN = n1 + n2; + const minA = Math.max(0, totalSuccess - n2); + const maxA = Math.min(n1, totalSuccess); + + const logFactorial = logFactorialFactory(totalN); + const observedLogP = + logChoose(logFactorial, n1, k1) + + logChoose(logFactorial, n2, k2) - + logChoose(logFactorial, totalN, totalSuccess); + + let pValue = 0; + const epsilon = 1e-12; + + for (let a = minA; a <= maxA; a += 1) { + const b = totalSuccess - a; + const logP = + logChoose(logFactorial, n1, a) + + logChoose(logFactorial, n2, b) - + logChoose(logFactorial, totalN, totalSuccess); + if (logP <= observedLogP + epsilon) { + pValue += Math.exp(logP); + } + } + + if (pValue > 1) pValue = 1; + return round6(pValue); +} + +function erf(x) { + const sign = x < 0 ? 
-1 : 1; + const absX = Math.abs(x); + const t = 1 / (1 + 0.3275911 * absX); + const a1 = 0.254829592; + const a2 = -0.284496736; + const a3 = 1.421413741; + const a4 = -1.453152027; + const a5 = 1.061405429; + const y = 1 - (((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX)); + return sign * y; +} + +function normalCdf(x) { + return 0.5 * (1 + erf(x / Math.sqrt(2))); +} + +function mannWhitneyUTwoSided(valuesA, valuesB) { + const n1 = valuesA.length; + const n2 = valuesB.length; + if (n1 === 0 || n2 === 0) return null; + + const combined = []; + for (const value of valuesA) combined.push({ value, group: 0 }); + for (const value of valuesB) combined.push({ value, group: 1 }); + + combined.sort((left, right) => { + if (left.value < right.value) return -1; + if (left.value > right.value) return 1; + return left.group - right.group; + }); + + const ranks = new Array(combined.length); + let tieSum = 0; + let index = 0; + + while (index < combined.length) { + let end = index + 1; + while (end < combined.length && combined[end].value === combined[index].value) { + end += 1; + } + + const averageRank = (index + 1 + end) / 2; + for (let i = index; i < end; i += 1) { + ranks[i] = averageRank; + } + + const tieSize = end - index; + if (tieSize > 1) { + tieSum += tieSize * tieSize * tieSize - tieSize; + } + + index = end; + } + + let rankSumA = 0; + for (let i = 0; i < combined.length; i += 1) { + if (combined[i].group === 0) { + rankSumA += ranks[i]; + } + } + + const u1 = rankSumA - (n1 * (n1 + 1)) / 2; + const u2 = n1 * n2 - u1; + const u = Math.min(u1, u2); + const n = n1 + n2; + const correction = n > 1 ? tieSum / (n * (n - 1)) : 0; + const variance = (n1 * n2 * (n + 1 - correction)) / 12; + + if (variance <= 0) { + return round6(u === (n1 * n2) / 2 ? 
1 : 0); + } + + const meanU = (n1 * n2) / 2; + const z = (u - meanU) / Math.sqrt(variance); + const pValue = 2 * (1 - normalCdf(Math.abs(z))); + return round6(Math.max(0, Math.min(1, pValue))); +} + +function cliffsDelta(valuesA, valuesB) { + const n1 = valuesA.length; + const n2 = valuesB.length; + if (n1 === 0 || n2 === 0) return null; + + let greater = 0; + let less = 0; + for (const left of valuesA) { + for (const right of valuesB) { + if (left > right) greater += 1; + else if (left < right) less += 1; + } + } + + return round6((greater - less) / (n1 * n2)); +} + +function cohenD(valuesA, valuesB) { + if (valuesA.length < 2 || valuesB.length < 2) return null; + + const meanA = mean(valuesA); + const meanB = mean(valuesB); + const varA = sampleVariance(valuesA, meanA); + const varB = sampleVariance(valuesB, meanB); + + if (varA === null || varB === null) return null; + + const pooledNumerator = (valuesA.length - 1) * varA + (valuesB.length - 1) * varB; + const pooledDenominator = valuesA.length + valuesB.length - 2; + if (pooledDenominator <= 0) return null; + + const pooledVar = pooledNumerator / pooledDenominator; + if (pooledVar <= 0) return null; + + return round6((meanA - meanB) / Math.sqrt(pooledVar)); +} + +function differenceCIForRates(k1, n1, k2, n2) { + if (n1 <= 0 || n2 <= 0) { + return { low: null, high: null }; + } + + const p1 = k1 / n1; + const p2 = k2 / n2; + const diff = p1 - p2; + const se = Math.sqrt((p1 * (1 - p1)) / n1 + (p2 * (1 - p2)) / n2); + const margin = Z95 * se; + return { + low: round6(diff - margin), + high: round6(diff + margin), + }; +} + +function differenceCIForMeans(valuesA, valuesB) { + if (valuesA.length < 2 || valuesB.length < 2) { + return { low: null, high: null }; + } + + const meanA = mean(valuesA); + const meanB = mean(valuesB); + const varA = sampleVariance(valuesA, meanA); + const varB = sampleVariance(valuesB, meanB); + + if (varA === null || varB === null) { + return { low: null, high: null }; + } + + const n1 = 
valuesA.length; + const n2 = valuesB.length; + const se2 = varA / n1 + varB / n2; + if (se2 <= 0) { + return { low: null, high: null }; + } + + const numerator = se2 * se2; + const denominator = + (varA * varA) / (n1 * n1 * (n1 - 1)) + + (varB * varB) / (n2 * n2 * (n2 - 1)); + const df = denominator > 0 ? numerator / denominator : Math.min(n1, n2) - 1; + + const critical = tCritical95(df); + if (critical === null) { + return { low: null, high: null }; + } + + const diff = meanA - meanB; + const margin = critical * Math.sqrt(se2); + return { + low: round6(diff - margin), + high: round6(diff + margin), + }; +} + +function baseEffect() { + return { + riskDifference: null, + riskRatio: null, + cohenD: null, + cliffsDelta: null, + }; +} + +function skippedMetric(reason) { + return { + statStatus: 'skipped', + reason, + lhs: null, + rhs: null, + estimate: null, + ci: { low: null, high: null }, + pValue: null, + qValue: null, + effect: baseEffect(), + }; +} + +function insufficientMetric(lhs, rhs) { + return { + statStatus: 'insufficient_samples', + reason: 'insufficient samples', + lhs, + rhs, + estimate: null, + ci: { low: null, high: null }, + pValue: null, + qValue: null, + effect: baseEffect(), + }; +} + +function okMetric(lhs, rhs, estimate, ci, pValue, effect) { + const normalizedEffect = baseEffect(); + normalizedEffect.riskDifference = effect.riskDifference; + normalizedEffect.riskRatio = effect.riskRatio; + normalizedEffect.cohenD = effect.cohenD; + normalizedEffect.cliffsDelta = effect.cliffsDelta; + + return { + statStatus: 'ok', + reason: null, + lhs, + rhs, + estimate, + ci, + pValue, + qValue: null, + effect: normalizedEffect, + }; +} + +function applyBenjaminiHochberg(items) { + const eligible = items + .filter((item) => item.pValue !== null) + .sort((left, right) => { + if (left.pValue < right.pValue) return -1; + if (left.pValue > right.pValue) return 1; + return compareStrings(left.id, right.id); + }); + + const total = eligible.length; + if (total === 
0) return; + + let running = 1; + for (let i = total - 1; i >= 0; i -= 1) { + const rank = i + 1; + const adjusted = Math.min(1, (eligible[i].pValue * total) / rank); + running = Math.min(running, adjusted); + eligible[i].setQValue(round6(running)); + } +} + +function buildTaskGroups(records) { + const tasks = new Map(); + + for (const record of records) { + const task = asRecord(record.task); + const taskId = typeof task.taskId === 'string' ? task.taskId : ''; + const category = task.category === 'T1' || task.category === 'T2' || task.category === 'T3' ? task.category : 'T1'; + const baseline = normalizeBaseline(record.baseline); + + if (!tasks.has(taskId)) { + tasks.set(taskId, { + taskId, + category, + baselines: new Map(), + }); + } + + const taskGroup = tasks.get(taskId); + if (!taskGroup.baselines.has(baseline.baselineKey)) { + taskGroup.baselines.set(baseline.baselineKey, { + baseline, + records: [], + }); + } + + taskGroup.baselines.get(baseline.baselineKey).records.push(record); + } + + return tasks; +} + +function toBooleanArray(records, eligibility, selector) { + const values = []; + for (const record of records) { + if (eligibility(record)) { + values.push(selector(record) === true); + } + } + return values; +} + +function toNumberArray(records, eligibility, selector) { + const values = []; + for (const record of records) { + if (!eligibility(record)) continue; + const value = selector(record); + if (typeof value === 'number' && Number.isFinite(value)) values.push(value); + } + return values; +} + +function buildRateMetric(lhsRecords, rhsRecords, eligibility, selector) { + const lhsValues = toBooleanArray(lhsRecords, eligibility, selector); + const rhsValues = toBooleanArray(rhsRecords, eligibility, selector); + const lhs = proportionWithCI(lhsValues); + const rhs = proportionWithCI(rhsValues); + + if (lhs.n === 0 || rhs.n === 0) { + return insufficientMetric(lhs, rhs); + } + + const estimate = round6(lhs.rate - rhs.rate); + const ci = 
differenceCIForRates(lhs.k, lhs.n, rhs.k, rhs.n); + const pValue = fisherExactTwoSided(lhs.k, lhs.n, rhs.k, rhs.n); + + if (pValue === null) { + return insufficientMetric(lhs, rhs); + } + + return okMetric(lhs, rhs, estimate, ci, pValue, { + riskDifference: estimate, + riskRatio: rhs.rate === 0 ? null : round6(lhs.rate / rhs.rate), + cohenD: null, + cliffsDelta: null, + }); +} + +function buildMeanMetric(lhsRecords, rhsRecords, eligibility, selector) { + const lhsValues = toNumberArray(lhsRecords, eligibility, selector); + const rhsValues = toNumberArray(rhsRecords, eligibility, selector); + const lhs = meanWithCI(lhsValues); + const rhs = meanWithCI(rhsValues); + + if (lhs.n === 0 || rhs.n === 0) { + return insufficientMetric(lhs, rhs); + } + + const estimate = lhs.mean === null || rhs.mean === null ? null : round6(lhs.mean - rhs.mean); + const ci = differenceCIForMeans(lhsValues, rhsValues); + const pValue = mannWhitneyUTwoSided(lhsValues, rhsValues); + + if (pValue === null) { + return insufficientMetric(lhs, rhs); + } + + return okMetric(lhs, rhs, estimate, ci, pValue, { + riskDifference: null, + riskRatio: null, + cohenD: cohenD(lhsValues, rhsValues), + cliffsDelta: cliffsDelta(lhsValues, rhsValues), + }); +} + +function aggregateBaseline(records, baseline) { + if (!baseline.supported) { + return { + baseline: { + name: baseline.name, + mode: baseline.mode, + supported: false, + reason: baseline.reason, + }, + baselineKey: baseline.baselineKey, + baselineModeKey: baseline.baselineModeKey, + repetitions: records.length, + metrics: { + hashStabilityRate: null, + equalsTargetRate: null, + assertionPassRate: null, + conflictCount: null, + postApplyConflictCount: null, + distanceCountsSum: null, + rollbackRate: null, + deltaRejectionRate: null, + domainRollbackRate: null, + closureViolationRate: null, + maxClosureSizeRatio: null, + blockedByRate: null, + rejectedCount: null, + riskClosureViolationRate: null, + riskClosureRejectedCountMean: null, + 
riskClosureBlockedByRateMean: null, + riskClosureMaxClosureSizeRatioMean: null, + riskLevelL3Rate: null, + suggestionsCoverageRate: null, + suggestionActionabilityRate: null, + l3EscalationRate: null, + blockedByResolutionRate: null, + suggestionCountMean: null, + }, + }; + } + + const hashes = []; + for (const record of records) { + const hash = asRecord(asRecord(record).identity).stateHashAfter; + if (typeof hash === 'string' && hash.length > 0 && !hashes.includes(hash)) { + hashes.push(hash); + } + } + + const conflictEligibility = (record) => { + const count = asRecord(asRecord(record).transition).conflictCount; + return typeof count === 'number' && count > 0; + }; + + const llmEligibility = (record) => { + const value = asRecord(asRecord(record).transition).deltaRejectedIndicator; + return value === 0 || value === 1; + }; + + const closureEligibility = (record) => { + const value = asRecord(asRecord(record).transition).closureViolationFlag; + return value === 0 || value === 1; + }; + + return { + baseline: { + name: baseline.name, + mode: baseline.mode, + supported: true, + reason: null, + }, + baselineKey: baseline.baselineKey, + baselineModeKey: baseline.baselineModeKey, + repetitions: records.length, + metrics: { + hashStabilityRate: records.length > 0 && hashes.length === 1 ? 
1 : 0, + equalsTargetRate: proportionWithCI( + toBooleanArray(records, () => true, (record) => asRecord(asRecord(record).drift).equalsTargetHash) + ), + assertionPassRate: proportionWithCI( + toBooleanArray(records, () => true, (record) => asRecord(asRecord(record).assertions).passed) + ), + conflictCount: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).transition).conflictCount) + ), + postApplyConflictCount: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).transition).postApplyConflictCount) + ), + distanceCountsSum: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).drift).distanceCountsSum) + ), + rollbackRate: proportionWithCI( + toBooleanArray(records, conflictEligibility, (record) => asRecord(asRecord(record).transition).rollbackIndicator === 1) + ), + deltaRejectionRate: proportionWithCI( + toBooleanArray(records, llmEligibility, (record) => asRecord(asRecord(record).transition).deltaRejectedIndicator === 1) + ), + domainRollbackRate: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).transition).domainRollbackRate) + ), + closureViolationRate: proportionWithCI( + toBooleanArray(records, closureEligibility, (record) => asRecord(asRecord(record).transition).closureViolationFlag === 1) + ), + maxClosureSizeRatio: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).transition).maxClosureSizeRatio) + ), + blockedByRate: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).transition).blockedByRate) + ), + rejectedCount: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).transition).rejectedCount) + ), + riskClosureViolationRate: proportionWithCI( + toBooleanArray(records, closureEligibility, (record) => asRecord(asRecord(record).closure).closureViolationFlag === 1) + ), + riskClosureRejectedCountMean: 
meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).closure).rejectedCount) + ), + riskClosureBlockedByRateMean: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).closure).blockedByRate) + ), + riskClosureMaxClosureSizeRatioMean: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).closure).maxClosureSizeRatio) + ), + riskLevelL3Rate: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).closure).riskLevelL3Rate) + ), + suggestionsCoverageRate: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).suggestions).coverageRate) + ), + suggestionActionabilityRate: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).suggestions).actionabilityRate) + ), + l3EscalationRate: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).suggestions).l3EscalationRate) + ), + blockedByResolutionRate: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).suggestions).blockedByResolutionRate) + ), + suggestionCountMean: meanWithCI( + toNumberArray(records, () => true, (record) => asRecord(asRecord(record).suggestions).count) + ), + }, + }; +} + +function buildComparisonMetrics(lhsRecords, rhsRecords) { + const conflictEligibility = (record) => { + const count = asRecord(asRecord(record).transition).conflictCount; + return typeof count === 'number' && count > 0; + }; + + const llmEligibility = (record) => { + const value = asRecord(asRecord(record).transition).deltaRejectedIndicator; + return value === 0 || value === 1; + }; + + const closureEligibility = (record) => { + const value = asRecord(asRecord(record).transition).closureViolationFlag; + return value === 0 || value === 1; + }; + + return { + equalsTargetRate: buildRateMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => 
asRecord(asRecord(record).drift).equalsTargetHash + ), + assertionPassRate: buildRateMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).assertions).passed + ), + conflictCount: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).transition).conflictCount + ), + postApplyConflictCount: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).transition).postApplyConflictCount + ), + distanceCountsSum: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).drift).distanceCountsSum + ), + rollbackRate: buildRateMetric( + lhsRecords, + rhsRecords, + conflictEligibility, + (record) => asRecord(asRecord(record).transition).rollbackIndicator === 1 + ), + deltaRejectionRate: buildRateMetric( + lhsRecords, + rhsRecords, + llmEligibility, + (record) => asRecord(asRecord(record).transition).deltaRejectedIndicator === 1 + ), + domainRollbackRate: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).transition).domainRollbackRate + ), + closureViolationRate: buildRateMetric( + lhsRecords, + rhsRecords, + closureEligibility, + (record) => asRecord(asRecord(record).transition).closureViolationFlag === 1 + ), + maxClosureSizeRatio: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).transition).maxClosureSizeRatio + ), + blockedByRate: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).transition).blockedByRate + ), + rejectedCount: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).transition).rejectedCount + ), + riskClosureViolationRate: buildRateMetric( + lhsRecords, + rhsRecords, + closureEligibility, + (record) => asRecord(asRecord(record).closure).closureViolationFlag === 1 + ), + riskClosureRejectedCountMean: 
buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).closure).rejectedCount + ), + riskClosureBlockedByRateMean: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).closure).blockedByRate + ), + riskClosureMaxClosureSizeRatioMean: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).closure).maxClosureSizeRatio + ), + riskLevelL3Rate: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).closure).riskLevelL3Rate + ), + suggestionsCoverageRate: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).suggestions).coverageRate + ), + suggestionActionabilityRate: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).suggestions).actionabilityRate + ), + l3EscalationRate: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).suggestions).l3EscalationRate + ), + blockedByResolutionRate: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).suggestions).blockedByResolutionRate + ), + suggestionCountMean: buildMeanMetric( + lhsRecords, + rhsRecords, + () => true, + (record) => asRecord(asRecord(record).suggestions).count + ), + }; +} + +function buildStats(records, summaryRowsCount) { + const tasks = buildTaskGroups(records); + const byTask = []; + const comparisons = []; + const qValueTargets = []; + + const taskIds = [...tasks.keys()].sort(compareStrings); + for (const taskId of taskIds) { + const task = tasks.get(taskId); + const baselineLookup = new Map(); + const baselineRows = []; + + for (const baselineName of BASELINE_ORDER) { + const baselineGroup = task.baselines.get(baselineName); + if (!baselineGroup) { + const baseline = { + name: baselineName, + mode: null, + baselineKey: baselineName, + baselineModeKey: 
'null', + supported: false, + reason: 'unsupported baseline', + }; + baselineRows.push(aggregateBaseline([], baseline)); + baselineLookup.set(baselineName, { baseline, records: [] }); + } else { + baselineRows.push(aggregateBaseline(baselineGroup.records, baselineGroup.baseline)); + baselineLookup.set(baselineName, baselineGroup); + } + } + + byTask.push({ + taskId, + category: task.category, + baselines: baselineRows, + }); + + for (const comparisonId of COMPARISON_ORDER) { + const config = COMPARISON_CONFIG[comparisonId]; + const lhs = baselineLookup.get(config.lhs); + const rhs = baselineLookup.get(config.rhs); + + let skipReason = null; + if (config.scope === 't3' && task.category !== 'T3') { + skipReason = 'comparison task filter'; + } else if (!lhs || !rhs || !lhs.baseline.supported || !rhs.baseline.supported) { + skipReason = 'unsupported baseline'; + } + + if (skipReason !== null) { + const skippedMetrics = {}; + for (const metricName of METRIC_ORDER) { + skippedMetrics[metricName] = skippedMetric(skipReason); + } + comparisons.push({ + taskId, + category: task.category, + comparison: comparisonId, + lhs: config.lhs, + rhs: config.rhs, + skipped: true, + reason: skipReason, + metrics: skippedMetrics, + }); + continue; + } + + const metrics = buildComparisonMetrics(lhs.records, rhs.records); + const row = { + taskId, + category: task.category, + comparison: comparisonId, + lhs: config.lhs, + rhs: config.rhs, + skipped: false, + reason: null, + metrics, + }; + comparisons.push(row); + + for (const metricName of METRIC_ORDER) { + const metric = metrics[metricName]; + if (metric.statStatus === 'ok' && metric.pValue !== null) { + qValueTargets.push({ + id: `${taskId}|${comparisonId}|${metricName}`, + pValue: metric.pValue, + setQValue(value) { + metric.qValue = value; + }, + }); + } + } + } + } + + applyBenjaminiHochberg(qValueTargets); + + return { + schema: 'eval2-stats-1', + evalVersion: 'eval3', + metricsAdded: [ + 'rollbackRate', + 'deltaRejectionRate', + 
'domainRollbackRate', + 'closureViolationRate', + 'maxClosureSizeRatio', + 'blockedByRate', + 'rejectedCount', + 'riskClosureViolationRate', + 'riskClosureRejectedCountMean', + 'riskClosureBlockedByRateMean', + 'riskClosureMaxClosureSizeRatioMean', + 'riskLevelL3Rate', + 'suggestionsCoverageRate', + 'suggestionActionabilityRate', + 'l3EscalationRate', + 'blockedByResolutionRate', + 'suggestionCountMean', + ], + inputs: { + experimentId: 'EVAL-1', + alpha: ALPHA, + files: { + results: 'bench/out/results.jsonl', + summary: 'bench/out/summary.json', + }, + baselineOrder: BASELINE_ORDER, + comparisonOrder: COMPARISON_ORDER, + metricOrder: METRIC_ORDER, + }, + byTask, + comparisons, + determinism: { + sorted: true, + stringComparator: 'ab?1:0', + domainOrder: DOMAIN_ORDER, + timestamp: null, + summaryRowsCount, + }, + }; +} + +function generateGlobalSummaryRows(stats) { + const rows = []; + + for (const comparisonId of COMPARISON_ORDER) { + for (const metricName of METRIC_ORDER) { + const all = stats.comparisons.filter((entry) => entry.comparison === comparisonId); + const ok = all.filter((entry) => entry.metrics[metricName].statStatus === 'ok'); + const skipped = all.filter((entry) => entry.metrics[metricName].statStatus === 'skipped').length; + const insufficient = all.filter((entry) => entry.metrics[metricName].statStatus === 'insufficient_samples').length; + + let significant = 0; + let effectTotal = 0; + let effectCount = 0; + + for (const entry of ok) { + const metric = entry.metrics[metricName]; + if (metric.qValue !== null && metric.qValue <= ALPHA) { + significant += 1; + } + + let effectValue = null; + if (metricName === 'equalsTargetRate' || metricName === 'assertionPassRate' || metricName === 'rollbackRate' || metricName === 'deltaRejectionRate') { + effectValue = metric.effect.riskDifference; + } else { + effectValue = metric.effect.cohenD; + } + + if (typeof effectValue === 'number' && Number.isFinite(effectValue)) { + effectTotal += effectValue; + 
effectCount += 1; + } + } + + rows.push({ + comparison: comparisonId, + metric: metricName, + okTasks: ok.length, + skippedTasks: skipped, + insufficientTasks: insufficient, + significantTasks: significant, + meanEffect: effectCount === 0 ? null : round6(effectTotal / effectCount), + }); + } + } + + return rows; +} + +function metricLine(metricName, metric) { + if (metric.statStatus !== 'ok') { + return `- ${metricName}: status=${metric.statStatus} reason=${metric.reason} effect=null_due_to_status p=${metric.pValue} q=${metric.qValue}`; + } + + const effect = `riskDifference=${metric.effect.riskDifference} riskRatio=${metric.effect.riskRatio} cohenD=${metric.effect.cohenD} cliffsDelta=${metric.effect.cliffsDelta}`; + return `- ${metricName}: status=ok reason=null estimate=${metric.estimate} ci=[${metric.ci.low},${metric.ci.high}] p=${metric.pValue} q=${metric.qValue} effect(${effect})`; +} + +function buildReportMarkdown(stats) { + const lines = []; + lines.push('# EVAL-2 Statistical Report'); + lines.push(''); + lines.push('## Overview'); + lines.push(`- Schema: ${stats.schema}`); + lines.push(`- EvalVersion: ${stats.evalVersion}`); + lines.push(`- Alpha: ${stats.inputs.alpha}`); + lines.push(`- Baselines: ${stats.inputs.baselineOrder.join(', ')}`); + lines.push(`- Comparisons: ${stats.inputs.comparisonOrder.join(', ')}`); + lines.push(`- MetricsAdded: ${stats.metricsAdded.join(', ')}`); + lines.push(''); + + lines.push('## Global Summary'); + lines.push('| Comparison | Metric | OkTasks | SkippedTasks | InsufficientTasks | SignificantQ<=0.05 | MeanEffect |'); + lines.push('| --- | --- | --- | --- | --- | --- | --- |'); + for (const row of generateGlobalSummaryRows(stats)) { + lines.push( + `| ${row.comparison} | ${row.metric} | ${row.okTasks} | ${row.skippedTasks} | ${row.insufficientTasks} | ${row.significantTasks} | ${row.meanEffect === null ? 
'null' : row.meanEffect} |` + ); + } + lines.push(''); + + lines.push('## Per-Task Detail'); + const taskIds = stats.byTask.map((item) => item.taskId).sort(compareStrings); + for (const taskId of taskIds) { + const task = stats.byTask.find((item) => item.taskId === taskId); + lines.push(`### ${task.taskId} (${task.category})`); + + for (const comparisonId of COMPARISON_ORDER) { + const comparison = stats.comparisons.find( + (item) => item.taskId === task.taskId && item.comparison === comparisonId + ); + lines.push(`- ${comparisonId}: ${comparison.lhs} vs ${comparison.rhs}`); + for (const metricName of METRIC_ORDER) { + lines.push(` ${metricLine(metricName, comparison.metrics[metricName])}`); + } + } + + lines.push(''); + } + + lines.push('## Appendix: Determinism'); + lines.push(`- sorted=${stats.determinism.sorted}`); + lines.push(`- stringComparator=${stats.determinism.stringComparator}`); + lines.push(`- domainOrder=${stats.determinism.domainOrder.join(',')}`); + lines.push('- timestamp=null'); + lines.push(''); + + return `${lines.join('\n').trimEnd()}\n`; +} + +function sha256Text(text) { + return crypto.createHash('sha256').update(text, 'utf8').digest('hex'); +} + +function buildDigest(resultsRows, summaryRows, statsTasks, summaryHash, statsHash) { + const lines = []; + lines.push('EVAL2_DIGEST'); + lines.push(`results_rows=${resultsRows}`); + lines.push(`summary_rows=${summaryRows}`); + lines.push(`stats_tasks=${statsTasks}`); + lines.push(`sha256_summary=${summaryHash}`); + lines.push(`sha256_eval2_stats=${statsHash}`); + return `${lines.join('\n')}\n`; +} + +function runStats(resultsFile, summaryFile, statsFile, reportFile, digestFile) { + const resultsContent = fs.existsSync(resultsFile) ? fs.readFileSync(resultsFile, 'utf8') : ''; + const summaryContent = fs.existsSync(summaryFile) ? 
fs.readFileSync(summaryFile, 'utf8') : ''; + + const records = parseResultsJsonl(resultsContent); + const summary = parseSummary(summaryContent); + const stats = buildStats(records, summary.rows.length); + const statsJson = `${JSON.stringify(stats, null, 2)}\n`; + const report = buildReportMarkdown(stats); + + fs.mkdirSync(path.dirname(statsFile), { recursive: true }); + fs.writeFileSync(statsFile, statsJson, 'utf8'); + fs.writeFileSync(reportFile, report, 'utf8'); + + const digest = buildDigest( + records.length, + summary.rows.length, + stats.byTask.length, + sha256Text(summaryContent), + sha256Text(statsJson) + ); + fs.writeFileSync(digestFile, digest, 'utf8'); + + return { + rows: records.length, + tasks: stats.byTask.length, + }; +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const outDir = path.join(root, 'bench', 'out'); + + const result = runStats( + path.join(outDir, 'results.jsonl'), + path.join(outDir, 'summary.json'), + path.join(outDir, 'eval2.stats.json'), + path.join(outDir, 'eval2.report.md'), + path.join(outDir, 'eval2.digest.txt') + ); + + process.stdout.write(`EVAL-2 stats wrote ${result.tasks} tasks from ${result.rows} rows\n`); +} + +if (require.main === module) { + main(); +} + +module.exports = { + compareStrings, + parseResultsJsonl, + parseSummary, + buildStats, + buildReportMarkdown, + runStats, +}; diff --git a/server/src/bench/node/store-selftest.cjs b/server/src/bench/node/store-selftest.cjs new file mode 100644 index 0000000..2a16992 --- /dev/null +++ b/server/src/bench/node/store-selftest.cjs @@ -0,0 +1,169 @@ +const fs = require('fs'); +const path = require('path'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (fs.existsSync(candidate)) { + return require(candidate); + } + } + throw new Error('unavailable'); +} + +function loadModules() { + const root = path.resolve(__dirname, '../../..'); + const transferModule = requireFromCandidates([ + path.join(root, 
'dist', 'services', 'transfer-package-v1.js'), + path.join(root, 'dist', 'src', 'services', 'transfer-package-v1.js'), + ]); + const lineageModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'lineage-binding-v1.js'), + path.join(root, 'dist', 'src', 'services', 'lineage-binding-v1.js'), + ]); + const handoffModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'handoff-record-v1.js'), + path.join(root, 'dist', 'src', 'services', 'handoff-record-v1.js'), + ]); + const bundleModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'artifact-bundle-v1.js'), + path.join(root, 'dist', 'src', 'services', 'artifact-bundle-v1.js'), + ]); + const storeModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'artifact-store-v1.js'), + path.join(root, 'dist', 'src', 'services', 'artifact-store-v1.js'), + ]); + + if ( + typeof transferModule.buildTransferPackageV1 !== 'function' || + typeof lineageModule.buildLineageBindingV1 !== 'function' || + typeof handoffModule.buildHandoffRecordV1 !== 'function' || + typeof bundleModule.buildArtifactBundleV1 !== 'function' || + typeof storeModule.buildArtifactStoreRecordV1 !== 'function' || + typeof storeModule.verifyArtifactStoreRecordV1 !== 'function' + ) { + throw new Error('unavailable'); + } + + return { transferModule, lineageModule, handoffModule, bundleModule, storeModule }; +} + +function buildTransferPackage(transferModule) { + return transferModule.buildTransferPackageV1({ + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }, + trunk: { + intent: { primary: null, successCriteria: [], nonGoals: [] }, + stateDigest: { facts: ['fact-a'], decisions: [], constraints: [], risks: [], assumptions: [], openLoops: [] }, + }, + continuation: { nextActions: [], validationChecklist: [] }, + conflicts: [], + 
}); +} + +function buildBundle(transferModule, lineageModule, handoffModule, bundleModule) { + const transferPackageV1 = buildTransferPackage(transferModule); + const lineageBindingV1 = lineageModule.buildLineageBindingV1({ + identity: { + packageId: transferPackageV1.identity.packageId, + revisionId: transferPackageV1.identity.revisionId, + revisionHash: transferPackageV1.identity.revisionHash, + parentRevisionId: transferPackageV1.identity.parentRevisionId, + }, + bindings: { + transfer: { schema: 'transfer-package-1', transferHash: transferPackageV1.transferHash }, + closure: null, + execution: null, + handoff: null, + }, + diagnostics: { notes: [] }, + createdAt: null, + }); + const handoffRecordV1 = handoffModule.buildHandoffRecordV1({ + transferPackageV1, + verification: { transferHashRecomputed: transferPackageV1.transferHash, matchesProvidedHash: true }, + bindings: { closureContractV1: null, applyReportV1Hash: null, executionRecordV1Hash: null }, + lineageBindingV1, + createdAt: null, + }); + return bundleModule.buildArtifactBundleV1({ + identity: { + packageId: transferPackageV1.identity.packageId, + revisionId: transferPackageV1.identity.revisionId, + revisionHash: transferPackageV1.identity.revisionHash, + }, + artifacts: { + transferPackageV1, + lineageBindingV1, + handoffRecordV1, + closureContractV1: null, + }, + diagnostics: { notes: [] }, + createdAt: null, + }); +} + +function buildStoreRecord(storeModule, bundle, createdAt) { + return storeModule.buildArtifactStoreRecordV1({ + identity: { + packageId: bundle.identity.packageId, + revisionId: bundle.identity.revisionId, + revisionHash: bundle.identity.revisionHash, + }, + artifactBundleV1: bundle, + createdAt, + diagnostics: { notes: [] }, + }); +} + +function main() { + try { + const { transferModule, lineageModule, handoffModule, bundleModule, storeModule } = loadModules(); + const bundle = buildBundle(transferModule, lineageModule, handoffModule, bundleModule); + const storeA = 
buildStoreRecord(storeModule, bundle, '2025-01-01T00:00:00.000Z'); + const storeB = buildStoreRecord(storeModule, bundle, '2026-01-01T00:00:00.000Z'); + const mismatch = { + ...storeA, + artifactBundleV1: { + ...storeA.artifactBundleV1, + artifacts: { + ...storeA.artifactBundleV1.artifacts, + handoffRecordV1: { + ...storeA.artifactBundleV1.artifacts.handoffRecordV1, + lineageBindingV1: { + ...storeA.artifactBundleV1.artifacts.handoffRecordV1.lineageBindingV1, + bindings: { + ...storeA.artifactBundleV1.artifacts.handoffRecordV1.lineageBindingV1.bindings, + transfer: { schema: 'transfer-package-1', transferHash: 'f'.repeat(64) }, + }, + }, + }, + }, + }, + }; + const verification = storeModule.verifyArtifactStoreRecordV1(mismatch); + + const ok = + storeA.storeHash === storeB.storeHash && + storeA.createdAt === '2025-01-01T00:00:00.000Z' && + storeB.createdAt === '2026-01-01T00:00:00.000Z' && + verification.ok === true && + verification.matches === false; + + process.stdout.write(ok ? 'STORE_SELFTEST_OK\n' : 'STORE_SELFTEST_FAIL\n'); + process.exit(ok ? 
0 : 1); + } catch (_error) { + process.stdout.write('STORE_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/strip-bom.cjs b/server/src/bench/node/strip-bom.cjs new file mode 100644 index 0000000..105c127 --- /dev/null +++ b/server/src/bench/node/strip-bom.cjs @@ -0,0 +1,51 @@ +const fs = require('fs'); +const path = require('path'); + +function compareStrings(a, b) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function hasUtf8Bom(buffer) { + return buffer.length >= 3 && buffer[0] === 0xef && buffer[1] === 0xbb && buffer[2] === 0xbf; +} + +function stripBomFiles(tasksDir) { + const files = fs + .readdirSync(tasksDir, { withFileTypes: true }) + .filter((entry) => entry.isFile() && entry.name.endsWith('.json')) + .map((entry) => entry.name) + .sort(compareStrings); + + let fixed = 0; + let unchanged = 0; + + for (const fileName of files) { + const absolutePath = path.join(tasksDir, fileName); + const bytes = fs.readFileSync(absolutePath); + if (hasUtf8Bom(bytes)) { + fs.writeFileSync(absolutePath, bytes.slice(3)); + fixed += 1; + } else { + unchanged += 1; + } + } + + process.stdout.write(`BOM_STRIP_SUMMARY fixed=${fixed} unchanged=${unchanged}\n`); +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const tasksDir = path.join(root, 'bench', 'tasks'); + stripBomFiles(tasksDir); +} + +if (require.main === module) { + main(); +} + +module.exports = { + compareStrings, + stripBomFiles, +}; diff --git a/server/src/bench/node/suggestion-selftest.cjs b/server/src/bench/node/suggestion-selftest.cjs new file mode 100644 index 0000000..da2087d --- /dev/null +++ b/server/src/bench/node/suggestion-selftest.cjs @@ -0,0 +1,107 @@ +const fs = require('fs'); +const path = require('path'); + +const MESSAGE_WHITELIST = new Set([ + 'Add missing dependency', + 'Request human confirm', + 'Requires L3 review', + 'Split patch', + 'Retry with context', +]); + +function requireFromCandidates(candidates) 
{ + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw new Error('unavailable'); +} + +function loadModules() { + const rootPath = path.resolve(__dirname, '../../..'); + const suggestionModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'delta-suggestion-engine.js'), + path.join(rootPath, 'dist', 'src', 'services', 'delta-suggestion-engine.js'), + ]); + const policyModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'delta-risk-policy.js'), + path.join(rootPath, 'dist', 'src', 'services', 'delta-risk-policy.js'), + ]); + const contractModule = requireFromCandidates([ + path.join(rootPath, 'dist', 'services', 'closure-contract-v1.js'), + path.join(rootPath, 'dist', 'src', 'services', 'closure-contract-v1.js'), + ]); + + if ( + !suggestionModule || + typeof suggestionModule.buildClosureSuggestionsV1 !== 'function' || + !policyModule || + !policyModule.DEFAULT_RISK_POLICY_V1 || + !contractModule || + typeof contractModule.assertJsonSafe !== 'function' || + typeof contractModule.stableStringify !== 'function' + ) { + throw new Error('unavailable'); + } + + return { + buildClosureSuggestionsV1: suggestionModule.buildClosureSuggestionsV1, + policy: policyModule.DEFAULT_RISK_POLICY_V1, + assertJsonSafe: contractModule.assertJsonSafe, + stableStringify: contractModule.stableStringify, + }; +} + +function makeInput(policy) { + return { + rejected: [ + { + domain: 'decisions', + key: 'decision.conflict', + path: 'answer', + op: 'modify', + reasonCode: 'CONFLICT', + blockedBy: null, + riskLevel: 'L2', + }, + { + domain: 'facts', + key: 'fact.alpha', + path: null, + op: 'add', + reasonCode: 'DEPENDENCY_BLOCKED', + blockedBy: [{ domain: 'facts', key: 'fact.alpha', path: null }], + riskLevel: 'L3', + }, + ], + policy, + limits: { maxSuggestions: 64 }, + }; +} + +function main() { + try { + const modules = loadModules(); + const input = 
makeInput(modules.policy); + const resultA = modules.buildClosureSuggestionsV1(input); + const resultB = modules.buildClosureSuggestionsV1(input); + + modules.assertJsonSafe(resultA.suggestions); + + const messagesValid = resultA.suggestions.every( + (entry) => entry && entry.schema === 'closure-suggestion-1' && MESSAGE_WHITELIST.has(entry.message) + ); + const deterministic = modules.stableStringify(resultA.suggestions) === modules.stableStringify(resultB.suggestions); + + const ok = messagesValid && deterministic; + process.stdout.write(ok ? 'SUGGESTION_SELFTEST_OK\n' : 'SUGGESTION_SELFTEST_FAIL\n'); + process.exit(ok ? 0 : 1); + } catch (_error) { + process.stdout.write('SUGGESTION_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/transfer-selftest.cjs b/server/src/bench/node/transfer-selftest.cjs new file mode 100644 index 0000000..f21a7f3 --- /dev/null +++ b/server/src/bench/node/transfer-selftest.cjs @@ -0,0 +1,129 @@ +const fs = require('fs'); +const path = require('path'); + +function requireFromCandidates(candidates) { + for (const candidate of candidates) { + if (!fs.existsSync(candidate)) { + continue; + } + return require(candidate); + } + throw new Error('unavailable'); +} + +function loadTransferModule() { + const root = path.resolve(__dirname, '../../..'); + const transferModule = requireFromCandidates([ + path.join(root, 'dist', 'services', 'transfer-package-v1.js'), + path.join(root, 'dist', 'src', 'services', 'transfer-package-v1.js'), + ]); + + if ( + !transferModule || + typeof transferModule.buildTransferPackageV1 !== 'function' || + typeof transferModule.stableStringify !== 'function' + ) { + throw new Error('unavailable'); + } + + return transferModule; +} + +function makeInput() { + return { + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + bindings: { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: 
null, + }, + trunk: { + intent: { + primary: 'handoff', + successCriteria: ['verify', 'ship'], + nonGoals: ['scope-creep'], + }, + stateDigest: { + facts: ['fact-a'], + openLoops: ['loop-a'], + }, + }, + continuation: { + nextActions: [ + { + code: 'NEXT_VERIFY', + message: 'Verify transfer', + expectedOutput: null, + domains: ['facts', 'decisions'], + }, + ], + validationChecklist: [ + { + code: 'CHECK_HASH', + message: 'Check transfer hash', + severity: 'must', + }, + ], + }, + conflicts: [], + }; +} + +function main() { + try { + const transferModule = loadTransferModule(); + const input = makeInput(); + const first = transferModule.buildTransferPackageV1(input); + const second = transferModule.buildTransferPackageV1(input); + + if (first.bindings.closureContractV1 !== null) { + throw new Error('fail'); + } + if (transferModule.stableStringify(first) !== transferModule.stableStringify(second)) { + throw new Error('fail'); + } + + let nonJsonSafeCaught = false; + try { + transferModule.buildTransferPackageV1({ + identity: { + packageId: 'pkg-1', + revisionId: 'rev-1', + revisionHash: 'rev-hash-1', + parentRevisionId: null, + }, + trunk: { + intent: { + primary: BigInt(1), + }, + }, + }); + } catch (error) { + if ( + error && + typeof error === 'object' && + error.code === 'E_TRANSFER_NON_JSON_SAFE' && + error.message === 'Transfer package contains non JSON-safe value' + ) { + nonJsonSafeCaught = true; + } + } + + if (!nonJsonSafeCaught) { + throw new Error('fail'); + } + + process.stdout.write('TRANSFER_SELFTEST_OK\n'); + process.exit(0); + } catch (_error) { + process.stdout.write('TRANSFER_SELFTEST_FAIL\n'); + process.exit(1); + } +} + +main(); diff --git a/server/src/bench/node/validate-fixtures.cjs b/server/src/bench/node/validate-fixtures.cjs new file mode 100644 index 0000000..4c70119 --- /dev/null +++ b/server/src/bench/node/validate-fixtures.cjs @@ -0,0 +1,109 @@ +const fs = require('fs'); +const path = require('path'); + +function compareStrings(a, b) { 
+ if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function stripBomFromText(text) { + return text.length > 0 && text.charCodeAt(0) === 0xfeff ? text.slice(1) : text; +} + +function computeLineCol(text, pos) { + const boundedPos = Number.isFinite(pos) ? Math.max(0, Math.min(text.length, Math.floor(pos))) : 0; + + let line = 1; + let col = 1; + let lineStart = 0; + + for (let index = 0; index < boundedPos; index += 1) { + if (text.charCodeAt(index) === 10) { + line += 1; + col = 1; + lineStart = index + 1; + } else { + col += 1; + } + } + + let lineEnd = text.indexOf('\n', lineStart); + if (lineEnd < 0) lineEnd = text.length; + + let lineText = text.slice(lineStart, lineEnd); + if (lineText.endsWith('\r')) lineText = lineText.slice(0, -1); + + return { line, col, lineText }; +} + +function extractPosition(errorMessage) { + const match = /position\s+(\d+)/i.exec(errorMessage); + if (!match) return 0; + const parsed = Number.parseInt(match[1], 10); + return Number.isFinite(parsed) ? parsed : 0; +} + +function validateFixtures(tasksDir) { + const files = fs + .readdirSync(tasksDir, { withFileTypes: true }) + .filter((entry) => entry.isFile() && entry.name.endsWith('.json')) + .map((entry) => entry.name) + .sort(compareStrings); + + const invalids = []; + + for (const fileName of files) { + const absolutePath = path.join(tasksDir, fileName); + const relativePath = `bench/tasks/${fileName}`; + let text = ''; + + try { + text = stripBomFromText(fs.readFileSync(absolutePath, 'utf8')); + JSON.parse(text); + } catch (error) { + const message = error instanceof Error ? error.message : ''; + const position = extractPosition(message); + const computed = computeLineCol(text, position); + invalids.push({ + relativePath, + line: Number.isInteger(computed.line) && computed.line > 0 ? computed.line : 1, + col: Number.isInteger(computed.col) && computed.col > 0 ? computed.col : 1, + lineText: typeof computed.lineText === 'string' ? 
computed.lineText : '', + }); + } + } + + if (invalids.length === 0) { + process.stdout.write('ALL_FIXTURES_VALID\n'); + return 0; + } + + for (const invalid of invalids) { + process.stdout.write(`INVALID_JSON ${invalid.relativePath} line=${invalid.line} col=${invalid.col}\n`); + process.stdout.write('CONTEXT_BEGIN\n'); + process.stdout.write(`${invalid.lineText.trimEnd()}\n`); + process.stdout.write(`${' '.repeat(Math.max(0, invalid.col - 1))}^\n`); + process.stdout.write('CONTEXT_END\n'); + } + + return 1; +} + +function main() { + const root = path.resolve(__dirname, '../../..'); + const tasksDir = path.join(root, 'bench', 'tasks'); + const exitCode = validateFixtures(tasksDir); + process.exit(exitCode); +} + +if (require.main === module) { + main(); +} + +module.exports = { + compareStrings, + computeLineCol, + stripBomFromText, + validateFixtures, +}; diff --git a/server/src/bench/runner.ts b/server/src/bench/runner.ts new file mode 100644 index 0000000..a15b9f8 --- /dev/null +++ b/server/src/bench/runner.ts @@ -0,0 +1,595 @@ +import fs from 'fs'; +import path from 'path'; + +import { diffState } from '../algebra/semanticDiff/diffState'; +import { stableHash } from '../algebra/semanticDiff/key'; +import type { DomainName, SemanticDelta } from '../algebra/semanticDiff/types'; +import { applyDelta } from '../algebra/stateTransition/applyDelta'; +import { detectConflicts } from '../algebra/stateTransition/detectConflicts'; + +const DOMAIN_ORDER: DomainName[] = ['facts', 'decisions', 'constraints', 'risks', 'assumptions']; +const BASELINE_ORDER = ['B1_CORE_BEST_EFFORT', 'B1_CORE_STRICT', 'B1_PIPELINE'] as const; + +const DIFF_MODES: Record<'B1_CORE_BEST_EFFORT' | 'B1_CORE_STRICT', 'best_effort' | 'strict'> = { + B1_CORE_BEST_EFFORT: 'best_effort', + B1_CORE_STRICT: 'strict', +}; + +type BaselineName = (typeof BASELINE_ORDER)[number]; +type TaskCategory = 'T1' | 'T2'; + +type DomainCounts = { + added: number; + removed: number; + modified: number; +}; + +type 
DeltaSummary = { + counts: Record; + hasCollisions: boolean; + assumptionsDerived: boolean; + modifiedDomains: DomainName[]; +}; + +type TargetAssertions = { + mustEqualTargetHash?: boolean; + mustHaveNoConflicts?: boolean; + maxDistanceCountsSum?: number; + domainMustNotChange?: DomainName[]; + requiredDomainsModified?: DomainName[]; + requiredDecisionKeys?: string[]; + requiredAssumptionKeys?: string[]; +}; + +type TaskFixture = { + taskId: string; + category: TaskCategory; + description: string; + baseState: Record; + targetState?: Record; + targetAssertions?: TargetAssertions; + runConfig: { repetitions: number }; +}; + +type BaselineRecord = { + name: BaselineName; + mode: 'best_effort' | 'strict' | null; + supported: boolean; + reason: string | null; +}; + +type BenchResultRecord = { + experiment: { id: string; ts: null }; + task: { taskId: string; category: TaskCategory; rep: number }; + baseline: BaselineRecord; + identity: { + stateHashBefore: string | null; + stateHashAfter: string | null; + targetHash: string | null; + }; + delta: { + summary: DeltaSummary | null; + }; + transition: { + conflictCount: number; + postApplyConflictCount: number; + }; + drift: { + equalsTargetHash: boolean; + distanceCounts: Record; + distanceCountsSum: number; + }; + assertions: { + passed: boolean; + failed: string[]; + }; +}; + +function compareString(a: string, b: string): number { + return a < b ? -1 : a > b ? 1 : 0; +} + +function isRecord(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value); +} + +function asArray(value: unknown): unknown[] { + return Array.isArray(value) ? value : []; +} + +function toDomainState(value: unknown): Record { + const record = isRecord(value) ? 
value : {}; + return { + facts: asArray(record.facts), + decisions: asArray(record.decisions), + constraints: asArray(record.constraints), + risks: asArray(record.risks), + assumptions: asArray(record.assumptions), + }; +} + +function parseTaskFixture(raw: unknown, fixtureFile: string): TaskFixture { + if (!isRecord(raw)) { + throw new Error(`Invalid fixture shape: ${fixtureFile}`); + } + + const taskId = typeof raw.taskId === 'string' ? raw.taskId : ''; + const category = raw.category === 'T1' || raw.category === 'T2' ? raw.category : null; + const description = typeof raw.description === 'string' ? raw.description : ''; + + if (!taskId || !category || !description) { + throw new Error(`Invalid fixture metadata: ${fixtureFile}`); + } + + const runConfigRaw = isRecord(raw.runConfig) ? raw.runConfig : {}; + const repetitionsRaw = runConfigRaw.repetitions; + const repetitions = typeof repetitionsRaw === 'number' && Number.isInteger(repetitionsRaw) && repetitionsRaw > 0 ? repetitionsRaw : 15; + + const baseState = toDomainState(raw.baseState); + const targetState = raw.targetState === undefined ? undefined : toDomainState(raw.targetState); + + stableHash(baseState); + if (targetState) stableHash(targetState); + + const assertionsRaw = isRecord(raw.targetAssertions) ? raw.targetAssertions : undefined; + const normalizeDomainArray = (value: unknown): DomainName[] => + asArray(value) + .filter((item): item is DomainName => + item === 'facts' || + item === 'decisions' || + item === 'constraints' || + item === 'risks' || + item === 'assumptions' + ) + .sort((a, b) => DOMAIN_ORDER.indexOf(a) - DOMAIN_ORDER.indexOf(b)); + + const normalizeStringArray = (value: unknown): string[] => + asArray(value) + .filter((item): item is string => typeof item === 'string' && item.length > 0) + .sort(compareString); + + const targetAssertions: TargetAssertions | undefined = assertionsRaw + ? { + mustEqualTargetHash: + typeof assertionsRaw.mustEqualTargetHash === 'boolean' ? 
assertionsRaw.mustEqualTargetHash : undefined, + mustHaveNoConflicts: + typeof assertionsRaw.mustHaveNoConflicts === 'boolean' ? assertionsRaw.mustHaveNoConflicts : undefined, + maxDistanceCountsSum: + typeof assertionsRaw.maxDistanceCountsSum === 'number' && Number.isFinite(assertionsRaw.maxDistanceCountsSum) + ? assertionsRaw.maxDistanceCountsSum + : undefined, + domainMustNotChange: normalizeDomainArray(assertionsRaw.domainMustNotChange), + requiredDomainsModified: normalizeDomainArray(assertionsRaw.requiredDomainsModified), + requiredDecisionKeys: normalizeStringArray(assertionsRaw.requiredDecisionKeys), + requiredAssumptionKeys: normalizeStringArray(assertionsRaw.requiredAssumptionKeys), + } + : undefined; + + return { + taskId, + category, + description, + baseState, + targetState, + targetAssertions, + runConfig: { repetitions }, + }; +} + +function zeroCounts(): Record { + return { + facts: { added: 0, removed: 0, modified: 0 }, + decisions: { added: 0, removed: 0, modified: 0 }, + constraints: { added: 0, removed: 0, modified: 0 }, + risks: { added: 0, removed: 0, modified: 0 }, + assumptions: { added: 0, removed: 0, modified: 0 }, + }; +} + +function summarizeDelta(delta: SemanticDelta): DeltaSummary { + const counts = zeroCounts(); + + for (const domain of DOMAIN_ORDER) { + counts[domain] = { + added: delta[domain].added.length, + removed: delta[domain].removed.length, + modified: delta[domain].modified.length, + }; + } + + return { + counts, + hasCollisions: delta.meta.collisions.hard.length > 0 || delta.meta.collisions.soft.length > 0, + assumptionsDerived: delta.meta.assumptionsDerived === true, + modifiedDomains: DOMAIN_ORDER.filter((domain) => { + const domainCounts = counts[domain]; + return domainCounts.added + domainCounts.removed + domainCounts.modified > 0; + }), + }; +} + +function calculateDistanceCounts(delta: SemanticDelta): Record { + const counts = zeroCounts(); + for (const domain of DOMAIN_ORDER) { + counts[domain] = { + added: 
delta[domain].added.length, + removed: delta[domain].removed.length, + modified: delta[domain].modified.length, + }; + } + return counts; +} + +function sumDistanceCounts(counts: Record): number { + let total = 0; + for (const domain of DOMAIN_ORDER) { + total += counts[domain].added + counts[domain].removed + counts[domain].modified; + } + return total; +} + +function hasMatchingKey(units: unknown[], candidates: string[], expected: string): boolean { + for (const unit of units) { + if (!isRecord(unit)) continue; + for (const field of candidates) { + const value = unit[field]; + if (typeof value === 'string' && value === expected) return true; + } + } + return false; +} + +function evaluateAssertions(params: { + targetAssertions: TargetAssertions | undefined; + equalsTargetHash: boolean; + conflictCount: number; + postApplyConflictCount: number; + distanceCounts: Record; + distanceCountsSum: number; + modifiedDomains: DomainName[]; + nextState: Record | null; +}): { passed: boolean; failed: string[] } { + const { + targetAssertions, + equalsTargetHash, + conflictCount, + postApplyConflictCount, + distanceCounts, + distanceCountsSum, + modifiedDomains, + nextState, + } = params; + + if (!targetAssertions) return { passed: true, failed: [] }; + + const failures: string[] = []; + const totalConflictCount = conflictCount + postApplyConflictCount; + + if (targetAssertions.mustEqualTargetHash === true && !equalsTargetHash) { + failures.push('ASSERT_EQUALS_TARGET_HASH'); + } + + if (targetAssertions.mustEqualTargetHash === false && equalsTargetHash) { + failures.push('ASSERT_NOT_EQUALS_TARGET_HASH'); + } + + if (typeof targetAssertions.mustHaveNoConflicts === 'boolean') { + if (targetAssertions.mustHaveNoConflicts && totalConflictCount > 0) { + failures.push('ASSERT_CONFLICTS_PRESENT'); + } + if (!targetAssertions.mustHaveNoConflicts && totalConflictCount === 0) { + failures.push('ASSERT_CONFLICTS_ABSENT'); + } + } + + if ( + typeof targetAssertions.maxDistanceCountsSum 
=== 'number' && + Number.isFinite(targetAssertions.maxDistanceCountsSum) && + distanceCountsSum > targetAssertions.maxDistanceCountsSum + ) { + failures.push('ASSERT_DISTANCE_EXCEEDED'); + } + + for (const domain of targetAssertions.domainMustNotChange ?? []) { + const count = distanceCounts[domain]; + if (count.added + count.removed + count.modified > 0) { + failures.push('ASSERT_DOMAIN_MUST_NOT_CHANGE'); + break; + } + } + + for (const domain of targetAssertions.requiredDomainsModified ?? []) { + if (!modifiedDomains.includes(domain)) { + failures.push('ASSERT_REQUIRED_DOMAIN_NOT_MODIFIED'); + break; + } + } + + if (nextState) { + for (const expectedDecisionKey of targetAssertions.requiredDecisionKeys ?? []) { + if ( + !hasMatchingKey( + nextState.decisions, + ['id', 'key', 'decisionId', 'question', 'title'], + expectedDecisionKey + ) + ) { + failures.push('ASSERT_REQUIRED_DECISION_KEYS_MISSING'); + break; + } + } + + for (const expectedAssumptionKey of targetAssertions.requiredAssumptionKeys ?? []) { + if ( + !hasMatchingKey( + nextState.assumptions, + ['id', 'key', 'assumptionId', 'statement', 'topic'], + expectedAssumptionKey + ) + ) { + failures.push('ASSERT_REQUIRED_ASSUMPTION_KEYS_MISSING'); + break; + } + } + } + + const failed = [...new Set(failures)].sort(compareString); + return { + passed: failed.length === 0, + failed, + }; +} + +function normalizeDeltaSummary(summary: DeltaSummary | null): DeltaSummary | null { + if (!summary) return null; + + const counts = zeroCounts(); + for (const domain of DOMAIN_ORDER) { + const domainCounts = summary.counts[domain] ?? 
{ added: 0, removed: 0, modified: 0 }; + counts[domain] = { + added: domainCounts.added, + removed: domainCounts.removed, + modified: domainCounts.modified, + }; + } + + return { + counts, + hasCollisions: summary.hasCollisions, + assumptionsDerived: summary.assumptionsDerived, + modifiedDomains: DOMAIN_ORDER.filter((domain) => summary.modifiedDomains.includes(domain)), + }; +} + +function normalizeDistanceCounts(counts: Record): Record { + const out = zeroCounts(); + for (const domain of DOMAIN_ORDER) { + const domainCounts = counts[domain] ?? { added: 0, removed: 0, modified: 0 }; + out[domain] = { + added: domainCounts.added, + removed: domainCounts.removed, + modified: domainCounts.modified, + }; + } + return out; +} + +function stableStringifyRecord(record: BenchResultRecord): string { + const normalized: BenchResultRecord = { + experiment: { + id: record.experiment.id, + ts: null, + }, + task: { + taskId: record.task.taskId, + category: record.task.category, + rep: record.task.rep, + }, + baseline: { + name: record.baseline.name, + mode: record.baseline.mode, + supported: record.baseline.supported, + reason: record.baseline.reason, + }, + identity: { + stateHashBefore: record.identity.stateHashBefore, + stateHashAfter: record.identity.stateHashAfter, + targetHash: record.identity.targetHash, + }, + delta: { + summary: normalizeDeltaSummary(record.delta.summary), + }, + transition: { + conflictCount: record.transition.conflictCount, + postApplyConflictCount: record.transition.postApplyConflictCount, + }, + drift: { + equalsTargetHash: record.drift.equalsTargetHash, + distanceCounts: normalizeDistanceCounts(record.drift.distanceCounts), + distanceCountsSum: record.drift.distanceCountsSum, + }, + assertions: { + passed: record.assertions.passed, + failed: [...record.assertions.failed].sort(compareString), + }, + }; + + return JSON.stringify(normalized); +} + +function baselineDefinitions(): BaselineRecord[] { + return [ + { name: 'B1_CORE_BEST_EFFORT', mode: 
DIFF_MODES.B1_CORE_BEST_EFFORT, supported: true, reason: null }, + { name: 'B1_CORE_STRICT', mode: DIFF_MODES.B1_CORE_STRICT, supported: true, reason: null }, + { + name: 'B1_PIPELINE', + mode: null, + supported: false, + reason: 'imports services/DB', + }, + ]; +} + +function runSingle( + task: TaskFixture, + baseline: BaselineRecord, + rep: number +): BenchResultRecord { + const stateHashBefore = stableHash(task.baseState); + const targetHash = task.targetState ? stableHash(task.targetState) : null; + + if (!baseline.supported) { + const assertions = { + passed: false, + failed: ['BASELINE_UNSUPPORTED'], + }; + + return { + experiment: { id: 'EVAL-1', ts: null }, + task: { taskId: task.taskId, category: task.category, rep }, + baseline, + identity: { + stateHashBefore, + stateHashAfter: null, + targetHash, + }, + delta: { + summary: null, + }, + transition: { + conflictCount: 0, + postApplyConflictCount: 0, + }, + drift: { + equalsTargetHash: false, + distanceCounts: zeroCounts(), + distanceCountsSum: 0, + }, + assertions, + }; + } + + if (!task.targetState) { + const assertions = { + passed: false, + failed: ['TARGET_STATE_REQUIRED'], + }; + + return { + experiment: { id: 'EVAL-1', ts: null }, + task: { taskId: task.taskId, category: task.category, rep }, + baseline, + identity: { + stateHashBefore, + stateHashAfter: null, + targetHash, + }, + delta: { + summary: null, + }, + transition: { + conflictCount: 0, + postApplyConflictCount: 0, + }, + drift: { + equalsTargetHash: false, + distanceCounts: zeroCounts(), + distanceCountsSum: 0, + }, + assertions, + }; + } + + const delta = diffState(task.baseState, task.targetState); + const transition = applyDelta(task.baseState, delta, { mode: baseline.mode ?? 
'best_effort' }); + const postApplyConflicts = detectConflicts(transition.nextState); + const driftDelta = diffState(transition.nextState, task.targetState); + + const stateHashAfter = stableHash(transition.nextState); + const equalsTargetHash = stateHashAfter === targetHash; + const distanceCounts = calculateDistanceCounts(driftDelta); + const distanceCountsSum = sumDistanceCounts(distanceCounts); + + const assertions = evaluateAssertions({ + targetAssertions: task.targetAssertions, + equalsTargetHash, + conflictCount: transition.conflicts.length, + postApplyConflictCount: postApplyConflicts.length, + distanceCounts, + distanceCountsSum, + modifiedDomains: summarizeDelta(delta).modifiedDomains, + nextState: toDomainState(transition.nextState), + }); + + return { + experiment: { id: 'EVAL-1', ts: null }, + task: { taskId: task.taskId, category: task.category, rep }, + baseline, + identity: { + stateHashBefore, + stateHashAfter, + targetHash, + }, + delta: { + summary: summarizeDelta(delta), + }, + transition: { + conflictCount: transition.conflicts.length, + postApplyConflictCount: postApplyConflicts.length, + }, + drift: { + equalsTargetHash, + distanceCounts, + distanceCountsSum, + }, + assertions, + }; +} + +function loadFixtures(tasksDir: string): TaskFixture[] { + const files = fs + .readdirSync(tasksDir) + .filter((file) => file.endsWith('.json')) + .sort(compareString); + + const fixtures = files.map((file) => { + const fullPath = path.join(tasksDir, file); + const raw = JSON.parse(fs.readFileSync(fullPath, 'utf8')) as unknown; + return parseTaskFixture(raw, file); + }); + + return fixtures.sort((a, b) => compareString(a.taskId, b.taskId)); +} + +export function runBench(tasksDir: string, outFile: string): { rows: number } { + const fixtures = loadFixtures(tasksDir); + const baselines = baselineDefinitions(); + const lines: string[] = []; + + for (const task of fixtures) { + for (const baseline of baselines) { + for (let rep = 1; rep <= 
task.runConfig.repetitions; rep += 1) { + const row = runSingle(task, baseline, rep); + lines.push(stableStringifyRecord(row)); + } + } + } + + fs.mkdirSync(path.dirname(outFile), { recursive: true }); + const payload = lines.length > 0 ? `${lines.join('\n')}\n` : ''; + fs.writeFileSync(outFile, payload, 'utf8'); + + return { rows: lines.length }; +} + +function main(): void { + const root = process.cwd(); + const tasksDir = path.resolve(root, 'bench', 'tasks'); + const outFile = path.resolve(root, 'bench', 'out', 'results.jsonl'); + const result = runBench(tasksDir, outFile); + process.stdout.write(`EVAL-1 runner wrote ${result.rows} rows to ${outFile}\n`); +} + +if (require.main === module) { + main(); +} diff --git a/server/src/bench/validate-fixtures.ts b/server/src/bench/validate-fixtures.ts new file mode 100644 index 0000000..f413f2d --- /dev/null +++ b/server/src/bench/validate-fixtures.ts @@ -0,0 +1,103 @@ +import fs from 'fs'; +import path from 'path'; + +export function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +export function computeLineCol(text: string, pos: number): { line: number; col: number; lineText: string } { + const boundedPos = Number.isFinite(pos) ? 
Math.max(0, Math.min(text.length, Math.floor(pos))) : 0; + + let line = 1; + let col = 1; + let lineStart = 0; + + for (let index = 0; index < boundedPos; index += 1) { + if (text.charCodeAt(index) === 10) { + line += 1; + col = 1; + lineStart = index + 1; + } else { + col += 1; + } + } + + let lineEnd = text.indexOf('\n', lineStart); + if (lineEnd < 0) lineEnd = text.length; + + let lineText = text.slice(lineStart, lineEnd); + if (lineText.endsWith('\r')) { + lineText = lineText.slice(0, -1); + } + + return { line, col, lineText }; +} + +function extractPosition(errorMessage: string): number { + const match = /position\s+(\d+)/i.exec(errorMessage); + if (!match) return 0; + const parsed = Number.parseInt(match[1], 10); + return Number.isFinite(parsed) ? parsed : 0; +} + +function validateFixtures(tasksDir: string): number { + const entries = fs + .readdirSync(tasksDir, { withFileTypes: true }) + .filter((entry) => entry.isFile() && entry.name.endsWith('.json')) + .map((entry) => entry.name) + .sort(compareStrings); + + const invalids: Array<{ relativePath: string; line: number; col: number; lineText: string }> = []; + + for (const fileName of entries) { + const absolutePath = path.join(tasksDir, fileName); + const relativePath = `bench/tasks/${fileName}`; + let text = ''; + + try { + text = fs.readFileSync(absolutePath, 'utf8'); + JSON.parse(text); + } catch (error) { + const message = error instanceof Error ? error.message : ''; + const position = extractPosition(message); + const computed = computeLineCol(text, position); + const line = Number.isInteger(computed.line) && computed.line > 0 ? computed.line : 1; + const col = Number.isInteger(computed.col) && computed.col > 0 ? computed.col : 1; + const lineText = typeof computed.lineText === 'string' ? 
computed.lineText : ''; + + invalids.push({ + relativePath, + line, + col, + lineText, + }); + } + } + + if (invalids.length === 0) { + process.stdout.write('ALL_FIXTURES_VALID\n'); + return 0; + } + + for (const invalid of invalids) { + process.stdout.write(`INVALID_JSON ${invalid.relativePath} line=${invalid.line} col=${invalid.col}\n`); + process.stdout.write('CONTEXT_BEGIN\n'); + process.stdout.write(`${invalid.lineText.trimEnd()}\n`); + process.stdout.write(`${' '.repeat(Math.max(0, invalid.col - 1))}^\n`); + process.stdout.write('CONTEXT_END\n'); + } + + return 1; +} + +function main(): void { + const tasksDir = path.resolve(process.cwd(), 'bench', 'tasks'); + const exitCode = validateFixtures(tasksDir); + process.exit(exitCode); +} + +if (require.main === module) { + main(); +} diff --git a/server/src/controllers/artifact.controller.ts b/server/src/controllers/artifact.controller.ts new file mode 100644 index 0000000..2bf795d --- /dev/null +++ b/server/src/controllers/artifact.controller.ts @@ -0,0 +1,85 @@ +import type { Request, Response } from 'express'; +import { createNotFoundError, sendApiError, sendApiOk } from '../lib/api-error'; +import { artifactCreateBodySchema, artifactRouteParamsSchema, type ArtifactCreateBody } from '../types/api/artifact.api'; + +function toArtifactBundle(body: ArtifactCreateBody) { + return { + schema: body.schema, + identity: { + packageId: body.identity.packageId, + revisionId: body.identity.revisionId ?? null, + revisionHash: body.identity.revisionHash ?? 
null, + }, + payload: body.payload, + references: body.references, + }; +} + +export async function createArtifact(req: Request, res: Response): Promise { + try { + const body = artifactCreateBodySchema.parse(req.body); + const serviceModule = require('../services/artifact-store.service') as typeof import('../services/artifact-store.service'); + const service = new serviceModule.ArtifactStoreService(); + const artifactBundle = toArtifactBundle(body); + const stored = await service.storeArtifactBundle({ + schema: artifactBundle.schema, + identity: artifactBundle.identity, + payload: artifactBundle, + }); + + return sendApiOk(res, { + id: stored.id, + bundleHash: stored.bundleHash, + createdAt: stored.createdAt, + }); + } catch (error) { + return sendApiError(res, error); + } +} + +export async function getArtifact(req: Request, res: Response): Promise { + try { + const params = artifactRouteParamsSchema.parse(req.params); + const serviceModule = require('../services/artifact-store.service') as typeof import('../services/artifact-store.service'); + const service = new serviceModule.ArtifactStoreService(); + const found = await service.loadArtifactBundle(params); + if (!found) { + throw createNotFoundError('ERR_ARTIFACT_NOT_FOUND', 'Artifact not found'); + } + + return sendApiOk(res, { + id: found.id, + bundleHash: found.bundleHash, + createdAt: found.createdAt, + artifactBundle: found.payload, + }); + } catch (error) { + return sendApiError(res, error); + } +} + +export async function verifyArtifact(req: Request, res: Response): Promise { + try { + const params = artifactRouteParamsSchema.parse(req.params); + const serviceModule = require('../services/artifact-store.service') as typeof import('../services/artifact-store.service'); + const service = new serviceModule.ArtifactStoreService(); + const found = await service.loadArtifactBundle(params); + if (!found) { + throw createNotFoundError('ERR_ARTIFACT_NOT_FOUND', 'Artifact not found'); + } + + const verification = 
service.verifyArtifactBundle({ + schema: found.schema, + identity: found.identity, + payload: found.payload, + bundleHash: found.bundleHash, + }); + + return sendApiOk(res, { + bundleHash: found.bundleHash, + verified: verification.ok, + }); + } catch (error) { + return sendApiError(res, error); + } +} diff --git a/server/src/controllers/execution.controller.ts b/server/src/controllers/execution.controller.ts new file mode 100644 index 0000000..3451f04 --- /dev/null +++ b/server/src/controllers/execution.controller.ts @@ -0,0 +1,77 @@ +import type { Request, Response } from 'express'; +import { createNotFoundError, sendApiError, sendApiOk } from '../lib/api-error'; +import { + executionIdParamsSchema, + executionRecordBodySchema, + executionReplayBodySchema, +} from '../types/api/execution.api'; + +export async function recordExecution(req: Request, res: Response): Promise { + try { + const body = executionRecordBodySchema.parse(req.body); + const serviceModule = require('../services/execution.service') as typeof import('../services/execution.service'); + const service = new serviceModule.ExecutionService(); + const created = await service.recordExecution({ + packageId: body.packageId, + revisionHash: body.revisionHash, + provider: body.provider, + model: body.model, + promptHash: body.promptHash, + parameters: body.parameters, + inputArtifacts: body.inputArtifacts, + outputArtifacts: body.outputArtifacts, + status: body.status, + startedAt: body.startedAt, + finishedAt: body.finishedAt, + }); + + return sendApiOk(res, { + executionId: created.executionId, + resultHash: created.resultHash, + execution: created, + }); + } catch (error) { + return sendApiError(res, error); + } +} + +export async function getExecution(req: Request, res: Response): Promise { + try { + const params = executionIdParamsSchema.parse(req.params); + const serviceModule = require('../services/execution.service') as typeof import('../services/execution.service'); + const service = new 
serviceModule.ExecutionService(); + const found = await service.getExecution({ executionId: params.executionId }); + if (!found) { + throw createNotFoundError('ERR_EXECUTION_NOT_FOUND', 'Execution record not found'); + } + + return sendApiOk(res, found); + } catch (error) { + return sendApiError(res, error); + } +} + +export async function replayExecution(req: Request, res: Response): Promise { + try { + const params = executionIdParamsSchema.parse(req.params); + const body = executionReplayBodySchema.parse(req.body); + const serviceModule = require('../services/execution.service') as typeof import('../services/execution.service'); + const service = new serviceModule.ExecutionService(); + const replayed = await service.replayExecution({ + executionId: params.executionId, + promptHash: body.promptHash, + parameters: body.parameters, + inputArtifacts: body.inputArtifacts, + outputArtifacts: body.outputArtifacts, + status: body.status, + }); + + return sendApiOk(res, { + executionId: replayed.executionId, + verified: replayed.matches, + resultHash: replayed.resultHash, + }); + } catch (error) { + return sendApiError(res, error); + } +} diff --git a/server/src/controllers/migration.controller.ts b/server/src/controllers/migration.controller.ts new file mode 100644 index 0000000..00acd45 --- /dev/null +++ b/server/src/controllers/migration.controller.ts @@ -0,0 +1,77 @@ +import { existsSync } from 'fs'; +import path from 'path'; +import type { Request, Response } from 'express'; +import { createApiError, createNotFoundError, sendApiError, sendApiOk } from '../lib/api-error'; +import { migrationExportBodySchema, migrationZipBodySchema } from '../types/api/migration.api'; + +function buildZipPath(rootRevisionHash: string): string { + return path.resolve(process.cwd(), 'tmp', 'migration', `${rootRevisionHash}.zip`); +} + +function normalizeMigrationError(error: unknown): unknown { + if (!error || typeof error !== 'object') { + return error; + } + + const value = error as { 
code?: unknown; message?: unknown }; + if (value.code === 'ENOENT') { + return createNotFoundError('ERR_MIGRATION_NOT_FOUND', 'Migration package not found'); + } + + if (value.message === 'Migration package input is invalid') { + return createApiError(400, 'ERR_MIGRATION_INVALID_INPUT', 'Migration package input is invalid'); + } + + return error; +} + +export async function exportMigration(req: Request, res: Response): Promise { + try { + const body = migrationExportBodySchema.parse(req.body); + const serviceModule = require('../services/migration.service') as typeof import('../services/migration.service'); + const service = new serviceModule.MigrationService(); + const zipPath = await service.exportMigrationPackage(body.rootRevisionHash, buildZipPath(body.rootRevisionHash)); + const verified = await service.verifyMigrationPackage(zipPath); + + return sendApiOk(res, { + zipPath, + manifest: { + rootRevisionHash: verified.rootRevisionHash, + artifactCount: verified.artifactCount, + revisionCount: verified.revisionCount, + }, + }); + } catch (error) { + return sendApiError(res, normalizeMigrationError(error)); + } +} + +export async function verifyMigration(req: Request, res: Response): Promise { + try { + const body = migrationZipBodySchema.parse(req.body); + if (!existsSync(body.zipPath)) { + throw createNotFoundError('ERR_MIGRATION_NOT_FOUND', 'Migration package not found'); + } + const serviceModule = require('../services/migration.service') as typeof import('../services/migration.service'); + const service = new serviceModule.MigrationService(); + const verified = await service.verifyMigrationPackage(body.zipPath); + return sendApiOk(res, verified); + } catch (error) { + return sendApiError(res, normalizeMigrationError(error)); + } +} + +export async function importMigration(req: Request, res: Response): Promise { + try { + const body = migrationZipBodySchema.parse(req.body); + if (!existsSync(body.zipPath)) { + throw 
createNotFoundError('ERR_MIGRATION_NOT_FOUND', 'Migration package not found'); + } + const serviceModule = require('../services/migration.service') as typeof import('../services/migration.service'); + const service = new serviceModule.MigrationService(); + const imported = await service.importMigrationPackage(body.zipPath); + return sendApiOk(res, imported); + } catch (error) { + return sendApiError(res, normalizeMigrationError(error)); + } +} diff --git a/server/src/controllers/revision.controller.ts b/server/src/controllers/revision.controller.ts new file mode 100644 index 0000000..4acda20 --- /dev/null +++ b/server/src/controllers/revision.controller.ts @@ -0,0 +1,62 @@ +import type { Request, Response } from 'express'; +import { createNotFoundError, sendApiError, sendApiOk } from '../lib/api-error'; +import { + revisionCreateBodySchema, + revisionHashParamsSchema, + revisionListQuerySchema, + revisionPackageParamsSchema, +} from '../types/api/revision.api'; + +export async function createRevision(req: Request, res: Response): Promise { + try { + const body = revisionCreateBodySchema.parse(req.body); + const serviceModule = require('../services/revision.service') as typeof import('../services/revision.service'); + const service = new serviceModule.RevisionService(); + const created = await service.createRevision({ + packageId: body.packageId, + parentRevisionHash: body.parentRevisionHash, + artifacts: body.artifacts, + metadata: body.metadata, + }); + + return sendApiOk(res, { + revisionHash: created.revisionHash, + revision: created, + }); + } catch (error) { + return sendApiError(res, error); + } +} + +export async function getRevision(req: Request, res: Response): Promise { + try { + const params = revisionHashParamsSchema.parse(req.params); + const serviceModule = require('../services/revision.service') as typeof import('../services/revision.service'); + const service = new serviceModule.RevisionService(); + const found = await service.getRevision({ 
revisionHash: params.revisionHash }); + if (!found) { + throw createNotFoundError('ERR_REVISION_NOT_FOUND', 'Revision not found'); + } + + return sendApiOk(res, found); + } catch (error) { + return sendApiError(res, error); + } +} + +export async function listRevisions(req: Request, res: Response): Promise { + try { + const params = revisionPackageParamsSchema.parse(req.params); + const query = revisionListQuerySchema.parse(req.query); + const serviceModule = require('../services/revision.service') as typeof import('../services/revision.service'); + const service = new serviceModule.RevisionService(); + const items = await service.listRevisions({ + packageId: params.packageId, + limit: query.limit, + }); + + return sendApiOk(res, { items }); + } catch (error) { + return sendApiError(res, error); + } +} diff --git a/server/src/index.ts b/server/src/index.ts index 3506808..2ef1ed8 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -15,6 +15,10 @@ import { autoRegister } from './middleware'; import apiRouter from './api'; +import artifactRoutes from './routes/artifact.routes'; +import revisionRoutes from './routes/revision.routes'; +import executionRoutes from './routes/execution.routes'; +import migrationRoutes from './routes/migration.routes'; console.log("[BOOT] DATABASE_URL =", process.env.DATABASE_URL); @@ -89,6 +93,10 @@ async function bootstrap() { // ============================================ app.use(config.apiPrefix, apiRouter); + app.use(`${config.apiPrefix}/artifacts`, artifactRoutes); + app.use(`${config.apiPrefix}/revisions`, revisionRoutes); + app.use(`${config.apiPrefix}/executions`, executionRoutes); + app.use(`${config.apiPrefix}/migration`, migrationRoutes); // 兼容旧版本的简单 API(用于扩展初期测试) app.post('/subthread/ask', async (req, res, next) => { diff --git a/server/src/lib/api-error.ts b/server/src/lib/api-error.ts new file mode 100644 index 0000000..54c0ba9 --- /dev/null +++ b/server/src/lib/api-error.ts @@ -0,0 +1,166 @@ +import type { 
Response } from 'express'; +import { ZodError } from 'zod'; + +type ErrorDescriptor = { + status: number; + code: string; + message: string; +}; + +const INVALID_INPUT_ERROR: ErrorDescriptor = { + status: 400, + code: 'ERR_INVALID_INPUT', + message: 'Invalid request', +}; + +const INTERNAL_ERROR: ErrorDescriptor = { + status: 500, + code: 'ERR_INTERNAL', + message: 'Internal server error', +}; + +const ERROR_MAP: Record = { + E_ARTIFACT_VALIDATION: { + status: 400, + code: 'E_ARTIFACT_VALIDATION', + message: 'Artifact input is invalid', + }, + E_ARTIFACT_CONFLICT: { + status: 409, + code: 'E_ARTIFACT_CONFLICT', + message: 'Artifact already exists with different payload', + }, + ERR_ARTIFACT_NOT_FOUND: { + status: 404, + code: 'ERR_ARTIFACT_NOT_FOUND', + message: 'Artifact not found', + }, + ERR_REVISION_INVALID_INPUT: { + status: 400, + code: 'ERR_REVISION_INVALID_INPUT', + message: 'Revision input is invalid', + }, + ERR_REVISION_PARENT_NOT_FOUND: { + status: 404, + code: 'ERR_REVISION_PARENT_NOT_FOUND', + message: 'Revision parent not found', + }, + ERR_REVISION_NOT_FOUND: { + status: 404, + code: 'ERR_REVISION_NOT_FOUND', + message: 'Revision not found', + }, + ERR_EXECUTION_INVALID_INPUT: { + status: 400, + code: 'ERR_EXECUTION_INVALID_INPUT', + message: 'Execution input is invalid', + }, + ERR_EXECUTION_NOT_FOUND: { + status: 404, + code: 'ERR_EXECUTION_NOT_FOUND', + message: 'Execution record not found', + }, + ERR_EXECUTION_NON_DETERMINISTIC: { + status: 422, + code: 'ERR_EXECUTION_NON_DETERMINISTIC', + message: 'Execution replay is non-deterministic', + }, + ERR_EXECUTION_REPLAY_MISMATCH: { + status: 422, + code: 'ERR_EXECUTION_REPLAY_MISMATCH', + message: 'Execution replay result hash mismatch', + }, + ERR_MIGRATION_INVALID_INPUT: { + status: 400, + code: 'ERR_MIGRATION_INVALID_INPUT', + message: 'Migration package input is invalid', + }, + ERR_MIGRATION_CLOSURE_INCOMPLETE: { + status: 422, + code: 'ERR_MIGRATION_CLOSURE_INCOMPLETE', + message: 'Migration 
closure is incomplete', + }, + ERR_MIGRATION_VERIFY_MISMATCH: { + status: 422, + code: 'ERR_MIGRATION_VERIFY_MISMATCH', + message: 'Migration package verification failed', + }, + ERR_MIGRATION_IDENTITY_MISMATCH: { + status: 422, + code: 'ERR_MIGRATION_IDENTITY_MISMATCH', + message: 'Migration package identity mismatch', + }, + ERR_MIGRATION_INVALID_MANIFEST: { + status: 422, + code: 'ERR_MIGRATION_INVALID_MANIFEST', + message: 'Migration package manifest is invalid', + }, +}; + +export class ApiError extends Error { + readonly status: number; + readonly code: string; + + constructor(status: number, code: string, message: string) { + super(message); + this.status = status; + this.code = code; + this.name = 'ApiError'; + } +} + +function getErrorCode(error: unknown): string | null { + if (!error || typeof error !== 'object') { + return null; + } + const value = error as { code?: unknown }; + return typeof value.code === 'string' ? value.code : null; +} + +function resolveApiError(error: unknown): ErrorDescriptor { + if (error instanceof ApiError) { + return { + status: error.status, + code: error.code, + message: error.message, + }; + } + + if (error instanceof ZodError) { + return INVALID_INPUT_ERROR; + } + + const code = getErrorCode(error); + if (code && ERROR_MAP[code]) { + return ERROR_MAP[code]; + } + + return INTERNAL_ERROR; +} + +export function createApiError(status: number, code: string, message: string): ApiError { + return new ApiError(status, code, message); +} + +export function createInvalidInputError(message = INVALID_INPUT_ERROR.message): ApiError { + return new ApiError(INVALID_INPUT_ERROR.status, INVALID_INPUT_ERROR.code, message); +} + +export function createNotFoundError(code: string, message: string): ApiError { + return new ApiError(404, code, message); +} + +export function sendApiOk(res: Response, data: T, status = 200): Response { + return res.status(status).json({ ok: true, data }); +} + +export function sendApiError(res: Response, error: 
unknown): Response { + const mapped = resolveApiError(error); + return res.status(mapped.status).json({ + ok: false, + error: { + code: mapped.code, + message: mapped.message, + }, + }); +} diff --git a/server/src/lib/artifact-hash.ts b/server/src/lib/artifact-hash.ts new file mode 100644 index 0000000..6d9f3d0 --- /dev/null +++ b/server/src/lib/artifact-hash.ts @@ -0,0 +1,99 @@ +import { createHash } from 'crypto'; +import { assertSafeObjectKey } from './identity-guards'; + +type CanonicalJsonValue = + | null + | string + | number + | boolean + | CanonicalJsonValue[] + | { [key: string]: CanonicalJsonValue }; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function isPlainObject(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function normalizeForCanonicalJson(value: unknown): CanonicalJsonValue { + if (value === null) return null; + + const kind = typeof value; + if (kind === 'string' || kind === 'boolean') return value as string | boolean; + if (kind === 'number') { + if (!Number.isFinite(value)) { + throw new Error('Invalid JSON value for canonicalization'); + } + return value as number; + } + + if (kind === 'undefined' || kind === 'function' || kind === 'symbol' || kind === 'bigint') { + throw new Error('Invalid JSON value for canonicalization'); + } + + if (Array.isArray(value)) { + const normalizedItems: CanonicalJsonValue[] = []; + for (const item of value) { + normalizedItems.push(normalizeForCanonicalJson(item)); + } + return normalizedItems; + } + + if (!isPlainObject(value)) { + throw new Error('Invalid JSON value for canonicalization'); + } + + const normalizedObject = Object.create(null) as { [key: string]: CanonicalJsonValue }; + const keys = Object.keys(value).sort(compareStrings); + for (const key of 
keys) { + assertSafeObjectKey(key); + normalizedObject[key] = normalizeForCanonicalJson(value[key]); + } + + return normalizedObject; +} + +/** + * Deterministic JSON canonicalization: + * - object keys are sorted lexicographically at every level + * - array order is preserved + * - invalid JSON values are rejected + */ +export function canonicalizeJson(value: unknown): string { + return JSON.stringify(normalizeForCanonicalJson(value)); +} + +export function sha256Hex(input: string): string { + return createHash('sha256').update(input, 'utf8').digest('hex'); +} + +/** + * Computes the protocol bundle hash from canonicalized content: + * { schema, identity: { packageId, revisionId, revisionHash }, payload } + */ +export function computeBundleHash(params: { + schema: string; + packageId: string; + revisionId?: string | null; + revisionHash?: string | null; + payload: unknown; +}): string { + const hashInput = { + schema: params.schema, + identity: { + packageId: params.packageId, + revisionId: params.revisionId ?? null, + revisionHash: params.revisionHash ?? 
null, + }, + payload: params.payload, + }; + + return sha256Hex(canonicalizeJson(hashInput)); +} + diff --git a/server/src/lib/execution-hash.ts b/server/src/lib/execution-hash.ts new file mode 100644 index 0000000..058f016 --- /dev/null +++ b/server/src/lib/execution-hash.ts @@ -0,0 +1,49 @@ +import { createHash } from 'crypto'; +import { canonicalizeJson } from './artifact-hash'; + +export type ExecutionArtifactReference = { + bundleHash: string; + role: string; +}; + +export type ExecutionStatus = 'success' | 'failure'; + +export type ComputeExecutionResultHashInput = { + outputs: ExecutionArtifactReference[]; + status: ExecutionStatus; +}; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function normalizeOutputs(outputs: ExecutionArtifactReference[]): ExecutionArtifactReference[] { + const normalized = outputs.map((output) => ({ + bundleHash: output.bundleHash, + role: output.role, + })); + + // Deterministic ordering rule: + // sort outputs by bundleHash ASC, role ASC + normalized.sort((a, b) => { + const bundleOrder = compareStrings(a.bundleHash, b.bundleHash); + if (bundleOrder !== 0) { + return bundleOrder; + } + return compareStrings(a.role, b.role); + }); + + return normalized; +} + +export function computeExecutionResultHash(input: ComputeExecutionResultHashInput): string { + const hashPayload = { + outputs: normalizeOutputs(input.outputs), + status: input.status, + }; + + const canonical = canonicalizeJson(hashPayload); + return createHash('sha256').update(canonical, 'utf8').digest('hex'); +} diff --git a/server/src/lib/identity-guards.ts b/server/src/lib/identity-guards.ts new file mode 100644 index 0000000..55648ae --- /dev/null +++ b/server/src/lib/identity-guards.ts @@ -0,0 +1,29 @@ +export class IdentityGuardError extends Error { + readonly code: string; + + constructor(code: string) { + super(code); + this.code = code; + this.name = 'IdentityGuardError'; + } +} + +const 
DANGEROUS_KEYS = new Set(['__proto__', 'constructor', 'prototype']); + +export function isDangerousKey(key: string): boolean { + return DANGEROUS_KEYS.has(key); +} + +export function assertSafeObjectKey(key: string): void { + if (isDangerousKey(key)) { + throw new IdentityGuardError('ERR_INVALID_OBJECT_KEY'); + } +} + +export function assertHashMatch(providedHash: string | undefined, computedHash: string): string { + if (typeof providedHash !== 'undefined' && providedHash !== computedHash) { + throw new IdentityGuardError('ERR_ARTIFACT_HASH_MISMATCH'); + } + + return computedHash; +} diff --git a/server/src/lib/json-sanitize.ts b/server/src/lib/json-sanitize.ts new file mode 100644 index 0000000..667c996 --- /dev/null +++ b/server/src/lib/json-sanitize.ts @@ -0,0 +1,123 @@ +import { IdentityGuardError, assertSafeObjectKey } from './identity-guards'; +const PAYLOAD_NON_JSON_SAFE_MESSAGE = 'payload contains non-JSON-safe value'; +const REMOVE_VALUE = Symbol('remove_json_value'); + +type Sanitized = unknown | typeof REMOVE_VALUE; + +function makeSanitizeError(): JsonSanitizeError { + return new JsonSanitizeError(PAYLOAD_NON_JSON_SAFE_MESSAGE); +} + +function sanitizeInternal(value: unknown, seen: WeakSet): Sanitized { + if (value === null) { + return null; + } + + if (typeof value === 'string') { + return value.replace(/\u0000/g, ''); + } + + if (typeof value === 'boolean') { + return value; + } + + if (typeof value === 'number') { + if (!Number.isFinite(value)) { + throw makeSanitizeError(); + } + return value; + } + + if (typeof value === 'undefined' || typeof value === 'function' || typeof value === 'symbol') { + return REMOVE_VALUE; + } + + if (typeof value === 'bigint') { + throw makeSanitizeError(); + } + + if (value instanceof Date) { + if (Number.isNaN(value.getTime())) { + throw makeSanitizeError(); + } + return value.toISOString(); + } + + if (typeof Buffer !== 'undefined' && Buffer.isBuffer(value)) { + throw makeSanitizeError(); + } + + if (value instanceof 
Uint8Array) { + throw makeSanitizeError(); + } + + if (Array.isArray(value)) { + if (seen.has(value)) { + throw makeSanitizeError(); + } + seen.add(value); + + const output: unknown[] = []; + for (const item of value) { + const sanitized = sanitizeInternal(item, seen); + if (sanitized !== REMOVE_VALUE) { + output.push(sanitized); + } + } + + seen.delete(value); + return output; + } + + if (!value || typeof value !== 'object') { + throw makeSanitizeError(); + } + + const proto = Object.getPrototypeOf(value); + if (proto !== Object.prototype && proto !== null) { + throw makeSanitizeError(); + } + + if (seen.has(value)) { + throw makeSanitizeError(); + } + seen.add(value); + + const output = Object.create(null) as Record; + for (const key of Object.keys(value)) { + try { + assertSafeObjectKey(key); + } catch (error) { + if (error instanceof IdentityGuardError) { + throw makeSanitizeError(); + } + throw error; + } + const sanitized = sanitizeInternal((value as Record)[key], seen); + if (sanitized !== REMOVE_VALUE) { + output[key] = sanitized; + } + } + + seen.delete(value); + return output; +} + +export class JsonSanitizeError extends Error { + constructor(message = PAYLOAD_NON_JSON_SAFE_MESSAGE) { + super(message); + this.name = 'JsonSanitizeError'; + } +} + +export function sanitizeJsonValue(value: unknown): unknown { + const sanitized = sanitizeInternal(value, new WeakSet()); + if (sanitized === REMOVE_VALUE) { + throw makeSanitizeError(); + } + return sanitized; +} + +export function sanitizeJsonPayload(payload: unknown): unknown { + return sanitizeJsonValue(payload); +} diff --git a/server/src/lib/migration-package.ts b/server/src/lib/migration-package.ts new file mode 100644 index 0000000..fc45519 --- /dev/null +++ b/server/src/lib/migration-package.ts @@ -0,0 +1,265 @@ +const AdmZip = require('adm-zip') as new (path?: string) => AdmZipArchive; + +import { canonicalizeJson } from './artifact-hash'; + +const MESSAGE_MIGRATION_INVALID_INPUT = 'Migration package 
input is invalid'; +const MESSAGE_MIGRATION_INVALID_MANIFEST = 'Migration package manifest is invalid'; + +export const MIGRATION_PACKAGE_SCHEMA = 'migration.package.v1'; +export const MIGRATION_PACKAGE_META_SCHEMA = 'migration.package.meta.v1'; +export const REVISION_CARRIER_SCHEMA = 'artifact.revision.node.v1'; + +export type ArtifactReference = { + bundleHash: string; + role: string; +}; + +export type ArtifactBundleLike = { + schema: string; + identity: { + packageId: string; + revisionId?: string | null; + revisionHash?: string | null; + }; + payload: unknown; + references?: ArtifactReference[] | null; +}; + +export type MigrationManifestV1 = { + schema: 'migration.package.v1'; + rootRevisionHash: string; + artifactCount: number; + revisionCount: number; + createdAt: string; + bundleHashAlgo: 'sha256'; + revisionHashAlgo: 'sha256'; + protocol: 'shadow-protocol-v1'; +}; + +export type MigrationMetadataV1 = { + schema: 'migration.package.meta.v1'; + exporter: { + name: string; + version: string; + }; + notes: string | null; + source: 'human' | 'ai' | 'system'; +}; + +type AdmZipEntry = { + entryName: string; + isDirectory: boolean; +}; + +type AdmZipArchive = { + addFile(entryName: string, content: Buffer): void; + getEntries(): AdmZipEntry[]; + readAsText(entry: AdmZipEntry, encoding?: string): string; + writeZip(targetFileName: string): void; +}; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function createMigrationPackageError(code: string, message: string): Error & { code: string } { + const error = new Error(message) as Error & { code: string }; + error.code = code; + return error; +} + +function normalizeRequiredString(value: unknown): string { + if (typeof value !== 'string' || value.length === 0) { + throw new Error(MESSAGE_MIGRATION_INVALID_INPUT); + } + return value; +} + +function normalizeNullableString(value: unknown): string | null { + if (typeof value === 'string') 
return value; + if (value === null || typeof value === 'undefined') return null; + throw new Error(MESSAGE_MIGRATION_INVALID_INPUT); +} + +function normalizeReferences(references: unknown): ArtifactReference[] { + if (typeof references === 'undefined' || references === null) { + return []; + } + if (!Array.isArray(references)) { + throw new Error('Migration package input is invalid'); + } + + const out: ArtifactReference[] = []; + for (const item of references) { + if (!item || typeof item !== 'object') { + throw new Error(MESSAGE_MIGRATION_INVALID_INPUT); + } + const entry = item as { bundleHash?: unknown; role?: unknown }; + out.push({ + bundleHash: normalizeRequiredString(entry.bundleHash).toLowerCase(), + role: normalizeRequiredString(entry.role), + }); + } + + out.sort((a, b) => compareStrings(a.bundleHash, b.bundleHash) || compareStrings(a.role, b.role)); + return out; +} + +export function normalizeArtifactBundle(bundle: ArtifactBundleLike): ArtifactBundleLike { + return { + schema: normalizeRequiredString(bundle.schema), + identity: { + packageId: normalizeRequiredString(bundle.identity?.packageId), + revisionId: normalizeNullableString(bundle.identity?.revisionId), + revisionHash: normalizeNullableString(bundle.identity?.revisionHash), + }, + payload: bundle.payload, + references: normalizeReferences(bundle.references), + }; +} + +export function stringifyArtifactBundleLine(bundle: ArtifactBundleLike): string { + return canonicalizeJson(normalizeArtifactBundle(bundle)); +} + +export function stringifyArtifactsJsonl(artifacts: ArtifactBundleLike[]): string { + const lines = artifacts.map((artifact) => stringifyArtifactBundleLine(artifact)); + return `${lines.join('\n')}\n`; +} + +export function parseArtifactsJsonl(text: string): ArtifactBundleLike[] { + const lines = text.replace(/^\uFEFF/, '').split(/\r?\n/).filter((line) => line.length > 0); + const out: ArtifactBundleLike[] = []; + for (const line of lines) { + 
out.push(normalizeArtifactBundle(JSON.parse(line) as ArtifactBundleLike)); + } + return out; +} + +export function buildManifest(input: { + rootRevisionHash: string; + artifactCount: number; + revisionCount: number; + createdAt: string; +}): MigrationManifestV1 { + return { + schema: MIGRATION_PACKAGE_SCHEMA, + rootRevisionHash: normalizeRequiredString(input.rootRevisionHash).toLowerCase(), + artifactCount: input.artifactCount, + revisionCount: input.revisionCount, + createdAt: normalizeRequiredString(input.createdAt), + bundleHashAlgo: 'sha256', + revisionHashAlgo: 'sha256', + protocol: 'shadow-protocol-v1', + }; +} + +function normalizeNonNegativeInteger(value: unknown): number { + if (typeof value !== 'number' || !Number.isInteger(value) || value < 0) { + throw createMigrationPackageError('ERR_MIGRATION_INVALID_MANIFEST', MESSAGE_MIGRATION_INVALID_MANIFEST); + } + return value; +} + +function validateManifest(input: unknown): MigrationManifestV1 { + if (!input || typeof input !== 'object') { + throw createMigrationPackageError('ERR_MIGRATION_INVALID_MANIFEST', MESSAGE_MIGRATION_INVALID_MANIFEST); + } + + const manifest = input as { + schema?: unknown; + rootRevisionHash?: unknown; + artifactCount?: unknown; + revisionCount?: unknown; + createdAt?: unknown; + bundleHashAlgo?: unknown; + revisionHashAlgo?: unknown; + protocol?: unknown; + }; + + if ( + manifest.schema !== MIGRATION_PACKAGE_SCHEMA || + manifest.bundleHashAlgo !== 'sha256' || + manifest.revisionHashAlgo !== 'sha256' || + manifest.protocol !== 'shadow-protocol-v1' + ) { + throw createMigrationPackageError('ERR_MIGRATION_INVALID_MANIFEST', MESSAGE_MIGRATION_INVALID_MANIFEST); + } + + return { + schema: MIGRATION_PACKAGE_SCHEMA, + rootRevisionHash: normalizeRequiredString(manifest.rootRevisionHash).toLowerCase(), + artifactCount: normalizeNonNegativeInteger(manifest.artifactCount), + revisionCount: normalizeNonNegativeInteger(manifest.revisionCount), + createdAt: 
normalizeRequiredString(manifest.createdAt), + bundleHashAlgo: 'sha256', + revisionHashAlgo: 'sha256', + protocol: 'shadow-protocol-v1', + }; +} + +export function buildMetadata(input?: Partial): MigrationMetadataV1 { + return { + schema: MIGRATION_PACKAGE_META_SCHEMA, + exporter: { + name: normalizeRequiredString(input?.exporter?.name ?? 'shadow-threads'), + version: normalizeRequiredString(input?.exporter?.version ?? '0.x'), + }, + notes: normalizeNullableString(input?.notes), + source: input?.source === 'human' || input?.source === 'ai' || input?.source === 'system' ? input.source : 'system', + }; +} + +export function writeMigrationPackageZip(input: { + outPath: string; + manifest: MigrationManifestV1; + artifactsJsonl: string; + metadata: MigrationMetadataV1; +}): string { + const zip = new AdmZip(); + zip.addFile('manifest.json', Buffer.from(canonicalizeJson(input.manifest), 'utf8')); + zip.addFile('artifacts.jsonl', Buffer.from(input.artifactsJsonl, 'utf8')); + zip.addFile('metadata.json', Buffer.from(canonicalizeJson(input.metadata), 'utf8')); + zip.writeZip(input.outPath); + return input.outPath; +} + +export function readMigrationPackageZip(zipPath: string): { + manifest: MigrationManifestV1; + artifactsJsonl: string; + metadata: MigrationMetadataV1; +} { + const zip = new AdmZip(zipPath); + const entries = zip.getEntries(); + const allowed = new Set(['manifest.json', 'artifacts.jsonl', 'metadata.json']); + const byName = new Map(); + + for (const entry of entries) { + if (entry.isDirectory) { + throw new Error(MESSAGE_MIGRATION_INVALID_INPUT); + } + if (entry.entryName.includes('/') || entry.entryName.includes('\\')) { + throw new Error(MESSAGE_MIGRATION_INVALID_INPUT); + } + if (!allowed.has(entry.entryName)) { + throw new Error(MESSAGE_MIGRATION_INVALID_INPUT); + } + byName.set(entry.entryName, entry); + } + + if (!byName.has('manifest.json') || !byName.has('artifacts.jsonl') || !byName.has('metadata.json') || byName.size !== 3) { + throw new 
Error('Migration package input is invalid'); + } + + const manifest = JSON.parse(zip.readAsText(byName.get('manifest.json') as AdmZipEntry, 'utf8')) as MigrationManifestV1; + const artifactsJsonl = zip.readAsText(byName.get('artifacts.jsonl') as AdmZipEntry, 'utf8'); + const metadata = JSON.parse(zip.readAsText(byName.get('metadata.json') as AdmZipEntry, 'utf8')) as MigrationMetadataV1; + + return { + manifest: validateManifest(manifest), + artifactsJsonl, + metadata: buildMetadata(metadata), + }; +} diff --git a/server/src/lib/revision-hash.ts b/server/src/lib/revision-hash.ts new file mode 100644 index 0000000..4b93e9c --- /dev/null +++ b/server/src/lib/revision-hash.ts @@ -0,0 +1,139 @@ +import { createHash } from 'crypto'; + +export type RevisionArtifactReference = { + bundleHash: string; + role: string; +}; + +export type RevisionMetadata = { + author: string; + message: string; + createdBy: string; + timestamp: string; + source: 'human' | 'ai' | 'migration' | 'system'; + tags?: string[] | null; +}; + +export type ComputeRevisionHashInput = { + packageId: string; + parentRevisionHash?: string | null; + artifacts: RevisionArtifactReference[]; + metadata: RevisionMetadata; +}; + +type JsonValue = null | string | number | boolean | JsonValue[] | { [key: string]: JsonValue }; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function isPlainObject(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) { + return false; + } + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function canonicalizeValue(value: unknown): JsonValue { + if (value === null) { + return null; + } + + if (typeof value === 'string' || typeof value === 'boolean') { + return value; + } + + if (typeof value === 'number') { + if (!Number.isFinite(value)) { + throw new Error('Revision hash input is invalid'); + } + return 
Object.is(value, -0) ? 0 : value; + } + + if (Array.isArray(value)) { + const out: JsonValue[] = []; + for (const item of value) { + out.push(canonicalizeValue(item)); + } + return out; + } + + if (!isPlainObject(value)) { + throw new Error('Revision hash input is invalid'); + } + + const out: { [key: string]: JsonValue } = Object.create(null); + const keys = Object.keys(value).sort(compareStrings); + for (const key of keys) { + const child = value[key]; + if (typeof child === 'undefined') { + continue; + } + out[key] = canonicalizeValue(child); + } + return out; +} + +function canonicalStringify(value: JsonValue): string { + if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') { + return JSON.stringify(value); + } + + if (Array.isArray(value)) { + return `[${value.map((entry) => canonicalStringify(entry)).join(',')}]`; + } + + const keys = Object.keys(value).sort(compareStrings); + return `{${keys.map((key) => `${JSON.stringify(key)}:${canonicalStringify(value[key])}`).join(',')}}`; +} + +function normalizeArtifacts(artifacts: RevisionArtifactReference[]): RevisionArtifactReference[] { + const out = artifacts.map((artifact) => ({ + bundleHash: artifact.bundleHash, + role: artifact.role, + })); + + out.sort((a, b) => { + const bundleHashOrder = compareStrings(a.bundleHash, b.bundleHash); + if (bundleHashOrder !== 0) { + return bundleHashOrder; + } + return compareStrings(a.role, b.role); + }); + + return out; +} + +function normalizeMetadata(metadata: RevisionMetadata): RevisionMetadata { + const tags = Array.isArray(metadata.tags) + ? [...metadata.tags].sort(compareStrings) + : metadata.tags === null + ? null + : undefined; + + return { + author: metadata.author, + message: metadata.message, + createdBy: metadata.createdBy, + timestamp: metadata.timestamp, + source: metadata.source, + ...(typeof tags === 'undefined' ? 
{} : { tags }), + }; +} + +export function computeRevisionHash(input: ComputeRevisionHashInput): string { + const hashPayload = { + schema: 'revision.node.v1', + packageId: input.packageId, + parentRevisionHash: input.parentRevisionHash ?? null, + artifacts: normalizeArtifacts(input.artifacts), + metadata: normalizeMetadata(input.metadata), + }; + + const canonicalPayload = canonicalizeValue(hashPayload); + const canonicalJson = canonicalStringify(canonicalPayload); + return createHash('sha256').update(canonicalJson, 'utf8').digest('hex'); +} diff --git a/server/src/protocol/artifact-bundle.v1.md b/server/src/protocol/artifact-bundle.v1.md new file mode 100644 index 0000000..fe48f65 --- /dev/null +++ b/server/src/protocol/artifact-bundle.v1.md @@ -0,0 +1,37 @@ +# Artifact Bundle v1 + +ArtifactBundle is the portable protocol object for frozen v1. + +## Shape +```json +{ + "schema": "string", + "identity": { + "packageId": "string", + "revisionId": "string (optional)", + "revisionHash": "string (optional)" + }, + "payload": {}, + "references": [ + { + "bundleHash": "string", + "role": "string" + } + ] +} +``` + +## bundleHash derivation +- `bundleHash = SHA256(canonicalJSON(sanitizedBundle))` +- Output format: + - lowercase hex + - length 64 + +## Deterministic requirements +- Canonicalization follows `shadow-canonical-json.v1.md`. +- No locale-dependent behavior. +- No time/random inputs. + +## Notes +- `references` is optional. +- Missing optional fields are normalized to `null` or omitted per canonicalization rules of the producer. diff --git a/server/src/protocol/error-codes.v1.md b/server/src/protocol/error-codes.v1.md new file mode 100644 index 0000000..6a17ce4 --- /dev/null +++ b/server/src/protocol/error-codes.v1.md @@ -0,0 +1,14 @@ +# Error Codes v1 + +Frozen error code registry for Artifact Protocol v1. 
+
+- `ERR_PAYLOAD_TOO_LARGE`
+- `ERR_PAYLOAD_UNSAFE_KEY`
+- `ERR_PAYLOAD_CONTAINS_NULL_CHAR`
+- `ERR_PAYLOAD_STRUCTURE_LIMIT`
+- `ERR_ARTIFACT_HASH_COLLISION_OR_IMPL_BUG`
+
+## Usage notes
+- Error code strings are fixed literals.
+- Do not interpolate IDs/hashes/paths into code strings.
+- Human-readable messages may be mapped externally, but protocol codes are stable.
diff --git a/server/src/protocol/shadow-canonical-json.v1.md b/server/src/protocol/shadow-canonical-json.v1.md
new file mode 100644
index 0000000..ec2fca3
--- /dev/null
+++ b/server/src/protocol/shadow-canonical-json.v1.md
@@ -0,0 +1,39 @@
+# Shadow Canonical JSON v1
+
+This spec defines deterministic canonicalization for Artifact Protocol v1 hashing.
+
+## Encoding
+- UTF-8
+
+## String normalization
+- Apply NFC normalization to all string values.
+- Do not normalize object keys.
+
+## Key ordering
+- Sort object keys by Unicode codepoint ascending using:
+  - `(a < b ? -1 : a > b ? 1 : 0)`
+- `localeCompare` is not allowed.
+
+## undefined handling
+- Object property with `undefined` is removed.
+- Array `undefined` entries are removed and array is compacted.
+
+## Number rules
+- `NaN` and `Infinity` are rejected with `ERR_PAYLOAD_STRUCTURE_LIMIT` (per the golden canonicalization vectors).
+- `-0` is normalized to `0`.
+
+## Null character rule
+- If a string contains `\u0000`, reject with `ERR_PAYLOAD_CONTAINS_NULL_CHAR`.
+
+## Unsafe key rule
+- Reject object keys: `__proto__`, `constructor`, `prototype`.
+- Error code: `ERR_PAYLOAD_UNSAFE_KEY`.
+
+## Structure limits
+- `maxDepth = 64`
+- `maxNodes = 100000`
+- Exceeding either limit rejects with `ERR_PAYLOAD_STRUCTURE_LIMIT`.
+
+## Payload size limit
+- Maximum canonical payload size is 1MB (UTF-8 byte size).
+- Exceeding limit rejects with `ERR_PAYLOAD_TOO_LARGE`.
diff --git a/server/src/routes/artifact.routes.ts b/server/src/routes/artifact.routes.ts new file mode 100644 index 0000000..c9b5611 --- /dev/null +++ b/server/src/routes/artifact.routes.ts @@ -0,0 +1,10 @@ +import { Router } from 'express'; +import { createArtifact, getArtifact, verifyArtifact } from '../controllers/artifact.controller'; + +const router = Router(); + +router.post('/', createArtifact); +router.get('/:packageId/:bundleHash', getArtifact); +router.post('/:packageId/:bundleHash/verify', verifyArtifact); + +export default router; diff --git a/server/src/routes/execution.routes.ts b/server/src/routes/execution.routes.ts new file mode 100644 index 0000000..f4aa486 --- /dev/null +++ b/server/src/routes/execution.routes.ts @@ -0,0 +1,10 @@ +import { Router } from 'express'; +import { getExecution, recordExecution, replayExecution } from '../controllers/execution.controller'; + +const router = Router(); + +router.post('/', recordExecution); +router.get('/:executionId', getExecution); +router.post('/:executionId/replay', replayExecution); + +export default router; diff --git a/server/src/routes/migration.routes.ts b/server/src/routes/migration.routes.ts new file mode 100644 index 0000000..379a527 --- /dev/null +++ b/server/src/routes/migration.routes.ts @@ -0,0 +1,10 @@ +import { Router } from 'express'; +import { exportMigration, importMigration, verifyMigration } from '../controllers/migration.controller'; + +const router = Router(); + +router.post('/export', exportMigration); +router.post('/verify', verifyMigration); +router.post('/import', importMigration); + +export default router; diff --git a/server/src/routes/revision.routes.ts b/server/src/routes/revision.routes.ts new file mode 100644 index 0000000..aff67ab --- /dev/null +++ b/server/src/routes/revision.routes.ts @@ -0,0 +1,10 @@ +import { Router } from 'express'; +import { createRevision, getRevision, listRevisions } from '../controllers/revision.controller'; + +const router = Router(); + 
+router.post('/', createRevision); +router.get('/package/:packageId', listRevisions); +router.get('/:revisionHash', getRevision); + +export default router; diff --git a/server/src/selftest/artifact-store.selftest.ts b/server/src/selftest/artifact-store.selftest.ts new file mode 100644 index 0000000..883756e --- /dev/null +++ b/server/src/selftest/artifact-store.selftest.ts @@ -0,0 +1,166 @@ +import { computeBundleHash } from '../lib/artifact-hash'; +import { ArtifactHashMismatchError, ArtifactStoreService } from '../services/artifact-store.service'; +import { prisma } from '../utils'; + +export async function runArtifactStoreSelftest(): Promise { + const service = new ArtifactStoreService(); + + const identity = { + packageId: 'artifact-selftest-package', + revisionId: 'artifact-selftest-rev', + revisionHash: 'artifact-selftest-revision-hash', + }; + + const schema = 'artifact-bundle-1'; + const payload = { + schema: 'artifact-bundle-1', + identity: { + packageId: identity.packageId, + revisionId: identity.revisionId, + revisionHash: identity.revisionHash, + }, + artifacts: { + transferPackageV1: { transferHash: 'a'.repeat(64) }, + lineageBindingV1: { lineageHash: 'b'.repeat(64) }, + handoffRecordV1: { handoffHash: 'c'.repeat(64) }, + closureContractV1: null, + }, + }; + + const bundleHash = computeBundleHash({ + schema, + packageId: identity.packageId, + revisionId: identity.revisionId, + revisionHash: identity.revisionHash, + payload, + }); + + const stored = await service.storeArtifactBundle({ + schema, + identity, + payload, + bundleHash, + }); + + const loaded = await service.loadArtifactBundle({ + packageId: identity.packageId, + bundleHash, + }); + + if (!loaded) { + throw new Error('selftest_failed'); + } + + const verification = service.verifyArtifactBundle({ + schema, + identity, + payload, + bundleHash, + }); + + if (!verification.ok) { + throw new Error('selftest_failed'); + } + + const storedAgain = await service.storeArtifactBundle({ + schema, + 
identity, + payload, + bundleHash, + }); + + if (stored.bundleHash !== storedAgain.bundleHash) { + throw new Error('selftest_failed'); + } + + let mismatchSeen = false; + try { + await service.storeArtifactBundle({ + schema, + identity, + payload: { + ...payload, + artifacts: { + ...payload.artifacts, + transferPackageV1: { transferHash: 'd'.repeat(64) }, + }, + }, + bundleHash, + }); + } catch (error) { + if (error instanceof ArtifactHashMismatchError) { + mismatchSeen = true; + } else { + throw error; + } + } + + if (!mismatchSeen) { + throw new Error('selftest_failed'); + } + + const payloadWithNullByte = { + schema: 'artifact-bundle-1', + identity: { + packageId: 'artifact-selftest-package-null', + revisionId: 'artifact-selftest-rev-null', + revisionHash: 'artifact-selftest-revision-hash-null', + }, + notes: 'before\u0000after', + artifacts: { + transferPackageV1: { transferHash: 'e'.repeat(64) }, + lineageBindingV1: { lineageHash: 'f'.repeat(64) }, + handoffRecordV1: { handoffHash: '0'.repeat(64) }, + closureContractV1: null, + }, + }; + + const storedWithSanitizedPayload = await service.storeArtifactBundle({ + schema, + identity: { + packageId: payloadWithNullByte.identity.packageId, + revisionId: payloadWithNullByte.identity.revisionId, + revisionHash: payloadWithNullByte.identity.revisionHash, + }, + payload: payloadWithNullByte, + }); + + const loadedWithSanitizedPayload = await service.loadArtifactBundle({ + packageId: payloadWithNullByte.identity.packageId, + bundleHash: storedWithSanitizedPayload.bundleHash, + }); + + if (!loadedWithSanitizedPayload) { + throw new Error('selftest_failed'); + } + + if (!loadedWithSanitizedPayload.payload || typeof loadedWithSanitizedPayload.payload !== 'object') { + throw new Error('selftest_failed'); + } + + const loadedPayloadRecord = loadedWithSanitizedPayload.payload as Record; + if (loadedPayloadRecord.notes !== 'beforeafter') { + throw new Error('selftest_failed'); + } +} + +async function main(): Promise { + let 
exitCode = 0; + try { + await runArtifactStoreSelftest(); + process.stdout.write('ARTIFACT_STORE_SELFTEST_OK\n'); + } catch { + process.stdout.write('ARTIFACT_STORE_SELFTEST_FAIL\n'); + exitCode = 1; + } finally { + await prisma.$disconnect().catch(() => undefined); + } + + if (exitCode !== 0) { + process.exit(exitCode); + } +} + +if (require.main === module) { + void main(); +} \ No newline at end of file diff --git a/server/src/selftest/execution.selftest.ts b/server/src/selftest/execution.selftest.ts new file mode 100644 index 0000000..0ef9432 --- /dev/null +++ b/server/src/selftest/execution.selftest.ts @@ -0,0 +1,278 @@ +import { + computeExecutionResultHash, + type ExecutionArtifactReference, + type ExecutionStatus, +} from '../lib/execution-hash'; +import { + ExecutionService, + type ExecutionStorageAdapter, + type ExecutionRecordDTO, + ExecutionServiceError, + EXECUTION_ERROR_CODES, +} from '../services/execution.service'; + +type StoredExecutionRecord = { + executionId: string; + packageId: string; + revisionHash: string; + provider: string; + model: string; + promptHash: string; + parameters: unknown; + inputArtifacts: ExecutionArtifactReference[]; + outputArtifacts: ExecutionArtifactReference[]; + resultHash: string; + status: ExecutionStatus; + startedAt: Date; + finishedAt: Date; + createdAt: Date; +}; + +const PACKAGE_ID = 'execution-selftest-package'; +const REVISION_HASH = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +const INPUT_BUNDLE_HASH = '1111111111111111111111111111111111111111111111111111111111111111'; +const OUTPUT_BUNDLE_HASH = '2222222222222222222222222222222222222222222222222222222222222222'; +const EXECUTION_ID = '123e4567-e89b-42d3-a456-426614174000'; +const PROMPT_HASH = 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function assertCondition(condition: boolean): void { 
+ if (!condition) { + throw new Error('execution_selftest_failed'); + } +} + +function cloneArtifacts(items: ExecutionArtifactReference[]): ExecutionArtifactReference[] { + return items.map((item) => ({ bundleHash: item.bundleHash, role: item.role })); +} + +function createInMemoryExecutionAdapter(): { + adapter: ExecutionStorageAdapter; + revisions: Set; + artifacts: Set; +} { + const revisions = new Set(); + const artifacts = new Set(); + const executions = new Map(); + + const adapter: ExecutionStorageAdapter = { + async findRevisionByHash(revisionHash: string): Promise { + return revisions.has(revisionHash); + }, + + async artifactExists(packageId: string, bundleHash: string): Promise { + return artifacts.has(`${packageId}:${bundleHash}`); + }, + + async findExecutionById(executionId: string): Promise { + return executions.get(executionId) ?? null; + }, + + async createExecution(input): Promise { + const executionId = input.executionId ?? '123e4567-e89b-42d3-a456-426614174001'; + const createdAt = new Date('2026-03-06T00:00:02.000Z'); + const stored: StoredExecutionRecord = { + executionId, + packageId: input.packageId, + revisionHash: input.revisionHash, + provider: input.provider, + model: input.model, + promptHash: input.promptHash, + parameters: input.parameters, + inputArtifacts: cloneArtifacts(input.inputArtifacts), + outputArtifacts: cloneArtifacts(input.outputArtifacts), + resultHash: input.resultHash, + status: input.status, + startedAt: new Date(input.startedAt), + finishedAt: new Date(input.finishedAt), + createdAt, + }; + executions.set(executionId, stored); + return stored; + }, + + async listExecutions(packageId: string, limit: number): Promise { + const rows: StoredExecutionRecord[] = []; + for (const execution of executions.values()) { + if (execution.packageId === packageId) { + rows.push(execution); + } + } + rows.sort((a, b) => { + const timeDiff = a.startedAt.getTime() - b.startedAt.getTime(); + if (timeDiff !== 0) { + return timeDiff < 0 ? 
-1 : 1; + } + return compareStrings(a.executionId, b.executionId); + }); + return rows.slice(0, limit); + }, + }; + + return { adapter, revisions, artifacts }; +} + +function addArtifact(artifacts: Set, packageId: string, bundleHash: string): void { + artifacts.add(`${packageId}:${bundleHash}`); +} + +async function runExecutionSelftest(): Promise { + const memory = createInMemoryExecutionAdapter(); + const service = new ExecutionService(memory.adapter); + + memory.revisions.add(REVISION_HASH); + addArtifact(memory.artifacts, PACKAGE_ID, INPUT_BUNDLE_HASH); + addArtifact(memory.artifacts, PACKAGE_ID, OUTPUT_BUNDLE_HASH); + + const expectedHashA = computeExecutionResultHash({ + outputs: [ + { bundleHash: OUTPUT_BUNDLE_HASH, role: 'result' }, + { bundleHash: INPUT_BUNDLE_HASH, role: 'evidence' }, + ], + status: 'success', + }); + + const expectedHashB = computeExecutionResultHash({ + outputs: [ + { bundleHash: INPUT_BUNDLE_HASH, role: 'evidence' }, + { bundleHash: OUTPUT_BUNDLE_HASH, role: 'result' }, + ], + status: 'success', + }); + + assertCondition(expectedHashA === expectedHashB); + + const recorded = await service.recordExecution({ + executionId: EXECUTION_ID, + packageId: PACKAGE_ID, + revisionHash: REVISION_HASH, + provider: 'openai', + model: 'gpt-test', + promptHash: PROMPT_HASH, + parameters: { + temperature: 0, + maxTokens: 256, + }, + inputArtifacts: [{ bundleHash: INPUT_BUNDLE_HASH, role: 'context' }], + outputArtifacts: [ + { bundleHash: OUTPUT_BUNDLE_HASH, role: 'result' }, + { bundleHash: INPUT_BUNDLE_HASH, role: 'evidence' }, + ], + status: 'success', + startedAt: '2026-03-06T00:00:00.000Z', + finishedAt: '2026-03-06T00:00:01.000Z', + }); + + assertCondition(recorded.resultHash === expectedHashA); + + const loaded = await service.getExecution({ executionId: EXECUTION_ID }); + assertCondition(loaded !== null); + assertCondition((loaded as ExecutionRecordDTO).resultHash === recorded.resultHash); + + const listed = await service.listExecutions({ 
packageId: PACKAGE_ID, limit: 10 }); + assertCondition(listed.length === 1); + assertCondition(listed[0].executionId === EXECUTION_ID); + + let missingArtifactCaught = false; + try { + await service.recordExecution({ + executionId: '123e4567-e89b-42d3-a456-426614174010', + packageId: PACKAGE_ID, + revisionHash: REVISION_HASH, + provider: 'openai', + model: 'gpt-test', + promptHash: PROMPT_HASH, + parameters: { temperature: 0 }, + inputArtifacts: [{ bundleHash: '3333333333333333333333333333333333333333333333333333333333333333', role: 'context' }], + outputArtifacts: [{ bundleHash: OUTPUT_BUNDLE_HASH, role: 'result' }], + status: 'success', + startedAt: '2026-03-06T00:00:00.000Z', + finishedAt: '2026-03-06T00:00:01.000Z', + }); + } catch (error) { + if ( + error instanceof ExecutionServiceError && + error.code === EXECUTION_ERROR_CODES.ERR_ARTIFACT_NOT_FOUND + ) { + missingArtifactCaught = true; + } else { + throw error; + } + } + assertCondition(missingArtifactCaught); + + const replayed = await service.replayExecution({ + executionId: EXECUTION_ID, + promptHash: PROMPT_HASH, + parameters: { + temperature: 0, + maxTokens: 256, + }, + inputArtifacts: [{ bundleHash: INPUT_BUNDLE_HASH, role: 'context' }], + }); + assertCondition(replayed.matches === true); + assertCondition(replayed.resultHash === recorded.resultHash); + + let nonDeterministicCaught = false; + try { + await service.replayExecution({ + executionId: EXECUTION_ID, + promptHash: PROMPT_HASH, + parameters: { + temperature: 1, + maxTokens: 256, + }, + inputArtifacts: [{ bundleHash: INPUT_BUNDLE_HASH, role: 'context' }], + }); + } catch (error) { + if ( + error instanceof ExecutionServiceError && + error.code === EXECUTION_ERROR_CODES.ERR_EXECUTION_NON_DETERMINISTIC + ) { + nonDeterministicCaught = true; + } else { + throw error; + } + } + assertCondition(nonDeterministicCaught); + + let replayMismatchCaught = false; + try { + await service.replayExecution({ + executionId: EXECUTION_ID, + promptHash: 
PROMPT_HASH, + parameters: { + temperature: 0, + maxTokens: 256, + }, + inputArtifacts: [{ bundleHash: INPUT_BUNDLE_HASH, role: 'context' }], + outputArtifacts: [{ bundleHash: OUTPUT_BUNDLE_HASH, role: 'different-role' }], + }); + } catch (error) { + if ( + error instanceof ExecutionServiceError && + error.code === EXECUTION_ERROR_CODES.ERR_EXECUTION_REPLAY_MISMATCH + ) { + replayMismatchCaught = true; + } else { + throw error; + } + } + assertCondition(replayMismatchCaught); +} + +if (require.main === module) { + runExecutionSelftest() + .then(() => { + process.stdout.write('EXECUTION_SELFTEST_OK\n'); + }) + .catch(() => { + process.stdout.write('EXECUTION_SELFTEST_FAIL\n'); + process.exit(1); + }); +} diff --git a/server/src/selftest/golden/canonicalization.json b/server/src/selftest/golden/canonicalization.json new file mode 100644 index 0000000..1e1ec63 --- /dev/null +++ b/server/src/selftest/golden/canonicalization.json @@ -0,0 +1,78 @@ +{ + "schema": "golden-canonicalization-1", + "vectors": [ + { + "id": "key-order", + "inputSpec": { + "b": 1, + "a": 2, + "nested": { + "z": 1, + "y": 2 + } + }, + "expectedCanonical": "{\"a\":2,\"b\":1,\"nested\":{\"y\":2,\"z\":1}}" + }, + { + "id": "unicode-normalization", + "inputSpec": { + "text": "Cafe\u0301" + }, + "expectedCanonical": "{\"text\":\"Caf\u00e9\"}" + }, + { + "id": "undefined-removal", + "inputSpec": { + "obj": { + "keep": 1, + "drop": { + "$st": "undefined" + } + }, + "arr": [ + 1, + { + "$st": "undefined" + }, + 2 + ] + }, + "expectedCanonical": "{\"arr\":[1,2],\"obj\":{\"keep\":1}}" + }, + { + "id": "neg-zero-normalization", + "inputSpec": { + "negZero": { + "$st": "negzero" + }, + "normal": 0 + }, + "expectedCanonical": "{\"negZero\":0,\"normal\":0}" + }, + { + "id": "nan-reject", + "inputSpec": { + "value": { + "$st": "nan" + } + }, + "expectedError": "ERR_PAYLOAD_STRUCTURE_LIMIT" + }, + { + "id": "infinity-reject", + "inputSpec": { + "value": { + "$st": "infinity" + } + }, + "expectedError": 
"ERR_PAYLOAD_STRUCTURE_LIMIT" + }, + { + "id": "unsafe-key-reject", + "inputSpec": { + "__proto__": {} + }, + "expectedError": "ERR_PAYLOAD_UNSAFE_KEY" + } + ] +} diff --git a/server/src/selftest/golden/hash.json b/server/src/selftest/golden/hash.json new file mode 100644 index 0000000..b8a0849 --- /dev/null +++ b/server/src/selftest/golden/hash.json @@ -0,0 +1,29 @@ +{ + "schema": "golden-hash-1", + "vectors": [ + { + "id": "artifact-bundle-basic", + "bundleSpec": { + "schema": "artifact-bundle-1", + "identity": { + "packageId": "pkg-1", + "revisionId": "rev-1", + "revisionHash": "rev-hash-1" + }, + "payload": { + "z": 1, + "a": "Cafe\u0301", + "arr": [ + 1, + { + "b": 2, + "a": 1 + } + ] + } + }, + "expectedCanonical": "{\"identity\":{\"packageId\":\"pkg-1\",\"revisionHash\":\"rev-hash-1\",\"revisionId\":\"rev-1\"},\"payload\":{\"a\":\"Caf\u00e9\",\"arr\":[1,{\"a\":1,\"b\":2}],\"z\":1},\"schema\":\"artifact-bundle-1\"}", + "expectedBundleHash": "58cd7a0cd38a6335d0c6243b44c312cb75c34790e3240192f8f19e196c0ef4e5" + } + ] +} diff --git a/server/src/selftest/http-api.e2e.ts b/server/src/selftest/http-api.e2e.ts new file mode 100644 index 0000000..4924d96 --- /dev/null +++ b/server/src/selftest/http-api.e2e.ts @@ -0,0 +1,468 @@ +import { existsSync } from 'fs'; +import { request as httpRequest } from 'http'; +import { request as httpsRequest } from 'https'; +import path from 'path'; +import { spawn } from 'child_process'; +import type { ChildProcessByStdio } from 'child_process'; +import type { Readable } from 'stream'; +import { URL } from 'url'; + +const FIXED_PACKAGE_ID = 'smoke-local-package'; +const FIXED_PROMPT_HASH = '1111111111111111111111111111111111111111111111111111111111111111'; +const FIXED_STARTED_AT = '2026-03-06T00:00:00.000Z'; +const FIXED_FINISHED_AT = '2026-03-06T00:00:01.000Z'; +const FIXED_DEVICE_ID = 'smoke-local-device'; +const READY_TIMEOUT_MS = 30000; +const HTTP_TIMEOUT_MS = 10000; +const HASH64_PATTERN = /^[0-9a-f]{64}$/; +const UUID_PATTERN 
= /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/; + +const ARTIFACT_REQUEST = { + schema: 'artifact.task.state.v1', + identity: { + packageId: FIXED_PACKAGE_ID, + revisionId: null, + revisionHash: null, + }, + payload: { + name: 'smoke-state', + step: 1, + }, + references: [], +} as const; + +const REVISION_METADATA = { + author: 'smoke-test', + message: 'initial revision', + createdBy: 'smoke-test', + timestamp: FIXED_STARTED_AT, + source: 'system', + tags: ['smoke', 'local-api'], +} as const; + +type StepRecord = { + step: string; + request: unknown; + statusCode: number | null; + response: unknown; +}; + +type ServiceProcess = { + child: ChildProcessByStdio; + stdoutText: string; + stderrText: string; + baseUrl: string | null; + exitCode: number | null; + exitSignal: NodeJS.Signals | null; +}; + +type HttpResult = { + statusCode: number; + bodyText: string; + bodyJson: unknown; +}; + +type FailureContext = { + step: string; + request: unknown; + statusCode: number | null; + response: unknown; + reason: string; +}; + +function appendLog(current: string, chunk: Buffer): string { + const next = current + chunk.toString('utf8'); + return next.length > 20000 ? next.slice(next.length - 20000) : next; +} + +function extractBaseUrl(text: string): string | null { + const match = text.match(/Listening:\s+(http:\/\/(?:localhost|127\.0\.0\.1):\d+)/i); + return match ? 
match[1] : null; +} + +function delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function getServerRoot(): string { + return path.resolve(__dirname, '..', '..'); +} + +function getStartCommand(): { command: string; args: string[] } { + if (process.platform === 'win32') { + return { + command: 'cmd.exe', + args: ['/d', '/s', '/c', 'npm run start'], + }; + } + + return { + command: 'npm', + args: ['run', 'start'], + }; +} + +function toObject(value: unknown, step: string): Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) { + throw new Error(`${step}:invalid_json_object`); + } + return value as Record; +} + +function assertHash64(value: unknown, step: string, field: string): string { + if (typeof value !== 'string' || !HASH64_PATTERN.test(value)) { + throw new Error(`${step}:${field}`); + } + return value; +} + +function assertUuid(value: unknown, step: string, field: string): string { + if (typeof value !== 'string' || !UUID_PATTERN.test(value)) { + throw new Error(`${step}:${field}`); + } + return value; +} + +function assertString(value: unknown, step: string, field: string): string { + if (typeof value !== 'string' || value.length === 0) { + throw new Error(`${step}:${field}`); + } + return value; +} + +function assertNumber(value: unknown, step: string, field: string): number { + if (typeof value !== 'number' || !Number.isFinite(value)) { + throw new Error(`${step}:${field}`); + } + return value; +} + +function assertSuccessEnvelope(result: HttpResult, step: string): Record { + if (result.statusCode < 200 || result.statusCode >= 300) { + throw new Error(`${step}:http_status`); + } + const json = toObject(result.bodyJson, step); + if (json.ok !== true) { + throw new Error(`${step}:ok_flag`); + } + const data = json.data; + if (!data || typeof data !== 'object' || Array.isArray(data)) { + throw new Error(`${step}:data`); + } + return data as Record; +} + +function safeJsonParse(bodyText: 
string): unknown { + if (!bodyText) { + return null; + } + try { + return JSON.parse(bodyText); + } catch { + return bodyText; + } +} + +function sendHttpJson(method: 'GET' | 'POST', url: string, body?: unknown): Promise { + const parsedUrl = new URL(url); + const payload = typeof body === 'undefined' ? null : JSON.stringify(body); + const transport = parsedUrl.protocol === 'https:' ? httpsRequest : httpRequest; + + return new Promise((resolve, reject) => { + const req = transport( + { + protocol: parsedUrl.protocol, + hostname: parsedUrl.hostname, + port: parsedUrl.port, + path: `${parsedUrl.pathname}${parsedUrl.search}`, + method, + headers: { + 'Content-Type': 'application/json', + 'X-Device-ID': FIXED_DEVICE_ID, + ...(payload === null ? {} : { 'Content-Length': Buffer.byteLength(payload) }), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on('data', (chunk) => chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))); + res.on('end', () => { + const bodyText = Buffer.concat(chunks).toString('utf8'); + resolve({ + statusCode: res.statusCode ?? 
0, + bodyText, + bodyJson: safeJsonParse(bodyText), + }); + }); + } + ); + + req.setTimeout(HTTP_TIMEOUT_MS, () => { + req.destroy(new Error('http_timeout')); + }); + req.on('error', reject); + + if (payload !== null) { + req.write(payload); + } + req.end(); + }); +} + +function startServiceProcess(): ServiceProcess { + const startCommand = getStartCommand(); + const child = spawn(startCommand.command, startCommand.args, { + cwd: getServerRoot(), + stdio: ['ignore', 'pipe', 'pipe'], + env: process.env, + }); + + const state: ServiceProcess = { + child, + stdoutText: '', + stderrText: '', + baseUrl: null, + exitCode: null, + exitSignal: null, + }; + + child.stdout.on('data', (chunk: Buffer) => { + state.stdoutText = appendLog(state.stdoutText, chunk); + const nextBaseUrl = extractBaseUrl(state.stdoutText); + if (nextBaseUrl) { + state.baseUrl = nextBaseUrl; + } + }); + + child.stderr.on('data', (chunk: Buffer) => { + state.stderrText = appendLog(state.stderrText, chunk); + const nextBaseUrl = extractBaseUrl(state.stderrText); + if (nextBaseUrl) { + state.baseUrl = nextBaseUrl; + } + }); + + child.on('exit', (code, signal) => { + state.exitCode = code; + state.exitSignal = signal; + }); + + return state; +} + +async function waitForServiceReady(state: ServiceProcess): Promise { + const startedAt = Date.now(); + while (Date.now() - startedAt < READY_TIMEOUT_MS) { + if (state.exitCode !== null) { + throw new Error('service_exited_before_ready'); + } + + if (state.baseUrl) { + try { + await sendHttpJson('GET', `${state.baseUrl}/api/v1/revisions/package/__healthcheck__?limit=1`); + return state.baseUrl; + } catch { + // keep polling until the listener is ready + } + } + + await delay(250); + } + + throw new Error('service_ready_timeout'); +} + +function createFailure(step: string, request: unknown, statusCode: number | null, response: unknown, reason: string): FailureContext { + return { + step, + request, + statusCode, + response, + reason, + }; +} + +async function 
stopServiceProcess(state: ServiceProcess): Promise { + if (state.exitCode !== null) { + return true; + } + + if (typeof state.child.pid !== 'number') { + return false; + } + + if (process.platform === 'win32') { + await new Promise((resolve) => { + const killer = spawn('taskkill', ['/PID', String(state.child.pid), '/T', '/F'], { + stdio: 'ignore', + }); + killer.on('exit', () => resolve()); + killer.on('error', () => resolve()); + }); + } else { + state.child.kill('SIGTERM'); + } + + const deadline = Date.now() + 10000; + while (Date.now() < deadline) { + if (state.exitCode !== null) { + return true; + } + await delay(100); + } + + return state.exitCode !== null; +} + +async function main(): Promise { + const steps: StepRecord[] = []; + const service = startServiceProcess(); + let failure: FailureContext | null = null; + let serviceStopped = false; + + try { + const baseUrl = await waitForServiceReady(service); + + const artifactResult = await sendHttpJson('POST', `${baseUrl}/api/v1/artifacts`, ARTIFACT_REQUEST); + steps.push({ + step: 'artifact', + request: ARTIFACT_REQUEST, + statusCode: artifactResult.statusCode, + response: artifactResult.bodyJson, + }); + const artifactData = assertSuccessEnvelope(artifactResult, 'artifact'); + const bundleHash = assertHash64(artifactData.bundleHash, 'artifact', 'bundleHash'); + assertString(artifactData.id, 'artifact', 'id'); + assertString(artifactData.createdAt, 'artifact', 'createdAt'); + + const revisionRequest = { + packageId: FIXED_PACKAGE_ID, + parentRevisionHash: null, + artifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + metadata: REVISION_METADATA, + }; + const revisionResult = await sendHttpJson('POST', `${baseUrl}/api/v1/revisions`, revisionRequest); + steps.push({ + step: 'revision', + request: revisionRequest, + statusCode: revisionResult.statusCode, + response: revisionResult.bodyJson, + }); + const revisionData = assertSuccessEnvelope(revisionResult, 'revision'); + const revisionHash = 
assertHash64(revisionData.revisionHash, 'revision', 'revisionHash'); + + const executionRequest = { + packageId: FIXED_PACKAGE_ID, + revisionHash, + provider: 'local-smoke', + model: 'shadow-smoke-model', + promptHash: FIXED_PROMPT_HASH, + parameters: { + temperature: 0, + }, + inputArtifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + outputArtifacts: [ + { + bundleHash, + role: 'result', + }, + ], + status: 'success', + startedAt: FIXED_STARTED_AT, + finishedAt: FIXED_FINISHED_AT, + }; + const executionResult = await sendHttpJson('POST', `${baseUrl}/api/v1/executions`, executionRequest); + steps.push({ + step: 'execution', + request: executionRequest, + statusCode: executionResult.statusCode, + response: executionResult.bodyJson, + }); + const executionData = assertSuccessEnvelope(executionResult, 'execution'); + const executionId = assertUuid(executionData.executionId, 'execution', 'executionId'); + const resultHash = assertHash64(executionData.resultHash, 'execution', 'resultHash'); + + const migrationRequest = { + rootRevisionHash: revisionHash, + }; + const migrationResult = await sendHttpJson('POST', `${baseUrl}/api/v1/migration/export`, migrationRequest); + steps.push({ + step: 'migration_export', + request: migrationRequest, + statusCode: migrationResult.statusCode, + response: migrationResult.bodyJson, + }); + const migrationData = assertSuccessEnvelope(migrationResult, 'migration_export'); + const zipPath = assertString(migrationData.zipPath, 'migration_export', 'zipPath'); + const manifest = toObject(migrationData.manifest, 'migration_export'); + assertHash64(manifest.rootRevisionHash, 'migration_export', 'manifest.rootRevisionHash'); + assertNumber(manifest.artifactCount, 'migration_export', 'manifest.artifactCount'); + assertNumber(manifest.revisionCount, 'migration_export', 'manifest.revisionCount'); + if (manifest.rootRevisionHash !== revisionHash) { + throw new Error('migration_export:manifest.rootRevisionHash'); + } + if 
(!existsSync(zipPath)) { + throw new Error('migration_export:zipPathMissing'); + } + + serviceStopped = await stopServiceProcess(service); + + const summary = { + baseUrl, + bundleHash, + revisionHash, + executionId, + resultHash, + zipPath, + serviceStopped, + steps, + }; + + process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`); + process.stdout.write('HTTP_API_E2E_OK\n'); + } catch (error) { + const message = error instanceof Error ? error.message : 'unknown_failure'; + const lastStep = steps.length > 0 ? steps[steps.length - 1] : null; + failure = createFailure( + lastStep ? lastStep.step : 'service_start', + lastStep ? lastStep.request : null, + lastStep ? lastStep.statusCode : null, + lastStep ? lastStep.response : null, + message + ); + } finally { + if (!serviceStopped) { + serviceStopped = await stopServiceProcess(service); + } + + if (failure) { + const failureReport = { + failure, + serviceStopped, + baseUrl: service.baseUrl, + stdoutTail: service.stdoutText, + stderrTail: service.stderrText, + steps, + }; + process.stdout.write(`${JSON.stringify(failureReport, null, 2)}\n`); + process.exit(1); + } + } +} + +if (require.main === module) { + main().catch((error) => { + const message = error instanceof Error ? 
error.message : 'unknown_failure'; + process.stdout.write(`${JSON.stringify({ failure: { step: 'bootstrap', reason: message } }, null, 2)}\n`); + process.exit(1); + }); +} diff --git a/server/src/selftest/http-api.error.e2e.ts b/server/src/selftest/http-api.error.e2e.ts new file mode 100644 index 0000000..789f912 --- /dev/null +++ b/server/src/selftest/http-api.error.e2e.ts @@ -0,0 +1,559 @@ +import path from 'path'; +import { spawn } from 'child_process'; +import type { ChildProcessByStdio } from 'child_process'; +import type { Readable } from 'stream'; + +const PACKAGE_ID = 'g3-error-test-package'; +const DEVICE_ID = 'g3-error-test-device'; +const READY_TIMEOUT_MS = 30000; +const HTTP_TIMEOUT_MS = 10000; +const HASH64_PATTERN = /^[0-9a-f]{64}$/; +const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/; +const HAPPY_PROMPT_HASH = '1111111111111111111111111111111111111111111111111111111111111111'; +const MISMATCH_PROMPT_HASH = '2222222222222222222222222222222222222222222222222222222222222222'; +const STARTED_AT = '2026-03-06T00:00:00.000Z'; +const FINISHED_AT = '2026-03-06T00:00:01.000Z'; +const MISSING_PARENT_HASH = 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee'; +const NOT_FOUND_BUNDLE_HASH = 'ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff'; + +type ServiceProcess = { + child: ChildProcessByStdio; + stdoutText: string; + stderrText: string; + baseUrl: string | null; + exitCode: number | null; + exitSignal: NodeJS.Signals | null; +}; + +type HttpResult = { + statusCode: number; + bodyText: string; + bodyJson: unknown; +}; + +type CaseSummary = { + caseName: string; + status: number; + errorCode: string; + pass: boolean; +}; + +type FailureContext = { + step: string; + request: unknown; + statusCode: number | null; + response: unknown; + reason: string; +}; + +function appendLog(current: string, chunk: Buffer): string { + const next = current + chunk.toString('utf8'); + return 
next.length > 20000 ? next.slice(next.length - 20000) : next; +} + +function extractBaseUrl(text: string): string | null { + const match = text.match(/Listening:\s+(http:\/\/(?:localhost|127\.0\.0\.1):\d+)/i); + return match ? match[1] : null; +} + +function delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function getServerRoot(): string { + return path.resolve(__dirname, '..', '..'); +} + +function getStartCommand(): { command: string; args: string[] } { + if (process.platform === 'win32') { + return { + command: 'cmd.exe', + args: ['/d', '/s', '/c', 'npm run start'], + }; + } + + return { + command: 'npm', + args: ['run', 'start'], + }; +} + +function safeJsonParse(text: string): unknown { + if (!text) { + return null; + } + + try { + return JSON.parse(text); + } catch { + return text; + } +} + +function toObject(value: unknown, step: string): Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) { + throw new Error(`${step}:invalid_object`); + } + return value as Record; +} + +function assertString(value: unknown, step: string, field: string): string { + if (typeof value !== 'string' || value.length === 0) { + throw new Error(`${step}:${field}`); + } + return value; +} + +function assertHash64(value: unknown, step: string, field: string): string { + const out = assertString(value, step, field); + if (!HASH64_PATTERN.test(out)) { + throw new Error(`${step}:${field}`); + } + return out; +} + +function assertUuid(value: unknown, step: string, field: string): string { + const out = assertString(value, step, field); + if (!UUID_PATTERN.test(out)) { + throw new Error(`${step}:${field}`); + } + return out; +} + +async function sendJsonRequest(method: 'GET' | 'POST', url: string, body?: unknown): Promise { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), HTTP_TIMEOUT_MS); + + try { + const response = await fetch(url, { + method, + headers: { + 
'Content-Type': 'application/json', + 'X-Device-ID': DEVICE_ID, + }, + ...(typeof body === 'undefined' ? {} : { body: JSON.stringify(body) }), + signal: controller.signal, + }); + const bodyText = await response.text(); + return { + statusCode: response.status, + bodyText, + bodyJson: safeJsonParse(bodyText), + }; + } finally { + clearTimeout(timer); + } +} + +function startServiceProcess(): ServiceProcess { + const startCommand = getStartCommand(); + const child = spawn(startCommand.command, startCommand.args, { + cwd: getServerRoot(), + stdio: ['ignore', 'pipe', 'pipe'], + env: process.env, + }); + + const state: ServiceProcess = { + child, + stdoutText: '', + stderrText: '', + baseUrl: null, + exitCode: null, + exitSignal: null, + }; + + child.stdout.on('data', (chunk: Buffer) => { + state.stdoutText = appendLog(state.stdoutText, chunk); + const nextBaseUrl = extractBaseUrl(state.stdoutText); + if (nextBaseUrl) { + state.baseUrl = nextBaseUrl; + } + }); + + child.stderr.on('data', (chunk: Buffer) => { + state.stderrText = appendLog(state.stderrText, chunk); + const nextBaseUrl = extractBaseUrl(state.stderrText); + if (nextBaseUrl) { + state.baseUrl = nextBaseUrl; + } + }); + + child.on('exit', (code, signal) => { + state.exitCode = code; + state.exitSignal = signal; + }); + + return state; +} + +async function waitForServiceReady(state: ServiceProcess): Promise { + const startedAt = Date.now(); + while (Date.now() - startedAt < READY_TIMEOUT_MS) { + if (state.exitCode !== null) { + throw new Error('service_exited_before_ready'); + } + + if (state.baseUrl) { + try { + const response = await sendJsonRequest('GET', `${state.baseUrl}/api/v1/revisions/package/__healthcheck__?limit=1`); + if (response.statusCode > 0) { + return state.baseUrl; + } + } catch { + // continue polling + } + } + + await delay(250); + } + + throw new Error('service_ready_timeout'); +} + +async function stopServiceProcess(state: ServiceProcess): Promise { + if (state.exitCode !== null) { + 
return true; + } + + if (typeof state.child.pid !== 'number') { + return false; + } + + if (process.platform === 'win32') { + await new Promise((resolve) => { + const killer = spawn('taskkill', ['/PID', String(state.child.pid), '/T', '/F'], { + stdio: 'ignore', + }); + killer.on('exit', () => resolve()); + killer.on('error', () => resolve()); + }); + } else { + state.child.kill('SIGTERM'); + } + + const deadline = Date.now() + 10000; + while (Date.now() < deadline) { + if (state.exitCode !== null) { + return true; + } + await delay(100); + } + + return state.exitCode !== null; +} + +function assertSuccessEnvelope(result: HttpResult, step: string): Record { + if (result.statusCode < 200 || result.statusCode >= 300) { + throw new Error(`${step}:http_status`); + } + const body = toObject(result.bodyJson, step); + if (body.ok !== true) { + throw new Error(`${step}:ok_flag`); + } + const data = body.data; + if (!data || typeof data !== 'object' || Array.isArray(data)) { + throw new Error(`${step}:data`); + } + return data as Record; +} + +function validateErrorCase( + caseName: string, + result: HttpResult, + allowedStatuses: number[], + allowedCodes?: string[] +): CaseSummary { + const body = toObject(result.bodyJson, caseName); + if (!allowedStatuses.includes(result.statusCode)) { + throw new Error(`${caseName}:status`); + } + if (body.ok !== false) { + throw new Error(`${caseName}:ok_flag`); + } + const error = toObject(body.error, caseName); + const errorCode = assertString(error.code, caseName, 'error.code'); + assertString(error.message, caseName, 'error.message'); + if (Array.isArray(allowedCodes) && allowedCodes.length > 0 && !allowedCodes.includes(errorCode)) { + throw new Error(`${caseName}:error.code`); + } + return { + caseName, + status: result.statusCode, + errorCode, + pass: true, + }; +} + +async function createFixtureArtifact(baseUrl: string): Promise<{ bundleHash: string }> { + const artifactRequest = { + schema: 'artifact.task.state.v1', + identity: { 
+ packageId: PACKAGE_ID, + revisionId: null, + revisionHash: null, + }, + payload: { + name: 'g3-state', + step: 1, + }, + references: [], + }; + const artifactResult = await sendJsonRequest('POST', `${baseUrl}/api/v1/artifacts`, artifactRequest); + const artifactData = assertSuccessEnvelope(artifactResult, 'fixture_artifact'); + return { + bundleHash: assertHash64(artifactData.bundleHash, 'fixture_artifact', 'bundleHash'), + }; +} + +async function createFixtureExecution(baseUrl: string, bundleHash: string): Promise<{ revisionHash: string; executionId: string }> { + const revisionRequest = { + packageId: PACKAGE_ID, + parentRevisionHash: null, + artifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + metadata: { + author: 'smoke-test', + message: 'initial revision', + createdBy: 'smoke-test', + timestamp: STARTED_AT, + source: 'system', + tags: ['smoke', 'local-api'], + }, + }; + const revisionResult = await sendJsonRequest('POST', `${baseUrl}/api/v1/revisions`, revisionRequest); + const revisionData = assertSuccessEnvelope(revisionResult, 'fixture_revision'); + const revisionHash = assertHash64(revisionData.revisionHash, 'fixture_revision', 'revisionHash'); + + const executionRequest = { + packageId: PACKAGE_ID, + revisionHash, + provider: 'local-smoke', + model: 'shadow-smoke-model', + promptHash: HAPPY_PROMPT_HASH, + parameters: { + temperature: 0, + }, + inputArtifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + outputArtifacts: [ + { + bundleHash, + role: 'result', + }, + ], + status: 'success', + startedAt: STARTED_AT, + finishedAt: FINISHED_AT, + }; + const executionResult = await sendJsonRequest('POST', `${baseUrl}/api/v1/executions`, executionRequest); + const executionData = assertSuccessEnvelope(executionResult, 'fixture_execution'); + + return { + revisionHash, + executionId: assertUuid(executionData.executionId, 'fixture_execution', 'executionId'), + }; +} + +async function main(): Promise { + const service = 
startServiceProcess(); + let serviceStopped = false; + let failure: FailureContext | null = null; + const caseSummaries: CaseSummary[] = []; + const steps: Array<{ caseName: string; request: unknown; statusCode: number | null; response: unknown }> = []; + let bundleHash: string | null = null; + let revisionHash: string | null = null; + let executionId: string | null = null; + + try { + const baseUrl = await waitForServiceReady(service); + + const fixtureArtifact = await createFixtureArtifact(baseUrl); + bundleHash = fixtureArtifact.bundleHash; + + const case1Request = { + identity: { + revisionId: null, + revisionHash: null, + }, + payload: { + name: 'broken', + }, + references: 'not-an-array', + }; + const case1Result = await sendJsonRequest('POST', `${baseUrl}/api/v1/artifacts`, case1Request); + steps.push({ caseName: 'artifact_invalid_input', request: case1Request, statusCode: case1Result.statusCode, response: case1Result.bodyJson }); + caseSummaries.push(validateErrorCase('artifact_invalid_input', case1Result, [400])); + + const case2Request = null; + const case2Result = await sendJsonRequest('GET', `${baseUrl}/api/v1/artifacts/non-existent-package/${NOT_FOUND_BUNDLE_HASH}`); + steps.push({ caseName: 'artifact_not_found', request: case2Request, statusCode: case2Result.statusCode, response: case2Result.bodyJson }); + caseSummaries.push(validateErrorCase('artifact_not_found', case2Result, [404], ['ERR_ARTIFACT_NOT_FOUND'])); + + const case3Request = { + packageId: PACKAGE_ID, + parentRevisionHash: MISSING_PARENT_HASH, + artifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + metadata: { + author: 'smoke-test', + message: 'missing parent', + createdBy: 'smoke-test', + timestamp: STARTED_AT, + source: 'system', + tags: ['g3', 'missing-parent'], + }, + }; + const case3Result = await sendJsonRequest('POST', `${baseUrl}/api/v1/revisions`, case3Request); + steps.push({ caseName: 'revision_parent_missing', request: case3Request, statusCode: 
case3Result.statusCode, response: case3Result.bodyJson }); + caseSummaries.push(validateErrorCase('revision_parent_missing', case3Result, [404], ['ERR_REVISION_PARENT_NOT_FOUND'])); + + const case4Request = { + packageId: PACKAGE_ID, + revisionHash: MISSING_PARENT_HASH, + provider: 'local-smoke', + model: 'shadow-smoke-model', + promptHash: 'not-a-hex-hash', + parameters: { + temperature: 0, + }, + inputArtifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + outputArtifacts: [ + { + bundleHash, + role: 'result', + }, + ], + status: 'done', + startedAt: STARTED_AT, + finishedAt: FINISHED_AT, + }; + const case4Result = await sendJsonRequest('POST', `${baseUrl}/api/v1/executions`, case4Request); + steps.push({ caseName: 'execution_invalid_input', request: case4Request, statusCode: case4Result.statusCode, response: case4Result.bodyJson }); + caseSummaries.push(validateErrorCase('execution_invalid_input', case4Result, [400])); + + const fixtureExecution = await createFixtureExecution(baseUrl, bundleHash); + revisionHash = fixtureExecution.revisionHash; + executionId = fixtureExecution.executionId; + + const case5Request = { + promptHash: MISMATCH_PROMPT_HASH, + parameters: { + temperature: 0, + }, + inputArtifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + outputArtifacts: [ + { + bundleHash, + role: 'result', + }, + ], + status: 'success', + }; + const case5Result = await sendJsonRequest('POST', `${baseUrl}/api/v1/executions/${executionId}/replay`, case5Request); + steps.push({ caseName: 'execution_replay_mismatch', request: case5Request, statusCode: case5Result.statusCode, response: case5Result.bodyJson }); + caseSummaries.push( + validateErrorCase('execution_replay_mismatch', case5Result, [422], [ + 'ERR_EXECUTION_NON_DETERMINISTIC', + 'ERR_EXECUTION_REPLAY_MISMATCH', + ]) + ); + + const case6Request = { + zipPath: path.resolve(getServerRoot(), 'tmp', 'g3-error-test', 'missing.zip'), + }; + const case6Result = await sendJsonRequest('POST', 
`${baseUrl}/api/v1/migration/verify`, case6Request); + steps.push({ caseName: 'migration_verify_invalid_path', request: case6Request, statusCode: case6Result.statusCode, response: case6Result.bodyJson }); + caseSummaries.push(validateErrorCase('migration_verify_invalid_path', case6Result, [400, 404, 422])); + + serviceStopped = await stopServiceProcess(service); + + process.stdout.write( + `${JSON.stringify( + { + baseUrl, + happyPathFixture: { + packageId: PACKAGE_ID, + bundleHash, + revisionHash, + executionId, + }, + cases: caseSummaries, + steps, + serviceStopped, + }, + null, + 2 + )}\n` + ); + process.stdout.write('HTTP_API_ERROR_E2E_OK\n'); + } catch (error) { + const message = error instanceof Error ? error.message : 'unknown_failure'; + const lastStep = steps.length > 0 ? steps[steps.length - 1] : null; + failure = { + step: lastStep ? lastStep.caseName : 'service_start', + request: lastStep ? lastStep.request : null, + statusCode: lastStep ? lastStep.statusCode : null, + response: lastStep ? lastStep.response : null, + reason: message, + }; + } finally { + if (!serviceStopped) { + serviceStopped = await stopServiceProcess(service); + } + + if (failure) { + process.stdout.write( + `${JSON.stringify( + { + failure, + happyPathFixture: { + packageId: PACKAGE_ID, + bundleHash, + revisionHash, + executionId, + }, + cases: caseSummaries, + steps, + serviceStopped, + baseUrl: service.baseUrl, + stdoutTail: service.stdoutText, + stderrTail: service.stderrText, + }, + null, + 2 + )}\n` + ); + process.exit(1); + } + } +} + +if (require.main === module) { + main().catch((error) => { + const reason = error instanceof Error ? 
error.message : 'unknown_failure'; + process.stdout.write(`${JSON.stringify({ failure: { step: 'bootstrap', reason } }, null, 2)}\n`); + process.exit(1); + }); +} diff --git a/server/src/selftest/http-api.selftest.ts b/server/src/selftest/http-api.selftest.ts new file mode 100644 index 0000000..0e73673 --- /dev/null +++ b/server/src/selftest/http-api.selftest.ts @@ -0,0 +1,111 @@ +import express from 'express'; +import type { Express, Request, Response } from 'express'; +import artifactRoutes from '../routes/artifact.routes'; +import revisionRoutes from '../routes/revision.routes'; +import executionRoutes from '../routes/execution.routes'; +import migrationRoutes from '../routes/migration.routes'; +import { createArtifact } from '../controllers/artifact.controller'; +import { createRevision } from '../controllers/revision.controller'; +import { recordExecution } from '../controllers/execution.controller'; +import { verifyMigration } from '../controllers/migration.controller'; + +type MockResponse = Response & { + statusCodeValue: number; + jsonBody: unknown; +}; + +function createMockResponse(): MockResponse { + const response: { + statusCodeValue: number; + jsonBody: unknown; + status(code: number): MockResponse; + json(payload: unknown): MockResponse; + } = { + statusCodeValue: 200, + jsonBody: undefined, + status(code: number) { + response.statusCodeValue = code; + return response as unknown as MockResponse; + }, + json(payload: unknown) { + response.jsonBody = payload; + return response as unknown as MockResponse; + }, + }; + + return response as unknown as MockResponse; +} + +function assertCondition(condition: boolean): void { + if (!condition) { + throw new Error('http_api_selftest_failed'); + } +} + +function hasMountedPath(app: Express, fragment: string): boolean { + const stack = (app as unknown as { _router?: { stack?: Array<{ regexp?: { toString(): string } }> } })._router?.stack; + if (!Array.isArray(stack)) { + return false; + } + + return 
stack.some((layer) => { + if (!layer.regexp || typeof layer.regexp.toString !== 'function') { + return false; + } + return layer.regexp.toString().includes(fragment); + }); +} + +function assertInvalidRequestResponse(response: MockResponse): void { + const payload = response.jsonBody as { + ok?: unknown; + error?: { code?: unknown; message?: unknown }; + }; + + assertCondition(response.statusCodeValue === 400); + assertCondition(Boolean(payload) && payload.ok === false); + assertCondition(Boolean(payload.error) && payload.error?.code === 'ERR_INVALID_INPUT'); + assertCondition(typeof payload.error?.message === 'string' && payload.error.message.length > 0); +} + +async function runHttpApiSelftest(): Promise { + const app = express(); + app.use('/api/v1/artifacts', artifactRoutes); + app.use('/api/v1/revisions', revisionRoutes); + app.use('/api/v1/executions', executionRoutes); + app.use('/api/v1/migration', migrationRoutes); + + assertCondition(hasMountedPath(app, 'artifacts')); + assertCondition(hasMountedPath(app, 'revisions')); + assertCondition(hasMountedPath(app, 'executions')); + assertCondition(hasMountedPath(app, 'migration')); + + const artifactResponse = createMockResponse(); + await createArtifact({ body: {}, params: {}, query: {} } as Request, artifactResponse); + assertInvalidRequestResponse(artifactResponse); + + const revisionResponse = createMockResponse(); + await createRevision({ body: {}, params: {}, query: {} } as Request, revisionResponse); + assertInvalidRequestResponse(revisionResponse); + + const executionResponse = createMockResponse(); + await recordExecution({ body: {}, params: {}, query: {} } as Request, executionResponse); + assertInvalidRequestResponse(executionResponse); + + const migrationResponse = createMockResponse(); + await verifyMigration({ body: {}, params: {}, query: {} } as Request, migrationResponse); + assertInvalidRequestResponse(migrationResponse); +} + +if (require.main === module) { + runHttpApiSelftest() + .then(() => 
{ + process.stdout.write('HTTP_API_SELFTEST_OK\n'); + }) + .catch(() => { + process.stdout.write('HTTP_API_SELFTEST_FAIL\n'); + process.exit(1); + }); +} + +export { runHttpApiSelftest }; diff --git a/server/src/selftest/identity-boundary.selftest.ts b/server/src/selftest/identity-boundary.selftest.ts new file mode 100644 index 0000000..142a02d --- /dev/null +++ b/server/src/selftest/identity-boundary.selftest.ts @@ -0,0 +1,213 @@ +import { + ArtifactHashMismatchError, + ArtifactStoreService, + ArtifactValidationError, +} from '../services/artifact-store.service'; +import { + RevisionService, + RevisionServiceError, + type RevisionStorageAdapter, +} from '../services/revision.service'; +import { computeBundleHash } from '../lib/artifact-hash'; + +function assertCondition(condition: boolean): void { + if (!condition) { + throw new Error('identity_boundary_selftest_failed'); + } +} + +async function testBundleHashMismatch(): Promise { + const service = new ArtifactStoreService(); + + let rejected = false; + try { + await service.storeArtifactBundle({ + schema: 'artifact.task.state.v1', + identity: { + packageId: 'identity-regression', + revisionHash: null, + revisionId: null, + }, + payload: { + name: 'hash-test', + }, + bundleHash: '0000000000000000000000000000000000000000000000000000000000000000', + }); + } catch (error) { + if (error instanceof ArtifactHashMismatchError && error.code === 'ERR_ARTIFACT_HASH_MISMATCH') { + rejected = true; + } else { + throw error; + } + } + + assertCondition(rejected); +} + +async function testDangerousKeyRejection(): Promise { + const service = new ArtifactStoreService(); + const dangerousPayload = Object.create(null) as Record; + dangerousPayload.__proto__ = { injected: true }; + + let rejected = false; + try { + await service.storeArtifactBundle({ + schema: 'artifact.task.state.v1', + identity: { + packageId: 'identity-regression', + revisionHash: null, + revisionId: null, + }, + payload: dangerousPayload, + }); + } catch 
(error) { + if (error instanceof ArtifactValidationError) { + rejected = true; + } else { + throw error; + } + } + + assertCondition(rejected); +} + +function createRevisionStorageAdapter(bundleHash: string): RevisionStorageAdapter { + const revisions = new Map< + string, + { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + author: string; + message: string; + createdBy: string; + timestamp: Date; + source: string; + metadata: unknown; + createdAt: Date; + artifacts: Array<{ bundleHash: string; role: string }>; + } + >(); + + return { + async findRevisionByHash(revisionHash) { + return revisions.get(revisionHash) ?? null; + }, + + async createRevision(input) { + const stored = { + revisionHash: input.revisionHash, + packageId: input.packageId, + parentRevisionHash: input.parentRevisionHash, + author: input.metadata.author, + message: input.metadata.message, + createdBy: input.metadata.createdBy, + timestamp: new Date(input.metadata.timestamp), + source: input.metadata.source, + metadata: input.metadata, + createdAt: new Date(input.metadata.timestamp), + artifacts: input.artifacts.map((artifact) => ({ + bundleHash: artifact.bundleHash, + role: artifact.role, + })), + }; + revisions.set(input.revisionHash, stored); + return stored; + }, + + async listRevisions(packageId, limit) { + return Array.from(revisions.values()) + .filter((revision) => revision.packageId === packageId) + .slice(0, limit); + }, + + async artifactExists(packageId, requestedBundleHash) { + return packageId === 'identity-package-a' && requestedBundleHash === bundleHash; + }, + }; +} + +async function testCrossPackageParent(): Promise { + const bundleHash = computeBundleHash({ + schema: 'artifact.task.state.v1', + packageId: 'identity-package-a', + revisionId: null, + revisionHash: null, + payload: { + name: 'package-a-state', + }, + }); + + const service = new RevisionService(createRevisionStorageAdapter(bundleHash)); + const metadata = { + author: 
'identity-regression', + message: 'package-a-root', + createdBy: 'identity-regression', + timestamp: '2026-03-06T00:00:00.000Z', + source: 'system' as const, + tags: ['identity', 'boundary'], + }; + + const root = await service.createRevision({ + packageId: 'identity-package-a', + parentRevisionHash: null, + artifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + metadata, + }); + + let rejected = false; + try { + await service.createRevision({ + packageId: 'identity-package-b', + parentRevisionHash: root.revisionHash, + artifacts: [ + { + bundleHash, + role: 'primary_state', + }, + ], + metadata: { + author: 'identity-regression', + message: 'cross-package-child', + createdBy: 'identity-regression', + timestamp: '2026-03-06T00:00:01.000Z', + source: 'system', + tags: ['identity', 'boundary'], + }, + }); + } catch (error) { + if (error instanceof RevisionServiceError && error.code === 'ERR_REVISION_PARENT_PACKAGE_MISMATCH') { + rejected = true; + } else { + throw error; + } + } + + assertCondition(rejected); +} + +async function run(): Promise { + await testBundleHashMismatch(); + await testDangerousKeyRejection(); + await testCrossPackageParent(); + process.stdout.write('IDENTITY_BOUNDARY_SELFTEST_OK\n'); +} + +if (require.main === module) { + run().catch((error) => { + process.stdout.write('IDENTITY_BOUNDARY_SELFTEST_FAIL\n'); + if (error instanceof Error && error.stack) { + process.stderr.write(`${error.stack}\n`); + } else { + process.stderr.write(`${String(error)}\n`); + } + process.exit(1); + }); +} + +export { run as runIdentityBoundarySelftest }; diff --git a/server/src/selftest/migration.selftest.ts b/server/src/selftest/migration.selftest.ts new file mode 100644 index 0000000..e70c53c --- /dev/null +++ b/server/src/selftest/migration.selftest.ts @@ -0,0 +1,275 @@ +import { existsSync, mkdirSync, unlinkSync } from 'fs'; +import path from 'path'; +import { canonicalizeJson, computeBundleHash } from '../lib/artifact-hash'; +import { 
computeRevisionHash, type RevisionArtifactReference, type RevisionMetadata } from '../lib/revision-hash'; +import { readMigrationPackageZip, parseArtifactsJsonl, type ArtifactBundleLike } from '../lib/migration-package'; +import { MigrationService } from '../services/migration.service'; + +type StoredRevision = { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + artifacts: RevisionArtifactReference[]; + metadata: RevisionMetadata; +}; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function assertCondition(condition: boolean): void { + if (!condition) { + throw new Error('migration_selftest_failed'); + } +} + +function normalizeBundle(bundle: ArtifactBundleLike): ArtifactBundleLike { + return { + schema: bundle.schema, + identity: { + packageId: bundle.identity.packageId, + revisionId: bundle.identity.revisionId ?? null, + revisionHash: bundle.identity.revisionHash ?? null, + }, + payload: bundle.payload, + references: Array.isArray(bundle.references) + ? [...bundle.references].sort((a, b) => compareStrings(a.bundleHash, b.bundleHash) || compareStrings(a.role, b.role)) + : [], + }; +} + +function hashBundle(bundle: ArtifactBundleLike): string { + const normalized = normalizeBundle(bundle); + return computeBundleHash({ + schema: normalized.schema, + packageId: normalized.identity.packageId, + revisionId: normalized.identity.revisionId ?? null, + revisionHash: normalized.identity.revisionHash ?? 
null, + payload: normalized, + }); +} + +function cloneBundle(bundle: ArtifactBundleLike): ArtifactBundleLike { + return JSON.parse(canonicalizeJson(normalizeBundle(bundle))) as ArtifactBundleLike; +} + +function createInMemoryAdapter() { + const artifactsByHash = new Map(); + const artifactsByPackageAndHash = new Map(); + const revisionsByHash = new Map(); + + return { + adapter: { + async findRevisionByHash(revisionHash: string): Promise { + return revisionsByHash.get(revisionHash) ?? null; + }, + + async findArtifactByPackageAndHash(packageId: string, bundleHash: string): Promise { + return artifactsByPackageAndHash.get(`${packageId}:${bundleHash}`) ?? null; + }, + + async findArtifactByHash(bundleHash: string): Promise { + return artifactsByHash.get(bundleHash) ?? null; + }, + + async storeArtifactBundle(bundle: ArtifactBundleLike): Promise { + const normalized = cloneBundle(bundle); + const bundleHash = hashBundle(normalized); + const existing = artifactsByHash.get(bundleHash); + if (existing && canonicalizeJson(existing) !== canonicalizeJson(normalized)) { + throw new Error('migration_selftest_failed'); + } + artifactsByHash.set(bundleHash, normalized); + artifactsByPackageAndHash.set(`${normalized.identity.packageId}:${bundleHash}`, normalized); + }, + + async createRevision(revision: StoredRevision): Promise<{ revisionHash: string }> { + for (const artifact of revision.artifacts) { + if (!artifactsByPackageAndHash.has(`${revision.packageId}:${artifact.bundleHash}`)) { + throw new Error('migration_selftest_failed'); + } + } + const recomputed = computeRevisionHash({ + packageId: revision.packageId, + parentRevisionHash: revision.parentRevisionHash, + artifacts: revision.artifacts, + metadata: revision.metadata, + }); + if (recomputed !== revision.revisionHash) { + throw new Error('migration_selftest_failed'); + } + if (!revisionsByHash.has(revision.revisionHash)) { + revisionsByHash.set(revision.revisionHash, { + revisionHash: revision.revisionHash, + 
packageId: revision.packageId, + parentRevisionHash: revision.parentRevisionHash, + artifacts: revision.artifacts.map((artifact) => ({ bundleHash: artifact.bundleHash, role: artifact.role })), + metadata: { + author: revision.metadata.author, + message: revision.metadata.message, + createdBy: revision.metadata.createdBy, + timestamp: revision.metadata.timestamp, + source: revision.metadata.source, + tags: [...(revision.metadata.tags ?? [])], + }, + }); + } + return { revisionHash: revision.revisionHash }; + }, + }, + artifactsByHash, + revisionsByHash, + }; +} + +async function runMigrationSelftest(): Promise<{ zipPath: string }> { + const packageId = 'migration-selftest-package'; + const genesisTimestamp = '2026-03-05T00:00:00.000Z'; + const rootTimestamp = '2026-03-06T00:00:00.000Z'; + + const baseArtifact: ArtifactBundleLike = { + schema: 'artifact.task.state.v1', + identity: { packageId, revisionId: null, revisionHash: null }, + payload: { + name: 'base-artifact', + state: 'ready', + }, + references: [], + }; + + const derivedArtifact: ArtifactBundleLike = { + schema: 'artifact.execution.record.v1', + identity: { packageId, revisionId: null, revisionHash: null }, + payload: { + name: 'derived-artifact', + result: 'ok', + }, + references: [{ bundleHash: hashBundle(baseArtifact), role: 'depends_on' }], + }; + + const baseBundleHash = hashBundle(baseArtifact); + const derivedBundleHash = hashBundle(derivedArtifact); + + const genesisRevision: StoredRevision = { + revisionHash: computeRevisionHash({ + packageId, + parentRevisionHash: null, + artifacts: [{ bundleHash: baseBundleHash, role: 'state' }], + metadata: { + author: 'selftest', + message: 'genesis', + createdBy: 'selftest', + timestamp: genesisTimestamp, + source: 'system', + tags: ['migration', 'selftest'], + }, + }), + packageId, + parentRevisionHash: null, + artifacts: [{ bundleHash: baseBundleHash, role: 'state' }], + metadata: { + author: 'selftest', + message: 'genesis', + createdBy: 'selftest', + 
timestamp: genesisTimestamp, + source: 'system', + tags: ['migration', 'selftest'], + }, + }; + + const rootRevision: StoredRevision = { + revisionHash: computeRevisionHash({ + packageId, + parentRevisionHash: genesisRevision.revisionHash, + artifacts: [{ bundleHash: derivedBundleHash, role: 'execution' }], + metadata: { + author: 'selftest', + message: 'root', + createdBy: 'selftest', + timestamp: rootTimestamp, + source: 'system', + tags: ['migration', 'selftest'], + }, + }), + packageId, + parentRevisionHash: genesisRevision.revisionHash, + artifacts: [{ bundleHash: derivedBundleHash, role: 'execution' }], + metadata: { + author: 'selftest', + message: 'root', + createdBy: 'selftest', + timestamp: rootTimestamp, + source: 'system', + tags: ['migration', 'selftest'], + }, + }; + + const sourceStore = createInMemoryAdapter(); + await sourceStore.adapter.storeArtifactBundle(baseArtifact); + await sourceStore.adapter.storeArtifactBundle(derivedArtifact); + await sourceStore.adapter.createRevision(genesisRevision); + await sourceStore.adapter.createRevision(rootRevision); + + const service = new MigrationService(sourceStore.adapter); + const zipPath = path.resolve(process.cwd(), 'dist', 'selftest', 'migration_package_selftest.zip'); + mkdirSync(path.dirname(zipPath), { recursive: true }); + if (existsSync(zipPath)) { + unlinkSync(zipPath); + } + + const exportedZipPath = await service.exportMigrationPackage(rootRevision.revisionHash, zipPath); + assertCondition(exportedZipPath === zipPath); + + const verified = await service.verifyMigrationPackage(zipPath); + assertCondition(verified.ok === true); + assertCondition(verified.rootRevisionHash === rootRevision.revisionHash); + assertCondition(verified.revisionCount === 2); + assertCondition(verified.artifactCount === 4); + + const importedStore = createInMemoryAdapter(); + const importService = new MigrationService(importedStore.adapter); + const imported = await importService.importMigrationPackage(zipPath); + 
assertCondition(imported.ok === true); + assertCondition(imported.revisionCount === 2); + assertCondition(imported.artifactCount === 4); + assertCondition(importedStore.artifactsByHash.size === 4); + assertCondition(importedStore.revisionsByHash.size === 2); + + const zipContents = readMigrationPackageZip(zipPath); + assertCondition(zipContents.manifest.createdAt === rootTimestamp); + assertCondition(zipContents.manifest.artifactCount === 4); + assertCondition(zipContents.manifest.revisionCount === 2); + + const parsedArtifacts = parseArtifactsJsonl(zipContents.artifactsJsonl); + assertCondition(parsedArtifacts.length === 4); + + for (const bundle of parsedArtifacts) { + const recomputedBundleHash = hashBundle(bundle); + assertCondition(importedStore.artifactsByHash.has(recomputedBundleHash)); + } + + for (const revision of importedStore.revisionsByHash.values()) { + const recomputedRevisionHash = computeRevisionHash({ + packageId: revision.packageId, + parentRevisionHash: revision.parentRevisionHash, + artifacts: revision.artifacts, + metadata: revision.metadata, + }); + assertCondition(recomputedRevisionHash === revision.revisionHash); + } + + return { zipPath }; +} + +if (require.main === module) { + runMigrationSelftest() + .then(() => { + process.stdout.write('MIGRATION_SELFTEST_OK\n'); + }) + .catch(() => { + process.stdout.write('MIGRATION_SELFTEST_FAIL\n'); + process.exit(1); + }); +} diff --git a/server/src/selftest/protocol.selftest.ts b/server/src/selftest/protocol.selftest.ts new file mode 100644 index 0000000..9327b69 --- /dev/null +++ b/server/src/selftest/protocol.selftest.ts @@ -0,0 +1,267 @@ +import { createHash } from 'crypto'; +import { readFileSync } from 'fs'; +import path from 'path'; + +type ProtocolErrorCode = + | 'ERR_PAYLOAD_TOO_LARGE' + | 'ERR_PAYLOAD_UNSAFE_KEY' + | 'ERR_PAYLOAD_CONTAINS_NULL_CHAR' + | 'ERR_PAYLOAD_STRUCTURE_LIMIT' + | 'ERR_ARTIFACT_HASH_COLLISION_OR_IMPL_BUG'; + +const MAX_DEPTH = 64; +const MAX_NODES = 100000; +const 
MAX_PAYLOAD_BYTES = 1024 * 1024; +const REMOVE_VALUE = Symbol('remove_value'); +const UNSAFE_KEYS = new Set(['__proto__', 'constructor', 'prototype']); + +type SanitizedJson = null | string | number | boolean | SanitizedJson[] | { [key: string]: SanitizedJson }; +type Sanitized = SanitizedJson | typeof REMOVE_VALUE; + +type CanonicalVector = { + id: string; + inputSpec: unknown; + expectedCanonical?: string; + expectedError?: ProtocolErrorCode; +}; + +type HashVector = { + id: string; + bundleSpec: unknown; + expectedCanonical: string; + expectedBundleHash: string; +}; + +class ProtocolError extends Error { + code: ProtocolErrorCode; + + constructor(code: ProtocolErrorCode) { + super(code); + this.code = code; + this.name = 'ProtocolError'; + } +} + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function isPlainObject(value: unknown): value is Record<string, unknown> { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function decodeSpec(value: unknown): unknown { + if (Array.isArray(value)) { + const out: unknown[] = []; + for (const entry of value) { + out.push(decodeSpec(entry)); + } + return out; + } + + if (!isPlainObject(value)) { + return value; + } + + const keys = Object.keys(value); + if (keys.length === 1 && keys[0] === '$st') { + const marker = value.$st; + if (marker === 'undefined') return undefined; + if (marker === 'negzero') return -0; + if (marker === 'nan') return Number.NaN; + if (marker === 'infinity') return Number.POSITIVE_INFINITY; + if (marker === 'minus_infinity') return Number.NEGATIVE_INFINITY; + } + + const out: Record<string, unknown> = Object.create(null); + for (const key of keys) { + out[key] = decodeSpec(value[key]); + } + return out; +} + +function sanitizeValue(value: unknown, depth: number, state: { nodes: number }): Sanitized { + if (depth > MAX_DEPTH)
{ + throw new ProtocolError('ERR_PAYLOAD_STRUCTURE_LIMIT'); + } + + state.nodes += 1; + if (state.nodes > MAX_NODES) { + throw new ProtocolError('ERR_PAYLOAD_STRUCTURE_LIMIT'); + } + + if (value === null) return null; + + if (typeof value === 'string') { + if (value.includes('\u0000')) { + throw new ProtocolError('ERR_PAYLOAD_CONTAINS_NULL_CHAR'); + } + return value.normalize('NFC'); + } + + if (typeof value === 'boolean') return value; + + if (typeof value === 'number') { + if (!Number.isFinite(value)) { + throw new ProtocolError('ERR_PAYLOAD_STRUCTURE_LIMIT'); + } + return Object.is(value, -0) ? 0 : value; + } + + if (typeof value === 'undefined' || typeof value === 'function' || typeof value === 'symbol') { + return REMOVE_VALUE; + } + + if (typeof value === 'bigint') { + throw new ProtocolError('ERR_PAYLOAD_STRUCTURE_LIMIT'); + } + + if (Array.isArray(value)) { + const out: SanitizedJson[] = []; + for (const entry of value) { + const sanitized = sanitizeValue(entry, depth + 1, state); + if (sanitized !== REMOVE_VALUE) { + out.push(sanitized); + } + } + return out; + } + + if (!isPlainObject(value)) { + throw new ProtocolError('ERR_PAYLOAD_STRUCTURE_LIMIT'); + } + + const out: { [key: string]: SanitizedJson } = Object.create(null); + for (const key of Object.keys(value)) { + if (UNSAFE_KEYS.has(key)) { + throw new ProtocolError('ERR_PAYLOAD_UNSAFE_KEY'); + } + + const sanitized = sanitizeValue(value[key], depth + 1, state); + if (sanitized !== REMOVE_VALUE) { + out[key] = sanitized; + } + } + return out; +} + +function canonicalStringify(value: SanitizedJson): string { + if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') { + return JSON.stringify(value); + } + + if (Array.isArray(value)) { + return `[${value.map((entry) => canonicalStringify(entry)).join(',')}]`; + } + + const keys = Object.keys(value).sort(compareStrings); + return `{${keys.map((key) => 
`${JSON.stringify(key)}:${canonicalStringify(value[key])}`).join(',')}}`; +} + +function canonicalizeProtocolValue(value: unknown): string { + const decoded = decodeSpec(value); + const sanitized = sanitizeValue(decoded, 0, { nodes: 0 }); + if (sanitized === REMOVE_VALUE) { + throw new ProtocolError('ERR_PAYLOAD_STRUCTURE_LIMIT'); + } + + const canonical = canonicalStringify(sanitized); + if (Buffer.byteLength(canonical, 'utf8') > MAX_PAYLOAD_BYTES) { + throw new ProtocolError('ERR_PAYLOAD_TOO_LARGE'); + } + return canonical; +} + +function sha256Hex(value: string): string { + return createHash('sha256').update(value, 'utf8').digest('hex'); +} + +function assert(condition: boolean): void { + if (!condition) { + throw new Error('assert_failed'); + } +} + +function parseCanonicalVectors(raw: unknown): CanonicalVector[] { + if (!isPlainObject(raw) || !Array.isArray(raw.vectors)) { + throw new Error('invalid_golden'); + } + return raw.vectors as CanonicalVector[]; +} + +function parseHashVectors(raw: unknown): HashVector[] { + if (!isPlainObject(raw) || !Array.isArray(raw.vectors)) { + throw new Error('invalid_golden'); + } + return raw.vectors as HashVector[]; +} + +function loadJson(filePath: string): unknown { + const raw = readFileSync(filePath, 'utf8'); + const withoutBom = raw.replace(/^\uFEFF/, ''); + return JSON.parse(withoutBom); +} + +function runCanonicalizationTests(vectors: CanonicalVector[]): void { + for (const vector of vectors) { + if (vector.expectedError) { + let caught: ProtocolErrorCode | null = null; + try { + void canonicalizeProtocolValue(vector.inputSpec); + } catch (error) { + if (error instanceof ProtocolError) { + caught = error.code; + } + } + assert(caught === vector.expectedError); + continue; + } + + const first = canonicalizeProtocolValue(vector.inputSpec); + const second = canonicalizeProtocolValue(vector.inputSpec); + assert(first === second); + assert(first === vector.expectedCanonical); + } +} + +function runHashTests(vectors: 
HashVector[]): void { + for (const vector of vectors) { + const canonical = canonicalizeProtocolValue(vector.bundleSpec); + assert(canonical === vector.expectedCanonical); + + const hashA = sha256Hex(canonical); + const hashB = sha256Hex(canonicalizeProtocolValue(vector.bundleSpec)); + + assert(hashA === hashB); + assert(hashA === vector.expectedBundleHash); + } +} + +export function runProtocolSelftest(): void { + const goldenDir = path.resolve(process.cwd(), 'src', 'selftest', 'golden'); + const canonicalVectors = parseCanonicalVectors(loadJson(path.join(goldenDir, 'canonicalization.json'))); + const hashVectors = parseHashVectors(loadJson(path.join(goldenDir, 'hash.json'))); + + runCanonicalizationTests(canonicalVectors); + runHashTests(hashVectors); +} + +if (require.main === module) { + try { + runProtocolSelftest(); + process.stdout.write('PROTOCOL_SELFTEST_OK\n'); + } catch { + process.stdout.write('PROTOCOL_SELFTEST_FAIL\n'); + process.exit(1); + } +} + + + + + + diff --git a/server/src/selftest/revision.selftest.ts b/server/src/selftest/revision.selftest.ts new file mode 100644 index 0000000..03e12ef --- /dev/null +++ b/server/src/selftest/revision.selftest.ts @@ -0,0 +1,239 @@ +import type { RevisionArtifactReference, RevisionMetadata } from '../lib/revision-hash'; +import { + RevisionService, + type RevisionStorageAdapter, + type RevisionRecord, + RevisionServiceError, + REVISION_ERROR_CODES, +} from '../services/revision.service'; + +const SELFTEST_PACKAGE_ID = 'revision-selftest-package'; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function assertCondition(condition: boolean): void { + if (!condition) { + throw new Error('revision_selftest_failed'); + } +} + +type StoredRevisionRecord = { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + author: string; + message: string; + createdBy: string; + timestamp: Date; + source: string; + metadata: 
unknown; + createdAt: Date; + artifacts: RevisionArtifactReference[]; +}; + +function createInMemoryAdapter(): { + adapter: RevisionStorageAdapter; + artifacts: Set<string>; +} { + const revisions = new Map<string, StoredRevisionRecord>(); + const artifacts = new Set<string>(); + + const adapter: RevisionStorageAdapter = { + async findRevisionByHash(revisionHash: string): Promise<StoredRevisionRecord | null> { + return revisions.get(revisionHash) ?? null; + }, + + async createRevision(input: { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + metadata: RevisionMetadata; + artifacts: RevisionArtifactReference[]; + }): Promise<StoredRevisionRecord> { + const createdAt = new Date(Date.parse('2026-03-05T00:00:00.000Z') + revisions.size * 1000); + const node: StoredRevisionRecord = { + revisionHash: input.revisionHash, + packageId: input.packageId, + parentRevisionHash: input.parentRevisionHash, + author: input.metadata.author, + message: input.metadata.message, + createdBy: input.metadata.createdBy, + timestamp: new Date(input.metadata.timestamp), + source: input.metadata.source, + metadata: input.metadata, + createdAt, + artifacts: input.artifacts.map((artifact) => ({ + bundleHash: artifact.bundleHash, + role: artifact.role, + })), + }; + + revisions.set(node.revisionHash, node); + return node; + }, + + async listRevisions(packageId: string, limit: number): Promise<StoredRevisionRecord[]> { + const rows: StoredRevisionRecord[] = []; + for (const revision of revisions.values()) { + if (revision.packageId === packageId) { + rows.push(revision); + } + } + + rows.sort((a, b) => { + const tsDiff = a.timestamp.getTime() - b.timestamp.getTime(); + if (tsDiff !== 0) { + return tsDiff < 0 ?
-1 : 1; + } + return compareStrings(a.revisionHash, b.revisionHash); + }); + + return rows.slice(0, limit); + }, + + async artifactExists(packageId: string, bundleHash: string): Promise<boolean> { + return artifacts.has(`${packageId}:${bundleHash}`); + }, + }; + + return { adapter, artifacts }; +} + +function addArtifactReference(artifacts: Set<string>, packageId: string, bundleHash: string): void { + artifacts.add(`${packageId}:${bundleHash}`); +} + +async function runRevisionSelftest(): Promise<void> { + const memory = createInMemoryAdapter(); + const revisionService = new RevisionService(memory.adapter); + + const firstBundleHash = '1111111111111111111111111111111111111111111111111111111111111111'; + const secondBundleHash = '2222222222222222222222222222222222222222222222222222222222222222'; + + addArtifactReference(memory.artifacts, SELFTEST_PACKAGE_ID, firstBundleHash); + addArtifactReference(memory.artifacts, SELFTEST_PACKAGE_ID, secondBundleHash); + + const firstRevision = await revisionService.createRevision({ + packageId: SELFTEST_PACKAGE_ID, + artifacts: [ + { bundleHash: secondBundleHash, role: 'output' }, + { bundleHash: firstBundleHash, role: 'input' }, + ], + metadata: { + author: 'selftest-author', + message: 'first revision', + createdBy: 'selftest', + timestamp: '2026-03-05T00:00:00.000Z', + source: 'system', + tags: ['milestone-c', 'selftest'], + }, + }); + + const firstRevisionAgain = await revisionService.createRevision({ + packageId: SELFTEST_PACKAGE_ID, + artifacts: [ + { bundleHash: firstBundleHash, role: 'input' }, + { bundleHash: secondBundleHash, role: 'output' }, + ], + metadata: { + author: 'selftest-author', + message: 'first revision', + createdBy: 'selftest', + timestamp: '2026-03-05T00:00:00.000Z', + source: 'system', + tags: ['selftest', 'milestone-c'], + }, + }); + + assertCondition(firstRevision.revisionHash === firstRevisionAgain.revisionHash); + + let parentValidationCaught = false; + try { + await revisionService.createRevision({ + packageId:
SELFTEST_PACKAGE_ID, + parentRevisionHash: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', + artifacts: [{ bundleHash: firstBundleHash, role: 'input' }], + metadata: { + author: 'selftest-author', + message: 'invalid parent', + createdBy: 'selftest', + timestamp: '2026-03-05T00:00:01.000Z', + source: 'system', + }, + }); + } catch (error) { + if ( + error instanceof RevisionServiceError && + error.code === REVISION_ERROR_CODES.ERR_REVISION_PARENT_NOT_FOUND + ) { + parentValidationCaught = true; + } else { + throw error; + } + } + + assertCondition(parentValidationCaught); + + let artifactValidationCaught = false; + try { + await revisionService.createRevision({ + packageId: SELFTEST_PACKAGE_ID, + artifacts: [{ bundleHash: 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb', role: 'input' }], + metadata: { + author: 'selftest-author', + message: 'missing artifact', + createdBy: 'selftest', + timestamp: '2026-03-05T00:00:02.000Z', + source: 'system', + }, + }); + } catch (error) { + if ( + error instanceof RevisionServiceError && + error.code === REVISION_ERROR_CODES.ERR_ARTIFACT_NOT_FOUND + ) { + artifactValidationCaught = true; + } else { + throw error; + } + } + + assertCondition(artifactValidationCaught); + + const secondRevision = await revisionService.createRevision({ + packageId: SELFTEST_PACKAGE_ID, + parentRevisionHash: firstRevision.revisionHash, + artifacts: [{ bundleHash: firstBundleHash, role: 'input' }], + metadata: { + author: 'selftest-author', + message: 'second revision', + createdBy: 'selftest', + timestamp: '2026-03-05T00:00:03.000Z', + source: 'system', + }, + }); + + const loaded = await revisionService.getRevision({ revisionHash: secondRevision.revisionHash }); + assertCondition(loaded !== null); + assertCondition((loaded as RevisionRecord).parentRevisionHash === firstRevision.revisionHash); + + const listed = await revisionService.listRevisions({ packageId: SELFTEST_PACKAGE_ID, limit: 10 }); + 
assertCondition(listed.length >= 2); + assertCondition(listed[0].revisionHash === firstRevision.revisionHash); + assertCondition(listed[1].revisionHash === secondRevision.revisionHash); +} + +if (require.main === module) { + runRevisionSelftest() + .then(() => { + process.stdout.write('REVISION_SELFTEST_OK\n'); + }) + .catch(() => { + process.stdout.write('REVISION_SELFTEST_FAIL\n'); + process.exit(1); + }); +} diff --git a/server/src/services/__tests__/apply-report-v1.test.ts b/server/src/services/__tests__/apply-report-v1.test.ts index 379eb7a..ac9d01e 100644 --- a/server/src/services/__tests__/apply-report-v1.test.ts +++ b/server/src/services/__tests__/apply-report-v1.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it } from 'vitest'; import { stableHash } from '../../algebra/semanticDiff/key'; -import { buildApplyReportV1 } from '../apply-report-v1'; +import { buildApplyReportV1, type BuildApplyReportV1Input } from '../apply-report-v1'; describe('buildApplyReportV1', () => { it('builds legacy minimal report deterministically', () => { @@ -125,46 +125,48 @@ describe('buildApplyReportV1', () => { }); it('produces stable output for permuted conflict input', () => { - const baseInput = { - llmMode: 'legacy' as const, - transitionMode: 'best_effort' as const, - transition: { - deltaSummary: { - modifiedDomains: ['constraints', 'facts'], - counts: { - facts: { added: 1, removed: 0, modified: 0 }, - decisions: { added: 0, removed: 0, modified: 0 }, - constraints: { added: 0, removed: 1, modified: 0 }, - risks: { added: 0, removed: 0, modified: 0 }, - assumptions: { added: 0, removed: 0, modified: 0 }, - }, - hasCollisions: true, - assumptionsDerived: true, + const transition = { + deltaSummary: { + modifiedDomains: ['constraints', 'facts'], + counts: { + facts: { added: 1, removed: 0, modified: 0 }, + decisions: { added: 0, removed: 0, modified: 0 }, + constraints: { added: 0, removed: 1, modified: 0 }, + risks: { added: 0, removed: 0, modified: 0 }, + assumptions: 
{ added: 0, removed: 0, modified: 0 }, }, - conflicts: [ - { domain: 'constraints', code: 'B', key: '2', path: 'p2', message: 'm2' }, - { domain: 'facts', code: 'A', key: '1', path: 'p1', message: 'm1' }, - ], - postApplyConflicts: [ - { domain: 'risks', code: 'C', key: '3', path: 'p3', message: 'm3' }, - ], - findings: [ - { code: 'B', message: 'bbb', count: 2, domains: ['constraints', 'facts'] }, - { code: 'A', message: 'aaa', count: 1, domains: ['assumptions'] }, - ], + hasCollisions: true, + assumptionsDerived: true, }, - }; + conflicts: [ + { domain: 'constraints', code: 'B', key: '2', path: 'p2', message: 'm2' }, + { domain: 'facts', code: 'A', key: '1', path: 'p1', message: 'm1' }, + ], + postApplyConflicts: [{ domain: 'risks', code: 'C', key: '3', path: 'p3', message: 'm3' }], + findings: [ + { code: 'B', message: 'bbb', count: 2, domains: ['constraints', 'facts'] }, + { code: 'A', message: 'aaa', count: 1, domains: ['assumptions'] }, + ], + } satisfies NonNullable<BuildApplyReportV1Input['transition']>; + + const baseInput = { + llmMode: 'legacy', + transitionMode: 'best_effort', + transition, + } satisfies BuildApplyReportV1Input; const first = buildApplyReportV1(baseInput); const second = buildApplyReportV1({ ...baseInput, transition: { - ...baseInput.transition, - conflicts: [...baseInput.transition.conflicts].reverse(), - findings: [...baseInput.transition.findings].reverse(), + ...transition, + conflicts: [...transition.conflicts].reverse(), + findings: [...transition.findings].reverse(), }, }); expect(stableHash(first)).toBe(stableHash(second)); }); }); + + diff --git a/server/src/services/artifact-bundle-v1.ts b/server/src/services/artifact-bundle-v1.ts new file mode 100644 index 0000000..e9ebe9c --- /dev/null +++ b/server/src/services/artifact-bundle-v1.ts @@ -0,0 +1,639 @@ +import { createHash } from 'crypto'; +import type { TransferPackageV1 } from './transfer-package-v1'; +import type { LineageBindingV1 } from './lineage-binding-v1'; +import type { HandoffRecordV1 } from
'./handoff-record-v1'; + +export type ClosureContractRefV1 = { + schema: 'closure-contract-1'; + proposedHash: string; + acceptedHash: string; +} | null; + +export type ArtifactBundleV1 = { + schema: 'artifact-bundle-1'; + identity: { + packageId: string; + revisionId: string | null; + revisionHash: string | null; + }; + artifacts: { + transferPackageV1: TransferPackageV1; + lineageBindingV1: LineageBindingV1; + handoffRecordV1: HandoffRecordV1; + closureContractV1: ClosureContractRefV1; + }; + diagnostics: { + invariants: Array<{ code: string; ok: boolean; message: string }>; + notes: string[]; + }; + createdAt: string | null; + bundleHash: string; +}; + +export type BuildArtifactBundleV1Input = { + identity: { + packageId: string; + revisionId?: string | null; + revisionHash?: string | null; + }; + artifacts: { + transferPackageV1: TransferPackageV1; + lineageBindingV1: LineageBindingV1; + handoffRecordV1: HandoffRecordV1; + closureContractV1?: ClosureContractRefV1; + }; + diagnostics?: { + notes?: string[]; + }; + createdAt?: string | null; +}; + +type BundleHashPayload = Omit<ArtifactBundleV1, 'bundleHash'>; + +type InvariantCode = + | 'INV_TRANSFER_HASH_MATCH_LINEAGE' + | 'INV_EMBEDDED_LINEAGE_HASH_MATCH_TOP' + | 'INV_NO_HANDOFF_BINDING_IN_LINEAGE' + | 'INV_JSON_SAFE'; + +const INVARIANT_ORDER: readonly InvariantCode[] = [ + 'INV_TRANSFER_HASH_MATCH_LINEAGE', + 'INV_EMBEDDED_LINEAGE_HASH_MATCH_TOP', + 'INV_NO_HANDOFF_BINDING_IN_LINEAGE', + 'INV_JSON_SAFE', +]; + +function makeBundleError( + code: 'E_BUNDLE_INVALID' | 'E_BUNDLE_NON_JSON_SAFE' | 'E_BUNDLE_HASH_MISMATCH', + message: string +): Error & { code: string } { + const error = new Error(message) as Error & { code: string }; + error.code = code; + return error; +} + +export function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function isPlainObject(value: unknown): value is Record<string, unknown> { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const
proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function normalizeNullableString(value: unknown): string | null { + if (typeof value === 'string') return value; + if (value === null || value === undefined) return null; + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); +} + +function normalizeStringArray(value: unknown): string[] { + if (!Array.isArray(value)) return []; + const normalized: string[] = []; + for (const entry of value) { + if (typeof entry !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + normalized.push(entry); + } + return [...normalized].sort(compareStrings); +} + +export function assertJsonSafe(value: unknown): void { + if (value === null) return; + + const valueType = typeof value; + if (valueType === 'string' || valueType === 'boolean') return; + if (valueType === 'number') { + if (!Number.isFinite(value)) { + throw makeBundleError('E_BUNDLE_NON_JSON_SAFE', 'Artifact bundle contains non JSON-safe value'); + } + return; + } + if (valueType === 'undefined' || valueType === 'function' || valueType === 'symbol' || valueType === 'bigint') { + throw makeBundleError('E_BUNDLE_NON_JSON_SAFE', 'Artifact bundle contains non JSON-safe value'); + } + + if (Array.isArray(value)) { + for (const entry of value) { + assertJsonSafe(entry); + } + return; + } + + if (!isPlainObject(value)) { + throw makeBundleError('E_BUNDLE_NON_JSON_SAFE', 'Artifact bundle contains non JSON-safe value'); + } + + const keys = Object.keys(value).sort(compareStrings); + for (const key of keys) { + assertJsonSafe(value[key]); + } +} + +export function stableStringify(value: unknown): string { + assertJsonSafe(value); + + if (value === null || typeof value === 'string' || typeof value === 'boolean' || typeof value === 'number') { + return JSON.stringify(value); + } + + if (Array.isArray(value)) { + return `[${value.map((entry) => 
stableStringify(entry)).join(',')}]`; + } + + const record = value as Record<string, unknown>; + const keys = Object.keys(record).sort(compareStrings); + return `{${keys.map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`).join(',')}}`; +} + +export function sha256Hex(text: string): string { + return createHash('sha256').update(text, 'utf8').digest('hex'); +} + +function normalizeClosureContract(value: unknown): ClosureContractRefV1 { + if (value === null || value === undefined) return null; + if ( + !isPlainObject(value) || + value.schema !== 'closure-contract-1' || + typeof value.proposedHash !== 'string' || + typeof value.acceptedHash !== 'string' + ) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + return { + schema: 'closure-contract-1', + proposedHash: value.proposedHash, + acceptedHash: value.acceptedHash, + }; +} + +function normalizeTransferPackageV1(value: unknown): TransferPackageV1 { + if (!isPlainObject(value) || value.schema !== 'transfer-package-1' || typeof value.transferHash !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (!isPlainObject(value.identity)) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if ( + typeof value.identity.packageId !== 'string' || + typeof value.identity.revisionId !== 'string' || + typeof value.identity.revisionHash !== 'string' || + !(typeof value.identity.parentRevisionId === 'string' || value.identity.parentRevisionId === null) + ) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + assertJsonSafe(value); + return value as TransferPackageV1; +} + +function normalizeLineageBindingV1(value: unknown): LineageBindingV1 { + if (!isPlainObject(value) || value.schema !== 'lineage-binding-1' || typeof value.lineageHash !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (!(typeof value.createdAt ===
'string' || value.createdAt === null)) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (!isPlainObject(value.identity) || !isPlainObject(value.bindings) || !isPlainObject(value.diagnostics)) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if ( + typeof value.identity.packageId !== 'string' || + typeof value.identity.revisionId !== 'string' || + typeof value.identity.revisionHash !== 'string' || + !(typeof value.identity.parentRevisionId === 'string' || value.identity.parentRevisionId === null) + ) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if ( + !isPlainObject(value.bindings.transfer) || + value.bindings.transfer.schema !== 'transfer-package-1' || + typeof value.bindings.transfer.transferHash !== 'string' + ) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (!(Array.isArray(value.diagnostics.missing) && Array.isArray(value.diagnostics.notes))) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + assertJsonSafe(value); + return value as LineageBindingV1; +} + +function normalizeHandoffRecordV1(value: unknown): HandoffRecordV1 { + if (!isPlainObject(value) || value.schema !== 'handoff-record-1' || typeof value.handoffHash !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (!(typeof value.createdAt === 'string' || value.createdAt === null)) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (!isPlainObject(value.identity) || !isPlainObject(value.transfer) || !isPlainObject(value.lineageBindingV1)) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if ( + typeof value.identity.packageId !== 'string' || + typeof value.identity.revisionId !== 'string' || + typeof value.identity.revisionHash !== 'string' || + !(typeof 
value.identity.parentRevisionId === 'string' || value.identity.parentRevisionId === null) + ) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (value.transfer.schema !== 'transfer-package-1' || typeof value.transfer.transferHash !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + normalizeLineageBindingV1(value.lineageBindingV1); + assertJsonSafe(value); + return value as HandoffRecordV1; +} + +function cloneTransferPackageV1(transferPackageV1: TransferPackageV1): TransferPackageV1 { + return { + schema: 'transfer-package-1', + identity: { + packageId: transferPackageV1.identity.packageId, + revisionId: transferPackageV1.identity.revisionId, + revisionHash: transferPackageV1.identity.revisionHash, + parentRevisionId: transferPackageV1.identity.parentRevisionId, + }, + bindings: { + closureContractV1: transferPackageV1.bindings.closureContractV1 + ? { + schema: 'closure-contract-1', + proposedHash: transferPackageV1.bindings.closureContractV1.proposedHash, + acceptedHash: transferPackageV1.bindings.closureContractV1.acceptedHash, + } + : null, + applyReportV1Hash: transferPackageV1.bindings.applyReportV1Hash, + executionRecordV1Hash: transferPackageV1.bindings.executionRecordV1Hash, + }, + trunk: { + intent: { + primary: transferPackageV1.trunk.intent.primary, + successCriteria: [...transferPackageV1.trunk.intent.successCriteria], + nonGoals: [...transferPackageV1.trunk.intent.nonGoals], + }, + stateDigest: { + facts: [...transferPackageV1.trunk.stateDigest.facts], + decisions: [...transferPackageV1.trunk.stateDigest.decisions], + constraints: [...transferPackageV1.trunk.stateDigest.constraints], + risks: [...transferPackageV1.trunk.stateDigest.risks], + assumptions: [...transferPackageV1.trunk.stateDigest.assumptions], + openLoops: [...transferPackageV1.trunk.stateDigest.openLoops], + }, + }, + continuation: { + nextActions: 
transferPackageV1.continuation.nextActions.map((entry) => ({ + code: entry.code, + message: entry.message, + expectedOutput: entry.expectedOutput, + domains: [...entry.domains], + })), + validationChecklist: transferPackageV1.continuation.validationChecklist.map((entry) => ({ + code: entry.code, + message: entry.message, + severity: entry.severity, + })), + }, + conflicts: transferPackageV1.conflicts.map((entry) => ({ + domain: entry.domain, + code: entry.code, + key: entry.key, + path: entry.path, + message: entry.message, + })), + determinism: { + sorted: true, + domainOrder: ['facts', 'decisions', 'constraints', 'risks', 'assumptions'], + }, + transferHash: transferPackageV1.transferHash, + }; +} + +function cloneLineageBindingV1(lineageBindingV1: LineageBindingV1, createdAt: string | null): LineageBindingV1 { + return { + schema: 'lineage-binding-1', + identity: { + packageId: lineageBindingV1.identity.packageId, + revisionId: lineageBindingV1.identity.revisionId, + revisionHash: lineageBindingV1.identity.revisionHash, + parentRevisionId: lineageBindingV1.identity.parentRevisionId, + }, + bindings: { + transfer: lineageBindingV1.bindings.transfer + ? { + schema: 'transfer-package-1', + transferHash: lineageBindingV1.bindings.transfer.transferHash, + } + : null, + closure: lineageBindingV1.bindings.closure + ? { + schema: 'closure-contract-1', + proposedHash: lineageBindingV1.bindings.closure.proposedHash, + acceptedHash: lineageBindingV1.bindings.closure.acceptedHash, + } + : null, + execution: lineageBindingV1.bindings.execution + ? { + schema: 'execution-record-1', + reportHash: lineageBindingV1.bindings.execution.reportHash, + deltaHash: lineageBindingV1.bindings.execution.deltaHash, + } + : null, + handoff: lineageBindingV1.bindings.handoff + ? 
{ + schema: 'handoff-record-1', + handoffHash: lineageBindingV1.bindings.handoff.handoffHash, + } + : null, + }, + diagnostics: { + missing: [...lineageBindingV1.diagnostics.missing], + notes: [...lineageBindingV1.diagnostics.notes], + }, + createdAt, + lineageHash: lineageBindingV1.lineageHash, + }; +} + +function cloneHandoffRecordV1(handoffRecordV1: HandoffRecordV1, createdAt: string | null, lineageCreatedAt: string | null): HandoffRecordV1 { + return { + schema: 'handoff-record-1', + transfer: { + schema: 'transfer-package-1', + transferHash: handoffRecordV1.transfer.transferHash, + }, + identity: { + packageId: handoffRecordV1.identity.packageId, + revisionId: handoffRecordV1.identity.revisionId, + revisionHash: handoffRecordV1.identity.revisionHash, + parentRevisionId: handoffRecordV1.identity.parentRevisionId, + }, + bindings: { + closureContractV1: handoffRecordV1.bindings.closureContractV1 + ? { + schema: 'closure-contract-1', + proposedHash: handoffRecordV1.bindings.closureContractV1.proposedHash, + acceptedHash: handoffRecordV1.bindings.closureContractV1.acceptedHash, + } + : null, + applyReportV1Hash: handoffRecordV1.bindings.applyReportV1Hash, + executionRecordV1Hash: handoffRecordV1.bindings.executionRecordV1Hash, + }, + trunk: { + intent: { + primary: handoffRecordV1.trunk.intent.primary, + successCriteria: [...handoffRecordV1.trunk.intent.successCriteria], + nonGoals: [...handoffRecordV1.trunk.intent.nonGoals], + }, + stateDigest: { + facts: [...handoffRecordV1.trunk.stateDigest.facts], + decisions: [...handoffRecordV1.trunk.stateDigest.decisions], + constraints: [...handoffRecordV1.trunk.stateDigest.constraints], + risks: [...handoffRecordV1.trunk.stateDigest.risks], + assumptions: [...handoffRecordV1.trunk.stateDigest.assumptions], + openLoops: [...handoffRecordV1.trunk.stateDigest.openLoops], + }, + }, + continuation: { + nextActions: handoffRecordV1.continuation.nextActions.map((entry) => ({ + code: entry.code, + message: entry.message, + 
expectedOutput: entry.expectedOutput, + domains: [...entry.domains], + })), + validationChecklist: handoffRecordV1.continuation.validationChecklist.map((entry) => ({ + code: entry.code, + message: entry.message, + severity: entry.severity, + })), + }, + diagnostics: { + verified: true, + verification: { + transferHashRecomputed: handoffRecordV1.diagnostics.verification.transferHashRecomputed, + matchesProvidedHash: handoffRecordV1.diagnostics.verification.matchesProvidedHash, + }, + }, + lineageBindingV1: cloneLineageBindingV1(handoffRecordV1.lineageBindingV1, lineageCreatedAt), + createdAt, + handoffHash: handoffRecordV1.handoffHash, + }; +} + +function buildInvariant(code: InvariantCode, ok: boolean): { code: string; ok: boolean; message: string } { + if (code === 'INV_TRANSFER_HASH_MATCH_LINEAGE') { + return { + code, + ok, + message: ok ? 'Transfer hash matches lineage binding' : 'Transfer hash does not match lineage binding', + }; + } + if (code === 'INV_EMBEDDED_LINEAGE_HASH_MATCH_TOP') { + return { + code, + ok, + message: ok ? 'Embedded lineage hash matches top-level lineage' : 'Embedded lineage hash does not match top-level lineage', + }; + } + if (code === 'INV_NO_HANDOFF_BINDING_IN_LINEAGE') { + return { + code, + ok, + message: ok ? 'Lineage has no handoff binding' : 'Lineage has a handoff binding', + }; + } + return { + code, + ok, + message: ok ? 
'Artifact bundle is JSON-safe' : 'Artifact bundle is not JSON-safe', + }; +} + +function buildCanonicalBundle(input: BuildArtifactBundleV1Input): Omit<ArtifactBundleV1, 'bundleHash'> { + if (!isPlainObject(input) || !isPlainObject(input.identity) || !isPlainObject(input.artifacts)) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + + assertJsonSafe(input); + + if (typeof input.identity.packageId !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + + const transferPackageV1 = cloneTransferPackageV1(normalizeTransferPackageV1(input.artifacts.transferPackageV1)); + const lineageBindingV1 = cloneLineageBindingV1(normalizeLineageBindingV1(input.artifacts.lineageBindingV1), normalizeNullableString((input.artifacts.lineageBindingV1 as LineageBindingV1).createdAt)); + const handoffRecordV1 = cloneHandoffRecordV1( + normalizeHandoffRecordV1(input.artifacts.handoffRecordV1), + normalizeNullableString((input.artifacts.handoffRecordV1 as HandoffRecordV1).createdAt), + normalizeNullableString((input.artifacts.handoffRecordV1 as HandoffRecordV1).lineageBindingV1.createdAt) + ); + const closureContractV1 = normalizeClosureContract(input.artifacts.closureContractV1); + + const invariants: Array<{ code: string; ok: boolean; message: string }> = [ + buildInvariant( + 'INV_TRANSFER_HASH_MATCH_LINEAGE', + lineageBindingV1.bindings.transfer !== null && + lineageBindingV1.bindings.transfer.transferHash === transferPackageV1.transferHash + ), + buildInvariant( + 'INV_EMBEDDED_LINEAGE_HASH_MATCH_TOP', + handoffRecordV1.lineageBindingV1.lineageHash === lineageBindingV1.lineageHash + ), + buildInvariant('INV_NO_HANDOFF_BINDING_IN_LINEAGE', lineageBindingV1.bindings.handoff === null), + buildInvariant('INV_JSON_SAFE', true), + ]; + + const bundle: Omit<ArtifactBundleV1, 'bundleHash'> = { + schema: 'artifact-bundle-1', + identity: { + packageId: input.identity.packageId, + revisionId: normalizeNullableString(input.identity.revisionId), + revisionHash: 
normalizeNullableString(input.identity.revisionHash), + }, + artifacts: { + transferPackageV1, + lineageBindingV1, + handoffRecordV1, + closureContractV1, + }, + diagnostics: { + invariants, + notes: normalizeStringArray(input.diagnostics?.notes), + }, + createdAt: normalizeNullableString(input.createdAt), + }; + + assertJsonSafe(bundle); + return bundle; +} + +function canonicalizeBundleForHash(bundle: Omit<ArtifactBundleV1, 'bundleHash'> | ArtifactBundleV1): BundleHashPayload { + return { + schema: 'artifact-bundle-1', + identity: { + packageId: bundle.identity.packageId, + revisionId: bundle.identity.revisionId, + revisionHash: bundle.identity.revisionHash, + }, + artifacts: { + transferPackageV1: cloneTransferPackageV1(bundle.artifacts.transferPackageV1), + lineageBindingV1: cloneLineageBindingV1(bundle.artifacts.lineageBindingV1, null), + handoffRecordV1: cloneHandoffRecordV1(bundle.artifacts.handoffRecordV1, null, null), + closureContractV1: bundle.artifacts.closureContractV1 + ? { + schema: 'closure-contract-1', + proposedHash: bundle.artifacts.closureContractV1.proposedHash, + acceptedHash: bundle.artifacts.closureContractV1.acceptedHash, + } + : null, + }, + diagnostics: { + invariants: bundle.diagnostics.invariants.map((entry) => ({ + code: entry.code, + ok: entry.ok, + message: entry.message, + })), + notes: [...bundle.diagnostics.notes], + }, + }; +} + +function normalizeInvariantArray(value: unknown): Array<{ code: string; ok: boolean; message: string }> { + if (!Array.isArray(value) || value.length !== INVARIANT_ORDER.length) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + const normalized: Array<{ code: string; ok: boolean; message: string }> = []; + for (let index = 0; index < INVARIANT_ORDER.length; index += 1) { + const entry = value[index]; + if (!isPlainObject(entry)) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + const codeValue = entry.code; + const okValue = entry.ok; + const messageValue = 
entry.message; + if (codeValue !== INVARIANT_ORDER[index] || typeof okValue !== 'boolean' || typeof messageValue !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + normalized.push({ + code: INVARIANT_ORDER[index], + ok: okValue, + message: messageValue, + }); + } + return normalized; +} + +function normalizeArtifactBundleV1(bundle: unknown): ArtifactBundleV1 { + if (!isPlainObject(bundle) || bundle.schema !== 'artifact-bundle-1' || typeof bundle.bundleHash !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (!isPlainObject(bundle.identity) || !isPlainObject(bundle.artifacts) || !isPlainObject(bundle.diagnostics)) { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + if (typeof bundle.identity.packageId !== 'string') { + throw makeBundleError('E_BUNDLE_INVALID', 'Artifact bundle input is invalid'); + } + + const normalized: ArtifactBundleV1 = { + schema: 'artifact-bundle-1', + identity: { + packageId: bundle.identity.packageId, + revisionId: normalizeNullableString(bundle.identity.revisionId), + revisionHash: normalizeNullableString(bundle.identity.revisionHash), + }, + artifacts: { + transferPackageV1: cloneTransferPackageV1(normalizeTransferPackageV1(bundle.artifacts.transferPackageV1)), + lineageBindingV1: cloneLineageBindingV1(normalizeLineageBindingV1(bundle.artifacts.lineageBindingV1), normalizeNullableString((bundle.artifacts.lineageBindingV1 as LineageBindingV1).createdAt)), + handoffRecordV1: cloneHandoffRecordV1( + normalizeHandoffRecordV1(bundle.artifacts.handoffRecordV1), + normalizeNullableString((bundle.artifacts.handoffRecordV1 as HandoffRecordV1).createdAt), + normalizeNullableString((bundle.artifacts.handoffRecordV1 as HandoffRecordV1).lineageBindingV1.createdAt) + ), + closureContractV1: normalizeClosureContract(bundle.artifacts.closureContractV1), + }, + diagnostics: { + invariants: 
normalizeInvariantArray(bundle.diagnostics.invariants), + notes: normalizeStringArray(bundle.diagnostics.notes), + }, + createdAt: normalizeNullableString(bundle.createdAt), + bundleHash: bundle.bundleHash, + }; + + assertJsonSafe(normalized); + return normalized; +} + +export function buildArtifactBundleV1(input: BuildArtifactBundleV1Input): ArtifactBundleV1 { + const bundleWithoutHash = buildCanonicalBundle(input); + const bundleHash = sha256Hex(stableStringify(canonicalizeBundleForHash(bundleWithoutHash))); + const bundle: ArtifactBundleV1 = { + ...bundleWithoutHash, + bundleHash, + }; + assertJsonSafe(bundle); + return bundle; +} + +export function recomputeArtifactBundleV1Hash(bundle: ArtifactBundleV1): string { + const normalized = normalizeArtifactBundleV1(bundle); + return sha256Hex(stableStringify(canonicalizeBundleForHash(normalized))); +} + +export function verifyArtifactBundleV1(bundle: ArtifactBundleV1): { ok: true; recomputedHash: string; matches: boolean } { + const normalized = normalizeArtifactBundleV1(bundle); + const recomputedHash = sha256Hex(stableStringify(canonicalizeBundleForHash(normalized))); + return { + ok: true, + recomputedHash, + matches: recomputedHash === normalized.bundleHash, + }; +} + +export function verifyArtifactBundleV1OrThrow(bundle: ArtifactBundleV1): void { + const verification = verifyArtifactBundleV1(bundle); + if (!verification.matches) { + throw makeBundleError('E_BUNDLE_HASH_MISMATCH', 'Artifact bundle hash mismatch'); + } +} diff --git a/server/src/services/artifact-bundle.service.ts b/server/src/services/artifact-bundle.service.ts new file mode 100644 index 0000000..25db1fd --- /dev/null +++ b/server/src/services/artifact-bundle.service.ts @@ -0,0 +1,90 @@ +import { + buildArtifactBundleV1, + verifyArtifactBundleV1, + type ArtifactBundleV1, + type BuildArtifactBundleV1Input, + type ClosureContractRefV1, +} from './artifact-bundle-v1'; +import type { TransferPackageV1 } from './transfer-package-v1'; +import type { 
LineageBindingV1 } from './lineage-binding-v1'; +import type { HandoffRecordV1 } from './handoff-record-v1'; + +export type BuildArtifactBundleV1FromTransferFlowInput = { + transferPackageV1: TransferPackageV1; + lineageBindingV1: LineageBindingV1; + handoffRecordV1: HandoffRecordV1; + closureContractV1?: ClosureContractRefV1; + identity?: { + revisionId?: string | null; + revisionHash?: string | null; + }; + createdAt?: string | null; + notes?: string[]; +}; + +export function buildArtifactBundleV1FromTransferFlow( + input: BuildArtifactBundleV1FromTransferFlowInput +): ArtifactBundleV1 { + return buildArtifactBundleV1({ + identity: { + packageId: input.transferPackageV1.identity.packageId, + revisionId: + typeof input.identity?.revisionId === 'string' + ? input.identity.revisionId + : input.identity?.revisionId === null + ? null + : input.transferPackageV1.identity.revisionId, + revisionHash: + typeof input.identity?.revisionHash === 'string' + ? input.identity.revisionHash + : input.identity?.revisionHash === null + ? null + : input.transferPackageV1.identity.revisionHash, + }, + artifacts: { + transferPackageV1: input.transferPackageV1, + lineageBindingV1: input.lineageBindingV1, + handoffRecordV1: input.handoffRecordV1, + closureContractV1: input.closureContractV1 ?? null, + }, + diagnostics: { + notes: input.notes ?? [], + }, + createdAt: typeof input.createdAt === 'string' ? input.createdAt : input.createdAt === null ? 
null : null, + }); +} + +export function verifyArtifactBundleV1ForApi( + artifactBundleV1: ArtifactBundleV1 +): { ok: true; recomputedHash: string; matches: boolean } { + return verifyArtifactBundleV1(artifactBundleV1); +} + +export class ArtifactBundleService { + buildArtifactBundleV1( + input: Omit & { + artifacts: { + transferPackageV1: unknown; + lineageBindingV1: unknown; + handoffRecordV1: unknown; + closureContractV1?: unknown; + }; + } + ): ArtifactBundleV1 { + return buildArtifactBundleV1({ + identity: input.identity, + artifacts: { + transferPackageV1: input.artifacts.transferPackageV1 as TransferPackageV1, + lineageBindingV1: input.artifacts.lineageBindingV1 as LineageBindingV1, + handoffRecordV1: input.artifacts.handoffRecordV1 as HandoffRecordV1, + closureContractV1: (input.artifacts.closureContractV1 as ClosureContractRefV1 | undefined) ?? null, + }, + diagnostics: input.diagnostics, + createdAt: input.createdAt, + }); + } + + verifyArtifactBundleV1(input: { artifactBundleV1: unknown }): { ok: true; recomputedHash: string; matches: boolean } { + return verifyArtifactBundleV1ForApi(input.artifactBundleV1 as ArtifactBundleV1); + } +} diff --git a/server/src/services/artifact-store-v1.ts b/server/src/services/artifact-store-v1.ts new file mode 100644 index 0000000..cff8f4c --- /dev/null +++ b/server/src/services/artifact-store-v1.ts @@ -0,0 +1,268 @@ +import { createHash } from 'crypto'; +import type { ArtifactBundleV1 } from './artifact-bundle-v1'; + +export type ArtifactStoreRecordV1 = { + schema: 'artifact-store-record-1'; + identity: { + packageId: string; + revisionId: string | null; + revisionHash: string | null; + }; + bundleHash: string; + artifactBundleV1: ArtifactBundleV1; + createdAt: string | null; + diagnostics: { + notes: string[]; + }; + storeHash: string; +}; + +export type BuildArtifactStoreRecordV1Input = { + identity: { + packageId: string; + revisionId?: string | null; + revisionHash?: string | null; + }; + bundleHash?: string | null; 
+ artifactBundleV1: unknown; + createdAt?: string | null; + diagnostics?: { + notes?: string[]; + }; +}; + +type StoreHashPayload = Omit<ArtifactStoreRecordV1, 'createdAt' | 'storeHash'>; + +function makeStoreError( + code: 'E_STORE_INVALID' | 'E_STORE_NON_JSON_SAFE' | 'E_STORE_HASH_MISMATCH', + message: string +): Error & { code: string } { + const error = new Error(message) as Error & { code: string }; + error.code = code; + return error; +} + +export function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function isPlainObject(value: unknown): value is Record<string, unknown> { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function normalizeNullableString(value: unknown): string | null { + if (typeof value === 'string') return value; + if (value === null || value === undefined) return null; + throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); +} + +function normalizeRequiredString(value: unknown): string { + if (typeof value === 'string') return value; + throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); +} + +function normalizeStringArray(value: unknown): string[] { + if (!Array.isArray(value)) return []; + const normalized: string[] = []; + for (const entry of value) { + if (typeof entry !== 'string') { + throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); + } + normalized.push(entry); + } + return [...normalized].sort(compareStrings); +} + +export function assertJsonSafe(value: unknown): void { + if (value === null) return; + + const valueType = typeof value; + if (valueType === 'string' || valueType === 'boolean') return; + if (valueType === 'number') { + if (!Number.isFinite(value)) { + throw makeStoreError('E_STORE_NON_JSON_SAFE', 'Artifact store record contains non JSON-safe value'); + } + return; + } + if (valueType === 
'undefined' || valueType === 'function' || valueType === 'symbol' || valueType === 'bigint') { + throw makeStoreError('E_STORE_NON_JSON_SAFE', 'Artifact store record contains non JSON-safe value'); + } + + if (Array.isArray(value)) { + for (const entry of value) { + assertJsonSafe(entry); + } + return; + } + + if (!isPlainObject(value)) { + throw makeStoreError('E_STORE_NON_JSON_SAFE', 'Artifact store record contains non JSON-safe value'); + } + + const keys = Object.keys(value).sort(compareStrings); + for (const key of keys) { + assertJsonSafe(value[key]); + } +} + +export function stableStringify(value: unknown): string { + assertJsonSafe(value); + + if (value === null || typeof value === 'string' || typeof value === 'boolean' || typeof value === 'number') { + return JSON.stringify(value); + } + + if (Array.isArray(value)) { + return `[${value.map((entry) => stableStringify(entry)).join(',')}]`; + } + + const record = value as Record; + const keys = Object.keys(record).sort(compareStrings); + return `{${keys.map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`).join(',')}}`; +} + +export function sha256Hex(text: string): string { + return createHash('sha256').update(text, 'utf8').digest('hex'); +} + +function normalizeArtifactBundleV1(value: unknown): ArtifactBundleV1 { + if (!isPlainObject(value) || value.schema !== 'artifact-bundle-1' || typeof value.bundleHash !== 'string') { + throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); + } + if (!isPlainObject(value.identity) || typeof value.identity.packageId !== 'string') { + throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); + } + assertJsonSafe(value); + return value as ArtifactBundleV1; +} + +function cloneArtifactBundleForStore(bundle: ArtifactBundleV1): ArtifactBundleV1 { + const clone = JSON.parse(JSON.stringify(bundle)) as ArtifactBundleV1; + clone.createdAt = null; + if (clone.artifacts && clone.artifacts.lineageBindingV1) { + 
clone.artifacts.lineageBindingV1.createdAt = null; + } + if (clone.artifacts && clone.artifacts.handoffRecordV1) { + clone.artifacts.handoffRecordV1.createdAt = null; + if (clone.artifacts.handoffRecordV1.lineageBindingV1) { + clone.artifacts.handoffRecordV1.lineageBindingV1.createdAt = null; + } + } + return clone; +} + +function buildCanonicalRecord(input: BuildArtifactStoreRecordV1Input): Omit<ArtifactStoreRecordV1, 'storeHash'> { + if (!isPlainObject(input) || !isPlainObject(input.identity)) { + throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); + } + + assertJsonSafe(input); + + if (typeof input.identity.packageId !== 'string') { + throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); + } + + const artifactBundleV1 = normalizeArtifactBundleV1(input.artifactBundleV1); + const bundleHash = typeof input.bundleHash === 'string' ? input.bundleHash : artifactBundleV1.bundleHash; + + return { + schema: 'artifact-store-record-1', + identity: { + packageId: input.identity.packageId, + revisionId: normalizeNullableString(input.identity.revisionId), + revisionHash: normalizeNullableString(input.identity.revisionHash), + }, + bundleHash, + artifactBundleV1, + createdAt: normalizeNullableString(input.createdAt), + diagnostics: { + notes: normalizeStringArray(input.diagnostics?.notes), + }, + }; +} + +function canonicalizeRecordForHash(record: Omit<ArtifactStoreRecordV1, 'storeHash'> | ArtifactStoreRecordV1): StoreHashPayload { + return { + schema: 'artifact-store-record-1', + identity: { + packageId: record.identity.packageId, + revisionId: record.identity.revisionId, + revisionHash: record.identity.revisionHash, + }, + bundleHash: record.bundleHash, + artifactBundleV1: cloneArtifactBundleForStore(record.artifactBundleV1), + diagnostics: { + notes: [...record.diagnostics.notes], + }, + }; +} + +function normalizeStoreRecord(record: unknown): ArtifactStoreRecordV1 { + if (!isPlainObject(record) || record.schema !== 'artifact-store-record-1' || typeof record.storeHash !== 'string') { + 
throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); + } + if (!isPlainObject(record.identity) || typeof record.identity.packageId !== 'string') { + throw makeStoreError('E_STORE_INVALID', 'Artifact store record input is invalid'); + } + + const normalized: ArtifactStoreRecordV1 = { + schema: 'artifact-store-record-1', + identity: { + packageId: record.identity.packageId, + revisionId: normalizeNullableString(record.identity.revisionId), + revisionHash: normalizeNullableString(record.identity.revisionHash), + }, + bundleHash: normalizeRequiredString(record.bundleHash), + artifactBundleV1: normalizeArtifactBundleV1(record.artifactBundleV1), + createdAt: normalizeNullableString(record.createdAt), + diagnostics: { + notes: isPlainObject(record.diagnostics) ? normalizeStringArray(record.diagnostics.notes) : [], + }, + storeHash: record.storeHash, + }; + + assertJsonSafe(normalized); + return normalized; +} + +export function buildArtifactStoreRecordV1(input: BuildArtifactStoreRecordV1Input): ArtifactStoreRecordV1 { + const recordWithoutHash = buildCanonicalRecord(input); + const storeHash = sha256Hex(stableStringify(canonicalizeRecordForHash(recordWithoutHash))); + const record: ArtifactStoreRecordV1 = { + ...recordWithoutHash, + storeHash, + }; + assertJsonSafe(record); + return record; +} + +export function recomputeArtifactStoreRecordV1Hash(record: ArtifactStoreRecordV1): string { + const normalized = normalizeStoreRecord(record); + return sha256Hex(stableStringify(canonicalizeRecordForHash(normalized))); +} + +export function verifyArtifactStoreRecordV1( + record: ArtifactStoreRecordV1 +): { ok: true; recomputedHash: string; matches: boolean } { + const normalized = normalizeStoreRecord(record); + const recomputedHash = sha256Hex(stableStringify(canonicalizeRecordForHash(normalized))); + return { + ok: true, + recomputedHash, + matches: recomputedHash === normalized.storeHash, + }; +} + +export function 
verifyArtifactStoreRecordV1OrThrow(record: ArtifactStoreRecordV1): void { + const verification = verifyArtifactStoreRecordV1(record); + if (!verification.matches) { + throw makeStoreError('E_STORE_HASH_MISMATCH', 'Artifact store record hash mismatch'); + } +} + + + diff --git a/server/src/services/artifact-store.service.ts b/server/src/services/artifact-store.service.ts new file mode 100644 index 0000000..a2db3ba --- /dev/null +++ b/server/src/services/artifact-store.service.ts @@ -0,0 +1,421 @@ +import { Prisma } from '@prisma/client'; +import { canonicalizeJson, computeBundleHash } from '../lib/artifact-hash'; +import { IdentityGuardError, assertHashMatch } from '../lib/identity-guards'; +import { JsonSanitizeError, sanitizeJsonPayload } from '../lib/json-sanitize'; +import { prisma } from '../utils'; +import { + buildArtifactStoreRecordV1, + verifyArtifactStoreRecordV1, + type ArtifactStoreRecordV1, + type BuildArtifactStoreRecordV1Input, +} from './artifact-store-v1'; + +const BUNDLE_HASH_PATTERN = /^[0-9a-f]{64}$/; +const ARTIFACT_VALIDATION_MESSAGE = 'Artifact input is invalid'; +const ARTIFACT_CONFLICT_MESSAGE = 'Artifact already exists with different payload'; +const ARTIFACT_PAYLOAD_INVALID_MESSAGE = 'payload contains non-JSON-safe value'; +const ARTIFACT_HASH_MISMATCH_MESSAGE = 'Artifact hash mismatch'; + +export class ArtifactValidationError extends Error { + code = 'E_ARTIFACT_VALIDATION'; + + constructor(message = ARTIFACT_VALIDATION_MESSAGE) { + super(message); + this.name = 'ArtifactValidationError'; + } +} + +export class ArtifactConflictError extends Error { + code = 'E_ARTIFACT_CONFLICT'; + + constructor(message = ARTIFACT_CONFLICT_MESSAGE) { + super(message); + this.name = 'ArtifactConflictError'; + } +} + +export class ArtifactHashMismatchError extends Error { + code = 'ERR_ARTIFACT_HASH_MISMATCH'; + + constructor(message = ARTIFACT_HASH_MISMATCH_MESSAGE) { + super(message); + this.name = 'ArtifactHashMismatchError'; + } +} + +export type 
ArtifactStoreIdentity = { + packageId: string; + revisionId: string | null; + revisionHash: string | null; +}; + +export type StoreArtifactBundleInput = { + schema: string; + identity: { + packageId: string; + revisionId?: string | null; + revisionHash?: string | null; + }; + payload: unknown; + bundleHash?: string; + createdAt?: string | null; +}; + +export type ArtifactStoreRecordDTO = { + id: string; + schema: string; + identity: ArtifactStoreIdentity; + bundleHash: string; + payload: unknown; + createdAt: string; +}; + +export type LoadArtifactBundleQuery = { + packageId: string; + bundleHash: string; +}; + +export type VerifyArtifactBundleInput = { + schema: string; + identity: { + packageId: string; + revisionId?: string | null; + revisionHash?: string | null; + }; + payload: unknown; + bundleHash: string; +}; + +export type SaveBundleV1Input = { + artifactBundleV1: unknown; + createdAt?: string | null; + notes?: string[]; +}; + +export type GetBundleV1Query = { + packageId: string; + bundleHash: string; +}; + +type ArtifactStorePrismaRecord = { + id: string; + schema: string; + packageId: string; + revisionId: string | null; + revisionHash: string | null; + bundleHash: string; + payload: Prisma.JsonValue; + createdAt: Date; +}; + +function isPlainObject(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function normalizeRequiredString(value: unknown): string { + if (typeof value !== 'string' || value.length === 0) { + throw new ArtifactValidationError(); + } + return value; +} + +function normalizeNullableString(value: unknown): string | null { + if (typeof value === 'string') return value; + if (value === null || value === undefined) return null; + throw new ArtifactValidationError(); +} + +function normalizeIdentity(identity: StoreArtifactBundleInput['identity']): ArtifactStoreIdentity 
{ + if (!identity || typeof identity !== 'object') { + throw new ArtifactValidationError(); + } + + return { + packageId: normalizeRequiredString(identity.packageId), + revisionId: normalizeNullableString(identity.revisionId), + revisionHash: normalizeNullableString(identity.revisionHash), + }; +} + +function normalizeBundleHash(bundleHash: string): string { + const normalized = bundleHash.toLowerCase(); + if (!BUNDLE_HASH_PATTERN.test(normalized)) { + throw new ArtifactValidationError(); + } + return normalized; +} + +function normalizeCreatedAtForWrite(createdAt: string | null | undefined): Date | undefined { + if (createdAt === null || createdAt === undefined) { + return undefined; + } + const parsed = new Date(createdAt); + if (Number.isNaN(parsed.getTime())) { + throw new ArtifactValidationError(); + } + return parsed; +} + +function sanitizePayloadOrThrow(payload: unknown): unknown { + try { + return sanitizeJsonPayload(payload); + } catch (error) { + if (error instanceof JsonSanitizeError) { + throw new ArtifactValidationError(ARTIFACT_PAYLOAD_INVALID_MESSAGE); + } + throw new ArtifactValidationError(ARTIFACT_PAYLOAD_INVALID_MESSAGE); + } +} + +function toPrismaJsonValue(payload: unknown): { sanitizedPayload: unknown; canonical: string; jsonValue: Prisma.InputJsonValue } { + const sanitizedPayload = sanitizePayloadOrThrow(payload); + const canonical = canonicalizeJson(sanitizedPayload); + const normalizedPayload = JSON.parse(canonical) as Prisma.InputJsonValue; + return { + sanitizedPayload, + canonical, + jsonValue: normalizedPayload, + }; +} + +function toDto(record: ArtifactStorePrismaRecord): ArtifactStoreRecordDTO { + return { + id: record.id, + schema: record.schema, + identity: { + packageId: record.packageId, + revisionId: record.revisionId, + revisionHash: record.revisionHash, + }, + bundleHash: record.bundleHash, + payload: record.payload, + createdAt: record.createdAt.toISOString(), + }; +} + +function extractArtifactBundleIdentity(value: 
unknown): ArtifactStoreIdentity { + if (!isPlainObject(value) || !isPlainObject(value.identity)) { + throw new ArtifactValidationError(); + } + + return { + packageId: normalizeRequiredString(value.identity.packageId), + revisionId: normalizeNullableString(value.identity.revisionId), + revisionHash: normalizeNullableString(value.identity.revisionHash), + }; +} + +function extractArtifactBundleSchema(value: unknown): string { + if (!isPlainObject(value)) { + throw new ArtifactValidationError(); + } + + return normalizeRequiredString(value.schema); +} + +function extractArtifactBundleHash(value: unknown): string | undefined { + if (!isPlainObject(value) || typeof value.bundleHash !== 'string') { + return undefined; + } + return normalizeBundleHash(value.bundleHash); +} + +function extractNotes(notes: string[] | undefined): string[] { + if (!Array.isArray(notes)) return []; + return notes.filter((note) => typeof note === 'string'); +} + +export function recomputeBundleHashFromRecord(record: { + schema: string; + packageId: string; + revisionId?: string | null; + revisionHash?: string | null; + payload: unknown; +}): string { + const sanitizedPayload = sanitizePayloadOrThrow(record.payload); + return computeBundleHash({ + schema: record.schema, + packageId: record.packageId, + revisionId: record.revisionId ?? null, + revisionHash: record.revisionHash ?? 
null, + payload: sanitizedPayload, + }); +} + +export class ArtifactStoreService { + async storeArtifactBundle(input: StoreArtifactBundleInput): Promise { + const schema = normalizeRequiredString(input.schema); + const identity = normalizeIdentity(input.identity); + const payload = toPrismaJsonValue(input.payload); + const computedBundleHash = computeBundleHash({ + schema, + packageId: identity.packageId, + revisionId: identity.revisionId, + revisionHash: identity.revisionHash, + payload: payload.sanitizedPayload, + }); + let bundleHash = computedBundleHash; + if (input.bundleHash) { + try { + bundleHash = assertHashMatch(normalizeBundleHash(input.bundleHash), computedBundleHash); + } catch (error) { + if (error instanceof IdentityGuardError && error.code === 'ERR_ARTIFACT_HASH_MISMATCH') { + throw new ArtifactHashMismatchError(); + } + throw error; + } + } + + const where = { + packageId_bundleHash: { + packageId: identity.packageId, + bundleHash, + }, + }; + + const existing = await prisma.artifactStoreRecord.findUnique({ where }); + if (existing) { + const existingCanonical = canonicalizeJson(existing.payload); + if (existingCanonical !== payload.canonical) { + throw new ArtifactConflictError(); + } + return toDto(existing); + } + + const createdAt = normalizeCreatedAtForWrite(input.createdAt); + const persisted = await prisma.artifactStoreRecord.upsert({ + where, + create: { + schema, + packageId: identity.packageId, + revisionId: identity.revisionId, + revisionHash: identity.revisionHash, + bundleHash, + payload: payload.jsonValue, + ...(createdAt ? 
{ createdAt } : {}), + }, + update: {}, + }); + + const persistedCanonical = canonicalizeJson(persisted.payload); + if (persistedCanonical !== payload.canonical) { + throw new ArtifactConflictError(); + } + + return toDto(persisted); + } + + async loadArtifactBundle(where: LoadArtifactBundleQuery): Promise { + const packageId = normalizeRequiredString(where.packageId); + const bundleHash = normalizeBundleHash(where.bundleHash); + + const found = await prisma.artifactStoreRecord.findUnique({ + where: { + packageId_bundleHash: { + packageId, + bundleHash, + }, + }, + }); + + return found ? toDto(found) : null; + } + + verifyArtifactBundle(input: VerifyArtifactBundleInput): { ok: true } | { ok: false; reason: string } { + try { + const schema = normalizeRequiredString(input.schema); + const identity = normalizeIdentity(input.identity); + const normalizedHash = normalizeBundleHash(input.bundleHash); + const sanitizedPayload = sanitizePayloadOrThrow(input.payload); + const recomputedHash = computeBundleHash({ + schema, + packageId: identity.packageId, + revisionId: identity.revisionId, + revisionHash: identity.revisionHash, + payload: sanitizedPayload, + }); + + if (recomputedHash !== normalizedHash) { + return { ok: false, reason: 'bundle_hash_mismatch' }; + } + + return { ok: true }; + } catch (error) { + if (error instanceof ArtifactValidationError) { + return { ok: false, reason: 'invalid_artifact_input' }; + } + return { ok: false, reason: 'invalid_artifact_input' }; + } + } + + async saveBundleV1(input: SaveBundleV1Input): Promise<{ artifactStoreRecordV1: ArtifactStoreRecordV1 }> { + const identity = extractArtifactBundleIdentity(input.artifactBundleV1); + const schema = extractArtifactBundleSchema(input.artifactBundleV1); + const bundleHash = extractArtifactBundleHash(input.artifactBundleV1); + + const stored = await this.storeArtifactBundle({ + schema, + identity, + payload: input.artifactBundleV1, + bundleHash, + createdAt: input.createdAt ?? 
null, + }); + + const artifactStoreRecordV1 = buildArtifactStoreRecordV1({ + identity: { + packageId: stored.identity.packageId, + revisionId: stored.identity.revisionId, + revisionHash: stored.identity.revisionHash, + }, + bundleHash: stored.bundleHash, + artifactBundleV1: stored.payload, + createdAt: stored.createdAt, + diagnostics: { + notes: extractNotes(input.notes), + }, + }); + + return { artifactStoreRecordV1 }; + } + + async getBundleV1(query: GetBundleV1Query): Promise<{ artifactStoreRecordV1: ArtifactStoreRecordV1 | null }> { + const loaded = await this.loadArtifactBundle(query); + if (!loaded) { + return { artifactStoreRecordV1: null }; + } + + const artifactStoreRecordV1 = buildArtifactStoreRecordV1({ + identity: { + packageId: loaded.identity.packageId, + revisionId: loaded.identity.revisionId, + revisionHash: loaded.identity.revisionHash, + }, + bundleHash: loaded.bundleHash, + artifactBundleV1: loaded.payload, + createdAt: loaded.createdAt, + diagnostics: { + notes: [], + }, + }); + + return { artifactStoreRecordV1 }; + } + + async verifyStoredBundleV1( + query: GetBundleV1Query + ): Promise<{ ok: true; recomputedHash: string; matches: boolean } | null> { + const found = await this.getBundleV1(query); + if (!found.artifactStoreRecordV1) { + return null; + } + return verifyArtifactStoreRecordV1(found.artifactStoreRecordV1); + } +} + +export function buildArtifactStoreRecordV1ForPersistence( + input: BuildArtifactStoreRecordV1Input +): ArtifactStoreRecordV1 { + return buildArtifactStoreRecordV1(input); +} diff --git a/server/src/services/closure-contract-v1.ts b/server/src/services/closure-contract-v1.ts new file mode 100644 index 0000000..7111d34 --- /dev/null +++ b/server/src/services/closure-contract-v1.ts @@ -0,0 +1,302 @@ +import { createHash } from 'crypto'; +import type { SemanticDelta } from '../algebra/semanticDiff/types'; + +const DOMAIN_ORDER = ['facts', 'decisions', 'constraints', 'risks', 'assumptions'] as const; + +type DomainNameLike = 
(typeof DOMAIN_ORDER)[number] | string; + +export type ClosureActionType = + | 'ADD_MISSING_DEP' + | 'REQUEST_HUMAN_CONFIRM' + | 'PROMOTE_TO_L3_REVIEW' + | 'SPLIT_PATCH' + | 'RETRY_WITH_CONTEXT'; + +export type ClosureSuggestionV1 = { + schema: 'closure-suggestion-1'; + code: string; + message: string; + actionType: ClosureActionType; + payload: unknown; + riskLevel?: 'L0' | 'L1' | 'L2' | 'L3'; +}; + +export type RejectedPatchSummaryV1 = { + domain: string; + key: string; + path: string | null; + op: string; +}; + +export type ClosureRejectedV1 = { + reasonCode: string; + reasonMessage: string; + riskLevel: 'L0' | 'L1' | 'L2' | 'L3'; + blockedBy: Array<{ domain: string; key: string; path: string | null }>; + patch: RejectedPatchSummaryV1; +}; + +export type AcceptedDeltaSummaryV1 = { + acceptedCount: number; + rejectedCount: number; + acceptedHash: string; + proposedHash: string; +}; + +export type ClosureDiagnosticsV1 = { + closureViolationFlag: boolean; + maxClosureSizeRatio?: number; + blockedByRate?: number; + rejectedCount?: number; + suggestionCoverageRate?: number; + suggestionActionabilityRate?: number; + l3EscalationRate?: number; +}; + +export type ClosureContractV1 = { + schema: 'closure-contract-1'; + accepted: AcceptedDeltaSummaryV1; + rejected: ClosureRejectedV1[]; + suggestions: ClosureSuggestionV1[]; + diagnostics: ClosureDiagnosticsV1; +}; + +type BuildRejectedInput = { + domain: DomainNameLike; + key?: string | null; + path?: string | null; + op: string; + reasonCode: string; + reasonMessage: string; + riskLevel: 'L0' | 'L1' | 'L2' | 'L3'; + blockedBy?: Array<{ domain: DomainNameLike; key?: string | null; path?: string | null }> | null; +}; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function compareDomains(a: string, b: string): number { + const leftIndex = DOMAIN_ORDER.indexOf(a as (typeof DOMAIN_ORDER)[number]); + const rightIndex = DOMAIN_ORDER.indexOf(b as (typeof 
DOMAIN_ORDER)[number]); + const leftRank = leftIndex >= 0 ? leftIndex : DOMAIN_ORDER.length; + const rightRank = rightIndex >= 0 ? rightIndex : DOMAIN_ORDER.length; + if (leftRank < rightRank) return -1; + if (leftRank > rightRank) return 1; + return compareStrings(a, b); +} + +function isPlainObject(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +export function assertJsonSafe(value: unknown): void { + if (value === null) return; + + const valueType = typeof value; + if (valueType === 'string' || valueType === 'boolean') return; + if (valueType === 'number') { + if (!Number.isFinite(value)) { + throw new Error('Contract contains non JSON-safe value'); + } + return; + } + if (valueType === 'undefined' || valueType === 'function' || valueType === 'symbol' || valueType === 'bigint') { + throw new Error('Contract contains non JSON-safe value'); + } + + if (Array.isArray(value)) { + for (const entry of value) { + assertJsonSafe(entry); + } + return; + } + + if (!isPlainObject(value)) { + throw new Error('Contract contains non JSON-safe value'); + } + + const keys = Object.keys(value).sort(compareStrings); + for (const key of keys) { + assertJsonSafe(value[key]); + } +} + +export function stableStringify(value: unknown): string { + assertJsonSafe(value); + + if (value === null || typeof value === 'string' || typeof value === 'boolean' || typeof value === 'number') { + return JSON.stringify(value); + } + + if (Array.isArray(value)) { + return `[${value.map((entry) => stableStringify(entry)).join(',')}]`; + } + + const record = value as Record; + const keys = Object.keys(record).sort(compareStrings); + return `{${keys.map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`).join(',')}}`; +} + +export function sha256Hex(str: string): string { + return createHash('sha256').update(str, 
'utf8').digest('hex'); +} + +function normalizeBlockedBy(blockedBy: BuildRejectedInput['blockedBy']): Array<{ domain: string; key: string; path: string | null }> { + return [...(blockedBy ?? [])] + .map((entry) => ({ + domain: entry.domain, + key: entry.key ?? 'NULL', + path: entry.path ?? null, + })) + .sort((left, right) => { + return ( + compareDomains(left.domain, right.domain) || + compareStrings(left.key, right.key) || + compareStrings(left.path ?? 'NULL', right.path ?? 'NULL') + ); + }); +} + +function normalizeRejected(rejected: BuildRejectedInput[]): ClosureRejectedV1[] { + return [...rejected] + .map((entry) => ({ + reasonCode: entry.reasonCode, + reasonMessage: entry.reasonMessage, + riskLevel: entry.riskLevel, + blockedBy: normalizeBlockedBy(entry.blockedBy), + patch: { + domain: entry.domain, + key: entry.key ?? 'NULL', + path: entry.path ?? null, + op: entry.op, + }, + })) + .sort((left, right) => { + return ( + compareDomains(left.patch.domain, right.patch.domain) || + compareStrings(left.patch.key, right.patch.key) || + compareStrings(left.patch.path ?? 'NULL', right.patch.path ?? 'NULL') || + compareStrings(left.patch.op, right.patch.op) || + compareStrings(left.reasonCode, right.reasonCode) || + compareStrings(left.riskLevel, right.riskLevel) + ); + }); +} + +function normalizeSuggestions(suggestions: ClosureSuggestionV1[]): ClosureSuggestionV1[] { + return [...suggestions] + .map((entry): ClosureSuggestionV1 => ({ + schema: 'closure-suggestion-1', + code: entry.code, + message: entry.message, + actionType: entry.actionType, + payload: entry.payload, + ...(entry.riskLevel ? { riskLevel: entry.riskLevel } : {}), + })) + .sort((left, right) => { + return ( + compareStrings(left.actionType, right.actionType) || + compareStrings(left.code, right.code) || + compareStrings(stableStringify(left.payload), stableStringify(right.payload)) || + compareStrings(left.message, right.message) || + compareStrings(left.riskLevel ?? '', right.riskLevel ?? 
'') + ); + }); +} + +function countDeltaEntries(delta: SemanticDelta): number { + let total = 0; + for (const domain of DOMAIN_ORDER) { + const domainDelta = delta[domain]; + total += domainDelta.added.length; + total += domainDelta.removed.length; + total += domainDelta.modified.length; + } + return total; +} + +function rejectedCoverageKey(entry: ClosureRejectedV1): string { + return `${entry.patch.domain}|${entry.patch.key}|${entry.patch.path ?? 'NULL'}|${entry.patch.op}`; +} + +function suggestionCoverageKey(entry: ClosureSuggestionV1): string { + const payload = isPlainObject(entry.payload) ? entry.payload : {}; + const appliesTo = isPlainObject(payload.appliesTo) ? payload.appliesTo : {}; + const domain = typeof appliesTo.domain === 'string' ? appliesTo.domain : 'NULL'; + const key = typeof appliesTo.key === 'string' ? appliesTo.key : 'NULL'; + const pathValue = typeof appliesTo.path === 'string' ? appliesTo.path : appliesTo.path === null ? 'NULL' : 'NULL'; + const op = typeof appliesTo.op === 'string' ? 
appliesTo.op : 'NULL'; + return `${domain}|${key}|${pathValue}|${op}`; +} + +export function buildClosureContractV1(args: { + proposedDelta: SemanticDelta; + acceptedDelta: SemanticDelta; + rejected: BuildRejectedInput[]; + suggestions: ClosureSuggestionV1[]; + diagnostics: { + closureViolationFlag: boolean; + maxClosureSizeRatio?: number; + blockedByRate?: number; + rejectedCount?: number; + }; +}): ClosureContractV1 { + const rejected = normalizeRejected(args.rejected); + const suggestions = normalizeSuggestions(args.suggestions); + const rejectedKeys = new Set(rejected.map(rejectedCoverageKey)); + const coveredKeys = new Set(); + const actionableKeys = new Set(); + let l3RejectedCount = 0; + let l3SuggestionCount = 0; + + for (const entry of rejected) { + if (entry.riskLevel === 'L3') { + l3RejectedCount += 1; + } + } + + for (const suggestion of suggestions) { + const key = suggestionCoverageKey(suggestion); + if (!rejectedKeys.has(key)) { + continue; + } + + coveredKeys.add(key); + if (suggestion.actionType !== 'REQUEST_HUMAN_CONFIRM') { + actionableKeys.add(key); + } + if (suggestion.actionType === 'PROMOTE_TO_L3_REVIEW') { + l3SuggestionCount += 1; + } + } + + const rejectedCount = rejected.length; + const contract: ClosureContractV1 = { + schema: 'closure-contract-1', + accepted: { + acceptedCount: countDeltaEntries(args.acceptedDelta), + rejectedCount, + acceptedHash: sha256Hex(stableStringify(args.acceptedDelta)), + proposedHash: sha256Hex(stableStringify(args.proposedDelta)), + }, + rejected, + suggestions, + diagnostics: { + closureViolationFlag: args.diagnostics.closureViolationFlag, + ...(args.diagnostics.maxClosureSizeRatio !== undefined ? { maxClosureSizeRatio: args.diagnostics.maxClosureSizeRatio } : {}), + ...(args.diagnostics.blockedByRate !== undefined ? { blockedByRate: args.diagnostics.blockedByRate } : {}), + ...(args.diagnostics.rejectedCount !== undefined ? 
{ rejectedCount: args.diagnostics.rejectedCount } : {}), + suggestionCoverageRate: rejectedCount === 0 ? 1 : coveredKeys.size / rejectedCount, + suggestionActionabilityRate: rejectedCount === 0 ? 1 : actionableKeys.size / rejectedCount, + l3EscalationRate: l3RejectedCount === 0 ? 0 : l3SuggestionCount / l3RejectedCount, + }, + }; + + assertJsonSafe(contract); + return contract; +} diff --git a/server/src/services/delta-closure-planner.ts b/server/src/services/delta-closure-planner.ts new file mode 100644 index 0000000..91953f7 --- /dev/null +++ b/server/src/services/delta-closure-planner.ts @@ -0,0 +1,675 @@ +import { composeDelta } from '../algebra/deltaCompose/composeDelta'; +import { computeUnitKey, stableHash } from '../algebra/semanticDiff/key'; +import type { DomainDelta, DomainName, FieldChange, SemanticDelta } from '../algebra/semanticDiff/types'; +import { applyDelta } from '../algebra/stateTransition/applyDelta'; +import { detectConflicts } from '../algebra/stateTransition/detectConflicts'; +import { + DEFAULT_RISK_POLICY_V1, + normalizeRiskPolicyV1, + type RiskLevel, + type RiskPolicyV1, + type RiskRuleId, +} from './delta-risk-policy'; +import { buildClosureSuggestionsV1, type ClosureSuggestionV1 } from './delta-suggestion-engine'; + +type CandidateOp = 'remove' | 'add' | 'set' | 'append' | 'modify'; +type CandidateKind = 'add' | 'remove' | 'modify'; + +type Candidate = { + id: string; + kind: CandidateKind; + domain: DomainName; + key: string; + path: string | null; + op: CandidateOp; + rawOp?: FieldChange['op']; + delta: SemanticDelta; + dependsOn: string[]; +}; + +type CandidateReference = { + domain: DomainName; + key: string; + path?: string; + op?: CandidateOp; +}; + +type RejectedReasonCode = + | 'CONFLICT' + | 'DEPENDENCY_BLOCKED' + | 'UNSAFE_PATH' + | 'NON_JSON_SAFE' + | 'INVALID_OP' + | 'POST_APPLY_CONFLICT'; + +const DOMAIN_ORDER: DomainName[] = ['facts', 'decisions', 'constraints', 'risks', 'assumptions']; +const DOMAIN_RANK = new 
Map(DOMAIN_ORDER.map((domain, index) => [domain, index])); +const OP_RANK: Record = { + remove: 0, + add: 1, + set: 2, + append: 3, + modify: 4, +}; +const UNSAFE_PATH_SEGMENTS = new Set(['__proto__', 'prototype', 'constructor']); +const RULE_ORDER: RiskRuleId[] = [ + 'POST_APPLY_CONFLICT', + 'MISSING_KEY', + 'DUP_KEY_ADD', + 'INVARIANT_BREAK', + 'CROSS_DOMAIN_DEP', + 'UNKNOWN', +]; + +const REJECTED_REASON_MESSAGE: Record = { + CONFLICT: 'Rejected: conflict', + DEPENDENCY_BLOCKED: 'Rejected: dependency blocked', + UNSAFE_PATH: 'Rejected: unsafe path', + NON_JSON_SAFE: 'Rejected: non JSON-safe value', + INVALID_OP: 'Rejected: invalid operation', + POST_APPLY_CONFLICT: 'Rejected: post-apply conflict', +}; + +export type ClosureSuggestion = ClosureSuggestionV1; + +export type ClosureRejected = { + domain: DomainName; + key?: string; + path?: string; + op: string; + reasonCode: RejectedReasonCode; + reasonMessage: string; + blockedBy?: Array<{ domain: DomainName; key?: string; path?: string }>; + riskLevel: RiskLevel; +}; + +export type DeltaClosurePlan = { + schema: 'delta-closure-plan-1'; + policy: RiskPolicyV1; + acceptedDelta: SemanticDelta; + rejected: ClosureRejected[]; + suggestions: ClosureSuggestionV1[]; + suggestionDiagnostics: { + suggestionCount: number; + coveredRejectedCount: number; + blockedByCoveredCount: number; + }; + diagnostics: { + candidateCount: number; + acceptedCount: number; + rejectedCount: number; + blockedByRate: number; + maxClosureSizeRatio: number; + closureViolationFlag: boolean; + }; +}; + +export type RejectedPatch = ClosureRejected; + +type LegacyPlannerInput = { + baseState: unknown; + proposedDelta: SemanticDelta; + mode: 'strict' | 'best_effort'; + policy: { requirePostApplyZeroConflicts: true }; +}; + +type LegacyPlannerOutput = { + acceptedDelta: SemanticDelta; + rejected: ClosureRejected[]; + suggestions: ClosureSuggestionV1[]; + suggestionDiagnostics: DeltaClosurePlan['suggestionDiagnostics']; + diagnostics: 
DeltaClosurePlan['diagnostics']; +}; + +type ExpansionResult = { + candidates: Candidate[]; + immediateRejected: ClosureRejected[]; + candidateCount: number; +}; + +type ConflictOutcome = { + reasonCode: RejectedReasonCode; + ruleId: RiskRuleId; +}; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function round6(value: number): number { + return Math.round(value * 1000000) / 1000000; +} + +function asArray(value: unknown): unknown[] { + return Array.isArray(value) ? value : []; +} + +function asRecord(value: unknown): Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) return {}; + return value as Record; +} + +function isJsonSafe(value: unknown): boolean { + try { + stableHash(value); + return true; + } catch { + return false; + } +} + +function compareCandidateRefs(a: CandidateReference, b: CandidateReference): number { + return ( + (DOMAIN_RANK.get(a.domain) ?? 0) - (DOMAIN_RANK.get(b.domain) ?? 0) || + compareStrings(a.key, b.key) || + compareStrings(a.path ?? '\uffff', b.path ?? '\uffff') || + compareStrings(a.op ?? 'modify', b.op ?? 'modify') + ); +} + +function compareRejected(a: ClosureRejected, b: ClosureRejected): number { + return ( + (DOMAIN_RANK.get(a.domain) ?? 0) - (DOMAIN_RANK.get(b.domain) ?? 0) || + compareStrings(a.key ?? '', b.key ?? '') || + compareStrings(a.path ?? '\uffff', b.path ?? '\uffff') || + compareStrings(a.op, b.op) || + compareStrings(a.reasonCode, b.reasonCode) + ); +} + +function compareCandidates(a: Candidate, b: Candidate): number { + return ( + (DOMAIN_RANK.get(a.domain) ?? 0) - (DOMAIN_RANK.get(b.domain) ?? 0) || + compareStrings(a.key, b.key) || + compareStrings(a.path ?? '\uffff', b.path ?? 
'\uffff') || + OP_RANK[a.op] - OP_RANK[b.op] || + compareStrings(a.id, b.id) + ); +} + +function deriveKey(domain: DomainName, item: Record): string | null { + if (typeof item.key === 'string' && item.key.length > 0) return item.key; + if (Object.prototype.hasOwnProperty.call(item, 'unit') && isJsonSafe(item.unit)) return computeUnitKey(domain, item.unit); + if (Object.prototype.hasOwnProperty.call(item, 'before') && isJsonSafe(item.before)) return computeUnitKey(domain, item.before); + if (Object.prototype.hasOwnProperty.call(item, 'after') && isJsonSafe(item.after)) return computeUnitKey(domain, item.after); + return null; +} + +function isUnsafePath(path: string): boolean { + if (path.length === 0) return true; + const parts = path.split('.'); + if (parts.some((part) => part.length === 0)) return true; + return parts.some((part) => UNSAFE_PATH_SEGMENTS.has(part)); +} + +function isPathPrefix(parent: string, child: string): boolean { + if (parent === child) return false; + if (child.length <= parent.length) return false; + return child.startsWith(`${parent}.`); +} + +function emptyDomainDelta(): DomainDelta { + return { added: [], removed: [], modified: [] }; +} + +function makeCounts(delta: SemanticDelta): Record { + const counts: Record = {}; + for (const domain of DOMAIN_ORDER) { + counts[`${domain}.added`] = delta[domain].added.length; + counts[`${domain}.removed`] = delta[domain].removed.length; + counts[`${domain}.modified`] = delta[domain].modified.length; + } + counts['collisions.soft'] = delta.meta.collisions.soft.length; + counts['collisions.hard'] = delta.meta.collisions.hard.length; + return counts; +} + +function makeIdentityDelta(proposedDelta: SemanticDelta): SemanticDelta { + return { + schemaVersion: 'sdiff-0.1', + base: { revisionHash: proposedDelta.base.revisionHash }, + target: { revisionHash: proposedDelta.target.revisionHash }, + facts: emptyDomainDelta(), + decisions: emptyDomainDelta(), + constraints: emptyDomainDelta(), + risks: 
emptyDomainDelta(), + assumptions: emptyDomainDelta(), + meta: { + determinism: { + canonicalVersion: 'tpkg-0.2-canon-v1', + keyStrategy: 'sig-hash-v1', + tieBreakers: ['risk-closure-planner-v1'], + }, + collisions: { + hard: [], + soft: [], + }, + counts: { + 'facts.added': 0, + 'facts.removed': 0, + 'facts.modified': 0, + 'decisions.added': 0, + 'decisions.removed': 0, + 'decisions.modified': 0, + 'constraints.added': 0, + 'constraints.removed': 0, + 'constraints.modified': 0, + 'risks.added': 0, + 'risks.removed': 0, + 'risks.modified': 0, + 'assumptions.added': 0, + 'assumptions.removed': 0, + 'assumptions.modified': 0, + 'collisions.soft': 0, + 'collisions.hard': 0, + }, + }, + }; +} + +function withCounts(delta: SemanticDelta): SemanticDelta { + return { + ...delta, + meta: { + ...delta.meta, + counts: makeCounts(delta), + }, + }; +} + +function makeCandidateDelta(proposedDelta: SemanticDelta): SemanticDelta { + return makeIdentityDelta(proposedDelta); +} + +function normalizeRiskLevel(policy: RiskPolicyV1, ruleId: RiskRuleId): RiskLevel { + const rule = policy.classification.rules.find((entry) => entry.ruleId === ruleId); + return rule ? rule.level : 'L1'; +} + +function toBlockedBy(candidate: Candidate[]): Array<{ domain: DomainName; key?: string; path?: string }> { + return candidate + .map((entry) => { + const item: { domain: DomainName; key?: string; path?: string } = { domain: entry.domain }; + item.key = entry.key; + if (entry.path) item.path = entry.path; + return item; + }) + .sort((left, right) => { + return ( + (DOMAIN_RANK.get(left.domain) ?? 0) - (DOMAIN_RANK.get(right.domain) ?? 0) || + compareStrings(left.key ?? '', right.key ?? '') || + compareStrings(left.path ?? '\uffff', right.path ?? 
'\uffff') + ); + }); +} + +function makeRejected( + policy: RiskPolicyV1, + candidate: Pick, + reasonCode: RejectedReasonCode, + ruleId: RiskRuleId, + blockedBy?: Candidate[] +): ClosureRejected { + const rejected: ClosureRejected = { + domain: candidate.domain, + key: candidate.key, + op: candidate.op, + reasonCode, + reasonMessage: REJECTED_REASON_MESSAGE[reasonCode], + riskLevel: normalizeRiskLevel(policy, ruleId), + }; + + if (candidate.path) rejected.path = candidate.path; + if (blockedBy && blockedBy.length > 0) { + rejected.blockedBy = toBlockedBy(blockedBy); + } + + return rejected; +} + +function determineConflictOutcome(conflictCodes: string[]): ConflictOutcome { + if (conflictCodes.some((code) => code === 'E_REMOVE_MISSING' || code === 'E_MODIFY_MISSING')) { + return { reasonCode: 'CONFLICT', ruleId: 'MISSING_KEY' }; + } + if (conflictCodes.some((code) => code === 'E_ADD_EXISTS' || code === 'E_MODIFY_KEY_COLLISION')) { + return { reasonCode: 'CONFLICT', ruleId: 'DUP_KEY_ADD' }; + } + return { reasonCode: 'CONFLICT', ruleId: 'UNKNOWN' }; +} + +function uniqueSorted(items: T[], compare: (a: T, b: T) => number, hash: (value: T) => string): T[] { + const seen = new Set(); + const output: T[] = []; + for (const item of [...items].sort(compare)) { + const key = hash(item); + if (seen.has(key)) continue; + seen.add(key); + output.push(item); + } + return output; +} + +function createCandidate( + proposedDelta: SemanticDelta, + domain: DomainName, + key: string, + kind: CandidateKind, + op: CandidateOp, + path: string | null, + mutate: (delta: SemanticDelta) => void, + rawOp?: FieldChange['op'] +): Candidate { + const delta = makeCandidateDelta(proposedDelta); + mutate(delta); + return { + id: '', + kind, + domain, + key, + path, + op, + rawOp, + delta: withCounts(delta), + dependsOn: [], + }; +} + +function expandCandidates(proposedDelta: SemanticDelta, policy: RiskPolicyV1): ExpansionResult { + const candidates: Candidate[] = []; + const immediateRejected: 
ClosureRejected[] = []; + + for (const domain of DOMAIN_ORDER) { + const domainDelta = proposedDelta[domain]; + + for (const rawItem of domainDelta.added) { + const item = asRecord(rawItem); + const key = deriveKey(domain, item); + if (!key) { + immediateRejected.push(makeRejected(policy, { domain, key: '', path: null, op: 'add' }, 'INVALID_OP', 'UNKNOWN')); + continue; + } + if (!isJsonSafe(item.unit)) { + immediateRejected.push(makeRejected(policy, { domain, key, path: null, op: 'add' }, 'NON_JSON_SAFE', 'UNKNOWN')); + continue; + } + candidates.push( + createCandidate(proposedDelta, domain, key, 'add', 'add', null, (delta) => { + delta[domain].added.push({ key, unit: item.unit }); + }) + ); + } + + for (const rawItem of domainDelta.removed) { + const item = asRecord(rawItem); + const key = deriveKey(domain, item); + if (!key) { + immediateRejected.push(makeRejected(policy, { domain, key: '', path: null, op: 'remove' }, 'INVALID_OP', 'UNKNOWN')); + continue; + } + if (Object.prototype.hasOwnProperty.call(item, 'unit') && !isJsonSafe(item.unit)) { + immediateRejected.push(makeRejected(policy, { domain, key, path: null, op: 'remove' }, 'NON_JSON_SAFE', 'UNKNOWN')); + continue; + } + candidates.push( + createCandidate(proposedDelta, domain, key, 'remove', 'remove', null, (delta) => { + delta[domain].removed.push({ key, unit: Object.prototype.hasOwnProperty.call(item, 'unit') ? item.unit : null }); + }) + ); + } + + for (const rawItem of domainDelta.modified) { + const item = asRecord(rawItem); + const key = deriveKey(domain, item); + if (!key) { + immediateRejected.push(makeRejected(policy, { domain, key: '', path: null, op: 'modify' }, 'INVALID_OP', 'UNKNOWN')); + continue; + } + + const before = Object.prototype.hasOwnProperty.call(item, 'before') ? item.before : item.after; + const after = Object.prototype.hasOwnProperty.call(item, 'after') ? 
item.after : item.before; + if (!isJsonSafe(before) || !isJsonSafe(after)) { + immediateRejected.push(makeRejected(policy, { domain, key, path: null, op: 'modify' }, 'NON_JSON_SAFE', 'UNKNOWN')); + continue; + } + + const changes = asArray(item.changes); + const splitByField = policy.strict.fieldLevelModify === 'on' && changes.length > 0; + if (!splitByField) { + candidates.push( + createCandidate(proposedDelta, domain, key, 'modify', 'modify', null, (delta) => { + delta[domain].modified.push({ + key, + before, + after, + changes: changes as FieldChange[], + }); + }) + ); + continue; + } + + for (const rawChange of changes) { + const change = asRecord(rawChange); + const path = typeof change.path === 'string' ? change.path : ''; + const rawOp = change.op; + if (rawOp !== 'set' && rawOp !== 'unset' && rawOp !== 'append' && rawOp !== 'remove') { + immediateRejected.push(makeRejected(policy, { domain, key, path, op: 'modify' }, 'INVALID_OP', 'UNKNOWN')); + continue; + } + if (isUnsafePath(path)) { + immediateRejected.push(makeRejected(policy, { domain, key, path, op: 'modify' }, 'UNSAFE_PATH', 'INVARIANT_BREAK')); + continue; + } + if ((rawOp === 'append' || rawOp === 'remove') && !Object.prototype.hasOwnProperty.call(change, 'value')) { + immediateRejected.push(makeRejected(policy, { domain, key, path, op: rawOp }, 'INVALID_OP', 'UNKNOWN')); + continue; + } + if (Object.prototype.hasOwnProperty.call(change, 'value') && !isJsonSafe(change.value)) { + immediateRejected.push(makeRejected(policy, { domain, key, path, op: rawOp === 'append' || rawOp === 'remove' ? rawOp : 'set' }, 'NON_JSON_SAFE', 'UNKNOWN')); + continue; + } + if (Object.prototype.hasOwnProperty.call(change, 'after') && !isJsonSafe(change.after)) { + immediateRejected.push(makeRejected(policy, { domain, key, path, op: 'set' }, 'NON_JSON_SAFE', 'UNKNOWN')); + continue; + } + + const normalizedChange: FieldChange = + rawOp === 'append' || rawOp === 'remove' + ? 
{ path, op: rawOp, value: change.value } + : rawOp === 'set' + ? Object.prototype.hasOwnProperty.call(change, 'after') + ? { path, op: 'set', after: change.after, before: change.before } + : Object.prototype.hasOwnProperty.call(change, 'value') + ? { path, op: 'set', value: change.value, before: change.before } + : { path, op: 'set' } + : Object.prototype.hasOwnProperty.call(change, 'before') + ? { path, op: 'unset', before: change.before } + : { path, op: 'unset' }; + + const op: CandidateOp = + rawOp === 'append' ? 'append' : rawOp === 'remove' ? 'remove' : 'set'; + candidates.push( + createCandidate(proposedDelta, domain, key, 'modify', op, path, (delta) => { + delta[domain].modified.push({ + key, + before, + after, + changes: [normalizedChange], + }); + }, rawOp) + ); + } + } + } + + candidates.sort(compareCandidates); + for (let index = 0; index < candidates.length; index += 1) { + candidates[index].id = `c${String(index + 1).padStart(6, '0')}`; + } + + const byDomainKey = new Map(); + for (const candidate of candidates) { + const bucketKey = `${candidate.domain}|${candidate.key}`; + const bucket = byDomainKey.get(bucketKey) ?? 
[]; + bucket.push(candidate); + byDomainKey.set(bucketKey, bucket); + } + + for (const bucket of byDomainKey.values()) { + bucket.sort(compareCandidates); + for (let index = 1; index < bucket.length; index += 1) { + const current = bucket[index]; + const previous = bucket[index - 1]; + current.dependsOn.push(previous.id); + } + + if (policy.strict.fieldLevelModify === 'on') { + const fieldCandidates = bucket.filter((candidate) => candidate.path !== null); + for (const child of fieldCandidates) { + for (const parent of fieldCandidates) { + if (parent.id === child.id || !parent.path || !child.path) continue; + if (parent.rawOp !== 'set' && parent.rawOp !== 'unset') continue; + if (!isPathPrefix(parent.path, child.path)) continue; + if (!child.dependsOn.includes(parent.id)) child.dependsOn.push(parent.id); + } + } + } + } + + for (const candidate of candidates) { + candidate.dependsOn = uniqueSorted(candidate.dependsOn, compareStrings, (value) => value); + } + + return { + candidates, + immediateRejected: immediateRejected.sort(compareRejected), + candidateCount: candidates.length + immediateRejected.length, + }; +} + +function applyCandidateSequence(baseState: unknown, delta: SemanticDelta): { conflictCodes: string[]; postApplyConflictCount: number } { + const transition = applyDelta(baseState, delta, { mode: 'best_effort' }); + const postApplyConflicts = detectConflicts(transition.nextState); + return { + conflictCodes: transition.conflicts.map((conflict) => conflict.code), + postApplyConflictCount: postApplyConflicts.length, + }; +} + +function buildPlan(baseState: unknown, proposedDelta: SemanticDelta, policy: RiskPolicyV1): DeltaClosurePlan { + const expansion = expandCandidates(proposedDelta, policy); + const acceptedIds = new Set(); + const rejected: ClosureRejected[] = [...expansion.immediateRejected]; + const identity = makeIdentityDelta(proposedDelta); + const byId = new Map(expansion.candidates.map((candidate) => [candidate.id, candidate])); + let 
acceptedDelta = identity; + + for (const candidate of expansion.candidates) { + const blockers = candidate.dependsOn + .filter((dependencyId) => !acceptedIds.has(dependencyId)) + .map((dependencyId) => byId.get(dependencyId)) + .filter((dependency): dependency is Candidate => Boolean(dependency)) + .sort(compareCandidates); + + if (blockers.length > 0) { + const hasCrossDomain = blockers.some((blocker) => blocker.domain !== candidate.domain); + const ruleId: RiskRuleId = hasCrossDomain ? 'CROSS_DOMAIN_DEP' : 'UNKNOWN'; + const entry = makeRejected(policy, candidate, 'DEPENDENCY_BLOCKED', ruleId, blockers); + rejected.push(entry); + continue; + } + + const tentative = composeDelta(acceptedDelta, candidate.delta); + const outcome = applyCandidateSequence(baseState, tentative); + + if (outcome.conflictCodes.length > 0) { + const conflictOutcome = determineConflictOutcome(outcome.conflictCodes); + const entry = makeRejected(policy, candidate, conflictOutcome.reasonCode, conflictOutcome.ruleId); + rejected.push(entry); + continue; + } + + if (policy.strict.requirePostApplyConflictsZero && outcome.postApplyConflictCount > 0) { + rejected.push(makeRejected(policy, candidate, 'POST_APPLY_CONFLICT', 'POST_APPLY_CONFLICT')); + continue; + } + + acceptedIds.add(candidate.id); + acceptedDelta = tentative; + } + + const finalOutcome = applyCandidateSequence(baseState, acceptedDelta); + const rejectedSorted = rejected.sort(compareRejected); + const suggestionResult = buildClosureSuggestionsV1({ + rejected: rejectedSorted, + policy, + limits: { maxSuggestions: 64 }, + }); + const acceptedCount = acceptedIds.size; + const rejectedCount = rejectedSorted.length; + const candidateCount = expansion.candidateCount; + const dependencyBlockedCount = rejectedSorted.filter((entry) => entry.reasonCode === 'DEPENDENCY_BLOCKED').length; + + return { + schema: 'delta-closure-plan-1', + policy, + acceptedDelta: withCounts(acceptedDelta), + rejected: rejectedSorted, + suggestions: 
suggestionResult.suggestions, + suggestionDiagnostics: suggestionResult.diagnostics, + diagnostics: { + candidateCount, + acceptedCount, + rejectedCount, + blockedByRate: rejectedCount === 0 ? 0 : round6(dependencyBlockedCount / rejectedCount), + maxClosureSizeRatio: candidateCount === 0 ? 1 : round6(acceptedCount / candidateCount), + closureViolationFlag: + finalOutcome.conflictCodes.length > 0 || + (policy.strict.requirePostApplyConflictsZero && finalOutcome.postApplyConflictCount > 0), + }, + }; +} + +export function planDeltaClosureV1(input: { + baseState: unknown; + proposedDelta: SemanticDelta; + mode: 'strict'; + policy?: RiskPolicyV1; +}): DeltaClosurePlan { + const normalizedPolicy = normalizeRiskPolicyV1(input.policy); + const effectivePolicy = normalizedPolicy ?? normalizeRiskPolicyV1(DEFAULT_RISK_POLICY_V1)!; + return buildPlan(input.baseState, input.proposedDelta, effectivePolicy); +} + +export function planDeltaClosure(input: LegacyPlannerInput): LegacyPlannerOutput { + const legacyPolicy = { + schema: DEFAULT_RISK_POLICY_V1.schema, + strict: { + requirePostApplyConflictsZero: true, + fieldLevelModify: 'on', + dependencyScope: 'same_domain', + priority: 'acceptance', + targetAcceptanceRatio: DEFAULT_RISK_POLICY_V1.strict.targetAcceptanceRatio, + }, + classification: { + rules: DEFAULT_RISK_POLICY_V1.classification.rules.map((rule) => ({ ...rule })), + }, + } satisfies RiskPolicyV1; + + const plan = planDeltaClosureV1({ + baseState: input.baseState, + proposedDelta: input.proposedDelta, + mode: 'strict', + policy: legacyPolicy, + }); + + return { + acceptedDelta: plan.acceptedDelta, + rejected: plan.rejected, + suggestions: plan.suggestions, + suggestionDiagnostics: plan.suggestionDiagnostics, + diagnostics: plan.diagnostics, + }; +} diff --git a/server/src/services/delta-risk-policy.ts b/server/src/services/delta-risk-policy.ts new file mode 100644 index 0000000..406851b --- /dev/null +++ b/server/src/services/delta-risk-policy.ts @@ -0,0 +1,150 @@ 
+import type { DomainName } from '../algebra/semanticDiff/types'; + +export type RiskLevel = 'L0' | 'L1' | 'L2' | 'L3'; +export type RiskRuleId = + | 'POST_APPLY_CONFLICT' + | 'MISSING_KEY' + | 'DUP_KEY_ADD' + | 'INVARIANT_BREAK' + | 'CROSS_DOMAIN_DEP' + | 'UNKNOWN'; + +export type RiskPolicyV1 = { + schema: 'risk-policy-1'; + strict: { + requirePostApplyConflictsZero: true; + fieldLevelModify: 'off' | 'on'; + dependencyScope: 'same_domain' | 'cross_domain'; + priority: 'explainability' | 'acceptance'; + targetAcceptanceRatio: number; + }; + classification: { + rules: Array<{ + ruleId: RiskRuleId; + level: RiskLevel; + }>; + }; +}; + +const RULE_ORDER: RiskRuleId[] = [ + 'POST_APPLY_CONFLICT', + 'MISSING_KEY', + 'DUP_KEY_ADD', + 'INVARIANT_BREAK', + 'CROSS_DOMAIN_DEP', + 'UNKNOWN', +]; + +const DOMAIN_ORDER: DomainName[] = ['facts', 'decisions', 'constraints', 'risks', 'assumptions']; + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function asRecord(value: unknown): Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) return {}; + return value as Record; +} + +function isRiskLevel(value: unknown): value is RiskLevel { + return value === 'L0' || value === 'L1' || value === 'L2' || value === 'L3'; +} + +function isRiskRuleId(value: unknown): value is RiskRuleId { + return RULE_ORDER.includes(value as RiskRuleId); +} + +function normalizeRules(input: unknown): RiskPolicyV1['classification']['rules'] | null { + if (!Array.isArray(input)) return null; + + const byRule = new Map(); + for (const rawRule of input) { + const rule = asRecord(rawRule); + if (!isRiskRuleId(rule.ruleId) || !isRiskLevel(rule.level)) return null; + byRule.set(rule.ruleId, rule.level); + } + + const normalized = RULE_ORDER.map((ruleId) => ({ + ruleId, + level: byRule.get(ruleId) ?? (ruleId === 'UNKNOWN' ? 
'L1' : null), + })); + + if (normalized.some((rule) => rule.level === null)) return null; + + return normalized.map((rule) => ({ + ruleId: rule.ruleId, + level: rule.level as RiskLevel, + })); +} + +export const DEFAULT_RISK_POLICY_V1 = { + schema: 'risk-policy-1', + strict: { + requirePostApplyConflictsZero: true, + fieldLevelModify: 'off', + dependencyScope: 'same_domain', + priority: 'explainability', + targetAcceptanceRatio: 0.75, + }, + classification: { + rules: [ + { ruleId: 'POST_APPLY_CONFLICT', level: 'L3' }, + { ruleId: 'MISSING_KEY', level: 'L2' }, + { ruleId: 'DUP_KEY_ADD', level: 'L2' }, + { ruleId: 'INVARIANT_BREAK', level: 'L3' }, + { ruleId: 'CROSS_DOMAIN_DEP', level: 'L2' }, + { ruleId: 'UNKNOWN', level: 'L1' }, + ], + }, +} as const satisfies RiskPolicyV1; + +export function normalizeRiskPolicyV1(input?: RiskPolicyV1): RiskPolicyV1 | null { + if (input === undefined) { + return { + schema: DEFAULT_RISK_POLICY_V1.schema, + strict: { ...DEFAULT_RISK_POLICY_V1.strict }, + classification: { + rules: DEFAULT_RISK_POLICY_V1.classification.rules.map((rule) => ({ ...rule })), + }, + }; + } + + const record = asRecord(input); + if (record.schema !== 'risk-policy-1') return null; + + const strict = asRecord(record.strict); + if (strict.requirePostApplyConflictsZero !== true) return null; + if (strict.fieldLevelModify !== 'off' && strict.fieldLevelModify !== 'on') return null; + if (strict.dependencyScope !== 'same_domain' && strict.dependencyScope !== 'cross_domain') return null; + if (strict.priority !== 'explainability' && strict.priority !== 'acceptance') return null; + if (typeof strict.targetAcceptanceRatio !== 'number' || !Number.isFinite(strict.targetAcceptanceRatio)) return null; + if (strict.targetAcceptanceRatio < 0 || strict.targetAcceptanceRatio > 1) return null; + + const classification = asRecord(record.classification); + const rules = normalizeRules(classification.rules); + if (!rules) return null; + + return { + schema: 'risk-policy-1', + 
strict: { + requirePostApplyConflictsZero: true, + fieldLevelModify: strict.fieldLevelModify, + dependencyScope: strict.dependencyScope, + priority: strict.priority, + targetAcceptanceRatio: strict.targetAcceptanceRatio, + }, + classification: { + rules, + }, + }; +} + +export function isRiskPolicyV1(input: unknown): input is RiskPolicyV1 { + return normalizeRiskPolicyV1(input as RiskPolicyV1 | undefined) !== null; +} + +export function compareDomainName(a: DomainName, b: DomainName): number { + return DOMAIN_ORDER.indexOf(a) - DOMAIN_ORDER.indexOf(b) || compareStrings(a, b); +} diff --git a/server/src/services/delta-suggestion-engine.ts b/server/src/services/delta-suggestion-engine.ts new file mode 100644 index 0000000..b42fd92 --- /dev/null +++ b/server/src/services/delta-suggestion-engine.ts @@ -0,0 +1,225 @@ +import type { DomainName } from '../algebra/semanticDiff/types'; +import type { RiskLevel, RiskPolicyV1 } from './delta-risk-policy'; +import type { ClosureActionType, ClosureSuggestionV1 } from './closure-contract-v1'; + +const DOMAIN_ORDER: DomainName[] = ['facts', 'decisions', 'constraints', 'risks', 'assumptions']; +const ACTION_ORDER: ClosureActionType[] = [ + 'ADD_MISSING_DEP', + 'REQUEST_HUMAN_CONFIRM', + 'SPLIT_PATCH', + 'RETRY_WITH_CONTEXT', + 'PROMOTE_TO_L3_REVIEW', +]; +const DEFAULT_MAX_SUGGESTIONS = 64; + +type RejectedInput = { + domain: DomainName; + key?: string | null; + path?: string | null; + op?: string | null; + reasonCode: string; + blockedBy?: Array<{ domain: DomainName; key?: string | null; path?: string | null }> | null; + riskLevel: RiskLevel; +}; + +export type { ClosureSuggestionV1 } from './closure-contract-v1'; + +function makeError(code: 'E_SUGGESTION_INPUT_INVALID' | 'E_SUGGESTION_NON_JSON_SAFE', message: string): Error & { code: string } { + const error = new Error(message) as Error & { code: string }; + error.code = code; + return error; +} + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; 
+ if (a > b) return 1; + return 0; +} + +function compareDomains(a: DomainName, b: DomainName): number { + return DOMAIN_ORDER.indexOf(a) - DOMAIN_ORDER.indexOf(b) || compareStrings(a, b); +} + +function isDomainName(value: unknown): value is DomainName { + return value === 'facts' || value === 'decisions' || value === 'constraints' || value === 'risks' || value === 'assumptions'; +} + +function isRiskLevel(value: unknown): value is RiskLevel { + return value === 'L0' || value === 'L1' || value === 'L2' || value === 'L3'; +} + +function isPlainObject(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function isJsonSafe(value: unknown): boolean { + if (value === null) return true; + const valueType = typeof value; + if (valueType === 'string' || valueType === 'boolean') return true; + if (valueType === 'number') return Number.isFinite(value); + if (valueType === 'undefined' || valueType === 'function' || valueType === 'symbol' || valueType === 'bigint') return false; + if (Array.isArray(value)) return value.every((entry) => isJsonSafe(entry)); + if (!isPlainObject(value)) return false; + for (const key of Object.keys(value)) { + if (!isJsonSafe(value[key])) return false; + } + return true; +} + +function ensureJsonSafe(value: unknown): void { + if (!isJsonSafe(value)) { + throw makeError('E_SUGGESTION_NON_JSON_SAFE', 'Suggestion contains non JSON-safe value'); + } +} + +function normalizeBlockedBy( + blockedBy: RejectedInput['blockedBy'] +): Array<{ domain: DomainName; key?: string | null; path?: string | null }> { + if (!Array.isArray(blockedBy) || blockedBy.length === 0) return []; + + return blockedBy + .filter((entry) => entry && isDomainName(entry.domain)) + .map((entry) => ({ + domain: entry.domain, + key: typeof entry.key === 'string' ? 
entry.key : null, + path: typeof entry.path === 'string' ? entry.path : null, + })) + .sort((left, right) => { + return ( + compareDomains(left.domain, right.domain) || + compareStrings(left.key ?? 'NULL', right.key ?? 'NULL') || + compareStrings(left.path ?? 'NULL', right.path ?? 'NULL') + ); + }); +} + +function compareSuggestions(a: ClosureSuggestionV1, b: ClosureSuggestionV1): number { + const left = isPlainObject(a.payload) ? a.payload : {}; + const right = isPlainObject(b.payload) ? b.payload : {}; + const leftAppliesTo = isPlainObject(left.appliesTo) ? left.appliesTo : {}; + const rightAppliesTo = isPlainObject(right.appliesTo) ? right.appliesTo : {}; + const leftDomain = isDomainName(leftAppliesTo.domain) ? leftAppliesTo.domain : 'facts'; + const rightDomain = isDomainName(rightAppliesTo.domain) ? rightAppliesTo.domain : 'facts'; + + return ( + compareDomains(leftDomain, rightDomain) || + compareStrings(typeof leftAppliesTo.key === 'string' ? leftAppliesTo.key : 'NULL', typeof rightAppliesTo.key === 'string' ? rightAppliesTo.key : 'NULL') || + compareStrings(typeof leftAppliesTo.path === 'string' ? leftAppliesTo.path : 'NULL', typeof rightAppliesTo.path === 'string' ? 
rightAppliesTo.path : 'NULL') || + ACTION_ORDER.indexOf(a.actionType) - ACTION_ORDER.indexOf(b.actionType) || + compareStrings(a.code, b.code) + ); +} + +function makeMessage(actionType: ClosureActionType): string { + if (actionType === 'ADD_MISSING_DEP') return 'Add missing dependency'; + if (actionType === 'REQUEST_HUMAN_CONFIRM') return 'Request human confirm'; + if (actionType === 'PROMOTE_TO_L3_REVIEW') return 'Requires L3 review'; + if (actionType === 'SPLIT_PATCH') return 'Split patch'; + return 'Retry with context'; +} + +function makeSuggestion( + rejected: RejectedInput, + actionType: ClosureActionType, + blockedBy: Array<{ domain: DomainName; key?: string | null; path?: string | null }> +): ClosureSuggestionV1 { + const payload = { + appliesTo: { + domain: rejected.domain, + key: rejected.key ?? null, + path: rejected.path ?? null, + op: rejected.op ?? null, + }, + blockedBy: blockedBy.length > 0 ? blockedBy : null, + }; + + ensureJsonSafe(payload); + + return { + schema: 'closure-suggestion-1', + code: actionType, + message: makeMessage(actionType), + actionType, + payload, + riskLevel: rejected.riskLevel, + }; +} + +function validateInput(input: { + rejected: RejectedInput[]; + policy: RiskPolicyV1; + limits?: { maxSuggestions?: number }; +}): { rejected: RejectedInput[]; policy: RiskPolicyV1; maxSuggestions: number } { + if (!input || !Array.isArray(input.rejected) || !input.policy || input.policy.schema !== 'risk-policy-1') { + throw makeError('E_SUGGESTION_INPUT_INVALID', 'Suggestion input is invalid'); + } + + for (const rejected of input.rejected) { + if (!rejected || !isDomainName(rejected.domain) || !isRiskLevel(rejected.riskLevel) || typeof rejected.reasonCode !== 'string') { + throw makeError('E_SUGGESTION_INPUT_INVALID', 'Suggestion input is invalid'); + } + } + + const rawLimit = input.limits && typeof input.limits.maxSuggestions === 'number' ? 
input.limits.maxSuggestions : DEFAULT_MAX_SUGGESTIONS; + const maxSuggestions = Number.isFinite(rawLimit) && rawLimit > 0 ? Math.floor(rawLimit) : DEFAULT_MAX_SUGGESTIONS; + return { rejected: input.rejected, policy: input.policy, maxSuggestions }; +} + +export function buildClosureSuggestionsV1(input: { + rejected: RejectedInput[]; + policy: RiskPolicyV1; + limits?: { maxSuggestions?: number }; +}): { + suggestions: ClosureSuggestionV1[]; + diagnostics: { suggestionCount: number; coveredRejectedCount: number; blockedByCoveredCount: number }; +} { + const validated = validateInput(input); + const suggestions: ClosureSuggestionV1[] = []; + let coveredRejectedCount = 0; + let blockedByCoveredCount = 0; + + for (const rejected of validated.rejected) { + const blockedBy = normalizeBlockedBy(rejected.blockedBy); + const actions: ClosureActionType[] = []; + + if (rejected.reasonCode === 'DEPENDENCY_BLOCKED' || blockedBy.length > 0) { + actions.push('ADD_MISSING_DEP'); + } else if (rejected.reasonCode === 'CONFLICT') { + actions.push('REQUEST_HUMAN_CONFIRM'); + actions.push('RETRY_WITH_CONTEXT'); + } else if (rejected.reasonCode === 'INVALID_OP' && rejected.op === 'modify' && validated.policy.strict.fieldLevelModify === 'on') { + actions.push('SPLIT_PATCH'); + } else if (rejected.reasonCode === 'NON_JSON_SAFE') { + actions.push('RETRY_WITH_CONTEXT'); + } + + if (rejected.riskLevel === 'L3') { + actions.push('PROMOTE_TO_L3_REVIEW'); + } + + if (actions.length === 0) continue; + + coveredRejectedCount += 1; + if (blockedBy.length > 0) { + blockedByCoveredCount += blockedBy.length; + } + + for (const actionType of actions) { + suggestions.push(makeSuggestion(rejected, actionType, blockedBy)); + } + } + + const ordered = [...suggestions].sort(compareSuggestions).slice(0, validated.maxSuggestions); + ensureJsonSafe(ordered); + + return { + suggestions: ordered, + diagnostics: { + suggestionCount: ordered.length, + coveredRejectedCount, + blockedByCoveredCount, + }, + }; +} 
diff --git a/server/src/services/execution-record-v1.ts b/server/src/services/execution-record-v1.ts index ef0892f..a6e3751 100644 --- a/server/src/services/execution-record-v1.ts +++ b/server/src/services/execution-record-v1.ts @@ -176,7 +176,7 @@ function sortFindings( function normalizeSummary( summary: ApplyReportV1['delta']['summary'] | null | undefined -): ApplyReportV1['delta']['summary'] { +): ExecutionRecordV1['inputs']['deltaSummary'] { if (!summary) return null; return { modifiedDomains: normalizeDomainOrder(summary.modifiedDomains), @@ -374,3 +374,5 @@ export function buildExecutionRecordV1(input: BuildExecutionRecordV1Input): Exec safeStableHash(record); return record; } + + diff --git a/server/src/services/execution.service.ts b/server/src/services/execution.service.ts new file mode 100644 index 0000000..fc28ade --- /dev/null +++ b/server/src/services/execution.service.ts @@ -0,0 +1,521 @@ +import type { PrismaClient } from '@prisma/client'; +import { canonicalizeJson } from '../lib/artifact-hash'; +import { + computeExecutionResultHash, + type ExecutionArtifactReference, + type ExecutionStatus, +} from '../lib/execution-hash'; +import { sanitizeJsonPayload } from '../lib/json-sanitize'; + +const EXECUTION_HASH_PATTERN = /^[0-9a-f]{64}$/; +const EXECUTION_ID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/; + +const ERR_EXECUTION_INVALID_INPUT = 'ERR_EXECUTION_INVALID_INPUT'; +const ERR_REVISION_NOT_FOUND = 'ERR_REVISION_NOT_FOUND'; +const ERR_ARTIFACT_NOT_FOUND = 'ERR_ARTIFACT_NOT_FOUND'; +const ERR_EXECUTION_NOT_FOUND = 'ERR_EXECUTION_NOT_FOUND'; +const ERR_EXECUTION_NON_DETERMINISTIC = 'ERR_EXECUTION_NON_DETERMINISTIC'; +const ERR_EXECUTION_REPLAY_MISMATCH = 'ERR_EXECUTION_REPLAY_MISMATCH'; + +const MESSAGE_EXECUTION_INVALID_INPUT = 'Execution input is invalid'; +const MESSAGE_REVISION_NOT_FOUND = 'Revision not found'; +const MESSAGE_ARTIFACT_NOT_FOUND = 'Artifact not found'; +const MESSAGE_EXECUTION_NOT_FOUND = 
'Execution record not found'; +const MESSAGE_EXECUTION_NON_DETERMINISTIC = 'Execution replay is non-deterministic'; +const MESSAGE_EXECUTION_REPLAY_MISMATCH = 'Execution replay result hash mismatch'; + +export type RecordExecutionInput = { + executionId?: string; + packageId: string; + revisionHash: string; + provider: string; + model: string; + promptHash: string; + parameters: unknown; + inputArtifacts: ExecutionArtifactReference[]; + outputArtifacts: ExecutionArtifactReference[]; + status: ExecutionStatus; + startedAt: string; + finishedAt: string; +}; + +export type GetExecutionQuery = { + executionId: string; +}; + +export type ListExecutionsQuery = { + packageId: string; + limit?: number; +}; + +export type ReplayExecutionInput = { + executionId: string; + promptHash: string; + parameters: unknown; + inputArtifacts: ExecutionArtifactReference[]; + outputArtifacts?: ExecutionArtifactReference[]; + status?: ExecutionStatus; +}; + +export type ExecutionRecordDTO = { + executionId: string; + packageId: string; + revisionHash: string; + provider: string; + model: string; + promptHash: string; + parameters: unknown; + inputArtifacts: ExecutionArtifactReference[]; + outputArtifacts: ExecutionArtifactReference[]; + resultHash: string; + status: ExecutionStatus; + startedAt: string; + finishedAt: string; + createdAt: string; +}; + +type StoredExecutionRecord = { + executionId: string; + packageId: string; + revisionHash: string; + provider: string; + model: string; + promptHash: string; + parameters: unknown; + inputArtifacts: ExecutionArtifactReference[]; + outputArtifacts: ExecutionArtifactReference[]; + resultHash: string; + status: ExecutionStatus; + startedAt: Date; + finishedAt: Date; + createdAt: Date; +}; + +export type ReplayExecutionResult = { + ok: true; + executionId: string; + resultHash: string; + matches: true; +}; + +export type ExecutionStorageAdapter = { + findRevisionByHash(revisionHash: string): Promise; + artifactExists(packageId: string, 
bundleHash: string): Promise; + findExecutionById(executionId: string): Promise; + createExecution(input: { + executionId?: string; + packageId: string; + revisionHash: string; + provider: string; + model: string; + promptHash: string; + parameters: unknown; + inputArtifacts: ExecutionArtifactReference[]; + outputArtifacts: ExecutionArtifactReference[]; + resultHash: string; + status: ExecutionStatus; + startedAt: string; + finishedAt: string; + }): Promise; + listExecutions(packageId: string, limit: number): Promise; +}; + +export class ExecutionServiceError extends Error { + code: string; + + constructor(code: string, message: string) { + super(message); + this.code = code; + this.name = 'ExecutionServiceError'; + } +} + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function normalizeRequiredString(value: unknown): string { + if (typeof value !== 'string' || value.length === 0) { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + return value; +} + +function normalizeHash(value: unknown): string { + if (typeof value !== 'string') { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + const normalized = value.toLowerCase(); + if (!EXECUTION_HASH_PATTERN.test(normalized)) { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + return normalized; +} + +function normalizeExecutionId(value: unknown): string { + if (typeof value !== 'string') { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + const normalized = value.toLowerCase(); + if (!EXECUTION_ID_PATTERN.test(normalized)) { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + return normalized; +} + +function normalizeTimestamp(value: unknown): string { + if (typeof value !== 'string' || 
value.length === 0) { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + const parsed = new Date(value); + if (Number.isNaN(parsed.getTime())) { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + return parsed.toISOString(); +} + +function normalizeStatus(value: unknown): ExecutionStatus { + if (value === 'success' || value === 'failure') { + return value; + } + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); +} + +function normalizeParameters(value: unknown): unknown { + try { + return sanitizeJsonPayload(value); + } catch { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } +} + +function normalizeArtifacts(artifacts: unknown): ExecutionArtifactReference[] { + if (!Array.isArray(artifacts)) { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + + const out: ExecutionArtifactReference[] = []; + for (const item of artifacts) { + if (!item || typeof item !== 'object') { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + + const entry = item as { bundleHash?: unknown; role?: unknown }; + out.push({ + bundleHash: normalizeHash(entry.bundleHash), + role: normalizeRequiredString(entry.role), + }); + } + + out.sort((a, b) => { + const bundleOrder = compareStrings(a.bundleHash, b.bundleHash); + if (bundleOrder !== 0) { + return bundleOrder; + } + return compareStrings(a.role, b.role); + }); + + return out; +} + +function artifactsEqual(a: ExecutionArtifactReference[], b: ExecutionArtifactReference[]): boolean { + if (a.length !== b.length) { + return false; + } + for (let index = 0; index < a.length; index += 1) { + if (a[index].bundleHash !== b[index].bundleHash || a[index].role !== b[index].role) { + return false; + } + } + return true; +} + +function parametersEqual(a: 
unknown, b: unknown): boolean { + return canonicalizeJson(a) === canonicalizeJson(b); +} + +function toExecutionRecordDTO(record: StoredExecutionRecord): ExecutionRecordDTO { + return { + executionId: record.executionId, + packageId: record.packageId, + revisionHash: record.revisionHash, + provider: record.provider, + model: record.model, + promptHash: record.promptHash, + parameters: record.parameters, + inputArtifacts: record.inputArtifacts.map((artifact) => ({ ...artifact })), + outputArtifacts: record.outputArtifacts.map((artifact) => ({ ...artifact })), + resultHash: record.resultHash, + status: record.status, + startedAt: record.startedAt.toISOString(), + finishedAt: record.finishedAt.toISOString(), + createdAt: record.createdAt.toISOString(), + }; +} + +function toStoredExecutionRecordFromPrisma(row: { + executionId: string; + packageId: string; + revisionHash: string; + provider: string; + model: string; + promptHash: string; + parameters: unknown; + resultHash: string; + status: string; + startedAt: Date; + finishedAt: Date; + createdAt: Date; + inputs: { bundleHash: string; role: string }[]; + outputs: { bundleHash: string; role: string }[]; +}): StoredExecutionRecord { + return { + executionId: row.executionId, + packageId: row.packageId, + revisionHash: row.revisionHash, + provider: row.provider, + model: row.model, + promptHash: row.promptHash, + parameters: row.parameters, + inputArtifacts: row.inputs + .map((entry) => ({ bundleHash: entry.bundleHash, role: entry.role })) + .sort((a, b) => compareStrings(a.bundleHash, b.bundleHash) || compareStrings(a.role, b.role)), + outputArtifacts: row.outputs + .map((entry) => ({ bundleHash: entry.bundleHash, role: entry.role })) + .sort((a, b) => compareStrings(a.bundleHash, b.bundleHash) || compareStrings(a.role, b.role)), + resultHash: row.resultHash, + status: normalizeStatus(row.status), + startedAt: row.startedAt, + finishedAt: row.finishedAt, + createdAt: row.createdAt, + }; +} + +function 
createDefaultExecutionStorageAdapter(): ExecutionStorageAdapter { + const { prisma } = require('../utils') as { prisma: PrismaClient }; + + return { + async findRevisionByHash(revisionHash: string): Promise { + const revision = await prisma.revisionNode.findUnique({ + where: { revisionHash }, + select: { revisionHash: true }, + }); + return Boolean(revision); + }, + + async artifactExists(packageId: string, bundleHash: string): Promise { + const artifact = await prisma.artifactStoreRecord.findUnique({ + where: { + packageId_bundleHash: { + packageId, + bundleHash, + }, + }, + select: { id: true }, + }); + return Boolean(artifact); + }, + + async findExecutionById(executionId: string): Promise { + const record = await prisma.executionRecord.findUnique({ + where: { executionId }, + include: { + inputs: true, + outputs: true, + }, + }); + return record ? toStoredExecutionRecordFromPrisma(record) : null; + }, + + async createExecution(input): Promise { + const created = await prisma.executionRecord.create({ + data: { + ...(input.executionId ? 
{ executionId: input.executionId } : {}), + packageId: input.packageId, + revisionHash: input.revisionHash, + provider: input.provider, + model: input.model, + promptHash: input.promptHash, + parameters: input.parameters as object, + resultHash: input.resultHash, + status: input.status, + startedAt: input.startedAt, + finishedAt: input.finishedAt, + inputs: { + createMany: { + data: input.inputArtifacts.map((artifact) => ({ + bundleHash: artifact.bundleHash, + role: artifact.role, + })), + }, + }, + outputs: { + createMany: { + data: input.outputArtifacts.map((artifact) => ({ + bundleHash: artifact.bundleHash, + role: artifact.role, + })), + }, + }, + }, + include: { + inputs: true, + outputs: true, + }, + }); + + return toStoredExecutionRecordFromPrisma(created); + }, + + async listExecutions(packageId: string, limit: number): Promise { + const rows = await prisma.executionRecord.findMany({ + where: { packageId }, + take: limit, + orderBy: [{ startedAt: 'asc' }, { executionId: 'asc' }], + include: { + inputs: true, + outputs: true, + }, + }); + + return rows.map((row) => toStoredExecutionRecordFromPrisma(row)); + }, + }; +} + +export class ExecutionService { + private readonly storage: ExecutionStorageAdapter; + + constructor(storage?: ExecutionStorageAdapter) { + this.storage = storage ?? createDefaultExecutionStorageAdapter(); + } + + async recordExecution(input: RecordExecutionInput): Promise { + const executionId = typeof input.executionId === 'undefined' ? 
undefined : normalizeExecutionId(input.executionId); + const packageId = normalizeRequiredString(input.packageId); + const revisionHash = normalizeHash(input.revisionHash); + const provider = normalizeRequiredString(input.provider); + const model = normalizeRequiredString(input.model); + const promptHash = normalizeHash(input.promptHash); + const parameters = normalizeParameters(input.parameters); + const inputArtifacts = normalizeArtifacts(input.inputArtifacts); + const outputArtifacts = normalizeArtifacts(input.outputArtifacts); + const status = normalizeStatus(input.status); + const startedAt = normalizeTimestamp(input.startedAt); + const finishedAt = normalizeTimestamp(input.finishedAt); + + if (!(await this.storage.findRevisionByHash(revisionHash))) { + throw new ExecutionServiceError(ERR_REVISION_NOT_FOUND, MESSAGE_REVISION_NOT_FOUND); + } + + for (const artifact of [...inputArtifacts, ...outputArtifacts]) { + const exists = await this.storage.artifactExists(packageId, artifact.bundleHash); + if (!exists) { + throw new ExecutionServiceError(ERR_ARTIFACT_NOT_FOUND, MESSAGE_ARTIFACT_NOT_FOUND); + } + } + + const resultHash = computeExecutionResultHash({ + outputs: outputArtifacts, + status, + }); + + const created = await this.storage.createExecution({ + executionId, + packageId, + revisionHash, + provider, + model, + promptHash, + parameters, + inputArtifacts, + outputArtifacts, + resultHash, + status, + startedAt, + finishedAt, + }); + + return toExecutionRecordDTO(created); + } + + async getExecution(query: GetExecutionQuery): Promise { + const executionId = normalizeExecutionId(query.executionId); + const found = await this.storage.findExecutionById(executionId); + return found ? toExecutionRecordDTO(found) : null; + } + + async listExecutions(query: ListExecutionsQuery): Promise { + const packageId = normalizeRequiredString(query.packageId); + const limit = typeof query.limit === 'undefined' ? 
100 : query.limit; + if (!Number.isInteger(limit) || limit <= 0 || limit > 500) { + throw new ExecutionServiceError(ERR_EXECUTION_INVALID_INPUT, MESSAGE_EXECUTION_INVALID_INPUT); + } + + const rows = await this.storage.listExecutions(packageId, limit); + return rows.map((row) => toExecutionRecordDTO(row)); + } + + async replayExecution(input: ReplayExecutionInput): Promise { + const executionId = normalizeExecutionId(input.executionId); + const stored = await this.storage.findExecutionById(executionId); + if (!stored) { + throw new ExecutionServiceError(ERR_EXECUTION_NOT_FOUND, MESSAGE_EXECUTION_NOT_FOUND); + } + + if (!(await this.storage.findRevisionByHash(stored.revisionHash))) { + throw new ExecutionServiceError(ERR_REVISION_NOT_FOUND, MESSAGE_REVISION_NOT_FOUND); + } + + for (const artifact of [...stored.inputArtifacts, ...stored.outputArtifacts]) { + const exists = await this.storage.artifactExists(stored.packageId, artifact.bundleHash); + if (!exists) { + throw new ExecutionServiceError(ERR_ARTIFACT_NOT_FOUND, MESSAGE_ARTIFACT_NOT_FOUND); + } + } + + const promptHash = normalizeHash(input.promptHash); + const parameters = normalizeParameters(input.parameters); + const inputArtifacts = normalizeArtifacts(input.inputArtifacts); + + if ( + promptHash !== stored.promptHash || + !parametersEqual(parameters, stored.parameters) || + !artifactsEqual(inputArtifacts, stored.inputArtifacts) + ) { + throw new ExecutionServiceError(ERR_EXECUTION_NON_DETERMINISTIC, MESSAGE_EXECUTION_NON_DETERMINISTIC); + } + + const replayOutputs = typeof input.outputArtifacts === 'undefined' ? stored.outputArtifacts : normalizeArtifacts(input.outputArtifacts); + const replayStatus = typeof input.status === 'undefined' ? 
stored.status : normalizeStatus(input.status); + const replayResultHash = computeExecutionResultHash({ + outputs: replayOutputs, + status: replayStatus, + }); + + if (replayResultHash !== stored.resultHash) { + throw new ExecutionServiceError(ERR_EXECUTION_REPLAY_MISMATCH, MESSAGE_EXECUTION_REPLAY_MISMATCH); + } + + return { + ok: true, + executionId: stored.executionId, + resultHash: replayResultHash, + matches: true, + }; + } +} + +export const EXECUTION_ERROR_CODES = { + ERR_EXECUTION_INVALID_INPUT, + ERR_REVISION_NOT_FOUND, + ERR_ARTIFACT_NOT_FOUND, + ERR_EXECUTION_NOT_FOUND, + ERR_EXECUTION_NON_DETERMINISTIC, + ERR_EXECUTION_REPLAY_MISMATCH, +} as const; + + + diff --git a/server/src/services/handoff-record-v1.ts b/server/src/services/handoff-record-v1.ts new file mode 100644 index 0000000..153c238 --- /dev/null +++ b/server/src/services/handoff-record-v1.ts @@ -0,0 +1,677 @@ +import { createHash } from 'crypto'; +import { DOMAIN_ORDER, type DomainName, type TransferClosureBindingInput, type TransferPackageV1 } from './transfer-package-v1'; +import type { LineageBindingV1 } from './lineage-binding-v1'; + +export type HandoffRecordV1 = { + schema: 'handoff-record-1'; + transfer: { + schema: 'transfer-package-1'; + transferHash: string; + }; + identity: { + packageId: string; + revisionId: string; + revisionHash: string; + parentRevisionId: string | null; + }; + bindings: { + closureContractV1: { schema: 'closure-contract-1'; proposedHash: string; acceptedHash: string } | null; + applyReportV1Hash: string | null; + executionRecordV1Hash: string | null; + }; + trunk: TransferPackageV1['trunk']; + continuation: TransferPackageV1['continuation']; + diagnostics: { + verified: true; + verification: { + transferHashRecomputed: string; + matchesProvidedHash: boolean; + }; + }; + lineageBindingV1: LineageBindingV1; + createdAt: string | null; + handoffHash: string; +}; + +export type BuildHandoffRecordV1Input = { + transferPackageV1: TransferPackageV1; + verification: { 
+ transferHashRecomputed: string; + matchesProvidedHash: boolean; + }; + bindings?: { + closureContractV1?: TransferClosureBindingInput; + applyReportV1Hash?: string | null; + executionRecordV1Hash?: string | null; + }; + lineageBindingV1: LineageBindingV1; + createdAt?: string | null; +}; + +type HandoffHashPayload = Omit; + +const LINEAGE_MISSING_KEYS = ['transfer', 'closure', 'execution', 'handoff'] as const; +type LineageMissingKey = (typeof LINEAGE_MISSING_KEYS)[number]; + +function makeHandoffError( + code: 'E_HANDOFF_INVALID' | 'E_HANDOFF_NON_JSON_SAFE' | 'E_HANDOFF_HASH_MISMATCH', + message: string +): Error & { code: string } { + const error = new Error(message) as Error & { code: string }; + error.code = code; + return error; +} + +export function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function compareDomains(a: DomainName, b: DomainName): number { + const leftIndex = DOMAIN_ORDER.indexOf(a); + const rightIndex = DOMAIN_ORDER.indexOf(b); + if (leftIndex < rightIndex) return -1; + if (leftIndex > rightIndex) return 1; + return compareStrings(a, b); +} + +function isPlainObject(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function isDomainName(value: unknown): value is DomainName { + return value === 'facts' || value === 'decisions' || value === 'constraints' || value === 'risks' || value === 'assumptions'; +} + +function isLineageMissingKey(value: unknown): value is LineageMissingKey { + return value === 'transfer' || value === 'closure' || value === 'execution' || value === 'handoff'; +} + +export function assertJsonSafe(value: unknown): void { + if (value === null) return; + + const valueType = typeof value; + if (valueType === 'string' || valueType === 'boolean') return; + if (valueType === 'number') { + if 
+  const record = value as Record<string, unknown>;
(!Array.isArray(value)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const normalized: string[] = []; + for (const entry of value) { + if (typeof entry !== 'string') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + normalized.push(entry); + } + return normalized; +} + +function normalizeDomains(domains: unknown): DomainName[] { + if (!Array.isArray(domains)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const normalized: DomainName[] = []; + for (const entry of domains) { + if (!isDomainName(entry)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + normalized.push(entry); + } + return [...normalized].sort(compareDomains); +} + +function normalizeNextActions(nextActions: unknown): HandoffRecordV1['continuation']['nextActions'] { + if (!Array.isArray(nextActions)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + + return [...nextActions] + .map((entry) => { + if (!isPlainObject(entry) || typeof entry.code !== 'string' || typeof entry.message !== 'string') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const expectedOutput = + typeof entry.expectedOutput === 'string' ? entry.expectedOutput : entry.expectedOutput === null ? null : null; + return { + code: entry.code, + message: entry.message, + expectedOutput, + domains: normalizeDomains(entry.domains), + }; + }) + .sort((left, right) => { + const leftDomains = left.domains.join('|'); + const rightDomains = right.domains.join('|'); + return ( + compareStrings(left.code, right.code) || + compareStrings(left.message, right.message) || + compareStrings(left.expectedOutput ?? '', right.expectedOutput ?? 
'') || + compareStrings(leftDomains, rightDomains) + ); + }); +} + +function normalizeValidationChecklist(checklist: unknown): HandoffRecordV1['continuation']['validationChecklist'] { + if (!Array.isArray(checklist)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + + return [...checklist] + .map((entry) => { + if (!isPlainObject(entry) || typeof entry.code !== 'string' || typeof entry.message !== 'string') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if (entry.severity !== 'must' && entry.severity !== 'should') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const severity: 'must' | 'should' = entry.severity; + return { + code: entry.code, + message: entry.message, + severity, + }; + }) + .sort((left, right) => { + return ( + compareStrings(left.severity, right.severity) || + compareStrings(left.code, right.code) || + compareStrings(left.message, right.message) + ); + }); +} + +function normalizeIdentity(value: unknown): HandoffRecordV1['identity'] { + if (!isPlainObject(value)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if ( + typeof value.packageId !== 'string' || + typeof value.revisionId !== 'string' || + typeof value.revisionHash !== 'string' || + !(typeof value.parentRevisionId === 'string' || value.parentRevisionId === null) + ) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + return { + packageId: value.packageId, + revisionId: value.revisionId, + revisionHash: value.revisionHash, + parentRevisionId: value.parentRevisionId, + }; +} + +function normalizeTransfer(value: unknown): HandoffRecordV1['transfer'] { + if (!isPlainObject(value) || value.schema !== 'transfer-package-1' || typeof value.transferHash !== 'string') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + return { + schema: 'transfer-package-1', 
+ transferHash: value.transferHash, + }; +} + +function normalizeBindings(value: unknown): HandoffRecordV1['bindings'] { + if (value === undefined) { + return { + closureContractV1: null, + applyReportV1Hash: null, + executionRecordV1Hash: null, + }; + } + if (!isPlainObject(value)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + return { + closureContractV1: normalizeClosureBinding(value.closureContractV1), + applyReportV1Hash: + typeof value.applyReportV1Hash === 'string' ? value.applyReportV1Hash : value.applyReportV1Hash === null ? null : null, + executionRecordV1Hash: + typeof value.executionRecordV1Hash === 'string' + ? value.executionRecordV1Hash + : value.executionRecordV1Hash === null + ? null + : null, + }; +} + +function normalizeDiagnostics(value: unknown): HandoffRecordV1['diagnostics'] { + if (!isPlainObject(value) || value.verified !== true || !isPlainObject(value.verification)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if ( + typeof value.verification.transferHashRecomputed !== 'string' || + typeof value.verification.matchesProvidedHash !== 'boolean' + ) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + return { + verified: true, + verification: { + transferHashRecomputed: value.verification.transferHashRecomputed, + matchesProvidedHash: value.verification.matchesProvidedHash, + }, + }; +} + +function normalizeLineageMissing(value: unknown): string[] { + if (!Array.isArray(value)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const normalized: string[] = []; + for (const entry of value) { + if (!isLineageMissingKey(entry)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + normalized.push(entry); + } + return normalized; +} + +function normalizeLineageNotes(value: unknown): string[] { + if (!Array.isArray(value)) { + throw 
makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const normalized: string[] = []; + for (const entry of value) { + if (typeof entry !== 'string') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + normalized.push(entry); + } + return normalized; +} + +function normalizeEmbeddedLineage( + value: unknown, + expectedIdentity: HandoffRecordV1['identity'], + expectedTransferHash: string, + enforceTransferHashMatch: boolean +): LineageBindingV1 { + if (!isPlainObject(value) || value.schema !== 'lineage-binding-1') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if (typeof value.lineageHash !== 'string') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if (!(typeof value.createdAt === 'string' || value.createdAt === null)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + + const identity = normalizeIdentity(value.identity); + if ( + identity.packageId !== expectedIdentity.packageId || + identity.revisionId !== expectedIdentity.revisionId || + identity.revisionHash !== expectedIdentity.revisionHash || + identity.parentRevisionId !== expectedIdentity.parentRevisionId + ) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + + if (!isPlainObject(value.bindings)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const transferBinding = normalizeTransfer(value.bindings.transfer); + if (enforceTransferHashMatch && transferBinding.transferHash !== expectedTransferHash) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if (!(value.bindings.handoff === null || value.bindings.handoff === undefined)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + + if (!isPlainObject(value.diagnostics)) { + throw makeHandoffError('E_HANDOFF_INVALID', 
'Handoff record input is invalid'); + } + + return { + schema: 'lineage-binding-1', + identity, + bindings: { + transfer: transferBinding, + closure: normalizeClosureBinding(value.bindings.closure) as LineageBindingV1['bindings']['closure'], + execution: + value.bindings.execution === null || value.bindings.execution === undefined + ? null + : (() => { + if (!isPlainObject(value.bindings.execution) || value.bindings.execution.schema !== 'execution-record-1') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if ( + !( + typeof value.bindings.execution.reportHash === 'string' || + value.bindings.execution.reportHash === null || + value.bindings.execution.reportHash === undefined + ) || + !( + typeof value.bindings.execution.deltaHash === 'string' || + value.bindings.execution.deltaHash === null || + value.bindings.execution.deltaHash === undefined + ) + ) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + return { + schema: 'execution-record-1', + reportHash: + typeof value.bindings.execution.reportHash === 'string' ? value.bindings.execution.reportHash : null, + deltaHash: + typeof value.bindings.execution.deltaHash === 'string' ? value.bindings.execution.deltaHash : null, + }; + })(), + handoff: null, + }, + diagnostics: { + missing: normalizeLineageMissing(value.diagnostics.missing), + notes: normalizeLineageNotes(value.diagnostics.notes), + }, + createdAt: value.createdAt, + lineageHash: value.lineageHash, + }; +} + +function canonicalizeLineageForHash(lineageBindingV1: LineageBindingV1): LineageBindingV1 { + return { + schema: 'lineage-binding-1', + identity: { + packageId: lineageBindingV1.identity.packageId, + revisionId: lineageBindingV1.identity.revisionId, + revisionHash: lineageBindingV1.identity.revisionHash, + parentRevisionId: lineageBindingV1.identity.parentRevisionId, + }, + bindings: { + transfer: lineageBindingV1.bindings.transfer + ? 
{ + schema: 'transfer-package-1', + transferHash: lineageBindingV1.bindings.transfer.transferHash, + } + : null, + closure: lineageBindingV1.bindings.closure + ? { + schema: 'closure-contract-1', + proposedHash: lineageBindingV1.bindings.closure.proposedHash, + acceptedHash: lineageBindingV1.bindings.closure.acceptedHash, + } + : null, + execution: lineageBindingV1.bindings.execution + ? { + schema: 'execution-record-1', + reportHash: lineageBindingV1.bindings.execution.reportHash, + deltaHash: lineageBindingV1.bindings.execution.deltaHash, + } + : null, + handoff: null, + }, + diagnostics: { + missing: [...lineageBindingV1.diagnostics.missing], + notes: [...lineageBindingV1.diagnostics.notes], + }, + createdAt: null, + lineageHash: lineageBindingV1.lineageHash, + }; +} + +function buildCanonicalPayloadFromTransfer(input: BuildHandoffRecordV1Input): { + payload: HandoffHashPayload; + lineageBindingV1: LineageBindingV1; +} { + if ( + !input || + !input.transferPackageV1 || + !input.verification || + typeof input.verification.transferHashRecomputed !== 'string' || + typeof input.verification.matchesProvidedHash !== 'boolean' + ) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + + assertJsonSafe(input); + + const transfer = input.transferPackageV1; + if (transfer.schema !== 'transfer-package-1') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + + const identity = { + packageId: transfer.identity.packageId, + revisionId: transfer.identity.revisionId, + revisionHash: transfer.identity.revisionHash, + parentRevisionId: transfer.identity.parentRevisionId ?? 
null, + }; + + const lineageBindingV1 = normalizeEmbeddedLineage(input.lineageBindingV1, identity, transfer.transferHash, true); + const payload: HandoffHashPayload = { + schema: 'handoff-record-1', + transfer: { + schema: 'transfer-package-1', + transferHash: transfer.transferHash, + }, + identity, + bindings: { + closureContractV1: normalizeClosureBinding(input.bindings?.closureContractV1), + applyReportV1Hash: + typeof input.bindings?.applyReportV1Hash === 'string' + ? input.bindings.applyReportV1Hash + : input.bindings?.applyReportV1Hash === null + ? null + : null, + executionRecordV1Hash: + typeof input.bindings?.executionRecordV1Hash === 'string' + ? input.bindings.executionRecordV1Hash + : input.bindings?.executionRecordV1Hash === null + ? null + : null, + }, + trunk: { + intent: { + primary: transfer.trunk.intent.primary ?? null, + successCriteria: [...transfer.trunk.intent.successCriteria], + nonGoals: [...transfer.trunk.intent.nonGoals], + }, + stateDigest: { + facts: [...transfer.trunk.stateDigest.facts], + decisions: [...transfer.trunk.stateDigest.decisions], + constraints: [...transfer.trunk.stateDigest.constraints], + risks: [...transfer.trunk.stateDigest.risks], + assumptions: [...transfer.trunk.stateDigest.assumptions], + openLoops: [...transfer.trunk.stateDigest.openLoops], + }, + }, + continuation: { + nextActions: normalizeNextActions(transfer.continuation.nextActions), + validationChecklist: normalizeValidationChecklist(transfer.continuation.validationChecklist), + }, + diagnostics: { + verified: true, + verification: { + transferHashRecomputed: input.verification.transferHashRecomputed, + matchesProvidedHash: input.verification.matchesProvidedHash, + }, + }, + lineageBindingV1: canonicalizeLineageForHash(lineageBindingV1), + }; + + return { payload, lineageBindingV1 }; +} + +function buildCanonicalPayloadFromRecord(handoffRecordV1: unknown): { + payload: HandoffHashPayload; + createdAt: string | null; + handoffHash: string; + lineageBindingV1: 
LineageBindingV1; +} { + if (!isPlainObject(handoffRecordV1) || handoffRecordV1.schema !== 'handoff-record-1') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if (!(typeof handoffRecordV1.createdAt === 'string' || handoffRecordV1.createdAt === null)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + if (typeof handoffRecordV1.handoffHash !== 'string') { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + + assertJsonSafe(handoffRecordV1); + + const transfer = normalizeTransfer(handoffRecordV1.transfer); + const identity = normalizeIdentity(handoffRecordV1.identity); + const bindings = normalizeBindings(handoffRecordV1.bindings); + const diagnostics = normalizeDiagnostics(handoffRecordV1.diagnostics); + + const trunkValue = handoffRecordV1.trunk; + if (!isPlainObject(trunkValue) || !isPlainObject(trunkValue.intent) || !isPlainObject(trunkValue.stateDigest)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const trunk: HandoffRecordV1['trunk'] = { + intent: { + primary: + typeof trunkValue.intent.primary === 'string' + ? trunkValue.intent.primary + : trunkValue.intent.primary === null + ? 
null + : null, + successCriteria: normalizeStringArray(trunkValue.intent.successCriteria), + nonGoals: normalizeStringArray(trunkValue.intent.nonGoals), + }, + stateDigest: { + facts: normalizeStringArray(trunkValue.stateDigest.facts), + decisions: normalizeStringArray(trunkValue.stateDigest.decisions), + constraints: normalizeStringArray(trunkValue.stateDigest.constraints), + risks: normalizeStringArray(trunkValue.stateDigest.risks), + assumptions: normalizeStringArray(trunkValue.stateDigest.assumptions), + openLoops: normalizeStringArray(trunkValue.stateDigest.openLoops), + }, + }; + + const continuationValue = handoffRecordV1.continuation; + if (!isPlainObject(continuationValue)) { + throw makeHandoffError('E_HANDOFF_INVALID', 'Handoff record input is invalid'); + } + const continuation: HandoffRecordV1['continuation'] = { + nextActions: normalizeNextActions(continuationValue.nextActions), + validationChecklist: normalizeValidationChecklist(continuationValue.validationChecklist), + }; + + const lineageBindingV1 = normalizeEmbeddedLineage(handoffRecordV1.lineageBindingV1, identity, transfer.transferHash, false); + const payload: HandoffHashPayload = { + schema: 'handoff-record-1', + transfer, + identity, + bindings, + trunk, + continuation, + diagnostics, + lineageBindingV1: canonicalizeLineageForHash(lineageBindingV1), + }; + + return { + payload, + createdAt: handoffRecordV1.createdAt, + handoffHash: handoffRecordV1.handoffHash, + lineageBindingV1, + }; +} + +export function buildHandoffRecordV1(input: BuildHandoffRecordV1Input): HandoffRecordV1 { + const canonical = buildCanonicalPayloadFromTransfer(input); + assertJsonSafe(canonical.payload); + + const handoffHash = sha256Hex(stableStringify(canonical.payload)); + const handoffRecord: HandoffRecordV1 = { + ...canonical.payload, + lineageBindingV1: canonical.lineageBindingV1, + createdAt: typeof input.createdAt === 'string' ? input.createdAt : input.createdAt === null ? 
null : null, + handoffHash, + }; + + assertJsonSafe(handoffRecord); + return handoffRecord; +} + +export function recomputeHandoffRecordV1Hash(handoffRecordV1: HandoffRecordV1): string { + const canonical = buildCanonicalPayloadFromRecord(handoffRecordV1); + return sha256Hex(stableStringify(canonical.payload)); +} + +export function verifyHandoffRecordV1( + handoffRecordV1: HandoffRecordV1 +): { ok: true; recomputedHash: string; matches: boolean } { + const canonical = buildCanonicalPayloadFromRecord(handoffRecordV1); + const recomputedHash = sha256Hex(stableStringify(canonical.payload)); + return { + ok: true, + recomputedHash, + matches: recomputedHash === canonical.handoffHash, + }; +} + +export function verifyHandoffRecordV1OrThrow(handoffRecordV1: HandoffRecordV1): void { + const verification = verifyHandoffRecordV1(handoffRecordV1); + if (!verification.matches) { + throw makeHandoffError('E_HANDOFF_HASH_MISMATCH', 'Handoff record hash mismatch'); + } +} diff --git a/server/src/services/lineage-binding-v1.ts b/server/src/services/lineage-binding-v1.ts new file mode 100644 index 0000000..8130098 --- /dev/null +++ b/server/src/services/lineage-binding-v1.ts @@ -0,0 +1,397 @@ +import { createHash } from 'crypto'; + +export type RevisionIdentityV1 = { + packageId: string; + revisionId: string; + revisionHash: string; + parentRevisionId: string | null; +}; + +export type LineageTransferBindingV1 = { + schema: 'transfer-package-1'; + transferHash: string; +} | null; + +export type LineageClosureBindingV1 = { + schema: 'closure-contract-1'; + proposedHash: string; + acceptedHash: string; +} | null; + +export type LineageExecutionBindingV1 = { + schema: 'execution-record-1'; + reportHash: string | null; + deltaHash: string | null; +} | null; + +export type LineageHandoffBindingV1 = { + schema: 'handoff-record-1'; + handoffHash: string; +} | null; + +export type LineageBindingV1 = { + schema: 'lineage-binding-1'; + identity: RevisionIdentityV1; + bindings: { + 
+type CanonicalPayload = Omit<LineageBindingV1, 'createdAt' | 'lineageHash'>;
+
+function makeLineageError(
+  code: 'E_LINEAGE_INVALID' | 'E_LINEAGE_NON_JSON_SAFE' | 'E_LINEAGE_HASH_MISMATCH',
+  message: string
+): Error & { code: string } {
+  const error = new Error(message) as Error & { code: string };
+  error.code = code;
+  return error;
+}
+
+export function compareStrings(a: string, b: string): number {
+  if (a < b) return -1;
+  if (a > b) return 1;
+  return 0;
+}
+
+function isPlainObject(value: unknown): value is Record<string, unknown> {
+  if (!value || typeof value !== 'object' || Array.isArray(value)) return false;
+  const proto = Object.getPrototypeOf(value);
+  return proto === Object.prototype || proto === null;
+}
+  const record = value as Record<string, unknown>;
value.revisionHash, + parentRevisionId: value.parentRevisionId, + }; +} + +function normalizeTransferBinding(value: unknown): LineageTransferBindingV1 { + if (value === null || value === undefined) return null; + if (!isPlainObject(value)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + if (value.schema !== 'transfer-package-1' || !isLowerHex64(value.transferHash)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + return { + schema: 'transfer-package-1', + transferHash: value.transferHash, + }; +} + +function normalizeClosureBinding(value: unknown): LineageClosureBindingV1 { + if (value === null || value === undefined) return null; + if (!isPlainObject(value)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + if (value.schema !== 'closure-contract-1' || !isLowerHex64(value.proposedHash) || !isLowerHex64(value.acceptedHash)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + return { + schema: 'closure-contract-1', + proposedHash: value.proposedHash, + acceptedHash: value.acceptedHash, + }; +} + +function normalizeExecutionHash(value: unknown): string | null { + if (value === null || value === undefined) return null; + if (!isLowerHex64(value)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + return value; +} + +function normalizeExecutionBinding(value: unknown): LineageExecutionBindingV1 { + if (value === null || value === undefined) return null; + if (!isPlainObject(value)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + if (value.schema !== 'execution-record-1') { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + return { + schema: 'execution-record-1', + reportHash: normalizeExecutionHash(value.reportHash), + deltaHash: normalizeExecutionHash(value.deltaHash), + }; +} + 
+function normalizeHandoffBinding(value: unknown): LineageHandoffBindingV1 { + if (value === null || value === undefined) return null; + if (!isPlainObject(value)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + if (value.schema !== 'handoff-record-1' || !isLowerHex64(value.handoffHash)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + return { + schema: 'handoff-record-1', + handoffHash: value.handoffHash, + }; +} + +function normalizeBindings(value: unknown, allowMissingObject: boolean): CanonicalBindings { + if (value === undefined && allowMissingObject) { + return { + transfer: null, + closure: null, + execution: null, + handoff: null, + }; + } + if (!isPlainObject(value)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + return { + transfer: normalizeTransferBinding(value.transfer), + closure: normalizeClosureBinding(value.closure), + execution: normalizeExecutionBinding(value.execution), + handoff: normalizeHandoffBinding(value.handoff), + }; +} + +function normalizeNotes(value: unknown): string[] { + if (value === undefined) return []; + if (!Array.isArray(value)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + assertJsonSafe(value); + const normalized: string[] = []; + for (const entry of value) { + if (typeof entry !== 'string') { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + normalized.push(entry); + } + return normalized.sort(compareStrings); +} + +function validateProvidedMissing(value: unknown): void { + if (!Array.isArray(value)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + assertJsonSafe(value); + for (const entry of value) { + if (!isMissingKey(entry)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + } +} + +function computeMissing(bindings: 
CanonicalBindings): string[] { + const missing: string[] = []; + for (const key of MISSING_KEYS) { + if (bindings[key] === null) { + missing.push(key); + } + } + return missing; +} + +function buildCanonicalPayload(args: { + identity: unknown; + bindings: unknown; + notes: unknown; + allowMissingBindingsObject: boolean; +}): CanonicalPayload { + const identity = normalizeIdentity(args.identity); + const bindings = normalizeBindings(args.bindings, args.allowMissingBindingsObject); + const notes = normalizeNotes(args.notes); + + return { + schema: 'lineage-binding-1', + identity, + bindings, + diagnostics: { + missing: computeMissing(bindings), + notes, + }, + }; +} + +function getVerificationInput(binding: unknown): { + lineageHash: string; + createdAt: string | null; + canonicalPayload: CanonicalPayload; +} { + if (!isPlainObject(binding) || binding.schema !== 'lineage-binding-1') { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + if (!isLowerHex64(binding.lineageHash)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + if (!(typeof binding.createdAt === 'string' || binding.createdAt === null)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + if (!isPlainObject(binding.diagnostics)) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage binding input is invalid'); + } + + validateProvidedMissing(binding.diagnostics.missing); + + const canonicalPayload = buildCanonicalPayload({ + identity: binding.identity, + bindings: binding.bindings, + notes: binding.diagnostics.notes, + allowMissingBindingsObject: false, + }); + + return { + lineageHash: binding.lineageHash, + createdAt: binding.createdAt, + canonicalPayload, + }; +} + +export function buildLineageBindingV1(input: BuildLineageBindingV1Input): LineageBindingV1 { + if (!input || !isPlainObject(input) || input.identity === undefined) { + throw makeLineageError('E_LINEAGE_INVALID', 'Lineage 
binding input is invalid'); + } + + const canonicalPayload = buildCanonicalPayload({ + identity: input.identity, + bindings: input.bindings, + notes: input.diagnostics?.notes, + allowMissingBindingsObject: true, + }); + + const createdAt = typeof input.createdAt === 'string' ? input.createdAt : input.createdAt === null ? null : null; + const lineageHash = sha256Hex(stableStringify(canonicalPayload)); + + const binding: LineageBindingV1 = { + ...canonicalPayload, + createdAt, + lineageHash, + }; + + assertJsonSafe(binding); + return binding; +} + +export function recomputeLineageBindingV1Hash(binding: LineageBindingV1): string { + assertJsonSafe(binding); + const verificationInput = getVerificationInput(binding); + return sha256Hex(stableStringify(verificationInput.canonicalPayload)); +} + +export function verifyLineageBindingV1(binding: LineageBindingV1): { ok: true; recomputedHash: string; matches: boolean } { + assertJsonSafe(binding); + const verificationInput = getVerificationInput(binding); + const recomputedHash = sha256Hex(stableStringify(verificationInput.canonicalPayload)); + return { + ok: true, + recomputedHash, + matches: recomputedHash === verificationInput.lineageHash, + }; +} + +export function verifyLineageBindingV1OrThrow(binding: LineageBindingV1): void { + const verification = verifyLineageBindingV1(binding); + if (!verification.matches) { + throw makeLineageError('E_LINEAGE_HASH_MISMATCH', 'Lineage binding hash mismatch'); + } +} diff --git a/server/src/services/migration.service.ts b/server/src/services/migration.service.ts new file mode 100644 index 0000000..daff6aa --- /dev/null +++ b/server/src/services/migration.service.ts @@ -0,0 +1,621 @@ +import { mkdirSync } from 'fs'; +import path from 'path'; +import type { PrismaClient } from '@prisma/client'; +import { computeBundleHash } from '../lib/artifact-hash'; +import { computeRevisionHash, type RevisionArtifactReference, type RevisionMetadata } from '../lib/revision-hash'; +import { + 
REVISION_CARRIER_SCHEMA, + buildManifest, + buildMetadata, + normalizeArtifactBundle, + parseArtifactsJsonl, + readMigrationPackageZip, + stringifyArtifactsJsonl, + writeMigrationPackageZip, + type ArtifactBundleLike, + type ArtifactReference, + type MigrationManifestV1, + type MigrationMetadataV1, +} from '../lib/migration-package'; + +const HASH_PATTERN = /^[0-9a-f]{64}$/; +const ERR_MIGRATION_INVALID_INPUT = 'ERR_MIGRATION_INVALID_INPUT'; +const ERR_MIGRATION_CLOSURE_INCOMPLETE = 'ERR_MIGRATION_CLOSURE_INCOMPLETE'; +const ERR_MIGRATION_VERIFY_MISMATCH = 'ERR_MIGRATION_VERIFY_MISMATCH'; +const ERR_MIGRATION_IDENTITY_MISMATCH = 'ERR_MIGRATION_IDENTITY_MISMATCH'; + +const MESSAGE_MIGRATION_INVALID_INPUT = 'Migration package input is invalid'; +const MESSAGE_MIGRATION_CLOSURE_INCOMPLETE = 'Migration closure is incomplete'; +const MESSAGE_MIGRATION_VERIFY_MISMATCH = 'Migration package verification failed'; +const MESSAGE_MIGRATION_IDENTITY_MISMATCH = 'Migration package identity mismatch'; + +export type MigrationRevisionRecord = { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + artifacts: RevisionArtifactReference[]; + metadata: RevisionMetadata; +}; + +export type ClosureResult = { + rootRevisionHash: string; + revisions: MigrationRevisionRecord[]; + artifacts: Array<{ + bundleHash: string; + bundle: ArtifactBundleLike; + }>; +}; + +export type VerifyResult = { + ok: true; + rootRevisionHash: string; + artifactCount: number; + revisionCount: number; + matches: true; +}; + +export type ImportResult = { + ok: true; + rootRevisionHash: string; + artifactCount: number; + revisionCount: number; +}; + +type StoredRevisionNode = { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + artifacts: RevisionArtifactReference[]; + metadata: RevisionMetadata; +}; + +type MigrationStorageAdapter = { + findRevisionByHash(revisionHash: string): Promise; + findArtifactByPackageAndHash(packageId: string, bundleHash: 
string): Promise; + findArtifactByHash(bundleHash: string): Promise; + storeArtifactBundle(bundle: ArtifactBundleLike): Promise; + createRevision(revision: MigrationRevisionRecord): Promise<{ revisionHash: string }>; +}; + +export class MigrationServiceError extends Error { + code: string; + + constructor(code: string, message: string) { + super(message); + this.code = code; + this.name = 'MigrationServiceError'; + } +} + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function normalizeRequiredString(value: unknown): string { + if (typeof value !== 'string' || value.length === 0) { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + return value; +} + +function normalizeHash(value: unknown): string { + if (typeof value !== 'string') { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + const normalized = value.toLowerCase(); + if (!HASH_PATTERN.test(normalized)) { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + return normalized; +} + +function normalizeMetadata(value: unknown): RevisionMetadata { + if (!value || typeof value !== 'object') { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + + const metadata = value as { + author?: unknown; + message?: unknown; + createdBy?: unknown; + timestamp?: unknown; + source?: unknown; + tags?: unknown; + }; + + if ( + metadata.source !== 'human' && + metadata.source !== 'ai' && + metadata.source !== 'migration' && + metadata.source !== 'system' + ) { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + + const tags = Array.isArray(metadata.tags) + ? 
metadata.tags.map((tag) => normalizeRequiredString(tag)).sort(compareStrings) + : []; + + const timestamp = normalizeRequiredString(metadata.timestamp); + const parsed = new Date(timestamp); + if (Number.isNaN(parsed.getTime())) { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + + return { + author: normalizeRequiredString(metadata.author), + message: normalizeRequiredString(metadata.message), + createdBy: normalizeRequiredString(metadata.createdBy), + timestamp: parsed.toISOString(), + source: metadata.source, + tags, + }; +} + +function normalizeRevisionArtifacts(value: unknown): RevisionArtifactReference[] { + if (!Array.isArray(value)) { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + const out: RevisionArtifactReference[] = []; + for (const item of value) { + if (!item || typeof item !== 'object') { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + const entry = item as { bundleHash?: unknown; role?: unknown }; + out.push({ + bundleHash: normalizeHash(entry.bundleHash), + role: normalizeRequiredString(entry.role), + }); + } + out.sort((a, b) => compareStrings(a.bundleHash, b.bundleHash) || compareStrings(a.role, b.role)); + return out; +} + +function normalizeReferences(references: ArtifactReference[] | null | undefined): ArtifactReference[] { + const list = Array.isArray(references) ? references.map((ref) => ({ bundleHash: normalizeHash(ref.bundleHash), role: normalizeRequiredString(ref.role) })) : []; + list.sort((a, b) => compareStrings(a.bundleHash, b.bundleHash) || compareStrings(a.role, b.role)); + return list; +} + +function computeBundleHashForBundle(bundle: ArtifactBundleLike): string { + const normalized = normalizeArtifactBundle(bundle); + return computeBundleHash({ + schema: normalized.schema, + packageId: normalized.identity.packageId, + revisionId: normalized.identity.revisionId ?? 
null, + revisionHash: normalized.identity.revisionHash ?? null, + payload: normalized, + }); +} + +function createRevisionCarrierBundle(revision: MigrationRevisionRecord): ArtifactBundleLike { + return normalizeArtifactBundle({ + schema: REVISION_CARRIER_SCHEMA, + identity: { + packageId: revision.packageId, + revisionId: null, + revisionHash: null, + }, + payload: { + revisionHash: revision.revisionHash, + packageId: revision.packageId, + parentRevisionHash: revision.parentRevisionHash, + artifacts: revision.artifacts.map((artifact) => ({ bundleHash: artifact.bundleHash, role: artifact.role })), + metadata: { + author: revision.metadata.author, + message: revision.metadata.message, + createdBy: revision.metadata.createdBy, + timestamp: revision.metadata.timestamp, + source: revision.metadata.source, + tags: [...(revision.metadata.tags ?? [])], + }, + }, + references: [], + }); +} + +function parseRevisionCarrier(bundle: ArtifactBundleLike): MigrationRevisionRecord { + if (bundle.schema !== REVISION_CARRIER_SCHEMA || !bundle.payload || typeof bundle.payload !== 'object') { + throw new MigrationServiceError(ERR_MIGRATION_INVALID_INPUT, MESSAGE_MIGRATION_INVALID_INPUT); + } + + const payload = bundle.payload as { + revisionHash?: unknown; + packageId?: unknown; + parentRevisionHash?: unknown; + artifacts?: unknown; + metadata?: unknown; + }; + + const parentValue = payload.parentRevisionHash; + const parentRevisionHash = parentValue === null || typeof parentValue === 'undefined' ? 
null : normalizeHash(parentValue); + + return { + revisionHash: normalizeHash(payload.revisionHash), + packageId: normalizeRequiredString(payload.packageId), + parentRevisionHash, + artifacts: normalizeRevisionArtifacts(payload.artifacts), + metadata: normalizeMetadata(payload.metadata), + }; +} + +function closeRevisionCarrierIdentity(bundle: ArtifactBundleLike, revision: MigrationRevisionRecord): MigrationRevisionRecord { + if (bundle.identity.packageId !== revision.packageId) { + throw new MigrationServiceError(ERR_MIGRATION_IDENTITY_MISMATCH, MESSAGE_MIGRATION_IDENTITY_MISMATCH); + } + + return { + revisionHash: revision.revisionHash, + packageId: bundle.identity.packageId, + parentRevisionHash: revision.parentRevisionHash, + artifacts: revision.artifacts.map((artifact) => ({ bundleHash: artifact.bundleHash, role: artifact.role })), + metadata: revision.metadata, + }; +} + +function validateRevisionRecord(revision: MigrationRevisionRecord): void { + const recomputed = computeRevisionHash({ + packageId: revision.packageId, + parentRevisionHash: revision.parentRevisionHash, + artifacts: revision.artifacts, + metadata: revision.metadata, + }); + + if (recomputed !== revision.revisionHash) { + throw new MigrationServiceError(ERR_MIGRATION_VERIFY_MISMATCH, MESSAGE_MIGRATION_VERIFY_MISMATCH); + } +} + +function createDefaultMigrationStorageAdapter(): MigrationStorageAdapter { + const { prisma } = require('../utils') as { prisma: PrismaClient }; + const { ArtifactStoreService } = require('./artifact-store.service') as { ArtifactStoreService: new () => { storeArtifactBundle(input: { schema: string; identity: { packageId: string; revisionId?: string | null; revisionHash?: string | null }; payload: unknown; bundleHash?: string; createdAt?: string | null }): Promise } }; + const { RevisionService } = require('./revision.service') as { RevisionService: new () => { createRevision(input: { packageId: string; parentRevisionHash?: string | null; artifacts: 
RevisionArtifactReference[]; metadata: { author: string; message: string; createdBy: string; timestamp: string; source: 'human' | 'ai' | 'migration' | 'system'; tags?: string[] } }): Promise<{ revisionHash: string }> } }; + const artifactStoreService = new ArtifactStoreService(); + const revisionService = new RevisionService(); + + return { + async findRevisionByHash(revisionHash: string): Promise { + const row = await prisma.revisionNode.findUnique({ + where: { revisionHash }, + include: { artifacts: true }, + }); + if (!row) { + return null; + } + return { + revisionHash: row.revisionHash, + packageId: row.packageId, + parentRevisionHash: row.parentRevisionHash, + artifacts: row.artifacts + .map((artifact) => ({ bundleHash: artifact.bundleHash, role: artifact.role })) + .sort((a, b) => compareStrings(a.bundleHash, b.bundleHash) || compareStrings(a.role, b.role)), + metadata: normalizeMetadata(row.metadata), + }; + }, + + async findArtifactByPackageAndHash(packageId: string, bundleHash: string): Promise { + const row = await prisma.artifactStoreRecord.findUnique({ + where: { + packageId_bundleHash: { + packageId, + bundleHash, + }, + }, + select: { payload: true }, + }); + return row ? normalizeArtifactBundle(row.payload as ArtifactBundleLike) : null; + }, + + async findArtifactByHash(bundleHash: string): Promise { + const row = await prisma.artifactStoreRecord.findFirst({ + where: { bundleHash }, + orderBy: { packageId: 'asc' }, + select: { payload: true }, + }); + return row ? 
normalizeArtifactBundle(row.payload as ArtifactBundleLike) : null; + }, + + async storeArtifactBundle(bundle: ArtifactBundleLike): Promise { + const normalized = normalizeArtifactBundle(bundle); + await artifactStoreService.storeArtifactBundle({ + schema: normalized.schema, + identity: normalized.identity, + payload: normalized, + }); + }, + + async createRevision(revision: MigrationRevisionRecord): Promise<{ revisionHash: string }> { + const created = await revisionService.createRevision({ + packageId: revision.packageId, + parentRevisionHash: revision.parentRevisionHash, + artifacts: revision.artifacts, + metadata: { + author: revision.metadata.author, + message: revision.metadata.message, + createdBy: revision.metadata.createdBy, + timestamp: revision.metadata.timestamp, + source: revision.metadata.source, + ...(Array.isArray(revision.metadata.tags) ? { tags: [...revision.metadata.tags] } : {}), + }, + }); + return { revisionHash: created.revisionHash }; + }, + }; +} + +function sortArtifactsForExport(artifacts: Array<{ bundleHash: string; bundle: ArtifactBundleLike }>): Array<{ bundleHash: string; bundle: ArtifactBundleLike }> { + return [...artifacts].sort((a, b) => compareStrings(a.bundleHash, b.bundleHash)); +} + +function sortRevisionsAncestorFirst(rootRevisionHash: string, revisionsByHash: Map): MigrationRevisionRecord[] { + const chain: MigrationRevisionRecord[] = []; + let currentHash: string | null = rootRevisionHash; + + while (currentHash) { + const current = revisionsByHash.get(currentHash); + if (!current) { + throw new MigrationServiceError(ERR_MIGRATION_CLOSURE_INCOMPLETE, MESSAGE_MIGRATION_CLOSURE_INCOMPLETE); + } + chain.push(current); + currentHash = current.parentRevisionHash; + } + + chain.reverse(); + return chain; +} + +function validateParsedPackage(parsed: { + manifest: MigrationManifestV1; + artifacts: ArtifactBundleLike[]; + metadata: MigrationMetadataV1; +}): { + manifest: MigrationManifestV1; + metadata: MigrationMetadataV1; + 
allArtifacts: Array<{ bundleHash: string; bundle: ArtifactBundleLike }>; + regularArtifacts: Array<{ bundleHash: string; bundle: ArtifactBundleLike }>; + revisionArtifacts: Array<{ bundleHash: string; bundle: ArtifactBundleLike }>; + revisionsAncestorFirst: MigrationRevisionRecord[]; +} { + const allArtifacts: Array<{ bundleHash: string; bundle: ArtifactBundleLike }> = []; + const regularArtifacts: Array<{ bundleHash: string; bundle: ArtifactBundleLike }> = []; + const revisionArtifacts: Array<{ bundleHash: string; bundle: ArtifactBundleLike }> = []; + const bundleHashes = new Set(); + const revisionsByHash = new Map(); + + for (const bundle of parsed.artifacts) { + const normalized = normalizeArtifactBundle(bundle); + const bundleHash = computeBundleHashForBundle(normalized); + if (bundleHashes.has(bundleHash)) { + throw new MigrationServiceError(ERR_MIGRATION_VERIFY_MISMATCH, MESSAGE_MIGRATION_VERIFY_MISMATCH); + } + bundleHashes.add(bundleHash); + allArtifacts.push({ bundleHash, bundle: normalized }); + + if (normalized.schema === REVISION_CARRIER_SCHEMA) { + const revision = closeRevisionCarrierIdentity(normalized, parseRevisionCarrier(normalized)); + validateRevisionRecord(revision); + revisionsByHash.set(revision.revisionHash, revision); + revisionArtifacts.push({ bundleHash, bundle: normalized }); + } else { + regularArtifacts.push({ bundleHash, bundle: normalized }); + } + } + + for (const artifact of allArtifacts) { + for (const reference of normalizeReferences(artifact.bundle.references)) { + if (!bundleHashes.has(reference.bundleHash)) { + throw new MigrationServiceError(ERR_MIGRATION_CLOSURE_INCOMPLETE, MESSAGE_MIGRATION_CLOSURE_INCOMPLETE); + } + } + } + + const rootRevisionHash = normalizeHash(parsed.manifest.rootRevisionHash); + const revisionsAncestorFirst = sortRevisionsAncestorFirst(rootRevisionHash, revisionsByHash); + + if (revisionsAncestorFirst.length !== revisionsByHash.size) { + throw new 
MigrationServiceError(ERR_MIGRATION_CLOSURE_INCOMPLETE, MESSAGE_MIGRATION_CLOSURE_INCOMPLETE); + } + + for (const revision of revisionsAncestorFirst) { + for (const artifact of revision.artifacts) { + if (!bundleHashes.has(artifact.bundleHash)) { + throw new MigrationServiceError(ERR_MIGRATION_CLOSURE_INCOMPLETE, MESSAGE_MIGRATION_CLOSURE_INCOMPLETE); + } + } + } + + if (parsed.manifest.artifactCount !== allArtifacts.length || parsed.manifest.revisionCount !== revisionsByHash.size) { + throw new MigrationServiceError(ERR_MIGRATION_VERIFY_MISMATCH, MESSAGE_MIGRATION_VERIFY_MISMATCH); + } + + return { + manifest: parsed.manifest, + metadata: parsed.metadata, + allArtifacts: sortArtifactsForExport(allArtifacts), + regularArtifacts: sortArtifactsForExport(regularArtifacts), + revisionArtifacts: sortArtifactsForExport(revisionArtifacts), + revisionsAncestorFirst, + }; +} + +export class MigrationService { + private readonly storage: MigrationStorageAdapter; + + constructor(storage?: MigrationStorageAdapter) { + this.storage = storage ?? 
createDefaultMigrationStorageAdapter(); + } + + async computeClosure(rootRevisionHash: string): Promise { + const normalizedRoot = normalizeHash(rootRevisionHash); + const revisions: MigrationRevisionRecord[] = []; + const visitedRevisions = new Set(); + const artifactsByHash = new Map(); + const pendingReferences: ArtifactReference[] = []; + + let current = await this.storage.findRevisionByHash(normalizedRoot); + if (!current) { + throw new MigrationServiceError(ERR_MIGRATION_CLOSURE_INCOMPLETE, MESSAGE_MIGRATION_CLOSURE_INCOMPLETE); + } + + while (current) { + if (visitedRevisions.has(current.revisionHash)) { + break; + } + + visitedRevisions.add(current.revisionHash); + const revision: MigrationRevisionRecord = { + revisionHash: current.revisionHash, + packageId: current.packageId, + parentRevisionHash: current.parentRevisionHash, + artifacts: current.artifacts.map((artifact) => ({ bundleHash: artifact.bundleHash, role: artifact.role })), + metadata: current.metadata, + }; + revisions.push(revision); + + for (const artifactRef of revision.artifacts) { + const artifact = await this.storage.findArtifactByPackageAndHash(revision.packageId, artifactRef.bundleHash); + if (!artifact) { + throw new MigrationServiceError(ERR_MIGRATION_CLOSURE_INCOMPLETE, MESSAGE_MIGRATION_CLOSURE_INCOMPLETE); + } + const bundleHash = computeBundleHashForBundle(artifact); + if (!artifactsByHash.has(bundleHash)) { + artifactsByHash.set(bundleHash, artifact); + for (const reference of normalizeReferences(artifact.references)) { + pendingReferences.push(reference); + } + } + } + + if (!revision.parentRevisionHash) { + break; + } + + current = await this.storage.findRevisionByHash(revision.parentRevisionHash); + if (!current) { + throw new MigrationServiceError(ERR_MIGRATION_CLOSURE_INCOMPLETE, MESSAGE_MIGRATION_CLOSURE_INCOMPLETE); + } + } + + while (pendingReferences.length > 0) { + const reference = pendingReferences.shift() as ArtifactReference; + if 
(artifactsByHash.has(reference.bundleHash)) { + continue; + } + const artifact = await this.storage.findArtifactByHash(reference.bundleHash); + if (!artifact) { + throw new MigrationServiceError(ERR_MIGRATION_CLOSURE_INCOMPLETE, MESSAGE_MIGRATION_CLOSURE_INCOMPLETE); + } + const bundleHash = computeBundleHashForBundle(artifact); + if (bundleHash !== reference.bundleHash) { + throw new MigrationServiceError(ERR_MIGRATION_VERIFY_MISMATCH, MESSAGE_MIGRATION_VERIFY_MISMATCH); + } + artifactsByHash.set(bundleHash, artifact); + for (const nested of normalizeReferences(artifact.references)) { + pendingReferences.push(nested); + } + } + + const revisionArtifacts = [...revisions] + .reverse() + .map((revision) => { + const bundle = createRevisionCarrierBundle(revision); + return { + bundleHash: computeBundleHashForBundle(bundle), + bundle, + }; + }); + + const artifactEntries = sortArtifactsForExport([ + ...[...artifactsByHash.entries()].map(([bundleHash, bundle]) => ({ bundleHash, bundle })), + ...revisionArtifacts, + ]); + + return { + rootRevisionHash: normalizedRoot, + revisions: [...revisions].reverse(), + artifacts: artifactEntries, + }; + } + + async exportMigrationPackage(rootRevisionHash: string, outPath: string): Promise { + const closure = await this.computeClosure(rootRevisionHash); + const rootRevision = closure.revisions[closure.revisions.length - 1]; + const manifest = buildManifest({ + rootRevisionHash: closure.rootRevisionHash, + artifactCount: closure.artifacts.length, + revisionCount: closure.revisions.length, + createdAt: rootRevision.metadata.timestamp, + }); + const metadata = buildMetadata({ + notes: null, + source: 'system', + }); + + mkdirSync(path.dirname(outPath), { recursive: true }); + return writeMigrationPackageZip({ + outPath, + manifest, + artifactsJsonl: stringifyArtifactsJsonl(closure.artifacts.map((artifact) => artifact.bundle)), + metadata, + }); + } + + async verifyMigrationPackage(zipPath: string): Promise { + const read = 
readMigrationPackageZip(zipPath); + const validated = validateParsedPackage({ + manifest: read.manifest, + metadata: read.metadata, + artifacts: parseArtifactsJsonl(read.artifactsJsonl), + }); + + return { + ok: true, + rootRevisionHash: validated.manifest.rootRevisionHash, + artifactCount: validated.manifest.artifactCount, + revisionCount: validated.manifest.revisionCount, + matches: true, + }; + } + + async importMigrationPackage(zipPath: string): Promise { + const read = readMigrationPackageZip(zipPath); + const validated = validateParsedPackage({ + manifest: read.manifest, + metadata: read.metadata, + artifacts: parseArtifactsJsonl(read.artifactsJsonl), + }); + + for (const artifact of validated.regularArtifacts) { + await this.storage.storeArtifactBundle(artifact.bundle); + } + + for (const artifact of validated.revisionArtifacts) { + await this.storage.storeArtifactBundle(artifact.bundle); + } + + for (const revision of validated.revisionsAncestorFirst) { + const created = await this.storage.createRevision(revision); + if (created.revisionHash !== revision.revisionHash) { + throw new MigrationServiceError(ERR_MIGRATION_VERIFY_MISMATCH, MESSAGE_MIGRATION_VERIFY_MISMATCH); + } + } + + return { + ok: true, + rootRevisionHash: validated.manifest.rootRevisionHash, + artifactCount: validated.manifest.artifactCount, + revisionCount: validated.manifest.revisionCount, + }; + } +} + +export const MIGRATION_ERROR_CODES = { + ERR_MIGRATION_INVALID_INPUT, + ERR_MIGRATION_CLOSURE_INCOMPLETE, + ERR_MIGRATION_VERIFY_MISMATCH, + ERR_MIGRATION_IDENTITY_MISMATCH, +} as const; + + diff --git a/server/src/services/revision.service.ts b/server/src/services/revision.service.ts new file mode 100644 index 0000000..1d421d0 --- /dev/null +++ b/server/src/services/revision.service.ts @@ -0,0 +1,441 @@ +import type { PrismaClient } from '@prisma/client'; +import { + computeRevisionHash, + type RevisionArtifactReference, + type RevisionMetadata, +} from '../lib/revision-hash'; + +const 
REVISION_HASH_PATTERN = /^[0-9a-f]{64}$/; +const BUNDLE_HASH_PATTERN = /^[0-9a-f]{64}$/; + +const ERR_REVISION_INVALID_INPUT = 'ERR_REVISION_INVALID_INPUT'; +const ERR_REVISION_PARENT_NOT_FOUND = 'ERR_REVISION_PARENT_NOT_FOUND'; +const ERR_ARTIFACT_NOT_FOUND = 'ERR_ARTIFACT_NOT_FOUND'; +const ERR_REVISION_PARENT_PACKAGE_MISMATCH = 'ERR_REVISION_PARENT_PACKAGE_MISMATCH'; + +const MESSAGE_REVISION_INVALID_INPUT = 'Revision input is invalid'; +const MESSAGE_REVISION_PARENT_NOT_FOUND = 'Revision parent not found'; +const MESSAGE_ARTIFACT_NOT_FOUND = 'Artifact not found'; +const MESSAGE_REVISION_PARENT_PACKAGE_MISMATCH = 'Revision parent package mismatch'; + +export type RevisionSource = 'human' | 'ai' | 'migration' | 'system'; + +export type CreateRevisionInput = { + packageId: string; + parentRevisionHash?: string | null; + artifacts: RevisionArtifactReference[]; + metadata: { + author: string; + message: string; + createdBy: string; + timestamp: string; + source: RevisionSource; + tags?: string[]; + }; +}; + +export type GetRevisionQuery = { + revisionHash: string; +}; + +export type ListRevisionsQuery = { + packageId: string; + limit?: number; +}; + +export type RevisionRecord = { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + author: string; + message: string; + createdBy: string; + timestamp: string; + source: string; + metadata: unknown; + createdAt: string; + artifacts: RevisionArtifactReference[]; +}; + +type StoredRevisionRecord = { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + author: string; + message: string; + createdBy: string; + timestamp: Date; + source: string; + metadata: unknown; + createdAt: Date; + artifacts: RevisionArtifactReference[]; +}; + +export type RevisionStorageAdapter = { + findRevisionByHash(revisionHash: string): Promise; + createRevision(input: { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + metadata: RevisionMetadata; + 
artifacts: RevisionArtifactReference[]; + }): Promise; + listRevisions(packageId: string, limit: number): Promise; + artifactExists(packageId: string, bundleHash: string): Promise; +}; + +export class RevisionServiceError extends Error { + code: string; + + constructor(code: string, message: string) { + super(message); + this.code = code; + this.name = 'RevisionServiceError'; + } +} + +function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function normalizeRequiredString(value: unknown): string { + if (typeof value !== 'string' || value.length === 0) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + return value; +} + +function normalizeNullableHash(value: unknown): string | null { + if (value === null || typeof value === 'undefined') { + return null; + } + + if (typeof value !== 'string') { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + + const normalized = value.toLowerCase(); + if (!REVISION_HASH_PATTERN.test(normalized)) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + return normalized; +} + +function normalizeBundleHash(value: unknown): string { + if (typeof value !== 'string') { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + const normalized = value.toLowerCase(); + if (!BUNDLE_HASH_PATTERN.test(normalized)) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + return normalized; +} + +function normalizeTimestamp(value: unknown): string { + if (typeof value !== 'string' || value.length === 0) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + + const parsed = new Date(value); + if (Number.isNaN(parsed.getTime())) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, 
MESSAGE_REVISION_INVALID_INPUT); + } + + return parsed.toISOString(); +} + +function normalizeArtifacts(artifacts: unknown): RevisionArtifactReference[] { + if (!Array.isArray(artifacts) || artifacts.length === 0) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + + const out: RevisionArtifactReference[] = []; + for (const item of artifacts) { + if (!item || typeof item !== 'object') { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + + const entry = item as { bundleHash?: unknown; role?: unknown }; + out.push({ + bundleHash: normalizeBundleHash(entry.bundleHash), + role: normalizeRequiredString(entry.role), + }); + } + + out.sort((a, b) => { + const bundleHashOrder = compareStrings(a.bundleHash, b.bundleHash); + if (bundleHashOrder !== 0) { + return bundleHashOrder; + } + return compareStrings(a.role, b.role); + }); + + return out; +} + +function normalizeMetadata(metadata: unknown): RevisionMetadata { + if (!metadata || typeof metadata !== 'object') { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + + const value = metadata as { + author?: unknown; + message?: unknown; + createdBy?: unknown; + timestamp?: unknown; + source?: unknown; + tags?: unknown; + }; + + if ( + value.source !== 'human' && + value.source !== 'ai' && + value.source !== 'migration' && + value.source !== 'system' + ) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + + let tags: string[] | undefined; + if (typeof value.tags !== 'undefined') { + if (!Array.isArray(value.tags)) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + tags = value.tags.map((tag) => normalizeRequiredString(tag)).sort(compareStrings); + } + + return { + author: normalizeRequiredString(value.author), + message: normalizeRequiredString(value.message), + createdBy: 
normalizeRequiredString(value.createdBy), + timestamp: normalizeTimestamp(value.timestamp), + source: value.source, + ...(tags ? { tags } : {}), + }; +} + +function normalizeLimit(value: unknown): number { + if (typeof value === 'undefined') { + return 100; + } + if (typeof value !== 'number' || !Number.isInteger(value) || value <= 0 || value > 500) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + return value; +} + +function toRevisionRecord(node: StoredRevisionRecord): RevisionRecord { + const artifacts = node.artifacts + .map((artifact) => ({ + bundleHash: artifact.bundleHash, + role: artifact.role, + })) + .sort((a, b) => { + const bundleHashOrder = compareStrings(a.bundleHash, b.bundleHash); + if (bundleHashOrder !== 0) { + return bundleHashOrder; + } + return compareStrings(a.role, b.role); + }); + + return { + revisionHash: node.revisionHash, + packageId: node.packageId, + parentRevisionHash: node.parentRevisionHash, + author: node.author, + message: node.message, + createdBy: node.createdBy, + timestamp: node.timestamp.toISOString(), + source: node.source, + metadata: node.metadata, + createdAt: node.createdAt.toISOString(), + artifacts, + }; +} + +function toStoredRevisionRecordFromPrisma(node: { + revisionHash: string; + packageId: string; + parentRevisionHash: string | null; + author: string; + message: string; + createdBy: string; + timestamp: Date; + source: string; + metadata: unknown; + createdAt: Date; + artifacts: { bundleHash: string; role: string }[]; +}): StoredRevisionRecord { + return { + revisionHash: node.revisionHash, + packageId: node.packageId, + parentRevisionHash: node.parentRevisionHash, + author: node.author, + message: node.message, + createdBy: node.createdBy, + timestamp: node.timestamp, + source: node.source, + metadata: node.metadata, + createdAt: node.createdAt, + artifacts: node.artifacts.map((artifact) => ({ + bundleHash: artifact.bundleHash, + role: artifact.role, + })), + 
}; +} + +function createDefaultRevisionStorageAdapter(): RevisionStorageAdapter { + const { prisma } = require('../utils') as { prisma: PrismaClient }; + + return { + async findRevisionByHash(revisionHash: string): Promise { + const found = await prisma.revisionNode.findUnique({ + where: { revisionHash }, + include: { artifacts: true }, + }); + return found ? toStoredRevisionRecordFromPrisma(found) : null; + }, + + async createRevision(input): Promise { + const created = await prisma.$transaction(async (tx) => { + const node = await tx.revisionNode.create({ + data: { + revisionHash: input.revisionHash, + packageId: input.packageId, + parentRevisionHash: input.parentRevisionHash, + author: input.metadata.author, + message: input.metadata.message, + createdBy: input.metadata.createdBy, + timestamp: input.metadata.timestamp, + source: input.metadata.source, + metadata: input.metadata, + }, + }); + + await tx.revisionArtifact.createMany({ + data: input.artifacts.map((artifact) => ({ + revisionHash: input.revisionHash, + bundleHash: artifact.bundleHash, + role: artifact.role, + })), + }); + + return tx.revisionNode.findUniqueOrThrow({ + where: { revisionHash: node.revisionHash }, + include: { artifacts: true }, + }); + }); + + return toStoredRevisionRecordFromPrisma(created); + }, + + async listRevisions(packageId: string, limit: number): Promise { + const rows = await prisma.revisionNode.findMany({ + where: { packageId }, + take: limit, + orderBy: [{ timestamp: 'asc' }, { revisionHash: 'asc' }], + include: { artifacts: true }, + }); + + return rows.map((row) => toStoredRevisionRecordFromPrisma(row)); + }, + + async artifactExists(packageId: string, bundleHash: string): Promise { + const found = await prisma.artifactStoreRecord.findUnique({ + where: { + packageId_bundleHash: { + packageId, + bundleHash, + }, + }, + select: { id: true }, + }); + + return Boolean(found); + }, + }; +} + +export class RevisionService { + private readonly storage: RevisionStorageAdapter; + + 
constructor(storage?: RevisionStorageAdapter) { + this.storage = storage ?? createDefaultRevisionStorageAdapter(); + } + + async createRevision(input: CreateRevisionInput): Promise { + const packageId = normalizeRequiredString(input.packageId); + const parentRevisionHash = normalizeNullableHash(input.parentRevisionHash); + const artifacts = normalizeArtifacts(input.artifacts); + const metadata = normalizeMetadata(input.metadata); + + if (parentRevisionHash) { + const parent = await this.storage.findRevisionByHash(parentRevisionHash); + if (!parent) { + throw new RevisionServiceError(ERR_REVISION_PARENT_NOT_FOUND, MESSAGE_REVISION_PARENT_NOT_FOUND); + } + if (parent.packageId !== packageId) { + throw new RevisionServiceError(ERR_REVISION_PARENT_PACKAGE_MISMATCH, MESSAGE_REVISION_PARENT_PACKAGE_MISMATCH); + } + } + + for (const artifact of artifacts) { + const exists = await this.storage.artifactExists(packageId, artifact.bundleHash); + if (!exists) { + throw new RevisionServiceError(ERR_ARTIFACT_NOT_FOUND, MESSAGE_ARTIFACT_NOT_FOUND); + } + } + + const revisionHash = computeRevisionHash({ + packageId, + parentRevisionHash, + artifacts, + metadata, + }); + + const existing = await this.storage.findRevisionByHash(revisionHash); + if (existing) { + return toRevisionRecord(existing); + } + + const created = await this.storage.createRevision({ + revisionHash, + packageId, + parentRevisionHash, + metadata, + artifacts, + }); + + return toRevisionRecord(created); + } + + async getRevision(query: GetRevisionQuery): Promise { + const revisionHash = normalizeNullableHash(query.revisionHash); + if (!revisionHash) { + throw new RevisionServiceError(ERR_REVISION_INVALID_INPUT, MESSAGE_REVISION_INVALID_INPUT); + } + + const found = await this.storage.findRevisionByHash(revisionHash); + return found ? 
toRevisionRecord(found) : null; + } + + async listRevisions(query: ListRevisionsQuery): Promise { + const packageId = normalizeRequiredString(query.packageId); + const limit = normalizeLimit(query.limit); + + const rows = await this.storage.listRevisions(packageId, limit); + return rows.map((row) => toRevisionRecord(row)); + } +} + +export const REVISION_ERROR_CODES = { + ERR_REVISION_INVALID_INPUT, + ERR_REVISION_PARENT_NOT_FOUND, + ERR_ARTIFACT_NOT_FOUND, + ERR_REVISION_PARENT_PACKAGE_MISMATCH, +} as const; + + diff --git a/server/src/services/task-package.service.ts b/server/src/services/task-package.service.ts index 73f61ac..b08c47d 100644 --- a/server/src/services/task-package.service.ts +++ b/server/src/services/task-package.service.ts @@ -17,6 +17,9 @@ import { parseLLMDelta } from './llm-delta-parser'; import { buildApplyReportV1, type ApplyReportV1 } from './apply-report-v1'; import { buildExecutionRecordV1, type ExecutionRecordV1 } from './execution-record-v1'; import { replayExecutionRecordV1 } from './execution-replay.service'; +import { planDeltaClosureV1, type ClosureRejected, type ClosureSuggestion } from './delta-closure-planner'; +import { DEFAULT_RISK_POLICY_V1, normalizeRiskPolicyV1, type RiskPolicyV1 } from './delta-risk-policy'; +import { buildClosureContractV1, type ClosureContractV1 } from './closure-contract-v1'; export type CreatePackageFromSnapshotInput = { title?: string; @@ -48,6 +51,11 @@ type ApplyExecutionOptions = { toRevisionId: string; mode?: TransitionMode; }; + riskPolicy?: RiskPolicyV1; + taskProfile?: { + name: string; + riskPolicy?: RiskPolicyV1; + }; audit?: { record?: boolean; replay?: boolean; @@ -106,6 +114,38 @@ type ApplyReportV2 = { stateHashBefore: string; stateHashAfter: string; delta?: SemanticDelta; + closure?: { + schema: 'delta-closure-plan-1'; + policy: { + schema: 'risk-policy-1'; + strict: { + requirePostApplyConflictsZero: true; + fieldLevelModify: 'off' | 'on'; + dependencyScope: 'same_domain' | 
'cross_domain'; + priority: 'explainability' | 'acceptance'; + targetAcceptanceRatio: number; + }; + }; + acceptedSummary?: ReturnType; + rejectedCount: number; + rejectedPreview?: ClosureRejected[]; + rejected: ClosureRejected[]; + suggestions: ClosureSuggestion[]; + suggestionDiagnostics: { + suggestionCount: number; + coveredRejectedCount: number; + blockedByCoveredCount: number; + }; + contractV1: ClosureContractV1; + diagnostics: { + candidateCount: number; + acceptedCount: number; + rejectedCount: number; + maxClosureSizeRatio: number; + blockedByRate: number; + closureViolationFlag: boolean; + }; + }; }; v1?: ApplyReportV1; executionRecordV1?: ExecutionRecordV1; @@ -124,6 +164,7 @@ type ErrCode = | 'E_LLM_DELTA_CONFLICT' | 'E_REPLAY_MISMATCH' | 'E_REPLAY_UNSUPPORTED' + | 'E_RISK_POLICY_INVALID' | 'CONFLICT_RETRY_EXHAUSTED'; type Ok = { ok: true; data: T }; @@ -138,6 +179,7 @@ const SERVICE_ERROR_CODES = new Set([ 'E_LLM_DELTA_CONFLICT', 'E_REPLAY_MISMATCH', 'E_REPLAY_UNSUPPORTED', + 'E_RISK_POLICY_INVALID', 'CONFLICT_RETRY_EXHAUSTED', ]); @@ -162,6 +204,7 @@ function normalizeServiceError(err: unknown): ErrCode { if (rawCode === 'E_LLM_DELTA_CONFLICT') return 'E_LLM_DELTA_CONFLICT'; if (rawCode === 'E_REPLAY_MISMATCH') return 'E_REPLAY_MISMATCH'; if (rawCode === 'E_REPLAY_UNSUPPORTED') return 'E_REPLAY_UNSUPPORTED'; + if (rawCode === 'E_RISK_POLICY_INVALID') return 'E_RISK_POLICY_INVALID'; if (typeof rawCode === 'string' && rawCode.startsWith('LLM_')) return 'INVALID_INPUT'; if (typeof rawCode === 'string' && rawCode.startsWith('E_LLM_DELTA_')) return 'INVALID_INPUT'; if (typeof rawCode === 'string' && rawCode.startsWith('E_EXECUTION_RECORD_')) return 'INVALID_INPUT'; @@ -232,6 +275,74 @@ function mergeTransitionFindings(base: TransitionFinding[], postApply: Transitio return next.sort(sortTransitionFinding); } +function compareLiteral(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function 
sortClosureRejected(a: ClosureRejected, b: ClosureRejected): number { + return ( + (TRANSITION_DOMAIN_RANK.get(a.domain) ?? 0) - (TRANSITION_DOMAIN_RANK.get(b.domain) ?? 0) || + compareLiteral(a.key ?? '', b.key ?? '') || + compareLiteral(a.path ?? '\uffff', b.path ?? '\uffff') || + compareLiteral(a.op, b.op) || + compareLiteral(a.reasonCode, b.reasonCode) + ); +} + +function sortClosureSuggestion(a: ClosureSuggestion, b: ClosureSuggestion): number { + return ( + compareLiteral(a.actionType, b.actionType) || + compareLiteral(a.code, b.code) || + compareLiteral(stableHash(a.payload), stableHash(b.payload)) || + compareLiteral(a.message, b.message) || + compareLiteral(a.riskLevel ?? '', b.riskLevel ?? '') + ); +} + +function sortReportV1Finding( + a: ApplyReportV1['findings'][number], + b: ApplyReportV1['findings'][number] +): number { + return ( + compareLiteral(a.code, b.code) || + compareLiteral(a.message ?? '', b.message ?? '') || + (a.count ?? 0) - (b.count ?? 0) || + compareLiteral((a.domains ?? []).join(','), (b.domains ?? 
[]).join(',')) + ); +} + +function buildClosureDiagnosticsFindings( + diagnostics: { + candidateCount: number; + acceptedCount: number; + rejectedCount: number; + maxClosureSizeRatio: number; + blockedByRate: number; + closureViolationFlag: boolean; + }, + hasPostApplyConflict: boolean +): ApplyReportV1['findings'] { + const findings: ApplyReportV1['findings'] = [ + { code: 'CLOSURE_CANDIDATE_COUNT', count: diagnostics.candidateCount }, + { code: 'CLOSURE_ACCEPTED_COUNT', count: diagnostics.acceptedCount }, + { code: 'CLOSURE_REJECTED_COUNT', count: diagnostics.rejectedCount }, + { code: 'CLOSURE_MAX_CLOSURE_SIZE_RATIO', count: diagnostics.maxClosureSizeRatio }, + { code: 'CLOSURE_BLOCKED_BY_RATE', count: diagnostics.blockedByRate }, + ]; + + if (diagnostics.closureViolationFlag) { + findings.push({ code: 'CLOSURE_VIOLATION_FLAG', count: 1 }); + } + + if (hasPostApplyConflict) { + findings.push({ code: 'CLOSURE_POST_APPLY_CONFLICT' }); + } + + return findings.sort(sortReportV1Finding); +} + export class TaskPackageService { private llmService = new LLMService(); private userService = new UserService(); @@ -671,6 +782,13 @@ export class TaskPackageService { const transitionMode: TransitionMode = revisionNetDeltaOptions?.mode ?? 'best_effort'; const auditRecordEnabled = opts?.audit?.record === true; const auditReplayEnabled = opts?.audit?.replay === true; + const rawRiskPolicy = opts?.taskProfile?.riskPolicy ?? opts?.riskPolicy ?? 
DEFAULT_RISK_POLICY_V1; + const effectiveRiskPolicy = normalizeRiskPolicyV1(rawRiskPolicy); + if (!effectiveRiskPolicy) { + const riskPolicyError = new Error('Risk policy is invalid'); + (riskPolicyError as { code?: string }).code = 'E_RISK_POLICY_INVALID'; + throw riskPolicyError; + } const baseRevision: RevisionLike = { payload: currentPayload, @@ -928,24 +1046,59 @@ export class TaskPackageService { const llmDelta = parseLLMDelta(llmDeltaRaw); const llmBaseState = revisionToSemanticState(baseRevision); - const llmTransition = applyDelta(llmBaseState, llmDelta, { mode: llmDeltaMode }); + const closurePlan = + llmDeltaMode === 'strict' + ? planDeltaClosureV1({ + baseState: llmBaseState, + proposedDelta: llmDelta, + mode: 'strict', + policy: effectiveRiskPolicy, + }) + : null; + const closureContract = + closurePlan + ? buildClosureContractV1({ + proposedDelta: llmDelta, + acceptedDelta: closurePlan.acceptedDelta, + rejected: closurePlan.rejected, + suggestions: closurePlan.suggestions, + diagnostics: closurePlan.diagnostics, + }) + : null; + const appliedDelta = closurePlan ? closurePlan.acceptedDelta : llmDelta; + const llmTransition = applyDelta(llmBaseState, appliedDelta, { + mode: llmDeltaMode === 'strict' ? 
'best_effort' : llmDeltaMode, + }); const llmTransitionConflicts = [...llmTransition.conflicts].sort(sortTransitionConflict); - - if (llmDeltaMode === 'strict' && llmTransitionConflicts.length > 0) { - const strictConflictError = new Error('LLM delta contains conflicts'); - (strictConflictError as any).code = 'E_LLM_DELTA_CONFLICT'; - throw strictConflictError; - } - const llmPostApplyConflicts = detectTransitionConflicts(llmTransition.nextState).sort(sortTransitionConflict); + const llmDeltaReport: NonNullable = { mode: 'delta', - deltaSummary: summarizeDelta(llmDelta), + deltaSummary: summarizeDelta(appliedDelta), conflicts: llmTransitionConflicts, postApplyConflicts: llmPostApplyConflicts, stateHashBefore: stableHash(llmBaseState), stateHashAfter: stableHash(llmTransition.nextState), - delta: llmDelta, + delta: appliedDelta, + ...(closurePlan + ? { + closure: { + schema: closurePlan.schema, + policy: { + schema: closurePlan.policy.schema, + strict: { ...closurePlan.policy.strict }, + }, + acceptedSummary: summarizeDelta(closurePlan.acceptedDelta), + rejectedCount: closurePlan.rejected.length, + rejectedPreview: [...closurePlan.rejected].sort(sortClosureRejected).slice(0, 20), + rejected: [...closurePlan.rejected].sort(sortClosureRejected), + suggestions: [...closurePlan.suggestions].sort(sortClosureSuggestion), + suggestionDiagnostics: closurePlan.suggestionDiagnostics, + contractV1: closureContract!, + diagnostics: closurePlan.diagnostics, + }, + } + : {}), }; applyReport = this.buildApplyReportV2( @@ -958,6 +1111,28 @@ export class TaskPackageService { llmDeltaReport ); applyReport = attachV1(applyReport, llmDeltaReport); + if (closurePlan && applyReport.v1) { + applyReport = { + ...applyReport, + v1: { + ...applyReport.v1, + findings: [ + ...applyReport.v1.findings, + ...buildClosureDiagnosticsFindings(closurePlan.diagnostics, llmPostApplyConflicts.length > 0), + { + code: 'SUGGESTIONS_EMITTED', + count: closurePlan.suggestionDiagnostics.suggestionCount, + 
message: 'Suggestions emitted', + }, + { + code: 'CLOSURE_CONTRACT_V1_EMITTED', + count: closurePlan.suggestionDiagnostics.suggestionCount, + message: 'Closure contract v1 emitted', + }, + ].sort(sortReportV1Finding), + }, + }; + } applyReport = await attachExecutionAudit(applyReport, { delta: applyReport.llmDelta?.delta ?? null, replayBaseState: llmBaseState, diff --git a/server/src/services/transfer-package-v1.ts b/server/src/services/transfer-package-v1.ts new file mode 100644 index 0000000..6dcb7d8 --- /dev/null +++ b/server/src/services/transfer-package-v1.ts @@ -0,0 +1,476 @@ +import { createHash } from 'crypto'; + +export type DomainName = 'facts' | 'decisions' | 'constraints' | 'risks' | 'assumptions'; + +export const DOMAIN_ORDER: readonly DomainName[] = ['facts', 'decisions', 'constraints', 'risks', 'assumptions']; + +export type TransferPackageV1 = { + schema: 'transfer-package-1'; + identity: { + packageId: string; + revisionId: string; + revisionHash: string; + parentRevisionId: string | null; + }; + bindings: { + closureContractV1: { + schema: 'closure-contract-1'; + proposedHash: string; + acceptedHash: string; + } | null; + applyReportV1Hash: string | null; + executionRecordV1Hash: string | null; + }; + trunk: { + intent: { + primary: string | null; + successCriteria: string[]; + nonGoals: string[]; + }; + stateDigest: { + facts: string[]; + decisions: string[]; + constraints: string[]; + risks: string[]; + assumptions: string[]; + openLoops: string[]; + }; + }; + continuation: { + nextActions: Array<{ + code: string; + message: string; + expectedOutput: string | null; + domains: DomainName[]; + }>; + validationChecklist: Array<{ + code: string; + message: string; + severity: 'must' | 'should'; + }>; + }; + conflicts: Array<{ + domain: string; + code: string; + key: string | null; + path: string | null; + message: string; + }>; + determinism: { + sorted: true; + domainOrder: ['facts', 'decisions', 'constraints', 'risks', 'assumptions']; + }; + 
transferHash: string; +}; + +export type TransferClosureBindingInput = { + schema: 'closure-contract-1'; + proposedHash: string; + acceptedHash: string; +} | null; + +export type BuildTransferPackageV1Input = { + identity: { + packageId: string; + revisionId: string; + revisionHash: string; + parentRevisionId?: string | null; + }; + bindings?: { + closureContractV1?: TransferClosureBindingInput; + applyReportV1Hash?: string | null; + executionRecordV1Hash?: string | null; + }; + trunk?: { + intent?: { + primary?: string | null; + successCriteria?: string[]; + nonGoals?: string[]; + }; + stateDigest?: Partial>; + }; + continuation?: { + nextActions?: Array<{ + code: string; + message: string; + expectedOutput?: string | null; + domains?: DomainName[]; + }>; + validationChecklist?: Array<{ + code: string; + message: string; + severity?: 'must' | 'should'; + }>; + }; + conflicts?: Array<{ + domain: string; + code: string; + key?: string | null; + path?: string | null; + message: string; + }>; +}; + +type NextActionInput = { + code: string; + message: string; + expectedOutput?: string | null; + domains?: DomainName[]; +}; + +type ValidationChecklistInput = { + code: string; + message: string; + severity?: 'must' | 'should'; +}; + +function makeTransferError(code: 'E_TRANSFER_INVALID' | 'E_TRANSFER_NON_JSON_SAFE' | 'E_TRANSFER_HASH_MISMATCH', message: string): Error & { code: string } { + const error = new Error(message) as Error & { code: string }; + error.code = code; + return error; +} + +export function compareStrings(a: string, b: string): number { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +function compareDomains(a: DomainName, b: DomainName): number { + const leftIndex = DOMAIN_ORDER.indexOf(a); + const rightIndex = DOMAIN_ORDER.indexOf(b); + if (leftIndex < rightIndex) return -1; + if (leftIndex > rightIndex) return 1; + return compareStrings(a, b); +} + +function isPlainObject(value: unknown): value is Record { + if (!value || typeof value 
!== 'object' || Array.isArray(value)) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function isDomainName(value: unknown): value is DomainName { + return value === 'facts' || value === 'decisions' || value === 'constraints' || value === 'risks' || value === 'assumptions'; +} + +function normalizeStringArray(value: string[] | undefined): string[] { + if (!Array.isArray(value)) return []; + const normalized = value.filter((entry): entry is string => typeof entry === 'string'); + return [...normalized].sort(compareStrings); +} + +function normalizeDomains(value: DomainName[] | undefined): DomainName[] { + if (!Array.isArray(value)) return []; + const normalized = value.filter((entry): entry is DomainName => isDomainName(entry)); + return [...normalized].sort(compareDomains); +} + + +export function assertJsonSafe(value: unknown): void { + if (value === null) return; + + const valueType = typeof value; + if (valueType === 'string' || valueType === 'boolean') return; + if (valueType === 'number') { + if (!Number.isFinite(value)) { + throw makeTransferError('E_TRANSFER_NON_JSON_SAFE', 'Transfer package contains non JSON-safe value'); + } + return; + } + if (valueType === 'undefined' || valueType === 'function' || valueType === 'symbol' || valueType === 'bigint') { + throw makeTransferError('E_TRANSFER_NON_JSON_SAFE', 'Transfer package contains non JSON-safe value'); + } + + if (Array.isArray(value)) { + for (const entry of value) { + assertJsonSafe(entry); + } + return; + } + + if (!isPlainObject(value)) { + throw makeTransferError('E_TRANSFER_NON_JSON_SAFE', 'Transfer package contains non JSON-safe value'); + } + + const keys = Object.keys(value).sort(compareStrings); + for (const key of keys) { + assertJsonSafe(value[key]); + } +} + +export function stableStringify(value: unknown): string { + assertJsonSafe(value); + + if (value === null || typeof value === 'string' || typeof value === 'boolean' 
|| typeof value === 'number') { + return JSON.stringify(value); + } + + if (Array.isArray(value)) { + return `[${value.map((entry) => stableStringify(entry)).join(',')}]`; + } + + const record = value as Record; + const keys = Object.keys(record).sort(compareStrings); + return `{${keys.map((key) => `${JSON.stringify(key)}:${stableStringify(record[key])}`).join(',')}}`; +} + +export function sha256Hex(str: string): string { + return createHash('sha256').update(str, 'utf8').digest('hex'); +} + +function normalizeClosureBinding(value: TransferClosureBindingInput | undefined): TransferPackageV1['bindings']['closureContractV1'] { + if (value === null || value === undefined) return null; + if ( + value.schema !== 'closure-contract-1' || + typeof value.proposedHash !== 'string' || + typeof value.acceptedHash !== 'string' + ) { + throw makeTransferError('E_TRANSFER_INVALID', 'Transfer package input is invalid'); + } + return { + schema: 'closure-contract-1', + proposedHash: value.proposedHash, + acceptedHash: value.acceptedHash, + }; +} + +function normalizeNextActions( + value: NextActionInput[] | undefined +): TransferPackageV1['continuation']['nextActions'] { + if (!Array.isArray(value)) return []; + + const normalized = value.map((entry) => { + if (!entry || typeof entry.code !== 'string' || typeof entry.message !== 'string') { + throw makeTransferError('E_TRANSFER_INVALID', 'Transfer package input is invalid'); + } + + const domains = normalizeDomains(entry.domains); + return { + code: entry.code, + message: entry.message, + expectedOutput: typeof entry.expectedOutput === 'string' ? entry.expectedOutput : null, + domains, + }; + }); + + return normalized.sort((left, right) => { + const leftDomains = left.domains.join('|'); + const rightDomains = right.domains.join('|'); + return ( + compareStrings(left.code, right.code) || + compareStrings(left.message, right.message) || + compareStrings(left.expectedOutput ?? 
'NULL', right.expectedOutput ?? 'NULL') || + compareStrings(leftDomains, rightDomains) + ); + }); +} + +function normalizeValidationChecklist( + value: ValidationChecklistInput[] | undefined +): TransferPackageV1['continuation']['validationChecklist'] { + if (!Array.isArray(value)) return []; + + const normalized = value.map((entry) => { + if (!entry || typeof entry.code !== 'string' || typeof entry.message !== 'string') { + throw makeTransferError('E_TRANSFER_INVALID', 'Transfer package input is invalid'); + } + const severity: 'must' | 'should' = entry.severity === 'must' ? 'must' : 'should'; + return { + code: entry.code, + message: entry.message, + severity, + }; + }); + + return normalized.sort((left, right) => { + return ( + compareStrings(left.severity, right.severity) || + compareStrings(left.code, right.code) || + compareStrings(left.message, right.message) + ); + }); +} + +function normalizeConflicts(value: BuildTransferPackageV1Input['conflicts']): TransferPackageV1['conflicts'] { + if (!Array.isArray(value)) return []; + + const normalized = value.map((entry) => { + if (!entry || typeof entry.domain !== 'string' || typeof entry.code !== 'string' || typeof entry.message !== 'string') { + throw makeTransferError('E_TRANSFER_INVALID', 'Transfer package input is invalid'); + } + return { + domain: entry.domain, + code: entry.code, + key: typeof entry.key === 'string' ? entry.key : null, + path: typeof entry.path === 'string' ? entry.path : null, + message: entry.message, + }; + }); + + return normalized.sort((left, right) => { + return ( + compareStrings(left.domain, right.domain) || + compareStrings(left.code, right.code) || + compareStrings(left.key ?? 'NULL', right.key ?? 'NULL') || + compareStrings(left.path ?? 
'NULL') || + compareStrings(left.message, right.message) + ); + }); +} + +export function buildTransferPackageV1(input: BuildTransferPackageV1Input): TransferPackageV1 { + if (!input || !input.identity) { + throw makeTransferError('E_TRANSFER_INVALID', 'Transfer package input is invalid'); + } + + assertJsonSafe(input); + + const { identity } = input; + if ( + typeof identity.packageId !== 'string' || + typeof identity.revisionId !== 'string' || + typeof identity.revisionHash !== 'string' + ) { + throw makeTransferError('E_TRANSFER_INVALID', 'Transfer package input is invalid'); + } + + const stateDigestInput = input.trunk?.stateDigest ?? {}; + const nextActions = normalizeNextActions(input.continuation?.nextActions); + const validationChecklist = normalizeValidationChecklist(input.continuation?.validationChecklist); + const conflicts = normalizeConflicts(input.conflicts); + + const contractWithoutHash = { + schema: 'transfer-package-1' as const, + identity: { + packageId: identity.packageId, + revisionId: identity.revisionId, + revisionHash: identity.revisionHash, + parentRevisionId: typeof identity.parentRevisionId === 'string' ? identity.parentRevisionId : identity.parentRevisionId === null ? null : null, + }, + bindings: { + closureContractV1: normalizeClosureBinding(input.bindings?.closureContractV1), + applyReportV1Hash: + typeof input.bindings?.applyReportV1Hash === 'string' + ? input.bindings.applyReportV1Hash + : input.bindings?.applyReportV1Hash === null + ? null + : null, + executionRecordV1Hash: + typeof input.bindings?.executionRecordV1Hash === 'string' + ? input.bindings.executionRecordV1Hash + : input.bindings?.executionRecordV1Hash === null + ? null + : null, + }, + trunk: { + intent: { + primary: + typeof input.trunk?.intent?.primary === 'string' + ? input.trunk.intent.primary + : input.trunk?.intent?.primary === null + ? 
null + : null, + successCriteria: normalizeStringArray(input.trunk?.intent?.successCriteria), + nonGoals: normalizeStringArray(input.trunk?.intent?.nonGoals), + }, + stateDigest: { + facts: normalizeStringArray(stateDigestInput.facts), + decisions: normalizeStringArray(stateDigestInput.decisions), + constraints: normalizeStringArray(stateDigestInput.constraints), + risks: normalizeStringArray(stateDigestInput.risks), + assumptions: normalizeStringArray(stateDigestInput.assumptions), + openLoops: normalizeStringArray(stateDigestInput.openLoops), + }, + }, + continuation: { + nextActions, + validationChecklist, + }, + conflicts, + determinism: { + sorted: true as const, + domainOrder: ['facts', 'decisions', 'constraints', 'risks', 'assumptions'] as ['facts', 'decisions', 'constraints', 'risks', 'assumptions'], + }, + }; + + assertJsonSafe(contractWithoutHash); + const transferHash = sha256Hex(stableStringify(contractWithoutHash)); + const contract: TransferPackageV1 = { + ...contractWithoutHash, + transferHash, + }; + + assertJsonSafe(contract); + return contract; +} + + +export function recomputeTransferPackageV1Hash(input: TransferPackageV1): string { + if (!input || input.schema !== 'transfer-package-1' || typeof input.transferHash !== 'string') { + throw makeTransferError('E_TRANSFER_INVALID', 'Transfer package input is invalid'); + } + + assertJsonSafe(input); + + const rebuilt = buildTransferPackageV1({ + identity: { + packageId: input.identity.packageId, + revisionId: input.identity.revisionId, + revisionHash: input.identity.revisionHash, + parentRevisionId: input.identity.parentRevisionId, + }, + bindings: { + closureContractV1: input.bindings.closureContractV1, + applyReportV1Hash: input.bindings.applyReportV1Hash, + executionRecordV1Hash: input.bindings.executionRecordV1Hash, + }, + trunk: { + intent: { + primary: input.trunk.intent.primary, + successCriteria: [...input.trunk.intent.successCriteria], + nonGoals: [...input.trunk.intent.nonGoals], + }, + 
stateDigest: { + facts: [...input.trunk.stateDigest.facts], + decisions: [...input.trunk.stateDigest.decisions], + constraints: [...input.trunk.stateDigest.constraints], + risks: [...input.trunk.stateDigest.risks], + assumptions: [...input.trunk.stateDigest.assumptions], + openLoops: [...input.trunk.stateDigest.openLoops], + }, + }, + continuation: { + nextActions: input.continuation.nextActions.map((entry) => ({ + code: entry.code, + message: entry.message, + expectedOutput: entry.expectedOutput, + domains: [...entry.domains], + })), + validationChecklist: input.continuation.validationChecklist.map((entry) => ({ + code: entry.code, + message: entry.message, + severity: entry.severity, + })), + }, + conflicts: input.conflicts.map((entry) => ({ + domain: entry.domain, + code: entry.code, + key: entry.key, + path: entry.path, + message: entry.message, + })), + }); + + return rebuilt.transferHash; +} + +export function verifyTransferPackageV1(input: TransferPackageV1): { ok: true; recomputedHash: string } { + const recomputedHash = recomputeTransferPackageV1Hash(input); + + if (recomputedHash !== input.transferHash) { + throw makeTransferError('E_TRANSFER_HASH_MISMATCH', 'Transfer package hash mismatch'); + } + + return { ok: true, recomputedHash }; +} diff --git a/server/src/services/transfer-package.service.ts b/server/src/services/transfer-package.service.ts new file mode 100644 index 0000000..92e30db --- /dev/null +++ b/server/src/services/transfer-package.service.ts @@ -0,0 +1,365 @@ +import { prisma } from '../utils'; +import { TaskPackageService } from './task-package.service'; +import { + buildTransferPackageV1, + type BuildTransferPackageV1Input, + type TransferClosureBindingInput, + type TransferPackageV1, + recomputeTransferPackageV1Hash, + verifyTransferPackageV1, +} from './transfer-package-v1'; +import { + buildHandoffRecordV1, + verifyHandoffRecordV1 as verifyHandoffRecordV1Contract, + type HandoffRecordV1, +} from './handoff-record-v1'; +import { + 
buildLineageBindingV1, + verifyLineageBindingV1 as verifyLineageBindingV1Contract, + type LineageBindingV1, +} from './lineage-binding-v1'; + +type TransferOverrides = { + trunk?: BuildTransferPackageV1Input['trunk']; + continuation?: BuildTransferPackageV1Input['continuation']; + conflicts?: BuildTransferPackageV1Input['conflicts']; +}; + +export type BuildTransferPackageV1FromApplyContextInput = { + identity: { + packageId: string; + revisionId: string; + revisionHash: string; + parentRevisionId?: string | null; + }; + closureContractV1?: TransferClosureBindingInput; + applyReportV1Hash?: string | null; + executionRecordV1Hash?: string | null; + userProvided?: { + primaryIntent?: string | null; + successCriteria?: string[]; + nonGoals?: string[]; + stateDigest?: Partial>; + nextActions?: Array<{ + code: string; + message: string; + expectedOutput?: string | null; + domains?: Array<'facts' | 'decisions' | 'constraints' | 'risks' | 'assumptions'>; + }>; + validationChecklist?: Array<{ + code: string; + message: string; + severity?: 'must' | 'should'; + }>; + }; + overrides?: TransferOverrides; +}; + +export type CreateTransferPackageInput = { + revisionId?: string; + include?: { + closureContractV1?: boolean; + applyReportV1Hash?: boolean; + executionRecordV1Hash?: boolean; + }; + closureContractV1?: TransferClosureBindingInput; + applyReportV1Hash?: string | null; + executionRecordV1Hash?: string | null; + trunk?: BuildTransferPackageV1Input['trunk']; + continuation?: BuildTransferPackageV1Input['continuation']; +}; + +export type IngestTransferPackageV1Input = { + transferPackageV1: TransferPackageV1; + include?: { + closureContractV1?: boolean; + applyReportV1Hash?: boolean; + executionRecordV1Hash?: boolean; + }; + bindings?: { + closureContractV1?: TransferClosureBindingInput; + applyReportV1Hash?: string | null; + executionRecordV1Hash?: string | null; + }; + createdAt?: string | null; +}; + +export type BuildLineageBindingForTransferFlowV1Input = { + 
transferPackageV1: TransferPackageV1; + include?: { + closure?: boolean; + execution?: boolean; + handoff?: boolean; + }; + closureContractV1?: TransferClosureBindingInput; + applyReportV1Hash?: string | null; + executionRecordV1Hash?: string | null; + handoffRecordV1?: HandoffRecordV1 | null; + createdAt?: string | null; +}; + +function makeError(code: 'NOT_FOUND' | 'NO_REVISION', message: string): Error & { code: string } { + const error = new Error(message) as Error & { code: string }; + error.code = code; + return error; +} + +export function buildTransferPackageV1FromApplyContext( + input: BuildTransferPackageV1FromApplyContextInput +): TransferPackageV1 { + return buildTransferPackageV1({ + identity: { + packageId: input.identity.packageId, + revisionId: input.identity.revisionId, + revisionHash: input.identity.revisionHash, + parentRevisionId: input.identity.parentRevisionId ?? null, + }, + bindings: { + closureContractV1: input.closureContractV1 ?? null, + applyReportV1Hash: input.applyReportV1Hash ?? null, + executionRecordV1Hash: input.executionRecordV1Hash ?? null, + }, + trunk: { + intent: { + primary: input.userProvided?.primaryIntent ?? null, + successCriteria: input.userProvided?.successCriteria ?? [], + nonGoals: input.userProvided?.nonGoals ?? [], + }, + stateDigest: input.userProvided?.stateDigest ?? input.overrides?.trunk?.stateDigest ?? {}, + }, + continuation: { + nextActions: input.userProvided?.nextActions ?? input.overrides?.continuation?.nextActions ?? [], + validationChecklist: + input.userProvided?.validationChecklist ?? input.overrides?.continuation?.validationChecklist ?? [], + }, + conflicts: input.overrides?.conflicts ?? 
[], + }); +} + +export function verifyTransferPackageV1OrThrow(transferPackageV1: TransferPackageV1): string { + return verifyTransferPackageV1(transferPackageV1).recomputedHash; +} + +export function getTransferPackageV1VerificationResult( + transferPackageV1: TransferPackageV1 +): { ok: true; recomputedHash: string; matches: boolean } { + const recomputedHash = recomputeTransferPackageV1Hash(transferPackageV1); + return { + ok: true, + recomputedHash, + matches: recomputedHash === transferPackageV1.transferHash, + }; +} + +export function buildLineageBindingForTransferFlowV1( + input: BuildLineageBindingForTransferFlowV1Input +): LineageBindingV1 { + const transferPackageV1 = input.transferPackageV1; + const includeClosure = input.include?.closure === true; + const includeExecution = input.include?.execution === true; + + const closureBinding = includeClosure && input.closureContractV1 ? input.closureContractV1 : null; + const hasExecutionBindingInput = + typeof input.applyReportV1Hash === 'string' || typeof input.executionRecordV1Hash === 'string'; + const executionBinding = + includeExecution && hasExecutionBindingInput + ? { + schema: 'execution-record-1' as const, + reportHash: typeof input.applyReportV1Hash === 'string' ? input.applyReportV1Hash : null, + deltaHash: typeof input.executionRecordV1Hash === 'string' ? input.executionRecordV1Hash : null, + } + : null; + return buildLineageBindingV1({ + identity: { + packageId: transferPackageV1.identity.packageId, + revisionId: transferPackageV1.identity.revisionId, + revisionHash: transferPackageV1.identity.revisionHash, + parentRevisionId: transferPackageV1.identity.parentRevisionId, + }, + bindings: { + transfer: { + schema: 'transfer-package-1', + transferHash: transferPackageV1.transferHash, + }, + closure: closureBinding, + execution: executionBinding, + handoff: null, + }, + createdAt: typeof input.createdAt === 'string' ? input.createdAt : input.createdAt === null ? 
null : null, + }); +} + +export function getLineageBindingV1VerificationResult( + lineageBindingV1: LineageBindingV1 +): { ok: true; recomputedHash: string; matches: boolean } { + return verifyLineageBindingV1Contract(lineageBindingV1); +} + +export function getHandoffRecordV1VerificationResult( + handoffRecordV1: HandoffRecordV1 +): { ok: true; recomputedHash: string; matches: boolean } { + return verifyHandoffRecordV1Contract(handoffRecordV1); +} + +export function ingestTransferPackageV1(input: IngestTransferPackageV1Input): HandoffRecordV1 { + const recomputedHash = verifyTransferPackageV1OrThrow(input.transferPackageV1); + const includeClosure = input.include?.closureContractV1 === true; + const includeApplyReportV1Hash = input.include?.applyReportV1Hash === true; + const includeExecutionRecordV1Hash = input.include?.executionRecordV1Hash === true; + const createdAt = typeof input.createdAt === 'string' ? input.createdAt : input.createdAt === null ? null : null; + const lineageBindingV1 = buildLineageBindingForTransferFlowV1({ + transferPackageV1: input.transferPackageV1, + include: { + closure: includeClosure, + execution: includeApplyReportV1Hash || includeExecutionRecordV1Hash, + handoff: false, + }, + closureContractV1: includeClosure ? input.bindings?.closureContractV1 ?? null : null, + applyReportV1Hash: includeApplyReportV1Hash ? input.bindings?.applyReportV1Hash ?? null : null, + executionRecordV1Hash: includeExecutionRecordV1Hash ? input.bindings?.executionRecordV1Hash ?? null : null, + createdAt, + }); + + return buildHandoffRecordV1({ + transferPackageV1: input.transferPackageV1, + verification: { + transferHashRecomputed: recomputedHash, + matchesProvidedHash: true, + }, + bindings: { + closureContractV1: includeClosure ? input.bindings?.closureContractV1 ?? null : null, + applyReportV1Hash: includeApplyReportV1Hash ? input.bindings?.applyReportV1Hash ?? null : null, + executionRecordV1Hash: includeExecutionRecordV1Hash ? 
input.bindings?.executionRecordV1Hash ?? null : null, + }, + lineageBindingV1, + createdAt, + }); +} + +export class TransferPackageService { + private readonly taskPackageService = new TaskPackageService(); + + verifyTransferPackageV1(input: { transferPackageV1: unknown }): { ok: true; recomputedHash: string; matches: boolean } { + return getTransferPackageV1VerificationResult(input.transferPackageV1 as TransferPackageV1); + } + + verifyLineageBindingV1(input: { lineageBindingV1: unknown }): { ok: true; recomputedHash: string; matches: boolean } { + return getLineageBindingV1VerificationResult(input.lineageBindingV1 as LineageBindingV1); + } + + verifyHandoffRecordV1(input: { handoffRecordV1: unknown }): { ok: true; recomputedHash: string; matches: boolean } { + return getHandoffRecordV1VerificationResult(input.handoffRecordV1 as HandoffRecordV1); + } + + buildLineageBindingForTransferFlowV1( + input: Omit & { transferPackageV1: unknown } + ): LineageBindingV1 { + return buildLineageBindingForTransferFlowV1({ + ...input, + transferPackageV1: input.transferPackageV1 as TransferPackageV1, + }); + } + + ingestTransferPackageV1( + input: Omit & { transferPackageV1: unknown } + ): HandoffRecordV1 { + return ingestTransferPackageV1({ + ...input, + transferPackageV1: input.transferPackageV1 as TransferPackageV1, + }); + } + + async createTransferPackage( + userId: string, + packageId: string, + input: CreateTransferPackageInput + ): Promise { + const pkg = await this.taskPackageService.getOwned(userId, packageId); + + let revision: + | { + id: string; + revisionHash: string; + parentRevisionId: string | null; + } + | null = null; + + if (typeof input.revisionId === 'string') { + if (pkg.currentRevision && pkg.currentRevision.id === input.revisionId) { + revision = { + id: pkg.currentRevision.id, + revisionHash: String(pkg.currentRevision.revisionHash), + parentRevisionId: + typeof pkg.currentRevision.parentRevisionId === 'string' + ? 
pkg.currentRevision.parentRevisionId + : pkg.currentRevision.parentRevisionId === null + ? null + : null, + }; + } else { + const found = await prisma.taskPackageRevision.findUnique({ + where: { id: input.revisionId }, + select: { + id: true, + packageId: true, + revisionHash: true, + parentRevisionId: true, + }, + }); + + if (!found || found.packageId !== packageId) { + throw makeError('NOT_FOUND', 'NOT_FOUND'); + } + + revision = { + id: found.id, + revisionHash: found.revisionHash, + parentRevisionId: found.parentRevisionId, + }; + } + } else if (pkg.currentRevision) { + revision = { + id: pkg.currentRevision.id, + revisionHash: String(pkg.currentRevision.revisionHash), + parentRevisionId: + typeof pkg.currentRevision.parentRevisionId === 'string' + ? pkg.currentRevision.parentRevisionId + : pkg.currentRevision.parentRevisionId === null + ? null + : null, + }; + } + + if (!revision) { + throw makeError('NO_REVISION', 'NO_REVISION'); + } + + const includeClosure = input.include?.closureContractV1 === true; + const includeApplyReportV1Hash = input.include?.applyReportV1Hash === true; + const includeExecutionRecordV1Hash = input.include?.executionRecordV1Hash === true; + + return buildTransferPackageV1FromApplyContext({ + identity: { + packageId, + revisionId: revision.id, + revisionHash: revision.revisionHash, + parentRevisionId: revision.parentRevisionId, + }, + closureContractV1: includeClosure && input.closureContractV1 ? input.closureContractV1 : null, + applyReportV1Hash: + includeApplyReportV1Hash && typeof input.applyReportV1Hash === 'string' ? input.applyReportV1Hash : null, + executionRecordV1Hash: + includeExecutionRecordV1Hash && typeof input.executionRecordV1Hash === 'string' + ? input.executionRecordV1Hash + : null, + userProvided: { + primaryIntent: input.trunk?.intent?.primary ?? null, + successCriteria: input.trunk?.intent?.successCriteria ?? [], + nonGoals: input.trunk?.intent?.nonGoals ?? [], + stateDigest: input.trunk?.stateDigest ?? 
{}, + nextActions: input.continuation?.nextActions ?? [], + validationChecklist: input.continuation?.validationChecklist ?? [], + }, + }); + } +} diff --git a/server/src/types/api/artifact.api.ts b/server/src/types/api/artifact.api.ts new file mode 100644 index 0000000..ba872d3 --- /dev/null +++ b/server/src/types/api/artifact.api.ts @@ -0,0 +1,31 @@ +import { z } from 'zod'; + +const hash64Schema = z.string().regex(/^[0-9a-f]{64}$/); +const nullableHash64Schema = z.union([hash64Schema, z.null()]); +const nullableStringSchema = z.union([z.string().min(1), z.null()]); + +export const artifactReferenceSchema = z.object({ + bundleHash: hash64Schema, + role: z.string().min(1), +}); + +export const artifactIdentitySchema = z.object({ + packageId: z.string().min(1), + revisionId: nullableStringSchema.optional().default(null), + revisionHash: nullableHash64Schema.optional().default(null), +}); + +export const artifactCreateBodySchema = z.object({ + schema: z.string().min(1), + identity: artifactIdentitySchema, + payload: z.unknown(), + references: z.array(artifactReferenceSchema).optional().default([]), +}); + +export const artifactRouteParamsSchema = z.object({ + packageId: z.string().min(1), + bundleHash: hash64Schema, +}); + +export type ArtifactCreateBody = z.infer; +export type ArtifactRouteParams = z.infer; diff --git a/server/src/types/api/execution.api.ts b/server/src/types/api/execution.api.ts new file mode 100644 index 0000000..bf7718a --- /dev/null +++ b/server/src/types/api/execution.api.ts @@ -0,0 +1,38 @@ +import { z } from 'zod'; + +const hash64Schema = z.string().regex(/^[0-9a-f]{64}$/); + +export const executionArtifactSchema = z.object({ + bundleHash: hash64Schema, + role: z.string().min(1), +}); + +export const executionRecordBodySchema = z.object({ + packageId: z.string().min(1), + revisionHash: hash64Schema, + provider: z.string().min(1), + model: z.string().min(1), + promptHash: hash64Schema, + parameters: z.unknown(), + inputArtifacts: 
z.array(executionArtifactSchema), + outputArtifacts: z.array(executionArtifactSchema), + status: z.enum(['success', 'failure']), + startedAt: z.string().datetime({ offset: true }), + finishedAt: z.string().datetime({ offset: true }), +}); + +export const executionIdParamsSchema = z.object({ + executionId: z.string().uuid(), +}); + +export const executionReplayBodySchema = z.object({ + promptHash: hash64Schema, + parameters: z.unknown(), + inputArtifacts: z.array(executionArtifactSchema), + outputArtifacts: z.array(executionArtifactSchema), + status: z.enum(['success', 'failure']), +}); + +export type ExecutionRecordBody = z.infer; +export type ExecutionIdParams = z.infer; +export type ExecutionReplayBody = z.infer; diff --git a/server/src/types/api/migration.api.ts b/server/src/types/api/migration.api.ts new file mode 100644 index 0000000..0b75497 --- /dev/null +++ b/server/src/types/api/migration.api.ts @@ -0,0 +1,14 @@ +import { z } from 'zod'; + +const hash64Schema = z.string().regex(/^[0-9a-f]{64}$/); + +export const migrationExportBodySchema = z.object({ + rootRevisionHash: hash64Schema, +}); + +export const migrationZipBodySchema = z.object({ + zipPath: z.string().min(1), +}); + +export type MigrationExportBody = z.infer; +export type MigrationZipBody = z.infer; diff --git a/server/src/types/api/revision.api.ts b/server/src/types/api/revision.api.ts new file mode 100644 index 0000000..e99d220 --- /dev/null +++ b/server/src/types/api/revision.api.ts @@ -0,0 +1,42 @@ +import { z } from 'zod'; + +const hash64Schema = z.string().regex(/^[0-9a-f]{64}$/); +const nullableHash64Schema = z.union([hash64Schema, z.null()]); + +export const revisionArtifactSchema = z.object({ + bundleHash: hash64Schema, + role: z.string().min(1), +}); + +export const revisionMetadataSchema = z.object({ + author: z.string().min(1), + message: z.string().min(1), + createdBy: z.string().min(1), + timestamp: z.string().datetime({ offset: true }), + source: z.enum(['human', 'ai', 'migration', 
'system']), + tags: z.array(z.string().min(1)).optional().default([]), +}); + +export const revisionCreateBodySchema = z.object({ + packageId: z.string().min(1), + parentRevisionHash: nullableHash64Schema.optional().default(null), + artifacts: z.array(revisionArtifactSchema).min(1), + metadata: revisionMetadataSchema, +}); + +export const revisionHashParamsSchema = z.object({ + revisionHash: hash64Schema, +}); + +export const revisionPackageParamsSchema = z.object({ + packageId: z.string().min(1), +}); + +export const revisionListQuerySchema = z.object({ + limit: z.coerce.number().int().positive().max(500).optional(), +}); + +export type RevisionCreateBody = z.infer; +export type RevisionHashParams = z.infer; +export type RevisionPackageParams = z.infer; +export type RevisionListQuery = z.infer; diff --git a/server/src/utils/db.ts b/server/src/utils/db.ts index 8216300..907cb32 100644 --- a/server/src/utils/db.ts +++ b/server/src/utils/db.ts @@ -13,20 +13,18 @@ declare global { var prisma: PrismaClient | undefined; } -// 创建 Prisma 客户端 +// 创建 Prisma 客户? function createPrismaClient(): PrismaClient { const client = new PrismaClient({ - log: config.isDev + log: config.isDev ? [ { level: 'query', emit: 'event' }, { level: 'error', emit: 'stdout' }, - { level: 'warn', emit: 'stdout' } - ] - : [ - { level: 'error', emit: 'stdout' } + { level: 'warn', emit: 'stdout' }, ] + : [{ level: 'error', emit: 'stdout' }], }); - + // 开发环境下记录查询日志 if (config.isDev) { // @ts-ignore - Prisma 事件类型 @@ -34,33 +32,58 @@ function createPrismaClient(): PrismaClient { logger.debug(`Query: ${e.query}`, { duration: `${e.duration}ms` }); }); } - + return client; } // 使用单例模式 export const prisma = global.prisma || createPrismaClient(); -// 开发环境下保存到全局,避免热重载时创建多个连接 +// 开发环境下保存到全局,避免热重载时创建多个连? 
if (config.isDev) { global.prisma = prisma; } -// 优雅关闭连接 -async function disconnectPrisma() { - await prisma.$disconnect(); - logger.info('Database connection closed'); +let disconnectPromise: Promise | null = null; +let disconnectLogged = false; + +// 优雅关闭连接(去重入,避免多次日? +function disconnectPrisma(): Promise { + if (disconnectPromise) { + return disconnectPromise; + } + + disconnectPromise = prisma + .$disconnect() + .catch((error: unknown) => { + const message = error instanceof Error ? error.message : 'Unknown disconnect error'; + logger.error('Database disconnect failed', { message }); + }) + .finally(() => { + if (!disconnectLogged) { + disconnectLogged = true; + logger.info('Database connection closed'); + } + }); + + return disconnectPromise; } -// 监听进程退出事件 -process.on('beforeExit', disconnectPrisma); -process.on('SIGINT', async () => { - await disconnectPrisma(); - process.exit(0); +// 监听进程退出事? +process.once('beforeExit', () => { + void disconnectPrisma(); +}); + +process.once('SIGINT', () => { + void disconnectPrisma().finally(() => { + process.exit(0); + }); }); -process.on('SIGTERM', async () => { - await disconnectPrisma(); - process.exit(0); + +process.once('SIGTERM', () => { + void disconnectPrisma().finally(() => { + process.exit(0); + }); }); export default prisma;