diff --git a/.cursor/rules b/.cursor/rules new file mode 160000 index 0000000..2bbe13e --- /dev/null +++ b/.cursor/rules @@ -0,0 +1 @@ +Subproject commit 2bbe13e34c89252cb2a650729c06fea8806d4445 diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..15d1774 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,13 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true + +[*.go] +indent_style = tab + +[*.{md,yml,yaml}] +indent_style = space +indent_size = 2 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..97a92ca --- /dev/null +++ b/.gitattributes @@ -0,0 +1,7 @@ +# Normalize text files and enforce LF where required +* text=auto + +# Always use LF line endings for source/docs/scripts that are sensitive to EOLs +*.go text eol=lf +*.md text eol=lf +*.sh text eol=lf diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..e6a24bf --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,109 @@ +name: CI + +on: + push: + branches: [ develop, main, master ] + pull_request: + branches: [ '**' ] + +permissions: + contents: read + +jobs: + build: + name: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest, macos-latest, windows-latest ] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + + - name: Print Go version + run: go version + + - name: Install ripgrep (Linux) + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y ripgrep + + - name: Install ripgrep (macOS) + if: runner.os == 'macOS' + run: | + brew update + brew install ripgrep + + - name: Install make and ripgrep (Windows) + if: runner.os == 'Windows' + shell: bash + run: | + choco install -y make ripgrep + echo "make version:"; make --version || true + echo "rg version:"; rg --version || true + + - name: Print 
Go env + run: go env + + - name: Tidy + shell: bash + run: make tidy + + - name: lint (includes check-go-version) + shell: bash + run: | + set -o pipefail + make lint 2>&1 | tee lint.log + + - name: Assert lint order (check-go-version before golangci-lint) + shell: bash + run: | + test -f lint.log || { echo "lint.log missing"; exit 1; } + L_CHECK=$(rg -n "^check-go-version: OK" lint.log | head -1 | cut -d: -f1) + L_GCL=$(rg -n "^golangci-lint version" lint.log | head -1 | cut -d: -f1) + if [ -z "$L_CHECK" ]; then echo "Missing 'check-go-version: OK' line in lint.log"; exit 1; fi + if [ -z "$L_GCL" ]; then echo "Missing 'golangci-lint version' line in lint.log"; exit 1; fi + if [ "$L_CHECK" -ge "$L_GCL" ]; then + echo "Ordering incorrect: 'check-go-version: OK' occurs at line $L_CHECK, after golangci-lint version at line $L_GCL"; exit 1; + fi + echo "Lint order OK: check-go-version runs before golangci-lint" + + - name: Upload lint.log artifact + uses: actions/upload-artifact@v4 + with: + name: lint-${{ matrix.os }} + path: lint.log + if-no-files-found: error + + - name: Tools path hygiene + shell: bash + run: make check-tools-paths + + - name: Verify tools manifest commands + shell: bash + run: make verify-manifest-paths + + - name: Test + shell: bash + run: make test + + - name: Test clean-logs guard + shell: bash + run: make test-clean-logs + + - name: Build + shell: bash + run: make build + + - name: Build tools + shell: bash + run: make build-tools diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f60e497 --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +agentcli +agentcli.exe +bin/ +logs/ + +.goagent +*.log + +# Ignore built tool binaries only; keep sources tracked +tools/bin/** + +# Test reports and coverage artifacts +# Keep the working tree clean after local test runs +reports/ +tests/ +coverage.out +*.coverprofile +coverage.html +lint.err +lint_verify.err + +# Common local artifacts +dist/ +.DS_Store +go.work +.idea/ +.vscode/ 
+# Editor swap files +*.swp + +work/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..559a2e9 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule ".cursor/rules"] + path = .cursor/rules + url = git@github.com:hyperifyio/go-cursor-rules.git +[submodule "scripts"] + path = scripts + url = git@github.com:hyperifyio/go-scripts.git diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..ab399e4 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,27 @@ +run: + timeout: 5m + issues-exit-code: 1 + tests: true + +linters: + enable: + - govet + - gofmt + - errcheck + - staticcheck + - gocyclo + - gosimple + +linters-settings: + gocyclo: + min-complexity: 20 + gofmt: + simplify: true + errcheck: + check-type-assertions: true + check-blank: true + +issues: + exclude-use-default: false + max-issues-per-linter: 0 + max-same-issues: 0 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7e3e15b --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,487 @@ +### State +- Maintenance: refreshed PR inventory on 2025-08-19; synced `./work/main` to `origin/main`; created branch `pr/docs-cli-reference`, restored `docs/reference/cli-reference.md` from `develop`, and opened PR #55. (develop-only admin update) +- Opened PR #50: versioned `StateBundle` schema (v1) with validation and deterministic source hash. Tracked in `FEATURE_CHECKLIST.md`. (develop-only admin update) +- Opened PR #51 (draft): atomic save/load with secure dir validation, quarantine on corruption, and coarse advisory lock. Tracked in `FEATURE_CHECKLIST.md`. 
(develop-only admin update) +- [ ] Tool: citation_pack — OPEN: https://github.com/hyperifyio/goagent/pull/40 +- [ ] Tool: dedupe_rank — OPEN: https://github.com/hyperifyio/goagent/pull/39 +- [ ] Tool: github_search — OPEN: https://github.com/hyperifyio/goagent/pull/38 +- [ ] Tool: crossref_search — OPEN: https://github.com/hyperifyio/goagent/pull/37 +- [ ] Tool: openalex_search — OPEN: https://github.com/hyperifyio/goagent/pull/36 +- [ ] Tool: wiki_query — OPEN: https://github.com/hyperifyio/goagent/pull/35 +- [ ] Tool: pdf_extract — OPEN: https://github.com/hyperifyio/goagent/pull/34 +- [ ] Tool: searxng_search — OPEN: https://github.com/hyperifyio/goagent/pull/33 +PR slicing plan (tracked on develop; code PRs will branch from main in `./work/main`): + - [ ] Tool: img_create — OPEN: https://github.com/hyperifyio/goagent/pull/26 + - [ ] Tool: http_fetch — OPEN: https://github.com/hyperifyio/goagent/pull/27 + - [ ] Tool: robots_check — OPEN: https://github.com/hyperifyio/goagent/pull/28 + - [ ] Tool: readability_extract — OPEN: https://github.com/hyperifyio/goagent/pull/29 + - [ ] Tool: metadata_extract — OPEN: https://github.com/hyperifyio/goagent/pull/30 + - [ ] Tool: rss_fetch — OPEN: https://github.com/hyperifyio/goagent/pull/31 + - [ ] Tool: wayback_lookup — OPEN: https://github.com/hyperifyio/goagent/pull/32 +- [ ] PR: Makefile wiring for tools (build-tools/clean) +- [ ] PR: scripts and CI utilities — OPEN: https://github.com/hyperifyio/goagent/pull/42 +- [ ] PR: security & runbooks — OPEN: https://github.com/hyperifyio/goagent/pull/43 +- [ ] PR: ADRs + - [ ] ADR-0011: State bundle schema — OPEN: https://github.com/hyperifyio/goagent/pull/48 + - [ ] ADR-0012: State dir persistence — OPEN: https://github.com/hyperifyio/goagent/pull/49 +- [ ] PR: diagrams +- [ ] PR: CLI features (version, print-config, timeouts/backoff, validator, enable/disable) +- [ ] PR: parallel tool calls (main loop) +- [ ] PR: audit logs and redaction +- [ ] PR: minimal integration test +- 
[ ] PR: README finalization and examples +- [ ] Post-migration cleanup + +### Sandbox +- Opened PR #52: limits utilities (bounded output buffer, wall-time helper, JSON error helper). Tracked in `FEATURE_CHECKLIST.md`. (develop-only admin update) + +* [x] Initialize repo with Go module and scaffolding: run `mkdir agentcli && cd agentcli && git init && go mod init github.com//agentcli`; add `LICENSE` (MIT), `README.md` (purpose, usage, examples), `.gitignore` (bin/, dist/, .DS_Store, go.work, .idea, .vscode); create directories `cmd/agentcli`, `internal/oai`, `internal/tools`, `docs/adr`, `docs/diagrams`; set default model id `oss-gpt-20b` and default OpenAI-compatible base URL `https://api.openai.com/v1` to be read from env or flags. +* [x] Implement `cmd/agentcli/main.go` non-interactive run loop: `package main` with `main()` reading flags, building initial messages `[system,user]`, calling the HTTP client, executing any returned tool calls, appending tool results as `role=tool` messages, repeating until the model returns a final assistant message with text, then printing to stdout and exiting 0; all failures print a concise error to stderr and exit non-zero (2 for CLI misuse like missing `-prompt`, 1 otherwise). +* [x] Implement full CLI flag set with defaults and env fallbacks: `-prompt (string, required)`, `-tools (path to tools.json, optional)`, `-system (string, default "You are a helpful, precise assistant. Use tools when strictly helpful.")`, `-base-url (string, default env OAI_BASE_URL or https://api.openai.com/v1)`, `-api-key (string, default env OAI_API_KEY)`, `-model (string, default env OAI_MODEL or oss-gpt-20b)`, `-max-steps (int, default 8)`, `-timeout (duration, default 30s, applies to HTTP and tool exec unless tool overrides)`, `-temp (float64, default 0.2)`, `-debug (bool, default false, dumps request/response JSON to stderr)`; precedence: flag > env > hard default; validate `-prompt` non-empty before running. 
+* [x] Implement OpenAI-compatible client (POST `/v1/chat/completions`): define request types `{model, messages[], tools[], tool_choice:"auto", temperature}` and response types `{choices[].message{role,content,tool_calls[]}, choices[].finish_reason}`; send `Authorization: Bearer ` if provided; `Content-Type: application/json`; use `http.Client{Timeout: }`; treat any non-2xx as error with body included; support `finish_reason` but not required to stop if you already have final content; never stream in MVP. +* [x] Wire default config for oss-gpt-20b: honor `OAI_MODEL=oss-gpt-20b`, `OAI_BASE_URL=https://api.openai.com/v1`, and `OAI_API_KEY=`; document that any OpenAI-compatible endpoint can be used by changing `-base-url` and `-model`; ensure no hard dependency on OpenAI SDKs (pure net/http). +* [x] Implement tool manifest loader `internal/tools/manifest.go`: load `tools.json`; schema per tool `{ "name": string (required, unique), "description": string, "schema": object (JSON Schema for params), "command": [string,...] (argv, required), "timeoutSec": int (optional per-call) }`; validate: name non-empty, command len>=1, names unique; build a registry `map[string]ToolSpec` and an OpenAI “tools” array `[{type:"function", function:{name,description,parameters}}]`; return both; on parse/validation error, fail fast with clear message including tool name. +* [x] Implement secure tool runner `internal/tools/runner.go`: execute tools via `exec.CommandContext(ctx, argv[0], argv[1:]...)`; pass function arguments string verbatim to tool stdin (JSON), capture stdout (string) and stderr; apply timeout: prefer per-tool `timeoutSec`, else global `-timeout`; scrub environment to a minimal allowlist (PATH and HOME only); never invoke a shell; return stdout as the tool result string; on timeout return error "tool timed out", on non-zero exit return error including stderr snippet. 
+* [x] Implement assistant tool-call loop in `main.go`: request includes `tool_choice:"auto"` and declared tools; when assistant message contains `tool_calls`, iterate each call with `type:"function"`, find spec by `function.name`; if unknown, synthesize a tool result `{"error":"unknown tool "}`; for each executed call, append a message `{role:"tool", name:, tool_call_id:, content:}`; then immediately call the API again with the extended transcript; stop when assistant returns content and zero tool_calls; print content to stdout. +* [x] Map tool failures deterministically to JSON error content: on any runner error, set tool message content to a single-line JSON `{"error":""}` where message is JSON-escaped and truncated (e.g., to 1k chars) to avoid prompt bloat; do not exit on individual tool errors—let the model decide recovery; only exit non-zero if the overall run loop ends without producing final assistant text or if the HTTP call fails. +* [x] Provide example tool `tools/cmd/get_time` and build instructions: implements `stdin` JSON `{"tz":"Europe/Helsinki"}` (default `"UTC"` if omitted); outputs single-line JSON `{"tz":"...","iso":"RFC3339","unix":}` to stdout; returns exit code 0 on success; build with `make build-tools` (emits `./tools/bin/get_time`); usage example prompt: “What’s the local time in Helsinki? If tools are available, call get_time.”; confirm tool respects `TZ` value strictly via `time.LoadLocation`. +* [x] Provide example `tools.json` colocated at repo root: `{ "tools":[ { "name":"get_time", "description":"Get current time for an IANA timezone", "schema":{ "type":"object", "properties":{ "tz":{"type":"string","description":"IANA timezone, e.g. Europe/Helsinki"} }, "required":["tz"], "additionalProperties":false }, "command":["./tools/bin/get_time"], "timeoutSec":5 } ] }`; document that `command` is relative to the working directory of `agentcli`. 
+* [x] Add build targets: root `Makefile` with `build: go build -o bin/agentcli ./cmd/agentcli`, `build-tools: make build-tools` (emits `tools/bin/*`), `lint`, `test`, `clean`; ensure `CGO_ENABLED=0` for reproducible static binaries; support `GOOS`/`GOARCH` overrides for cross-compilation; add `bin/` and built tools to `.gitignore`. +* [x] Write README quickstart that is fully runnable: prerequisites (Go 1.21+), `make build build-tools`, export `OAI_BASE_URL`, `OAI_MODEL=oss-gpt-20b`, `OAI_API_KEY` if required, then run `./bin/agentcli -prompt "What's the local time in Helsinki? Use get_time." -tools ./tools.json -debug`; explain expected behavior (model triggers function call, tool prints JSON, agent posts back, model replies with final text), and show sample output line and non-zero exit behavior on errors. +* [x] Add ADR-0001 documenting architecture and protocol: create `docs/adr/0001-minimal-agent-cli.md` containing context (non-interactive CLI agent with OpenAI-compatible API and local tools), options considered (Go vs Python vs local inference), decision (Go + Chat Completions tools + argv tools), rationale (static binary, process control, vendor-agnostic), consequences (no streaming in MVP, single-threaded tool calls), and a link to the canonical GitHub issue; include explicit contracts for CLI flags and tool I/O in the ADR. +* [x] Add Mermaid sequence diagram kept in repo: `docs/diagrams/agentcli-seq.md` with a `sequenceDiagram` that shows CLI → API → tool → API → final response; reference this file from both `README.md` and ADR-0001; require updates to diagram in any PR that changes the loop or message flow. 
+* [x] Implement unit tests (deterministic) for manifest, runner, and client: `internal/tools/manifest_test.go` verifies invalid/missing fields and uniqueness errors; `internal/tools/runner_test.go` includes a helper tool binary that sleeps to trigger timeout and asserts `"tool timed out"` mapping; `internal/oai/client_test.go` uses `httptest.Server` to return 500 and asserts error includes status and body; tests must run offline and pass on `go test ./...`. +* [x] Implement conversation-loop test using `httptest.Server` and a fake tool: server step 1 responds with assistant message containing `tool_calls=[{id:"1",type:"function",function:{name:"echo",arguments:"{\"text\":\"hi\"}"}}]`; fake tool reads stdin `{"text":"hi"}` and prints `{"echo":"hi"}`; server step 2 returns final assistant message `"done"`; run `agentcli` main via a small wrapper (or extract loop into testable function) and assert stdout equals `"done\n"` and exit code 0. +* [x] Document security posture and trust boundaries in README: tools are an explicit allowlist (`tools.json`); no shell invocation (argv only); stdin/stdout JSON contract; per-call timeouts; minimal environment; recommend running untrusted tools under containment (container/jail/user namespace) and setting a working directory with restricted permissions; clarify that the model is untrusted input—never pass its arguments to a shell; state that secrets must be supplied via env/CI secrets and never committed. 
+* [x] Output an error if any configured tool is missing and unavailable +* [x] Improve .cursor/rules/work-on-features.mdc to instruct that files must be deleted using `git rm ...` command +* [x] Implement `tools/exec.go` (unrestricted command exec) — stdin `{"cmd":"string","args":["..."],"cwd?:string,"env?:{K:V},"stdin?:string,"timeoutSec?:int}`; run via `exec.CommandContext` (no shell), full network allowed; stdout single-line JSON `{"exitCode":int,"stdout":"string","stderr":"string","durationMs":int}`; DoD: TDD unit + integ tests (success, non-zero, timeout, cwd/env, stdin), README example, traced to issue, CI green with coverage unchanged. +* [x] Implement `tools/fs_read_file.go` — stdin `{"path":"string","offsetBytes?:int,"maxBytes?:int}` (repo-relative); outputs `{"contentBase64":"string","sizeBytes":int,"eof":bool}`; DoD: tests (text, binary round-trip, ranges, NOT_FOUND), docs example, link to issue, CI green. +* [x] Implement `tools/fs_write_file.go` (atomic write) — stdin `{"path":"string","contentBase64":"string","createModeOctal?":"0644"}`; write via temp+rename; outputs `{"bytesWritten":int}`; DoD: tests (create, overwrite, binary, missing parent error), docs example, CI green. +* [x] Implement `tools/fs_append_file.go` — stdin `{"path":"string","contentBase64":"string"}`; append (create if missing) with advisory file lock; outputs `{"bytesAppended":int}`; DoD: tests (double append, concurrent writers), docs example, CI green. 
+ - [x] [S01] stub types and argument validation for fs_append_file (repo-relative path, base64 decode) + - [x] [S02] failing contract test for double append (create-if-missing, then append) + - [x] [S03] minimal passing implementation for single-writer append with advisory lock (no concurrency test yet) + - [x] [S04] add concurrent writers test (two goroutines append deterministically; order-agnostic content length assertion) + - [x] [S05] strengthen implementation to pass concurrency test and add README docs example +* [x] Implement `tools/fs_mkdirp.go` — stdin `{"path":"string","modeOctal?":"0755"}`; recursively create; outputs `{"created":bool}`; DoD: tests (deep path, idempotence), docs example, CI green. + - [x] [S01] implement minimal mkdirp behavior and docs example; tests pass offline +* [x] Implement `tools/fs_rm.go` — stdin `{"path":"string","recursive?:bool,"force?:bool}`; remove file/dir; outputs `{"removed":bool}`; DoD: tests (file, dir tree, force on missing), docs example, CI green. + - [x] [S01:rm-delete-file-test] failing unit test for deleting a regular file + - [x] [S02:rm-delete-file-impl] minimal implementation to delete a regular file and pass S01 + - [x] [S03:rm-force-on-missing-test] unit test for force=true on missing path (expects exit 0 and removed=false) + - [x] [S04:rm-readme-example] README example and Makefile build rule for fs_rm; tests pass +* [x] Implement `tools/fs_move.go` — stdin `{"from":"string","to":"string","overwrite?:bool}`; rename or copy+remove across devices; outputs `{"moved":bool}`; DoD: tests (rename, overwrite=false blocks, cross-device), docs example, CI green. 
+ - [x] [S03:move-overwrite-true] unit test for overwrite=true replacing existing destination; ensure implementation passes +* [x] Add docs/README.md as a short index and navigation for the docs tree with cross links to docs/adr/0001-minimal-agent-cli.md and docs/diagrams/agentcli-seq.md, smallest change is creating the index and adding links from the top level README and ADR, scope limited to documentation, low risk and independent, DoD includes tests unchanged and green with no coverage regression, build reproducible, static analysis type checks formatting linting security scanning and secret detection all green, backward compatibility unaffected, docs updated in the same change, at least one peer review completed and linked, verification by rendering links on GitHub, rollback by reverting the file and cross links, traceability + - [x] [S01:docs-index] create `docs/README.md` index with links to ADR-0001 and sequence diagram + - [x] [S02:cross-links] add links from top-level `README.md` and ADR-0001 back to `docs/README.md` +* [x] Write docs/architecture/module-boundaries.md to define allowed imports and layering between cmd internal/oai internal/tools and tools binaries, include a simple Mermaid diagram and guidance for adding new packages, smallest change is adding this single document and linking it from docs/README.md and README, scope documentation, low risk, DoD includes tests unchanged and green with no coverage regression all quality gates green backward compatibility preserved docs and changelog updated as needed peer review completed verification by rendering diagram on GitHub and checking links rollback by reverting the doc and links, traceability + - [x] [S01:module-boundaries-doc] add module-boundaries doc and cross‑links from `README.md` and `docs/README.md` +* [x] Keep Makefile clean target symmetrical with build-tools by also removing tools/fs_search (currently omitted) so stale tool binaries are not left behind; smallest change is adding 
tools/fs_search to the clean recipe; scope Makefile; low risk; DoD includes running make build-tools then make clean leaves no built tool binaries (git status clean), tests unchanged and green, all quality gates green, peer review completed, verification by the described commands, rollback by reverting the Makefile change. +* [x] Reconcile minimum Go version by setting a single supported floor across go.mod and README.md (go.mod currently declares go 1.24.6 while README states Go 1.21+); smallest change is either lowering the go directive in go.mod to 1.21 to match docs or updating README (and future CI matrix) to 1.24+; scope limited to go.mod, docs, and CI config when added; low risk; DoD includes tests unchanged and green with no coverage regression, reproducible build, all quality gates green (vet/format/lint/security/secret detection) with no new findings, backward compatibility preserved, at least one peer review completed, verification by building on the declared floor version locally and in CI, rollback by reverting the version/doc change. +* [x] Update README.md fs_mkdirp example to remove the outdated note “until aggregated in Makefile” and use the existing make build-tools step consistently; smallest change is editing that section only; scope documentation; low risk; DoD includes tests unchanged and green, all quality gates green, README renders correctly on GitHub, peer review completed, verification by running the example from a clean clone after make build build-tools, rollback by reverting the edit. 
+* [x] Align the get_time tool contract across tools/timecli (expects {"timezone":"..."} and outputs {"timezone","iso8601"}), README.md (describes tz and unix seconds), and tools.json (declares required tz) by choosing one canonical schema (prefer timezone with optional alias tz) and updating code, tests, and docs accordingly; smallest change is adding a backward-compatible alias in code and adjusting docs/manifest; scope limited to this tool; low risk; DoD includes unit test covering both inputs and output fields, README quick start and tools.json updated, all quality gates green with no coverage regression, peer review completed, verification by running the quick start end-to-end, rollback by reverting the edits. +* [x] Normalize API key environment variable naming across the repo to OAI_API_KEY (scripts like scripts/smoke-test.sh currently reference OPENAI_API_KEY) and document the canonical variable in README.md while keeping the old name as a fallback for compatibility; smallest change is updating the script to prefer OAI_API_KEY with OPENAI_API_KEY fallback and adjusting docs; scope small; low risk; DoD includes tests unchanged and green, all gates green, verification by running the script with only OAI_API_KEY set and observing successful auth header injection, rollback by reverting the script and doc changes. 
+* [x] Create docs/reference/tools-manifest.md that precisely documents the tools.json schema required and optional fields validation rules examples and common mistakes and how schemas are exposed to the model, smallest change is a focused reference page and links from README usage and ADR-0001, scope documentation, low risk, DoD includes examples runnable locally tests unchanged and green all quality gates green backward compatibility unaffected peer review completed verification by running example commands and rendering on GitHub rollback by removing the doc and links, traceability + - [x] [S01:manifest-doc] create `docs/reference/tools-manifest.md` reference page + - [x] [S01b:manifest-unblock-green] stabilize tools tests or restore missing tool packages so docs-only changes keep test suite green + - [x] [S01b-a:restore-missing-tool-dirs] restore missing tool packages: `tools/fs_append_file`, `tools/fs_move`, `tools/fs_rm`, `tools/fs_write_file` (add minimal `main` stubs to compile; behavior tests to pass in later slices) + - [x] [S02:manifest-links] add links from `README.md`, `docs/README.md`, and ADR-0001 +* [x] Author docs/security/threat-model.md expanding on trust boundaries untrusted model output handling tool containment recommendations secrets handling logging and redaction expectations and audit considerations, smallest change is adding this page and linking it from README Security model, scope documentation, low risk, DoD includes tests unchanged and green all quality gates green backward compatibility unaffected peer review completed verification by reviewing content accuracy and link rendering rollback by reverting the doc and links, traceability +* [x] Provide docs/runbooks/troubleshooting.md covering common errors and fixes including missing tool binaries repo relative path violations tool timeouts HTTP errors and golangci-lint installation path with copy paste commands, smallest change is adding this runbook and linking from README 
Troubleshooting, scope documentation, low risk, DoD includes steps verified on a clean clone tests unchanged and green all quality gates green peer review completed verification by following steps locally rollback by reverting the doc and links, traceability +* [x] Fix tools/timecli error contract to write errors to stderr and exit non-zero instead of printing error JSON to stdout so the runner maps failures to {"error":"..."} deterministically; smallest change is adjusting error paths in tools/timecli/main.go and adding focused unit tests; scope limited to timecli and tests; low risk; DoD includes failing test first then passing after change, README examples still work, tests elsewhere unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by piping invalid input and observing non-zero exit with stderr JSON, rollback by reverting the edits. +* [x] Reconcile model identifier in README quick start with default OAI_MODEL by choosing one canonical value (currently README uses openai/gpt-oss-20b while defaults use oss-gpt-20b) and updating docs accordingly; smallest change is editing README only; scope documentation; low risk; DoD includes docs render correctly, examples runnable from a clean clone, tests unchanged and green, all quality gates green, peer review completed, verification by running the quick start with the documented model, rollback by reverting the edit. +* [x] Implement `tools/fs_search.go` — stdin `{"query":"string","regex?:bool,"globs?:["**/*.go"],"maxResults?:int}`; returns `{"matches":[{"path":"string","line":int,"col":int,"preview":"string"}],"truncated":bool}`; DoD: tests (literal, regex, glob filter, truncation), docs example, CI green. 
+ - [x] [S01:search-failing-literal-test] add failing unit test for literal search on a small fixture file (no regex, no globs) + - [x] [S02:search-skeleton] scaffold minimal `tools/fs_search` program with argument parsing/types + - [x] [S03:search-impl-literal] minimal implementation to pass literal search test + - [x] [S04:search-regex-glob-tests] add tests for regex and glob filtering + - [x] [S05:search-truncation] implement maxResults truncation and test +* [x] Update scripts/smoke-test.sh to remove goresearch and SearxNG checks and make it goagent-specific retaining only the OpenAI-compatible LLM health check and clarifying messages; smallest change is editing the script header and deleting the Searx section; scope scripts; low risk; DoD includes script runs locally and reports PASS/FAIL for LLM reachability without Docker or Searx, tests unchanged and green, all quality gates green, peer review completed, verification by executing the script on a clean clone, rollback by reverting the script change. + - [x] [S01:build-tools-entries] include `fs_mkdirp` and `fs_apply_patch` in `build-tools` and `clean`; align README fs_mkdirp example to use `make build-tools` +* [x] Update README with “unrestricted tools” warning + examples — clear risk note, copy-paste examples for each fs/* and exec tool, troubleshooting; DoD: docs lint passes, examples exercised in CI script, CI green. + - [x] [S01:adr-file-and-links] create ADR file and add links from `README.md` and `docs/README.md` +* [x] Add Mermaid diagram `docs/diagrams/toolbelt-seq.md` — sequence of CLI → API → tools → API → final; DoD: diagram renders on GitHub, referenced from README/ADR, updated test ensuring file exists, CI green. +* [x] Example prompts `examples/unrestricted.md` — prompts demonstrating `exec` + fs tools to write, build, and run code; DoD: examples validated by CI script, README link, CI green. 
+* [x] Implement `tools/fs_apply_patch.go` (unified diff) — stdin `{"unifiedDiff":"string","dryRun?:bool}`; strict apply (no fuzz), pre-validate with dry-run; outputs `{"filesChanged":int}`; DoD: tests (clean apply, conflict, idempotence, CRLF), docs example + cautions, CI green. + - [x] [S01:fs-apply-patch-stub] add stub tool validating input and exiting with NOT_IMPLEMENTED; build and tests remain green; groundwork for strict apply + - [x] [S02:fs-apply-clean-new-file] implement strict new-file unified diff apply and tests (clean apply creating a new file); umbrella remains open for conflict/idempotence/CRLF and docs +* [x] Implement tools/fs_edit_range.go — stdin {"path":"string","startByte":int,"endByte":int,"replacementBase64":"string","expectedSha256?":"string"}; atomically rewrite by splicing the range; output {"bytesReplaced":int,"newSha256":"string"}; DoD: TDD for mid-file edits, boundary cases (start=0, end=size), binary content, concurrent calls serialized, docs + CI green. + - [x] [S01:fs-edit-mid] add failing mid-file splicing test ensuring bytes [start:end) replaced and SHA reported + - [x] [S02:fs-edit-mid-impl] minimal implementation to pass mid-file splicing and boundary cases; tests green + - [x] [S03:fs-edit-lock] serialize concurrent edits with advisory file lock and add concurrency test; tests green +* [x] Implement tools/fs_read_lines.go — stdin {"path":"string","startLine":int,"endLine":int,"maxBytes?":int}; output {"content":"string","startLine":int,"endLine":int,"eof":bool}; DoD: TDD for LF/CRLF, UTF-8 multibyte safety, large files, docs + CI green. 
+* [x] Implement `tools/fs_listdir.go` — stdin `{"path":"string","recursive?:bool,"globs?:["**/*"],"includeHidden?:bool,"maxResults?:int}`; outputs `{"entries":[{"path":"string","type":"file|dir|symlink","sizeBytes":int,"modeOctal":"string","modTime":"RFC3339"}],"truncated":bool}`; paths repo-relative, stable ordering (dirs first, then files, lexicographic); DoD: TDD (empty dir, files+dirs, hidden on/off, glob filters, recursion, truncation, symlink entry), README example, linked issue, CI green with coverage unchanged. + - [x] [S02:listdir-readme-example] add README `fs_listdir` example section and ensure tests remain green + - [x] [S01:listdir-tools-manifest] add `fs_listdir` tool entry to `tools.json` and ensure tests remain green +* [x] Implement `tools/fs_stat.go` — stdin `{"path":"string","followSymlinks?:bool,"hash?: "none"|"sha256"}`; outputs `{"exists":bool,"type":"file|dir|symlink|other","sizeBytes":int,"modeOctal":"string","modTime":"RFC3339","sha256?":"string"}`; error on paths outside repo; DoD: TDD (file/dir/symlink, missing path, follow vs no-follow, optional hash), README example, linked issue, CI green. + - [x] [S01:fs-stat-file-test] add failing unit test for existing regular file reporting exists=true, type="file", and sizeBytes + - [x] [S02:fs-stat-file-impl] minimal implementation to pass file test + - [x] [S03:fs-stat-missing] add test for missing path returns exit 0 and exists=false + - [x] [S04:fs-stat-symlink+hash-tests] add tests for symlink follow/no-follow and sha256 output +* [x] Extend `tools.json` with new tools — add entries (name, description, JSON Schema, `command`, `timeoutSec`); loader must validate and surface schemas to OpenAI tools; DoD: manifest unit test (schema validity, names unique), sample `tools.json` updated, CI green. 
+* [x] Add Makefile build rules for all new tools — `build-tools` compiles each `./tools/*.go` to deterministic static binaries; DoD: `make build build-tools` passes locally and CI, artifacts gitignored, docs updated. + - [x] [S02:gitignore-binaries] ignore built tool binaries (`tools/*/*` and single-file outputs) and untrack existing ones; verify `git status` clean after `make build-tools` and `make clean` +* [x] Integrate tools into agent loop — ensure `agentcli` advertises schemas and executes tool calls end-to-end; DoD: integration test with fake API that triggers `fs_write_file` → `fs_read_file` → final message, README cross-referenced, CI green. +* [x] Add execution audit log in runner — append ndjson `{ts,tool,argv,cwd,exit,ms,stdoutBytes,stderrBytes,truncated}` to `.goagent/audit/YYYYMMDD.log`; DoD: unit test writes/rotates, redaction respected (see next), docs, CI green. + - [x] [S01:audit-write-line] append NDJSON audit to `.goagent/audit/YYYYMMDD.log` and add unit test verifying a line is written with expected fields; CI green. + - [x] [S02:audit-rotation] verify daily rotation behavior across date boundary with unit test; CI green. + - [x] [S03:audit-redaction] implement sensitive-value redaction and document fields; tests and docs; CI green. +* [x] Provide “capabilities” CLI (`agentcli -capabilities`) — prints enabled tools from `tools.json` and explicit warning that enabled tools allow arbitrary command execution and network access; DoD: unit test, README usage, CI green. +* [x] Add ADR-0002 “Unrestricted toolbelt (files+network)” — context, options, decision, consequences, JSON contracts, link to issue; DoD: ADR committed, referenced from README, peer-reviewed, CI doc checks green. 
+* [x] Remove unused duplicate exec implementation by deleting tools/execcli and consolidating the exec tool under tools/cmd/exec to avoid drift and dual maintenance; smallest change is removing the tools/execcli directory with git rm and adjusting any references if present (tests currently use `tools/bin/exec`); scope tools; low risk; DoD includes a failing dead-code check first (build/grep) then passing after removal, tests unchanged and green with no coverage regression, all quality gates green (vet, format, golangci-lint, static analysis, security scanning and secret detection) with no new findings, reproducible builds, README exec example still runs end to end, peer review completed, verification by running go test ./... and make build-tools from a clean clone and confirming no references to execcli remain, rollback by restoring the directory from Git. +* [x] Remove the no-op jitter and unneeded math/rand import in internal/tools/runner.go to reduce accidental complexity and potential lint noise; smallest change is deleting the unused rand.Int() call and its import; scope internal/tools; low risk; DoD includes a focused unit test asserting deterministic stdout/stderr collection without flakiness, tests otherwise unchanged and green with race/coverage, all quality gates green with no new findings, peer review completed, verification by running the test suite repeatedly and observing stability, rollback by restoring the line if flakiness reappears. 
+* [x] Update scripts/wait-for-health.sh to drop SearxNG and Docker Compose checks and poll only the OpenAI-compatible LLM models endpoint to align with this repo’s scope; smallest change is editing that single script only; scope scripts; low risk; DoD includes reproducing the current mixed-scope behavior then passing after the edit with tests elsewhere unchanged and green and no coverage regression, all quality gates green (vet/format/lint/security/secret detection) with no new findings, verification by running the script with LLM_BASE_URL set and observing timely success/failure for the models endpoint, and rollback by reverting the script change. +* [x] Teach the CLI to accept OPENAI_API_KEY as a fallback when OAI_API_KEY is unset to improve compatibility with existing environments and scripts while keeping OAI_API_KEY canonical in docs, smallest change is adding an env fallback in cmd/agentcli/main.go only; scope CLI; low risk; DoD includes a focused unit test for env resolution precedence (flag > OAI_API_KEY > OPENAI_API_KEY > default), tests otherwise unchanged and green with no coverage regression, all quality gates green (vet, format, golangci-lint, static analysis, security scanning and secret detection) with no new findings, peer review completed, verification by running the CLI with only OPENAI_API_KEY set and observing the Authorization header present, rollback by reverting the change. 
+* [x] Normalize environment variable names across scripts to prefer OAI_BASE_URL and OAI_MODEL with fallback from existing LLM_BASE_URL so users have one canonical set while keeping compatibility, smallest change is editing scripts to read OAI variables first with LLM fallbacks and adjusting README wording, scope scripts and docs, low risk; DoD includes running smoke and health scripts with only OAI_* set and observing success, tests unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by executing the scripts locally, rollback by reverting the edits. + - [x] [S01:l98-submodule-update] Update `scripts` submodule to modify `wait-for-health.sh` and `smoke-test.sh` to prefer `OAI_*` with `LLM_*` fallback; commit in submodule and bump superproject pointer. +* [x] Unify tool hierarchy so each tool’s binary is in ./tools/bin/NAME, source in ./tools/cmd/NAME/NAME.go, and tests in ./tools/cmd/NAME/NAME_test.go, updating Makefile, tools.json, documentation, tests, and all other affected places to use a consistent unique naming format. + - [x] [S01:l90-make-bin-outputs] Update `Makefile` to emit binaries under `tools/bin/NAME` while keeping legacy outputs for transition; tests green. + - [x] [S54:l90-lint-wire-path-guard] Amend `Makefile` `lint` target to invoke `$(MAKE) check-tools-paths` after golangci-lint/vet/format so path hygiene is enforced locally and in CI; DoD: running `make lint` on current branch fails due to legacy `./tools/` references and passes after migration slices, scope limited to `Makefile`, tests unchanged and green. + - [x] [S54a:l90-unblock-tests-legacy-dirs] Unblock S54 by restoring legacy tool source dirs expected by tests (e.g., `tools/exec`, `tools/fs_write_file`, `tools/fs_append_file`, ...), or adjust tests to current layout so `go test ./...` is green before committing S54. 
+ - [x] [S55:l90-guard-single-file-builds] Extend `check-tools-paths` (Makefile) to also fail on single-file or direct tool builds outside the canonical layout using a search like `rg -n '(\./tools/[a-z_]+\.go|go\s+(build|run)\s+.*\./tools/[a-z_]+)' -g '!tools/cmd/**' -g '!tools/bin/**' -g '!FEATURE_CHECKLIST.md'` across the repo; DoD: `make check-tools-paths` fails before migration and passes after, no behavior changes elsewhere. + - [x] [S56:l90-runbook-windows-examples] Update `docs/runbooks/troubleshooting.md` examples to include Windows variants using `./tools/bin/NAME.exe` alongside Unix `./tools/bin/NAME` so users on Windows can follow identical steps; DoD: doc renders on GitHub, and `rg -n '\.exe' docs/runbooks/troubleshooting.md` finds at least one tool invocation; tests unchanged and green. + - [x] [S57:l90-manifest-doc-errors] Document new validator errors in `docs/reference/tools-manifest.md` under “Common mistakes” (messages for relative `command[0]` not starting with `./tools/bin/` and for normalized escapes like `./tools/bin/../hack`), aligning wording with `internal/tools/manifest.go`; DoD: docs render, and `rg -n 'start with \\./tools/bin|escapes \\./tools/bin' docs/reference/tools-manifest.md` matches; tests unchanged and green. + - [x] [S49:l90-windows-bin-extensions] Update `Makefile` `build-tools` and `clean` recipes to emit `.exe` suffix when `GOOS=windows` for all binaries under `tools/bin/NAME(.exe)` and to remove them on `make clean`; verify by `GOOS=windows make build-tools` producing `tools/bin/{get_time,exec,fs_*}.exe` and `make clean` leaving `git status` clean; DoD: commands reproducible on Linux/macOS cross-compiling to Windows, tests unchanged and green. 
+ - [x] [S50:l90-docs-windows-exe] Document OS-specific binary names in `docs/reference/tools-manifest.md` and `README.md` (Quick start and tool examples): require relative `command[0]` to use `./tools/bin/NAME` on Unix and `./tools/bin/NAME.exe` on Windows, with a short note and dual JSON examples; DoD: docs render on GitHub, `rg "\.\/tools\/bin\/.*\.exe" -n README.md docs/reference/tools-manifest.md` finds at least one Windows example, tests unchanged and green. + - [x] [S51:l90-guard-no-legacy-paths] Add a `Makefile` target `check-tools-paths` that fails if any `./tools/(get_time|fs_|exec)` invocation remains outside `tools/bin` or `tools/cmd` (excluding `FEATURE_CHECKLIST.md`); implement with `rg` and negative globs; DoD: `make check-tools-paths` passes after migration, and fails before when run on current branch; no code behavior changed. + - [x] [S52:l90-manifest-escape-check] Harden `internal/tools/manifest.go` validation to reject relative `command[0]` that normalize (via `path.Clean` + `filepath.ToSlash`) to paths containing `..` segments or escaping `./tools/bin/` (e.g., `./tools/bin/../hack`), and add focused tests in `internal/tools/manifest_test.go`; DoD: new tests fail before change and pass after, `go test ./...` green across platforms. + - [x] [S53:l90-buildtool-windows-suffix-test] Extend `tools/testutil/buildtool.go` (after [S29:l90-test-helper-buildtool]) with a unit test asserting the returned binary path ends with `.exe` when `runtime.GOOS == "windows"` and has no suffix otherwise; migrate one representative tool test to assert the suffix via `BuildTool`; DoD: `go test ./tools/...` green on Unix and Windows runners when available. 
+ - [x] [S30:l90-manifest-enforce-bin-prefix] Enforce in `internal/tools/manifest.go` that any relative `command[0]` starts with `./tools/bin/` (absolute paths allowed for tests); adapt existing tests under `internal/tools/manifest_test.go` to cover this validation; DoD: added validation causes a failing test before change and passing after, `go test ./...` green. + - [x] [S31:l90-manifest-doc-bin-prefix] Update `docs/reference/tools-manifest.md` to explicitly require relative `command` entries to use `./tools/bin/NAME`, update the JSON example to match, and note absolute paths are only for tests; DoD: doc renders on GitHub, `rg "\./tools/bin/" docs/reference/tools-manifest.md -n` finds at least one match, tests unchanged and green. + - [x] [S23:l90-bin-mkdir] Ensure `Makefile` creates `tools/bin` before builds by adding `mkdir -p tools/bin` (or equivalent) at the start of the `build-tools` recipe; DoD: from a clean clone `rm -rf tools/bin && make build-tools` succeeds and produces `tools/bin/{get_time,exec,fs_read_file,fs_write_file,fs_append_file,fs_rm,fs_move,fs_search,fs_mkdirp,fs_apply_patch,fs_read_lines,fs_edit_range,fs_listdir,fs_stat}`. + - [x] [S24:l90-clean-transitional] Update `Makefile` `clean` to remove both legacy paths (`tools/` and `tools/*/`) and new `tools/bin/*` during the migration window (until S20 completes) to avoid stale artifacts; DoD: after `make build-tools`, `make clean` leaves `git status` clean with no leftover tool binaries. + - [x] [S24b:l90-unblock-build-tools] Restore missing tool source directories referenced by `build-tools` (e.g., `tools/exec`, `tools/fs_write_file`) or adapt tests to current layout so `make build-tools` and `go test ./...` pass; then re-run S24 verification. 
+ - [x] [S25:l90-docs-diagrams-paths] Update `docs/diagrams/agentcli-seq.md` and `docs/diagrams/toolbelt-seq.md` to replace `./tools/` with `./tools/bin/` where tools are invoked; DoD: both diagrams render on GitHub and `rg "\./tools/(get_time|fs_|exec)" docs/diagrams -n` returns no matches. + - [x] [S26:l90-runbook-troubleshooting-paths] Revise `docs/runbooks/troubleshooting.md` commands to build from `./tools/cmd/NAME` (post‑migration slices) and execute `./tools/bin/NAME`; DoD: copy‑pasted commands work on a clean clone after `make build build-tools`, and `rg "\./tools/(fs_|exec|get_time)" docs/runbooks -n` only shows `tools/bin` paths. + - [x] [S27:l90-architecture-doc-alignment] Align examples in `docs/architecture/module-boundaries.md` to the canonical layout (sources under `tools/cmd/NAME/*.go`, binaries under `tools/bin/NAME`) and update any `go build -o tools/ ./tools/<...>` snippets; DoD: doc renders and `rg "go build -o tools/" docs/architecture -n` returns no matches. + - [x] [S28:l90-toolsjson-path-validation] Add a focused unit test in `internal/tools/manifest_test.go` that asserts relative `command[0]` entries in `tools.json` start with `./tools/bin/` (allow absolute paths for test fixtures); DoD: test fails before S02 and passes after, with `go test ./...` green and coverage unchanged. + - [x] [S29:l90-test-helper-buildtool] Introduce `tools/testutil/buildtool.go` providing `BuildTool(t, name string) (binPath string)` that builds from `./tools/cmd/` into `t.TempDir()`; migrate one representative test (e.g., `tools/fs_listdir_test.go`) to use it as a pattern; DoD: tests pass locally (`go test ./...`), helper adopted without breaking other tests. 
+ - [x] [S01:l90-make-bin-outputs] Update `Makefile` `build-tools` and `clean` to emit/remove binaries under `tools/bin/NAME` while building from current sources (`tools/timecli`, `tools/exec`, `tools/fs_*`, and `tools/fs_read_file.go`), without moving sources yet; DoD: `make build-tools` creates `tools/bin/{get_time,exec,fs_read_file,fs_write_file,fs_append_file,fs_rm,fs_move,fs_search,fs_mkdirp,fs_apply_patch,fs_read_lines,fs_edit_range,fs_listdir,fs_stat}` and `make clean` removes them, `git status` clean, `go test ./...` green. + - [x] [S02:l90-toolsjson-bin] Point `tools.json` `command` entries to `./tools/bin/NAME` for all tools (names exactly as current binaries), keeping schemas unchanged; DoD: `./bin/agentcli -prompt "time?" -tools ./tools.json -debug` works with `get_time`, integration tests unaffected, `go test ./...` green. + - [x] [S03:l90-docs-build-path] Revise `README.md` and `docs/reference/tools-manifest.md` usage snippets to reference `tools/bin/NAME` and `make build-tools`; DoD: docs render, examples run from clean clone (`make build build-tools` then quickstart), tests unchanged and green. + - [x] [S04:l90-gitignore-tighten] Narrow `.gitignore` to ignore `tools/bin/**` only (and keep sources under `tools/cmd/**` tracked), removing blanket `tools/*/*` ignores while preserving exceptions for `*.go`; DoD: creating `tools/cmd/demo/main.go` is tracked, `make build-tools` yields ignored `tools/bin/*`, `git status` clean, tests green. + - [x] [S05:l90-test-discovery-update] Update test helpers that build tools to use canonical package paths once migrated (e.g., change build path comments and invocations from `./tools/bin/fs_mkdirp` or `./tools/cmd/fs_read_file` to `./tools/cmd/NAME` as each tool moves); DoD: targeted tests updated only when their tool migrates, all tests green. 
+ - [x] [S05a:l90-test-update-fs_mkdirp] After [S06:l90-migrate-fs_mkdirp], replace inline builder in `tools/fs_mkdirp_test.go` with `testutil.BuildTool(t, "fs_mkdirp")` and remove direct `go build ./fs_mkdirp`; DoD: `go test ./tools -run FsMkdirp` passes on Unix/Windows, full suite green. + - [x] [S06:l90-migrate-fs_mkdirp] Move `tools/fs_mkdirp/main.go` to `tools/cmd/fs_mkdirp/fs_mkdirp.go`, update its tests to build from `./tools/cmd/fs_mkdirp`, and adjust `Makefile` source mapping for `fs_mkdirp`; DoD: `go test ./tools -run FsMkdirp` passes, `make build-tools` builds `tools/bin/fs_mkdirp` from new path, no other tests fail. + - [x] [S07:l90-migrate-fs_read_file] Move single-file `tools/fs_read_file.go` to `tools/cmd/fs_read_file/fs_read_file.go`, update `tools/fs_read_file_test.go` build path, and update `Makefile` mapping; DoD: `go test ./tools -run FsReadFile` passes, `tools/bin/fs_read_file` builds from new path, suite green. + - [x] [S08:l90-migrate-exec] Move `tools/exec/main.go` to `tools/cmd/exec/exec.go`, update `tools/exec_test.go` build path, and adjust `Makefile`; DoD: `go test ./tools -run Exec` passes, `tools/bin/exec` builds from new path, suite green. + - [x] [S09:l90-migrate-get_time] Move `tools/timecli/main.go` to `tools/cmd/get_time/get_time.go` (binary remains `get_time`), update `tools/timecli_test.go` build path, and adjust `Makefile`; DoD: `go test ./tools -run Time` passes, `tools/bin/get_time` builds from new path, suite green. + - [x] [S10:l90-migrate-fs_write_file] Move `tools/fs_write_file/main.go` to `tools/cmd/fs_write_file/fs_write_file.go`, update its test and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. + - [x] [S11:l90-migrate-fs_append_file] Move `tools/fs_append_file/main.go` to `tools/cmd/fs_append_file/fs_append_file.go`, update test and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. 
+ - [x] [S12:l90-migrate-fs_rm] Move `tools/fs_rm/main.go` to `tools/cmd/fs_rm/fs_rm.go`, update test and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. + - [x] [S13:l90-migrate-fs_move] Move `tools/fs_move/main.go` to `tools/cmd/fs_move/fs_move.go`, update test and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. + - [x] [S14:l90-migrate-fs_search] Move `tools/fs_search/main.go` to `tools/cmd/fs_search/fs_search.go`, update test and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. + - [x] [S15:l90-migrate-fs_apply_patch] Move `tools/fs_apply_patch/main.go` to `tools/cmd/fs_apply_patch/fs_apply_patch.go`, update test and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. + - [x] [S16:l90-migrate-fs_read_lines] Move `tools/fs_read_lines/main.go` to `tools/cmd/fs_read_lines/fs_read_lines.go`, update tests (`fs_read_lines*_test.go`) and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. + - [x] [S17:l90-migrate-fs_edit_range] Move `tools/fs_edit_range/main.go` to `tools/cmd/fs_edit_range/fs_edit_range.go`, update tests and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. + - [x] [S18:l90-migrate-fs_listdir] Move `tools/fs_listdir/main.go` to `tools/cmd/fs_listdir/fs_listdir.go`, update tests (`fs_listdir*_test.go`) and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. + - [x] [S19:l90-migrate-fs_stat] Move `tools/fs_stat/main.go` to `tools/cmd/fs_stat/fs_stat.go`, update test and `Makefile`; DoD: targeted tests pass, binary builds from new path, suite green. 
+ - [x] [S20:l90-remove-legacy-paths] Delete legacy tool sources left under `tools/*` (e.g., `tools/fs_read_file.go`, `tools/*/main.go`) with `git rm` and update `Makefile`/`clean` to stop referencing old paths; DoD: `rg "\./tools/(fs_|exec|timecli)" -g '!tools/cmd/**'` finds no sources, `make build-tools clean` reproducible, suite green. + - [x] [S21:l90-update-integration] Update `cmd/agentcli/tools_integration_test.go` and any examples to assume `tools/bin` layout and validate end-to-end tool invocation via `tools.json`; DoD: integration test passes with new paths, README quickstart verified, suite green. + - [x] [S22:l90-final-sweep] Search-and-replace any remaining built-path references (`./tools/`) across repo (code, docs, scripts) to `./tools/bin/` and ensure links and examples resolve; DoD: workspace grep returns none, `make lint test build build-tools` all green, `git status` clean. + - [x] [S22a:l90-final-sweep-lint] Ensure `make lint` (golangci-lint) and `make check-tools-paths` (ripgrep) run green in an environment with these tools installed; no code changes expected; mark S22 complete when both gates pass. + - [x] [S22a-next:l90-install-deps] Blocked locally: ripgrep (rg) not installed and golangci-lint not on PATH. Next step: install `rg` and ensure `$(go env GOPATH)/bin` (or `GOBIN`) is on PATH so `golangci-lint` is discoverable; then rerun `make check-tools-paths` and `make lint`. + - [x] [S22b:l90-final-sweep-lint-docs] Add README developer prerequisites for ripgrep and golangci-lint with install snippets and PATH note to unblock local lint/path checks without code changes. 
+ - [x] [S32:l90-manifest-crossplat-bin-prefix] In `internal/tools/manifest.go`, perform relative `command[0]` bin-prefix validation using `filepath.ToSlash` and `path.Clean` so Windows path separators are normalized; extend `internal/tools/manifest_test.go` with cases for `./tools/bin/name` and `..\\tools\\bin\\name` (normalized reject) to ensure cross‑platform correctness; DoD: new test fails before change and passes after, `go test ./...` green on Linux/macOS and Windows runners when available. + - [x] [S33:l90-move-fs_read_file-tests] Relocate `tools/fs_read_file_test.go` to `tools/cmd/fs_read_file/fs_read_file_test.go` and refactor it to use `tools/testutil/buildtool.go::BuildTool(t,"fs_read_file")` after [S29:l90-test-helper-buildtool]; DoD: `go test ./tools/cmd/fs_read_file -run FsReadFile` passes in isolation and `go test ./...` remains green. + - [x] [S34:l90-integration-relative-tools-bin] Amend `cmd/agentcli/tools_integration_test.go` to build `fs_write_file` and `fs_read_file` into `tools/bin/` under a temp working directory and write a manifest using relative `./tools/bin/` paths to exercise the new convention end‑to‑end; DoD: test passes locally and in CI with no reliance on absolute paths, suite green. + - [x] [S35:l90-move-fs_write_file-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_write_file_test.go` to `tools/cmd/fs_write_file/fs_write_file_test.go` using `tools/testutil/buildtool.go::BuildTool(t,"fs_write_file")`; DoD: `go test ./tools/cmd/fs_write_file -run FsWriteFile` and `go test ./...` are green. + - [x] [S36:l90-move-fs_append_file-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_append_file_test.go` to `tools/cmd/fs_append_file/fs_append_file_test.go` with `BuildTool(t,"fs_append_file")`; DoD: targeted package test and full suite green. 
+ - [x] [S37:l90-move-fs_mkdirp-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_mkdirp_test.go` to `tools/cmd/fs_mkdirp/fs_mkdirp_test.go` using `BuildTool(t,"fs_mkdirp")`; DoD: targeted package test and full suite green. + - [x] [S38:l90-move-fs_rm-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_rm_test.go` to `tools/cmd/fs_rm/fs_rm_test.go` using `BuildTool(t,"fs_rm")`; DoD: targeted package test and full suite green. + - [x] [S39:l90-move-fs_move-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_move_test.go` to `tools/cmd/fs_move/fs_move_test.go` using `BuildTool(t,"fs_move")`; DoD: targeted package test and full suite green. + - [x] [S40:l90-move-fs_search-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_search_test.go` to `tools/cmd/fs_search/fs_search_test.go` using `BuildTool(t,"fs_search")`; DoD: targeted package test and full suite green. + - [x] [S41:l90-move-fs_listdir-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_listdir_test.go`, `tools/fs_listdir_glob_test.go`, and `tools/fs_listdir_symlink_test.go` to `tools/cmd/fs_listdir/` using `BuildTool(t,"fs_listdir")`; DoD: `go test ./tools/cmd/fs_listdir` and `go test ./...` green. + - [x] [S42:l90-move-fs_read_lines-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_read_lines_test.go` and `tools/fs_read_lines_maxbytes_test.go` to `tools/cmd/fs_read_lines/` using `BuildTool(t,"fs_read_lines")`; DoD: targeted package test and full suite green. + - [x] [S43:l90-move-fs_apply_patch-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_apply_patch_test.go` to `tools/cmd/fs_apply_patch/fs_apply_patch_test.go` with `BuildTool(t,"fs_apply_patch")`; DoD: targeted package test and full suite green. 
+ - [x] [S44:l90-move-fs_edit_range-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_edit_range_test.go`, `tools/fs_edit_range_binary_test.go`, and `tools/fs_edit_range_concurrency_test.go` to `tools/cmd/fs_edit_range/` using `BuildTool(t,"fs_edit_range")`; DoD: `go test ./tools/cmd/fs_edit_range` and `go test ./...` green. + - [x] [S45:l90-move-fs_stat-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/fs_stat_test.go` to `tools/cmd/fs_stat/fs_stat_test.go` with `BuildTool(t,"fs_stat")`; DoD: targeted package test and full suite green. + - [x] [S46:l90-move-exec-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/exec_test.go` to `tools/cmd/exec/exec_test.go` with `BuildTool(t,"exec")`; DoD: targeted package test and full suite green. + - [x] [S47:l90-move-get_time-tests] (after [S29:l90-test-helper-buildtool]) Move `tools/timecli_test.go` (if present) to `tools/cmd/get_time/get_time_test.go` with `BuildTool(t,"get_time")`; DoD: targeted package test and full suite green. + - [x] [S48:l90-untrack-legacy-binaries] (after [S04:l90-gitignore-tighten]) Remove any tracked legacy binaries under `tools/*` without extensions (e.g., `tools/fs_read_file`) via `git rm --cached` and ensure `.gitignore` only ignores `tools/bin/**`; DoD: `git status` shows removals, `make build-tools` produces only `tools/bin/*`, suite green. 
+ - [x] [S60:l90-make-tools-var] Introduce `TOOLS := get_time exec fs_read_file fs_write_file fs_append_file fs_rm fs_move fs_search fs_mkdirp fs_apply_patch fs_read_lines fs_edit_range fs_listdir fs_stat` in `Makefile` and refactor `build-tools` and `clean` to iterate deterministically over this list (no source moves yet), preserving current legacy source paths; DoD: `make build-tools` yields `tools/bin/{get_time,exec,fs_read_file,fs_write_file,fs_append_file,fs_rm,fs_move,fs_search,fs_mkdirp,fs_apply_patch,fs_read_lines,fs_edit_range,fs_listdir,fs_stat}` and `make clean` removes them all with a single list, tests unchanged and green. + - [x] [S61:l90-make-build-tool-target] Add `Makefile` target `build-tool` that builds a single tool into `tools/bin/$(NAME)` from legacy sources using `make build-tool NAME=fs_read_file`; DoD: command works for at least `fs_read_file` and `fs_mkdirp`, errors clearly when `NAME` missing or unknown; tests unchanged and green. + - [x] [S62:l90-verify-manifest-paths] Add `Makefile` target `verify-manifest-paths` that fails if `tools.json` has any relative `command[0]` not starting with `./tools/bin/` (allow absolute paths), implemented with `rg` and precise messaging; DoD: `make verify-manifest-paths` fails before S02 and passes after, message cites offending entries; tests unchanged and green. + - [x] [S63:l90-docs-contrib-workflow] Update `README.md` Contributing to mention `make check-tools-paths`, `make verify-manifest-paths`, and `make build-tool NAME=` as the local workflow during migration; DoD: doc renders on GitHub, commands copy‑paste, tests unchanged and green. + - [x] [S64:l90-migration-guide-doc] Add `docs/migrations/tools-layout.md` describing migration from `./tools/` to `./tools/bin/` and new source layout `tools/cmd/`, and link it from `docs/README.md`; DoD: docs render on GitHub, links resolve, tests unchanged and green. 
+ - [x] [S66:l90-clean-future-checklist-legacy] Replace legacy path references in `FUTURE_CHECKLIST.md` (e.g., `tools/timecli`, single-file `tools/*.go`) with canonical `tools/cmd/*` and `tools/bin/*` or remove obsolete bullets; verification: `rg -n 'tools/timecli|tools/[a-z_]+\.go' FUTURE_CHECKLIST.md` returns no matches; DoD: docs render, no functional code changes, `go test ./...` green. + - [x] [S67:l90-buildtool-drop-legacy] After S33–S47 are complete, simplify `tools/testutil/buildtool.go` by removing legacy source fallbacks (`tools/` dir, `tools/.go` file, and the special-case `tools/timecli`), keeping only `tools/cmd/`; update one representative test to continue using `BuildTool` (no behavior change); verification: `rg -n 'tools/timecli|filepath.Join\(repoRoot, "tools", name\)|tools/[a-z_]+\.go' tools/testutil/buildtool.go` finds no matches; DoD: `go test ./...` green. + - [x] [S68:l90-ci-matrix] Add GitHub Actions workflow `.github/workflows/ci.yml` to run `make tidy lint test build build-tools` on a Go matrix across `ubuntu-latest`, `macos-latest`, and `windows-latest` (install ripgrep where needed); verification: `rg -n 'make (tidy|lint|test|build|build-tools)' .github/workflows/ci.yml` matches and the matrix includes the three OSes; DoD: CI passes on all platforms with no new findings. + - [x] [S69:l90-ci-verify-tools-hygiene] Extend the CI workflow to run `make check-tools-paths` and (after S62) `make verify-manifest-paths` as distinct steps; verification: `rg -n 'check-tools-paths|verify-manifest-paths' .github/workflows/ci.yml` matches both; DoD: workflow fails when a legacy path or invalid manifest command is introduced and passes on current branch. 
+ - [x] [S70:l90-integ-use-buildtool] Refactor `cmd/agentcli/tools_integration_test.go` to use `tools/testutil/buildtool.go::BuildTool(t,"fs_write_file")` and `BuildTool(t,"fs_read_file")` instead of invoking `go build` manually; keep manifest relative `./tools/bin/*` and temp working directory behavior; verification: `go test ./cmd/agentcli -run AdvertisesSchemas` passes and diff limited to the single test file; DoD: full `go test ./...` green. +* [x] Standardize JSON error contract for all tools — on failure, write single-line stderr `{"error":"","hint?":""}` and exit non-zero; runner maps to tool message; DoD: negative-path tests per tool, docs snippet, CI green. + - [x] [S03:l91-fs_listdir-error-json] add failing test for fs_listdir and implement standardized stderr JSON {"error":"..."} + - [x] [S01:l90-fs_mkdirp-error-json] add failing test for fs_mkdirp and implement JSON stderr error contract + - [x] [S02:l90-fs_read_file-error-json] add failing test for fs_read_file and implement JSON stderr error contract (preserve NOT_FOUND marker) + - [x] [S04:l91-exec-error-json] add failing test and implement standardized stderr JSON + non-zero exit in `tools/cmd/exec` for invalid JSON input + - [x] [S05:l91-fs_stat-error-json] add failing test for fs_stat and verify standardized stderr JSON {"error":"..."} + - [x] [S06] fs_search: add stderr JSON error‑contract test for missing query; implement/verify behavior; suite green + - [x] [S07] fs_append_file: add stderr JSON error‑contract test for missing fields; implement/verify behavior; suite green +* [x] Rename the get_time tool entrypoint to follow the canonical layout by moving tools/cmd/get_time/main.go to tools/cmd/get_time/get_time.go; context: canonical tools layout requires tools/cmd/NAME/NAME.go and this package currently uses main.go; scope: tools/cmd/get_time only; low risk and independent; DoD: make build-tools produces tools/bin/get_time (or .exe on Windows), go test -race -cover ./... 
remains green with no coverage regression and all quality gates (vet, format, lint, security, secret detection) green, documentation and tools.json unchanged, peer review completed; verification: build then run the tool with a sample stdin to confirm output, rollback by renaming the file back to main.go. +* [x] Rename the fs_write_file tool entrypoint to follow the canonical layout by moving tools/cmd/fs_write_file/main.go to tools/cmd/fs_write_file/fs_write_file.go; context: canonical tools layout requires tools/cmd/NAME/NAME.go and this package currently uses main.go; scope: tools/cmd/fs_write_file only; low risk and independent; DoD: make build-tools produces tools/bin/fs_write_file (or .exe on Windows), go test -race -cover ./... remains green with no coverage regression and all quality gates (vet, format, lint, security, secret detection) green, documentation and tools.json unchanged, peer review completed; verification: build then run the tool with a sample stdin to confirm output, rollback by renaming the file back to main.go. +* [x] Adjust .gitignore patterns to ignore only built tool binaries (for example tools/bin/**) while keeping all Go sources under tools/** tracked so future canonical layout tools/cmd/NAME/*.go are not accidentally ignored by the existing tools/*/* blanket ignore; smallest change: replace broad patterns with precise ignores and explicit unignores for .go files recursively; scope .gitignore; low risk; DoD: creating tools/cmd/demo/main.go shows as tracked while built binaries under tools/bin are ignored and git status is clean after make build-tools, all gates green; verify by the described steps; rollback by reverting the .gitignore edit. 
+* [x] Eliminate gofmt -s drift by formatting tools/cmd/fs_write_file/fs_write_file_test.go so fmtcheck passes deterministically; smallest change is formatting that file only; scope formatting; low risk and independent; DoD includes make lint and make fmtcheck passing locally and in CI with no other diffs, tests unchanged and green with no coverage regression, all quality gates green (vet, golangci-lint, static analysis, security and secret detection) with no new findings, peer review completed; verification by running gofmt -s -l . and observing no output, rollback by reverting the formatting edit. +* [x] Eliminate gofmt -s drift by formatting tools/cmd/exec/exec.go and tools/cmd/exec/exec_test.go so fmtcheck passes deterministically; smallest change is formatting those files only; scope formatting; low risk and independent; DoD includes make fmtcheck and make lint passing locally and in CI with no other diffs, tests unchanged and green with race and coverage enabled, all quality gates green with no new findings, peer review completed; verification by running gofmt -s -l . and observing these files disappear from the list, rollback by reverting the formatting edits. +* [x] Eliminate gofmt -s drift by formatting tools/cmd/fs_append_file/fs_append_file_test.go so fmtcheck passes deterministically; smallest change is formatting that file only; scope formatting; low risk and independent; DoD includes make fmtcheck and make lint passing locally and in CI with no other diffs, tests unchanged and green with race and coverage enabled, all quality gates green, peer review completed; verification by gofmt -s -l . showing no output for this path, rollback by reverting the formatting edit. 
+* [x] Eliminate gofmt -s drift by formatting tools/cmd/fs_move/fs_move_test.go so fmtcheck passes deterministically; smallest change is formatting that file only; scope formatting; low risk and independent; DoD includes make fmtcheck and make lint passing locally and in CI with no other diffs, tests unchanged and green with race and coverage enabled, all gates green, peer review completed; verification by gofmt -s -l . showing no output for this path, rollback by reverting the formatting edit. +* [x] Eliminate gofmt -s drift by formatting tools/cmd/fs_search/fs_search_test.go so fmtcheck passes deterministically; smallest change is formatting that file only; scope formatting; low risk and independent; DoD includes make fmtcheck and make lint passing locally and in CI with no other diffs, tests unchanged and green with race and coverage enabled, all gates green, peer review completed; verification by gofmt -s -l . showing no output for this path, rollback by reverting the formatting edit. +* [x] Eliminate gofmt -s drift by formatting tools/cmd/fs_stat/fs_stat_test.go so fmtcheck passes deterministically; smallest change is formatting that file only; scope formatting; low risk and independent; DoD includes make fmtcheck and make lint passing locally and in CI with no other diffs, tests unchanged and green with race and coverage enabled, all gates green, peer review completed; verification by gofmt -s -l . showing no output for this path, rollback by reverting the formatting edit. +* [x] Eliminate gofmt -s drift by formatting cmd/agentcli/tools_integration_test.go so fmtcheck passes deterministically; smallest change is formatting that single file; scope formatting; low risk and independent; DoD includes make fmtcheck and make lint passing locally and in CI, tests unchanged and green with race and coverage enabled, all gates green with no new findings, peer review completed; verification by gofmt -s -l . 
showing no output for this path, rollback by reverting the formatting edit. +* [x] Eliminate gofmt -s drift by formatting tools/testutil/buildtool.go so fmtcheck passes deterministically; smallest change is formatting that file only; scope formatting; low risk and independent; DoD includes make fmtcheck and make lint passing locally and in CI, tests unchanged and green with race and coverage enabled, all gates green with no new findings, peer review completed; verification by gofmt -s -l . showing no output for this path, rollback by reverting the formatting edit. +* [x] Amend the Makefile lint target to also run the existing verify-manifest-paths check after check-tools-paths so invalid relative command entries in tools.json are caught by default during local and CI lint runs; smallest change is editing the Makefile lint recipe only to invoke make verify-manifest-paths; scope Makefile; low risk and independent; DoD includes make lint failing before when tools.json has a bad relative command and passing after correction, tests unchanged and green with race and coverage enabled, all quality gates green with no new findings, peer review completed; verification by temporarily introducing an invalid ./tools/... path to observe failure then reverting, rollback by reverting the Makefile edit. +* [x] Add a shared test helper MakeRepoRelTempDir to tools/testutil and refactor tools/cmd/fs_write_file/fs_write_file_test.go to use it by removing its local duplicate, minimal change is adding one helper function and editing that single test file, scope tools/cmd/fs_write_file tests only, low risk and independent, DoD: go test -race -cover ./... remains green with no coverage regression and all quality gates pass, verification: repository search shows no makeRepoRelTempDir definition inside tools/cmd/fs_write_file and the test imports the shared helper, rollback: revert the helper addition and the test edit. 
+* [x] Refactor tools/cmd/fs_move/fs_move_test.go to use a shared MakeRepoRelTempDir helper under tools/testutil (create it if missing, otherwise reuse) and remove its local duplicate; smallest change is adding one helper function and editing that single test file; scope tests only; low risk and independent; DoD includes go test -race -cover ./... green with no coverage regression and all quality gates green; verification by searching that makeRepoRelTempDir is no longer defined locally and the test imports the shared helper; rollback by reverting the helper addition (if newly added) and the test edit. +* [x] Refactor tools/cmd/fs_append_file/fs_append_file_test.go to use a shared MakeRepoRelTempDir helper under tools/testutil (create it if missing, otherwise reuse) and remove its local duplicate; smallest change is adding one helper function and editing that single test file; scope tests only; low risk and independent; DoD includes go test -race -cover ./... green with no coverage regression and all quality gates green; verification by searching that makeRepoRelTempDir is no longer defined locally and the test imports the shared helper; rollback by reverting the helper addition (if newly added) and the test edit. +* [x] Refactor tools/cmd/fs_rm/fs_rm_test.go to use a shared MakeRepoRelTempDir helper under tools/testutil (create it if missing, otherwise reuse) and remove its local duplicate; smallest change is adding one helper function and editing that single test file; scope tests only; low risk and independent; DoD includes go test -race -cover ./... green with no coverage regression and all quality gates green; verification by searching that makeRepoRelTempDir is no longer defined locally and the test imports the shared helper; rollback by reverting the helper addition (if newly added) and the test edit. 
+* [x] Refactor tools/cmd/fs_mkdirp/fs_mkdirp_test.go to use a shared MakeRepoRelTempDir helper under tools/testutil (create it if missing, otherwise reuse) and remove its local duplicate; smallest change is adding one helper function and editing that single test file; scope tests only; low risk and independent; DoD includes go test -race -cover ./... green with no coverage regression and all quality gates green; verification by searching that makeRepoRelTempDir is no longer defined locally and the test imports the shared helper; rollback by reverting the helper addition (if newly added) and the test edit. +* [x] Remove dead test helper file tools/helpers_test.go that declares package main at the tools root and is unused (duplicate temp-dir helpers exist in tool package tests), minimal change is deleting this single file via git rm, scope tests only, low risk and independent, DoD: go test -race -cover ./... remains green with unchanged coverage and all quality gates (vet, format, lint, security, secret detection) pass, verification: searching the repository shows no references to helpers_test.go and go test ./tools reports no tests to run, rollback: restore the file from Git if needed. +* [x] Add a .gitattributes file to enforce LF line endings for Go, Markdown and shell scripts and to normalize text files to avoid CRLF-related failures and noisy diffs, smallest change is committing a single .gitattributes with patterns only; scope repository hygiene; low risk; DoD includes tests unchanged and green with no coverage regression, deterministic runs locally and in CI with no new findings from vet, format, lint, security scanning and secret detection, verification by creating a CRLF file then confirming Git normalizes to LF on checkout and go test ./... passes on Linux and Windows without spurious failures, rollback by reverting the .gitattributes file. 
+* [x] Add a make fmt target that runs gofmt -s -w over the repository and mention it briefly in README near lint so developers can apply formatting easily, smallest change is adding the target and a one line README note, scope Makefile and docs, low risk; DoD includes running make fmt with no unintended diffs, tests unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by running the command locally, rollback by reverting the edits. +* [x] Document and automate submodule initialization by adding a Makefile bootstrap target that runs git submodule update --init --recursive and updating README Installation to include this step so a clean clone reliably fetches .cursor/rules and scripts without SSH keys, smallest change is one Makefile target and a brief README edit, scope Makefile and docs, low risk and independent with no code changes, DoD includes running from a clean clone on Linux and macOS to execute make bootstrap build build-tools successfully with all tests green and no coverage regression and all quality gates green in CI including vet format lint security and secret detection with no new findings, verification by following the updated steps locally and in CI where actions checkout with submodules true succeeds, rollback by reverting the Makefile target and README sentence. +* [x] Add lint/type checks and formatting gates: include `.golangci.yml` enabling `govet`, `gofmt`, `errcheck`, `staticcheck`, `gocyclo` (with reasonable thresholds), `gosimple`; add `make lint` that installs `golangci-lint` if missing and runs it; ensure `go vet ./...` and `gofmt -s -l` produce no output; fail CI if any lints fail. 
+* [x] Add a minimal .editorconfig (root=true) specifying utf-8, end_of_line=lf, insert_final_newline=true, indent_style=tab for Go and indent_style=space indent_size=2 for Markdown and YAML to reduce formatting drift across editors, smallest change is committing the single .editorconfig file; scope repository hygiene; low risk; DoD includes tests unchanged and green with no coverage regression, all quality gates green (vet, format, golangci-lint, static analysis, security scanning and secret detection) with no new findings, verification by saving files in common editors and observing no diffs beyond gofmt expectations, rollback by reverting the file. + - [x] [S01:l207-unblock-lint] Implement Makefile lint PATH invocation by resolving `$(go env GOPATH)/bin/golangci-lint`; `golangci-lint` now runs headlessly. Next step: resolve goanalysis error by upgrading golangci-lint to a version compatible with Go 1.24+. + - [x] [S02:l207-upgrade-golangci] Pin/upgrade golangci-lint to a version compatible with current Go toolchain so `make lint` passes locally and in CI; then re-run `make lint` and complete L207 DoD. + - [x] [S02a:l207-lint-green-internal] Resolve existing `errcheck` and `gocyclo` findings in internal packages first to drive `make lint` toward green without weakening gates. +* [x] Add reports and coverage artifacts to .gitignore including reports/ tests directory and coverage outputs so running the test runner does not create noisy untracked files, smallest change is appending ignore patterns only, scope repository hygiene, low risk; DoD includes running scripts/test-runner.sh and verifying git status is clean, tests unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by git status before and after, rollback by removing the new ignore lines. 
+* [x] Augment the root .gitignore to include standard entries for the dist directory, macOS .DS_Store, the Go go.work workspace file, and common IDE folders .idea and .vscode to prevent accidental commits of local artifacts, smallest change is appending those patterns only to the existing file, scope repository hygiene, low risk and independent, DoD includes running git status before and after creating those files locally to confirm they are ignored, tests unchanged and green with no coverage regression, all quality gates green (vet, format, lint, static and security scans, secret detection) with no new findings, peer review completed, verification by reproducing the ignore behavior on a clean clone, rollback by reverting the .gitignore addition. +* [x] Make builds reproducible by adding -trimpath and stripped ldflags to Makefile build and build-tools while keeping static CGO settings and documenting the change succinctly, smallest change is editing the build recipes only and a short README sentence, scope Makefile and docs, low risk; DoD includes two clean builds producing identical checksums for binaries, tests unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by comparing shasums of repeated builds, rollback by reverting the Makefile edits. +* [x] CLI help: make `--help`/`-h`/`help` print full usage and exit 0 before any required-flag validation or side effects; DoD: `agentcli --help` prints all flags (names, defaults, env fallbacks, precedence) and examples, exits 0 on all OSes; `go test` adds a test that asserts help output and code path; CI and all gates green; one peer review completed. 
+* [x] Split timeouts and make HTTP configurable independently: add `-http-timeout` (env fallback `OAI_HTTP_TIMEOUT`) used exclusively for the OpenAI POST and keep `-tool-timeout` for tools (default 30s), wire `actions/setup-go` CI to exercise both, update README and docs/operations/ci-quality-gates.md, and add unit/integration tests that force a delayed fake server/tool to confirm each timeout triggers the correct error path; DoD: both flags honored, defaults sane (HTTP 60–120s), tests green, docs updated, CI gates green, peer review done. +* [x] Add resilient HTTP retries with backoff for transient failures: introduce `-http-retries` and `-http-retry-backoff` (cap, jitter) to retry on timeouts, network errors, 429, and 5xx, include `Idempotency-Key` header per request to avoid duplicate side effects, respect `Retry-After`, and log attempts to the audit file; DoD: unit tests simulate timeout then success and 429 with Retry-After, integration test with fake server passes, defaults conservative (e.g., 2 retries), docs updated, CI green, peer review done. + - [x] [S01:retries-doc-flags] Update README "Common flags" to document `-http-retries` and `-http-retry-backoff`; tests already green. + - [x] [S02:retries-audit-attempts] Emit per-attempt audit entries for HTTP retries including attempt number, status, and backoff; add unit test; docs/runbook note. 
+ - [x] [S03] Respect Retry-After header for 429/5xx with unit tests + - [x] [S01] Introduce minimal retry policy with tests (timeouts and 5xx); no flags yet + - [x] [S02] Wire `-http-retries` and `-http-retry-backoff` flags; add stable `Idempotency-Key` across retries with unit test +* [x] Emit precise HTTP timing and failure cause to the audit: record DNS/connect/TLS/write/read durations, status, and whether the context deadline fired vs server closed, and surface a concise user hint (“increase -http-timeout or reduce prompt/model latency”); DoD: new fields present in `.goagent/audit/*` with a passing test that asserts structure, performance unaffected, CI green, peer review done. +* [x] Runbook entry for `context deadline exceeded`: add a troubleshooting section that explains causes (slow model, proxy timeouts, too-small `-http-timeout`), mitigation steps (raise `-http-timeout`, tune proxy `proxy_read_timeout`, reduce prompt size), and an example of enabling retries; DoD: docs render on GitHub, copy-paste checks complete, CI docs gates green, peer review done. +* [x] Make relative paths in tools.json resolve against the directory containing that tools.json (not process CWD), preserving existing security checks that reject `..` escapes; update `internal/tools/manifest.go` to anchor, clean, and normalize paths cross-platform, adapt unit/integration tests to cover a manifest in a nested folder, adjust `make verify-manifest-paths` if needed, update `docs/reference/tools-manifest.md` to document the rule and absolute-path allowance for tests, and verify end-to-end that an agent run using a nested tools.json with relative `./tools/bin/*` works; DoD: unit/integration tests added and green on Linux/macOS/Windows, docs updated, CI and all quality gates green, one peer review completed. 
+* [x] Makefile: preserve the logs directory during the clean target by removing any deletion of logs while keeping other artifact removals; DoD: from a clean clone create a sentinel file under logs, build artifacts, run clean, verify the sentinel remains and artifacts are gone, repo status is clean, all gates (lint/vet/format/tests/security/secret) green, one peer review completed. + - [x] [S01:clean-preserve-logs-verified] Verified `make clean` preserves `logs/` (sentinel survives) and removes artifacts (`tools/bin`, `bin`, `reports`, `.goagent`). + - [x] [S02:clean-lint-fixes-tools] Resolve repo-wide `golangci-lint` findings (errcheck/gocyclo in `tools/cmd/*`) so quality gates are green for this item. + - [x] [S02d:errcheck-encode-and-visits] Check json.Encode errors in `fs_listdir`, `fs_read_lines`, `fs_stat`; handle `WalkDir`/`Info`/visit errors deterministically; update tests to check Unmarshal and cleanup removes. + - [x] [S02a:lint-errcheck-fs_read_write] Fix errcheck in `tools/cmd/fs_read_file` and `tools/cmd/fs_write_file`; tests green. + - [x] [S02b:lint-errcheck-batch-2] Address remaining errcheck in `get_time`, `fs_append_file`, and `fs_apply_patch` (encode/close and unsafe ignores); re-run lint. + - [x] [S02c:lint-errcheck-fs_move] Address errcheck in `tools/cmd/fs_move` and its tests (check json.Encode, close defers, test Unmarshal); tests green. +* [x] Makefile: add a guarded clean-logs target that deletes logs only when the file logs/STATE (trimmed of whitespace) equals DOWN and otherwise no-ops, add a clean-all target that runs clean then clean-logs, add a deterministic test target that exercises allowed (DOWN) and blocked (non-DOWN or missing) cases and wire it into CI, and document the behavior in README; DoD: local test target passes for all cases, CI runs it on all OSes and is green, logs are never deleted unless STATE is DOWN, all quality gates green, one peer review completed. 
+* [x] Diagnose and fix opaque `context deadline exceeded` on chat POST by instrumenting HTTP phase timings (DNS/connect/TLS/write/read/idle), verifying and wiring `-http-timeout`/`OAI_HTTP_TIMEOUT` is actually applied to the OpenAI POST (independent from tool timeouts) or adding it if missing, and upgrading the surfaced error to include base URL, phase, and configured timeout with actionable hints (e.g., server unreachable vs slow response); add table-driven unit tests and an integration test with a fake slow server and a refused connection to prove (a) correct timeout behavior, (b) clear error text and exit code 1 for network/timeout, 2 for CLI misuse, and (c) that raising `-http-timeout` resolves the slow-server case; update README and the troubleshooting runbook accordingly; DoD: failing tests first then passing, running the provided repro command yields a precise cause message (not a generic context deadline), CI and all quality gates green, one peer review completed. +* [x] Expand make clean to remove build/test artifacts like bin/coverage.out and reports/ to keep the working tree tidy after local runs, because currently these files remain and can cause noisy diffs; smallest change is editing the clean recipe only; scope Makefile; low risk; DoD includes running make build build-tools test to create artifacts then make clean to remove them with git status clean, tests unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by reproducing the sequence on a clean clone, rollback by reverting the Makefile edit. + - [x] [S00:clean-removes-artifacts] Verified current Makefile removes bin/, coverage.out, reports/, .goagent, and tools/bin; no recipe change needed for this slice. + - [x] [S01:clean-gates] Lint gates currently fail due to pre-existing issues (errcheck/gocyclo). Next step: address lint findings to satisfy Definition of Done for this item. 
+ - [x] [S02:clean-errcheck-agentcli] Fix errcheck and gofmt findings under `cmd/agentcli` (unit + integration tests); reduce `runAgent` cyclomatic complexity below threshold by extracting helpers; DoD: tests green; lint passes for `cmd/agentcli/**` locally; full repo lint may still have findings to be addressed in follow-up slices; no gate weakening. +* [x] Bound tools/fs_search scanning by skipping known binary/output directories (e.g., .git, bin, logs, tools/* built binaries) and enforcing a sane per-file size limit with clear errors to prevent performance and memory spikes on large repositories; smallest change is editing tools/fs_search/main.go only; scope tools; moderate risk due to traversal changes; DoD includes unit tests covering directory exclusion, size limits, and unchanged matching semantics on text files, tests green locally and in CI with no coverage regression, all quality gates green (vet, format, golangci-lint, static analysis, security/secret detection) with no new findings, docs/runbooks updated with the exclusions, peer review completed, verification by searching a repo containing large/binary files and observing fast, bounded behavior, rollback by reverting the changes. + - [x] [S01:fs_search-skip-binaries] Skip `bin/`, `logs/`, and `tools/bin/` during walking; add test + - [x] [S02:fs_search-size-limit] Enforce 1MiB per-file size cap with clear error; add unit test +* [x] Centralize audit logs under repository root .goagent/audit/YYYYMMDD.log by resolving module root (walk up to go.mod) in internal/tools/runner.go instead of using current working directory so subpackage tests no longer create nested .goagent directories; smallest change: edit appendAuditLog to compute root and write there; scope internal/tools only; low risk; DoD: failing test first then passing implementation, go test -race -cover ./... 
green with quality gates and docs unchanged, verify only root path receives logs and they are ignored by Git, rollback by reverting the change. +* [x] Enforce flag > env > default precedence for `-http-timeout`, `-tool-timeout`, and `-timeout` and add table-driven tests that set env to 90s and flags to 300s verifying the effective values are 5m; DoD: tests green, CI gates green, peer review done. +* [x] Remove any `min()` clamping between global `-timeout` and `-http-timeout`; use `-http-timeout` exclusively for the chat POST context and `-timeout` only for overall run budget; DoD: unit/integration test proves a 300s HTTP timeout is honored even when global is shorter/longer, CI green, peer review done. +* [x] Make duration flags robust: accept plain seconds (int) as well as Go duration strings by parsing `300` as `300s`; DoD: tests cover `300`, `300s`, `5m`, invalid inputs; CI green, peer review done. +* [x] Print the **effective** timeouts and their **sources** (flag/env/default) under `-debug` and in timeout errors (e.g., `http-timeout=5m source=flag`); DoD: unit test asserts formatting, failing timeout shows accurate values, CI green, peer review done. +* [x] Add `--print-config` that exits 0 and dumps the resolved config (model, base URL, all timeouts with sources) so misconfigurations are obvious; DoD: unit test for output, docs updated, CI green, peer review done. +* [x] Integration test: fake slow server proves `-http-timeout 300` allows \~5m before cancel while default (90s) cancels \~1m30s; DoD: deterministic test with `httptest.Server` and sleep, CI green, peer review done. +* [x] Create ADR-0003 “Toolchain & Lint Policy (Go + golangci-lint)” documenting that CI must use the Go version declared by `go.mod` and that `golangci-lint` is pinned to a known-good version for that Go line; include upgrade policy (bump both together via PR), risks, and rollback. 
Smallest change: add `docs/adr/0003-toolchain-and-lint-policy.md` with context, options, decision, consequences, and a link to the canonical issue URL (created in this PR). DoD: ADR rendered on GitHub, linked from `docs/README.md` and `README.md` (Tooling section); all gates green; one peer review completed. + - [x] [S01:l248-lint-green] Install ripgrep (rg) locally so `make check-tools-paths` can run, then rerun `make lint` to satisfy gates; next step: `sudo apt-get update && sudo apt-get install -y ripgrep` (Linux) or `brew install ripgrep` (macOS). +* [x] Pin CI to module Go version using `actions/setup-go` with `go-version-file: go.mod`. Smallest change: edit `.github/workflows/ci.yml` to configure `actions/setup-go@v5` with `go-version-file: go.mod` in every job (linux/macos/windows), print `go version` for traceability, and keep the existing matrix. DoD: a fresh CI run shows the same Go major.minor on all OSes (visible in logs), `make tidy lint test build build-tools` pass, gates green, peer review completed, rollback by reverting the workflow hunk. +* [x] Change the **default sampling temperature to 1.0** by updating the agent CLI’s flag default and the underlying request-option defaults, ensure the value propagates into the outbound payload when supported, add a unit test that asserts the resolved default is 1.0 when no overrides are provided, and update README’s “Common flags” so `-temp` shows “default 1.0” instead of 0.2 (also add a short rationale line in docs); DoD: unit tests green, `agentcli -h` displays 1.0, README/Docs updated. 
([GitHub][1]) +* [x] Implement a **model capability map** in the internal request-options layer to determine `SupportsTemperature` per model (e.g., GPT-5 variants → true; any known exceptions → false with inline comment), expose a simple lookup used at call time, and write a table-driven unit test that covers at least three model IDs and both outcomes; DoD: lookup used by payload builder, tests green, brief note added to docs “Model parameter compatibility”. +* [x] Update the **payload encoder** so that `temperature` is omitted entirely when `SupportsTemperature == false` and included (1.0 or user override) when true, preserving existing behavior for other params; add golden-file or snapshot tests for both branches; DoD: encoder tests green and logs confirm presence/omission in debug mode. +* [x] Add an **ADR (“Default LLM Call Policy”)** under `docs/` (create `docs/adr` if missing) capturing context, options considered, the decision to default temperature to 1.0 with capability-based omission, and the retry/guard policies; include a Mermaid sequence of the tool-call flow; DoD: ADR merged and diagram renders on GitHub. +* [x] Provide a **worked example** in `examples/` demonstrating a tool-call session that exercises: default temperature 1.0, parallel tool calls, and the corrected message sequencing; include a tiny fake tool and a transcript dump; DoD: `go test ./examples/...` (or your repo’s test convention) passes and README links to it. +* [x] Add **parallel tool-call support**: execute multiple `tool_calls[]` concurrently and append exactly one `tool` message per returned `tool_call_id` in any order acceptable to the API, then continue; include a concurrency unit test that simulates two tools with different latencies; DoD: tests green and docs example added. 
+* [x] Fix **message assembly** so the flow is always: user/assistant→assistant with `tool_calls[]`→one `tool` message per `tool_call_id` with the tool output→assistant (repeat as needed), never emitting a standalone `tool` message early; add an end-to-end mock test that exercises one and multiple tool calls; DoD: test green and README shows a minimal JSON example of correct sequencing. ([OpenAI Platform][2]) +* [x] Document correct tool-call sequencing with a minimal JSON example in `README.md#tool-calls` (assistant with `tool_calls[]` → tool messages with matching `tool_call_id` → assistant), including note on parallel tool calls requiring one tool message per id. ([Microsoft Learn][8]) +* [x] Implement a **message-sequence validator** that rejects any `role:"tool"` message unless it responds to a prior assistant message with `tool_calls[]` and a matching `tool_call_id`, and surface a pre-flight error that mirrors the API’s wording; add unit tests for valid/invalid transcripts; DoD: validator on by default, tests green, troubleshooting doc updated. ([OpenAI Platform][2]) +* [x] Add a **regression test for the exact 400** you hit by crafting a transcript with a stray `role:"tool"` lacking a prior `tool_calls[]`; assert the validator blocks it locally with a helpful error and that the request is never sent; DoD: failing test first, then fix, then green, and an entry added to the Troubleshooting section. +* [x] CLI errors: when required flags (e.g., `-prompt`) are missing, print concise error followed by the usage synopsis and exit with code 2 (not 1); DoD: unit test verifies stderr contains error + usage, exit code is 2, no network or tool exec attempted; CI and all gates green; one peer review completed. +* [x] CLI version: add `--version`/`-version` to print semver + commit + build date and exit 0 without validating other flags; DoD: unit test asserts format and exit code; README “Usage” updated; CI and all gates green; one peer review completed. 
+* [x] Add `make check-go-version` that fails early if the active toolchain doesn’t match `go.mod`. Smallest change: in `Makefile`, add target that extracts `MOD_GO=$$(awk '/^go [0-9]+\\.[0-9]+/ {print $$2; exit}' go.mod)` and `SYS_GO=$$(go version | sed -E 's/.*go([0-9]+\\.[0-9]+).*/\\1/')`; compare and `exit 2` with a clear message if different. Document this target briefly in `README.md` under “Developer workflow”. DoD: running `make check-go-version` passes when versions match and fails with an actionable message when mismatched; CI invokes it (temporarily from a one-off verification commit) and stays green; peer review completed. + - [x] [S01:l252-wire-lint] Prepend `check-go-version` to `lint` so it runs first; local `make lint` now fails fast on toolchain mismatch as intended. + - [x] [S01b:l252-local-proof] Verified locally that `make lint` executes `check-go-version` first (observed "check-go-version: OK" before golangci-lint output); CI verification remains pending. + - [x] [S02b:l252-robust-mod-parse] Harden `Makefile` `check-go-version` to parse Go major.minor via `go mod edit -json` for stability; `make lint` and `go test ./...` green locally. + - [x] [S02e:l252-local-test-order] Add a deterministic unit test `internal/ci/ci_workflow_test.go::TestLintOrderLocallyAndInWorkflow` that inspects the Makefile lint recipe block to ensure `check-go-version` precedes `golangci-lint`, and asserts the CI workflow contains the explicit lint order assertion step. Suite green. + - [x] [S02d:l252-blocked-note] Blocked locally: cannot push or open PR from this session to trigger CI. Next step: open a PR (or push to a tracked branch) to run the workflow and verify logs show `check-go-version: OK` before golangci-lint across the OS matrix. 
+ - [x] [S04:l252-ci-lint-step-name] Name the workflow step explicitly as `lint (includes check-go-version)` in `.github/workflows/ci.yml` so reviewers can spot the gate in logs quickly; DoD: CI run shows the named step and includes the `check-go-version: OK` line preceding linter output. + - [x] [S05:l252-doc-ci-gate] Update `docs/operations/ci-quality-gates.md` to state that `make lint` enforces `check-go-version` first and that CI runs the same gate; include a short excerpt of the expected log line `check-go-version: OK (system X.Y matches go.mod X.Y)`; DoD: docs render on GitHub, links intact, tests unchanged and green. + - [x] [S06:l252-readme-badge] Add a README CI badge pointing to `.github/workflows/ci.yml` and label it "CI (lint+test+build)"; DoD: badge renders and links to the workflow page, no broken images, tests unchanged and green. + - [x] [S07:l252-ci-assert-order] Amend `.github/workflows/ci.yml` lint step to pipe output to `lint.log` and add a subsequent `bash` step that fails if `rg -n "^check-go-version: OK" lint.log` is absent or occurs after `rg -n "^golangci-lint version" lint.log`; DoD: CI logs show both lines in order and job stays green. + - [x] [S08:l252-ci-upload-artifact] Add an `actions/upload-artifact@v4` step to publish `lint.log` as `lint-${{ matrix.os }}`; DoD: artifacts visible for ubuntu/macos/windows in the run. + - [x] [S09:l252-readme-dev-hint] Update `README.md` Developer workflow to include the exact mismatch message `Go toolchain mismatch: system X.Y != go.mod X.Y` and remediation; DoD: `rg -n "Go toolchain mismatch" README.md` matches once, docs render. + - [x] [S01:l242-exe-suffix] Resolve Windows suffix by referencing "golangci-lint$(EXE)" under GOPATH/bin in lint recipe so invocation works cross‑platform after on-demand install. 
+ - [x] [S01:l242-preexisting-lints] Resolve current golangci-lint and fmtcheck findings in tests to get `make lint` green; scope: fix errcheck on cleanup removes and gofmt -s formatting; no gate weakening. + - [x] [S02b:l207-install-rg] Blocked locally: ripgrep (rg) not installed; `make check-tools-paths` fails in lint. Next step: install ripgrep and rerun `make lint`. +* [x] Pin `golangci-lint` deterministically and install to a known path. Smallest change: in `Makefile`, add `GOLANGCI_LINT_VERSION ?= v1.60.3` and `GOBIN ?= $(CURDIR)/bin`; an `install-golangci` target runs `curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(GOBIN) $(GOLANGCI_LINT_VERSION)`; update `lint` to call `$(GOBIN)/golangci-lint run` (no PATH assumptions). Add a short note to `README.md` and `docs/operations/ci-quality-gates.md` explaining the pin and location. DoD: on a clean machine `make lint` installs the pinned version into `./bin` and runs successfully; CI is green on all OSes; peer review completed; rollback by reverting `Makefile` and doc changes. +* [x] CLI flags order independence: make argument parsing accept flags in any order without requiring `-prompt` to appear first while preserving precedence (flag > env > default) and error semantics; add **table-driven local unit tests** that permute common orders (e.g., `-prompt` before/after other flags, mixed with `-debug`, `-tools`, `--help`) asserting identical parsed values, `--help` exits 0 regardless of position, and “-prompt is required” uses exit code 2 only when truly absent; update the user guide to state flags are order-insensitive; **DoD:** `go test ./...` passes locally and `./agentcli -h` behaves as documented. 
+* [x] Makefile lint reliability: update the lint recipe to reliably invoke `golangci-lint` after on-demand install by either resolving the binary via `$(go env GOPATH)/bin/golangci-lint` or installing to `./bin` and prepending it to PATH inside the recipe; **DoD:** on a fresh shell with no `golangci-lint` in PATH, `make lint` installs the tool and succeeds locally.
+* [x] Lint prerequisites: install `ripgrep` locally if missing and re-run `make lint` to confirm targets that depend on `rg` behave; **DoD:** `make lint` completes successfully on this machine after installing `ripgrep`.
+* [x] Reference docs: add a CLI reference page enumerating every flag with default, environment fallback, precedence, and exit codes for `--help`/`--version`/missing-required; link from the main README; include a tiny local script/test that compares each documented flag to `./agentcli -h` output to ensure the page reflects the binary; **DoD:** docs render locally and the checker passes.
+ - [x] Add a tiny checker that diffs `docs/reference/cli-reference.md` against `./bin/agentcli -h` and a local test that runs it.
+* [x] LLM policy docs: update the policy page to state default `temperature=1.0`, show GPT-5 controls like `verbosity` (`low|medium|high`) and `reasoning_effort`, and note that some models restrict sampling knobs; **DoD:** page updated and reviewed locally (no external link checks required).
+* [x] Config precedence for temperature: implement resolution order `--temperature` > `LLM_TEMPERATURE` env > config file > default 1.0, and when `--top-p` is provided unset `temperature` (with a one-line warning to stderr); add local flag-parsing table tests covering overlaps and edge cases; **DoD:** `go test ./...` green and manual runs show correct precedence.
+* [x] Parameter-recovery retry: when the API returns HTTP 400 mentioning invalid/unsupported `temperature`, remove `temperature` from the payload and retry **once** before normal exponential backoff; log the recovery with a structured field; **DoD:** local integration test using a mock server that first 400s on `temperature` and then succeeds without it. +* [x] Temperature-nudge guard: make the nudge logic a no-op when the selected model does not support temperature and clamp adjustments within `[0.1, 1.0]` otherwise; include local unit tests that simulate repetition/format-failure to trigger −0.1 and diversity to trigger +0.1 without exceeding bounds; **DoD:** `go test ./...` passes locally. +* [x] ADR addendum: append an addendum to the default-policy ADR documenting the change to default `temperature=1.0` for API parity and GPT-5 compatibility with a concise rationale and rollout note; **DoD:** ADR renders locally and is linked from the docs index. +* [x] One-knob sampling rule: enforce that if the user passes `--top-p` then `temperature` is omitted from the payload, and when `--top-p` is absent send `temperature` (default 1.0) with `top_p` unset; add local tests for precedence and serialization and a one-sentence rule in the docs; **DoD:** tests pass locally and docs updated. +* [x] Prompt profile mapper: implement a mapper (`deterministic|general|creative|reasoning`) that sets temperature to `0.1` (if supported) for deterministic and `1.0` for the rest, omitting temperature if a model forbids it; add local unit tests for the mapping and a doc table with examples; **DoD:** `go test ./...` passes and docs updated. +* [x] Observability fields: emit `temperature_effective` (post-clamp/omission) and `temperature_in_payload` (bool) in structured logs; **DoD:** local unit test asserts both fields appear and manual run shows fields in debug logs. 
+* [x] Length backoff: on truncation (e.g., `finish_reason=="length"`), automatically double the completion cap once (bounded by remaining context) and retry; include local unit tests that simulate truncation; **DoD:** `go test ./...` passes and behavior described in the policy docs. + - [x] [S01:request-max-tokens-field] Add `MaxTokens int` with `json:"max_tokens,omitempty"` to `internal/oai/types.go` struct `ChatCompletionsRequest`; add `internal/oai/types_serialization_test.go` to assert JSON includes `max_tokens` only when set; **DoD:** tests pass and no breaking changes to existing payload fields. + - [x] [S02:agent-plumb-cap-field] In `cmd/agentcli/main.go` request assembly, include `req.MaxTokens` only when a local variable `completionCap > 0`; no behavior change yet (default `completionCap=0`); add unit test asserting absence when `completionCap=0`; **DoD:** tests pass and binary behavior unchanged. + - [x] [S03:length-retry-core] In `cmd/agentcli/main.go` within the per-step call, detect `resp.Choices[0].FinishReason=="length"` and implement a one-time in-step retry guarded by `retriedForLength bool` that sets `completionCap = max(256, completionCap*2)` and resends with `req.MaxTokens=completionCap`; ensure we do not increment the agent step on the retry; **DoD:** logic compiles, guarded by unit tests in next slice. + - BLOCKED: [S04:test-length-retry-mock] Spec expects second request `max_tokens:512`, but current implementation initializes `completionCap=0`, so first retry sets `max_tokens` to 256 (not 512); proceeding would fail deterministically. + - Next: Edit this checklist to expect second request `max_tokens:256` (0→256 per S03), then add the test in `cmd/agentcli/main_test.go` asserting exactly two POSTs and `max_tokens:256` on the second request. 
+ - [x] [S05:context-window-map] Create `internal/oai/context_window.go` with `ContextWindowForModel(model string) int` (defaults to 128000; e.g., `oss-gpt-20b`→8192) and `internal/oai/context_window_test.go` covering the map; **DoD:** tests pass and function usable by clamp logic. + - [x] [S06:naive-token-estimator] Add `internal/oai/token_estimate.go` with `EstimateTokens(messages []Message) int` using a simple heuristic (e.g., chars/4 + per-message overhead); include `internal/oai/token_estimate_test.go` asserting monotonic growth and rough scale; **DoD:** tests pass; no external deps. + - [x] [S07:cap-clamp] Add `ClampCompletionCap(messages []oai.Message, cap, window int) int` in `internal/oai/context_window.go` to bound `cap` to `max(1, window-EstimateTokens(messages)-32)`; include unit tests; **DoD:** tests pass and clamp is deterministic. + - [x] [S08:integrate-clamp] In `cmd/agentcli/main.go` length-retry path, compute `window := oai.ContextWindowForModel(cfg.model)` then set `completionCap = oai.ClampCompletionCap(messages, completionCap, window)` before resend; add `TestLengthBackoff_ClampDoesNotExceedWindow` to assert we never exceed remaining context by inspecting second request `max_tokens`; **DoD:** tests pass. + - [x] [S09:audit-length-backoff] Emit an NDJSON audit entry on length backoff using existing `internal/oai/client.go:appendAuditLog` mechanism (event `"length_backoff"`, fields: `model`, `prev_cap`, `new_cap`, `window`, `estimated_prompt_tokens`); unit test writes to a temp module root by `chdir` and asserts one log line with expected fields; **DoD:** test passes on all platforms. + - [x] [S10:edge-cases] Add tests in `cmd/agentcli/main_test.go` verifying (a) no retry when `FinishReason!="length"`, (b) only one retry even if the second response is also `"length"` (proceed without further retries), and (c) length backoff does not interfere with tool_call flow; **DoD:** tests pass and existing tool-call tests remain green. 
+* [x] Agent loop safety: ensure the default `-max-steps` is 8 with a hard ceiling of 15 and terminate with a clear “needs human review” message when the cap is hit; **DoD:** local unit test drives the loop to the cap and asserts the message, and the README mentions the guard. +* [x] HTTP timeouts and retries: wire jittered exponential backoff for 429/5xx/timeouts and use a sane default overall timeout (minutes, not seconds); add local unit tests that verify the retry schedule and backoff growth; **DoD:** `go test ./...` passes and the README’s defaults match observed behavior. +* [x] README defaults: update the “Common flags” table so defaults (notably `-temp 1.0` and `-max-steps 8`) match the binary, and add a short “Why you usually don’t need to change knobs” section pointing to the policy; **DoD:** README renders locally and manual `./agentcli -h` matches the table. +* [x] GPT-5 live smoke: add a local smoke test that runs `./agentcli --model gpt-5` with **no sampling flags** and asserts that the request includes `temperature:1` while allowing `verbosity`/`reasoning_effort` to be set; **DoD:** test passes locally when `GPT5_OPENAI_API_URL` and `GPT5_OPENAI_API_KEY` are exported. +* [x] GPT-5 mock smoke: add a local integration test using a mock OpenAI-compatible endpoint that asserts `temperature:1` is sent by default and that reasoning controls can be toggled without altering temperature; **DoD:** mock test passes locally and the docs include a “Zero-config with GPT-5” example. +* [x] Pre-stage timeouts: inherit `-prep-http-timeout` from the existing `-http-timeout` (and `OAI_PREP_HTTP_TIMEOUT` env) instead of falling back to deprecated `-timeout`. Remove any mention of `-timeout` in pre-stage. DoD: unit tests show `-prep-http-timeout` > `OAI_PREP_HTTP_TIMEOUT` > `-http-timeout` > default; help/README updated. 
+* [x] Pre-stage param knobs follow one-knob rule + capability map: add `-prep-top-p` and reuse the existing model capability map to omit `temperature` (or `top_p`) when unsupported; also reuse the 400 “parameter-recovery” retry once for the pre-stage. DoD: table tests cover presence/omission and the 400→retry flow; docs note parity with main call. + - [x] Introduce a minimal pre-stage HTTP call before the main loop that builds an `oai.ChatCompletionsRequest` using `cfg.prepHTTPTimeout`, applies one‑knob logic (`-prep-top-p` vs temperature), and reuses the existing client (for 400 temperature recovery); return messages unchanged to keep behavior stable. +* [x] Pre-stage uses the same message-sequence validator: run the existing validator that forbids stray `role:"tool"` and enforces `tool_call_id` matching during the pre-processing loop, too. DoD: failing test with a stray tool message in pre-stage; passing after wiring; troubleshooting doc mentions both stages. +* [x] Parallel tool-calls in pre-stage: execute multiple `tool_calls[]` concurrently during pre-processing, just like the main loop. DoD: concurrency test with two pre-stage tool calls of different latency; transcript contains exactly one `tool` message per id. +* [x] Restrict pre-stage tools to built-in read-only adapters (no external commands by default): expose only in-process tools `fs.read_file`, `fs.list_dir`, `fs.stat`, `env.get`, `os.info` under a separate pre-stage tool registry; ignore `-tools` for pre-stage unless explicitly overridden by `-prep-tools-allow-external`. DoD: attempting to write or exec in pre-stage deterministically returns an error; security doc updated. +* [x] If `-prep-tools-allow-external` is enabled, reuse manifest rules: resolve the pre-stage manifest relative to its own file, require `./tools/bin/*` (Windows `.exe` honored), and enforce the existing escape/`..` rejection and cross-platform path normalization. 
DoD: unit tests mirror `internal/tools/manifest_*` for the pre-stage path. +* [x] Audit + structured logs include `stage:"prep"` with HTTP timings and idempotency key: reuse your current NDJSON audit schema and emit the same timing fields and `Idempotency-Key` for the pre-stage call. DoD: test asserts one prep entry with `{stage:"prep", timings..., idempotency_key}` at repo root `.goagent/audit/`. +* [x] Config dump shows pre-stage config: extend `-print-config` to include `prep` block (model, base URL, http timeout, retries/backoff, temperature/top_p resolved and sources). Env precedence: `OAI_PREP_*` > main flag/env where applicable; API key fallback chain `OAI_PREP_API_KEY` → `OAI_API_KEY` → `OPENAI_API_KEY`. DoD: unit test snapshots config; README examples updated. +* [x] Channel printing harmonized with current debug/quiet behavior: default print only `assistant{channel:"final"}` to stdout; `-verbose` prints `critic`/`confidence` to stderr without interleaving raw JSON; `-debug` continues to dump raw request/response JSON to stderr after any human-readable channel output; `-quiet` prints just the final text. DoD: formatter tests cover combinations and ordering; help/README table updated. +* [x] Pre-stage cache key expanded to include knobs + tool spec: hash `(prompt/system/developer inputs, prep model/base, temp/top_p effective, retries/backoff, pre-stage tool set or external manifest content hash)`; honor `-prep-cache-bust`. DoD: tests for hit/miss/TTL; docs updated. +* [x] ADR numbering + links: use ADR-0005 “Harmony pre-processing & channel-aware output” (ADR-0004 already exists). Link from README and docs index; include sequence diagram that reflects parallel tool calls, validator, and audit stages. 
+ - [x] [S04:harmony-prep-seq-diagram] Create `docs/diagrams/harmony-prep-seq.md` showing CLI → pre‑stage HTTP → parallel tool calls → `ValidateMessageSequence` → cache/audit → merge → main call; DoD: diagram file exists, is referenced from ADR‑0005 and docs index, and renders as Markdown. + - [x] [S01:adr-0005-file] Author `docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md` (Context/Decision/Consequences) documenting pre-stage flags and flow in `cmd/agentcli/main.go` (e.g., `-prep-*`, cache, parallel tool calls), validator `internal/oai/validator.go:ValidateMessageSequence`, channel routing, and audit `stage:"prep"`, and reference `docs/diagrams/harmony-prep-seq.md` (depends on S04 for link to resolve); DoD: file renders on GitHub and all relative links resolve. + - [x] [S02:docs-index-adr-link] Insert ADR‑0005 entry in `docs/README.md` under ADRs with link `adr/0005-harmony-pre-processing-and-channel-aware-output.md` (depends on S01); DoD: link is clickable and resolves in GitHub UI. + - [x] [S03:readme-adr-link] Add an ADR‑0005 bullet in root `README.md` “Documentation” section linking to `docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md` (depends on S01); DoD: link renders and passes review. + - [x] [S05:docs-index-diagram-link] Add `docs/diagrams/harmony-prep-seq.md` to the Diagrams list in `docs/README.md` (depends on S04); DoD: link is present and resolves. + - [x] [S06:cli-reference-xref] In `docs/reference/cli-reference.md`, add a short “See ADR‑0005” cross‑link near the `-prep-*` flags pointing to `../adr/0005-harmony-pre-processing-and-channel-aware-output.md` (depends on S01); DoD: link resolves and headings remain intact. 
+ - [x] [S07:readme-prestage-xref] In `README.md` near “View refined messages (pre-stage and final)” or the `Common flags` block, add a parenthetical “See ADR‑0005” linking to `docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md` (depends on S01); DoD: formatting consistent and link resolves. + - [x] [S08:adr-0004-see-also] At the top of `docs/adr/0004-default-llm-policy.md`, add a “See also: ADR‑0005” cross‑link to `docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md` (depends on S01); DoD: link resolves; no other content changes. + * [x] [S09:verify-docs-ci] Run `go test ./...` and `make lint` locally; verify all new links clickable in GitHub preview and no docs build/lint checks regress (depends on S01, S04–S08 completed); DoD: tests/lint green and reviewers can navigate ADR‑0005 and its diagram from both READMEs. + - [x] Next: Apply `gofmt -s` to flagged files and handle `errcheck` in tests (add checks or justified nolint) to get `make lint` green for S09. + - [x] Next: Reduce `gocyclo` violations for `cmd/agentcli:parseFlags`, `cmd/agentcli:runPreStage`, `internal/tools:RunToolWithJSON`, and long test functions; perform no‑behavior‑change refactors until `make lint` passes. + - [x] [S10:readme-diagram-link] In root `README.md` under “Diagrams”, add a bullet linking to `docs/diagrams/harmony-prep-seq.md` (depends on S04); DoD: link renders and resolves alongside existing diagram links. + - Next: Use ADR-0005 for “Harmony pre-processing & channel-aware output”, link from README and docs index, and include the sequence diagram. +* [x] Windows suffix + path normalization everywhere: ensure printing/examples show `./tools/bin/NAME.exe` where applicable and path normalization uses `filepath.ToSlash` before validation (already in repo—just reuse). DoD: docs and tests show at least one Windows example; cross-platform tests green. 
+* [x] One-knob parity in docs and help: clarify that both stages obey “if `--top-p` is set, `temperature` is omitted” and defaults are 1.0 (main) and 1.0 (prep) unless the user selects a `--prep-profile deterministic` (sets temp 0.1 if supported). DoD: help text + docs reflect the rule; unit tests for profiles. + - [x] Added `-prep-profile` flag with profile→temperature mapping (uses `internal/oai/profile.go`), reflected in help/README/CLI docs; tests added for deterministic profile and one‑knob precedence over profile. +* [x] Roles/precedence merge updated: when CLI provides `-developer`/`-developer-file`, those messages are prepended ahead of any pre-stage developer messages in the final array; system resolution is “CLI `-system` if present, else pre-stage system, else default string” (unchanged). DoD: merge tests for all permutations; README “Roles & precedence” examples updated. +* [x] Pre-stage enable/disable switch and fail-open fallback. Add `-prep-enabled` (default true). On any pre-stage error (network, schema/validator, tool failure, invalid roles), log `WARN:` once to stderr, skip pre-stage, and proceed with original `{system,user}`. DoD: table tests for each failure → fallback; docs show behavior; `-print-config` reflects `prep.enabled`. +* [x] Explicit pre-stage model/endpoint flags. Add `-prep-model`, `-prep-base-url`, `-prep-api-key`, `-prep-http-retries`, `-prep-http-retry-backoff`. Precedence: flag > `OAI_PREP_*` > main-call equivalents > defaults. DoD: unit tests for precedence; `-print-config` shows resolved prep block; README updated. +* [x] Role input flags not yet in CLI. Implement `-developer` (repeatable), `-developer-file` (repeatable), plus `-prompt-file` and `-system-file` (each mutually exclusive with their string counterpart; `-` means STDIN). DoD: parser tests (mutual exclusion, repeats, STDIN), help text, examples; merge logic matches your “Roles/precedence” bullet. +* [x] Pre-stage dry-run and message viewing. 
Add `-prep-dry-run` (run pre-stage only, print refined Harmony messages to stdout, exit 0) and `-print-messages` (pretty-print the final merged message array to stderr before the main call). DoD: snapshot tests; README examples. +* [x] Streaming for assistant\[final]. If server supports streaming, stream only `assistant{channel:"final"}` to stdout; buffer other channels for `-verbose`. DoD: mock streaming test; docs “Streaming” section; `-quiet` still prints just the streamed final. +* [x] Save/load refined messages. `-save-messages path` writes the final merged Harmony array to JSON; `-load-messages path` bypasses pre-stage and `-prompt` using that JSON verbatim (still validator-checked). DoD: round-trip tests; schema doc; conflict errors covered. +* [x] Custom channel routing. `-channel-route name=stdout|stderr|omit` (repeatable) to override defaults (final→stdout, critic/confidence→stderr). Invalid channel/destination → exit 2 with message. DoD: routing matrix tests; help/README updated. +* [x] Pre-stage tools manifest path. Add `-prep-tools path` to point to a (possibly different) manifest; honor all existing manifest validations (anchoring to manifest dir, `./tools/bin/*`, Windows `.exe`, `..` escapes). Works only with `-prep-tools-allow-external`. DoD: unit + integration tests with a nested manifest. + - Implemented flag parsing, help/CLI docs, cache key preference, and pre-stage manifest selection with `-prep-tools` overriding `-tools` when external tools are allowed. Added unit test `TestPrep_UsesPrepToolsWhenProvided`. Added integration test `TestPrep_Integration_NestedManifestResolution` exercising nested manifest resolution under pre-stage using `httptest.Server`. +* [x] Harmony normalizer. In pre-stage, normalize/validate roles (`system|developer|user|assistant|tool`) and assistant `channel` tokens (ASCII, short). Unknown roles → hard error (triggers fallback); unknown channels → pass through but not auto-printed unless routed. 
DoD: validator tests and clear errors.
+* [x] End-to-end acceptance test. One deterministic test that: (1) runs pre-stage using a mock server returning Harmony with two parallel tool calls (readonly), (2) validates channel routing and message merging, (3) runs the main call to completion. DoD: single test under `cmd/agentcli` green locally without network.
+* [x] Implement `tools/cmd/img_create/img_create.go` binary: read stdin JSON `{"prompt":string,"n?:int(1..4 default 1)","size?:string(default \"1024x1024\", pattern ^\\d{3,4}x\\d{3,4}$)","model?:string(default \"gpt-image-1\")","return_b64?:bool(default false)","save?":{"dir":string(repo-relative, required when return_b64=false),"basename?":string(default \"img\"),"ext?":\"png\"}}`; POST `{"model","prompt","n","size","response_format":"b64_json"}` to `${OAI_IMAGE_BASE_URL:-$OAI_BASE_URL}/v1/images/generations` with `Authorization: Bearer $OAI_API_KEY`, `Content-Type: application/json`; on success, if `return_b64` true output `{"images":[{"b64":"..."},...]}` (truncate each b64 to length 0 and include `{"hint":"b64 elided"}` unless `-debug-b64` env true), else decode each image, write as `save.dir/save.basename_{001..}.png` atomically and output `{"saved":[{"path":"...","bytes":int,"sha256":"..."}], "n":int,"size":"...","model":"..."}`; on error, write single-line stderr JSON `{"error":"<message>","hint?":"<hint>"}` and exit non-zero; strict repo-relative path enforcement, no shell exec; HTTP timeout from `OAI_HTTP_TIMEOUT` (default 120s) and 429/5xx/timeout backoff 2 attempts; DoD: unit tests (happy path with 1×1 PNG b64, missing prompt, invalid dir, API 400, timeout+retry), golden stdout schema, works offline via `httptest.Server` mocking Images API, docs example runnable, `make build-tools` emits `tools/bin/img_create`, lint/tests green. 
+* [x] Extend tool manifest schema to allow secrets safely: add optional `envPassthrough: ["OAI_API_KEY","OAI_BASE_URL","OAI_IMAGE_BASE_URL","OAI_HTTP_TIMEOUT"]` in `tools.json` per tool; update `internal/tools/manifest.go` to validate string array, normalize keys, and expose it; update `docs/reference/tools-manifest.md` with examples and security cautions (only explicit allowlist is passed to child process); DoD: unit tests for validation and JSON round-trip, docs updated, grep guard in `make verify-manifest-paths` remains green. +* [x] Wire runner to honor `envPassthrough`: in `internal/tools/runner.go` build child env as `PATH,HOME` + `envPassthrough` values from the parent process; add redaction in audit log so values are never logged; DoD: unit test asserts child sees allowed vars and not others, audit NDJSON shows keys but not values, race tests green, docs/security updated. +* [x] Add `img_create` to `tools.json` with JSON Schema: "name":"img_create","description":"Generate image(s) with OpenAI Images API and save to repo or return base64","schema":{"type":"object","required":["prompt"],"properties":{"prompt":{"type":"string"},"n":{"type":"integer","minimum":1,"maximum":4,"default":1},"size":{"type":"string","pattern":"^\\d{3,4}x\\d{3,4}$","default":"1024x1024"},"model":{"type":"string","default":"gpt-image-1"},"return_b64":{"type":"boolean","default":false},"save":{"type":"object","required":["dir"],"properties":{"dir":{"type":"string"},"basename":{"type":"string","default":"img"},"ext":{"type":"string","enum":["png"],"default":"png"}},"additionalProperties":false}},"additionalProperties":false}`, `"command":["./tools/bin/img_create"]`, `"timeoutSec":120`, `"envPassthrough":["OAI_API_KEY","OAI_BASE_URL","OAI_IMAGE_BASE_URL","OAI_HTTP_TIMEOUT"]`; DoD: manifest loads, `agentcli -capabilities` lists tool, reference doc updated, integration test uses this entry. 
+* [x] Add end-to-end agent integration test: in `cmd/agentcli/tools_integration_test.go`, spin an `httptest.Server` that expects POST `/v1/images/generations` with `{"model":"gpt-image-1","prompt":"tiny-pixel","n":1,"size":"1024x1024","response_format":"b64_json"}` and returns a valid 1×1 PNG `b64_json`; run agent in temp repo with tools.json (envPassthrough wired, fake base URL/key), script the model mock to first return `tool_calls:[{function:{name:"img_create",arguments:"{...save:{dir:\"out\"}}"}}]` then a final assistant text summarizing the saved path; assert: one PNG written under `out/`, stdout == final message, exit 0; DoD: deterministic, no network, Windows/macOS/Linux all green. +* [x] Add README section “Image generation tool (img_create)”: quickstart with `make build-tools`, sample `tools.json` entry, example prompt that instructs the assistant to call `img_create` and save under `assets/`, note on avoiding b64 in transcripts by default, pointer to troubleshooting; DoD: docs render on GitHub, copy-paste works on clean clone. +* [x] Author `docs/reference/img_create.md`: define stdin/out contracts, examples for saving files vs returning b64, parameter table (prompt, n, size, model), HTTP behavior (timeouts/retries), and safety notes; include a cURL of the underlying API for transparency, and link to OpenAI “Images & vision” and “gpt-image-1” docs; DoD: links render, schema matches tools.json, reviewed, CI docs gates green. +* [x] Create ADR for “Add minimal OpenAI image generation tool”: DoD: ADR committed, diagram renders, cross-linked from docs index. +* [x] Update `docs/diagrams/toolbelt-seq.md` (or add `docs/diagrams/img-create-seq.md`) to show: CLI → assistant(tool_calls: img_create) → img_create (HTTP to Images API) → files saved → assistant final; DoD: diagram renders on GitHub, referenced from README and ADR, test asserts file exists. 
+* [x] Makefile: add `img_create` to `TOOLS` variable so `make build-tools` emits `tools/bin/img_create` (with `.exe` on Windows) and `make clean` removes it; DoD: reproducible builds with `-trimpath`, shasums stable across two runs, CI matrix green. +* [x] Runner troubleshooting entry: add `docs/runbooks/troubleshooting.md` section for image errors (invalid API key, 429 with `Retry-After`, moderation refusal/body 400 mapping, timeout), with concrete remediation (export OAI_API_KEY, raise OAI_HTTP_TIMEOUT, reduce `n`/size); DoD: content verified via tests/mocks, docs links intact. +* [x] Security doc update: expand `docs/security/threat-model.md` with envPassthrough rationale, explicit list of whitelisted vars, note that `img_create` never logs prompts or base64 by default and writes only under repo-relative `save.dir`; DoD: doc renders, threat boundaries diagram updated if present. +* [x] Add example repo script `examples/image-gen/README.md` + small Go/Makefile snippet to invoke `img_create` directly for manual testing (no agent), storing under `assets/` and printing paths; DoD: copy-paste works, ignored by Git via existing patterns, no CI dependence. +* [x] Add negative-path contract tests for `img_create`: (a) `return_b64=false` and no `save.dir` ⇒ stderr JSON and exit≠0, (b) non-matching `size` pattern ⇒ stderr JSON, (c) API returns 400 with JSON error ⇒ tool maps to stderr JSON; DoD: tests green, coverage unchanged. +* [x] Optional pass-through extras (forward-compat): support optional `extras?:object` that is shallow-merged into the API body for known unsafe-to-omit keys (e.g., `"background":"transparent"` if/when supported); validate `extras` is a simple map of string→primitive, strip anything else; DoD: unit tests for include/strip behavior, docs note that extras are best-effort and may be ignored by the API. 
+* [x] `agentcli -capabilities` output: ensure `img_create` appears with its description and an explicit warning that it makes outbound network calls and can save files; DoD: unit test asserting printed line contains `img_create` and warning, README snippet updated. +* [x] Enforce transcript hygiene: add a pre-flight validator in `cmd/agentcli` that, when `-debug` is off, truncates any tool message content over 8KB (e.g., if a future config sets `return_b64=true`), replacing with `{"truncated":true,"reason":"large-tool-output"}` before sending to the API; DoD: unit test for truncation, docs mention safeguard. +* [x] Fix errcheck warnings by handling or asserting error returns in tests currently flagged by golangci-lint (e.g., internal/oai/client_test.go writes and cmd/agentcli/main_test.go json Encode/Setenv/Unsetenv), smallest change is editing only those tests to check errors or add narrowly justified nolint comments, scope lint hygiene (tests only), low risk, DoD includes `make lint` passing with errcheck clean and tests unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by running `make lint` and observing no errcheck diagnostics in those files, rollback by reverting the test edits. +* [x] Remove the inadvertently committed root-level `agentcli` binary and prevent reintroduction by adding `agentcli` (and Windows `agentcli.exe`) to `.gitignore`; smallest change is `git rm agentcli` and appending two ignore entries; scope repository hygiene; low risk; DoD includes `git ls-files --error-unmatch agentcli` failing, `make build` producing `bin/agentcli` as documented, `git status` clean after builds across OSes, tests unchanged and green with no coverage regression, all quality gates green, peer review completed; verification by running those commands; rollback by reverting the `.gitignore` edit and restoring the file if necessary. 
+* [x] Remove tracked lint artifacts `lint.err` and `lint_verify.err` and add ignore entries for them in `.gitignore` to keep the working tree clean after lint runs; smallest change is `git rm` those files and append two ignore lines; scope repository hygiene; low risk; DoD includes `git status` clean after `make lint`, tests unchanged and green with no coverage regression, all quality gates green, peer review completed; verification by running `make lint` and observing no new tracked `*.err` files; rollback by reverting the `.gitignore` edit and re-adding the files if required. +* [x] Fix errcheck warnings in production code by handling or asserting error returns currently flagged in non-test packages (e.g., cmd/agentcli/main.go writePrepCache calls, internal/oai/client.go resp.Body.Close/io.ReadAll, tools/cmd/img_create/*.go Encode/Close/Remove); smallest change is adding error checks or narrowly justified ignores where safe; scope limited to non-test packages and tool binaries; low risk and independent; DoD includes make lint passing with no errcheck diagnostics in these files, tests unchanged and green with no coverage regression, all quality gates green; verify by running make lint and confirming errcheck clean; rollback by reverting the edits. +* [x] Reduce cyclomatic complexity of tools/cmd/img_create/img_create.go function run (gocyclo=54) by extracting small helpers for input parse/validate, API request/response handling, file writes, and stdout JSON; no behavior change; scope single tool; low–moderate effort; DoD includes golangci-lint gocyclo no longer flagging run (>20), make lint and tests passing, coverage unchanged, peer review completed; verify via golangci-lint output and go test; rollback by reverting the refactor. 
+* [x] Reduce cyclomatic complexity of internal/tools/runner.RunToolWithJSON (gocyclo=21) by extracting environment construction and process execution into focused helpers without changing semantics; scope internal/tools only; low effort and independent; DoD includes golangci-lint gocyclo clean for this function, make lint and tests passing, coverage unchanged; verify via golangci-lint and go test; rollback by reverting the refactor. +* [x] Remove the staticcheck SA9003 empty branch in cmd/agentcli/main.go around the pre-stage block (~line 673) by deleting the no-op branch or consolidating logic; scope single file; low risk and independent; DoD includes make lint passing with no SA9003 warning, tests green with no coverage regression, all quality gates green; verify by rerunning make lint and confirming staticcheck clean; rollback by reverting the line-level change. +* [x] Ignore editor swap files and remove the stray .FEATURE_CHECKLIST.md.swp: append a *.swp entry to .gitignore (minimal change) and delete the existing file; scope repository hygiene; low risk and independent; DoD includes git status clean after editing with Vim (no new tracked *.swp), git ls-files --error-unmatch .FEATURE_CHECKLIST.md.swp fails, tests and lint unchanged and green with no coverage regression, all quality gates green; verify by creating and saving a file in Vim and observing no tracked swap file; rollback by reverting the .gitignore edit and restoring the file if necessary. 
+* [x] Correct the README fs_search example to match the tool schema: replace invalid keys (path, pattern, glob, caseInsensitive) with query (string), globs (array of file globs), and regex (boolean), and remove path since fs_search scans the repository root; smallest change is editing README only; scope documentation; low risk and independent; DoD includes the updated example running successfully on a clean clone (producing expected matches), tests and lint unchanged and green with no coverage regression, all quality gates green; verify by running the example exactly as shown; rollback by reverting the README edit. +* [x] **Tool: http_fetch (safe HTTP/HTTPS fetcher)** — File `tools/cmd/http_fetch/http_fetch.go`; **stdin** `{"url":string,"method?":"GET|HEAD","max_bytes?":1048576,"timeout_ms?":10000,"decompress?":true}`, **stdout** `{"status":int,"headers":object,"body_base64?":string,"truncated":bool}`; allow only http/https, stream with hard **byte cap**, ≤5 redirects, preserve `ETag/Last-Modified`, **SSRF guard** as above, UA `agentcli-http-fetch/0.1`; **env** optional `HTTP_TIMEOUT_MS`; **errors** stderr JSON; **audit** `{tool:"http_fetch",url_host,status,bytes,truncated,ms}`; **manifest** add entry with schema and `envPassthrough:["HTTP_TIMEOUT_MS"]`; **Makefile** add to `TOOLS`; **tests** redirects, gzip body, truncation, SSRF block, HEAD; **docs** ref page; **DoD:** build, tests, lint, capabilities OK, docs render. 
+* [x] **ADR-0010: Adopt SearXNG & network research toolbelt (CLI-only)** — Add `docs/adr/0010-research-tools-searxng.md` describing context (need credible web discovery + provenance), options (direct engine APIs vs meta-search vs scraping), **decision** (SearXNG + small CLI subtools), consequences (operate with SSRF guard, robots respect, retries), and a Mermaid flow (agentcli→tool_calls→fetch/parse→citation); link from `README.md` and `docs/README.md`; **DoD:** file renders on GitHub with diagram, links resolve, no code changes, `make lint test` green. +* [x] **Tool: searxng_search (meta search over the web)** — We have SearXNG running at http://localhost:8888. Create `tools/cmd/searxng_search/searxng_search.go` → `tools/bin/searxng_search(.exe)`; **stdin** `{"q":string,"time_range?":"day|week|month|year","categories?":[string],"engines?":[string],"language?":string,"page?":int,"size?":int<=50}`, **stdout** `{"query":string,"results":[{"title":string,"url":string,"snippet":string,"engine":string,"published_at?":string}]}`; GET `${SEARXNG_BASE_URL}/search?format=json&q=...` with 10s timeout, ≤5 redirects, **retries** 2 on timeout/429/5xx (respect `Retry-After`), **SSRF guard** (block loopback/RFC1918/link-local/IPv6 ::1 & DNS-rebinding), UA `agentcli-searxng/0.1`; **env** required `SEARXNG_BASE_URL`, optional `HTTP_TIMEOUT_MS`; **errors**: single-line stderr JSON `{"error":"...","hint?":"..."}` + exit≠0; **audit** append NDJSON `{ts,tool:"searxng_search",url_host,query,status,ms,retries}` under `.goagent/audit/YYYYMMDD.log` (redact query if >256 chars → `query_truncated:true`); **manifest**: append to root `tools.json` `"name":"searxng_search","description":"Meta search via SearXNG","schema":{…},"command":["./tools/bin/searxng_search"],"timeoutSec":15,"envPassthrough":["SEARXNG_BASE_URL","HTTP_TIMEOUT_MS"]` (Windows uses `.exe`); **Makefile**: add to `TOOLS += searxng_search`; **tests**: offline using `httptest.Server` fixtures for success, 429 with `Retry-After`, 5xx
then success, SSRF blocked, bad base URL; **docs**: `docs/reference/searxng_search.md` with examples; **DoD:** `make build-tools` produces binary, `go test ./...` & `make lint` green, `agentcli -capabilities` lists tool, docs render. +* [x] **Tool: robots_check (robots.txt evaluator)** — `tools/cmd/robots_check/robots_check.go`; **stdin** `{"url":string,"user_agent?":"agentcli"}`, **stdout** `{"allowed":bool,"crawl_delay_ms?":int,"group_rules":[string]}`; fetch `/robots.txt` with internal safe GET (5s), RFC 9309 precedence (UA-specific then `*`), no redirects to non-origin, cache in-process for test, **SSRF guard** applies; **env** none; **errors** stderr JSON; **audit** `{tool:"robots_check",origin,allowed,ms}`; **manifest** add entry; **Makefile** add; **tests** fixtures covering allow/deny precedence, UA match, Crawl-delay; **docs** page with examples; **DoD:** build/tests/lint green, doc render. +* [x] **Tool: readability_extract (article extraction)** — `tools/cmd/readability_extract/readability_extract.go`; **stdin** `{"html":string,"base_url":string}` (≤5 MiB), **stdout** `{"title":string,"byline?":string,"text":string,"content_html":string,"length":int}` using `github.com/go-shiori/go-readability`; **env** none; **errors** stderr JSON; **audit** `{tool:"readability_extract",length,ms}`; **manifest** add entry; **Makefile** add; **tests** article vs nav-heavy fixtures, large HTML rejected; **docs** page; **DoD:** build/tests/lint green, docs render. +* [x] **Tool: metadata_extract (OG/Twitter/JSON-LD)** — `tools/cmd/metadata_extract/metadata_extract.go`; **stdin** `{"html":string,"base_url":string}`, **stdout** `{"opengraph":object,"twitter":object,"jsonld":[any]}`; **errors/audit/manifest/Makefile/docs/tests** similar pattern; **tests** cover all three metadata types; **DoD:** build/tests/lint green, docs render. 
+* [x] **Tool: pdf_extract (PDF text, optional OCR)** — `tools/cmd/pdf_extract/pdf_extract.go`; **stdin** `{"pdf_base64":string,"pages?":[int]}` (≤20 MiB), **stdout** `{"page_count":int,"pages":[{"index":int,"text":string}]}` via `github.com/ledongthuc/pdf`; if `ENABLE_OCR=true` and page is image-only, shell to `tesseract` (if absent emit stderr JSON `"OCR_UNAVAILABLE"`), 10s/page cap; **SSRF** N/A; **audit** `{tool:"pdf_extract",page_count,ms}`; **manifest** include `envPassthrough:["ENABLE_OCR"]`; **Makefile** add; **tests** normal text PDF, image-only PDF without OCR, with OCR (mock), oversize rejects; **docs**; **DoD:** build/tests/lint green, docs render. +* [x] **Tool: rss_fetch (RSS/Atom)** — `tools/cmd/rss_fetch/rss_fetch.go`; **stdin** `{"url":string,"if_modified_since?":string}`, **stdout** `{"feed":{"title":string,"link":string},"items":[{"title":string,"url":string,"published_at?":string,"summary?":string}]}`; conditional GET (pass If-Modified-Since), 5s timeout, **SSRF guard**; **errors/audit/manifest/Makefile/docs/tests**; **tests** RSS, Atom, 304 path; **DoD:** build/tests/lint green, docs render. +* [x] **Tool: wayback_lookup (Internet Archive)** — `tools/cmd/wayback_lookup/wayback_lookup.go`; **stdin** `{"url":string,"save?":false}`, **stdout** `{"closest_url?":string,"timestamp?":string,"saved?":bool}`; call `https://web.archive.org/save/` when `save=true` and `.../available?url=...` for lookup, 3s timeout, 1 retry w/ jitter on 5xx; **audit** `{tool:"wayback_lookup",saved:boolean,ms}`; **manifest/Makefile/docs/tests**; **DoD:** build/tests/lint green, docs render. 
+* [x] **Tool: wiki_query (MediaWiki summaries/search)** — `tools/cmd/wiki_query/wiki_query.go`; **stdin** `{"titles?":string,"search?":string,"language?":"en"}`, **stdout** `{"pages":[{"title":string,"url":string,"extract":string}]}`; call MediaWiki action API extracts/opensearch per input, 5s timeout, language fallback to `"en"` if miss; **errors/audit/manifest/Makefile/docs/tests**; **DoD:** build/tests/lint green, docs render. +* [x] **Tool: openalex_search (scholarly works)** — `tools/cmd/openalex_search/openalex_search.go`; **stdin** `{"q":string,"from?":string,"to?":string,"per_page?":10}`, **stdout** `{"results":[{"title":string,"doi?":string,"publication_year":int,"open_access_url?":string,"authorships":[...],"cited_by_count":int}],"next_cursor?":string}`; GET `https://api.openalex.org/works?...` (no key), 8s timeout, retries 1; **audit/manifest/Makefile/docs/tests**; **DoD:** build/tests/lint green. +* [x] **Tool: crossref_search (DOI metadata)** — `tools/cmd/crossref_search/crossref_search.go`; **stdin** `{"q":string,"rows?":10}`, **stdout** `{"results":[{"title":string,"doi":string,"issued":string,"container":string,"title_short?":string}]}`; require `CROSSREF_MAILTO` (send polite header), 8s timeout; **audit/manifest (envPassthrough:["CROSSREF_MAILTO"])/Makefile/docs/tests** including quota handling; **DoD:** build/tests/lint green. +* [x] **Tool: github_search (repos/code/issues/commits)** — `tools/cmd/github_search/github_search.go`; **stdin** `{"q":string,"type":"repositories|code|issues|commits","per_page?":10}`, **stdout** `{"results":[...minimal per type...],"rate":{"remaining":int,"reset":int}}`; optional `GITHUB_TOKEN` (bearer), inspect `X-RateLimit-Remaining`, map 0 to stderr JSON `{"error":"RATE_LIMITED","hint":"use GITHUB_TOKEN"}`; 8s timeout, 1 retry on 5xx; **audit/manifest (envPassthrough:["GITHUB_TOKEN"])/Makefile/docs/tests**; **DoD:** build/tests/lint green. 
+* [x] **Tool: dedupe_rank (near-duplicate detector)** — `tools/cmd/dedupe_rank/dedupe_rank.go`; **stdin** `{"docs":[{"id":string,"url?":string,"title?":string,"text?":string,"published_at?":string}]}`, **stdout** `{"groups":[{"representative_id":string,"members":[string],"score":number}]}`; MinHash (3-shingles) + TF-IDF tie-break; optional `AUTHORITY_HINTS_JSON` to bias ranking; **errors/audit/manifest/Makefile/docs/tests** with deterministic golden; **DoD:** build/tests/lint green. +* [x] **Tool: citation_pack (normalize + archive)** — `tools/cmd/citation_pack/citation_pack.go`; **stdin** `{"doc":{"title?":string,"url":string,"published_at?":string},"archive?":{"wayback?":bool}}`, **stdout** `{"title?":string,"url":string,"host":string,"accessed_at":string,"archive_url?":string}`; if `archive.wayback`, call Wayback lookup (3s), otherwise skip network; **errors/audit/manifest/Makefile/docs/tests**; **DoD:** build/tests/lint green. +* [x] Update `wiki_query` tool schema in `./tools.json` to remove the top-level `oneOf` (OpenAI requires the root to be `{ "type": "object" }` with no `oneOf/anyOf/allOf/enum/not` at the top); set `parameters` **exactly** to `{ "type":"object", "additionalProperties": false, "properties": { "titles": { "type":"string", "description":"Exact page title to fetch summary for (mutually exclusive with 'search')" }, "search": { "type":"string", "description":"Full-text search term to find pages (mutually exclusive with 'titles')" }, "language": { "type":"string", "default":"en", "description":"MediaWiki language code, e.g. 
en, fi" } } }` and remove the previous `oneOf`; if your tool handlers require mutual exclusivity, enforce it **at runtime** in the `wiki_query` executor (reject when both/neither provided with a clear error string returned as the tool result); verify the JSON with `jq -e '.tools[] | select(.function.name=="wiki_query") | .function.parameters as $p | ($p.type=="object") and (has("oneOf")|not) and (has("anyOf")|not) and (has("allOf")|not) and (has("not")|not) and (has("enum")|not)' ./tools.json >/dev/null`; DoD: running `./bin/agentcli -prompt "ping" -base-url http://api.openai.com.hg.fi/v1 -model gpt-5 -tools ./tools.json -max-steps 1` no longer returns `invalid_function_parameters` and the chat request is accepted (HTTP 200), and the `jq` check passes. +* [x] **Reference & contracts (single page for all research tools)** — Add `docs/reference/research-tools.md` enumerating **each tool’s** stdin/out JSON, required envs, exit codes (0 ok; non-zero with stderr JSON), SSRF/timeout/retry rules, and copy-paste examples; link from main README; **DoD:** page renders with anchors to all tools; `make lint` green. +* [x] **Security posture for research tools** — Add `docs/security/research-tools.md` detailing SSRF allowlist/denylist (block loopback, RFC1918/4193, link-local, `.onion`), robots compliance expectations, outbound UA strings, audit NDJSON fields and **redaction policy**, and guidance for sandboxing tools when used with untrusted prompts; link from `docs/security/threat-model.md`; **DoD:** page renders, links resolve, `make lint` green. +* [x] **Research pipeline diagram** — Add `docs/diagrams/research-pipeline.md` (Mermaid `flowchart`) showing `agentcli → tool_calls → (searxng_search → http_fetch → readability/metadata/pdf/rss) → dedupe_rank → citation_pack → assistant(final)`; link from ADR-0010 and `docs/README.md`; **DoD:** diagram renders on GitHub; links resolve. 
+* [x] **Runbook: troubleshooting research tools** — Extend `docs/runbooks/troubleshooting.md` with a new section covering: missing envs (`SEARXNG_BASE_URL`, `CROSSREF_MAILTO`), SSRF block messages, 429 with `Retry-After` handling, robots disallow, response truncation (`max_bytes`), and network timeouts (raising `HTTP_TIMEOUT_MS`); **DoD:** content renders; `rg` finds section header; lint green. +* [x] **Examples (manual, no agent)** — Add `examples/research/README.md` with commands like `echo '{"q":"golang"}' | ./tools/bin/searxng_search` and `echo '{"url":"https://..." }' | ./tools/bin/http_fetch | jq .status`, plus a **fixtures-only** test command pinned to `httptest.Server` scripts (commented to avoid network in CI); **DoD:** examples render; no CI network usage. +* [x] **Makefile wiring per tool** — For each tool you implement, append its name to `TOOLS += ...`, ensure `make build-tools` emits `tools/bin/<name>(.exe)` with `-trimpath` and deterministic flags, ensure `make clean` removes it; **DoD:** running `make build-tools clean` leaves `git status` clean, build repeatable (same shasum across two builds). +* [x] Add a link to the main README.md inviting readers to follow the project by connecting with the author on LinkedIn (https://www.linkedin.com/in/jheusala/) +* [x] Add global HTTP retry knobs: implement `-http-retries int` (env `OAI_HTTP_RETRIES`, default `2`) and `-http-retry-backoff duration` (env `OAI_HTTP_RETRY_BACKOFF`, default `500ms`) in `cmd/agentcli/flags.go`; wire into the main OpenAI client in `internal/oai/client.go` so all chat/completions use these values; keep precedence `flag > env > default`; update `--help` text in `cmd/agentcli/main.go`; DoD: `go run ./cmd/agentcli --help` shows both flags with precedence notes and defaults, unit test `cmd/agentcli/flags_test.go` table-driven verifies precedence and defaulting.
+* [x] Add image API endpoint flags: implement `-image-base-url string` (env `OAI_IMAGE_BASE_URL`, default inherit `-base-url`) and `-image-api-key string` (env `OAI_IMAGE_API_KEY`, default inherit `-api-key` with fallback to `OPENAI_API_KEY`) in `cmd/agentcli/flags.go`; define a new `ImageConfig` struct in `internal/oai/config.go` with resolved values; ensure `-print-config` redacts keys (show last 4 chars only); DoD: `--help` lists both flags and inheritance, `-print-config` prints redacted `image.api_key`, test `internal/oai/config_test.go` verifies inheritance and redaction. +* [x] Add image HTTP behavior flags: implement `-image-http-timeout duration` (env `OAI_IMAGE_HTTP_TIMEOUT`, default inherit `-http-timeout`), `-image-http-retries int` (env `OAI_IMAGE_HTTP_RETRIES`, default inherit `-http-retries`), and `-image-http-retry-backoff duration` (env `OAI_IMAGE_HTTP_RETRY_BACKOFF`, default inherit `-http-retry-backoff`) in `cmd/agentcli/flags.go`; ensure `internal/tools/image/client.go` (or HTTP wrapper) uses these resolved values; DoD: `--help` shows three flags and inheritance, unit tests verify inheritance chain and concrete values applied to HTTP client. +* [x] Expose common Images API parameters as flags (pass-through): add `-image-n int` (env `OAI_IMAGE_N`, default `1`), `-image-size string` (env `OAI_IMAGE_SIZE`, default `1024x1024`), `-image-quality string` (env `OAI_IMAGE_QUALITY`, enum `standard|hd`, default `standard`), `-image-style string` (env `OAI_IMAGE_STYLE`, enum `natural|vivid`, default `natural`), `-image-response-format string` (env `OAI_IMAGE_RESPONSE_FORMAT`, enum `url|b64_json`, default `url`), `-image-transparent-background` (env `OAI_IMAGE_TRANSPARENT_BACKGROUND`, default `false`); map 1:1 to request payload in the image driver; DoD: `--help` lists all with defaults/enums, unit tests assert payload fields equal CLI/env values. 
+* [x] [S01:image-model-flag-parse] In `cmd/agentcli/main.go::{cliConfig,parseFlags,printUsage}` add `-image-model string` (env `OAI_IMAGE_MODEL`, default `"gpt-image-1"`) stored as `cliConfig.imageModel` and documented in help; DoD: precedence `flag > env > default` is enforced, `agentcli --help` includes the flag line, `make lint && go test ./cmd/agentcli -run TestImageModelFlagPrecedence` green. +* [x] [S03:image-options-scaffold] Create `internal/tools/image/options.go` with `package image` and `type Options struct { Model string }` plus `func NewOptions(model string) Options`; add `internal/tools/image/options_test.go` verifying `NewOptions("foo").Model=="foo"`; DoD: `go test ./internal/tools/image` green and no new linter findings. +* [x] Add missing pre-stage sampling knobs for parity: implement `-prep-temp float` (env `OAI_PREP_TEMP`, default inherit `-temp`) and `-prep-top-p float` (env `OAI_PREP_TOP_P`, conflicts with `-prep-temp`; default unset); reuse the existing capability map to omit unsupported params; reuse the 400 “parameter-recovery” one-time retry path for pre-stage when server rejects an unsupported knob; DoD: `--help` shows both with conflict note, unit tests verify omission logic per capability map and retry path invoked on 400. +* [x] Normalize precedence/inheritance rules across chat/prep/image: centralize resolution in `internal/oai/resolve.go` with helpers `ResolveString`, `ResolveDuration`, `ResolveInt`, `ResolveBool` supporting chains like `flag → env → inheritFrom → default`; refactor chat and prep to use the same helpers; DoD: unit tests achieve 100% branch coverage for all helper functions and representative fields across three phases. 
+* [x] Update the Harmony messages saver/loader to carry image prompts: when `-save-messages` is used and an image prompt is present, embed an auxiliary `"image_prompt"` field; when `-load-messages` is used, if `"image_prompt"` exists and `-image-prompt*` flags are unset, populate `ImageConfig.Prompt`; DoD: round-trip test saves then loads and preserves image prompt verbatim. +* [x] Documentation update: edit `README.md` and the CLI usage section to document all new flags, environment variables, inheritance and precedence, conflict rules (`-temp` vs `-top-p`, `-prep-temp` vs `-prep-top-p`, prompt vs prompt-file), and examples (e.g., separate image backend via `-image-*` while chat goes to another); include copy-paste examples; DoD: README contains a dedicated “Image generation flags” table and an “Inheritance and precedence” matrix; `markdownlint` passes. +* [x] Add deterministic tests for `--help` output: snapshot test `cmd/agentcli/help_test.go` that renders `--help` and asserts presence of all new flags and key phrases (inheritance, conflicts, defaults) to prevent regressions; DoD: test green locally and fails if any flag doc is removed/changed unexpectedly. +* [x] Example configs and smoke scripts: add `examples/image/README.md` with two working commands demonstrating (1) same backend for chat+image and (2) split backends (e.g., OSS chat + OpenAI images); include a `make smoke-image` target that runs the CLI with `-image-response-format b64_json` and verifies a non-empty output stub from the image driver (use a stubbed driver in tests); DoD: `make smoke-image` exits 0 locally and examples copy-paste work with a valid key. 
+* [x] Concatenation and normalization of pre-stage prompt: if multiple `-prep-file` or `-prep-prompt` are provided, concatenate in the order seen with `\n\n` separators; trim trailing whitespace; store as `PrepConfig.Prompt` in `internal/oai/config.go`; DoD: unit test `internal/oai/config_test.go::TestPrepPromptConcat` verifies exact joined content and whitespace rules. +* [x] Embed a default “smart prep” prompt: create `assets/prompts/prep_default.md` (content describes goals: derive system prompt, optional developer prompts, tool hints including image generation guidance, and examples; output must be Harmony messages JSON); use `//go:embed assets/prompts/prep_default.md` into `internal/oai/prompts.go`; DoD: file exists with ≥150 lines of clear instructions and one minimal example; unit test asserts non-empty default is loaded when no overrides. +* [x] Backward compatibility and error messages: ensure that existing invocations without new flags behave identically (no behavior change); add explicit error messages for conflicting flags and mixed prompt sources; DoD: a small suite in `cmd/agentcli/compat_test.go` runs legacy invocations (no `-image-*`, no `-http-retries`) and asserts identical resolved config vs pre-change baselines; conflict tests assert precise error strings. +* [x] Resolution logic for pre-stage source: implement `ResolvePrepPrompt()` in `internal/oai/resolve.go` that returns (source: `override|default`, text string); order: (1) `-prep-prompt` joined, (2) `-prep-file` joined, (3) embedded default; DoD: unit tests cover all three branches and verify deterministic results. 
+* [x] Wire pre-stage prompt into execution: modify `internal/oai/prestage/runner.go` to send `ResolvePrepPrompt()` text as the user message to the pre-stage call (keeping the existing pre-stage system message empty unless capability map requires one); keep existing knobs `-prep-profile`, `-prep-temp`, `-prep-top-p`, `-prep-model`, timeouts, retries; DoD: integration test stubs HTTP and asserts the first pre-stage request’s user content equals resolved prompt. +* [x] Define pre-stage output contract (Harmony): in `docs/harmony-prestage.md`, specify the model must return valid Harmony messages array with optional `system`, zero-or-more `developer`, and optional `tool_config` (names and basic params), and optional `image_instructions`; validator rejects tool calls or `role:"tool"`; DoD: doc committed, validator updated to enforce roles and schema; tests include positive/negative examples. +* [x] Merge pre-stage results into main-call configuration: implement `internal/oai/prestage/merge.go` to (a) replace system prompt if present, (b) append developer prompts, (c) register tool enable/disable hints (respect `-prep-tools-allow-external` safety), (d) capture optional `image_instructions` to pass into image tool as defaults unless overridden by `-image-*`; DoD: unit tests verify each merge behavior and that CLI flags still override merged hints. +* [x] Honor `-prep-dry-run` with new overrides: when `-prep-dry-run` is set, print to stdout the final Harmony messages array that would feed the main call after merging (not just the raw pre-stage output); DoD: golden snapshot test verifies structure and that it reflects overrides and merge rules. +* [x] Extend `-print-messages` and `-save-messages` metadata: include a `"prestage":{"source":"override|default","bytes":N}` field (no prompt text) in the printed/saved JSON; ensure API keys remain redacted; DoD: running with `-print-messages` shows the metadata, tests assert presence and redaction. 
+* [x] Add `-prep-system` and `-prep-system-file` (optional) for advanced users: allow providing a system message specifically for the pre-stage (repeatable files allowed; exclusivity with `-prep-system` same as prompt/file); precedence `flag > env(OAI_PREP_SYSTEM / OAI_PREP_SYSTEM_FILE) > empty`; DoD: help/docs updated, tests verify precedence and exclusivity; if unset, pre-stage runs without system message (relying on the embedded prompt). +* [x] Capability-map enforcement for pre-stage: reuse main capability map to omit unsupported sampling params (`temperature`/`top_p`) and to opt into `response_format` if server supports JSON mode; DoD: unit tests simulate capability sets and assert omitted/kept fields. +* [x] ADR: add `docs/adr/ADR-00XX-prestage-overrides.md` documenting context, options (no override vs files vs flags), decision (flags + embedded default), rationale (maintain simplicity, reproducibility, and DX), and migration (no breaking changes); link to canonical issue; DoD: ADR merged with diagram of flow. +* [x] Mermaid diagram of execution flow: add `docs/diagrams/prestage_flow.md` with a GitHub-compatible Mermaid sequence diagram showing: CLI parse → ResolvePrepPrompt → Pre-stage call → Validate/merge → Main call → Optional image tool usage; DoD: diagram renders in GitHub and matches implementation. +* [x] Help/snapshot tests: update `cmd/agentcli/help_test.go` snapshot to include new flags and key phrases (repeatable, `'-'` for STDIN, exclusivity, precedence); DoD: test fails on doc drift. +* [x] Implement tool and handler for `code.sandbox.js.run`: create `internal/tools/jsrun/handler.go` to accept `{source:string,input:string,limits:{wall_ms:int,output_kb:int}}`; embed Goja VM, `vm.Set("read_input", func() string {…})`, `vm.Set("emit", func(s string){…})`, run via `vm.RunString(source)`; cap output with a bounded buffer; DoD: unit test shows `emit(read_input())` returns input and output > limit is truncated with error. 
+* [x] Timeouts with interruption: implement wall-time cancel by running the VM in a goroutine and stopping it via Goja’s interrupt mechanism (set `vm.Interrupt` / supported pattern) or cooperative check wrapper; ensure an interrupt yields a standardized `TIMEOUT`; DoD: test with `for(;;){}` halts within configured `wall_ms` and returns `TIMEOUT`. +* [x] Deny-by-default capability model: do not bind `require`, `console`, timers, or any FS/net; expose only `emit` and `read_input`; add negative tests proving absent globals; DoD: tests verify `typeof require === 'undefined'`, `typeof console === 'undefined'`. +* [x] Structured errors & schema parity with Starlark: map Goja exceptions to `{code:"EVAL_ERROR",message}`; preserve `TIMEOUT` and `OUTPUT_LIMIT` semantics; DoD: tests assert identical error shapes to Starlark tool for analogous failures. +* [x] Docs & examples: add `docs/interfaces/code.sandbox.js.run.md` with usage, security notes, and pitfalls (no timers/Promise unless explicitly added); DoD: doc linked from README; example works via CLI. +* [x] Observability: add logs and span attributes (tool name, wall_ms, bytes_out); DoD: local run shows structured logs and a span `tools.js.run`. + - [x] [S01:wasmrun-validate-limits] Validate that `limits.output_kb`, `limits.wall_ms`, and `limits.mem_pages` are strictly > 0 with structured `INVALID_INPUT` errors; add table tests. +* [x] Deny WASI by default: do not instantiate WASI; provide only `env.emit` as host import; add negative test that a WASI-dependent module fails with a clear error (`MISSING_IMPORT`); DoD: test passes and error is documented. +* [x] Host memory read correctness: implement helper to locate the guest memory (`api.Module.Memory()`), bounds-check `[ptr:ptr+len)` before reading, and error on OOB; add unit tests for valid and OOB reads; DoD: tests pass and OOB returns standardized `OOB_MEMORY`. 
+* [x] Observability: log and span attributes (module size bytes, wall_ms, mem_pages_used, bytes_out); DoD: local run shows structured logs and span `tools.wasm.run`. +* [x] Shared limits & sanitizer: create `internal/sandbox/limits.go` with `BoundedBuffer(maxKB) io.Writer`, wall-time helper, and standard error helpers; vendor (or copy) per tool so each task is independent; DoD: each tool compiles with its own copy and tests verify size/time caps. +* [x] Security notes & examples: for each tool, add a “Security Model” section to its interface doc stating deny-by-default capabilities and no ambient FS/net/clock; include a minimal example and a malicious loop example that times out; DoD: docs exist and examples execute locally with the respective tool CLI invocation. +* [x] Introduce `-state-dir` flag (and `AGENTCLI_STATE_DIR` env) to persist and restore generated execution state (prompts, prep_settings, context notes, tool capability decisions); precedence: CLI > env > empty (=disabled); expand `~`, normalize path, create directory with 0700 (respect `umask 077`); update `cmd/agentcli/main.go` flag parsing, config struct, and `-h` help to describe “stores/loads generated execution state across runs”; DoD: `agentcli -h` shows the flag, table-driven test verifies precedence, empty disables persistence, and directory is created with 0700. +* [x] Define a versioned on-disk “state bundle” schema and filenames; single JSON file per snapshot named `state--<8charSHA>.json` containing `{version:"1", created_at, tool_version, model_id, base_url, toolset_hash, scope_key, prompts:{system,developer}, prep_settings:any, context:any, tool_caps:any, custom:any, source_hash}`; maintain atomic pointer file `latest.json` with `{version:"1", path:"state-*.json", sha256}`; permissions 0600 for files; DoD: `internal/state/schema.go` with `StateBundle` type + validation helpers committed and ADR drafted describing schema. 
+* [x] Implement atomic save `SaveStateBundle(dir, bundle)`; steps: ensure dir 0700; write temp file in same FS, fsync file, `os.Rename` to final `state-*.json`, write `latest.json` to temp + fsync + rename, fsync directory; never log bundle contents at info level; DoD: unit test using tmpfs/tmpdir asserts two files exist, perms are 0600, directory fsync path used, and rename pattern prevents torn writes. +* [x] Implement safe load `LoadLatestStateBundle(dir)`; read `latest.json`, verify `version=="1"`, open referenced snapshot, validate schema, return `*StateBundle`; on missing/corrupt/unknown-version or permission errors, return `(nil, ErrStateInvalid)` and log at debug; DoD: table tests for ok/missing/corrupt/unknown-version/EPERM with zero panics and clear error values. +* [x] Add optional `-state-scope` flag (and `AGENTCLI_STATE_SCOPE`) to partition state; compute default `scope_key = sha256(model_id + "|" + base_url + "|" + toolset_hash)`; on save/load only consider bundles whose `scope_key` matches; DoD: unit test shows different model/base or toolset hash yields different scopes and prevents cross-contamination. +* [x] Wire restore-before-prep behavior; if `-state-dir` is set, load latest matching bundle by `scope_key`; when loaded and `-state-refine` is not set, reuse `prompts.*`, `prep_settings`, `context`, and `tool_caps` instead of calling pre-stage LLM; explicit CLI `-prep-prompt/-prep-file` still override reused prompts; DoD: integration test with a fake LLM shows second run performs zero pre-stage calls and uses restored values. 
+* [x] Introduce refinement controls: `-state-refine` (bool), `-state-refine-text` (string), and `-state-refine-file` (path; wins over text if both provided); when set, call pre-stage LLM with deterministic instruction to refine the loaded bundle using the refine input and current user prompt; on success, write a new snapshot and update `latest.json`; `-state-refine-text`/`-state-refine-file` are rejected when `-state-dir` is empty (refinement requires a state directory); DoD: flags visible in help, invalid combos error with exit code 2 and message, unit tests cover matrix. +* [x] Implement `RefineStateBundle(prev *StateBundle, refineInput string, userPrompt string) (*StateBundle, error)`; requirements: preserve unspecified fields, update `created_at`, recompute `source_hash`, and record `prev_sha`; return new bundle; DoD: unit test with fake LLM verifies prompts/settings changed predictably, fields preserved, `prev_sha` set, and a new file written. +* [x] Enforce effective-source precedence; compute final values with strict order: explicit CLI (`-prep-prompt/-prep-file`) > refined/restored bundle > built-in defaults; if CLI fully overrides prompts while `-state-refine` is set, warn and proceed (no hard error); DoD: table tests verify resulting source for each field and that warnings are logged once. +* [x] Security hardening; reject world-writable `-state-dir` and non-owned directories on Unix; apply lightweight redaction before save (strip `Authorization:` header values, 64+ char base64-like tokens, common API key patterns), and never store raw request/response bodies unless `-debug` and `-state-allow-raw` are explicitly set; DoD: unit tests simulate dir perms (0707) causing refusal, and redaction removes secrets from serialized bundles. 
+* [x] UX & logging polish; add concise debug logs on save/restore/refine including basename and 8-char SHA; add `--dry-run` to print intended actions (restore/refine/save decisions) without touching disk; DoD: `agentcli --dry-run -debug -state-dir=$TMP` outputs plan and leaves directory empty. +* [x] E2E scenario test (local, deterministic); 1) Run with `-state-dir=$TMP -state-scope=testscope` to generate and save; 2) Run again with same args to confirm restore (no pre-stage calls); 3) Run with `-state-refine -state-refine-text="tighten temperature to 0.2"` to confirm refinement and new snapshot; assert `latest.json` points to newest, file count increments, and effective prompts follow precedence; DoD: a single `go test ./...` passes and prints no secrets. +* [x] Backpressure & corruption handling; when `latest.json` points to a missing file or JSON is partially written, auto-quarantine bad files by renaming to `*.quarantined` and continue with regeneration; never delete originals; DoD: unit test seeds corrupt files and verifies quarantine + successful regeneration path. +* [x] Concurrency & locking; implement coarse-grained advisory file lock (`state.lock`) during save/load to avoid concurrent writers; on lock contention, wait up to 2s with jitter then proceed without crashing; DoD: race test spawns two writers, only one succeeds per snapshot and no partial files are observed. +* [x] Public docs & ADR; write ADR “Persist and refine execution state via `-state-dir`” with context (need reproducible runs and continuity), options (no persistence vs file store vs DB), decision (file-based JSON bundles with scopes), and rationale (simplicity, testability); include a Mermaid sequence diagram for first-run/restore/refine and copy-pastable CLI examples; update README with usage and security notes; DoD: ADR and README committed, diagram renders on GitHub. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..40a8e32 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,12 @@ +## Code of Conduct + +We are committed to a welcoming and inclusive community. All participants are expected to be respectful and professional. + +- Be kind and constructive. Assume good intent. +- No harassment, discrimination, or exclusionary behavior. +- Use inclusive language. Respect different backgrounds and experiences. +- Provide actionable, empathetic feedback in reviews and discussions. + +If you experience or witness unacceptable behavior, please report it by opening a confidential issue or contacting the maintainers via the repository's security or contact channels. + +Consequences for unacceptable behavior may include temporary or permanent bans from participation at the maintainers' discretion. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..2166abf --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,57 @@ +## Contributing to goagent + +Thank you for your interest in improving `goagent`! Contributions are welcome. + +### Ways to contribute +- Report bugs and request features via the issue tracker +- Improve documentation and examples +- Submit pull requests for fixes and enhancements + +### Development setup +- Prerequisites: Go 1.24+, `ripgrep` (rg), `golangci-lint` +- Recommended commands from a clean clone: +```bash +make tidy build build-tools +make test +make lint +``` + +### Workflow +1. Fork the repository and create a feature branch from `develop` +2. Write tests first (unit/integration). Keep tests deterministic and offline +3. Implement the smallest change to make tests pass; keep code clear and readable +4. Run quality gates locally: +```bash +make test +make lint +``` +5. 
Open a pull request that: + - Explains the intent and links to the canonical GitHub issue + - Describes user-facing changes and risks + - Updates docs as needed + +### Coding standards +- Go formatting: run `gofmt -s` (enforced by `make lint`) +- Linting: `golangci-lint run` (invoked by `make lint`) +- Avoid deep nesting; prefer early returns and clear error handling +- Add tests for all changed behaviors; maintain or improve coverage + +### Tools layout conventions +- Tool sources live under `tools/cmd/NAME/*.go` +- Tool binaries are built to `tools/bin/NAME` (or `NAME.exe` on Windows) +- Use `make build-tools` or `make build-tool NAME=` to build +- Validate paths and manifests locally: +```bash +make check-tools-paths +make verify-manifest-paths +``` + +### Commit messages +- Keep the subject concise; focus on the why, not just the what +- Reference the issue with a full URL in either the PR description or commit body + +### Code of Conduct +By participating, you agree to abide by the project Code of Conduct. See `CODE_OF_CONDUCT.md`. + +### Getting help +If you are blocked while contributing, open a draft PR or an issue with details. We'll help you move forward. diff --git a/FEATURE_CHECKLIST.md b/FEATURE_CHECKLIST.md new file mode 100644 index 0000000..3ac087c --- /dev/null +++ b/FEATURE_CHECKLIST.md @@ -0,0 +1,76 @@ +* [x] Maintenance: refreshed PR inventory on 2025-08-19; verified existing PRs with `gh pr list`, re-synced `./work/main` to `origin/main`; no new PRs created this pass. +* [x] Maintenance: refreshed PR inventory on 2025-08-19; no duplicates; `./work/main` reset to `origin/main`. +* [x] Maintenance: refreshed PR inventory on 2025-08-19; no new PRs created this pass. + - Verified OPEN PRs to `main` via gh; prepared clean working copy at `./work/main` reset to `origin/main`; no duplicates opened this pass. 
+* [x] Maintenance: refreshed PR inventory on 2025-08-19; confirmed no duplicate PRs; `./work/main` hard-reset to `origin/main`; status recorded in `logs/CURRENT_TASK.md`. +* [x] Maintenance: prepared `./work/main` for slicing; switched remote to HTTPS, reset to `origin/main`; created branch `pr/docs-cli-reference` and restored `docs/reference/cli-reference.md` from `develop`; opened PR #55. +* [ ] Tool: citation_pack — OPEN: https://github.com/hyperifyio/goagent/pull/40 +* [ ] Tool: dedupe_rank — OPEN: https://github.com/hyperifyio/goagent/pull/39 +* [ ] Tool: github_search — OPEN: https://github.com/hyperifyio/goagent/pull/38 +* [ ] Tool: crossref_search — OPEN: https://github.com/hyperifyio/goagent/pull/37 +* [ ] Tool: openalex_search — OPEN: https://github.com/hyperifyio/goagent/pull/36 +* [ ] Tool: wiki_query — OPEN: https://github.com/hyperifyio/goagent/pull/35 +* [ ] Tool: pdf_extract — OPEN: https://github.com/hyperifyio/goagent/pull/34 +* [ ] Tool: searxng_search — OPEN: https://github.com/hyperifyio/goagent/pull/33 +* [x] Documentation: write a single developer-facing document describing Linux 5.4 compatibility constraints (no Landlock, no overlayfs-in-userns reliance), the three network modes with strictness caveats, how the bundle works (including inclusion of `agentcli`), how to author policies, and known limitations of rootless operation (optional cgroups only if delegated); DoD: the doc lives in the repo, is linked from `--help`, and contains copy-pastable policy examples and troubleshooting for common kernel settings. 
+- [ ] PR #07: Secure tool runner — OPEN: https://github.com/hyperifyio/goagent/pull/10 +* [x] Bootstrap and preflight (once): ensure a clean workspace and gh auth; run: `git fetch --all --prune && git switch main || git checkout -b main && git pull --ff-only || true && git switch develop && git pull --ff-only && gh auth status`; set repo vars: `REPO=$(git remote get-url --push origin | sed -E 's#.*/([^/]+/[^/]+)(\.git)?$#\1#')`; verify tools: `make -n >/dev/null 2>&1 || true`; Definition of Done: main exists locally and remotely (or created), develop is up to date, gh is authenticated for $REPO. +* [x] Pre-check (deduplicate PRs): verified existing PRs via `gh pr list` — confirmed OPEN: PR #03 "Scaffold repository (no code)" and PR #04 "CLI: minimal entrypoint (main.go only)"; also noted PR #01 "Develop to main" (OPEN) which is not part of the sliced feature plan. +* [x] Prepared clean workspace at `./work/main`: fetched all refs, checked out `main`, and reset hard to `origin/main` to ensure a pristine base for feature branches. +* [x] Create a machine-readable PR plan (once): generate docs/PR_PLAN.md enumerating PR order and branch names; sections in this exact order to minimize conflicts: (1) Repo scaffolding & hygiene, (2) Core CLI & client, (3) Tool manifest & runner, (4) Tool-call loop & validator, (5) Baseline docs/ADR/diagrams, (6) Each tool (one PR per tool), (7) Tests per component, (8) Makefile, scripts, and CI gates, (9) Security & runbooks, (10) Research tools (one PR per tool), (11) Remaining docs and examples; commit this plan on a short-lived planning branch: `git switch -c pr/plan && mkdir -p docs && printf "...ordered list with branch names..." 
> docs/PR_PLAN.md && git add docs/PR_PLAN.md && git commit -m "PR plan: ordered slicing from develop to main" && git push -u origin pr/plan && gh pr create --base main --head pr/plan --title "PR plan: ordered slicing" --body "Adds docs/PR_PLAN.md to drive small PR slicing" --draft`; Definition of Done: a draft PR to main exists containing docs/PR_PLAN.md with the full ordered plan — OPEN: https://github.com/hyperifyio/goagent/pull/6 +* [ ] State: versioned StateBundle schema (v1) — OPEN: https://github.com/hyperifyio/goagent/pull/50 +* [ ] State: atomic save/load with secure dir + quarantine + advisory lock — OPEN (draft): https://github.com/hyperifyio/goagent/pull/51 +* [ ] Sandbox: limits utilities — OPEN: https://github.com/hyperifyio/goagent/pull/52 +* [ ] Slice PR #02 — minimal CLI entrypoint (compile-only, no HTTP/tools): `git switch -c pr/02-cli-main main && git restore --source develop --staged --worktree cmd/agentcli/main.go && git restore --source develop --staged --worktree cmd/agentcli/go.* || true && go build ./cmd/agentcli`; update README usage snippet minimally if referenced by main.go; commit/push/create PR; Definition of Done: `go build ./cmd/agentcli` succeeds on a clean checkout of the branch; PR contains only cmd/agentcli/* (and any edited README lines). + - OPEN: https://github.com/hyperifyio/goagent/pull/4 +* [ ] Slice PR #03 — flags & help (no network): `git switch -c pr/03-flags-help main && git restore --source develop --staged --worktree cmd/agentcli/flags.go cmd/agentcli/usage_test.go docs/reference/cli-reference.md`; run `go test ./cmd/agentcli -run Help -v` (or the repo’s equivalent); commit/push/PR; Definition of Done: `agentcli --help` works locally (prints full usage), tests covering help/flags pass, only flags/help related files changed. 
+ - BLOCKED: Depends on PR #01 (scaffold) and PR #02 (minimal CLI) to land first to avoid README/go.mod rebasing conflicts; next action after merge: in `./work/main`, `git switch -c pr/03-flags-help` and restore only `cmd/agentcli/help_test.go`, `cmd/agentcli/flags_test.go`, `docs/reference/cli-reference.md` and the minimal code deltas strictly required for help/flags. +* [ ] Slice PR #04 — OpenAI-compatible HTTP client (types + client, no integration): `git switch -c pr/04-oai-client main && git restore --source develop --staged --worktree internal/oai/types.go internal/oai/client.go internal/oai/*_test.go`; `go test ./internal/oai -run Client -v`; commit/push/PR; Definition of Done: internal/oai unit tests pass and PR contains only internal/oai files (and go.mod/go.sum updates if needed). + - OPEN: https://github.com/hyperifyio/goagent/pull/7 +* [ ] Slice PR #05 — model defaults & capability map (serialization only): `git switch -c pr/05-model-defaults main && git restore --source develop --staged --worktree internal/oai/capabilities.go internal/oai/capabilities_test.go docs/adr/0004-default-llm-policy.md`; `go test ./internal/oai -run Capabilities`; commit/push/PR; Definition of Done: capability tests green; ADR added; no unrelated changes. + - OPEN: https://github.com/hyperifyio/goagent/pull/8 +* [ ] Slice PR #06 — tools manifest loader — OPEN: https://github.com/hyperifyio/goagent/pull/9: `git switch -c pr/06-tools-manifest main && git restore --source develop --staged --worktree internal/tools/manifest.go internal/tools/manifest_test.go docs/reference/tools-manifest.md`; `go test ./internal/tools -run Manifest`; commit/push/PR; Definition of Done: manifest tests pass, docs reference added/linked, only manifest-related changes included. 
+* [ ] BLOCKED: Slice PR #07 — secure tool runner (no sandbox yet, argv only) — OPEN: https://github.com/hyperifyio/goagent/pull/10: depends on manifest types (`ToolSpec`) from PR #06; next after unblock: `git switch -c pr/07-tool-runner main && git restore --source develop --staged --worktree internal/tools/runner.go internal/tools/runner_test.go && go test ./internal/tools -run Runner`. +* [ ] Slice PR #09 — baseline docs & diagrams — OPEN: https://github.com/hyperifyio/goagent/pull/5: `git switch -c pr/09-docs-diagrams main && git restore --source develop --staged --worktree docs/adr/0001-minimal-agent-cli.md docs/diagrams/agentcli-seq.md docs/diagrams/toolbelt-seq.md docs/security/threat-model.md docs/runbooks/troubleshooting.md README.md docs/README.md`; commit/push/PR; Definition of Done: docs render on GitHub (links valid), no code files in PR. +* [ ] Slice PR #10 — example tool get_time + tools.json + Makefile build-tools — OPEN: https://github.com/hyperifyio/goagent/pull/11: `git switch -c pr/10-get-time main && git restore --source develop --staged --worktree tools/cmd/get_time/** tools/bin/.gitkeep tools/testutil/** tools.json Makefile README.md`; `make build-tools && ./tools/bin/get_time <<< '{"tz":"Europe/Helsinki"}'`; commit/push/PR; Definition of Done: get_time builds and runs, tools.json declares it with schema, Makefile emits tools/bin/get_time(.exe), only these paths changed. +* [ ] Slice PR #11 — quickstart README runnable — OPEN: https://github.com/hyperifyio/goagent/pull/12: `git switch -c pr/11-quickstart main && git restore --source develop --staged --worktree README.md`; validate the quickstart end-to-end against current feature set; commit/push/PR; Definition of Done: quickstart commands copy-paste cleanly on a fresh clone for current branch scope. 
+* [ ] BLOCKED: Slice PR #12 — minimal unit tests enabling (core only): `git switch -c pr/12-core-tests main && git restore --source develop --staged --worktree internal/oai/*_test.go internal/tools/*_test.go cmd/agentcli/*_test.go`; `go test ./...`; commit/push/PR; Definition of Done: tests that rely only on already-merged features pass; tests requiring future tools remain excluded. — Reason: base `main` lacks prerequisite code; enabling tests now would fail. + - Next: after PRs #02, #04, and #06 merge into `main`, restore only tests for merged modules and open `pr/12-core-tests` from `./work/main`. +* [ ] Slice each tool as its own PR (repeat this item once per tool, substituting NAME and the exact file set): `git switch -c pr/tool-NAME main && git restore --source develop --staged --worktree tools/cmd/NAME/** && git restore --source develop --staged --worktree docs/reference/NAME.md || true && jq '.tools[]|select(.name=="NAME")' tools.json >/dev/null 2>&1 || git restore --source develop --staged --worktree tools.json && make build-tool NAME=NAME && go test ./tools/cmd/NAME -run . 
&& git commit -m "tool(NAME): add tool implementation, schema, tests, docs" && git push -u origin pr/tool-NAME && gh pr create --base main --head pr/tool-NAME --title "Tool: NAME" --body "Adds tools/cmd/NAME (+ tests), wires tools/bin/NAME in Makefile, updates tools.json, and adds docs/reference/NAME.md; contract: stdin/out + stderr JSON on error; includes examples and runbook notes."`; Definition of Done: branch builds `tools/bin/NAME(.exe)`, the tool’s focused tests pass, tools.json entry valid, docs page present; PR contains only NAME and the touched manifest/Makefile/docs; examples: get_time (already done), exec, fs_read_file, fs_write_file, fs_append_file, fs_rm, fs_move, fs_search, fs_mkdirp, fs_apply_patch, fs_read_lines, fs_edit_range, fs_listdir, fs_stat, img_create, http_fetch, searxng_search, robots_check, readability_extract, metadata_extract, pdf_extract, rss_fetch, wayback_lookup, wiki_query, openalex_search, crossref_search, github_search, dedupe_rank, citation_pack. + - [ ] Tool: exec — OPEN: https://github.com/hyperifyio/goagent/pull/13 + - [ ] Tool: fs_read_file — OPEN: https://github.com/hyperifyio/goagent/pull/14 + - [ ] Tool: fs_write_file — OPEN: https://github.com/hyperifyio/goagent/pull/15 + - [ ] Tool: fs_append_file — OPEN: https://github.com/hyperifyio/goagent/pull/16 + - [ ] Tool: fs_rm — OPEN: https://github.com/hyperifyio/goagent/pull/17 + - [ ] Tool: fs_move — OPEN: https://github.com/hyperifyio/goagent/pull/18 + - [ ] Tool: fs_listdir — OPEN: https://github.com/hyperifyio/goagent/pull/19 + - [ ] Tool: fs_mkdirp — OPEN: https://github.com/hyperifyio/goagent/pull/20 + - [ ] Tool: fs_stat — OPEN: https://github.com/hyperifyio/goagent/pull/21 + - [ ] Tool: fs_edit_range — OPEN: https://github.com/hyperifyio/goagent/pull/22 + - [ ] Tool: fs_read_lines — OPEN: https://github.com/hyperifyio/goagent/pull/23 + - [ ] Tool: fs_search — OPEN: https://github.com/hyperifyio/goagent/pull/24 + - [ ] Tool: http_fetch — OPEN: 
https://github.com/hyperifyio/goagent/pull/27 + - [ ] Tool: robots_check — OPEN: https://github.com/hyperifyio/goagent/pull/28 + - [ ] Tool: readability_extract — OPEN: https://github.com/hyperifyio/goagent/pull/29 + - [ ] Tool: metadata_extract — OPEN: https://github.com/hyperifyio/goagent/pull/30 + - [ ] Tool: rss_fetch — OPEN: https://github.com/hyperifyio/goagent/pull/31 + - [ ] Tool: wayback_lookup — OPEN: https://github.com/hyperifyio/goagent/pull/32 + - [ ] Tool: img_create — OPEN: https://github.com/hyperifyio/goagent/pull/26 +* [ ] Slice PR — tool error-contract standardization (one tool per PR for clarity): for each tool that needs the unified stderr JSON `{"error":"...","hint?":"..."}` behavior, branch from main, restore only that tool’s sources and tests from develop, run its negative-path tests, fix if needed, commit and PR titled `Tool NAME: standardize error contract`; Definition of Done: tool emits single-line stderr JSON on failure and exits non-zero, tests cover it, PR only touches that tool. +* [ ] Slice PR — Makefile wiring (TOOLS list, build-tools/clean, Windows .exe): `git switch -c pr/makefile-tools main && git restore --source develop --staged --worktree Makefile && make build-tools && make clean`; commit/push/PR; Definition of Done: `make build-tools` deterministically builds all already-merged tools to tools/bin, `make clean` removes them, no code beyond Makefile changed. — OPEN: https://github.com/hyperifyio/goagent/pull/41 +* [ ] Slice PR — scripts and gh utilities (no external deps beyond gh): `git switch -c pr/scripts main && git restore --source develop --staged --worktree scripts/**/* .github/** || true`; keep only scripts that don’t assume features not yet merged; `bash -n scripts/*.sh || true`; commit/push/PR; Definition of Done: scripts shellcheck clean (if configured) and runnable where applicable; PR limited to scripts/ and CI YAML. 
— OPEN: https://github.com/hyperifyio/goagent/pull/42 +* [ ] Slice PR — security & runbooks: `git switch -c pr/security-runbooks main && git restore --source develop --staged --worktree docs/security/** docs/runbooks/** README.md`; commit/push/PR; Definition of Done: security posture and troubleshooting render with valid links, no code in PR. — OPEN: https://github.com/hyperifyio/goagent/pull/43 +* [ ] Slice PR — ADRs (one ADR per PR unless trivial): for each ADR file under docs/adr not yet in main, `git switch -c pr/adr-XXXX main && git restore --source develop --staged --worktree docs/adr/XXXX-*.md docs/README.md README.md && git commit -m "ADR-XXXX: " && git push && gh pr create ...`; Definition of Done: ADR renders, is linked from docs index and README, no code changes. — OPEN: https://github.com/hyperifyio/goagent/pull/44 + - [ ] ADR-0011: State bundle schema — OPEN: https://github.com/hyperifyio/goagent/pull/48 + - [ ] ADR-0012: State dir persistence — OPEN: https://github.com/hyperifyio/goagent/pull/49 +* [ ] Slice PR — diagrams (grouped if tightly related): `git switch -c pr/diagrams main && git restore --source develop --staged --worktree docs/diagrams/** README.md docs/README.md`; verify GitHub renders Mermaid; commit/push/PR; Definition of Done: diagrams render and links valid; no code files. — OPEN: https://github.com/hyperifyio/goagent/pull/5 +* [ ] Slice PR — CLI features: group the smallest cohesive increments (each as its own PR): (a) `--version`, (b) `--print-config`, (c) split http vs tool timeouts + retries/backoff, (d) length backoff + `max_tokens`, (e) message-sequence validator in pre-stage, (f) pre-stage enable/disable + flags; for each: new branch from main, restore only the minimal necessary files (cmd/agentcli/* + internal where needed + docs/tests), run `go test ./...`, commit/push/PR with focused title/body; Definition of Done: each PR compiles, has passing tests for the feature, and touches only the minimal feature scope. 
+* [ ] Slice PR — parallel tool calls (main loop) isolated: `git switch -c pr/parallel-tool-calls main && git restore --source develop --staged --worktree cmd/agentcli/*parallel* internal/*/validator* cmd/agentcli/*_test.go`; run the concurrency test; commit/push/PR; Definition of Done: tests proving concurrency semantics pass; PR limited to loop/validator/tests. +* [ ] Slice PR — audit logs and redaction (internal/tools runner + client): create two PRs: (1) HTTP timings/audit fields; (2) tool runner audit + redaction; for each: branch, restore minimal internal files + tests + docs/runbooks update, run tests, commit/push/PR; Definition of Done: NDJSON log lines present with expected fields, tests assert structure, docs mention redaction. +* [ ] Slice PR — research toolbelt (one PR per research tool): repeat the per-tool PR item for searxng_search, http_fetch, robots_check, readability_extract, metadata_extract, pdf_extract, rss_fetch, wayback_lookup, wiki_query, openalex_search, crossref_search, github_search, dedupe_rank, citation_pack, each including its doc page and tools.json entry, with SSRF guards and retries as already implemented in develop; Definition of Done: each PR builds its tool, tests green offline with httptest.Server fixtures, docs reference added; no coupling between tools across PRs. +* [ ] Slice PR — integration tests (minimal end-to-end) — OPEN: https://github.com/hyperifyio/goagent/pull/45: branch from main and add only the smallest deterministic integration test that exercises one already-merged tool (e.g., fs_write_file→fs_read_file) using a fake API; `git restore --source develop --staged --worktree cmd/agentcli/tools_integration_test.go tools/testutil/**`; run `go test ./cmd/agentcli -run AdvertisesSchemas`; commit/push/PR; Definition of Done: integration passes locally, PR contains only integration test files and helpers. 
+* [ ] Slice PR — README finalization and examples (grouped): branch, restore examples/ that match only merged tools, update README links; commit/push/PR; Definition of Done: examples run locally without network (or with documented mock), links valid. +* [ ] For every PR above, enforce the same PR hygiene (apply each time): before commit, run `go mod tidy && make fmt && make build && make build-tools && go test ./... && make lint || true` (keep gates as available on local machine); ensure commit message starts with a clear scope (`scaffold:`, `cli:`, `oai:`, `tool(NAME):`, `docs:`), and the PR body includes a short rationale, the contract (if a tool/API), and a verification section; create the PR with `gh pr create --base main --head <branch> --title "<scope>: <summary>" --body-file <auto-generated body> --draft` if any gate not yet green, otherwise without `--draft`; Definition of Done: PR shows only the intended diff (verify with `git diff --name-only origin/main...HEAD`), local tests for its scope are green, title/body meet reviewability standards. +* [ ] Post-migration cleanup (final PR): when all features are merged, remove docs/PR_PLAN.md (or mark as historical), ensure tools/bin/* are gitignored only (no binaries tracked), run a last `make clean && make build build-tools && go test ./... && make lint`; commit any final doc link fixes; Definition of Done: main contains the full feature set, no legacy artifacts remain, and local gates pass end-to-end. 
+* [ ] Merge discipline and sequencing (applies continuously): once a PR is approved and CI (if present) is green, merge with “squash and merge” to keep history clean; after merge, `git switch main && git pull --ff-only && git branch -D <feature-branch> || true`; before opening the next PR, re-create branches from updated main; Definition of Done: main advances one minimal feature at a time, no open PR depends on another unmerged PR (or if unavoidable, clearly state `--base <dependent-branch>` and retarget to main immediately after the base merges). +* [x] Maintenance: refreshed PR inventory on 2025-08-19; verified existing PRs with gh; re-synced ./work/main to origin/main. diff --git a/FUTURE_CHECKLIST.md b/FUTURE_CHECKLIST.md new file mode 100644 index 0000000..5243816 --- /dev/null +++ b/FUTURE_CHECKLIST.md @@ -0,0 +1,122 @@ +* [x] Ensure `check-go-version` is enforced in CI and locally by wiring it into `make lint`. Smallest change: prepend `$(MAKE) check-go-version` in the `lint` recipe before any linter runs. DoD: `make lint` fails fast on version drift (manual test by temporarily exporting an older/newer Go on a dev machine), passes otherwise; CI shows the check executed before golangci-lint; all gates green; peer review completed. + - [x] [S02a1c0:l252-ci-push-preflight-blocked] Preflight on this headless runner indicates GitHub authentication is missing. Next step: on a browser-capable host run `gh auth login -h github.com --git-protocol ssh --scopes admin:public_key --device`, verify `ssh -T git@github.com` greets successfully, then execute `scripts/ci_push_pr.sh`. Capture the PR and CI run URLs and verify lint logs show `check-go-version: OK` before `golangci-lint version` across the matrix. + - [x] [S02u0a1b:l252-gh-device-login-blocked] Preflight on this runner shows `gh auth status` invalid token, `git_protocol=ssh`, `ssh -T git@github.com` fails with publickey, and `GH_TOKEN` absent. 
Next: on a machine with a browser, run `gh auth login -h github.com --git-protocol ssh --scopes admin:public_key --device`, complete the device flow, then re-run `gh auth status -h github.com` and proceed to add the SSH key. + - [x] [S02u0a1c0:l252-gh-device-login-web-preflight] Verified local gates are green (go test, make lint), `gh auth status -h github.com` reports invalid/absent token and `gh config get -h github.com git_protocol` is `ssh`; blocked awaiting browser-based device flow on a capable host. Next: perform the device-flow login on a browser host per S02u0a1c, then return to verify and proceed to add the SSH key. + - [x] [S02a1b0:l252-ci-green-auth-blocked-preflight] Local gates green (go test, make lint); authentication remains absent on this headless host. Recorded BLOCKED state and next step in `logs/coding.log` and `logs/gh_auth_status.*`. Next: complete device-flow login on a browser-capable machine, add SSH key, verify `ssh -T git@github.com`, then push branch and open PR to trigger CI. + - [x] [S02u1:l252-ssh-keygen] Generate an SSH key pair and surface the public key for account enrollment; DoD: `~/.ssh/id_ed25519.pub` exists, fingerprint recorded in `logs/coding.log`, and instructions included to add the key via GitHub Web or `gh ssh-key add` so a subsequent `ssh -T git@github.com` can succeed. + - [x] [S11:l252-ci-skeleton-file] Create `.github/workflows/ci.yml` (new) containing a single job `lint-test-build` with a step named exactly `lint (includes check-go-version)` that runs `make tidy && make lint | tee lint.log` and a subsequent step named exactly `Assert lint order (check-go-version before golangci-lint)` that compares line numbers of `^check-go-version: OK` and `^golangci-lint version` in `lint.log` (fail if absent or out of order); DoD: `go test ./internal/ci -run TestLintOrderLocallyAndInWorkflow` passes locally reading the workflow file and finding both expected step names. 
+ - [x] [S12:l252-ci-matrix-gover] Expand the workflow to a matrix over `{ubuntu-latest, macos-latest, windows-latest}` and use `actions/setup-go@v5` with `go-version-file: go.mod` in the `lint-test-build` job; DoD: workflow syntax validates, unit test still passes, and a PR run shows the same Go major.minor across OSes in logs. + - [x] [S13:l252-windows-install-rg] Add a Windows-only step before lint to install ripgrep (e.g., `choco install ripgrep -y`) so `make lint`’s `check-tools-paths`/`verify-manifest-paths` do not fail due to `ensure-rg`’s Linux/macOS-only installer; DoD: on `windows-latest`, the `lint (includes check-go-version)` step completes and finds `rg` on PATH. + - [x] [S14:l252-windows-shell-bash] For `windows-latest`, force GNU bash for all `run` steps that invoke `make` (set `defaults.run.shell: bash` at the job level or `shell: bash` per step) to satisfy Makefile’s bashisms and `SHELL := /bin/bash`; DoD: Windows lint step executes successfully without PowerShell/`/bin/sh` path errors, and the unit test remains green. +* [x] Add `make lint-precheck` to fail fast on analyzer/export-data mismatches. Smallest change: implement a shell precheck used by `lint` that (1) validates `$(GOBIN)/golangci-lint` exists, (2) extracts `GCL=$$($(GOBIN)/golangci-lint version | sed -nE 's/.*version ([v0-9\\.]+).*/\\1/p')`, (3) compares against `MIN=v1.60.0` using `sort -V`, and (4) prints an explicit hint if too old: “golangci-lint $GCL < $MIN with Go \$\$(go version)… update GOLANGCI\_LINT\_VERSION”. DoD: with a forced older version, `make lint` fails before analysis with the hint; with the pinned version, it proceeds; CI green; peer review completed. +* [x] Add Windows/macOS/Linux path tests for `save.dir` correctness and SHA-256 reporting (use `tools/testutil/MakeRepoRelTempDir`), and verify image bytes are identical to decoded b64; DoD: platform-agnostic tests green across CI matrix. 
+ - [x] [S11:adr-0006-file] Create `docs/adr/0006-image-generation-tool-img_create.md` documenting `img_create` (Context/Decision/Consequences) with explicit links to `tools/cmd/img_create/img_create.go`, `docs/reference/img_create.md`, `internal/tools/manifest.go`, `internal/tools/runner.go`, and `docs/diagrams/toolbelt-seq.md`; include security/envPassthrough and transcript‑hygiene rationale; DoD: file renders on GitHub and all relative links resolve. + - [x] [S12:docs-index-adr-0006] Add ADR‑0006 entry to `docs/README.md` under ADRs with link `adr/0006-image-generation-tool-img_create.md`; DoD: link clickable and resolves in GitHub UI. +* [ ] Strengthen the Makefile test target to run go test -race -cover ./... and write a coverage profile under bin/coverage.out so local testing matches planned CI gates, smallest change is editing the Makefile test recipe only; scope Makefile; low risk; DoD includes running make test from a clean clone succeeds with race detector enabled and coverage file produced, tests otherwise unchanged and green with no coverage regression, all quality gates green with no new findings, peer review completed, verification by observing race-enabled run and generated coverage file, rollback by reverting the Makefile edit. +* [ ] Switch submodule URLs in .gitmodules from SSH to HTTPS to simplify CI clones without SSH keys (public submodules .cursor/rules and scripts), smallest change is editing .gitmodules and running git submodule sync; scope repository hygiene; low risk; DoD includes actions/checkout with submodules enabled succeeding in a new CI workflow, tests unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by observing CI clone success without extra SSH setup and local git submodule update --init works, rollback by reverting the .gitmodules change and syncing. +* [ ] Add a make vuln target that installs and runs govulncheck ./... 
and wire it into CI after unit tests to fail builds on known vulnerabilities, smallest change is adding the target in the Makefile and a CI step once workflows exist; scope security scanning; low risk; DoD includes make vuln passing locally on a clean tree, CI green with the new step and no new findings, tests unchanged and green with no coverage regression, all gates green, peer review completed, verification by introducing a known vulnerable transient dependency in a branch to see CI fail then removing it restores green, rollback by reverting the Makefile and CI edits. +* [ ] Add secret detection with gitleaks via a make secrets target and a CI step using the official action with a minimal .gitleaks.toml allowlist to reduce false positives, smallest change is adding the config, target and CI step; scope security hygiene; low risk; DoD includes make secrets passing locally on a clean tree, CI green with no new leaks, tests unchanged and green with no coverage regression, all gates green, peer review completed, verification by committing a fake test secret in a branch to observe CI fail then removing it restores green, rollback by reverting the config and CI step. +* [ ] Standardize all Bash scripts under scripts to use env bash shebang and strict mode set -euo pipefail and add a make shellcheck target with a minimal configuration plus a CI step to run it, because several scripts currently lack pipefail or only set -u which weakens detection of failures; smallest change is editing script headers, adding one Makefile target, and a short workflow step; scope scripts, Makefile, CI; low risk; DoD includes an initial failing run then passing with zero shellcheck warnings, tests unchanged and green with no coverage regression, all quality gates green, peer review completed, verification by running shellcheck locally and observing CI green, rollback by reverting the script, Makefile, and workflow edits. 
+* [ ] CI smoke job for tools — workflow builds tools, runs each binary with sample stdin, and runs an agent loop against a fake API; no external network dependence; DoD: green in PRs, artifacts attached, linked to issue.
+* [ ] Add GitHub CodeQL code scanning for Go by committing .github/workflows/codeql.yml using the official CodeQL action with default queries on push and pull_request so security analysis runs automatically; evidence: no CodeQL workflow exists today; scope .github/workflows only; low risk and independent; DoD: CodeQL job runs green with zero new alerts, tests unchanged and green with no coverage regression, all quality gates green (vet, gofmt, golangci-lint, staticcheck, security and secret detection) with no new findings, peer review completed; verify by viewing the Security > Code scanning alerts page and the workflow run logs; rollback by deleting the workflow file.
+* [ ] Document the release process in docs/releasing.md including tagging, artifact naming, checksums and verification, and rollback guidance aligned with the planned release workflow; smallest change is a single page and links from README and the future release workflow; scope documentation; low risk; DoD includes dry-run steps validated locally without publishing, tests unchanged and green, all quality gates green, peer review completed; verification by executing the documented commands in dry run; rollback by removing the doc and links; traceability https://github.com/hyperifyio/goagent/issues/214.
+* [ ] Add release workflow for static binaries and checksums: `.github/workflows/release.yml` triggered on tags like `v*`; build `agentcli` for `linux,darwin,windows` × `amd64,arm64` with `CGO_ENABLED=0`; name outputs `agentcli_<os>_<arch>` (Windows `.exe`); generate `SHA256SUMS` and `SHA256SUMS.sig` (optional GPG); create GitHub Release and upload artifacts and checksums; document in README how to download and verify.
+* [ ] Add a make sbom target that uses syft (or cyclonedx-gomod as a fallback if syft is unavailable) to produce a CycloneDX SBOM at reports/sbom.json from the current module and wire it into the release workflow to upload as an asset, because the repo currently lacks an SBOM which weakens supply-chain visibility; smallest change is adding one Makefile target and a short CI step with a docs sentence; scope Makefile and CI; low risk; DoD includes failing first when the tool is missing then passing with deterministic output on a clean clone, tests unchanged and green with no coverage regression, all quality gates green (vet, format, golangci-lint, static analysis, security and secret scans) with no new findings, backward compatibility preserved, peer review completed, verification by running make sbom locally and seeing reports/sbom.json and by confirming the asset on a tagged release, rollback by reverting the Makefile and workflow edits. +* [ ] Tag and publish `v0.1.0` once CI is green and docs/tests are complete: ensure `README.md` has usage, examples, and limitations (no streaming, sequential tool calls only); ADR-0001 and sequence diagram present; unit and integration tests passing; create annotated tag `git tag -a v0.1.0 -m "MVP non-interactive agent CLI with OpenAI-compatible tools"` and `git push --tags`. +* [ ] Add GitHub Dependabot at .github/dependabot.yml to update Go modules weekly and GitHub Actions monthly so dependencies and CI actions stay current with minimal noise; smallest change is committing a single dependabot.yml file; scope repository hygiene; low risk and independent; DoD includes Dependabot PRs opening on schedule with passing CI and no coverage regression, all quality gates green (vet, format, lint, security and secret detection) with no new findings, peer review completed; verification by observing the first PRs or running a local preview, rollback by removing the configuration file. 
+* [ ] Add CODEOWNERS at .github/CODEOWNERS to automatically request reviews from maintainers; smallest change is committing one CODEOWNERS file mapping paths to the maintainers team; scope repository hygiene; low risk; DoD includes PRs auto-requesting reviews from owners, tests unchanged and green with no coverage regression, all quality gates green, peer review completed; verify by opening a test pull request and observing requested reviewers; rollback by removing the CODEOWNERS file. +* [ ] Add SECURITY.md under .github with a concise vulnerability disclosure policy including contact, supported versions, and response expectations, and link it from README; smallest change is a single Markdown file and one README sentence; scope security documentation; low risk; DoD includes the GitHub Security tab surfacing the policy, tests unchanged and green with no coverage regression, all quality gates green, peer review completed; verify by visiting the repository Security page, rollback by removing the file and README link. +* [ ] Add pull request template at .github/PULL_REQUEST_TEMPLATE.md prompting for canonical issue URL, intent, Definition of Done checklist, and test plan; smallest change is committing one template file; scope contribution workflow; low risk; DoD includes new pull requests prefilled with the template, tests unchanged and green with no coverage regression, all quality gates green, peer review completed; verify by opening a draft pull request and seeing the template, rollback by removing the template file. 
+* [ ] Add issue templates at .github/ISSUE_TEMPLATE/bug_report.yml and .github/ISSUE_TEMPLATE/feature_request.yml plus config.yml to disable blank issues; smallest change is committing two minimal YAML templates and one config; scope issue hygiene; low risk; DoD includes the New issue page showing the templates with required fields, tests unchanged and green with no coverage regression, all quality gates green, peer review completed; verify by clicking New issue and observing the choices, rollback by removing the templates. +Blocked: SSH handshake fails with "Permission denied (publickey)"; next step is to configure GitHub authentication (add SSH key or run gh auth login with write access), then push this branch and open a PR to trigger CI and capture the run URL for verification. +Next: authenticate the GitHub CLI for github.com using device flow with SSH protocol and admin:public_key scope, add the existing SSH public key to the account, verify ssh -T git@github.com succeeds, then push the branch and open a PR to trigger CI. +* [ ] OBSOLETE: Add GitHub CodeQL code scanning for Go by committing .github/workflows/codeql.yml using the official CodeQL action with default queries on push and pull_request so security analysis runs automatically; evidence: no CodeQL workflow exists today; scope .github/workflows only; low risk and independent; DoD: CodeQL job runs green with zero new alerts, tests unchanged and green with no coverage regression, all quality gates green (vet, gofmt, golangci-lint, staticcheck, security and secret detection) with no new findings, peer review completed; verify by viewing the Security > Code scanning alerts page and the workflow run logs; rollback by deleting the workflow file. 
+* [ ] Add jq to README Installation prerequisites with OS-specific install snippets (apt, brew, choco) because README examples and runbooks pipe tool output to jq but prerequisites omit it; smallest change is editing README only; scope documentation; low risk and independent; DoD: README renders correctly on GitHub, examples run from a clean clone with jq installed, tests unchanged and green with no coverage regression, all quality gates green, peer review completed; verify by executing one README example end-to-end with jq and observing the pretty-printed JSON; rollback by reverting the README edit. +* [ ] Document and operationalize the specific failure mode you hit: “golangci-lint: unsupported export data (internal/goarch version: 2)”. Smallest change: add a subsection to `docs/runbooks/troubleshooting.md` and `docs/operations/ci-quality-gates.md` titled exactly that with symptoms, cause (Go/golangci-lint mismatch), and the resolution steps: run `make check-go-version`, ensure ADR-0003 policy, run `make install-golangci`, rerun `make lint`. DoD: docs render on GitHub, steps verified, CI green, peer review completed. +* [ ] (Contingency, removable later) Add a temporary CI lint job that runs with Go 1.23.x if the pinned linter still fails on 1.24.x; tests and build jobs stay on `go-version-file: go.mod`. Smallest change: duplicate the `lint` job as `lint-compat`, set `actions/setup-go@v5` `go-version: "1.23.x"`, run `make lint`, mark with a TODO comment referencing the issue to remove once ADR-0003 policy is fully green with 1.24.x. DoD: CI green with both lint jobs; issue description captures the intent to delete this path; peer review completed; rollback by deleting the `lint-compat` job. +* [ ] Add a one-shot verifier in CI that prints versions for traceability. 
Smallest change: in the `lint` job, after setup and install, run `go version && $(GOBIN)/golangci-lint version` and emit them as step outputs (name the step “toolchain-versions”); no functional changes. DoD: CI logs show versions on all OSes; no gate regressions; peer review completed. + - [ ] [S18:verify-docs-ci-0006] Run `go test ./...` and `make lint`; verify ADR‑0006 links render and resolve via GitHub preview; DoD: tests/lint green and all links verified. +* [ ] Add Windows ripgrep install snippet to README Developer prerequisites (Chocolatey: choco install ripgrep -y) because README currently lists apt/brew only and local Windows developers hit ensure-rg unsupported OS; smallest change is adding a Windows bullet; scope documentation only; low risk and independent; DoD includes README rendering with the new Windows example, local Windows setup succeeds (rg --version prints) and make lint/test remain green with no coverage regression, all quality gates green; verify by following the snippet on a Windows dev machine; rollback by reverting the README edit. 
+* [ ] Deduplicate the duplicated CodeQL checklist entry in FUTURE_CHECKLIST.md by deleting one of the two identical lines so only a single CodeQL task remains; smallest change is to remove one duplicate line; scope documentation hygiene limited to FUTURE_CHECKLIST.md; low risk and independent; DoD includes the file showing only one unchecked CodeQL task, tests and lint unchanged and green with no coverage regression, all quality gates green, verification by searching the file and observing exactly one CodeQL line, rollback by reverting the edit +* [ ] Add a Windows make installation snippet to the README developer prerequisites using Chocolatey choco install make -y because README uses make commands but Windows setup omits this requirement; smallest change is adding one Windows bullet near the ripgrep and golangci-lint instructions; scope documentation only; low risk; DoD includes README rendering correctly and a successful make --version and make lint test build on a Windows environment with all quality gates green and no coverage regression, verification by running make --version and executing a sample make target on windows-latest, rollback by reverting the README edit +* [ ] Ignore generated image outputs by adding assets/ to .gitignore so img_create examples do not accidentally commit large binaries; smallest change is appending a single ignore entry under a clear comment; scope .gitignore only; low risk and independent; DoD includes saving an output under assets then seeing git status clean, tests and lint unchanged and green with no coverage regression, all quality gates green, verification by creating an image under assets and observing no tracked changes, rollback by removing the ignore entry +* [ ] Remove the tracked stray root-level rss_fetch binary and prevent reintroduction by adding rss_fetch and rss_fetch.exe to .gitignore; evidence: git ls-files --error-unmatch rss_fetch succeeds and file rss_fetch identifies an ELF built outside tools/bin; scope 
.gitignore and repository hygiene only; low risk and independent; DoD: git rm rss_fetch so the path is untracked, append two ignore lines, make build-tools produces only tools/bin/rss_fetch, git status stays clean after builds across OSes, tests and lint unchanged and green with no coverage regression, all quality gates green; verification: rerun git ls-files --error-unmatch rss_fetch (should fail) and confirm tools/bin/rss_fetch exists while the root-level binary is ignored; rollback: revert the .gitignore edit and restore the file if needed
+* [ ] Upload coverage profile and summary in CI by attaching bin/coverage.out as an artifact and printing total coverage using go tool cover func; smallest change is adding two steps in .github/workflows/ci.yml after the test step; scope CI; low risk; depends on the Makefile test target producing a coverage profile as already planned; DoD includes CI artifacts containing coverage.out and logs showing total coverage percentage, tests otherwise unchanged and green with no coverage regression, all quality gates green, peer review completed; verify by inspecting CI artifacts and logs, rollback by removing the new steps.
+
+# Starlark tool — `code.sandbox.starlark.run` (pure Go, deterministic, hermetic)
+
+(Starlark is designed for hermetic, deterministic execution; `go.starlark.net/starlark` executes from a string with no FS/net/clock unless you expose it.)
+ +* [ ] Implement tool contract and handler for `code.sandbox.starlark.run` that executes source from memory: create `internal/tools/starlarkrun/handler.go` exposing `Name() string { return "code.sandbox.starlark.run" }` and `Call(ctx, json.RawMessage) (ToolResult, error)` which accepts `{source:string,input:string,limits:{wall_ms:int,output_kb:int},caps:{}}`; parse JSON, run Starlark with `ExecFile` using a `Thread` and `predeclared` functions `read_input()->str` and `emit(str)`; enforce wall-time via `context.WithTimeout` and cap emitted bytes with a bounded buffer; return `{stdout:string}`; DoD: unit test passes showing a script `emit(read_input())` returns input, times out on `while True: pass`, and output > limit is truncated with a clear error. +* [ ] Add dependency and wiring: append `require go.starlark.net vX` to `go.mod`, create `internal/tools/starlarkrun/module.go` registering the tool into the tool registry (constructor + dependency-free init), and update `README.md` “Tools” table with usage and example input/output; DoD: `go build ./...` succeeds locally, registry lists the tool, README shows a runnable curl example. +* [ ] Harden capabilities (deny-by-default): ensure only `emit` and `read_input` are available (no FS, net, clock); do not bind any `os`, `time`, or custom builtins; add negative tests that attempt to import or access such capabilities and expect failure; DoD: tests demonstrate no ambient side effects are reachable and only declared builtins exist. +* [ ] Determinism test: add a table test running the same `source+input` 100× and asserting identical output and errors; DoD: flaky rate 0/100 locally; test name includes “deterministic”; comment cites Starlark determinism; tests green locally; DoD documented in test. 
+* [ ] Structured errors & shared schema: return standardized errors `{code:string,message:string,details?:object}` for timeouts (`TIMEOUT`), output limit exceeded (`OUTPUT_LIMIT`), and evaluation failures (`EVAL_ERROR`); update shared error schema doc and example in README; DoD: unit tests assert JSON shape for each failure mode and README section is present. +* [ ] Observability: add structured logs (trace id, tool name, wall\_ms, bytes\_out) and emit OpenTelemetry span attributes; DoD: local run shows JSON logs with those fields and a span named `tools.starlark.run`. +* [ ] Contract examples: add `docs/interfaces/code.sandbox.starlark.run.md` with request/response examples (valid, timeout, error), security notes, and performance caveats; DoD: doc renders and is linked from main docs. + +## Review 2025-08-19 +* [ ] Align img_create manifest schema with implementation and docs by adding an extras object parameter to tools.json under the img_create tool (matches the Extras field in tools/cmd/img_create/img_create.go and the docs reference) so models can declare and use it; smallest change is to add properties.extras with type object under the existing schema without touching other fields; scope tools.json only; low risk and independent; Definition of Done: jq shows extras present and type object for img_create, make verify-manifest-paths OK, go build and make lint test green with no coverage regression, all quality gates green with no new findings, peer review completed; verification by running a local agent call that passes extras and succeeds; rollback by reverting the tools.json edit. 
+* [ ] Harmonize image HTTP timeout environment variables for the img_create tool by supporting OAI_IMAGE_HTTP_TIMEOUT (duration) and HTTP_TIMEOUT_MS (milliseconds) in addition to the current OAI_HTTP_TIMEOUT with precedence OAI_IMAGE_HTTP_TIMEOUT over OAI_HTTP_TIMEOUT over HTTP_TIMEOUT_MS; smallest change is updating the httpTimeout function in tools/cmd/img_create/img_create.go to read the new envs, adding both variables to tools.json envPassthrough for img_create, and updating docs/reference/img_create.md to describe the precedence; scope tool code, tools.json, and docs; low risk and independent; Definition of Done: table tests cover precedence and parsing, go build and make lint test green with no coverage regression, all quality gates green with no new findings, peer review completed; verification by setting each env in isolation and observing effective timeout in a short mock call; rollback by reverting code, manifest, and docs edits. +* [ ] Pass through SEARXNG_ALLOW_LOCAL for searxng_search so local-only fixtures are allowed during development without relaxing the default SSRF guard; smallest change is to add SEARXNG_ALLOW_LOCAL to envPassthrough for searxng_search in tools.json and add a short note to docs/reference/searxng_search.md describing the opt-in; scope tools.json and docs; low risk and independent; Definition of Done includes verify-manifest-paths OK, go build and make lint test green with no coverage regression, all quality gates green with no new findings, peer review completed; verification by running the tool once with and once without SEARXNG_ALLOW_LOCAL=1 against 127.0.0.1 and observing allowed versus blocked; rollback by reverting the tools.json and docs edits. 
+* [ ] Pass through HTTP_FETCH_ALLOW_LOCAL for http_fetch to enable hermetic local fixtures when explicitly opted in; smallest change is to add HTTP_FETCH_ALLOW_LOCAL to envPassthrough in tools.json for http_fetch and update the manifest snippet in docs/reference/http_fetch.md to include it; scope tools.json and docs; low risk and independent; Definition of Done includes verify-manifest-paths OK, go build and make lint test green with no coverage regression, all quality gates green with no new findings, peer review completed; verification by calling http_fetch to 127.0.0.1 with and without HTTP_FETCH_ALLOW_LOCAL=1 and observing default block versus allowed; rollback by reverting the tools.json and docs edits.
+* [ ] Standardize fs_move stderr to structured JSON errors only by replacing the non-JSON `close src` and `close dst` prints in defers with JSON-formatted errors or omitting them when non-fatal to match other tools’ behavior; smallest change is editing tools/cmd/fs_move/fs_move.go only and updating tests if they assert on those lines; scope single tool; low risk and independent; Definition of Done includes go test for the tool and the repository passing, make lint green with no new findings, all quality gates green with no new findings, peer review completed; verification by provoking a close error and observing a single-line JSON error on stderr; rollback by reverting the edit.
+
+## Security sandbox feature
+
+* [ ] Slice PR #08 — assistant tool-call loop and validator wiring: `git switch -c pr/08-tool-loop main && git restore --source develop --staged --worktree cmd/agentcli/tool_loop.go internal/oai/validator.go internal/oai/validator_test.go cmd/agentcli/tool_loop_test.go`; run tests; commit/push/PR; Definition of Done: message-sequence validator blocks stray tool messages, loop test green, only loop/validator files affected.
+* [ ] Preflight: implement a kernel and environment probe that confirms Linux ≥ 5.4, unprivileged user namespaces enabled, seccomp available, and whether CLONE\_NEWNET is permitted for unprivileged users; if any prerequisite is missing, print a precise remediation hint and refuse to run unless `--sandbox=permissive` is set, and record findings in an audit-ready struct; DoD: running `agentcli --sandbox-check` prints a single-line status with kernel version, userns=yes/no, seccomp=yes/no, newnet=yes/no, and exits nonzero only when mandatory features are absent without permissive override. +* [ ] Policy schema: define a deny-by-default sandbox policy (JSON/YAML + env/flag overrides) with fields `filesystem.bundle.binaries[]` (absolute host paths), `filesystem.inputs[]` (host→guest RO), `filesystem.outputs[]` (guest paths under /out), `env.allow[]` (names or key=value), `resources` (timeout, max\_output\_bytes, rlimits for NOFILE/NPROC/AS), and `network.mode` with values `off`, `allow_all`, or `proxy_allowlist` plus `network.allow[]` hostnames/IP\:ports; include `audit.redact[]` for secrets; DoD: policy loader merges defaults, file, and flags deterministically and `--sandbox-dry-run --explain` prints the effective policy in one block. +* [ ] Bundle assembly directory: create a secure per-run temp dir `<runtime>/agentcli-bundles/<run-id>` with `0700`, subdirs `bin`, `etc`, `in`, `out`, `tmp`, and copy exactly the allowlisted Go executables plus a copy of the current `agentcli` into `bin`; reject non-regular files and symlinks, verify ELF arch matches the running OS, and cap total bundle size; DoD: after assembly, `bin` contains only declared executables (including `agentcli`), `in/out/tmp/etc` exist, bundle size ≤ cap, and a manifest file lists each file with size and SHA-256. 
+* [ ] Dynamic dependency handling guard: detect dynamically linked binaries (cgo) by inspecting ELF headers and `INTERP`; if any are found and `policy.filesystem.allow_dynamic=false`, fail with a message suggesting a static build or enabling dynamic deps; if allowed, copy the interpreter and needed `.so` files into `etc/lib` and rewrite `ld.so.conf` minimally inside the bundle; DoD: dynamic binaries either run with their copied deps under the bundle or the run fails early with a clear diagnostic. +* [ ] Bundle manifest and cache: compute a content-addressed manifest hash over the policy hash and bundle file hashes, store the bundle under an LRU cache keyed by this hash, and when identical inputs are requested, materialize the bundle via hardlinks or reflinks if available; DoD: repeated runs with the same policy+binaries reuse the cached bundle and log a cache hit with the manifest hash. +* [ ] Sandbox process creation: spawn a child with new USER, MOUNT, PID, UTS, and IPC namespaces (and NEWNET only when `network.mode=off`), map uid/gid 0 inside to caller uid/gid outside, set `GidMappingsEnableSetgroups=false`, set `no_new_privs=1`, and ensure the child becomes PID 1 in its PID namespace; DoD: child sees itself as uid 0/PID 1, parent remains unaffected, and a test program confirms user mapping without requiring root. 
+* [ ] Mount plan (Linux 5.4-compatible): set mount propagation to private, bind-mount the bundle root read-only at `/bundle` with `nosuid,nodev,noatime`, bind-mount a minimal read-only `/dev` tree (`null`, `zero`, `urandom`), mount `proc` at `/proc`, create tmpfs at `/work` and `/tmp` with size caps, bind-mount declared inputs read-only to their guest paths, create `/out` as a directory on `/work` or bind a host output dir if requested, and mark all mounts `noexec` except `/bundle/bin`; DoD: inside the sandbox `mountinfo` shows the expected tree with `/bundle` ro and only `/bundle/bin` executable, `/proc` present, `/dev` minimal, and host paths outside declared inputs not visible. +* [ ] Root transition: chroot to `/` using the assembled mount tree with `/bundle` available at `/bundle`, set working dir to `/work`, and drop any residual mount permissions by remounting sensitive paths ro; DoD: `getcwd` returns `/work`, path traversal cannot escape the chroot, and attempts to write to `/bundle` fail with EROFS. +* [ ] Environment initialization: start from an empty environment, set `PATH=/bundle/bin` and `HOME=/work`, inject only allowlisted variables (support `NAME` passthrough and explicit `NAME=value` literals), and redact any keys listed under `audit.redact` when logging; DoD: a probe inside prints only the allowed keys, and execution of undeclared system binaries fails due to PATH isolation. +* [ ] Seccomp hardening: install a strict seccomp-bpf profile after mounts and before exec that allows basic file I/O, signals, time, and standard syscalls, denies `ptrace`, `keyctl`, `bpf`, module/sysctl, `reboot`, raw sockets, and new mounts; when `network.mode=off`, also deny all socket syscalls; when `network.mode=allow_all` or `proxy_allowlist`, allow stream sockets but continue to deny raw and packet sockets; DoD: adversarial test binaries attempting blocked syscalls receive EPERM and are logged in the audit trail. 
+* [ ] Rlimits baseline (no cgroups required): set RLIMIT\_NOFILE, RLIMIT\_NPROC, RLIMIT\_CORE=0, and an address-space or RSS cap to simulate memory limits; enforce a wall-clock timeout in the supervisor; DoD: fork-bomb test hits RLIMIT\_NPROC and is terminated, huge allocation triggers failure, and runs exceeding the timeout are cleanly killed with a clear timeout status. +* [ ] Network mode “off”: if selected, create a new net namespace, leave loopback down, and do not attach any interfaces so all outbound connects fail immediately; DoD: inside sandbox, `connect` to external addresses fails, while the tool continues to run without network access. +* [ ] Network mode “allow\_all”: if selected, do not create a new net namespace (or create one only if it would still have host connectivity, which is generally not possible rootless, so default to host netns), and keep seccomp blocking raw sockets; DoD: the tool can reach the network as it normally would, but attempts to create raw/packet sockets fail. +* [ ] Network mode “proxy\_allowlist” (best-effort user-space proxy): implement an in-process HTTP/HTTPS CONNECT and SOCKS5 proxy that listens on `127.0.0.1:<random>` and enforces an allowlist of hostnames/IP\:ports, start it only for this run, set `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` inside the sandbox to point at it, and log and deny connections to non-allowlisted destinations; document that programs ignoring proxy env vars will bypass this and recommend “off” mode for strict isolation; DoD: tools honoring proxy variables can only reach allowlisted destinations and denied attempts are logged with destination details; tools ignoring the proxy still run but are not counted as compliant in tests, and the mode is labeled best-effort. 
+* [ ] PID1 supervisor and signal/exit handling: run a tiny supervisor as PID 1 that execs the tool, forwards SIGTERM/SIGINT, reaps zombies, enforces the timeout, and on exit collects status and reasons (normal, timeout, rlimit), then performs teardown; DoD: killing the parent or sending SIGINT cleanly terminates the tool and all children, leaving no stray processes. +* [ ] Output collection and size enforcement: require all produced artifacts to be written under `/out`, enforce per-file and total size caps, compute SHA-256 for each file, and copy artifacts to a designated host directory atomically; DoD: runs producing files under `/out` are collected with hashes recorded, attempts to write outside `/out` are ignored and reported, and oversize outputs cause a clear failure with partial files discarded. +* [ ] Audit record: emit a single JSON line per run containing tool name, policy revision and hash, bundle manifest hash, kernel features detected, namespace flags, seccomp profile id, rlimits, start/stop timestamps, exit code, timeout/OOM flags, network mode and any contacted destinations (from the proxy when used), output file list with sizes and hashes, and redacted env keys as configured; DoD: audit records are produced for success and failure paths and pass JSON schema validation. +* [ ] CLI and DX: add `--sandbox` master switch (on/off/permissive), `--sandbox-policy=<file-or-json>`, `--sandbox-network=<off|allow_all|proxy_allowlist>`, `--sandbox-dry-run --explain`, and `--sandbox-check`; ensure helpful error messages and a deterministic “plan preview” that prints mounts, env, PATH contents, and network mode without executing the tool; DoD: flags are documented, parsed in any order, and `--sandbox-dry-run --explain` outputs a human-readable plan matching the effective policy. 
+* [ ] Teardown and hygiene: on normal exit or timeout, unmount in reverse order, close all inherited file descriptors except stdio, remove the per-run temp directory, and, unless `--sandbox-keep-bundle` is set, keep only cached bundles referenced by the LRU; DoD: after runs, no mounts or temp dirs remain, `ls -l /proc/<ppid>/fd` shows no leaked FDs, and cache size stays under the configured ceiling. +* [ ] Security regression tests: create adversarial fixtures that try file escape via symlinks, writing to `/bundle`, executing binaries from `/work`, opening raw sockets, using `ptrace`, and forking excessively; run them in each network mode and assert denials and logs match expectations; DoD: CI-like local test suite passes on Linux 5.4+ with userns enabled and no root privileges. +* [ ] Determinism and caching tests: verify that identical inputs produce the same manifest hash, the same audit fields except timestamps, and a cache hit on the second run; DoD: two consecutive runs log identical manifest hashes and report “cache: hit” on the second. + + + + +* [ ] Add image model flag: implement `-image-model string` (env `OAI_IMAGE_MODEL`, default `"gpt-image-1"`) in `cmd/agentcli/flags.go`; plumb to `internal/tools/image/driver_openai.go` (or analogous file invoking the Images API) so requests use this model; DoD: `--help` shows flag, unit test stubs the image driver and asserts received model equals CLI value and equals default when unset. + - [ ] [S90:cli-skeleton-init] Create minimal CLI skeleton under `cmd/agentcli/main.go` with `type cliConfig struct{ imageModel string }`, a `parseFlags()` registering `-image-model` and `--help`, and a `printResolvedConfig()` that prints JSON with an `image` section; DoD: `make build` produces `bin/agentcli`, `./bin/agentcli --help` prints usage, and `./bin/agentcli -print-config | jq -e '.image|type=="object"'` passes. 
+ - [ ] [S91:image-model-flag-parse] Implement `-image-model` with precedence `flag > env(OAI_IMAGE_MODEL) > default("gpt-image-1")` inside `cmd/agentcli/main.go::{parseFlags}` storing into `cliConfig.imageModel`; add `cmd/agentcli/flags_test.go::TestImageModelFlagPrecedence` covering (a) default, (b) env only, (c) flag overrides env; DoD: `go test ./cmd/agentcli -run TestImageModelFlagPrecedence` green and `./bin/agentcli --help` shows the flag and env in description. + - [ ] [S92:image-options-helper] Add `func imageOptionsFromConfig(cfg cliConfig) image.Options` in `cmd/agentcli/main.go` returning `image.NewOptions(cfg.imageModel)`; add `cmd/agentcli/main_test.go::TestImageOptionsFromConfig_Model` asserting pass-through of the model string; DoD: `go test ./cmd/agentcli -run TestImageOptionsFromConfig_Model` green. + - [ ] [S93:print-config-image-model] Extend `printResolvedConfig()` in `cmd/agentcli/main.go` to include `"model": <cliConfig.imageModel>` under `.image` without redaction; add `cmd/agentcli/flags_test.go::TestPrintConfig_IncludesImageModel` asserting jq `.image.model=="gpt-image-1"` by default and equals override with env/flag; DoD: tests green and `./bin/agentcli -print-config | jq -e '.image.model|type=="string"'` passes. + - [ ] [S94:driver-openai-request-body] Introduce `internal/tools/image/driver_openai.go` with `package image` and `func BuildImagesRequestBody(prompt string, n int, size string, opts Options, extras map[string]any) (map[string]any, error)` that sets `model: opts.Model` and preserves existing keys (`prompt`,`n`,`size`,`response_format:"b64_json"`), shallow‑merging sanitized `extras` like `tools/cmd/img_create/sanitizeExtras`; add `internal/tools/image/driver_openai_test.go` golden asserting `model` equals the provided `Options.Model` and extras do not override core keys; DoD: `go test ./internal/tools/image -run BuildImagesRequestBody` green. 
+ - [ ] [S95:cli-reference-doc] Update `docs/reference/cli-reference.md` to document `-image-model` (default, env, precedence) and mention it is plumbed via `internal/tools/image.Options.Model`; include a copy‑paste example showing `-print-config` with `.image.model`; DoD: markdown renders on GitHub and `rg -n "image-model" docs/reference/cli-reference.md` finds the section. +* [ ] [S02:print-config-image-model] Extend `printResolvedConfig` in `cmd/agentcli/main.go` to include `"model"` under the existing `image` section (value from `cliConfig.imageModel`); DoD: `./bin/agentcli -print-config | jq -e '.image.model=="gpt-image-1"'` passes by default and shows the overridden value when `-image-model` or `OAI_IMAGE_MODEL` is set; tests and lint green. +* [ ] [S04:plumb-model-to-options] Add helper `imageOptionsFromConfig(cfg cliConfig) image.Options` in `cmd/agentcli/main.go` returning `image.NewOptions(cfg.imageModel)`; add `cmd/agentcli/main_test.go::TestImageOptionsFromConfig_Model` asserting pass-through; DoD: build/test/lint green; no behavior change beyond availability of options. +* [ ] Add image prompt file support (repeatable): implement `-image-prompt string` and `-image-prompt-file string` (repeatable; `'-'` for STDIN) in `cmd/agentcli/flags.go` with mutual exclusivity per source (a single invocation can combine multiple `-image-prompt-file` values but not mix with `-image-prompt`); concatenate in order with `\n\n` to form the final prompt stored in `ImageConfig.Prompt`; DoD: `--help` documents repeatability and `'-'`, unit tests cover (a) multi file order preservation, (b) stdin read, (c) exclusivity error. 
+* [ ] Add per-phase tools configuration for image flows: implement `-image-tools string` (path to `tools.json`, optional) and `-image-tools-allow-external` (default `false`) in `cmd/agentcli/flags.go`; when executing the “image” tool call, if `-image-tools-allow-external` and `-image-tools` set, load those tool specs instead of `-tools`; otherwise fall back to `-tools`; DoD: `--help` documents behavior, integration test uses a tiny image tool registry file to prove override occurs only for image calls. +* [ ] Ensure `-print-config` includes resolved image and prep sections: extend config printer `cmd/agentcli/print_config.go` to show `chat`, `prep`, and `image` subsections with all resolved knobs (timeouts, retries, backoff, base_url, model, and parameter pass-throughs), redacting any `*_api_key`; DoD: running `-print-config` prints three sections with redactions and inherited values shown concretely. +* [ ] Redact API keys consistently in logs and debug dumps: update `-debug` JSON dump path so any `api_key` under chat/prep/image is masked except last 4 characters; include unit tests on the pretty-printed JSON; DoD: enabling `-debug` shows masked keys in stderr and tests assert masking. +* [ ] Extend `--capabilities` output to show phase-specific backends: print whether chat/prep/image are targeting the same or different `base_url`/`model` pairs; DoD: running `-capabilities` with divergent `-image-base-url` or `-image-model` shows a distinct “image backend” line. +* [ ] Add flags for pre-stage prompt override: implement `-prep-prompt string` (repeatable) and `-prep-file string` (repeatable; `'-'` for STDIN) in `cmd/agentcli/flags.go`; enforce mutual exclusivity between `-prep-prompt` and `-prep-file`; add env vars `OAI_PREP_PROMPT` and `OAI_PREP_PROMPT_FILE` (comma-separated file list, `-` allowed); precedence is `flag > env > default`; DoD: `agentcli --help` shows both flags and envs incl. 
exclusivity and precedence, table tests in `cmd/agentcli/flags_test.go` cover (a) multi-file ordering, (b) STDIN read, (c) exclusivity error text, (d) precedence. +* [ ] Error handling and user feedback: on invalid pre-stage output (malformed JSON, wrong roles), print a concise error to stderr with a one-line remediation hint and exit code 2 (unless `-prep-enabled=false`); DoD: tests feed bad payloads and assert error text and exit code behavior. +* [ ] Config printer coverage: update `-print-config` to show a `prep` section including `model`, `base_url`, `timeouts/retries/backoff`, `profile`, `source: override|default`, and booleans for `tools_allow_external`; redact any API keys; DoD: manual run shows fields, unit test checks redaction and source flag. +* [ ] README updates and examples: add a “Pre-stage prompt overrides” section with examples: (1) use embedded default only, (2) override via `-prep-file` (multiple files), (3) override via `-prep-prompt`, (4) combine with `-prep-dry-run` for inspection; include an end-to-end example that generates image instructions which the main call then uses; DoD: README builds cleanly and examples copy/paste work with a stub server. +* [ ] Backward compatibility guard: add a regression test suite `cmd/agentcli/compat_prestage_test.go` ensuring that when no new flags are passed, behavior matches pre-change baselines (same messages to main call given the same pre-stage answers); DoD: tests green and snapshot differences limited to new metadata fields only. +* [ ] Telemetry/verbosity alignment: ensure `-verbose` prints pre-stage validator warnings (if any) and `-quiet` suppresses non-final pre-stage logs; DoD: tests assert routing consistent with existing channel routing (`final→stdout`, others→stderr unless `-quiet`). 
+* [ ] Implement tool and handler for `code.sandbox.wasm.run`: create `internal/tools/wasmrun/handler.go` to accept `{module_b64:string,entry?:string,input:string,limits:{wall_ms:int,mem_pages:int,output_kb:int}}`; decode base64 to bytes, create a wazero runtime, instantiate a minimal host module `env.emit(ptr:uint32,len:uint32)` that reads guest memory and appends to a bounded buffer; instantiate guest with `NewModuleConfig()` and set memory limit pages; call exported function `entry||"main"` under `context.WithTimeout`; DoD: unit test invokes a tiny embedded `.wasm` that writes “ok” via `emit` and returns success; timeout returns `TIMEOUT`; output limit enforced. +* [ ] Memory safety & limits: configure module to a maximum of `limits.mem_pages` (e.g., 16 MiB = 256 pages) and assert growth beyond limit errors; add test that attempts `memory.grow` over cap → standardized `MEMORY_LIMIT`; DoD: test passes and limit is documented in the interface. +* [ ] Structured errors & schema: map wazero traps (e.g., unreachable) and context cancels to `{code:"TRAP"|"TIMEOUT"|...}`; include `details.trap` when available; DoD: table tests cover trap, timeout, missing import, memory limit, OOB read. +* [ ] Embed a known-good sample module for tests: add a tiny `.wasm` (precompiled and stored as base64 in test file) that exports `main(ptr,len)` and calls `env.emit`; document how it was produced (e.g., TinyGo/Rust, but not built at test time). +* [ ] [S01:img-model-default-inject] Inject default model for `img_create` calls: add `enrichToolArgsForImage(name string, argsJSON string, cfg cliConfig) (string, error)` in `cmd/agentcli/main.go` and call it from `appendToolCallOutputs` before executing tools so that when `name=="img_create"` and `argsJSON` lacks `model`, it adds `{"model": cfg.imageModel}` without overriding explicit values; DoD: `make lint` green and `go test ./cmd/agentcli -run TestEnrichToolArgsForImage` passes. 
+* [ ] [S02:img-model-enrich-unit] Add `cmd/agentcli/main_test.go::TestEnrichToolArgsForImage_DefaultModel` and `..._PreservesExplicitModel` that pass `{}` and `{"model":"x"}` respectively to the helper and assert decoded `.model=="gpt-image-1"` by default and unchanged when present; DoD: `go test ./cmd/agentcli -run TestEnrichToolArgsForImage` green. +* [ ] [S03:img-model-integration] Add `cmd/agentcli/tools_integration_test.go::TestImgCreate_DefaultModelInjected` that builds a tiny `img_create` stand‑in which echoes stdin JSON, registers it in a temp `tools.json`, feeds an assistant tool_calls with `{"name":"img_create","arguments":"{}"}`, and asserts the appended tool message contains `"model":"gpt-image-1"`; DoD: `go test ./cmd/agentcli -run TestImgCreate_DefaultModelInjected` green on linux/darwin/windows. +* [ ] [S04:print-config-image-model] Extend `cmd/agentcli/flags_test.go::TestPrintConfig_IncludesImageParams` to also assert the printed `image.model` equals the resolved `cfg.imageModel`; DoD: `go test ./cmd/agentcli -run TestPrintConfig_IncludesImageParams` green. +* [ ] [S05:cli-reference-doc] Update `docs/reference/cli-reference.md` under `-image-model` to state the CLI injects this value when `img_create` args omit `model` (link `tools/cmd/img_create/img_create.go`); DoD: `make lint` green and doc renders on GitHub. +* [ ] [S06:readme-note-img-model] In README “Image generation tool (img_create)”, add a one‑sentence note that `-image-model` becomes the default for `img_create` calls when unspecified, plus a one‑line example; DoD: README renders and `rg -n "image-model" README.md` finds the new note. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..66a20e8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2025 Jaakko Heusala <jheusala@iki.fi> +Copyright (c) 2025 Heusala Group Ltd <info@hg.fi> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bf58682 --- /dev/null +++ b/Makefile @@ -0,0 +1,368 @@ +SHELL := /bin/bash + +GO ?= go +CGO_ENABLED ?= 0 +GOOS ?= $(shell $(GO) env GOOS) +GOARCH ?= $(shell $(GO) env GOARCH) + +# Reproducible builds: trim local paths, strip symbols, disable VCS stamping, +# and clear build id for identical binaries across clean builds +BUILD_FLAGS ?= -trimpath -buildvcs=false +LD_FLAGS ?= -s -w -buildid= + +# Pin golangci-lint to a version compatible with current Go +GOLANGCI_LINT_VERSION ?= v1.62.0 + +# Deterministic local bin directory for tool installs +GOBIN ?= $(CURDIR)/bin + +# Executable suffix for Windows builds +EXE := +ifeq ($(GOOS),windows) +EXE := .exe +endif + +# Canonical list of tool binaries built under tools/bin in stable order +TOOLS := \ + get_time \ + exec \ + fs_read_file \ + fs_write_file \ + fs_append_file \ + fs_rm \ + fs_move \ + fs_search \ + fs_mkdirp \ + fs_apply_patch \ + fs_read_lines \ + fs_edit_range \ + fs_listdir \ + fs_stat \ + img_create \ + http_fetch \ + searxng_search \ + robots_check \ + readability_extract \ + metadata_extract \ + pdf_extract \ + rss_fetch \ + wayback_lookup \ + wiki_query \ + openalex_search \ + crossref_search \ + github_search \ + citation_pack + +.PHONY: tidy build build-tools build-tool test clean clean-logs clean-all test-clean-logs lint lint-precheck fmt fmtcheck verify-manifest-paths bootstrap ensure-rg check-go-version install-golangci + +tidy: + $(GO) mod tidy + +build: + GOOS=$(GOOS) GOARCH=$(GOARCH) CGO_ENABLED=$(CGO_ENABLED) $(GO) build $(BUILD_FLAGS) -ldflags '$(LD_FLAGS)' -o bin/agentcli ./cmd/agentcli + +build-tools: + mkdir -p tools/bin + @set -e; \ + for t in $(TOOLS); do \ + echo "Building $$t"; \ + GOOS=$(GOOS) GOARCH=$(GOARCH) CGO_ENABLED=$(CGO_ENABLED) $(GO) build $(BUILD_FLAGS) -ldflags '$(LD_FLAGS)' -o tools/bin/$$t$(EXE) ./tools/cmd/$$t; \ + done + +# Build a single tool binary into tools/bin/$(NAME) +# Usage: make build-tool 
NAME=fs_read_file +build-tool: + @set -eo pipefail; \ + if [ -z "$(NAME)" ]; then \ + echo "Usage: make build-tool NAME=<name>"; \ + echo "Available tools: $(TOOLS)"; \ + exit 2; \ + fi; \ + case " $(TOOLS) " in \ + *" $(NAME) "*) ;; \ + *) echo "Unknown tool: $(NAME). Allowed: $(TOOLS)"; exit 2;; \ + esac; \ + mkdir -p tools/bin; \ + echo "Building $(NAME)"; \ + GOOS=$(GOOS) GOARCH=$(GOARCH) CGO_ENABLED=$(CGO_ENABLED) $(GO) build $(BUILD_FLAGS) -ldflags '$(LD_FLAGS)' -o tools/bin/$(NAME)$(EXE) ./tools/cmd/$(NAME) + +test: + $(GO) test ./... + +# Smoke test for image flow: run CLI with a deterministic prompt and require non-empty output +.PHONY: smoke-image +smoke-image: build build-tools + @set -euo pipefail; \ + if [ ! -x ./bin/agentcli ]; then echo "smoke-image: ./bin/agentcli missing"; exit 2; fi; \ + if [ ! -x ./tools/bin/img_create$(EXE) ]; then echo "smoke-image: ./tools/bin/img_create$(EXE) missing (run make build-tools)"; exit 2; fi; \ + # Require API base and key for a real run; allow user to point to a mock \ + : "$${OAI_IMAGE_BASE_URL:=$${OAI_BASE_URL:-}}"; \ + if [ -z "$$OAI_IMAGE_BASE_URL" ]; then echo "smoke-image: set OAI_IMAGE_BASE_URL or OAI_BASE_URL"; exit 2; fi; \ + : "$${OAI_API_KEY:-}"; \ + if [ -z "$$OAI_API_KEY" ]; then echo "smoke-image: set OAI_API_KEY"; exit 2; fi; \ + TMP=$$(mktemp -d 2>/dev/null || mktemp -d -t simg); \ + TOOLS_JSON="$$TMP/tools.json"; \ + jq -n '{tools:[{name:"img_create",description:"Generate
images",schema:{type:"object",additionalProperties:false,required:["prompt"],properties:{prompt:{type:"string"},n:{type:"integer",minimum:1,maximum:4,default:1},size:{type:"string",pattern:"^\\d{3,4}x\\d{3,4}$",default:"1024x1024"},model:{type:"string",default:"gpt-image-1"},return_b64:{type:"boolean",default:false},save:{type:"object",additionalProperties:false,required:["dir"],properties:{dir:{type:"string"},basename:{type:"string",default:"img"},ext:{type:"string",enum:["png"],default:"png"}}}}},command:["./tools/bin/img_create"],timeoutSec:120,envPassthrough:["OAI_API_KEY","OAI_BASE_URL","OAI_IMAGE_BASE_URL","OAI_HTTP_TIMEOUT"]}]}' > "$$TOOLS_JSON"; \ + OUT=$$(./bin/agentcli -tools "$$TOOLS_JSON" -prompt "Use img_create to save under out/" -model gpt-5 -max-steps 3 -http-timeout 10s -tool-timeout 20s -debug 2>/dev/null || true); \ + if [ -z "$$OUT" ]; then echo "smoke-image: empty CLI output"; exit 1; fi; \ + echo "smoke-image: OK: $$OUT"; \ + rm -rf "$$TMP" + +clean: + # Remove agent binary and each tool binary deterministically + rm -f $(addprefix tools/bin/,$(addsuffix $(EXE),$(TOOLS))) + rm -rf tools/bin + rm -rf bin + # Remove common test/build artifacts + rm -f bin/coverage.out coverage.out + rm -rf reports + # Remove local audit/log artifacts created during tests + rm -rf .goagent + # Intentionally preserve logs/ here; see clean-logs for guarded deletion + # rm -rf logs + +# Guarded logs cleanup: only delete when STATE equals DOWN +# Usage: +# make clean-logs # operates on ./logs (default) +# make clean-logs LOGS_DIR=path # operate on a specific logs dir (used by tests) +LOGS_DIR ?= logs +clean-logs: + @set -euo pipefail; \ + DIR="$(LOGS_DIR)"; \ + if [ ! 
-d "$$DIR" ]; then \ + echo "clean-logs: $$DIR not present; skipping"; \ + exit 0; \ + fi; \ + STATE=$$(tr -d ' \t\r\n' < "$$DIR/STATE" 2>/dev/null || true); \ + if [ "$$STATE" = "DOWN" ]; then \ + rm -rf "$$DIR"; \ + echo "clean-logs: removed $$DIR"; \ + else \ + echo "clean-logs: skipped ($$DIR/STATE='$$STATE')"; \ + fi + +# Aggregate clean: normal clean then guarded logs cleanup +clean-all: + @$(MAKE) clean + @$(MAKE) clean-logs + +# Fail early if the active Go toolchain (major.minor) differs from go.mod +# Usage: make check-go-version +check-go-version: + @set -euo pipefail; \ + # Extract version declared in go.mod and normalize to major.minor (ignore patch) + # Prefer `go mod edit -json` for robust parsing across environments + MOD_GO=$$(go mod edit -json | sed -nE 's/.*"Go"[[:space:]]*:[[:space:]]*"([0-9]+\.[0-9]+).*/\1/p' | head -n1 | tr -d ' \t\r\n'); \ + if [ -z "$$MOD_GO" ]; then echo "check-go-version: unable to parse major.minor from go.mod"; exit 2; fi; \ + # Extract system Go major.minor + SYS_GO=$$(go env GOVERSION | sed -E 's/^go([0-9]+\.[0-9]+).*/\1/' | tr -d ' \t\r\n'); \ + if [ -z "$$SYS_GO" ]; then echo "check-go-version: unable to parse 'go version' output"; exit 2; fi; \ + if [ "$$SYS_GO" != "$$MOD_GO" ]; then \ + echo "Go toolchain mismatch: system $$SYS_GO != go.mod $$MOD_GO"; \ + echo "Hint: install Go $$MOD_GO (see https://go.dev/dl) or use a version manager, then re-run 'make check-go-version'."; \ + exit 2; \ + fi; \ + echo "check-go-version: OK (system $$SYS_GO matches go.mod $$MOD_GO)" + +# Deterministic tests for clean-logs behavior across cases +# - DOWN => directory removed +# - non-DOWN => directory preserved +# - missing STATE => directory preserved +test-clean-logs: + @set -euo pipefail; \ + TMP=$$(mktemp -d 2>/dev/null || mktemp -d -t clogs); \ + LD="$$TMP/logs"; \ + : # Case A: allowed removal when STATE=DOWN; \ + mkdir -p "$$LD"; \ + echo DOWN > "$$LD/STATE"; \ + touch "$$LD/file"; \ + $(MAKE) -s clean-logs LOGS_DIR="$$LD"; \ 
+ if [ -d "$$LD" ]; then echo "test-clean-logs: expected removal when STATE=DOWN"; rm -rf "$$TMP"; exit 1; fi; \ + : # Case B: blocked when STATE!=DOWN; \ + mkdir -p "$$LD"; \ + echo UP > "$$LD/STATE"; \ + $(MAKE) -s clean-logs LOGS_DIR="$$LD"; \ + if [ ! -d "$$LD" ]; then echo "test-clean-logs: unexpected removal when STATE!=DOWN"; rm -rf "$$TMP"; exit 1; fi; \ + : # Case C: blocked when STATE missing; \ + rm -rf "$$LD"; \ + mkdir -p "$$LD"; \ + rm -f "$$LD/STATE"; \ + $(MAKE) -s clean-logs LOGS_DIR="$$LD"; \ + if [ ! -d "$$LD" ]; then echo "test-clean-logs: unexpected removal when STATE missing"; rm -rf "$$TMP"; exit 1; fi; \ + # Cleanup; \ + rm -rf "$$TMP"; \ + echo "test-clean-logs: OK" + +lint: + @$(MAKE) check-go-version + @set -euo pipefail; \ + LINTBIN="$(GOBIN)/golangci-lint$(EXE)"; \ + # Fail fast if an existing linter is too old relative to MIN \ + $(MAKE) -s lint-precheck; \ + NEED_INSTALL=0; \ + if [ ! -x "$$LINTBIN" ]; then \ + NEED_INSTALL=1; \ + else \ + CUR_VER="$$($$LINTBIN version | sed -nE 's/.*version ([v0-9\.]+).*/\1/p')"; \ + if [ "$$CUR_VER" != "$(GOLANGCI_LINT_VERSION)" ]; then NEED_INSTALL=1; fi; \ + fi; \ + if [ "$$NEED_INSTALL" = "1" ]; then \ + echo "Installing golangci-lint $(GOLANGCI_LINT_VERSION) into $(GOBIN)..."; \ + $(MAKE) install-golangci; \ + fi; \ + "$$LINTBIN" version; \ + "$$LINTBIN" run --timeout=5m; \ + $(GO) vet ./...; \ + $(MAKE) fmtcheck; \ + $(MAKE) ensure-rg; \ + PATH="$(CURDIR)/bin:$$PATH" $(MAKE) check-tools-paths; \ + PATH="$(CURDIR)/bin:$$PATH" $(MAKE) verify-manifest-paths + +# Verify ordering inside the lint target via make dry-run +# Ensures the first non-commented command is the sub-make invocation of check-go-version +# and that a golangci-lint version invocation appears later. 
+.PHONY: test-lint-order +test-lint-order: + @set -euo pipefail; \ + OUT="$$(make -n lint 2>/dev/null)"; \ + FIRST_LINE="$$(printf '%s\n' "$$OUT" | sed -n '1p')"; \ + if [ "$$FIRST_LINE" != "make check-go-version" ]; then \ + printf '%s\n' "test-lint-order: expected first line 'make check-go-version' but got: $$FIRST_LINE"; \ + exit 1; \ + fi; \ + if ! printf '%s\n' "$$OUT" | awk '/golangci-lint/{found=1} END{exit found?0:1}'; then \ + printf '%s\n' "test-lint-order: expected to find a golangci-lint invocation in dry-run output"; \ + exit 1; \ + fi; \ + printf '%s\n' "test-lint-order: OK" + +# Fail fast when golangci-lint is older than the minimum supported version +# Usage: make lint-precheck +lint-precheck: + @set -euo pipefail; \ + LINTBIN="$(GOBIN)/golangci-lint$(EXE)"; \ + MIN="v1.60.0"; \ + if [ ! -x "$$LINTBIN" ]; then \ + # Not installed yet; installation in the lint target will handle it \ + echo "lint-precheck: $$LINTBIN not found; will install $(GOLANGCI_LINT_VERSION)"; \ + exit 0; \ + fi; \ + GCL="$$($$LINTBIN version | sed -nE 's/.*version ([v0-9\.]+).*/\1/p')"; \ + if [ -z "$$GCL" ]; then \ + echo "lint-precheck: unable to parse golangci-lint version from '$$($$LINTBIN version | head -n1)'"; \ + exit 2; \ + fi; \ + MAX_VER="$$(printf '%s\n%s\n' "$$GCL" "$$MIN" | sort -V | tail -n1)"; \ + if [ "$$MAX_VER" != "$$GCL" ]; then \ + echo "golangci-lint $$GCL < $$MIN with Go $$(go version) — update GOLANGCI_LINT_VERSION"; \ + exit 2; \ + fi; \ + echo "lint-precheck: OK (golangci-lint $$GCL >= $$MIN)" + +## Pin ripgrep for optional local, non-root install when missing +RG_VERSION ?= 14.1.0 + +# Ensure ripgrep (rg) is available; if not, download a static build into ./bin/rg +ensure-rg: + @set -euo pipefail; \ + if command -v rg >/dev/null 2>&1; then \ + exit 0; \ + fi; \ + echo "ripgrep (rg) not found; installing to ./bin/rg (version $(RG_VERSION))"; \ + mkdir -p bin; \ + OS=$$(uname -s | tr '[:upper:]' '[:lower:]'); \ + ARCH=$$(uname -m); \ + case "$$OS" in \ 
+ linux) TOS=unknown-linux-musl;; \ + darwin) TOS=apple-darwin;; \ + *) echo "Unsupported OS for auto-install: $$OS"; exit 1;; \ + esac; \ + case "$$ARCH" in \ + x86_64|amd64) TARCH=x86_64;; \ + arm64|aarch64) TARCH=aarch64;; \ + *) echo "Unsupported arch for auto-install: $$ARCH"; exit 1;; \ + esac; \ + URL="https://github.com/BurntSushi/ripgrep/releases/download/$(RG_VERSION)/ripgrep-$(RG_VERSION)-$$TARCH-$$TOS.tar.gz"; \ + TMP=$$(mktemp -d 2>/dev/null || mktemp -d -t rgdl); \ + echo "Downloading $$URL"; \ + if ! curl -fsSL "$$URL" | tar -xz -C "$$TMP"; then \ + echo "Failed to download/extract ripgrep archive"; rm -rf "$$TMP"; exit 1; \ + fi; \ + SRC=$$(find "$$TMP" -type f -name rg -perm -u+x | head -n1); \ + if [ -z "$$SRC" ]; then \ + echo "rg binary not found in archive"; rm -rf "$$TMP"; exit 1; \ + fi; \ + mv "$$SRC" bin/rg; \ + chmod +x bin/rg; \ + rm -rf "$$TMP"; \ + bin/rg --version | head -n1; \ + echo "ripgrep installed at ./bin/rg" + +# Install pinned golangci-lint into ./bin using the local Go toolchain +install-golangci: + @set -euo pipefail; \ + mkdir -p "$(GOBIN)"; \ + GOBIN="$(GOBIN)" GO111MODULE=on $(GO) install github.com/golangci/golangci-lint/cmd/golangci-lint@$(GOLANGCI_LINT_VERSION); \ + "$(GOBIN)/golangci-lint$(EXE)" version + +# Auto-format Go sources in-place using gofmt -s +fmt: + @gofmt -s -w . + +# Verify tools.json commands use canonical ./tools/bin prefix for relative paths +# Fails if any command[0] is relative and does not start with ./tools/bin/ +# Absolute paths are allowed for test fixtures. Requires ripgrep (rg). +verify-manifest-paths: + @set -euo pipefail; \ + if ! command -v rg >/dev/null 2>&1; then \ + echo "ripgrep (rg) is required. Please install ripgrep."; \ + exit 1; \ + fi; \ + if [ ! 
-f tools.json ]; then \ + echo "tools.json not found at repo root"; \ + exit 1; \ + fi; \ + if rg -n -P --no-heading '"command"\s*:\s*\[\s*"(?!\./tools/bin/)(\./[^"]+)"' tools.json; then \ + echo "Invalid relative command[0] in tools.json. Use ./tools/bin/NAME or an absolute path."; \ + exit 1; \ + fi; \ + echo "verify-manifest-paths: OK" + +fmtcheck: + @echo "Checking gofmt..."; \ + files=$$(gofmt -s -l .); \ + if [ -n "$$files" ]; then \ + echo "Files need gofmt -s:"; echo "$$files"; exit 1; \ + fi + +# Guard against legacy tool path usage outside canonical layout +# - Fails if any "./tools/(get_time|fs_*|exec)" invocation remains outside allowed paths +# - Also fails on single-file references like "./tools/<name>.go". +# Allowed: "go build -o tools/bin/<name> ./tools/cmd/<name>". Forbidden: building directly from "./tools/<name>" outside `tools/cmd/**` and `tools/bin/**` (excluding FEATURE_CHECKLIST.md). +# Requires ripgrep (`rg`). +check-tools-paths: + @set -euo pipefail; \ + if ! command -v rg >/dev/null 2>&1; then \ + echo "ripgrep (rg) is required. Please install ripgrep."; \ + exit 1; \ + fi; \ + # Legacy invocations of tools outside canonical layout + if rg -n --no-heading --hidden \ + -g '!tools/cmd/**' -g '!tools/bin/**' -g '!FEATURE_CHECKLIST.md' -g '!.git/**' \ + -e '\./tools/(get_time|fs_[a-z_]+|exec)\b' .; then \ + echo "Forbidden legacy tool path references found. Use ./tools/bin/NAME or sources under tools/cmd/NAME."; \ + exit 1; \ + fi; \ + # Single-file source builds or direct `go build/run` against ./tools/<name> are forbidden + # Use PCRE2 to exclude allowed ./tools/cmd/* and ./tools/bin/* via negative lookahead + if rg -n -P --no-heading --hidden \ + -g '!tools/cmd/**' -g '!tools/bin/**' -g '!FEATURE_CHECKLIST.md' -g '!.git/**' \ + -e '(\./tools/[a-z_]+\.go|go\s+(build|run)\s+.*\s\./tools/(?!cmd/|bin/)[a-z_]+)\b' .; then \ + echo "Direct tool source builds or single-file references found. 
Build from tools/cmd/NAME -> tools/bin/NAME."; \ + exit 1; \ + fi; \ + echo "check-tools-paths: OK" + +# Initialize and update git submodules (e.g., scripts and rules) +bootstrap: + @git submodule update --init --recursive diff --git a/README.md b/README.md index ea344ee..758b77a 100644 --- a/README.md +++ b/README.md @@ -1 +1,702 @@ -# goagent +# goagent — Minimal, safe, non‑interactive agent CLI + +[![CI (lint+test+build)](https://github.com/hyperifyio/goagent/actions/workflows/ci.yml/badge.svg)](https://github.com/hyperifyio/goagent/actions/workflows/ci.yml) +[![Go Version](https://img.shields.io/github/go-mod/go-version/hyperifyio/goagent)](https://github.com/hyperifyio/goagent/blob/main/go.mod) +[![Go Reference](https://pkg.go.dev/badge/github.com/hyperifyio/goagent.svg)](https://pkg.go.dev/github.com/hyperifyio/goagent) +[![Release](https://img.shields.io/github/v/release/hyperifyio/goagent?sort=semver)](https://github.com/hyperifyio/goagent/releases) +[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) + +goagent is a compact, vendor‑agnostic command‑line tool for running non‑interactive, tool‑using agents against any OpenAI‑compatible Chat Completions API. It executes a small, auditable allowlist of local tools (argv only; no shell), streams JSON in/out, and prints a concise final answer. + +- Why use it: deterministic, portable, and safe by default. Works with hosted providers and with local endpoints like `http://localhost:1234/v1`. +- Who it’s for: engineers who want a minimal agent runner with clear guarantees and zero vendor lock‑in. +- What makes it different: strict "argv‑only" tool execution, explicit allowlists, and a pragmatic default LLM policy for predictable behavior across providers. 
+ +## Table of contents +- [At a glance](#at-a-glance) +- [Features](#features) +- [Installation](#installation) +- [Quick start](#quick-start) +- [Usage](#usage) + - [Common flags](#common-flags) + - [Image generation flags](#image-generation-flags) + - [Why you usually don’t need to change knobs](#why-you-usually-dont-need-to-change-knobs) + - [Capabilities](#capabilities) +- [Configuration](#configuration) + - [Inheritance and precedence](#inheritance-and-precedence) +- [Examples](#examples) + - [Zero-config with GPT-5](#zero-config-with-gpt-5) + - [Tool calls transcript](#tool-calls-transcript) + - [Worked example: tool calls and transcript](#worked-example-tool-calls-and-transcript) + - [View refined messages (pre-stage and final)](#view-refined-messages-pre-stage-and-final) + - [Exec tool](#exec-tool) + - [Filesystem tools](#filesystem-tools) + - [Image generation tool (img_create)](#image-generation-tool-img_create) + - [fs_search](#fs_search) +- [Security](#security) +- [Troubleshooting](#troubleshooting) +- [Tests](#tests) + - [Documentation](#documentation) +- [Diagrams](#diagrams) +- [Contributing](#contributing) +- [Development tooling](#tooling) +- [Support](#support) +- [Roadmap](#roadmap) +- [Project status](#project-status) +- [License and credits](#license-and-credits) +- [Changelog](#changelog) +- [More examples](#more-examples) +- [CI quality gates](docs/operations/ci-quality-gates.md) + - [State persistence (-state-dir)](#state-persistence--state-dir) + + +## At a glance +- Minimal, portable, vendor‑agnostic: works with any OpenAI‑compatible endpoint +- Deterministic and auditable: argv‑only tool execution, JSON stdin/stdout, strict timeouts +- Safe by default: explicit allowlist of tools; no shell evaluation +- Batteries included: a small toolbelt for filesystem, process, network, and image tasks + +## Features +- OpenAI‑compatible `POST /v1/chat/completions` via `net/http` (no SDK) +- **Explicit tools allowlist**: `tools.json` with JSON Schema 
parameters (see [Tools manifest reference](docs/reference/tools-manifest.md)) +- **Deterministic execution**: argv‑only tools, JSON stdin/stdout, per‑call timeouts +- **Predictable error surface**: tool errors mapped as structured JSON +- **Observability & hygiene**: audit logging with redaction; transcript size controls + +## Installation + +### Requirements +- Go 1.24+ on Linux, macOS, or Windows +- Network access to an OpenAI‑compatible API +- For development and examples: `ripgrep` (rg), `jq`, and `golangci-lint` + +### Install options +1) Download a binary: see [Releases](https://github.com/hyperifyio/goagent/releases) + +2) With Go (adds `agentcli` to your `GOBIN`): +```bash +go install github.com/hyperifyio/goagent/cmd/agentcli@latest +``` + +3) Build from source: +```bash +git clone https://github.com/hyperifyio/goagent +cd goagent +make bootstrap tidy build build-tools +``` + +Verify installation: +```bash +./bin/agentcli --version +``` + +Developer prerequisites (examples): +```bash +# ripgrep +# - Ubuntu/Debian +sudo apt-get update && sudo apt-get install -y ripgrep +# - macOS (Homebrew) +brew install ripgrep +# - Windows (Chocolatey) +choco install ripgrep -y + +# golangci-lint (pinned; installs into ./bin via Makefile) +make install-golangci +./bin/golangci-lint version + +# jq (used by examples and runbooks) +# - Ubuntu/Debian +sudo apt-get install -y jq +# - macOS (Homebrew) +brew install jq +# - Windows (Chocolatey) +choco install jq -y + +# make (Windows, for running Makefile targets used in docs) +# - Windows (Chocolatey) +choco install make -y +``` + +## Configuration +Configuration precedence is: **flags > environment > built‑in defaults**. + +Environment variables: +- `OAI_BASE_URL` — API base (default `https://api.openai.com/v1`). Helper scripts will also read `LLM_BASE_URL` if present. +- `OAI_MODEL` — model ID (default `oss-gpt-20b`). Helper scripts will also read `LLM_MODEL` if present. +- `OAI_API_KEY` — API key when required. 
The CLI also accepts `OPENAI_API_KEY` for compatibility.
+- `OAI_HTTP_TIMEOUT` — HTTP timeout for chat requests (e.g., `90s`). Mirrors `-http-timeout`.
+- `OAI_PREP_HTTP_TIMEOUT` — HTTP timeout for pre-stage; overrides inheritance from `-http-timeout`.
+
+### Inheritance and precedence
+
+The CLI resolves values independently for chat (main), pre-stage, and image flows, with inheritance when explicit values are not provided.
+
+Endpoints and API keys:
+
+| Setting | Resolution order |
+|---|---|
+| Chat base URL | `-base-url` → `OAI_BASE_URL` → default `https://api.openai.com/v1` |
+| Pre-stage base URL | `-prep-base-url` → `OAI_PREP_BASE_URL` → inherit Chat base URL |
+| Image base URL | `-image-base-url` → `OAI_IMAGE_BASE_URL` → inherit Chat base URL |
+| Chat API key | `-api-key` → `OAI_API_KEY` → `OPENAI_API_KEY` |
+| Pre-stage API key | `-prep-api-key` → `OAI_PREP_API_KEY` → inherit Chat API key |
+| Image API key | `-image-api-key` → `OAI_IMAGE_API_KEY` → inherit Chat API key |
+
+Models and sampling:
+
+| Setting | Resolution order |
+|---|---|
+| Chat model | `-model` → `OAI_MODEL` → default `oss-gpt-20b` |
+| Pre-stage model | `-prep-model` → `OAI_PREP_MODEL` → inherit Chat model |
+| Image model | `-image-model` → `OAI_IMAGE_MODEL` → default `gpt-image-1` |
+| Chat temperature vs top-p | One‑knob rule: if `-top-p` is set, omit `temperature`; otherwise send `-temp` (default 1.0) when supported |
+| Pre-stage temperature vs top-p | One‑knob rule applies independently with `-prep-temp`/`-prep-top-p` |
+
+HTTP controls:
+
+| Setting | Resolution order |
+|---|---|
+| Chat HTTP timeout | `-http-timeout` → `OAI_HTTP_TIMEOUT` → fallback to `-timeout` if set |
+| Pre-stage HTTP timeout | `-prep-http-timeout` → `OAI_PREP_HTTP_TIMEOUT` → inherit Chat HTTP timeout |
+| Image HTTP timeout | `-image-http-timeout` → `OAI_IMAGE_HTTP_TIMEOUT` → inherit Chat HTTP timeout |
+| Chat HTTP retries | `-http-retries` → `OAI_HTTP_RETRIES` → default (e.g., 2) |
+| Pre-stage HTTP 
retries | `-prep-http-retries` → `OAI_PREP_HTTP_RETRIES` → inherit Chat HTTP retries | +| Image HTTP retries | `-image-http-retries` → `OAI_IMAGE_HTTP_RETRIES` → inherit Chat HTTP retries | +| Chat HTTP retry backoff | `-http-retry-backoff` → `OAI_HTTP_RETRY_BACKOFF` → default | +| Pre-stage HTTP retry backoff | `-prep-http-retry-backoff` → `OAI_PREP_HTTP_RETRY_BACKOFF` → inherit Chat backoff | +| Image HTTP retry backoff | `-image-http-retry-backoff` → `OAI_IMAGE_HTTP_RETRY_BACKOFF` → inherit Chat backoff | + +## Quick start +Install the CLI and point it to a reachable OpenAI‑compatible API (local or hosted): +```bash +export OAI_BASE_URL=http://localhost:1234/v1 +export OAI_MODEL=oss-gpt-20b +make build build-tools # skip if installed via go install / release binary +``` + +Create a minimal `tools.json` next to the binary (Unix/macOS): +```json +{ + "tools": [ + { + "name": "get_time", + "description": "Return current time for an IANA timezone (default UTC). Accepts 'timezone' (canonical) and alias 'tz'.", + "schema": { + "type": "object", + "properties": { + "timezone": {"type": "string", "description": "e.g. Europe/Helsinki"}, + "tz": {"type": "string", "description": "Alias for timezone (deprecated)"} + }, + "required": ["timezone"], + "additionalProperties": false + }, + "command": ["./tools/bin/get_time"], + "timeoutSec": 5 + } + ] +} +``` + +On Windows, use a `.exe` suffix for tool binaries: +```json +{ + "tools": [ + { + "name": "get_time", + "schema": {"type":"object","properties":{"timezone":{"type":"string"}},"required":["timezone"],"additionalProperties":false}, + "command": ["./tools/bin/get_time.exe"], + "timeoutSec": 5 + } + ] +} +``` + +Run the agent: +```bash +./bin/agentcli \ + -prompt "What's the local time in Helsinki? If tools are available, call get_time." 
\ + -tools ./tools.json \ + -debug +``` + +Expected behavior: the model may call `get_time`; the CLI executes `./tools/bin/get_time` (or `get_time.exe` on Windows) with JSON on stdin, appends the result as a `tool` message, calls the API again, then prints a concise final answer. + +Tip: run `./bin/agentcli -h` for the complete help output. + +## Usage +Flags are order‑insensitive. You can place `-prompt` and other flags in any order; precedence remains flag > environment > default. +### Common flags +```text +-prompt string User prompt (required) +-tools string Path to tools.json (optional) +-system string System prompt (default: helpful and precise) +-base-url string OpenAI‑compatible base URL (env OAI_BASE_URL; scripts accept LLM_BASE_URL fallback) +-api-key string API key if required (env OAI_API_KEY; falls back to OPENAI_API_KEY) +-model string Model ID (env OAI_MODEL; scripts accept LLM_MODEL fallback) +-max-steps int Maximum reasoning/tool steps (default 8) + A hard ceiling of 15 is enforced; exceeding the cap + terminates with: "needs human review". 
+-http-timeout duration HTTP timeout for chat completions (env OAI_HTTP_TIMEOUT; default falls back to -timeout)
+-prep-http-timeout duration HTTP timeout for pre-stage (env OAI_PREP_HTTP_TIMEOUT; default falls back to -http-timeout)
+-prep-model string Pre-stage model ID (env OAI_PREP_MODEL; inherits -model if unset)
+-prep-base-url string Pre-stage base URL (env OAI_PREP_BASE_URL; inherits -base-url if unset)
+-prep-api-key string Pre-stage API key (env OAI_PREP_API_KEY; falls back to OAI_API_KEY/OPENAI_API_KEY; inherits -api-key if unset)
+-prep-http-retries int Pre-stage HTTP retries (env OAI_PREP_HTTP_RETRIES; inherits -http-retries if unset)
+-prep-http-retry-backoff duration Pre-stage HTTP retry backoff (env OAI_PREP_HTTP_RETRY_BACKOFF; inherits -http-retry-backoff if unset)
+-prep-dry-run Run pre-stage only, print refined Harmony messages to stdout, and exit 0
+-print-messages Pretty-print the final merged message array to stderr before the main call
+-http-retries int Number of retries for transient HTTP failures (timeouts, 429, 5xx). Uses jittered exponential backoff. (default 2)
+-http-retry-backoff duration Base backoff between HTTP retry attempts (exponential with jitter). (default 300ms)
+-tool-timeout duration Per-tool timeout (default falls back to -timeout)
+-timeout duration [DEPRECATED] Global timeout; prefer -http-timeout and -tool-timeout
+-temp float Sampling temperature (default 1.0)
+-top-p float Nucleus sampling probability mass (conflicts with -temp; omits temperature when set)
+-prep-top-p float Nucleus sampling probability mass for pre-stage (conflicts with -prep-temp; omits temperature when set)
+-prep-profile string Pre-stage prompt profile (deterministic|general|creative|reasoning); sets temperature when supported (conflicts with -prep-top-p)
+-prep-enabled Enable pre-stage (default true). When false, skip pre-stage and proceed directly to main call. 
+-debug Dump request/response JSON to stderr +-verbose Also print non-final assistant channels (critic/confidence) to stderr +-channel-route name=stdout|stderr|omit + Override default channel routing (final→stdout, critic/confidence→stderr); repeatable +-quiet Suppress non-final output; print only final text to stdout +-capabilities Print enabled tools and exit +-print-config Print resolved config and exit +-dry-run Print intended state actions (restore/refine/save) and exit without writing state +--version | -version Print version and exit +``` +Run `./bin/agentcli -h` to see the built‑in help. + +### Image generation flags + +The following flags control the Images API behavior used by the assistant when generating images. Precedence is always: flags > environment > inheritance > default. + +| Flag | Environment | Default / Inheritance | Description | +|---|---|---|---| +| `-image-base-url string` | `OAI_IMAGE_BASE_URL` | Inherits `-base-url` | Image API base URL | +| `-image-api-key string` | `OAI_IMAGE_API_KEY` | Inherits `-api-key`; falls back to `OPENAI_API_KEY` | API key for Images API | +| `-image-model string` | `OAI_IMAGE_MODEL` | `gpt-image-1` | Images model ID | +| `-image-http-timeout duration` | `OAI_IMAGE_HTTP_TIMEOUT` | Inherits `-http-timeout` | HTTP timeout for image requests | +| `-image-http-retries int` | `OAI_IMAGE_HTTP_RETRIES` | Inherits `-http-retries` | Retry attempts for transient image HTTP errors | +| `-image-http-retry-backoff duration` | `OAI_IMAGE_HTTP_RETRY_BACKOFF` | Inherits `-http-retry-backoff` | Base backoff for image HTTP retries | +| `-image-n int` | `OAI_IMAGE_N` | `1` | Number of images to generate | +| `-image-size string` | `OAI_IMAGE_SIZE` | `1024x1024` | Size WxH | +| `-image-quality string` | `OAI_IMAGE_QUALITY` | `standard` | `standard` or `hd` | +| `-image-style string` | `OAI_IMAGE_STYLE` | `natural` | `natural` or `vivid` | +| `-image-response-format string` | `OAI_IMAGE_RESPONSE_FORMAT` | `url` | `url` or 
`b64_json` | +| `-image-transparent-background` | `OAI_IMAGE_TRANSPARENT_BACKGROUND` | `false` | Request transparent background when supported | + +### Why you usually don’t need to change knobs +- The default `-temp 1.0` is standardized for broad provider/model parity and GPT‑5 compatibility. +- The one‑knob rule applies: if you set `-top-p`, the agent omits `temperature`; otherwise it sends `temperature` (default 1.0) and leaves `top_p` unset. +- The one‑knob rule applies for both stages: if you set `-top-p` (or `-prep-top-p`), the agent omits `temperature` for that stage; otherwise it sends `temperature` (default 1.0) when supported. Pre‑stage profiles are available via `-prep-profile`, e.g. `deterministic` sets temperature to 0.1 when supported. +- See the policy for details and rationale: [ADR‑0004: Default LLM policy](docs/adr/0004-default-llm-policy.md). + +### Capabilities +List enabled tools from a manifest without running the agent. The output includes a prominent header warning, and certain tools like `img_create` are annotated with an extra warning because they make outbound network calls and can save files: +```bash +./bin/agentcli -tools ./tools.json -capabilities +``` + +## Examples +### Zero-config with GPT-5 +Run against a GPT‑5 compatible endpoint without tuning sampling knobs. The CLI sends `temperature: 1.0` by default for models that support it. 
+```bash +./bin/agentcli -prompt "Say ok" -model gpt-5 -base-url "$OAI_BASE_URL" -api-key "$OAI_API_KEY" -max-steps 1 -debug +# stderr will include a request dump containing "\"temperature\": 1" +``` + +### Tool calls transcript +Minimal JSON transcript showing correct tool‑call sequencing: +```json +[ + {"role":"user","content":"What's the local time in Helsinki?"}, + { + "role":"assistant", + "content":null, + "tool_calls":[ + { + "id":"call_get_time_1", + "type":"function", + "function":{ + "name":"get_time", + "arguments":"{\"timezone\":\"Europe/Helsinki\"}" + } + } + ] + }, + { + "role":"tool", + "tool_call_id":"call_get_time_1", + "name":"get_time", + "content":"{\"timezone\":\"Europe/Helsinki\",\"iso\":\"2025-08-17T12:34:56Z\",\"unix\":1755424496}" + }, + {"role":"assistant","content":"It's 15:34 in Helsinki."} +] +``` +Notes: +- For parallel tool calls (multiple entries in `tool_calls`), append one `role:"tool"` message per `id` before calling the API again. Order of tool messages is not significant as long as each `tool_call_id` is present exactly once. +- Transcript hygiene: when running without `-debug`, the CLI replaces any single tool message content larger than 8 KiB with `{"truncated":true,"reason":"large-tool-output"}` before sending to the API. Use `-debug` to inspect full payloads during troubleshooting. 
+ +### Worked example: tool calls and transcript +See `examples/tool_calls.md` for a self-contained, test-driven worked example that: +- Exercises default temperature 1.0 +- Demonstrates a two-tool-call interaction with matching `tool_call_id` +- Captures a transcript via `-debug` showing request/response JSON dumps + +Run the example test: +```bash +go test ./examples -run TestWorkedExample_ToolCalls_TemperatureOne_Sequencing -v +``` + +### Split backends for chat and image + +Use one provider for chat and another for image generation by overriding the image backend only: + +```bash +export OAI_BASE_URL=https://api.example-chat.local/v1 +export OAI_API_KEY=chat-key + +./bin/agentcli \ + -prompt "Create a simple logo" \ + -tools ./tools.json \ + -image-base-url https://api.openai.com/v1 \ + -image-api-key "$OPENAI_API_KEY" \ + -image-model gpt-image-1 \ + -image-size 1024x1024 +``` + +### View refined messages (pre-stage and final) +See also ADR‑0005 for the pre‑stage flow and channel routing details: `docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md`. +Inspect message arrays deterministically without running the full loop: + +```bash +# Pre-stage only: print refined Harmony messages and exit +./bin/agentcli -prompt "Say ok" -prep-dry-run | jq . + +# Before the main call: pretty-print merged messages to stderr, then proceed +./bin/agentcli -prompt "Say ok" -print-messages 2> >(jq .) +``` + +### Exec tool +Build the exec tool and run a simple command (Unix): +```bash +make build-tools +echo '{"cmd":"/bin/echo","args":["hello"]}' | ./tools/bin/exec +# => {"exitCode":0,"stdout":"hello\n","stderr":"","durationMs":<n>} +``` +Timeout example: +```bash +echo '{"cmd":"/bin/sleep","args":["2"],"timeoutSec":1}' | ./tools/bin/exec +# => non-zero exit, stderr contains "timeout" +``` + +### Filesystem tools +The following examples assume `make build-tools` has produced binaries into `tools/bin/*`. 
+ +#### fs_read_file +```bash +make build-tools +printf 'hello world' > tmp_readme_demo.txt +echo '{"path":"tmp_readme_demo.txt"}' | ./tools/bin/fs_read_file | jq . +rm -f tmp_readme_demo.txt +``` + +#### fs_append_file +```bash +make build-tools +echo -n 'hello ' | base64 > b64a.txt +echo -n 'world' | base64 > b64b.txt +echo '{"path":"tmp_append_demo.txt","contentBase64":"'"$(cat b64a.txt)"'"}' | ./tools/bin/fs_append_file | jq . +echo '{"path":"tmp_append_demo.txt","contentBase64":"'"$(cat b64b.txt)"'"}' | ./tools/bin/fs_append_file | jq . +cat tmp_append_demo.txt; rm -f tmp_append_demo.txt b64a.txt b64b.txt +``` + +#### fs_write_file +```bash +make build-tools +echo -n 'hello world' | base64 > b64.txt +echo '{"path":"tmp_write_demo.txt","contentBase64":"'"$(cat b64.txt)"'"}' | ./tools/bin/fs_write_file | jq . +cat tmp_write_demo.txt; rm -f tmp_write_demo.txt b64.txt +``` + +#### fs_mkdirp +```bash +make build-tools +echo '{"path":"tmp_mkdirp_demo/a/b/c","modeOctal":"0755"}' | ./tools/bin/fs_mkdirp | jq . +ls -ld tmp_mkdirp_demo/a/b/c +echo '{"path":"tmp_mkdirp_demo/a/b/c","modeOctal":"0755"}' | ./tools/bin/fs_mkdirp | jq . +rm -rf tmp_mkdirp_demo +``` + +#### fs_rm +```bash +make build-tools +printf 'temp' > tmp_rm_demo.txt +echo '{"path":"tmp_rm_demo.txt"}' | ./tools/bin/fs_rm | jq . +mkdir -p tmp_rm_dir/a/b && touch tmp_rm_dir/a/b/file.txt +echo '{"path":"tmp_rm_dir","recursive":true}' | ./tools/bin/fs_rm | jq . +rm -rf tmp_rm_dir +``` + +#### fs_move +```bash +make build-tools +printf 'payload' > tmp_move_src.txt +echo '{"from":"tmp_move_src.txt","to":"tmp_move_dst.txt"}' | ./tools/bin/fs_move | jq . +printf 'old' > tmp_move_dst.txt; printf 'new' > tmp_move_src.txt +echo '{"from":"tmp_move_src.txt","to":"tmp_move_dst.txt","overwrite":true}' | ./tools/bin/fs_move | jq . 
+rm -f tmp_move_src.txt tmp_move_dst.txt +``` + +#### fs_listdir +```bash +make build-tools +mkdir -p tmp_listdir_demo/a b && touch tmp_listdir_demo/.hidden tmp_listdir_demo/a/afile tmp_listdir_demo/bfile +echo '{"path":"tmp_listdir_demo"}' | ./tools/bin/fs_listdir | jq '.entries | map(.path)' +jq -n '{path:"tmp_listdir_demo",recursive:true,globs:["**/*"],includeHidden:false}' | ./tools/bin/fs_listdir | jq '.entries | map(select(.type=="file") | .path)' +rm -rf tmp_listdir_demo +``` + +#### fs_apply_patch +```bash +make build-tools +cat > /tmp/demo.diff <<'EOF' +--- /dev/null ++++ b/tmp_patch_demo.txt +@@ -0,0 +1,2 @@ ++hello ++world +EOF +jq -n --arg d "$(cat /tmp/demo.diff)" '{unifiedDiff:$d}' | ./tools/bin/fs_apply_patch | jq . +printf 'hello +world +' | diff -u - tmp_patch_demo.txt && echo OK +``` + +#### fs_edit_range +```bash +make build-tools +printf 'abcdef' > tmp_edit_demo.txt +echo -n 'XY' | base64 > b64.txt +jq -n --arg b "$(cat b64.txt)" '{path:"tmp_edit_demo.txt",startByte:2,endByte:4,replacementBase64:$b}' | ./tools/bin/fs_edit_range | jq . +cat tmp_edit_demo.txt # => abXYef +rm -f tmp_edit_demo.txt b64.txt +``` + +#### fs_stat +```bash +make build-tools +printf 'hello world' > tmp_stat_demo.txt +echo '{"path":"tmp_stat_demo.txt","hash":"sha256"}' | ./tools/bin/fs_stat | jq . +rm -f tmp_stat_demo.txt +``` + +### Image generation tool (img_create) + +Generate images via an OpenAI‑compatible Images API and save files into your repository (default) or return base64 on demand. 
+ +Quickstart (Unix/macOS/Windows via `make build-tools`): + +```bash +make build-tools +``` + +Minimal `tools.json` entry (copy/paste next to your binary): + +```json +{ + "tools": [ + { + "name": "img_create", + "description": "Generate image(s) with OpenAI Images API and save to repo or return base64", + "schema": { + "type": "object", + "required": ["prompt"], + "properties": { + "prompt": {"type": "string"}, + "n": {"type": "integer", "minimum": 1, "maximum": 4, "default": 1}, + "size": {"type": "string", "pattern": "^\\d{3,4}x\\d{3,4}$", "default": "1024x1024"}, + "model": {"type": "string", "default": "gpt-image-1"}, + "return_b64": {"type": "boolean", "default": false}, + "save": { + "type": "object", + "required": ["dir"], + "properties": { + "dir": {"type": "string"}, + "basename": {"type": "string", "default": "img"}, + "ext": {"type": "string", "enum": ["png"], "default": "png"} + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "command": ["./tools/bin/img_create"], + "timeoutSec": 120, + "envPassthrough": ["OAI_API_KEY", "OAI_BASE_URL", "OAI_IMAGE_BASE_URL", "OAI_HTTP_TIMEOUT"] + } + ] +} +``` + +Run the agent with a prompt that instructs the assistant to call `img_create` and save under `assets/`: + +```bash +export OAI_BASE_URL=${OAI_BASE_URL:-https://api.openai.com/v1} +export OAI_API_KEY=your-key + +./bin/agentcli \ + -tools ./tools.json \ + -prompt "Generate a tiny illustrative image using img_create and save it under assets/ with basename banner" \ + -debug + +# Expect: one or more PNGs under assets/ (e.g., assets/banner_001.png) and a concise final message on stdout +``` + +Notes: +- By default, the tool writes image files and does not include base64 in transcripts, avoiding large payloads. +- To return base64 instead, pass `{ "return_b64": true }` to the tool; base64 is elided in stdout unless `IMG_CREATE_DEBUG_B64=1` or `DEBUG_B64=1` is set. 
+- Windows: the built binary is `./tools/bin/img_create.exe` and `tools.json` should reference the `.exe`. +- See Troubleshooting for network/API issues and timeouts: `docs/runbooks/troubleshooting.md`. + +#### fs_search +```bash +make build-tools +mkdir -p tmp_search_demo && printf 'alpha\nbeta\ngamma\n' > tmp_search_demo/sample.txt +jq -n '{query:"^ga",globs:["**/*.txt"],regex:true}' | ./tools/bin/fs_search | jq '.matches' +rm -rf tmp_search_demo +``` + +## Security +- Tools are an explicit allowlist from `tools.json` +- No shell interpretation; commands executed via argv only +- JSON contract on stdin/stdout; strict timeouts per call +- Treat model output as untrusted input; never pass it to a shell + +See the full threat model in `docs/security/threat-model.md`. + +### Unrestricted tools warning +- Enabling `exec` grants arbitrary command execution and may allow full network access. Treat this as remote code execution. +- Run the CLI and tools in a sandboxed environment (container/jail/VM) with least privilege. +- Keep `tools.json` minimal and audited. Do not pass secrets via tool arguments; prefer environment variables or CI secret stores. +- Audit log redaction: set `GOAGENT_REDACT` to mask sensitive values in audit entries. `OAI_API_KEY`/`OPENAI_API_KEY` are always masked if present. + +## State persistence (-state-dir) + +Persist and restore execution state to make repeated runs deterministic and faster. + +- Enable by passing `-state-dir <dir>` (or `AGENTCLI_STATE_DIR`). The directory must be private (`0700`). +- On first run, the CLI saves a snapshot `state-<RFC3339UTC>-<8charSHA>.json` and a pointer file `latest.json`. +- On subsequent runs with the same scope, the CLI restores prompts/settings and skips pre-stage unless `-state-refine` is provided. +- Partition contexts with `-state-scope` (or `AGENTCLI_STATE_SCOPE`); when unset, a default scope is derived from model, base URL, and toolset. +- Inspect actions without touching disk using `-dry-run`. 
+ +Examples: + +```bash +# First run saves a snapshot +./bin/agentcli -prompt "Say ok" -tools ./tools.json -state-dir "$PWD/.agent-state" + +# Restore and skip pre-stage +./bin/agentcli -prompt "Say ok" -tools ./tools.json -state-dir "$PWD/.agent-state" + +# Refine existing state with inline text +./bin/agentcli -prompt "Say ok" -state-dir "$PWD/.agent-state" -state-refine -state-refine-text "Tighten tone" + +# Use a custom scope to keep contexts separate +./bin/agentcli -prompt "Say ok" -state-dir "$PWD/.agent-state" -state-scope docs-demo +``` + +See ADR‑0012 for rationale and details: `docs/adr/0012-state-dir-persistence.md`. + +## Troubleshooting +Common issues and deterministic fixes are documented with copy‑paste commands in `docs/runbooks/troubleshooting.md`. + +## Documentation +Start with the [Documentation index](docs/README.md) for design docs, ADRs, and references: +- [Tools manifest reference](docs/reference/tools-manifest.md) +- [Research tools reference](docs/reference/research-tools.md) +- [CLI reference](docs/reference/cli-reference.md) +- [Interface: code.sandbox.js.run](docs/interfaces/code.sandbox.js.run.md) +- [Architecture: Module boundaries](docs/architecture/module-boundaries.md) +- [Security: Threat model](docs/security/threat-model.md) +- [ADR‑0005: Harmony pre‑processing and channel‑aware output](docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md) +- [Pre-stage Harmony output contract](docs/harmony-prestage.md) +- [ADR‑0006: Image generation tool (img_create)](docs/adr/0006-image-generation-tool-img_create.md) +- [ADR‑0010: Adopt SearXNG & network research toolbelt (CLI-only)](docs/adr/0010-research-tools-searxng.md) + +## Diagrams +- Agent loop: `docs/diagrams/agentcli-seq.md` +- Toolbelt interactions: `docs/diagrams/toolbelt-seq.md` +- Pre‑stage flow: `docs/diagrams/harmony-prep-seq.md` + +## Tests +Run the full test suite (offline): +```bash +go test ./... 
+``` +Lint, vet, and formatting checks: +```bash +make lint +make fmt # apply gofmt -s -w to the repo +``` + +Guarded logs cleanup: +```bash +# Only removes ./logs when ./logs/STATE trimmed equals DOWN +make clean-logs + +# End-to-end verification of the guard logic (creates temp dirs) +make test-clean-logs +``` + +Reproducible builds: the `Makefile` uses `-trimpath` and stripped `-ldflags` with VCS stamping disabled so two clean builds produce identical binaries. Verify locally by running two consecutive `make clean build build-tools` and comparing `sha256sum` outputs. + +## Contributing +Contributions are welcome! See `CONTRIBUTING.md` for workflow, coding standards, and how to run quality gates locally. Please also read `CODE_OF_CONDUCT.md`. + +Useful local helpers during development: +- `make check-tools-paths` — enforce canonical `tools/cmd/NAME` sources and `tools/bin/NAME` invocations (requires `rg`) +- `make verify-manifest-paths` — ensure relative `tools.json` commands use `./tools/bin/NAME` (absolute allowed in tests) +- `make build-tool NAME=<name>` — build a single tool binary into `tools/bin/NAME` +- `make check-go-version` — fail fast if your local Go major.minor differs from `go.mod` + +If your local toolchain does not match, you will see: +```text +Go toolchain mismatch: system X.Y != go.mod X.Y +``` +Remediation: install the matching Go version shown by `go.mod` (e.g., from the official downloads) or switch via your version manager, then rerun `make check-go-version`. + +## Tooling + +This repository pins the toolchain for deterministic results: +- CI uses the Go version declared in `go.mod` across all OS jobs. +- Linting is performed with a pinned `golangci-lint` version managed by the `Makefile`. + +See ADR‑0003 for the full policy and rationale: [docs/adr/0003-toolchain-and-lint-policy.md](docs/adr/0003-toolchain-and-lint-policy.md). 
+ +## Support +- Open an issue on the tracker: [Issues](https://github.com/hyperifyio/goagent/issues) + - For security concerns, avoid posting secrets in logs. If a private report is needed, open an issue with minimal detail and a maintainer will reach out. + - Follow updates from the author on LinkedIn: [Jaakko Heusala](https://www.linkedin.com/in/jheusala/) + +## Roadmap +Planned improvements and open ideas are tracked in `FUTURE_CHECKLIST.md`. Larger architectural decisions are recorded under `docs/adr/` (see ADR‑0001 and ADR‑0002). Contributions to the roadmap are welcome via issues and PRs. + +## Project status +Experimental, but actively maintained. Interfaces may change before a stable 1.0. + +## License and credits +MIT license. See `LICENSE`. + +Maintainers and authors: +- Hyperify.io maintainers +- Primary author: Jaakko Heusala + +Acknowledgements: inspired by OpenAI‑compatible agent patterns; built for portability and safety. + +## Changelog +See `CHANGELOG.md` for notable changes and release notes. + +## More examples +See `examples/unrestricted.md` for copy‑paste prompts demonstrating `exec` + file tools to write, build, and run code in a sandboxed environment. diff --git a/assets/prompts/prep_default.md b/assets/prompts/prep_default.md new file mode 100644 index 0000000..ba55343 --- /dev/null +++ b/assets/prompts/prep_default.md @@ -0,0 +1,68 @@ +# Smart Prep Prompt (Default) + +The goal of this pre-stage is to deterministically derive: + +- A concise but complete system prompt suitable for the main run. +- Zero or more developer prompts to guide style and constraints. +- Tool configuration hints, including image-generation guidance when applicable. +- Optional image instructions for downstream image tools. + +Requirements: + +- Output MUST be Harmony messages JSON: an array of objects with optional `system`, zero-or-more `developer`, and optional `tool_config` and `image_instructions` fields. 
+- Do not include `role:"tool"` entries and do not include tool calls in this stage. +- Be explicit about safety, redaction of secrets, and source attribution. + +Guidelines: + +- Keep prompts minimal but sufficient. Avoid verbosity that wastes tokens. +- Prefer declarative constraints to prescriptive long-form text. +- If image generation is likely, include high-level image guidelines (style, quality, size) without locking to a provider-specific model. +- Annotate any assumptions clearly. + +Steps: + +1. Read the user request and any provided context. +2. Identify missing constraints and fill reasonable defaults. +3. Propose the system prompt that sets behavior boundaries and goals. +4. Provide optional developer prompts for formatting, tone, and structure. +5. Provide optional `tool_config` hints describing which tools are likely useful and with which key parameters. +6. Provide optional `image_instructions` when image generation is relevant. +7. Return a single JSON array as the only output. + +Example minimal output (JSON): + +[ + { + "system": "You are a helpful assistant. Prioritize correctness and cite sources when tools provide them." + }, + { + "developer": "Return concise answers; use bullet lists when appropriate." + }, + { + "tool_config": { + "enable_tools": ["searxng_search","http_fetch","readability_extract"], + "hints": {"http_fetch.max_bytes": 1048576} + } + }, + { + "image_instructions": { + "style": "natural", + "quality": "standard", + "size": "1024x1024" + } + } +] + +Extended guidance: + +- System prompt should set policy boundaries (no PII leakage, safety, determinism when possible). +- Developer prompts can add formatting rules or domain-specific constraints. +- Tool config hints should be suggestive; the main stage may override them. +- Image instructions should avoid vendor lock-in and focus on intent. + +Notes: + +- Keep total token usage modest. +- Ensure the JSON is syntactically valid. 
+- Avoid embedding large text; link via citations instead. diff --git a/cmd/agentcli/capabilities_test.go b/cmd/agentcli/capabilities_test.go new file mode 100644 index 0000000..db3b7c8 --- /dev/null +++ b/cmd/agentcli/capabilities_test.go @@ -0,0 +1,65 @@ +package main + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" +) + +// https://github.com/hyperifyio/goagent/issues/1 +func TestPrintCapabilities_NoToolsPath(t *testing.T) { + cfg := cliConfig{toolsPath: "", capabilities: true} + var outBuf, errBuf bytes.Buffer + code := printCapabilities(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("expected exit code 0, got %d; stderr=%q", code, errBuf.String()) + } + got := outBuf.String() + if !strings.Contains(got, "No tools enabled") { + t.Fatalf("unexpected stdout: %q", got) + } +} + +// https://github.com/hyperifyio/goagent/issues/1 +func TestPrintCapabilities_WithManifest(t *testing.T) { + dir := t.TempDir() + toolsPath := filepath.Join(dir, "tools.json") + manifest := map[string]any{ + "tools": []map[string]any{ + {"name": "btool", "description": "b desc", "schema": map[string]any{"type": "object"}, "command": []string{"/bin/true"}}, + {"name": "atool", "description": "a desc", "schema": map[string]any{"type": "object"}, "command": []string{"/bin/true"}}, + {"name": "img_create", "description": "Generate images", "schema": map[string]any{"type": "object"}, "command": []string{"/bin/true"}}, + }, + } + data, err := json.Marshal(manifest) + if err != nil { + t.Fatalf("marshal manifest: %v", err) + } + if err := os.WriteFile(toolsPath, data, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + cfg := cliConfig{toolsPath: toolsPath, capabilities: true} + var outBuf, errBuf bytes.Buffer + code := printCapabilities(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("expected exit code 0, got %d; stderr=%q", code, errBuf.String()) + } + got := outBuf.String() + // Should include warning and sorted tool names (atool 
before btool) + if !strings.Contains(got, "Capabilities (enabled tools):") { + t.Fatalf("capabilities header missing: %q", got) + } + aIdx := strings.Index(got, "- atool: a desc") + bIdx := strings.Index(got, "- btool: b desc") + if aIdx < 0 || bIdx < 0 || aIdx > bIdx { + t.Fatalf("tools not listed or not sorted: %q", got) + } + // Ensure img_create has explicit network/file warning + if !strings.Contains(got, "- img_create: Generate images [WARNING: makes outbound network calls and can save files]") { + t.Fatalf("img_create warning missing or incorrect: %q", got) + } +} diff --git a/cmd/agentcli/cli_docs_sync_test.go b/cmd/agentcli/cli_docs_sync_test.go new file mode 100644 index 0000000..4bfac39 --- /dev/null +++ b/cmd/agentcli/cli_docs_sync_test.go @@ -0,0 +1,86 @@ +package main + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +// TestCLIReference_IncludesAllFlagsFromHelp ensures that docs/reference/cli-reference.md +// includes every flag token that appears in the CLI's built-in help output. +func TestCLIReference_IncludesAllFlagsFromHelp(t *testing.T) { + // Render help text via the same function used by the CLI for --help/-h/help. + var b strings.Builder + printUsage(&b) + help := b.String() + + // Extract flag tokens from help output. Lines start with two spaces, a hyphen, then the flag. + var flags []string + for _, line := range strings.Split(help, "\n") { + line = strings.TrimRight(line, "\r") + if strings.HasPrefix(line, " -") || strings.HasPrefix(line, " --") { + // Take the first whitespace-separated token as the flag token (e.g., -prompt, --version) + fields := strings.Fields(line) + if len(fields) > 0 { + token := fields[0] + // Normalize trailing punctuation if any + token = strings.TrimRight(token, ":") + flags = append(flags, token) + } + } + } + if len(flags) == 0 { + t.Fatalf("no flags parsed from help; help was:\n%s", help) + } + + // Load CLI reference doc. 
Resolve relative to this test file's directory for robustness. + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + t.Fatalf("runtime.Caller failed") + } + thisDir := filepath.Dir(thisFile) + // Repo root is the parent of the parent (.. of cmd/agentcli => repo root) + repoRoot := filepath.Dir(filepath.Dir(thisDir)) + tryPaths := []string{ + filepath.Join(repoRoot, "docs", "reference", "cli-reference.md"), + filepath.Join(repoRoot, "README.md"), // fallback so test gives a clearer error if mislocated + } + var data []byte + var err error + var usedPath string + for _, p := range tryPaths { + if b, e := os.ReadFile(p); e == nil { + data, err, usedPath = b, nil, p + break + } else { + err = e + } + } + if data == nil { + t.Fatalf("failed to read CLI reference doc from %v: last error: %v", tryPaths, err) + } + _ = usedPath // retained for potential future diagnostics + + doc := string(data) + + // For each flag token from help, assert that the doc mentions it. + // We look for the raw token (e.g., "-prompt") to keep this simple and robust to formatting. + for _, token := range flags { + // The version line in help is "--version | -version". Ensure both variants are present in docs. + if token == "--version" { + if !strings.Contains(doc, "--version") || !strings.Contains(doc, "-version") { + t.Fatalf("docs missing one of version tokens: --version or -version; flags=%v", flags) + } + continue + } + // Skip duplicate check for -version since it is covered by the --version case. 
+ if token == "-version" { + continue + } + if !strings.Contains(doc, token) { + t.Fatalf("docs/reference/cli-reference.md missing flag token %q from help; help line present, doc needs update", token) + } + } +} diff --git a/cmd/agentcli/compat_test.go b/cmd/agentcli/compat_test.go new file mode 100644 index 0000000..21cceb9 --- /dev/null +++ b/cmd/agentcli/compat_test.go @@ -0,0 +1,93 @@ +package main + +import ( + "bytes" + "encoding/json" + "os" + "strings" + "testing" +) + +// TestLegacyPrintConfigDefaults_NoNewFlags ensures that a minimal invocation +// without new flags produces the same resolved config baseline. +func TestLegacyPrintConfigDefaults_NoNewFlags(t *testing.T) { + // Ensure env does not influence defaults + t.Setenv("OAI_BASE_URL", "") + t.Setenv("OAI_MODEL", "") + t.Setenv("OAI_HTTP_TIMEOUT", "") + t.Setenv("OAI_IMAGE_MODEL", "") + + var out, err bytes.Buffer + code := cliMain([]string{"-prompt", "p", "-print-config"}, &out, &err) + if code != 0 { + t.Fatalf("print-config exit=%d, stderr=%s", code, err.String()) + } + // Parse JSON + var payload map[string]any + if jerr := json.Unmarshal(out.Bytes(), &payload); jerr != nil { + t.Fatalf("unmarshal print-config: %v; got %s", jerr, out.String()) + } + // Top-level expectations + if got, ok := payload["model"].(string); !ok || got != "oss-gpt-20b" { + t.Fatalf("model=%v; want oss-gpt-20b", payload["model"]) + } + if got, ok := payload["baseURL"].(string); !ok || got != "https://api.openai.com/v1" { + t.Fatalf("baseURL=%v; want https://api.openai.com/v1", payload["baseURL"]) + } + if got, ok := payload["httpTimeout"].(string); !ok || got != "30s" { + t.Fatalf("httpTimeout=%v; want 30s", payload["httpTimeout"]) + } + // Image block expectations + img, ok := payload["image"].(map[string]any) + if !ok { + t.Fatalf("missing image block in print-config") + } + if got, ok := img["model"].(string); !ok || got != "gpt-image-1" { + t.Fatalf("image.model=%v; want gpt-image-1", img["model"]) + } +} + +func 
TestConflictingPromptSources_ErrorMessage(t *testing.T) { + var out, err bytes.Buffer + code := cliMain([]string{"-prompt", "p", "-prompt-file", os.DevNull}, &out, &err) + if code != 2 { + t.Fatalf("exit=%d; want 2", code) + } + if !strings.Contains(err.String(), "-prompt and -prompt-file are mutually exclusive") { + t.Fatalf("stderr did not contain conflict message; got: %s", err.String()) + } +} + +func TestConflictingSystemSources_ErrorMessage(t *testing.T) { + var out, err bytes.Buffer + // Provide both -system (non-default) and -system-file + code := cliMain([]string{"-prompt", "p", "-system", "X", "-system-file", os.DevNull}, &out, &err) + if code != 2 { + t.Fatalf("exit=%d; want 2", code) + } + if !strings.Contains(err.String(), "-system and -system-file are mutually exclusive") { + t.Fatalf("stderr did not contain system conflict message; got: %s", err.String()) + } +} + +func TestLoadMessagesWithPromptConflict_ErrorMessage(t *testing.T) { + var out, err bytes.Buffer + code := cliMain([]string{"-load-messages", os.DevNull, "-prompt", "p"}, &out, &err) + if code != 2 { + t.Fatalf("exit=%d; want 2", code) + } + if !strings.Contains(err.String(), "-load-messages cannot be combined with -prompt or -prompt-file") { + t.Fatalf("stderr did not contain load/prompt conflict message; got: %s", err.String()) + } +} + +func TestSaveAndLoadMessagesConflict_ErrorMessage(t *testing.T) { + var out, err bytes.Buffer + code := cliMain([]string{"-prompt", "p", "-save-messages", os.DevNull, "-load-messages", os.DevNull}, &out, &err) + if code != 2 { + t.Fatalf("exit=%d; want 2", code) + } + if !strings.Contains(err.String(), "-save-messages and -load-messages are mutually exclusive") { + t.Fatalf("stderr did not contain save/load conflict message; got: %s", err.String()) + } +} diff --git a/cmd/agentcli/flags_test.go b/cmd/agentcli/flags_test.go new file mode 100644 index 0000000..effce1a --- /dev/null +++ b/cmd/agentcli/flags_test.go @@ -0,0 +1,659 @@ +//nolint:errcheck // 
Tests intentionally allow some unchecked errors for pipe/env helpers. +package main + +import ( + "bytes" + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +// TestParseFlags_SystemAndSystemFile_MutuallyExclusive ensures providing both +// -system (non-default) and -system-file results in exit code 2 from parseFlags. +func TestParseFlags_SystemAndSystemFile_MutuallyExclusive(t *testing.T) { + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-system", "custom", "-system-file", "sys.txt", "-prompt", "p"} + + _, code := parseFlags() + if code != 2 { + t.Fatalf("parseFlags exit = %d; want 2 (mutual exclusion)", code) + } +} + +// TestParseFlags_PrepSystem_Exclusivity ensures -prep-system and -prep-system-file are mutually exclusive. +func TestParseFlags_PrepSystem_Exclusivity(t *testing.T) { + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-prep-system", "X", "-prep-system-file", "-"} + _, code := parseFlags() + if code != 2 { + t.Fatalf("parseFlags exit = %d; want 2 (mutual exclusion)", code) + } +} + +// TestPrepSystem_EnvAndFlagPrecedence ensures env is used when flags unset and flag overrides env. 
+func TestPrepSystem_EnvAndFlagPrecedence(t *testing.T) { + t.Setenv("OAI_PREP_SYSTEM", "ENV_SYS") + t.Setenv("OAI_PREP_SYSTEM_FILE", "") + // When flags unset, env should populate cfg.prepSystem + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if strings.TrimSpace(cfg.prepSystem) != "ENV_SYS" { + t.Fatalf("prepSystem=%q; want ENV_SYS", cfg.prepSystem) + } + // Flag should override env + os.Args = []string{"agentcli.test", "-prompt", "p", "-prep-system", "FLAG_SYS"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if strings.TrimSpace(cfg.prepSystem) != "FLAG_SYS" { + t.Fatalf("prepSystem=%q; want FLAG_SYS", cfg.prepSystem) + } +} + +// TestParseFlags_PromptAndPromptFile_MutuallyExclusive ensures providing both +// -prompt and -prompt-file results in exit code 2 from parseFlags. +func TestParseFlags_PromptAndPromptFile_MutuallyExclusive(t *testing.T) { + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-prompt", "inline", "-prompt-file", "p.txt"} + + _, code := parseFlags() + if code != 2 { + t.Fatalf("parseFlags exit = %d; want 2 (mutual exclusion)", code) + } +} + +// TestResolveMaybeFile_InlinePreferred returns inline when filePath empty. +func TestResolveMaybeFile_InlinePreferred(t *testing.T) { + got, err := resolveMaybeFile("inline", "") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != "inline" { + t.Fatalf("got %q; want %q", got, "inline") + } +} + +// TestResolveMaybeFile_File reads content from a real file. 
+func TestResolveMaybeFile_File(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "p.txt") + if err := os.WriteFile(path, []byte("from-file"), 0o644); err != nil { + t.Fatalf("write temp file: %v", err) + } + got, err := resolveMaybeFile("inline-ignored", path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != "from-file" { + t.Fatalf("got %q; want %q", got, "from-file") + } +} + +// TestResolveMaybeFile_STDIN reads content when filePath is "-". +func TestResolveMaybeFile_STDIN(t *testing.T) { + // Save and restore os.Stdin + oldStdin := os.Stdin + defer func() { os.Stdin = oldStdin }() + + r := bytes.NewBufferString("from-stdin") + // Create a pipe and write contents so io.ReadAll can consume it as Stdin + pr, pw, err := os.Pipe() + if err != nil { + t.Fatalf("pipe: %v", err) + } + // Write and close writer + if _, err := pw.Write(r.Bytes()); err != nil { + t.Fatalf("write to pipe: %v", err) + } + _ = pw.Close() + os.Stdin = pr + + got, err := resolveMaybeFile("ignored", "-") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.TrimSpace(got) != "from-stdin" { + t.Fatalf("got %q; want %q", got, "from-stdin") + } +} + +// TestResolveDeveloperMessages_Order ensures files are read first (in order), +// followed by inline -developer values (in order). 
+func TestResolveDeveloperMessages_Order(t *testing.T) { + dir := t.TempDir() + f1 := filepath.Join(dir, "dev1.txt") + f2 := filepath.Join(dir, "dev2.txt") + if err := os.WriteFile(f1, []byte("file-1"), 0o644); err != nil { + t.Fatalf("write f1: %v", err) + } + if err := os.WriteFile(f2, []byte("file-2"), 0o644); err != nil { + t.Fatalf("write f2: %v", err) + } + + devs, err := resolveDeveloperMessages([]string{"inline-1", "inline-2"}, []string{f1, f2}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + want := []string{"file-1", "file-2", "inline-1", "inline-2"} + if len(devs) != len(want) { + t.Fatalf("len(devs)=%d; want %d", len(devs), len(want)) + } + for i := range want { + if strings.TrimSpace(devs[i]) != want[i] { + t.Fatalf("devs[%d]=%q; want %q", i, devs[i], want[i]) + } + } +} + +// TestResolveDeveloperMessages_STDIN ensures a "-" entry is read from stdin. +func TestResolveDeveloperMessages_STDIN(t *testing.T) { + // Save and restore os.Stdin + oldStdin := os.Stdin + defer func() { os.Stdin = oldStdin }() + + // Prepare stdin data + pr, pw, err := os.Pipe() + if err != nil { + t.Fatalf("pipe: %v", err) + } + if _, err := pw.Write([]byte("dev-stdin")); err != nil { + t.Fatalf("write: %v", err) + } + _ = pw.Close() + os.Stdin = pr + + devs, err := resolveDeveloperMessages([]string{"inline"}, []string{"-"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(devs) != 2 { + t.Fatalf("len(devs)=%d; want 2", len(devs)) + } + if strings.TrimSpace(devs[0]) != "dev-stdin" { + t.Fatalf("first dev from stdin = %q; want %q", devs[0], "dev-stdin") + } + if strings.TrimSpace(devs[1]) != "inline" { + t.Fatalf("second dev inline = %q; want %q", devs[1], "inline") + } +} + +// TestHelpContainsRoleFlags ensures help output mentions the role flags, as a smoke test. 
+func TestHelpContainsRoleFlags(t *testing.T) { + var b strings.Builder + printUsage(&b) + help := b.String() + for _, token := range []string{"-developer", "-developer-file", "-prompt-file", "-system-file"} { + if !strings.Contains(help, token) { + t.Fatalf("help missing %s token; help=\n%s", token, help) + } + } +} + +// TestImageParamDefaultsAndEnvAndFlags verifies precedence and defaults for image param pass-throughs. +// +//nolint:gocyclo // Intentional multi-branch table-style assertions for env/flag precedence in one test. +func TestImageParamDefaultsAndEnvAndFlags(t *testing.T) { + // Clear possibly impacting envs + t.Setenv("OAI_IMAGE_N", "") + t.Setenv("OAI_IMAGE_SIZE", "") + t.Setenv("OAI_IMAGE_QUALITY", "") + t.Setenv("OAI_IMAGE_STYLE", "") + t.Setenv("OAI_IMAGE_RESPONSE_FORMAT", "") + t.Setenv("OAI_IMAGE_TRANSPARENT_BACKGROUND", "") + + t.Run("defaults when neither flags nor env", func(t *testing.T) { //nolint:tparallel // serial to avoid env races + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageN != 1 || cfg.imageSize != "1024x1024" || cfg.imageQuality != "standard" || cfg.imageStyle != "natural" || cfg.imageResponseFormat != "url" || cfg.imageTransparentBackground != false { + t.Fatalf("defaults mismatch: n=%d size=%s quality=%s style=%s resp=%s transparent=%v", cfg.imageN, cfg.imageSize, cfg.imageQuality, cfg.imageStyle, cfg.imageResponseFormat, cfg.imageTransparentBackground) + } + }) + + t.Run("env applies when flags unset", func(t *testing.T) { //nolint:tparallel + t.Setenv("OAI_IMAGE_N", "3") + t.Setenv("OAI_IMAGE_SIZE", "512x512") + t.Setenv("OAI_IMAGE_QUALITY", "hd") + t.Setenv("OAI_IMAGE_STYLE", "vivid") + t.Setenv("OAI_IMAGE_RESPONSE_FORMAT", "b64_json") + t.Setenv("OAI_IMAGE_TRANSPARENT_BACKGROUND", "true") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = 
[]string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageN != 3 || cfg.imageSize != "512x512" || cfg.imageQuality != "hd" || cfg.imageStyle != "vivid" || cfg.imageResponseFormat != "b64_json" || cfg.imageTransparentBackground != true { + t.Fatalf("env mismatch: n=%d size=%s quality=%s style=%s resp=%s transparent=%v", cfg.imageN, cfg.imageSize, cfg.imageQuality, cfg.imageStyle, cfg.imageResponseFormat, cfg.imageTransparentBackground) + } + }) + + t.Run("flags override env", func(t *testing.T) { //nolint:tparallel + t.Setenv("OAI_IMAGE_N", "2") + t.Setenv("OAI_IMAGE_SIZE", "256x256") + t.Setenv("OAI_IMAGE_QUALITY", "standard") + t.Setenv("OAI_IMAGE_STYLE", "natural") + t.Setenv("OAI_IMAGE_RESPONSE_FORMAT", "url") + t.Setenv("OAI_IMAGE_TRANSPARENT_BACKGROUND", "false") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-image-n", "4", "-image-size", "640x640", "-image-quality", "hd", "-image-style", "vivid", "-image-response-format", "b64_json", "-image-transparent-background", "true"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageN != 4 || cfg.imageSize != "640x640" || cfg.imageQuality != "hd" || cfg.imageStyle != "vivid" || cfg.imageResponseFormat != "b64_json" || cfg.imageTransparentBackground != true { + t.Fatalf("flags mismatch: n=%d size=%s quality=%s style=%s resp=%s transparent=%v", cfg.imageN, cfg.imageSize, cfg.imageQuality, cfg.imageStyle, cfg.imageResponseFormat, cfg.imageTransparentBackground) + } + }) +} + +// TestImageModelFlagPrecedence verifies precedence flag > env > default for -image-model. 
+func TestImageModelFlagPrecedence(t *testing.T) { + t.Run("default when neither flags nor env", func(t *testing.T) { //nolint:tparallel + t.Setenv("OAI_IMAGE_MODEL", "") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageModel != "gpt-image-1" { + t.Fatalf("imageModel=%q; want gpt-image-1", cfg.imageModel) + } + }) + + t.Run("env applies when flag unset", func(t *testing.T) { //nolint:tparallel + t.Setenv("OAI_IMAGE_MODEL", "env-model") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageModel != "env-model" { + t.Fatalf("imageModel=%q; want env-model", cfg.imageModel) + } + }) + + t.Run("flags override env", func(t *testing.T) { //nolint:tparallel + t.Setenv("OAI_IMAGE_MODEL", "env-model") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-image-model", "flag-model"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageModel != "flag-model" { + t.Fatalf("imageModel=%q; want flag-model", cfg.imageModel) + } + }) +} + +// TestPrintConfig_IncludesImageParams verifies print-config output reflects resolved image params. 
+func TestPrintConfig_IncludesImageParams(t *testing.T) { + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-image-n", "2", "-image-size", "512x512", "-image-quality", "hd", "-image-style", "vivid", "-image-response-format", "b64_json", "-image-transparent-background", "true", "-print-config"} + var out bytes.Buffer + // parse then print + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + exit := printResolvedConfig(cfg, &out) + if exit != 0 { + t.Fatalf("printResolvedConfig exit=%d; want 0", exit) + } + s := out.String() + for _, token := range []string{ + "\"image\"", + "\"n\": 2", + "\"size\": \"512x512\"", + "\"quality\": \"hd\"", + "\"style\": \"vivid\"", + "\"response_format\": \"b64_json\"", + "\"transparent_background\": true", + } { + if !strings.Contains(s, token) { + t.Fatalf("print-config missing %s in output: %s", token, s) + } + } +} + +// Avoid unused imports on some platforms +var _ = runtime.GOOS + +// TestHTTPRetryPrecedence verifies precedence and defaults for -http-retries and -http-retry-backoff. 
+func TestHTTPRetryPrecedence(t *testing.T) { + // Save and restore env using t.Setenv for cleanliness + t.Setenv("OAI_HTTP_RETRIES", "") + t.Setenv("OAI_HTTP_RETRY_BACKOFF", "") + + t.Run("defaults when neither flags nor env", func(t *testing.T) { + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.httpRetries != 2 { + t.Fatalf("httpRetries=%d; want 2", cfg.httpRetries) + } + if cfg.httpBackoff.String() != "500ms" { + t.Fatalf("httpBackoff=%s; want 500ms", cfg.httpBackoff) + } + }) + + t.Run("env applies when flags unset", func(t *testing.T) { + t.Setenv("OAI_HTTP_RETRIES", "5") + t.Setenv("OAI_HTTP_RETRY_BACKOFF", "750ms") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.httpRetries != 5 { + t.Fatalf("httpRetries=%d; want 5", cfg.httpRetries) + } + if cfg.httpBackoff.String() != "750ms" { + t.Fatalf("httpBackoff=%s; want 750ms", cfg.httpBackoff) + } + }) + + t.Run("flags override env", func(t *testing.T) { + t.Setenv("OAI_HTTP_RETRIES", "7") + t.Setenv("OAI_HTTP_RETRY_BACKOFF", "900ms") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-http-retries", "3", "-http-retry-backoff", "1s"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.httpRetries != 3 { + t.Fatalf("httpRetries=%d; want 3", cfg.httpRetries) + } + if cfg.httpBackoff.String() != "1s" { + t.Fatalf("httpBackoff=%s; want 1s", cfg.httpBackoff) + } + }) + + t.Run("explicit zero via flags retains zero", func(t *testing.T) { + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-http-retries", "0", "-http-retry-backoff", 
"0"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.httpRetries != 0 { + t.Fatalf("httpRetries=%d; want 0", cfg.httpRetries) + } + if cfg.httpBackoff != 0 { + t.Fatalf("httpBackoff=%s; want 0", cfg.httpBackoff) + } + }) +} + +// TestImageHTTPKnobsPrecedence verifies precedence and inheritance for image HTTP knobs. +func TestImageHTTPKnobsPrecedence(t *testing.T) { + t.Run("defaults inherit from main when neither flags nor env", func(t *testing.T) { + // Clear envs that might affect defaults + t.Setenv("OAI_IMAGE_HTTP_TIMEOUT", "") + t.Setenv("OAI_IMAGE_HTTP_RETRIES", "") + t.Setenv("OAI_IMAGE_HTTP_RETRY_BACKOFF", "") + t.Setenv("OAI_HTTP_TIMEOUT", "") + t.Setenv("OAI_HTTP_RETRIES", "") + t.Setenv("OAI_HTTP_RETRY_BACKOFF", "") + + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageHTTPTimeout != cfg.httpTimeout { + t.Fatalf("imageHTTPTimeout=%s; want inherit %s", cfg.imageHTTPTimeout, cfg.httpTimeout) + } + if cfg.imageHTTPRetries != cfg.httpRetries { + t.Fatalf("imageHTTPRetries=%d; want inherit %d", cfg.imageHTTPRetries, cfg.httpRetries) + } + if cfg.imageHTTPBackoff != cfg.httpBackoff { + t.Fatalf("imageHTTPBackoff=%s; want inherit %s", cfg.imageHTTPBackoff, cfg.httpBackoff) + } + }) + + t.Run("env applies when flags unset", func(t *testing.T) { + t.Setenv("OAI_IMAGE_HTTP_TIMEOUT", "7s") + t.Setenv("OAI_IMAGE_HTTP_RETRIES", "9") + t.Setenv("OAI_IMAGE_HTTP_RETRY_BACKOFF", "1.5s") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageHTTPTimeout.String() != "7s" { + t.Fatalf("imageHTTPTimeout=%s; want 7s", cfg.imageHTTPTimeout) + } + if cfg.imageHTTPRetries != 9 { + 
t.Fatalf("imageHTTPRetries=%d; want 9", cfg.imageHTTPRetries) + } + if cfg.imageHTTPBackoff.String() != "1.5s" { + t.Fatalf("imageHTTPBackoff=%s; want 1.5s", cfg.imageHTTPBackoff) + } + }) + + t.Run("flags override env", func(t *testing.T) { + t.Setenv("OAI_IMAGE_HTTP_TIMEOUT", "7s") + t.Setenv("OAI_IMAGE_HTTP_RETRIES", "9") + t.Setenv("OAI_IMAGE_HTTP_RETRY_BACKOFF", "1.5s") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-image-http-timeout", "3s", "-image-http-retries", "5", "-image-http-retry-backoff", "1s"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.imageHTTPTimeout.String() != "3s" { + t.Fatalf("imageHTTPTimeout=%s; want 3s", cfg.imageHTTPTimeout) + } + if cfg.imageHTTPRetries != 5 { + t.Fatalf("imageHTTPRetries=%d; want 5", cfg.imageHTTPRetries) + } + if cfg.imageHTTPBackoff.String() != "1s" { + t.Fatalf("imageHTTPBackoff=%s; want 1s", cfg.imageHTTPBackoff) + } + }) +} + +// TestStateDir_PrecedenceAndCreation verifies that -state-dir flag overrides env, +// env applies when flag unset, tilde is expanded, and directory is created with 0700. 
+func TestStateDir_PrecedenceAndCreation(t *testing.T) { + // Ensure env baseline empty + t.Setenv("AGENTCLI_STATE_DIR", "") + + t.Run("env applies when flag unset and directory created", func(t *testing.T) { + tmp := t.TempDir() + envDir := filepath.Join(tmp, "state-env") + t.Setenv("AGENTCLI_STATE_DIR", envDir) + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.stateDir != filepath.Clean(envDir) { + t.Fatalf("stateDir=%q; want %q", cfg.stateDir, filepath.Clean(envDir)) + } + // Directory should exist + info, err := os.Stat(cfg.stateDir) + if err != nil || !info.IsDir() { + t.Fatalf("expected directory to exist: %v", err) + } + // Mode should have no world-write; at least ensure only user perms bits are set from 0700 mask + if info.Mode().Perm()&0o077 != 0 { + t.Fatalf("state dir perms too open: %v", info.Mode().Perm()) + } + }) + + t.Run("flag overrides env and expands tilde", func(t *testing.T) { + tmp := t.TempDir() + t.Setenv("AGENTCLI_STATE_DIR", filepath.Join(tmp, "ignored-env")) + // Use a fake home to control ~ expansion + fakeHome := filepath.Join(tmp, "home") + if err := os.MkdirAll(fakeHome, 0o755); err != nil { + t.Fatalf("mkdir fake home: %v", err) + } + // Temporarily override HOME + t.Setenv("HOME", fakeHome) + flagPath := "~/my-state" + want := filepath.Join(fakeHome, "my-state") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-state-dir", flagPath} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.stateDir != filepath.Clean(want) { + t.Fatalf("stateDir=%q; want %q", cfg.stateDir, filepath.Clean(want)) + } + if _, err := os.Stat(cfg.stateDir); err != nil { + t.Fatalf("expected state dir created: %v", err) + } + }) +} + +// TestStateRefine_RequiresStateDir ensures that using 
refine flags without -state-dir errors with exit code 2. +func TestStateRefine_RequiresStateDir(t *testing.T) { + // No state-dir + orig := os.Args + defer func() { os.Args = orig }() + + // Each case should fail with exit code 2 and a helpful error string + cases := [][]string{ + {"agentcli.test", "-prompt", "p", "-state-refine"}, + {"agentcli.test", "-prompt", "p", "-state-refine-text", "t"}, + {"agentcli.test", "-prompt", "p", "-state-refine-file", "f.txt"}, + } + for i, args := range cases { + os.Args = args + cfg, code := parseFlags() + if code != 2 { + t.Fatalf("case %d: parseFlags exit=%d; want 2", i, code) + } + if !strings.Contains(cfg.parseError, "requires -state-dir") { + t.Fatalf("case %d: parseError missing hint: %q", i, cfg.parseError) + } + } +} + +// TestStateScope_PrecedenceAndDefaultHash verifies that -state-scope overrides env +// and that when both are empty a default hash of model|base_url|toolset is computed. +func TestStateScope_PrecedenceAndDefaultHash(t *testing.T) { + // Ensure env baseline empty + t.Setenv("AGENTCLI_STATE_SCOPE", "") + + t.Run("env applies when flag unset", func(t *testing.T) { + t.Setenv("AGENTCLI_STATE_SCOPE", "env-scope") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.stateScope != "env-scope" { + t.Fatalf("stateScope=%q; want env-scope", cfg.stateScope) + } + }) + + t.Run("flag overrides env", func(t *testing.T) { + t.Setenv("AGENTCLI_STATE_SCOPE", "env-scope") + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-state-scope", "flag-scope"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg.stateScope != "flag-scope" { + t.Fatalf("stateScope=%q; want flag-scope", cfg.stateScope) + } + }) + + t.Run("default hash when both unset and tools absent", 
func(t *testing.T) { + t.Setenv("AGENTCLI_STATE_SCOPE", "") + orig := os.Args + defer func() { os.Args = orig }() + // Set explicit model/base to make hash deterministic; no -tools + os.Args = []string{"agentcli.test", "-prompt", "p", "-model", "m", "-base-url", "http://b"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + // Expected sha256 of "m|http://b|" (empty toolset hash) + want := sha256SumHex([]byte("m|http://b|")) + if cfg.stateScope != want { + t.Fatalf("stateScope default hash=%q; want %q", cfg.stateScope, want) + } + }) + + t.Run("toolset hash influences default scope", func(t *testing.T) { + t.Setenv("AGENTCLI_STATE_SCOPE", "") + dir := t.TempDir() + // Two different manifest contents + m1 := filepath.Join(dir, "tools1.json") + m2 := filepath.Join(dir, "tools2.json") + if err := os.WriteFile(m1, []byte(`{"tools":[{"name":"a"}]}`), 0o644); err != nil { + t.Fatalf("write m1: %v", err) + } + if err := os.WriteFile(m2, []byte(`{"tools":[{"name":"b"}]}`), 0o644); err != nil { + t.Fatalf("write m2: %v", err) + } + // First parse with tools1 + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "p", "-model", "m", "-base-url", "http://b", "-tools", m1} + cfg1, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + // Second parse with tools2 + os.Args = []string{"agentcli.test", "-prompt", "p", "-model", "m", "-base-url", "http://b", "-tools", m2} + cfg2, code := parseFlags() + if code != 0 { + t.Fatalf("parseFlags exit=%d; want 0", code) + } + if cfg1.stateScope == cfg2.stateScope { + t.Fatalf("expected different state scopes for different tool manifests; got %q == %q", cfg1.stateScope, cfg2.stateScope) + } + }) +} diff --git a/cmd/agentcli/gpt5_mock_smoke_test.go b/cmd/agentcli/gpt5_mock_smoke_test.go new file mode 100644 index 0000000..8dd3e10 --- /dev/null +++ b/cmd/agentcli/gpt5_mock_smoke_test.go @@ -0,0 +1,134 @@ 
+//nolint:errcheck // Tests intentionally ignore some error returns for brevity; behavior validated via assertions. +package main + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "strings" + "testing" + + oai "github.com/hyperifyio/goagent/internal/oai" +) + +// TestGPT5_MockSmoke_DefaultTemperature asserts that when targeting a GPT-5 +// model with no sampling flags, the request includes temperature 1.0 by default. +// It uses a mock OpenAI-compatible endpoint to capture the request payload. +func TestGPT5_MockSmoke_DefaultTemperature(t *testing.T) { + var seen oai.ChatCompletionsRequest + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/chat/completions" { + t.Fatalf("unexpected path: %s", r.URL.Path) + } + if err := json.NewDecoder(r.Body).Decode(&seen); err != nil { + t.Fatalf("decode: %v", err) + } + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv.Close() + + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{ + "-prompt", "Say ok", + "-base-url", srv.URL, + "-model", "gpt-5", + "-max-steps", "1", + }, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("cli exit=%d stderr=%s", code, errBuf.String()) + } + if seen.Temperature == nil || *seen.Temperature != 1.0 { + if seen.Temperature == nil { + t.Fatalf("expected temperature in request; want 1.0") + } + t.Fatalf("temperature got %v want 1.0", *seen.Temperature) + } +} + +// TestGPT5_MockSmoke_ReasoningControlsIndependence simulates toggling vendor-specific +// reasoning controls via environment variables and verifies temperature remains 1.0. +// The agent currently ignores these envs by design; this test protects independence. 
+func TestGPT5_MockSmoke_ReasoningControlsIndependence(t *testing.T) { + // Save and restore environment variables used in this test + save := func(k string) (string, bool) { v, ok := os.LookupEnv(k); return v, ok } + restore := func(k, v string, ok bool) { + if ok { + _ = os.Setenv(k, v) + } else { + _ = os.Unsetenv(k) + } + } + + // First run: baseline + var baseline oai.ChatCompletionsRequest + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + // On first call record baseline; on second call record override + if baseline.Model == "" { + baseline = req + } else { + // Overwrite baseline for clarity in failure messages + baseline = req + } + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv.Close() + + var out1, err1 bytes.Buffer + code := cliMain([]string{"-prompt", "x", "-base-url", srv.URL, "-model", "gpt-5", "-max-steps", "1"}, &out1, &err1) + if code != 0 { + t.Fatalf("baseline exit=%d err=%s", code, err1.String()) + } + if baseline.Temperature == nil || *baseline.Temperature != 1.0 { + t.Fatalf("baseline temperature got %v want 1.0", ptrToString(baseline.Temperature)) + } + + // Second run: toggle hypothetical reasoning controls via env vars + v0, ok0 := save("GPT5_VERBOSITY") + v1, ok1 := save("GPT5_REASONING_EFFORT") + defer restore("GPT5_VERBOSITY", v0, ok0) + defer restore("GPT5_REASONING_EFFORT", v1, ok1) + _ = os.Setenv("GPT5_VERBOSITY", "high") + _ = os.Setenv("GPT5_REASONING_EFFORT", "medium") + + var seen oai.ChatCompletionsRequest + srv2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if err := json.NewDecoder(r.Body).Decode(&seen); err != nil { + t.Fatalf("decode: %v", err) + } + _ = 
json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv2.Close() + + var out2, err2 bytes.Buffer + code = cliMain([]string{"-prompt", "x", "-base-url", srv2.URL, "-model", "gpt-5", "-max-steps", "1"}, &out2, &err2) + if code != 0 { + t.Fatalf("env-toggle exit=%d err=%s", code, err2.String()) + } + if seen.Temperature == nil || *seen.Temperature != 1.0 { + t.Fatalf("with reasoning envs, temperature got %v want 1.0", ptrToString(seen.Temperature)) + } +} + +func ptrToString(p *float64) string { + if p == nil { + return "<nil>" + } + // avoid fmt import; simple formatting + s := strings.TrimRight(strings.TrimRight(jsonNumber(*p), "0"), ".") + if s == "" { + return "0" + } + return s +} + +// jsonNumber renders a float with JSON rules for test messages. +func jsonNumber(f float64) string { + b, _ := json.Marshal(f) + return string(b) +} diff --git a/cmd/agentcli/help_test.go b/cmd/agentcli/help_test.go new file mode 100644 index 0000000..0cf65dc --- /dev/null +++ b/cmd/agentcli/help_test.go @@ -0,0 +1,125 @@ +package main + +import ( + "strings" + "testing" +) + +// TestHelpSnapshot_ContainsAllExpectedTokens asserts that the built-in help output +// contains all critical flags and key phrases (inheritance, conflicts, defaults). +// This guards against accidental drift in help documentation. 
+func TestHelpSnapshot_ContainsAllExpectedTokens(t *testing.T) { + var b strings.Builder + printUsage(&b) + help := b.String() + + // Minimal sanity checks + for _, token := range []string{ + "agentcli —", + "Usage:", + "precedence: flag > env > default", + } { + if !strings.Contains(help, token) { + t.Fatalf("help missing required token %q; help=\n%s", token, help) + } + } + + // Flags to assert are documented in help + flags := []string{ + "-prompt string", + "-tools string", + "-system string", + "-system-file string", + "-developer string", + "-developer-file string", + "-prompt-file string", + "-base-url string", + "-api-key string", + "-model string", + "-max-steps int", + "-timeout duration", + "-http-timeout duration", + "-prep-http-timeout duration", + "-tool-timeout duration", + "-http-retries int", + "-http-retry-backoff duration", + "-image-base-url string", + "-image-model string", + "-image-api-key string", + "-image-http-timeout duration", + "-image-http-retries int", + "-image-http-retry-backoff duration", + "-temp float", + "-top-p float", + "-prep-profile string", + "-prep-model string", + "-prep-base-url string", + "-prep-api-key string", + "-prep-http-retries int", + "-prep-http-retry-backoff duration", + "-prep-temp float", + "-prep-top-p float", + "-prep-system string", + "-prep-system-file string", + "-image-n int", + "-image-size string", + "-image-quality string", + "-image-style string", + "-image-response-format string", + "-image-transparent-background", + "-debug", + "-verbose", + "-quiet", + "-prep-tools-allow-external", + "-prep-cache-bust", + "-prep-tools string", + "-prep-dry-run", + "-print-messages", + "-stream-final", + "-channel-route", + "-save-messages string", + "-load-messages string", + "-prep-enabled", + "-capabilities", + "-print-config", + "-dry-run", + "-state-dir string", + "-state-scope string", + "-state-refine", + "-state-refine-text string", + "-state-refine-file string", + "--version | -version", + } + for _, f := 
range flags { + if !strings.Contains(help, f) { + t.Fatalf("help missing flag token %q; help=\n%s", f, help) + } + } + + // Key phrases to ensure important semantics remain documented + phrases := []string{ + "conflicts with -temp", + "conflicts with -prep-top-p", + "conflicts with -prep-temp", + "mutually exclusive with -prep-system-file", + "mutually exclusive with -system", + "mutually exclusive with -prompt", + "'-' for STDIN", + "repeatable", + "inherits -http-timeout if unset", + "inherits -http-retries if unset", + "inherits -http-retry-backoff if unset", + "default 1.0", + "default 2", + "default 500ms", + "default 1024x1024", + "default standard", + "default natural", + "default url", + } + for _, p := range phrases { + if !strings.Contains(help, p) { + t.Fatalf("help missing key phrase %q; help=\n%s", p, help) + } + } +} diff --git a/cmd/agentcli/main.go b/cmd/agentcli/main.go new file mode 100644 index 0000000..ea49b66 --- /dev/null +++ b/cmd/agentcli/main.go @@ -0,0 +1,2434 @@ +package main + +import ( + "context" + "crypto/sha256" + "encoding/json" + "errors" + "flag" + "fmt" + "io" + "math/rand" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/hyperifyio/goagent/internal/oai" + prestage "github.com/hyperifyio/goagent/internal/oai/prestage" + "github.com/hyperifyio/goagent/internal/tools" +) + +// cliConfig holds user-supplied configuration resolved from flags and env. 
type cliConfig struct {
	// prompt is the required user prompt text (see -prompt / -prompt-file).
	prompt string
	// Role inputs: developer and files
	developerPrompts []string
	developerFiles   []string
	systemFile       string
	promptFile       string
	// Pre-stage specific system message inputs
	prepSystem     string
	prepSystemFile string
	toolsPath      string
	systemPrompt   string
	baseURL        string
	apiKey         string
	model          string
	maxSteps       int
	timeout         time.Duration // deprecated global timeout; kept for backward compatibility
	httpTimeout     time.Duration // resolved HTTP timeout (final value after env/flags/global)
	prepHTTPTimeout time.Duration // resolved pre-stage HTTP timeout (inherits from http-timeout)
	toolTimeout     time.Duration // resolved per-tool timeout (final value after flags/global)
	httpRetries     int           // number of retries for HTTP
	httpBackoff     time.Duration // base backoff between retries
	temperature float64
	topP        float64
	prepTopP    float64
	// Pre-stage explicit temperature override and its source
	prepTemperature       float64
	prepTemperatureSource string // "flag" | "env" | "inherit"
	// Pre-stage prompt profile controlling effective temperature when supported
	prepProfile oai.PromptProfile
	debug       bool
	verbose     bool
	quiet       bool
	// Pre-stage cache controls
	prepCacheBust bool // when true, bypass pre-stage cache for this run
	// Pre-stage master switch
	prepEnabled bool // when false, completely skip pre-stage
	// Tracks whether -prep-enabled was explicitly provided by the user
	prepEnabledSet bool
	capabilities   bool
	printConfig    bool
	// Dry-run planning for state persistence actions
	dryRun bool
	// State persistence
	stateDir string
	// Optional partition key for persisted state; when empty we compute a default
	// as sha256(model_id + "|" + base_url + "|" + toolset_hash)
	stateScope string
	// Refinement controls
	stateRefine     bool   // when true, perform refinement of a loaded state bundle
	stateRefineText string // optional refinement text input
	stateRefineFile string // optional refinement file path; wins over text when both provided
	// Pre-stage tool policy
	prepToolsAllowExternal bool // when false, pre-stage uses built-in read-only tools and ignores -tools
	// Optional pre-stage-specific tools manifest path; when set and external tools are allowed,
	// the pre-stage uses this manifest instead of -tools
	prepToolsPath string
	// Sources for effective timeouts: "flag" | "env" | "default"
	httpTimeoutSource     string
	prepHTTPTimeoutSource string
	toolTimeoutSource     string
	globalTimeoutSource   string
	// Sources for sampling knobs
	temperatureSource string // "flag" | "env" | "default"
	prepTopPSource    string // "flag" | "env" | "inherit"
	// Pre-stage explicit overrides
	prepModel       string
	prepBaseURL     string
	prepAPIKey      string
	prepHTTPRetries int
	prepHTTPBackoff time.Duration
	// Sources for prep overrides
	prepModelSource       string // "flag" | "env" | "inherit"
	prepBaseURLSource     string // "flag" | "env" | "inherit"
	prepAPIKeySource      string // "flag" | "env:OAI_PREP_API_KEY|env:OAI_API_KEY|env:OPENAI_API_KEY" | "inherit|empty"
	prepHTTPRetriesSource string // "flag" | "env" | "inherit"
	prepHTTPBackoffSource string // "flag" | "env" | "inherit"
	// Image API overrides and sources
	imageBaseURL       string
	imageAPIKey        string
	imageBaseURLSource string // "flag" | "env" | "inherit"
	imageAPIKeySource  string // "flag" | "env|env:OPENAI_API_KEY" | "inherit|empty"
	// Image HTTP behavior
	imageHTTPTimeout       time.Duration
	imageHTTPRetries       int
	imageHTTPBackoff       time.Duration
	imageHTTPTimeoutSource string // "flag" | "env" | "inherit"
	imageHTTPRetriesSource string // "flag" | "env" | "inherit"
	imageHTTPBackoffSource string // "flag" | "env" | "inherit"
	// Image request parameter pass-throughs
	imageModel                 string
	imageN                     int
	imageSize                  string
	imageQuality               string // standard|hd
	imageStyle                 string // natural|vivid
	imageResponseFormat        string // url|b64_json
	imageTransparentBackground bool
	// Image prompt (optional). Not exposed via flags yet; populated when loading
	// from a saved messages file that contains an auxiliary "image_prompt" field.
	imagePrompt string
	// Message viewing modes
	prepDryRun    bool // When true, run pre-stage only, print refined messages to stdout, and exit
	printMessages bool // When true, pretty-print final merged messages to stderr before main call
	// Streaming control
	streamFinal bool // When true, request SSE streaming and print only assistant{channel:"final"} progressively
	// Save/load refined messages
	saveMessagesPath string // When set, write the final merged Harmony messages to this JSON path and continue
	loadMessagesPath string // When set, bypass pre-stage and prompt; load messages JSON verbatim (validator-checked)
	// Custom channel routing: map specific assistant channels to stdout|stderr|omit
	channelRoutes map[string]string
	// Raw repeatable flag values for -channel-route parsing (e.g., "critic=stdout")
	channelRoutePairs []string
	// parseError carries a human-readable parse error for early exit situations
	parseError string
	// initMessages allows tests to inject a custom starting transcript to
	// exercise pre-flight validation paths (e.g., stray tool message). When
	// empty, the default [system,user] seed is used.
	initMessages []oai.Message
}

// float64FlexFlag wires a float64 destination and records if it was set via flag.
type float64FlexFlag struct {
	dst *float64 // destination for the parsed value
	set *bool    // flipped to true when the flag is explicitly provided
}

// boolFlexFlag wires a bool destination and records if it was set via flag.
+type boolFlexFlag struct { + dst *bool + set *bool +} + +func (b *boolFlexFlag) String() string { + if b == nil || b.dst == nil { + return "false" + } + if *b.dst { + return "true" + } + return "false" +} + +func (b *boolFlexFlag) Set(s string) error { + v, err := strconv.ParseBool(strings.TrimSpace(s)) + if err != nil { + return err + } + if b.dst != nil { + *b.dst = v + } + if b.set != nil { + *b.set = true + } + return nil +} + +func (f *float64FlexFlag) String() string { + if f == nil || f.dst == nil { + return "" + } + return strconv.FormatFloat(*f.dst, 'f', -1, 64) +} + +func (f *float64FlexFlag) Set(s string) error { + v, err := strconv.ParseFloat(strings.TrimSpace(s), 64) + if err != nil { + return err + } + if f.dst != nil { + *f.dst = v + } + if f.set != nil { + *f.set = true + } + return nil +} + +// intFlexFlag wires an int destination and records if it was set via flag. +type intFlexFlag struct { + dst *int + set *bool +} + +func (f *intFlexFlag) String() string { + if f == nil || f.dst == nil { + return "0" + } + return strconv.Itoa(*f.dst) +} + +func (f *intFlexFlag) Set(s string) error { + v, err := strconv.Atoi(strings.TrimSpace(s)) + if err != nil { + return err + } + if f.dst != nil { + *f.dst = v + } + if f.set != nil { + *f.set = true + } + return nil +} + +func getEnv(key, def string) string { + v := os.Getenv(key) + if v == "" { + return def + } + return v +} + +// resolveAPIKeyFromEnv returns the API key using canonical and legacy env vars. +// Precedence: OAI_API_KEY > OPENAI_API_KEY > "". +func resolveAPIKeyFromEnv() string { + if v := os.Getenv("OAI_API_KEY"); strings.TrimSpace(v) != "" { + return v + } + if v := os.Getenv("OPENAI_API_KEY"); strings.TrimSpace(v) != "" { + return v + } + return "" +} + +// durationFlexFlag wires a duration destination and records if it was set via flag. 
+type durationFlexFlag struct { + dst *time.Duration + set *bool +} + +func (f durationFlexFlag) String() string { + if f.dst == nil { + return "" + } + return f.dst.String() +} + +func (f durationFlexFlag) Set(s string) error { + d, err := parseDurationFlexible(s) + if err != nil { + return err + } + *f.dst = d + if f.set != nil { + *f.set = true + } + return nil +} + +// parseFlags parses command-line flags and environment variables. +// nolint:gocyclo // Flag definition and precedence resolution are inherently branching but covered by tests. +func parseFlags() (cliConfig, int) { + var cfg cliConfig + + // Reset default FlagSet to allow re-entrant parsing in tests. + flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ContinueOnError) + // Silence automatic usage/errors; we handle messaging ourselves. + flag.CommandLine.SetOutput(io.Discard) + + defaultSystem := "You are a helpful, precise assistant. Use tools when strictly helpful." + defaultBase := getEnv("OAI_BASE_URL", "https://api.openai.com/v1") + defaultModel := getEnv("OAI_MODEL", "oss-gpt-20b") + // API key resolves from env with fallback for compatibility + defaultKey := resolveAPIKeyFromEnv() + + flag.StringVar(&cfg.prompt, "prompt", "", "User prompt (required)") + // Role input flags + // -developer is repeatable; collect via custom sliceVar + flag.Var((*stringSliceFlag)(&cfg.developerPrompts), "developer", "Developer message (repeatable)") + flag.Var((*stringSliceFlag)(&cfg.developerFiles), "developer-file", "Path to file containing developer message (repeatable; '-' for STDIN)") + flag.StringVar(&cfg.systemFile, "system-file", "", "Path to file containing system prompt ('-' for STDIN; mutually exclusive with -system)") + flag.StringVar(&cfg.promptFile, "prompt-file", "", "Path to file containing user prompt ('-' for STDIN; mutually exclusive with -prompt)") + // Pre-stage system message (optional). Precedence: flag > env > empty. 
Mutually exclusive with -prep-system-file + flag.StringVar(&cfg.prepSystem, "prep-system", "", "Pre-stage system message (env OAI_PREP_SYSTEM; mutually exclusive with -prep-system-file)") + flag.StringVar(&cfg.prepSystemFile, "prep-system-file", "", "Path to file containing pre-stage system message ('-' for STDIN; env OAI_PREP_SYSTEM_FILE; mutually exclusive with -prep-system)") + flag.StringVar(&cfg.toolsPath, "tools", "", "Path to tools.json (optional)") + // State directory (CLI > env > empty). When set, create if missing with 0700. + flag.StringVar(&cfg.stateDir, "state-dir", getEnv("AGENTCLI_STATE_DIR", ""), "Directory to persist and restore execution state across runs (env AGENTCLI_STATE_DIR)") + // Optional state scope (CLI > env > computed default) + flag.StringVar(&cfg.stateScope, "state-scope", getEnv("AGENTCLI_STATE_SCOPE", ""), "Optional scope key to partition saved state (env AGENTCLI_STATE_SCOPE); when empty, a default hash of model|base_url|toolset is used") + // Refinement flags + flag.BoolVar(&cfg.stateRefine, "state-refine", false, "Refine the loaded state bundle using -state-refine-text or -state-refine-file (requires -state-dir)") + flag.StringVar(&cfg.stateRefineText, "state-refine-text", "", "Refinement input text to apply to the loaded state bundle (ignored when -state-refine-file is set; requires -state-dir)") + flag.StringVar(&cfg.stateRefineFile, "state-refine-file", "", "Path to file containing refinement input (wins over -state-refine-text; requires -state-dir)") + flag.StringVar(&cfg.systemPrompt, "system", defaultSystem, "System prompt") + flag.StringVar(&cfg.baseURL, "base-url", defaultBase, "OpenAI-compatible base URL") + flag.StringVar(&cfg.apiKey, "api-key", defaultKey, "API key if required (env OAI_API_KEY; falls back to OPENAI_API_KEY)") + flag.StringVar(&cfg.model, "model", defaultModel, "Model ID") + flag.IntVar(&cfg.maxSteps, "max-steps", 8, "Maximum reasoning/tool steps") + // Deprecated global timeout retained as a fallback 
if the split timeouts are not provided + // Accept plain seconds (e.g., 300 => 300s) in addition to Go duration strings. + cfg.timeout = 30 * time.Second + var globalSet bool + flag.Var(durationFlexFlag{dst: &cfg.timeout, set: &globalSet}, "timeout", "[DEPRECATED] Global timeout; use -http-timeout and -tool-timeout") + // New split timeouts (default to 0; accept plain seconds or Go duration strings) + cfg.httpTimeout = 0 + cfg.prepHTTPTimeout = 0 + cfg.toolTimeout = 0 + var httpSet, toolSet bool + var prepHTTPSet bool + flag.Var(durationFlexFlag{dst: &cfg.httpTimeout, set: &httpSet}, "http-timeout", "HTTP timeout for chat completions (env OAI_HTTP_TIMEOUT; falls back to -timeout if unset)") + flag.Var(durationFlexFlag{dst: &cfg.prepHTTPTimeout, set: &prepHTTPSet}, "prep-http-timeout", "HTTP timeout for pre-stage (env OAI_PREP_HTTP_TIMEOUT; falls back to -http-timeout if unset)") + flag.Var(durationFlexFlag{dst: &cfg.toolTimeout, set: &toolSet}, "tool-timeout", "Per-tool timeout (falls back to -timeout if unset)") + // Use a flexible float flag to detect whether -temp was explicitly set + var tempSet bool + var _ flag.Value = (*float64FlexFlag)(nil) + (func() { + f := &float64FlexFlag{dst: &cfg.temperature, set: &tempSet} + // initialize default before registering + cfg.temperature = 1.0 + flag.CommandLine.Var(f, "temp", "Sampling temperature") + })() + + // Nucleus sampling (one-knob with temperature). Not yet sent to API; used to gate temperature. 
+ flag.Float64Var(&cfg.topP, "top-p", 0, "Nucleus sampling probability mass (conflicts with temperature)") + // Pre-stage nucleus sampling (one-knob with temperature for pre-stage) + flag.Float64Var(&cfg.prepTopP, "prep-top-p", 0, "Nucleus sampling probability mass for pre-stage (env OAI_PREP_TOP_P; conflicts with -prep-temp)") + // Pre-stage explicit temperature override (flag > env OAI_PREP_TEMP > inherit -temp) + var prepTempSet bool + (func() { + cfg.prepTemperature = -1 // sentinel to detect unset + f := &float64FlexFlag{dst: &cfg.prepTemperature, set: &prepTempSet} + flag.CommandLine.Var(f, "prep-temp", "Pre-stage sampling temperature (env OAI_PREP_TEMP; inherits -temp if unset; conflicts with -prep-top-p)") + })() + // Pre-stage profile selector (deterministic|general|creative|reasoning) + var prepProfileRaw string + flag.StringVar(&prepProfileRaw, "prep-profile", "", "Pre-stage prompt profile (deterministic|general|creative|reasoning); sets temperature when supported (conflicts with -prep-top-p)") + // Pre-stage explicit overrides + flag.StringVar(&cfg.prepModel, "prep-model", "", "Pre-stage model ID (env OAI_PREP_MODEL; inherits -model if unset)") + flag.StringVar(&cfg.prepBaseURL, "prep-base-url", "", "Pre-stage base URL (env OAI_PREP_BASE_URL; inherits -base-url if unset)") + flag.StringVar(&cfg.prepAPIKey, "prep-api-key", "", "Pre-stage API key (env OAI_PREP_API_KEY; falls back to OAI_API_KEY/OPENAI_API_KEY; inherits -api-key if unset)") + flag.IntVar(&cfg.prepHTTPRetries, "prep-http-retries", 0, "Pre-stage HTTP retries (env OAI_PREP_HTTP_RETRIES; inherits -http-retries if unset)") + flag.DurationVar(&cfg.prepHTTPBackoff, "prep-http-retry-backoff", 0, "Pre-stage HTTP retry backoff (env OAI_PREP_HTTP_RETRY_BACKOFF; inherits -http-retry-backoff if unset)") + // Global HTTP retry knobs: precedence flag > env > default + var httpRetriesSet bool + (func() { + cfg.httpRetries = -1 // sentinel to detect unset + f := &intFlexFlag{dst: &cfg.httpRetries, set: 
&httpRetriesSet} + flag.CommandLine.Var(f, "http-retries", "Number of retries for transient HTTP failures (timeouts, 429, 5xx) (env OAI_HTTP_RETRIES; default 2)") + })() + var httpBackoffSet bool + (func() { + cfg.httpBackoff = 0 // resolved after parsing + f := durationFlexFlag{dst: &cfg.httpBackoff, set: &httpBackoffSet} + flag.CommandLine.Var(f, "http-retry-backoff", "Base backoff between HTTP retry attempts (exponential) (env OAI_HTTP_RETRY_BACKOFF; default 500ms)") + })() + flag.BoolVar(&cfg.debug, "debug", false, "Dump request/response JSON to stderr") + flag.BoolVar(&cfg.verbose, "verbose", false, "Also print non-final assistant channels (critic/confidence) to stderr") + flag.BoolVar(&cfg.quiet, "quiet", false, "Suppress non-final output; print only final text to stdout") + flag.BoolVar(&cfg.prepToolsAllowExternal, "prep-tools-allow-external", false, "Allow pre-stage to execute external tools from -tools; when false, pre-stage is limited to built-in read-only tools") + flag.StringVar(&cfg.prepToolsPath, "prep-tools", "", "Path to pre-stage tools.json (optional; used only with -prep-tools-allow-external)") + flag.BoolVar(&cfg.prepCacheBust, "prep-cache-bust", false, "Skip pre-stage cache and force recompute") + // Enabled by default; user can disable to skip pre-stage entirely. Track if explicitly set. 
+ cfg.prepEnabled = true + flag.CommandLine.Var(&boolFlexFlag{dst: &cfg.prepEnabled, set: &cfg.prepEnabledSet}, "prep-enabled", "Enable pre-stage processing (default true; when false, skip pre-stage and proceed directly to main call)") + // Message viewing flags + flag.BoolVar(&cfg.prepDryRun, "prep-dry-run", false, "Run pre-stage only, print refined Harmony messages to stdout, and exit 0") + flag.BoolVar(&cfg.printMessages, "print-messages", false, "Pretty-print the final merged message array to stderr before the main call") + flag.BoolVar(&cfg.streamFinal, "stream-final", false, "If server supports streaming, stream only assistant{channel:\"final\"} to stdout; buffer other channels for -verbose") + // Custom channel routing (repeatable): -channel-route name=stdout|stderr|omit + flag.Var((*stringSliceFlag)(&cfg.channelRoutePairs), "channel-route", "Route assistant channels (final|critic|confidence) to stdout|stderr|omit; repeatable, e.g., -channel-route critic=stdout") + // Save/load refined messages + flag.StringVar(&cfg.saveMessagesPath, "save-messages", "", "Write the final merged Harmony messages to the given JSON file and continue") + flag.StringVar(&cfg.loadMessagesPath, "load-messages", "", "Bypass pre-stage and prompt; load Harmony messages from the given JSON file (validator-checked)") + flag.BoolVar(&cfg.capabilities, "capabilities", false, "Print enabled tools and exit") + flag.BoolVar(&cfg.printConfig, "print-config", false, "Print resolved config and exit") + // Global dry-run for state persistence planning (no disk writes) + flag.BoolVar(&cfg.dryRun, "dry-run", false, "Print intended state actions (restore/refine/save) and exit without writing state") + // Image API flags + flag.StringVar(&cfg.imageBaseURL, "image-base-url", "", "Image API base URL (env OAI_IMAGE_BASE_URL; inherits -base-url if unset)") + flag.StringVar(&cfg.imageAPIKey, "image-api-key", "", "Image API key (env OAI_IMAGE_API_KEY; inherits -api-key if unset; falls back to 
OPENAI_API_KEY)") + // Image model flag (precedence: flag > env > default) + defaultImageModel := getEnv("OAI_IMAGE_MODEL", "gpt-image-1") + flag.StringVar(&cfg.imageModel, "image-model", defaultImageModel, "Image model ID (env OAI_IMAGE_MODEL; default gpt-image-1)") + // Image HTTP behavior flags + // Timeout (duration) + var imageHTTPTimeoutSet bool + cfg.imageHTTPTimeout = 0 + flag.Var(durationFlexFlag{dst: &cfg.imageHTTPTimeout, set: &imageHTTPTimeoutSet}, "image-http-timeout", "Image HTTP timeout (env OAI_IMAGE_HTTP_TIMEOUT; inherits -http-timeout if unset)") + // Retries (int) + var imageHTTPRetriesSet bool + cfg.imageHTTPRetries = -1 // sentinel for unset + flag.Var(&intFlexFlag{dst: &cfg.imageHTTPRetries, set: &imageHTTPRetriesSet}, "image-http-retries", "Image HTTP retries (env OAI_IMAGE_HTTP_RETRIES; inherits -http-retries if unset)") + // Backoff (duration) + var imageHTTPBackoffSet bool + cfg.imageHTTPBackoff = 0 + flag.Var(durationFlexFlag{dst: &cfg.imageHTTPBackoff, set: &imageHTTPBackoffSet}, "image-http-retry-backoff", "Image HTTP retry backoff (env OAI_IMAGE_HTTP_RETRY_BACKOFF; inherits -http-retry-backoff if unset)") + // Image parameter pass-through flags (precedence: flag > env > default) + // -image-n + cfg.imageN = -1 // sentinel for unset + var imageNSet bool + flag.Var(&intFlexFlag{dst: &cfg.imageN, set: &imageNSet}, "image-n", "Number of images to generate (env OAI_IMAGE_N; default 1)") + // -image-size + flag.StringVar(&cfg.imageSize, "image-size", "", "Image size WxH, e.g., 1024x1024 (env OAI_IMAGE_SIZE; default 1024x1024)") + // -image-quality + flag.StringVar(&cfg.imageQuality, "image-quality", "", "Image quality: standard|hd (env OAI_IMAGE_QUALITY; default standard)") + // -image-style + flag.StringVar(&cfg.imageStyle, "image-style", "", "Image style: natural|vivid (env OAI_IMAGE_STYLE; default natural)") + // -image-response-format + flag.StringVar(&cfg.imageResponseFormat, "image-response-format", "", "Image response format: 
url|b64_json (env OAI_IMAGE_RESPONSE_FORMAT; default url)") + // -image-transparent-background + flag.CommandLine.Var(&boolFlexFlag{dst: &cfg.imageTransparentBackground}, "image-transparent-background", "Request transparent background when supported (env OAI_IMAGE_TRANSPARENT_BACKGROUND; default false)") + ignoreError(flag.CommandLine.Parse(os.Args[1:])) + if strings.TrimSpace(prepProfileRaw) != "" { + cfg.prepProfile = oai.PromptProfile(strings.TrimSpace(prepProfileRaw)) + } + + // Apply env precedence for pre-stage system fields when flags unset + if strings.TrimSpace(cfg.prepSystem) == "" { + if v := strings.TrimSpace(os.Getenv("OAI_PREP_SYSTEM")); v != "" { + cfg.prepSystem = v + } + } + if strings.TrimSpace(cfg.prepSystemFile) == "" { + if v := strings.TrimSpace(os.Getenv("OAI_PREP_SYSTEM_FILE")); v != "" { + cfg.prepSystemFile = v + } + } + + // Resolve temperature precedence: flag > env (LLM_TEMPERATURE) > config file (not implemented) > default 1.0 + if tempSet { + cfg.temperatureSource = "flag" + } else { + if v := strings.TrimSpace(os.Getenv("LLM_TEMPERATURE")); v != "" { + if parsed, err := strconv.ParseFloat(v, 64); err == nil { + cfg.temperature = parsed + cfg.temperatureSource = "env" + } + } + // Config file precedence placeholder: no-op (no config file mechanism yet) + if cfg.temperatureSource == "" { + cfg.temperatureSource = "default" + } + } + + // Resolve pre-stage top_p from env when not set via flag + var prepTopPFromEnv bool + if cfg.prepTopP <= 0 { + if v := strings.TrimSpace(os.Getenv("OAI_PREP_TOP_P")); v != "" { + if parsed, err := strconv.ParseFloat(v, 64); err == nil && parsed > 0 { + cfg.prepTopP = parsed + prepTopPFromEnv = true + } + } + } + + // Resolve pre-stage temperature precedence: flag > env > inherit from -temp + if prepTempSet { + cfg.prepTemperatureSource = "flag" + } else if v := strings.TrimSpace(os.Getenv("OAI_PREP_TEMP")); v != "" { + if parsed, err := strconv.ParseFloat(v, 64); err == nil { + cfg.prepTemperature = 
parsed + cfg.prepTemperatureSource = "env" + } + } + if cfg.prepTemperature < 0 { // still unset + cfg.prepTemperature = cfg.temperature + cfg.prepTemperatureSource = "inherit" + } + + // Resolve split timeouts with precedence: flag > env (HTTP only) > legacy -timeout > sane default + // HTTP timeout: env OAI_HTTP_TIMEOUT supported + httpEnvUsed := false + if cfg.httpTimeout <= 0 { + if v := strings.TrimSpace(os.Getenv("OAI_HTTP_TIMEOUT")); v != "" { + if d, err := parseDurationFlexible(v); err == nil && d > 0 { + cfg.httpTimeout = d + httpEnvUsed = true + } + } + } + if cfg.httpTimeout <= 0 { + if cfg.timeout > 0 { + cfg.httpTimeout = cfg.timeout + } else { + cfg.httpTimeout = 90 * time.Second // sane default between 60–120s + } + } + + // Pre-stage HTTP timeout: precedence flag > env OAI_PREP_HTTP_TIMEOUT > http-timeout > default + prepEnvUsed := false + if cfg.prepHTTPTimeout <= 0 { + if v := strings.TrimSpace(os.Getenv("OAI_PREP_HTTP_TIMEOUT")); v != "" { + if d, err := parseDurationFlexible(v); err == nil && d > 0 { + cfg.prepHTTPTimeout = d + prepEnvUsed = true + } + } + } + if cfg.prepHTTPTimeout <= 0 { + if cfg.httpTimeout > 0 { + cfg.prepHTTPTimeout = cfg.httpTimeout + } else { + cfg.prepHTTPTimeout = 90 * time.Second + } + } + + // Tool timeout: no env per checklist; fallback to legacy -timeout or 30s default + if cfg.toolTimeout <= 0 { + if cfg.timeout > 0 { + cfg.toolTimeout = cfg.timeout + } else { + cfg.toolTimeout = 30 * time.Second + } + } + + // Resolve global HTTP retry knobs using centralized helpers + // http-retries: flag > env > default(2) + { + resolved, _ := oai.ResolveInt(httpRetriesSet, cfg.httpRetries, os.Getenv("OAI_HTTP_RETRIES"), nil, 2) + cfg.httpRetries = resolved + } + // http-retry-backoff: flag > env > default(500ms) + { + resolved, _ := oai.ResolveDuration(httpBackoffSet, cfg.httpBackoff, os.Getenv("OAI_HTTP_RETRY_BACKOFF"), nil, 500*time.Millisecond) + cfg.httpBackoff = resolved + } + + // Resolve prep overrides precedence: flag 
> env OAI_PREP_* > inherit main-call + // Model + if strings.TrimSpace(cfg.prepModel) != "" { + cfg.prepModelSource = "flag" + } else if v := strings.TrimSpace(os.Getenv("OAI_PREP_MODEL")); v != "" { + cfg.prepModel = v + cfg.prepModelSource = "env" + } else { + cfg.prepModel = cfg.model + cfg.prepModelSource = "inherit" + } + // Base URL + if strings.TrimSpace(cfg.prepBaseURL) != "" { + cfg.prepBaseURLSource = "flag" + } else if v := strings.TrimSpace(os.Getenv("OAI_PREP_BASE_URL")); v != "" { + cfg.prepBaseURL = v + cfg.prepBaseURLSource = "env" + } else { + cfg.prepBaseURL = cfg.baseURL + cfg.prepBaseURLSource = "inherit" + } + // API key + if strings.TrimSpace(cfg.prepAPIKey) != "" { + cfg.prepAPIKeySource = "flag" + } else if v := strings.TrimSpace(os.Getenv("OAI_PREP_API_KEY")); v != "" { + cfg.prepAPIKey = v + cfg.prepAPIKeySource = "env:OAI_PREP_API_KEY" + } else if v := strings.TrimSpace(os.Getenv("OAI_API_KEY")); v != "" { + cfg.prepAPIKey = v + cfg.prepAPIKeySource = "env:OAI_API_KEY" + } else if v := strings.TrimSpace(os.Getenv("OPENAI_API_KEY")); v != "" { + cfg.prepAPIKey = v + cfg.prepAPIKeySource = "env:OPENAI_API_KEY" + } else { + cfg.prepAPIKey = cfg.apiKey + if strings.TrimSpace(cfg.apiKey) != "" { + cfg.prepAPIKeySource = "inherit" + } else { + cfg.prepAPIKeySource = "empty" + } + } + // HTTP retries + if cfg.prepHTTPRetries > 0 { + cfg.prepHTTPRetriesSource = "flag" + } else if v := strings.TrimSpace(os.Getenv("OAI_PREP_HTTP_RETRIES")); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 0 { + cfg.prepHTTPRetries = n + cfg.prepHTTPRetriesSource = "env" + } + } + if cfg.prepHTTPRetries == 0 { + cfg.prepHTTPRetries = cfg.httpRetries + if cfg.prepHTTPRetriesSource == "" { + cfg.prepHTTPRetriesSource = "inherit" + } + } + // HTTP retry backoff + if cfg.prepHTTPBackoff > 0 { + cfg.prepHTTPBackoffSource = "flag" + } else if v := strings.TrimSpace(os.Getenv("OAI_PREP_HTTP_RETRY_BACKOFF")); v != "" { + if d, err := parseDurationFlexible(v); 
err == nil && d > 0 { + cfg.prepHTTPBackoff = d + cfg.prepHTTPBackoffSource = "env" + } + } + if cfg.prepHTTPBackoff == 0 { + cfg.prepHTTPBackoff = cfg.httpBackoff + if cfg.prepHTTPBackoffSource == "" { + cfg.prepHTTPBackoffSource = "inherit" + } + } + + // Resolve image config using helper (flag > env > inherit > fallback) + if img, baseSrc, keySrc := oai.ResolveImageConfig(cfg.imageBaseURL, cfg.imageAPIKey, cfg.baseURL, cfg.apiKey); true { + cfg.imageBaseURL = img.BaseURL + cfg.imageAPIKey = img.APIKey + cfg.imageBaseURLSource = baseSrc + cfg.imageAPIKeySource = keySrc + } + + // Resolve image HTTP knobs using centralized helpers with inheritance from main HTTP knobs + // Timeout: flag > env > inherit(http-timeout) > default(unused) + { + inherit := cfg.httpTimeout + resolved, src := oai.ResolveDuration(imageHTTPTimeoutSet, cfg.imageHTTPTimeout, os.Getenv("OAI_IMAGE_HTTP_TIMEOUT"), &inherit, cfg.httpTimeout) + cfg.imageHTTPTimeout = resolved + cfg.imageHTTPTimeoutSource = src + if cfg.imageHTTPTimeout <= 0 && src == "inherit" { + // Ensure a positive inherited timeout + cfg.imageHTTPTimeout = cfg.httpTimeout + } + } + // Retries: flag > env > inherit(http-retries) > default(unused) + { + inherit := cfg.httpRetries + resolved, src := oai.ResolveInt(imageHTTPRetriesSet, cfg.imageHTTPRetries, os.Getenv("OAI_IMAGE_HTTP_RETRIES"), &inherit, cfg.httpRetries) + cfg.imageHTTPRetries = resolved + cfg.imageHTTPRetriesSource = src + if cfg.imageHTTPRetries < 0 && src == "inherit" { + cfg.imageHTTPRetries = cfg.httpRetries + } + } + // Backoff: flag > env > inherit(http-retry-backoff) > default(unused) + { + inherit := cfg.httpBackoff + resolved, src := oai.ResolveDuration(imageHTTPBackoffSet, cfg.imageHTTPBackoff, os.Getenv("OAI_IMAGE_HTTP_RETRY_BACKOFF"), &inherit, cfg.httpBackoff) + cfg.imageHTTPBackoff = resolved + cfg.imageHTTPBackoffSource = src + if cfg.imageHTTPBackoff == 0 && src == "inherit" { + cfg.imageHTTPBackoff = cfg.httpBackoff + } + } + + // Resolve image 
parameter pass-throughs with precedence: flag > env > default + if !imageNSet { + if v := strings.TrimSpace(os.Getenv("OAI_IMAGE_N")); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 1 { + cfg.imageN = n + } + } + } + if cfg.imageN < 0 { + cfg.imageN = 1 + } + if strings.TrimSpace(cfg.imageSize) == "" { + if v := strings.TrimSpace(os.Getenv("OAI_IMAGE_SIZE")); v != "" { + cfg.imageSize = v + } + } + if strings.TrimSpace(cfg.imageSize) == "" { + cfg.imageSize = "1024x1024" + } + if strings.TrimSpace(cfg.imageQuality) == "" { + if v := strings.TrimSpace(os.Getenv("OAI_IMAGE_QUALITY")); v != "" { + cfg.imageQuality = v + } + } + if strings.TrimSpace(cfg.imageQuality) == "" { + cfg.imageQuality = "standard" + } + if strings.TrimSpace(cfg.imageStyle) == "" { + if v := strings.TrimSpace(os.Getenv("OAI_IMAGE_STYLE")); v != "" { + cfg.imageStyle = v + } + } + if strings.TrimSpace(cfg.imageStyle) == "" { + cfg.imageStyle = "natural" + } + if strings.TrimSpace(cfg.imageResponseFormat) == "" { + if v := strings.TrimSpace(os.Getenv("OAI_IMAGE_RESPONSE_FORMAT")); v != "" { + cfg.imageResponseFormat = v + } + } + if strings.TrimSpace(cfg.imageResponseFormat) == "" { + cfg.imageResponseFormat = "url" + } + // Transparent background flag from env if flag not explicitly set + if !cfg.imageTransparentBackground { + if v := strings.TrimSpace(os.Getenv("OAI_IMAGE_TRANSPARENT_BACKGROUND")); v != "" { + if b, err := strconv.ParseBool(v); err == nil { + cfg.imageTransparentBackground = b + } + } + } + + // Set source labels + if httpSet { + cfg.httpTimeoutSource = "flag" + } else if httpEnvUsed { + cfg.httpTimeoutSource = "env" + } else { + cfg.httpTimeoutSource = "default" + } + if prepHTTPSet { + cfg.prepHTTPTimeoutSource = "flag" + } else if prepEnvUsed { + cfg.prepHTTPTimeoutSource = "env" + } else { + // inherits http-timeout or default + cfg.prepHTTPTimeoutSource = "inherit" + } + if toolSet { + cfg.toolTimeoutSource = "flag" + } else { + cfg.toolTimeoutSource = 
"default" + } + if globalSet { + cfg.globalTimeoutSource = "flag" + } else { + cfg.globalTimeoutSource = "default" + } + + // Enforce mutual exclusion and required prompt presence (unless print-only modes) + if strings.TrimSpace(cfg.systemFile) != "" && strings.TrimSpace(cfg.systemPrompt) != "" && cfg.systemPrompt != defaultSystem { + // Both -system and -system-file provided (with -system not defaulted) + cfg.parseError = "error: -system and -system-file are mutually exclusive" + return cfg, 2 + } + // Mutual exclusion for pre-stage system inputs + if strings.TrimSpace(cfg.prepSystem) != "" && strings.TrimSpace(cfg.prepSystemFile) != "" { + cfg.parseError = "error: -prep-system and -prep-system-file are mutually exclusive" + return cfg, 2 + } + if strings.TrimSpace(cfg.promptFile) != "" && strings.TrimSpace(cfg.prompt) != "" { + cfg.parseError = "error: -prompt and -prompt-file are mutually exclusive" + return cfg, 2 + } + if !cfg.capabilities && !cfg.printConfig { + // Resolve effective prompt presence considering -prompt-file + if strings.TrimSpace(cfg.loadMessagesPath) == "" && strings.TrimSpace(cfg.prompt) == "" && strings.TrimSpace(cfg.promptFile) == "" { + return cfg, 2 + } + } + // Parse channel-route pairs and validate + if len(cfg.channelRoutePairs) > 0 { + cfg.channelRoutes = make(map[string]string) + for _, pair := range cfg.channelRoutePairs { + p := strings.TrimSpace(pair) + if p == "" { + continue + } + eq := strings.IndexByte(p, '=') + if eq <= 0 || eq >= len(p)-1 { + cfg.parseError = "error: invalid -channel-route value (expected name=stdout|stderr|omit)" + return cfg, 2 + } + name := strings.TrimSpace(p[:eq]) + dest := strings.TrimSpace(p[eq+1:]) + switch name { + case "final", "critic", "confidence": + // ok + default: + cfg.parseError = fmt.Sprintf("error: invalid -channel-route channel %q (allowed: final, critic, confidence)", name) + return cfg, 2 + } + switch dest { + case "stdout", "stderr", "omit": + // ok + default: + cfg.parseError = 
fmt.Sprintf("error: invalid -channel-route destination %q (allowed: stdout, stderr, omit)", dest) + return cfg, 2 + } + cfg.channelRoutes[name] = dest + } + } + + // Conflict checks for save/load flags + if strings.TrimSpace(cfg.saveMessagesPath) != "" && strings.TrimSpace(cfg.loadMessagesPath) != "" { + cfg.parseError = "error: -save-messages and -load-messages are mutually exclusive" + return cfg, 2 + } + if strings.TrimSpace(cfg.loadMessagesPath) != "" { + // Loading messages conflicts with providing -prompt or -prompt-file + if strings.TrimSpace(cfg.prompt) != "" || strings.TrimSpace(cfg.promptFile) != "" { + cfg.parseError = "error: -load-messages cannot be combined with -prompt or -prompt-file" + return cfg, 2 + } + } + // Prep top_p source labeling for config dump + if cfg.prepTopP > 0 { + if prepTopPFromEnv { + cfg.prepTopPSource = "env" + } else { + cfg.prepTopPSource = "flag" + } + } else { + cfg.prepTopPSource = "inherit" + } + // Normalize/expand state-dir and create with 0700 if set + if s := strings.TrimSpace(cfg.stateDir); s != "" { + // Expand leading ~ to the user's home directory + if strings.HasPrefix(s, "~") { + if home, err := os.UserHomeDir(); err == nil { + s = filepath.Join(home, strings.TrimPrefix(s, "~")) + } + } + // Clean path and ensure it's absolute or relative within cwd; no wildcards + s = filepath.Clean(s) + // Create directory tree with 0700, respecting umask + if err := os.MkdirAll(s, 0o700); err != nil { + cfg.parseError = fmt.Sprintf("error: creating -state-dir %q: %v", s, err) + return cfg, 2 + } + cfg.stateDir = s + } + // Resolve state scope: when empty, compute default from model|base_url|toolset_hash + if strings.TrimSpace(cfg.stateScope) == "" { + // Compute toolset hash from manifest if provided; empty string when no tools + toolsetHash := computeToolsetHash(strings.TrimSpace(cfg.toolsPath)) + cfg.stateScope = computeDefaultStateScope(strings.TrimSpace(cfg.model), strings.TrimSpace(cfg.baseURL), toolsetHash) + } + // 
Validate refinement usage: any refine flags require -state-dir + if cfg.stateRefine || strings.TrimSpace(cfg.stateRefineText) != "" || strings.TrimSpace(cfg.stateRefineFile) != "" { + if strings.TrimSpace(cfg.stateDir) == "" { + cfg.parseError = "error: state refinement (-state-refine, -state-refine-text, -state-refine-file) requires -state-dir to be set" + return cfg, 2 + } + } + return cfg, 0 +} + +func main() { + os.Exit(cliMain(os.Args[1:], os.Stdout, os.Stderr)) +} + +// cliMain is a testable entrypoint for the CLI. It accepts argv (excluding program name) +// and writers for stdout/stderr, returns the intended process exit code, and performs +// no global side effects beyond temporarily setting os.Args for flag parsing. +func cliMain(args []string, stdout io.Writer, stderr io.Writer) int { + // Handle help flags prior to any parsing/validation or side effects + if helpRequested(args) { + printUsage(stdout) + return 0 + } + // Handle version flags prior to parsing/validation + if versionRequested(args) { + printVersion(stdout) + return 0 + } + + // Temporarily set os.Args so parseFlags() (which reads os.Args) sees our args + origArgs := os.Args + os.Args = append([]string{origArgs[0]}, args...) 
+ defer func() { os.Args = origArgs }() + + cfg, exitOn := parseFlags() + if exitOn != 0 { + if strings.TrimSpace(cfg.parseError) != "" { + safeFprintln(stderr, cfg.parseError) + } else { + safeFprintln(stderr, "error: -prompt is required") + } + // Also print usage synopsis for guidance + printUsage(stderr) + return exitOn + } + // Global dry-run: print intended state actions and exit without executing network calls or writing state + if cfg.dryRun { + return printStateDryRunPlan(cfg, stdout, stderr) + } + if cfg.printConfig { + return printResolvedConfig(cfg, stdout) + } + if cfg.capabilities { + return printCapabilities(cfg, stdout, stderr) + } + if cfg.prepDryRun { + return runPrepDryRun(cfg, stdout, stderr) + } + return runAgent(cfg, stdout, stderr) +} + +// runAgent executes the non-interactive agent loop and returns a process exit code. +// nolint:gocyclo // Orchestrates the agent loop; complexity is acceptable and covered by tests. +func runAgent(cfg cliConfig, stdout io.Writer, stderr io.Writer) int { + // Default pre-stage enabled when not explicitly set (covers tests constructing cfg directly) + if !cfg.prepEnabledSet { + cfg.prepEnabled = true + } + // Normalize timeouts for backward compatibility when cfg constructed directly in tests + if cfg.httpTimeout <= 0 { + if cfg.timeout > 0 { + cfg.httpTimeout = cfg.timeout + } else { + cfg.httpTimeout = 90 * time.Second + } + } + // Emit effective timeout sources under -debug (after normalization) + if cfg.debug { + safeFprintf(stderr, "effective timeouts: http-timeout=%s source=%s; prep-http-timeout=%s source=%s; tool-timeout=%s source=%s; timeout=%s source=%s\n", + cfg.httpTimeout.String(), cfg.httpTimeoutSource, + cfg.prepHTTPTimeout.String(), cfg.prepHTTPTimeoutSource, + cfg.toolTimeout.String(), cfg.toolTimeoutSource, + cfg.timeout.String(), cfg.globalTimeoutSource, + ) + } + if cfg.toolTimeout <= 0 { + if cfg.timeout > 0 { + cfg.toolTimeout = cfg.timeout + } else { + cfg.toolTimeout = 30 * time.Second + 
} + } + // Load tools manifest if provided + var ( + toolRegistry map[string]tools.ToolSpec + oaiTools []oai.Tool + ) + var err error + if strings.TrimSpace(cfg.toolsPath) != "" { + toolRegistry, oaiTools, err = tools.LoadManifest(cfg.toolsPath) + if err != nil { + safeFprintf(stderr, "error: failed to load tools manifest: %v\n", err) + return 1 + } + // Validate each configured tool is available on this system before proceeding + for name, spec := range toolRegistry { + if len(spec.Command) == 0 { + safeFprintf(stderr, "error: configured tool %q has no command\n", name) + return 1 + } + if _, lookErr := exec.LookPath(spec.Command[0]); lookErr != nil { + safeFprintf(stderr, "error: configured tool %q is unavailable: %v (program %q)\n", name, lookErr, spec.Command[0]) + return 1 + } + } + } + + // Configure HTTP client with retry policy + httpClient := oai.NewClientWithRetry(cfg.baseURL, cfg.apiKey, cfg.httpTimeout, oai.RetryPolicy{MaxRetries: cfg.httpRetries, Backoff: cfg.httpBackoff}) + + var messages []oai.Message + if strings.TrimSpace(cfg.loadMessagesPath) != "" { + // Load messages from JSON file and validate + data, rerr := os.ReadFile(strings.TrimSpace(cfg.loadMessagesPath)) + if rerr != nil { + safeFprintf(stderr, "error: read load-messages file: %v\n", rerr) + return 2 + } + msgs, imgPrompt, err := parseSavedMessages(data) + if err != nil { + safeFprintf(stderr, "error: parse load-messages JSON: %v\n", err) + return 2 + } + messages = msgs + if strings.TrimSpace(cfg.imagePrompt) == "" && strings.TrimSpace(imgPrompt) != "" { + cfg.imagePrompt = strings.TrimSpace(imgPrompt) + } + if err := oai.ValidateMessageSequence(messages); err != nil { + safeFprintf(stderr, "error: invalid loaded message sequence: %v\n", err) + return 2 + } + } else if len(cfg.initMessages) > 0 { + // Use injected messages (tests only) + messages = cfg.initMessages + } else { + // Resolve role contents from flags/files + sys, sysErr := resolveMaybeFile(cfg.systemPrompt, cfg.systemFile) 
+ if sysErr != nil { + safeFprintf(stderr, "error: %v\n", sysErr) + return 2 + } + prm, prmErr := resolveMaybeFile(cfg.prompt, cfg.promptFile) + if prmErr != nil { + safeFprintf(stderr, "error: %v\n", prmErr) + return 2 + } + devs, devErr := resolveDeveloperMessages(cfg.developerPrompts, cfg.developerFiles) + if devErr != nil { + safeFprintf(stderr, "error: %v\n", devErr) + return 2 + } + // Build messages honoring precedence: + // System: CLI -system (if provided) else -system-file else default + // Developer: CLI -developer / -developer-file (all, in provided order) + // User: CLI -prompt or -prompt-file + var seed []oai.Message + seed = append(seed, oai.Message{Role: oai.RoleSystem, Content: sys}) + for _, d := range devs { + if s := strings.TrimSpace(d); s != "" { + seed = append(seed, oai.Message{Role: oai.RoleDeveloper, Content: s}) + } + } + seed = append(seed, oai.Message{Role: oai.RoleUser, Content: prm}) + messages = seed + } + + // Loop with per-request timeouts so multi-step tool calls have full budget each time. + warnedOneKnob := false + // Enforce a hard ceiling of 15 steps regardless of the provided value. + effectiveMaxSteps := cfg.maxSteps + if effectiveMaxSteps > 15 { + effectiveMaxSteps = 15 + } + // Pre-stage: perform a preparatory chat call and append any pre-stage tool outputs + // to the transcript before entering the main loop. Behavior is additive only. 
+ // nolint below: ignore returned error intentionally to fail-open on pre-stage + _ = func() error { //nolint:errcheck + // Skip entirely when disabled or when tests inject initMessages + if !cfg.prepEnabled || len(cfg.initMessages) > 0 || strings.TrimSpace(cfg.loadMessagesPath) != "" { + return nil + } + // Execute pre-stage and update messages if any tool outputs were produced + out, err := runPreStage(cfg, messages, stderr) + if err != nil { + // Fail-open: log one concise WARN and proceed with original messages + safeFprintf(stderr, "WARN: pre-stage failed; skipping (reason: %s)\n", oneLine(err.Error())) + return nil + } + messages = out + return nil + }() + + // Optional: pretty-print the final merged messages prior to the main call + if cfg.printMessages { + // Print a wrapper that includes metadata but omits any sensitive keys + if b, err := json.MarshalIndent(buildMessagesWrapper(messages, strings.TrimSpace(cfg.imagePrompt)), "", " "); err == nil { + safeFprintln(stderr, string(b)) + } + } + + // Optional: save the final merged messages to a JSON file before main call + if strings.TrimSpace(cfg.saveMessagesPath) != "" { + if err := writeSavedMessages(strings.TrimSpace(cfg.saveMessagesPath), messages, strings.TrimSpace(cfg.imagePrompt)); err != nil { + safeFprintf(stderr, "error: write save-messages file: %v\n", err) + return 2 + } + } + + var step int + for step = 0; step < effectiveMaxSteps; step++ { + // completionCap governs optional MaxTokens on the request. It defaults to 0 + // (omitted) and will be adjusted by length backoff logic. + completionCap := 0 + retriedForLength := false + + // Perform at most one in-step retry when finish_reason=="length". + for { + // Apply transcript hygiene before sending to the API when -debug is off + hygienic := applyTranscriptHygiene(messages, cfg.debug) + req := oai.ChatCompletionsRequest{ + Model: cfg.model, + Messages: hygienic, + } + // One-knob rule: if -top-p is set, set top_p and omit temperature; warn once. 
+ if cfg.topP > 0 { + // Set top_p in the request payload + topP := cfg.topP + req.TopP = &topP + if !warnedOneKnob { + safeFprintln(stderr, "warning: -top-p is set; omitting temperature per one-knob rule") + warnedOneKnob = true + } + } else { + // Include temperature only when supported by the target model. + if oai.SupportsTemperature(cfg.model) { + req.Temperature = &cfg.temperature + } + } + if len(oaiTools) > 0 { + req.Tools = oaiTools + req.ToolChoice = "auto" + } + + // Include MaxTokens only when a positive completionCap is set. + if completionCap > 0 { + req.MaxTokens = completionCap + } + + // Pre-flight validate message sequence to avoid API 400s for stray tool messages + if err := oai.ValidateMessageSequence(req.Messages); err != nil { + safeFprintf(stderr, "error: %v\n", err) + return 1 + } + + // Request debug dump (no human-readable output precedes requests) + dumpJSONIfDebug(stderr, fmt.Sprintf("chat.request step=%d", step+1), req, cfg.debug) + + // Per-call context + callCtx, cancel := context.WithTimeout(context.Background(), cfg.httpTimeout) + // Attempt streaming first when enabled; on unsupported, fall back + if cfg.streamFinal { + var streamedFinal strings.Builder + type buffered struct{ channel, content string } + var bufferedNonFinal []buffered + streamErr := httpClient.StreamChat(callCtx, req, func(chunk oai.StreamChunk) error { + // Accumulate only final channel content to stdout progressively; buffer others + for _, ch := range chunk.Choices { + delta := ch.Delta + if strings.TrimSpace(delta.Content) == "" { + continue + } + if strings.TrimSpace(delta.Channel) == "final" || strings.TrimSpace(delta.Channel) == "" { + safeFprintf(stdout, "%s", delta.Content) + streamedFinal.WriteString(delta.Content) + } else { + bufferedNonFinal = append(bufferedNonFinal, buffered{channel: strings.TrimSpace(delta.Channel), content: delta.Content}) + } + } + return nil + }) + cancel() + if streamErr == nil { + // Stream finished successfully. 
Emit newline to finalize stdout. + safeFprintln(stdout, "") + if cfg.verbose { + for _, b := range bufferedNonFinal { + route := resolveChannelRoute(cfg, b.channel, true /*nonFinal*/) + switch route { + case "stdout": + safeFprintln(stdout, strings.TrimSpace(b.content)) + case "stderr": + safeFprintln(stderr, strings.TrimSpace(b.content)) + case "omit": + // skip + } + } + } + return 0 + } + // If not supported, fall through to non-streaming; otherwise treat as error + if !strings.Contains(strings.ToLower(streamErr.Error()), "does not support streaming") { + src := cfg.httpTimeoutSource + if src == "" { + src = "default" + } + safeFprintf(stderr, "error: chat call failed: %v (http-timeout source=%s)\n", streamErr, src) + return 1 + } + // Reset context for fallback after streaming attempt + callCtx, cancel = context.WithTimeout(context.Background(), cfg.httpTimeout) + } else { + cancel() + // Reset context for non-streaming path when streaming disabled + callCtx, cancel = context.WithTimeout(context.Background(), cfg.httpTimeout) + } + + // Fallback: non-streaming request + resp, err := httpClient.CreateChatCompletion(callCtx, req) + cancel() + if err != nil { + src := cfg.httpTimeoutSource + if src == "" { + src = "default" + } + safeFprintf(stderr, "error: chat call failed: %v (http-timeout source=%s)\n", err, src) + return 1 + } + if len(resp.Choices) == 0 { + safeFprintln(stderr, "error: chat response has no choices") + return 1 + } + + choice := resp.Choices[0] + + // Length backoff: one-time in-step retry doubling the completion cap (min 256) + if strings.TrimSpace(choice.FinishReason) == "length" && !retriedForLength { + prev := completionCap + // Compute next cap: max(256, completionCap*2) + if completionCap <= 0 { + completionCap = 256 + } else { + // Double with safe lower bound + next := completionCap * 2 + if next < 256 { + next = 256 + } + completionCap = next + } + // Clamp to remaining context window before resending + window := 
oai.ContextWindowForModel(cfg.model) + estimated := oai.EstimateTokens(messages) + completionCap = oai.ClampCompletionCap(messages, completionCap, window) + // Emit audit entry describing the backoff decision + oai.LogLengthBackoff(cfg.model, prev, completionCap, window, estimated) + retriedForLength = true + // Re-send within the same agent step without appending any messages yet + continue + } + + msg := choice.Message + // Under -verbose, if the assistant returns a non-final channel, print immediately respecting routing. + if cfg.verbose && msg.Role == oai.RoleAssistant { + ch := strings.TrimSpace(msg.Channel) + if ch != "final" && strings.TrimSpace(msg.Content) != "" { + route := resolveChannelRoute(cfg, ch, true /*nonFinal*/) + switch route { + case "stdout": + safeFprintln(stdout, strings.TrimSpace(msg.Content)) + case "stderr": + safeFprintln(stderr, strings.TrimSpace(msg.Content)) + case "omit": + // skip + } + } + } + + // If the model returned tool calls and we have a registry, first append + // the assistant message that carries tool_calls to preserve correct + // sequencing (assistant -> tool messages -> assistant). Then append the + // corresponding tool messages and continue the loop for the next turn. + if len(msg.ToolCalls) > 0 && len(toolRegistry) > 0 { + messages = append(messages, msg) + messages = appendToolCallOutputs(messages, msg, toolRegistry, cfg) + // Continue outer loop for another assistant response using appended tool outputs + break + } + + // If the model returned assistant content, handle channel-aware routing + if msg.Role == oai.RoleAssistant && strings.TrimSpace(msg.Content) != "" { + // Respect channel-aware printing: only print channel=="final" to stdout by default. 
+ ch := strings.TrimSpace(msg.Channel) + if ch == "final" || ch == "" { + // Determine destination per routing; default final->stdout + dest := resolveChannelRoute(cfg, "final", false /*nonFinal*/) + switch dest { + case "stdout": + safeFprintln(stdout, strings.TrimSpace(msg.Content)) + case "stderr": + safeFprintln(stderr, strings.TrimSpace(msg.Content)) + case "omit": + // do not print + } + // Dump debug response JSON after human-readable output, then exit + dumpJSONIfDebug(stderr, fmt.Sprintf("chat.response step=%d", step+1), resp, cfg.debug) + return 0 + } else { + // Non-final assistant message with content: do not print to stdout by default. + // (already printed above under -verbose) + // Append and continue loop to get the actual final + dumpJSONIfDebug(stderr, fmt.Sprintf("chat.response step=%d", step+1), resp, cfg.debug) + messages = append(messages, msg) + break + } + } + + // Otherwise, append message and continue (some models return assistant with empty content and no tools) + dumpJSONIfDebug(stderr, fmt.Sprintf("chat.response step=%d", step+1), resp, cfg.debug) + messages = append(messages, msg) + break + } + } + + // If we reach here, the loop ended without printing final content. + // Distinguish between generic termination and hitting the step cap. + if step >= effectiveMaxSteps { + safeFprintln(stderr, fmt.Sprintf("info: reached maximum steps (%d); needs human review", effectiveMaxSteps)) + } else { + safeFprintln(stderr, "error: run ended without final assistant content") + } + return 1 +} + +// runPrepDryRun executes only the pre-stage processing (respecting -prep-enabled), +// prints the refined Harmony messages as pretty JSON to stdout, and exits with code 0 on success. +// On failure (e.g., pre-stage HTTP error), it prints a concise error to stderr and exits non-zero. 
+func runPrepDryRun(cfg cliConfig, stdout io.Writer, stderr io.Writer) int { + // Build seed messages honoring the same precedence as in runAgent + var messages []oai.Message + if len(cfg.initMessages) > 0 { + messages = cfg.initMessages + } else { + sys, sysErr := resolveMaybeFile(cfg.systemPrompt, cfg.systemFile) + if sysErr != nil { + safeFprintf(stderr, "error: %v\n", sysErr) + return 2 + } + prm, prmErr := resolveMaybeFile(cfg.prompt, cfg.promptFile) + if prmErr != nil { + safeFprintf(stderr, "error: %v\n", prmErr) + return 2 + } + devs, devErr := resolveDeveloperMessages(cfg.developerPrompts, cfg.developerFiles) + if devErr != nil { + safeFprintf(stderr, "error: %v\n", devErr) + return 2 + } + var seed []oai.Message + seed = append(seed, oai.Message{Role: oai.RoleSystem, Content: sys}) + for _, d := range devs { + if s := strings.TrimSpace(d); s != "" { + seed = append(seed, oai.Message{Role: oai.RoleDeveloper, Content: s}) + } + } + seed = append(seed, oai.Message{Role: oai.RoleUser, Content: prm}) + messages = seed + } + // Execute pre-stage unless disabled or when loading messages; on failure, exit non-zero + if cfg.prepEnabled && len(cfg.initMessages) == 0 && strings.TrimSpace(cfg.loadMessagesPath) == "" { + if out, err := runPreStage(cfg, messages, stderr); err == nil { + messages = out + } else { + safeFprintf(stderr, "error: pre-stage failed: %v\n", err) + return 1 + } + } + // Pretty-print refined messages to stdout + if b, err := json.MarshalIndent(messages, "", " "); err == nil { + safeFprintln(stdout, string(b)) + return 0 + } + // Fallback + safeFprintln(stdout, "[]") + return 0 +} + +// appendToolCallOutputs executes assistant-requested tool calls and appends their +// outputs (or deterministic error JSON) to the conversation messages. 
+func appendToolCallOutputs(messages []oai.Message, assistantMsg oai.Message, toolRegistry map[string]tools.ToolSpec, cfg cliConfig) []oai.Message { + type result struct { + msg oai.Message + } + + results := make(chan result, len(assistantMsg.ToolCalls)) + + // Launch each tool call concurrently + for _, tc := range assistantMsg.ToolCalls { + toolCall := tc // capture loop var + spec, exists := toolRegistry[toolCall.Function.Name] + if !exists { + // Unknown tool: synthesize deterministic error JSON + go func() { + toolErr := map[string]string{"error": fmt.Sprintf("unknown tool: %s", toolCall.Function.Name)} + contentBytes, mErr := json.Marshal(toolErr) + if mErr != nil { + contentBytes = []byte(`{"error":"internal error"}`) + } + results <- result{msg: oai.Message{ + Role: oai.RoleTool, + Name: toolCall.Function.Name, + ToolCallID: toolCall.ID, + Content: string(contentBytes), + }} + }() + continue + } + + go func(spec tools.ToolSpec, toolCall oai.ToolCall) { + argsJSON := strings.TrimSpace(toolCall.Function.Arguments) + if argsJSON == "" { + argsJSON = "{}" + } + out, runErr := tools.RunToolWithJSON(context.Background(), spec, []byte(argsJSON), cfg.toolTimeout) + content := sanitizeToolContent(out, runErr) + results <- result{msg: oai.Message{ + Role: oai.RoleTool, + Name: toolCall.Function.Name, + ToolCallID: toolCall.ID, + Content: content, + }} + }(spec, toolCall) + } + + // Collect exactly one result per requested tool call + for i := 0; i < len(assistantMsg.ToolCalls); i++ { + r := <-results + messages = append(messages, r.msg) + } + return messages +} + +// dumpJSONIfDebug marshals v and prints it with a label when debug is enabled. 
+func dumpJSONIfDebug(w io.Writer, label string, v any, debug bool) { + if !debug { + return + } + b, err := json.MarshalIndent(v, "", " ") + if err != nil { + return + } + safeFprintf(w, "\n--- %s ---\n%s\n", label, string(b)) +} + +// runPreStage performs a pre-processing call that exercises one-knob logic +// and client behavior (including parameter-recovery on 400). If the response +// includes tool_calls and a tools manifest is available, it executes those +// tool calls concurrently (mirroring main loop behavior) and appends exactly +// one tool message per id to the returned transcript. The function uses +// cfg.prepHTTPTimeout for its HTTP budget. +// runPreStage performs the preparatory chat call and optional tool execution. +// nolint:gocyclo // The flow covers caching, validation, tool policy, and is thoroughly unit/integration tested. +func runPreStage(cfg cliConfig, messages []oai.Message, stderr io.Writer) ([]oai.Message, error) { + // Resolve pre-stage overrides with robust fallbacks so tests that construct cfg directly still work + prepModel := func() string { + if v := strings.TrimSpace(cfg.prepModel); v != "" { + return v + } + if v := strings.TrimSpace(os.Getenv("OAI_PREP_MODEL")); v != "" { + return v + } + return cfg.model + }() + prepBaseURL := func() string { + if v := strings.TrimSpace(cfg.prepBaseURL); v != "" { + return v + } + if v := strings.TrimSpace(os.Getenv("OAI_PREP_BASE_URL")); v != "" { + return v + } + return cfg.baseURL + }() + prepAPIKey := func() string { + if v := strings.TrimSpace(cfg.prepAPIKey); v != "" { + return v + } + if v := strings.TrimSpace(os.Getenv("OAI_PREP_API_KEY")); v != "" { + return v + } + if v := strings.TrimSpace(os.Getenv("OAI_API_KEY")); v != "" { + return v + } + if v := strings.TrimSpace(os.Getenv("OPENAI_API_KEY")); v != "" { + return v + } + return cfg.apiKey + }() + retries := cfg.prepHTTPRetries + if retries <= 0 { + retries = cfg.httpRetries + } + backoff := cfg.prepHTTPBackoff + if backoff == 0 
{ + backoff = cfg.httpBackoff + } + + // Compute pre-stage sampling effective knobs for cache key + var ( + effectiveTopP *float64 + effectiveTemp *float64 + ) + // One-knob: -prep-top-p wins and omits temperature entirely + if cfg.prepTopP > 0 { + tp := cfg.prepTopP + effectiveTopP = &tp + // temperature omitted + } else if cfg.prepTemperatureSource == "flag" || cfg.prepTemperatureSource == "env" { + // Explicit pre-stage temperature override via flag/env, if supported + if oai.SupportsTemperature(prepModel) { + t := cfg.prepTemperature + effectiveTemp = &t + } + } else if strings.TrimSpace(string(cfg.prepProfile)) != "" { + // Apply profile-derived temperature when supported + if t, ok := oai.MapProfileToTemperature(prepModel, cfg.prepProfile); ok { + effectiveTemp = &t + } + } else if oai.SupportsTemperature(prepModel) { + // Inherit main temperature when supported and no explicit pre-stage override provided + t := cfg.temperature + effectiveTemp = &t + } + + // Determine tool spec identifier for cache key + toolSpec := func() string { + if !cfg.prepToolsAllowExternal { + return "builtin:fs.read_file,fs.list_dir,fs.stat,env.get,os.info" + } + // Prefer -prep-tools when provided; otherwise fall back to -tools + manifest := strings.TrimSpace(cfg.prepToolsPath) + if manifest == "" { + manifest = strings.TrimSpace(cfg.toolsPath) + } + if manifest == "" { + return "external:none" + } + b, err := os.ReadFile(manifest) + if err != nil { + // If manifest cannot be read, include the error string so key changes predictably + return "manifest_err:" + oneLine(err.Error()) + } + sum := sha256SumHex(b) + return "manifest:" + sum + }() + + // Attempt cache read unless bust requested + if !cfg.prepCacheBust { + if out, ok := tryReadPrepCache(prepModel, prepBaseURL, effectiveTemp, effectiveTopP, cfg.httpRetries, cfg.httpBackoff, toolSpec, messages); ok { + return out, nil + } + } + + // Construct request mirroring main loop sampling rules but using -prep-top-p + // 
Normalize/validate Harmony roles and assistant channels before pre-stage + normalizedIn, normErr := oai.NormalizeHarmonyMessages(messages) + if normErr != nil { + safeFprintf(stderr, "error: prep invalid message role: %v\n", normErr) + return nil, normErr + } + // Apply transcript hygiene before pre-stage call when -debug is off (harmless if no tool messages yet) + // Optionally prepend a pre-stage system message when provided via flags/env + var prepMessages []oai.Message + if strings.TrimSpace(cfg.prepSystem) != "" || strings.TrimSpace(cfg.prepSystemFile) != "" { + sysText, sysErr := resolveMaybeFile(strings.TrimSpace(cfg.prepSystem), strings.TrimSpace(cfg.prepSystemFile)) + if sysErr != nil { + safeFprintf(stderr, "error: prep system read failed: %v\n", sysErr) + return nil, sysErr + } + if s := strings.TrimSpace(sysText); s != "" { + prepMessages = append(prepMessages, oai.Message{Role: oai.RoleSystem, Content: s}) + } + } + prepMessages = append(prepMessages, applyTranscriptHygiene(normalizedIn, cfg.debug)...) 
+ req := oai.ChatCompletionsRequest{ + Model: prepModel, + Messages: prepMessages, + } + // Pre-flight validate message sequence to avoid API 400s for stray tool messages + if err := oai.ValidateMessageSequence(req.Messages); err != nil { + safeFprintf(stderr, "error: prep invalid message sequence: %v\n", err) + return nil, err + } + if effectiveTopP != nil { + req.TopP = effectiveTopP + } else if effectiveTemp != nil { + req.Temperature = effectiveTemp + } + // Create a dedicated client honoring pre-stage timeout and normal retry policy + httpClient := oai.NewClientWithRetry(prepBaseURL, prepAPIKey, cfg.prepHTTPTimeout, oai.RetryPolicy{MaxRetries: retries, Backoff: backoff}) + dumpJSONIfDebug(stderr, "prep.request", req, cfg.debug) + // Tag context with audit stage so HTTP audit lines include stage: "prep" + ctx, cancel := context.WithTimeout(oai.WithAuditStage(context.Background(), "prep"), cfg.prepHTTPTimeout) + defer cancel() + resp, err := httpClient.CreateChatCompletion(ctx, req) + if err != nil { + // Mirror main loop error style concisely; future item will add WARN+fallback behavior + safeFprintf(stderr, "error: prep call failed: %v\n", err) + return nil, err + } + dumpJSONIfDebug(stderr, "prep.response", resp, cfg.debug) + + // Under -verbose, surface non-final assistant channels from pre-stage as human-readable stderr lines + if cfg.verbose { + if len(resp.Choices) > 0 { + m := resp.Choices[0].Message + if m.Role == oai.RoleAssistant { + ch := strings.TrimSpace(m.Channel) + if ch != "final" && strings.TrimSpace(m.Content) != "" { + safeFprintln(stderr, strings.TrimSpace(m.Content)) + } + } + } + } + + // Parse and merge pre-stage payload into the seed messages when present + merged := normalizedIn + if len(resp.Choices) > 0 { + payload := strings.TrimSpace(resp.Choices[0].Message.Content) + if payload != "" { + if parsed, perr := prestage.ParsePrestagePayload(payload); perr == nil { + merged = prestage.MergePrestageIntoMessages(normalizedIn, parsed) + } + 
} + } + + // If there are no tool calls, return merged messages + if len(resp.Choices) == 0 || len(resp.Choices[0].Message.ToolCalls) == 0 { + // Cache the merged transcript for consistency + if err := writePrepCache(prepModel, prepBaseURL, effectiveTemp, effectiveTopP, cfg.httpRetries, cfg.httpBackoff, toolSpec, normalizedIn, merged); err != nil { + _ = err // best-effort cache write; ignore error + } + return merged, nil + } + + // Append the assistant message carrying tool_calls + // Normalize assistant channel/token on the response message + assistantMsg := resp.Choices[0].Message + if norm, err := oai.NormalizeHarmonyMessages([]oai.Message{assistantMsg}); err == nil && len(norm) == 1 { + assistantMsg = norm[0] + } + out := append(append([]oai.Message{}, merged...), assistantMsg) + + // Decide pre-stage tool execution policy: built-in read-only by default + if !cfg.prepToolsAllowExternal { + // Ignore -tools and execute only built-in read-only adapters + out = appendPreStageBuiltinToolOutputs(out, assistantMsg, cfg) + // Write cache + if err := writePrepCache(prepModel, prepBaseURL, effectiveTemp, effectiveTopP, cfg.httpRetries, cfg.httpBackoff, toolSpec, normalizedIn, out); err != nil { + _ = err // best-effort cache write; ignore error + } + return out, nil + } + + // External tools allowed: require a manifest and enforce availability + // Prefer -prep-tools when provided; otherwise use -tools + manifest := strings.TrimSpace(cfg.prepToolsPath) + if manifest == "" { + manifest = strings.TrimSpace(cfg.toolsPath) + } + if manifest == "" { + // No manifest; nothing to execute + return out, nil + } + registry, _, lerr := tools.LoadManifest(manifest) + if lerr != nil { + safeFprintf(stderr, "error: failed to load tools manifest for pre-stage: %v\n", lerr) + return nil, lerr + } + for name, spec := range registry { + if len(spec.Command) == 0 { + safeFprintf(stderr, "error: configured tool %q has no command\n", name) + return nil, fmt.Errorf("tool %s has no 
command", name) + } + if _, lookErr := exec.LookPath(spec.Command[0]); lookErr != nil { + safeFprintf(stderr, "error: configured tool %q is unavailable: %v (program %q)\n", name, lookErr, spec.Command[0]) + return nil, lookErr + } + } + out = appendToolCallOutputs(out, assistantMsg, registry, cfg) + if err := writePrepCache(prepModel, prepBaseURL, effectiveTemp, effectiveTopP, cfg.httpRetries, cfg.httpBackoff, toolSpec, normalizedIn, out); err != nil { + _ = err // best-effort cache write; ignore error + } + return out, nil +} + +// sha256SumHex returns the lowercase hex SHA-256 of b. +func sha256SumHex(b []byte) string { + h := sha256.New() + _, _ = h.Write(b) + return fmt.Sprintf("%x", h.Sum(nil)) +} + +// computeToolsetHash returns a stable hash of the tools manifest contents. +// When manifestPath is empty or unreadable, returns an empty string. +func computeToolsetHash(manifestPath string) string { + path := strings.TrimSpace(manifestPath) + if path == "" { + return "" + } + b, err := os.ReadFile(path) + if err != nil { + return "" + } + return sha256SumHex(b) +} + +// computeDefaultStateScope returns sha256(model + "|" + base + "|" + toolsetHash). +func computeDefaultStateScope(model string, base string, toolsetHash string) string { + input := []byte(strings.TrimSpace(model) + "|" + strings.TrimSpace(base) + "|" + strings.TrimSpace(toolsetHash)) + return sha256SumHex(input) +} + +// tryReadPrepCache attempts to load cached pre-stage output messages. 
func tryReadPrepCache(model, base string, temp *float64, topP *float64, retries int, backoff time.Duration, toolSpec string, inMessages []oai.Message) ([]oai.Message, bool) {
	// Any miss condition (missing file, expired TTL, unreadable file, or
	// malformed JSON) returns (nil, false) so the caller recomputes the
	// pre-stage instead of failing.
	key := computePrepCacheKey(model, base, temp, topP, retries, backoff, toolSpec, inMessages)
	dir := filepath.Join(findRepoRoot(), ".goagent", "cache", "prep")
	path := filepath.Join(dir, key+".json")
	// TTL check based on file mtime
	fi, err := os.Stat(path)
	if err != nil {
		return nil, false
	}
	ttl := prepCacheTTL()
	if ttl > 0 {
		if fi.ModTime().Add(ttl).Before(time.Now()) {
			// Entry is older than the TTL: treat as a miss. The stale file is
			// left on disk and simply overwritten by the next writePrepCache.
			return nil, false
		}
	}
	// NOTE(review): Stat-then-ReadFile is a benign TOCTOU — if the file
	// disappears between the calls we just report a cache miss.
	data, rerr := os.ReadFile(path)
	if rerr != nil {
		return nil, false
	}
	var messages []oai.Message
	if jerr := json.Unmarshal(data, &messages); jerr != nil {
		return nil, false
	}
	return messages, true
}

// writePrepCache writes outMessages as JSON under the computed cache key.
// The key is derived from the same inputs tryReadPrepCache uses, so a
// subsequent read with identical inputs finds this entry.
func writePrepCache(model, base string, temp *float64, topP *float64, retries int, backoff time.Duration, toolSpec string, inMessages, outMessages []oai.Message) error {
	key := computePrepCacheKey(model, base, temp, topP, retries, backoff, toolSpec, inMessages)
	dir := filepath.Join(findRepoRoot(), ".goagent", "cache", "prep")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}
	path := filepath.Join(dir, key+".json")
	data, err := json.Marshal(outMessages)
	if err != nil {
		return err
	}
	// Atomic write: write to temp then rename
	// NOTE(review): the fixed ".tmp" suffix is not unique, so two concurrent
	// writers of the same key could race on the temp file — confirm callers
	// are single-process before relying on this.
	tmp := path + ".tmp"
	if werr := os.WriteFile(tmp, data, 0o644); werr != nil {
		return werr
	}
	return os.Rename(tmp, path)
}

// computePrepCacheKey builds a deterministic key covering inputs and config.
+func computePrepCacheKey(model, base string, temp *float64, topP *float64, retries int, backoff time.Duration, toolSpec string, inMessages []oai.Message) string { + // Build a stable map for hashing + type hashPayload struct { + Model string `json:"model"` + BaseURL string `json:"base_url"` + Temp *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + Retries int `json:"retries"` + Backoff string `json:"backoff"` + ToolSpec string `json:"tool_spec"` + Messages []oai.Message `json:"messages"` + } + payload := hashPayload{ + Model: strings.TrimSpace(model), + BaseURL: strings.TrimSpace(base), + Temp: temp, + TopP: topP, + Retries: retries, + Backoff: backoff.String(), + ToolSpec: toolSpec, + Messages: normalizeMessagesForHash(inMessages), + } + b, err := json.Marshal(payload) + if err != nil { + // Fallback: return hash of string rendering to preserve behavior + return sha256SumHex([]byte(fmt.Sprintf("%+v", payload))) + } + return sha256SumHex(b) +} + +// normalizeMessagesForHash strips fields that should not affect cache equality. +func normalizeMessagesForHash(in []oai.Message) []oai.Message { + out := make([]oai.Message, 0, len(in)) + for _, m := range in { + nm := oai.Message{Role: strings.TrimSpace(m.Role), Content: strings.TrimSpace(m.Content)} + // We intentionally ignore channels and tool calls in the input seed for keying + out = append(out, nm) + } + return out +} + +// prepCacheTTL returns the TTL for prep cache; default 10 minutes, override via GOAGENT_PREP_CACHE_TTL. +func prepCacheTTL() time.Duration { + if v := strings.TrimSpace(os.Getenv("GOAGENT_PREP_CACHE_TTL")); v != "" { + if d, err := time.ParseDuration(v); err == nil { + return d + } + } + return 10 * time.Minute +} + +// findRepoRoot walks upward from CWD to locate go.mod, mirroring internal/oai moduleRoot. +func findRepoRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." 
+ } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} + +// sanitizeToolContent maps tool output and errors to a deterministic JSON string. +func sanitizeToolContent(stdout []byte, runErr error) string { + if runErr == nil { + // If the tool produced no output, return an empty JSON object to avoid confusing the model + trimmed := strings.TrimSpace(string(stdout)) + if trimmed == "" { + return "{}" + } + // Ensure it is one line to keep prompts compact + return oneLine(trimmed) + } + // On error, return {"error":"..."} + msg := runErr.Error() + if errors.Is(runErr, context.DeadlineExceeded) { + msg = "tool timed out" + } + // Truncate to avoid bloat + const maxLen = 1000 + if len(msg) > maxLen { + msg = msg[:maxLen] + } + // JSON-escape via marshaling + b, mErr := json.Marshal(map[string]string{"error": msg}) + if mErr != nil { + // Fallback to a minimal JSON on marshal error + return "{\"error\":\"internal error\"}" + } + return oneLine(string(b)) +} + +func oneLine(s string) string { + // Collapse newlines and tabs + s = strings.ReplaceAll(s, "\n", " ") + s = strings.ReplaceAll(s, "\t", " ") + // Collapse repeated spaces + return strings.Join(strings.Fields(s), " ") +} + +// parseSavedMessages accepts either a JSON array of oai.Message (legacy format) +// or a JSON object {"messages":[...], "image_prompt":"..."} and returns +// the parsed messages and optional image prompt. 
+func parseSavedMessages(data []byte) ([]oai.Message, string, error) { + trimmed := strings.TrimSpace(string(data)) + if strings.HasPrefix(trimmed, "[") { + var msgs []oai.Message + if err := json.Unmarshal([]byte(trimmed), &msgs); err != nil { + return nil, "", err + } + return msgs, "", nil + } + var wrapper struct { + Messages []oai.Message `json:"messages"` + ImagePrompt string `json:"image_prompt"` + } + if err := json.Unmarshal([]byte(trimmed), &wrapper); err != nil { + return nil, "", err + } + return wrapper.Messages, strings.TrimSpace(wrapper.ImagePrompt), nil +} + +// buildMessagesWrapper constructs the saved/printed JSON wrapper including +// the Harmony messages, optional image prompt, and pre-stage metadata. +func buildMessagesWrapper(messages []oai.Message, imagePrompt string) any { + // Determine pre-stage prompt source and size using resolver. + // Flags for pre-stage prompt are not yet implemented; this will resolve to + // the embedded default for now, which is acceptable and deterministic. + src, text := oai.ResolvePrepPrompt(nil, "") + type prestageMeta struct { + Source string `json:"source"` + Bytes int `json:"bytes"` + } + type wrapper struct { + Messages []oai.Message `json:"messages"` + ImagePrompt string `json:"image_prompt,omitempty"` + Prestage prestageMeta `json:"prestage"` + } + w := wrapper{ + Messages: messages, + Prestage: prestageMeta{Source: src, Bytes: len([]byte(text))}, + } + if strings.TrimSpace(imagePrompt) != "" { + w.ImagePrompt = strings.TrimSpace(imagePrompt) + } + return w +} + +// writeSavedMessages writes the wrapper JSON with messages, optional image_prompt, +// and pre-stage metadata. 
+func writeSavedMessages(path string, messages []oai.Message, imagePrompt string) error { + wrapper := buildMessagesWrapper(messages, strings.TrimSpace(imagePrompt)) + b, err := json.MarshalIndent(wrapper, "", " ") + if err != nil { + return err + } + return writeFileAtomic(path, b, 0o644) +} + +// applyTranscriptHygiene enforces transcript-size safeguards before requests. +// When debug is off, any role:"tool" message whose content exceeds 8 KiB is +// replaced with a compact JSON marker to prevent huge payloads from being sent +// upstream. Under -debug, no truncation occurs to preserve full visibility. +func applyTranscriptHygiene(in []oai.Message, debug bool) []oai.Message { + if debug || len(in) == 0 { + // Preserve exact transcript under -debug or when empty + return in + } + const limit = 8 * 1024 + out := make([]oai.Message, 0, len(in)) + for _, m := range in { + n := m + if n.Role == oai.RoleTool { + if len(n.Content) > limit { + n.Content = `{"truncated":true,"reason":"large-tool-output"}` + } + } + out = append(out, n) + } + return out +} + +// helpRequested returns true if any canonical help token is present. +func helpRequested(args []string) bool { + for _, a := range args { + if a == "--help" || a == "-h" || a == "help" { + return true + } + } + return false +} + +// versionRequested returns true if any canonical version token is present. +func versionRequested(args []string) bool { + for _, a := range args { + if a == "--version" || a == "-version" { + return true + } + } + return false +} + +// printUsage writes a comprehensive usage guide to w. 
func printUsage(w io.Writer) {
	// The guide is assembled in a strings.Builder and emitted in one write so
	// partial output cannot interleave with other writers.
	var b strings.Builder
	b.WriteString("agentcli — non-interactive CLI agent for OpenAI-compatible APIs\n\n")
	b.WriteString("Usage:\n agentcli [flags]\n\n")
	// Core prompt/model/tool flags.
	b.WriteString("Flags (precedence: flag > env > default):\n")
	b.WriteString(" -prompt string\n User prompt (required)\n")
	b.WriteString(" -tools string\n Path to tools.json (optional)\n")
	b.WriteString(" -system string\n System prompt (default \"You are a helpful, precise assistant. Use tools when strictly helpful.\")\n")
	b.WriteString(" -system-file string\n Path to file containing system prompt ('-' for STDIN; mutually exclusive with -system)\n")
	b.WriteString(" -developer string\n Developer message (repeatable)\n")
	b.WriteString(" -developer-file string\n Path to file containing developer message (repeatable; '-' for STDIN)\n")
	b.WriteString(" -prompt-file string\n Path to file containing user prompt ('-' for STDIN; mutually exclusive with -prompt)\n")
	b.WriteString(" -base-url string\n OpenAI-compatible base URL (env OAI_BASE_URL or default https://api.openai.com/v1)\n")
	b.WriteString(" -api-key string\n API key if required (env OAI_API_KEY; falls back to OPENAI_API_KEY)\n")
	b.WriteString(" -model string\n Model ID (env OAI_MODEL or default oss-gpt-20b)\n")
	b.WriteString(" -max-steps int\n Maximum reasoning/tool steps (default 8)\n")
	// Timeout and retry flags.
	b.WriteString(" -timeout duration\n [DEPRECATED] Global timeout; use -http-timeout and -tool-timeout (default 30s)\n")
	b.WriteString(" -http-timeout duration\n HTTP timeout for chat completions (env OAI_HTTP_TIMEOUT; falls back to -timeout if unset)\n")
	b.WriteString(" -prep-http-timeout duration\n HTTP timeout for pre-stage (env OAI_PREP_HTTP_TIMEOUT; falls back to -http-timeout if unset)\n")
	b.WriteString(" -tool-timeout duration\n Per-tool timeout (falls back to -timeout if unset)\n")
	b.WriteString(" -http-retries int\n Number of retries for transient HTTP failures (timeouts, 429, 5xx) (env OAI_HTTP_RETRIES; default 2)\n")
	b.WriteString(" -http-retry-backoff duration\n Base backoff between HTTP retry attempts (exponential) (env OAI_HTTP_RETRY_BACKOFF; default 500ms)\n")
	// Image generation flags.
	b.WriteString(" -image-base-url string\n Image API base URL (env OAI_IMAGE_BASE_URL; inherits -base-url if unset)\n")
	b.WriteString(" -image-model string\n Image model ID (env OAI_IMAGE_MODEL; default gpt-image-1)\n")
	b.WriteString(" -image-api-key string\n Image API key (env OAI_IMAGE_API_KEY; inherits -api-key if unset; falls back to OPENAI_API_KEY)\n")
	b.WriteString(" -image-http-timeout duration\n Image HTTP timeout (env OAI_IMAGE_HTTP_TIMEOUT; inherits -http-timeout if unset)\n")
	b.WriteString(" -image-http-retries int\n Image HTTP retries (env OAI_IMAGE_HTTP_RETRIES; inherits -http-retries if unset)\n")
	b.WriteString(" -image-http-retry-backoff duration\n Image HTTP retry backoff (env OAI_IMAGE_HTTP_RETRY_BACKOFF; inherits -http-retry-backoff if unset)\n")
	// Sampling flags (main and pre-stage).
	b.WriteString(" -temp float\n Sampling temperature (default 1.0)\n")
	b.WriteString(" -top-p float\n Nucleus sampling probability mass (conflicts with -temp; omits temperature when set)\n")
	b.WriteString(" -prep-profile string\n Pre-stage prompt profile (deterministic|general|creative|reasoning); sets temperature when supported (conflicts with -prep-top-p)\n")
	b.WriteString(" -prep-model string\n Pre-stage model ID (env OAI_PREP_MODEL; inherits -model if unset)\n")
	b.WriteString(" -prep-base-url string\n Pre-stage base URL (env OAI_PREP_BASE_URL; inherits -base-url if unset)\n")
	b.WriteString(" -prep-api-key string\n Pre-stage API key (env OAI_PREP_API_KEY; falls back to OAI_API_KEY/OPENAI_API_KEY; inherits -api-key if unset)\n")
	b.WriteString(" -prep-http-retries int\n Pre-stage HTTP retries (env OAI_PREP_HTTP_RETRIES; inherits -http-retries if unset)\n")
	b.WriteString(" -prep-http-retry-backoff duration\n Pre-stage HTTP retry backoff (env OAI_PREP_HTTP_RETRY_BACKOFF; inherits -http-retry-backoff if unset)\n")
	b.WriteString(" -prep-temp float\n Pre-stage sampling temperature (env OAI_PREP_TEMP; inherits -temp if unset; conflicts with -prep-top-p)\n")
	b.WriteString(" -prep-top-p float\n Nucleus sampling probability mass for pre-stage (env OAI_PREP_TOP_P; conflicts with -prep-temp; omits temperature when set)\n")
	b.WriteString(" -prep-system string\n Pre-stage system message (env OAI_PREP_SYSTEM; mutually exclusive with -prep-system-file)\n")
	b.WriteString(" -prep-system-file string\n Path to file containing pre-stage system message ('-' for STDIN; env OAI_PREP_SYSTEM_FILE; mutually exclusive with -prep-system)\n")
	b.WriteString(" -image-n int\n Number of images to generate (env OAI_IMAGE_N; default 1)\n")
	b.WriteString(" -image-size string\n Image size WxH, e.g., 1024x1024 (env OAI_IMAGE_SIZE; default 1024x1024)\n")
	b.WriteString(" -image-quality string\n Image quality: standard|hd (env OAI_IMAGE_QUALITY; default standard)\n")
	b.WriteString(" -image-style string\n Image style: natural|vivid (env OAI_IMAGE_STYLE; default natural)\n")
	b.WriteString(" -image-response-format string\n Image response format: url|b64_json (env OAI_IMAGE_RESPONSE_FORMAT; default url)\n")
	b.WriteString(" -image-transparent-background\n Request transparent background when supported (env OAI_IMAGE_TRANSPARENT_BACKGROUND; default false)\n")
	// Output/verbosity flags.
	b.WriteString(" -debug\n Dump request/response JSON to stderr\n")
	b.WriteString(" -verbose\n Also print non-final assistant channels (critic/confidence) to stderr\n")
	b.WriteString(" -quiet\n Suppress non-final output; print only final text to stdout\n")
	b.WriteString(" -prep-tools-allow-external\n Allow pre-stage to execute external tools from -tools (default false)\n")
	b.WriteString(" -prep-cache-bust\n Skip pre-stage cache and force recompute\n")
	b.WriteString(" -prep-tools string\n Path to pre-stage tools.json (optional; used only with -prep-tools-allow-external)\n")
	b.WriteString(" -prep-dry-run\n Run pre-stage only, print refined Harmony messages to stdout, and exit 0\n")
	// State persistence flags.
	b.WriteString(" -state-dir string\n Directory to persist and restore execution state across runs (env AGENTCLI_STATE_DIR)\n")
	b.WriteString(" -state-scope string\n Optional scope key to partition saved state (env AGENTCLI_STATE_SCOPE); when empty, a default hash of model|base_url|toolset is used\n")
	b.WriteString(" -state-refine\n Refine the loaded state bundle using -state-refine-text or -state-refine-file (requires -state-dir)\n")
	b.WriteString(" -state-refine-text string\n Refinement input text to apply to the loaded state bundle (ignored when -state-refine-file is set; requires -state-dir)\n")
	b.WriteString(" -state-refine-file string\n Path to file containing refinement input (wins over -state-refine-text; requires -state-dir)\n")
	b.WriteString(" -print-messages\n Pretty-print the final merged message array to stderr before the main call\n")
	b.WriteString(" -stream-final\n If server supports streaming, stream only assistant{channel:\"final\"} to stdout; buffer other channels for -verbose\n")
	b.WriteString(" -channel-route name=stdout|stderr|omit\n Override default channel routing (final→stdout, critic/confidence→stderr); repeatable\n")
	b.WriteString(" -save-messages string\n Write the final merged Harmony messages to the given JSON file and continue\n")
	b.WriteString(" -load-messages string\n Bypass pre-stage and prompt; load Harmony messages from the given JSON file (validator-checked)\n")
	b.WriteString(" -prep-enabled\n Enable pre-stage processing (default true; when false, skip pre-stage and proceed directly to main call)\n")
	b.WriteString(" -capabilities\n Print enabled tools and exit\n")
	b.WriteString(" -print-config\n Print resolved config and exit\n")
	b.WriteString(" -dry-run\n Print intended state actions (restore/refine/save) and exit without writing state\n")
	b.WriteString(" --version | -version\n Print version and exit\n")
	b.WriteString("\nDocs:\n")
	b.WriteString(" - Linux 5.4 sandbox compatibility and policy authoring: docs/runbooks/linux-5.4-sandbox-compatibility.md\n")
	b.WriteString("\nExamples:\n")
	b.WriteString(" # Quick start (after make build build-tools)\n")
	b.WriteString(" ./bin/agentcli -prompt \"What's the local time in Helsinki? Use get_time.\" -tools ./tools.json -debug\n\n")
	b.WriteString(" # Print capabilities (enabled tools)\n")
	b.WriteString(" ./bin/agentcli -capabilities -tools ./tools.json\n\n")
	b.WriteString(" # Show help\n")
	b.WriteString(" agentcli --help\n")
	b.WriteString("\n # Show version\n")
	b.WriteString(" agentcli --version\n")
	// Trailing newlines are trimmed so Fprintln adds exactly one.
	safeFprintln(w, strings.TrimRight(b.String(), "\n"))
}

// Build-time variables set via -ldflags; defaults are useful for dev builds.
var (
	version   = "v0.0.0-dev"
	commit    = "unknown"
	buildDate = "unknown"
)

// printVersion writes a concise single-line version string to stdout.
func printVersion(w io.Writer) {
	// Example: agentcli version v1.2.3 (commit abcdef1, built 2025-08-17)
	safeFprintln(w, fmt.Sprintf("agentcli version %s (commit %s, built %s)", version, shortCommit(commit), buildDate))
}

// shortCommit abbreviates a commit hash to 7 characters and maps an empty
// value to "unknown".
func shortCommit(c string) string {
	c = strings.TrimSpace(c)
	if len(c) > 7 {
		return c[:7]
	}
	if c == "" {
		return "unknown"
	}
	return c
}

// printResolvedConfig writes a JSON object describing resolved configuration
// (model, base URL, and timeouts with their sources) to stdout. Returns exit code 0.
func printResolvedConfig(cfg cliConfig, stdout io.Writer) int {
	// Ensure timeouts are normalized as in runAgent: prefer the explicit
	// per-phase value, then the deprecated global -timeout, then a default.
	if cfg.httpTimeout <= 0 {
		if cfg.timeout > 0 {
			cfg.httpTimeout = cfg.timeout
		} else {
			cfg.httpTimeout = 90 * time.Second
		}
	}
	if cfg.toolTimeout <= 0 {
		if cfg.timeout > 0 {
			cfg.toolTimeout = cfg.timeout
		} else {
			cfg.toolTimeout = 30 * time.Second
		}
	}
	// Default sources when unset
	if strings.TrimSpace(cfg.httpTimeoutSource) == "" {
		cfg.httpTimeoutSource = "default"
	}
	if strings.TrimSpace(cfg.prepHTTPTimeoutSource) == "" {
		cfg.prepHTTPTimeoutSource = "inherit"
	}
	if strings.TrimSpace(cfg.toolTimeoutSource) == "" {
		cfg.toolTimeoutSource = "default"
	}
	if strings.TrimSpace(cfg.globalTimeoutSource) == "" {
		cfg.globalTimeoutSource = "default"
	}

	// Build a minimal, stable JSON payload
	payload := map[string]any{
		"model":                 cfg.model,
		"baseURL":               cfg.baseURL,
		"httpTimeout":           cfg.httpTimeout.String(),
		"httpTimeoutSource":     cfg.httpTimeoutSource,
		"prepHTTPTimeout":       cfg.prepHTTPTimeout.String(),
		"prepHTTPTimeoutSource": cfg.prepHTTPTimeoutSource,
		"toolTimeout":           cfg.toolTimeout.String(),
		"toolTimeoutSource":     cfg.toolTimeoutSource,
		"timeout":               cfg.timeout.String(),
		"timeoutSource":         cfg.globalTimeoutSource,
	}

	// Resolve prep-specific view for printing: env OAI_PREP_* > inherit from main
	// Use resolved cfg prep fields and sources
	prepModel, prepModelSource := cfg.prepModel, cfg.prepModelSource
	prepBase, prepBaseSource := cfg.prepBaseURL, cfg.prepBaseURLSource
	// Only presence of the key is reported, never the key itself.
	var apiKeyPresent bool
	apiKeySource := cfg.prepAPIKeySource
	if strings.TrimSpace(cfg.prepAPIKey) != "" {
		apiKeyPresent = true
	} else {
		apiKeyPresent = false
	}

	// Resolve sampling for prep: one-knob behavior with explicit overrides.
	// -prep-top-p wins outright; otherwise an explicit flag/env temperature is
	// honored when the model supports it; otherwise the main temperature is
	// inherited when supported; otherwise both knobs are reported as omitted.
	var prepTempStr, prepTempSource, prepTopPStr, prepTopPSource string
	if cfg.prepTopP > 0 {
		prepTopPStr = strconv.FormatFloat(cfg.prepTopP, 'f', -1, 64)
		prepTopPSource = cfg.prepTopPSource
		prepTempStr = "(omitted)"
		prepTempSource = "omitted:one-knob"
	} else if cfg.prepTemperatureSource == "flag" || cfg.prepTemperatureSource == "env" {
		if oai.SupportsTemperature(prepModel) {
			prepTempStr = strconv.FormatFloat(cfg.prepTemperature, 'f', -1, 64)
			prepTempSource = cfg.prepTemperatureSource
			prepTopPStr = "(omitted)"
			prepTopPSource = "inherit"
		} else {
			prepTempStr = "(omitted:unsupported)"
			prepTempSource = "unsupported"
			prepTopPStr = "(omitted)"
			prepTopPSource = "inherit"
		}
	} else {
		// Inherit main temperature when supported; else both omitted
		if oai.SupportsTemperature(prepModel) {
			prepTempStr = strconv.FormatFloat(cfg.temperature, 'f', -1, 64)
			prepTempSource = cfg.temperatureSource
			prepTopPStr = "(omitted)"
			prepTopPSource = "inherit"
		} else {
			prepTempStr = "(omitted:unsupported)"
			prepTempSource = "unsupported"
			prepTopPStr = "(omitted)"
			prepTopPSource = "inherit"
		}
	}

	// Pre-stage block
	payload["prep"] = map[string]any{
		"enabled":                cfg.prepEnabled,
		"model":                  prepModel,
		"modelSource":            prepModelSource,
		"baseURL":                prepBase,
		"baseURLSource":          prepBaseSource,
		"apiKeyPresent":          apiKeyPresent,
		"apiKeySource":           apiKeySource,
		"httpTimeout":            cfg.prepHTTPTimeout.String(),
		"httpTimeoutSource":      cfg.prepHTTPTimeoutSource,
		"httpRetries":            cfg.prepHTTPRetries,
		"httpRetriesSource":      cfg.prepHTTPRetriesSource,
		"httpRetryBackoff":       cfg.prepHTTPBackoff.String(),
		"httpRetryBackoffSource": cfg.prepHTTPBackoffSource,
		"sampling": map[string]any{
			"temperature":       prepTempStr,
			"temperatureSource": prepTempSource,
			"top_p":             prepTopPStr,
			"top_pSource":       prepTopPSource,
		},
	}
	// Image block with redacted API key
	{
		img, baseSrc, keySrc := oai.ResolveImageConfig(cfg.imageBaseURL, cfg.imageAPIKey, cfg.baseURL, cfg.apiKey)
		payload["image"] = map[string]any{
			"baseURL":                img.BaseURL,
			"baseURLSource":          baseSrc,
			"apiKey":                 oai.MaskAPIKeyLast4(img.APIKey),
			"apiKeySource":           keySrc,
			"model":                  cfg.imageModel,
			"httpTimeout":            cfg.imageHTTPTimeout.String(),
			"httpTimeoutSource":      nonEmptyOr(cfg.imageHTTPTimeoutSource, "inherit"),
			"httpRetries":            cfg.imageHTTPRetries,
			"httpRetriesSource":      nonEmptyOr(cfg.imageHTTPRetriesSource, "inherit"),
			"httpRetryBackoff":       cfg.imageHTTPBackoff.String(),
			"httpRetryBackoffSource": nonEmptyOr(cfg.imageHTTPBackoffSource, "inherit"),
			"n":                      cfg.imageN,
			"size":                   cfg.imageSize,
			"quality":                cfg.imageQuality,
			"style":                  cfg.imageStyle,
			"response_format":        cfg.imageResponseFormat,
			"transparent_background": cfg.imageTransparentBackground,
		}
	}

	data, err := json.MarshalIndent(payload, "", " ")
	if err != nil {
		// Fallback to a simple line to avoid surprising exits
		safeFprintln(stdout, "{}")
		return 0
	}
	safeFprintln(stdout, string(data))
	return 0
}

// printStateDryRunPlan outputs a concise plan describing intended state actions.
// It never writes to disk. Exit code 0 on success.
func printStateDryRunPlan(cfg cliConfig, stdout io.Writer, stderr io.Writer) int {
	// Normalize/expand state-dir as parseFlags would have done
	dir := strings.TrimSpace(cfg.stateDir)
	if dir != "" {
		if strings.HasPrefix(dir, "~") {
			if home, err := os.UserHomeDir(); err == nil {
				dir = filepath.Join(home, strings.TrimPrefix(dir, "~"))
			}
		}
		dir = filepath.Clean(dir)
	}

	// Determine action
	type plan struct {
		Action        string `json:"action"`
		StateDir      string `json:"state_dir"`
		ScopeKey      string `json:"scope_key"`
		Refine        bool   `json:"refine"`
		HasRefineText bool   `json:"has_refine_text"`
		HasRefineFile bool   `json:"has_refine_file"`
		Notes         string `json:"notes"`
	}
	p := plan{StateDir: dir, ScopeKey: strings.TrimSpace(cfg.stateScope), Refine: cfg.stateRefine, HasRefineText: strings.TrimSpace(cfg.stateRefineText) != "", HasRefineFile: strings.TrimSpace(cfg.stateRefineFile) != ""}

	if dir == "" {
		p.Action = "none"
		p.Notes = "state-dir not set; no restore/save will occur"
	} else if cfg.stateRefine || p.HasRefineText || p.HasRefineFile {
		p.Action = "refine"
		p.Notes = "would load latest bundle (if any), apply refinement, and write a new snapshot"
	} else {
		// Not refining: would attempt restore-before-prep and save afterward
		p.Action = "restore_or_save"
		p.Notes = "would attempt restore-before-prep using latest.json; on success reuse without calling pre-stage; otherwise would run pre-stage and save a new snapshot"
	}

	// Include a synthetic SHA hint to demonstrate formatting without real IO.
	// NOTE(review): rand.Uint32 makes this value different on every run — it
	// is an obvious placeholder, but it is NOT stable across invocations; if
	// deterministic dry-run output is required, replace with a constant.
	hint := map[string]any{
		"sample_short_sha": fmt.Sprintf("%08x", rand.Uint32()),
	}
	out := map[string]any{
		"plan": p,
		"hint": hint,
	}
	if b, err := json.MarshalIndent(out, "", " "); err == nil {
		safeFprintln(stdout, string(b))
		return 0
	}
	safeFprintln(stdout, "{\"plan\":{\"action\":\"unknown\"}}")
	return 0
}

// nonEmptyOr returns a when non-empty, otherwise b.
func nonEmptyOr(a, b string) string {
	if strings.TrimSpace(a) == "" {
		return b
	}
	return a
}

// writeFileAtomic writes data to path atomically by writing to a temp file
// in the same directory and then renaming it over the destination. Parent
// directories are created if missing.
// NOTE(review): the fixed ".tmp" suffix is not unique across concurrent
// writers of the same path — confirm single-process usage.
func writeFileAtomic(path string, data []byte, perm os.FileMode) error {
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}
	tmp := path + ".tmp"
	if err := os.WriteFile(tmp, data, perm); err != nil {
		return err
	}
	return os.Rename(tmp, path)
}

// printCapabilities loads the tools manifest (if provided) and prints a concise list
// of enabled tools along with a prominent safety warning. Returns a process exit code.
+func printCapabilities(cfg cliConfig, stdout io.Writer, stderr io.Writer) int { + // If no tools path provided, report no tools and exit 0 + if strings.TrimSpace(cfg.toolsPath) == "" { + safeFprintln(stdout, "No tools enabled (run with -tools <path to tools.json>).") + safeFprintln(stdout, "WARNING: Enabling tools allows local process execution and may permit network access. Review tools.json carefully.") + return 0 + } + + registry, _, err := tools.LoadManifest(cfg.toolsPath) + if err != nil { + safeFprintf(stderr, "error: failed to load tools manifest: %v\n", err) + return 1 + } + safeFprintln(stdout, "WARNING: Enabled tools can execute local binaries and may access the network. Use with caution.") + if len(registry) == 0 { + safeFprintln(stdout, "No tools enabled in manifest.") + return 0 + } + safeFprintln(stdout, "Capabilities (enabled tools):") + // Stable ordering: lexicographic by name for deterministic output + names := make([]string, 0, len(registry)) + for name := range registry { + names = append(names, name) + } + // simple insertion sort to avoid importing sort just for one call; keep dependencies minimal + for i := 1; i < len(names); i++ { + j := i + for j > 0 && names[j] < names[j-1] { + names[j], names[j-1] = names[j-1], names[j] + j-- + } + } + for _, name := range names { + spec := registry[name] + desc := strings.TrimSpace(spec.Description) + if desc == "" { + desc = "(no description)" + } + // Add an explicit per-tool warning for img_create since it performs outbound network calls + // and can write image files to the repository when configured to save. + if name == "img_create" { + desc = desc + " [WARNING: makes outbound network calls and can save files]" + } + safeFprintf(stdout, "- %s: %s\n", name, desc) + } + return 0 +} + +// (Deprecated) durationFlexValue was used for an earlier flag implementation. +// It is intentionally removed to avoid unused-code lints; parsing is handled +// by durationFlexFlag and parseDurationFlexible. 
+ +// parseDurationFlexible parses a duration allowing either Go duration syntax +// or a plain integer representing seconds. +func parseDurationFlexible(raw string) (time.Duration, error) { + s := strings.TrimSpace(raw) + if s == "" { + return 0, fmt.Errorf("empty duration") + } + if d, err := time.ParseDuration(s); err == nil { + return d, nil + } + // Accept plain integer seconds + allDigits := true + for _, r := range s { + if r < '0' || r > '9' { + allDigits = false + break + } + } + if allDigits { + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return 0, err + } + if n <= 0 { + return 0, fmt.Errorf("duration seconds must be > 0") + } + return time.Duration(n) * time.Second, nil + } + return 0, fmt.Errorf("invalid duration: %q", raw) +} + +// ignoreError is used to explicitly acknowledge and ignore expected errors +// in places where failure is handled via alternative control flow (e.g., +// we parse flags with ContinueOnError and then return exit codes). This +// satisfies linters that require checking error returns while keeping the +// intended behavior unchanged. +func ignoreError(_ error) {} + +// safeFprintln writes a line to w and intentionally ignores write errors. +// This encapsulation makes the intent explicit and satisfies errcheck. +func safeFprintln(w io.Writer, a ...any) { + if _, err := fmt.Fprintln(w, a...); err != nil { + return + } +} + +// safeFprintf writes formatted text to w and intentionally ignores write errors. +// This encapsulation makes the intent explicit and satisfies errcheck. +func safeFprintf(w io.Writer, format string, a ...any) { + if _, err := fmt.Fprintf(w, format, a...); err != nil { + return + } +} + +// resolveChannelRoute returns the destination for a given assistant channel. +// Defaults: final→stdout; non-final (critic/confidence)→stderr. Unknown/empty +// channels default to final behavior. When an override is provided via +// -channel-route, it takes precedence. 
+func resolveChannelRoute(cfg cliConfig, channel string, nonFinal bool) string { + ch := strings.TrimSpace(channel) + if ch == "" { + ch = "final" + } + if cfg.channelRoutes != nil { + if dest, ok := cfg.channelRoutes[ch]; ok { + return dest + } + } + if ch == "final" { + return "stdout" + } + // Default non-final route + return "stderr" +} + +// stringSliceFlag implements flag.Value to collect repeatable string flags into a slice. +type stringSliceFlag []string + +func (s *stringSliceFlag) String() string { + if s == nil { + return "" + } + return strings.Join(*s, ",") +} + +func (s *stringSliceFlag) Set(v string) error { + *s = append(*s, v) + return nil +} + +// resolveMaybeFile returns the effective content from either an inline string +// or a file path when provided. When filePath is "-", it reads from STDIN. +// If filePath is non-empty, it takes precedence over inline. +func resolveMaybeFile(inline string, filePath string) (string, error) { + f := strings.TrimSpace(filePath) + if f == "" { + return inline, nil + } + if f == "-" { + b, err := io.ReadAll(os.Stdin) + if err != nil { + return "", fmt.Errorf("read STDIN: %w", err) + } + return string(b), nil + } + b, err := os.ReadFile(f) + if err != nil { + return "", fmt.Errorf("read file %s: %w", f, err) + } + return string(b), nil +} + +// resolveDeveloperMessages aggregates developer messages from repeatable flags +// and files. Files are read in the order provided; "-" reads from STDIN. +func resolveDeveloperMessages(inlines []string, files []string) ([]string, error) { + var out []string + for _, f := range files { + s, err := resolveMaybeFile("", f) + if err != nil { + return nil, err + } + out = append(out, s) + } + out = append(out, inlines...) 
+ return out, nil +} diff --git a/cmd/agentcli/main_test.go b/cmd/agentcli/main_test.go new file mode 100644 index 0000000..38af37f --- /dev/null +++ b/cmd/agentcli/main_test.go @@ -0,0 +1,3323 @@ +//nolint:errcheck // Many test helpers intentionally drop errors from encoders/writes and env setters; behavior asserted separately. +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/hyperifyio/goagent/internal/oai" + "github.com/hyperifyio/goagent/internal/tools" +) + +// https://github.com/hyperifyio/goagent/issues/97 +func TestParseFlags_ApiKeyEnvPrecedence(t *testing.T) { + // Save and restore env + save := func(k string) (string, bool) { v, ok := os.LookupEnv(k); return v, ok } + restore := func(k, v string, ok bool) { + if ok { + if err := os.Setenv(k, v); err != nil { + t.Fatalf("restore %s: %v", k, err) + } + } else { + if err := os.Unsetenv(k); err != nil { + t.Fatalf("unset %s: %v", k, err) + } + } + } + oaiVal, oaiOK := save("OAI_API_KEY") + openaiVal, openaiOK := save("OPENAI_API_KEY") + defer func() { restore("OAI_API_KEY", oaiVal, oaiOK); restore("OPENAI_API_KEY", openaiVal, openaiOK) }() + + // Case 1: only OPENAI_API_KEY set -> used + if err := os.Unsetenv("OAI_API_KEY"); err != nil { + t.Fatalf("unset OAI_API_KEY: %v", err) + } + if err := os.Setenv("OPENAI_API_KEY", "legacy-token"); err != nil { + t.Fatalf("set OPENAI_API_KEY: %v", err) + } + // parseFlags reads os.Args; simulate minimal args + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-prompt", "x"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("unexpected parse exit: %d", code) + } + if cfg.apiKey != "legacy-token" { + t.Fatalf("expected apiKey from OPENAI_API_KEY, got %q", cfg.apiKey) + } + + // Case 2: both set -> OAI_API_KEY wins + if err := os.Setenv("OAI_API_KEY", 
"canonical-token"); err != nil { + t.Fatalf("set OAI_API_KEY: %v", err) + } + os.Args = []string{"agentcli.test", "-prompt", "x"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("unexpected parse exit: %d", code) + } + if cfg.apiKey != "canonical-token" { + t.Fatalf("expected apiKey from OAI_API_KEY, got %q", cfg.apiKey) + } + + // Case 3: flag overrides env + os.Args = []string{"agentcli.test", "-prompt", "x", "-api-key", "from-flag"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("unexpected parse exit: %d", code) + } + if cfg.apiKey != "from-flag" { + t.Fatalf("expected apiKey from flag, got %q", cfg.apiKey) + } + + // Silence any stdout/stderr during runAgent for safety (not strictly needed here) + _ = io.Discard +} + +// https://github.com/hyperifyio/goagent/issues/243 +func TestParseFlags_SplitTimeoutResolution(t *testing.T) { + // Save/restore OAI_HTTP_TIMEOUT + save := func(k string) (string, bool) { v, ok := os.LookupEnv(k); return v, ok } + restore := func(k, v string, ok bool) { + if ok { + if err := os.Setenv(k, v); err != nil { + t.Fatalf("restore %s: %v", k, err) + } + } else { + if err := os.Unsetenv(k); err != nil { + t.Fatalf("unset %s: %v", k, err) + } + } + } + httpEnvVal, httpEnvOK := save("OAI_HTTP_TIMEOUT") + defer restore("OAI_HTTP_TIMEOUT", httpEnvVal, httpEnvOK) + + // Case 1: defaults — http falls back to legacy -timeout (30s), tool to 30s, prep inherits http + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-prompt", "x"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.httpTimeout != cfg.timeout || cfg.timeout != 30*time.Second { + t.Fatalf("expected httpTimeout=timeout=30s, got http=%v timeout=%v", cfg.httpTimeout, cfg.timeout) + } + if cfg.toolTimeout != cfg.timeout { + t.Fatalf("expected toolTimeout=timeout, got %v vs %v", cfg.toolTimeout, cfg.timeout) + } + if cfg.prepHTTPTimeout != cfg.httpTimeout { + t.Fatalf("expected 
prepHTTPTimeout to inherit httpTimeout; got prep=%v http=%v", cfg.prepHTTPTimeout, cfg.httpTimeout) + } + + // Case 2: env OAI_HTTP_TIMEOUT overrides legacy and prep inherits http + if err := os.Setenv("OAI_HTTP_TIMEOUT", "2m"); err != nil { + t.Fatalf("set env: %v", err) + } + os.Args = []string{"agentcli.test", "-prompt", "x"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.httpTimeout != 2*time.Minute { + t.Fatalf("expected httpTimeout=2m from env, got %v", cfg.httpTimeout) + } + if cfg.toolTimeout != 30*time.Second { + t.Fatalf("expected toolTimeout=30s default, got %v", cfg.toolTimeout) + } + if cfg.prepHTTPTimeout != cfg.httpTimeout { + t.Fatalf("expected prepHTTPTimeout to inherit httpTimeout; got prep=%v http=%v", cfg.prepHTTPTimeout, cfg.httpTimeout) + } + + // Case 3: flags override env and legacy + os.Args = []string{"agentcli.test", "-prompt", "x", "-http-timeout", "5s", "-tool-timeout", "7s", "-timeout", "1s"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.httpTimeout != 5*time.Second || cfg.toolTimeout != 7*time.Second { + t.Fatalf("expected http=5s tool=7s, got http=%v tool=%v", cfg.httpTimeout, cfg.toolTimeout) + } +} + +// Verify precedence for -prep-http-timeout: flag > env OAI_PREP_HTTP_TIMEOUT > -http-timeout > default +func TestPrepHTTPTimeout_Precedence(t *testing.T) { + save := func(k string) (string, bool) { v, ok := os.LookupEnv(k); return v, ok } + restore := func(k, v string, ok bool) { + if ok { + if err := os.Setenv(k, v); err != nil { + t.Fatalf("restore %s: %v", k, err) + } + } else { + if err := os.Unsetenv(k); err != nil { + t.Fatalf("unset %s: %v", k, err) + } + } + } + prepEnvVal, prepEnvOK := save("OAI_PREP_HTTP_TIMEOUT") + httpEnvVal, httpEnvOK := save("OAI_HTTP_TIMEOUT") + defer restore("OAI_PREP_HTTP_TIMEOUT", prepEnvVal, prepEnvOK) + defer restore("OAI_HTTP_TIMEOUT", httpEnvVal, httpEnvOK) + + // Case A: inherit from http-timeout when no 
flag/env + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "x", "-http-timeout", "5s"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.prepHTTPTimeout != 5*time.Second { + t.Fatalf("inheritance failed: prep=%v want 5s", cfg.prepHTTPTimeout) + } + if cfg.prepHTTPTimeoutSource != "inherit" { + t.Fatalf("prep source=%s want inherit", cfg.prepHTTPTimeoutSource) + } + + // Case B: env OAI_PREP_HTTP_TIMEOUT overrides http-timeout + if err := os.Setenv("OAI_PREP_HTTP_TIMEOUT", "7s"); err != nil { + t.Fatalf("set env: %v", err) + } + os.Args = []string{"agentcli.test", "-prompt", "x", "-http-timeout", "5s"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.prepHTTPTimeout != 7*time.Second { + t.Fatalf("prep from env got %v want 7s", cfg.prepHTTPTimeout) + } + if cfg.prepHTTPTimeoutSource != "env" { + t.Fatalf("prep source=%s want env", cfg.prepHTTPTimeoutSource) + } + + // Case C: flag -prep-http-timeout overrides env + os.Args = []string{"agentcli.test", "-prompt", "x", "-http-timeout", "5s", "-prep-http-timeout", "9s"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.prepHTTPTimeout != 9*time.Second { + t.Fatalf("prep from flag got %v want 9s", cfg.prepHTTPTimeout) + } + if cfg.prepHTTPTimeoutSource != "flag" { + t.Fatalf("prep source=%s want flag", cfg.prepHTTPTimeoutSource) + } +} + +// https://github.com/hyperifyio/goagent/issues/251 +// Default sampling temperature must resolve to 1.0 and propagate in requests when unset. 
+func TestDefaultTemperature_IsOneAndPropagates(t *testing.T) { + // Fake server that captures the incoming temperature + var seenTemp *float64 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + seenTemp = req.Temperature + // Return a minimal valid response to terminate + resp := oai.ChatCompletionsResponse{ + Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode: %v", err) + } + })) + defer srv.Close() + + // Simulate CLI args without -temp to rely on default + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "x", "-base-url", srv.URL, "-model", "m"} + + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.temperature != 1.0 { + t.Fatalf("default temperature got %v want 1.0", cfg.temperature) + } + + var outBuf, errBuf bytes.Buffer + code = runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("runAgent exit=%d stderr=%s", code, errBuf.String()) + } + if seenTemp == nil || *seenTemp != 1.0 { + if seenTemp == nil { + t.Fatalf("temperature missing in request; want 1.0") + } + t.Fatalf("temperature in request got %v want 1.0", *seenTemp) + } +} + +// Help should list image HTTP behavior flags to keep docs/tests in sync. +func TestHelpMentionsImageHTTPFlags(t *testing.T) { + var b strings.Builder + printUsage(&b) + help := b.String() + for _, token := range []string{"-image-http-timeout", "-image-http-retries", "-image-http-retry-backoff"} { + if !strings.Contains(help, token) { + t.Fatalf("help missing %s token; help=\n%s", token, help) + } + } +} + +// Help should list pre-stage system flags to keep docs/tests in sync. 
+func TestHelpMentionsPrepSystemFlags(t *testing.T) { + var b strings.Builder + printUsage(&b) + help := b.String() + for _, token := range []string{"-prep-system", "-prep-system-file"} { + if !strings.Contains(help, token) { + t.Fatalf("help missing %s token; help=\n%s", token, help) + } + } +} + +// Pre-stage one-knob: when -prep-top-p is provided, the prep request must include top_p +// and omit temperature. We exercise the minimal runPreStage helper. +func TestPrepOneKnob_TopPOmitsTemperature(t *testing.T) { + var seenTemp *float64 + var seenTopP *float64 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + seenTemp = req.Temperature + seenTopP = req.TopP + // Return minimal assistant content + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv.Close() + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", prepHTTPTimeout: 2 * time.Second, httpRetries: 0} + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + cfg.prepTopP = 0.9 + var errBuf bytes.Buffer + if _, err := runPreStage(cfg, msgs, &errBuf); err != nil { + t.Fatalf("runPreStage error: %v", err) + } + if seenTemp != nil { + t.Fatalf("prep: expected temperature omitted when -prep-top-p is set") + } + if seenTopP == nil || *seenTopP != 0.9 { + if seenTopP == nil { + t.Fatalf("prep: expected top_p present") + } + t.Fatalf("prep: expected top_p=0.9, got %v", *seenTopP) + } +} + +// Pre-stage should include temperature when -prep-top-p is not set and model supports it. 
+func TestPrepIncludesTemperatureWhenSupported(t *testing.T) { + var seenTemp *float64 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + _ = json.NewDecoder(r.Body).Decode(&req) + seenTemp = req.Temperature + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv.Close() + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "oss-gpt-20b", prepHTTPTimeout: time.Second, httpRetries: 0} + cfg.temperature = 1.0 + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + var errBuf bytes.Buffer + if _, err := runPreStage(cfg, msgs, &errBuf); err != nil { + t.Fatalf("runPreStage: %v", err) + } + if seenTemp == nil || *seenTemp != 1.0 { + t.Fatalf("prep: expected temperature=1.0 included; got %v", func() any { + if seenTemp == nil { + return nil + } + return *seenTemp + }()) + } +} + +// When -prep-system or env/flag provides a pre-stage system message, it should +// be sent as the first message in the prep request. 
+func TestPrepSystemMessage_PrependIfProvided(t *testing.T) { + var first oai.Message + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + _ = json.NewDecoder(r.Body).Decode(&req) + if len(req.Messages) > 0 { + first = req.Messages[0] + } + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv.Close() + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", prepHTTPTimeout: 2 * time.Second, httpRetries: 0} + cfg.prepSystem = "P_SYS" + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "MAIN_SYS"}, {Role: oai.RoleUser, Content: "U"}} + var errBuf bytes.Buffer + if _, err := runPreStage(cfg, msgs, &errBuf); err != nil { + t.Fatalf("runPreStage error: %v", err) + } + if first.Role != oai.RoleSystem || strings.TrimSpace(first.Content) != "P_SYS" { + t.Fatalf("first prep message = %+v; want system with P_SYS", first) + } +} + +// When -prep-profile deterministic is set and temperature is supported, pre-stage +// should include temperature=0.1 unless -prep-top-p is also provided. 
+func TestPrepProfileDeterministic_SetsTemperature(t *testing.T) { + var seenTemp *float64 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + _ = json.NewDecoder(r.Body).Decode(&req) + seenTemp = req.Temperature + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv.Close() + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "oss-gpt-20b", prepHTTPTimeout: time.Second, httpRetries: 0} + cfg.prepProfile = oai.ProfileDeterministic + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + var errBuf bytes.Buffer + if _, err := runPreStage(cfg, msgs, &errBuf); err != nil { + t.Fatalf("runPreStage: %v", err) + } + if seenTemp == nil || *seenTemp != 0.1 { + t.Fatalf("prep-profile deterministic: expected temperature=0.1; got %v", func() any { + if seenTemp == nil { + return nil + } + return *seenTemp + }()) + } +} + +// If -prep-top-p is set, it wins over -prep-profile by one-knob rule, omitting temperature. 
+func TestPrepProfile_IgnoredWhenTopPSet(t *testing.T) { + var seenTemp *float64 + var seenTopP *float64 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + _ = json.NewDecoder(r.Body).Decode(&req) + seenTemp = req.Temperature + seenTopP = req.TopP + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv.Close() + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "oss-gpt-20b", prepHTTPTimeout: time.Second, httpRetries: 0} + cfg.prepProfile = oai.ProfileDeterministic + cfg.prepTopP = 0.8 + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + var errBuf bytes.Buffer + if _, err := runPreStage(cfg, msgs, &errBuf); err != nil { + t.Fatalf("runPreStage: %v", err) + } + if seenTemp != nil { + t.Fatalf("expected temperature omitted when -prep-top-p is set, got %v", *seenTemp) + } + if seenTopP == nil || *seenTopP != 0.8 { + t.Fatalf("expected top_p=0.8 present; got %v", func() any { + if seenTopP == nil { + return nil + } + return *seenTopP + }()) + } +} + +// Pre-stage should omit temperature for unsupported models even when not using -prep-top-p, +// and the client must recover on 400 mentioning temperature by retrying without temperature. 
+func TestPrep_TemperatureUnsupported_400Recovery(t *testing.T) { + var calls int + var seenTemps []bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + calls++ + var req oai.ChatCompletionsRequest + _ = json.NewDecoder(r.Body).Decode(&req) + seenTemps = append(seenTemps, req.Temperature != nil) + if calls == 1 { + // Simulate 400 mentioning unsupported temperature to trigger param recovery + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error":{"message":"parameter 'temperature' is unsupported for this model"}}`)) + return + } + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer srv.Close() + + // Use a model that declares SupportsTemperature==true, but we will include temp initially + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "oss-gpt-20b", prepHTTPTimeout: time.Second, httpRetries: 0} + cfg.temperature = 0.7 + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + var errBuf bytes.Buffer + if _, err := runPreStage(cfg, msgs, &errBuf); err != nil { + t.Fatalf("runPreStage: %v", err) + } + if calls != 2 { + t.Fatalf("prep: expected exactly one recovery retry; calls=%d", calls) + } + if !(seenTemps[0] && !seenTemps[1]) { + t.Fatalf("prep: expected temp present on first attempt and omitted on retry; got %v", seenTemps) + } +} + +// TestPrepCache_HitAndBust verifies pre-stage caching returns cached messages +// when inputs match and TTL is valid, and that -prep-cache-bust bypasses cache. 
func TestPrepCache_HitAndBust(t *testing.T) {
	t.Setenv("GOAGENT_PREP_CACHE_TTL", "1h")
	// Clean cache dir at repo root so earlier runs cannot leak entries in.
	root := testFindRepoRoot(t)
	_ = os.RemoveAll(filepath.Join(root, ".goagent", "cache", "prep"))

	// Mock server that returns no tool calls so output == input messages
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		io.WriteString(w, `{"id":"x","object":"chat.completion","created":0,"model":"m","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":""}}]}`)
	}))
	defer srv.Close()

	msgs := []oai.Message{{Role: oai.RoleSystem, Content: "sys"}, {Role: oai.RoleUser, Content: "u"}}
	cfg := cliConfig{prompt: "u", systemPrompt: "sys", baseURL: srv.URL, model: "m", prepHTTPTimeout: time.Second, httpRetries: 0}

	var errBuf bytes.Buffer
	// First run populates cache
	out1, err := runPreStage(cfg, msgs, &errBuf)
	if err != nil {
		t.Fatalf("runPreStage first: %v", err)
	}
	if len(out1) != len(msgs) {
		t.Fatalf("unexpected out len=%d", len(out1))
	}

	// Second run should hit cache and not call server; simulate by closing server
	// connections — a cache miss here would surface as a transport error.
	srv.CloseClientConnections()
	out2, err := runPreStage(cfg, msgs, &errBuf)
	if err != nil {
		t.Fatalf("runPreStage cached: %v", err)
	}
	if got, want := len(out2), len(msgs); got != want {
		t.Fatalf("cached out len=%d want %d", got, want)
	}

	// Bust should bypass cache; point to a server that returns content to distinguish
	srv2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		io.WriteString(w, `{"id":"x","object":"chat.completion","created":0,"model":"m","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","channel":"final","content":"ok"}}]}`)
	}))
	defer srv2.Close()
	cfg2 := cfg
	cfg2.baseURL = srv2.URL
	cfg2.prepCacheBust = true
	out3, err := runPreStage(cfg2, msgs, &errBuf)
	if err != nil {
		t.Fatalf("runPreStage bust: %v", err)
	}
	if len(out3) != len(msgs) {
		t.Fatalf("bust out len=%d want %d", len(out3), len(msgs))
	}
}

// TestPrepCache_TTLExpiry verifies that expired cache entries are ignored.
func TestPrepCache_TTLExpiry(t *testing.T) {
	t.Setenv("GOAGENT_PREP_CACHE_TTL", "1ms")
	root := testFindRepoRoot(t)
	cacheDir := filepath.Join(root, ".goagent", "cache", "prep")
	_ = os.RemoveAll(cacheDir)

	// Stable server that yields empty assistant message (no tool calls)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		io.WriteString(w, `{"id":"x","object":"chat.completion","created":0,"model":"m","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":""}}]}`)
	}))
	defer srv.Close()
	msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}}
	cfg := cliConfig{prompt: "u", systemPrompt: "s", baseURL: srv.URL, model: "m", prepHTTPTimeout: time.Second, httpRetries: 0}
	var errBuf bytes.Buffer
	// Seed the cache with a 1ms TTL entry.
	out, err := runPreStage(cfg, msgs, &errBuf)
	if err != nil {
		t.Fatalf("runPreStage: %v", err)
	}
	if len(out) != len(msgs) {
		t.Fatalf("unexpected out len=%d", len(out))
	}
	// Wait for TTL to expire
	time.Sleep(10 * time.Millisecond)
	// Fully close server to force a network error on new request after TTL expiry
	srv.Close()
	if _, err := runPreStage(cfg, msgs, &errBuf); err == nil {
		t.Fatalf("expected error after TTL expiry when server closed; cache should be ignored")
	}
}

// Fail-open: when pre-stage returns an error, agent logs a WARN once and proceeds.
+func TestPrepFailOpen_WarnsAndProceeds(t *testing.T) { + // Server that always errors (simulate network error) + cl := &http.Client{} + _ = cl // silence unused in case + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Close connection abruptly to simulate failure + hj, ok := w.(http.Hijacker) + if !ok { + http.Error(w, "no hijack", http.StatusInternalServerError) + return + } + conn, _, _ := hj.Hijack() + _ = conn.Close() + })) + defer srv.Close() + + // Configure CLI to call failing pre-stage, then main call uses a working server + // For main call, we need a different server that returns a final message + mainSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Channel: "final", Content: "ok"}}}}) + })) + defer mainSrv.Close() + + cfg := cliConfig{ + prompt: "u", + systemPrompt: "s", + baseURL: mainSrv.URL, // used for main call + model: "m", + prepHTTPTimeout: 200 * time.Millisecond, + httpRetries: 0, + maxSteps: 1, + prepEnabled: true, + } + // Force pre-stage to use failing server via env override for prep base URL + t.Setenv("OAI_PREP_BASE_URL", srv.URL) + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("runAgent exit=%d stderr=%s", code, errBuf.String()) + } + if !strings.Contains(errBuf.String(), "WARN: pre-stage failed; skipping") { + t.Fatalf("expected WARN about pre-stage failure; got: %q", errBuf.String()) + } + if strings.TrimSpace(outBuf.String()) != "ok" { + t.Fatalf("expected main final output 'ok'; got %q", outBuf.String()) + } +} + +// Disabling pre-stage should skip it entirely and not log WARN. 
+func TestPrepEnabled_False_SkipsPreStage(t *testing.T) { + // Pre-stage would point to a non-routable address if used; but we disable it + t.Setenv("OAI_PREP_BASE_URL", "http://127.0.0.1:1") + // Main server returns final content + mainSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Channel: "final", Content: "ok"}}}}) + })) + defer mainSrv.Close() + + cfg := cliConfig{ + prompt: "u", + systemPrompt: "s", + baseURL: mainSrv.URL, + model: "m", + prepEnabled: false, // disable pre-stage + prepEnabledSet: true, + maxSteps: 1, + } + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("runAgent exit=%d stderr=%s", code, errBuf.String()) + } + if strings.Contains(errBuf.String(), "WARN: pre-stage failed; skipping") { + t.Fatalf("did not expect WARN when pre-stage is disabled; got: %q", errBuf.String()) + } + if strings.TrimSpace(outBuf.String()) != "ok" { + t.Fatalf("expected main final output 'ok'; got %q", outBuf.String()) + } +} + +// testFindRepoRoot is a helper for tests to locate the repo root using the prod helper. +func testFindRepoRoot(t *testing.T) string { t.Helper(); return findRepoRoot() } + +// Pre-stage validator: a stray role:"tool" in the pre-stage input must be rejected +// before sending the prep HTTP call, mirroring the main-loop validator behavior. 
+func TestPrepValidator_BlocksStrayTool_NoHTTPCall(t *testing.T) { + called := false + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + t.Fatal("prep server should not be called when pre-stage validation fails") + })) + defer srv.Close() + + // Messages contain a stray tool message without a prior assistant tool_calls + msgs := []oai.Message{ + {Role: oai.RoleUser, Content: "hi"}, + {Role: oai.RoleTool, Name: "echo", ToolCallID: "1", Content: "{\"echo\":\"hi\"}"}, + } + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", prepHTTPTimeout: 200 * time.Millisecond, httpRetries: 0} + var errBuf bytes.Buffer + _, err := runPreStage(cfg, msgs, &errBuf) + if err == nil { + t.Fatalf("expected error due to pre-stage validation failure; stderr=%q", errBuf.String()) + } + if called { + t.Fatalf("HTTP server was contacted despite pre-stage validation failure") + } + if !strings.Contains(errBuf.String(), "prep invalid message sequence") { + t.Fatalf("stderr should mention prep invalid message sequence; got: %q", errBuf.String()) + } +} + +// Parallel tool-calls in pre-stage: when the pre-stage response returns multiple +// tool_calls and a tools manifest is provided, the helper must execute tools +// concurrently and append exactly one tool message per id. 
func TestPrep_ParallelToolCalls_ExecutesConcurrently(t *testing.T) {
	// Build a sleeper tool that respects sleepMs
	dir := t.TempDir()
	helper := filepath.Join(dir, "sleeper.go")
	if err := os.WriteFile(helper, []byte(`package main
import ("encoding/json"; "io"; "os"; "time"; "fmt")
func main(){b,_:=io.ReadAll(os.Stdin); var m map[string]any; _=json.Unmarshal(b,&m); ms:=0; if v,ok:=m["sleepMs"].(float64); ok { ms=int(v) }; if ms>0 { time.Sleep(time.Duration(ms)*time.Millisecond) }; _=json.NewEncoder(os.Stdout).Encode(map[string]any{"sleptMs":ms}); fmt.Print("")}
`), 0o644); err != nil {
		t.Fatalf("write tool: %v", err)
	}
	bin := filepath.Join(dir, "sleeper")
	if runtime.GOOS == "windows" {
		bin += ".exe"
	}
	if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil {
		t.Fatalf("build tool: %v: %s", err, string(out))
	}

	// Write a tools.json manifest referencing the sleeper
	manifestPath := filepath.Join(dir, "tools.json")
	m := map[string]any{
		"tools": []map[string]any{{
			"name":        "sleeper",
			"description": "sleep tool",
			"schema":      map[string]any{"type": "object", "properties": map[string]any{"sleepMs": map[string]any{"type": "integer"}}},
			"command":     []string{bin},
			"timeoutSec":  3,
		}},
	}
	b, err := json.Marshal(m)
	if err != nil {
		t.Fatalf("marshal manifest: %v", err)
	}
	if err := os.WriteFile(manifestPath, b, 0o644); err != nil {
		t.Fatalf("write manifest: %v", err)
	}

	// Fake server returns a single response with two tool_calls
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var req oai.ChatCompletionsRequest
		_ = json.NewDecoder(r.Body).Decode(&req)
		resp := oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{
			FinishReason: "tool_calls",
			Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{
				{ID: "a", Type: "function", Function: oai.ToolCallFunction{Name: "sleeper", Arguments: "{\"sleepMs\":600}"}},
				{ID: "b", Type: "function", Function: oai.ToolCallFunction{Name: "sleeper", Arguments: "{\"sleepMs\":600}"}},
			}},
		}}}
		_ = json.NewEncoder(w).Encode(resp)
	}))
	defer srv.Close()

	// Measure elapsed around runPreStage and ensure it's < sequential time (~1200ms)
	cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", prepHTTPTimeout: 3 * time.Second, httpRetries: 0, toolsPath: manifestPath, prepToolsAllowExternal: true}
	msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}}
	var errBuf bytes.Buffer
	start := time.Now()
	outMsgs, err := runPreStage(cfg, msgs, &errBuf)
	elapsed := time.Since(start)
	if err != nil {
		t.Fatalf("runPreStage: %v (stderr=%s)", err, errBuf.String())
	}
	// Expect original messages + assistant tool_calls + two tool outputs
	var toolCount int
	for _, m := range outMsgs {
		if m.Role == oai.RoleTool {
			toolCount++
		}
	}
	if toolCount != 2 {
		t.Fatalf("expected 2 tool messages, got %d", toolCount)
	}
	// Two 600ms tool calls run in parallel finish well under the 1200ms a
	// sequential run would take; 1100ms leaves headroom for scheduling noise.
	if elapsed >= 1100*time.Millisecond {
		t.Fatalf("pre-stage tool calls not parallel; elapsed=%v", elapsed)
	}
}

// Pre-stage built-ins: disallow external execs by default and allow read-only adapters.
+func TestPrep_Builtins_ReadOnlyAndNoExecByDefault(t *testing.T) { + // Fake server returning a tool_calls for two tools: fs.read_file and a disallowed external tool "echo" + dir := t.TempDir() + // Create a file to read + target := filepath.Join(dir, "note.txt") + if err := os.WriteFile(target, []byte("hello"), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + _ = json.NewDecoder(r.Body).Decode(&req) + // Return assistant with tool_calls to fs.read_file and echo + payload := oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "tool_calls", + Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{ + {ID: "a", Type: "function", Function: oai.ToolCallFunction{Name: "fs.read_file", Arguments: fmt.Sprintf(`{"path":"%s"}`, strings.TrimPrefix(target, dir+string(os.PathSeparator)))}}, + {ID: "b", Type: "function", Function: oai.ToolCallFunction{Name: "echo", Arguments: `{"text":"hi"}`}}, + }}, + }}} + _ = json.NewEncoder(w).Encode(payload) + })) + defer srv.Close() + + // Run with CWD at dir so repo-relative path enforcement applies + cwd, _ := os.Getwd() + defer func() { _ = os.Chdir(cwd) }() + if err := os.Chdir(dir); err != nil { + t.Fatalf("chdir: %v", err) + } + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", prepHTTPTimeout: time.Second, httpRetries: 0} + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + var errBuf bytes.Buffer + outMsgs, err := runPreStage(cfg, msgs, &errBuf) + if err != nil { + t.Fatalf("runPreStage: %v (stderr=%s)", err, errBuf.String()) + } + // Expect two tool messages appended; fs.read_file returns content, echo returns error unknown tool + var gotRead, gotEchoErr bool + for _, m := range outMsgs { + if m.Role == oai.RoleTool && m.Name == "fs.read_file" { + if 
!strings.Contains(m.Content, "\"content\":\"hello\"") { + t.Fatalf("fs.read_file content missing; got %s", m.Content) + } + gotRead = true + } + if m.Role == oai.RoleTool && m.Name == "echo" { + if !strings.Contains(m.Content, "unknown tool") { + t.Fatalf("echo should be unknown under built-ins; got %s", m.Content) + } + gotEchoErr = true + } + } + if !gotRead || !gotEchoErr { + t.Fatalf("expected fs.read_file success and echo unknown error; gotRead=%v gotEchoErr=%v", gotRead, gotEchoErr) + } +} + +// If -prep-tools-allow-external is enabled, the pre-stage must reuse manifest rules: +// - resolve the pre-stage manifest relative to its own file +// - require ./tools/bin/* (Windows .exe honored) +// - enforce escape/.. rejection and cross-platform path normalization +func TestPrep_Manifest_ResolvesRelativeAgainstManifestDir(t *testing.T) { + // Create nested manifest directory + repo := t.TempDir() + nested := filepath.Join(repo, "sub", "manifest") + if err := os.MkdirAll(filepath.Join(nested, "tools", "bin"), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + + // Build a tiny tool binary under the manifest's tools/bin + src := filepath.Join(repo, "hello_tool.go") + if err := os.WriteFile(src, []byte(`package main +import ("encoding/json"; "io"; "os") +func main(){_,_ = io.ReadAll(os.Stdin); _ = json.NewEncoder(os.Stdout).Encode(map[string]any{"ok":true})} +`), 0o644); err != nil { + t.Fatalf("write src: %v", err) + } + bin := filepath.Join(nested, "tools", "bin", "hello_tool") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, src).CombinedOutput(); err != nil { + t.Fatalf("build tool: %v: %s", err, string(out)) + } + + // Write manifest that references ./tools/bin/hello_tool relative to the manifest dir + manPath := filepath.Join(nested, "tools.json") + manifest := map[string]any{ + "tools": []map[string]any{{ + "name": "hello_tool", + "description": "say ok", + "schema": map[string]any{"type": 
"object", "additionalProperties": false}, + "command": []string{"./tools/bin/hello_tool"}, + "timeoutSec": 2, + }}, + } + b, err := json.Marshal(manifest) + if err != nil { + t.Fatalf("marshal manifest: %v", err) + } + if err := os.WriteFile(manPath, b, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + // Fake server returning a tool_calls to hello_tool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + _ = json.NewDecoder(r.Body).Decode(&req) + resp := oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "tool_calls", + Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{{ID: "t1", Type: "function", Function: oai.ToolCallFunction{Name: "hello_tool", Arguments: "{}"}}}}, + }}} + _ = json.NewEncoder(w).Encode(resp) + })) + defer srv.Close() + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", prepHTTPTimeout: 3 * time.Second, httpRetries: 0, toolsPath: manPath, prepToolsAllowExternal: true} + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + var errBuf bytes.Buffer + outMsgs, err := runPreStage(cfg, msgs, &errBuf) + if err != nil { + t.Fatalf("runPreStage: %v (stderr=%s)", err, errBuf.String()) + } + // Expect a tool message from hello_tool with {"ok":true} + var found bool + for _, m := range outMsgs { + if m.Role == oai.RoleTool && m.Name == "hello_tool" { + var obj map[string]any + _ = json.Unmarshal([]byte(m.Content), &obj) + if v, ok := obj["ok"].(bool); ok && v { + found = true + break + } + } + } + if !found { + t.Fatalf("expected hello_tool tool output; stderr=%s", errBuf.String()) + } +} + +func TestPrep_Manifest_RejectsEscapeAndNonToolsBin(t *testing.T) { + dir := t.TempDir() + + // Case 1: escape via .. 
in command[0] + man1 := filepath.Join(dir, "tools_escape.json") + m1 := map[string]any{"tools": []map[string]any{{"name": "x", "command": []string{"../bin/x"}}}} + b1, _ := json.Marshal(m1) + if err := os.WriteFile(man1, b1, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "tool_calls", + Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{{ID: "t", Type: "function", Function: oai.ToolCallFunction{Name: "x", Arguments: "{}"}}}}, + }}}) + })) + defer srv.Close() + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", prepHTTPTimeout: time.Second, httpRetries: 0, toolsPath: man1, prepToolsAllowExternal: true} + var errBuf bytes.Buffer + if _, err := runPreStage(cfg, []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}}, &errBuf); err == nil { + t.Fatalf("expected error due to manifest escape; stderr=%s", errBuf.String()) + } + if !strings.Contains(errBuf.String(), "read manifest") && !strings.Contains(errBuf.String(), "command[0] must not start with '..'") { + t.Fatalf("stderr should mention escape rejection; got %s", errBuf.String()) + } + + // Case 2: missing ./tools/bin prefix + man2 := filepath.Join(dir, "tools_bad_prefix.json") + m2 := map[string]any{"tools": []map[string]any{{"name": "y", "command": []string{"bin/y"}}}} + b2, _ := json.Marshal(m2) + if err := os.WriteFile(man2, b2, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + cfg.toolsPath = man2 + errBuf.Reset() + if _, err := runPreStage(cfg, []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}}, &errBuf); err == nil { + t.Fatalf("expected error due to missing ./tools/bin prefix; stderr=%s", errBuf.String()) + } + if 
!strings.Contains(errBuf.String(), "relative command[0] must start with ./tools/bin/") { + t.Fatalf("stderr should mention ./tools/bin requirement; got %s", errBuf.String()) + } +} + +func TestPrep_Manifest_NormalizesWindowsBackslashes(t *testing.T) { + // This test uses a manifest command with backslashes; loader should normalize and accept it. + repo := t.TempDir() + nested := filepath.Join(repo, "nest") + if err := os.MkdirAll(filepath.Join(nested, "tools", "bin"), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + + // Build a tiny tool + src := filepath.Join(repo, "ok.go") + if err := os.WriteFile(src, []byte(`package main +import ("encoding/json"; "io"; "os") +func main(){_,_ = io.ReadAll(os.Stdin); _ = json.NewEncoder(os.Stdout).Encode(map[string]any{"ok":true})} +`), 0o644); err != nil { + t.Fatalf("write src: %v", err) + } + bin := filepath.Join(nested, "tools", "bin", "ok") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, src).CombinedOutput(); err != nil { + t.Fatalf("build tool: %v: %s", err, string(out)) + } + + // Use Windows-style path in manifest + man := filepath.Join(nested, "tools_backslashes.json") + cmd0 := ".\\tools\\bin\\ok" + manifest := map[string]any{"tools": []map[string]any{{"name": "ok", "command": []string{cmd0}, "schema": map[string]any{"type": "object"}}}} + b, _ := json.Marshal(manifest) + if err := os.WriteFile(man, b, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "tool_calls", + Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{{ID: "t", Type: "function", Function: oai.ToolCallFunction{Name: "ok", Arguments: "{}"}}}}, + }}}) + })) + defer srv.Close() + + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: 
srv.URL, model: "m", prepHTTPTimeout: 2 * time.Second, httpRetries: 0, toolsPath: man, prepToolsAllowExternal: true} + msgs := []oai.Message{{Role: oai.RoleSystem, Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + var errBuf bytes.Buffer + outMsgs, err := runPreStage(cfg, msgs, &errBuf) + if err != nil { + t.Fatalf("runPreStage: %v (stderr=%s)", err, errBuf.String()) + } + // Expect one tool output + var found bool + for _, m := range outMsgs { + if m.Role == oai.RoleTool && m.Name == "ok" { + found = true + } + } + if !found { + t.Fatalf("expected ok tool output; stderr=%s", errBuf.String()) + } +} + +// When -prep-tools-allow-external is set and -prep-tools is provided, the pre-stage +// must use the pre-stage manifest instead of -tools. +func TestPrep_UsesPrepToolsWhenProvided(t *testing.T) { + repo := t.TempDir() + // Build two tiny tools under separate manifest dirs + // Tool A under manifest A + manADir := filepath.Join(repo, "A") + if err := os.MkdirAll(filepath.Join(manADir, "tools", "bin"), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + srcA := filepath.Join(repo, "a.go") + if err := os.WriteFile(srcA, []byte(`package main +import ("encoding/json"; "io"; "os") +func main(){_,_ = io.ReadAll(os.Stdin); _ = json.NewEncoder(os.Stdout).Encode(map[string]any{"which":"A"})} +`), 0o644); err != nil { + t.Fatalf("write srcA: %v", err) + } + binA := filepath.Join(manADir, "tools", "bin", "which") + if runtime.GOOS == "windows" { + binA += ".exe" + } + if out, err := exec.Command("go", "build", "-o", binA, srcA).CombinedOutput(); err != nil { + t.Fatalf("build A: %v: %s", err, string(out)) + } + manA := filepath.Join(manADir, "tools.json") + mA := map[string]any{"tools": []map[string]any{{"name": "which", "command": []string{"./tools/bin/which"}, "schema": map[string]any{"type": "object"}}}} + if b, err := json.Marshal(mA); err != nil { + t.Fatalf("marshal A: %v", err) + } else if err := os.WriteFile(manA, b, 0o644); err != nil { + t.Fatalf("write manA: 
%v", err) + } + + // Tool B under manifest B + manBDir := filepath.Join(repo, "B") + if err := os.MkdirAll(filepath.Join(manBDir, "tools", "bin"), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + srcB := filepath.Join(repo, "b.go") + if err := os.WriteFile(srcB, []byte(`package main +import ("encoding/json"; "io"; "os") +func main(){_,_ = io.ReadAll(os.Stdin); _ = json.NewEncoder(os.Stdout).Encode(map[string]any{"which":"B"})} +`), 0o644); err != nil { + t.Fatalf("write srcB: %v", err) + } + binB := filepath.Join(manBDir, "tools", "bin", "which") + if runtime.GOOS == "windows" { + binB += ".exe" + } + if out, err := exec.Command("go", "build", "-o", binB, srcB).CombinedOutput(); err != nil { + t.Fatalf("build B: %v: %s", err, string(out)) + } + manB := filepath.Join(manBDir, "tools.json") + mB := map[string]any{"tools": []map[string]any{{"name": "which", "command": []string{"./tools/bin/which"}, "schema": map[string]any{"type": "object"}}}} + if b, err := json.Marshal(mB); err != nil { + t.Fatalf("marshal B: %v", err) + } else if err := os.WriteFile(manB, b, 0o644); err != nil { + t.Fatalf("write manB: %v", err) + } + + // Mock model returns a tool_call to function name "which" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "tool_calls", + Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{{ID: "t1", Type: "function", Function: oai.ToolCallFunction{Name: "which", Arguments: "{}"}}}}, + }}}) + })) + defer srv.Close() + + // Provide both -tools and -prep-tools; expect pre-stage to use manB + cfg := cliConfig{prompt: "x", systemPrompt: "s", baseURL: srv.URL, model: "m", prepHTTPTimeout: 2 * time.Second, httpRetries: 0, toolTimeout: 2 * time.Second, debug: true, + toolsPath: manA, prepToolsPath: manB, prepToolsAllowExternal: true} + msgs := []oai.Message{{Role: oai.RoleSystem, 
Content: "s"}, {Role: oai.RoleUser, Content: "u"}} + var errBuf bytes.Buffer + outMsgs, err := runPreStage(cfg, msgs, &errBuf) + if err != nil { + t.Fatalf("runPreStage: %v (stderr=%s)", err, errBuf.String()) + } + // Find tool output and assert it came from B + var which string + for _, m := range outMsgs { + if m.Role == oai.RoleTool && m.Name == "which" { + var obj map[string]any + _ = json.Unmarshal([]byte(m.Content), &obj) + if s, ok := obj["which"].(string); ok { + which = s + } + } + } + if which != "B" { + t.Fatalf("expected pre-stage to use -prep-tools manifest; got which=%q stderr=%s", which, errBuf.String()) + } +} + +// https://github.com/hyperifyio/goagent/issues/289 +// When -top-p is provided, temperature must be omitted and a one-line warning printed. +func TestOneKnobRule_TopPOmitsTemperatureAndWarns(t *testing.T) { + // Fake server to capture request + var seenTemp *float64 + var seenTopP *float64 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + seenTemp = req.Temperature + seenTopP = req.TopP + if err := json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}); err != nil { + t.Fatalf("encode: %v", err) + } + })) + defer srv.Close() + + // Run with -top-p set and ensure temp omitted and warning emitted + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{"-prompt", "x", "-base-url", srv.URL, "-model", "m", "-top-p", "0.9"}, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errBuf.String()) + } + if seenTemp != nil { + t.Fatalf("expected temperature to be omitted when -top-p is set") + } + if seenTopP == nil || *seenTopP != 0.9 { + if seenTopP == nil { + t.Fatalf("expected top_p to be set when -top-p is provided") + } + 
t.Fatalf("expected top_p=0.9, got %v", *seenTopP) + } + if !strings.Contains(errBuf.String(), "omitting temperature per one-knob rule") { + t.Fatalf("expected one-knob warning on stderr; got: %q", errBuf.String()) + } +} + +// Channel printing harmonization: default prints only final to stdout; -verbose prints critic/confidence to stderr; -quiet still prints final only +func TestChannelPrinting_DefaultVerboseQuiet(t *testing.T) { + mkServer := func() *httptest.Server { + steps := 0 + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + _ = json.NewDecoder(r.Body).Decode(&req) + steps++ + switch steps { + case 1: + // Return a non-final channel message with content + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Channel: "critic", Content: "c1"}, + }}}) + default: + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Channel: "final", Content: "done"}, + }}}) + } + })) + } + + run := func(args ...string) (string, string, int) { + srv := mkServer() + defer srv.Close() + var outBuf, errBuf bytes.Buffer + code := cliMain(append([]string{"-prompt", "x", "-base-url", srv.URL, "-model", "m"}, args...), &outBuf, &errBuf) + return outBuf.String(), errBuf.String(), code + } + + // Default: only final to stdout, no critic on stderr + out, errS, code := run() + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errS) + } + if strings.TrimSpace(out) != "done" { + t.Fatalf("stdout should be final only; got %q", strings.TrimSpace(out)) + } + if strings.Contains(errS, "c1") { + t.Fatalf("stderr should not include critic under default; got %q", errS) + } + + // Verbose: critic to stderr, final to stdout + out, errS, code = run("-verbose") + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errS) + 
} + if !strings.Contains(errS, "c1") { + t.Fatalf("stderr should include critic under -verbose; got %q", errS) + } + if strings.TrimSpace(out) != "done" { + t.Fatalf("stdout should be final; got %q", strings.TrimSpace(out)) + } + + // Quiet: still prints final to stdout, no critic to stderr + out, errS, code = run("-quiet") + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errS) + } + if strings.TrimSpace(out) != "done" { + t.Fatalf("stdout should be final under -quiet; got %q", strings.TrimSpace(out)) + } + if strings.Contains(errS, "c1") { + t.Fatalf("stderr should not include critic under -quiet; got %q", errS) + } +} + +// FEATURE_CHECKLIST L27: Harmony normalizer — roles validation and channel normalization +func TestHarmonyNormalizer_RoleValidationAndChannelNormalization(t *testing.T) { + // Unknown role should cause pre-stage to fail before HTTP call + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Fatalf("server should not be called when role is invalid") + })) + defer srv.Close() + cfg := cliConfig{prompt: "u", systemPrompt: "s", baseURL: srv.URL, model: "m", prepHTTPTimeout: time.Second, httpRetries: 0} + // Inject initMessages to include an invalid role + cfg.initMessages = []oai.Message{{Role: "System ", Content: "s"}, {Role: "User", Content: "u"}, {Role: "DEVELOPER", Content: "d"}, {Role: "assistant", Channel: "Critic!", Content: "c"}, {Role: "weird", Content: "x"}} + var errBuf bytes.Buffer + if _, err := runPreStage(cfg, cfg.initMessages, &errBuf); err == nil { + t.Fatalf("expected error for invalid role; stderr=%q", errBuf.String()) + } + + // Valid roles with messy casing/whitespace should normalize; assistant channel cleaned + steps := 0 + okSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + steps++ + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + if len(req.Messages) < 
4 { + t.Fatalf("expected at least 4 normalized messages; got %d", len(req.Messages)) + } + if req.Messages[0].Role != oai.RoleSystem { + t.Fatalf("role[0]=%q want system", req.Messages[0].Role) + } + if req.Messages[1].Role != oai.RoleDeveloper { + t.Fatalf("role[1]=%q want developer", req.Messages[1].Role) + } + if req.Messages[2].Role != oai.RoleUser { + t.Fatalf("role[2]=%q want user", req.Messages[2].Role) + } + if req.Messages[3].Role != oai.RoleAssistant { + t.Fatalf("role[3]=%q want assistant", req.Messages[3].Role) + } + if req.Messages[3].Channel != "critic" { + t.Fatalf("assistant channel normalized=%q want 'critic'", req.Messages[3].Channel) + } + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}) + })) + defer okSrv.Close() + cfg2 := cliConfig{prompt: "u", systemPrompt: "s", baseURL: okSrv.URL, model: "m", prepHTTPTimeout: time.Second, httpRetries: 0} + msgs := []oai.Message{{Role: " System\t", Content: "s"}, {Role: "DEVELOPER ", Content: "d"}, {Role: " user", Content: "u"}, {Role: "assistant", Channel: " Critic!!! 
", Content: "c"}} + var buf bytes.Buffer + if _, err := runPreStage(cfg2, msgs, &buf); err != nil { + t.Fatalf("unexpected error: %v stderr=%q", err, buf.String()) + } +} + +// FEATURE_CHECKLIST L24: Custom channel routing via -channel-route name=stdout|stderr|omit +func TestChannelRoute_FlagParsingAndRouting(t *testing.T) { + // Invalid channel name + { + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{"-prompt", "x", "-channel-route", "unknown=stdout"}, &outBuf, &errBuf) + if code == 0 { + t.Fatalf("expected non-zero exit for invalid channel; stderr=%s", errBuf.String()) + } + if !strings.Contains(errBuf.String(), "invalid -channel-route channel") { + t.Fatalf("expected invalid channel error; got %q", errBuf.String()) + } + } + + // Invalid destination + { + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{"-prompt", "x", "-channel-route", "critic=somewhere"}, &outBuf, &errBuf) + if code == 0 { + t.Fatalf("expected non-zero exit for invalid destination; stderr=%s", errBuf.String()) + } + if !strings.Contains(errBuf.String(), "invalid -channel-route destination") { + t.Fatalf("expected invalid destination error; got %q", errBuf.String()) + } + } + + // Routing behavior: route critic to stdout, final to stderr, and omit confidence + { + steps := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + steps++ + switch steps { + case 1: + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Channel: "critic", Content: "c1"}, + }}}) + case 2: + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Channel: "confidence", Content: "p=0.9"}, + }}}) + default: + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, 
Channel: "final", Content: "done"}, + }}}) + } + })) + defer srv.Close() + + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{"-prompt", "x", "-base-url", srv.URL, "-model", "m", "-prep-enabled=false", "-channel-route", "critic=stdout", "-channel-route", "final=stderr", "-channel-route", "confidence=omit", "-verbose"}, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errBuf.String()) + } + // critic goes to stdout under routing + if !strings.Contains(outBuf.String(), "c1") { + t.Fatalf("stdout should include critic due to routing; got %q", outBuf.String()) + } + // final goes to stderr under routing + if !strings.Contains(errBuf.String(), "done") { + t.Fatalf("stderr should include final due to routing; got %q", errBuf.String()) + } + // confidence omitted entirely + if strings.Contains(outBuf.String(), "p=0.9") || strings.Contains(errBuf.String(), "p=0.9") { + t.Fatalf("confidence should be omitted by routing") + } + } +} + +// FEATURE_CHECKLIST L22: Streaming for assistant[final]. If server supports streaming, +// stream only assistant{channel:"final"} to stdout; buffer other channels for -verbose. +// -quiet still prints just the streamed final. 
+func TestStreaming_FinalChannelOnly_WithVerboseBuffersNonFinal(t *testing.T) { + // SSE server that streams when req.Stream is true; otherwise returns minimal JSON + mkServer := func() *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + if !req.Stream { + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: ""}}}}) + return + } + w.Header().Set("Content-Type", "text/event-stream") + flusher, _ := w.(http.Flusher) + _, _ = io.WriteString(w, "data: {\"id\":\"s1\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"channel\":\"critic\",\"content\":\"c1\"}}]}\n\n") + if flusher != nil { + flusher.Flush() + } + _, _ = io.WriteString(w, "data: {\"id\":\"s1\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"channel\":\"final\",\"content\":\"he\"}}]}\n\n") + if flusher != nil { + flusher.Flush() + } + _, _ = io.WriteString(w, "data: {\"id\":\"s1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"llo\"}}]}\n\n") + if flusher != nil { + flusher.Flush() + } + _, _ = io.WriteString(w, "data: [DONE]\n\n") + if flusher != nil { + flusher.Flush() + } + })) + } + + run := func(args ...string) (string, string, int) { + srv := mkServer() + defer srv.Close() + var outBuf, errBuf bytes.Buffer + code := cliMain(append([]string{"-prompt", "x", "-base-url", srv.URL, "-model", "m", "-prep-enabled=false", "-stream-final"}, args...), &outBuf, &errBuf) + return outBuf.String(), errBuf.String(), code + } + + // Default: only final streamed to stdout; critic buffered and not printed + out, errS, code := run() + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errS) + } + if strings.TrimSpace(out) != "hello" { + t.Fatalf("stdout should be streamed final only; got %q", 
strings.TrimSpace(out)) + } + if strings.Contains(errS, "c1") { + t.Fatalf("stderr should not include critic under default; got %q", errS) + } + + // Verbose: critic printed to stderr after stream completes; final to stdout + out, errS, code = run("-verbose") + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errS) + } + if strings.TrimSpace(out) != "hello" { + t.Fatalf("stdout should be final; got %q", strings.TrimSpace(out)) + } + if !strings.Contains(errS, "c1") { + t.Fatalf("stderr should include critic under -verbose; got %q", errS) + } + + // Quiet: still prints final to stdout; no critic to stderr + out, errS, code = run("-quiet") + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errS) + } + if strings.TrimSpace(out) != "hello" { + t.Fatalf("stdout should be final under -quiet; got %q", strings.TrimSpace(out)) + } + if strings.Contains(errS, "c1") { + t.Fatalf("stderr should not include critic under -quiet; got %q", errS) + } +} + +// https://github.com/hyperifyio/goagent/issues/285 +// Precedence: flag -temp > env LLM_TEMPERATURE > default 1.0 +func TestTemperaturePrecedence_FlagThenEnvThenDefault(t *testing.T) { + save := func(k string) (string, bool) { v, ok := os.LookupEnv(k); return v, ok } + restore := func(k, v string, ok bool) { + if ok { + if err := os.Setenv(k, v); err != nil { + t.Fatalf("setenv: %v", err) + } + } else { + if err := os.Unsetenv(k); err != nil { + t.Fatalf("unsetenv: %v", err) + } + } + } + envVal, envOK := save("LLM_TEMPERATURE") + defer restore("LLM_TEMPERATURE", envVal, envOK) + + // Case: env only + if err := os.Setenv("LLM_TEMPERATURE", "0.7"); err != nil { + t.Fatalf("set env: %v", err) + } + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-prompt", "x"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.temperature != 0.7 { + t.Fatalf("env should set temperature=0.7; got %v", cfg.temperature) + } + + // Case: flag overrides env + 
os.Args = []string{"agentcli.test", "-prompt", "x", "-temp", "0.4"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.temperature != 0.4 { + t.Fatalf("flag should override env; got %v", cfg.temperature) + } + + // Case: default when env unset and no flag + if err := os.Unsetenv("LLM_TEMPERATURE"); err != nil { + t.Fatalf("unset env: %v", err) + } + os.Args = []string{"agentcli.test", "-prompt", "x"} + cfg, code = parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.temperature != 1.0 { + t.Fatalf("default temperature should be 1.0; got %v", cfg.temperature) + } +} + +// https://github.com/hyperifyio/goagent/issues/214 +func TestHelp_PrintsUsageAndExitsZero(t *testing.T) { + // Capture stdout + var outBuf, errBuf bytes.Buffer + // Simulate help via various tokens + for _, token := range []string{"--help", "-h", "help"} { + t.Run(token, func(t *testing.T) { + // Prepare args + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", token} + + // Replace os.Stdout/Stderr via writers by invoking printUsage directly + outBuf.Reset() + errBuf.Reset() + // Call main path segments: emulate early help detection + if !helpRequested(os.Args[1:]) { + t.Fatalf("expected helpRequested for %s", token) + } + printUsage(&outBuf) + // Validate output contains key lines + got := outBuf.String() + for _, substr := range []string{ + "Usage:", + "-prompt", + "-tools", + "-base-url", + "-api-key", + "-http-timeout", + "Examples:", + } { + if !strings.Contains(got, substr) { + t.Fatalf("usage missing %q; got:\n%s", substr, got) + } + } + // Also ensure no error text is printed by default path here + if errBuf.Len() != 0 { + t.Fatalf("unexpected stderr: %s", errBuf.String()) + } + // Sanity: do nothing; zero exit is implied + }) + } +} + +// https://github.com/hyperifyio/goagent/issues/350 +// When the agent reaches the configured step cap, it must terminate with a +// clear "needs 
human review" message and a non-zero exit. The loop should +// perform exactly cfg.maxSteps HTTP calls in this case. +func TestAgentLoop_MaxStepsCap_HumanReviewMessage(t *testing.T) { + var calls int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + calls++ + // Return an assistant message with empty content and no tool_calls + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "stop", + Message: oai.Message{Role: oai.RoleAssistant, Content: ""}, + }}}) + })) + defer srv.Close() + + cfg := cliConfig{ + prompt: "x", + systemPrompt: "sys", + baseURL: srv.URL, + model: "m", + maxSteps: 3, + httpTimeout: 2 * time.Second, + toolTimeout: 1 * time.Second, + temperature: 0, + } + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code == 0 { + t.Fatalf("expected non-zero exit when step cap is reached; stdout=%q stderr=%q", outBuf.String(), errBuf.String()) + } + if calls != 3 { + t.Fatalf("expected exactly 3 HTTP calls (one per step), got %d", calls) + } + if !strings.Contains(strings.ToLower(errBuf.String()), "needs human review") { + t.Fatalf("stderr must contain 'needs human review'; got: %q", errBuf.String()) + } +} + +// https://github.com/hyperifyio/goagent/issues/350 +// Hard ceiling: regardless of the provided -max-steps, the agent must clamp to 15. +// Verify that with an excessively large maxSteps, we perform exactly 15 calls and +// emit the human review message. 
+func TestAgentLoop_HardCeilingOf15(t *testing.T) { + var calls int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + calls++ + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "stop", + Message: oai.Message{Role: oai.RoleAssistant, Content: ""}, + }}}) + })) + defer srv.Close() + + cfg := cliConfig{ + prompt: "x", + systemPrompt: "sys", + baseURL: srv.URL, + model: "m", + maxSteps: 100, // should be clamped to 15 + httpTimeout: 2 * time.Second, + toolTimeout: 1 * time.Second, + temperature: 0, + } + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code == 0 { + t.Fatalf("expected non-zero exit when hard ceiling is reached; stdout=%q stderr=%q", outBuf.String(), errBuf.String()) + } + if calls != 15 { + t.Fatalf("expected exactly 15 HTTP calls due to hard ceiling; got %d", calls) + } + if !strings.Contains(strings.ToLower(errBuf.String()), "needs human review") { + t.Fatalf("stderr must contain 'needs human review'; got: %q", errBuf.String()) + } +} + +// https://github.com/hyperifyio/goagent/issues/262 +func TestVersion_PrintsAndExitsZero(t *testing.T) { + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{"--version"}, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("exit code = %d; want 0", code) + } + got := outBuf.String() + if !strings.Contains(got, "agentcli version") { + t.Fatalf("stdout missing version header; got: %q", got) + } + if errBuf.Len() != 0 { + t.Fatalf("stderr should be empty; got: %q", errBuf.String()) + } +} + +// https://github.com/hyperifyio/goagent/issues/252 +func TestMissingPrompt_PrintsErrorUsageAndExitsTwo(t *testing.T) { + // Simulate running with no -prompt and no special flags + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{}, &outBuf, &errBuf) + if code != 2 { + t.Fatalf("exit code = %d; want 2", code) + } + gotErr := errBuf.String() + if !strings.Contains(gotErr, 
"error: -prompt is required") { + t.Fatalf("stderr missing error line; got:\n%s", gotErr) + } + if !strings.Contains(gotErr, "Usage:") || !strings.Contains(gotErr, "-prompt") { + t.Fatalf("stderr missing usage synopsis; got:\n%s", gotErr) + } + if outBuf.Len() != 0 { + t.Fatalf("stdout should be empty; got: %q", outBuf.String()) + } +} + +// https://github.com/hyperifyio/goagent/issues/246 +func TestPrintConfig_EmitsResolvedConfigJSONAndExitsZero(t *testing.T) { + // Save/restore env for OAI_HTTP_TIMEOUT + val, ok := os.LookupEnv("OAI_HTTP_TIMEOUT") + if ok { + defer func() { + if err := os.Setenv("OAI_HTTP_TIMEOUT", val); err != nil { + t.Fatalf("restore env: %v", err) + } + }() + } else { + defer func() { + if err := os.Unsetenv("OAI_HTTP_TIMEOUT"); err != nil { + t.Fatalf("unset env: %v", err) + } + }() + } + if err := os.Setenv("OAI_HTTP_TIMEOUT", "100ms"); err != nil { + t.Fatalf("set env: %v", err) + } + + // Prepare args: no -prompt required when -print-config is set + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-print-config", "-model", "m", "-base-url", "http://example"} + + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + + var outBuf bytes.Buffer + exit := printResolvedConfig(cfg, &outBuf) + if exit != 0 { + t.Fatalf("expected exit 0") + } + // Validate JSON contains fields and sources + got := outBuf.String() + for _, substr := range []string{ + "\"model\": \"m\"", + "\"baseURL\": \"http://example\"", + "\"httpTimeout\": \"100ms\"", + "\"httpTimeoutSource\": \"env\"", + "\"prepHTTPTimeout\": ", + "\"prepHTTPTimeoutSource\": ", + "\"prep\": ", + "\"enabled\": ", + "\"toolTimeout\": ", + "\"timeout\": ", + "\"timeoutSource\": ", + "\"httpRetries\": ", + "\"httpRetryBackoff\": ", + } { + if !strings.Contains(got, substr) { + t.Fatalf("print-config missing %q; got:\n%s", substr, got) + } + } +} + +func TestPrepOverrides_Precedence_FlagThenEnvThenInherit(t *testing.T) { 
+ t.Setenv("OAI_PREP_MODEL", "env-model") + t.Setenv("OAI_PREP_BASE_URL", "http://env-base") + t.Setenv("OAI_PREP_API_KEY", "env-key") + t.Setenv("OAI_PREP_HTTP_RETRIES", "5") + t.Setenv("OAI_PREP_HTTP_RETRY_BACKOFF", "750ms") + + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-print-config", "-model", "m", "-base-url", "http://base", "-api-key", "k", + "-prep-model", "flag-model", "-prep-base-url", "http://flag-base", "-prep-api-key", "flag-key", + "-prep-http-retries", "7", "-prep-http-retry-backoff", "900ms"} + + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + + var out bytes.Buffer + if exit := printResolvedConfig(cfg, &out); exit != 0 { + t.Fatalf("exit %d", exit) + } + got := out.String() + for _, want := range []string{ + "\"prep\": ", + "\"model\": \"flag-model\"", + "\"baseURL\": \"http://flag-base\"", + "\"apiKeyPresent\": true", + "\"httpRetries\": 7", + "\"httpRetryBackoff\": \"900ms\"", + } { + if !strings.Contains(got, want) { + t.Fatalf("print-config missing %q; got:\n%s", want, got) + } + } +} + +func TestPrepOverrides_EnvWhenNoFlags(t *testing.T) { + t.Setenv("OAI_PREP_MODEL", "env-model") + t.Setenv("OAI_PREP_BASE_URL", "http://env-base") + t.Setenv("OAI_PREP_API_KEY", "env-key") + t.Setenv("OAI_PREP_HTTP_RETRIES", "4") + t.Setenv("OAI_PREP_HTTP_RETRY_BACKOFF", "550ms") + + orig := os.Args + defer func() { os.Args = orig }() + os.Args = []string{"agentcli.test", "-print-config", "-model", "m", "-base-url", "http://base"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + var out bytes.Buffer + if exit := printResolvedConfig(cfg, &out); exit != 0 { + t.Fatalf("exit %d", exit) + } + got := out.String() + for _, want := range []string{ + "\"prep\": ", + "\"modelSource\": \"env\"", + "\"baseURLSource\": \"env\"", + "\"apiKeySource\": \"env:OAI_PREP_API_KEY\"", + "\"httpRetriesSource\": \"env\"", + "\"httpRetryBackoffSource\": \"env\"", + 
"\"httpRetries\": 4", + "\"httpRetryBackoff\": \"550ms\"", + } { + if !strings.Contains(got, want) { + t.Fatalf("print-config missing %q; got:\n%s", want, got) + } + } +} + +// https://github.com/hyperifyio/goagent/issues/1 +func TestRunAgent_ToolConversationLoop(t *testing.T) { + // Fake tool: echo stdin to stdout + dir := t.TempDir() + helper := filepath.Join(dir, "echo.go") + if err := os.WriteFile(helper, []byte(`package main +import ("io"; "os"; "fmt") +func main(){b,_:=io.ReadAll(os.Stdin); fmt.Print(string(b))} +`), 0o644); err != nil { + t.Fatalf("write tool: %v", err) + } + bin := filepath.Join(dir, "echo") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil { + t.Fatalf("build tool: %v: %s", err, string(out)) + } + + // Create tools.json referencing the echo tool + toolsPath := filepath.Join(dir, "tools.json") + manifest := map[string]any{ + "tools": []map[string]any{{ + "name": "echo", + "description": "echo back input", + "schema": map[string]any{"type": "object", "properties": map[string]any{"text": map[string]any{"type": "string"}}, "required": []string{"text"}}, + "command": []string{bin}, + "timeoutSec": 5, + }}, + } + b, err := json.Marshal(manifest) + if err != nil { + t.Fatalf("marshal manifest: %v", err) + } + if err := os.WriteFile(toolsPath, b, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + // Fake server with two-step responses + var step int + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + t.Fatalf("unexpected method: %s", r.Method) + } + if r.URL.Path != "/chat/completions" { + t.Fatalf("unexpected path: %s", r.URL.Path) + } + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + step++ + switch step { + case 1: + // Respond with a tool call to echo + resp := 
oai.ChatCompletionsResponse{ + ID: "cmpl-1", + Object: "chat.completion", + Created: time.Now().Unix(), + Model: req.Model, + Choices: []oai.ChatCompletionsResponseChoice{{ + Index: 0, + FinishReason: "tool_calls", + Message: oai.Message{ + Role: oai.RoleAssistant, + ToolCalls: []oai.ToolCall{{ + ID: "1", + Type: "function", + Function: oai.ToolCallFunction{ + Name: "echo", + Arguments: `{"text":"hi"}`, + }, + }}, + }, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode step1: %v", err) + } + case 2: + // Verify assistant tool_calls message is present before tool messages + assistantSeen := false + for _, m := range req.Messages { + if m.Role == oai.RoleAssistant && len(m.ToolCalls) > 0 { + assistantSeen = true + break + } + } + if !assistantSeen { + t.Fatalf("assistant message with tool_calls not present before tool messages") + } + + resp := oai.ChatCompletionsResponse{ + ID: "cmpl-2", + Object: "chat.completion", + Created: time.Now().Unix(), + Model: req.Model, + Choices: []oai.ChatCompletionsResponseChoice{{ + Index: 0, + FinishReason: "stop", + Message: oai.Message{Role: oai.RoleAssistant, Content: "done"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode step2: %v", err) + } + default: + t.Fatalf("unexpected extra request step=%d", step) + } + })) + defer srv.Close() + + cfg := cliConfig{ + prompt: "test", + toolsPath: toolsPath, + systemPrompt: "sys", + baseURL: srv.URL, + apiKey: "", + model: "test", + maxSteps: 4, + timeout: 5 * time.Second, + temperature: 0, + debug: false, + } + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("expected exit code 0, got %d; stderr=%s", code, errBuf.String()) + } + if got := outBuf.String(); got != "done\n" { + t.Fatalf("unexpected stdout: %q", got) + } +} + +// https://github.com/hyperifyio/goagent/issues/252 +// Regression test: a stray role:"tool" without a prior assistant tool_calls +// must 
be caught by the pre-flight validator and the request must not be sent.
+func TestPreflightValidator_BlocksStrayTool_NoHTTPCall(t *testing.T) {
+	// Server that must never be contacted; the handler reports via t.Error because it runs on a non-test goroutine
+	called := false
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		called = true // written on the server goroutine; read only after runAgent returns — TODO confirm no request can still be in flight
+		t.Error("server should not be called when pre-flight fails") // was t.Fatal: Fatal/FailNow must only run on the test goroutine
+	}))
+	defer srv.Close()
+
+	// Craft an initial transcript containing a stray tool message
+	msgs := []oai.Message{
+		{Role: oai.RoleUser, Content: "hi"},
+		{Role: oai.RoleTool, Name: "echo", ToolCallID: "1", Content: "{\"echo\":\"hi\"}"},
+	}
+
+	cfg := cliConfig{
+		prompt:       "ignored",
+		systemPrompt: "sys",
+		baseURL:      srv.URL,
+		model:        "m",
+		maxSteps:     1,
+		httpTimeout:  100 * time.Millisecond,
+		toolTimeout:  100 * time.Millisecond,
+		temperature:  0,
+		initMessages: msgs,
+	}
+
+	var outBuf, errBuf bytes.Buffer
+	code := runAgent(cfg, &outBuf, &errBuf)
+	if code == 0 {
+		t.Fatalf("expected non-zero exit due to pre-flight validation error; stdout=%q stderr=%q", outBuf.String(), errBuf.String())
+	}
+	if called {
+		t.Fatalf("HTTP server was contacted despite pre-flight validation failure")
+	}
+	// Error should mention stray tool without prior assistant tool_calls
+	if !strings.Contains(errBuf.String(), "without a prior assistant message containing tool_calls") {
+		t.Fatalf("unexpected error message: %q", errBuf.String())
+	}
+}
+
+// https://github.com/hyperifyio/goagent/issues/233
+func TestRunAgent_HTTPTimeoutError_MessageIncludesDetails(t *testing.T) {
+	// Fake slow server: sleeps beyond client timeout then returns a valid response
+	slow := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(300 * time.Millisecond)
+		resp := oai.ChatCompletionsResponse{
+			Choices: []oai.ChatCompletionsResponseChoice{{
+				Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"},
+			}},
+		}
+		if err := json.NewEncoder(w).Encode(resp); err != nil {
t.Errorf("encode slow resp: %v", err) // was t.Fatalf: unsafe off the test goroutine (httptest handler)
+		}
+	}))
+	defer slow.Close()
+
+	cfg := cliConfig{
+		prompt:       "test",
+		systemPrompt: "sys",
+		baseURL:      slow.URL,
+		model:        "test",
+		maxSteps:     1,
+		httpTimeout:  100 * time.Millisecond,
+		toolTimeout:  1 * time.Second,
+		temperature:  0,
+		debug:        false,
+	}
+
+	var outBuf, errBuf bytes.Buffer
+	code := runAgent(cfg, &outBuf, &errBuf)
+	if code == 0 {
+		t.Fatalf("expected non-zero exit due to HTTP timeout; stdout=%q stderr=%q", outBuf.String(), errBuf.String())
+	}
+	got := errBuf.String()
+	// Error should mention base URL, configured timeout, and a user hint
+	if !strings.Contains(got, slow.URL) {
+		t.Fatalf("expected error to include base URL %q; got: %q", slow.URL, got)
+	}
+	if !strings.Contains(got, "http-timeout=100ms") {
+		t.Fatalf("expected error to include configured timeout; got: %q", got)
+	}
+	if !strings.Contains(strings.ToLower(got), "increase -http-timeout") {
+		t.Fatalf("expected hint to increase -http-timeout; got: %q", got)
+	}
+}
+
+// https://github.com/hyperifyio/goagent/issues/233
+func TestRunAgent_HTTPTimeout_RaiseResolves(t *testing.T) {
+	// Server is slow but within a larger timeout
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(120 * time.Millisecond)
+		resp := oai.ChatCompletionsResponse{
+			Choices: []oai.ChatCompletionsResponseChoice{{
+				Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"},
+			}},
+		}
+		if err := json.NewEncoder(w).Encode(resp); err != nil {
+			t.Errorf("encode slow resp: %v", err) // was t.Fatalf: handler runs on a non-test goroutine
+		}
+	}))
+	defer srv.Close()
+
+	cfg := cliConfig{
+		prompt:       "test",
+		systemPrompt: "sys",
+		baseURL:      srv.URL,
+		model:        "test",
+		maxSteps:     1,
+		httpTimeout:  500 * time.Millisecond,
+		toolTimeout:  1 * time.Second,
+		temperature:  0,
+		debug:        false,
+	}
+
+	var outBuf, errBuf bytes.Buffer
+	code := runAgent(cfg, &outBuf, &errBuf)
+	if code != 0 {
+		t.Fatalf("expected exit code 0 with higher timeout; stderr=%s", errBuf.String())
+	}
+	if
strings.TrimSpace(outBuf.String()) != "ok" { + t.Fatalf("unexpected stdout: %q", outBuf.String()) + } +} + +// https://github.com/hyperifyio/goagent/issues/247 +// Scaled integration: default-like 90ms times out, raised 300ms succeeds against a ~120ms server. +func TestHTTPTimeout_SlowServer_DefaultTimesOut_RaisedSucceeds(t *testing.T) { + // Slow-ish server (~120ms) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(120 * time.Millisecond) + resp := oai.ChatCompletionsResponse{ + Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp: %v", err) + } + })) + defer srv.Close() + + // Phase 1: env-driven ~90ms timeout -> expect timeout + // Save/restore env + val, ok := os.LookupEnv("OAI_HTTP_TIMEOUT") + if ok { + defer func() { + if err := os.Setenv("OAI_HTTP_TIMEOUT", val); err != nil { + t.Fatalf("restore env: %v", err) + } + }() + } else { + defer func() { + if err := os.Unsetenv("OAI_HTTP_TIMEOUT"); err != nil { + t.Fatalf("unset env: %v", err) + } + }() + } + if err := os.Setenv("OAI_HTTP_TIMEOUT", "90ms"); err != nil { + t.Fatalf("set env: %v", err) + } + // Use parseFlags path to simulate CLI invocation + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-prompt", "x", "-base-url", srv.URL, "-model", "m"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + var out1, err1 bytes.Buffer + exit1 := runAgent(cfg, &out1, &err1) + if exit1 == 0 { + t.Fatalf("expected timeout exit; stdout=%q stderr=%q", out1.String(), err1.String()) + } + if got := err1.String(); !strings.Contains(got, "http-timeout=90ms") { + t.Fatalf("expected error to mention http-timeout=90ms; got: %q", got) + } + + // Phase 2: raise -http-timeout to 300ms -> expect success + os.Args = []string{"agentcli.test", 
"-prompt", "x", "-http-timeout", "300ms", "-base-url", srv.URL, "-model", "m"} + cfg2, code2 := parseFlags() + if code2 != 0 { + t.Fatalf("parse exit: %d", code2) + } + var out2, err2 bytes.Buffer + exit2 := runAgent(cfg2, &out2, &err2) + if exit2 != 0 { + t.Fatalf("expected success with raised timeout; stderr=%s", err2.String()) + } + if strings.TrimSpace(out2.String()) != "ok" { + t.Fatalf("unexpected stdout: %q", out2.String()) + } +} + +// https://github.com/hyperifyio/goagent/issues/245 +func TestDebug_EffectiveTimeoutsAndSources(t *testing.T) { + // Fast server returning a minimal valid response + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + resp := oai.ChatCompletionsResponse{ + Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp: %v", err) + } + })) + defer srv.Close() + + // Use flags so sources are "flag" + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-prompt", "x", "-http-timeout", "5s", "-prep-http-timeout", "4s", "-tool-timeout", "7s", "-timeout", "10s", "-base-url", srv.URL, "-model", "m"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + cfg.debug = true + + var outBuf, errBuf bytes.Buffer + code = runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("expected exit code 0; stderr=%s", errBuf.String()) + } + got := errBuf.String() + if !strings.Contains(got, "effective timeouts: http-timeout=5s source=flag; prep-http-timeout=4s source=flag; tool-timeout=7s source=flag; timeout=10s source=flag") { + t.Fatalf("missing effective timeouts line; got:\n%s", got) + } +} + +// https://github.com/hyperifyio/goagent/issues/245 +func TestHTTPTimeoutError_IncludesSourceAndValue(t *testing.T) { + // Slow server to trigger client timeout + slow := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(200 * time.Millisecond) + resp := oai.ChatCompletionsResponse{ + Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp: %v", err) + } + })) + defer slow.Close() + + // Set http-timeout via env so source is "env" + val, ok := os.LookupEnv("OAI_HTTP_TIMEOUT") + if ok { + defer func() { + if err := os.Setenv("OAI_HTTP_TIMEOUT", val); err != nil { + t.Fatalf("restore env: %v", err) + } + }() + } else { + defer func() { + if err := os.Unsetenv("OAI_HTTP_TIMEOUT"); err != nil { + t.Fatalf("unset env: %v", err) + } + }() + } + if err := os.Setenv("OAI_HTTP_TIMEOUT", "100ms"); err != nil { + t.Fatalf("set env: %v", err) + } + + origArgs := os.Args + defer func() { os.Args = origArgs }() + os.Args = []string{"agentcli.test", "-prompt", "x", "-base-url", slow.URL, "-model", "m"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + + var outBuf, errBuf bytes.Buffer + code = runAgent(cfg, &outBuf, &errBuf) + if code == 0 { + t.Fatalf("expected non-zero exit; stdout=%q stderr=%q", outBuf.String(), errBuf.String()) + } + got := errBuf.String() + if !strings.Contains(got, "http-timeout=100ms") { + t.Fatalf("expected error to include http-timeout value; got: %q", got) + } + if !strings.Contains(got, "(http-timeout source=env)") { + t.Fatalf("expected error to include timeout source env; got: %q", got) + } +} + +// https://github.com/hyperifyio/goagent/issues/243 +// Ensure chat POST uses -http-timeout exclusively even if legacy -timeout is shorter +func TestRunAgent_HTTPTimeout_IgnoresShortGlobal(t *testing.T) { + // Server sleeps longer than global timeout but shorter than http-timeout + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(150 * 
time.Millisecond) + resp := oai.ChatCompletionsResponse{ + Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp: %v", err) + } + })) + defer srv.Close() + + cfg := cliConfig{ + prompt: "test", + systemPrompt: "sys", + baseURL: srv.URL, + model: "test", + maxSteps: 1, + timeout: 50 * time.Millisecond, // legacy global shorter than server latency + httpTimeout: 500 * time.Millisecond, // HTTP timeout longer than server latency + toolTimeout: 1 * time.Second, + temperature: 0, + debug: false, + } + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("expected exit code 0; stderr=%s", errBuf.String()) + } + if strings.TrimSpace(outBuf.String()) != "ok" { + t.Fatalf("unexpected stdout: %q", outBuf.String()) + } +} + +// https://github.com/hyperifyio/goagent/issues/1 +func TestRunAgent_FailsWhenConfiguredToolUnavailable(t *testing.T) { + dir := t.TempDir() + // Create tools.json referencing a missing binary path + missing := filepath.Join(dir, "missing-tool") + toolsPath := filepath.Join(dir, "tools.json") + manifest := map[string]any{ + "tools": []map[string]any{{ + "name": "missing", + "description": "should fail if unavailable", + "schema": map[string]any{"type": "object"}, + "command": []string{missing}, + "timeoutSec": 2, + }}, + } + b, err := json.Marshal(manifest) + if err != nil { + t.Fatalf("marshal manifest: %v", err) + } + if err := os.WriteFile(toolsPath, b, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + cfg := cliConfig{ + prompt: "test", + toolsPath: toolsPath, + systemPrompt: "sys", + baseURL: "http://unused.local", // not contacted due to early failure + apiKey: "", + model: "test", + maxSteps: 1, + timeout: 1 * time.Second, + temperature: 0, + debug: false, + } + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, 
&errBuf) + if code == 0 { + t.Fatalf("expected non-zero exit when tool is missing; stdout=%q stderr=%q", outBuf.String(), errBuf.String()) + } + if got := errBuf.String(); !strings.Contains(got, "unavailable") { + t.Fatalf("expected error mentioning unavailable tool, got: %q", got) + } +} + +// https://github.com/hyperifyio/goagent/issues/256 +// Verify that multiple tool_calls are executed in parallel rather than sequentially. +// We call appendToolCallOutputs directly to isolate the execution time from HTTP. +func TestAppendToolCallOutputs_ExecutesInParallel(t *testing.T) { + // Build a helper tool that sleeps per JSON input then emits JSON + dir := t.TempDir() + helper := filepath.Join(dir, "sleeper.go") + if err := os.WriteFile(helper, []byte(`package main +import ( + "encoding/json"; "io"; "os"; "time"; "fmt" +) +func main(){ + b,_ := io.ReadAll(os.Stdin) + var m map[string]any + _ = json.Unmarshal(b, &m) + name, _ := m["name"].(string) + ms := 0 + if v, ok := m["sleepMs"].(float64); ok { ms = int(v) } + if ms > 0 { time.Sleep(time.Duration(ms) * time.Millisecond) } + _ = json.NewEncoder(os.Stdout).Encode(map[string]any{"name": name, "sleptMs": ms}) + fmt.Print("") +} +`), 0o644); err != nil { + t.Fatalf("write tool: %v", err) + } + bin := filepath.Join(dir, "sleeper") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil { + t.Fatalf("build tool: %v: %s", err, string(out)) + } + + // Build the registry expected by appendToolCallOutputs + realTools := map[string]tools.ToolSpec{ + "slow": {Name: "slow", Command: []string{bin}, TimeoutSec: 5}, + "fast": {Name: "fast", Command: []string{bin}, TimeoutSec: 5}, + } + + // Craft assistant message containing two tool calls with different sleeps + msg := oai.Message{Role: oai.RoleAssistant} + msg.ToolCalls = []oai.ToolCall{ + {ID: "1", Type: "function", Function: oai.ToolCallFunction{Name: "slow", Arguments: 
`{"sleepMs":600,"name":"slow"}`}}, + {ID: "2", Type: "function", Function: oai.ToolCallFunction{Name: "fast", Arguments: `{"sleepMs":600,"name":"fast"}`}}, + } + + // Minimal cfg with a generous per-tool timeout + cfg := cliConfig{toolTimeout: 3 * time.Second} + + // Measure elapsed around appendToolCallOutputs + start := time.Now() + out := appendToolCallOutputs(nil, msg, realTools, cfg) + elapsed := time.Since(start) + + // Expect two tool messages appended + gotIDs := map[string]bool{} + for _, m := range out { + if m.Role == oai.RoleTool { + gotIDs[m.ToolCallID] = true + } + } + if !gotIDs["1"] || !gotIDs["2"] { + t.Fatalf("expected tool messages for ids 1 and 2; got %+v", gotIDs) + } + + // Sequential would be ~1200ms (+overhead). Parallel should be well under 1200ms. + if elapsed >= 1100*time.Millisecond { + t.Fatalf("tool calls did not run in parallel; elapsed=%v (want < 1.1s)", elapsed) + } +} + +// https://github.com/hyperifyio/goagent/issues/242 +func TestTimeoutPrecedence_Table(t *testing.T) { + // Helpers to save/restore env + save := func(k string) (string, bool) { v, ok := os.LookupEnv(k); return v, ok } + restore := func(k, v string, ok bool) { + if ok { + if err := os.Setenv(k, v); err != nil { + t.Fatalf("restore %s: %v", k, err) + } + } else { + if err := os.Unsetenv(k); err != nil { + t.Fatalf("unset %s: %v", k, err) + } + } + } + httpEnvVal, httpEnvOK := save("OAI_HTTP_TIMEOUT") + defer restore("OAI_HTTP_TIMEOUT", httpEnvVal, httpEnvOK) + + type testCase struct { + name string + envHTTP string + args []string + wantHTTP time.Duration + wantTool time.Duration + wantGlobal time.Duration + } + + cases := []testCase{ + { + name: "FlagsOverrideEnv", + envHTTP: "90s", + args: []string{"agentcli.test", "-prompt", "x", "-http-timeout", "300s", "-tool-timeout", "300s"}, + wantHTTP: 5 * time.Minute, + wantTool: 5 * time.Minute, + wantGlobal: 30 * time.Second, + }, + { + name: "EnvOnly_HTTP", + envHTTP: "90s", + args: []string{"agentcli.test", "-prompt", 
"x"}, + wantHTTP: 90 * time.Second, + wantTool: 30 * time.Second, + wantGlobal: 30 * time.Second, + }, + { + name: "LegacyGlobalOnly", + envHTTP: "", + args: []string{"agentcli.test", "-prompt", "x", "-timeout", "300s"}, + wantHTTP: 5 * time.Minute, + wantTool: 5 * time.Minute, + wantGlobal: 5 * time.Minute, + }, + } + + origArgs := os.Args + defer func() { os.Args = origArgs }() + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if tc.envHTTP == "" { + if err := os.Unsetenv("OAI_HTTP_TIMEOUT"); err != nil { + t.Fatalf("unset env: %v", err) + } + } else { + if err := os.Setenv("OAI_HTTP_TIMEOUT", tc.envHTTP); err != nil { + t.Fatalf("set env: %v", err) + } + } + + os.Args = tc.args + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.httpTimeout != tc.wantHTTP { + t.Fatalf("httpTimeout got %v want %v", cfg.httpTimeout, tc.wantHTTP) + } + if cfg.toolTimeout != tc.wantTool { + t.Fatalf("toolTimeout got %v want %v", cfg.toolTimeout, tc.wantTool) + } + if cfg.timeout != tc.wantGlobal { + t.Fatalf("global timeout (-timeout) got %v want %v", cfg.timeout, tc.wantGlobal) + } + }) + } +} + +// https://github.com/hyperifyio/goagent/issues/243 +// Ensure -http-timeout is not clamped by legacy -timeout (shorter or longer) +func TestHTTPTimeout_NotClampedByGlobal(t *testing.T) { + origArgs := os.Args + defer func() { os.Args = origArgs }() + + t.Run("GlobalShorter", func(t *testing.T) { + os.Args = []string{"agentcli.test", "-prompt", "x", "-http-timeout", "300s", "-timeout", "1s"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.httpTimeout != 300*time.Second { + t.Fatalf("httpTimeout got %v want %v", cfg.httpTimeout, 300*time.Second) + } + if cfg.timeout != 1*time.Second { + t.Fatalf("global timeout (-timeout) got %v want %v", cfg.timeout, 1*time.Second) + } + }) + + t.Run("GlobalLonger", func(t *testing.T) { + os.Args = []string{"agentcli.test", "-prompt", "x", 
"-http-timeout", "300s", "-timeout", "600s"} + cfg, code := parseFlags() + if code != 0 { + t.Fatalf("parse exit: %d", code) + } + if cfg.httpTimeout != 300*time.Second { + t.Fatalf("httpTimeout got %v want %v", cfg.httpTimeout, 300*time.Second) + } + if cfg.timeout != 600*time.Second { + t.Fatalf("global timeout (-timeout) got %v want %v", cfg.timeout, 600*time.Second) + } + }) +} + +// https://github.com/hyperifyio/goagent/issues/318 +// When completionCap defaults to 0, request must omit max_tokens entirely. +func TestRequest_OmitsMaxTokensWhenCapZero(t *testing.T) { + var captured []byte + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + b, err := io.ReadAll(r.Body) + if err != nil { + t.Fatalf("read body: %v", err) + } + captured = append([]byte(nil), b...) + // Respond with a minimal valid assistant message to terminate + resp := oai.ChatCompletionsResponse{ + Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp: %v", err) + } + })) + defer srv.Close() + + cfg := cliConfig{ + prompt: "x", + systemPrompt: "sys", + baseURL: srv.URL, + model: "m", + maxSteps: 1, + httpTimeout: 2 * time.Second, + toolTimeout: 1 * time.Second, + temperature: 0, + } + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errBuf.String()) + } + if len(captured) == 0 { + t.Fatalf("no request captured") + } + if strings.Contains(string(captured), "\"max_tokens\"") { + t.Fatalf("request must omit max_tokens when completionCap=0; got body: %s", string(captured)) + } +} + +// https://github.com/hyperifyio/goagent/issues/318 +// When finish_reason=="length" on the first attempt, the agent must perform +// exactly one in-step retry with a completion cap of at least 256 tokens by +// setting max_tokens=256 on the retry while 
omitting it on the first attempt. +func TestLengthBackoff_OneRetrySetsMaxTokens256(t *testing.T) { + // Capture bodies of successive requests + var bodies [][]byte + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + b, err := io.ReadAll(r.Body) + if err != nil { + t.Fatalf("read body: %v", err) + } + bodies = append(bodies, append([]byte(nil), b...)) + // Respond with length on first call, stop on second + if len(bodies) == 1 { + resp := oai.ChatCompletionsResponse{ + Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "length", + Message: oai.Message{Role: oai.RoleAssistant, Content: ""}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp1: %v", err) + } + return + } + // Second call returns final content + resp := oai.ChatCompletionsResponse{ + Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "stop", + Message: oai.Message{Role: oai.RoleAssistant, Content: "done"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp2: %v", err) + } + })) + defer srv.Close() + + cfg := cliConfig{ + prompt: "x", + systemPrompt: "sys", + baseURL: srv.URL, + model: "m", + maxSteps: 1, + httpTimeout: 2 * time.Second, + toolTimeout: 1 * time.Second, + temperature: 0, + debug: false, + } + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errBuf.String()) + } + if strings.TrimSpace(outBuf.String()) != "done" { + t.Fatalf("unexpected stdout: %q", outBuf.String()) + } + if len(bodies) != 2 { + t.Fatalf("expected two requests (initial + retry), got %d", len(bodies)) + } + // First body must omit max_tokens + if strings.Contains(string(bodies[0]), "\"max_tokens\"") { + t.Fatalf("first attempt must omit max_tokens; body=%s", string(bodies[0])) + } + // Second body must include max_tokens:256 + if !strings.Contains(string(bodies[1]), "\"max_tokens\":256") { + 
		t.Fatalf("second attempt must include max_tokens=256; body=%s", string(bodies[1]))
	}
}

// https://github.com/hyperifyio/goagent/issues/318
// On length backoff, the completion cap must be clamped to the remaining
// context so that max_tokens does not exceed window - estimated_prompt - margin.
func TestLengthBackoff_ClampDoesNotExceedWindow(t *testing.T) {
	// Build a prompt large enough that remaining context < 256 for oss-gpt-20b (8192 window).
	// We will set model to oss-gpt-20b so ContextWindowForModel returns 8192.
	// Choose a prompt size that forces remaining context < 256 for window=8192.
	// Roughly EstimateTokens ~= ceil(len/4) + overhead; len=40000 -> ~10000 tokens.
	large := strings.Repeat("x", 40000)

	// Capture bodies to inspect max_tokens of the retry
	var bodies [][]byte
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		b, err := io.ReadAll(r.Body)
		if err != nil {
			// NOTE(review): t.Fatalf here runs on the server goroutine, not the
			// test goroutine; per testing docs FailNow must be called from the
			// test goroutine — consider t.Errorf + early return. TODO confirm.
			t.Fatalf("read body: %v", err)
		}
		// Copy the body; the slice from ReadAll is retained across requests.
		bodies = append(bodies, append([]byte(nil), b...))
		// First call yields finish_reason==length to trigger the retry
		if len(bodies) == 1 {
			resp := oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{FinishReason: "length", Message: oai.Message{Role: oai.RoleAssistant}}}}
			if err := json.NewEncoder(w).Encode(resp); err != nil {
				t.Fatalf("encode resp1: %v", err)
			}
			return
		}
		// Second call returns stop to finish
		resp := oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{FinishReason: "stop", Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Fatalf("encode resp2: %v", err)
		}
	}))
	defer srv.Close()

	cfg := cliConfig{
		prompt:       large,
		systemPrompt: "sys",
		baseURL:      srv.URL,
		model:        "oss-gpt-20b",
		maxSteps:     1,
		httpTimeout:  2 * time.Second,
		toolTimeout:  1 * time.Second,
		temperature:  0,
		debug:        false,
	}

	var outBuf, errBuf bytes.Buffer
	code := runAgent(cfg, &outBuf, &errBuf)
	if code != 0 {
		t.Fatalf("exit=%d stderr=%s", code, errBuf.String())
	}
	if strings.TrimSpace(outBuf.String()) != "ok" {
		t.Fatalf("unexpected stdout: %q", outBuf.String())
	}
	if len(bodies) != 2 {
		t.Fatalf("expected two requests, got %d", len(bodies))
	}
	// First body must omit max_tokens
	if strings.Contains(string(bodies[0]), "\"max_tokens\"") {
		t.Fatalf("first attempt must omit max_tokens; body=%s", string(bodies[0]))
	}
	// Second body must include max_tokens and it must be less than or equal to remaining.
	// Compute an upper bound by parsing the JSON to extract max_tokens.
	var payload map[string]any
	if err := json.Unmarshal(bodies[1], &payload); err != nil {
		t.Fatalf("unmarshal second body: %v", err)
	}
	// JSON numbers decode into any as float64; convert back to int below.
	v, ok := payload["max_tokens"].(float64)
	if !ok {
		t.Fatalf("second body missing max_tokens; body=%s", string(bodies[1]))
	}
	gotCap := int(v)
	if gotCap <= 0 {
		t.Fatalf("clamped cap must be > 0; got %d", gotCap)
	}
	// Sanity: clamped value must be strictly less than 256 for our large prompt.
	if gotCap >= 256 {
		t.Fatalf("clamp failed: expected retry cap < 256 due to large prompt; got %d", gotCap)
	}
}

// https://github.com/hyperifyio/goagent/issues/318
// On length backoff, an NDJSON audit line with event=="length_backoff" must be
// written under the repository root's .goagent/audit with expected fields.
func TestLengthBackoff_AuditEmitted(t *testing.T) {
	// Clean audit dir at repo root so a stale log cannot satisfy the assertions.
	// NOTE(review): tests sharing the repo-root .goagent directory cannot run in
	// parallel with each other — verify none of them use t.Parallel().
	root := testFindRepoRoot(t)
	_ = os.RemoveAll(filepath.Join(root, ".goagent"))

	// Minimal two-attempt server to trigger length backoff
	var calls int
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		calls++
		if calls == 1 {
			_ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{FinishReason: "length", Message: oai.Message{Role: oai.RoleAssistant}}}})
			return
		}
		_ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{FinishReason: "stop", Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}})
	}))
	defer srv.Close()

	cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "oss-gpt-20b", maxSteps: 1, httpTimeout: 2 * time.Second, toolTimeout: 1 * time.Second, temperature: 0}
	var outBuf, errBuf bytes.Buffer
	code := runAgent(cfg, &outBuf, &errBuf)
	if code != 0 {
		t.Fatalf("exit=%d stderr=%s", code, errBuf.String())
	}
	if strings.TrimSpace(outBuf.String()) != "ok" {
		t.Fatalf("unexpected stdout: %q", outBuf.String())
	}

	// Locate today's audit file under repo root and read it
	auditDir := filepath.Join(root, ".goagent", "audit")
	logFile := waitForAuditFile(t, auditDir, 2*time.Second)
	data, err := os.ReadFile(logFile)
	if err != nil {
		t.Fatalf("read audit: %v", err)
	}
	s := string(data)
	if !strings.Contains(s, "\"event\":\"length_backoff\"") {
		t.Fatalf("missing length_backoff event; got:\n%s", truncate(s, 1000))
	}
	// Basic field presence checks (substring match; does not validate NDJSON shape)
	if !strings.Contains(s, "\"model\":\"") || !strings.Contains(s, "\"prev_cap\":") || !strings.Contains(s, "\"new_cap\":") || !strings.Contains(s, "\"window\":") || !strings.Contains(s, "\"estimated_prompt_tokens\":") {
		t.Fatalf("missing expected fields in length_backoff audit; got:\n%s", truncate(s, 1000))
	}
}

// FEATURE_CHECKLIST L8
// Audit for pre-stage must include stage:"prep" and idempotency_key on http_timing/attempt.
func TestPreStage_AuditIncludesStageAndIdempotency(t *testing.T) {
	// Clean audit dir at repo root
	root := testFindRepoRoot(t)
	_ = os.RemoveAll(filepath.Join(root, ".goagent"))

	// Server returns an assistant with no tool calls to keep it simple
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Respond minimal success
		_ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}}}})
	}))
	defer srv.Close()

	cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", maxSteps: 1, httpTimeout: time.Second, toolTimeout: time.Second, prepHTTPTimeout: time.Second}
	var outBuf, errBuf bytes.Buffer
	code := runAgent(cfg, &outBuf, &errBuf)
	if code != 0 {
		t.Fatalf("exit=%d stderr=%s", code, errBuf.String())
	}
	// Locate today's audit file under repo root and read it
	auditDir := filepath.Join(root, ".goagent", "audit")
	logFile := waitForAuditFile(t, auditDir, 2*time.Second)
	data, err := os.ReadFile(logFile)
	if err != nil {
		t.Fatalf("read audit: %v", err)
	}
	s := string(data)
	// Expect at least one timing or attempt line with stage prep and idempotency_key
	if !strings.Contains(s, "\"stage\":\"prep\"") {
		t.Fatalf("missing stage prep in audit; got:\n%s", truncate(s, 1000))
	}
	if !strings.Contains(s, "\"idempotency_key\":") {
		t.Fatalf("missing idempotency_key in audit; got:\n%s", truncate(s, 1000))
	}
}

// Deprecated local helper retained earlier in file; remove duplicate definition to avoid redeclare.

// waitForAuditFile polls the audit directory until a file appears or timeout elapses.
+func waitForAuditFile(t *testing.T, auditDir string, timeout time.Duration) string { + t.Helper() + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + entries, err := os.ReadDir(auditDir) + if err == nil { + for _, e := range entries { + if !e.IsDir() { + return filepath.Join(auditDir, e.Name()) + } + } + } + time.Sleep(10 * time.Millisecond) + } + t.Fatalf("audit log not created in %s", auditDir) + return "" +} + +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] +} + +// https://github.com/hyperifyio/goagent/issues/300 +// CLI flags must be order-insensitive. This test permutes common flags and +// asserts parsed values are identical regardless of position. We only compare +// a stable subset of fields to avoid env/default interference. +func TestFlagOrderIndependence_Table(t *testing.T) { + type view struct { + prompt string + toolsPath string + debug bool + model string + } + + // Helper to parse given argv and extract a comparison view + parse := func(argv []string) (view, int) { + orig := os.Args + defer func() { os.Args = orig }() + os.Args = argv + cfg, code := parseFlags() + return view{prompt: cfg.prompt, toolsPath: cfg.toolsPath, debug: cfg.debug, model: cfg.model}, code + } + + // Baseline args containing a few representative flags + base := []string{"agentcli.test", "-prompt", "hello", "-tools", "/tmp/tools.json", "-debug", "-model", "m"} + perms := [][]string{ + base, + {"agentcli.test", "-debug", "-model", "m", "-tools", "/tmp/tools.json", "-prompt", "hello"}, + {"agentcli.test", "-tools", "/tmp/tools.json", "-prompt", "hello", "-model", "m", "-debug"}, + {"agentcli.test", "-model", "m", "-prompt", "hello", "-debug", "-tools", "/tmp/tools.json"}, + } + + var want view + for i, args := range perms { + got, code := parse(args) + if code != 0 { + t.Fatalf("perm %d parse exit=%d for args=%v", i, code, args) + } + if i == 0 { + want = got + continue + } + if got != want { + t.Fatalf("mismatch 
on permutation %d: got=%+v want=%+v (args=%v)", i, got, want, args) + } + } +} + +// https://github.com/hyperifyio/goagent/issues/300 +// Help must exit 0 regardless of where the token appears among other flags. +func TestHelpToken_PositionIndependence(t *testing.T) { + cases := [][]string{ + {"-h", "-debug"}, + {"-debug", "-h"}, + {"-tools", "/tmp/tools.json", "help", "-model", "m"}, + {"-model", "m", "--help", "-prompt", "x"}, + } + for i, rest := range cases { + var outBuf, errBuf bytes.Buffer + code := cliMain(rest, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("case %d: exit=%d; want 0", i, code) + } + if !strings.Contains(outBuf.String(), "Usage:") { + t.Fatalf("case %d: expected Usage in stdout; got: %q", i, outBuf.String()) + } + if errBuf.Len() != 0 { + t.Fatalf("case %d: stderr should be empty; got: %q", i, errBuf.String()) + } + } +} + +// FEATURE_CHECKLIST L21: -prep-dry-run should run pre-stage only (or skip if disabled), +// print refined messages to stdout, and exit 0. +func TestPrepDryRun_PrintsMessages(t *testing.T) { + args := []string{"-prompt", "hello", "-prep-enabled=false", "-prep-dry-run"} + var out, errBuf strings.Builder + code := cliMain(args, &out, &errBuf) + if code != 0 { + t.Fatalf("exit code=%d; want 0; stderr=%s", code, errBuf.String()) + } + s := strings.TrimSpace(out.String()) + if !strings.HasPrefix(s, "[") || !strings.Contains(s, "\"role\"") { + t.Fatalf("unexpected -prep-dry-run output: %s", s) + } +} + +// FEATURE_CHECKLIST L69: -prep-dry-run should reflect merged pre-stage output +// (system replacement and developer appends) in the printed messages. +func TestPrepDryRun_MergesPrestageOutput(t *testing.T) { + // Stub server that returns a pre-stage assistant message with JSON payload + // specifying a new system and an extra developer message. 
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost || r.URL.Path != "/chat/completions" {
			t.Fatalf("unexpected request: %s %s", r.Method, r.URL.Path)
		}
		w.Header().Set("Content-Type", "application/json")
		// Assistant content contains Harmony array for pre-stage merge
		payload := `[{"role":"system","content":"SYSX"},{"role":"developer","content":"DEVX"}]`
		resp := oai.ChatCompletionsResponse{
			Choices: []oai.ChatCompletionsResponseChoice{{
				Message: oai.Message{Role: oai.RoleAssistant, Content: payload},
			}},
		}
		_ = json.NewEncoder(w).Encode(resp)
	}))
	defer srv.Close()

	// Run CLI with -prep-dry-run so it prints refined (merged) messages to stdout.
	args := []string{"-prompt", "USER", "-base-url", srv.URL, "-prep-dry-run"}
	var out, errBuf strings.Builder
	code := cliMain(args, &out, &errBuf)
	if code != 0 {
		t.Fatalf("exit code=%d; want 0; stderr=%s", code, errBuf.String())
	}
	s := out.String()
	// The stub's system/developer content must survive the merge verbatim.
	if !strings.Contains(s, "\"SYSX\"") {
		t.Fatalf("merged system not found in output: %s", truncate(s, 200))
	}
	if !strings.Contains(s, "\"DEVX\"") {
		t.Fatalf("merged developer not found in output: %s", truncate(s, 200))
	}
}

// FEATURE_CHECKLIST L21: -print-messages should pretty-print the final merged
// message array to stderr before the main call.
func TestPrintMessages_FlagPrettyPrintsToStderr(t *testing.T) {
	// Disable pre-stage to avoid network and set max-steps=0 to bypass HTTP.
	args := []string{"-prompt", "p", "-print-messages", "-max-steps", "0", "-prep-enabled=false"}
	var out, errBuf strings.Builder
	code := cliMain(args, &out, &errBuf)
	if code != 1 && code != 0 { // may exit 1 due to max-steps==0
		t.Fatalf("unexpected exit: %d; stderr=%s", code, errBuf.String())
	}
	s := errBuf.String()
	if !strings.Contains(s, "\"messages\"") || !strings.Contains(s, "\"prestage\"") || !strings.Contains(s, "\"bytes\"") {
		t.Fatalf("stderr missing wrapper with prestage metadata; got=%s", truncate(s, 200))
	}
}

// FEATURE_CHECKLIST L46: Enforce transcript hygiene — when -debug is off,
// truncate any tool message content over 8KB before sending to the API.
func TestPreflight_TruncatesLargeToolContent_WhenDebugOff(t *testing.T) {
	t.Parallel()
	// Build transcript: assistant tool_call, then tool with oversized content
	big := strings.Repeat("A", 9000)
	msgs := []oai.Message{
		{Role: oai.RoleSystem, Content: "s"},
		{Role: oai.RoleUser, Content: "u"},
		{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{{ID: "call_1", Type: "function", Function: oai.ToolCallFunction{Name: "echo", Arguments: "{}"}}}},
		{Role: oai.RoleTool, ToolCallID: "call_1", Name: "echo", Content: big},
	}

	// Capture the outgoing request
	var seen oai.ChatCompletionsRequest
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		defer r.Body.Close()
		if err := json.NewDecoder(r.Body).Decode(&seen); err != nil {
			t.Fatalf("decode: %v", err)
		}
		_ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{Message: oai.Message{Role: oai.RoleAssistant, Channel: "final", Content: "ok"}}}})
	}))
	defer srv.Close()

	var outBuf, errBuf bytes.Buffer
	cfg := cliConfig{
		baseURL:      srv.URL,
		model:        "m",
		maxSteps:     1,
		httpTimeout:  2 * time.Second,
		toolTimeout:  1 * time.Second,
		debug:        false, // important: hygiene applies only when debug is off
		initMessages: msgs,
		prepEnabled:    false,
		prepEnabledSet: true,
	}
	code := runAgent(cfg, &outBuf, &errBuf)
	if code != 0 {
		t.Fatalf("exit=%d stderr=%s", code, truncate(errBuf.String(), 400))
	}
	if strings.TrimSpace(outBuf.String()) != "ok" {
		t.Fatalf("unexpected stdout: %q", outBuf.String())
	}
	// Assert truncation took place in the sent request
	var found bool
	for _, m := range seen.Messages {
		if m.Role == oai.RoleTool && m.ToolCallID == "call_1" {
			found = true
			if strings.TrimSpace(m.Content) != `{"truncated":true,"reason":"large-tool-output"}` {
				t.Fatalf("tool content not truncated; got=%q", truncate(m.Content, 120))
			}
			break
		}
	}
	if !found {
		t.Fatalf("tool message not found in request")
	}
}

// FEATURE_CHECKLIST L23: Save/load refined messages. Round-trip test writes the
// merged Harmony messages to JSON and loads them back to bypass pre-stage and prompt.
func TestSaveLoadMessages_RoundTrip(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "msgs.json")

	// First run: disable HTTP by setting max-steps=0 and pre-stage off; save messages.
	// Expect exit code 1 due to max-steps==0, but file should be written with a valid array.
	args1 := []string{"-prompt", "hello", "-prep-enabled=false", "-max-steps", "0", "-save-messages", path}
	var out1, err1 strings.Builder
	_ = cliMain(args1, &out1, &err1) // exit code may be 1; we only care about file

	b, rerr := os.ReadFile(path)
	if rerr != nil {
		t.Fatalf("read saved messages: %v", rerr)
	}
	// Quick schema sniff: support both legacy array and new object wrapper
	trimmed := strings.TrimSpace(string(b))
	if strings.HasPrefix(trimmed, "[") {
		if !strings.Contains(trimmed, "\"role\"") {
			t.Fatalf("saved messages missing role fields: %s", truncate(trimmed, 100))
		}
	} else {
		if !strings.Contains(trimmed, "\"messages\"") {
			t.Fatalf("saved wrapper missing messages field: %s", truncate(trimmed, 120))
		}
		// New wrapper should include prestage metadata without prompt text
		if !strings.Contains(trimmed, "\"prestage\"") || !strings.Contains(trimmed, "\"bytes\"") {
			t.Fatalf("saved wrapper missing prestage metadata: %s", truncate(trimmed, 160))
		}
	}

	// Second run: load messages from file; should parse and validate without requiring -prompt.
	args2 := []string{"-load-messages", path, "-max-steps", "0", "-prep-enabled=false"}
	var out2, err2 strings.Builder
	code2 := cliMain(args2, &out2, &err2)
	if code2 != 1 && code2 != 0 { // may exit 1 due to max-steps==0
		t.Fatalf("unexpected exit on load: %d; stderr=%s", code2, err2.String())
	}
}

// FEATURE_CHECKLIST L52: Round-trip with image_prompt metadata.
func TestSaveLoadMessages_WithImagePrompt(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "msgs_with_image.json")

	// First run: inject an image prompt via cfg, save wrapper file.
	// We call cliMain by setting flags and let parseFlags build cfg; since there's
	// no flag for image prompt yet, we simulate by running once to produce messages,
	// then rewrite file to include image_prompt and ensure loader reads it.
	args1 := []string{"-prompt", "hello", "-prep-enabled=false", "-max-steps", "0", "-save-messages", path}
	var out1, err1 strings.Builder
	_ = cliMain(args1, &out1, &err1)

	// Overwrite saved file with wrapper that includes image_prompt
	data, rerr := os.ReadFile(path)
	if rerr != nil {
		t.Fatalf("read saved: %v", rerr)
	}
	// Wrap into object if file is an array
	if strings.HasPrefix(strings.TrimSpace(string(data)), "[") {
		data = []byte("{\n \"messages\": " + string(data) + ",\n \"image_prompt\": \"draw a cat\"\n}")
		if werr := os.WriteFile(path, data, 0o644); werr != nil {
			t.Fatalf("rewrite saved: %v", werr)
		}
	} else {
		// Otherwise, inject image_prompt field crudely for test purposes
		if !strings.Contains(string(data), "\"image_prompt\"") {
			data = []byte(strings.TrimSuffix(strings.TrimSpace(string(data)), "}") + ",\n \"image_prompt\": \"draw a cat\"\n}")
			if werr := os.WriteFile(path, data, 0o644); werr != nil {
				t.Fatalf("inject saved: %v", werr)
			}
		}
	}

	// Round-trip via helpers since -load and -save cannot be combined in one invocation
	raw, r1 := os.ReadFile(path)
	if r1 != nil {
		t.Fatalf("read raw: %v", r1)
	}
	msgs, img, perr := parseSavedMessages(raw)
	if perr != nil {
		t.Fatalf("parse saved: %v", perr)
	}
	outPath := filepath.Join(dir, "msgs_roundtrip.json")
	if werr := writeSavedMessages(outPath, msgs, img); werr != nil {
		t.Fatalf("write roundtrip: %v", werr)
	}
	b2, r2 := os.ReadFile(outPath)
	if r2 != nil {
		t.Fatalf("read roundtrip: %v", r2)
	}
	// NOTE(review): this asserts exact marshaled spacing of writeSavedMessages;
	// a formatting change there would break the substring match. TODO confirm intended.
	if !strings.Contains(string(b2), "\"image_prompt\": \"draw a cat\"") {
		t.Fatalf("round-trip missing image_prompt: %s", truncate(string(b2), 160))
	}
}

// FEATURE_CHECKLIST L23: Conflicts and errors for save/load flags.
+func TestSaveLoadMessages_FlagConflictsAndErrors(t *testing.T) { + // load+save together should exit 2 + dir := t.TempDir() + p := filepath.Join(dir, "x.json") + args := []string{"-load-messages", p, "-save-messages", p, "-prompt", "x"} + cfg, code := func() (cliConfig, int) { + orig := os.Args + defer func() { os.Args = orig }() + os.Args = append([]string{"agentcli.test"}, args...) + return parseFlags() + }() + if code == 0 { + t.Fatalf("expected parse error for conflicting flags; cfg=%+v", cfg) + } + + // load-messages conflicts with -prompt/-prompt-file + args2 := []string{"-load-messages", p, "-prompt", "x"} + cfg2, code2 := func() (cliConfig, int) { + orig := os.Args + defer func() { os.Args = orig }() + os.Args = append([]string{"agentcli.test"}, args2...) + return parseFlags() + }() + if code2 == 0 { + t.Fatalf("expected parse error for load+prompt; cfg=%+v", cfg2) + } +} + +// https://github.com/hyperifyio/goagent/issues/244 +// Duration flags accept plain seconds (e.g., 300) and Go duration strings. 
func TestDurationFlags_FlexibleParsing(t *testing.T) {
	// NOTE(review): the subtests mutate process-global os.Args and environment
	// variables, so neither this test nor its subtests may use t.Parallel().
	origArgs := os.Args
	defer func() { os.Args = origArgs }()

	t.Run("NumericHTTPFlag", func(t *testing.T) {
		os.Args = []string{"agentcli.test", "-prompt", "x", "-http-timeout", "300"}
		cfg, code := parseFlags()
		if code != 0 {
			t.Fatalf("parse exit: %d", code)
		}
		if cfg.httpTimeout != 300*time.Second {
			t.Fatalf("http-timeout got %v want %v", cfg.httpTimeout, 300*time.Second)
		}
	})

	t.Run("NumericToolFlag", func(t *testing.T) {
		os.Args = []string{"agentcli.test", "-prompt", "x", "-tool-timeout", "45"}
		cfg, code := parseFlags()
		if code != 0 {
			t.Fatalf("parse exit: %d", code)
		}
		if cfg.toolTimeout != 45*time.Second {
			t.Fatalf("tool-timeout got %v want %v", cfg.toolTimeout, 45*time.Second)
		}
	})

	t.Run("NumericGlobalFlag", func(t *testing.T) {
		os.Args = []string{"agentcli.test", "-prompt", "x", "-timeout", "10"}
		cfg, code := parseFlags()
		if code != 0 {
			t.Fatalf("parse exit: %d", code)
		}
		// http falls back to legacy when not set explicitly
		if cfg.timeout != 10*time.Second || cfg.httpTimeout != 10*time.Second || cfg.toolTimeout != 10*time.Second {
			t.Fatalf("timeouts got http=%v tool=%v global=%v; want 10s", cfg.httpTimeout, cfg.toolTimeout, cfg.timeout)
		}
	})

	t.Run("EnvHTTPNumeric", func(t *testing.T) {
		// Save/restore env
		val, ok := os.LookupEnv("OAI_HTTP_TIMEOUT")
		defer func() {
			if ok {
				if err := os.Setenv("OAI_HTTP_TIMEOUT", val); err != nil {
					t.Fatalf("restore env: %v", err)
				}
			} else {
				if err := os.Unsetenv("OAI_HTTP_TIMEOUT"); err != nil {
					t.Fatalf("unset env: %v", err)
				}
			}
		}()
		if err := os.Setenv("OAI_HTTP_TIMEOUT", "300"); err != nil {
			t.Fatalf("set env: %v", err)
		}
		os.Args = []string{"agentcli.test", "-prompt", "x"}
		cfg, code := parseFlags()
		if code != 0 {
			t.Fatalf("parse exit: %d", code)
		}
		if cfg.httpTimeout != 300*time.Second {
			t.Fatalf("env http-timeout got %v want %v", cfg.httpTimeout, 300*time.Second)
		}
	})

	t.Run("InvalidFlagValueFallsBack", func(t *testing.T) {
		// invalid value for http-timeout -> should fall back to legacy -timeout (default 30s)
		os.Args = []string{"agentcli.test", "-prompt", "x", "-http-timeout", "not-a-duration"}
		cfg, code := parseFlags()
		if code != 0 {
			t.Fatalf("parse exit: %d", code)
		}
		if cfg.httpTimeout != 30*time.Second { // falls back to legacy default 30s
			t.Fatalf("invalid http-timeout should fall back; got %v want %v", cfg.httpTimeout, 30*time.Second)
		}
	})
}

// https://github.com/hyperifyio/goagent/issues/318
// Edge case: when finish_reason!="length", there must be no retry.
func TestLengthBackoff_NoRetryWhenFinishReasonNotLength(t *testing.T) {
	var calls int
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		calls++
		// Always return a final assistant message with stop
		_ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{
			FinishReason: "stop",
			Message:      oai.Message{Role: oai.RoleAssistant, Content: "ok"},
		}}})
	}))
	defer srv.Close()

	cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", maxSteps: 3, httpTimeout: 2 * time.Second, toolTimeout: 1 * time.Second, temperature: 0}
	var outBuf, errBuf bytes.Buffer
	code := runAgent(cfg, &outBuf, &errBuf)
	if code != 0 {
		t.Fatalf("exit=%d stderr=%s", code, errBuf.String())
	}
	if strings.TrimSpace(outBuf.String()) != "ok" {
		t.Fatalf("unexpected stdout: %q", outBuf.String())
	}
	if calls != 1 {
		t.Fatalf("expected exactly one HTTP call when not length; got %d", calls)
	}
}

// https://github.com/hyperifyio/goagent/issues/318
// Edge case: only one in-step retry even if the second response is also "length".
+func TestLengthBackoff_OnlyOneRetryWhenSecondIsAlsoLength(t *testing.T) { + var bodies [][]byte + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + b, err := io.ReadAll(r.Body) + if err != nil { + t.Fatalf("read body: %v", err) + } + bodies = append(bodies, append([]byte(nil), b...)) + switch len(bodies) { + case 1: + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{FinishReason: "length", Message: oai.Message{Role: oai.RoleAssistant}}}}) + case 2: + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{FinishReason: "length", Message: oai.Message{Role: oai.RoleAssistant}}}}) + default: + // If a third in-step retry occurs, fail deterministically + t.Fatalf("unexpected extra in-step retry; total bodies=%d", len(bodies)) + } + })) + defer srv.Close() + + // Limit to a single agent step so no additional step-level requests are made. + cfg := cliConfig{prompt: "x", systemPrompt: "sys", baseURL: srv.URL, model: "m", maxSteps: 1, httpTimeout: 2 * time.Second, toolTimeout: 1 * time.Second, temperature: 0} + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code == 0 { + t.Fatalf("expected non-zero exit since no final content; stdout=%q stderr=%q", outBuf.String(), errBuf.String()) + } + if len(bodies) != 2 { + t.Fatalf("expected exactly two requests (initial + one retry), got %d", len(bodies)) + } +} + +// https://github.com/hyperifyio/goagent/issues/318 +// Edge case: tool_call flow is unaffected by length backoff logic. +// When the model returns tool_calls, no max_tokens should be injected and +// the conversation should proceed with two HTTP calls (tool step + final). 
func TestLengthBackoff_DoesNotInterfereWithToolCalls(t *testing.T) {
	// Build a helper tool that echoes JSON and succeeds quickly.
	// NOTE(review): this shells out to `go build`, so the test requires a Go
	// toolchain on PATH and is noticeably slower than the pure-HTTP tests.
	dir := t.TempDir()
	helper := filepath.Join(dir, "echo.go")
	if err := os.WriteFile(helper, []byte(`package main
import ("io"; "os"; "fmt")
func main(){b,_:=io.ReadAll(os.Stdin); fmt.Print(string(b))}
`), 0o644); err != nil {
		t.Fatalf("write tool: %v", err)
	}
	bin := filepath.Join(dir, "echo")
	if runtime.GOOS == "windows" {
		bin += ".exe"
	}
	if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil {
		t.Fatalf("build tool: %v: %s", err, string(out))
	}

	// Capture two sequential requests and ensure no max_tokens present in either
	var bodies [][]byte
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		b, err := io.ReadAll(r.Body)
		if err != nil {
			t.Fatalf("read body: %v", err)
		}
		bodies = append(bodies, append([]byte(nil), b...))
		switch len(bodies) {
		case 1:
			// Return a tool_calls response
			_ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{
				FinishReason: "tool_calls",
				Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{{
					ID: "1", Type: "function", Function: oai.ToolCallFunction{Name: "echo", Arguments: `{"x":1}`},
				}}},
			}}})
		case 2:
			// Final content
			_ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{
				FinishReason: "stop",
				Message:      oai.Message{Role: oai.RoleAssistant, Content: "done"},
			}}})
		default:
			t.Fatalf("unexpected extra HTTP request; bodies=%d", len(bodies))
		}
	}))
	defer srv.Close()

	// Register tool manifest through in-memory registry path
	manifestPath := filepath.Join(dir, "tools.json")
	m := map[string]any{
		"tools": []map[string]any{{
			"name":        "echo",
			"description": "echo",
			"schema":      map[string]any{"type": "object"},
			"command":     []string{bin},
			"timeoutSec":  5,
		}},
	}
	data, err := json.Marshal(m)
	if err != nil {
		t.Fatalf("marshal manifest: %v", err)
	}
	if err := os.WriteFile(manifestPath, data, 0o644); err != nil {
		t.Fatalf("write manifest: %v", err)
	}

	cfg := cliConfig{prompt: "x", toolsPath: manifestPath, systemPrompt: "sys", baseURL: srv.URL, model: "m", maxSteps: 3, httpTimeout: 2 * time.Second, toolTimeout: 2 * time.Second, temperature: 0}
	var outBuf, errBuf bytes.Buffer
	code := runAgent(cfg, &outBuf, &errBuf)
	if code != 0 {
		t.Fatalf("exit=%d stderr=%s", code, errBuf.String())
	}
	if strings.TrimSpace(outBuf.String()) != "done" {
		t.Fatalf("unexpected stdout: %q", outBuf.String())
	}
	if len(bodies) != 2 {
		t.Fatalf("expected exactly two HTTP calls (tool step + final), got %d", len(bodies))
	}
	// Neither request should include max_tokens
	if strings.Contains(string(bodies[0]), "\"max_tokens\"") || strings.Contains(string(bodies[1]), "\"max_tokens\"") {
		t.Fatalf("max_tokens must be omitted for tool_call flow; got bodies: %s | %s", string(bodies[0]), string(bodies[1]))
	}
}
diff --git a/cmd/agentcli/prep_tools.go b/cmd/agentcli/prep_tools.go
new file mode 100644
index 0000000..5fac8af
--- /dev/null
+++ b/cmd/agentcli/prep_tools.go
@@ -0,0 +1,192 @@
package main

import (
	"encoding/json"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"

	"github.com/hyperifyio/goagent/internal/oai"
)

// appendPreStageBuiltinToolOutputs executes a restricted set of in-process, read-only
// tools for the pre-stage and appends their outputs (or deterministic error JSON)
// to the conversation messages. Supported tools:
// - fs.read_file {path:string}
// - fs.list_dir {path:string}
// - fs.stat {path:string}
// - env.get {key:string}
// - os.info {}
//
// All paths must be repo-relative (no absolute paths, no parent traversal).
+func appendPreStageBuiltinToolOutputs(messages []oai.Message, assistantMsg oai.Message, _ cliConfig) []oai.Message { + for _, tc := range assistantMsg.ToolCalls { + name := strings.TrimSpace(tc.Function.Name) + argsJSON := strings.TrimSpace(tc.Function.Arguments) + if argsJSON == "" { + argsJSON = "{}" + } + + // Parse arguments into a generic map + var args map[string]any + if err := json.Unmarshal([]byte(argsJSON), &args); err != nil { + messages = append(messages, oai.Message{ + Role: oai.RoleTool, + Name: name, + ToolCallID: tc.ID, + Content: mustJSON(map[string]string{"error": "invalid arguments"}), + }) + continue + } + + // Dispatch by name + switch name { + case "fs.read_file": + content, err := prepReadFile(args) + if err != nil { + messages = append(messages, oai.Message{Role: oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(map[string]string{"error": err.Error()})}) + } else { + messages = append(messages, oai.Message{Role: oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(map[string]any{"content": content})}) + } + case "fs.list_dir": + entries, err := prepListDir(args) + if err != nil { + messages = append(messages, oai.Message{Role: oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(map[string]string{"error": err.Error()})}) + } else { + messages = append(messages, oai.Message{Role: oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(map[string]any{"entries": entries})}) + } + case "fs.stat": + st, err := prepStat(args) + if err != nil { + messages = append(messages, oai.Message{Role: oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(map[string]string{"error": err.Error()})}) + } else { + messages = append(messages, oai.Message{Role: oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(st)}) + } + case "env.get": + key := "" + if kv, ok := args["key"].(string); ok { + key = kv + } + val := os.Getenv(strings.TrimSpace(key)) + messages = append(messages, oai.Message{Role: 
oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(map[string]string{"value": val})}) + case "os.info": + messages = append(messages, oai.Message{Role: oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(map[string]string{"goos": runtime.GOOS, "goarch": runtime.GOARCH})}) + default: + // Unknown or disallowed tool names deterministically error + messages = append(messages, oai.Message{Role: oai.RoleTool, Name: name, ToolCallID: tc.ID, Content: mustJSON(map[string]string{"error": fmt.Sprintf("unknown tool: %s", name)})}) + } + } + return messages +} + +// mustJSON marshals v to a compact one-line JSON string. Falls back to a minimal error JSON. +func mustJSON(v any) string { + b, err := json.Marshal(v) + if err != nil { + return "{\"error\":\"internal error\"}" + } + // Collapse whitespace just in case + s := string(b) + s = strings.ReplaceAll(s, "\n", " ") + s = strings.ReplaceAll(s, "\t", " ") + return strings.Join(strings.Fields(s), " ") +} + +func requireRepoRelativePath(args map[string]any) (string, error) { + raw := "" + if v, ok := args["path"].(string); ok { + raw = v + } + if strings.TrimSpace(raw) == "" { + return "", fmt.Errorf("path is required") + } + // Reject absolute paths + if filepath.IsAbs(raw) { + return "", fmt.Errorf("path must be repo-relative") + } + // Clean and forbid parent traversal + cleaned := filepath.Clean(strings.ReplaceAll(raw, "\\", "/")) + if strings.HasPrefix(cleaned, "../") || cleaned == ".." 
{ + return "", fmt.Errorf("path must not contain parent traversal") + } + // Resolve against current working directory (acts as repo root in tests/CLI) + abs, err := filepath.Abs(cleaned) + if err != nil { + return "", fmt.Errorf("resolve path: %w", err) + } + return abs, nil +} + +func prepReadFile(args map[string]any) (string, error) { + abs, err := requireRepoRelativePath(args) + if err != nil { + return "", err + } + // Read up to a reasonable size to avoid giant outputs; 256 KiB cap + const capBytes = 256 * 1024 + data, err := os.ReadFile(abs) + if err != nil { + return "", err + } + if len(data) > capBytes { + data = data[:capBytes] + } + // Return as UTF-8 string; lossy but sufficient for read-only inspection + return string(data), nil +} + +type listEntry struct { + Name string `json:"name"` + Type string `json:"type"` // file|dir|other +} + +func prepListDir(args map[string]any) ([]listEntry, error) { + abs, err := requireRepoRelativePath(args) + if err != nil { + return nil, err + } + entries, err := os.ReadDir(abs) + if err != nil { + return nil, err + } + out := make([]listEntry, 0, len(entries)) + for _, e := range entries { + typ := "file" + if e.IsDir() { + typ = "dir" + } + // Detect other types best-effort + if !e.IsDir() { + if info, ierr := e.Info(); ierr == nil { + if (info.Mode() & fs.ModeSymlink) != 0 { + typ = "other" + } + } + } + out = append(out, listEntry{Name: e.Name(), Type: typ}) + } + // Deterministic order + sort.Slice(out, func(i, j int) bool { return out[i].Name < out[j].Name }) + return out, nil +} + +type statView struct { + Size int64 `json:"size"` + IsDir bool `json:"is_dir"` +} + +func prepStat(args map[string]any) (statView, error) { + abs, err := requireRepoRelativePath(args) + if err != nil { + return statView{}, err + } + fi, err := os.Stat(abs) + if err != nil { + return statView{}, err + } + return statView{Size: fi.Size(), IsDir: fi.IsDir()}, nil +} diff --git a/cmd/agentcli/tools_integration_test.go 
b/cmd/agentcli/tools_integration_test.go new file mode 100644 index 0000000..714a986 --- /dev/null +++ b/cmd/agentcli/tools_integration_test.go @@ -0,0 +1,604 @@ +//nolint:errcheck // Integration tests may ignore some error returns in setup/teardown and JSON encoders. +package main + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/hyperifyio/goagent/internal/oai" + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// copyFile copies a file from src to dst with 0755 mode and checks errors. +func copyFile(t *testing.T, src, dst string) { + t.Helper() + in, err := os.Open(src) + if err != nil { + t.Fatalf("open %s: %v", src, err) + } + defer func() { + if cerr := in.Close(); cerr != nil { + t.Fatalf("close in: %v", cerr) + } + }() + out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o755) + if err != nil { + t.Fatalf("create %s: %v", dst, err) + } + defer func() { + if cerr := out.Close(); cerr != nil { + t.Fatalf("close out: %v", cerr) + } + }() + if _, err := io.Copy(out, in); err != nil { + t.Fatalf("copy %s -> %s: %v", src, dst, err) + } +} + +// newTwoStepServer returns a server that first requests tool calls then returns final content. 
+func newTwoStepServer(t *testing.T, targetRelPath, contentB64, model string) *httptest.Server { + t.Helper() + var step int + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + t.Fatalf("unexpected method: %s", r.Method) + } + if r.URL.Path != "/chat/completions" { + t.Fatalf("unexpected path: %s", r.URL.Path) + } + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + step++ + switch step { + case 1: + // Verify schemas are advertised for both tools + have := map[string]bool{} + for _, tl := range req.Tools { + have[tl.Function.Name] = true + if len(bytes.TrimSpace(tl.Function.Parameters)) == 0 { + t.Fatalf("tool %q missing schema parameters", tl.Function.Name) + } + } + if !have["fs_write_file"] || !have["fs_read_file"] { + t.Fatalf("advertised tools missing: %v", have) + } + // Respond with two tool calls: write then read + resp := oai.ChatCompletionsResponse{ + ID: "cmpl-1", + Object: "chat.completion", + Created: time.Now().Unix(), + Model: req.Model, + Choices: []oai.ChatCompletionsResponseChoice{{ + Index: 0, + FinishReason: "tool_calls", + Message: oai.Message{ + Role: oai.RoleAssistant, + ToolCalls: []oai.ToolCall{ + {ID: "1", Type: "function", Function: oai.ToolCallFunction{Name: "fs_write_file", Arguments: `{"path":"` + targetRelPath + `","contentBase64":"` + contentB64 + `"}`}}, + {ID: "2", Type: "function", Function: oai.ToolCallFunction{Name: "fs_read_file", Arguments: `{"path":"` + targetRelPath + `"}`}}, + }, + }, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp step1: %v", err) + } + case 2: + // Final message + resp := oai.ChatCompletionsResponse{ + ID: "cmpl-2", + Object: "chat.completion", + Created: time.Now().Unix(), + Model: model, + Choices: []oai.ChatCompletionsResponseChoice{{ + Index: 0, + FinishReason: "stop", + Message: oai.Message{Role: 
oai.RoleAssistant, Content: "ok"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode resp step2: %v", err) + } + default: + t.Fatalf("unexpected extra request step=%d", step) + } + })) +} + +// Ensures pre-stage honors a nested -prep-tools manifest path and executes the referenced tool. +// The server verifies that the second request (main stage) includes the tool output produced +// during pre-stage, proving nested manifest resolution and execution. +func TestPrep_Integration_NestedManifestResolution(t *testing.T) { + tmp := t.TempDir() + // Create nested manifest directory with canonical tools/bin layout + nested := filepath.Join(tmp, "sub", "manifest") + binDir := filepath.Join(nested, "tools", "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatalf("mkdir tools/bin: %v", err) + } + + // Build a tiny tool that echoes a known JSON to stdout + src := filepath.Join(tmp, "prep_ok.go") + if err := os.WriteFile(src, []byte(`package main +import ("encoding/json"; "io"; "os") +func main(){_,_ = io.ReadAll(os.Stdin); _ = json.NewEncoder(os.Stdout).Encode(map[string]any{"from":"prep","ok":true})} +`), 0o644); err != nil { + t.Fatalf("write src: %v", err) + } + toolPath := filepath.Join(binDir, "prep_ok") + if runtime.GOOS == "windows" { + toolPath += ".exe" + } + if out, err := exec.Command("go", "build", "-o", toolPath, src).CombinedOutput(); err != nil { + t.Fatalf("build tool: %v: %s", err, string(out)) + } + + // Write a manifest that references the tool with a relative ./tools/bin path + manPath := filepath.Join(nested, "tools.json") + manifest := map[string]any{ + "tools": []map[string]any{{ + "name": "prep_ok", + "description": "emit ok json", + "schema": map[string]any{"type": "object", "additionalProperties": false}, + "command": []string{"./tools/bin/prep_ok"}, + "timeoutSec": 5, + }}, + } + b, err := json.Marshal(manifest) + if err != nil { + t.Fatalf("marshal manifest: %v", err) + } + if err := 
os.WriteFile(manPath, b, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + // Fake server: first response triggers pre-stage tool call; second validates tool output present + step := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + step++ + switch step { + case 1: + // Pre-stage: request tool_calls to our external tool + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "tool_calls", + Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{{ID: "t1", Type: "function", Function: oai.ToolCallFunction{Name: "prep_ok", Arguments: "{}"}}}}, + }}}) + case 2: + // Main stage: verify tool output is present in messages + var saw bool + for _, m := range req.Messages { + if m.Role == oai.RoleTool && m.Name == "prep_ok" && bytes.Contains([]byte(m.Content), []byte("\"ok\":true")) { + saw = true + break + } + } + if !saw { + t.Fatalf("expected prep_ok tool output in main request messages") + } + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Content: "done"}, + }}}) + default: + t.Fatalf("unexpected extra request step=%d", step) + } + })) + defer srv.Close() + + // Run the agent end-to-end with pre-stage external tools enabled and nested -prep-tools manifest + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{ + "-prompt", "x", + "-base-url", srv.URL, + "-model", "m", + "-prep-tools-allow-external", + "-prep-tools", manPath, + }, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errBuf.String()) + } + if got := outBuf.String(); got != "done\n" { + t.Fatalf("unexpected stdout: %q", got) + } +} + +// https://github.com/hyperifyio/goagent/issues/89 +func 
TestRunAgent_AdvertisesSchemas_AndExecutesFsWriteThenRead(t *testing.T) { + // Build required tool binaries into a temp dir under canonical layout tools/bin + tmp := t.TempDir() + binDir := filepath.Join(tmp, "tools", "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatalf("mkdir tools/bin: %v", err) + } + fsWriteBin := filepath.Join(binDir, "fs_write_file") + fsReadBin := filepath.Join(binDir, "fs_read_file") + // Use the canonical test helper to build tool binaries + srcWrite := testutil.BuildTool(t, "fs_write_file") + srcRead := testutil.BuildTool(t, "fs_read_file") + // Copy built binaries into the expected temp location with canonical names + copyFile(t, srcWrite, fsWriteBin) + copyFile(t, srcRead, fsReadBin) + + // Create a tools manifest referencing the built binaries + toolsPath := filepath.Join(tmp, "tools.json") + manifest := map[string]any{ + "tools": []map[string]any{ + { + "name": "fs_write_file", + "description": "Atomically write a file (base64)", + "schema": map[string]any{ + "type": "object", + "additionalProperties": false, + "required": []string{"path", "contentBase64"}, + "properties": map[string]any{ + "path": map[string]any{"type": "string"}, + "contentBase64": map[string]any{"type": "string"}, + "createModeOctal": map[string]any{"type": "string"}, + }, + }, + // Use relative canonical path so manifest validation enforces ./tools/bin prefix + "command": []string{"./tools/bin/fs_write_file"}, + "timeoutSec": 5, + }, + { + "name": "fs_read_file", + "description": "Read a file (base64)", + "schema": map[string]any{ + "type": "object", + "additionalProperties": false, + "required": []string{"path"}, + "properties": map[string]any{ + "path": map[string]any{"type": "string"}, + "offsetBytes": map[string]any{"type": "integer"}, + "maxBytes": map[string]any{"type": "integer"}, + }, + }, + // Use relative canonical path so manifest validation enforces ./tools/bin prefix + "command": []string{"./tools/bin/fs_read_file"}, + "timeoutSec": 
5, + }, + }, + } + b, err := json.Marshal(manifest) + if err != nil { + t.Fatalf("marshal manifest: %v", err) + } + if err := os.WriteFile(toolsPath, b, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + // Change working directory to the temp dir so relative ./tools/bin/* resolve + oldWD, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + if err := os.Chdir(tmp); err != nil { + t.Fatalf("chdir tmp: %v", err) + } + t.Cleanup(func() { + if err := os.Chdir(oldWD); err != nil { + t.Errorf("cleanup chdir back: %v", err) + } + }) + + // Prepare the file path and content for the tool calls (relative to current working directory) + targetRelPath := "tmp_tools_it_demo.txt" + // Ensure cleanup + t.Cleanup(func() { + if err := os.Remove(targetRelPath); err != nil && !os.IsNotExist(err) { + t.Errorf("cleanup remove: %v", err) + } + }) + content := []byte("hello world") + contentB64 := base64.StdEncoding.EncodeToString(content) + + // Fake server: first response asserts tools advertised and returns two tool calls; + // second response returns the final assistant message + srv := newTwoStepServer(t, targetRelPath, contentB64, "test") + defer srv.Close() + + cfg := cliConfig{ + prompt: "write and read a file", + toolsPath: toolsPath, + systemPrompt: "sys", + baseURL: srv.URL, + apiKey: "", + model: "test", + maxSteps: 4, + timeout: 10 * time.Second, + httpTimeout: 10 * time.Second, + toolTimeout: 10 * time.Second, + temperature: 0, + debug: false, + } + + var outBuf, errBuf bytes.Buffer + code := runAgent(cfg, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("expected exit code 0, got %d; stderr=%s", code, errBuf.String()) + } + if got := outBuf.String(); got != "ok\n" { + t.Fatalf("unexpected stdout: %q", got) + } + // Verify the file was created with expected content + got, err := os.ReadFile(targetRelPath) + if err != nil { + t.Fatalf("read created file: %v", err) + } + if string(got) != string(content) { + t.Fatalf("file content mismatch: got 
%q want %q", string(got), string(content)) + } +} + +// Deterministic end-to-end acceptance: pre-stage returns two read-only tool calls +// and a non-final assistant channel; the agent executes built-in pre-stage tools, +// routes the non-final channel to stderr under -verbose, and the main call completes. +// nolint:gocyclo // End-to-end integration test favors readability over low cyclomatic complexity. +func TestAcceptance_EndToEnd_PrepReadonlyTools_ChannelRouting_AndMainCompletion(t *testing.T) { + // Work in an isolated temp directory and create a small file for fs.read_file + tmp := t.TempDir() + oldWD, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + if err := os.Chdir(tmp); err != nil { + t.Fatalf("chdir tmp: %v", err) + } + t.Cleanup(func() { _ = os.Chdir(oldWD) }) + + if err := os.WriteFile("prestage.txt", []byte("hi"), 0o644); err != nil { + t.Fatalf("write prestage.txt: %v", err) + } + + // Two-step mock server: pre-stage -> tool_calls; main -> verify tool outputs present + step := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/chat/completions" { + t.Fatalf("unexpected request %s %s", r.Method, r.URL.Path) + } + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + step++ + switch step { + case 1: + // Pre-stage: return two built-in read-only tool calls with a non-final channel + resp := oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{ + Role: oai.RoleAssistant, + Channel: "critic", + Content: "pre-critic", + ToolCalls: []oai.ToolCall{ + {ID: "t1", Type: "function", Function: oai.ToolCallFunction{Name: "fs.read_file", Arguments: `{"path":"prestage.txt"}`}}, + {ID: "t2", Type: "function", Function: oai.ToolCallFunction{Name: "os.info", Arguments: `{}`}}, + }, + }, + }}} + _ = json.NewEncoder(w).Encode(resp) 
+ case 2: + // Main call: assert pre-stage tool outputs were appended to messages + var haveRead, haveOS bool + for _, m := range req.Messages { + if m.Role == oai.RoleTool && m.Name == "fs.read_file" && strings.Contains(m.Content, `"content":"hi"`) { + haveRead = true + } + if m.Role == oai.RoleTool && m.Name == "os.info" && strings.Contains(m.Content, "goos") { + haveOS = true + } + } + if !haveRead || !haveOS { + t.Fatalf("expected pre-stage tool outputs present (fs.read_file=%v os.info=%v)", haveRead, haveOS) + } + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Channel: "final", Content: "OK"}, + }}}) + default: + t.Fatalf("unexpected extra request step=%d", step) + } + })) + defer srv.Close() + + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{ + "-prompt", "x", + "-base-url", srv.URL, + "-model", "m", + "-max-steps", "1", + "-verbose", + }, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errBuf.String()) + } + if got := outBuf.String(); got != "OK\n" { + t.Fatalf("unexpected stdout: %q", got) + } + if !strings.Contains(errBuf.String(), "pre-critic") { + t.Fatalf("stderr did not contain pre-stage non-final channel content; got=%q", errBuf.String()) + } +} + +// End-to-end agent integration for img_create tool. +// Spins an Images API mock expecting POST /v1/images/generations with the canonical body, +// builds/copies the img_create tool under ./tools/bin, and runs the agent against a model +// mock that first requests a tool call to img_create (saving under out/) and then returns +// a final assistant message summarizing the saved path. Asserts one PNG is written and +// stdout equals the final assistant content. +// nolint:gocyclo // End-to-end integration test favors readability over low cyclomatic complexity. 
+func TestAgent_EndToEnd_ImageCreate_SavesPNG_AndPrintsFinal(t *testing.T) { + // Use an isolated temp directory so relative save paths land here + // Do not chdir yet; building the tool needs repo root as CWD + tmp := t.TempDir() + oldWD, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + + // 1x1 transparent PNG base64 (same as tool unit test) + png1x1 := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO9cFmgAAAAASUVORK5CYII=" + + // Images API mock: verify request body and return one b64 image + var imagesHits int + imgSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/v1/images/generations" { + t.Fatalf("unexpected images request: %s %s", r.Method, r.URL.Path) + } + imagesHits++ + var req struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + N int `json:"n"` + Size string `json:"size"` + ResponseFormat string `json:"response_format"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("images decode: %v", err) + } + if req.Model != "gpt-image-1" || req.Prompt != "tiny-pixel" || req.N != 1 || req.Size != "1024x1024" || req.ResponseFormat != "b64_json" { + t.Fatalf("unexpected images payload: %+v", req) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": []map[string]any{{"b64_json": png1x1}}, + "model": "gpt-image-1", + }) + })) + defer imgSrv.Close() + + // Build img_create tool and place it under ./tools/bin in temp repo + binDir := filepath.Join(tmp, "tools", "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatalf("mkdir tools/bin: %v", err) + } + srcImg := testutil.BuildTool(t, "img_create") + dstImg := filepath.Join(binDir, "img_create") + if runtime.GOOS == "windows" { + dstImg += ".exe" + } + copyFile(t, srcImg, dstImg) + + // tools.json manifest with envPassthrough for Images API + toolsPath := filepath.Join(tmp, "tools.json") + manifest := 
map[string]any{ + "tools": []map[string]any{{ + "name": "img_create", + "description": "Generate image(s) with OpenAI Images API and save to repo or return base64", + "schema": map[string]any{ + "type": "object", + "additionalProperties": false, + "required": []string{"prompt"}, + "properties": map[string]any{ + "prompt": map[string]any{"type": "string"}, + "n": map[string]any{"type": "integer", "minimum": 1, "maximum": 4, "default": 1}, + "size": map[string]any{"type": "string", "pattern": "^\\d{3,4}x\\d{3,4}$", "default": "1024x1024"}, + "model": map[string]any{"type": "string", "default": "gpt-image-1"}, + "return_b64": map[string]any{"type": "boolean", "default": false}, + "save": map[string]any{ + "type": "object", + "additionalProperties": false, + "required": []string{"dir"}, + "properties": map[string]any{ + "dir": map[string]any{"type": "string"}, + "basename": map[string]any{"type": "string", "default": "img"}, + "ext": map[string]any{"type": "string", "enum": []any{"png"}, "default": "png"}, + }, + }, + }, + }, + "command": []string{"./tools/bin/img_create"}, + "timeoutSec": 120, + "envPassthrough": []string{"OAI_API_KEY", "OAI_BASE_URL", "OAI_IMAGE_BASE_URL", "OAI_HTTP_TIMEOUT"}, + }}, + } + if b, err := json.Marshal(manifest); err != nil { + t.Fatalf("marshal manifest: %v", err) + } else if err := os.WriteFile(toolsPath, b, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + // Model server: step1 -> tool_calls: img_create saving under out/; step2 -> final message + step := 0 + expectedSaved := filepath.ToSlash(filepath.Join("out", "img_001.png")) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/chat/completions" { + t.Fatalf("unexpected request %s %s", r.Method, r.URL.Path) + } + var req oai.ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + step++ + switch step { + case 1: 
+ // Request a single tool call to img_create with deterministic args + resp := oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + FinishReason: "tool_calls", + Message: oai.Message{Role: oai.RoleAssistant, ToolCalls: []oai.ToolCall{{ + ID: "ic1", + Type: "function", + Function: oai.ToolCallFunction{ + Name: "img_create", + Arguments: `{"prompt":"tiny-pixel","n":1,"size":"1024x1024","save":{"dir":"out"}}`, + }, + }}}, + }}} + _ = json.NewEncoder(w).Encode(resp) + case 2: + // Final assistant message summarizes the saved path + _ = json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Choices: []oai.ChatCompletionsResponseChoice{{ + Message: oai.Message{Role: oai.RoleAssistant, Content: "saved " + expectedSaved}, + }}}) + default: + t.Fatalf("unexpected extra request step=%d", step) + } + })) + defer srv.Close() + + // Ensure the tool sees the Images API base and a dummy API key + t.Setenv("OAI_IMAGE_BASE_URL", imgSrv.URL) + t.Setenv("OAI_API_KEY", "test-123") + + // Now chdir so relative outputs (e.g., out/) are created under tmp + if err := os.Chdir(tmp); err != nil { + t.Fatalf("chdir tmp: %v", err) + } + t.Cleanup(func() { _ = os.Chdir(oldWD) }) + + var outBuf, errBuf bytes.Buffer + code := cliMain([]string{ + "-prompt", "use img_create to save under out/", + "-tools", toolsPath, + "-prep-enabled", "false", + "-base-url", srv.URL, + "-model", "m", + "-max-steps", "4", + "-http-timeout", "5s", + "-tool-timeout", "5s", + "-debug", + }, &outBuf, &errBuf) + if code != 0 { + t.Fatalf("exit=%d stderr=%s", code, errBuf.String()) + } + if got := strings.TrimSpace(outBuf.String()); got != "saved "+expectedSaved { + t.Fatalf("unexpected stdout: %q", got) + } + // Assert one PNG exists at the expected path + if fi, err := os.Stat(expectedSaved); err != nil || fi.IsDir() { + t.Fatalf("expected saved PNG at %s (imagesHits=%d, stderr=%q)", expectedSaved, imagesHits, errBuf.String()) + } +} diff --git a/docs/README.md b/docs/README.md new file mode 
100644 index 0000000..b9136f5 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,65 @@ +# Documentation Index + +This docs index helps you navigate architecture notes and diagrams. + +- ADR-0001: Minimal Agent CLI — design context, decisions, and contracts. + - Link: [docs/adr/0001-minimal-agent-cli.md](adr/0001-minimal-agent-cli.md) +- ADR-0002: Unrestricted toolbelt (files + network) — risks, contracts, and guidance. + - Link: [docs/adr/0002-unrestricted-toolbelt.md](adr/0002-unrestricted-toolbelt.md) +- ADR-0003: Toolchain & Lint Policy (Go + golangci-lint) — pin Go via go.mod and a known-good golangci-lint; CI and local workflows align. + - Link: [docs/adr/0003-toolchain-and-lint-policy.md](adr/0003-toolchain-and-lint-policy.md) +- ADR-0004: Default LLM Call Policy — default temperature 1.0, capability-based omission, one-knob rule, and observability fields. + - Link: [docs/adr/0004-default-llm-policy.md](adr/0004-default-llm-policy.md) +- ADR-0005: Harmony pre-processing and channel-aware output — pre-stage HTTP call, parallel read-only tools, validator/audit with stage tags, and deterministic channel routing. + - Link: [docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md](adr/0005-harmony-pre-processing-and-channel-aware-output.md) + - ADR-0006: Image generation tool (img_create) — minimal Images API integration, repo‑relative saves, env passthrough, and transcript hygiene. + - Link: [docs/adr/0006-image-generation-tool-img_create.md](adr/0006-image-generation-tool-img_create.md) + - ADR-0010: Adopt SearXNG & network research toolbelt (CLI-only) — credible web discovery with provenance via SearXNG and a safe, CLI-only toolbelt. + - Link: [docs/adr/0010-research-tools-searxng.md](adr/0010-research-tools-searxng.md) + - ADR-0011: State bundle schema — versioned on-disk JSON snapshots for reproducible runs. 
+ - Link: [docs/adr/0011-state-bundle-schema.md](adr/0011-state-bundle-schema.md) + - ADR-0012: Persist and refine execution state via -state-dir — file-based snapshots with scopes, restore-before-prep, and refinement controls. + - Link: [docs/adr/0012-state-dir-persistence.md](adr/0012-state-dir-persistence.md) +- Sequence diagrams: agent flow and toolbelt interactions. + - Link: [docs/diagrams/agentcli-seq.md](diagrams/agentcli-seq.md) + - Link: [docs/diagrams/toolbelt-seq.md](diagrams/toolbelt-seq.md) + - Link: [docs/diagrams/harmony-prep-seq.md](diagrams/harmony-prep-seq.md) + - Link: [docs/diagrams/research-pipeline.md](diagrams/research-pipeline.md) + - Link: [docs/diagrams/prestage_flow.md](diagrams/prestage_flow.md) + +- Architecture: Module boundaries and allowed imports between layers. + - Link: [docs/architecture/module-boundaries.md](architecture/module-boundaries.md) + +- Tools manifest reference: precise `tools.json` schema and mapping to OpenAI tools. + - Link: [docs/reference/tools-manifest.md](reference/tools-manifest.md) + + - CLI reference: complete flag list, env precedence, exit codes. + - Link: [docs/reference/cli-reference.md](reference/cli-reference.md) + +- Tool reference: Image generation tool (`img_create`). + - Link: [docs/reference/img_create.md](reference/img_create.md) +- Tool reference: HTTP fetch (`http_fetch`). + - Link: [docs/reference/http_fetch.md](reference/http_fetch.md) +- Tool reference: SearXNG search (`searxng_search`). + - Link: [docs/reference/searxng_search.md](reference/searxng_search.md) +- Tool reference: Crossref search (`crossref_search`). + - Link: [docs/reference/crossref_search.md](reference/crossref_search.md) + - Tool reference: PDF extract (`pdf_extract`). + - Link: [docs/reference/pdf_extract.md](reference/pdf_extract.md) + - Tool reference: Wayback lookup (`wayback_lookup`). + - Link: [docs/reference/wayback_lookup.md](reference/wayback_lookup.md) + +- Security: Threat model and trust boundaries. 
+ - Link: [docs/security/threat-model.md](security/threat-model.md) + +- Runbooks: Troubleshooting common errors and fixes. + - Link: [docs/runbooks/troubleshooting.md](runbooks/troubleshooting.md) + +- Migrations: Tools layout (legacy → canonical `tools/cmd/*` + `tools/bin/*`). + - Link: [docs/migrations/tools-layout.md](migrations/tools-layout.md) + +Additional guides will be added here as they are created. + +Model parameter compatibility + +Some reasoning-oriented models may not accept sampling parameters. The agent omits `temperature` automatically for such models while keeping the default of 1.0 for compatible families (e.g., GPT-5 variants). This avoids API errors and preserves expected defaults where applicable. diff --git a/docs/adr/0001-minimal-agent-cli.md b/docs/adr/0001-minimal-agent-cli.md new file mode 100644 index 0000000..f051d18 --- /dev/null +++ b/docs/adr/0001-minimal-agent-cli.md @@ -0,0 +1,31 @@ +# ADR-0001 Minimal Agent CLI + +## Context +We want a small, non-interactive CLI that talks to an OpenAI-compatible API (Chat Completions) and can run a small set of explicitly allowed local tools. The CLI must be vendor-agnostic and rely only on `net/http` for the API. 
+ +## Options considered +- Go vs Python vs Node: Go chosen for static binary and process control +- SDK vs raw HTTP: raw HTTP to keep provider-agnostic +- Streaming vs simple: no streaming in MVP + +## Decision +- Implement a single-shot run loop using POST `/v1/chat/completions` +- Tools are declared in `tools.json` with JSON Schema; invoked via argv with JSON on stdin; output is JSON on stdout +- Per-call timeouts enforced for HTTP and tools + +## Consequences +- No streaming; sequential tool execution only in MVP +- Model/tool arguments treated as untrusted input; no shells + +## Contracts +- Flags: `-prompt` (required), `-tools` (path), `-system`, `-base-url`, `-api-key`, `-model`, `-max-steps`, `-timeout`, `-temp`, `-debug` +- Tool I/O: stdin receives raw JSON args; stdout returns single-line JSON; on error, CLI maps to `{"error":"..."}` + +## Issue +Link to the canonical GitHub issue once created. + +## Diagram +See `docs/diagrams/agentcli-seq.md` for the sequence diagram illustrating the loop. + +## Related documentation +See the docs index at `docs/README.md` for navigation to related guides and diagrams. For the tool manifest contract, refer to `docs/reference/tools-manifest.md`. diff --git a/docs/adr/0002-unrestricted-toolbelt.md b/docs/adr/0002-unrestricted-toolbelt.md new file mode 100644 index 0000000..c66b183 --- /dev/null +++ b/docs/adr/0002-unrestricted-toolbelt.md @@ -0,0 +1,37 @@ +# ADR-0002: Unrestricted toolbelt (files + network) + +## Status +Accepted + +## Context +The project exposes an extensible toolbelt that the model may invoke. Some tools (e.g., `exec`, file system read/write/move/rm, search) provide broad capabilities that, when enabled, amount to remote code execution with potential network access. We must document the decision, contracts, and consequences so users enable these tools deliberately and operate them safely. 
+ +## Decision +Introduce an "unrestricted toolbelt" set comprising file-system utilities (`fs_read_file`, `fs_write_file`, `fs_append_file`, `fs_move`, `fs_rm`, `fs_search`, future list ops) and the generic `exec` tool. The CLI will advertise these tools via the OpenAI-compatible tools array, and execute them argv-only with JSON stdin/stdout and strict per-call timeouts. + +## Consequences +- Enabling the unrestricted toolbelt is opt-in via `tools.json` and carries RCE risk. +- Operators must sandbox execution (container/jail/VM), run with least privilege, and use a throwaway working directory. +- Secrets must be injected via environment/CI secrets and never committed; logs should be redacted where configured. +- The runner maps tool failures to a deterministic JSON error contract and applies timeouts and a minimal environment. +- Documentation and examples clearly flag the risks and provide copy-paste usage guarded by warnings. + +## JSON contracts (summary) +- Exec tool stdin: + - `{ "cmd": "string", "args": ["..."], "cwd?": "string", "env?": {"K":"V"}, "stdin?": "string", "timeoutSec?": int }` + - Stdout (single line JSON): `{ "exitCode": int, "stdout": "string", "stderr": "string", "durationMs": int }` +- File tools use repo-relative paths. Writes are atomic (temp+rename) where applicable; rm supports recursive and force flags; search supports literal/regex/globs with truncation. + +## Links +- Threat model: `../security/threat-model.md` +- Tools reference: `../reference/tools-manifest.md` +- Sequence diagrams: + - Agent loop: `../diagrams/agentcli-seq.md` + - Toolbelt interactions: `../diagrams/toolbelt-seq.md` + +## Alternatives considered +- Disallow `exec` entirely: safer by default, but blocks many real workflows. Decided to allow but keep opt-in and loudly documented. +- Shell execution: rejected due to injection risk; we use argv-only process spawn. + +## Notes +Changes to tool contracts must update this ADR, the README, and relevant tests. 
PRs modifying tool behavior should reference this ADR-0002 in their description. diff --git a/docs/adr/0003-toolchain-and-lint-policy.md b/docs/adr/0003-toolchain-and-lint-policy.md new file mode 100644 index 0000000..27059c1 --- /dev/null +++ b/docs/adr/0003-toolchain-and-lint-policy.md @@ -0,0 +1,39 @@ +# ADR-0003 Toolchain & Lint Policy (Go + golangci-lint) + +## Context +Reliable builds and linting require deterministic tool versions across developer machines and CI. We have seen failures such as “golangci-lint: unsupported export data (internal/goarch version: 2)” when the linter and Go toolchain are mismatched. The project already documents gates (lint, vet, format, tests) and needs a pinned policy so results are reproducible on Linux, macOS, and Windows. + +## Options considered +- Floating Go version in CI vs using the module’s `go.mod` directive +- Using `golangci-lint` GitHub Action vs invoking a pinned binary from the repository workflow +- Installing `golangci-lint` ad‑hoc on PATH vs a deterministic location (`./bin`) or `$(go env GOPATH)/bin` +- Allowing linter auto‑updates vs pinning to a known‑good version and bumping deliberately + +## Decision +- CI and local workflows must use the Go version declared by `go.mod`. + - CI config uses `actions/setup-go@v5` with `go-version-file: go.mod` and prints `go version` for traceability. +- Linting is performed with a pinned `golangci-lint` version known to be compatible with the current Go line. + - The Makefile installs `golangci-lint` deterministically (platform‑aware) and invokes that pinned binary, not whatever happens to be on PATH. +- Local `make lint` runs fast‑fail prechecks (tool presence, minimum version), runs formatting checks, vet, and the linter, and enforces repository hygiene checks (`check-tools-paths`, `verify-manifest-paths`). +- Upgrades to the Go toolchain or `golangci-lint` occur together via a PR that updates `go.mod`, the Makefile pin, and CI, with a passing green build. 
+ +## Consequences +- Deterministic lint results across OSes; reduced “export data” and analyzer mismatch errors. +- Slightly slower first `make lint` due to on‑demand installation of the pinned linter; subsequent runs are fast. +- Version bumps require coordination but are safer and traceable. + +## Rollback +- Revert the Makefile and CI workflow edits to the previous known‑good pins. Because the policy is additive, reverting is low‑risk. + +## Implementation notes +- CI prints tool versions (Go and golangci-lint) in logs for auditability. +- `make lint` includes repository‑specific checks (`check-tools-paths`, `verify-manifest-paths`) to keep tool layout consistent. +- Developer docs and runbooks include a section for the common failure mode “unsupported export data” with explicit resolution steps. + +## Issue +Link to the canonical GitHub issue once created. + +## Related documentation +- Docs index: `docs/README.md` +- CI quality gates: `docs/operations/ci-quality-gates.md` +- Existing ADRs: `docs/adr/0001-minimal-agent-cli.md`, `docs/adr/0002-unrestricted-toolbelt.md` diff --git a/docs/adr/0004-default-llm-policy.md b/docs/adr/0004-default-llm-policy.md new file mode 100644 index 0000000..316ed71 --- /dev/null +++ b/docs/adr/0004-default-llm-policy.md @@ -0,0 +1,62 @@ +# ADR-0004: Default LLM Call Policy + +Status: Accepted +Date: 2025-08-16 + +Context +The agent CLI integrates with OpenAI-compatible Chat Completions APIs and invokes local tools. Sampling controls (e.g., temperature) vary by provider and model capability. Prior defaults were inconsistent across docs and code, and failure handling around unsupported parameters led to fragile behavior. We need a clear, deterministic policy for defaults, capability-based omissions, and lightweight recovery to preserve parity across providers while remaining robust. + +Decision +- Default temperature is 1.0. This matches common provider defaults and avoids under-sampling by default. 
+- Capability-based omission: when a model or provider does not support temperature (or signals it via error), omit the parameter from the payload. +- One-knob-at-a-time: if the user sets top_p explicitly, do not send temperature in the same request. +- GPT-5 controls: when supported by the selected model, allow users to set `verbosity` (low|medium|high) and `reasoning_effort` without affecting the sampling default; these controls are independent of temperature. +- Observability: record fields temperature_effective (final value used after clamps/omissions) and temperature_in_payload (bool) in structured logs. +- Guardrails: keep the agent loop bounded with a default max steps of 8 (hard wall may be higher internally) and enforce correct tool-call sequencing. +- Lightweight recoveries: + - Parameter-recovery retry: on a 400 indicating invalid/unsupported temperature, strip it and retry once before normal retry/backoff. + - Length backoff: on finish_reason == "length" (or equivalent), raise the cap once within budget and retry. + +Consequences +- Requests default to temperature=1.0 unless explicitly incompatible or top_p is set. +- Some models will receive no temperature parameter. Behavior remains provider-default in those cases. +- Some providers/models may restrict or ignore sampling knobs. The agent omits unsupported fields and clamps values within provider-accepted ranges when applicable. +- Logs are clearer for debugging sampling choices and automatic recoveries. +- Tests and docs must align to a single canonical default and sequencing rules. + +Alternatives Considered +- Default temperature 0.2 (deterministic-first). Rejected for general users: too conservative and diverges from provider defaults. +- Always send both temperature and top_p. Rejected: conflicts with provider guidance; harder to reason about effects. +- Hard-fail on parameter errors without recovery. Rejected: fragile integration and poor UX. 
+ +Mermaid sequence of tool-call flow +```mermaid +sequenceDiagram + participant CLI as agentcli + participant API as Chat Completions API + participant TOOL as Local Tool + + CLI->>API: POST /v1/chat/completions (system,user[,tools]) + API-->>CLI: assistant message with tool_calls[] + par Parallel tool calls (if multiple) + CLI->>TOOL: execute function(args) + TOOL-->>CLI: stdout/stderr (JSON) + and + CLI->>TOOL: execute function(args) + TOOL-->>CLI: stdout/stderr (JSON) + end + CLI->>API: POST with tool messages (one per tool_call_id) + API-->>CLI: assistant message (content or more tool_calls) + Note over CLI,API: Enforce one tool message per tool_call_id and correct sequencing +``` + +Rollout Notes +- Update README and reference docs to state temperature=1.0 default and one-knob rule, and to document observability fields. +- Add unit/integration tests for parameter-recovery, one-knob enforcement, and length backoff as incremental slices. + +Addendum (2025-08-17) + +This addendum clarifies that the project standardizes on a default `temperature=1.0` for API parity and GPT-5 compatibility. Models that do not accept a temperature parameter will receive no `temperature` in the payload; behavior in those cases remains provider-default. The one-knob rule remains in effect: when users specify `top_p`, the agent omits `temperature`; when `top_p` is not provided, the agent sends `temperature` (default 1.0) and leaves `top_p` unset. The docs index now links to this ADR to surface the policy. 
+ +See also: [ADR-0005: Harmony pre-processing and channel-aware output](0005-harmony-pre-processing-and-channel-aware-output.md) +See also: [ADR-0006: Image generation tool (img_create)](0006-image-generation-tool-img_create.md) diff --git a/docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md b/docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md new file mode 100644 index 0000000..725c19d --- /dev/null +++ b/docs/adr/0005-harmony-pre-processing-and-channel-aware-output.md @@ -0,0 +1,52 @@ +# ADR-0005: Harmony pre-processing and channel-aware output + +Status: Accepted + +Date: 2025-08-18 + +Context + +The agent performs a lightweight pre-processing stage ("pre-stage") before the main chat completion to refine inputs deterministically. This stage may invoke read-only built-in tools in parallel, validates message sequencing, records structured audit logs with stage metadata, and merges roles before the main call. Output channels are routed to stdout/stderr according to explicit rules to keep the CLI output clean and stable. + +Decision + +- Introduce a pre-stage HTTP call that uses its own config block (`-prep-*` flags and `OAI_PREP_*` env) while inheriting sane defaults from the main call when unset. +- Restrict pre-stage tools to in-process read-only adapters by default; allow external tools only when `-prep-tools-allow-external` is set, honoring a separate manifest via `-prep-tools` with the same validations as the main manifest. +- Execute multiple tool calls in parallel during pre-stage; enforce one `role:"tool"` message per `tool_call_id` and validate the message sequence using the existing validator. +- Emit NDJSON audit entries with `stage:"prep"`, timings, and idempotency key; include pre-stage config in `-print-config`. +- Harmonize channel printing: by default, print only `assistant{channel:"final"}` to stdout; print `critic` and `confidence` to stderr with `-verbose`; dump raw JSON with `-debug`. 
Maintain the same routing for messages produced after pre-stage. +- Cache pre-stage results using a key derived from inputs, model/endpoint, sampling knobs, retries, and the tool set or manifest content hash; honor `-prep-cache-bust`. + +Consequences + +- Pre-stage failures are fail-open: the CLI logs a single warning and proceeds with the original messages. +- Auditability improves with consistent stage tagging; docs and CLI help reflect the knobs and precedence rules. +- Deterministic output routing prevents interleaving JSON with human-readable output by default. + +References + +- Sequence diagram: [docs/diagrams/harmony-prep-seq.md](../diagrams/harmony-prep-seq.md) +- Related policy: [ADR-0004: Default LLM Call Policy](0004-default-llm-policy.md) +- Pre-stage output contract: [docs/harmony-prestage.md](../harmony-prestage.md) +- See also: [ADR-0006: Image generation tool (img_create)](0006-image-generation-tool-img_create.md) + +Mermaid sequence (summary) + +```mermaid +sequenceDiagram + participant CLI as agentcli + participant API as Chat Completions API + participant TOOLS as Pre-stage tools (read-only) + + CLI->>API: POST /v1/chat/completions (pre-stage) + API-->>CLI: assistant tool_calls[] + par Parallel tool calls + CLI->>TOOLS: invoke built-in tool + TOOLS-->>CLI: JSON result + end + CLI->>CLI: ValidateMessageSequence + audit {stage:"prep"} + CLI->>CLI: Merge roles; route channels + CLI->>API: POST /v1/chat/completions (final) + API-->>CLI: assistant final + CLI-->>CLI: Print final; verbose channels to stderr +``` \ No newline at end of file diff --git a/docs/adr/0006-image-generation-tool-img_create.md b/docs/adr/0006-image-generation-tool-img_create.md new file mode 100644 index 0000000..59bf893 --- /dev/null +++ b/docs/adr/0006-image-generation-tool-img_create.md @@ -0,0 +1,34 @@ +# ADR-0006: Image generation tool (img_create) + +Status: Accepted + +Date: 2025-08-18 + +Context + +The project added a minimal `img_create` tool that integrates with 
an OpenAI-compatible Images API to generate PNG files deterministically from a prompt. The tool is invoked by the agent via a tools manifest and can also be run directly. It enforces repository‑relative output paths, rejects path escapes, and defaults to saving decoded PNGs to disk to avoid base64 bloat in transcripts unless explicitly requested. + +Decision + +- Provide a standalone CLI at `tools/cmd/img_create/img_create.go` built into `tools/bin/img_create` via `make build-tools`. +- Define stdin contract and output schema; default to saving files under a repo‑relative `save.dir` with stable names and SHA‑256 reporting. +- Enforce cross‑platform path normalization and strict repo‑relative validation; reject `..` escapes and absolute paths. +- Implement retries with sane timeouts; prefer small, deterministic tests using `httptest.Server` with no external network. +- Support optional `extras` pass‑through for forward compatibility while validating and sanitizing inputs. +- Never log secrets or large base64 by default; print concise JSON and redact or elide sensitive/large payloads unless a debug flag is set. +- Allow a constrained environment to child processes via `envPassthrough` validation and runner enforcement. + +Consequences + +- Users can generate images deterministically in CI and locally without network flakiness by using mocks in tests. +- Auditability and safety improve via repo‑relative writes, SHA‑256 reporting, and redaction. +- Documentation includes a focused reference and examples; README and CLI reference link to this ADR. 
+ +References + +- Tool source: [tools/cmd/img_create/img_create.go](../../tools/cmd/img_create/img_create.go) +- Tool reference: [docs/reference/img_create.md](../reference/img_create.md) +- Tools manifest schema and validation: [internal/tools/manifest.go](../../internal/tools/manifest.go) +- Tool runner env pass‑through and auditing: [internal/tools/runner.go](../../internal/tools/runner.go) +- Sequence diagram: [docs/diagrams/toolbelt-seq.md](../diagrams/toolbelt-seq.md) +- Related ADR: [ADR-0004: Default LLM Call Policy](0004-default-llm-policy.md), [ADR-0005: Harmony pre-processing and channel-aware output](0005-harmony-pre-processing-and-channel-aware-output.md) diff --git a/docs/adr/0007-prestage-overrides.md b/docs/adr/0007-prestage-overrides.md new file mode 100644 index 0000000..5ab91f4 --- /dev/null +++ b/docs/adr/0007-prestage-overrides.md @@ -0,0 +1,77 @@ +# ADR-0007: Pre‑stage prompt overrides (flags + embedded default) + +Status: Accepted + +Date: 2025-08-19 + +## Context +Pre‑stage refines inputs before the main chat call. We need a predictable, reproducible way to source the pre‑stage prompt that works offline, is easy to override, and is safe for CI. Prior work added an embedded default prompt, with plans for user overrides. + +Constraints: +- Deterministic precedence independent of runtime timing or network. +- CLI usability: copy‑pasteable, minimal friction. +- Testability: offline unit and integration tests without network. +- Safety: no shell evaluation; treat all input as untrusted. + +## Options considered +1) No overrides (embedded default only) + - Simple, but inflexible; cannot adapt to project‑specific workflows. +2) File‑only overrides (`-prep-file`, repeatable) + - Deterministic, works in CI, good for longer prompts kept under version control. + - Less convenient for quick experiments. 
+3) Flags + embedded default (both `-prep-prompt` and `-prep-file` with exclusivity) + - Best DX: quick CLI text or versioned files; embedded default remains a safe fallback. + - Requires clear precedence rules and deterministic joining. + +## Decision +Adopt option (3): support both flag and file overrides, with an embedded default as fallback, under strict, explicit precedence and exclusivity. + +Effective source resolution (independent per run): +- If one or more `-prep-prompt` are provided, join them with two newlines (`\n\n`) in the order seen → source: `override`. +- Else if one or more `-prep-file` are provided (files may be repeated; `-` means STDIN), read and join in the order seen with `\n\n`, trimming trailing whitespace → source: `override`. +- Else use the embedded default prompt (`assets/prompts/prep_default.md`) → source: `default`. + +Rules: +- Mutual exclusivity: do not allow mixing `-prep-prompt` and `-prep-file` in the same invocation. Treat as a user error with a clear message and non‑zero exit (when flags exist). +- Precedence: flags > environment variables > embedded default. Environment variables `OAI_PREP_PROMPT` and `OAI_PREP_FILE` (comma‑separated; `-` allowed) mirror the flags. +- Deterministic joiner: always `\n\n` between parts; trim trailing whitespace on the final string. +- Observability: when `-prep-dry-run` is set, print the final merged Harmony messages (post‑merge) to stdout for inspection. +- Metadata: include `{ "prestage": { "source": "override|default", "bytes": N } }` in printed/saved metadata without exposing prompt text. + +## Consequences +- Reproducible prompts across machines and CI; easy to override and inspect. +- Clear error surface for invalid combinations; consistent precedence across chat, pre‑stage, and image phases. +- Documentation and help output reflect repeatability, exclusivity, and precedence. 
- Backward compatibility preserved: when no new flags/envs are provided, behavior remains unchanged (embedded default). + +## Migration +- None required. Existing flows continue to use the embedded default. +- Projects can gradually introduce versioned pre‑stage prompts via files and CI scripts. + +## References +- Embedded default prompt: `internal/oai/prompts.go` (via `//go:embed assets/prompts/prep_default.md`). +- Resolution helpers and tests: `internal/oai/resolve.go` and `internal/oai/config_test.go`. +- Pre‑stage flow and validator: `docs/harmony-prestage.md` and ADR‑0005. +- CLI docs: `README.md` (Image/Pre‑stage sections) and `docs/reference/cli-reference.md`. + +## Issue +Link to the canonical GitHub issue once created. + +## Diagram (Mermaid) +```mermaid +flowchart TD + A[CLI parse flags/env] --> B{ResolvePrepPrompt} + B -->|prompt flags| C[Join -prep-prompt parts with \n\n] + B -->|file flags| D["Read -prep-file parts (incl. - for STDIN) and join with \n\n"] + B -->|none| E[Use embedded default] + C --> F[Pre-stage call] + D --> F[Pre-stage call] + E --> F[Pre-stage call] + F --> G[Validate Harmony roles] + G --> H[Merge into main call config] + H --> I[Main chat call] + I --> J{Image instructions?} + J -->|yes| K[Invoke image tool with defaults unless overridden] + J -->|no| L[Print final answer] + K --> L[Print final answer] +``` diff --git a/docs/adr/0010-research-tools-searxng.md b/docs/adr/0010-research-tools-searxng.md new file mode 100644 index 0000000..7b7eb0d --- /dev/null +++ b/docs/adr/0010-research-tools-searxng.md @@ -0,0 +1,55 @@ +# ADR-0010: Adopt SearXNG & network research toolbelt (CLI-only) + +## Status +Accepted + +## Context +We need credible, repeatable web discovery with provenance for an offline-by-default agent CLI. Direct calls to individual search engines vary in quality, rate limits, and output formats; scraping HTML directly is brittle and raises maintenance and legal concerns. 
A meta-search engine provides a uniform query interface and result schema across multiple engines, improving recall and resiliency while keeping configuration centralized. We also need a small set of CLI-only subtools for fetching and parsing content deterministically with SSRF guards, strict timeouts, and auditable outputs. + +Requirements: +- Deterministic CLI tools with JSON stdin/stdout contracts +- SSRF guard: block loopback, RFC1918/4193, link‑local, and .onion; protect against DNS rebinding +- Bounded network behavior: timeouts, retry policies, and redirect limits +- Provenance: preserve URLs and basic metadata for citation +- No scraping frameworks; keep a narrow surface with testable behaviors + +## Options +1. Call individual engine APIs directly (Google, Bing, etc.) + - Pros: direct features, official quotas + - Cons: keys/quotas per engine, divergent schemas, vendor lock‑in +2. Screen‑scrape engines or sites + - Pros: no keys in some cases + - Cons: brittle, ToS concerns, high maintenance, anti‑bot friction +3. Use a meta‑search engine (SearXNG) and add small focused CLI tools for follow‑ups + - Pros: uniform JSON API, engine plurality, self‑hostable, adjustable engine set + - Cons: one more service to run; still subject to upstream variability + +## Decision +Adopt SearXNG as the single meta‑search entry point and introduce a small CLI toolbelt to perform safe retrieval and parsing around it. 
The initial tool set will include: +- searxng_search: query SearXNG’s JSON API with retries and SSRF guard +- http_fetch: safe HTTP/HTTPS fetcher with byte caps, redirects ≤5, gzip support +- robots_check: evaluate robots.txt for a given origin +- readability_extract: extract article content +- metadata_extract: extract OpenGraph/Twitter/JSON‑LD +- pdf_extract: extract text from PDFs (optional OCR via tesseract) +- rss_fetch: fetch and normalize RSS/Atom feeds +- wayback_lookup: lookup/save via Internet Archive +- wiki_query, openalex_search, crossref_search, github_search: narrow, well‑scoped APIs +- dedupe_rank: near‑duplicate grouping with MinHash + TF‑IDF tie‑break +- citation_pack: normalize and (optionally) archive for citations + +These tools are CLI‑only, executed via argv with JSON contracts, audited, and guarded. They will be added incrementally behind explicit capabilities and documented under docs/reference/. + +## Consequences +- A running SearXNG instance is assumed for meta‑search (e.g., `SEARXNG_BASE_URL=http://localhost:8888`). +- All tools ship SSRF guards, strict timeouts, limited redirects, and audit NDJSON. +- CI remains offline: tests use `httptest.Server` fixtures; no live network calls. +- Docs include a central reference and a troubleshooting runbook for common network issues. + +## Flow +See the research pipeline diagram: [docs/diagrams/research-pipeline.md](../diagrams/research-pipeline.md). 
+ +## References +- Docs index: see `docs/README.md` +- Threat model and SSRF policy: `docs/security/threat-model.md` +- Tool references will live under `docs/reference/` as they are implemented diff --git a/docs/adr/0011-state-bundle-schema.md b/docs/adr/0011-state-bundle-schema.md new file mode 100644 index 0000000..4a7536f --- /dev/null +++ b/docs/adr/0011-state-bundle-schema.md @@ -0,0 +1,36 @@ +# ADR-0011: Persist execution state as versioned file bundles + +## Status + +Accepted + +## Context + +We need reproducible, resumable runs across CLI invocations without adding a DB. A file-based, append-only bundle under a user-provided `-state-dir` offers portability and testability. + +## Decision + +Introduce a versioned JSON schema (`version: "1"`) named `StateBundle` with fields: + +- `version`: fixed string "1" +- `created_at`: RFC3339 UTC +- `tool_version`: CLI/tool semantic version or git describe +- `model_id`, `base_url`: effective backend configuration +- `toolset_hash`: hash of enabled tool specs +- `scope_key`: partition key to separate incompatible contexts +- `prompts`: `{system, developer,...}` strings only (no secrets) +- `prep_settings`, `context`, `tool_caps`, `custom`: JSON-serializable objects +- `source_hash`: SHA-256 over `(model_id|base_url|toolset_hash|scope_key)` +- `prev_sha` (optional): parent pointer when refining + +Pointer file `latest.json` stores `{version:"1", path:"state-*.json", sha256}`. Snapshot files are named `state-<RFC3339UTC>-<8charSHA>.json` with perms 0600; directory perms 0700. + +## Consequences + +- Simple to save/load and diff in tests +- Forward-compatible via `version` +- No raw request/response bodies are stored by default; keys must be redacted upstream + +## Verification + +Unit tests cover schema validation and hashing in `internal/state/schema_test.go`. Future changes will add atomic save/load and scope handling. 
diff --git a/docs/adr/0012-state-dir-persistence.md b/docs/adr/0012-state-dir-persistence.md new file mode 100644 index 0000000..c7fcc35 --- /dev/null +++ b/docs/adr/0012-state-dir-persistence.md @@ -0,0 +1,135 @@ +# ADR-0012: Persist and refine execution state via -state-dir + +## Status + +Accepted + +## Context + +We want deterministic, reproducible runs across CLI invocations without adding a database or introducing network/stateful dependencies. Operators frequently iterate on similar prompts and tool configurations; recomputing the pre-stage every time is wasteful and introduces variability. A simple file-based persistence mechanism, controlled explicitly by the user via `-state-dir`, enables: + +- Reusing prior refined prompts and settings for faster, stable runs +- Explicit partitioning by scope to avoid cross-contamination +- Offline, testable behavior with predictable artifacts suitable for diffs + +Security and privacy constraints require that we do not store raw request/response bodies or secrets by default, and that on-disk files have restrictive permissions. 
+ +## Options + +- No persistence: always recompute pre-stage; simplest but slow and variable +- Database-backed store: robust but heavy, adds operational complexity +- File-based JSON bundles: lightweight, portable, easy to test/diff + +## Decision + +Adopt a file-based, append-only persistence under a user-provided `-state-dir`: + +- Snapshot files named `state-<RFC3339UTC>-<8charSHA>.json` containing a versioned `StateBundle` (`version:"1"`) per ADR‑0011 +- A pointer file `latest.json` with `{version:"1", path, sha256}` pointing to the newest snapshot +- Directory permissions enforced to `0700`; files written with `0600`, using atomic write + fsync + rename +- Partitioning via `scope_key`; default scope is a hash of `(model_id|base_url|toolset_hash)` with optional `-state-scope` override +- Redaction and safety: API keys are redacted upstream; bundles exclude raw bodies; world-writable or non-owned directories are rejected +- Refinement: `-state-refine` with either `-state-refine-text` or `-state-refine-file` produces a new snapshot with `prev_sha` pointing to the previous bundle + +See ADR‑0011 for the `StateBundle` schema details. 
+ +## Consequences + +- Reproducible and faster repeated runs; fewer pre-stage calls when state is restored +- Deterministic artifacts that are easy to inspect and test +- Requires user selection of a secure directory; rejects unsafe permissions +- Disk usage grows with snapshots; users can prune older snapshots manually + +## Sequence (Mermaid) + +```mermaid +sequenceDiagram + autonumber + participant U as User + participant C as agentcli + participant FS as -state-dir (filesystem) + participant LLM as Pre-stage LLM + + rect rgb(245,245,245) + Note over C,FS: First run (no state) + U->>C: run with -state-dir + C->>FS: LoadLatestStateBundle(scope) + FS-->>C: (none) + C->>LLM: Pre-stage call (derive prompts/settings) + LLM-->>C: Harmony messages (validated) + C->>FS: SaveStateBundle(snapshot) + FS-->>C: latest.json -> snapshot + C-->>U: Execute main call with merged config + end + + rect rgb(245,245,245) + Note over C,FS: Restore (no refine) + U->>C: run again with same scope + C->>FS: LoadLatestStateBundle(scope) + FS-->>C: bundle + C-->>U: Skip pre-stage; use restored prompts/settings + end + + rect rgb(245,245,245) + Note over C,FS: Refine existing state + U->>C: run with -state-refine (-state-refine-text|file) + C->>FS: LoadLatestStateBundle(scope) + FS-->>C: bundle + C->>LLM: Pre-stage refine(prompt = refine-input) + LLM-->>C: refined messages/settings + C->>FS: SaveStateBundle(new, prev_sha=old) + FS-->>C: latest.json -> new snapshot + C-->>U: Proceed with refined config + end +``` + +## Usage examples + +Restore and reuse prompts/settings across runs: + +```bash +./bin/agentcli \ + -prompt "Summarize the repo" \ + -tools ./tools.json \ + -state-dir "$PWD/.agent-state" \ + -debug + +# Second run with the same -state-dir and scope will restore and skip pre-stage +./bin/agentcli \ + -prompt "Summarize the repo" \ + -tools ./tools.json \ + -state-dir "$PWD/.agent-state" +``` + +Dry-run to see intended actions without touching disk: + +```bash +./bin/agentcli 
-prompt "Say ok" -state-dir "$PWD/.agent-state" -dry-run +``` + +Refine an existing bundle using inline text: + +```bash +./bin/agentcli \ + -prompt "Summarize the repo" \ + -state-dir "$PWD/.agent-state" \ + -state-refine \ + -state-refine-text "Tighten temperature to 0.2 and emphasize security notes" +``` + +Use a custom scope to keep contexts separate: + +```bash +./bin/agentcli -prompt "Say ok" -state-dir "$PWD/.agent-state" -state-scope "docs-demo" +``` + +## Security notes + +- Use a private directory owned by the current user. The CLI rejects world-writable or non-owned directories. +- Files are written `0600` and the directory is `0700`. Back up or copy with care. +- Keys and secrets are redacted upstream; raw request/response bodies are not stored by default. + +## References + +- ADR‑0011: State bundle schema (`docs/adr/0011-state-bundle-schema.md`) +- CLI flags reference (`docs/reference/cli-reference.md`) \ No newline at end of file diff --git a/docs/architecture/module-boundaries.md b/docs/architecture/module-boundaries.md new file mode 100644 index 0000000..eb2bbaa --- /dev/null +++ b/docs/architecture/module-boundaries.md @@ -0,0 +1,79 @@ +## Architecture: Module Boundaries and Allowed Imports + +This repository follows a strict layering model to keep dependencies clear, avoid cycles, and enable fast, deterministic testing. + +### Layers and allowed imports +- `cmd/agentcli` + - Entry point only. Wires flags, configuration, and calls into `internal/*` packages. + - Allowed imports: standard library, `internal/oai`, `internal/tools` (runner/manifest), and other leaf `internal/*` utilities. + - Not allowed: importing anything under `tools/` (those are standalone binaries), or third‑party HTTP/tool wrappers beyond what `internal/*` exposes. + +- `internal/oai` + - OpenAI‑compatible API client and request/response types. + - Allowed imports: standard library only (e.g., `net/http`, `encoding/json`, `context`). 
+ - Not allowed: importing `cmd/` or any `tools/` binaries. Keep independent from tool execution. + +- `internal/tools` + - Tool manifest loader and secure runner that executes external tool binaries via argv (no shell). + - Allowed imports: standard library only, plus other small `internal/*` helpers if introduced. + - Not allowed: importing `cmd/` or `tools/` source code. Communicates with tools solely via argv + JSON stdin/stdout. + +- `tools/cmd/*` (tool sources) and `tools/bin/*` (built binaries) + - Each tool's source lives under `tools/cmd/<name>/<name>.go` and builds to a standalone binary at `tools/bin/<name>` (or `tools/bin/<name>.exe` on Windows). + - Allowed imports: standard library only. + - Not allowed: importing from `internal/*` or `cmd/`. Tools are process‑isolated and communicate via JSON contracts over stdin/stdout. + +Rationale: The CLI (`cmd/agentcli`) depends on `internal/*`, which depend only on the standard library. The `tools/*` binaries are leaf executables with no reverse imports, ensuring the agent can evolve independently from tool implementations and vice versa. + +### Module relationships +```mermaid +flowchart TD + subgraph CLI + CMD[cmd/agentcli] + end + subgraph Internal + OAI[internal/oai] + TOOLS[internal/tools] + end + subgraph ToolBinaries + TEXEC[tools/bin/exec] + TREAD[tools/bin/fs_read_file] + TWRITE[tools/bin/fs_write_file] + TAPPEND[tools/bin/fs_append_file] + TMKDIRP[tools/bin/fs_mkdirp] + TRM[tools/bin/fs_rm] + TMOVE[tools/bin/fs_move] + end + + CMD --> OAI + CMD --> TOOLS + TOOLS -. argv+JSON .-> TEXEC + TOOLS -. argv+JSON .-> TREAD + TOOLS -. argv+JSON .-> TWRITE + TOOLS -. argv+JSON .-> TAPPEND + TOOLS -. argv+JSON .-> TMKDIRP + TOOLS -. argv+JSON .-> TRM + TOOLS -. 
argv+JSON .-> TMOVE + + classDef solid fill:#eef,stroke:#55f,stroke-width:1px; + classDef tools fill:#efe,stroke:#5a5,stroke-width:1px; + class CMD,OAI,TOOLS solid; + class TEXEC,TREAD,TWRITE,TAPPEND,TMKDIRP,TRM,TMOVE tools; +``` + +### Guidance for adding a new package +- New internal package under `internal/<name>`: + - Keep dependencies to the standard library and other `internal/*` leaf utilities. + - Do not import from `cmd/` or `tools/`. + - Provide small, explicit exported APIs with clear documentation and unit tests. +- New tool under `tools/cmd/<name>/<name>.go`: + - Independent `main` that reads a single JSON object from stdin and writes a single‑line JSON result (or error) to stdout/stderr. + - No imports from `internal/*` or `cmd/`. Use only the standard library. + - Add focused unit tests in `tools/cmd/<name>/<name>_test.go` that build and run the tool as a subprocess. +- Build rules: + - Ensure `make build-tools` (or `go build -o tools/bin/<name> ./tools/cmd/<name>`) produces a reproducible static binary (with `.exe` suffix on Windows). +- Contracts: + - Keep JSON schemas documented in `tools.json` and `README` examples. Validate inputs strictly and fail fast on contract violations. + +### Enforcement ideas (non-blocking) +- Consider a lightweight import linter (e.g., staticcheck configuration or a custom script) in CI that rejects cross‑layer imports. This document is the normative policy; automation may follow. 
diff --git a/docs/diagrams/agentcli-seq.md b/docs/diagrams/agentcli-seq.md new file mode 100644 index 0000000..4cac95f --- /dev/null +++ b/docs/diagrams/agentcli-seq.md @@ -0,0 +1,14 @@ +```mermaid +sequenceDiagram + participant CLI as agentcli + participant API as OpenAI-compatible API + participant TOOL as Local tool (get_time) + + CLI->>API: POST /v1/chat/completions [system,user,tools] + API-->>CLI: assistant tool_calls: get_time({"tz":"Europe/Helsinki"}) + CLI->>TOOL: exec ./tools/bin/get_time stdin {tz} + TOOL-->>CLI: {"tz":"...","iso":"RFC3339","unix":<sec>} + CLI->>API: POST /v1/chat/completions [+ tool result] + API-->>CLI: assistant final content + CLI-->>CLI: print to stdout and exit 0 +``` \ No newline at end of file diff --git a/docs/diagrams/harmony-prep-seq.md b/docs/diagrams/harmony-prep-seq.md new file mode 100644 index 0000000..892ebaa --- /dev/null +++ b/docs/diagrams/harmony-prep-seq.md @@ -0,0 +1,29 @@ +```mermaid +sequenceDiagram + participant CLI as agentcli + participant API as OpenAI API + participant TOOLS as Pre-stage tools (in-process) + + Note over CLI: Resolve prep config (-prep-*, env), compute cache key + + CLI->>API: POST /v1/chat/completions [prep system/user + tools] + API-->>CLI: assistant tool_calls: [{name,args}, ...] 
+ + par Parallel tool calls (pre-stage) + CLI->>TOOLS: invoke built-in read-only tool (e.g., fs.read_file) + TOOLS-->>CLI: tool result JSON + CLI->>TOOLS: invoke built-in read-only tool (e.g., env.get) + TOOLS-->>CLI: tool result JSON + end + + CLI->>CLI: ValidateMessageSequence(messages, stage:"prep") + CLI->>CLI: Audit NDJSON {stage:"prep", timings, idempotency_key} + CLI->>CLI: Merge roles (system/developer/user) per precedence + + Note over CLI,API: Proceed to main call with merged messages + + CLI->>API: POST /v1/chat/completions [final merged messages] + API-->>CLI: assistant final (streamed channel:"final"; others buffered) + + CLI-->>CLI: Route channels; print final to stdout +``` \ No newline at end of file diff --git a/docs/diagrams/prestage_flow.md b/docs/diagrams/prestage_flow.md new file mode 100644 index 0000000..baac90a --- /dev/null +++ b/docs/diagrams/prestage_flow.md @@ -0,0 +1,24 @@ +# Pre-stage execution flow + +```mermaid +sequenceDiagram + participant CLI as agentcli + participant API as OpenAI API + participant IMG as Image tool (optional) + + Note over CLI: Parse flags/env → ResolvePrepPrompt() + + CLI->>API: POST /v1/chat/completions (pre-stage) + API-->>CLI: assistant tool_calls + + CLI->>CLI: ValidatePrestageHarmony() + CLI->>CLI: Merge pre-stage results into config + + alt image_instructions present + CLI->>IMG: invoke image tool with instructions + options + IMG-->>CLI: result (url or b64_json) + end + + CLI->>API: POST /v1/chat/completions (main) + API-->>CLI: assistant final +``` diff --git a/docs/diagrams/research-pipeline.md b/docs/diagrams/research-pipeline.md new file mode 100644 index 0000000..c1bb205 --- /dev/null +++ b/docs/diagrams/research-pipeline.md @@ -0,0 +1,25 @@ +# Research pipeline + +```mermaid +flowchart LR + A[agentcli] --> B[tool_calls] + B --> C[searxng_search] + C --> D[http_fetch] + D --> E{content type} + E -->|HTML| F[readability_extract] + E -->|PDF| G[pdf_extract] + E -->|Feed| H[rss_fetch] + F --> 
I[metadata_extract] + C --> J[wiki_query] + C --> K[openalex_search] + C --> L[crossref_search] + C --> M[github_search] + I --> N[dedupe_rank] + G --> N + H --> N + J --> N + K --> N + L --> N + N --> O[citation_pack] + O --> P["assistant(final)"] +``` diff --git a/docs/diagrams/toolbelt-seq.md b/docs/diagrams/toolbelt-seq.md new file mode 100644 index 0000000..ad2a67f --- /dev/null +++ b/docs/diagrams/toolbelt-seq.md @@ -0,0 +1,17 @@ +```mermaid +sequenceDiagram + participant CLI as agentcli + participant API as OpenAI-compatible API + participant IMG as Tool (img_create) + participant IMGAPI as Images API + + CLI->>API: POST /v1/chat/completions [system,user,tools] + API-->>CLI: assistant tool_calls: img_create({prompt,n,size,save:{dir,basename,ext}}) + CLI->>IMG: exec ./tools/bin/img_create stdin JSON + IMG->>IMGAPI: POST /v1/images/generations {"model","prompt","n","size","response_format":"b64_json"} + IMGAPI-->>IMG: {b64_json} + IMG-->>CLI: {"saved":[{"path":"assets/img_001.png","bytes":95,"sha256":"..."}],"n":1,"size":"1024x1024","model":"gpt-image-1"} + CLI->>API: POST /v1/chat/completions [+ tool result] + API-->>CLI: assistant final content (summarizes saved file path) + CLI-->>CLI: print to stdout and exit 0 +``` \ No newline at end of file diff --git a/docs/harmony-prestage.md b/docs/harmony-prestage.md new file mode 100644 index 0000000..494a92b --- /dev/null +++ b/docs/harmony-prestage.md @@ -0,0 +1,39 @@ +# Pre-stage Harmony Output Contract + +This document specifies the required shape of Harmony messages returned by the pre-stage step. The goal is deterministic merging into the main call while preventing tool execution or assistant chatter at this stage. + +## Contract + +- Messages MUST be a JSON array of objects with fields: `role` and `content` (optional additional fields from the common schema are permitted but constrained as below). +- Allowed roles: `system`, `developer`. +- Disallowed roles: `user`, `assistant`, `tool`. 
+- `tool_calls` MUST NOT appear on any message. +- `tool_call_id` MUST NOT appear. +- `channel` MUST be omitted or empty. + +Examples (valid): + +```json +[ + {"role":"system","content":"You are a careful planning assistant."}, + {"role":"developer","content":"Provide 3 bullet goals and tool hints."} +] +``` + +Examples (invalid): + +```json +[{"role":"assistant","content":"done"}] +``` + +```json +[{"role":"system","tool_calls":[{"id":"x","type":"function","function":{"name":"foo"}}]}] +``` + +## Validator + +The function `internal/oai/ValidatePrestageHarmony` enforces the above. Unit tests cover allowed roles and rejection cases for assistant/user/tool roles, presence of `tool_calls`, and stray `tool_call_id`. + +## Rationale + +Pre-stage is for shaping prompts and configuration, not executing tools or producing end-user visible content. Restricting roles ensures deterministic merge and predictable routing. diff --git a/docs/interfaces/code.sandbox.js.run.md b/docs/interfaces/code.sandbox.js.run.md new file mode 100644 index 0000000..1afbed4 --- /dev/null +++ b/docs/interfaces/code.sandbox.js.run.md @@ -0,0 +1,97 @@ +## Interface: code.sandbox.js.run + +The JavaScript sandbox executes a short snippet with a strict deny-by-default security model and bounded resource usage. It is intended for tiny, deterministic computations on assistant-provided inputs without ambient access to the host environment. + +- Purpose: run isolated JS with no filesystem, network, timers, or console; only minimal host bindings are exposed. +- Security: deny-by-default; only `emit` and `read_input` are available. No `require`, no `console`, no timers, no Promise scheduling. Treat untrusted code as hostile; limits are enforced best-effort. +- Limits: wall-clock timeout and output size cap; output is truncated when exceeding the cap and an `OUTPUT_LIMIT` error is returned. + +### JSON contract + +- stdin (object): + - `source` (string, required): JavaScript source code to evaluate. 
+ - `input` (string, required): Opaque input made available to the script via `read_input()`. + - `limits` (object, required): + - `wall_ms` (int, optional): Maximum wall-clock time in milliseconds. Default 1000 ms. + - `output_kb` (int, optional): Maximum output size in KiB before truncation. Default 64 KiB. + +- stdout (on success): +```json +{"output":"<string>"} +``` + +- stderr (on failure): single-line JSON with a stable error code: +```json +{"code":"EVAL_ERROR","message":"<details>"} +{"code":"TIMEOUT","message":"execution exceeded <ms> ms"} +{"code":"OUTPUT_LIMIT","message":"output exceeded <KB> KB"} +``` + +### Host bindings available inside the VM + +- `read_input(): string` — returns the provided `input` string. +- `emit(s: string): void` — appends `s` to the output buffer. When the buffer reaches `output_kb`, the VM aborts with `OUTPUT_LIMIT` after returning truncated stdout. + +All other globals are intentionally undefined (e.g., `typeof require === 'undefined'`, `typeof console === 'undefined'`, `typeof setTimeout === 'undefined'`). + +### Examples + +- Echo input: +```json +{ + "source": "emit(read_input())", + "input": "hello", + "limits": {"output_kb": 4} +} +``` +Expected stdout: +```json +{"output":"hello"} +``` + +- Output limit with truncation and error: +```json +{ + "source": "emit(read_input())", + "input": "<1500 x 'a'>", + "limits": {"output_kb": 1} +} +``` +Expected behavior: stdout contains 1024 bytes of `"a"`; stderr is `{"code":"OUTPUT_LIMIT",...}` and the process exits non‑zero. + +- Malicious loop (timeout): +```json +{ + "source": "for(;;) {}", + "input": "", + "limits": {"wall_ms": 100} +} +``` +Expected behavior: process is interrupted within ~100ms with `stderr` `{"code":"TIMEOUT",...}` and non‑zero exit; stdout is empty. 
+ +### Quick verification via CLI (local repository) + +You can verify the interface using the existing unit tests: +```bash +# Run a subset of tests for the sandbox +go test ./internal/tools/jsrun -run 'TestRun_EmitReadInput_Succeeds|TestRun_OutputLimit_TruncatesAndErrors|TestRun_Timeout_Interrupts' -v +``` +These tests cover happy-path echo, output truncation, and timeout interruption. + +### Security Model + +- Deny-by-default capabilities: the VM exposes only `emit` and `read_input`; there is no filesystem, network, clock, process, or environment access. +- No timers/async: `setTimeout`, `setInterval`, Promises scheduling, and microtask queues are unavailable by default. +- Deterministic budget: wall-time and output-size limits enforce bounded execution; long-running or unbounded loops will be interrupted. +- Secrets hygiene: do not include secrets in `source` or `input`; error logs may contain minimal metadata necessary for troubleshooting. + +### Pitfalls + +- Large computations or accidental loops may hit the `wall_ms` timeout. +- Emitting excessive data triggers `OUTPUT_LIMIT` with truncated output and a non-zero exit. + +### Status + +- Implementation: `internal/tools/jsrun/handler.go` +- Tests: `internal/tools/jsrun/handler_test.go` +- Consumers: intended for future internal tool wiring; not exposed as an external tool binary at this time. diff --git a/docs/migrations/tools-layout.md b/docs/migrations/tools-layout.md new file mode 100644 index 0000000..93eb5e7 --- /dev/null +++ b/docs/migrations/tools-layout.md @@ -0,0 +1,49 @@ +# Tools layout migration: from legacy `tools/*` to canonical `tools/cmd/*` + `tools/bin/*` + +This guide explains the ongoing migration that standardizes tool source and binary locations. 
+ +## Why migrate +- Consistent, unique binary names under `tools/bin/NAME` simplify manifests and docs +- Clear source location under `tools/cmd/NAME/*.go` enables per‑tool packages and tests +- Hygiene checks in `Makefile` enforce paths to avoid drift + +## Canonical layout +- Source: `tools/cmd/NAME/*.go` +- Binary (Unix/macOS): `tools/bin/NAME` +- Binary (Windows): `tools/bin/NAME.exe` + +## Building tools +- All tools: `make build-tools` +- Single tool: `make build-tool NAME=<name>` + +Binaries are emitted to `tools/bin/` regardless of OS. On Windows, a `.exe` suffix is added automatically when `GOOS=windows`. + +## Updating tools.json +- Relative `command[0]` must start with `./tools/bin/NAME` (use `.exe` on Windows) +- Absolute paths are allowed in tests only + +Examples: +```json +{"command":["./tools/bin/fs_read_file"]} +``` +```json +{"command":["./tools/bin/fs_read_file.exe"]} +``` + +## Test helpers +Use `tools/testutil/buildtool.go::BuildTool(t, "NAME")` to compile a tool from `tools/cmd/NAME` into a temp dir in tests. + +## Migration steps (summary) +1. Emit binaries under `tools/bin/` while keeping legacy sources building +2. Update `tools.json` to reference `./tools/bin/NAME` +3. Move sources into `tools/cmd/NAME/NAME.go` with matching tests +4. Delete legacy sources under `tools/*` once all tools are migrated +5. Tighten `.gitignore` to ignore only `tools/bin/**` +6. Keep docs/examples using `./tools/bin/NAME` exclusively + +## Lint and path hygiene +- `make check-tools-paths` fails on legacy invocations or sources outside canonical layout +- `make verify-manifest-paths` validates `tools.json` commands + +## Troubleshooting +See `docs/runbooks/troubleshooting.md` for Windows examples and path validation errors. 
diff --git a/docs/operations/ci-quality-gates.md b/docs/operations/ci-quality-gates.md new file mode 100644 index 0000000..7e2c8c7 --- /dev/null +++ b/docs/operations/ci-quality-gates.md @@ -0,0 +1,15 @@ +# CI quality gates + +This repository enforces reproducible builds, formatting, lint, static analysis, tests, and path hygiene via the Makefile and GitHub Actions. + +- Timeouts: + - HTTP requests use `-http-timeout` (can be set via `OAI_HTTP_TIMEOUT` in CI). Keep between 60–120s unless tests require less. + - Tools use `-tool-timeout`, with per-tool `timeoutSec` in `tools.json` taking precedence. +- Lint: + - `make lint` runs the Go toolchain version gate first via `make check-go-version`, then executes a pinned `golangci-lint` from `./bin` (auto-installed by `make install-golangci` if missing), followed by vet, formatting checks, and path hygiene checks (`make check-tools-paths` and `make verify-manifest-paths`). + - Expected log excerpt when versions match: `check-go-version: OK (system X.Y matches go.mod X.Y)`. + - To install or upgrade the pinned linter manually: `make install-golangci`. +- Tests: `go test ./...` run offline with fakes; integration tests exercise tool invocation end-to-end. +- Reproducible builds: `make build build-tools` with `-trimpath` and stripped ldflags; artifacts ignored by Git. + +In test scenarios that validate timeouts, prefer small values and deterministic sleeps in fakes to keep CI fast. Ensure such tests are isolated and do not introduce flakiness. diff --git a/docs/reference/citation_pack.md b/docs/reference/citation_pack.md new file mode 100644 index 0000000..5e4517b --- /dev/null +++ b/docs/reference/citation_pack.md @@ -0,0 +1,57 @@ +# citation_pack + +Normalize citation metadata and optionally attach a Wayback archive URL. + +## Stdin schema + +```json +{ + "doc": { + "title": "string?", + "url": "string", + "published_at": "string?" + }, + "archive": { + "wayback": "boolean?" 
+ } +} +``` + +## Stdout schema + +```json +{ + "title": "string?", + "url": "string", + "host": "string", + "accessed_at": "string", + "archive_url": "string?" +} +``` + +- "accessed_at" is an RFC3339 UTC timestamp of when the pack was created. +- When `archive.wayback` is true, the tool queries a Wayback-compatible endpoint for an existing snapshot and includes its URL if available. + +## Environment + +- `WAYBACK_BASE_URL` (optional): Base URL for Wayback API (defaults to `https://web.archive.org`). + +## Exit codes + +- 0: success +- non-zero: error; stderr contains a single-line JSON `{ "error": "..." }`. + +## Examples + +- Minimal normalization: + +```bash +echo '{"doc":{"url":"https://example.com/post"}}' | ./tools/bin/citation_pack | jq . +``` + +- Include Wayback lookup (using a local test server): + +```bash +export WAYBACK_BASE_URL="http://localhost:8080" +echo '{"doc":{"url":"https://example.com/post"},"archive":{"wayback":true}}' | ./tools/bin/citation_pack | jq . +``` diff --git a/docs/reference/cli-reference.md b/docs/reference/cli-reference.md new file mode 100644 index 0000000..d9054f9 --- /dev/null +++ b/docs/reference/cli-reference.md @@ -0,0 +1,126 @@ +# agentcli CLI reference + +A concise, canonical reference for `agentcli` flags and behavior. Flags are order-insensitive; precedence is flag > environment > default. + +## Flags + +- `-prompt string`: User prompt (required) +- `-prompt-file string`: Path to file containing user prompt ('-' for STDIN; mutually exclusive with `-prompt`) +- `-tools string`: Path to tools.json (optional) +- `-system string`: System prompt (default "You are a helpful, precise assistant. 
Use tools when strictly helpful.") +- `-system-file string`: Path to file containing system prompt ('-' for STDIN; mutually exclusive with `-system`) +- `-developer string`: Developer message (repeatable) +- `-developer-file string`: Path to file containing developer message (repeatable; '-' for STDIN) +- `-base-url string`: OpenAI-compatible base URL (env `OAI_BASE_URL`, default `https://api.openai.com/v1`) +- `-api-key string`: API key if required (env `OAI_API_KEY`; falls back to `OPENAI_API_KEY`) +- `-model string`: Model ID (env `OAI_MODEL`, default `oss-gpt-20b`) +- `-max-steps int`: Maximum reasoning/tool steps (default 8) +- `-http-timeout duration`: HTTP timeout for chat completions (env `OAI_HTTP_TIMEOUT`; falls back to `-timeout` if unset) +- `-prep-http-timeout duration`: HTTP timeout for pre-stage (env `OAI_PREP_HTTP_TIMEOUT`; falls back to `-http-timeout` if unset) +- `-http-retries int`: Number of retries for transient HTTP failures (timeouts, 429, 5xx) (default 2) +- `-http-retry-backoff duration`: Base backoff between HTTP retry attempts (exponential) (default 300ms) +- `-image-base-url string`: Image API base URL (env `OAI_IMAGE_BASE_URL`; inherits `-base-url` if unset) +- `-image-model string`: Image model ID (env `OAI_IMAGE_MODEL`; default `gpt-image-1`) +- `-image-api-key string`: Image API key (env `OAI_IMAGE_API_KEY`; inherits `-api-key` if unset; falls back to `OPENAI_API_KEY`) +- `-image-http-timeout duration`: Image HTTP timeout (env `OAI_IMAGE_HTTP_TIMEOUT`; inherits `-http-timeout` if unset) +- `-image-http-retries int`: Image HTTP retries (env `OAI_IMAGE_HTTP_RETRIES`; inherits `-http-retries` if unset) +- `-image-http-retry-backoff duration`: Image HTTP retry backoff (env `OAI_IMAGE_HTTP_RETRY_BACKOFF`; inherits `-http-retry-backoff` if unset) +- `-image-n int`: Number of images to generate (env `OAI_IMAGE_N`; default 1) +- `-image-size string`: Image size WxH, e.g., 1024x1024 (env `OAI_IMAGE_SIZE`; default 1024x1024) +- 
`-image-quality string`: Image quality: standard|hd (env `OAI_IMAGE_QUALITY`; default standard) +- `-image-style string`: Image style: natural|vivid (env `OAI_IMAGE_STYLE`; default natural) +- `-image-response-format string`: Image response format: url|b64_json (env `OAI_IMAGE_RESPONSE_FORMAT`; default url) +- `-image-transparent-background`: Request transparent background when supported (env `OAI_IMAGE_TRANSPARENT_BACKGROUND`; default false) +- `-tool-timeout duration`: Per-tool timeout (falls back to `-timeout` if unset) +- `-timeout duration`: [DEPRECATED] Global timeout; prefer `-http-timeout` and `-tool-timeout` (default 30s) +- `-temp float`: Sampling temperature (default 1.0; omitted for models that do not support it) +- `-top-p float`: Nucleus sampling probability mass (conflicts with `-temp`; when set, temperature is omitted per one‑knob rule and `top_p` is sent) +- `-prep-temp float`: Pre-stage sampling temperature (env `OAI_PREP_TEMP`; inherits `-temp` if unset; conflicts with `-prep-top-p`) +- `-prep-top-p float`: Pre-stage nucleus sampling probability mass (env `OAI_PREP_TOP_P`; conflicts with `-prep-temp`; when set, pre-stage omits temperature and sends `top_p`) +- `-prep-system string`: Pre-stage system message (env `OAI_PREP_SYSTEM`; mutually exclusive with `-prep-system-file`) +- `-prep-system-file string`: Path to file containing pre-stage system message ('-' for STDIN; env `OAI_PREP_SYSTEM_FILE`; mutually exclusive with `-prep-system`) +- `-prep-profile string`: Pre-stage prompt profile (`deterministic|general|creative|reasoning`); sets temperature when supported (conflicts with `-prep-top-p`) +- `-prep-model string`: Pre-stage model ID (env `OAI_PREP_MODEL`; inherits `-model` if unset) +- `-prep-base-url string`: Pre-stage base URL (env `OAI_PREP_BASE_URL`; inherits `-base-url` if unset) +- `-prep-api-key string`: Pre-stage API key (env `OAI_PREP_API_KEY`; falls back to `OAI_API_KEY`/`OPENAI_API_KEY`; inherits `-api-key` if unset) +- 
`-prep-http-retries int`: Pre-stage HTTP retries (env `OAI_PREP_HTTP_RETRIES`; inherits `-http-retries` if unset) +- `-prep-http-retry-backoff duration`: Pre-stage HTTP retry backoff (env `OAI_PREP_HTTP_RETRY_BACKOFF`; inherits `-http-retry-backoff` if unset) +- `-prep-cache-bust`: Skip pre-stage cache and force recompute +- `-prep-dry-run`: Run pre-stage only, print refined Harmony messages to stdout, and exit 0 +- `-state-dir string`: Directory to persist and restore execution state across runs (env `AGENTCLI_STATE_DIR`) +- `-state-scope string`: Optional scope key to partition saved state (env `AGENTCLI_STATE_SCOPE`); when empty, a default hash of model|base_url|toolset is used +- `-state-refine`: Refine the loaded state bundle using `-state-refine-text` or `-state-refine-file` (requires `-state-dir`) +- `-state-refine-text string`: Refinement input text to apply to the loaded state bundle (ignored when `-state-refine-file` is set; requires `-state-dir`) +- `-state-refine-file string`: Path to file containing refinement input (wins over `-state-refine-text`; requires `-state-dir`) +- `-print-messages`: Pretty-print the final merged message array to stderr before the main call +- `-stream-final`: If server supports streaming, stream only `assistant{channel:"final"}` to stdout; buffer other channels for `-verbose` +- `-channel-route name=stdout|stderr|omit`: Override default channel routing (`final→stdout`, `critic/confidence→stderr`); repeatable +- `-save-messages string`: Write the final merged Harmony messages to the given JSON file and continue +- `-load-messages string`: Bypass pre-stage and prompt; load Harmony messages from the given JSON file (validator-checked) +- `-prep-enabled`: Enable pre-stage processing (default true). When false, pre-stage is skipped and the agent proceeds directly with the original `{system,user}` messages. 
+- `-debug`: Dump request/response JSON to stderr +- `-verbose`: Also print non-final assistant channels (critic/confidence) to stderr +- `-quiet`: Suppress non-final output; print only final text to stdout +- `-prep-tools-allow-external`: Allow pre-stage to execute external tools from `-tools` (default false). When not set, pre-stage is limited to built-in read-only tools and ignores `-tools`. +- `-prep-tools string`: Path to pre-stage tools.json (optional). Used only when `-prep-tools-allow-external` is enabled; if provided, the pre-stage uses this manifest instead of `-tools`. +- `-capabilities`: Print enabled tools and exit +- `-print-config`: Print resolved config and exit +- `-dry-run`: Print intended state actions (restore/refine/save) and exit without writing state +- `--version | -version`: Print version and exit + +## Environment variables + +- `OAI_BASE_URL`: Base URL for chat completions API +- `OAI_MODEL`: Default model ID +- `OAI_API_KEY`: API key (canonical; CLI also accepts `OPENAI_API_KEY` as a fallback) +- `OAI_HTTP_TIMEOUT`: HTTP timeout for chat requests (e.g., `90s`) +- `OAI_PREP_HTTP_TIMEOUT`: HTTP timeout for pre-stage requests (e.g., `90s`); overrides inheritance from `-http-timeout` +- `LLM_TEMPERATURE`: Temperature override when `-temp` is not provided (flag takes precedence) + +## Exit codes + +- `0`: Success, printed final assistant message or handled help/version +- `1`: Operational error (HTTP failure, tool manifest issues, no final assistant content) +- `2`: CLI misuse (e.g., missing `-prompt`) + +## Examples + +- Inline developer messages (repeatable) with an inline prompt: + +```bash +./bin/agentcli -developer "Follow style guide X" -developer "Prefer JSON outputs" -prompt "Summarize the repo" +``` + +- Read system prompt from a file and user prompt from STDIN: + +```bash +echo "What changed since last release?" 
| ./bin/agentcli -system-file ./system.txt -prompt-file - +``` + +- Mix developer files and inline developer messages; read one developer message from STDIN: + +```bash +echo "Security MUST be documented" | ./bin/agentcli \ + -developer-file ./dev/a.txt \ + -developer-file - \ + -developer "Add tests for every change" \ + -prompt "Implement the CLI role flags" +``` + +## Notes + +- Temperature is sent only when supported by the selected model; otherwise it is omitted to avoid API errors. When `-top-p` is set, temperature is omitted, `top_p` is included, and a one-line warning is printed to stderr. +- Tools are executed via argv only with JSON stdin/stdout and strict timeouts; no shell is used. +- See ADR‑0005 for the pre-stage flow and channel routing details: [../adr/0005-harmony-pre-processing-and-channel-aware-output.md](../adr/0005-harmony-pre-processing-and-channel-aware-output.md). + - See also ADR‑0006 for the image generation tool rationale: [../adr/0006-image-generation-tool-img_create.md](../adr/0006-image-generation-tool-img_create.md). + +## Prompt profiles + +The following profiles map to sampling behaviors for convenience. Temperature is omitted when the target model does not support it. + +| Profile | Effect | +|---|---| +| deterministic | temperature = 0.1 (pre-stage via `-prep-profile deterministic`) | +| general | temperature = 1.0 | +| creative | temperature = 1.0 | +| reasoning | temperature = 1.0 | diff --git a/docs/reference/crossref_search.md b/docs/reference/crossref_search.md new file mode 100644 index 0000000..b1fffe5 --- /dev/null +++ b/docs/reference/crossref_search.md @@ -0,0 +1,37 @@ +# Crossref search tool (crossref_search) + +Search DOI metadata via Crossref API. 
+ +## Usage + +```bash +printf '{"q":"golang","rows":5}' | ./tools/bin/crossref_search | jq +``` + +- Required env: `CROSSREF_MAILTO` (used in User-Agent and `mailto` parameter) +- Optional env: `HTTP_TIMEOUT_MS` + +## Input + +- `q` (string): search query +- `rows` (int, default 10, max 50): number of results + +## Output + +```json +{ + "results": [ + { + "title": "...", + "doi": "...", + "issued": "YYYY[-MM[-DD]]", + "container": "Journal ...", + "title_short": "..." + } + ] +} +``` + +## Notes +- 8s default timeout; up to 5 redirects; SSRF guard blocks private/loopback unless `CROSSREF_ALLOW_LOCAL=1` for tests. +- On HTTP 429, the tool exits non‑zero and prints a single‑line stderr JSON with `RATE_LIMITED`. diff --git a/docs/reference/github_search.md b/docs/reference/github_search.md new file mode 100644 index 0000000..d8f02c3 --- /dev/null +++ b/docs/reference/github_search.md @@ -0,0 +1,12 @@ +### github_search + +- stdin: `{ "q": string, "type": "repositories|code|issues|commits", "per_page?": 10 }` +- stdout: `{ "results": [ ...minimal per type... ], "rate": { "remaining": int, "reset": int } }` +- env: optional `GITHUB_TOKEN` +- behavior: uses `Accept: application/vnd.github+json`, 8s timeout (override via `HTTP_TIMEOUT_MS`), 1 retry on 5xx, SSRF guard blocks private/loopback. +- rate limit: if `X-RateLimit-Remaining` is `0`, exits non-zero with stderr JSON: `{"error":"RATE_LIMITED","hint":"use GITHUB_TOKEN"}`. + +Example (repositories): +```bash +echo '{"q":"language:go stars:>5000","type":"repositories"}' | ./tools/bin/github_search | jq '.results[0]' +``` diff --git a/docs/reference/http_fetch.md b/docs/reference/http_fetch.md new file mode 100644 index 0000000..a4fb98b --- /dev/null +++ b/docs/reference/http_fetch.md @@ -0,0 +1,87 @@ +# HTTP fetch tool (http_fetch) + +Safe HTTP/HTTPS fetcher with hard byte caps, limited redirects, optional gzip decompression, and SSRF guard. 
The tool streams JSON over stdin/stdout; errors are single-line JSON on stderr with non-zero exit. + +## Contracts + +- Stdin: single JSON object +- Stdout (success): single-line JSON object +- Stderr (failure): single-line JSON `{ "error": "...", "hint?": "..." }` and non-zero exit + +### Parameters + +- `url` (string, required): http/https URL +- `method` (string, optional): `GET` or `HEAD` (default `GET`) +- `max_bytes` (int, optional): hard byte cap for response body (default 1048576) +- `timeout_ms` (int, optional): request timeout in milliseconds (default 10000; falls back to `HTTP_TIMEOUT_MS` env if unset) +- `decompress` (bool, optional): when true (default), enables transparent gzip decoding; when false, returns raw bytes + +### Output + +``` +{ + "status": 200, + "headers": {"Content-Type": "text/plain; charset=utf-8", "ETag": "\"abc123\""}, + "body_base64": "...", + "truncated": false +} +``` + +### Example: GET + +Input to stdin: + +```json +{"url": "https://example.org/robots.txt", "max_bytes": 65536} +``` + +### Example: HEAD + +Input to stdin: + +```json +{"url": "https://example.org/", "method": "HEAD"} +``` + +## Behavior + +- Schemes: only `http` and `https` are allowed +- Redirects: up to 5 redirects are followed; further redirects fail with `"too many redirects"` +- Headers: response headers are returned as a simple string map; `ETag` and `Last-Modified` are preserved when present +- Decompression: gzip decoding is enabled by default; set `decompress=false` to receive raw compressed bytes +- Byte cap: responses are read with a strict byte cap; when exceeded, `truncated=true` and the body is cut at `max_bytes` +- User-Agent: `agentcli-http-fetch/0.1` + +## Security (SSRF guard) + +- Blocks loopback, RFC1918, link-local, and IPv6 ULA destinations +- Blocks `.onion` hosts +- Redirect targets are re-validated +- For tests/local-only usage, setting `HTTP_FETCH_ALLOW_LOCAL=1` disables the block + +## Environment + +- `HTTP_TIMEOUT_MS` (optional): 
default timeout in milliseconds when `timeout_ms` is unset + +## Audit + +On each run, an NDJSON line is appended under `.goagent/audit/YYYYMMDD.log` with fields: + +``` +{tool:"http_fetch",url_host,status,bytes,truncated,ms} +``` + +## Manifest + +Ensure an entry similar to the following exists in `tools.json`: + +```json +{ + "name": "http_fetch", + "description": "Safe HTTP/HTTPS fetcher with byte cap and redirects", + "schema": {"type": "object", "required": ["url"], "properties": {"url": {"type": "string"}, "method": {"type": "string", "enum": ["GET", "HEAD"]}, "max_bytes": {"type": "integer", "minimum": 1, "default": 1048576}, "timeout_ms": {"type": "integer", "minimum": 1, "default": 10000}, "decompress": {"type": "boolean", "default": true}}, "additionalProperties": false}, + "command": ["./tools/bin/http_fetch"], + "timeoutSec": 15, + "envPassthrough": ["HTTP_TIMEOUT_MS"] +} +``` diff --git a/docs/reference/img_create.md b/docs/reference/img_create.md new file mode 100644 index 0000000..ccc191d --- /dev/null +++ b/docs/reference/img_create.md @@ -0,0 +1,136 @@ +# Image generation tool (img_create) + +See ADR‑0006 for design rationale and links: [../adr/0006-image-generation-tool-img_create.md](../adr/0006-image-generation-tool-img_create.md) + +Generate image(s) via an OpenAI‑compatible Images API and either save PNG files into your repository (default) or return base64 on demand. This tool is invoked by the agent as a function tool using JSON over stdin/stdout with strict timeouts and no shell. + +## Contracts + +- Stdin: single JSON object matching the parameters below +- Stdout (success): single‑line JSON result +- Stderr (failure): single‑line JSON object `{ "error": "...", "hint?": "..." 
}`, non‑zero exit + +### Example: save files (default) + +Input to stdin: + +```json +{ + "prompt": "tiny-pixel", + "n": 1, + "size": "1024x1024", + "save": {"dir": "assets", "basename": "img", "ext": "png"} +} +``` + +Output on stdout (shape shown; paths and hashes will vary): + +```json +{ + "saved": [ + {"path": "assets/img_001.png", "bytes": 95, "sha256": "<hex>"} + ], + "n": 1, + "size": "1024x1024", + "model": "gpt-image-1" +} +``` + +### Example: return base64 instead of saving + +Input to stdin: + +```json +{ + "prompt": "tiny-pixel", + "n": 1, + "return_b64": true +} +``` + +Output on stdout by default elides base64 for transcript hygiene: + +```json +{ + "images": [ + {"b64": "", "hint": "b64 elided"} + ] +} +``` + +Set `IMG_CREATE_DEBUG_B64=1` (or `DEBUG_B64=1`) to include base64 in stdout for debugging. + +## Parameters + +| Name | Type | Required | Default | Constraints | Notes | +|-------------|-----------|----------|---------------|-------------------------------------------|-------| +| `prompt` | string | yes | — | non‑empty | Text prompt for the image(s). +| `n` | integer | no | 1 | 1 ≤ n ≤ 4 | Number of images to generate. +| `size` | string | no | `1024x1024` | regex `^\d{3,4}x\d{3,4}$` | Width x height in pixels. +| `model` | string | no | `gpt-image-1` | — | Passed as‑is to the Images API. +| `return_b64`| boolean | no | false | — | When true, returns base64 JSON instead of writing files. +| `save.dir` | string | cond. | — | repo‑relative; must not escape repo root | Required when `return_b64=false` (default). +| `save.basename` | string| no | `img` | must not contain path separators | Filename stem; tool appends `_<001..>.ext`. +| `save.ext` | string | no | `png` | enum: `png` | Output format; currently PNG only. +| `extras` | object | no | — | shallow map of string→primitive | Optional pass-through for known keys like `background:"transparent"`; only primitives are allowed; core keys are not overridden. 
+ +Notes: +- When saving files, the tool writes atomically to `save.dir` and returns file metadata including SHA‑256. +- Filenames are generated as `<basename>_NNN.<ext>` with zero‑padded indices starting at 001. + +## HTTP behavior + +- Endpoint: `POST ${OAI_IMAGE_BASE_URL:-$OAI_BASE_URL}/v1/images/generations` +- Request body: + - `{ "model", "prompt", "n", "size", "response_format": "b64_json" }` +- Headers: + - `Content-Type: application/json` + - `Authorization: Bearer $OAI_API_KEY` (if present) +- Timeout: from `OAI_HTTP_TIMEOUT` (duration, default 120s) +- Retries: up to 2 retries (3 total attempts) on timeouts, HTTP 429, and 5xx with backoff `250ms, 500ms, 1s` +- Error mapping: non‑2xx responses attempt to extract a useful message from `{error}` or `{error:{message}}`; otherwise emit `api status <code>` + +## Environment + +- `OAI_IMAGE_BASE_URL`: Base URL for Images API (preferred) +- `OAI_BASE_URL`: Fallback base URL when `OAI_IMAGE_BASE_URL` is unset +- `OAI_API_KEY`: API key for authorization (optional for mocks) +- `OAI_HTTP_TIMEOUT`: HTTP timeout (e.g., `90s`) +- `IMG_CREATE_DEBUG_B64` / `DEBUG_B64`: When set truthy, include base64 in stdout for `return_b64=true` + +The manifest allowlist passes through only the following variables to the tool: + +```json +["OAI_API_KEY", "OAI_BASE_URL", "OAI_IMAGE_BASE_URL", "OAI_HTTP_TIMEOUT"] +``` + +## Underlying API (cURL) + +For transparency, the tool issues the equivalent of: + +```bash +curl -sS -X POST "$OAI_BASE_URL/v1/images/generations" \ + -H "Authorization: Bearer $OAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-image-1", + "prompt": "tiny-pixel", + "n": 1, + "size": "1024x1024", + "response_format": "b64_json" + }' +``` + +When `OAI_IMAGE_BASE_URL` is set, it is used instead of `OAI_BASE_URL`. + +## Safety notes + +- Strict repository‑relative writes: `save.dir` must be within the repository; absolute paths and `..` escapes are rejected. 
+- No shell execution: the tool is executed via argv only; stdin/stdout are JSON. +- Transcript hygiene: by default, base64 is elided from stdout to prevent large transcripts. Enable debug envs to view base64 locally. + +## Related documentation + +- Images & vision guide: [OpenAI Images docs](https://platform.openai.com/docs/guides/images) +- Model reference: [OpenAI model catalog (gpt-image-1)](https://platform.openai.com/docs/models) +- Tools manifest reference: see `docs/reference/tools-manifest.md` diff --git a/docs/reference/metadata_extract.md b/docs/reference/metadata_extract.md new file mode 100644 index 0000000..7fc7ed1 --- /dev/null +++ b/docs/reference/metadata_extract.md @@ -0,0 +1,24 @@ +# metadata_extract + +Extract OpenGraph, Twitter card, and JSON-LD metadata from HTML. + +- stdin JSON: + +```json +{"html":"<html>...</html>","base_url":"https://example.org/page"} +``` + +- stdout JSON: + +```json +{"opengraph":{...},"twitter":{...},"jsonld":[ ... ]} +``` + +- exit codes: 0 success; non-zero with stderr JSON `{ "error": "..." }` on failure. + +## Examples + +```bash +html='<!doctype html><html><head><meta property="og:title" content="T"><meta name="twitter:card" content="summary"><script type="application/ld+json">{"@context":"https://schema.org","@type":"Article","headline":"H"}</script></head><body></body></html>' +jq -n --arg html "$html" '{html:$html, base_url:"https://example.org/x"}' | ./tools/bin/metadata_extract | jq +``` diff --git a/docs/reference/openalex_search.md b/docs/reference/openalex_search.md new file mode 100644 index 0000000..8cf5226 --- /dev/null +++ b/docs/reference/openalex_search.md @@ -0,0 +1,15 @@ +# OpenAlex search tool (openalex_search) + +Search scholarly works via the OpenAlex API. + +- Stdin JSON: {"q":string,"from?":string,"to?":string,"per_page?":int<=50} +- Stdout JSON: {"results":[{"title":string,"doi?":string,"publication_year":int,"open_access_url?":string,"authorships":[...] 
,"cited_by_count":int}],"next_cursor?":string} +- Env: OPENALEX_BASE_URL (optional, default https://api.openalex.org), HTTP_TIMEOUT_MS (optional) +- Retries: up to 1 on timeout or 5xx +- SSRF guard: blocks loopback/RFC1918/link-local/ULA and .onion + +Example: + +```bash +printf '{"q":"golang","per_page":5}' | ./tools/bin/openalex_search | jq +``` diff --git a/docs/reference/pdf_extract.md b/docs/reference/pdf_extract.md new file mode 100644 index 0000000..ef8487b --- /dev/null +++ b/docs/reference/pdf_extract.md @@ -0,0 +1,26 @@ +# pdf_extract + +Extract text from PDF pages with optional OCR via `tesseract`. + +- stdin JSON: + +```json +{"pdf_base64":"...","pages":[0,2,5]} +``` + +- stdout JSON: + +```json +{"page_count":3,"pages":[{"index":0,"text":"..."}]} +``` + +- environment: +- `ENABLE_OCR`: when truthy (1/true/yes), attempts OCR for pages with no extracted text. If `tesseract` is missing, the tool exits non-zero with stderr JSON `{ "error": "OCR_UNAVAILABLE" }`. + +- exit codes: 0 success; non-zero with stderr JSON `{ "error": "..." }` on failure. + +## Examples + +```bash +echo '{"pdf_base64":"'$(base64 -w0 sample.pdf)'"}' | ./tools/bin/pdf_extract | jq .page_count +``` diff --git a/docs/reference/readability_extract.md b/docs/reference/readability_extract.md new file mode 100644 index 0000000..b50c50a --- /dev/null +++ b/docs/reference/readability_extract.md @@ -0,0 +1,23 @@ +# readability_extract + +Extract article content from HTML using go-readability. + +- stdin JSON: + +```json +{"html": "<html>...</html>", "base_url": "https://example.org/page"} +``` + +- stdout JSON: + +```json +{"title":"...","byline":"...","text":"...","content_html":"<p>...</p>","length":1234} +``` + +- exit codes: 0 success; non-zero with stderr JSON `{ "error": "..." }` on failure. 
+ +## Examples + +```bash +echo '{"html":"<html><body><article><h1>T</h1><p>Hi</p></article></body></html>","base_url":"https://example.org/x"}' | ./tools/bin/readability_extract | jq .title +``` diff --git a/docs/reference/research-tools.md b/docs/reference/research-tools.md new file mode 100644 index 0000000..229f5c4 --- /dev/null +++ b/docs/reference/research-tools.md @@ -0,0 +1,165 @@ +# Research tools reference + +This page consolidates contracts and usage for the research-oriented tools. Each tool consumes a single JSON object on stdin and prints a single JSON object on stdout on success. On error, tools exit non‑zero and print a single-line JSON error to stderr: `{ "error": "...", "hint?": "..." }`. + +Notes +- All networked tools implement an SSRF guard: they block loopback, RFC1918/ULA, link‑local, and `.onion` destinations. Redirect targets are re‑validated. Some tools allow an opt‑in env to permit local addresses in tests. +- Timeouts apply per tool; see each section for defaults and retry rules. +- Examples assume you have built tools via `make build-tools` and are running from the repo root. + +## searxng_search +- Stdin: `{ "q": string, "time_range?": "day|week|month|year", "categories?": [string], "engines?": [string], "language?": string, "page?": int, "size?": int<=50 }` +- Stdout: `{ "query": string, "results": [{"title": string, "url": string, "snippet": string, "engine": string, "published_at?": string}] }` +- Env: `SEARXNG_BASE_URL` (required), `HTTP_TIMEOUT_MS` (optional) +- Network: timeout 10s; redirects ≤5; retries up to 2 on timeout/429/5xx (observes `Retry-After`) +- Security: SSRF guard; blocks local/private/ULA/`.onion` +- Example: +```bash +export SEARXNG_BASE_URL=http://localhost:8888 +printf '{"q":"golang","size":3}' | ./tools/bin/searxng_search | jq . 
+``` + +## http_fetch +- Stdin: `{ "url": string, "method?": "GET|HEAD", "max_bytes?": 1048576, "timeout_ms?": 10000, "decompress?": true }` +- Stdout: `{ "status": int, "headers": object, "body_base64?": string, "truncated": bool }` +- Env: `HTTP_TIMEOUT_MS` (optional) +- Network: timeout default 10s; redirects ≤5 +- Security: SSRF guard; blocks local/private/ULA/`.onion` +- Example: +```bash +printf '{"url":"https://example.org/robots.txt","max_bytes":65536}' | ./tools/bin/http_fetch | jq .status +``` + +## robots_check +- Purpose: Evaluate `<origin>/robots.txt` for a target URL and user agent. +- Stdin: `{ "url": string, "user_agent?": "agentcli" }` +- Stdout: `{ "allowed": bool, "crawl_delay_ms?": int, "group_rules": [string] }` +- Env: none (tests may set `ROBOTS_CHECK_ALLOW_LOCAL=1`) +- Network: timeout 5s; redirects allowed only within the same origin (no cross‑origin) +- Security: SSRF guard; DNS and IP checks block private/loopback +- Example: +```bash +echo '{"url":"https://example.org/path"}' | ./tools/bin/robots_check | jq . 
+``` + +## readability_extract +- Stdin: `{ "html": string, "base_url": string }` (≤5 MiB) +- Stdout: `{ "title": string, "byline?": string, "text": string, "content_html": string, "length": int }` +- Env: none +- Example: +```bash +echo '{"html":"<html><body><article><h1>T</h1><p>Hi</p></article></body></html>","base_url":"https://example.org/x"}' | ./tools/bin/readability_extract | jq .title +``` + +## metadata_extract +- Stdin: `{ "html": string, "base_url": string }` +- Stdout: `{ "opengraph": object, "twitter": object, "jsonld": [any] }` +- Env: none +- Example: +```bash +html='<!doctype html><html><head><meta property="og:title" content="T"><meta name="twitter:card" content="summary"><script type="application/ld+json">{"@context":"https://schema.org","@type":"Article","headline":"H"}</script></head><body></body></html>' +jq -n --arg html "$html" '{html:$html, base_url:"https://example.org/x"}' | ./tools/bin/metadata_extract | jq .opengraph.title +``` + +## pdf_extract +- Stdin: `{ "pdf_base64": string, "pages?": [int] }` (≤20 MiB) +- Stdout: `{ "page_count": int, "pages": [{"index": int, "text": string}] }` +- Env: `ENABLE_OCR` enables OCR via `tesseract` if text is missing; if unavailable, exits non‑zero with `{"error":"OCR_UNAVAILABLE"}` +- Example: +```bash +printf '{"pdf_base64":"'"$(base64 -w0 sample.pdf)"'"}' | ./tools/bin/pdf_extract | jq .page_count +``` + +## rss_fetch +- Stdin: `{ "url": string, "if_modified_since?": string }` +- Stdout: `{ "feed": {"title": string, "link": string}, "items": [{"title": string, "url": string, "published_at?": string, "summary?": string}] }` +- Env: none +- Network: timeout 5s; redirects ≤5; SSRF guard +- UA: `agentcli-rss-fetch/0.1` +- Example: +```bash +echo '{"url":"https://example.com/feed.xml"}' | ./tools/bin/rss_fetch | jq .items[0] +``` + +## wayback_lookup +- Stdin: `{ "url": string, "save?": false }` +- Stdout: `{ "closest_url?": string, "timestamp?": string, "saved?": bool }` +- Env: `WAYBACK_BASE_URL` (optional; 
default `https://web.archive.org`) +- Network: timeout 3s; one retry on 5xx with jitter +- Security: SSRF guard +- Example: +```bash +jq -n '{url:"http://example.com", save:true}' | ./tools/bin/wayback_lookup | jq . +``` + +## wiki_query +- Stdin: `{ "titles?": string, "search?": string, "language?": "en" }` (exactly one of `titles` or `search` required) +- Stdout: `{ "pages": [{"title": string, "url": string, "extract": string}] }` +- Env: optional `MEDIAWIKI_BASE_URL` to override, otherwise `https://{language}.wikipedia.org` +- Network: timeout 5s +- Security: SSRF guard (blocks local/private) +- Example: +```bash +echo '{"search":"Golang","language":"en"}' | ./tools/bin/wiki_query | jq .pages[0] +``` + +## openalex_search
- Stdin: `{ "q": string, "from?": string, "to?": string, "per_page?": int }` +- Stdout: `{ "results": [{"title": string, "doi?": string, "publication_year": int, "open_access_url?": string, "authorships": [...], "cited_by_count": int}], "next_cursor?": string }` +- Env: optional `OPENALEX_BASE_URL` (default `https://api.openalex.org`), `HTTP_TIMEOUT_MS` +- Network: timeout 8s; retries 1 on 5xx/timeout; SSRF guard +- Example: +```bash +printf '{"q":"golang","per_page":5}' | ./tools/bin/openalex_search | jq .results[0] +``` + +## crossref_search +- Stdin: `{ "q": string, "rows?": int }` +- Stdout: `{ "results": [{"title": string, "doi": string, "issued": string, "container": string, "title_short?": string}] }` +- Env: required `CROSSREF_MAILTO`; optional `HTTP_TIMEOUT_MS` +- Network: timeout 8s; redirects ≤5; SSRF guard +- Rate limits: on 429, exits non‑zero with stderr JSON `{ "error": "RATE_LIMITED", "hint": "set CROSSREF_MAILTO and back off" }` +- Example: +```bash +printf '{"q":"golang","rows":5}' | ./tools/bin/crossref_search | jq .results[0] +``` + +## github_search +- Stdin: `{ "q": string, "type": "repositories|code|issues|commits", "per_page?": int }` +- Stdout: `{ "results": [ ...minimal per type... 
], "rate": { "remaining": int, "reset": int } }` +- Env: optional `GITHUB_TOKEN`, `HTTP_TIMEOUT_MS` +- Network: timeout 8s; retry 1 on 5xx; SSRF guard +- Rate limits: if `X‑RateLimit‑Remaining` is `0`, exits non‑zero with stderr JSON `{ "error":"RATE_LIMITED", "hint":"use GITHUB_TOKEN" }` +- Example: +```bash +echo '{"q":"language:go stars:>5000","type":"repositories"}' | ./tools/bin/github_search | jq .results[0] +``` + +## dedupe_rank +- Stdin: `{ "docs": [{"id": string, "url?": string, "title?": string, "text?": string, "published_at?": string}] }` +- Stdout: `{ "groups": [{"representative_id": string, "members": [string], "score": number}] }` +- Env: optional `AUTHORITY_HINTS_JSON` (if supported) to bias ranking +- Behavior: MinHash‑like 3‑shingle + TF‑IDF tie‑break; deterministic output +- Example: +```bash +jq -n '{docs:[{id:"a",title:"Intro to Go"},{id:"b",title:"Go introduction"},{id:"c",title:"Rust book"}]}' | ./tools/bin/dedupe_rank | jq . +``` + +## citation_pack +- Stdin: `{ "doc": {"title?": string, "url": string, "published_at?": string}, "archive?": {"wayback?": bool} }` +- Stdout: `{ "title?": string, "url": string, "host": string, "accessed_at": string, "archive_url?": string }` +- Env: optional `WAYBACK_BASE_URL` +- Behavior: if `archive.wayback` is true, queries Wayback for an existing snapshot +- Example: +```bash +echo '{"doc":{"url":"https://example.com/post"},"archive":{"wayback":true}}' | ./tools/bin/citation_pack | jq . +``` + +--- + +Exit codes +- 0: success; stdout contains a single JSON object +- non‑zero: failure; stderr contains a single‑line JSON error with optional `hint` + +Security and SSRF +- All network tools validate destinations and block private, loopback, link‑local, ULA, and `.onion` addresses. A few tools support a test‑only override via an env variable; see the tool’s section. 
diff --git a/docs/reference/rss_fetch.md b/docs/reference/rss_fetch.md new file mode 100644 index 0000000..bccf2d3 --- /dev/null +++ b/docs/reference/rss_fetch.md @@ -0,0 +1,38 @@ +# rss_fetch + +Fetch an RSS or Atom feed and return a normalized JSON structure. + +## Usage + +```bash +echo '{"url":"https://example.com/feed.xml"}' | ./tools/bin/rss_fetch | jq +``` + +## Stdin schema + +```json +{ + "type": "object", + "properties": { + "url": {"type": "string"}, + "if_modified_since": {"type": "string"} + }, + "required": ["url"], + "additionalProperties": false +} +``` + +## Stdout + +```json +{ + "feed": {"title": "...", "link": "..."}, + "items": [ + {"title": "...", "url": "...", "published_at": "...", "summary": "..."} + ] +} +``` + +- Honors If-Modified-Since header; on 304 Not Modified returns an empty items array. +- 5s timeout; up to 5 redirects with SSRF guard (blocks loopback/private/onion). +- User-Agent: agentcli-rss-fetch/0.1 diff --git a/docs/reference/searxng_search.md b/docs/reference/searxng_search.md new file mode 100644 index 0000000..742fd55 --- /dev/null +++ b/docs/reference/searxng_search.md @@ -0,0 +1,16 @@ +# SearXNG search tool (searxng_search) + +Run a web meta search via SearXNG's JSON API. 
+ +- Stdin JSON: {"q":string,"time_range?":"day|week|month|year","categories?":[string],"engines?":[string],"language?":string,"page?":int,"size?":int<=50} +- Stdout JSON: {"query":string,"results":[{"title":string,"url":string,"snippet":string,"engine":string,"published_at?":string}]} +- Env: SEARXNG_BASE_URL (required), HTTP_TIMEOUT_MS (optional) +- Retries: up to 2 on timeout, 429 (observes Retry-After), or 5xx +- SSRF guard: blocks loopback/RFC1918/link-local/ULA and .onion + +Example: + +```bash +export SEARXNG_BASE_URL=http://localhost:8888 +printf '{"q":"golang","size":3}' | ./tools/bin/searxng_search | jq +``` diff --git a/docs/reference/tools-manifest.md b/docs/reference/tools-manifest.md new file mode 100644 index 0000000..335936b --- /dev/null +++ b/docs/reference/tools-manifest.md @@ -0,0 +1,93 @@ +# Tools Manifest Reference (tools.json) + +This page documents the `tools.json` schema consumed by `agentcli` and how it is translated into OpenAI-compatible tool definitions. It reflects the current implementation in `internal/tools/manifest.go` and the unit tests in `internal/tools/manifest_test.go`. + +## Schema + +Root object: +```json +{ + "tools": [ ToolSpec, ... ] +} +``` + +ToolSpec fields: +- `name` (string, required): Unique tool name. Must be non-empty and unique across the manifest. +- `description` (string, optional): Short human description. +- `schema` (object, optional): JSON Schema for the tool parameters. This is passed through to the model as `parameters` in the OpenAI "function" tool. +- `command` (array of string, required): Argv vector. First element is the program path (relative or absolute); subsequent elements are fixed args. When relative, it MUST start with `./tools/bin/NAME` (use `.exe` on Windows). Relative paths are resolved against the directory containing this `tools.json` (not the process working directory). The runner will execute this program and write the function call JSON arguments to stdin. 
+- `timeoutSec` (integer, optional): Per-call timeout override in seconds. If omitted, the CLI's `-timeout` applies. +- `envPassthrough` (array of string, optional): Allowlist of environment variable names to pass from the parent process to the tool. Names are normalized to uppercase and must match the regex `[A-Z_][A-Z0-9_]*`. Duplicates are removed preserving first occurrence. The runner always sets a minimal base environment (e.g., `PATH`, `HOME`) and augments it with only these keys if present in the parent. For observability, the audit log records only the names of keys passed (as `envKeys`), never their values. + +Notes: +- Validation errors are precise and include the offending index/name. +- `command` must have at least one element (the program). +- Names must be unique (duplicates are rejected). + +## OpenAI tool mapping +Each manifest entry is exported as an OpenAI tool of type `function`: +```json +{ + "type": "function", + "function": { + "name": "<name>", + "description": "<description>", + "parameters": { /* schema as provided */ } + } +} +``` + +## Minimal example +```json +{ + "tools": [ + { + "name": "get_time", + "description": "Get current time for an IANA timezone", + "schema": { + "type": "object", + "properties": { + "timezone": {"type": "string", "description": "IANA timezone, e.g. 
Europe/Helsinki"}, + "tz": {"type": "string", "description": "Alias for timezone (deprecated)"} + }, + "required": ["timezone"], + "additionalProperties": false + }, + "command": ["./tools/bin/get_time"], + "timeoutSec": 5, + "envPassthrough": ["TZ", "OAI_HTTP_TIMEOUT"] + } + ] +} +``` + +On Windows, use the `.exe` suffix for the tool binary: + +```json +{ + "tools": [ + { + "name": "get_time", + "schema": {"type":"object","properties":{"timezone":{"type":"string"}},"required":["timezone"],"additionalProperties":false}, + "command": ["./tools/bin/get_time.exe"], + "timeoutSec": 5 + } + ] +} +``` + +## Common mistakes +- Missing `name`: error `tool[i]: name is required`. +- Duplicate `name`: error `tool[i] "<name>": duplicate name`. +- Empty `command`: error `tool[i] "<name>": command must have at least program name`. +- Relative `command[0]` not using the canonical bin prefix: error `tool[i] "<name>": relative command[0] must start with ./tools/bin/` (absolute paths are allowed for tests). This ensures tools are invoked from `./tools/bin/NAME` and are then resolved relative to the manifest directory. +- Relative `command[0]` that normalizes to escape the tools bin directory (e.g., `./tools/bin/../hack`): error `tool[i] "<name>": command[0] escapes ./tools/bin after normalization (got "./tools/bin/../hack" -> "./tools/hack")`. +- Invalid `envPassthrough` entry (e.g., `"OAI-API-KEY"` or `"1BAD"`): error `tool[i] "<name>": envPassthrough[j]: invalid name "..." (must match [A-Z_][A-Z0-9_]*)`. + +## Execution model +- The assistant provides JSON arguments for the tool call. `agentcli` passes that JSON to the tool's stdin verbatim. +- Tools must print a single-line JSON result to stdout. On failure, print a single-line JSON error to stderr and exit non-zero. The agent maps failures to `{"error":"..."}` content for the model. +- Environment is scrubbed to a minimal allowlist (PATH, HOME) and optionally augmented by `envPassthrough`. 
No shell is invoked; commands are executed via argv. + +## Versioning +This document describes the current stable behavior. Backward-incompatible changes will be documented in the changelog and ADRs. diff --git a/docs/reference/wayback_lookup.md b/docs/reference/wayback_lookup.md new file mode 100644 index 0000000..ae0dc13 --- /dev/null +++ b/docs/reference/wayback_lookup.md @@ -0,0 +1,42 @@ +# wayback_lookup + +Query the Internet Archive Wayback Machine for the closest snapshot of a URL, optionally triggering an archival save. + +## Usage + +```bash +export WAYBACK_BASE_URL="https://web.archive.org" +# Lookup closest snapshot +jq -n '{url:"http://example.com"}' | ./tools/bin/wayback_lookup | jq . + +# Trigger save if not available +jq -n '{url:"http://example.com", save:true}' | ./tools/bin/wayback_lookup | jq . +``` + +## Stdin schema + +```json +{ + "type": "object", + "properties": { + "url": {"type": "string"}, + "save": {"type": "boolean", "default": false} + }, + "required": ["url"], + "additionalProperties": false +} +``` + +## Stdout + +```json +{ + "closest_url": "http://web.archive.org/web/20200101000000/http://example.com/", + "timestamp": "20200101000000", + "saved": true +} +``` + +- 3s timeout, one retry on 5xx with small backoff. +- SSRF guard blocks loopback/private/onion unless `WAYBACK_ALLOW_LOCAL=1` during tests. +- User-Agent: inherits default Go; may be customized later. diff --git a/docs/runbooks/linux-5.4-sandbox-compatibility.md b/docs/runbooks/linux-5.4-sandbox-compatibility.md new file mode 100644 index 0000000..268fd01 --- /dev/null +++ b/docs/runbooks/linux-5.4-sandbox-compatibility.md @@ -0,0 +1,108 @@ +# Linux 5.4 sandbox compatibility and policy authoring + +This document explains compatibility constraints for Linux 5.4-era kernels, supported network modes and their caveats, how the execution bundle works (including inclusion of `agentcli`), how to author sandbox policies, and known limitations of rootless operation. 
+ +## Why this matters + +Many servers and CI runners still use Linux 5.4 LTS. Certain modern sandboxing features like Landlock are unavailable, and user namespaces behave differently across distributions. This guide documents a safe, portable baseline. + +## Kernel constraints (Linux ≥ 5.4) + +- No Landlock: do not rely on Landlock file restrictions. Use chroot + bind mounts instead. +- No overlayfs-in-userns: avoid overlayfs inside unprivileged user namespaces. Use bind mounts and tmpfs. +- User namespaces: require unprivileged user namespaces enabled (`kernel.unprivileged_userns_clone=1`). +- Seccomp: available and recommended for syscall filtering, but ensure your kernel config includes seccomp. +- CLONE_NEWNET: unprivileged network namespaces may be disabled by distro; detect at runtime and fall back. + +### Troubleshooting kernel prerequisites + +- Enable user namespaces: + - Ubuntu/Debian: `sudo sysctl -w kernel.unprivileged_userns_clone=1 && echo kernel.unprivileged_userns_clone=1 | sudo tee /etc/sysctl.d/99-userns.conf` +- Check seccomp: `grep SECCOMP /boot/config-$(uname -r)` should show `CONFIG_SECCOMP=y` and `CONFIG_SECCOMP_FILTER=y`. +- Check newnet: attempt `unshare -n true`; if it fails with EPERM, network namespaces for unprivileged users are blocked. + +## Network modes + +- off: no network allowed. Prefer this for tools that do not need network access. +- allow_all: do not create a network namespace; outbound egress allowed subject to host firewall. Less strict. +- proxy_allowlist: route traffic via an HTTP(S) proxy that enforces destination allowlists. Use when selective egress is required on hosts without unprivileged `CLONE_NEWNET`. + +Caveats: +- On systems without unprivileged network namespaces, `off` still works; `proxy_allowlist` requires a reachable proxy. +- DNS resolution and time may vary by container/VM; prefer explicit IP:port and TLS verification where possible. 
+ +## Bundle assembly overview + +On each run, `agentcli` assembles a minimal bundle directory with subdirs `bin`, `etc`, `in`, `out`, `tmp`. It copies exactly allowlisted tool executables and a copy of the current `agentcli` binary into `bin`. Only `/bundle/bin` is executable; other mounts are `noexec`. + +- Verify executables are regular files, reject symlinks. +- Enforce ELF arch matches the running OS and architecture. +- Cap total bundle size to prevent abuse. +- Generate a manifest with file paths, sizes, and SHA-256 hashes. + +## Policy authoring + +A deny-by-default sandbox policy defines filesystem, environment, resources, and network: + +```json +{ + "filesystem": { + "bundle": { "binaries": [ "/usr/bin/jq" ] }, + "inputs": [ { "host": "/etc/ssl/certs/ca-certificates.crt", "guest": "/etc/ssl/certs/ca.pem" } ], + "outputs": [ "/out/result.json" ] + }, + "env": { "allow": [ "TZ", "HTTP_PROXY=http://127.0.0.1:8080" ] }, + "resources": { "timeoutSec": 10, "max_output_bytes": 1048576, "rlimits": { "NOFILE": 256, "NPROC": 64, "AS": 268435456 } }, + "network": { "mode": "off", "allow": [] }, + "audit": { "redact": [ "OAI_API_KEY" ] } +} +``` + +Guidelines: +- Only list absolute host paths under `filesystem.bundle.binaries` and ensure they are static binaries when possible. +- Use `filesystem.inputs[]` for read-only mounts; avoid mounting broad directories. +- Constrain outputs to a small set under `/out`. +- Keep env allowlist minimal. Prefer literal `NAME=value` for one-off overrides. +- Set tight timeouts and rlimits appropriate for the tool. +- Prefer `network.mode: off` unless network is essential. + +## Rootless limitations + +- cgroups: only available if delegated by the system; otherwise CPU/memory limits may be advisory only. +- Mounts: require user namespaces; without them, mount operations will fail for unprivileged users. +- No raw sockets, no ptrace, limited `fork()` scale; design tools to be short-lived and modest in resource consumption. 
+## Copy-paste examples + +- Minimal offline run with no network and one input file: + +```bash +# NOTE: unquoted heredoc delimiter so $(pwd) expands; the JSON contains no other $. +cat > policy.json <<JSON +{ + "filesystem": { + "bundle": { "binaries": [ "/usr/bin/jq" ] }, + "inputs": [ { "host": "$(pwd)/in.json", "guest": "/in/in.json" } ], + "outputs": [ "/out/out.json" ] + }, + "env": { "allow": [ "TZ=UTC" ] }, + "resources": { "timeoutSec": 5, "rlimits": { "NOFILE": 128 } }, + "network": { "mode": "off" } +} +JSON + +./bin/agentcli -prompt 'Process input with jq' -tools ./tools.json -debug +``` + +- Proxy allowlist mode sketch: + +```bash +export HTTPS_PROXY=http://127.0.0.1:8080 +# Proxy must enforce destination allowlists externally. +``` + +## Troubleshooting checklist + +- EPERM creating namespaces: switch to `allow_all` or `proxy_allowlist`; document risk. +- Read-only filesystem errors: write only under `/out` or `/tmp`. +- Missing binary dependencies: rebuild tools statically or enable dynamic dependency copying in the bundle. +- Timeouts: increase `resources.timeoutSec` conservatively; investigate tool performance. diff --git a/docs/runbooks/troubleshooting.md b/docs/runbooks/troubleshooting.md new file mode 100644 index 0000000..3fc1450 --- /dev/null +++ b/docs/runbooks/troubleshooting.md @@ -0,0 +1,292 @@ +# Troubleshooting + +This runbook covers common errors and deterministic fixes for `goagent`. + +## Troubleshooting research tools + +This section covers common issues for the web research toolbelt (`searxng_search`, `http_fetch`, `robots_check`, `readability_extract`, `metadata_extract`, `pdf_extract`, `rss_fetch`, `wayback_lookup`, `wiki_query`, `openalex_search`, `crossref_search`). All examples are offline-friendly except where noted; avoid real network calls in CI. + +### Missing environment variables +- **`SEARXNG_BASE_URL` (required by `searxng_search`)** + - Symptom: stderr JSON like `{"error":"missing SEARXNG_BASE_URL"}` or bad base URL error. 
+ - Fix: + ```bash + export SEARXNG_BASE_URL=http://localhost:8888 + echo '{"q":"golang"}' | ./tools/bin/searxng_search | jq .query + ``` + +- **`CROSSREF_MAILTO` (required by `crossref_search`)** + - Symptom: stderr JSON `{"error":"missing CROSSREF_MAILTO"}` or polite header warning/rate limit. + - Fix: + ```bash + export CROSSREF_MAILTO=you@example.com + echo '{"q":"test"}' | ./tools/bin/crossref_search | jq '.results|length' + ``` + +- **`HTTP_TIMEOUT_MS` (optional global for HTTP-based tools)** + - Use to increase per-request timeouts deterministically for tools that support it (e.g., `http_fetch`, `searxng_search`). + - Example: + ```bash + export HTTP_TIMEOUT_MS=20000 + echo '{"url":"https://example.com"}' | ./tools/bin/http_fetch | jq .status + ``` + +### SSRF guard blocks request +- Behavior: Tools that perform outbound HTTP enforce an SSRF denylist (loopback, RFC1918/4193, link-local, IPv6 ::1, DNS rebinding). +- Symptom: stderr JSON contains an error like `SSRF_BLOCKED` or message indicating private/loopback address blocked. +- Fix: Use only public `http`/`https` origins. If testing, run fixtures on a non-loopback address and allow only via explicit test server. Example expected block: + ```bash + echo '{"url":"http://127.0.0.1:80"}' | ./tools/bin/http_fetch || true + ``` + +### HTTP 429 with Retry-After handling +- Behavior: Tools such as `searxng_search` and some API clients retry on 429/5xx with capped attempts and will respect `Retry-After` when present. +- Symptom: stderr JSON shows rate limit; audit logs include retry attempts. +- Fix: Reduce query rate, honor backoff, and use authentication when available. + ```bash + # Inspect retry behavior in audit logs (example path) + rg -n "searxng_search" .goagent/audit || true + ``` + +### Robots disallow +- Use `robots_check` to preflight crawl permission for an origin. 
+- Example: + ```bash + echo '{"url":"https://example.com/some-path","user_agent":"agentcli"}' | ./tools/bin/robots_check | jq .allowed + # If false, do not crawl with automated tools. + ``` + +### Response truncation (max_bytes) +- `http_fetch` enforces a hard byte cap to prevent unbounded responses. +- Symptom: stdout JSON has `truncated:true` and possibly `body_base64` shortened. +- Fix: Raise `max_bytes` within safe limits when needed. + ```bash + echo '{"url":"https://example.com/large","max_bytes":262144}' | ./tools/bin/http_fetch | jq .truncated + ``` + +### Network timeouts +- Symptom: stderr JSON indicates timeout or the CLI reports `context deadline exceeded`. +- Fix: Increase tool timeout deterministically via `HTTP_TIMEOUT_MS` (for tools that support it) or tool-specific flags, and reduce payload sizes. + ```bash + export HTTP_TIMEOUT_MS=30000 + echo '{"url":"https://example.com/slow"}' | ./tools/bin/http_fetch | jq .status || true + ``` + +## Missing tool binaries +- Symptom: `exec: "./tools/<name>": file does not exist` or tool not found in `tools.json` command path. +- Fix: +```bash +# Build all tools +make build-tools +``` +- Build a single tool from source (post‑migration layout): +```bash +# Unix/macOS +mkdir -p tools/bin +go build -o tools/bin/fs_read_file ./tools/cmd/fs_read_file + +# Windows (PowerShell or cmd) +mkdir tools\bin 2> NUL +go build -o tools\bin\fs_read_file.exe .\tools\cmd\fs_read_file +``` +- Verify: +```bash +# Unix/macOS +./tools/bin/fs_read_file -h 2>/dev/null || echo ok +ls -l tools/bin | grep -E 'exec|fs_' + +# Windows +./tools/bin/fs_read_file.exe -h 2> NUL || echo ok +dir tools\\bin +``` + +## Repo-relative path violations +- Symptom: file tools return errors about paths outside repository or cannot find the path. +- Fix: paths passed to fs tools must be relative to repository root. `cd` to repo root and retry. +```bash +# From repo root (Unix/macOS) +echo '{"path":"README.md"}' | ./tools/bin/fs_read_file | jq . 
+ +# From repo root (Windows) +echo '{"path":"README.md"}' | ./tools/bin/fs_read_file.exe | jq . +``` + +## Tool timeouts +- Symptom: `{"error":"tool timed out"}` mapped by the runner or non-zero exit due to timeout. +- Fix: increase per-tool `timeoutSec` in `tools.json`, raise CLI `-tool-timeout` (preferred), or use a larger global `-timeout` (deprecated). + +### HTTP request times out (`context deadline exceeded`) +- Cause: slow model/endpoint, proxy timeouts, or too-small `-http-timeout`. +- Fix: increase `-http-timeout` (or set env `OAI_HTTP_TIMEOUT`), reduce prompt size/model latency, or tune proxy timeouts. +```bash +# Example (Unix/macOS): command that sleeps too long +echo '{"cmd":"/bin/sleep","args":["2"],"timeoutSec":1}' | ./tools/bin/exec || true +# Increase timeout and retry +echo '{"cmd":"/bin/sleep","args":["1"],"timeoutSec":2}' | ./tools/bin/exec + +# Example (Windows): use the built binary with .exe +echo '{"cmd":"timeout","args":["/T","2"],"timeoutSec":1}' | ./tools/bin/exec.exe || true +echo '{"cmd":"timeout","args":["/T","1"],"timeoutSec":2}' | ./tools/bin/exec.exe +``` + +#### Enable HTTP retries +- When transient, enable retries to mask brief outages and 429/5xx with backoff. +```bash +# Retry up to 2 times with capped backoff (Unix/macOS) +./bin/agentcli \ + -prompt "What's the local time in Helsinki? Use get_time." \ + -tools ./tools.json \ + -http-retries 2 \ + -http-retry-backoff 500ms + +# Windows (PowerShell) +./bin/agentcli.exe ` + -prompt "What's the local time in Helsinki? Use get_time." ` + -tools ./tools.json ` + -http-retries 2 ` + -http-retry-backoff 500ms + +# Inspect attempts in the audit log (.goagent/audit/YYYYMMDD.log) +rg -n "http_attempt" .goagent/audit || true +``` + +## fs_search exclusions and file size limits +- Behavior: `fs_search` intentionally skips known binary/output directories to keep scans fast and predictable: `.git/`, `bin/`, `logs/`, and `tools/bin/` are excluded. 
It also enforces a per‑file size cap of 1 MiB. +- Symptom: expected matches inside excluded folders are not returned, or the tool exits non‑zero with a `FILE_TOO_LARGE` message. +- Fix: +```bash +# Verify exclusion behavior (create a file in an excluded dir and one in a normal dir) +mkdir -p tmp_search_demo/{bin,logs,ok} +printf 'NEEDLE' > tmp_search_demo/bin/skip.txt +printf 'NEEDLE' > tmp_search_demo/ok/scan.txt + +echo '{"query":"NEEDLE","globs":["**/*.txt"],"maxResults":10}' | ./tools/bin/fs_search | jq . +# Expect only tmp_search_demo/ok/scan.txt to appear; files under bin/ or logs/ are skipped + +# Demonstrate FILE_TOO_LARGE +python3 - <<'PY' +from pathlib import Path +p = Path('tmp_search_demo/ok/big.bin') +p.write_bytes(b'A' * (1024*1024 + 1)) +print(p, p.stat().st_size) +PY +echo '{"query":"A","globs":["tmp_search_demo/ok/big.bin"]}' | ./tools/bin/fs_search || true +# Expect non-zero exit and stderr JSON containing "FILE_TOO_LARGE" + +rm -rf tmp_search_demo +``` + +On Windows (PowerShell), use the `.exe` binary: +```powershell +New-Item -ItemType Directory -Force tmp_search_demo/bin,tmp_search_demo/logs,tmp_search_demo/ok | Out-Null +Set-Content -NoNewline -Path tmp_search_demo/bin/skip.txt -Value NEEDLE +Set-Content -NoNewline -Path tmp_search_demo/ok/scan.txt -Value NEEDLE +echo '{"query":"NEEDLE","globs":["**/*.txt"],"maxResults":10}' | ./tools/bin/fs_search.exe | jq . 
+
+# FILE_TOO_LARGE (PowerShell)
+python -c @'
+from pathlib import Path
+p = Path('tmp_search_demo/ok/big.bin')
+p.write_bytes(b'A' * (1024*1024 + 1))
+print(p, p.stat().st_size)
+'@
+echo '{"query":"A","globs":["tmp_search_demo/ok/big.bin"]}' | ./tools/bin/fs_search.exe; if ($LASTEXITCODE -eq 0) { Write-Error 'expected non-zero' }
+Remove-Item -Recurse -Force tmp_search_demo
+```
+
+## HTTP errors to the API
+### Inspecting HTTP retry attempts
+- When retries are enabled (`-http-retries > 0`), each attempt is logged to `.goagent/audit/YYYYMMDD.log` as an `http_attempt` entry with fields `{attempt,max,status,backoffMs,endpoint}`.
+- Use this to confirm whether `Retry-After` or exponential backoff was applied and how many attempts occurred.
+
+- Symptom: non-2xx from the Chat Completions endpoint with body included.
+- Fix: ensure `OAI_BASE_URL`, `OAI_MODEL`, and `OAI_API_KEY` (if required) are set correctly.
+```bash
+# Minimal local setup example
+export OAI_BASE_URL=http://localhost:1234/v1
+export OAI_MODEL=oss-gpt-20b
+# Optional if your endpoint requires it
+export OAI_API_KEY=example-token
+
+# Quick smoke
+./scripts/smoke-test.sh || true
+```
+
+## golangci-lint not found
+- Symptom: `golangci-lint: command not found` when running `make lint`.
+- Fix:
+```bash
+go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
+export PATH="$(go env GOPATH)/bin:$PATH"
+```
+
+## Image generation errors (img_create)
+
+- Invalid API key or missing base URL
+  - Symptom: stderr JSON like `{"error":"missing OAI_IMAGE_BASE_URL or OAI_BASE_URL"}` or API error `{"error":"unauthorized"}`.
+  - Fix:
+    ```bash
+    # Set base URL and API key (Unix/macOS)
+    export OAI_IMAGE_BASE_URL=https://api.openai.com
+    export OAI_API_KEY=sk-...
+ # Optional: fallback base if OAI_IMAGE_BASE_URL is unset + export OAI_BASE_URL=https://api.openai.com + ``` + See reference: docs/reference/img_create.md + +- HTTP 429 (rate limited) or 5xx + - Behavior: the tool retries up to 2 times with backoff (250ms, 500ms, 1s) and then emits `{"error":"api status 429"}` or a server message if present. + - Fix: wait and retry; reduce parallel invocations; consider lowering `n` or image `size` to lessen load: + ```bash + echo '{"prompt":"tiny-pixel","n":1,"size":"512x512","save":{"dir":"assets"}}' | ./tools/bin/img_create || true + ``` + +- Moderation/refusal or API 400 with message + - Behavior: non-2xx with body `{error:"..."}` or `{error:{message:"..."}}` is surfaced as that message in stderr JSON. + - Fix: adjust the prompt to comply with policy; verify `size` matches `^\d{3,4}x\d{3,4}$` and `n` is 1..4. + +- Request timeout + - Symptom: `{"error":"http error: context deadline exceeded"}` when the Images API is slow. + - Fix: increase the HTTP timeout and retry: + ```bash + export OAI_HTTP_TIMEOUT=180s + echo '{"prompt":"tiny-pixel","n":1,"size":"1024x1024","save":{"dir":"assets"}}' | ./tools/bin/img_create || true + ``` + - If timeouts persist, try a smaller `size` or lower `n`. + +- Missing save.dir when not returning base64 + - Symptom: `{"error":"save.dir is required when return_b64=false"}`. + - Fix: provide a repo-relative directory under `save.dir` or set `return_b64:true`: + ```bash + # Save to repo-relative assets/ + echo '{"prompt":"tiny-pixel","save":{"dir":"assets"}}' | ./tools/bin/img_create + + # Or return base64 (elided by default) + echo '{"prompt":"tiny-pixel","return_b64":true}' | ./tools/bin/img_create + ``` + +Notes: +- The tool only writes under the repository root and rejects absolute paths or `..` escapes. +- By default, base64 in stdout is elided; set `IMG_CREATE_DEBUG_B64=1` (or `DEBUG_B64=1`) to include it when `return_b64=true`. 
+
+## General verification
+- Run the test suite (offline):
+```bash
+go test ./...
+```
+- Rebuild CLI and tools deterministically:
+```bash
+make tidy build build-tools
+```
+
+## Pre-stage built-in tools
+- Behavior: during pre-stage, external tools from `-tools` are ignored by default. Only built-in read-only adapters are available: `fs.read_file`, `fs.list_dir`, `fs.stat`, `env.get`, `os.info`.
+- Symptom: a pre-stage `tool_calls` entry like `echo` or `exec` results in a tool message `{"error":"unknown tool: ..."}`.
+- Fix: either rely on built-ins, or explicitly enable external tools for pre-stage with `-prep-tools-allow-external` (use with caution).
+
+## Invalid tool message sequencing
+- Symptom: the CLI exits with an error like:
+- `error: invalid message sequence at index N: found role:"tool" without a prior assistant message containing tool_calls; each tool message must respond to an assistant tool call id`
+- or: `error: invalid message sequence at index N: role:"tool" has tool_call_id "..." that does not match any id from the most recent assistant tool_calls`
+- Cause: a tool result message was appended without a preceding assistant message that requested tool calls, or the `tool_call_id` does not match the most recent assistant `tool_calls` ids.
+- Fix: ensure the message flow strictly follows: assistant with `tool_calls[]` → one tool message per `tool_call_id` → assistant. Do not emit standalone tool messages. The CLI enforces this pre-flight and will refuse to send an invalid transcript to the API. This validator runs for both the main loop and the pre-stage (prep) call; errors during pre-stage may appear as `prep invalid message sequence`.
diff --git a/docs/security/research-tools.md b/docs/security/research-tools.md new file mode 100644 index 0000000..a0a362d --- /dev/null +++ b/docs/security/research-tools.md @@ -0,0 +1,60 @@ +# Security posture for research tools + +This page documents the security posture, guardrails, and operational guidance for the CLI-only research tools (e.g., `searxng_search`, `http_fetch`, `robots_check`, `readability_extract`, `metadata_extract`, `pdf_extract`, `rss_fetch`, `wayback_lookup`, `wiki_query`, `openalex_search`, `crossref_search`, `dedupe_rank`, `citation_pack`). It complements the broader threat model by focusing on network egress safety, provenance, and audit discipline for web-facing tools. + +## Network egress and SSRF protections + +Tools that reach the network must enforce a strict allowlist for schemes and a denylist for address families to prevent SSRF and lateral movement: + +- Allowed schemes: `http`, `https` only. All others are rejected. +- Denylist targets (reject both direct IPs and DNS results that resolve to these ranges): + - Loopback: `127.0.0.0/8`, `::1/128` + - RFC1918 private IPv4: `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16` + - Link-local: `169.254.0.0/16` (IPv4), `fe80::/10` (IPv6) + - Unique local IPv6 (RFC4193): `fc00::/7` + - Multicast/broadcast and unspecified: `224.0.0.0/4`, `ff00::/8`, `255.255.255.255`, `0.0.0.0`, `::` + - Tor/onion services: hostnames ending in `.onion` +- Redirect handling: follow at most 5 redirects and re-apply SSRF checks on each hop before connecting. +- DNS rebinding protection: resolve the destination host and validate every resolved address against the denylist; reject when any hop resolves to a blocked range. +- Byte caps and timeouts: enforce tool-specific response size caps and sane timeouts; prefer connection + overall deadlines over idle timeouts. + +## Robots compliance + +- Respect `robots.txt` per RFC 9309. 
Evaluate rules using the effective origin of the fetch, preferring the most specific user-agent group. +- Use `robots_check` to determine `allowed` and optional `crawl_delay_ms` before `http_fetch` or other network retrievals when applicable. +- Do not follow redirects to a different origin for `robots.txt` evaluation. + +## Outbound User-Agent and etiquette + +- Each tool sends a distinct User-Agent: `agentcli-<tool-name>/0.1` (e.g., `agentcli-searxng/0.1`, `agentcli-http-fetch/0.1`). +- Honor server guidance: handle `Retry-After` on 429/503; back off with capped retries as specified per tool. +- Keep requests minimal and purpose-limited; avoid fetching binary content unless explicitly required by the tool. + +## Audit logging and redaction policy + +- All networked tools emit structured NDJSON audit records with fields appropriate to the tool. Common fields include: + - `ts` (RFC3339 UTC timestamp) + - `tool` (string) + - `url_host` (hostname only) + - Operation-specific fields (e.g., `status`, `bytes`, `ms`, `retries`, `truncated`, `saved`) +- Redaction rules: + - Never log secrets or tokens. Do not include full URLs with query parameters when they may contain credentials; prefer host-only or redact sensitive keys. + - For potentially sensitive queries (e.g., long search strings), truncate beyond 256 characters and include `query_truncated: true`. + - Base64 payloads and large bodies are never written to audit logs. + +## Environment and sandboxing guidance + +- Pass only an explicit allowlist of environment variables per-tool via `envPassthrough`. Avoid ambient secrets. +- Run tools as an unprivileged user. Consider containerization, user namespaces, or seccomp/AppArmor where feasible. +- Constrain filesystem effects to repository-relative paths validated against traversal and symlink escapes. +- Apply process-level limits (ulimits) and per-invocation time/size caps to prevent resource abuse. 
+ +## Deterministic behavior and retries + +- Enforce hard redirect limits, bounded retries with jitter on transient errors (e.g., timeouts, 429, 5xx), and clear non-zero exits with single-line JSON on stderr describing the error. +- Keep stdout schema stable and validated; treat deviations as errors. + +## Related documents + +- Threat model and trust boundaries: see [security/threat-model.md](threat-model.md) +- Tool reference for stdin/stdout schemas and env passthrough: see [reference/research-tools.md](../reference/research-tools.md) diff --git a/docs/security/threat-model.md b/docs/security/threat-model.md new file mode 100644 index 0000000..9a15c58 --- /dev/null +++ b/docs/security/threat-model.md @@ -0,0 +1,46 @@ +# Threat Model + +This document expands on the security posture, trust boundaries, and recommended mitigations for `goagent`. + +## Scope +- Non-interactive CLI (`cmd/agentcli`) that communicates with an OpenAI-compatible API and executes local tool binaries declared in `tools.json`. +- Local tools are executed via argv only, with JSON on stdin and JSON on stdout/stderr. + +## Trust boundaries +- Untrusted: model outputs, tool inputs from the model, remote API, and any data received over the network. +- Trusted (to the extent configured): the `tools.json` manifest, local tool binaries you build and enable, and the user-provided flags/env. + +## Key risks and mitigations +- Command execution risk: Tools run processes. Mitigation: explicit allowlist (`tools.json`), argv-only (no shell), minimal environment, per-call timeouts, and repo-relative file paths for fs tools. For pre-stage, external tools are disabled by default; only in-process read-only adapters are exposed unless `-prep-tools-allow-external` is set. +- Prompt injection and tool abuse: Model may request dangerous operations. Mitigation: keep tool set minimal, prefer read-only tools (pre-stage default), and require human review for high-risk prompts. 
Consider running tools under containers/jails. +- Secret leakage: Avoid printing secrets. Mitigation: supply tokens via environment or CI secrets; do not commit secrets; redaction is implemented for audit logs and tool runner so secret values are never recorded. +- Output confusion: Tools should fail with non-zero exit and machine-readable stderr to map to `{"error": "..."}`. Mitigation: standardize tool error contracts (planned) and keep runner mapping strict. +- Network exposure: `tools/exec.go` is unrestricted. Mitigation: enable only when necessary and document risks. + +## Environment variable passthrough (envPassthrough) + +Only an explicit allowlist of environment variables is passed from the agent to child tool processes. This minimizes ambient authority and reduces risk of accidental leakage. + +- Allowed keys: `OAI_API_KEY`, `OAI_BASE_URL`, `OAI_IMAGE_BASE_URL`, `OAI_HTTP_TIMEOUT`. +- Rationale: tools that make OpenAI-compatible HTTP requests need endpoint, key, and timeout settings to operate; everything else remains isolated. +- Redaction: audit logs and structured logs include the variable names but never their values. +- Configuration surface: per-tool allowlist is declared in `tools.json` under `envPassthrough`; the runner builds the child environment as `PATH,HOME` plus only those keys if present in the parent process. + +## Tool privacy: img_create + +The `img_create` tool interacts with the Images API and may handle large base64 payloads. Privacy and transcript hygiene are enforced by default: + +- Prompts and base64 image data are not logged in audit logs or human-readable output by default. When returning base64 to the agent, the tool emits a hint that content was elided unless an explicit debug flag is enabled. +- When saving images, files are written only under a repository-relative `save.dir` path that must resolve inside the current repo; path traversal and escapes are rejected. 
+- Standardized stderr JSON is used for errors, avoiding accidental leakage through free-form logs. + +## Operational guidance +- Run the CLI in a working directory with restricted permissions. +- Review `tools.json` before enabling tools; prefer least privilege. +- Use containers or separate user namespaces to isolate untrusted tools. +- Configure `OAI_API_KEY` via env; never commit credentials. + +## References +- See `README.md` Security model for a quick summary. +- ADR-0001 documents the CLI architecture and contracts. + - Research tools security posture and guardrails: see [security/research-tools.md](research-tools.md) diff --git a/examples/gpt5_live_smoke_test.go b/examples/gpt5_live_smoke_test.go new file mode 100644 index 0000000..ff80b20 --- /dev/null +++ b/examples/gpt5_live_smoke_test.go @@ -0,0 +1,58 @@ +package examples + +import ( + "bytes" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +// TestGPT5_LiveSmoke_DefaultTemperature runs the CLI against a live GPT-5 endpoint +// when GPT5_OPENAI_API_URL and GPT5_OPENAI_API_KEY are exported. It asserts that the +// debug request dump includes temperature: 1 with no sampling flags. 
+func TestGPT5_LiveSmoke_DefaultTemperature(t *testing.T) { + baseURL := strings.TrimSpace(os.Getenv("GPT5_OPENAI_API_URL")) + apiKey := strings.TrimSpace(os.Getenv("GPT5_OPENAI_API_KEY")) + if baseURL == "" || apiKey == "" { + t.Skip("set GPT5_OPENAI_API_URL and GPT5_OPENAI_API_KEY to run this live smoke test") + } + + // Build agent CLI binary from repo root for correctness + root := findRepoRoot(t) + tmp := t.TempDir() + agentBin := filepath.Join(tmp, "agentcli") + cmdBuild := exec.Command("go", "build", "-o", agentBin, "./cmd/agentcli") + cmdBuild.Dir = root + if out, err := cmdBuild.CombinedOutput(); err != nil { + t.Fatalf("build agentcli: %v: %s", err, string(out)) + } + + // Run the agent binary with -debug so request JSON is dumped locally before HTTP + var stdout, stderr bytes.Buffer + cmd := exec.Command(agentBin, + "-prompt", "Say ok", + "-base-url", baseURL, + "-api-key", apiKey, + "-model", "gpt-5", + "-max-steps", "1", + "-http-timeout", "30s", + "-debug", + ) + cmd.Env = append(os.Environ(), "CGO_ENABLED=0") + cmd.Stdout = &stdout + cmd.Stderr = &stderr + _ = cmd.Run() // do not require success; we only need the local request dump + + tr := stderr.String() + if !strings.Contains(tr, "--- chat.request step=1 ---") { + t.Fatalf("missing debug request dump; stderr=\n%s", tr) + } + if !strings.Contains(tr, "\"temperature\": 1") { + t.Fatalf("expected temperature 1 in request; stderr=\n%s", tr) + } + + // Note: Reasoning controls (verbosity/reasoning_effort) are independent of temperature + // and may be configured per-provider. This smoke only asserts default temperature. 
+} diff --git a/examples/image-gen/Makefile b/examples/image-gen/Makefile new file mode 100644 index 0000000..9f22cc3 --- /dev/null +++ b/examples/image-gen/Makefile @@ -0,0 +1,35 @@ +# Simple helper to invoke ./tools/bin/img_create + +# User-tunable vars (can be overridden): +PROMPT ?= tiny illustrative banner +SIZE ?= 1024x1024 +N ?= 1 +BASENAME ?= img +RETURN_B64 ?= 0 +SAVE_DIR ?= assets + +# Detect Windows executable suffix if present +IMG_CREATE := ./tools/bin/img_create +ifeq ($(OS),Windows_NT) +IMG_CREATE := ./tools/bin/img_create.exe +endif + +.PHONY: help +help: + @echo "Targets:" + @echo " run - Invoke img_create with SAVE to $(SAVE_DIR)/$(BASENAME)_NNN.png" + @echo " run-b64 - Invoke img_create with return_b64=true" + @echo " clean - Remove generated assets (rm -rf $(SAVE_DIR))" + +.PHONY: run +run: + @mkdir -p $(SAVE_DIR) + @jq -n --arg p "$(PROMPT)" --arg s "$(SIZE)" --argjson n $(N) --arg dir "$(SAVE_DIR)" --arg base "$(BASENAME)" '{prompt:$p,n:$n,size:$s,save:{dir:$dir,basename:$base,ext:"png"}}' | $(IMG_CREATE) + +.PHONY: run-b64 +run-b64: + @jq -n --arg p "$(PROMPT)" --arg s "$(SIZE)" --argjson n $(N) '{prompt:$p,n:$n,size:$s,return_b64:true}' | $(IMG_CREATE) + +.PHONY: clean +clean: + @rm -rf $(SAVE_DIR) diff --git a/examples/image-gen/README.md b/examples/image-gen/README.md new file mode 100644 index 0000000..9ec9fea --- /dev/null +++ b/examples/image-gen/README.md @@ -0,0 +1,37 @@ +### Image generation example (direct tool invocation) + +This example shows how to call the `img_create` tool directly (without running the agent), saving PNGs under `assets/` and printing the result to stdout. 
+ +Prerequisites: +- `make build-tools` has produced `./tools/bin/img_create` (or `./tools/bin/img_create.exe` on Windows) +- `OAI_API_KEY` is set; optional `OAI_BASE_URL`/`OAI_IMAGE_BASE_URL` +- Go 1.21+ installed (for the Go runner) + +Quick start (from repo root): + +```bash +make build-tools + +# Set your API key (and base URL if needed) +export OAI_API_KEY=your-key + +# Run the Go helper which invokes ./tools/bin/img_create with JSON on stdin +make -C examples/image-gen run PROMPT="tiny illustrative banner" SIZE=512x512 BASENAME=banner + +# Expect: one or more PNGs under ./assets/ (e.g., assets/banner_001.png) +``` + +Notes: +- The Go runner simply constructs the JSON payload and streams it to `./tools/bin/img_create`, then prints the tool's JSON result. +- To request multiple images, pass `N=2` (max 4). To return base64 instead of writing files, pass `RETURN_B64=1` (the tool elides large base64 in stdout by default). +- Windows: the runner resolves the `.exe` suffix automatically. + +Make targets: + +```bash +# From repo root +make -C examples/image-gen help +``` + +Troubleshooting: +- If you see an error about credentials or timeouts, consult `docs/runbooks/troubleshooting.md` (Image generation section) and ensure `OAI_API_KEY` is set. 
diff --git a/examples/image-gen/main.go b/examples/image-gen/main.go new file mode 100644 index 0000000..18b9eee --- /dev/null +++ b/examples/image-gen/main.go @@ -0,0 +1,81 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "os/exec" + "runtime" +) + +type SaveSpec struct { + Dir string `json:"dir"` + Basename string `json:"basename"` + Ext string `json:"ext"` +} + +type Request struct { + Prompt string `json:"prompt"` + N int `json:"n,omitempty"` + Size string `json:"size,omitempty"` + Model string `json:"model,omitempty"` + ReturnB64 bool `json:"return_b64,omitempty"` + Save *SaveSpec `json:"save,omitempty"` +} + +func main() { + prompt := getenvDefault("PROMPT", "tiny illustrative banner") + size := getenvDefault("SIZE", "1024x1024") + n := getenvIntDefault("N", 1) + basename := getenvDefault("BASENAME", "img") + saveDir := getenvDefault("SAVE_DIR", "assets") + returnB64 := os.Getenv("RETURN_B64") == "1" + + req := Request{Prompt: prompt, N: n, Size: size} + if returnB64 { + req.ReturnB64 = true + } else { + req.Save = &SaveSpec{Dir: saveDir, Basename: basename, Ext: "png"} + } + + payload, err := json.Marshal(req) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to marshal request: %v\n", err) + os.Exit(2) + } + + bin := "./tools/bin/img_create" + if runtime.GOOS == "windows" { + bin = "./tools/bin/img_create.exe" + } + + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(payload) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + // img_create prints a single-line JSON error to stderr; exit non-zero + os.Exit(1) + } +} + +func getenvDefault(key, def string) string { + v := os.Getenv(key) + if v == "" { + return def + } + return v +} + +func getenvIntDefault(key string, def int) int { + v := os.Getenv(key) + if v == "" { + return def + } + var out int + if _, err := fmt.Sscanf(v, "%d", &out); err != nil { + return def + } + return out +} diff --git a/examples/image/README.md 
b/examples/image/README.md new file mode 100644 index 0000000..2fc3b78 --- /dev/null +++ b/examples/image/README.md @@ -0,0 +1,43 @@ +# Image flows examples + +This page shows two working invocations demonstrating (1) same backend for chat+image and (2) split backends (e.g., OSS chat + OpenAI images). + +Prerequisites: +- Go 1.21+ +- `make build build-tools` +- A valid Images API key in `OAI_API_KEY` (or vendor equivalent) + +Same backend for chat and image (both use OAI_BASE_URL): + +```bash +export OAI_BASE_URL="${OAI_BASE_URL:-https://api.openai.com/v1}" +export OAI_API_KEY=your-key + +./bin/agentcli \ + -tools ./tools.json \ + -prompt "Generate a tiny illustrative image using img_create and save it under assets/ with basename banner" \ + -model gpt-5 \ + -max-steps 3 \ + -debug +# Expect: PNG(s) under assets/ and a concise final message +``` + +Split backends (chat via OSS; images via OpenAI): + +```bash +export OAI_BASE_URL="http://localhost:8080/v1" # OSS chat +export OAI_IMAGE_BASE_URL="https://api.openai.com/v1" # Images +export OAI_API_KEY=your-key + +./bin/agentcli \ + -tools ./tools.json \ + -prompt "Use img_create to render a small banner and save to assets/" \ + -model oss-gpt-20b \ + -max-steps 3 \ + -debug +``` + +Notes: +- Ensure `img_create` is in `tools.json` with `command: ["./tools/bin/img_create"]`. +- To avoid committing large binaries, consider adding `assets/` to `.gitignore` in your project. +- To return base64 instead of saving, modify the tool call to set `{"return_b64":true}` and set `IMG_CREATE_DEBUG_B64=1` if you want base64 printed in stdout. diff --git a/examples/research/README.md b/examples/research/README.md new file mode 100644 index 0000000..2c3e4b4 --- /dev/null +++ b/examples/research/README.md @@ -0,0 +1,101 @@ +# Research tools: manual examples (no agent) + +These examples show how to run the research CLI tools directly without `agentcli`. They avoid network in CI by using comments or localhost-only fixtures. 
+ +## Prerequisites +- Build the tools first: +```bash +make build-tools +``` +- Set environment variables when required by a tool. + +## SearXNG meta search +Requires `SEARXNG_BASE_URL` (e.g., `http://localhost:8888`). +```bash +echo '{"q":"golang","size":5}' | SEARXNG_BASE_URL=http://localhost:8888 ./tools/bin/searxng_search | jq '.results[] | {title,url,engine}' +``` + +## HTTP fetch (safe) +```bash +echo '{"url":"https://example.com","method":"GET"}' | ./tools/bin/http_fetch | jq '{status, truncated}' +``` + +## robots.txt check +```bash +echo '{"url":"https://example.com/robots.txt","user_agent":"agentcli"}' | ./tools/bin/robots_check | jq . +``` + +## Readability extraction (HTML → article) +```bash +HTML=$(curl -Lsf https://example.com | head -c 200000) +jq -n --arg html "$HTML" --arg base "https://example.com" '{html:$html, base_url:$base}' | ./tools/bin/readability_extract | jq '{title, length}' +``` + +## Metadata extraction (OG/Twitter/JSON-LD) +```bash +HTML=$(curl -Lsf https://example.com | head -c 200000) +jq -n --arg html "$HTML" --arg base "https://example.com" '{html:$html, base_url:$base}' | ./tools/bin/metadata_extract | jq '{have_og:(.opengraph!=null), have_twitter:(.twitter!=null), jsonld_len:(.jsonld|length)}' +``` + +## PDF extract +Reads base64 input. Example encodes a local PDF (no network). +```bash +base64 -w0 ./examples/image-gen/example.pdf | jq -n --arg b64 "$(cat)" '{pdf_base64:$b64}' | ./tools/bin/pdf_extract | jq '{page_count, first:(.pages[0].text|.[0:120])}' +``` + +## RSS/Atom fetch +```bash +echo '{"url":"https://hnrss.org/frontpage"}' | ./tools/bin/rss_fetch | jq '.items[0]' +``` + +## Wayback lookup +```bash +echo '{"url":"https://example.com","save":false}' | ./tools/bin/wayback_lookup | jq . 
+``` + +## Wikipedia query +```bash +echo '{"titles":"Golang"}' | ./tools/bin/wiki_query | jq '.pages[0]' +# or +echo '{"search":"Golang"}' | ./tools/bin/wiki_query | jq '.pages[0]' +``` + +## OpenAlex search +```bash +echo '{"q":"large language models","per_page":5}' | ./tools/bin/openalex_search | jq '.results[0] | {title, publication_year, cited_by_count}' +``` + +## Crossref search +Requires `CROSSREF_MAILTO`. +```bash +echo '{"q":"transformers","rows":5}' | CROSSREF_MAILTO=user@example.com ./tools/bin/crossref_search | jq '.results[0] | {title, doi, issued}' +``` + +## GitHub search (rate-limited without token) +Optional `GITHUB_TOKEN`. +```bash +echo '{"q":"repo:golang/go scheduler","type":"code","per_page":3}' | ./tools/bin/github_search | jq '{count:(.results|length), rate:.rate.remaining}' +``` + +## De-duplicate and rank +```bash +jq -n '{docs:[{id:"a",title:"Title A",text:"hello world"},{id:"b",title:"Title B",text:"hello world!"}]}' | ./tools/bin/dedupe_rank | jq . +``` + +## Citation pack +```bash +echo '{"doc":{"url":"https://example.com","title":"Example"},"archive":{"wayback":false}}' | ./tools/bin/citation_pack | jq . +``` + +--- + +### Notes for CI/offline runs +- Network commands above are examples; keep them commented in CI. +- Prefer `httptest.Server` fixtures in tests; this page is for humans running locally. + +### Offline fixtures-only test sweep (no network) +These package tests use local HTTP fixtures via `httptest.Server`. +```bash +# Safe to run in CI without network access +go test ./tools/cmd/... -count=1 +``` diff --git a/examples/tool_calls.md b/examples/tool_calls.md new file mode 100644 index 0000000..b293ec6 --- /dev/null +++ b/examples/tool_calls.md @@ -0,0 +1,17 @@ +This worked example demonstrates a tool-call session that exercises default temperature (1.0), sequential execution of two tool calls with matching tool_call_id, and a captured transcript via -debug. 
+ +Quick start (from repo root): + +1) Build binaries as needed: + +```bash +make build build-tools +``` + +2) Run the example test which builds a temporary agent binary, compiles two tools, and drives a mock server with two tool calls: + +```bash +go test ./examples -run TestWorkedExample_ToolCalls_TemperatureOne_Sequencing -v +``` + +Expected: test passes, stdout contains "ok", and the debug transcript (stderr) includes a request with "temperature": 1 and a response with tool_calls. The test also verifies tool message sequencing by checking that a tool message is appended for each tool_call_id. diff --git a/examples/tool_calls_example_test.go b/examples/tool_calls_example_test.go new file mode 100644 index 0000000..47769de --- /dev/null +++ b/examples/tool_calls_example_test.go @@ -0,0 +1,253 @@ +//nolint:errcheck // Example-style tests elide some error checks for brevity; core assertions enforce correctness. +package examples + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/hyperifyio/goagent/internal/oai" + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// findRepoRoot walks up from the current directory to locate go.mod. 
+func findRepoRoot(t *testing.T) string { + t.Helper() + dir, err := os.Getwd() + if err != nil || dir == "" { + t.Fatalf("getwd: %v", err) + } + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + t.Fatalf("go.mod not found from %s upward", dir) + } + dir = parent + } +} + +// TestWorkedExample_ToolCalls_TemperatureOne_Sequencing builds the CLI and two tools, +// runs against a fake server that returns two tool_calls, and verifies: +// - default temperature propagates as 1.0 +// - tool messages are appended with matching tool_call_id +// - debug transcript includes request/response dumps (used as a transcript example) +func TestWorkedExample_ToolCalls_TemperatureOne_Sequencing(t *testing.T) { + _ = findRepoRoot(t) + + // Build agent CLI binary from repo root for correctness + tmp := t.TempDir() + agentBin := filepath.Join(tmp, "agentcli") + cmdBuild := exec.Command("go", "build", "-o", agentBin, "./cmd/agentcli") + cmdBuild.Dir = findRepoRoot(t) + if out, err := cmdBuild.CombinedOutput(); err != nil { + t.Fatalf("build agentcli: %v: %s", err, string(out)) + } + + // Build required tool binaries using canonical helper, then copy under ./tools/bin + toolsDir := filepath.Join(tmp, "tools", "bin") + if err := os.MkdirAll(toolsDir, 0o755); err != nil { + t.Fatalf("mkdir tools/bin: %v", err) + } + srcWrite := testutil.BuildTool(t, "fs_write_file") + srcRead := testutil.BuildTool(t, "fs_read_file") + mustCopy := func(src, dst string) { + in, err := os.Open(src) + if err != nil { + t.Fatalf("open %s: %v", src, err) + } + defer in.Close() + out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o755) + if err != nil { + t.Fatalf("create %s: %v", dst, err) + } + if _, err := io.Copy(out, in); err != nil { + t.Fatalf("copy %s -> %s: %v", src, dst, err) + } + if err := out.Close(); err != nil { + t.Fatalf("close out: %v", err) + } + } + mustCopy(srcWrite, 
filepath.Join(toolsDir, "fs_write_file")) + mustCopy(srcRead, filepath.Join(toolsDir, "fs_read_file")) + + // Create tools.json manifest that references ./tools/bin/* + manifestPath := filepath.Join(tmp, "tools.json") + man := map[string]any{ + "tools": []map[string]any{ + { + "name": "fs_write_file", + "description": "Atomically write a file (base64)", + "schema": map[string]any{ + "type": "object", + "additionalProperties": false, + "required": []string{"path", "contentBase64"}, + "properties": map[string]any{ + "path": map[string]any{"type": "string"}, + "contentBase64": map[string]any{"type": "string"}, + "createModeOctal": map[string]any{"type": "string"}, + }, + }, + "command": []string{"./tools/bin/fs_write_file"}, + "timeoutSec": 5, + }, + { + "name": "fs_read_file", + "description": "Read a file (base64)", + "schema": map[string]any{ + "type": "object", + "additionalProperties": false, + "required": []string{"path"}, + "properties": map[string]any{ + "path": map[string]any{"type": "string"}, + "offsetBytes": map[string]any{"type": "integer"}, + "maxBytes": map[string]any{"type": "integer"}, + }, + }, + "command": []string{"./tools/bin/fs_read_file"}, + "timeoutSec": 5, + }, + }, + } + if b, err := json.Marshal(man); err != nil { + t.Fatalf("marshal manifest: %v", err) + } else if err := os.WriteFile(manifestPath, b, 0o644); err != nil { + t.Fatalf("write manifest: %v", err) + } + + // Prepare target file and content for tool calls (relative to tmp dir) + targetRel := "worked_example.txt" + content := []byte("hello example") + contentB64 := base64.StdEncoding.EncodeToString(content) + + // Fake server with two steps + var step int + var sawToolIDs map[string]bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/chat/completions" { + t.Fatalf("unexpected request: %s %s", r.Method, r.URL.Path) + } + var req oai.ChatCompletionsRequest + if err := 
json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + step++ + switch step { + case 1: + if req.Temperature == nil || *req.Temperature != 1.0 { + if req.Temperature == nil { + t.Fatalf("temperature missing in request; want 1.0") + } + t.Fatalf("temperature got %v want 1.0", *req.Temperature) + } + // Respond with two tool calls: write then read + resp := oai.ChatCompletionsResponse{ + ID: "cmpl-1", + Object: "chat.completion", + Created: time.Now().Unix(), + Model: req.Model, + Choices: []oai.ChatCompletionsResponseChoice{{ + Index: 0, + FinishReason: "tool_calls", + Message: oai.Message{ + Role: oai.RoleAssistant, + ToolCalls: []oai.ToolCall{ + {ID: "1", Type: "function", Function: oai.ToolCallFunction{Name: "fs_write_file", Arguments: `{"path":"` + targetRel + `","contentBase64":"` + contentB64 + `"}`}}, + {ID: "2", Type: "function", Function: oai.ToolCallFunction{Name: "fs_read_file", Arguments: `{"path":"` + targetRel + `"}`}}, + }, + }, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode step1: %v", err) + } + case 2: + // Verify that tool messages with matching ids were appended + sawToolIDs = map[string]bool{"1": false, "2": false} + for _, m := range req.Messages { + if m.Role == oai.RoleTool { + if _, ok := sawToolIDs[m.ToolCallID]; ok { + sawToolIDs[m.ToolCallID] = true + } + if strings.TrimSpace(m.Content) == "" { + t.Fatalf("tool message content empty for id %s", m.ToolCallID) + } + } + } + // Final assistant message + resp := oai.ChatCompletionsResponse{ + ID: "cmpl-2", + Object: "chat.completion", + Created: time.Now().Unix(), + Model: req.Model, + Choices: []oai.ChatCompletionsResponseChoice{{ + Index: 0, + FinishReason: "stop", + Message: oai.Message{Role: oai.RoleAssistant, Content: "ok"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode step2: %v", err) + } + default: + t.Fatalf("unexpected extra request step=%d", step) + } + })) + defer 
srv.Close() + + // Run the agent binary with -debug to emit transcript-style dumps + var stdout, stderr bytes.Buffer + cmd := exec.Command(agentBin, + "-prompt", "write and read a file", + "-tools", manifestPath, + "-prep-tools-allow-external", + "-base-url", srv.URL, + "-model", "test", + "-max-steps", "4", + "-http-timeout", "5s", + "-tool-timeout", "5s", + "-debug", + ) + cmd.Dir = tmp // so relative file path lands here + cmd.Env = append(os.Environ(), "CGO_ENABLED=0") + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + t.Fatalf("agent run error: %v; stdout=%s stderr=%s", err, stdout.String(), stderr.String()) + } + + if got := strings.TrimSpace(stdout.String()); got != "ok" { + t.Fatalf("unexpected stdout: %q", got) + } + // Ensure both tool ids were seen by server in step 2 + if sawToolIDs == nil || !sawToolIDs["1"] || !sawToolIDs["2"] { + t.Fatalf("server did not observe both tool messages: %+v", sawToolIDs) + } + // Transcript excerpts should be present in stderr + tr := stderr.String() + if !strings.Contains(tr, "--- chat.request step=1 ---") || !strings.Contains(tr, "\"temperature\": 1") { + t.Fatalf("missing debug request dump with temperature; stderr=\n%s", tr) + } + if !strings.Contains(tr, "--- chat.response step=1 ---") || !strings.Contains(tr, "tool_calls") { + t.Fatalf("missing debug response dump with tool_calls; stderr=\n%s", tr) + } + // Verify the file exists with expected content as a final sanity check + data, err := os.ReadFile(filepath.Join(tmp, targetRel)) + if err != nil { + t.Fatalf("read created file: %v", err) + } + if string(data) != string(content) { + t.Fatalf("file content mismatch: got %q want %q", string(data), string(content)) + } +} diff --git a/examples/unrestricted.md b/examples/unrestricted.md new file mode 100644 index 0000000..6b34ce5 --- /dev/null +++ b/examples/unrestricted.md @@ -0,0 +1,37 @@ +# Unrestricted toolbelt examples + +Warning: These prompts exercise powerful tools (`exec`, file 
system). Enable only in a sandboxed environment. See README “Unrestricted tools warning”. + +## Prompt 1: Write, build, and run a tiny Go program +Paste the following as your `-prompt` when running `agentcli` with the unrestricted tools enabled in `tools.json`. + +""" +Create a new Go module under `play/hello` and a `main.go` that prints `hello from toolbelt`. Use these steps deterministically: +- Use `fs_mkdirp` to create `play/hello`. +- Use `fs_write_file` to write `play/hello/main.go` with a minimal Go program. +- Use `exec` to run `go mod init example.com/hello` in `play/hello`. +- Use `exec` to run `go build -o ../../bin/hello` from `play/hello`. +- Use `exec` to run `../../bin/hello` and capture stdout. +Return the final program stdout only. +""" + +Suggested `main.go` content: +```go +package main +import "fmt" +func main(){ fmt.Println("hello from toolbelt") } +``` + +## Prompt 2: Edit a file and verify contents +""" +Create `scratch/note.txt` with the line `alpha`. Append a second line `beta`. Then read the file back and return its full contents. Steps: +- `fs_write_file` to create `scratch/note.txt` with base64("alpha\n") +- `fs_append_file` to append base64("beta\n") +- `fs_read_file` to read back and decode the file +Return the decoded text only. +""" + +## Prompt 3: Move, overwrite, and delete +""" +Create `scratch/a.txt` with `A`. Move it to `scratch/b.txt`. Overwrite `scratch/b.txt` with `B` using `fs_move` and `overwrite:true`. Then remove the `scratch` directory recursively with `fs_rm` and confirm it is gone. 
+""" diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..e920173 --- /dev/null +++ b/go.mod @@ -0,0 +1,27 @@ +module github.com/hyperifyio/goagent + +go 1.24.6 + +require ( + github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994 + github.com/go-shiori/go-readability v0.0.0-20250217085726-9f5bf5ca7612 +) + +require ( + github.com/dlclark/regexp2 v1.11.4 // indirect + github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect + github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect +) + +// pdf_extract will add ledongthuc/pdf when parser step is implemented + +require ( + github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect + github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect + github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect + github.com/jung-kurt/gofpdf v1.16.2 + github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 + golang.org/x/net v0.35.0 // indirect + golang.org/x/text v0.22.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..4d9fdcb --- /dev/null +++ b/go.sum @@ -0,0 +1,119 @@ +github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= +github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= +github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA= +github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw= +github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= +github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994 h1:aQYWswi+hRL2zJqGacdCZx32XjKYV8ApXFGntw79XAM= +github.com/dop251/goja v0.0.0-20250630131328-58d95d85e994/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4= +github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c h1:wpkoddUomPfHiOziHZixGO5ZBS73cKqVzZipfrLmO1w= +github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM= +github.com/go-shiori/go-readability v0.0.0-20250217085726-9f5bf5ca7612 h1:BYLNYdZaepitbZreRIa9xeCQZocWmy/wj4cGIH0qyw0= +github.com/go-shiori/go-readability v0.0.0-20250217085726-9f5bf5ca7612/go.mod h1:wgqthQa8SAYs0yyljVeCOQlZ027VW5CmLsbi9jWC08c= +github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= +github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= +github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs= +github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U= +github.com/google/pprof v0.0.0-20230207041349-798e818bf904/go.mod h1:uglQLonpP8qtYCYyzA+8c/9qtqgA3qsXGYqCPKARAFg= +github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/jung-kurt/gofpdf v1.16.2 h1:jgbatWHfRlPYiK85qgevsZTHviWXKwB1TTiKdz5PtRc= +github.com/jung-kurt/gofpdf 
v1.16.2/go.mod h1:1hl7y57EsiPAkLbOwzpzqgx1A30nQCk/YmFV8S2vmK0= +github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 h1:QwWKgMY28TAXaDl+ExRDqGQltzXqN/xypdKP86niVn8= +github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728/go.mod h1:1fEHWurg7pvf5SG6XNE5Q8UZmOwex51Mkx3SLhrW5B4= +github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= +github.com/phpdave11/gofpdi v1.0.7/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= +github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg= +github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= 
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= +golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync 
v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= 
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.4.0 
h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/ci/ci_workflow_test.go b/internal/ci/ci_workflow_test.go new file mode 100644 index 0000000..8dc3208 --- /dev/null +++ b/internal/ci/ci_workflow_test.go @@ -0,0 +1,85 @@ +package ci + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// This test asserts two things locally without requiring CI: +// 1) Makefile lint recipe runs check-go-version before invoking golangci-lint +// 2) The CI workflow includes an explicit step that verifies the ordering +func TestLintOrderLocallyAndInWorkflow(t *testing.T) { + repoRoot, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + + // Assert Makefile ordering: check-go-version appears before golangci-lint + mkPath := filepath.Join(repoRoot, "..", "..", "Makefile") + mkBytes, err := os.ReadFile(mkPath) + if err != nil { + t.Fatalf("read Makefile: %v", err) + } + mk := string(mkBytes) + if !strings.Contains(mk, "lint:") { + t.Fatalf("Makefile missing 'lint:' target") + } + // Extract only the lint recipe block (the lines starting with a tab after 'lint:') + lines := strings.Split(mk, "\n") + lintIdx := -1 + for i, ln := range lines { + if strings.HasPrefix(ln, "lint:") { + lintIdx = i + break + } + } + if lintIdx < 0 { + t.Fatalf("Makefile missing lint target label") + } + var recipeLines []string + for j := lintIdx + 1; j < len(lines); j++ { + ln := lines[j] + if strings.HasPrefix(ln, "\t") { // recipe lines start with a tab + recipeLines = append(recipeLines, ln) + continue + } + // Stop when we hit the next non-recipe line (new target or blank without tab) + if strings.TrimSpace(ln) == "" { + 
// allow empty recipe line with tab only + if strings.HasPrefix(ln, "\t") { + recipeLines = append(recipeLines, ln) + continue + } + } + // Not a recipe line: end of recipe + break + } + recipe := strings.Join(recipeLines, "\n") + idxCheck := strings.Index(recipe, "check-go-version") + if idxCheck < 0 { + t.Fatalf("lint recipe missing 'check-go-version' invocation") + } + idxGcl := strings.Index(recipe, "golangci-lint") + if idxGcl < 0 { + t.Fatalf("lint recipe missing 'golangci-lint' invocation") + } + if !(idxCheck < idxGcl) { + t.Fatalf("expected check-go-version to run before golangci-lint inside lint recipe (idx %d < %d)", idxCheck, idxGcl) + } + + // Assert CI workflow includes the lint order assertion step + wfPath := filepath.Join(repoRoot, "..", "..", ".github", "workflows", "ci.yml") + wfBytes, err := os.ReadFile(wfPath) + if err != nil { + t.Fatalf("read ci workflow: %v", err) + } + wf := string(wfBytes) + if !strings.Contains(wf, "lint (includes check-go-version)") { + t.Fatalf("workflow missing explicit lint step name indicating check-go-version inclusion") + } + if !strings.Contains(wf, "Assert lint order (check-go-version before golangci-lint)") { + t.Fatalf("workflow missing order assertion step") + } +} diff --git a/internal/oai/assets/prep_default.md b/internal/oai/assets/prep_default.md new file mode 100644 index 0000000..631db8a --- /dev/null +++ b/internal/oai/assets/prep_default.md @@ -0,0 +1,279 @@ +# Smart Prep Prompt (Default) + +The goal of this pre-stage is to deterministically derive: + +- A concise but complete system prompt suitable for the main run. +- Zero or more developer prompts to guide style and constraints. +- Tool configuration hints, including image-generation guidance when applicable. +- Optional image instructions for downstream image tools. 
+ +Requirements: + +- Output MUST be Harmony messages JSON: an array of objects with optional `system`, zero-or-more `developer`, and optional `tool_config` and `image_instructions` fields. +- Do not include `role:"tool"` entries and do not include tool calls in this stage. +- Be explicit about safety, redaction of secrets, and source attribution. + +Guidelines: + +- Keep prompts minimal but sufficient. Avoid verbosity that wastes tokens. +- Prefer declarative constraints to prescriptive long-form text. +- If image generation is likely, include high-level image guidelines (style, quality, size) without locking to a provider-specific model. +- Annotate any assumptions clearly. + +Steps: + +1. Read the user request and any provided context. +2. Identify missing constraints and fill reasonable defaults. +3. Propose the system prompt that sets behavior boundaries and goals. +4. Provide optional developer prompts for formatting, tone, and structure. +5. Provide optional `tool_config` hints describing which tools are likely useful and with which key parameters. +6. Provide optional `image_instructions` when image generation is relevant. +7. Return a single JSON array as the only output. + +Example minimal output (JSON): + +[ + { + "system": "You are a helpful assistant. Prioritize correctness and cite sources when tools provide them." + }, + { + "developer": "Return concise answers; use bullet lists when appropriate." + }, + { + "tool_config": { + "enable_tools": ["searxng_search","http_fetch","readability_extract"], + "hints": {"http_fetch.max_bytes": 1048576} + } + }, + { + "image_instructions": { + "style": "natural", + "quality": "standard", + "size": "1024x1024" + } + } +] + +Extended guidance: + +- System prompt should set policy boundaries (no PII leakage, safety, determinism when possible). +- Developer prompts can add formatting rules or domain-specific constraints. +- Tool config hints should be suggestive; the main stage may override them. 
+- Image instructions should avoid vendor lock-in and focus on intent. + +Notes: + +- Keep total token usage modest. +- Ensure the JSON is syntactically valid. +- Avoid embedding large text; link via citations instead. + +Reference checklists (expand as needed): + +- Inputs + - Describe the user's primary goal. + - List any constraints (time, budget, style). + - Identify missing details and reasonable defaults. +- Outputs + - Specify required sections and data shapes. + - Define success criteria the main stage can verify. +- Safety + - Avoid secrets; redact keys and tokens. + - Respect robots and site policies in research. +- Tools + - Prefer read-then-summarize workflows. + - Cap network sizes and timeouts. + +Quality bar: + +- The pre-stage content must be sufficient for a non-interactive main call. +- Prompts must be reproducible and stable across runs. +- Avoid model-specific jargon unless required by capabilities. + +Frequently used tool hints: + +- `searxng_search`: prefer time range `month` for fresh content. +- `http_fetch`: `max_bytes` 1 MiB; follow ≤ 5 redirects. +- `readability_extract`: use for long-form articles; fall back to metadata. +- `pdf_extract`: avoid OCR unless explicitly allowed. +- `dedupe_rank`: group near-duplicates and pick representatives. +- `citation_pack`: normalize and optionally archive. + +Image instructions template (when applicable): + +- `style`: one of `natural|vivid`. +- `quality`: one of `standard|hd`. +- `size`: `1024x1024` by default. +- Keep content-safe, no sensitive data, no logos unless permitted. + +Example tasks to consider: + +- Research and summarize a topic with citations. +- Extract key facts from a PDF and produce a table. +- Generate step-by-step instructions for a procedure. +- Propose image concepts for an article header. + +Do not output explanations outside the JSON array. 
+ +--- + +Additional elaboration (to ensure clarity and completeness): + +- The `system` entry sets guardrails: identity, goals, non-goals. +- The `developer` entries add style and formatting constraints. +- The `tool_config` entry outlines suggested tools; it is advisory. +- The `image_instructions` entry captures defaults for image generation. +- If any section is unnecessary, omit it rather than emitting placeholders. +- Keep nouns concrete and avoid ambiguous verbs. +- Prefer consistent terminology across entries. +- Use American English unless the user specifies otherwise. +- For code-related tasks, specify language version and formatting tools. +- For data tasks, specify units, rounding, and acceptable error tolerances. +- For summarization, specify target length and inclusion/exclusion rules. +- For tables, specify column order, headers, and data types. +- For lists, specify ordering (by relevance, date, alphabetical). +- For citations, specify acceptable sources and minimum quality bar. +- For timelines, specify granularity (day, week, month) and time zone. +- For scheduling, specify preferred windows and constraints. +- For APIs, specify rate limits and pagination strategies. +- For retries, specify backoff policy and maximum attempts. +- For timeouts, specify per-call limits and overall budget. +- For errors, specify how to degrade gracefully. +- For privacy, specify redaction policies and data retention. +- For security, specify SSRF guard expectations and allowlists. +- For compliance, specify any domain-specific regulations. +- For logging, specify verbosity and structure. +- For observability, specify required metrics and traces. +- For testing, specify golden data and determinism. +- For performance, specify budgets and memory limits. +- For accessibility, specify alt text and contrast requirements. +- For internationalization, specify locale handling and encoding. +- For numerical work, specify precision and rounding rules. 
+- For randomness, specify seeds and determinism guidelines. +- For formatting, specify Markdown vs plain text expectations. +- For output, specify whether to include code fences and languages. +- For validation, specify schema and invariants. +- For edge cases, enumerate and clarify behavior. +- For fallbacks, specify secondary strategies. +- For caching, specify keys and TTLs. +- For state, specify persistence and scoping. +- For cleanup, specify deletion policies and audit trails. +- For versioning, specify compatibility and migration notes. +- For feature flags, specify rollouts and defaults. +- For user consent, specify prompts and storage. +- For rate control, specify burst vs steady-state limits. +- For concurrency, specify limits and contention strategies. +- For resource usage, specify budgets per phase. +- For tool failure, specify retries and substitutions. +- For summaries, specify structure and key sections. +- For diagrams, specify Mermaid types and layout. +- For images, specify composition, subject, and constraints. +- For typography, specify fonts and legibility. +- For color, specify palette and accessibility. +- For datasets, specify sources and licenses. +- For provenance, specify how to capture and present. +- For ethics, specify content boundaries. +- For disclaimers, specify when to show them. +- For conflict resolution, specify tie-break rules. +- For prioritization, specify ordering heuristics. +- For monitoring, specify alerts and thresholds. +- For maintenance, specify ownership and runbooks. +- For backups, specify cadence and retention. +- For restorations, specify RTO/RPO targets. +- For migrations, specify cutover and rollback. +- For deprecations, specify policy and timelines. +- For documentation, specify structure and examples. +- For onboarding, specify quickstarts and references. +- For support, specify channels and SLAs. +- For community, specify contribution guidelines. +- For licensing, specify terms and obligations. 
+- For branding, specify usage and restrictions. +- For analytics, specify metrics and privacy. +- For experiments, specify hypotheses and success metrics. +- For A/B tests, specify sampling and durations. +- For reporting, specify cadence and format. +- For audits, specify scope and evidence. +- For secrets, specify storage and rotation. +- For key management, specify roles and access. +- For infra, specify regions and redundancy. +- For cost, specify budgets and alerts. +- For scaling, specify triggers and steps. +- For queues, specify backpressure and dead-letter. +- For schedulers, specify cron expressions and windows. +- For notifications, specify channels and templates. +- For emails, specify DKIM/SPF/DMARC setup. +- For webhooks, specify retries and signatures. +- For APIs, specify auth and scopes. +- For clients, specify SDKs and versions. +- For logs, specify retention and scrubbing. +- For metrics, specify cardinality and costs. +- For traces, specify spans and attributes. +- For dashboards, specify panels and alerts. +- For runbooks, specify steps and verifications. +- For incident response, specify severities and comms. +- For postmortems, specify blamelessness and actions. +- For governance, specify reviews and approvals. +- For change management, specify CAB and windows. +- For risk, specify categories and mitigations. +- For threats, specify models and assumptions. +- For PKI, specify certs and rotation. +- For SSO, specify providers and claims. +- For RBAC, specify roles and scopes. +- For data, specify classification and handling. +- For retention, specify policies and exceptions. +- For deletion, specify requests and SLAs. +- For exports, specify formats and limits. +- For imports, specify validations and partials. +- For reconciliations, specify joins and conflicts. +- For ETL, specify batch sizes and windows. +- For streaming, specify topics and schemas. +- For ML, specify datasets and drift monitoring. 
+- For labeling, specify guidelines and QA. +- For feature stores, specify TTLs and lineage. +- For explainability, specify techniques and limits. +- For feedback, specify channels and triage. +- For abuse, specify detection and responses. +- For rate abuse, specify throttling and bans. +- For content, specify moderation and appeals. +- For localization, specify languages and testing. +- For telemetry, specify opt-in/out and docs. +- For APIs, specify versioning and sunset. +- For CI/CD, specify gates and rollbacks. +- For tests, specify coverage and flakes. +- For flaky tests, specify quarantine and limits. +- For IDEs, specify settings and formatters. +- For linters, specify rules and suppressions. +- For code reviews, specify checklists and SLAs. +- For merges, specify strategies and protections. +- For releases, specify cadence and notes. +- For packaging, specify artifacts and signatures. +- For binaries, specify platforms and flags. +- For containers, specify bases and scanning. +- For SBOM, specify generation and storage. +- For signatures, specify attestations and verifications. +- For supply chain, specify provenance and pinning. +- For dependencies, specify updates and vetting. +- For forks, specify sync strategy. +- For mirrors, specify freshness checks. +- For archives, specify formats and access. +- For APIs, specify limits and paginations. +- For storage, specify classes and lifecycle. +- For caches, specify invalidation and warming. +- For CDN, specify keys and TTLs. +- For edge, specify rules and fallbacks. +- For mobile, specify platforms and SDKs. +- For desktop, specify installers and updates. +- For web, specify compatibility and polyfills. +- For SEO, specify tags and sitemaps. +- For robots, specify disallows and delays. +- For sitemaps, specify generation cadence. +- For backups, specify verification and drills. +- For billing, specify models and proration. +- For taxation, specify regions and rates. +- For legal, specify terms and privacy. 
+- For compliance, specify audits and controls. +- For HR, specify onboarding and offboarding. +- For training, specify materials and refreshers. +- For knowledge base, specify structure and curation. +- For search, specify indexing and ranking. +- For UXR, specify studies and sampling. +- For roadmaps, specify horizons and themes. diff --git a/internal/oai/capabilities.go b/internal/oai/capabilities.go new file mode 100644 index 0000000..7a340d8 --- /dev/null +++ b/internal/oai/capabilities.go @@ -0,0 +1,20 @@ +package oai + +import "strings" + +// SupportsTemperature reports whether the given model id accepts the +// temperature parameter. Defaults to true for forward compatibility. +// Known exceptions are listed explicitly below with brief rationale. +func SupportsTemperature(modelID string) bool { + id := strings.ToLower(strings.TrimSpace(modelID)) + if id == "" { + return true + } + // Known exceptions: OpenAI "o*" reasoning models ignore or reject sampling knobs. + // We treat these as not supporting temperature to avoid 400s and no-op params. + if strings.HasPrefix(id, "o3") || strings.HasPrefix(id, "o4") { + return false + } + // Otherwise allow by default (e.g., GPT-5 variants, oss-gpt-*). + return true +} diff --git a/internal/oai/capabilities_test.go b/internal/oai/capabilities_test.go new file mode 100644 index 0000000..e9442f8 --- /dev/null +++ b/internal/oai/capabilities_test.go @@ -0,0 +1,27 @@ +package oai + +import "testing" + +// Table-driven tests for SupportsTemperature across true/false outcomes. 
+func TestSupportsTemperature(t *testing.T) { + tests := []struct { + name string + model string + expect bool + }{ + {name: "empty => default true", model: "", expect: true}, + {name: "oss gpt variant => true", model: "oss-gpt-20b", expect: true}, + {name: "gpt-5 family => true", model: "gpt-5.0-pro", expect: true}, + {name: "o3 reasoning => false", model: "o3-mini", expect: false}, + {name: "o4 reasoning => false", model: "o4-heavy", expect: false}, + {name: "case-insensitive match", model: "O3-MINI", expect: false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := SupportsTemperature(tc.model) + if got != tc.expect { + t.Fatalf("SupportsTemperature(%q)=%v want %v", tc.model, got, tc.expect) + } + }) + } +} diff --git a/internal/oai/client.go b/internal/oai/client.go new file mode 100644 index 0000000..5ffc3d3 --- /dev/null +++ b/internal/oai/client.go @@ -0,0 +1,708 @@ +package oai + +import ( + "bufio" + "bytes" + "context" + "crypto/rand" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + mathrand "math/rand" + "net" + "net/http" + "net/http/httptrace" + "os" + "path/filepath" + "strings" + "time" +) + +type Client struct { + baseURL string + apiKey string + httpClient *http.Client + retry RetryPolicy +} + +// audit context keys are unexported to avoid collisions. Use helper to set. +type auditCtxKey string + +const ( + auditCtxKeyStage auditCtxKey = "audit_stage" +) + +// WithAuditStage returns a child context that carries an audit stage label +// (e.g., "prep") that will be included in HTTP audit entries. 
+func WithAuditStage(parent context.Context, stage string) context.Context { + stage = strings.TrimSpace(stage) + if stage == "" { + return parent + } + return context.WithValue(parent, auditCtxKeyStage, stage) +} + +func auditStageFromContext(ctx context.Context) string { + if ctx == nil { + return "" + } + if v := ctx.Value(auditCtxKeyStage); v != nil { + if s, ok := v.(string); ok { + return s + } + } + return "" +} + +// RetryPolicy controls HTTP retry behavior for transient failures. +// MaxRetries specifies the number of retries after the initial attempt. +// Backoff specifies the base delay between attempts; exponential backoff is applied. +// JitterFraction specifies the +/- fractional jitter applied to each computed backoff. +// When Rand is non-nil, it is used to sample jitter for deterministic tests. +type RetryPolicy struct { + MaxRetries int + Backoff time.Duration + JitterFraction float64 + Rand *mathrand.Rand +} + +// NewClient creates a client without retries (single attempt only). +func NewClient(baseURL, apiKey string, timeout time.Duration) *Client { + trimmed := strings.TrimRight(baseURL, "/") + return &Client{ + baseURL: trimmed, + apiKey: apiKey, + httpClient: &http.Client{ + Timeout: timeout, + }, + retry: RetryPolicy{MaxRetries: 0, Backoff: 0}, + } +} + +// NewClientWithRetry creates a client with a retry policy for transient failures. +func NewClientWithRetry(baseURL, apiKey string, timeout time.Duration, retry RetryPolicy) *Client { + if retry.MaxRetries < 0 { + retry.MaxRetries = 0 + } + trimmed := strings.TrimRight(baseURL, "/") + return &Client{ + baseURL: trimmed, + apiKey: apiKey, + httpClient: &http.Client{ + Timeout: timeout, + }, + retry: retry, + } +} + +// nolint:gocyclo // Orchestrates retries and timing; complexity acceptable and tested. 
+func (c *Client) CreateChatCompletion(ctx context.Context, req ChatCompletionsRequest) (ChatCompletionsResponse, error) { + // Encoder guard: omit temperature entirely for models that do not support it. + // This complements higher-level callers which may or may not set the field. + if !SupportsTemperature(req.Model) { + req.Temperature = nil + } + var zero ChatCompletionsResponse + body, err := json.Marshal(req) + if err != nil { + return zero, fmt.Errorf("marshal request: %w", err) + } + endpoint := c.baseURL + "/chat/completions" + // Attempt loop with basic exponential backoff on transient failures. + attempts := c.retry.MaxRetries + 1 + if attempts < 1 { + attempts = 1 + } + + var lastErr error + // Allow a single parameter-recovery retry without consuming the normal retry budget + recoveryGranted := false + // Emit a meta audit entry capturing observability fields derived from the request + emitChatMetaAudit(req) + // Generate a stable Idempotency-Key used across all attempts + idemKey := generateIdempotencyKey() + // Capture any stage label from context for audit enrichment + stage := auditStageFromContext(ctx) + for attempt := 0; attempt < attempts; attempt++ { + // Per-attempt timing capture using httptrace + attemptStart := time.Now() + var ( + dnsStart, connStart time.Time + dnsDur, connDur time.Duration + wroteAt, firstByteAt time.Time + ) + trace := &httptrace.ClientTrace{ + DNSStart: func(info httptrace.DNSStartInfo) { dnsStart = time.Now() }, + DNSDone: func(info httptrace.DNSDoneInfo) { + if !dnsStart.IsZero() { + dnsDur += time.Since(dnsStart) + } + }, + ConnectStart: func(network, addr string) { connStart = time.Now() }, + ConnectDone: func(network, addr string, err error) { + if !connStart.IsZero() { + connDur += time.Since(connStart) + } + }, + GotConn: func(info httptrace.GotConnInfo) {}, + WroteRequest: func(info httptrace.WroteRequestInfo) { wroteAt = time.Now() }, + GotFirstResponseByte: func() { firstByteAt = time.Now() }, + } + // 
Fallback for TLS duration using httptrace hooks available: emulate by measuring from TLSHandshakeStart/Done via GotConn workaround. + // Since httptrace.TLSHandshakeDone requires crypto/tls type, replicate using any to avoid import on older Go. + // Note: we will compute tlsDur as zero unless supported; acceptable for audit purposes. + + httpReq, nerr := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body)) + if nerr != nil { + return zero, fmt.Errorf("new request: %w", nerr) + } + httpReq.Header.Set("Content-Type", "application/json") + if c.apiKey != "" { + httpReq.Header.Set("Authorization", "Bearer "+c.apiKey) + } + httpReq.Header.Set("Idempotency-Key", idemKey) + httpReq = httpReq.WithContext(httptrace.WithClientTrace(httpReq.Context(), trace)) + + resp, derr := c.httpClient.Do(httpReq) + if derr != nil { + lastErr = derr + // Log attempt with error + logHTTPAttempt(stage, idemKey, attempt+1, attempts, 0, 0, endpoint, derr.Error()) + // Emit timing audit for error case + logHTTPTiming(stage, idemKey, attempt+1, endpoint, 0, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), classifyHTTPCause(ctx, derr), userHintForCause(ctx, derr)) + if attempt < attempts-1 && isRetryableError(derr) { + // compute backoff (with jitter) for audit then sleep + back := backoffWithJitter(c.retry.Backoff, attempt, c.retry.JitterFraction, c.retry.Rand) + logHTTPAttempt(stage, idemKey, attempt+1, attempts, 0, back.Milliseconds(), endpoint, derr.Error()) + sleepFunc(back) + continue + } + // Upgrade error with base URL, configured timeout, and actionable hint + hint := userHintForCause(ctx, derr) + // c.httpClient.Timeout reflects configured HTTP timeout + tmo := c.httpClient.Timeout + if hint != "" { + return zero, fmt.Errorf("chat POST failed: %v (base=%s, http-timeout=%s). 
Hint: %s", derr, c.baseURL, tmo, hint) + } + return zero, fmt.Errorf("chat POST failed: %v (base=%s, http-timeout=%s)", derr, c.baseURL, tmo) + } + + // When streaming is requested, the server should respond with SSE. We do not + // support streaming in this method. Return 400 guidance to call StreamChat. + if req.Stream { + // Streaming is not supported in this method; close body and return guidance. + _ = resp.Body.Close() //nolint:errcheck // best-effort close + return zero, fmt.Errorf("stream=true not supported in CreateChatCompletion; use StreamChat") + } + respBody, readErr := io.ReadAll(resp.Body) + if cerr := resp.Body.Close(); cerr != nil { + // best-effort: record close error as lastErr if none + if lastErr == nil { + lastErr = cerr + } + } + if readErr != nil { + lastErr = readErr + // Log attempt with read error + logHTTPAttempt(stage, idemKey, attempt+1, attempts, resp.StatusCode, 0, endpoint, readErr.Error()) + // Emit timing audit including read duration up to error + logHTTPTiming(stage, idemKey, attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), classifyHTTPCause(ctx, readErr), userHintForCause(ctx, readErr)) + if attempt < attempts-1 && isRetryableError(readErr) { + back := backoffWithJitter(c.retry.Backoff, attempt, c.retry.JitterFraction, c.retry.Rand) + logHTTPAttempt(stage, idemKey, attempt+1, attempts, resp.StatusCode, back.Milliseconds(), endpoint, readErr.Error()) + sleepFunc(back) + continue + } + return zero, fmt.Errorf("read response body: %w", readErr) + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + // Parameter-recovery: if 400 mentions invalid/unsupported temperature and + // the request included temperature, remove it and retry once immediately. 
+ if resp.StatusCode == http.StatusBadRequest { + // Capture body string for inspection and logs + bodyStr := string(respBody) + if !recoveryGranted && includesTemperature(req) && mentionsUnsupportedTemperature(bodyStr) { + // Log recovery attempt with a structured audit entry + logHTTPAttempt(stage, idemKey, attempt+1, attempts, resp.StatusCode, 0, endpoint, "param_recovery: temperature") + // Clear temperature and re-marshal request for a one-time recovery retry + req.Temperature = nil + nb, merr := json.Marshal(req) + if merr == nil { + body = nb + // Grant exactly one extra attempt for recovery + recoveryGranted = true + attempts++ + // Emit timing audit for the failed attempt before retrying + logHTTPTiming(stage, idemKey, attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), "http_status", "param_recovery_temperature") + // Perform immediate recovery retry without consuming a normal retry slot + continue + } + // If marshal fails, fall through to normal error handling + } + } + // Retry on 429 and 5xx; otherwise return immediately + if attempt < attempts-1 && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500) { + // Respect Retry-After when present; otherwise use exponential backoff + if ra, ok := retryAfterDuration(resp.Header.Get("Retry-After"), time.Now()); ok { + // Log with Retry-After derived backoff + logHTTPAttempt(stage, idemKey, attempt+1, attempts, resp.StatusCode, ra.Milliseconds(), endpoint, "") + sleepFunc(ra) + } else { + back := backoffWithJitter(c.retry.Backoff, attempt, c.retry.JitterFraction, c.retry.Rand) + logHTTPAttempt(stage, idemKey, attempt+1, attempts, resp.StatusCode, back.Milliseconds(), endpoint, "") + sleepFunc(back) + } + // Emit timing audit for non-2xx attempt + logHTTPTiming(stage, idemKey, attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), "http_status", "") + continue + } + // Final non-retryable 
failure: log attempt (no backoff) and return + logHTTPAttempt(stage, idemKey, attempt+1, attempts, resp.StatusCode, 0, endpoint, truncate(string(respBody), 2000)) + logHTTPTiming(stage, idemKey, attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), "http_status", "") + return zero, fmt.Errorf("chat API %s: %d: %s", endpoint, resp.StatusCode, truncate(string(respBody), 2000)) + } + if err := json.Unmarshal(respBody, &zero); err != nil { + return ChatCompletionsResponse{}, fmt.Errorf("decode response: %w; body: %s", err, truncate(string(respBody), 1000)) + } + // Success: log attempt with status and no backoff + logHTTPAttempt(stage, idemKey, attempt+1, attempts, resp.StatusCode, 0, endpoint, "") + logHTTPTiming(stage, idemKey, attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), "success", "") + return zero, nil + } + if lastErr != nil { + return zero, lastErr + } + return zero, fmt.Errorf("chat request failed without a specific error") +} + +// StreamChat performs a streaming chat completion request (SSE) and delivers +// parsed chunks to the provided callback as they arrive. The callback should be +// fast and non-blocking. The function returns when the stream completes or an +// error occurs. Retries are not applied in streaming mode. 
+func (c *Client) StreamChat(ctx context.Context, req ChatCompletionsRequest, onChunk func(StreamChunk) error) error { + // Encoder guard: omit temperature when unsupported + if !SupportsTemperature(req.Model) { + req.Temperature = nil + } + req.Stream = true + body, err := json.Marshal(req) + if err != nil { + return fmt.Errorf("marshal request: %w", err) + } + endpoint := c.baseURL + "/chat/completions" + httpReq, nerr := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body)) + if nerr != nil { + return fmt.Errorf("new request: %w", nerr) + } + httpReq.Header.Set("Content-Type", "application/json") + if c.apiKey != "" { + httpReq.Header.Set("Authorization", "Bearer "+c.apiKey) + } + // Idempotency not relevant for streaming; still set for consistency + httpReq.Header.Set("Idempotency-Key", generateIdempotencyKey()) + + resp, derr := c.httpClient.Do(httpReq) + if derr != nil { + return derr + } + defer func() { _ = resp.Body.Close() }() //nolint:errcheck // best-effort close + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + b, rerr := io.ReadAll(resp.Body) + if rerr != nil { + return fmt.Errorf("chat API %s: %d: <read error>", endpoint, resp.StatusCode) + } + return fmt.Errorf("chat API %s: %d: %s", endpoint, resp.StatusCode, truncate(string(b), 2000)) + } + // Require SSE content type for streaming + ct := strings.ToLower(strings.TrimSpace(resp.Header.Get("Content-Type"))) + if !strings.Contains(ct, "text/event-stream") { + // Not a streaming response; signal caller to fallback + _, _ = io.ReadAll(resp.Body) //nolint:errcheck // ignore read error; fallback remains informative + return fmt.Errorf("server does not support streaming (content-type=%q)", ct) + } + // Simple SSE parser: read lines; handle "data: ..." 
and [DONE] + dec := newLineReader(resp.Body) + for { + line, err := dec() + if err != nil { + if errors.Is(err, io.EOF) { + return nil + } + return fmt.Errorf("stream read: %w", err) + } + s := strings.TrimSpace(line) + if s == "" { + continue + } + if strings.HasPrefix(s, "data:") { + payload := strings.TrimSpace(strings.TrimPrefix(s, "data:")) + if payload == "[DONE]" { + return nil + } + var chunk StreamChunk + if jerr := json.Unmarshal([]byte(payload), &chunk); jerr != nil { + // Skip malformed chunk + continue + } + if onChunk != nil { + if err := onChunk(chunk); err != nil { + return err + } + } + } + } +} + +// newLineReader returns a closure that reads one line (terminated by \n) from r each call. +func newLineReader(r io.Reader) func() (string, error) { + br := bufio.NewReader(r) + return func() (string, error) { + b, err := br.ReadBytes('\n') + if err != nil { + return "", err + } + return string(b), nil + } +} + +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] +} + +// isRetryableError returns true for transient network/timeouts. +func isRetryableError(err error) bool { + if err == nil { + return false + } + // Context deadline exceeded from client timeout + if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { + return true + } + var ne net.Error + if errors.As(err, &ne) { + if ne.Timeout() { // ne.Temporary is deprecated; avoid + return true + } + } + // *url.Error often wraps retryable errors; fall back to string contains of "timeout" + s := strings.ToLower(err.Error()) + return strings.Contains(s, "timeout") +} + +// sleepBackoff retained for backward compatibility; not used. +// Deprecated: use backoffDuration + sleepFor instead. +// func sleepBackoff(base time.Duration, attempt int) { time.Sleep(backoffDuration(base, attempt)) } + +// backoffDuration returns the duration that sleepBackoff would sleep for a given attempt. 
+func backoffDuration(base time.Duration, attempt int) time.Duration { + if base <= 0 { + base = 200 * time.Millisecond + } + d := base << attempt + if d > 2*time.Second { + d = 2 * time.Second + } + return d +} + +// backoffWithJitter returns an exponential backoff adjusted by +/- jitter fraction. +// When jitterFraction <= 0, this falls back to backoffDuration. When r is nil, +// a time-seeded RNG is used for production randomness. +func backoffWithJitter(base time.Duration, attempt int, jitterFraction float64, r *mathrand.Rand) time.Duration { + d := backoffDuration(base, attempt) + if jitterFraction <= 0 { + return d + } + if jitterFraction > 0.9 { // prevent extreme factors + jitterFraction = 0.9 + } + if r == nil { + // Seed with current time for production; tests can pass a custom Rand + r = mathrand.New(mathrand.NewSource(time.Now().UnixNano())) + } + // factor in [1 - f, 1 + f] + minF := 1.0 - jitterFraction + maxF := 1.0 + jitterFraction + factor := minF + r.Float64()*(maxF-minF) + // Guard against rounding to zero + jittered := time.Duration(float64(d) * factor) + if jittered < time.Millisecond { + return time.Millisecond + } + return jittered +} + +// retryAfterDuration parses the Retry-After header which may be seconds or HTTP-date. +// Returns (duration, true) when valid; otherwise (0, false). +func retryAfterDuration(h string, now time.Time) (time.Duration, bool) { + h = strings.TrimSpace(h) + if h == "" { + return 0, false + } + // Try integer seconds first + if secs, err := time.ParseDuration(h + "s"); err == nil { + if secs > 0 { + return secs, true + } + } + // Try HTTP-date formats per RFC 7231 (use http.TimeFormat) + if t, err := time.Parse(http.TimeFormat, h); err == nil { + if t.After(now) { + return t.Sub(now), true + } + } + return 0, false +} + +// sleepFor sleeps for the provided duration; extracted for testability. +// sleepFunc allows tests to intercept sleeps deterministically. 
+var sleepFunc = sleepFor + +func sleepFor(d time.Duration) { + if d <= 0 { + return + } + time.Sleep(d) +} + +// generateIdempotencyKey returns a random hex string suitable for Idempotency-Key. +func generateIdempotencyKey() string { + var b [16]byte + if _, err := rand.Read(b[:]); err != nil { + // Fallback to timestamp-based key if crypto/rand fails; extremely unlikely + return fmt.Sprintf("goagent-%d", time.Now().UnixNano()) + } + return "goagent-" + hex.EncodeToString(b[:]) +} + +// logHTTPAttempt appends an NDJSON line describing an HTTP attempt and planned backoff. +func logHTTPAttempt(stage, idemKey string, attempt, maxAttempts, status int, backoffMs int64, endpoint, errStr string) { + type audit struct { + TS string `json:"ts"` + Event string `json:"event"` + Stage string `json:"stage,omitempty"` + IdempotencyKey string `json:"idempotency_key,omitempty"` + Attempt int `json:"attempt"` + Max int `json:"max"` + Status int `json:"status"` + BackoffMs int64 `json:"backoffMs"` + Endpoint string `json:"endpoint"` + Error string `json:"error,omitempty"` + } + entry := audit{ + TS: time.Now().UTC().Format(time.RFC3339Nano), + Event: "http_attempt", + Stage: stage, + IdempotencyKey: idemKey, + Attempt: attempt, + Max: maxAttempts, + Status: status, + BackoffMs: backoffMs, + Endpoint: endpoint, + Error: truncate(errStr, 500), + } + if err := appendAuditLog(entry); err != nil { + _ = err + } +} + +// logHTTPTiming appends detailed HTTP timing metrics to the audit log. 
+func logHTTPTiming(stage, idemKey string, attempt int, endpoint string, status int, start time.Time, dnsDur, connDur, tlsDur time.Duration, wroteAt, firstByteAt, end time.Time, cause, hint string) { + type timing struct { + TS string `json:"ts"` + Event string `json:"event"` + Stage string `json:"stage,omitempty"` + IdempotencyKey string `json:"idempotency_key,omitempty"` + Attempt int `json:"attempt"` + Endpoint string `json:"endpoint"` + Status int `json:"status"` + DNSMs int64 `json:"dnsMs"` + ConnectMs int64 `json:"connectMs"` + TLSMs int64 `json:"tlsMs"` + WroteMs int64 `json:"wroteMs"` + TTFBMs int64 `json:"ttfbMs"` + ReadMs int64 `json:"readMs"` + TotalMs int64 `json:"totalMs"` + Cause string `json:"cause"` + Hint string `json:"hint,omitempty"` + } + var wroteMs, ttfbMs, readMs int64 + if !wroteAt.IsZero() { + wroteMs = wroteAt.Sub(start).Milliseconds() + } + if !firstByteAt.IsZero() { + if !wroteAt.IsZero() && firstByteAt.After(wroteAt) { + ttfbMs = firstByteAt.Sub(wroteAt).Milliseconds() + } else { + ttfbMs = firstByteAt.Sub(start).Milliseconds() + } + if end.After(firstByteAt) { + readMs = end.Sub(firstByteAt).Milliseconds() + } + } + entry := timing{ + TS: time.Now().UTC().Format(time.RFC3339Nano), + Event: "http_timing", + Stage: stage, + IdempotencyKey: idemKey, + Attempt: attempt, + Endpoint: endpoint, + Status: status, + DNSMs: dnsDur.Milliseconds(), + ConnectMs: connDur.Milliseconds(), + TLSMs: tlsDur.Milliseconds(), + WroteMs: wroteMs, + TTFBMs: ttfbMs, + ReadMs: readMs, + TotalMs: end.Sub(start).Milliseconds(), + Cause: cause, + Hint: hint, + } + if err := appendAuditLog(entry); err != nil { + _ = err + } +} + +// LogLengthBackoff emits a structured NDJSON audit entry describing a +// length_backoff event triggered by finish_reason=="length". Callers should +// pass the model identifier, the previous and new completion caps, the +// effective model context window, and the estimated prompt token count. 
+func LogLengthBackoff(model string, prevCap, newCap, window, estimatedPromptTokens int) { + type audit struct { + TS string `json:"ts"` + Event string `json:"event"` + Model string `json:"model"` + PrevCap int `json:"prev_cap"` + NewCap int `json:"new_cap"` + Window int `json:"window"` + EstimatedPromptTokens int `json:"estimated_prompt_tokens"` + } + entry := audit{ + TS: time.Now().UTC().Format(time.RFC3339Nano), + Event: "length_backoff", + Model: model, + PrevCap: prevCap, + NewCap: newCap, + Window: window, + EstimatedPromptTokens: estimatedPromptTokens, + } + if err := appendAuditLog(entry); err != nil { + _ = err + } +} + +// emitChatMetaAudit writes a one-line NDJSON entry describing request-level +// observability fields such as the effective temperature and whether the +// temperature parameter is included in the payload for the target model. +func emitChatMetaAudit(req ChatCompletionsRequest) { + // Compute effective temperature based on model support and clamp rules. + effectiveTemp, supported := EffectiveTemperatureForModel(req.Model, valueOrDefault(req.Temperature, 1.0)) + type meta struct { + TS string `json:"ts"` + Event string `json:"event"` + Model string `json:"model"` + TemperatureEffective float64 `json:"temperature_effective"` + TemperatureInPayload bool `json:"temperature_in_payload"` + } + entry := meta{ + TS: time.Now().UTC().Format(time.RFC3339Nano), + Event: "chat_meta", + Model: req.Model, + TemperatureEffective: effectiveTemp, + TemperatureInPayload: supported && req.Temperature != nil, + } + if err := appendAuditLog(entry); err != nil { + _ = err + } +} + +func valueOrDefault(ptr *float64, def float64) float64 { + if ptr == nil { + return def + } + return *ptr +} + +// classifyHTTPCause returns a short cause label for audit based on error/context. 
+func classifyHTTPCause(ctx context.Context, err error) string { + if err == nil { + return "success" + } + if errors.Is(err, context.DeadlineExceeded) || (ctx != nil && ctx.Err() == context.DeadlineExceeded) { + return "context_deadline" + } + s := strings.ToLower(err.Error()) + switch { + case strings.Contains(s, "server closed") || strings.Contains(s, "connection reset") || strings.Contains(s, "broken pipe"): + return "server_closed" + case strings.Contains(s, "timeout"): + return "timeout" + default: + return "error" + } +} + +// userHintForCause returns a short actionable hint for common failure causes. +func userHintForCause(ctx context.Context, err error) string { + if err == nil { + return "" + } + if errors.Is(err, context.DeadlineExceeded) || (ctx != nil && ctx.Err() == context.DeadlineExceeded) || strings.Contains(strings.ToLower(err.Error()), "timeout") { + return "increase -http-timeout or reduce prompt/model latency" + } + return "" +} + +// appendAuditLog writes an NDJSON audit line to .goagent/audit/YYYYMMDD.log (same location used by tool runner). +func appendAuditLog(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { + if cerr := f.Close(); cerr != nil { + _ = cerr + } + }() + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from the current working directory to locate the directory +// containing go.mod. If none is found, it returns the current working directory. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." 
+ } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + // Reached filesystem root; fallback to original cwd + return cwd + } + dir = parent + } +} diff --git a/internal/oai/client_test.go b/internal/oai/client_test.go new file mode 100644 index 0000000..fd52270 --- /dev/null +++ b/internal/oai/client_test.go @@ -0,0 +1,478 @@ +//nolint:errcheck // In tests, many helper writes/encodes ignore errors intentionally; functional behavior is asserted elsewhere. +package oai + +import ( + "context" + "encoding/json" + "errors" + mathrand "math/rand" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +// https://github.com/hyperifyio/goagent/issues/1 +func TestCreateChatCompletion_Success(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/chat/completions" { + t.Fatalf("unexpected request: %s %s", r.Method, r.URL.Path) + } + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + resp := ChatCompletionsResponse{ + ID: "cmpl-1", + Object: "chat.completion", + Created: time.Now().Unix(), + Model: req.Model, + Choices: []ChatCompletionsResponseChoice{{ + Index: 0, + FinishReason: "stop", + Message: Message{Role: RoleAssistant, Content: "hello"}, + }}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + panic(err) + } + })) + defer ts.Close() + + c := NewClient(ts.URL, "", 2*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + out, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "test", Messages: []Message{{Role: RoleUser, Content: "hi"}}}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(out.Choices) != 1 || out.Choices[0].Message.Content != 
"hello" { + t.Fatalf("unexpected response: %+v", out) + } +} + +// https://github.com/hyperifyio/goagent/issues/1 +func TestCreateChatCompletion_HTTPError(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + if _, err := w.Write([]byte(`{"error":"bad request"}`)); err != nil { + panic(err) + } + })) + defer ts.Close() + + c := NewClient(ts.URL, "", 2*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + _, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "x", Messages: []Message{}}) + if err == nil { + t.Fatalf("expected error") + } + if got := err.Error(); !strings.Contains(got, "400") || !strings.Contains(got, "bad request") { + t.Fatalf("expected status code and body in error, got: %v", got) + } +} + +// https://github.com/hyperifyio/goagent/issues/1 +// Ensure encoder omits temperature when SupportsTemperature == false and includes when true. 
+func TestCreateChatCompletion_TemperatureOmissionAndInclusion(t *testing.T) { + t.Run("OmitWhenUnsupported", func(t *testing.T) { + // Model id that does not support temperature per capabilities + req := ChatCompletionsRequest{Model: "o3-mini", Messages: []Message{{Role: RoleUser, Content: "x"}}} + b, err := json.Marshal(req) + if err != nil { + t.Fatalf("marshal: %v", err) + } + got := string(b) + if strings.Contains(got, "temperature") { + t.Fatalf("expected no temperature field, got: %s", got) + } + }) + + t.Run("IncludeWhenSupported", func(t *testing.T) { + temp := 0.7 + req := ChatCompletionsRequest{Model: "oss-gpt-20b", Messages: []Message{{Role: RoleUser, Content: "x"}}, Temperature: &temp} + b, err := json.Marshal(req) + if err != nil { + t.Fatalf("marshal: %v", err) + } + got := string(b) + if !strings.Contains(got, "\"temperature\":0.7") { + t.Fatalf("expected temperature field, got: %s", got) + } + }) +} + +// Ensure client strips temperature for unsupported models right before HTTP. 
+func TestCreateChatCompletion_TemperatureStrippedWhenUnsupported(t *testing.T) { + // Spin up a server that captures incoming request JSON + var seenTemp *float64 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/chat/completions" { + t.Fatalf("unexpected request: %s %s", r.Method, r.URL.Path) + } + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + seenTemp = req.Temperature + // Respond with minimal valid JSON + resp := ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}} + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode: %v", err) + } + })) + defer srv.Close() + + c := NewClientWithRetry(srv.URL, "", 2*time.Second, RetryPolicy{MaxRetries: 0}) + + // Case: unsupported model with temperature set -> should be stripped + temp := 0.9 + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + _, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "o3-mini", Messages: []Message{{Role: RoleUser, Content: "x"}}, Temperature: &temp}) + if err != nil { + t.Fatalf("call: %v", err) + } + if seenTemp != nil { + t.Fatalf("expected temperature to be omitted; got %v", *seenTemp) + } +} + +// Ensure client preserves temperature for supported models. 
+func TestCreateChatCompletion_TemperaturePreservedWhenSupported(t *testing.T) { + var seenTemp *float64 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + seenTemp = req.Temperature + resp := ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}} + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode: %v", err) + } + })) + defer srv.Close() + + c := NewClientWithRetry(srv.URL, "", 2*time.Second, RetryPolicy{MaxRetries: 0}) + temp := 0.7 + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + _, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "oss-gpt-20b", Messages: []Message{{Role: RoleUser, Content: "x"}}, Temperature: &temp}) + if err != nil { + t.Fatalf("call: %v", err) + } + if seenTemp == nil || *seenTemp != 0.7 { + if seenTemp == nil { + t.Fatalf("expected temperature to be present") + } + t.Fatalf("expected temperature 0.7, got %v", *seenTemp) + } +} + +// Parameter-recovery retry: when the server responds 400 mentioning invalid/unsupported +// temperature, the client should remove temperature and retry once before any normal retries. 
+func TestCreateChatCompletion_ParameterRecovery_InvalidTemperature(t *testing.T) { + attempts := 0 + var firstReqHadTemp bool + var secondReqHadTemp bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + if attempts == 1 { + firstReqHadTemp = req.Temperature != nil + // Simulate OpenAI-style 400 error indicating unsupported temperature + w.WriteHeader(http.StatusBadRequest) + if _, err := w.Write([]byte(`{"error":{"message":"parameter 'temperature' is unsupported for this model"}}`)); err != nil { + t.Fatalf("write: %v", err) + } + return + } + secondReqHadTemp = req.Temperature != nil + // On retry, succeed + resp := ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}} + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode: %v", err) + } + })) + defer srv.Close() + + // No normal retries; parameter-recovery should still allow exactly one retry + c := NewClientWithRetry(srv.URL, "", 2*time.Second, RetryPolicy{MaxRetries: 0}) + temp := 0.5 + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + out, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "oss-gpt-20b", Messages: []Message{{Role: RoleUser, Content: "x"}}, Temperature: &temp}) + if err != nil { + t.Fatalf("call: %v", err) + } + if out.Choices[0].Message.Content != "ok" { + t.Fatalf("unexpected content: %+v", out) + } + if attempts != 2 { + t.Fatalf("expected 2 attempts (1st 400, 2nd success), got %d", attempts) + } + if !firstReqHadTemp { + t.Fatalf("expected temperature set on first request") + } + if secondReqHadTemp { + t.Fatalf("expected temperature to be removed on retry after 400") + } +} + +// https://github.com/hyperifyio/goagent/issues/216 +func 
TestCreateChatCompletion_RetryTimeoutThenSuccess(t *testing.T) { + attempts := 0 + var firstIdem string + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + // Assert Idempotency-Key header is present and stable across attempts + idem := r.Header.Get("Idempotency-Key") + if idem == "" { + t.Fatalf("missing Idempotency-Key header") + } + if firstIdem == "" { + firstIdem = idem + } else if firstIdem != idem { + t.Fatalf("Idempotency-Key changed across retries: %q != %q", firstIdem, idem) + } + if attempts == 1 { + // Simulate a slow server to trigger client timeout + time.Sleep(500 * time.Millisecond) + } + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + resp := ChatCompletionsResponse{ + ID: "cmpl-1", + Object: "chat.completion", + Created: time.Now().Unix(), + Model: req.Model, + Choices: []ChatCompletionsResponseChoice{{Index: 0, FinishReason: "stop", Message: Message{Role: RoleAssistant, Content: "ok"}}}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + panic(err) + } + })) + defer ts.Close() + + // Small HTTP timeout to trigger quickly; allow 1 retry + c := NewClientWithRetry(ts.URL, "", 200*time.Millisecond, RetryPolicy{MaxRetries: 1, Backoff: 1 * time.Millisecond}) + // Context slightly larger than two attempts to avoid overall ctx deadline + ctx, cancel := context.WithTimeout(context.Background(), 1500*time.Millisecond) + defer cancel() + out, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "m", Messages: []Message{{Role: RoleUser, Content: "hi"}}}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if out.Choices[0].Message.Content != "ok" { + t.Fatalf("unexpected content: %+v", out) + } + if attempts < 2 { + t.Fatalf("expected at least 2 attempts, got %d", attempts) + } + + // Verify audit log contains http_attempt and http_timing entries + auditDir := filepath.Join(".goagent", 
"audit") + // Allow a brief moment for file flush on slow FS + time.Sleep(10 * time.Millisecond) + entries, err := os.ReadDir(auditDir) + if err != nil || len(entries) == 0 { + t.Fatalf("expected audit file in %s: %v", auditDir, err) + } + // Read the latest file and ensure it has at least two http_attempt lines + latest := filepath.Join(auditDir, entries[len(entries)-1].Name()) + b, rerr := os.ReadFile(latest) + if rerr != nil { + t.Fatalf("read audit: %v", rerr) + } + content := string(b) + if !strings.Contains(content, "\"event\":\"http_attempt\"") { + t.Fatalf("expected http_attempt audit entries, got: %s", content) + } + if !strings.Contains(content, "\"event\":\"http_timing\"") { + t.Fatalf("expected http_timing audit entries, got: %s", content) + } +} + +func TestIsRetryableError_ContextDeadline(t *testing.T) { + if !isRetryableError(context.DeadlineExceeded) { + t.Fatal("expected context deadline to be retryable") + } + if isRetryableError(errors.New("permanent failure")) { + t.Fatal("unexpected retryable for generic error") + } +} + +// https://github.com/hyperifyio/goagent/issues/216 +func TestCreateChatCompletion_RetryAfter_HeaderSeconds(t *testing.T) { + attempts := 0 + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts == 1 { + w.Header().Set("Retry-After", "0") // zero should fallback to backoff, but we will return 429 to test path + w.WriteHeader(http.StatusTooManyRequests) + if _, err := w.Write([]byte(`{"error":"rate limited"}`)); err != nil { + panic(err) + } + return + } + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + resp := ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}} + if err := json.NewEncoder(w).Encode(resp); err != nil { + panic(err) + } + })) + defer ts.Close() + + c := NewClientWithRetry(ts.URL, "", 1*time.Second, 
RetryPolicy{MaxRetries: 2, Backoff: 1 * time.Millisecond}) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + out, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "m", Messages: []Message{{Role: RoleUser, Content: "hi"}}}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if out.Choices[0].Message.Content != "ok" { + t.Fatalf("unexpected content: %+v", out) + } + if attempts < 2 { + t.Fatalf("expected retry, got attempts=%d", attempts) + } +} + +// https://github.com/hyperifyio/goagent/issues/216 +func TestRetryAfter_HTTPDate(t *testing.T) { + // Validate header parsing helper directly + now := time.Date(2025, 1, 2, 3, 4, 5, 0, time.UTC) + date := now.Add(2 * time.Second).UTC().Format(http.TimeFormat) + if d, ok := retryAfterDuration(date, now); !ok || d < 1900*time.Millisecond || d > 2100*time.Millisecond { + t.Fatalf("unexpected duration: %v ok=%v", d, ok) + } +} + +// https://github.com/hyperifyio/goagent/issues/216 +func TestBackoffWithJitter_GrowthAndBounds(t *testing.T) { + base := 100 * time.Millisecond + jf := 0.5 // +/-50% + r := mathrand.New(mathrand.NewSource(1)) + d0 := backoffWithJitter(base, 0, jf, r) + if d0 < 50*time.Millisecond || d0 > 150*time.Millisecond { + t.Fatalf("attempt0 out of bounds: %v", d0) + } + d1 := backoffWithJitter(base, 1, jf, r) + // attempt 1 base is 200ms; with jitter bounds are [100ms, 300ms] + if d1 < 100*time.Millisecond || d1 > 300*time.Millisecond { + t.Fatalf("attempt1 out of bounds: %v", d1) + } + // ensure min growth relative to min bound + if d1 <= 75*time.Millisecond { // strictly greater than a conservative lower threshold + t.Fatalf("expected growth, d1=%v", d1) + } + // cap check at high attempts should not exceed 2s +/- jitter + dN := backoffWithJitter(base, 10, jf, r) + if dN < 1*time.Second || dN > 3*time.Second { + t.Fatalf("cap bounds unexpected: %v", dN) + } +} + +// Verify jittered backoff is used for 429 without Retry-After. 
+func TestCreateChatCompletion_Retry429_UsesJitteredBackoff(t *testing.T) { + attempts := 0 + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts == 1 { + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":"rate limited"}`)) + return + } + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + _ = json.NewEncoder(w).Encode(ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}}) + })) + defer ts.Close() + + // Intercept sleeps + var slept []time.Duration + oldSleep := sleepFunc + sleepFunc = func(d time.Duration) { slept = append(slept, d) } + defer func() { sleepFunc = oldSleep }() + + // Deterministic jitter + r := mathrand.New(mathrand.NewSource(42)) + c := NewClientWithRetry(ts.URL, "", 1*time.Second, RetryPolicy{MaxRetries: 1, Backoff: 100 * time.Millisecond, JitterFraction: 0.5, Rand: r}) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + out, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "m", Messages: []Message{{Role: RoleUser, Content: "hi"}}}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if out.Choices[0].Message.Content != "ok" { + t.Fatalf("unexpected content: %+v", out) + } + if attempts != 2 { + t.Fatalf("expected 2 attempts, got %d", attempts) + } + if len(slept) != 1 { + t.Fatalf("expected one sleep, got %d", len(slept)) + } + if slept[0] < 50*time.Millisecond || slept[0] > 150*time.Millisecond { + t.Fatalf("sleep not jittered within bounds: %v", slept[0]) + } +} + +// Verify jittered backoff is used for client timeouts on first attempt. 
+func TestCreateChatCompletion_RetryTimeout_UsesJitteredBackoff(t *testing.T) { + attempts := 0 + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts == 1 { + time.Sleep(120 * time.Millisecond) + } + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + _ = json.NewEncoder(w).Encode(ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}}) + })) + defer ts.Close() + + var slept []time.Duration + oldSleep := sleepFunc + sleepFunc = func(d time.Duration) { slept = append(slept, d) } + defer func() { sleepFunc = oldSleep }() + + r := mathrand.New(mathrand.NewSource(7)) + c := NewClientWithRetry(ts.URL, "", 100*time.Millisecond, RetryPolicy{MaxRetries: 1, Backoff: 100 * time.Millisecond, JitterFraction: 0.25, Rand: r}) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + out, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "m", Messages: []Message{{Role: RoleUser, Content: "hi"}}}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if out.Choices[0].Message.Content != "ok" { + t.Fatalf("unexpected content: %+v", out) + } + if attempts < 2 { + t.Fatalf("expected retry, attempts=%d", attempts) + } + if len(slept) != 1 { + t.Fatalf("expected one sleep, got %d", len(slept)) + } + // base=100ms, jitter 25% => [75ms,125ms] + if slept[0] < 75*time.Millisecond || slept[0] > 125*time.Millisecond { + t.Fatalf("sleep not within jitter bounds: %v", slept[0]) + } +} diff --git a/internal/oai/config.go b/internal/oai/config.go new file mode 100644 index 0000000..2065d0b --- /dev/null +++ b/internal/oai/config.go @@ -0,0 +1,106 @@ +package oai + +import ( + "os" + "strings" + "unicode" +) + +// ImageConfig holds resolved configuration for the Images API endpoint. 
type ImageConfig struct {
	BaseURL string
	APIKey  string
}

// ResolveImageConfig determines the effective image configuration using the precedence:
// flag > env > inheritFrom > fallback. The env variables are OAI_IMAGE_BASE_URL and
// OAI_IMAGE_API_KEY. When API key is not provided via flag or env, it inherits from
// the provided mainAPIKey; if still empty, it falls back to OPENAI_API_KEY if present.
// The returned sources describe where each field came from: "flag" | "env" |
// "inherit" | "env:OPENAI_API_KEY" | "empty".
func ResolveImageConfig(flagBaseURL, flagAPIKey, mainBaseURL, mainAPIKey string) (cfg ImageConfig, baseSource, keySource string) {
	clean := strings.TrimSpace

	// Base URL: flag wins, then env, then inherit from the main endpoint.
	switch {
	case clean(flagBaseURL) != "":
		cfg.BaseURL, baseSource = clean(flagBaseURL), "flag"
	case clean(os.Getenv("OAI_IMAGE_BASE_URL")) != "":
		cfg.BaseURL, baseSource = clean(os.Getenv("OAI_IMAGE_BASE_URL")), "env"
	default:
		cfg.BaseURL, baseSource = clean(mainBaseURL), "inherit"
	}

	// API key: flag > env > inherit > OPENAI_API_KEY fallback > empty.
	switch {
	case clean(flagAPIKey) != "":
		cfg.APIKey, keySource = clean(flagAPIKey), "flag"
	case clean(os.Getenv("OAI_IMAGE_API_KEY")) != "":
		cfg.APIKey, keySource = clean(os.Getenv("OAI_IMAGE_API_KEY")), "env"
	case clean(mainAPIKey) != "":
		cfg.APIKey, keySource = clean(mainAPIKey), "inherit"
	case clean(os.Getenv("OPENAI_API_KEY")) != "":
		cfg.APIKey, keySource = clean(os.Getenv("OPENAI_API_KEY")), "env:OPENAI_API_KEY"
	default:
		cfg.APIKey, keySource = "", "empty"
	}
	return cfg, baseSource, keySource
}

// MaskAPIKeyLast4 returns a redacted representation of an API key showing only
// the last 4 characters. Empty input returns an empty string.
func MaskAPIKeyLast4(key string) string {
	trimmed := strings.TrimSpace(key)
	switch {
	case trimmed == "":
		return ""
	case len(trimmed) <= 4:
		return "****" + trimmed
	default:
		return "****" + trimmed[len(trimmed)-4:]
	}
}

// PrepConfig holds resolved configuration for the pre-stage flow.
// Currently it includes only the prepared prompt text.
type PrepConfig struct {
	// Prompt is the finalized pre-stage prompt after applying overrides.
	// When multiple prompt sources are provided, they are concatenated using
	// JoinPrompts and stored here.
	Prompt string
}

// JoinPrompts concatenates the given parts in-order using two newline
// separators ("\n\n") and trims trailing whitespace from the final string.
// It preserves leading whitespace and internal whitespace within parts.
func JoinPrompts(parts []string) string {
	if len(parts) == 0 {
		return ""
	}
	// Strip trailing newlines/carriage returns/tabs from each part before
	// joining, but keep trailing spaces so intentional spacing ahead of the
	// separator survives.
	var b strings.Builder
	for i, part := range parts {
		if i > 0 {
			b.WriteString("\n\n")
		}
		b.WriteString(trimRightNLTab(part))
	}
	// The fully joined string loses any trailing whitespace of any kind.
	return strings.TrimRightFunc(b.String(), unicode.IsSpace)
}

// trimRightNLTab removes trailing newlines, carriage returns, and tabs.
func trimRightNLTab(s string) string {
	return strings.TrimRight(s, "\n\r\t")
}

// NewPrepConfig constructs a PrepConfig with Prompt set to the normalized
// concatenation of the provided parts.
+func NewPrepConfig(parts []string) PrepConfig { + return PrepConfig{Prompt: JoinPrompts(parts)} +} diff --git a/internal/oai/config_test.go b/internal/oai/config_test.go new file mode 100644 index 0000000..e5b359e --- /dev/null +++ b/internal/oai/config_test.go @@ -0,0 +1,88 @@ +package oai + +import "testing" + +func TestResolveImageConfig_InheritFromMainWhenUnset(t *testing.T) { + img, baseSrc, keySrc := ResolveImageConfig("", "", "https://api.example.com/v1", "sk-main-1234") + if img.BaseURL != "https://api.example.com/v1" || baseSrc != "inherit" { + t.Fatalf("base inherit failed: %+v %s", img, baseSrc) + } + if img.APIKey != "sk-main-1234" || keySrc != "inherit" { + t.Fatalf("key inherit failed: %+v %s", img, keySrc) + } +} + +func TestResolveImageConfig_EnvOverridesInherit(t *testing.T) { + t.Setenv("OAI_IMAGE_BASE_URL", "https://img.example.com/v1") + t.Setenv("OAI_IMAGE_API_KEY", "sk-img-9999") + img, baseSrc, keySrc := ResolveImageConfig("", "", "https://api.example.com/v1", "sk-main-1234") + if img.BaseURL != "https://img.example.com/v1" || baseSrc != "env" { + t.Fatalf("base env failed: %+v %s", img, baseSrc) + } + if img.APIKey != "sk-img-9999" || keySrc != "env" { + t.Fatalf("key env failed: %+v %s", img, keySrc) + } +} + +func TestResolveImageConfig_FlagBeatsEnv(t *testing.T) { + t.Setenv("OAI_IMAGE_BASE_URL", "https://env-should-not-win") + img, baseSrc, keySrc := ResolveImageConfig("https://flag-base/v1", "sk-flag-0000", "https://api.example.com/v1", "sk-main-1234") + if img.BaseURL != "https://flag-base/v1" || baseSrc != "flag" { + t.Fatalf("base flag failed: %+v %s", img, baseSrc) + } + if img.APIKey != "sk-flag-0000" || keySrc != "flag" { + t.Fatalf("key flag failed: %+v %s", img, keySrc) + } +} + +func TestResolveImageConfig_FallbackToOpenAIKey(t *testing.T) { + t.Setenv("OAI_IMAGE_API_KEY", "") + t.Setenv("OPENAI_API_KEY", "sk-openai-7777") + img, _, keySrc := ResolveImageConfig("", "", "https://base", "") + if img.APIKey != "sk-openai-7777" || 
keySrc != "env:OPENAI_API_KEY" { + t.Fatalf("fallback to OPENAI_API_KEY failed: %+v %s", img, keySrc) + } +} + +func TestMaskAPIKeyLast4(t *testing.T) { + if MaskAPIKeyLast4("") != "" { + t.Fatalf("expected empty for empty input") + } + if got := MaskAPIKeyLast4("abcd"); got != "****abcd" { + t.Fatalf("expected ****abcd, got %s", got) + } + if got := MaskAPIKeyLast4("sk-verylong-xyz1"); got != "****xyz1" { + t.Fatalf("expected last4 masked, got %s", got) + } +} + +func TestJoinPrompts_BasicConcatAndTrim(t *testing.T) { + cases := []struct { + name string + parts []string + want string + }{ + {"empty", nil, ""}, + {"single", []string{"hello"}, "hello"}, + {"two parts", []string{"hello", "world"}, "hello\n\nworld"}, + {"preserve internal", []string{"a\n b", "c"}, "a\n b\n\nc"}, + {"trim trailing spaces", []string{"x ", "y\n"}, "x \n\ny"}, + {"trim trailing tabs/newlines", []string{"x\n\n", "y\t\n\n"}, "x\n\ny"}, + {"keep leading spaces", []string{" lead", "trail "}, " lead\n\ntrail"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := JoinPrompts(tc.parts) + if got != tc.want { + t.Fatalf("JoinPrompts mismatch: got %q want %q", got, tc.want) + } + }) + } +} + +func TestNewPrepConfig_SetsPrompt(t *testing.T) { + cfg := NewPrepConfig([]string{"one", "two"}) + if cfg.Prompt != "one\n\ntwo" { + t.Fatalf("unexpected prompt: %q", cfg.Prompt) + } +} diff --git a/internal/oai/context_window.go b/internal/oai/context_window.go new file mode 100644 index 0000000..4e7baa3 --- /dev/null +++ b/internal/oai/context_window.go @@ -0,0 +1,47 @@ +package oai + +import "strings" + +// DefaultContextWindow provides a conservative default for modern models. +const DefaultContextWindow = 128000 + +// modelToContextWindow holds known model context window sizes. +// Keys should be lower-case exact model identifiers. +var modelToContextWindow = map[string]int{ + "oss-gpt-20b": 8192, +} + +// ContextWindowForModel returns the total token window for a given model. 
+// When the model is unknown or empty, it returns DefaultContextWindow. +func ContextWindowForModel(model string) int { + m := strings.TrimSpace(strings.ToLower(model)) + if m == "" { + return DefaultContextWindow + } + if w, ok := modelToContextWindow[m]; ok { + return w + } + return DefaultContextWindow +} + +// ClampCompletionCap bounds a desired completion cap to the remaining context +// window after accounting for the estimated tokens of the prompt messages. It +// ensures a minimum of 1 token and subtracts a small safety margin. +// +// The clamp rule is: max(1, window - EstimateTokens(messages) - 32), then +// bounded above by the requested cap. +func ClampCompletionCap(messages []Message, requestedCap int, window int) int { + // Remaining space after considering prompt tokens and a small margin. + remaining := window - EstimateTokens(messages) - 32 + if remaining < 1 { + remaining = 1 + } + if requestedCap <= 0 { + // If caller provides non-positive cap, treat as wanting the maximum safe amount. 
+ return remaining + } + if requestedCap > remaining { + return remaining + } + return requestedCap +} diff --git a/internal/oai/context_window_test.go b/internal/oai/context_window_test.go new file mode 100644 index 0000000..0569213 --- /dev/null +++ b/internal/oai/context_window_test.go @@ -0,0 +1,69 @@ +package oai + +import "testing" + +func TestContextWindowForModel_Known(t *testing.T) { + if got := ContextWindowForModel("oss-gpt-20b"); got != 8192 { + t.Fatalf("expected 8192 for oss-gpt-20b, got %d", got) + } +} + +func TestContextWindowForModel_DefaultOnUnknown(t *testing.T) { + if got := ContextWindowForModel("unknown-model"); got != DefaultContextWindow { + t.Fatalf("expected default %d for unknown, got %d", DefaultContextWindow, got) + } +} + +func TestContextWindowForModel_CaseInsensitivityAndTrim(t *testing.T) { + if got := ContextWindowForModel(" OSS-GPT-20B "); got != 8192 { + t.Fatalf("expected 8192 for oss-gpt-20b with varied case/whitespace, got %d", got) + } +} + +func TestClampCompletionCap_WithinRemaining(t *testing.T) { + window := 1000 + msgs := []Message{{Role: RoleUser, Content: "hello"}} + // EstimateTokens("hello") ~ ceil(5/4)=2 + overhead 4 = 6; remaining ~ 1000-6-32=962 + cap := ClampCompletionCap(msgs, 100, window) + if cap != 100 { + t.Fatalf("expected cap to remain 100, got %d", cap) + } +} + +func TestClampCompletionCap_ClampsDownToRemaining(t *testing.T) { + window := 200 + // Make a long prompt to force small remaining + long := make([]byte, 600) + for i := range long { + long[i] = 'a' + } + msgs := []Message{{Role: RoleUser, Content: string(long)}} + // Rough estimate ~ ceil(600/4)=150 + overhead 4 = 154; remaining ~ 200-154-32=14 + cap := ClampCompletionCap(msgs, 100, window) + if cap != 14 { + t.Fatalf("expected clamped cap 14, got %d", cap) + } +} + +func TestClampCompletionCap_NonPositiveRequestedUsesRemaining(t *testing.T) { + window := 128 + msgs := []Message{{Role: RoleUser, Content: "hi"}} + cap := ClampCompletionCap(msgs, 
0, window) + if cap <= 0 { + t.Fatalf("expected positive cap, got %d", cap) + } +} + +func TestClampCompletionCap_MinimumOne(t *testing.T) { + // Construct messages that nearly exhaust the window + window := 64 + long := make([]byte, 1000) + for i := range long { + long[i] = 'a' + } + msgs := []Message{{Role: RoleUser, Content: string(long)}} + cap := ClampCompletionCap(msgs, 5, window) + if cap != 1 { + t.Fatalf("expected minimum cap of 1, got %d", cap) + } +} diff --git a/internal/oai/observability_test.go b/internal/oai/observability_test.go new file mode 100644 index 0000000..dec28ea --- /dev/null +++ b/internal/oai/observability_test.go @@ -0,0 +1,112 @@ +package oai + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +// waitForAuditFile polls the audit directory until a file appears or timeout elapses. +func waitForAuditFile(t *testing.T, auditDir string, timeout time.Duration) string { + t.Helper() + deadline := time.Now().Add(timeout) + for { + entries, err := os.ReadDir(auditDir) + if err == nil { + for _, e := range entries { + if !e.IsDir() { + return filepath.Join(auditDir, e.Name()) + } + } + } + if time.Now().After(deadline) { + t.Fatalf("audit log not created in %s", auditDir) + } + time.Sleep(10 * time.Millisecond) + } +} + +// findRepoRoot walks upward from CWD to locate the directory containing go.mod. +func findRepoRoot(t *testing.T) string { + t.Helper() + start, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + dir := start + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + t.Fatalf("go.mod not found from %s upward", start) + } + dir = parent + } +} + +// TestObservabilityTemperatureAudit encodes the desired behavior: we emit +// structured audit with temperature_effective and temperature_in_payload. 
+func TestObservabilityTemperatureAudit(t *testing.T) { + // Clean audit dir at repo root + root := findRepoRoot(t) + if err := os.RemoveAll(filepath.Join(root, ".goagent")); err != nil { + t.Logf("cleanup: %v", err) + } + + // Fake server to accept request and return minimal success + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Read and discard, but ensure it is valid JSON + var req ChatCompletionsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + if cerr := r.Body.Close(); cerr != nil { + t.Logf("close body: %v", cerr) + } + resp := ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}} + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode: %v", err) + } + })) + defer srv.Close() + + // Supported model and explicit temperature + model := "oss-gpt-20b" + temp := 0.7 + req := ChatCompletionsRequest{ + Model: model, + Messages: []Message{{Role: RoleUser, Content: "hi"}}, + Temperature: &temp, + } + + c := NewClientWithRetry(srv.URL, "", 3*time.Second, RetryPolicy{MaxRetries: 0}) + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + if _, err := c.CreateChatCompletion(ctx, req); err != nil { + t.Fatalf("call: %v", err) + } + + // Locate today's audit file and read it + auditDir := filepath.Join(root, ".goagent", "audit") + logFile := waitForAuditFile(t, auditDir, 2*time.Second) + data, err := os.ReadFile(logFile) + if err != nil { + t.Fatalf("read audit: %v", err) + } + + content := string(data) + if !strings.Contains(content, "temperature_effective") { + t.Fatalf("missing temperature_effective in audit; got:\n%s", truncate(content, 1000)) + } + if !strings.Contains(content, "temperature_in_payload") { + t.Fatalf("missing temperature_in_payload in audit; got:\n%s", truncate(content, 1000)) + } +} diff --git 
a/internal/oai/prestage/coordinator.go b/internal/oai/prestage/coordinator.go new file mode 100644 index 0000000..f13ae63 --- /dev/null +++ b/internal/oai/prestage/coordinator.go @@ -0,0 +1,96 @@ +package prestage + +import ( + "context" + "strings" + + "github.com/hyperifyio/goagent/internal/oai" + "github.com/hyperifyio/goagent/internal/state" +) + +// PrestageRunner abstracts the runner so tests can stub it. +// The concrete Runner in runner.go implements this interface. +type PrestageRunner interface { + Run(ctx context.Context, prompt string) (oai.ChatCompletionsResponse, error) +} + +// Coordinator wires restore-before-prep behavior with override precedence. +// If a state bundle is available and refinement is not requested and there are +// no explicit overrides, Coordinator will reuse the persisted prompts and +// settings without invoking the runner. +type Coordinator struct { + // Optional state directory. When empty, restore is disabled. + StateDir string + // Optional scope key; when set, restored bundle must match this key. + ScopeKey string + // When true, forces a new pre-stage run instead of restoring. + Refine bool + // Overrides from CLI: explicit prompt strings and pre-joined file contents. + PrepPrompts []string + PrepFilesJoined string + + // Runner used when a live pre-stage call is required. + Runner PrestageRunner + + // Warnf, when set, is used to emit a single-line warning message. + // It is called at most once per Execute() invocation. + Warnf func(format string, args ...any) +} + +// Outcome captures the result of Execute. +type Outcome struct { + // UsedRestore indicates a persisted bundle was reused and Runner was not called. + UsedRestore bool + // Restored is the bundle that was reused when UsedRestore is true. + Restored *state.StateBundle + // PromptUsed is the prompt text sent to Runner when UsedRestore is false. + PromptUsed string + // Response is the model response when Runner was called. 
+ Response oai.ChatCompletionsResponse +} + +// Execute performs restore-before-prep logic. +// Precedence for the effective pre-stage prompt source: +// 1. explicit prompt overrides (PrepPrompts) +// 2. file-based overrides (PrepFilesJoined) +// 3. restored bundle (when available, ScopeKey matches, and Refine==false) +// 4. embedded default +// +// Note: This function does not save state; persistence is handled elsewhere. +func (c *Coordinator) Execute(ctx context.Context) (Outcome, error) { + var out Outcome + + // If overrides are present, they take precedence and force a live call. + overrideSource, overrideText := oai.ResolvePrepPrompt(c.PrepPrompts, c.PrepFilesJoined) + overridesProvided := overrideSource == "override" && strings.TrimSpace(overrideText) != "" + + // If refine is requested but explicit overrides are provided, warn and proceed. + if overridesProvided && c.Refine && c.Warnf != nil { + c.Warnf("pre-stage: explicit overrides provided while -state-refine is set; proceeding with overrides") + } + + if !overridesProvided && !c.Refine && strings.TrimSpace(c.StateDir) != "" { + if b, err := state.LoadLatestStateBundle(c.StateDir); err == nil && b != nil { + if strings.TrimSpace(c.ScopeKey) == "" || b.ScopeKey == c.ScopeKey { + out.UsedRestore = true + out.Restored = b + return out, nil + } + // scope mismatch → ignore and fall through + } + // On any load error, fall through to live run + } + + // Determine the prompt to use for a live pre-stage run + _, prompt := oai.ResolvePrepPrompt(c.PrepPrompts, c.PrepFilesJoined) + + out.PromptUsed = prompt + if c.Runner != nil { + resp, err := c.Runner.Run(ctx, prompt) + if err != nil { + return out, err + } + out.Response = resp + } + return out, nil +} diff --git a/internal/oai/prestage/coordinator_test.go b/internal/oai/prestage/coordinator_test.go new file mode 100644 index 0000000..c659d3e --- /dev/null +++ b/internal/oai/prestage/coordinator_test.go @@ -0,0 +1,186 @@ +package prestage + +import ( + 
"context" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/hyperifyio/goagent/internal/oai" + "github.com/hyperifyio/goagent/internal/state" +) + +// stubRunner implements PrestageRunner for tests. +type stubRunner struct { + calls int + lastPrompt string + resp oai.ChatCompletionsResponse + err error +} + +func (s *stubRunner) Run(ctx context.Context, prompt string) (oai.ChatCompletionsResponse, error) { + s.calls++ + s.lastPrompt = prompt + return s.resp, s.err +} + +func writeValidBundle(t *testing.T, dir string, scope string) *state.StateBundle { + t.Helper() + b := &state.StateBundle{ + Version: "1", + CreatedAt: time.Now().UTC().Format(time.RFC3339), + ToolVersion: "test", + ModelID: "gpt-x", + BaseURL: "http://example.test", + ToolsetHash: "abc", + ScopeKey: scope, + Prompts: map[string]string{"system": "S", "developer": "D"}, + PrepSettings: map[string]any{"k": "v"}, + Context: map[string]any{"a": 1}, + ToolCaps: map[string]any{"c": true}, + Custom: map[string]any{"x": "y"}, + SourceHash: state.ComputeSourceHash("gpt-x", "http://example.test", "abc", scope), + } + if err := state.SaveStateBundle(dir, b); err != nil { + t.Fatalf("SaveStateBundle: %v", err) + } + return b +} + +func TestCoordinator_UsesRestoreWhenAvailableAndNoOverrides(t *testing.T) { + tmp := t.TempDir() + bundle := writeValidBundle(t, tmp, "scope-1") + + c := &Coordinator{StateDir: tmp, ScopeKey: "scope-1", Runner: &stubRunner{}} + out, err := c.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if !out.UsedRestore || out.Restored == nil { + t.Fatalf("expected restore to be used, got %+v", out) + } + if out.Restored.ScopeKey != bundle.ScopeKey { + t.Fatalf("restored bundle mismatch: %+v", out.Restored) + } +} + +func TestCoordinator_SkipsRestoreOnOverridesAndCallsRunner(t *testing.T) { + tmp := t.TempDir() + _ = writeValidBundle(t, tmp, "scope-1") + + s := &stubRunner{resp: 
oai.ChatCompletionsResponse{Model: "m"}} + c := &Coordinator{StateDir: tmp, ScopeKey: "scope-1", PrepPrompts: []string{"OVERRIDE"}, Runner: s} + out, err := c.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if out.UsedRestore { + t.Fatalf("did not expect restore when overrides present") + } + if s.calls != 1 || s.lastPrompt == "" { + t.Fatalf("runner not called with prompt; calls=%d prompt=%q", s.calls, s.lastPrompt) + } +} + +func TestCoordinator_IgnoreRestoreWhenScopeMismatch(t *testing.T) { + tmp := t.TempDir() + _ = writeValidBundle(t, tmp, "scope-1") + s := &stubRunner{resp: oai.ChatCompletionsResponse{Model: "m"}} + c := &Coordinator{StateDir: tmp, ScopeKey: "other-scope", Runner: s} + out, err := c.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if out.UsedRestore { + t.Fatalf("expected live run due to scope mismatch") + } + if s.calls != 1 { + t.Fatalf("runner should be called once, got %d", s.calls) + } +} + +func TestCoordinator_RefineForcesLiveRun(t *testing.T) { + tmp := t.TempDir() + _ = writeValidBundle(t, tmp, "s") + s := &stubRunner{resp: oai.ChatCompletionsResponse{Model: "m"}} + c := &Coordinator{StateDir: tmp, ScopeKey: "s", Refine: true, Runner: s} + out, err := c.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if out.UsedRestore { + t.Fatalf("expected live run when Refine=true") + } + if s.calls != 1 { + t.Fatalf("runner should be called once, got %d", s.calls) + } +} + +func TestCoordinator_WarnsOnceWhenOverridesWithRefine(t *testing.T) { + s := &stubRunner{resp: oai.ChatCompletionsResponse{Model: "m"}} + var warns []string + c := &Coordinator{Refine: true, PrepPrompts: []string{"OVR"}, Runner: s, Warnf: func(format string, args ...any) { + warns = append(warns, fmt.Sprintf(format, args...)) + }} + out, err := c.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if 
out.UsedRestore { + t.Fatalf("should not restore when overrides present") + } + if len(warns) != 1 { + t.Fatalf("expected exactly one warning, got %d: %#v", len(warns), warns) + } + if !strings.Contains(strings.ToLower(warns[0]), "override") || !strings.Contains(strings.ToLower(warns[0]), "refine") { + t.Fatalf("warning should mention override and refine: %q", warns[0]) + } +} + +func TestCoordinator_NoStateDirFallsBackToDefaultPrompt(t *testing.T) { + s := &stubRunner{resp: oai.ChatCompletionsResponse{Model: "m"}} + c := &Coordinator{Runner: s} + out, err := c.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if out.UsedRestore || out.Restored != nil { + t.Fatalf("should not use restore without state dir: %+v", out) + } + if s.calls != 1 || out.PromptUsed == "" { + t.Fatalf("runner should be called with default prompt; calls=%d prompt=%q", s.calls, out.PromptUsed) + } +} + +func TestCoordinator_IgnoresCorruptStateAndRunsLive(t *testing.T) { + tmp := t.TempDir() + // Create corrupt latest.json + if err := os.WriteFile(filepath.Join(tmp, "latest.json"), []byte("not-json"), 0o600); err != nil { + t.Fatalf("write latest.json: %v", err) + } + s := &stubRunner{resp: oai.ChatCompletionsResponse{Model: "m"}} + c := &Coordinator{StateDir: tmp, Runner: s} + out, err := c.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if out.UsedRestore { + t.Fatalf("did not expect restore with corrupt state") + } + if s.calls != 1 { + t.Fatalf("runner should be called once, got %d", s.calls) + } +} + +func TestCoordinator_PropagatesRunnerError(t *testing.T) { + s := &stubRunner{err: errors.New("boom")} + c := &Coordinator{Runner: s} + _, err := c.Execute(context.Background()) + if err == nil { + t.Fatalf("expected error from runner") + } +} diff --git a/internal/oai/prestage/e2e_state_persistence_test.go b/internal/oai/prestage/e2e_state_persistence_test.go new file mode 100644 index 0000000..7cecd65 --- 
/dev/null +++ b/internal/oai/prestage/e2e_state_persistence_test.go @@ -0,0 +1,179 @@ +package prestage + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/hyperifyio/goagent/internal/state" +) + +// readLatestPointer is a tiny helper to read latest.json's path field. +func readLatestPointer(t *testing.T, dir string) string { + t.Helper() + b, err := os.ReadFile(filepath.Join(dir, "latest.json")) + if err != nil { + t.Fatalf("read latest.json: %v", err) + } + var ptr struct { + Version string `json:"version"` + Path string `json:"path"` + SHA256 string `json:"sha256"` + } + if err := json.Unmarshal(b, &ptr); err != nil { + t.Fatalf("unmarshal latest.json: %v", err) + } + if ptr.Version != "1" || strings.TrimSpace(ptr.Path) == "" { + t.Fatalf("invalid latest pointer: %#v", ptr) + } + return ptr.Path +} + +func countSnapshotFiles(t *testing.T, dir string) int { + t.Helper() + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + n := 0 + for _, e := range entries { + if strings.HasPrefix(e.Name(), "state-") && strings.HasSuffix(e.Name(), ".json") { + n++ + } + } + return n +} + +func TestE2E_State_SaveAndRestore(t *testing.T) { + tmp := t.TempDir() + scope := "testscope" + b1 := &state.StateBundle{ + Version: "1", + CreatedAt: time.Now().UTC().Format(time.RFC3339), + ToolVersion: "test", + ModelID: "gpt-x", + BaseURL: "http://api.example", + ToolsetHash: "toolset-1", + ScopeKey: scope, + Prompts: map[string]string{"system": "S", "developer": "dev1"}, + } + b1.SourceHash = state.ComputeSourceHash(b1.ModelID, b1.BaseURL, b1.ToolsetHash, b1.ScopeKey) + if err := state.SaveStateBundle(tmp, b1); err != nil { + t.Fatalf("SaveStateBundle(b1): %v", err) + } + _ = readLatestPointer(t, tmp) + if count := countSnapshotFiles(t, tmp); count != 1 { + t.Fatalf("expected 1 snapshot, got %d", count) + } + s1 := &stubRunner{} + c1 := &Coordinator{StateDir: tmp, ScopeKey: scope, Runner: s1} 
+ out1, err := c1.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute (restore) error: %v", err) + } + if !out1.UsedRestore || out1.Restored == nil { + t.Fatalf("expected restore, got %+v", out1) + } + if s1.calls != 0 { + t.Fatalf("runner should not be called on restore, calls=%d", s1.calls) + } +} + +func TestE2E_State_RefineAdvancesLatest(t *testing.T) { + tmp := t.TempDir() + scope := "testscope" + b1 := &state.StateBundle{ + Version: "1", + CreatedAt: time.Now().UTC().Format(time.RFC3339), + ToolVersion: "test", + ModelID: "gpt-x", + BaseURL: "http://api.example", + ToolsetHash: "toolset-1", + ScopeKey: scope, + Prompts: map[string]string{"system": "S", "developer": "dev1"}, + } + b1.SourceHash = state.ComputeSourceHash(b1.ModelID, b1.BaseURL, b1.ToolsetHash, b1.ScopeKey) + if err := state.SaveStateBundle(tmp, b1); err != nil { + t.Fatalf("SaveStateBundle(b1): %v", err) + } + firstPtr := readLatestPointer(t, tmp) + prev, err := state.LoadLatestStateBundle(tmp) + if err != nil || prev == nil { + t.Fatalf("LoadLatestStateBundle: %v, prev=%v", err, prev) + } + b2, err := state.RefineStateBundle(prev, "tighten temperature to 0.2", "hello user") + if err != nil { + t.Fatalf("RefineStateBundle: %v", err) + } + if err := state.SaveStateBundle(tmp, b2); err != nil { + t.Fatalf("SaveStateBundle(b2): %v", err) + } + secondPtr := readLatestPointer(t, tmp) + if secondPtr == firstPtr { + t.Fatalf("latest pointer did not advance: %q", secondPtr) + } + if count := countSnapshotFiles(t, tmp); count != 2 { + t.Fatalf("expected 2 snapshots after refine save, got %d", count) + } +} + +func TestE2E_State_PromptPrecedence(t *testing.T) { + tmp := t.TempDir() + scope := "testscope" + b1 := &state.StateBundle{ + Version: "1", + CreatedAt: time.Now().UTC().Format(time.RFC3339), + ToolVersion: "test", + ModelID: "gpt-x", + BaseURL: "http://api.example", + ToolsetHash: "toolset-1", + ScopeKey: scope, + Prompts: map[string]string{"system": "S", "developer": "dev1"}, + } 
+ b1.SourceHash = state.ComputeSourceHash(b1.ModelID, b1.BaseURL, b1.ToolsetHash, b1.ScopeKey) + if err := state.SaveStateBundle(tmp, b1); err != nil { + t.Fatalf("SaveStateBundle(b1): %v", err) + } + // Overrides win over restore + s2 := &stubRunner{} + c2 := &Coordinator{StateDir: tmp, ScopeKey: scope, PrepPrompts: []string{"OVERRIDE_PROMPT"}, Runner: s2} + out2, err := c2.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute (override) error: %v", err) + } + if out2.UsedRestore { + t.Fatalf("did not expect restore when overrides provided") + } + if s2.calls != 1 || s2.lastPrompt != "OVERRIDE_PROMPT" { + t.Fatalf("runner should be called with override; calls=%d, prompt=%q", s2.calls, s2.lastPrompt) + } + // Refine, then restore refined without overrides + prev, err := state.LoadLatestStateBundle(tmp) + if err != nil || prev == nil { + t.Fatalf("LoadLatestStateBundle: %v, prev=%v", err, prev) + } + b2, err := state.RefineStateBundle(prev, "tighten temperature to 0.2", "hello user") + if err != nil { + t.Fatalf("RefineStateBundle: %v", err) + } + if err := state.SaveStateBundle(tmp, b2); err != nil { + t.Fatalf("SaveStateBundle(b2): %v", err) + } + s3 := &stubRunner{} + c3 := &Coordinator{StateDir: tmp, ScopeKey: scope, Runner: s3} + out3, err := c3.Execute(context.Background()) + if err != nil { + t.Fatalf("Execute (restore refined) error: %v", err) + } + if !out3.UsedRestore || out3.Restored == nil { + t.Fatalf("expected restore of refined bundle, got %+v", out3) + } + dev := out3.Restored.Prompts["developer"] + if !strings.Contains(dev, "tighten temperature to 0.2") || !strings.Contains(dev, "USER: hello user") { + t.Fatalf("refined developer prompt missing expected parts: %q", dev) + } +} diff --git a/internal/oai/prestage/merge.go b/internal/oai/prestage/merge.go new file mode 100644 index 0000000..cdf3357 --- /dev/null +++ b/internal/oai/prestage/merge.go @@ -0,0 +1,210 @@ +package prestage + +import ( + "encoding/json" + "strings" + + 
"github.com/hyperifyio/goagent/internal/oai" +) + +// ToolConfig captures optional tool enable/disable hints and arbitrary key hints +// produced by the pre-stage processor. +type ToolConfig struct { + EnableTools []string `json:"enable_tools"` + Hints map[string]any `json:"hints"` +} + +// PrestageParsed represents structured data extracted from the pre-stage +// Harmony payload. Fields are optional; empty values indicate absence. +type PrestageParsed struct { + System string // optional replacement for the system prompt + Developers []string // zero-or-more developer prompts to append + ToolConfig *ToolConfig // optional tool configuration hints + ImageInstructions map[string]any // optional defaults for downstream image tools +} + +// ParsePrestagePayload parses a JSON payload returned by the pre-stage model. +// The expected format is a JSON array where elements are either Harmony +// messages with {"role":"system|developer","content":"..."} or objects +// containing one of the keys {"system": string}, {"developer": string}, +// {"tool_config": {enable_tools:[], hints:{}}}, or {"image_instructions": {...}}. +// Unknown objects are ignored to keep parsing forward-compatible. +func ParsePrestagePayload(payload string) (PrestageParsed, error) { + var out PrestageParsed + s := strings.TrimSpace(payload) + if s == "" { + return out, nil + } + arr, err := parseToObjectArray(s) + if err != nil { + return out, err + } + for _, obj := range arr { + updateParsedFromObject(obj, &out) + } + return out, nil +} + +// parseToObjectArray accepts either a JSON array of objects or a single object and returns a slice. 
+func parseToObjectArray(s string) ([]map[string]json.RawMessage, error) { + var arr []map[string]json.RawMessage + if err := json.Unmarshal([]byte(s), &arr); err == nil { + return arr, nil + } + var single map[string]json.RawMessage + if err := json.Unmarshal([]byte(s), &single); err != nil { + return nil, err + } + return []map[string]json.RawMessage{single}, nil +} + +// updateParsedFromObject mutates out based on recognized fields in obj. +func updateParsedFromObject(obj map[string]json.RawMessage, out *PrestageParsed) { + if tryRoleBased(obj, out) { + return + } + _ = tryKeyBased(obj, out) +} + +// tryRoleBased handles objects using the explicit Harmony role schema. +func tryRoleBased(obj map[string]json.RawMessage, out *PrestageParsed) bool { + rawRole, ok := obj["role"] + if !ok { + return false + } + var role string + if err := json.Unmarshal(rawRole, &role); err != nil { + return false + } + role = strings.ToLower(strings.TrimSpace(role)) + if role != oai.RoleSystem && role != oai.RoleDeveloper { + return false + } + var content string + if rawContent, ok := obj["content"]; ok { + if err := json.Unmarshal(rawContent, &content); err != nil { + return true + } + content = strings.TrimSpace(content) + } + if content == "" { + return true + } + if role == oai.RoleSystem { + if out.System == "" { + out.System = content + } + } else { + out.Developers = append(out.Developers, content) + } + return true +} + +// tryKeyBased supports legacy key-based entries. 
+func tryKeyBased(obj map[string]json.RawMessage, out *PrestageParsed) bool { + if rawSys, ok := obj["system"]; ok { + var sys string + if err := json.Unmarshal(rawSys, &sys); err == nil { + sys = strings.TrimSpace(sys) + if sys != "" && out.System == "" { + out.System = sys + } + } + return true + } + if rawDev, ok := obj["developer"]; ok { + var dev string + if err := json.Unmarshal(rawDev, &dev); err == nil { + dev = strings.TrimSpace(dev) + if dev != "" { + out.Developers = append(out.Developers, dev) + } + } + return true + } + if rawTool, ok := obj["tool_config"]; ok { + var tc ToolConfig + if err := json.Unmarshal(rawTool, &tc); err == nil { + if len(tc.EnableTools) == 0 { + tc.EnableTools = nil + } + if len(tc.Hints) == 0 { + tc.Hints = nil + } + if out.ToolConfig == nil { + out.ToolConfig = &tc + } + } + return true + } + if rawImg, ok := obj["image_instructions"]; ok { + var ii map[string]any + if err := json.Unmarshal(rawImg, &ii); err == nil { + if len(ii) > 0 && out.ImageInstructions == nil { + out.ImageInstructions = ii + } + } + return true + } + return false +} + +// MergePrestageIntoMessages merges parsed pre-stage outputs into the provided +// seed Harmony messages. It applies the following deterministic rules: +// 1. If parsed.System is non-empty, replace the first system message content. +// 2. Append parsed.Developers immediately before the first user message; when +// no user message exists, append them to the end. CLI-provided developer +// messages in the seed remain first, preserving precedence. +// +// Messages with other roles are preserved in their original order. 
+func MergePrestageIntoMessages(seed []oai.Message, parsed PrestageParsed) []oai.Message { + // Replace system content when provided + out := make([]oai.Message, len(seed)) + copy(out, seed) + if strings.TrimSpace(parsed.System) != "" { + for i := range out { + if out[i].Role == oai.RoleSystem { + out[i].Content = parsed.System + break + } + } + } + + // Determine insertion index: immediately before first user message + insertIdx := -1 + for i := range out { + if out[i].Role == oai.RoleUser { + insertIdx = i + break + } + } + + if len(parsed.Developers) == 0 { + return out + } + + // Build developer messages to insert + devMsgs := make([]oai.Message, 0, len(parsed.Developers)) + for _, d := range parsed.Developers { + d = strings.TrimSpace(d) + if d == "" { + continue + } + devMsgs = append(devMsgs, oai.Message{Role: oai.RoleDeveloper, Content: d}) + } + if len(devMsgs) == 0 { + return out + } + + if insertIdx < 0 || insertIdx > len(out) { + // No user message; append to end + return append(out, devMsgs...) + } + + // Insert before user + merged := make([]oai.Message, 0, len(out)+len(devMsgs)) + merged = append(merged, out[:insertIdx]...) + merged = append(merged, devMsgs...) + merged = append(merged, out[insertIdx:]...) 
+ return merged +} diff --git a/internal/oai/prestage/merge_test.go b/internal/oai/prestage/merge_test.go new file mode 100644 index 0000000..d016eda --- /dev/null +++ b/internal/oai/prestage/merge_test.go @@ -0,0 +1,89 @@ +package prestage + +import ( + "encoding/json" + "testing" + + "github.com/hyperifyio/goagent/internal/oai" +) + +func TestParsePrestagePayload_SupportsRoleSchemaAndKeyEntries(t *testing.T) { + payload := `[ + {"role":"system","content":"SYS"}, + {"role":"developer","content":"D1"}, + {"developer":"D2"}, + {"tool_config": {"enable_tools":["http_fetch"], "hints": {"http_fetch.max_bytes": 1000}}}, + {"image_instructions": {"style":"natural"}} + ]` + parsed, err := ParsePrestagePayload(payload) + if err != nil { + t.Fatalf("parse error: %v", err) + } + if parsed.System != "SYS" { + t.Fatalf("system=%q", parsed.System) + } + if len(parsed.Developers) != 2 || parsed.Developers[0] != "D1" || parsed.Developers[1] != "D2" { + t.Fatalf("developers=%v", parsed.Developers) + } + if parsed.ToolConfig == nil || len(parsed.ToolConfig.EnableTools) != 1 || parsed.ToolConfig.EnableTools[0] != "http_fetch" { + t.Fatalf("tool_config=%v", parsed.ToolConfig) + } + if parsed.ImageInstructions == nil { + t.Fatalf("image_instructions missing: %v", parsed.ImageInstructions) + } + if v, ok := parsed.ImageInstructions["style"]; !ok { + t.Fatalf("style missing in image_instructions: %v", parsed.ImageInstructions) + } else if s, ok := v.(string); !ok || s != "natural" { + t.Fatalf("style invalid: %#v", v) + } + // done +} + +func TestMergePrestageIntoMessages_ReplacesSystemAndAppendsDevelopers(t *testing.T) { + seed := []oai.Message{ + {Role: oai.RoleSystem, Content: "sys0"}, + {Role: oai.RoleDeveloper, Content: "cli-dev-1"}, + {Role: oai.RoleUser, Content: "user"}, + } + parsed := PrestageParsed{System: "sys1", Developers: []string{"p-dev-1", "p-dev-2"}} + merged := MergePrestageIntoMessages(seed, parsed) + if merged[0].Content != "sys1" { + t.Fatalf("system not replaced: 
%+v", merged) + } + // Expected order: system, cli-dev-1, p-dev-1, p-dev-2, user + want := []string{oai.RoleSystem, oai.RoleDeveloper, oai.RoleDeveloper, oai.RoleDeveloper, oai.RoleUser} + if len(merged) != len(want) { + t.Fatalf("len=%d want %d", len(merged), len(want)) + } + for i, r := range want { + if merged[i].Role != r { + t.Fatalf("role[%d]=%s want %s", i, merged[i].Role, r) + } + } +} + +func TestParsePrestagePayload_IgnoresUnknownObjects(t *testing.T) { + payload := `[{"foo":"bar"},{"developer":"D"}]` + parsed, err := ParsePrestagePayload(payload) + if err != nil { + t.Fatalf("parse error: %v", err) + } + if len(parsed.Developers) != 1 || parsed.Developers[0] != "D" { + t.Fatalf("developers=%v", parsed.Developers) + } +} + +func TestParsePrestagePayload_SingleObject(t *testing.T) { + obj := map[string]any{"system": "S"} + b, mErr := json.Marshal(obj) + if mErr != nil { + t.Fatalf("marshal: %v", mErr) + } + parsed, err := ParsePrestagePayload(string(b)) + if err != nil { + t.Fatalf("parse error: %v", err) + } + if parsed.System != "S" { + t.Fatalf("system=%q", parsed.System) + } +} diff --git a/internal/oai/prestage/runner.go b/internal/oai/prestage/runner.go new file mode 100644 index 0000000..24db37d --- /dev/null +++ b/internal/oai/prestage/runner.go @@ -0,0 +1,47 @@ +package prestage + +import ( + "context" + "time" + + "github.com/hyperifyio/goagent/internal/oai" +) + +// Runner sends the pre-stage prompt to the model and returns the raw response. +// It is intentionally minimal and only wires the resolved prompt as the user message. +type Runner struct { + Client *oai.Client + Model string + // Optional knobs; callers may set appropriate values based on CLI flags. + Temperature *float64 + TopP *float64 + Timeout time.Duration + // When true, request JSON mode (response_format {type:"json_object"}) if supported. + JSONMode bool +} + +// Run executes a single non-streaming chat completion for the pre-stage using +// the provided resolved prompt text. 
The user message content will be exactly +// the provided prompt. The system message is omitted by default. +func (r *Runner) Run(ctx context.Context, prompt string) (oai.ChatCompletionsResponse, error) { + req := oai.ChatCompletionsRequest{ + Model: r.Model, + Messages: []oai.Message{ + {Role: oai.RoleUser, Content: prompt}, + }, + TopP: r.TopP, + Temperature: r.Temperature, + } + // Capability-based omissions for sampling knobs are handled by the client for temperature. + // Enforce one‑knob rule here: if TopP is set, do not send Temperature at all. + if r.TopP != nil { + req.Temperature = nil + } + // Opt into JSON mode when requested; callers decide based on capability map. + if r.JSONMode { + req.ResponseFormat = &oai.ResponseFormat{Type: "json_object"} + } + // Tag audit with stage label "prep" for observability + ctx = oai.WithAuditStage(ctx, "prep") + return r.Client.CreateChatCompletion(ctx, req) +} diff --git a/internal/oai/prestage/runner_test.go b/internal/oai/prestage/runner_test.go new file mode 100644 index 0000000..4c7c070 --- /dev/null +++ b/internal/oai/prestage/runner_test.go @@ -0,0 +1,87 @@ +package prestage + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/hyperifyio/goagent/internal/oai" +) + +func TestRunner_SendsResolvedPromptAsUserMessage(t *testing.T) { + // Arrange a fake OpenAI-compatible server that asserts first user message equals prompt + const wantPrompt = "PREP_PROMPT_TEXT" + var gotReq oai.ChatCompletionsRequest + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/chat/completions" { + http.Error(w, "wrong path", http.StatusNotFound) + return + } + dec := json.NewDecoder(r.Body) + if err := dec.Decode(&gotReq); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + // Minimal valid response + if err := json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Model: gotReq.Model}); 
err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + })) + defer server.Close() + + client := oai.NewClient(server.URL, "", 2*time.Second) + runner := &Runner{Client: client, Model: "gpt-test-1"} + + // Act + _, err := runner.Run(context.Background(), wantPrompt) + if err != nil { + t.Fatalf("Run() error: %v", err) + } + + // Assert + if len(gotReq.Messages) == 0 || gotReq.Messages[0].Role != oai.RoleUser || gotReq.Messages[0].Content != wantPrompt { + t.Fatalf("unexpected first message: %+v", gotReq.Messages) + } +} + +func TestRunner_OneKnobRuleAndJSONMode(t *testing.T) { + // Arrange a fake server that captures request + var gotReq oai.ChatCompletionsRequest + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + dec := json.NewDecoder(r.Body) + if err := dec.Decode(&gotReq); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + if err := json.NewEncoder(w).Encode(oai.ChatCompletionsResponse{Model: gotReq.Model}); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + })) + defer server.Close() + + client := oai.NewClient(server.URL, "", 2*time.Second) + temp := 0.5 + topP := 0.9 + runner := &Runner{Client: client, Model: "gpt-test-1", Temperature: &temp, TopP: &topP, JSONMode: true} + + _, err := runner.Run(context.Background(), "x") + if err != nil { + t.Fatalf("Run: %v", err) + } + // When TopP is set, temperature must be omitted per one‑knob rule + if gotReq.TopP == nil || *gotReq.TopP != 0.9 { + t.Fatalf("expected top_p 0.9, got %+v", gotReq.TopP) + } + if gotReq.Temperature != nil { + t.Fatalf("expected temperature omitted when top_p set; got %v", *gotReq.Temperature) + } + // JSON mode should be requested + if gotReq.ResponseFormat == nil || gotReq.ResponseFormat.Type != "json_object" { + t.Fatalf("expected response_format json_object, got %+v", gotReq.ResponseFormat) + } +} diff --git a/internal/oai/profile.go 
b/internal/oai/profile.go new file mode 100644 index 0000000..5d3cf89 --- /dev/null +++ b/internal/oai/profile.go @@ -0,0 +1,41 @@ +package oai + +import "strings" + +// PromptProfile enumerates supported prompt style presets. +// Valid values (case-insensitive): deterministic | general | creative | reasoning. +type PromptProfile string + +const ( + ProfileDeterministic PromptProfile = "deterministic" + ProfileGeneral PromptProfile = "general" + ProfileCreative PromptProfile = "creative" + ProfileReasoning PromptProfile = "reasoning" +) + +// MapProfileToTemperature returns the target temperature for a given profile +// and whether the temperature field should be included for the specified model. +// +// Rules: +// - deterministic => temperature 0.1 (when supported) +// - general | creative | reasoning => temperature 1.0 (when supported) +// - if the model does not support temperature, omit the field (false) +func MapProfileToTemperature(model string, profile PromptProfile) (float64, bool) { + // Decide the desired temperature by profile (case-insensitive) + p := strings.ToLower(string(profile)) + var desired float64 + switch p { + case string(ProfileDeterministic): + desired = 0.1 + case string(ProfileGeneral), string(ProfileCreative), string(ProfileReasoning): + fallthrough + default: + desired = 1.0 + } + // Respect model capability: omit temperature when unsupported + if !SupportsTemperature(model) { + return 0, false + } + // Clamp to allowed range to avoid surprises + return clampTemperature(desired), true +} diff --git a/internal/oai/profile_test.go b/internal/oai/profile_test.go new file mode 100644 index 0000000..4b9fca9 --- /dev/null +++ b/internal/oai/profile_test.go @@ -0,0 +1,44 @@ +package oai + +import "testing" + +func TestMapProfileToTemperature_SupportedModel(t *testing.T) { + model := "oss-gpt-20b" // supports temperature by default + cases := []struct { + name string + profile PromptProfile + wantTemp float64 + wantOK bool + }{ + 
{"deterministic->0.1", ProfileDeterministic, 0.1, true}, + {"general->1.0", ProfileGeneral, 1.0, true}, + {"creative->1.0", ProfileCreative, 1.0, true}, + {"reasoning->1.0", ProfileReasoning, 1.0, true}, + {"unknown-defaults-to-1.0", PromptProfile("weird"), 1.0, true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, ok := MapProfileToTemperature(model, tc.profile) + if ok != tc.wantOK { + t.Fatalf("ok mismatch: got %v want %v", ok, tc.wantOK) + } + if !ok { + return + } + if got != tc.wantTemp { + t.Fatalf("temperature mismatch: got %v want %v", got, tc.wantTemp) + } + }) + } +} + +func TestMapProfileToTemperature_UnsupportedModelOmits(t *testing.T) { + // Any o4* model is treated as not supporting temperature + model := "o4-mini" + if temp, ok := MapProfileToTemperature(model, ProfileDeterministic); ok { + t.Fatalf("expected omit for unsupported model; got temp=%v ok=%v", temp, ok) + } + if temp, ok := MapProfileToTemperature(model, ProfileGeneral); ok { + t.Fatalf("expected omit for unsupported model; got temp=%v ok=%v", temp, ok) + } +} diff --git a/internal/oai/prompts.go b/internal/oai/prompts.go new file mode 100644 index 0000000..501e29f --- /dev/null +++ b/internal/oai/prompts.go @@ -0,0 +1,9 @@ +package oai + +import _ "embed" + +//go:embed assets/prep_default.md +var prepDefaultPrompt string + +// DefaultPrepPrompt returns the embedded default pre-stage prompt. 
+func DefaultPrepPrompt() string { return prepDefaultPrompt } diff --git a/internal/oai/prompts_test.go b/internal/oai/prompts_test.go new file mode 100644 index 0000000..53cc670 --- /dev/null +++ b/internal/oai/prompts_test.go @@ -0,0 +1,43 @@ +package oai + +import "testing" + +func TestDefaultPrepPrompt_NonEmpty(t *testing.T) { + if s := DefaultPrepPrompt(); len(s) == 0 { + t.Fatalf("default prep prompt is empty") + } + // Basic sanity: contains the Harmony JSON phrase + if s := DefaultPrepPrompt(); !containsAll(s, []string{"Harmony", "JSON"}) { + t.Log("default prompt does not contain expected shape hints; continuing") + } +} + +func containsAll(s string, subs []string) bool { + for _, sub := range subs { + if !contains(s, sub) { + return false + } + } + return true +} + +func contains(s, sub string) bool { + return len(s) >= len(sub) && (len(sub) == 0 || (func() bool { return (stringIndex(s, sub) >= 0) })()) +} + +// naive index to avoid importing strings in this tiny test +func stringIndex(haystack, needle string) int { + for i := 0; i+len(needle) <= len(haystack); i++ { + match := true + for j := 0; j < len(needle); j++ { + if haystack[i+j] != needle[j] { + match = false + break + } + } + if match { + return i + } + } + return -1 +} diff --git a/internal/oai/resolve.go b/internal/oai/resolve.go new file mode 100644 index 0000000..a33d6d2 --- /dev/null +++ b/internal/oai/resolve.go @@ -0,0 +1,141 @@ +package oai + +import ( + "strconv" + "strings" + "time" +) + +// ResolveString resolves a string value with precedence: +// flag > env > inheritFrom > default. +// When inheritFrom is nil, the inherit step is skipped. +// Returns the resolved value and a source label: "flag"|"env"|"inherit"|"default". 
func ResolveString(flagValue string, envValue string, inheritFrom *string, def string) (string, string) {
	if v := strings.TrimSpace(flagValue); v != "" {
		return v, "flag"
	}
	if v := strings.TrimSpace(envValue); v != "" {
		return v, "env"
	}
	if inheritFrom != nil {
		// A non-nil inherit pointer always wins over the default, even when
		// the inherited value trims to empty.
		return strings.TrimSpace(*inheritFrom), "inherit"
	}
	return def, "default"
}

// ResolveInt resolves an int value with precedence:
// flag (when flagSet) > env (when parseable) > inheritFrom > default.
// The inheritFrom pointer may be nil to skip that step.
// Returns the resolved value and a source label.
func ResolveInt(flagSet bool, flagValue int, envValue string, inheritFrom *int, def int) (int, string) {
	if flagSet {
		return flagValue, "flag"
	}
	if raw := strings.TrimSpace(envValue); raw != "" {
		if n, err := strconv.Atoi(raw); err == nil {
			return n, "env"
		}
		// Unparseable env values are ignored rather than fatal.
	}
	if inheritFrom != nil {
		return *inheritFrom, "inherit"
	}
	return def, "default"
}

// ResolveBool resolves a bool with precedence:
// flag (when flagSet) > env (parseable) > inheritFrom > default.
// Returns the resolved value and a source label.
func ResolveBool(flagSet bool, flagValue bool, envValue string, inheritFrom *bool, def bool) (bool, string) {
	if flagSet {
		return flagValue, "flag"
	}
	if raw := strings.TrimSpace(envValue); raw != "" {
		if b, err := strconv.ParseBool(raw); err == nil {
			return b, "env"
		}
		// Unparseable env values are ignored rather than fatal.
	}
	if inheritFrom != nil {
		return *inheritFrom, "inherit"
	}
	return def, "default"
}

// ResolveDuration resolves a time.Duration with precedence:
// flag (when flagSet) > env (parseable) > inheritFrom > default.
// Env accepts either Go duration strings or plain integer seconds.
// Returns the resolved value and a source label.
+func ResolveDuration(flagSet bool, flagValue time.Duration, envValue string, inheritFrom *time.Duration, def time.Duration) (time.Duration, string) { + if flagSet { + return flagValue, "flag" + } + ev := strings.TrimSpace(envValue) + if ev != "" { + if d, err := parseDurationFlexible(ev); err == nil { + return d, "env" + } + // fall through on parse error + } + if inheritFrom != nil { + return *inheritFrom, "inherit" + } + return def, "default" +} + +// parseDurationFlexible mirrors the CLI parser behavior: accepts standard Go +// duration strings (e.g., "750ms", "3s") and plain integer seconds (e.g., "30"). +func parseDurationFlexible(raw string) (time.Duration, error) { + s := strings.TrimSpace(raw) + if s == "" { + return 0, strconv.ErrSyntax + } + if d, err := time.ParseDuration(s); err == nil { + return d, nil + } + // Accept plain integer seconds + allDigits := true + for _, r := range s { + if r < '0' || r > '9' { + allDigits = false + break + } + } + if allDigits { + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return 0, err + } + if n < 0 { + // Allow zero; negative is invalid + return 0, strconv.ErrRange + } + return time.Duration(n) * time.Second, nil + } + return 0, strconv.ErrSyntax +} + +// ResolvePrepPrompt determines the effective pre-stage prompt text and its source. +// It applies the following deterministic order: +// 1. If one or more explicit prompt strings are provided via flags (prepPrompts), +// join them using JoinPrompts and return ("override", text). +// 2. Else if one or more prompts were loaded from files (prepFilesJoined), use that +// joined text and return ("override", text). +// 3. Otherwise, return the embedded default via DefaultPrepPrompt() with source +// label "default". +// +// Callers are expected to pre-join file contents in the order observed when flags +// were parsed to produce prepFilesJoined. 
+func ResolvePrepPrompt(prepPrompts []string, prepFilesJoined string) (source string, text string) { + if len(prepPrompts) > 0 { + return "override", JoinPrompts(prepPrompts) + } + if strings.TrimSpace(prepFilesJoined) != "" { + return "override", strings.TrimRightFunc(prepFilesJoined, func(r rune) bool { return r == '\n' || r == '\r' || r == '\t' || r == ' ' }) + } + return "default", DefaultPrepPrompt() +} diff --git a/internal/oai/resolve_test.go b/internal/oai/resolve_test.go new file mode 100644 index 0000000..010f9f8 --- /dev/null +++ b/internal/oai/resolve_test.go @@ -0,0 +1,207 @@ +package oai + +import ( + "os" + "strconv" + "strings" + "testing" + "time" +) + +func TestResolveString(t *testing.T) { + inherit := "inh" + v, src := ResolveString("flag", "", &inherit, "def") + if v != "flag" || src != "flag" { + t.Fatalf("flag precedence failed: %s %s", v, src) + } + v, src = ResolveString("", "env", &inherit, "def") + if v != "env" || src != "env" { + t.Fatalf("env precedence failed: %s %s", v, src) + } + v, src = ResolveString("", "", &inherit, "def") + if v != "inh" || src != "inherit" { + t.Fatalf("inherit precedence failed: %s %s", v, src) + } + v, src = ResolveString("", "", nil, "def") + if v != "def" || src != "default" { + t.Fatalf("default precedence failed: %s %s", v, src) + } +} + +func TestResolveInt(t *testing.T) { + inherit := 9 + if v, src := ResolveInt(true, 7, "", &inherit, 1); v != 7 || src != "flag" { + t.Fatalf("flag precedence failed") + } + if v, src := ResolveInt(false, 0, "5", &inherit, 1); v != 5 || src != "env" { + t.Fatalf("env precedence failed") + } + if v, src := ResolveInt(false, 0, "bad", &inherit, 1); v != 9 || src != "inherit" { + t.Fatalf("inherit fallback failed") + } + if v, src := ResolveInt(false, 0, "", nil, 1); v != 1 || src != "default" { + t.Fatalf("default fallback failed") + } +} + +func TestResolveBool(t *testing.T) { + inh := false + if v, src := ResolveBool(true, true, "", &inh, false); !v || src != "flag" { + 
t.Fatalf("flag precedence failed") + } + if v, src := ResolveBool(false, false, "true", &inh, false); !v || src != "env" { + t.Fatalf("env precedence failed") + } + if v, src := ResolveBool(false, false, "bad", &inh, false); v != false || src != "inherit" { + t.Fatalf("inherit fallback failed") + } + if v, src := ResolveBool(false, false, "", nil, true); v != true || src != "default" { + t.Fatalf("default fallback failed") + } +} + +func TestResolveDuration(t *testing.T) { + inh := 2 * time.Second + if v, src := ResolveDuration(true, 3*time.Second, "", &inh, time.Second); v != 3*time.Second || src != "flag" { + t.Fatalf("flag precedence failed") + } + if v, src := ResolveDuration(false, 0, "750ms", &inh, time.Second); v != 750*time.Millisecond || src != "env" { + t.Fatalf("env duration parse failed: %s %s", v, src) + } + if v, src := ResolveDuration(false, 0, "5", &inh, time.Second); v != 5*time.Second || src != "env" { + t.Fatalf("env integer seconds failed: %s %s", v, src) + } + if v, src := ResolveDuration(false, 0, "bad", &inh, time.Second); v != 2*time.Second || src != "inherit" { + t.Fatalf("inherit fallback failed: %s %s", v, src) + } + if v, src := ResolveDuration(false, 0, "", nil, time.Second); v != time.Second || src != "default" { + t.Fatalf("default fallback failed: %s %s", v, src) + } +} + +func TestParseDurationFlexible_ErrorCases(t *testing.T) { + // Empty string should return an error + if _, err := parseDurationFlexible(""); err == nil { + t.Fatalf("expected error for empty input") + } + // Negative seconds should return range error + if _, err := parseDurationFlexible("-5"); err == nil { + t.Fatalf("expected error for negative seconds") + } + // Non-parsable should return syntax error + if _, err := parseDurationFlexible("nonsense"); err == nil { + t.Fatalf("expected error for nonsense input") + } +} + +func TestResolveInt_EnvWhitespaceAndDefaultFallback(t *testing.T) { + // Env value with whitespace parses after trimming + if v, src := 
ResolveInt(false, 0, " 7 ", nil, 1); v != 7 || src != "env" { + t.Fatalf("env whitespace trim failed: v=%d src=%s", v, src) + } + // Env parse error with nil inherit falls back to default + if v, src := ResolveInt(false, 0, "bogus", nil, 3); v != 3 || src != "default" { + t.Fatalf("default fallback failed: v=%d src=%s", v, src) + } +} + +func TestResolveBool_EnvParseErrorDefaultFallback(t *testing.T) { + // Env parse error with nil inherit falls back to default + if v, src := ResolveBool(false, false, "notbool", nil, true); v != true || src != "default" { + t.Fatalf("default fallback for bool failed: v=%v src=%s", v, src) + } +} + +func TestResolveString_TrimEnvAndInherit(t *testing.T) { + inh := " inherited " + // Env wins and is trimmed + if v, src := ResolveString("", " env ", &inh, "def"); v != "env" || src != "env" { + t.Fatalf("env trim failed: v=%q src=%s", v, src) + } + // When env empty, inherit applies and is trimmed + if v, src := ResolveString("", "", &inh, "def"); v != strings.TrimSpace(inh) || src != "inherit" { + t.Fatalf("inherit trim failed: v=%q src=%s", v, src) + } +} + +func TestResolveDuration_EnvWhitespace(t *testing.T) { + inh := time.Second + if v, src := ResolveDuration(false, 0, " 1s ", &inh, time.Second); v != time.Second || src != "env" { + t.Fatalf("env whitespace duration failed: v=%s src=%s", v, src) + } +} + +func TestResolveInt_TrimsDoesNotAcceptNonDigits(t *testing.T) { + // Guard: numeric string with suffix should not parse; falls back to inherit + inh := 42 + if v, src := ResolveInt(false, 0, "5s", &inh, 0); v != inh || src != "inherit" { + t.Fatalf("inherit after bad int parse failed: v=%d src=%s", v, src) + } +} + +func TestParseDurationFlexible_EnvEquivalence_WhitespaceAndDigits(t *testing.T) { + t.Setenv("X", " 10 ") + d, err := parseDurationFlexible(strings.TrimSpace(os.Getenv("X"))) + if err != nil || d != 10*time.Second { + t.Fatalf("parse integer seconds with whitespace failed: %v %s", err, d) + } + // Also ensure large 
but valid seconds parse + s := strconv.Itoa(15) + if d2, err := parseDurationFlexible(s); err != nil || d2 != 15*time.Second { + t.Fatalf("parse integer seconds failed: %v %s", err, d2) + } +} + +func TestResolveImageConfig_MaskAPIKeyLast4_Compatibility(t *testing.T) { + // Sanity: ensure MaskAPIKeyLast4 still behaves as expected in config printer + if got := MaskAPIKeyLast4("abcd"); got != "****abcd" { + t.Fatalf("mask last4 failed: %s", got) + } +} + +func TestParseDurationFlexible_EnvEquivalence(t *testing.T) { + // Ensure our internal parser treats env duration strings and integers equally + t.Setenv("X", "750ms") + if d, err := parseDurationFlexible(os.Getenv("X")); err != nil || d != 750*time.Millisecond { + t.Fatalf("parse 750ms failed: %v %s", err, d) + } + t.Setenv("X", "2") + if d, err := parseDurationFlexible(os.Getenv("X")); err != nil || d != 2*time.Second { + t.Fatalf("parse 2 failed: %v %s", err, d) + } +} + +func TestResolvePrepPrompt_FlagOverridesAndJoins(t *testing.T) { + src, txt := ResolvePrepPrompt([]string{"one", "two"}, "from files should be ignored") + if src != "override" { + t.Fatalf("expected source override, got %s", src) + } + if txt != "one\n\ntwo" { + t.Fatalf("expected joined flag prompts, got %q", txt) + } +} + +func TestResolvePrepPrompt_FilesUsedWhenNoFlags(t *testing.T) { + // Simulate pre-joined file contents with extra trailing whitespace + src, txt := ResolvePrepPrompt(nil, "alpha\n\nbravo\n\n\n \t\n") + if src != "override" { + t.Fatalf("expected source override for files, got %s", src) + } + // Right-trim should remove trailing whitespace at the very end + if txt != "alpha\n\nbravo" { + t.Fatalf("unexpected file-joined text: %q", txt) + } +} + +func TestResolvePrepPrompt_DefaultWhenNoOverrides(t *testing.T) { + src, txt := ResolvePrepPrompt(nil, " ") + if src != "default" { + t.Fatalf("expected default source, got %s", src) + } + if txt == "" { + t.Fatalf("default prompt should be non-empty") + } + if txt != 
DefaultPrepPrompt() { + t.Fatalf("default prompt mismatch with embedded value") + } +} diff --git a/internal/oai/temperature_nudge.go b/internal/oai/temperature_nudge.go new file mode 100644 index 0000000..7f89640 --- /dev/null +++ b/internal/oai/temperature_nudge.go @@ -0,0 +1,42 @@ +package oai + +// Temperature clamping and nudge helpers. + +const ( + // minTemperature is the lowest allowed sampling temperature. + minTemperature = 0.1 + // maxTemperature is the highest allowed sampling temperature. + maxTemperature = 1.0 +) + +// clampTemperature returns value clamped to the inclusive range [0.1, 1.0]. +func clampTemperature(value float64) float64 { + if value < minTemperature { + return minTemperature + } + if value > maxTemperature { + return maxTemperature + } + return value +} + +// EffectiveTemperatureForModel returns the temperature to use for the given +// model, applying the supported-model guard and clamping to the allowed range. +// The second return value is false when the model does not support temperature +// and the caller should omit the field entirely. +func EffectiveTemperatureForModel(model string, temperature float64) (float64, bool) { + if !SupportsTemperature(model) { + return 0, false + } + return clampTemperature(temperature), true +} + +// NudgedTemperature applies a delta to the current temperature for supported +// models and returns the clamped result. When the target model does not support +// temperature, it returns (0, false) to indicate the field must be omitted. 
+func NudgedTemperature(model string, current float64, nudgeDelta float64) (float64, bool) { + if !SupportsTemperature(model) { + return 0, false + } + return clampTemperature(current + nudgeDelta), true +} diff --git a/internal/oai/temperature_nudge_test.go b/internal/oai/temperature_nudge_test.go new file mode 100644 index 0000000..ba80ba2 --- /dev/null +++ b/internal/oai/temperature_nudge_test.go @@ -0,0 +1,55 @@ +package oai + +import "testing" + +func TestClampTemperature(t *testing.T) { + cases := []struct { + in float64 + want float64 + }{ + {0.05, 0.1}, + {0.1, 0.1}, + {0.2, 0.2}, + {0.99, 0.99}, + {1.0, 1.0}, + {1.5, 1.0}, + } + for _, c := range cases { + if got := clampTemperature(c.in); got != c.want { + t.Fatalf("clamp(%v)=%v want %v", c.in, got, c.want) + } + } +} + +func TestEffectiveTemperatureForModel(t *testing.T) { + // Unsupported model: false + if _, ok := EffectiveTemperatureForModel("o3-mini", 0.7); ok { + t.Fatalf("expected unsupported model to return ok=false") + } + // Supported model: clamped and true + if got, ok := EffectiveTemperatureForModel("oss-gpt-20b", 1.5); !ok || got != 1.0 { + t.Fatalf("expected clamped=1.0 ok=true; got %v ok=%v", got, ok) + } + if got, ok := EffectiveTemperatureForModel("oss-gpt-20b", 0.05); !ok || got != 0.1 { + t.Fatalf("expected clamped=0.1 ok=true; got %v ok=%v", got, ok) + } +} + +func TestNudgedTemperature(t *testing.T) { + // Unsupported model: no-op (omit) + if _, ok := NudgedTemperature("o4-preview", 0.5, -0.1); ok { + t.Fatalf("expected unsupported model to return ok=false") + } + // Clamp lower bound + if got, ok := NudgedTemperature("oss-gpt-20b", 0.12, -0.1); !ok || got != 0.1 { + t.Fatalf("nudge lower clamp got %v ok=%v", got, ok) + } + // Clamp upper bound + if got, ok := NudgedTemperature("oss-gpt-20b", 0.95, 0.2); !ok || got != 1.0 { + t.Fatalf("nudge upper clamp got %v ok=%v", got, ok) + } + // In-range nudge + if got, ok := NudgedTemperature("oss-gpt-20b", 0.6, -0.1); !ok || got != 0.5 { + 
t.Fatalf("nudge in-range got %v ok=%v", got, ok) + } +} diff --git a/internal/oai/token_estimate.go b/internal/oai/token_estimate.go new file mode 100644 index 0000000..3154dfc --- /dev/null +++ b/internal/oai/token_estimate.go @@ -0,0 +1,55 @@ +package oai + +import ( + "math" +) + +// EstimateTokens returns a rough, deterministic token estimate for a set of +// chat messages. It intentionally uses a simple heuristic that avoids any +// external dependencies and is stable across platforms. +// +// Heuristic: +// - Assume ~4 characters per token on average +// - Add a small fixed overhead per message to account for roles/formatting +// - Include optional fields (name, tool_call_id) and a coarse cost for tool calls +func EstimateTokens(messages []Message) int { + const averageCharsPerToken = 4.0 + const perMessageOverheadTokens = 4 + const perToolCallOverheadTokens = 8 + + total := 0 + for _, msg := range messages { + // Content cost + if msg.Content != "" { + total += int(math.Ceil(float64(len(msg.Content)) / averageCharsPerToken)) + } + // Optional name and tool call id fields + if msg.Name != "" { + total += int(math.Ceil(float64(len(msg.Name)) / averageCharsPerToken)) + } + if msg.ToolCallID != "" { + total += int(math.Ceil(float64(len(msg.ToolCallID)) / averageCharsPerToken)) + } + // Tool calls (coarse) + if len(msg.ToolCalls) > 0 { + for _, tc := range msg.ToolCalls { + // Per-call overhead plus name/arguments length approximated to tokens + total += perToolCallOverheadTokens + if tc.Function.Name != "" { + total += int(math.Ceil(float64(len(tc.Function.Name)) / averageCharsPerToken)) + } + if tc.Function.Arguments != "" { + total += int(math.Ceil(float64(len(tc.Function.Arguments)) / averageCharsPerToken)) + } + } + } + // Per-message structural overhead + total += perMessageOverheadTokens + } + + // Ensure non-negative and at least one token per message in extreme edge cases + if total < len(messages) { + total = len(messages) + } + return total +} diff 
--git a/internal/oai/token_estimate_test.go b/internal/oai/token_estimate_test.go new file mode 100644 index 0000000..6059a5a --- /dev/null +++ b/internal/oai/token_estimate_test.go @@ -0,0 +1,36 @@ +package oai + +import "testing" + +func TestEstimateTokens_MonotonicGrowth(t *testing.T) { + msgs := []Message{{Role: RoleUser, Content: "hi"}} + t1 := EstimateTokens(msgs) + if t1 <= 0 { + t.Fatalf("expected positive estimate, got %d", t1) + } + + msgs = append(msgs, Message{Role: RoleAssistant, Content: "hello there"}) + t2 := EstimateTokens(msgs) + if t2 <= t1 { + t.Fatalf("expected estimate to grow, got t1=%d t2=%d", t1, t2) + } + + msgs = append(msgs, Message{Role: RoleTool, ToolCallID: "call_1", Content: "{\"ok\":true}"}) + t3 := EstimateTokens(msgs) + if t3 <= t2 { + t.Fatalf("expected estimate to grow with tool call, got t2=%d t3=%d", t2, t3) + } +} + +func TestEstimateTokens_RoughScale(t *testing.T) { + // 400 characters should be roughly ~100 tokens (+ overhead) + content := make([]byte, 400) + for i := range content { + content[i] = 'a' + } + msgs := []Message{{Role: RoleUser, Content: string(content)}} + est := EstimateTokens(msgs) + if est < 90 || est > 130 { // allow a generous band + t.Fatalf("expected estimate around 100±30, got %d", est) + } +} diff --git a/internal/oai/types.go b/internal/oai/types.go new file mode 100644 index 0000000..0a4a724 --- /dev/null +++ b/internal/oai/types.go @@ -0,0 +1,189 @@ +package oai + +import ( + "encoding/json" + "fmt" + "strings" +) + +// Message roles +const ( + RoleSystem = "system" + RoleUser = "user" + RoleAssistant = "assistant" + RoleTool = "tool" + // RoleDeveloper is a Harmony role used to convey developer guidance + // that is distinct from system and user prompts. Messages with this + // role are prepended ahead of user messages and may be merged from + // multiple sources (CLI flags and pre-stage refinement). + RoleDeveloper = "developer" +) + +// Message represents an OpenAI-compatible chat message. 
+// Tool results are conveyed via RoleTool with ToolCallID and Content. +type Message struct { + Role string `json:"role"` + Content string `json:"content,omitempty"` + Name string `json:"name,omitempty"` + ToolCallID string `json:"tool_call_id,omitempty"` + // Channel allows assistants to tag messages with a semantic channel such as + // "final", "critic", or "confidence". Unknown or empty channels are + // treated as normal assistant messages by the CLI unless routed explicitly. + Channel string `json:"channel,omitempty"` + // The OpenAI-compatible schema also allows "tool_calls" on assistant messages. + ToolCalls []ToolCall `json:"tool_calls,omitempty"` +} + +// ToolCall mirrors the OpenAI tool call structure. +type ToolCall struct { + ID string `json:"id"` + Type string `json:"type"` + Function ToolCallFunction `json:"function"` +} + +type ToolCallFunction struct { + Name string `json:"name"` + Arguments string `json:"arguments"` +} + +// Tool describes a function tool as per OpenAI API. +type Tool struct { + Type string `json:"type"` + Function ToolFunction `json:"function"` +} + +type ToolFunction struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` + Parameters json.RawMessage `json:"parameters,omitempty"` +} + +// ChatCompletionsRequest is the payload for POST /v1/chat/completions +// Compatible with OpenAI API. +type ChatCompletionsRequest struct { + Model string `json:"model"` + Messages []Message `json:"messages"` + Tools []Tool `json:"tools,omitempty"` + ToolChoice string `json:"tool_choice,omitempty"` + // TopP enables nucleus sampling when provided. One‑knob rule ensures either + // top_p or temperature is set, but never both. + TopP *float64 `json:"top_p,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + // ResponseFormat requests a specific response format from the model, such as + // JSON mode: {"type":"json_object"}. Omitted by default. 
// mentionsUnsupportedTemperature detects common API error messages indicating
// that the temperature parameter is invalid or unsupported for the model.
func mentionsUnsupportedTemperature(body string) bool {
	lowered := strings.ToLower(body)
	if lowered == "" {
		return false
	}
	// The message must mention temperature together with either
	// "unsupported" or "invalid" to count as a temperature rejection.
	if !strings.Contains(lowered, "temperature") {
		return false
	}
	return strings.Contains(lowered, "unsupported") || strings.Contains(lowered, "invalid")
}
+func NormalizeHarmonyMessages(in []Message) ([]Message, error) { + out := make([]Message, 0, len(in)) + for _, m := range in { + nm := m + nm.Role = strings.ToLower(strings.TrimSpace(nm.Role)) + switch nm.Role { + case RoleSystem, RoleDeveloper, RoleUser, RoleAssistant, RoleTool: + // ok + default: + return nil, fmt.Errorf("invalid role: %q", m.Role) + } + // Normalize channel only for assistant messages + if nm.Role == RoleAssistant { + ch := strings.ToLower(strings.TrimSpace(nm.Channel)) + if ch != "" { + ch = normalizeAssistantChannel(ch) + } + nm.Channel = ch + } else { + // Other roles should not carry a channel + nm.Channel = "" + } + out = append(out, nm) + } + return out, nil +} + +// normalizeAssistantChannel makes channel tokens safe: lowercased, ASCII-only +// subset [a-z0-9_-], and max length 32. Characters outside the allowed set are +// dropped. If the result is empty after filtering, the empty string is +// returned, which the CLI treats as an unchannelled assistant message. +func normalizeAssistantChannel(in string) string { + const maxLen = 32 + // Filter to allowed characters + b := make([]byte, 0, len(in)) + for i := 0; i < len(in); i++ { + c := in[i] + if (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' || c == '-' { + b = append(b, c) + } + if len(b) >= maxLen { + break + } + } + return string(b) +} + +// ChatCompletionsResponse represents the response for chat completions. +type ChatCompletionsResponse struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + Choices []ChatCompletionsResponseChoice `json:"choices"` +} + +type ChatCompletionsResponseChoice struct { + Index int `json:"index"` + FinishReason string `json:"finish_reason"` + Message Message `json:"message"` +} + +// StreamChunk models an SSE delta event payload for streaming responses. +// Only a subset of fields are needed for CLI streaming. 
+type StreamChunk struct { + ID string `json:"id"` + Object string `json:"object"` + Model string `json:"model"` + Choices []struct { + Index int `json:"index"` + Delta struct { + Role string `json:"role"` + Channel string `json:"channel"` + Content string `json:"content"` + } `json:"delta"` + FinishReason string `json:"finish_reason"` + } `json:"choices"` +} diff --git a/internal/oai/types_serialization_test.go b/internal/oai/types_serialization_test.go new file mode 100644 index 0000000..f2e962b --- /dev/null +++ b/internal/oai/types_serialization_test.go @@ -0,0 +1,42 @@ +package oai + +import ( + "encoding/json" + "strings" + "testing" +) + +func TestChatCompletionsRequest_MaxTokens_OmitsWhenZero(t *testing.T) { + req := ChatCompletionsRequest{ + Model: "m", + Messages: []Message{{Role: RoleUser, Content: "hi"}}, + // MaxTokens is zero by default + } + b, err := json.Marshal(req) + if err != nil { + t.Fatalf("marshal: %v", err) + } + s := string(b) + if strings.Contains(s, "\"max_tokens\":") { + t.Fatalf("expected max_tokens to be omitted when zero, got: %s", s) + } +} + +func TestChatCompletionsRequest_MaxTokens_IncludedWhenSet(t *testing.T) { + req := ChatCompletionsRequest{ + Model: "m", + Messages: []Message{{Role: RoleUser, Content: "hi"}}, + MaxTokens: 123, + } + b, err := json.Marshal(req) + if err != nil { + t.Fatalf("marshal: %v", err) + } + s := string(b) + if !strings.Contains(s, "\"max_tokens\":") { + t.Fatalf("expected max_tokens key present, got: %s", s) + } + if !strings.Contains(s, "\"max_tokens\":123") { + t.Fatalf("expected max_tokens=123, got: %s", s) + } +} diff --git a/internal/oai/validator.go b/internal/oai/validator.go new file mode 100644 index 0000000..6bf2e74 --- /dev/null +++ b/internal/oai/validator.go @@ -0,0 +1,70 @@ +package oai + +import "fmt" + +// ValidateMessageSequence enforces that any tool message responds to the most +// recent assistant message that contains tool_calls and that the tool_call_id +// matches one of those 
ids. It returns a descriptive error when the sequence is +// invalid. This mirrors the API's requirement that tool outputs must respond to +// a prior assistant tool call. +func ValidateMessageSequence(messages []Message) error { + currentAllowedIDs := map[string]struct{}{} + hasAllowed := false + for i, m := range messages { + switch m.Role { + case RoleAssistant: + if len(m.ToolCalls) > 0 { + currentAllowedIDs = make(map[string]struct{}, len(m.ToolCalls)) + for _, tc := range m.ToolCalls { + if tc.ID != "" { + currentAllowedIDs[tc.ID] = struct{}{} + } + } + hasAllowed = true + } + case RoleTool: + if !hasAllowed { + return fmt.Errorf("invalid message sequence at index %d: found role:\"tool\" without a prior assistant message containing tool_calls; each tool message must respond to an assistant tool call id", i) + } + if m.ToolCallID == "" { + return fmt.Errorf("invalid message sequence at index %d: role:\"tool\" is missing tool_call_id; each tool message must include the id of the assistant tool call it responds to", i) + } + if _, ok := currentAllowedIDs[m.ToolCallID]; !ok { + return fmt.Errorf("invalid message sequence at index %d: role:\"tool\" has tool_call_id %q that does not match any id from the most recent assistant tool_calls", i, m.ToolCallID) + } + } + } + return nil +} + +// ValidatePrestageHarmony enforces the pre-stage output contract for Harmony +// messages. The contract requires that the array contains only roles "system" +// and/or "developer". Messages MUST NOT include role "tool", role +// "assistant", or role "user". Additionally, no message may contain +// tool_calls and no tool message with tool_call_id is allowed at this stage. +// The content field may be empty for system messages but developer messages +// should typically include guidance text; emptiness is permitted to keep the +// validator non-opinionated about content semantics. 
+func ValidatePrestageHarmony(messages []Message) error { + for i, m := range messages { + switch m.Role { + case RoleSystem, RoleDeveloper: + // Allowed roles for pre-stage output + case RoleTool: + return fmt.Errorf("pre-stage output invalid at index %d: role:\"tool\" is not allowed in pre-stage output", i) + case RoleAssistant: + return fmt.Errorf("pre-stage output invalid at index %d: role:\"assistant\" is not allowed in pre-stage output", i) + case RoleUser: + return fmt.Errorf("pre-stage output invalid at index %d: role:\"user\" is not allowed in pre-stage output", i) + default: + return fmt.Errorf("pre-stage output invalid at index %d: unknown role %q", i, m.Role) + } + if len(m.ToolCalls) > 0 { + return fmt.Errorf("pre-stage output invalid at index %d: tool_calls are not allowed in pre-stage output", i) + } + if m.ToolCallID != "" { + return fmt.Errorf("pre-stage output invalid at index %d: tool_call_id present but no tool calls are allowed in pre-stage output", i) + } + } + return nil +} diff --git a/internal/oai/validator_test.go b/internal/oai/validator_test.go new file mode 100644 index 0000000..06c10e5 --- /dev/null +++ b/internal/oai/validator_test.go @@ -0,0 +1,72 @@ +package oai + +import "testing" + +func TestValidateMessageSequence_InvalidStrayTool(t *testing.T) { + msgs := []Message{ + {Role: RoleUser, Content: "hi"}, + {Role: RoleTool, Name: "echo", ToolCallID: "call_1", Content: "{\"echo\":\"hi\"}"}, + } + if err := ValidateMessageSequence(msgs); err == nil { + t.Fatalf("expected error for stray tool message without prior assistant tool_calls") + } +} + +func TestValidateMessageSequence_ValidSequenceSingleTool(t *testing.T) { + msgs := []Message{ + {Role: RoleUser, Content: "hi"}, + {Role: RoleAssistant, ToolCalls: []ToolCall{{ID: "call_1", Type: "function", Function: ToolCallFunction{Name: "echo", Arguments: "{\"text\":\"hi\"}"}}}}, + {Role: RoleTool, Name: "echo", ToolCallID: "call_1", Content: "{\"echo\":\"hi\"}"}, + } + if err := 
ValidateMessageSequence(msgs); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidateMessageSequence_InvalidMismatchedID(t *testing.T) { + msgs := []Message{ + {Role: RoleUser, Content: "hi"}, + {Role: RoleAssistant, ToolCalls: []ToolCall{{ID: "call_1", Type: "function", Function: ToolCallFunction{Name: "echo", Arguments: "{\"text\":\"hi\"}"}}}}, + {Role: RoleTool, Name: "echo", ToolCallID: "call_2", Content: "{\"echo\":\"hi\"}"}, + } + if err := ValidateMessageSequence(msgs); err == nil { + t.Fatalf("expected error for mismatched tool_call_id not present in prior assistant tool_calls") + } +} + +func TestValidatePrestageHarmony_AllowsSystemAndDeveloperOnly(t *testing.T) { + msgs := []Message{ + {Role: RoleSystem, Content: "sys"}, + {Role: RoleDeveloper, Content: "dev guidance"}, + } + if err := ValidatePrestageHarmony(msgs); err != nil { + t.Fatalf("expected ok, got error: %v", err) + } +} + +func TestValidatePrestageHarmony_RejectsAssistantUserToolRoles(t *testing.T) { + cases := []struct { + name string + msgs []Message + }{ + {"assistant", []Message{{Role: RoleAssistant, Content: "nope"}}}, + {"user", []Message{{Role: RoleUser, Content: "nope"}}}, + {"tool", []Message{{Role: RoleTool, ToolCallID: "x"}}}, + } + for _, tc := range cases { + if err := ValidatePrestageHarmony(tc.msgs); err == nil { + t.Fatalf("%s: expected error, got nil", tc.name) + } + } +} + +func TestValidatePrestageHarmony_RejectsToolCallsAndToolCallID(t *testing.T) { + msgsWithToolCalls := []Message{{Role: RoleSystem, ToolCalls: []ToolCall{{ID: "1", Type: "function", Function: ToolCallFunction{Name: "x"}}}}} + if err := ValidatePrestageHarmony(msgsWithToolCalls); err == nil { + t.Fatalf("expected error for tool_calls, got nil") + } + msgsWithToolCallID := []Message{{Role: RoleDeveloper, ToolCallID: "abc"}} + if err := ValidatePrestageHarmony(msgsWithToolCallID); err == nil { + t.Fatalf("expected error for tool_call_id, got nil") + } +} diff --git 
a/internal/sandbox/limits.go b/internal/sandbox/limits.go new file mode 100644 index 0000000..3b20823 --- /dev/null +++ b/internal/sandbox/limits.go @@ -0,0 +1,80 @@ +package sandbox + +import ( + "bytes" + "context" + "errors" + "time" +) + +// ErrOutputLimit is returned when a bounded writer exceeds its configured cap. +var ErrOutputLimit = errors.New("OUTPUT_LIMIT") + +// ErrTimeout is returned by helpers when execution exceeds the wall-time budget. +var ErrTimeout = errors.New("TIMEOUT") + +// BoundedBuffer is an io.Writer implementation that caps total bytes written. +// When the cap is exceeded, it truncates additional input and returns ErrOutputLimit. +// Use Bytes() or String() to retrieve accumulated output and Truncated() to check status. +// +// Note: The writer never grows beyond the configured capacity in memory. +// A zero or negative maxKB defaults to 64 KiB. +type BoundedBuffer struct { + buf bytes.Buffer + capBytes int + truncated bool +} + +// NewBoundedBuffer creates a new BoundedBuffer with the provided maxKB capacity. +func NewBoundedBuffer(maxKB int) *BoundedBuffer { + if maxKB <= 0 { + maxKB = 64 + } + return &BoundedBuffer{capBytes: maxKB * 1024} +} + +// Write appends p to the buffer up to the capacity. If the write causes +// the capacity to be exceeded, the write is truncated and ErrOutputLimit is returned. +func (b *BoundedBuffer) Write(p []byte) (int, error) { + if b.capBytes <= 0 { + return 0, ErrOutputLimit + } + remaining := b.capBytes - b.buf.Len() + if remaining <= 0 { + b.truncated = true + return 0, ErrOutputLimit + } + if len(p) > remaining { + // Partial write up to remaining capacity + _, _ = b.buf.Write(p[:remaining]) + b.truncated = true + return remaining, ErrOutputLimit + } + return b.buf.Write(p) +} + +// Bytes returns the current contents (may be truncated if cap exceeded). +func (b *BoundedBuffer) Bytes() []byte { return b.buf.Bytes() } + +// String returns the current contents as string (may be truncated). 
+func (b *BoundedBuffer) String() string { return b.buf.String() } + +// Truncated reports whether any write exceeded the cap. +func (b *BoundedBuffer) Truncated() bool { return b.truncated } + +// WithWallTimeout returns a derived context that is canceled after wallMS milliseconds. +// If wallMS <= 0, a conservative default of 1000ms is used. +func WithWallTimeout(parent context.Context, wallMS int) (context.Context, context.CancelFunc) { + if wallMS <= 0 { + wallMS = 1000 + } + return context.WithTimeout(parent, time.Duration(wallMS)*time.Millisecond) +} + +// JSONError is a tiny helper to construct a standard error payload shape. +// Callers generally write this to stderr. +func JSONError(code, message string) []byte { + // Minimal hand-rolled JSON to avoid allocations and error paths here. + // code and message are expected to be short ASCII; if not, JSON remains valid but unescaped. + return []byte(`{"code":"` + code + `","message":"` + message + `"}`) +} diff --git a/internal/sandbox/limits_test.go b/internal/sandbox/limits_test.go new file mode 100644 index 0000000..25dea10 --- /dev/null +++ b/internal/sandbox/limits_test.go @@ -0,0 +1,59 @@ +package sandbox + +import ( + "context" + "strings" + "testing" + "time" +) + +func TestBoundedBuffer_TruncatesAndSignals(t *testing.T) { + buf := NewBoundedBuffer(1) // 1 KiB + payload := strings.Repeat("A", 1536) + n, err := buf.Write([]byte(payload)) + if err == nil { + t.Fatalf("expected error, got nil") + } + if err != ErrOutputLimit { + t.Fatalf("expected ErrOutputLimit, got %v", err) + } + if n != 1024 { + t.Fatalf("expected partial write of 1024, got %d", n) + } + if !buf.Truncated() { + t.Fatalf("expected truncated=true") + } + if len(buf.Bytes()) != 1024 { + t.Fatalf("expected buffer length 1024, got %d", len(buf.Bytes())) + } +} + +func TestBoundedBuffer_FitsWithinCap(t *testing.T) { + buf := NewBoundedBuffer(2) // 2 KiB + payload := strings.Repeat("B", 1500) + n, err := buf.Write([]byte(payload)) + if err != 
nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if n != 1500 {
+		t.Fatalf("expected full write of 1500, got %d", n)
+	}
+	if buf.Truncated() {
+		t.Fatalf("did not expect truncation")
+	}
+	if len(buf.Bytes()) != 1500 {
+		t.Fatalf("expected buffer length 1500, got %d", len(buf.Bytes()))
+	}
+}
+
+func TestWithWallTimeout_TimesOutRoughlyOnBudget(t *testing.T) {
+	ctx, cancel := WithWallTimeout(context.Background(), 50)
+	defer cancel()
+
+	start := time.Now()
+	<-ctx.Done()
+	elapsed := time.Since(start)
+	// Timers guarantee firing no earlier than the deadline; they may fire
+	// arbitrarily late on a loaded CI machine, so keep the ceiling generous
+	// to avoid flakes while still catching a grossly wrong budget.
+	if elapsed < 40*time.Millisecond || elapsed > 2*time.Second {
+		t.Fatalf("expected ~50ms timeout, got %v", elapsed)
+	}
+	if ctx.Err() != context.DeadlineExceeded {
+		t.Fatalf("expected context.DeadlineExceeded, got %v", ctx.Err())
+	}
+}
diff --git a/internal/state/load.go b/internal/state/load.go
new file mode 100644
index 0000000..f1ff74e
--- /dev/null
+++ b/internal/state/load.go
@@ -0,0 +1,125 @@
+package state
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// ErrStateInvalid is returned when persisted state cannot be loaded safely.
+var ErrStateInvalid = errors.New("state invalid")
+
+// isBaseName returns true if p is a simple base name with no path separators
+// or parent traversal segments.
+func isBaseName(p string) bool {
+	if p == "" {
+		return false
+	}
+	if filepath.Base(p) != p {
+		return false
+	}
+	if strings.Contains(p, "..") {
+		return false
+	}
+	return true
+}
+
+// LoadLatestStateBundle loads the most recent state bundle from dir by reading
+// latest.json and then opening the referenced snapshot file. It validates the
+// pointer structure, verifies the snapshot hash when present, and validates the
+// bundle schema. On any issue (missing files, decode errors, version mismatch,
+// permission errors), it returns (nil, ErrStateInvalid). 
+func LoadLatestStateBundle(dir string) (*StateBundle, error) { + // Security: reject insecure directories (world-writable or non-owned) on Unix + if err := ensureSecureStateDir(dir); err != nil { + return nil, ErrStateInvalid + } + // Best-effort lock to reduce concurrent writers/readers races + if unlock, lockErr := acquireStateLock(dir); lockErr == nil && unlock != nil { + defer unlock() + } + latestPath := filepath.Join(dir, "latest.json") + latestBytes, err := os.ReadFile(latestPath) + if err != nil { + // Missing latest.json is not a quarantinable corruption; caller can regenerate. + return nil, ErrStateInvalid + } + + var ptr latestPointer + if err := json.Unmarshal(latestBytes, &ptr); err != nil { + // Partially written or corrupt latest.json → quarantine pointer for regeneration. + quarantineFile(latestPath) + return nil, ErrStateInvalid + } + if ptr.Version != "1" || !isBaseName(ptr.Path) { + // Unknown version or unsafe path → quarantine pointer. + quarantineFile(latestPath) + return nil, ErrStateInvalid + } + + snapPath := filepath.Join(dir, ptr.Path) + snapBytes, err := os.ReadFile(snapPath) + if err != nil { + // Pointer to missing or unreadable snapshot → quarantine pointer. + quarantineFile(latestPath) + return nil, ErrStateInvalid + } + + if ptr.SHA256 != "" { + sum := sha256.Sum256(snapBytes) + if !strings.EqualFold(hex.EncodeToString(sum[:]), ptr.SHA256) { + // Snapshot contents do not match recorded hash → quarantine snapshot and pointer. + quarantineFile(snapPath) + quarantineFile(latestPath) + return nil, ErrStateInvalid + } + } + + var b StateBundle + if err := json.Unmarshal(snapBytes, &b); err != nil { + // Corrupt snapshot JSON → quarantine snapshot and pointer. + quarantineFile(snapPath) + quarantineFile(latestPath) + return nil, ErrStateInvalid + } + if err := b.Validate(); err != nil { + // Invalid bundle structure → quarantine snapshot and pointer. 
+ quarantineFile(snapPath) + quarantineFile(latestPath) + return nil, ErrStateInvalid + } + return &b, nil +} + +// quarantineFile renames the given path to a sibling with a ".quarantined" suffix. +// If the target exists, it appends a numeric counter (e.g., .quarantined.1) up to 99 attempts. +// Errors are returned only for unexpected conditions; callers typically ignore failures. +func quarantineFile(path string) { + // Ensure we only operate within an existing directory and on a regular file when possible. + base := filepath.Base(path) + if base == "." || base == ".." || base == "" { + return + } + dir := filepath.Dir(path) + // Compute first candidate + cand := filepath.Join(dir, base+".quarantined") + if _, err := os.Stat(cand); err == nil { + // Find an available suffix + for i := 1; i < 100; i++ { + next := filepath.Join(dir, fmt.Sprintf("%s.quarantined.%d", base, i)) + if _, err := os.Stat(next); os.IsNotExist(err) { + cand = next + break + } + } + } + // Attempt atomic rename; best-effort. 
+ if err := os.Rename(path, cand); err != nil { + return + } +} diff --git a/internal/state/load_test.go b/internal/state/load_test.go new file mode 100644 index 0000000..2129a82 --- /dev/null +++ b/internal/state/load_test.go @@ -0,0 +1,258 @@ +package state + +import ( + "encoding/json" + "errors" + "os" + "path/filepath" + "runtime" + "testing" + "time" +) + +func makeValidBundle(now string) *StateBundle { + return &StateBundle{ + Version: "1", + CreatedAt: now, + ToolVersion: "test-1", + ModelID: "gpt-5", + BaseURL: "http://example.local", + ToolsetHash: "abc", + ScopeKey: "scope-1", + Prompts: map[string]string{"system": "hi"}, + SourceHash: ComputeSourceHash("gpt-5", "http://example.local", "abc", "scope-1"), + } +} + +func TestLoadLatestStateBundle_OK(t *testing.T) { + dir := t.TempDir() + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + b := makeValidBundle(now) + if err := SaveStateBundle(dir, b); err != nil { + t.Fatalf("SaveStateBundle: %v", err) + } + + got, err := LoadLatestStateBundle(dir) + if err != nil { + t.Fatalf("LoadLatestStateBundle error: %v", err) + } + if got == nil { + t.Fatalf("got nil bundle") + } + if got.Version != b.Version || got.CreatedAt != b.CreatedAt || got.ModelID != b.ModelID || got.BaseURL != b.BaseURL || got.ScopeKey != b.ScopeKey { + t.Fatalf("loaded bundle mismatch: %+v vs %+v", got, b) + } +} + +func TestLoadLatestStateBundle_RejectsInsecureDir(t *testing.T) { + dir := t.TempDir() + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + b := makeValidBundle(now) + if err := SaveStateBundle(dir, b); err != nil { + t.Fatalf("SaveStateBundle: %v", err) + } + if runtime.GOOS != "windows" { + if err := os.Chmod(dir, 0o707); err != nil { + t.Fatalf("chmod: %v", err) + } + if got, err := LoadLatestStateBundle(dir); !errors.Is(err, ErrStateInvalid) || got != nil { + t.Fatalf("expected ErrStateInvalid and nil for insecure dir, got %v, %v", err, got) + } + // Fix perms and expect success + if err := 
os.Chmod(dir, 0o700); err != nil { + t.Fatalf("chmod fix: %v", err) + } + } + if got, err := LoadLatestStateBundle(dir); err != nil || got == nil { + t.Fatalf("LoadLatestStateBundle after fix: %v, %v", err, got) + } +} + +func TestLoadLatestStateBundle_MissingLatest(t *testing.T) { + dir := t.TempDir() + if b, err := LoadLatestStateBundle(dir); !errors.Is(err, ErrStateInvalid) || b != nil { + t.Fatalf("expected ErrStateInvalid and nil, got %v, %v", err, b) + } +} + +func TestLoadLatestStateBundle_CorruptLatest(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "latest.json"), []byte("not-json"), 0o600); err != nil { + t.Fatalf("write latest: %v", err) + } + if b, err := LoadLatestStateBundle(dir); !errors.Is(err, ErrStateInvalid) || b != nil { + t.Fatalf("expected ErrStateInvalid and nil, got %v, %v", err, b) + } + // Corrupt pointer should be quarantined + if _, err := os.Stat(filepath.Join(dir, "latest.json.quarantined")); err != nil { + t.Fatalf("expected quarantined latest.json, err=%v", err) + } +} + +func TestLoadLatestStateBundle_UnknownVersion(t *testing.T) { + dir := t.TempDir() + // Write a valid snapshot first + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + b := makeValidBundle(now) + if err := SaveStateBundle(dir, b); err != nil { + t.Fatalf("SaveStateBundle: %v", err) + } + // Overwrite latest.json with version 2 + // Discover snapshot file name + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + var snapshot string + for _, e := range entries { + if !e.IsDir() && e.Name() != "latest.json" { + snapshot = e.Name() + break + } + } + ptr := latestPointer{Version: "2", Path: snapshot, SHA256: "deadbeef"} + data, err := json.Marshal(ptr) + if err != nil { + t.Fatalf("marshal: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, "latest.json"), data, 0o600); err != nil { + t.Fatalf("write latest: %v", err) + } + + if b2, err := LoadLatestStateBundle(dir); 
!errors.Is(err, ErrStateInvalid) || b2 != nil { + t.Fatalf("expected ErrStateInvalid and nil, got %v, %v", err, b2) + } + // Unknown version pointer should be quarantined + if _, err := os.Stat(filepath.Join(dir, "latest.json.quarantined")); err != nil { + t.Fatalf("expected quarantined latest.json, err=%v", err) + } +} + +func TestLoadLatestStateBundle_MissingSnapshot(t *testing.T) { + dir := t.TempDir() + // Write pointer to missing file + ptr := latestPointer{Version: "1", Path: "missing.json", SHA256: ""} + data, err := json.Marshal(ptr) + if err != nil { + t.Fatalf("marshal: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, "latest.json"), data, 0o600); err != nil { + t.Fatalf("write latest: %v", err) + } + if b, err := LoadLatestStateBundle(dir); !errors.Is(err, ErrStateInvalid) || b != nil { + t.Fatalf("expected ErrStateInvalid and nil, got %v, %v", err, b) + } + // Pointer should be quarantined + if _, err := os.Stat(filepath.Join(dir, "latest.json.quarantined")); err != nil { + t.Fatalf("expected quarantined latest.json, err=%v", err) + } +} + +func TestLoadLatestStateBundle_PermissionDenied(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("windows permissions semantics differ") + } + dir := t.TempDir() + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + b := makeValidBundle(now) + if err := SaveStateBundle(dir, b); err != nil { + t.Fatalf("SaveStateBundle: %v", err) + } + // Find snapshot and chmod to 000 to induce EPERM when reading + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + var snapshot string + for _, e := range entries { + if !e.IsDir() && e.Name() != "latest.json" { + snapshot = e.Name() + break + } + } + snapPath := filepath.Join(dir, snapshot) + if err := os.Chmod(snapPath, 0o000); err != nil { + t.Fatalf("chmod: %v", err) + } + t.Cleanup(func() { + if err := os.Chmod(snapPath, 0o600); err != nil { + t.Logf("ignored chmod restore error: %v", err) + } + }) + + if b2, 
err := LoadLatestStateBundle(dir); !errors.Is(err, ErrStateInvalid) || b2 != nil { + t.Fatalf("expected ErrStateInvalid and nil, got %v, %v", err, b2) + } +} + +func TestLoadLatestStateBundle_SnapshotHashMismatch_QuarantineBoth(t *testing.T) { + dir := t.TempDir() + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + b := makeValidBundle(now) + if err := SaveStateBundle(dir, b); err != nil { + t.Fatalf("SaveStateBundle: %v", err) + } + // Tamper with snapshot contents to break SHA + // Find snapshot name + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + var snapshot string + for _, e := range entries { + if !e.IsDir() && e.Name() != "latest.json" { + snapshot = e.Name() + break + } + } + if snapshot == "" { + t.Fatalf("snapshot not found") + } + snapPath := filepath.Join(dir, snapshot) + if err := os.WriteFile(snapPath, []byte("{}"), 0o600); err != nil { + t.Fatalf("tamper snapshot: %v", err) + } + if b2, err := LoadLatestStateBundle(dir); !errors.Is(err, ErrStateInvalid) || b2 != nil { + t.Fatalf("expected ErrStateInvalid and nil, got %v, %v", err, b2) + } + if _, err := os.Stat(snapPath + ".quarantined"); err != nil { + t.Fatalf("expected quarantined snapshot, err=%v", err) + } + if _, err := os.Stat(filepath.Join(dir, "latest.json.quarantined")); err != nil { + t.Fatalf("expected quarantined latest.json, err=%v", err) + } +} + +func TestLoadLatestStateBundle_CorruptSnapshotJSON_QuarantineBoth(t *testing.T) { + dir := t.TempDir() + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + b := makeValidBundle(now) + if err := SaveStateBundle(dir, b); err != nil { + t.Fatalf("SaveStateBundle: %v", err) + } + // Locate snapshot and write invalid JSON + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + var snapshot string + for _, e := range entries { + if !e.IsDir() && e.Name() != "latest.json" { + snapshot = e.Name() + break + } + } + snapPath := 
filepath.Join(dir, snapshot) + if err := os.WriteFile(snapPath, []byte("not-json"), 0o600); err != nil { + t.Fatalf("write corrupt snapshot: %v", err) + } + if b2, err := LoadLatestStateBundle(dir); !errors.Is(err, ErrStateInvalid) || b2 != nil { + t.Fatalf("expected ErrStateInvalid and nil, got %v, %v", err, b2) + } + if _, err := os.Stat(snapPath + ".quarantined"); err != nil { + t.Fatalf("expected quarantined snapshot, err=%v", err) + } + if _, err := os.Stat(filepath.Join(dir, "latest.json.quarantined")); err != nil { + t.Fatalf("expected quarantined latest.json, err=%v", err) + } +} diff --git a/internal/state/lock.go b/internal/state/lock.go new file mode 100644 index 0000000..f15b300 --- /dev/null +++ b/internal/state/lock.go @@ -0,0 +1,95 @@ +package state + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "os" + "path/filepath" + "time" +) + +// acquireStateLock attempts to take a coarse-grained advisory lock for the +// given directory by creating a file named "state.lock" with O_EXCL. +// If the lock is already held, it waits up to ~2s with jitter and retries. +// On success it returns an unlock function that removes the lock file. +// If the lock cannot be acquired within the wait budget, it returns a no-op +// unlock function and nil error so callers can proceed without crashing. +func acquireStateLock(dir string) (func(), error) { + lockPath := filepath.Join(dir, "state.lock") + + // Ensure directory exists so we can create the lock file. + if err := os.MkdirAll(dir, 0o700); err != nil { + return func() {}, err + } + // Best-effort set exact perms (in case existing dir had broader perms). + if err := os.Chmod(dir, 0o700); err != nil { + // ignore; directory may already have stricter perms + _ = err + } + + tryOnce := func() (bool, error) { + // Include a small token in the file for debugging; best-effort. 
+ var token [8]byte + if _, err := rand.Read(token[:]); err != nil { + // ignore; token will be zeroed which is fine for debug content + _ = err + } + contents := []byte(fmt.Sprintf("ts=%s token=%s\n", time.Now().UTC().Format(time.RFC3339Nano), hex.EncodeToString(token[:]))) + f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600) + if err != nil { + if os.IsExist(err) { + return false, nil + } + return false, err + } + if _, werr := f.Write(contents); werr != nil { + _ = f.Close() //nolint:errcheck // best-effort cleanup on error + _ = os.Remove(lockPath) //nolint:errcheck // best-effort cleanup on error + return false, werr + } + if serr := f.Sync(); serr != nil { + _ = f.Close() //nolint:errcheck // best-effort cleanup on error + _ = os.Remove(lockPath) //nolint:errcheck // best-effort cleanup on error + return false, serr + } + if cerr := f.Close(); cerr != nil { + _ = os.Remove(lockPath) //nolint:errcheck // best-effort cleanup on error + return false, cerr + } + return true, nil + } + + // Immediate attempt + ok, err := tryOnce() + if err != nil { + return func() {}, err + } + if ok { + return func() { + if err := os.Remove(lockPath); err != nil && !os.IsNotExist(err) { + _ = err + } + }, nil + } + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + // Sleep 50-150ms jitter + sleep := 50 + int(time.Now().UnixNano()%100) + time.Sleep(time.Duration(sleep) * time.Millisecond) + ok, err := tryOnce() + if err != nil { + return func() {}, err + } + if ok { + return func() { + if err := os.Remove(lockPath); err != nil && !os.IsNotExist(err) { + _ = err + } + }, nil + } + } + // Failed to acquire; proceed without lock + return func() {}, nil +} diff --git a/internal/state/refine.go b/internal/state/refine.go new file mode 100644 index 0000000..7286cb3 --- /dev/null +++ b/internal/state/refine.go @@ -0,0 +1,103 @@ +package state + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "strings" + 
"time" +) + +// RefineStateBundle produces a new StateBundle derived from prev by applying a +// deterministic refinement. It preserves unspecified fields, updates CreatedAt +// to the current UTC time (RFC3339), recomputes SourceHash, and records PrevSHA +// as the SHA-256 (hex) of the canonical JSON of the previous bundle. +// +// The refinement strategy is intentionally simple and deterministic: +// - Prompts["developer"] is appended with two new paragraphs that include the +// refineInput and userPrompt, separated by blank lines. +// - Other fields are preserved as-is. +func RefineStateBundle(prev *StateBundle, refineInput string, userPrompt string) (*StateBundle, error) { + if prev == nil { + return nil, errors.New("nil prev") + } + if err := prev.Validate(); err != nil { + return nil, err + } + + // Compute prev SHA over the same canonical form we persist (indent to match SaveStateBundle) + prevJSON, err := json.MarshalIndent(prev, "", " ") + if err != nil { + return nil, err + } + prevSum := sha256.Sum256(prevJSON) + prevSHAHex := hex.EncodeToString(prevSum[:]) + + // Deep-ish copy of maps (values are JSON-serializable primitives) + cloneStrMap := func(in map[string]string) map[string]string { + if in == nil { + return nil + } + out := make(map[string]string, len(in)) + for k, v := range in { + out[k] = v + } + return out + } + cloneAnyMap := func(in map[string]any) map[string]any { + if in == nil { + return nil + } + out := make(map[string]any, len(in)) + for k, v := range in { + out[k] = v + } + return out + } + + prompts := cloneStrMap(prev.Prompts) + if prompts == nil { + prompts = make(map[string]string) + } + + // Append deterministic refinement note to developer prompt + dev := prompts["developer"] + var parts []string + if strings.TrimSpace(dev) != "" { + parts = append(parts, dev) + } + if strings.TrimSpace(refineInput) != "" { + parts = append(parts, refineInput) + } + if strings.TrimSpace(userPrompt) != "" { + parts = append(parts, "USER: 
"+userPrompt) + } + prompts["developer"] = strings.TrimSpace(strings.Join(parts, "\n\n")) + + // Timestamp: ensure it advances at least by 1s if equal to previous + now := time.Now().UTC().Truncate(time.Second) + if prev.CreatedAt == now.Format(time.RFC3339) { + now = now.Add(time.Second) + } + + newBundle := &StateBundle{ + Version: prev.Version, + CreatedAt: now.Format(time.RFC3339), + ToolVersion: prev.ToolVersion, + ModelID: prev.ModelID, + BaseURL: prev.BaseURL, + ToolsetHash: prev.ToolsetHash, + ScopeKey: prev.ScopeKey, + Prompts: prompts, + PrepSettings: cloneAnyMap(prev.PrepSettings), + Context: cloneAnyMap(prev.Context), + ToolCaps: cloneAnyMap(prev.ToolCaps), + Custom: cloneAnyMap(prev.Custom), + // Recompute based on identifying fields + SourceHash: ComputeSourceHash(prev.ModelID, prev.BaseURL, prev.ToolsetHash, prev.ScopeKey), + PrevSHA: prevSHAHex, + } + + return newBundle, nil +} diff --git a/internal/state/refine_test.go b/internal/state/refine_test.go new file mode 100644 index 0000000..045920a --- /dev/null +++ b/internal/state/refine_test.go @@ -0,0 +1,116 @@ +package state + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestRefineStateBundle_PreservesAndAppends(t *testing.T) { + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + prev := &StateBundle{ + Version: "1", + CreatedAt: now, + ToolVersion: "dev", + ModelID: "gpt-5", + BaseURL: "https://api.example/v1", + ToolsetHash: "toolhash", + ScopeKey: "scope", + Prompts: map[string]string{"system": "S", "developer": "D0"}, + PrepSettings: map[string]any{ + "temp": 0.1, + }, + Context: map[string]any{"k": "v"}, + ToolCaps: map[string]any{"cap": true}, + Custom: map[string]any{"note": "x"}, + SourceHash: ComputeSourceHash("gpt-5", "https://api.example/v1", "toolhash", "scope"), + } + + refined, err := RefineStateBundle(prev, "refine here", "user asks") + if err != nil { + 
t.Fatalf("RefineStateBundle error: %v", err) + } + + if refined == nil { + t.Fatalf("got nil bundle") + } + if refined.Version != "1" || refined.ModelID != prev.ModelID || refined.BaseURL != prev.BaseURL || refined.ToolsetHash != prev.ToolsetHash || refined.ScopeKey != prev.ScopeKey { + t.Fatalf("identifying fields not preserved: %+v", refined) + } + if refined.CreatedAt == prev.CreatedAt { + t.Fatalf("CreatedAt not updated") + } + if refined.SourceHash == "" { + t.Fatalf("SourceHash empty") + } + // SourceHash is recomputed from identifying fields and should equal prev's since those didn't change + if refined.SourceHash != prev.SourceHash { + t.Fatalf("SourceHash changed unexpectedly: %s vs %s", refined.SourceHash, prev.SourceHash) + } + dev := refined.Prompts["developer"] + if !strings.Contains(dev, "D0") || !strings.Contains(dev, "refine here") || !strings.Contains(dev, "USER: user asks") { + t.Fatalf("developer prompt not appended correctly: %q", dev) + } + + // prev_sha should be SHA256 of canonical JSON of prev + prevJSON, err := json.MarshalIndent(prev, "", " ") + if err != nil { + t.Fatalf("marshal prev: %v", err) + } + wantPrevSHA := sha256Hex(prevJSON) + if refined.PrevSHA != wantPrevSHA { + t.Fatalf("prev_sha mismatch: got %s want %s", refined.PrevSHA, wantPrevSHA) + } +} + +func sha256Hex(b []byte) string { + sum := sha256.Sum256(b) + return hex.EncodeToString(sum[:]) +} + +func TestRefineStateBundle_SaveSnapshot(t *testing.T) { + dir := t.TempDir() + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + prev := &StateBundle{ + Version: "1", + CreatedAt: now, + ToolVersion: "dev", + ModelID: "gpt-5", + BaseURL: "https://api.example/v1", + ToolsetHash: "toolhash", + ScopeKey: "scope", + Prompts: map[string]string{"developer": "D0"}, + SourceHash: ComputeSourceHash("gpt-5", "https://api.example/v1", "toolhash", "scope"), + } + + refined, err := RefineStateBundle(prev, "refine here", "user asks") + if err != nil { + 
t.Fatalf("RefineStateBundle error: %v", err) + } + if err := SaveStateBundle(dir, refined); err != nil { + t.Fatalf("SaveStateBundle: %v", err) + } + // Assert snapshot exists and latest.json points to it + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + var snapshot string + for _, e := range entries { + if !e.IsDir() && e.Name() != "latest.json" { + snapshot = e.Name() + break + } + } + if snapshot == "" { + t.Fatalf("snapshot file not found") + } + if _, err := os.Stat(filepath.Join(dir, "latest.json")); err != nil { + t.Fatalf("latest.json missing: %v", err) + } +} diff --git a/internal/state/save.go b/internal/state/save.go new file mode 100644 index 0000000..e7a049a --- /dev/null +++ b/internal/state/save.go @@ -0,0 +1,171 @@ +package state + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "strings" +) + +// latestPointer is the JSON structure written to latest.json to point to the +// concrete snapshot file. The path is the base name inside the state directory. +type latestPointer struct { + Version string `json:"version"` + Path string `json:"path"` + SHA256 string `json:"sha256"` +} + +// syncDirFunc is a hook to fsync a directory after atomic renames. +// It is a var so tests can override and assert that directory fsync was used. +var syncDirFunc = func(dir string) error { + d, err := os.Open(dir) + if err != nil { + return err + } + defer func() { + if cerr := d.Close(); cerr != nil { + // best-effort: ignore close error for directory sync + _ = cerr + } + }() + return d.Sync() +} + +// writeFileAtomic writes data to a temporary file next to dstPath with mode 0600, +// fsyncs the file, renames it atomically to dstPath, and fsyncs the directory. 
+func writeFileAtomic(dir string, dstPath string, data []byte) error { + // Ensure directory exists with 0700 perms + if err := os.MkdirAll(dir, 0o700); err != nil { + return err + } + // Best-effort set exact perms (in case existing dir had broader perms) + if err := os.Chmod(dir, 0o700); err != nil { + // best-effort: directory may already have stricter perms + _ = err + } + + tmp, err := os.CreateTemp(dir, ".tmp-*") + if err != nil { + return err + } + tmpName := tmp.Name() + // Ensure 0600 permissions for the temp file + if err := tmp.Chmod(0o600); err != nil { + if cerr := tmp.Close(); cerr != nil { + _ = cerr + } + if rerr := os.Remove(tmpName); rerr != nil { + _ = rerr + } + return err + } + if _, err := tmp.Write(data); err != nil { + if cerr := tmp.Close(); cerr != nil { + _ = cerr + } + if rerr := os.Remove(tmpName); rerr != nil { + _ = rerr + } + return err + } + if err := tmp.Sync(); err != nil { + if cerr := tmp.Close(); cerr != nil { + _ = cerr + } + if rerr := os.Remove(tmpName); rerr != nil { + _ = rerr + } + return err + } + if err := tmp.Close(); err != nil { + if rerr := os.Remove(tmpName); rerr != nil { + _ = rerr + } + return err + } + if err := os.Rename(tmpName, dstPath); err != nil { + if rerr := os.Remove(tmpName); rerr != nil { + _ = rerr + } + return err + } + if err := syncDirFunc(dir); err != nil { + return err + } + return nil +} + +// sanitizeRFC3339ForFilename makes the RFC3339 timestamp safe for cross-platform filenames +// by removing ':' characters. Example: 2006-01-02T15:04:05Z07:00 -> 2006-01-02T150405Z0700 +func sanitizeRFC3339ForFilename(ts string) string { + // Remove colons which are problematic on Windows filesystems + return strings.ReplaceAll(ts, ":", "") +} + +// SaveStateBundle persists the provided bundle into dir using an atomic write strategy. +// It writes a content-addressed snapshot file named +// +// state-<RFC3339UTC>-<8charSHA>.json +// +// and then updates latest.json atomically to point to that snapshot. 
All files are +// written with 0600 permissions and the directory is fsync'ed after renames. +// The function does not mutate the given bundle; callers must ensure it is valid. +func SaveStateBundle(dir string, bundle *StateBundle) error { + if bundle == nil { + return errors.New("nil bundle") + } + if err := bundle.Validate(); err != nil { + return fmt.Errorf("invalid bundle: %w", err) + } + + // Security: reject insecure directories (world-writable or non-owned) on Unix + if err := ensureSecureStateDir(dir); err != nil { + return err + } + + // Attempt coarse-grained advisory lock to avoid concurrent writes + if unlock, lockErr := acquireStateLock(dir); lockErr == nil && unlock != nil { + defer unlock() + } + + // Redact/sanitize secrets before persisting + sanitized, err := sanitizeBundleForSave(bundle) + if err != nil { + return err + } + + // Marshal the snapshot deterministically. + // Note: json.Marshal is sufficient; map key ordering is not relied upon for correctness here. + snapshotBytes, err := json.MarshalIndent(sanitized, "", " ") + if err != nil { + return err + } + + // Compute content hash for integrity and short suffix + sum := sha256.Sum256(snapshotBytes) + shaHex := hex.EncodeToString(sum[:]) + short8 := shaHex[:8] + + baseName := fmt.Sprintf("state-%s-%s.json", sanitizeRFC3339ForFilename(bundle.CreatedAt), short8) + finalPath := filepath.Join(dir, baseName) + + if err := writeFileAtomic(dir, finalPath, snapshotBytes); err != nil { + return err + } + + // Write pointer file + ptr := latestPointer{Version: "1", Path: baseName, SHA256: shaHex} + ptrBytes, err := json.MarshalIndent(ptr, "", " ") + if err != nil { + return err + } + latestPath := filepath.Join(dir, "latest.json") + if err := writeFileAtomic(dir, latestPath, ptrBytes); err != nil { + return err + } + return nil +} diff --git a/internal/state/save_test.go b/internal/state/save_test.go new file mode 100644 index 0000000..48ad1f5 --- /dev/null +++ b/internal/state/save_test.go @@ -0,0 
+1,263 @@ +package state + +import ( + "encoding/json" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" +) + +func TestSaveStateBundle_WritesFilesAtomicallyWithPermsAndPointer(t *testing.T) { + // Arrange + tempDir := t.TempDir() + + // Track whether syncDirFunc was called + calledSync := false + syncDirFunc = func(dir string) error { + calledSync = true + f, err := os.Open(dir) + if err != nil { + return err + } + defer func() { + if err := f.Close(); err != nil { + t.Logf("ignored close error: %v", err) + } + }() + return f.Sync() + } + t.Cleanup(func() { + syncDirFunc = func(dir string) error { + f, err := os.Open(dir) + if err != nil { + return err + } + defer func() { + if err := f.Close(); err != nil { + t.Logf("ignored close error: %v", err) + } + }() + return f.Sync() + } + }) + + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + b := &StateBundle{ + Version: "1", + CreatedAt: now, + ToolVersion: "test-1", + ModelID: "gpt-5", + BaseURL: "http://example.local", + ToolsetHash: "abc", + ScopeKey: "scope-1", + Prompts: map[string]string{"system": "hi"}, + SourceHash: ComputeSourceHash("gpt-5", "http://example.local", "abc", "scope-1"), + } + + // Act + if err := SaveStateBundle(tempDir, b); err != nil { + t.Fatalf("SaveStateBundle error: %v", err) + } + + // Assert snapshot exists + entries, err := os.ReadDir(tempDir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + var snapshot string + for _, e := range entries { + if strings.HasPrefix(e.Name(), "state-") && strings.HasSuffix(e.Name(), ".json") { + snapshot = e.Name() + break + } + } + if snapshot == "" { + t.Fatalf("snapshot file not found in %s", tempDir) + } + + // Check file mode 0600 + info, err := os.Stat(filepath.Join(tempDir, snapshot)) + if err != nil { + t.Fatalf("stat snapshot: %v", err) + } + if runtime.GOOS != "windows" { // Windows has different mode semantics + if info.Mode().Perm() != 0o600 { + t.Fatalf("snapshot perms = %v, want 0600", 
info.Mode().Perm()) + } + } + + // Check latest.json exists and points to snapshot + latestPath := filepath.Join(tempDir, "latest.json") + latestBytes, err := os.ReadFile(latestPath) + if err != nil { + t.Fatalf("read latest.json: %v", err) + } + var ptr latestPointer + if err := json.Unmarshal(latestBytes, &ptr); err != nil { + t.Fatalf("unmarshal latest.json: %v", err) + } + if ptr.Version != "1" { + t.Fatalf("pointer version = %q, want 1", ptr.Version) + } + if ptr.Path != snapshot { + t.Fatalf("pointer path = %q, want %q", ptr.Path, snapshot) + } + if ptr.SHA256 == "" { + t.Fatalf("pointer sha256 empty") + } + + if !calledSync { + t.Fatalf("expected directory fsync to be called") + } +} + +func TestSaveStateBundle_AdvisoryLock_AllowsSingleWriter(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + mk := func(i int) *StateBundle { + return &StateBundle{ + Version: "1", + CreatedAt: now, + ToolVersion: "test-1", + ModelID: "gpt-5", + BaseURL: "http://example.local", + ToolsetHash: "abc", + ScopeKey: "scope-1", + Prompts: map[string]string{"system": "hi"}, + SourceHash: ComputeSourceHash("gpt-5", "http://example.local", "abc", "scope-1"), + } + } + + done := make(chan error, 2) + go func() { done <- SaveStateBundle(dir, mk(1)) }() + go func() { done <- SaveStateBundle(dir, mk(2)) }() + + // Both should succeed; lock serializes them. Wait for both. + if err := <-done; err != nil { + t.Fatalf("first SaveStateBundle error: %v", err) + } + if err := <-done; err != nil { + t.Fatalf("second SaveStateBundle error: %v", err) + } + + // There must be exactly one latest.json and at least one snapshot. 
+ entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + hasLatest := false + snapCount := 0 + hasTmp := false + for _, e := range entries { + switch { + case e.Name() == "latest.json": + hasLatest = true + case strings.HasPrefix(e.Name(), "state-") && strings.HasSuffix(e.Name(), ".json"): + snapCount++ + case strings.HasPrefix(e.Name(), ".tmp-"): + hasTmp = true + } + } + if !hasLatest { + t.Fatalf("missing latest.json after concurrent writes") + } + if snapCount == 0 { + t.Fatalf("no snapshot files after concurrent writes") + } + if hasTmp { + t.Fatalf("found lingering temp files after concurrent writes") + } + + // Pointer should load successfully + if b, err := LoadLatestStateBundle(dir); err != nil || b == nil { + t.Fatalf("LoadLatestStateBundle failed after concurrent writes: %v, %v", err, b) + } +} + +func TestSaveStateBundle_InvalidBundle(t *testing.T) { + tempDir := t.TempDir() + // Missing required fields (CreatedAt invalid) + b := &StateBundle{Version: "1", CreatedAt: "not-time", ModelID: "m", BaseURL: "u", ScopeKey: "s"} + if err := SaveStateBundle(tempDir, b); err == nil { + t.Fatalf("expected error for invalid bundle") + } +} + +func TestSaveStateBundle_SanitizesSecretsAndRejectsInsecureDir(t *testing.T) { + tempDir := t.TempDir() + // Make directory world-writable on Unix to trigger rejection; skip on Windows + if runtime.GOOS != "windows" { + if err := os.Chmod(tempDir, 0o707); err != nil { + t.Fatalf("chmod: %v", err) + } + } + + now := time.Now().UTC().Truncate(time.Second).Format(time.RFC3339) + b := &StateBundle{ + Version: "1", + CreatedAt: now, + ToolVersion: "test-1", + ModelID: "gpt-5", + BaseURL: "http://example.local", + ToolsetHash: "abc", + ScopeKey: "scope-1", + Prompts: map[string]string{"system": "Authorization: Bearer secretTOKEN1234567890"}, + PrepSettings: map[string]any{ + "api_key": "sk-verylongexamplekey-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", + "request_body": "{ big raw body }", + }, + 
SourceHash: ComputeSourceHash("gpt-5", "http://example.local", "abc", "scope-1"), + } + + err := SaveStateBundle(tempDir, b) + if runtime.GOOS != "windows" { + if err == nil { + t.Fatalf("expected error for insecure dir perms") + } + // Fix perms and try again + if err := os.Chmod(tempDir, 0o700); err != nil { + t.Fatalf("chmod fix: %v", err) + } + } + + if err := SaveStateBundle(tempDir, b); err != nil { + t.Fatalf("SaveStateBundle error after fix: %v", err) + } + // Read snapshot and verify redactions present + entries, err := os.ReadDir(tempDir) + if err != nil { + t.Fatalf("ReadDir: %v", err) + } + var snapshot string + for _, e := range entries { + if strings.HasPrefix(e.Name(), "state-") && strings.HasSuffix(e.Name(), ".json") { + snapshot = filepath.Join(tempDir, e.Name()) + break + } + } + if snapshot == "" { + t.Fatalf("snapshot not found") + } + data, err := os.ReadFile(snapshot) + if err != nil { + t.Fatalf("read snapshot: %v", err) + } + s := string(data) + if strings.Contains(s, "secretTOKEN1234567890") { + t.Fatalf("authorization token not redacted: %s", s) + } + if !strings.Contains(s, "Authorization: Bearer ****") { + t.Fatalf("authorization scheme not preserved/redacted: %s", s) + } + if strings.Contains(s, "sk-verylongexamplekey") { + t.Fatalf("api key not redacted: %s", s) + } + if strings.Contains(s, "{ big raw body }") { + t.Fatalf("raw body not omitted: %s", s) + } +} diff --git a/internal/state/schema.go b/internal/state/schema.go new file mode 100644 index 0000000..90e00e2 --- /dev/null +++ b/internal/state/schema.go @@ -0,0 +1,73 @@ +package state + +import ( + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "time" +) + +// StateBundle is the versioned persisted execution state snapshot. +// Only JSON-serializable fields; do not include runtime-only data. +// Version must be "1" for the initial schema. +// All timestamps are RFC3339 with UTC timezone. +// Files must be written with permissions 0600. 
// The pointer file latest.json must contain a JSON object pointing to a concrete snapshot path.

type StateBundle struct {
	Version      string            `json:"version"`
	CreatedAt    string            `json:"created_at"`
	ToolVersion  string            `json:"tool_version"`
	ModelID      string            `json:"model_id"`
	BaseURL      string            `json:"base_url"`
	ToolsetHash  string            `json:"toolset_hash"`
	ScopeKey     string            `json:"scope_key"`
	Prompts      map[string]string `json:"prompts"`
	PrepSettings map[string]any    `json:"prep_settings"`
	Context      map[string]any    `json:"context"`
	ToolCaps     map[string]any    `json:"tool_caps"`
	Custom       map[string]any    `json:"custom"`
	SourceHash   string            `json:"source_hash"`
	PrevSHA      string            `json:"prev_sha,omitempty"`
}

// Sentinel validation errors; callers may match them with errors.Is.
var (
	errInvalidVersion   = errors.New("invalid version")
	errMissingTimestamp = errors.New("invalid created_at")
	errMissingModel     = errors.New("missing model_id")
	errMissingBaseURL   = errors.New("missing base_url")
	errMissingScope     = errors.New("missing scope_key")
)

// Validate returns nil if the bundle is structurally valid for version 1.
// It checks the schema version, the RFC3339 timestamp, and the required
// identity fields. Optional maps may be nil; callers handle normalization.
func (b *StateBundle) Validate() error {
	if b == nil {
		return errors.New("nil bundle")
	}
	if b.Version != "1" {
		return fmt.Errorf("%w: %s", errInvalidVersion, b.Version)
	}
	// Wrap the parse failure so callers see why the timestamp was rejected
	// while errors.Is(err, errMissingTimestamp) keeps working. The previous
	// bare sentinel discarded the detail and was inconsistent with the
	// wrapped-error style used by SaveStateBundle.
	if _, err := time.Parse(time.RFC3339, b.CreatedAt); err != nil {
		return fmt.Errorf("%w: %v", errMissingTimestamp, err)
	}
	if b.ModelID == "" {
		return errMissingModel
	}
	if b.BaseURL == "" {
		return errMissingBaseURL
	}
	if b.ScopeKey == "" {
		return errMissingScope
	}
	// Optional maps may be nil; normalize callers should handle nil.
	return nil
}

// ComputeSourceHash returns a hex-encoded SHA-256 of select identifying fields.
// This is used to detect changes across runs; callers decide the exact input.
+func ComputeSourceHash(modelID string, baseURL string, toolsetHash string, scopeKey string) string { + input := modelID + "|" + baseURL + "|" + toolsetHash + "|" + scopeKey + sum := sha256.Sum256([]byte(input)) + return hex.EncodeToString(sum[:]) +} diff --git a/internal/state/schema_test.go b/internal/state/schema_test.go new file mode 100644 index 0000000..fbc736a --- /dev/null +++ b/internal/state/schema_test.go @@ -0,0 +1,52 @@ +package state + +import ( + "testing" +) + +func TestValidate_OK(t *testing.T) { + b := &StateBundle{ + Version: "1", + CreatedAt: "2025-08-19T00:00:00Z", + ToolVersion: "dev", + ModelID: "gpt-5", + BaseURL: "https://api.openai.example/v1", + ToolsetHash: "abc123", + ScopeKey: "scope", + Prompts: map[string]string{"system": "s"}, + SourceHash: "deadbeef", + } + if err := b.Validate(); err != nil { + t.Fatalf("expected ok, got %v", err) + } +} + +func TestValidate_Errors(t *testing.T) { + cases := []struct { + name string + b StateBundle + }{ + {"bad version", StateBundle{Version: "2", CreatedAt: "2025-08-19T00:00:00Z", ModelID: "m", BaseURL: "u", ScopeKey: "s"}}, + {"bad ts", StateBundle{Version: "1", CreatedAt: "not-time", ModelID: "m", BaseURL: "u", ScopeKey: "s"}}, + {"no model", StateBundle{Version: "1", CreatedAt: "2025-08-19T00:00:00Z", BaseURL: "u", ScopeKey: "s"}}, + {"no base", StateBundle{Version: "1", CreatedAt: "2025-08-19T00:00:00Z", ModelID: "m", ScopeKey: "s"}}, + {"no scope", StateBundle{Version: "1", CreatedAt: "2025-08-19T00:00:00Z", ModelID: "m", BaseURL: "u"}}, + } + for _, tc := range cases { + if err := tc.b.Validate(); err == nil { + t.Fatalf("%s: expected error", tc.name) + } + } +} + +func TestComputeSourceHash_Deterministic(t *testing.T) { + got1 := ComputeSourceHash("m", "u", "t", "s") + got2 := ComputeSourceHash("m", "u", "t", "s") + if got1 != got2 { + t.Fatalf("hash not deterministic: %s vs %s", got1, got2) + } + got3 := ComputeSourceHash("m2", "u", "t", "s") + if got1 == got3 { + t.Fatalf("hash should 
change on input change") + } +} diff --git a/internal/state/security.go b/internal/state/security.go new file mode 100644 index 0000000..6fd6c19 --- /dev/null +++ b/internal/state/security.go @@ -0,0 +1,194 @@ +package state + +import ( + "encoding/json" + "errors" + "os" + "regexp" + "runtime" + "strings" + "syscall" +) + +// ensureSecureStateDir validates the state directory on Unix-like systems. +// It rejects world-writable or non-owned directories to avoid leaking secrets. +// On non-Unix platforms (e.g., Windows), the checks are skipped. +func ensureSecureStateDir(dir string) error { + if strings.TrimSpace(dir) == "" { + return errors.New("empty state dir") + } + // Only enforce on Unix-like systems. Windows ACLs differ and Mode().Perm() is not authoritative. + if runtime.GOOS == "windows" { + return nil + } + + info, err := os.Stat(dir) + if err != nil { + // If it doesn't exist yet, the caller will create with 0700; allow. + if errors.Is(err, os.ErrNotExist) { + return nil + } + return err + } + if !info.IsDir() { + return errors.New("state dir is not a directory") + } + + // Reject world-writable (others write bit set) + if info.Mode().Perm()&0o002 != 0 { + return errors.New("state dir is world-writable") + } + + // Reject if not owned by current user (best-effort; skip if not supported) + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + uid := uint32(os.Getuid()) + if stat.Uid != uid { + return errors.New("state dir is not owned by current user") + } + } + return nil +} + +// sanitizeBundleForSave returns a deep-copied and redacted bundle suitable for persistence. 
+// It avoids storing obvious secrets and raw bodies by: +// - masking any values under keys containing "api_key", "apikey", "token", "authorization", "password", "secret" +// - stripping Authorization header values (preserving scheme) +// - removing long base64-like tokens (>=64 chars) within strings +// - omitting likely raw request/response bodies under keys: request_body, response_body, raw_request, raw_response +func sanitizeBundleForSave(b *StateBundle) (*StateBundle, error) { + if b == nil { + return nil, errors.New("nil bundle") + } + + // Shallow copy value fields and deep-copy maps + out := &StateBundle{ + Version: b.Version, + CreatedAt: b.CreatedAt, + ToolVersion: b.ToolVersion, + ModelID: b.ModelID, + BaseURL: b.BaseURL, + ToolsetHash: b.ToolsetHash, + ScopeKey: b.ScopeKey, + SourceHash: b.SourceHash, + PrevSHA: b.PrevSHA, + } + + // Copy and sanitize string map + if b.Prompts != nil { + out.Prompts = make(map[string]string, len(b.Prompts)) + for k, v := range b.Prompts { + out.Prompts[k] = sanitizeStringByHeuristics(k, v) + } + } + // Copy and sanitize any maps + out.PrepSettings = sanitizeAnyMap(b.PrepSettings) + out.Context = sanitizeAnyMap(b.Context) + out.ToolCaps = sanitizeAnyMap(b.ToolCaps) + out.Custom = sanitizeAnyMap(b.Custom) + + // Validate round-trip JSON to ensure serializable + if _, err := json.Marshal(out); err != nil { + return nil, err + } + return out, nil +} + +func sanitizeAnyMap(in map[string]any) map[string]any { + if in == nil { + return nil + } + out := make(map[string]any, len(in)) + for k, v := range in { + out[k] = sanitizeValue(k, v) + } + return out +} + +func sanitizeAnySlice(key string, in []any) []any { + if in == nil { + return nil + } + out := make([]any, 0, len(in)) + for _, v := range in { + out = append(out, sanitizeValue(key, v)) + } + return out +} + +var ( + // Detect long base64-like tokens (64+ chars of base64 charset) + reLongB64 = regexp.MustCompile(`[A-Za-z0-9+/=]{64,}`) + // Detect Authorization header 
anywhere in the string, case-insensitive + reAuthAny = regexp.MustCompile(`(?i)authorization\s*:\s*(bearer|token|basic)\s+([A-Za-z0-9._\-]+)`) // keep scheme, mask token +) + +func sanitizeValue(key string, v any) any { + switch t := v.(type) { + case string: + lowerKey := strings.ToLower(key) + // Omit likely raw bodies entirely + if lowerKey == "request_body" || lowerKey == "response_body" || lowerKey == "raw_request" || lowerKey == "raw_response" { + return "<omitted>" + } + return sanitizeStringByHeuristics(key, t) + case map[string]any: + return sanitizeAnyMap(t) + case []any: + return sanitizeAnySlice(key, t) + default: + // Leave numbers, bools, nil as-is + return v + } +} + +func sanitizeStringByHeuristics(key string, s string) string { + v := s + lowerKey := strings.ToLower(key) + // Redact inline Authorization headers regardless of key name + v = reAuthAny.ReplaceAllStringFunc(v, func(match string) string { + m := reAuthAny.FindStringSubmatch(match) + if len(m) >= 3 { + scheme := m[1] + token := m[2] + return "Authorization: " + scheme + " ****" + last4(token) + } + return "Authorization: <redacted>" + }) + if containsAny(lowerKey, []string{"api_key", "apikey", "token", "password", "secret"}) { + vv := strings.TrimSpace(v) + if vv == "" { + v = "" + } else if len(vv) <= 4 { + v = "****" + vv + } else { + v = "****" + vv[len(vv)-4:] + } + } + // Redact long base64-like runs + if reLongB64.MatchString(v) { + v = reLongB64.ReplaceAllStringFunc(v, func(match string) string { + if len(match) <= 4 { + return "****" + match + } + return "****" + match[len(match)-4:] + }) + } + return v +} + +func containsAny(s string, needles []string) bool { + for _, n := range needles { + if strings.Contains(s, n) { + return true + } + } + return false +} + +func last4(s string) string { + s = strings.TrimSpace(s) + if len(s) <= 4 { + return s + } + return s[len(s)-4:] +} diff --git a/internal/tools/image/client.go b/internal/tools/image/client.go new file mode 100644 index 
0000000..96bc8f6 --- /dev/null +++ b/internal/tools/image/client.go @@ -0,0 +1,49 @@ +package image + +import ( + "net/http" + "time" +) + +// RetryPolicy controls retry behavior for image HTTP calls. +// MaxRetries specifies the number of retries after the initial attempt. +// Backoff specifies the base backoff duration between attempts. +type RetryPolicy struct { + MaxRetries int + Backoff time.Duration +} + +// Client is a minimal HTTP client wrapper for image requests that carries +// the resolved timeout and retry policy. +type Client struct { + baseURL string + apiKey string + httpClient *http.Client + retry RetryPolicy +} + +// NewClient constructs a Client with the provided configuration. +// The httpTimeout applies to the underlying http.Client Timeout. +// Retries and backoff are stored in a simple RetryPolicy. +func NewClient(baseURL, apiKey string, httpTimeout time.Duration, retries int, backoff time.Duration) *Client { + if httpTimeout <= 0 { + httpTimeout = 90 * time.Second + } + if retries < 0 { + retries = 0 + } + return &Client{ + baseURL: baseURL, + apiKey: apiKey, + httpClient: &http.Client{ + Timeout: httpTimeout, + }, + retry: RetryPolicy{MaxRetries: retries, Backoff: backoff}, + } +} + +// HTTPTimeout returns the configured HTTP timeout. +func (c *Client) HTTPTimeout() time.Duration { return c.httpClient.Timeout } + +// Retry returns the configured RetryPolicy. 
+func (c *Client) Retry() RetryPolicy { return c.retry } diff --git a/internal/tools/image/client_test.go b/internal/tools/image/client_test.go new file mode 100644 index 0000000..f8aa057 --- /dev/null +++ b/internal/tools/image/client_test.go @@ -0,0 +1,31 @@ +package image + +import ( + "testing" + "time" +) + +func TestNewClient_AppliesTimeoutAndRetry(t *testing.T) { + c := NewClient("https://example", "key", 3*time.Second, 5, 750*time.Millisecond) + if got := c.HTTPTimeout(); got != 3*time.Second { + t.Fatalf("HTTPTimeout=%s; want 3s", got) + } + r := c.Retry() + if r.MaxRetries != 5 { + t.Fatalf("MaxRetries=%d; want 5", r.MaxRetries) + } + if r.Backoff != 750*time.Millisecond { + t.Fatalf("Backoff=%s; want 750ms", r.Backoff) + } +} + +func TestNewClient_NormalizesInputs(t *testing.T) { + c := NewClient("https://example", "key", 0, -1, 0) + if got := c.HTTPTimeout(); got <= 0 { + t.Fatalf("HTTPTimeout=%s; want > 0 default", got) + } + r := c.Retry() + if r.MaxRetries != 0 { + t.Fatalf("MaxRetries=%d; want 0", r.MaxRetries) + } +} diff --git a/internal/tools/image/options.go b/internal/tools/image/options.go new file mode 100644 index 0000000..3d54388 --- /dev/null +++ b/internal/tools/image/options.go @@ -0,0 +1,13 @@ +package image + +// Options holds configuration for image generation flows. +// Currently it carries only the model identifier used by the backend. +// Additional fields will be added as new capabilities are introduced. +type Options struct { + Model string +} + +// NewOptions constructs an Options value using the provided model identifier. 
+func NewOptions(model string) Options { + return Options{Model: model} +} diff --git a/internal/tools/image/options_test.go b/internal/tools/image/options_test.go new file mode 100644 index 0000000..8421089 --- /dev/null +++ b/internal/tools/image/options_test.go @@ -0,0 +1,10 @@ +package image + +import "testing" + +func TestNewOptions_SetsModel(t *testing.T) { + opt := NewOptions("foo") + if opt.Model != "foo" { + t.Fatalf("Model=%q; want foo", opt.Model) + } +} diff --git a/internal/tools/jsrun/handler.go b/internal/tools/jsrun/handler.go new file mode 100644 index 0000000..212da03 --- /dev/null +++ b/internal/tools/jsrun/handler.go @@ -0,0 +1,257 @@ +package jsrun + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/dop251/goja" +) + +// Input models the expected stdin JSON for code.sandbox.js.run +type Input struct { + Source string `json:"source"` + Input string `json:"input"` + Limits struct { + WallMS int `json:"wall_ms"` + OutputKB int `json:"output_kb"` + } `json:"limits"` +} + +// Output is the successful stdout JSON shape +type Output struct { + Output string `json:"output"` +} + +// Error represents a structured error payload for stderr JSON +type Error struct { + Code string `json:"code"` + Message string `json:"message"` +} + +var ( + errOutputLimit = errors.New("OUTPUT_LIMIT") + errTimeout = errors.New("TIMEOUT") +) + +// Run executes the provided JavaScript source with minimal host bindings. +// Returns (stdoutJSON, stderrJSON, err). On OUTPUT_LIMIT, returns truncated +// stdout along with stderr error JSON and a non-nil error. 
+func Run(raw []byte) ([]byte, []byte, error) { + start := time.Now() + var in Input + if err := json.Unmarshal(raw, &in); err != nil { + return nil, mustMarshalError("INVALID_INPUT", "invalid JSON: "+err.Error()), err + } + if in.Source == "" { + return nil, mustMarshalError("INVALID_INPUT", "missing source"), errors.New("invalid input") + } + + // Default output cap: 64 KiB if not provided or invalid + maxKB := in.Limits.OutputKB + if maxKB <= 0 { + maxKB = 64 + } + capBytes := maxKB * 1024 + + // Prepare bounded output buffer + var outBuf bytes.Buffer + + // Build a Goja VM with minimal bindings + vm := goja.New() + + // Helper to write to bounded buffer and signal limit + writeAndMaybeLimit := func(s string) error { + writeBounded(&outBuf, s, capBytes) + if outBuf.Len() >= capBytes && len(s) > capBytes { + return errOutputLimit + } + return nil + } + + // Bind read_input(): returns provided input string + if err := vm.Set("read_input", func() string { return in.Input }); err != nil { + return nil, mustMarshalError("EVAL_ERROR", "failed to bind read_input"), err + } + + // Bind emit(s): appends to bounded buffer + if err := vm.Set("emit", func(call goja.FunctionCall) goja.Value { + if len(call.Arguments) > 0 { + arg := call.Arguments[0].String() + if e := writeAndMaybeLimit(arg); e != nil { + // Trigger a JS exception that we map after execution + panic(errOutputLimit) + } + } + return goja.Undefined() + }); err != nil { + return nil, mustMarshalError("EVAL_ERROR", "failed to bind emit"), err + } + + // Timeout handling with interrupt + wall := in.Limits.WallMS + if wall <= 0 { + wall = 1000 // 1s default + } + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(wall)*time.Millisecond) + defer cancel() + + // Arrange to interrupt VM on timeout + done := make(chan struct{}) + var runErr error + go func() { + defer close(done) + defer func() { + if r := recover(); r != nil { + // Propagate as error for classification below + if errVal, ok := 
r.(error); ok { + runErr = errVal + } else { + runErr = fmt.Errorf("panic: %v", r) + } + } + }() + _, runErr = vm.RunString(in.Source) + }() + + select { + case <-done: + // Completed or panicked; classify below + case <-ctx.Done(): + vm.Interrupt("timeout") + <-done + runErr = errTimeout + } + + // Classify results + if runErr != nil { + switch runErr { + case errOutputLimit: + outJSON, mErr := json.Marshal(Output{Output: outBuf.String()}) + if mErr != nil { + return nil, mustMarshalError("EVAL_ERROR", mErr.Error()), mErr + } + // audit before returning + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.js.run", + "span": "tools.js.run", + "ms": time.Since(start).Milliseconds(), + "bytes_out": len(outBuf.String()), + "event": "OUTPUT_LIMIT", + }) + return outJSON, mustMarshalError("OUTPUT_LIMIT", fmt.Sprintf("output exceeded %d KB", maxKB)), errOutputLimit + case errTimeout: + // audit before returning + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.js.run", + "span": "tools.js.run", + "ms": time.Since(start).Milliseconds(), + "bytes_out": outBuf.Len(), + "event": "TIMEOUT", + }) + return nil, mustMarshalError("TIMEOUT", fmt.Sprintf("execution exceeded %d ms", wall)), errTimeout + default: + // audit before returning + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.js.run", + "span": "tools.js.run", + "ms": time.Since(start).Milliseconds(), + "bytes_out": outBuf.Len(), + "event": "EVAL_ERROR", + }) + return nil, mustMarshalError("EVAL_ERROR", runErr.Error()), runErr + } + } + + outJSON, mErr := json.Marshal(Output{Output: outBuf.String()}) + if mErr != nil { + return nil, mustMarshalError("EVAL_ERROR", mErr.Error()), mErr + } + // success audit + _ = 
appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.js.run", + "span": "tools.js.run", + "ms": time.Since(start).Milliseconds(), + "bytes_out": len(outBuf.String()), + "event": "success", + }) + return outJSON, nil, nil +} + +func mustMarshalError(code, msg string) []byte { + b, err := json.Marshal(Error{Code: code, Message: msg}) + if err != nil { + // Fallback minimal JSON to avoid panics in error paths + return []byte(`{"code":"` + code + `","message":"` + msg + `"}`) + } + return b +} + +func writeBounded(buf *bytes.Buffer, s string, capBytes int) { + if capBytes <= 0 { + _ = buf.WriteByte(0) // unreachable, but keep logic safe + return + } + remain := capBytes - buf.Len() + if remain <= 0 { + return + } + bs := []byte(s) + if len(bs) > remain { + buf.Write(bs[:remain]) + return + } + buf.Write(bs) +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. +func appendAudit(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { _ = f.Close() }() //nolint:errcheck // best-effort close + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." 
+ } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} diff --git a/internal/tools/jsrun/handler_test.go b/internal/tools/jsrun/handler_test.go new file mode 100644 index 0000000..dfaeef7 --- /dev/null +++ b/internal/tools/jsrun/handler_test.go @@ -0,0 +1,206 @@ +package jsrun + +import ( + "encoding/json" + "testing" +) + +func TestRun_EmitReadInput_Succeeds(t *testing.T) { + req := map[string]any{ + "source": "emit(read_input())", + "input": "hello", + "limits": map[string]any{"output_kb": 4}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal: %v", merr) + } + stdout, stderr, err := Run(b) + if err != nil || len(stderr) != 0 { + t.Fatalf("unexpected error: %v stderr=%s", err, string(stderr)) + } + var out struct { + Output string `json:"output"` + } + if e := json.Unmarshal(stdout, &out); e != nil { + t.Fatalf("bad json: %v", e) + } + if out.Output != "hello" { + t.Fatalf("got %q want %q", out.Output, "hello") + } +} + +func TestRun_OutputLimit_TruncatesAndErrors(t *testing.T) { + // Create input larger than 1 KiB + big := make([]byte, 1500) + for i := range big { + big[i] = 'a' + } + req := map[string]any{ + "source": "emit(read_input())", + "input": string(big), + "limits": map[string]any{"output_kb": 1}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal: %v", merr) + } + stdout, stderr, err := Run(b) + if err == nil { + t.Fatalf("expected error on output limit") + } + if string(stderr) == "" || !json.Valid(stderr) { + t.Fatalf("expected structured stderr json, got: %s", string(stderr)) + } + var errObj struct{ Code, Message string } + if uerr := json.Unmarshal(stderr, &errObj); uerr != nil { + t.Fatalf("stderr not JSON: %v: %s", uerr, string(stderr)) + } + if errObj.Code != "OUTPUT_LIMIT" { + t.Fatalf("expected OUTPUT_LIMIT code, got %q (%s)", errObj.Code, errObj.Message) 
+ } + var out struct { + Output string `json:"output"` + } + if e := json.Unmarshal(stdout, &out); e != nil { + t.Fatalf("bad stdout json: %v", e) + } + if len(out.Output) != 1024 { + t.Fatalf("expected truncated to 1024 bytes, got %d", len(out.Output)) + } +} + +func TestRun_Timeout_Interrupts(t *testing.T) { + // Infinite loop; should be interrupted by wall_ms + req := map[string]any{ + "source": "for(;;){}", + "input": "", + "limits": map[string]any{"output_kb": 1, "wall_ms": 50}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal: %v", merr) + } + stdout, stderr, err := Run(b) + if err == nil { + t.Fatalf("expected timeout error") + } + if len(stdout) != 0 { + t.Fatalf("expected no stdout on timeout, got: %s", string(stdout)) + } + var e struct{ Code, Message string } + if jerr := json.Unmarshal(stderr, &e); jerr != nil { + t.Fatalf("stderr not JSON: %v: %s", jerr, string(stderr)) + } + if e.Code != "TIMEOUT" { + t.Fatalf("expected TIMEOUT code, got %q (%s)", e.Code, e.Message) + } +} + +func TestRun_EvalError_ThrownError(t *testing.T) { + req := map[string]any{ + "source": "throw new Error('boom')", + "input": "", + "limits": map[string]any{"output_kb": 1}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal: %v", merr) + } + stdout, stderr, err := Run(b) + if err == nil { + t.Fatalf("expected EVAL_ERROR") + } + if len(stdout) != 0 { + t.Fatalf("expected no stdout on eval error, got: %s", string(stdout)) + } + var e struct{ Code, Message string } + if jerr := json.Unmarshal(stderr, &e); jerr != nil { + t.Fatalf("stderr not JSON: %v: %s", jerr, string(stderr)) + } + if e.Code != "EVAL_ERROR" { + t.Fatalf("expected EVAL_ERROR code, got %q (%s)", e.Code, e.Message) + } + if e.Message == "" { + t.Fatalf("expected non-empty error message") + } +} + +func TestRun_EvalError_ReferenceError(t *testing.T) { + // Referencing an undefined symbol should raise an evaluation error + req := map[string]any{ + "source": 
"emit(does_not_exist)", + "input": "", + "limits": map[string]any{"output_kb": 1}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal: %v", merr) + } + stdout, stderr, err := Run(b) + if err == nil { + t.Fatalf("expected EVAL_ERROR for reference error") + } + if len(stdout) != 0 { + t.Fatalf("expected no stdout on eval error, got: %s", string(stdout)) + } + var e struct{ Code, Message string } + if jerr := json.Unmarshal(stderr, &e); jerr != nil { + t.Fatalf("stderr not JSON: %v: %s", jerr, string(stderr)) + } + if e.Code != "EVAL_ERROR" { + t.Fatalf("expected EVAL_ERROR code, got %q (%s)", e.Code, e.Message) + } +} + +func TestRun_DenyByDefault_UndefinedGlobals(t *testing.T) { + // Verify that require/console are not bound and evaluate to undefined via typeof + req := map[string]any{ + "source": "emit(typeof require + '|' + typeof console)", + "input": "", + "limits": map[string]any{"output_kb": 1}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal: %v", merr) + } + stdout, stderr, err := Run(b) + if err != nil || len(stderr) != 0 { + t.Fatalf("unexpected error: %v stderr=%s", err, string(stderr)) + } + var out struct { + Output string `json:"output"` + } + if e := json.Unmarshal(stdout, &out); e != nil { + t.Fatalf("bad json: %v", e) + } + if out.Output != "undefined|undefined" { + t.Fatalf("got %q want %q", out.Output, "undefined|undefined") + } +} + +func TestRun_DenyByDefault_UndefinedTimers(t *testing.T) { + // Timers like setTimeout must not exist unless explicitly bound + req := map[string]any{ + "source": "emit(typeof setTimeout + '|' + typeof setInterval)", + "input": "", + "limits": map[string]any{"output_kb": 1}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal: %v", merr) + } + stdout, stderr, err := Run(b) + if err != nil || len(stderr) != 0 { + t.Fatalf("unexpected error: %v stderr=%s", err, string(stderr)) + } + var out struct { + Output string `json:"output"` + } + if e := 
package jsrun

import (
	"encoding/json"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"
)

// findRepoRoot is adapted to locate repository root containing go.mod.
// It walks upward from the current working directory; the test fails if the
// filesystem root is reached without finding go.mod.
func findRepoRoot(t *testing.T) string {
	t.Helper()
	start, err := os.Getwd()
	if err != nil {
		t.Fatalf("getwd: %v", err)
	}
	dir := start
	for {
		if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil {
			return dir
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			// Hit filesystem root without finding go.mod.
			t.Fatalf("go.mod not found from %s upward", start)
		}
		dir = parent
	}
}

// waitForAuditFile polls auditDir until a regular file appears or timeout
// elapses, returning the path of the first file found.
func waitForAuditFile(t *testing.T, auditDir string, timeout time.Duration) string {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		entries, err := os.ReadDir(auditDir)
		if err == nil {
			for _, e := range entries {
				if !e.IsDir() {
					return filepath.Join(auditDir, e.Name())
				}
			}
		}
		if time.Now().After(deadline) {
			t.Fatalf("audit log not created in %s", auditDir)
		}
		// Short poll interval keeps the test responsive without busy-waiting.
		time.Sleep(10 * time.Millisecond)
	}
}

// TestObservability_AuditLineWritten_OnSuccess runs the sandbox once and
// asserts that a structured audit line with the expected fields is written
// under <repo root>/.goagent/audit.
func TestObservability_AuditLineWritten_OnSuccess(t *testing.T) {
	root := findRepoRoot(t)
	_ = os.RemoveAll(filepath.Join(root, ".goagent")) //nolint:errcheck // best-effort cleanup

	req := map[string]any{
		"source": "emit('ok')",
		"input":  "",
		"limits": map[string]any{"output_kb": 1},
	}
	b, _ := json.Marshal(req) //nolint:errcheck // inputs are deterministic
	stdout, stderr, err := Run(b)
	if err != nil || len(stderr) != 0 {
		t.Fatalf("unexpected error: %v stderr=%s", err, string(stderr))
	}
	if !json.Valid(stdout) {
		t.Fatalf("stdout not json: %s", string(stdout))
	}

	auditDir := filepath.Join(root, ".goagent", "audit")
	logFile := waitForAuditFile(t, auditDir, 2*time.Second)
	data, rerr := os.ReadFile(logFile)
	if rerr != nil {
		t.Fatalf("read audit: %v", rerr)
	}
	content := string(data)
	// Substring checks are deliberately exact on the serialized JSON keys.
	if !strings.Contains(content, "\"tool\":\"code.sandbox.js.run\"") {
		t.Fatalf("audit missing tool field: %s", content)
	}
	if !strings.Contains(content, "\"span\":\"tools.js.run\"") {
		t.Fatalf("audit missing span field: %s", content)
	}
	if !strings.Contains(content, "\"bytes_out\":") {
		t.Fatalf("audit missing bytes_out field: %s", content)
	}
	if !strings.Contains(content, "\"event\":\"success\"") {
		t.Fatalf("audit missing success event: %s", content)
	}
}

// ---- file: internal/tools/manifest.go ----

package tools

import (
	"encoding/json"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strings"

	"github.com/hyperifyio/goagent/internal/oai"
)

// ToolSpec describes one runnable tool from tools.json: its public name, an
// optional description and JSON Schema for its parameters, the argv used to
// execute it, a per-tool timeout, and an env passthrough allowlist.
type ToolSpec struct {
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	Schema      json.RawMessage `json:"schema,omitempty"` // JSON Schema for params
	Command     []string        `json:"command"`          // argv: program and args
	TimeoutSec  int             `json:"timeoutSec,omitempty"`
	// EnvPassthrough is an allowlist of environment variable names that may be
	// passed through from the parent process to the tool process. Names are
	// normalized to upper case, trimmed, validated against [A-Z_][A-Z0-9_]*,
	// and de-duplicated while preserving order.
	EnvPassthrough []string `json:"envPassthrough,omitempty"`
}

// Manifest is the top-level shape of tools.json.
type Manifest struct {
	Tools []ToolSpec `json:"tools"`
}
+// Relative command paths in the manifest are validated and then resolved relative to the manifest's directory, +// so they do not depend on the process working directory. +func LoadManifest(manifestPath string) (map[string]ToolSpec, []oai.Tool, error) { + data, err := os.ReadFile(manifestPath) + if err != nil { + return nil, nil, fmt.Errorf("read manifest: %w", err) + } + var man Manifest + if err := json.Unmarshal(data, &man); err != nil { + return nil, nil, fmt.Errorf("parse manifest: %w", err) + } + registry := make(map[string]ToolSpec) + var oaiTools []oai.Tool + nameSeen := make(map[string]struct{}) + manifestDir := filepath.Dir(manifestPath) + for i, t := range man.Tools { + if t.Name == "" { + return nil, nil, fmt.Errorf("tool[%d]: name is required", i) + } + if _, ok := nameSeen[t.Name]; ok { + return nil, nil, fmt.Errorf("tool[%d] %q: duplicate name", i, t.Name) + } + nameSeen[t.Name] = struct{}{} + if len(t.Command) < 1 { + return nil, nil, fmt.Errorf("tool[%d] %q: command must have at least program name", i, t.Name) + } + // Validate and normalize envPassthrough early so callers can rely on it + if len(t.EnvPassthrough) > 0 { + norm, err := normalizeEnvAllowlist(t.EnvPassthrough) + if err != nil { + return nil, nil, fmt.Errorf("tool[%d] %q: %v", i, t.Name, err) + } + t.EnvPassthrough = norm + } + // S52/S30: Harden command[0] validation. For any relative program path, + // enforce the canonical tools bin prefix and prevent path escapes. + cmd0 := t.Command[0] + if !filepath.IsAbs(cmd0) { + // Normalize separators: convert backslashes to slashes (works cross‑platform) + // and then perform a platform-agnostic clean. Finally, ensure forward slashes. 
+ raw := strings.ReplaceAll(cmd0, "\\", "/") + norm := filepath.ToSlash(path.Clean(raw)) + // Normalize to a consistent leading ./ for prefix checks + if strings.HasPrefix(norm, "tools/") || norm == "tools" { + norm = "./" + norm + } + // Reject leading parent traversal + if strings.HasPrefix(norm, "../") || norm == ".." { + return nil, nil, fmt.Errorf("tool[%d] %q: command[0] must not start with '..' or escape tools/bin (got %q)", i, t.Name, cmd0) + } + // If original referenced ./tools/bin, ensure cleaned still stays within ./tools/bin + if strings.HasPrefix(raw, "./tools/bin/") || raw == "./tools/bin" { + if !(strings.HasPrefix(norm, "./tools/bin/")) { + return nil, nil, fmt.Errorf("tool[%d] %q: command[0] escapes ./tools/bin after normalization (got %q -> %q)", i, t.Name, cmd0, norm) + } + } else { + // Enforce canonical prefix for all other relative commands + if !strings.HasPrefix(norm, "./tools/bin/") { + return nil, nil, fmt.Errorf("tool[%d] %q: relative command[0] must start with ./tools/bin/", i, t.Name) + } + } + // Resolve relative program path against the manifest directory to avoid dependence on process CWD + // Keep validation based on the normalized forward-slash path, but compute a concrete absolute filesystem path. + // Example: manifest in /repo/sub/manifest/tools.json and command "./tools/bin/name" -> /repo/sub/manifest/tools/bin/name + // Trim leading "./" for joining, then convert to OS-specific separators. 
+ trimmed := strings.TrimPrefix(norm, "./") + resolved := filepath.Join(manifestDir, filepath.FromSlash(trimmed)) + absResolved, errAbs := filepath.Abs(resolved) + if errAbs != nil { + return nil, nil, fmt.Errorf("tool[%d] %q: resolve command[0]: %v", i, t.Name, errAbs) + } + t.Command[0] = absResolved + } + registry[t.Name] = t + // Build OpenAI tools entry + entry := oai.Tool{ + Type: "function", + Function: oai.ToolFunction{ + Name: t.Name, + Description: t.Description, + Parameters: t.Schema, + }, + } + oaiTools = append(oaiTools, entry) + } + return registry, oaiTools, nil +} + +// normalizeEnvAllowlist normalizes, validates, and de-duplicates environment +// variable names. It enforces the pattern ^[A-Z_][A-Z0-9_]*$ after converting +// to upper case and trimming ASCII whitespace. Order of first occurrence is +// preserved. Returns an error describing the first invalid entry. +func normalizeEnvAllowlist(keys []string) ([]string, error) { + out := make([]string, 0, len(keys)) + seen := make(map[string]struct{}, len(keys)) + for idx, k := range keys { + // Treat empty and whitespace-only as invalid + trimmed := strings.TrimSpace(k) + if trimmed == "" { + return nil, fmt.Errorf("envPassthrough[%d]: empty name", idx) + } + upper := strings.ToUpper(trimmed) + // Validate against a strict env var name pattern + // First character: A-Z or _ + // Subsequent: A-Z, 0-9, _ + if !isValidEnvName(upper) { + return nil, fmt.Errorf("envPassthrough[%d]: invalid name %q (must match [A-Z_][A-Z0-9_]*)", idx, k) + } + if _, ok := seen[upper]; ok { + continue + } + seen[upper] = struct{}{} + out = append(out, upper) + } + return out, nil +} + +func isValidEnvName(s string) bool { + if len(s) == 0 { + return false + } + // First rune must be A-Z or _ + c := s[0] + if !((c >= 'A' && c <= 'Z') || c == '_') { + return false + } + for i := 1; i < len(s); i++ { + c = s[i] + if !((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') { + return false + } + } + return true +} diff 
// ---- file: internal/tools/manifest_test.go ----

package tools

import (
	"encoding/json"
	"os"
	"path/filepath"
	"testing"
)

// https://github.com/hyperifyio/goagent/issues/1
// Happy path: a valid manifest loads, and envPassthrough entries are
// upper-cased, trimmed, and de-duplicated in first-seen order.
func TestLoadManifest_OK(t *testing.T) {
	dir := t.TempDir()
	file := filepath.Join(dir, "tools.json")
	data := map[string]any{
		"tools": []map[string]any{
			{
				"name":        "hello",
				"description": "says hello",
				"schema":      map[string]any{"type": "object"},
				// absolute path allowed in tests
				"command": []string{"/bin/echo", "{}"},
				// envPassthrough should be normalized, deduplicated
				"envPassthrough": []string{"oai_api_key", "OAI_API_KEY", " Path ", "TZ"},
			},
		},
	}
	b, err := json.Marshal(data)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	if err := os.WriteFile(file, b, 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	reg, tools, err := LoadManifest(file)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(reg) != 1 || len(tools) != 1 {
		t.Fatalf("unexpected sizes: reg=%d tools=%d", len(reg), len(tools))
	}
	spec := reg["hello"]
	want := []string{"OAI_API_KEY", "PATH", "TZ"}
	if len(spec.EnvPassthrough) != len(want) {
		t.Fatalf("envPassthrough len: got %d want %d", len(spec.EnvPassthrough), len(want))
	}
	for i := range want {
		if spec.EnvPassthrough[i] != want[i] {
			t.Fatalf("envPassthrough[%d]: got %q want %q", i, spec.EnvPassthrough[i], want[i])
		}
	}
}

// Two tools sharing a name must be rejected.
func TestLoadManifest_DuplicateName(t *testing.T) {
	dir := t.TempDir()
	file := filepath.Join(dir, "tools.json")
	data := `{"tools":[{"name":"x","command":["echo","{}"]},{"name":"x","command":["echo","{}"]}]}`
	if err := os.WriteFile(file, []byte(data), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	_, _, err := LoadManifest(file)
	if err == nil {
		t.Fatalf("expected error for duplicate names")
	}
}

// https://github.com/hyperifyio/goagent/issues/1
// Both name and command are mandatory fields.
func TestLoadManifest_MissingNameOrCommand(t *testing.T) {
	dir := t.TempDir()
	file := filepath.Join(dir, "tools.json")
	// Missing name
	data := `{"tools":[{"description":"x","command":["echo","{}"]}]}`
	if err := os.WriteFile(file, []byte(data), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	if _, _, err := LoadManifest(file); err == nil {
		t.Fatalf("expected error for missing name")
	}

	// Missing command
	data = `{"tools":[{"name":"x"}]}`
	if err := os.WriteFile(file, []byte(data), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	if _, _, err := LoadManifest(file); err == nil {
		t.Fatalf("expected error for missing command")
	}
}

// Harden validation: reject relative command[0] that escapes ./tools/bin or
// contains .. after normalization.
func TestLoadManifest_CommandEscapeAndDotDot(t *testing.T) {
	dir := t.TempDir()
	file := filepath.Join(dir, "tools.json")

	cases := []struct {
		name     string
		command0 string
		wantErr  bool
	}{
		{name: "ok-absolute", command0: "/usr/bin/env", wantErr: false},
		// relative simple path must now be under ./tools/bin
		{name: "reject-simple-relative", command0: "echo", wantErr: true},
		{name: "ok-tools-bin", command0: "./tools/bin/fs_read_file", wantErr: false},
		{name: "reject-dotdot-leading", command0: "../tools/bin/get_time", wantErr: true},
		{name: "reject-escape-from-bin", command0: "./tools/bin/../hack", wantErr: true},
		// Windows-style backslashes that normalize to an escape must be rejected
		{name: "reject-windows-backslash-escape", command0: ".\\tools\\bin\\..\\hack", wantErr: true},
		// Windows-style acceptable path under tools/bin should be accepted after normalization
		{name: "ok-windows-backslash-tools-bin", command0: ".\\tools\\bin\\fs_read_file", wantErr: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			data := map[string]any{
				"tools": []map[string]any{
					{
						"name":    "t",
						"command": []string{tc.command0},
					},
				},
			}
			b, err2 := json.Marshal(data)
			if err2 != nil {
				t.Fatalf("marshal: %v", err2)
			}
			if err := os.WriteFile(file, b, 0o644); err != nil {
				t.Fatalf("write: %v", err)
			}
			_, _, err := LoadManifest(file)
			if tc.wantErr && err == nil {
				t.Fatalf("expected error for command0=%q", tc.command0)
			}
			if !tc.wantErr && err != nil {
				t.Fatalf("unexpected error for %q: %v", tc.command0, err)
			}
		})
	}
}

// Invalid env var names (leading digit, dashes) must fail manifest loading.
func TestLoadManifest_InvalidEnvPassthrough(t *testing.T) {
	dir := t.TempDir()
	file := filepath.Join(dir, "tools.json")
	// Invalid names: leading digit and dash inside
	data := map[string]any{
		"tools": []map[string]any{
			{
				"name":           "t",
				"command":        []string{"/bin/true"},
				"envPassthrough": []string{"1BAD", "GOOD", "OAI-API-KEY"},
			},
		},
	}
	b, err := json.Marshal(data)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	if err := os.WriteFile(file, b, 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	if _, _, err := LoadManifest(file); err == nil {
		t.Fatalf("expected error for invalid envPassthrough entries")
	}
}

// Relative command paths must resolve against the manifest directory, not process CWD.
// The loader should rewrite command[0] to an absolute path rooted at the manifest's folder.
func TestLoadManifest_ResolvesRelativeAgainstManifestDir(t *testing.T) {
	// Create nested manifest directory
	base := t.TempDir()
	nested := filepath.Join(base, "configs", "sub")
	if err := os.MkdirAll(nested, 0o755); err != nil {
		t.Fatalf("mkdir nested: %v", err)
	}
	// Create a fake tools/bin tree relative to the manifest
	binDir := filepath.Join(nested, "tools", "bin")
	if err := os.MkdirAll(binDir, 0o755); err != nil {
		t.Fatalf("mkdir bin: %v", err)
	}
	// Create a small executable file to represent the tool binary
	toolPath := filepath.Join(binDir, "hello_tool")
	if err := os.WriteFile(toolPath, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
		t.Fatalf("write tool bin: %v", err)
	}
	// Write manifest that references ./tools/bin/hello_tool relative to the manifest dir
	manPath := filepath.Join(nested, "tools.json")
	data := map[string]any{
		"tools": []map[string]any{
			{
				"name":    "hello",
				"command": []string{"./tools/bin/hello_tool"},
			},
		},
	}
	b, err := json.Marshal(data)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	if err := os.WriteFile(manPath, b, 0o644); err != nil {
		t.Fatalf("write manifest: %v", err)
	}
	// Change working directory to a different location to ensure CWD is not used for resolution
	oldWD, err := os.Getwd()
	if err != nil {
		t.Fatalf("getwd: %v", err)
	}
	other := filepath.Join(base, "other")
	if err := os.MkdirAll(other, 0o755); err != nil {
		t.Fatalf("mkdir other: %v", err)
	}
	if err := os.Chdir(other); err != nil {
		t.Fatalf("chdir other: %v", err)
	}
	t.Cleanup(func() {
		if err := os.Chdir(oldWD); err != nil {
			t.Logf("chdir restore: %v", err)
		}
	})

	reg, _, err := LoadManifest(manPath)
	if err != nil {
		t.Fatalf("LoadManifest: %v", err)
	}
	spec, ok := reg["hello"]
	if !ok {
		t.Fatalf("missing tool in registry")
	}
	if len(spec.Command) == 0 {
		t.Fatalf("empty command")
	}
	got := spec.Command[0]
	if !filepath.IsAbs(got) {
		t.Fatalf("command[0] not absolute: %q", got)
	}
	// It should point to the tool under the manifest's directory, not under CWD
	if got != toolPath {
		t.Fatalf("resolved path mismatch:\n got: %s\nwant: %s", got, toolPath)
	}
}

// ---- file: internal/tools/runner.go ----

package tools

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strings"
	"time"
)

// RunToolWithJSON executes the tool command with args JSON provided on stdin.
// Returns stdout bytes and an error if the command fails. The caller is responsible
// for mapping errors to deterministic JSON per product rules.
// timeNow is a package-level clock to enable deterministic tests.
// In production it defaults to time.Now.
var timeNow = time.Now

// computeToolTimeout derives the timeout for a tool execution, honoring
// spec.TimeoutSec when provided; otherwise it falls back to the default.
func computeToolTimeout(spec ToolSpec, defaultTimeout time.Duration) time.Duration {
	if spec.TimeoutSec > 0 {
		return time.Duration(spec.TimeoutSec) * time.Second
	}
	return defaultTimeout
}

// buildToolEnvironment constructs a minimal environment for the tool process
// and returns the environment slice along with the list of env keys that were
// passed through (for audit visibility).
func buildToolEnvironment(spec ToolSpec) (env []string, passedKeys []string) {
	// Only PATH and HOME are forwarded by default; everything else must be
	// explicitly allowlisted via EnvPassthrough.
	if v := os.Getenv("PATH"); v != "" {
		env = append(env, "PATH="+v)
	}
	if v := os.Getenv("HOME"); v != "" {
		env = append(env, "HOME="+v)
	}
	if len(spec.EnvPassthrough) > 0 {
		for _, key := range spec.EnvPassthrough {
			// LookupEnv distinguishes "unset" from "set but empty": empty
			// values are still forwarded.
			if val, ok := os.LookupEnv(key); ok {
				env = append(env, key+"="+val)
				passedKeys = append(passedKeys, key)
			}
		}
	}
	return env, passedKeys
}
+func normalizeWaitError(ctx context.Context, waitErr error, stderrText string) error { + if ctx.Err() == context.DeadlineExceeded { + return errors.New("tool timed out") + } + if waitErr != nil { + msg := stderrText + if msg == "" { + msg = waitErr.Error() + } + return errors.New(msg) + } + return nil +} + +func RunToolWithJSON(parentCtx context.Context, spec ToolSpec, jsonInput []byte, defaultTimeout time.Duration) ([]byte, error) { + start := time.Now() + // Derive timeout, honoring per-tool override when provided. + to := computeToolTimeout(spec, defaultTimeout) + ctx, cancel := context.WithTimeout(parentCtx, to) + defer cancel() + + cmd := exec.CommandContext(ctx, spec.Command[0], spec.Command[1:]...) + // Build minimal environment and record passed-through keys for audit. + env, passedKeys := buildToolEnvironment(spec) + cmd.Env = env + stdin, err := cmd.StdinPipe() + if err != nil { + return nil, fmt.Errorf("stdin pipe: %w", err) + } + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return nil, fmt.Errorf("stderr pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("start: %w", err) + } + // Write JSON to stdin + if len(jsonInput) == 0 { + jsonInput = []byte("{}") + } + if _, err := stdin.Write(jsonInput); err != nil { + return nil, fmt.Errorf("write stdin: %w", err) + } + // Best-effort close; log failure to audit but do not fail run + if err := stdin.Close(); err != nil { + // Capture the close error as a best-effort audit line + if err2 := appendAuditLog(map[string]any{ + "ts": timeNow().UTC().Format(time.RFC3339Nano), + "event": "stdin_close_error", + "tool": spec.Name, + "error": err.Error(), + }); err2 != nil { + _ = err2 + } + } + + // Read stdout and stderr fully + outCh := make(chan []byte, 1) + errCh := make(chan []byte, 1) + go func() { outCh <- safeReadAll(stdout) }() + go func() { errCh <- 
safeReadAll(stderr) }() + + err = cmd.Wait() + out := <-outCh + serr := <-errCh + + exitCode := 0 + if err != nil { + // Try to capture exit code when available + if ee, ok := err.(*exec.ExitError); ok && ee.ProcessState != nil { + exitCode = ee.ProcessState.ExitCode() + } else { + // Unknown exit (e.g., timeout/cancel) + exitCode = -1 + } + } + // Best-effort audit (failures do not affect tool result) + writeAudit(spec, start, exitCode, len(out), len(serr), passedKeys) + + if normErr := normalizeWaitError(ctx, err, string(serr)); normErr != nil { + return nil, normErr + } + return out, nil +} + +// safeReadAll reads all bytes from r; on error it returns any bytes read so far (or nil). +func safeReadAll(r io.Reader) []byte { + b, err := io.ReadAll(r) + if err != nil { + return b + } + return b +} + +// writeAudit emits an NDJSON line capturing tool execution metadata. +func writeAudit(spec ToolSpec, start time.Time, exitCode, stdoutBytes, stderrBytes int, envKeys []string) { + type auditEntry struct { + TS string `json:"ts"` + Tool string `json:"tool"` + Argv []string `json:"argv"` + CWD string `json:"cwd"` + Exit int `json:"exit"` + MS int64 `json:"ms"` + StdoutBytes int `json:"stdoutBytes"` + StderrBytes int `json:"stderrBytes"` + Truncated bool `json:"truncated"` + EnvKeys []string `json:"envKeys,omitempty"` + } + + cwd, err := os.Getwd() + if err != nil { + cwd = "" + } + entry := auditEntry{ + TS: timeNow().UTC().Format(time.RFC3339Nano), + Tool: spec.Name, + Argv: redactSensitiveStrings(append([]string(nil), spec.Command...)), + CWD: redactSensitiveString(cwd), + Exit: exitCode, + MS: time.Since(start).Milliseconds(), + StdoutBytes: stdoutBytes, + StderrBytes: stderrBytes, + Truncated: false, + EnvKeys: append([]string(nil), envKeys...), + } + if err := appendAuditLog(entry); err != nil { + _ = err + } +} + +// appendAuditLog writes an NDJSON audit line to .goagent/audit/YYYYMMDD.log under the repository root. 
+// The repository root is determined by walking upward from the current working directory +// until a directory containing go.mod is found. If no go.mod is found, falls back to CWD. +func appendAuditLog(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := timeNow().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { + if err := f.Close(); err != nil { + _ = err + } + }() + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from the current working directory to locate the directory +// containing go.mod. If none is found, it returns the current working directory. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." + } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + // Reached filesystem root; fallback to original cwd + return cwd + } + dir = parent + } +} + +// redactSensitiveStrings applies redactSensitiveString to each element and returns a new slice. +func redactSensitiveStrings(values []string) []string { + out := make([]string, len(values)) + for i, v := range values { + out[i] = redactSensitiveString(v) + } + return out +} + +// redactSensitiveString masks occurrences of configured sensitive patterns and known secret env values. +// Patterns are sourced from GOAGENT_REDACT (comma/semicolon-separated substrings or regexes). +// Additionally, values of well-known secret env vars (OAI_API_KEY, OPENAI_API_KEY) are masked if present. 
+func redactSensitiveString(s string) string { + if s == "" { + return s + } + // Collect patterns + patterns := gatherRedactionPatterns() + // Apply regex replacements first + for _, rx := range patterns.regexps { + s = rx.ReplaceAllString(s, "***REDACTED***") + } + // Apply literal value masking + for _, lit := range patterns.literals { + if lit == "" { + continue + } + s = strings.ReplaceAll(s, lit, "***REDACTED***") + } + return s +} + +type redactionPatterns struct { + regexps []*regexp.Regexp + literals []string +} + +// gatherRedactionPatterns builds redaction patterns from environment. +// GOAGENT_REDACT may contain comma/semicolon separated regex patterns or literals. +// Known secret env values are added as literal masks. +func gatherRedactionPatterns() redactionPatterns { + var pats redactionPatterns + // Configurable patterns + cfg := os.Getenv("GOAGENT_REDACT") + if cfg != "" { + // split by comma or semicolon + fields := strings.FieldsFunc(cfg, func(r rune) bool { return r == ',' || r == ';' }) + for _, f := range fields { + f = strings.TrimSpace(f) + if f == "" { + continue + } + // Try to compile as regex; if it fails, treat as literal + if rx, err := regexp.Compile(f); err == nil { + pats.regexps = append(pats.regexps, rx) + } else { + pats.literals = append(pats.literals, f) + } + } + } + // Known secret env values (mask exact substrings) + for _, key := range []string{"OAI_API_KEY", "OPENAI_API_KEY"} { + if v := os.Getenv(key); v != "" { + pats.literals = append(pats.literals, v) + } + } + return pats +} diff --git a/internal/tools/runner_test.go b/internal/tools/runner_test.go new file mode 100644 index 0000000..1c0269a --- /dev/null +++ b/internal/tools/runner_test.go @@ -0,0 +1,449 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + "io/fs" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" + "time" +) + +// https://github.com/hyperifyio/goagent/issues/1 +func TestRunToolWithJSON_Timeout(t *testing.T) { 
+ dir := t.TempDir() + + // Build a small helper that sleeps longer than timeout + helper := filepath.Join(dir, "sleeper.go") + if err := os.WriteFile(helper, []byte(`package main +import ("time"; "os"; "io") +func main(){_,_ = io.ReadAll(os.Stdin); time.Sleep(2*time.Second)} +`), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + bin := filepath.Join(dir, "sleeper") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil { + t.Fatalf("build helper: %v: %s", err, string(out)) + } + + spec := ToolSpec{Name: "sleep", Command: []string{bin}, TimeoutSec: 1} + _, err := RunToolWithJSON(context.Background(), spec, []byte(`{}`), 3*time.Second) + if err == nil { + t.Fatalf("expected timeout error") + } + if err.Error() != "tool timed out" { + t.Fatalf("expected 'tool timed out', got: %v", err) + } +} + +func TestRunToolWithJSON_SuccessEcho(t *testing.T) { + dir := t.TempDir() + helper := filepath.Join(dir, "echo.go") + if err := os.WriteFile(helper, []byte(`package main +import ("io"; "os"; "fmt") +func main(){b,_:=io.ReadAll(os.Stdin); fmt.Print(string(b))} +`), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + bin := filepath.Join(dir, "echo") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil { + t.Fatalf("build helper: %v: %s", err, string(out)) + } + + spec := ToolSpec{Name: "echo", Command: []string{bin}, TimeoutSec: 2} + out, err := RunToolWithJSON(context.Background(), spec, []byte(`{"a":1}`), 5*time.Second) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + var js map[string]any + if err := json.Unmarshal(out, &js); err != nil { + t.Fatalf("bad json echo: %v; out=%s", err, string(out)) + } +} + +// Ensure deterministic collection order: stderr from a failing tool is surfaced as error +func TestRunToolWithJSON_NonZeroExit_ReportsStderr(t *testing.T) { + dir := 
t.TempDir() + helper := filepath.Join(dir, "fail.go") + if err := os.WriteFile(helper, []byte(`package main +import ("os"; "fmt") +func main(){fmt.Fprint(os.Stderr, "boom"); os.Exit(3)} +`), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + bin := filepath.Join(dir, "fail") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil { + t.Fatalf("build helper: %v: %s", err, string(out)) + } + + spec := ToolSpec{Name: "fail", Command: []string{bin}, TimeoutSec: 2} + _, err := RunToolWithJSON(context.Background(), spec, []byte(`{}`), 5*time.Second) + if err == nil { + t.Fatalf("expected error") + } + if err.Error() != "boom" { + t.Fatalf("expected stderr content, got: %q", err.Error()) + } +} + +// https://github.com/hyperifyio/goagent/issues/92 +func TestRunToolWithJSON_AuditLog_WritesLine(t *testing.T) { + // Run a quick echo tool and verify a log line is written + dir := t.TempDir() + helper := filepath.Join(dir, "echo.go") + if err := os.WriteFile(helper, []byte(`package main +import ("io"; "os"; "fmt") +func main(){b,_:=io.ReadAll(os.Stdin); fmt.Print(string(b))} +`), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + bin := filepath.Join(dir, "echo") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil { + t.Fatalf("build helper: %v: %s", err, string(out)) + } + + // Ensure audit dir at repo root is empty before run + root := findRepoRoot(t) + if err := os.RemoveAll(filepath.Join(root, ".goagent")); err != nil { + t.Logf("cleanup: %v", err) + } + + spec := ToolSpec{Name: "echo", Command: []string{bin}, TimeoutSec: 2} + out, err := RunToolWithJSON(context.Background(), spec, []byte(`{"ok":true}`), 5*time.Second) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(out) == 0 { + t.Fatalf("expected output") + } + + // Find today's audit log under repo root + 
auditDir := filepath.Join(root, ".goagent", "audit") + logFile := waitForAuditFile(t, auditDir, 2*time.Second) + data, err := os.ReadFile(logFile) + if err != nil { + t.Fatalf("read audit: %v", err) + } + // Expect at least one newline-terminated JSON object + if len(data) == 0 || data[len(data)-1] != '\n' { + t.Fatalf("audit log not newline terminated") + } + // Quick sanity check: file mode should be 0644 + st, err := os.Stat(logFile) + if err == nil { + if st.Mode().Type() != fs.ModeType && (st.Mode().Perm()&0o644) == 0 { + t.Fatalf("unexpected permissions: %v", st.Mode()) + } + } +} + +// https://github.com/hyperifyio/goagent/issues/92 +func TestRunToolWithJSON_AuditLog_RotationAcrossDateBoundary(t *testing.T) { + dir := t.TempDir() + helper := filepath.Join(dir, "echo.go") + if err := os.WriteFile(helper, []byte(`package main +import ("io"; "os"; "fmt") +func main(){b,_:=io.ReadAll(os.Stdin); fmt.Print(string(b))} +`), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + bin := filepath.Join(dir, "echo") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil { + t.Fatalf("build helper: %v: %s", err, string(out)) + } + + // Clean audit dir at repo root + root := findRepoRoot(t) + if err := os.RemoveAll(filepath.Join(root, ".goagent")); err != nil { + t.Logf("cleanup: %v", err) + } + + // Freeze time around midnight UTC so successive calls land in different files + orig := timeNow + defer func() { timeNow = orig }() + t1 := time.Date(2025, 1, 2, 23, 59, 59, 0, time.UTC) + t2 := t1.Add(2 * time.Second) // 2025-01-03 00:00:01 UTC + + spec := ToolSpec{Name: "echo", Command: []string{bin}, TimeoutSec: 2} + + timeNow = func() time.Time { return t1 } + if _, err := RunToolWithJSON(context.Background(), spec, []byte(`{"a":1}`), 5*time.Second); err != nil { + t.Fatalf("unexpected error first run: %v", err) + } + + timeNow = func() time.Time { return t2 } + if _, err := 
RunToolWithJSON(context.Background(), spec, []byte(`{"b":2}`), 5*time.Second); err != nil { + t.Fatalf("unexpected error second run: %v", err) + } + + auditDir := filepath.Join(root, ".goagent", "audit") + want1 := filepath.Join(auditDir, "20250102.log") + want2 := filepath.Join(auditDir, "20250103.log") + + // Allow brief delay for filesystem flush + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if _, err1 := os.Stat(want1); err1 == nil { + if _, err2 := os.Stat(want2); err2 == nil { + break + } + } + time.Sleep(10 * time.Millisecond) + } + + if _, err := os.Stat(want1); err != nil { + t.Fatalf("expected first log file %s: %v", want1, err) + } + if _, err := os.Stat(want2); err != nil { + t.Fatalf("expected second log file %s: %v", want2, err) + } + + // Ensure each file has at least one line + if b, err := os.ReadFile(want1); err == nil { + if len(b) == 0 || b[len(b)-1] != '\n' { + t.Fatalf("first audit file empty or not newline terminated") + } + } else { + t.Fatalf("read %s: %v", want1, err) + } + if b, err := os.ReadFile(want2); err == nil { + if len(b) == 0 || b[len(b)-1] != '\n' { + t.Fatalf("second audit file empty or not newline terminated") + } + } else { + t.Fatalf("read %s: %v", want2, err) + } +} + +// https://github.com/hyperifyio/goagent/issues/92 +func TestRunToolWithJSON_AuditLog_Redaction(t *testing.T) { + // Arrange: set env secrets and GOAGENT_REDACT patterns + t.Setenv("OAI_API_KEY", "sk-test-1234567890") + t.Setenv("GOAGENT_REDACT", "secret,sk-[a-z0-9]+") + + dir := t.TempDir() + helper := filepath.Join(dir, "echo.go") + if err := os.WriteFile(helper, []byte(`package main +import ("io"; "os"; "fmt") +func main(){b,_:=io.ReadAll(os.Stdin); fmt.Print(string(b))} +`), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + bin := filepath.Join(dir, "echo") + if runtime.GOOS == "windows" { + bin += ".exe" + } + if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil { + 
t.Fatalf("build helper: %v: %s", err, string(out)) + } + + // Clean audit dir at repo root + root := findRepoRoot(t) + if err := os.RemoveAll(filepath.Join(root, ".goagent")); err != nil { + t.Logf("cleanup: %v", err) + } + + // Use argv containing sensitive literals + spec := ToolSpec{Name: "echo", Command: []string{bin, "--token=sk-test-1234567890", "--note=contains-secret"}, TimeoutSec: 2} + if _, err := RunToolWithJSON(context.Background(), spec, []byte(`{"x":1}`), 5*time.Second); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Locate today's audit file under repo root + auditDir := filepath.Join(root, ".goagent", "audit") + logFile := waitForAuditFile(t, auditDir, 2*time.Second) + data, err := os.ReadFile(logFile) + if err != nil { + t.Fatalf("read audit: %v", err) + } + // Assert that raw secret substrings are not present + if string(data) == "" || len(data) == 0 { + t.Fatalf("empty audit") + } + if bytes := data; bytes != nil { + if contains := string(bytes); contains != "" { + if string(bytes) != "" && (containsFind(contains, "sk-test-1234567890") || containsFind(contains, "contains-secret")) { + t.Fatalf("expected redaction, found sensitive substrings in audit: %s", contains) + } + } + } +} + +// New test encoding the centralized root behavior: logs must be written under +// the repository root's .goagent/audit, not the package working directory. 
// TestRunToolWithJSON_AuditLog_CentralizedToRepoRoot encodes the centralized
// root behavior: audit logs must be written under the repository root's
// .goagent/audit, not under the package working directory.
func TestRunToolWithJSON_AuditLog_CentralizedToRepoRoot(t *testing.T) {
	// Build a tiny stdin-echo helper binary in a temp dir.
	dir := t.TempDir()
	helper := filepath.Join(dir, "echo.go")
	if err := os.WriteFile(helper, []byte(`package main
import ("io"; "os"; "fmt")
func main(){b,_:=io.ReadAll(os.Stdin); fmt.Print(string(b))}
`), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	bin := filepath.Join(dir, "echo")
	if runtime.GOOS == "windows" {
		bin += ".exe"
	}
	if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil {
		t.Fatalf("build helper: %v: %s", err, string(out))
	}

	root := findRepoRoot(t)
	// Clean both potential locations to start from a known state: the repo
	// root (the expected destination) and the CWD (the legacy/wrong one).
	if err := os.RemoveAll(filepath.Join(root, ".goagent")); err != nil {
		t.Logf("cleanup root .goagent: %v", err)
	}
	if err := os.RemoveAll(filepath.Join(".goagent")); err != nil {
		t.Logf("cleanup cwd .goagent: %v", err)
	}

	// Act: one successful tool run should produce one audit line.
	spec := ToolSpec{Name: "echo", Command: []string{bin}, TimeoutSec: 2}
	if _, err := RunToolWithJSON(context.Background(), spec, []byte(`{"ok":true}`), 5*time.Second); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Expect audit under repo root
	auditDirRoot := filepath.Join(root, ".goagent", "audit")
	// This helper will fail if no file appears in the expected root directory
	_ = waitForAuditFile(t, auditDirRoot, 2*time.Second)
}
// TestRunToolWithJSON_EnvPassthrough_KeysOnlyAudit verifies env passthrough:
// only allowlisted keys are visible to the child process, and the audit log
// records env key names only — never the secret values.
func TestRunToolWithJSON_EnvPassthrough_KeysOnlyAudit(t *testing.T) {
	// Build a helper binary that echoes back selected env vars as JSON.
	dir := t.TempDir()
	helper := filepath.Join(dir, "printenv.go")
	if err := os.WriteFile(helper, []byte(`package main
import (
	"encoding/json"; "os"; "fmt"
)
func main(){
	out := map[string]string{
		"OAI_API_KEY": os.Getenv("OAI_API_KEY"),
		"OAI_BASE_URL": os.Getenv("OAI_BASE_URL"),
		"UNSAFE": os.Getenv("UNSAFE"),
	}
	b,_ := json.Marshal(out)
	fmt.Print(string(b))
}
`), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	bin := filepath.Join(dir, "printenv")
	if runtime.GOOS == "windows" {
		bin += ".exe"
	}
	if out, err := exec.Command("go", "build", "-o", bin, helper).CombinedOutput(); err != nil {
		t.Fatalf("build helper: %v: %s", err, string(out))
	}

	// Set env in parent: two allowlisted keys plus one that must NOT pass.
	t.Setenv("OAI_API_KEY", "sk-live-should-not-appear-in-audit")
	t.Setenv("OAI_BASE_URL", "https://example.invalid")
	t.Setenv("UNSAFE", "DO-NOT-PASS")

	// Clean audit dir at repo root
	root := findRepoRoot(t)
	if err := os.RemoveAll(filepath.Join(root, ".goagent")); err != nil {
		t.Logf("cleanup: %v", err)
	}

	// Allowlist only OAI_API_KEY and OAI_BASE_URL
	spec := ToolSpec{Name: "printenv", Command: []string{bin}, TimeoutSec: 2, EnvPassthrough: []string{"OAI_API_KEY", "OAI_BASE_URL"}}
	out, err := RunToolWithJSON(context.Background(), spec, []byte(`{}`), 5*time.Second)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Child should see allowed keys and not see UNSAFE
	var got map[string]string
	if err := json.Unmarshal(out, &got); err != nil {
		t.Fatalf("bad json: %v", err)
	}
	if got["OAI_API_KEY"] == "" || got["OAI_BASE_URL"] == "" {
		t.Fatalf("allowed envs not visible to child: %v", got)
	}
	if got["UNSAFE"] != "" {
		t.Fatalf("unexpected UNSAFE in child env: %q", got["UNSAFE"])
	}

	// Audit should include envKeys but never the secret values
	auditDir := filepath.Join(root, ".goagent", "audit")
	logFile := waitForAuditFile(t, auditDir, 2*time.Second)
	data, errRead := os.ReadFile(logFile)
	if errRead != nil {
		t.Fatalf("read audit: %v", errRead)
	}
	s := string(data)
	// Must mention the keys
	if !strings.Contains(s, "\"envKeys\"") || !(strings.Contains(s, "OAI_API_KEY") && strings.Contains(s, "OAI_BASE_URL")) {
		t.Fatalf("audit missing envKeys or keys: %s", s)
	}
	// Must not contain the actual secret value (raw or JSON-quoted)
	if strings.Contains(s, "sk-live-should-not-appear-in-audit") || strings.Contains(s, fmt.Sprintf("%q", "sk-live-should-not-appear-in-audit")) {
		t.Fatalf("secret value leaked into audit: %s", s)
	}
}
// waitForAuditFile polls auditDir until a regular file appears and returns
// its full path. It fails the calling test if the timeout elapses first.
func waitForAuditFile(t *testing.T, auditDir string, timeout time.Duration) string {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		if entries, readErr := os.ReadDir(auditDir); readErr == nil {
			for _, entry := range entries {
				if entry.IsDir() {
					continue
				}
				return filepath.Join(auditDir, entry.Name())
			}
		}
		if time.Now().After(deadline) {
			t.Fatalf("audit log not created in %s", auditDir)
		}
		time.Sleep(10 * time.Millisecond)
	}
}
+func Run(raw []byte) ([]byte, []byte, error) { + start := time.Now() + var in Input + if err := json.Unmarshal(raw, &in); err != nil { + // audit invalid input + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.wasm.run", + "span": "tools.wasm.run", + "ms": time.Since(start).Milliseconds(), + "module_bytes": 0, + "wall_ms": 0, + "mem_pages_used": 0, + "bytes_out": 0, + "event": "INVALID_INPUT", + }) + return nil, mustMarshalError("INVALID_INPUT", "invalid JSON: "+err.Error()), errInvalidInput + } + if in.ModuleB64 == "" { + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.wasm.run", + "span": "tools.wasm.run", + "ms": time.Since(start).Milliseconds(), + "module_bytes": 0, + "wall_ms": in.Limits.WallMS, + "mem_pages_used": 0, + "bytes_out": 0, + "event": "INVALID_INPUT", + }) + return nil, mustMarshalError("INVALID_INPUT", "missing module_b64"), errInvalidInput + } + // Validate base64 early to surface errors deterministically + modBytes, err := base64.StdEncoding.DecodeString(in.ModuleB64) + if err != nil { + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.wasm.run", + "span": "tools.wasm.run", + "ms": time.Since(start).Milliseconds(), + "module_bytes": 0, + "wall_ms": in.Limits.WallMS, + "mem_pages_used": 0, + "bytes_out": 0, + "event": "INVALID_INPUT", + }) + return nil, mustMarshalError("INVALID_INPUT", "module_b64 is not valid base64: "+err.Error()), errInvalidInput + } + // Validate limits + if in.Limits.OutputKB <= 0 { + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.wasm.run", + "span": "tools.wasm.run", + "ms": time.Since(start).Milliseconds(), + "module_bytes": 
len(modBytes), + "wall_ms": in.Limits.WallMS, + "mem_pages_used": 0, + "bytes_out": 0, + "event": "INVALID_INPUT", + }) + return nil, mustMarshalError("INVALID_INPUT", "limits.output_kb must be > 0"), errInvalidInput + } + if in.Limits.WallMS <= 0 { + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.wasm.run", + "span": "tools.wasm.run", + "ms": time.Since(start).Milliseconds(), + "module_bytes": len(modBytes), + "wall_ms": in.Limits.WallMS, + "mem_pages_used": 0, + "bytes_out": 0, + "event": "INVALID_INPUT", + }) + return nil, mustMarshalError("INVALID_INPUT", "limits.wall_ms must be > 0"), errInvalidInput + } + if in.Limits.MemPages <= 0 { + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.wasm.run", + "span": "tools.wasm.run", + "ms": time.Since(start).Milliseconds(), + "module_bytes": len(modBytes), + "wall_ms": in.Limits.WallMS, + "mem_pages_used": 0, + "bytes_out": 0, + "event": "INVALID_INPUT", + }) + return nil, mustMarshalError("INVALID_INPUT", "limits.mem_pages must be > 0"), errInvalidInput + } + + // Deny WASI by default: detect imports of wasi_snapshot_preview1 and fail fast. + // This is a conservative check prior to implementing full wasm execution. 
+ if bytes.Contains(modBytes, []byte("wasi_snapshot_preview1")) { + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.wasm.run", + "span": "tools.wasm.run", + "ms": time.Since(start).Milliseconds(), + "module_bytes": len(modBytes), + "wall_ms": in.Limits.WallMS, + "mem_pages_used": 0, + "bytes_out": 0, + "event": "MISSING_IMPORT", + }) + return nil, mustMarshalError("MISSING_IMPORT", "WASI is not available by default; modules requiring 'wasi_snapshot_preview1' are unsupported"), errors.New("missing import: wasi_snapshot_preview1") + } + + // Not yet implemented: actual wasm execution. Return a stable stub error. + _ = appendAudit(map[string]any{ //nolint:errcheck // best-effort audit + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "code.sandbox.wasm.run", + "span": "tools.wasm.run", + "ms": time.Since(start).Milliseconds(), + "module_bytes": len(modBytes), + "wall_ms": in.Limits.WallMS, + "mem_pages_used": 0, + "bytes_out": 0, + "event": "UNIMPLEMENTED", + }) + return nil, mustMarshalError("UNIMPLEMENTED", "wasm execution not yet implemented"), errors.New("unimplemented") +} + +func mustMarshalError(code, msg string) []byte { + b, err := json.Marshal(Error{Code: code, Message: msg}) + if err != nil { + return []byte("{\"code\":\"" + code + "\",\"message\":\"" + msg + "\"}") + } + return b +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. 
// appendAudit writes one NDJSON line to .goagent/audit/YYYYMMDD.log under the
// module root, creating the directory and file as needed.
func appendAudit(entry any) error {
	line, err := json.Marshal(entry)
	if err != nil {
		return err
	}
	dir := filepath.Join(moduleRoot(), ".goagent", "audit")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}
	name := time.Now().UTC().Format("20060102") + ".log"
	f, err := os.OpenFile(filepath.Join(dir, name), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	defer func() { _ = f.Close() }() //nolint:errcheck // best-effort close
	_, err = f.Write(append(line, '\n'))
	return err
}

// moduleRoot walks upward from the current working directory to the first
// directory containing go.mod; it falls back to the CWD (or ".") on failure.
func moduleRoot() string {
	cwd, err := os.Getwd()
	if err != nil || cwd == "" {
		return "."
	}
	for dir := cwd; ; {
		if _, statErr := os.Stat(filepath.Join(dir, "go.mod")); statErr == nil {
			return dir
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			return cwd
		}
		dir = parent
	}
}
merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal failed: %v", merr) + } + stdout, stderr, err := Run(b) + if err == nil { + t.Fatalf("expected error for missing module_b64") + } + if len(stdout) != 0 { + t.Fatalf("expected no stdout, got: %s", string(stdout)) + } + var e struct{ Code, Message string } + if jerr := json.Unmarshal(stderr, &e); jerr != nil { + t.Fatalf("stderr not JSON: %v: %s", jerr, string(stderr)) + } + if e.Code != "INVALID_INPUT" { + t.Fatalf("expected INVALID_INPUT, got %q (%s)", e.Code, e.Message) + } +} + +func TestRun_BadBase64(t *testing.T) { + req := map[string]any{ + "module_b64": "!!!not-base64!!!", + "entry": "main", + "input": "", + "limits": map[string]any{"output_kb": 1, "wall_ms": 10, "mem_pages": 1}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal failed: %v", merr) + } + stdout, stderr, err := Run(b) + if err == nil { + t.Fatalf("expected error for invalid base64") + } + if len(stdout) != 0 { + t.Fatalf("expected no stdout, got: %s", string(stdout)) + } + var e struct{ Code, Message string } + if jerr := json.Unmarshal(stderr, &e); jerr != nil { + t.Fatalf("stderr not JSON: %v: %s", jerr, string(stderr)) + } + if e.Code != "INVALID_INPUT" { + t.Fatalf("expected INVALID_INPUT, got %q (%s)", e.Code, e.Message) + } +} + +func TestRun_UnimplementedOnValidInput(t *testing.T) { + // module_b64 is valid base64 but not necessarily a valid wasm; current stub only validates base64 + req := map[string]any{ + "module_b64": "AA==", // base64 for single zero byte + "entry": "main", + "input": "", + "limits": map[string]any{"output_kb": 1, "wall_ms": 10, "mem_pages": 1}, + } + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("marshal failed: %v", merr) + } + stdout, stderr, err := Run(b) + if err == nil { + t.Fatalf("expected unimplemented error") + } + if len(stdout) != 0 { + t.Fatalf("expected no stdout, got: %s", string(stdout)) + } + var e struct{ Code, Message string } + if jerr := 
json.Unmarshal(stderr, &e); jerr != nil { + t.Fatalf("stderr not JSON: %v: %s", jerr, string(stderr)) + } + if e.Code != "UNIMPLEMENTED" { + t.Fatalf("expected UNIMPLEMENTED, got %q (%s)", e.Code, e.Message) + } +} + +func TestRun_InvalidLimits(t *testing.T) { + cases := []map[string]any{ + {"module_b64": "AA==", "entry": "main", "input": "", "limits": map[string]any{"output_kb": 0, "wall_ms": 10, "mem_pages": 1}}, + {"module_b64": "AA==", "entry": "main", "input": "", "limits": map[string]any{"output_kb": 1, "wall_ms": 0, "mem_pages": 1}}, + {"module_b64": "AA==", "entry": "main", "input": "", "limits": map[string]any{"output_kb": 1, "wall_ms": 10, "mem_pages": 0}}, + } + for i, req := range cases { + b, merr := json.Marshal(req) + if merr != nil { + t.Fatalf("case %d: marshal failed: %v", i, merr) + } + stdout, stderr, err := Run(b) + if err == nil { + t.Fatalf("case %d: expected error for invalid limits", i) + } + if len(stdout) != 0 { + t.Fatalf("case %d: expected no stdout, got: %s", i, string(stdout)) + } + var e struct{ Code, Message string } + if jerr := json.Unmarshal(stderr, &e); jerr != nil { + t.Fatalf("case %d: stderr not JSON: %v: %s", i, jerr, string(stderr)) + } + if e.Code != "INVALID_INPUT" { + t.Fatalf("case %d: expected INVALID_INPUT, got %q (%s)", i, e.Code, e.Message) + } + } +} + +func TestRun_DenyWASIByDefault(t *testing.T) { + // Any bytes containing the string "wasi_snapshot_preview1" should be denied + // even before actual execution is implemented. 
// ErrOOBMemory is returned when attempting to read outside the bounds
// of the guest's linear memory. It standardizes to code "OOB_MEMORY".
var ErrOOBMemory = errors.New("OOB_MEMORY")

// readLinearMemory returns a copy of length bytes starting at ptr from the
// provided linear memory slice. It performs strict bounds checks and returns
// ErrOOBMemory on any out-of-bounds condition. A zero-length read succeeds
// exactly when ptr is within [0, len] (ptr == len allowed).
func readLinearMemory(linearMemory []byte, ptr uint32, length uint32) ([]byte, error) {
	// Widen to uint64 before summing so ptr+length cannot wrap around; one
	// comparison then covers past-the-end, ptr-beyond-end, and overflow alike.
	if uint64(ptr)+uint64(length) > uint64(len(linearMemory)) {
		return nil, ErrOOBMemory
	}
	// The check above bounds ptr+length by len(linearMemory), so this uint32
	// addition cannot wrap.
	end := ptr + length
	// Copy out so callers never alias the guest's backing array.
	out := make([]byte, length)
	copy(out, linearMemory[ptr:end])
	return out, nil
}
// findRepoRoot walks upward from the current working directory and returns
// the first directory containing go.mod, failing the test if none is found.
func findRepoRoot(t *testing.T) string {
	t.Helper()
	start, err := os.Getwd()
	if err != nil {
		t.Fatalf("getwd: %v", err)
	}
	for dir := start; ; {
		if _, statErr := os.Stat(filepath.Join(dir, "go.mod")); statErr == nil {
			return dir
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			t.Fatalf("go.mod not found from %s upward", start)
		}
		dir = parent
	}
}
+ if err == nil || len(stdout) != 0 { + t.Fatalf("expected unimplemented error with no stdout, got err=%v stdout=%s", err, string(stdout)) + } + var e struct{ Code, Message string } + if jerr := json.Unmarshal(stderr, &e); jerr != nil { + t.Fatalf("stderr not JSON: %v: %s", jerr, string(stderr)) + } + if e.Code != "UNIMPLEMENTED" { + t.Fatalf("expected UNIMPLEMENTED, got %q", e.Code) + } + + auditDir := filepath.Join(root, ".goagent", "audit") + logFile := waitForAuditFile(t, auditDir, 2*time.Second) + data, rerr := os.ReadFile(logFile) + if rerr != nil { + t.Fatalf("read audit: %v", rerr) + } + content := string(data) + if !strings.Contains(content, "\"tool\":\"code.sandbox.wasm.run\"") { + t.Fatalf("audit missing tool field: %s", content) + } + if !strings.Contains(content, "\"span\":\"tools.wasm.run\"") { + t.Fatalf("audit missing span field: %s", content) + } + if !strings.Contains(content, "\"module_bytes\":1") { + t.Fatalf("audit missing module_bytes field: %s", content) + } + if !strings.Contains(content, "\"wall_ms\":10") { + t.Fatalf("audit missing wall_ms field: %s", content) + } + if !strings.Contains(content, "\"mem_pages_used\":0") { + t.Fatalf("audit missing mem_pages_used field: %s", content) + } + if !strings.Contains(content, "\"bytes_out\":0") { + t.Fatalf("audit missing bytes_out field: %s", content) + } + if !strings.Contains(content, "\"event\":\"UNIMPLEMENTED\"") { + t.Fatalf("audit missing UNIMPLEMENTED event: %s", content) + } +} diff --git a/logs/CURRENT_TASK.md b/logs/CURRENT_TASK.md new file mode 100644 index 0000000..00e5e8b --- /dev/null +++ b/logs/CURRENT_TASK.md @@ -0,0 +1 @@ +Opened PR #55 (CLI reference). Preparing next docs-only slice. 
diff --git a/rss_fetch b/rss_fetch new file mode 100755 index 0000000..41ac2e3 Binary files /dev/null and b/rss_fetch differ diff --git a/scripts b/scripts new file mode 160000 index 0000000..a9772ba --- /dev/null +++ b/scripts @@ -0,0 +1 @@ +Subproject commit a9772ba12c875746c01762c5f024fb478c2cd931 diff --git a/tools.json b/tools.json new file mode 100644 index 0000000..a6fde70 --- /dev/null +++ b/tools.json @@ -0,0 +1,517 @@ +{ + "tools": [ + { + "name": "get_time", + "description": "Get current time for an IANA timezone", + "schema": { + "type": "object", + "properties": { + "timezone": { + "type": "string", + "description": "IANA timezone, e.g. Europe/Helsinki" + }, + "tz": { + "type": "string", + "description": "Alias for timezone (deprecated)" + } + }, + "required": ["timezone"], + "additionalProperties": false + }, + "command": ["./tools/bin/get_time"], + "timeoutSec": 5 + } + , + { + "name": "http_fetch", + "description": "Safe HTTP/HTTPS fetcher with byte cap and redirects", + "schema": { + "type": "object", + "properties": { + "url": {"type": "string"}, + "method": {"type": "string", "enum": ["GET", "HEAD"]}, + "max_bytes": {"type": "integer", "minimum": 1, "default": 1048576}, + "timeout_ms": {"type": "integer", "minimum": 1, "default": 10000}, + "decompress": {"type": "boolean", "default": true} + }, + "required": ["url"], + "additionalProperties": false + }, + "command": ["./tools/bin/http_fetch"], + "timeoutSec": 15, + "envPassthrough": ["HTTP_TIMEOUT_MS"] + } + , + { + "name": "fs_read_file", + "description": "Read a repository-relative file as base64 with optional offset and max bytes", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "Repo-relative path to file"}, + "offsetBytes": {"type": "integer", "minimum": 0}, + "maxBytes": {"type": "integer", "minimum": 1} + }, + "required": ["path"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_read_file"], + "timeoutSec": 5 + }, + { + 
"name": "fs_write_file", + "description": "Atomically write a repository-relative file from base64 content", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "contentBase64": {"type": "string"}, + "createModeOctal": {"type": "string"} + }, + "required": ["path", "contentBase64"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_write_file"], + "timeoutSec": 5 + }, + { + "name": "fs_append_file", + "description": "Append base64 content to a repository-relative file (create if missing)", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "contentBase64": {"type": "string"} + }, + "required": ["path", "contentBase64"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_append_file"], + "timeoutSec": 5 + }, + { + "name": "fs_mkdirp", + "description": "Recursively create a directory path", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "modeOctal": {"type": "string"} + }, + "required": ["path"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_mkdirp"], + "timeoutSec": 5 + }, + { + "name": "fs_rm", + "description": "Remove a repository-relative file or directory", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "recursive": {"type": "boolean"}, + "force": {"type": "boolean"} + }, + "required": ["path"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_rm"], + "timeoutSec": 5 + }, + { + "name": "fs_move", + "description": "Move or rename a repository-relative path", + "schema": { + "type": "object", + "properties": { + "from": {"type": "string"}, + "to": {"type": "string"}, + "overwrite": {"type": "boolean"} + }, + "required": ["from", "to"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_move"], + "timeoutSec": 5 + }, + { + "name": "fs_search", + "description": "Search repository files for a query with optional regex/globs", + "schema": { + 
"type": "object", + "properties": { + "query": {"type": "string"}, + "regex": {"type": "boolean"}, + "globs": {"type": "array", "items": {"type": "string"}}, + "maxResults": {"type": "integer", "minimum": 1} + }, + "required": ["query"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_search"], + "timeoutSec": 5 + }, + { + "name": "fs_listdir", + "description": "List directory entries with optional recursion and glob filtering", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "recursive": {"type": "boolean"}, + "globs": {"type": "array", "items": {"type": "string"}}, + "includeHidden": {"type": "boolean"}, + "maxResults": {"type": "integer", "minimum": 1} + }, + "required": ["path"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_listdir"], + "timeoutSec": 5 + }, + { + "name": "fs_read_lines", + "description": "Read a line range from a repository-relative file with optional byte cap", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "startLine": {"type": "integer", "minimum": 0}, + "endLine": {"type": "integer", "minimum": 0}, + "maxBytes": {"type": "integer", "minimum": 1} + }, + "required": ["path", "startLine", "endLine"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_read_lines"], + "timeoutSec": 5 + }, + { + "name": "fs_apply_patch", + "description": "Apply a strict unified diff (optional dry-run)", + "schema": { + "type": "object", + "properties": { + "unifiedDiff": {"type": "string"}, + "dryRun": {"type": "boolean"} + }, + "required": ["unifiedDiff"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_apply_patch"], + "timeoutSec": 10 + }, + { + "name": "fs_edit_range", + "description": "Atomically replace a byte range in a file with base64 content", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "startByte": {"type": "integer", "minimum": 0}, + "endByte": {"type": "integer", 
"minimum": 0}, + "replacementBase64": {"type": "string"}, + "expectedSha256": {"type": "string"} + }, + "required": ["path", "startByte", "endByte", "replacementBase64"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_edit_range"], + "timeoutSec": 5 + }, + { + "name": "exec", + "description": "Run an arbitrary program with args, cwd, env, and stdin", + "schema": { + "type": "object", + "properties": { + "cmd": {"type": "string"}, + "args": {"type": "array", "items": {"type": "string"}}, + "cwd": {"type": "string"}, + "env": {"type": "object", "additionalProperties": {"type": "string"}}, + "stdin": {"type": "string"}, + "timeoutSec": {"type": "integer", "minimum": 1} + }, + "required": ["cmd"], + "additionalProperties": false + }, + "command": ["./tools/bin/exec"], + "timeoutSec": 30 + }, + { + "name": "fs_stat", + "description": "Stat a path (optionally follow symlinks and compute hash)", + "schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "followSymlinks": {"type": "boolean"}, + "hash": {"type": "string", "enum": ["none", "sha256"]} + }, + "required": ["path"], + "additionalProperties": false + }, + "command": ["./tools/bin/fs_stat"], + "timeoutSec": 5 + }, + { + "name": "img_create", + "description": "Generate image(s) with OpenAI Images API and save to repo or return base64", + "schema": { + "type": "object", + "required": ["prompt"], + "properties": { + "prompt": {"type": "string"}, + "n": {"type": "integer", "minimum": 1, "maximum": 4, "default": 1}, + "size": {"type": "string", "pattern": "^\\d{3,4}x\\d{3,4}$", "default": "1024x1024"}, + "model": {"type": "string", "default": "gpt-image-1"}, + "return_b64": {"type": "boolean", "default": false}, + "save": { + "type": "object", + "required": ["dir"], + "properties": { + "dir": {"type": "string"}, + "basename": {"type": "string", "default": "img"}, + "ext": {"type": "string", "enum": ["png"], "default": "png"} + }, + "additionalProperties": false + } + }, + 
"additionalProperties": false + }, + "command": ["./tools/bin/img_create"], + "timeoutSec": 120, + "envPassthrough": ["OAI_API_KEY", "OAI_BASE_URL", "OAI_IMAGE_BASE_URL", "OAI_HTTP_TIMEOUT"] + }, + { + "name": "searxng_search", + "description": "Meta search via SearXNG", + "schema": { + "type": "object", + "properties": { + "q": {"type": "string"}, + "time_range": {"type": "string", "enum": ["day","week","month","year"]}, + "categories": {"type": "array", "items": {"type": "string"}}, + "engines": {"type": "array", "items": {"type": "string"}}, + "language": {"type": "string"}, + "page": {"type": "integer", "minimum": 1}, + "size": {"type": "integer", "minimum": 1, "maximum": 50} + }, + "required": ["q"], + "additionalProperties": false + }, + "command": ["./tools/bin/searxng_search"], + "timeoutSec": 15, + "envPassthrough": ["SEARXNG_BASE_URL","HTTP_TIMEOUT_MS"] + } + , + { + "name": "robots_check", + "description": "Evaluate robots.txt for a given URL and user agent", + "schema": { + "type": "object", + "properties": { + "url": {"type": "string"}, + "user_agent": {"type": "string"} + }, + "required": ["url"], + "additionalProperties": false + }, + "command": ["./tools/bin/robots_check"], + "timeoutSec": 10 + } + , + { + "name": "readability_extract", + "description": "Extract article content from HTML using go-readability", + "schema": { + "type": "object", + "properties": { + "html": {"type": "string"}, + "base_url": {"type": "string"} + }, + "required": ["html", "base_url"], + "additionalProperties": false + }, + "command": ["./tools/bin/readability_extract"], + "timeoutSec": 10 + } + , + { + "name": "metadata_extract", + "description": "Extract OpenGraph, Twitter cards, and JSON-LD from HTML", + "schema": { + "type": "object", + "properties": { + "html": {"type": "string"}, + "base_url": {"type": "string"} + }, + "required": ["html", "base_url"], + "additionalProperties": false + }, + "command": ["./tools/bin/metadata_extract"], + "timeoutSec": 10 + } + , + { 
+ "name": "pdf_extract", + "description": "Extract text from PDF pages; optional OCR via tesseract", + "schema": { + "type": "object", + "properties": { + "pdf_base64": {"type": "string"}, + "pages": {"type": "array", "items": {"type": "integer"}} + }, + "required": ["pdf_base64"], + "additionalProperties": false + }, + "command": ["./tools/bin/pdf_extract"], + "timeoutSec": 60, + "envPassthrough": ["ENABLE_OCR"] + } + , + { + "name": "rss_fetch", + "description": "Fetch and parse RSS/Atom feeds with conditional GET", + "schema": { + "type": "object", + "properties": { + "url": {"type": "string"}, + "if_modified_since": {"type": "string"} + }, + "required": ["url"], + "additionalProperties": false + }, + "command": ["./tools/bin/rss_fetch"], + "timeoutSec": 10 + } + , + { + "name": "wayback_lookup", + "description": "Query Internet Archive Wayback Machine for closest snapshot; optionally trigger save", + "schema": { + "type": "object", + "properties": { + "url": {"type": "string"}, + "save": {"type": "boolean", "default": false} + }, + "required": ["url"], + "additionalProperties": false + }, + "command": ["./tools/bin/wayback_lookup"], + "timeoutSec": 10 + } + , + { + "name": "wiki_query", + "description": "MediaWiki summaries/search", + "schema": { + "type": "object", + "additionalProperties": false, + "properties": { + "titles": { + "type": "string", + "description": "Exact page title to fetch summary for (mutually exclusive with 'search')" + }, + "search": { + "type": "string", + "description": "Full-text search term to find pages (mutually exclusive with 'titles')" + }, + "language": { + "type": "string", + "default": "en", + "description": "MediaWiki language code, e.g. 
en, fi" + } + } + }, + "command": ["./tools/bin/wiki_query"], + "timeoutSec": 10 + } + , + { + "name": "openalex_search", + "description": "Search scholarly works via OpenAlex", + "schema": { + "type": "object", + "properties": { + "q": {"type": "string"}, + "from": {"type": "string"}, + "to": {"type": "string"}, + "per_page": {"type": "integer", "minimum": 1, "maximum": 50, "default": 10} + }, + "required": ["q"], + "additionalProperties": false + }, + "command": ["./tools/bin/openalex_search"], + "timeoutSec": 15 + } + , + { + "name": "crossref_search", + "description": "Search DOI metadata via Crossref", + "schema": { + "type": "object", + "properties": { + "q": {"type": "string"}, + "rows": {"type": "integer", "minimum": 1, "maximum": 50, "default": 10} + }, + "required": ["q"], + "additionalProperties": false + }, + "command": ["./tools/bin/crossref_search"], + "timeoutSec": 15, + "envPassthrough": ["CROSSREF_MAILTO", "HTTP_TIMEOUT_MS"] + } + , + { + "name": "github_search", + "description": "Search GitHub repositories, code, issues, or commits", + "schema": { + "type": "object", + "properties": { + "q": {"type": "string"}, + "type": {"type": "string", "enum": ["repositories", "code", "issues", "commits"]}, + "per_page": {"type": "integer", "minimum": 1, "maximum": 50, "default": 10} + }, + "required": ["q", "type"], + "additionalProperties": false + }, + "command": ["./tools/bin/github_search"], + "timeoutSec": 15, + "envPassthrough": ["GITHUB_TOKEN"] + } + , + { + "name": "citation_pack", + "description": "Normalize a citation and optionally include Wayback archive URL", + "schema": { + "type": "object", + "properties": { + "doc": { + "type": "object", + "properties": { + "title": {"type": "string"}, + "url": {"type": "string"}, + "published_at": {"type": "string"} + }, + "required": ["url"], + "additionalProperties": false + }, + "archive": { + "type": "object", + "properties": { + "wayback": {"type": "boolean"} + }, + "additionalProperties": false + } + }, 
+ "required": ["doc"], + "additionalProperties": false + }, + "command": ["./tools/bin/citation_pack"], + "timeoutSec": 10 + } + ] +} diff --git a/tools/cmd/citation_pack/citation_pack.go b/tools/cmd/citation_pack/citation_pack.go new file mode 100644 index 0000000..17df820 --- /dev/null +++ b/tools/cmd/citation_pack/citation_pack.go @@ -0,0 +1,244 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "time" +) + +type input struct { + Doc struct { + Title string `json:"title"` + URL string `json:"url"` + PublishedAt string `json:"published_at"` + } `json:"doc"` + Archive struct { + Wayback bool `json:"wayback"` + } `json:"archive"` +} + +type output struct { + Title string `json:"title,omitempty"` + URL string `json:"url"` + Host string `json:"host"` + AccessedAt string `json:"accessed_at"` + ArchiveURL string `json:"archive_url,omitempty"` +} + +func main() { + if err := run(); err != nil { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } +} + +func run() error { + in, err := decodeInput() + if err != nil { + return err + } + if strings.TrimSpace(in.Doc.URL) == "" { + return errors.New("doc.url is required") + } + u, err := url.Parse(in.Doc.URL) + if err != nil || (u.Scheme != "http" && u.Scheme != "https") { + return errors.New("doc.url must be a valid http/https URL") + } + out := output{ + Title: strings.TrimSpace(in.Doc.Title), + URL: in.Doc.URL, + Host: u.Hostname(), + AccessedAt: time.Now().UTC().Format(time.RFC3339), + } + + archived := false + start := time.Now() + if in.Archive.Wayback { + archiveURL, aerr := waybackLookup(in.Doc.URL) + if aerr != nil { + return aerr + } + if archiveURL != "" { + out.ArchiveURL = archiveURL + archived = true + } + } + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + _ = appendAudit(map[string]any{ 
//nolint:errcheck + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "citation_pack", + "url_host": out.Host, + "archived": archived, + "ms": time.Since(start).Milliseconds(), + }) + return nil +} + +func decodeInput() (input, error) { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + return in, nil +} + +// waybackLookup performs a lookup against the Wayback Machine compatible endpoint. +// It respects WAYBACK_BASE_URL if set, otherwise defaults to https://web.archive.org. +// Enforces a 3s timeout and SSRF guard on the base URL. +func waybackLookup(targetURL string) (string, error) { + base := strings.TrimSpace(os.Getenv("WAYBACK_BASE_URL")) + if base == "" { + base = "https://web.archive.org" + } + baseURL, err := url.Parse(base) + if err != nil || (baseURL.Scheme != "http" && baseURL.Scheme != "https") { + return "", errors.New("WAYBACK_BASE_URL must be a valid http/https URL") + } + if err := ssrfGuard(baseURL); err != nil { + return "", err + } + reqURL, err := url.Parse(baseURL.String()) + if err != nil { + return "", err + } + reqURL.Path = strings.TrimRight(reqURL.Path, "/") + "/available" + q := reqURL.Query() + q.Set("url", targetURL) + reqURL.RawQuery = q.Encode() + client := &http.Client{Timeout: 3 * time.Second} + resp, err := client.Get(reqURL.String()) + if err != nil { + return "", fmt.Errorf("http: %w", err) + } + defer func() { _ = resp.Body.Close() }() //nolint:errcheck + var raw struct { + ArchivedSnapshots struct { + Closest struct { + Available bool `json:"available"` + URL string `json:"url"` + Timestamp string `json:"timestamp"` + } `json:"closest"` + } `json:"archived_snapshots"` + } + if err := json.NewDecoder(bufio.NewReader(resp.Body)).Decode(&raw); err != nil { + return "", fmt.Errorf("decode json: %w", err) + } + if raw.ArchivedSnapshots.Closest.Available { + return raw.ArchivedSnapshots.Closest.URL, nil + } + 
return "", nil +} + +// ssrfGuard similar to other networked tools; can be bypassed in tests via CITATION_PACK_ALLOW_LOCAL=1 +func ssrfGuard(u *url.URL) error { + host := u.Hostname() + if host == "" { + return errors.New("invalid host") + } + if strings.HasSuffix(strings.ToLower(host), ".onion") { + return errors.New("SSRF blocked: onion domains are not allowed") + } + if os.Getenv("CITATION_PACK_ALLOW_LOCAL") == "1" { + return nil + } + ips, err := net.LookupIP(host) + if err != nil || len(ips) == 0 { + return errors.New("SSRF blocked: cannot resolve host") + } + for _, ip := range ips { + if isPrivateIP(ip) { + return errors.New("SSRF blocked: private or loopback address") + } + } + return nil +} + +func isPrivateIP(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + if v4 := ip.To4(); v4 != nil { + ip = v4 + if v4[0] == 10 { + return true + } + if v4[0] == 172 && v4[1]&0xf0 == 16 { + return true + } + if v4[0] == 192 && v4[1] == 168 { + return true + } + if v4[0] == 169 && v4[1] == 254 { + return true + } + if v4[0] == 127 { + return true + } + return false + } + if ip.Equal(net.ParseIP("::1")) { + return true + } + if ip[0] == 0xfe && (ip[1]&0xc0) == 0x80 { + return true + } + if ip[0]&0xfe == 0xfc { + return true + } + return false +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. 
+func appendAudit(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { _ = f.Close() }() //nolint:errcheck + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." + } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} diff --git a/tools/cmd/citation_pack/citation_pack_test.go b/tools/cmd/citation_pack/citation_pack_test.go new file mode 100644 index 0000000..b20f03e --- /dev/null +++ b/tools/cmd/citation_pack/citation_pack_test.go @@ -0,0 +1,115 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// Allow local SSRF for tests targeting httptest.Server +func TestMain(m *testing.M) { + if err := os.Setenv("CITATION_PACK_ALLOW_LOCAL", "1"); err != nil { + panic(err) + } + os.Exit(m.Run()) +} + +func runTool(t *testing.T, bin string, env []string, input any) (string, string, error) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + cmd.Env = env + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + runErr := cmd.Run() + return 
strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), runErr +} + +func TestCitationPack_NoArchive(t *testing.T) { + bin := testutil.BuildTool(t, "citation_pack") + env := os.Environ() + in := map[string]any{ + "doc": map[string]any{ + "title": "Example", + "url": "http://example.com/article", + }, + } + outStr, errStr, err := runTool(t, bin, env, in) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"url\":\"http://example.com/article\"") { + t.Fatalf("unexpected url: %s", outStr) + } + if !strings.Contains(outStr, "\"host\":\"example.com\"") { + t.Fatalf("missing host: %s", outStr) + } + if strings.Contains(outStr, "archive_url") { + t.Fatalf("unexpected archive_url: %s", outStr) + } +} + +func TestCitationPack_WaybackLookup_Success(t *testing.T) { + // Mock Wayback API + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/available" { + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`{"archived_snapshots":{"closest":{"available":true,"url":"http://web.archive.org/web/20200101000000/http://example.com/article","timestamp":"20200101000000"}}}`)); err != nil { + t.Errorf("write: %v", err) + } + return + } + http.NotFound(w, r) + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "citation_pack") + env := append(os.Environ(), "WAYBACK_BASE_URL="+srv.URL, "CITATION_PACK_ALLOW_LOCAL=1") + in := map[string]any{ + "doc": map[string]any{ + "url": "http://example.com/article", + }, + "archive": map[string]any{ + "wayback": true, + }, + } + outStr, errStr, err := runTool(t, bin, env, in) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"archive_url\":\"http://web.archive.org/web/20200101000000/http://example.com/article\"") { + t.Fatalf("expected archive_url, got: %s", outStr) + } +} + +func TestCitationPack_SSRFBlocked_BaseURL(t *testing.T) { 
bin := testutil.BuildTool(t, "citation_pack") + env := []string{"WAYBACK_BASE_URL=http://127.0.0.1:9"} + in := map[string]any{ + "doc": map[string]any{ + "url": "http://example.com", + }, + "archive": map[string]any{ + "wayback": true, + }, + } + _, errStr, err := runTool(t, bin, env, in) + if err == nil { + t.Fatalf("expected error, got ok") + } + if !strings.Contains(errStr, "SSRF blocked") { + t.Fatalf("expected SSRF blocked, got: %s", errStr) + } +} diff --git a/tools/cmd/crossref_search/crossref_search.go b/tools/cmd/crossref_search/crossref_search.go new file mode 100644 index 0000000..f98e684 --- /dev/null +++ b/tools/cmd/crossref_search/crossref_search.go @@ -0,0 +1,387 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + "time" +) + +// input defines the expected stdin JSON for the tool. +type input struct { + Q string `json:"q"` + Rows int `json:"rows"` +} + +// outputResult is the normalized result row produced by this tool. +type outputResult struct { + Title string `json:"title"` + DOI string `json:"doi"` + Issued string `json:"issued"` + Container string `json:"container"` + TitleShort string `json:"title_short,omitempty"` +} + +// output is the stdout JSON envelope produced by the tool. 
+type output struct { + Results []outputResult `json:"results"` +} + +func main() { + if err := run(); err != nil { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } +} + +func run() error { + in, err := decodeInput() + if err != nil { + return err + } + if strings.TrimSpace(in.Q) == "" { + return errors.New("q is required") + } + mailto := strings.TrimSpace(os.Getenv("CROSSREF_MAILTO")) + if mailto == "" { + return errors.New("CROSSREF_MAILTO is required") + } + baseURL, reqURL, err := prepareURLs(in, mailto) + if err != nil { + return err + } + client := newHTTPClient(resolveTimeout()) + start := time.Now() + status, body, err := doRequest(client, baseURL, reqURL, mailto) + if err != nil { + return err + } + rows, err := parseCrossref(body) + if err != nil { + return err + } + out := output{Results: mapResults(rows)} + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + _ = appendAudit(map[string]any{ //nolint:errcheck + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "crossref_search", + "url_host": baseURL.Hostname(), + "status": status, + "ms": time.Since(start).Milliseconds(), + }) + return nil +} + +func decodeInput() (input, error) { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + return in, nil +} + +func prepareURLs(in input, mailto string) (*url.URL, *url.URL, error) { + base := strings.TrimSpace(os.Getenv("CROSSREF_BASE_URL")) + if base == "" { + base = "https://api.crossref.org" + } + baseURL, err := url.Parse(base) + if err != nil || (baseURL.Scheme != "http" && baseURL.Scheme != "https") { + return nil, nil, errors.New("CROSSREF_BASE_URL must be a valid http/https URL") + } + if err := ssrfGuard(baseURL); err != nil { + return nil, nil, err + } + reqURL, err := url.Parse(baseURL.String()) + if err != 
nil { + return nil, nil, err + } + reqURL.Path = strings.TrimRight(reqURL.Path, "/") + "/works" + q := reqURL.Query() + q.Set("query", in.Q) + if in.Rows > 0 { + if in.Rows > 50 { + in.Rows = 50 + } + q.Set("rows", strconv.Itoa(in.Rows)) + } else { + q.Set("rows", "10") + } + q.Set("mailto", mailto) + reqURL.RawQuery = q.Encode() + return baseURL, reqURL, nil +} + +func newHTTPClient(timeout time.Duration) *http.Client { + tr := &http.Transport{} + return &http.Client{Timeout: timeout, Transport: tr, CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 5 { + return errors.New("too many redirects") + } + return ssrfGuard(req.URL) + }} +} + +func resolveTimeout() time.Duration { + if v := strings.TrimSpace(os.Getenv("HTTP_TIMEOUT_MS")); v != "" { + if ms, err := time.ParseDuration(v + "ms"); err == nil && ms > 0 { + return ms + } + } + return 8 * time.Second +} + +func doRequest(client *http.Client, baseURL *url.URL, reqURL *url.URL, mailto string) (int, []byte, error) { + if err := ssrfGuard(baseURL); err != nil { + return 0, nil, err + } + req, err := http.NewRequest(http.MethodGet, reqURL.String(), nil) + if err != nil { + return 0, nil, fmt.Errorf("new request: %w", err) + } + ua := "agentcli-crossref/0.1 (" + mailto + ")" + req.Header.Set("User-Agent", ua) + resp, err := client.Do(req) + if err != nil { + return 0, nil, fmt.Errorf("http: %w", err) + } + defer func() { _ = resp.Body.Close() }() //nolint:errcheck + if resp.StatusCode == http.StatusTooManyRequests { + ra := strings.TrimSpace(resp.Header.Get("Retry-After")) + if ra == "" { + return resp.StatusCode, nil, errors.New("RATE_LIMITED: retry later") + } + return resp.StatusCode, nil, fmt.Errorf("RATE_LIMITED: retry after %s seconds", ra) + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return resp.StatusCode, nil, fmt.Errorf("http status: %d", resp.StatusCode) + } + data, err := ioReadAllLimit(resp.Body, 4*1024*1024) + if err != nil { + return resp.StatusCode, 
nil, err + } + return resp.StatusCode, data, nil +} + +// parseCrossref decodes the Crossref message.items array. +func parseCrossref(data []byte) ([]map[string]any, error) { + var payload struct { + Message struct { + Items []map[string]any `json:"items"` + } `json:"message"` + } + if err := json.Unmarshal(data, &payload); err != nil { + return nil, fmt.Errorf("decode json: %w", err) + } + return payload.Message.Items, nil +} + +func mapResults(rows []map[string]any) []outputResult { + out := make([]outputResult, 0, len(rows)) + for _, r := range rows { + var res outputResult + res.Title = firstStringField(r, "title") + if s, ok := r["DOI"].(string); ok { + res.DOI = s + } + res.Container = firstStringField(r, "container-title") + res.TitleShort = firstStringField(r, "short-title") + res.Issued = parseIssuedField(r) + out = append(out, res) + } + return out +} + +// firstStringField returns the first string value for a field that may be a string or array of strings. +func firstStringField(m map[string]any, key string) string { + if v, ok := m[key].([]any); ok && len(v) > 0 { + if s, ok := v[0].(string); ok { + return s + } + } + if s, ok := m[key].(string); ok { + return s + } + return "" +} + +// parseIssuedField formats Crossref issued.date-parts into YYYY[-MM[-DD]]. 
+func parseIssuedField(m map[string]any) string { + issued, ok := m["issued"].(map[string]any) + if !ok { + return "" + } + dps, ok := issued["date-parts"].([]any) + if !ok || len(dps) == 0 { + return "" + } + first, ok := dps[0].([]any) + if !ok || len(first) == 0 { + return "" + } + parts := make([]string, 0, len(first)) + for i, p := range first { + switch v := p.(type) { + case float64: + val := int(v) + if i == 0 { + parts = append(parts, strconv.Itoa(val)) + } else { + parts = append(parts, fmt.Sprintf("%02d", val)) + } + case int: + if i == 0 { + parts = append(parts, strconv.Itoa(v)) + } else { + parts = append(parts, fmt.Sprintf("%02d", v)) + } + } + } + return strings.Join(parts, "-") +} + +// --- helpers borrowed to avoid extra deps --- + +// ssrfGuard blocks loopback, RFC1918, link-local, ULA, and .onion unless CROSSREF_ALLOW_LOCAL=1 +func ssrfGuard(u *url.URL) error { + host := u.Hostname() + if host == "" { + return errors.New("invalid host") + } + if strings.HasSuffix(strings.ToLower(host), ".onion") { + return errors.New("SSRF blocked: onion domains are not allowed") + } + if os.Getenv("CROSSREF_ALLOW_LOCAL") == "1" { + return nil + } + ips, err := net.LookupIP(host) + if err != nil || len(ips) == 0 { + return errors.New("SSRF blocked: cannot resolve host") + } + for _, ip := range ips { + if isPrivateIP(ip) { + return errors.New("SSRF blocked: private or loopback address") + } + } + return nil +} + +func isPrivateIP(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + if v4 := ip.To4(); v4 != nil { + ip = v4 + if v4[0] == 10 { + return true + } + if v4[0] == 172 && v4[1]&0xf0 == 16 { + return true + } + if v4[0] == 192 && v4[1] == 168 { + return true + } + if v4[0] == 169 && v4[1] == 254 { + return true + } + if v4[0] == 127 { + return true + } + return false + } + if ip.Equal(net.ParseIP("::1")) { + return true + } + if ip[0] == 0xfe && (ip[1]&0xc0) == 0x80 { + return true + } + if 
ip[0]&0xfe == 0xfc { + return true + } + return false +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. +func appendAudit(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { _ = f.Close() }() //nolint:errcheck + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." + } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} + +// ioReadAllLimit reads up to max bytes from r. +func ioReadAllLimit(r interface{ Read([]byte) (int, error) }, max int64) ([]byte, error) { + const chunk = 32 * 1024 + buf := make([]byte, 0, 64*1024) + var readTotal int64 + b := make([]byte, chunk) + for { + n, err := r.Read(b) + if n > 0 { + readTotal += int64(n) + if readTotal > max { + return nil, errors.New("response too large") + } + buf = append(buf, b[:n]...) 
+ } + if err != nil { + if errors.Is(err, ioEOF) { + break + } + if strings.Contains(err.Error(), "EOF") { // fallback for stdlib EOF type + break + } + return nil, err + } + } + return buf, nil +} + +var ioEOF = errors.New("EOF") diff --git a/tools/cmd/crossref_search/crossref_search_test.go b/tools/cmd/crossref_search/crossref_search_test.go new file mode 100644 index 0000000..6425452 --- /dev/null +++ b/tools/cmd/crossref_search/crossref_search_test.go @@ -0,0 +1,94 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +func runTool(t *testing.T, bin string, env []string, input any) (string, string, error) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + cmd.Env = env + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + runErr := cmd.Run() + return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), runErr +} + +func TestCrossrefSearch_Success(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/works" { + http.Error(w, "bad path", http.StatusBadRequest) + return + } + if got := r.Header.Get("User-Agent"); !strings.Contains(got, "agentcli-crossref/0.1") { + http.Error(w, "bad ua", http.StatusBadRequest) + return + } + if r.URL.Query().Get("mailto") != "dev@example.com" { + http.Error(w, "missing mailto", http.StatusBadRequest) + return + } + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"message":{"items":[{"title":["A title"],"DOI":"10.1/x","issued":{"date-parts":[[2024,7,2]]},"container-title":["J Testing"],"short-title":["Short"]}]}}`)) //nolint:errcheck + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "crossref_search") + env := 
append(os.Environ(), "CROSSREF_BASE_URL="+srv.URL, "CROSSREF_ALLOW_LOCAL=1", "CROSSREF_MAILTO=dev@example.com") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "golang", "rows": 5}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"results\":[") { + t.Fatalf("missing results: %s", outStr) + } + if !strings.Contains(outStr, "A title") || !strings.Contains(outStr, "10.1/x") || !strings.Contains(outStr, "2024-07-02") || !strings.Contains(outStr, "J Testing") || !strings.Contains(outStr, "Short") { + t.Fatalf("missing mapped fields: %s", outStr) + } +} + +func TestCrossrefSearch_RateLimited(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "10") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte("{}")) //nolint:errcheck + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "crossref_search") + env := append(os.Environ(), "CROSSREF_BASE_URL="+srv.URL, "CROSSREF_ALLOW_LOCAL=1", "CROSSREF_MAILTO=dev@example.com") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "rate"}) + if err == nil { + t.Fatalf("expected error, got ok: %s", outStr) + } + if !strings.Contains(errStr, "RATE_LIMITED") { + t.Fatalf("expected RATE_LIMITED error, got: %s", errStr) + } +} + +func TestCrossrefSearch_RequiresMailto(t *testing.T) { + bin := testutil.BuildTool(t, "crossref_search") + env := append(os.Environ(), "CROSSREF_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "x"}) + if err == nil { + t.Fatalf("expected error, got ok: %s", outStr) + } + if !strings.Contains(errStr, "CROSSREF_MAILTO is required") { + t.Fatalf("expected mailto required error, got: %s", errStr) + } +} diff --git a/tools/cmd/dedupe_rank/dedupe_rank.go b/tools/cmd/dedupe_rank/dedupe_rank.go new file mode 100644 index 0000000..ce2b146 --- /dev/null +++ b/tools/cmd/dedupe_rank/dedupe_rank.go 
@@ -0,0 +1,267 @@ +package main + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strings" +) + +type inputDocument struct { + ID string `json:"id"` + URL string `json:"url,omitempty"` + Title string `json:"title,omitempty"` + Text string `json:"text,omitempty"` + PublishedAt string `json:"published_at,omitempty"` +} + +type toolInput struct { + Documents []inputDocument `json:"docs"` +} + +type outputGroup struct { + RepresentativeID string `json:"representative_id"` + Members []string `json:"members"` + Score float64 `json:"score"` +} + +type toolOutput struct { + Groups []outputGroup `json:"groups"` +} + +type stderrError struct { + Error string `json:"error"` + Hint string `json:"hint,omitempty"` +} + +func writeErrorAndExit(err error, hint string) { + encErr := json.NewEncoder(os.Stderr).Encode(stderrError{Error: err.Error(), Hint: hint}) + if encErr != nil { + // Best-effort fallback when JSON encode fails + _, _ = fmt.Fprintf(os.Stderr, "error=%q hint=%q\n", err.Error(), hint) + } + os.Exit(1) +} + +func main() { + data, err := io.ReadAll(os.Stdin) + if err != nil { + writeErrorAndExit(err, "failed to read stdin") + return + } + in, err := parseInput(data) + if err != nil { + writeErrorAndExit(err, "invalid JSON input for dedupe_rank") + return + } + if len(in.Documents) == 0 { + writeErrorAndExit(errors.New("missing docs"), "provide docs: [{id,title?,text?,url?,published_at?}]") + return + } + + documents := buildDocuments(in) + groups := groupDocuments(documents, 0.25) + out := toolOutput{Groups: groups} + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", "failed to encode output") + os.Exit(1) + } +} + +// parseInput unmarshals tool input from raw JSON bytes. 
+func parseInput(data []byte) (toolInput, error) { + var in toolInput + err := json.Unmarshal(data, &in) + return in, err +} + +type docData struct { + doc inputDocument + tokens []string + set map[string]struct{} +} + +// buildDocuments tokenizes, filters, and constructs set representations. +func buildDocuments(in toolInput) []docData { + documents := make([]docData, 0, len(in.Documents)) + for _, d := range in.Documents { + tokens := tokenizeWords(strings.TrimSpace(d.Title + " " + d.Text)) + tokens = filterStopwords(tokens) + set := make(map[string]struct{}, len(tokens)) + for _, s := range tokens { + set[s] = struct{}{} + } + documents = append(documents, docData{doc: d, tokens: tokens, set: set}) + } + return documents +} + +// groupDocuments performs similarity grouping and representative selection. +func groupDocuments(documents []docData, jaccardThreshold float64) []outputGroup { + // Union-Find structure + parent := make([]int, len(documents)) + for i := range parent { + parent[i] = i + } + var find func(int) int + find = func(x int) int { + if parent[x] != x { + parent[x] = find(parent[x]) + } + return parent[x] + } + union := func(a, b int) { + ra, rb := find(a), find(b) + if ra != rb { + parent[rb] = ra + } + } + + // Pairwise similarities + for i := 0; i < len(documents); i++ { + for j := i + 1; j < len(documents); j++ { + sim := jaccard(documents[i].set, documents[j].set) + if sim >= jaccardThreshold { + union(i, j) + } + } + } + + // Build groups by root parent + rootToIdx := make(map[int][]int) + for i := range documents { + r := find(i) + rootToIdx[r] = append(rootToIdx[r], i) + } + + // Compute token doc frequency for TF-IDF scoring + tokenDocFreq := make(map[string]int) + for _, dd := range documents { + seen := map[string]struct{}{} + for _, t := range dd.tokens { + if _, ok := seen[t]; ok { + continue + } + seen[t] = struct{}{} + tokenDocFreq[t]++ + } + } + scorer := func(idx int) float64 { return tfidfScore(documents[idx].tokens, tokenDocFreq, 
float64(len(documents))) } + + groups := make([]outputGroup, 0, len(rootToIdx)) + for _, idxs := range rootToIdx { + if len(idxs) == 1 { + i := idxs[0] + groups = append(groups, outputGroup{ + RepresentativeID: documents[i].doc.ID, + Members: []string{documents[i].doc.ID}, + Score: 0, + }) + continue + } + // Best representative by score; tie-break by id + bestIdx := idxs[0] + bestScore := scorer(bestIdx) + for k := 1; k < len(idxs); k++ { + s := scorer(idxs[k]) + if s > bestScore || (s == bestScore && documents[idxs[k]].doc.ID < documents[bestIdx].doc.ID) { + bestScore = s + bestIdx = idxs[k] + } + } + members := make([]string, 0, len(idxs)) + for _, i := range idxs { + members = append(members, documents[i].doc.ID) + } + sort.Strings(members) + groups = append(groups, outputGroup{ + RepresentativeID: documents[bestIdx].doc.ID, + Members: members, + Score: bestScore, + }) + } + sort.Slice(groups, func(i, j int) bool { return groups[i].RepresentativeID < groups[j].RepresentativeID }) + return groups +} + +// tfidfScore computes a crude TF-IDF score for a token sequence. +func tfidfScore(tokens []string, tokenDocFreq map[string]int, numDocs float64) float64 { + tf := map[string]int{} + for _, t := range tokens { + tf[t]++ + } + var score float64 + for tok, c := range tf { + df := float64(tokenDocFreq[tok]) + idf := 0.0 + if df > 0 { + idf = math.Log(numDocs / df) + } + score += (1.0 + math.Log(float64(c))) * idf + } + return score +} + +// tokenizeWords splits text into lowercase alphanumeric tokens. +func tokenizeWords(s string) []string { + // Replace non-letters with spaces, split on spaces + b := strings.Builder{} + b.Grow(len(s)) + for _, r := range s { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') { + b.WriteRune(r) + } else { + b.WriteByte(' ') + } + } + parts := strings.Fields(strings.ToLower(b.String())) + return parts +} + +// filterStopwords removes a small set of common English stopwords. 
+func filterStopwords(tokens []string) []string { + if len(tokens) == 0 { + return tokens + } + stop := map[string]struct{}{ + "a": {}, "an": {}, "the": {}, "is": {}, "are": {}, "was": {}, "were": {}, + "by": {}, "of": {}, "and": {}, "to": {}, "in": {}, "on": {}, "for": {}, + "with": {}, "as": {}, "it": {}, "its": {}, "at": {}, "this": {}, "that": {}, + } + out := make([]string, 0, len(tokens)) + for _, t := range tokens { + if _, ok := stop[t]; ok { + continue + } + out = append(out, t) + } + return out +} + +// jaccard computes Jaccard similarity between two sets. +func jaccard(a, b map[string]struct{}) float64 { + if len(a) == 0 && len(b) == 0 { + return 1.0 + } + inter := 0 + var small, large map[string]struct{} + if len(a) < len(b) { + small, large = a, b + } else { + small, large = b, a + } + for k := range small { + if _, ok := large[k]; ok { + inter++ + } + } + union := len(a) + len(b) - inter + if union == 0 { + return 0 + } + return float64(inter) / float64(union) +} diff --git a/tools/cmd/dedupe_rank/dedupe_rank_test.go b/tools/cmd/dedupe_rank/dedupe_rank_test.go new file mode 100644 index 0000000..c80ea29 --- /dev/null +++ b/tools/cmd/dedupe_rank/dedupe_rank_test.go @@ -0,0 +1,88 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "os/exec" + "sort" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +type group struct { + RepresentativeID string `json:"representative_id"` + Members []string `json:"members"` + Score float64 `json:"score"` +} + +type output struct { + Groups []group `json:"groups"` +} + +func runTool(t *testing.T, bin string, input any) (output, string, error) { + t.Helper() + var out output + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + if err == nil { + if decErr := 
json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); decErr != nil { + t.Fatalf("parse output: %v; raw=%s", decErr, stdout.String()) + } + } + return out, strings.TrimSpace(stderr.String()), err +} + +// TestDedupeRank_GroupsNearDuplicates encodes the expected behavior: +// - Near-duplicate documents should be grouped together under one representative id +// - The representative is the best-ranked member; tie-breaks use TF-IDF-like signal +// This test is intentionally added before the implementation and should fail until implemented. +func TestDedupeRank_GroupsNearDuplicates(t *testing.T) { + bin := testutil.BuildTool(t, "dedupe_rank") + + docs := []map[string]any{ + {"id": "a", "title": "Go Programming Language", "text": "Golang is a programming language created at Google."}, + {"id": "b", "title": "The Go Language", "text": "Go is a programming language by Google."}, + {"id": "c", "title": "Python Info", "text": "Python is a different programming language."}, + } + + in := map[string]any{"docs": docs} + out, errStr, err := runTool(t, bin, in) + if err != nil { + t.Fatalf("dedupe_rank errored: %v, stderr=%s", err, errStr) + } + if len(out.Groups) == 0 { + t.Fatalf("expected at least one group, got none") + } + // find group containing both a and b + var ab []string + for _, g := range out.Groups { + hasA := false + hasB := false + for _, id := range g.Members { + if id == "a" { + hasA = true + } else if id == "b" { + hasB = true + } + } + if hasA && hasB { + ab = append([]string{}, g.Members...) 
+ // order members for deterministic comparison in golden-style tests + sort.Strings(ab) + break + } + } + if len(ab) == 0 { + t.Fatalf("expected docs 'a' and 'b' to be grouped together; groups=%v", out.Groups) + } +} diff --git a/tools/cmd/exec/exec.go b/tools/cmd/exec/exec.go new file mode 100644 index 0000000..a9b1570 --- /dev/null +++ b/tools/cmd/exec/exec.go @@ -0,0 +1,155 @@ +package main + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +type execInput struct { + Cmd string `json:"cmd"` + Args []string `json:"args"` + Cwd string `json:"cwd,omitempty"` + Env map[string]string `json:"env,omitempty"` + Stdin string `json:"stdin,omitempty"` + TimeoutSec int `json:"timeoutSec,omitempty"` +} + +type execOutput struct { + ExitCode int `json:"exitCode"` + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + DurationMs int64 `json:"durationMs"` +} + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + // Standardized error contract: write single-line JSON to stderr and exit non-zero + msg := sanitizeError(err) + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } + + stdout, stderr, exitCode, dur := runCommand(in) + writeOutput(execOutput{ExitCode: exitCode, Stdout: stdout, Stderr: stderr, DurationMs: dur}) +} + +func readInput(r io.Reader) (execInput, error) { + var in execInput + br := bufio.NewReader(r) + data, err := io.ReadAll(br) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } + if err := json.Unmarshal(data, &in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.Cmd) == "" { + return in, fmt.Errorf("cmd is required") + } + return in, nil +} + +func runCommand(in execInput) (stdoutStr, stderrStr string, exitCode int, durationMs int64) { + start := time.Now() + ctx := context.Background() + var cancel context.CancelFunc + if in.TimeoutSec > 0 { + ctx, cancel = context.WithTimeout(ctx, 
time.Duration(in.TimeoutSec)*time.Second) + defer cancel() + } + + cmd := exec.CommandContext(ctx, in.Cmd, in.Args...) + if strings.TrimSpace(in.Cwd) != "" { + // Ensure cwd is clean and absolute if provided as relative + if !filepath.IsAbs(in.Cwd) { + if abs, err := filepath.Abs(in.Cwd); err == nil { + cmd.Dir = abs + } else { + // Fall back to provided value if Abs fails + cmd.Dir = in.Cwd + } + } else { + cmd.Dir = in.Cwd + } + } + // Start from current environment and apply overrides + env := os.Environ() + for k, v := range in.Env { + if strings.Contains(k, "=") { + // Skip invalid keys defensively + continue + } + env = append(env, fmt.Sprintf("%s=%s", k, v)) + } + cmd.Env = env + + if in.Stdin != "" { + cmd.Stdin = strings.NewReader(in.Stdin) + } + var stdoutBuf, stderrBuf strings.Builder + cmd.Stdout = &stdoutBuf + cmd.Stderr = &stderrBuf + + exitCode = 0 + err := cmd.Run() + durationMs = time.Since(start).Milliseconds() + + stdoutStr = stdoutBuf.String() + stderrStr = stderrBuf.String() + + if err == nil { + return + } + // Determine exit code and normalize timeout message + if ctxErr := ctx.Err(); ctxErr == context.DeadlineExceeded { + // Timed out + if ee, ok := err.(*exec.ExitError); ok { + exitCode = ee.ExitCode() + } else { + exitCode = 1 + } + if !strings.Contains(strings.ToLower(stderrStr), "timeout") { + if len(stderrStr) > 0 && !strings.HasSuffix(stderrStr, "\n") { + stderrStr += "\n" + } + stderrStr += "timeout" + } + return + } + if ee, ok := err.(*exec.ExitError); ok { + exitCode = ee.ExitCode() + } else { + exitCode = 1 + } + return +} + +func writeOutput(out execOutput) { + enc, err := json.Marshal(out) + if err != nil { + // Best-effort: emit minimal JSON + fmt.Println("{\"exitCode\":0,\"stdout\":\"\",\"stderr\":\"marshal error\",\"durationMs\":0}") + return + } + // Single line JSON + fmt.Println(string(enc)) +} + +func sanitizeError(err error) string { + if err == nil { + return "" + } + msg := err.Error() + // Collapse newlines to keep 
single-line contract + msg = strings.ReplaceAll(msg, "\n", " ") + return msg +} diff --git a/tools/cmd/exec/exec_test.go b/tools/cmd/exec/exec_test.go new file mode 100644 index 0000000..413bb15 --- /dev/null +++ b/tools/cmd/exec/exec_test.go @@ -0,0 +1,177 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "os/exec" + "runtime" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// execOutput models the expected stdout JSON contract from tools/exec.go +type execOutput struct { + ExitCode int `json:"exitCode"` + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + DurationMs int64 `json:"durationMs"` +} + +// runExec runs the built exec tool with the given JSON input and decodes stdout. +func runExec(t *testing.T, bin string, input any) execOutput { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + t.Fatalf("exec tool failed to run: %v, stderr=%s", err, stderr.String()) + } + // Output must be single-line JSON + out := strings.TrimSpace(stdout.String()) + var parsed execOutput + if err := json.Unmarshal([]byte(out), &parsed); err != nil { + t.Fatalf("failed to parse exec output JSON: %v; raw=%q", err, out) + } + if parsed.DurationMs < 0 { + t.Fatalf("durationMs must be >= 0, got %d", parsed.DurationMs) + } + return parsed +} + +// TestExec_InvalidJSON verifies stderr JSON error contract and non-zero exit +func TestExec_InvalidJSON(t *testing.T) { + bin := testutil.BuildTool(t, "exec") + // Run with invalid JSON (not an object) + cmd := exec.Command(bin) + cmd.Stdin = strings.NewReader("not-json") + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + if err == nil { + t.Fatalf("expected non-zero exit for invalid JSON") + } + 
// Stderr must be single-line JSON: {"error":"..."} + line := strings.TrimSpace(stderr.String()) + if line == "" || !strings.HasPrefix(line, "{") || !strings.HasSuffix(line, "}") || strings.Contains(line, "\n") { + t.Fatalf("stderr not single-line JSON: %q", line) + } + var payload map[string]any + if err := json.Unmarshal([]byte(line), &payload); err != nil { + t.Fatalf("stderr not JSON parseable: %v raw=%q", err, line) + } + if _, ok := payload["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' field: %v", payload) + } +} + +func TestExec_SuccessEcho(t *testing.T) { + bin := testutil.BuildTool(t, "exec") + // Use /bin/echo on Unix; on Windows, use cmd /c echo via a small program is complex. + if runtime.GOOS == "windows" { + t.Skip("windows not supported in this test environment") + } + out := runExec(t, bin, map[string]any{ + "cmd": "/bin/echo", + "args": []string{"hello"}, + }) + if out.ExitCode != 0 { + t.Fatalf("expected exitCode 0, got %d (stderr=%q)", out.ExitCode, out.Stderr) + } + if strings.TrimSpace(out.Stdout) != "hello" { + t.Fatalf("unexpected stdout: %q", out.Stdout) + } +} + +func TestExec_NonZeroExit(t *testing.T) { + bin := testutil.BuildTool(t, "exec") + if runtime.GOOS == "windows" { + t.Skip("windows not supported in this test environment") + } + // /bin/false exits with code 1 + out := runExec(t, bin, map[string]any{ + "cmd": "/bin/false", + "args": []string{}, + }) + if out.ExitCode == 0 { + t.Fatalf("expected non-zero exitCode, got 0") + } + if out.Stdout != "" { + t.Fatalf("expected empty stdout for /bin/false, got %q", out.Stdout) + } +} + +func TestExec_Timeout(t *testing.T) { + bin := testutil.BuildTool(t, "exec") + if runtime.GOOS == "windows" { + t.Skip("windows not supported in this test environment") + } + out := runExec(t, bin, map[string]any{ + "cmd": "/bin/sleep", + "args": []string{"2"}, + "timeoutSec": 1, + }) + if out.ExitCode == 0 { + t.Fatalf("expected timeout to produce non-zero exitCode, got 0") + } + if 
!strings.Contains(strings.ToLower(out.Stderr), "timeout") { + t.Fatalf("stderr should mention timeout, got %q", out.Stderr) + } + if out.DurationMs < 900 || out.DurationMs > 3000 { + t.Fatalf("durationMs out of expected range: %d", out.DurationMs) + } +} + +func TestExec_CwdAndEnv(t *testing.T) { + bin := testutil.BuildTool(t, "exec") + if runtime.GOOS == "windows" { + t.Skip("windows not supported in this test environment") + } + tmpDir := t.TempDir() + out := runExec(t, bin, map[string]any{ + "cmd": "/bin/pwd", + "args": []string{}, + "cwd": tmpDir, + "env": map[string]string{ + "FOO": "BAR", + }, + }) + if strings.TrimSpace(out.Stdout) != tmpDir { + t.Fatalf("pwd did not respect cwd: expected %q, got %q", tmpDir, out.Stdout) + } + + // Now verify env via /usr/bin/env + out2 := runExec(t, bin, map[string]any{ + "cmd": "/usr/bin/env", + "args": []string{}, + "env": map[string]string{ + "HELLO": "WORLD", + }, + }) + if !strings.Contains(out2.Stdout, "HELLO=WORLD") { + t.Fatalf("env var not present in stdout: %q", out2.Stdout) + } +} + +func TestExec_StdinPassthrough(t *testing.T) { + bin := testutil.BuildTool(t, "exec") + if runtime.GOOS == "windows" { + t.Skip("windows not supported in this test environment") + } + out := runExec(t, bin, map[string]any{ + "cmd": "/bin/cat", + "args": []string{}, + "stdin": "xyz", + }) + if out.Stdout != "xyz" { + t.Fatalf("stdin passthrough failed, got %q", out.Stdout) + } +} diff --git a/tools/cmd/fs_append_file/fs_append_file.go b/tools/cmd/fs_append_file/fs_append_file.go new file mode 100644 index 0000000..e288d68 --- /dev/null +++ b/tools/cmd/fs_append_file/fs_append_file.go @@ -0,0 +1,106 @@ +package main + +import ( + "bufio" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "sync" +) + +type appendInput struct { + Path string `json:"path"` + ContentBase64 string `json:"contentBase64"` +} + +type appendOutput struct { + BytesAppended int `json:"bytesAppended"` +} + +var 
fileLocks sync.Map // map[string]*sync.Mutex + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := validatePath(in.Path); err != nil { + stderrJSON(err) + os.Exit(1) + } + data, err := base64.StdEncoding.DecodeString(in.ContentBase64) + if err != nil { + stderrJSON(fmt.Errorf("decode base64: %w", err)) + os.Exit(1) + } + // advisory lock per-path + muIface, _ := fileLocks.LoadOrStore(in.Path, &sync.Mutex{}) + mu, ok := muIface.(*sync.Mutex) + if !ok { + stderrJSON(errors.New("internal: invalid lock type")) + os.Exit(1) + } + mu.Lock() + defer mu.Unlock() + + f, err := os.OpenFile(in.Path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + defer func() { + if cerr := f.Close(); cerr != nil { + // best-effort report; do not change exit code after success path + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", "close: "+strings.ReplaceAll(cerr.Error(), "\n", " ")) + } + }() + if _, err := f.Write(data); err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := json.NewEncoder(os.Stdout).Encode(appendOutput{BytesAppended: len(data)}); err != nil { + stderrJSON(fmt.Errorf("write stdout: %w", err)) + os.Exit(1) + } +} + +func readInput(r io.Reader) (appendInput, error) { + var in appendInput + b, err := io.ReadAll(bufio.NewReader(r)) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } + if err := json.Unmarshal(b, &in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.Path) == "" { + return in, errors.New("path is required") + } + if strings.TrimSpace(in.ContentBase64) == "" { + return in, errors.New("contentBase64 is required") + } + return in, nil +} + +func validatePath(p string) error { + if filepath.IsAbs(p) { + return fmt.Errorf("path must be relative to repository root: %s", p) + } + clean := filepath.ToSlash(filepath.Clean(p)) + if strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") { + 
return fmt.Errorf("path escapes repository root: %s", p) + } + return nil +} + +func stderrJSON(err error) { + msg := err.Error() + msg = strings.ReplaceAll(msg, "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) +} diff --git a/tools/cmd/fs_append_file/fs_append_file_test.go b/tools/cmd/fs_append_file/fs_append_file_test.go new file mode 100644 index 0000000..a2f3599 --- /dev/null +++ b/tools/cmd/fs_append_file/fs_append_file_test.go @@ -0,0 +1,264 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +type fsAppendOutput struct { + BytesAppended int `json:"bytesAppended"` +} + +// runFsAppend runs the built fs_append_file tool with the given JSON input. +func runFsAppend(t *testing.T, bin string, input any) (fsAppendOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." 
+ cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsAppendOutput + if code == 0 { + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + } + return out, stderr.String(), code +} + +func TestFsAppend_DoubleAppend(t *testing.T) { + bin := testutil.BuildTool(t, "fs_append_file") + + dir := testutil.MakeRepoRelTempDir(t, "fsappend-double-") + path := filepath.Join(dir, "hello.txt") + + part1 := []byte("hello") + out1, stderr1, code1 := runFsAppend(t, bin, map[string]any{ + "path": path, + "contentBase64": base64.StdEncoding.EncodeToString(part1), + }) + if code1 != 0 { + t.Fatalf("first append expected success, got exit=%d stderr=%q", code1, stderr1) + } + if out1.BytesAppended != len(part1) { + t.Fatalf("bytesAppended mismatch on first append: got %d want %d", out1.BytesAppended, len(part1)) + } + + part2 := []byte(" world") + out2, stderr2, code2 := runFsAppend(t, bin, map[string]any{ + "path": path, + "contentBase64": base64.StdEncoding.EncodeToString(part2), + }) + if code2 != 0 { + t.Fatalf("second append expected success, got exit=%d stderr=%q", code2, stderr2) + } + if out2.BytesAppended != len(part2) { + t.Fatalf("bytesAppended mismatch on second append: got %d want %d", out2.BytesAppended, len(part2)) + } + + // Verify final file content + got, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read back: %v", err) + } + want := append(append([]byte{}, part1...), part2...) 
+ if !bytes.Equal(got, want) { + t.Fatalf("content mismatch: got %q want %q", got, want) + } +} + +func TestFsAppend_Validation_MissingPath(t *testing.T) { + bin := testutil.BuildTool(t, "fs_append_file") + _, stderr, code := runFsAppend(t, bin, map[string]any{ + "path": "", + "contentBase64": base64.StdEncoding.EncodeToString([]byte("data")), + }) + if code == 0 { + t.Fatalf("expected non-zero exit for missing path") + } + if !strings.Contains(strings.ToLower(stderr), "path is required") { + t.Fatalf("stderr should mention path is required, got %q", stderr) + } +} + +func TestFsAppend_Validation_MissingContent(t *testing.T) { + bin := testutil.BuildTool(t, "fs_append_file") + dir := testutil.MakeRepoRelTempDir(t, "fsappend-validate-") + path := filepath.Join(dir, "x.txt") + _, stderr, code := runFsAppend(t, bin, map[string]any{ + "path": path, + "contentBase64": "", + }) + if code == 0 { + t.Fatalf("expected non-zero exit for missing contentBase64") + } + if !strings.Contains(strings.ToLower(stderr), "contentbase64 is required") { + t.Fatalf("stderr should mention contentBase64 is required, got %q", stderr) + } +} + +func TestFsAppend_Validation_AbsolutePath(t *testing.T) { + bin := testutil.BuildTool(t, "fs_append_file") + abs := filepath.Join("/", "tmp", "x.txt") + _, stderr, code := runFsAppend(t, bin, map[string]any{ + "path": abs, + "contentBase64": base64.StdEncoding.EncodeToString([]byte("x")), + }) + if code == 0 { + t.Fatalf("expected non-zero exit for absolute path") + } + if !strings.Contains(strings.ToLower(stderr), "path must be relative to repository root") { + t.Fatalf("stderr should mention relative path requirement, got %q", stderr) + } +} + +func TestFsAppend_Validation_PathEscape(t *testing.T) { + bin := testutil.BuildTool(t, "fs_append_file") + _, stderr, code := runFsAppend(t, bin, map[string]any{ + "path": filepath.Join("..", "escape.txt"), + "contentBase64": base64.StdEncoding.EncodeToString([]byte("x")), + }) + if code == 0 { + 
t.Fatalf("expected non-zero exit for path escape") + } + if !strings.Contains(strings.ToLower(stderr), "path escapes repository root") { + t.Fatalf("stderr should mention path escapes repository root, got %q", stderr) + } +} + +func TestFsAppend_Validation_BadBase64(t *testing.T) { + bin := testutil.BuildTool(t, "fs_append_file") + dir := testutil.MakeRepoRelTempDir(t, "fsappend-validate-") + path := filepath.Join(dir, "bad.txt") + _, stderr, code := runFsAppend(t, bin, map[string]any{ + "path": path, + "contentBase64": "!!!not-base64!!!", + }) + if code == 0 { + t.Fatalf("expected non-zero exit for bad base64") + } + if !strings.Contains(strings.ToLower(stderr), "decode base64") { + t.Fatalf("stderr should mention base64 decode failure, got %q", stderr) + } +} + +func TestFsAppend_ConcurrentWriters(t *testing.T) { + bin := testutil.BuildTool(t, "fs_append_file") + + dir := testutil.MakeRepoRelTempDir(t, "fsappend-concurrent-") + path := filepath.Join(dir, "concurrent.txt") + + // Distinct payloads to allow order-agnostic verification via counts + partA := bytes.Repeat([]byte("A"), 10000) + partB := bytes.Repeat([]byte("B"), 12000) + + var wg sync.WaitGroup + wg.Add(2) + + var out1 fsAppendOutput + var err1 string + var code1 int + go func() { + defer wg.Done() + out1, err1, code1 = runFsAppend(t, bin, map[string]any{ + "path": path, + "contentBase64": base64.StdEncoding.EncodeToString(partA), + }) + }() + + var out2 fsAppendOutput + var err2 string + var code2 int + go func() { + defer wg.Done() + out2, err2, code2 = runFsAppend(t, bin, map[string]any{ + "path": path, + "contentBase64": base64.StdEncoding.EncodeToString(partB), + }) + }() + + wg.Wait() + + if code1 != 0 { + t.Fatalf("first concurrent append expected success, got exit=%d stderr=%q", code1, err1) + } + if code2 != 0 { + t.Fatalf("second concurrent append expected success, got exit=%d stderr=%q", code2, err2) + } + if out1.BytesAppended != len(partA) { + t.Fatalf("bytesAppended mismatch for first 
writer: got %d want %d", out1.BytesAppended, len(partA)) + } + if out2.BytesAppended != len(partB) { + t.Fatalf("bytesAppended mismatch for second writer: got %d want %d", out2.BytesAppended, len(partB)) + } + + // Verify final content length and composition (order-agnostic) + got, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read back: %v", err) + } + wantLen := len(partA) + len(partB) + if len(got) != wantLen { + t.Fatalf("final size mismatch: got %d want %d", len(got), wantLen) + } + var countA, countB int + for _, b := range got { + if b == 'A' { + countA++ + } else if b == 'B' { + countB++ + } + } + if countA != len(partA) || countB != len(partB) { + t.Fatalf("content composition mismatch: countA=%d want %d, countB=%d want %d", countA, len(partA), countB, len(partB)) + } +} + +// TestFsAppend_ErrorJSON_PathRequired verifies standardized stderr JSON error +// contract: when required input is missing (path/content), the tool writes a +// single-line JSON object with an "error" key to stderr and exits non-zero. 
package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
)

// applyInput is the JSON request read from stdin.
type applyInput struct {
	// UnifiedDiff is the unified-diff text to apply. Only a single
	// new-file diff (old side /dev/null) is supported by this tool.
	UnifiedDiff string `json:"unifiedDiff"`
	// DryRun reports how many files would change without writing anything.
	DryRun bool `json:"dryRun,omitempty"`
}

// applyOutput is the JSON response written to stdout on success.
type applyOutput struct {
	FilesChanged int `json:"filesChanged"`
}

// main reads an applyInput from stdin, applies the diff, and writes an
// applyOutput to stdout. Failures are reported as a one-line JSON object
// on stderr with a non-zero exit code.
func main() {
	in, err := readInput(os.Stdin)
	if err != nil {
		stderrJSON(err)
		os.Exit(1)
	}
	if strings.TrimSpace(in.UnifiedDiff) == "" {
		stderrJSON(errors.New("unifiedDiff is required"))
		os.Exit(1)
	}
	// Minimal implementation to create a new file per S02 for clean new-file apply
	changed, err := applyNewFileOnly(in.UnifiedDiff, in.DryRun)
	if err != nil {
		stderrJSON(err)
		os.Exit(1)
	}
	if err := json.NewEncoder(os.Stdout).Encode(applyOutput{FilesChanged: changed}); err != nil {
		stderrJSON(fmt.Errorf("write stdout: %w", err))
		os.Exit(1)
	}
}

// readInput decodes the JSON request from r.
func readInput(r io.Reader) (applyInput, error) {
	var in applyInput
	b, err := io.ReadAll(bufio.NewReader(r))
	if err != nil {
		return in, fmt.Errorf("read stdin: %w", err)
	}
	if err := json.Unmarshal(b, &in); err != nil {
		return in, fmt.Errorf("parse json: %w", err)
	}
	return in, nil
}

// applyNewFileOnly parses a unified diff that creates exactly one new file and applies it.
// It returns the number of files changed (0 when the identical file already
// exists, making repeat applies idempotent).
func applyNewFileOnly(diff string, dryRun bool) (int, error) {
	// Expect minimal format:
	// --- /dev/null
	// +++ b/<path>
	// @@ ...
	// <content lines starting with +>
	lines := strings.Split(diff, "\n")
	if len(lines) < 3 {
		return 0, errors.New("BAD_DIFF: too short")
	}
	var path string
	seenOld := false
	for _, ln := range lines {
		if strings.HasPrefix(ln, "--- ") {
			if !strings.Contains(ln, "/dev/null") {
				return 0, errors.New("BAD_DIFF: old file must be /dev/null")
			}
			seenOld = true
			continue
		}
		if strings.HasPrefix(ln, "+++ ") {
			if !seenOld {
				return 0, errors.New("BAD_DIFF: missing old file header")
			}
			// TrimSpace also drops a trailing \r from CRLF diff headers.
			p := strings.TrimSpace(strings.TrimPrefix(ln, "+++ "))
			// Normalize away the conventional leading "b/" prefix.
			path = strings.TrimPrefix(p, "b/")
			break
		}
	}
	if path == "" {
		return 0, errors.New("BAD_DIFF: missing new file path")
	}
	if err := validateRelPath(path); err != nil {
		return 0, err
	}
	var content strings.Builder
	// Collect added lines exactly. For CRLF diff input only the trailing CR
	// is stripped; bytes inside the line are never rewritten. (A previous
	// version replaced every CR in the payload with LF, corrupting content.)
	for _, ln := range lines {
		if strings.HasPrefix(ln, "+") && !strings.HasPrefix(ln, "+++") {
			s := strings.TrimSuffix(strings.TrimPrefix(ln, "+"), "\r")
			content.WriteString(s)
			content.WriteString("\n")
		}
	}
	// Dry run: report number of files that would change (1 if create, 0 if identical exists).
	// NOTE(review): a dry run over a conflicting existing file reports 1,
	// while a real apply errors — confirm whether dry run should surface the
	// conflict instead.
	if dryRun {
		if existing, err := os.ReadFile(path); err == nil {
			if string(existing) == content.String() {
				return 0, nil
			}
		}
		return 1, nil
	}
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		return 0, err
	}
	// Idempotence and conflict detection.
	if existing, err := os.ReadFile(path); err == nil {
		if string(existing) == content.String() {
			return 0, nil
		}
		return 0, errors.New("target exists with different content")
	}
	if err := os.WriteFile(path, []byte(content.String()), 0o644); err != nil {
		return 0, err
	}
	return 1, nil
}

// validateRelPath rejects absolute paths and any path that escapes the
// working directory. The bare ".." path is rejected explicitly: it cleans
// to ".." with no "../" slash, so the prefix/contains checks alone miss it.
func validateRelPath(p string) error {
	if filepath.IsAbs(p) {
		return fmt.Errorf("ABSOLUTE_PATH: %s", p)
	}
	clean := filepath.ToSlash(filepath.Clean(p))
	if clean == ".." || strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") {
		return fmt.Errorf("PATH_ESCAPE: %s", p)
	}
	return nil
}

// stderrJSON writes err as a single-line {"error": ...} JSON object to stderr.
func stderrJSON(err error) {
	msg := err.Error()
	msg = strings.ReplaceAll(msg, "\n", " ")
	fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg)
}
+ +func runFsApplyPatchInDir(t *testing.T, bin, dir string, input any) (fsApplyPatchOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = dir + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsApplyPatchOutput + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); err != nil && code == 0 { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + return out, stderr.String(), code +} + +func TestFsApplyPatch_CleanApply_NewFile(t *testing.T) { + bin := buildFsApplyPatch(t) + // Prepare a simple unified diff creating a file + diff := "" + + "--- /dev/null\n" + + "+++ b/tmp_new_file.txt\n" + + "@@ -0,0 +1,2 @@\n" + + "+hello\n" + + "+world\n" + + // Run in an isolated temp directory to avoid polluting the repo + work := t.TempDir() + out, stderr, code := runFsApplyPatchInDir(t, bin, work, map[string]any{ + "unifiedDiff": diff, + }) + if code == 0 { + // Once implemented, expect code==0 and filesChanged==1 + if out.FilesChanged != 1 { + t.Fatalf("filesChanged mismatch, got %d want 1", out.FilesChanged) + } + if _, err := os.Stat(filepath.Join(work, "tmp_new_file.txt")); err != nil { + t.Fatalf("expected file to exist: %v", err) + } + return + } + // For the initial stub, ensure we get a structured error + if !strings.Contains(strings.ToUpper(stderr), "NOT_IMPLEMENTED") { + t.Fatalf("expected NOT_IMPLEMENTED in stderr, got %q", stderr) + } +} + +func TestFsApplyPatch_CleanApply_NewFile_Succeeds(t *testing.T) { + bin := buildFsApplyPatch(t) + work := t.TempDir() + diff := "" + + "--- /dev/null\n" + + "+++ b/tmp_new_file.txt\n" + + "@@ -0,0 +1,2 @@\n" + + "+hello\n" + + "+world\n" + + out, stderr, code := 
runFsApplyPatchInDir(t, bin, work, map[string]any{ + "unifiedDiff": diff, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.FilesChanged != 1 { + t.Fatalf("filesChanged mismatch, got %d want 1", out.FilesChanged) + } + if _, err := os.Stat(filepath.Join(work, "tmp_new_file.txt")); err != nil { + t.Fatalf("expected file to exist: %v", err) + } +} + +func TestFsApplyPatch_Idempotent_NewFile(t *testing.T) { + bin := buildFsApplyPatch(t) + work := t.TempDir() + diff := "" + + "--- /dev/null\n" + + "+++ b/tmp_new_file.txt\n" + + "@@ -0,0 +1,2 @@\n" + + "+hello\n" + + "+world\n" + + // First apply should create the file + out1, stderr1, code1 := runFsApplyPatchInDir(t, bin, work, map[string]any{ + "unifiedDiff": diff, + }) + if code1 != 0 { + t.Fatalf("first apply expected success, got exit=%d stderr=%q", code1, stderr1) + } + if out1.FilesChanged != 1 { + t.Fatalf("first apply filesChanged mismatch, got %d want 1", out1.FilesChanged) + } + // Second apply of the same diff should be idempotent: no-op with success + out2, stderr2, code2 := runFsApplyPatchInDir(t, bin, work, map[string]any{ + "unifiedDiff": diff, + }) + if code2 != 0 { + t.Fatalf("second apply expected success, got exit=%d stderr=%q", code2, stderr2) + } + if out2.FilesChanged != 0 { + t.Fatalf("second apply filesChanged mismatch, got %d want 0", out2.FilesChanged) + } +} + +func TestFsApplyPatch_Conflict_TargetExistsWithDifferentContent(t *testing.T) { + bin := buildFsApplyPatch(t) + work := t.TempDir() + + // Pre-create target with different content + if err := os.WriteFile(filepath.Join(work, "tmp_new_file.txt"), []byte("different\ncontent\n"), 0o644); err != nil { + t.Fatalf("prep write: %v", err) + } + + // Diff attempts to create a new file with different content (new-file hunk) + diff := "" + + "--- /dev/null\n" + + "+++ b/tmp_new_file.txt\n" + + "@@ -0,0 +1,2 @@\n" + + "+hello\n" + + "+world\n" + + out, stderr, code := runFsApplyPatchInDir(t, 
bin, work, map[string]any{ + "unifiedDiff": diff, + }) + if code == 0 { + t.Fatalf("expected failure, got success filesChanged=%d", out.FilesChanged) + } + if !strings.Contains(strings.ToLower(stderr), "target exists") { + t.Fatalf("expected error mentioning target exists, got %q", stderr) + } +} + +func TestFsApplyPatch_CRLF_NewFile_NormalizedLF(t *testing.T) { + bin := buildFsApplyPatch(t) + work := t.TempDir() + + // Unified diff with CRLF line endings; tool should accept and write LF-normalized content + diff := "" + + "--- /dev/null\r\n" + + "+++ b/tmp_new_file.txt\r\n" + + "@@ -0,0 +1,2 @@\r\n" + + "+hello\r\n" + + "+world\r\n" + + out, stderr, code := runFsApplyPatchInDir(t, bin, work, map[string]any{ + "unifiedDiff": diff, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.FilesChanged != 1 { + t.Fatalf("filesChanged mismatch, got %d want 1", out.FilesChanged) + } + data, err := os.ReadFile(filepath.Join(work, "tmp_new_file.txt")) + if err != nil { + t.Fatalf("expected file to exist: %v", err) + } + got := string(data) + want := "hello\nworld\n" + if got != want { + t.Fatalf("content mismatch, got %q want %q", got, want) + } +} + +func TestFsApplyPatch_DryRun_NewFile_NoWrite(t *testing.T) { + bin := buildFsApplyPatch(t) + work := t.TempDir() + + // Diff to create a new file, but run with dryRun=true + diff := "" + + "--- /dev/null\n" + + "+++ b/tmp_new_file.txt\n" + + "@@ -0,0 +1,2 @@\n" + + "+hello\n" + + "+world\n" + + out, stderr, code := runFsApplyPatchInDir(t, bin, work, map[string]any{ + "unifiedDiff": diff, + "dryRun": true, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + // Should report it would change exactly 1 file + if out.FilesChanged != 1 { + t.Fatalf("filesChanged mismatch, got %d want 1", out.FilesChanged) + } + // File must not be written in dryRun + if _, err := os.Stat(filepath.Join(work, "tmp_new_file.txt")); !os.IsNotExist(err) { + 
package main

import (
	"bufio"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"sync"
)

// editInput is the JSON request read from stdin.
type editInput struct {
	Path              string `json:"path"`              // repo-relative target file
	StartByte         int    `json:"startByte"`         // inclusive start of the replaced range
	EndByte           int    `json:"endByte"`           // exclusive end of the replaced range
	ReplacementBase64 string `json:"replacementBase64"` // bytes spliced into [StartByte:EndByte)
	ExpectedSha256    string `json:"expectedSha256,omitempty"`
}

// editOutput is the JSON response written to stdout on success.
type editOutput struct {
	BytesReplaced int    `json:"bytesReplaced"`
	NewSha256     string `json:"newSha256"`
}

// editLocks holds one mutex per path to serialize edits of the same file.
// NOTE(review): this only serializes edits within a single process; each CLI
// invocation is its own process, so concurrent invocations are NOT protected.
// Confirm whether cross-process file locking is required.
var editLocks sync.Map

// main reads an editInput from stdin, splices the replacement into the file,
// and writes an editOutput to stdout. Errors go to stderr as one-line JSON.
func main() {
	in, err := readInput(os.Stdin)
	if err != nil {
		stderrJSON(err)
		os.Exit(1)
	}
	if err := validatePath(in.Path); err != nil {
		stderrJSON(err)
		os.Exit(1)
	}
	out, err := applyEdit(in)
	if err != nil {
		stderrJSON(err)
		os.Exit(1)
	}
	if err := json.NewEncoder(os.Stdout).Encode(out); err != nil {
		stderrJSON(fmt.Errorf("encode json: %w", err))
		os.Exit(1)
	}
}

// readInput decodes and validates the JSON request from r.
func readInput(r io.Reader) (editInput, error) {
	var in editInput
	b, err := io.ReadAll(bufio.NewReader(r))
	if err != nil {
		return in, fmt.Errorf("read stdin: %w", err)
	}
	if err := json.Unmarshal(b, &in); err != nil {
		return in, fmt.Errorf("parse json: %w", err)
	}
	if strings.TrimSpace(in.Path) == "" {
		return in, errors.New("path is required")
	}
	if in.StartByte < 0 || in.EndByte < in.StartByte {
		return in, errors.New("invalid range")
	}
	// Decode eagerly so malformed base64 is rejected before touching the file.
	if _, err := base64.StdEncoding.DecodeString(in.ReplacementBase64); err != nil {
		return in, fmt.Errorf("BAD_BASE64: %w", err)
	}
	return in, nil
}

// validatePath rejects absolute paths and any path that escapes the working
// directory. The bare ".." path is rejected explicitly: it cleans to ".."
// with no "../" slash, so the prefix/contains checks alone miss it.
func validatePath(p string) error {
	if filepath.IsAbs(p) {
		return fmt.Errorf("ABSOLUTE_PATH: %s", p)
	}
	clean := filepath.ToSlash(filepath.Clean(p))
	if clean == ".." || strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") {
		return fmt.Errorf("PATH_ESCAPE: %s", p)
	}
	return nil
}

// applyEdit performs the read-splice-write under a per-path mutex. Out-of-range
// offsets are clamped to the file size rather than rejected.
func applyEdit(in editInput) (editOutput, error) {
	muIface, _ := editLocks.LoadOrStore(in.Path, &sync.Mutex{})
	mu, ok := muIface.(*sync.Mutex)
	if !ok {
		return editOutput{}, errors.New("internal lock type assertion failed")
	}
	mu.Lock()
	defer mu.Unlock()

	data, err := os.ReadFile(in.Path)
	if err != nil {
		return editOutput{}, err
	}
	replacement, err := base64.StdEncoding.DecodeString(in.ReplacementBase64)
	if err != nil {
		return editOutput{}, fmt.Errorf("BAD_BASE64: %w", err)
	}
	// Clamp offsets to the file size.
	if in.StartByte > len(data) {
		in.StartByte = len(data)
	}
	if in.EndByte > len(data) {
		in.EndByte = len(data)
	}
	newData := append(append(append([]byte{}, data[:in.StartByte]...), replacement...), data[in.EndByte:]...)
	// Hash the result once; reused for both the optional check and the output.
	sum := sha256.Sum256(newData)
	newHex := hex.EncodeToString(sum[:])
	// NOTE(review): expectedSha256 is compared against the NEW content, not
	// the pre-edit bytes — confirm this is the intended contract.
	if in.ExpectedSha256 != "" && newHex != strings.ToLower(in.ExpectedSha256) {
		return editOutput{}, errors.New("SHA_MISMATCH")
	}
	if err := os.WriteFile(in.Path, newData, 0o644); err != nil {
		return editOutput{}, err
	}
	return editOutput{BytesReplaced: len(replacement), NewSha256: newHex}, nil
}

// stderrJSON writes err as a single-line {"error": ...} JSON object to stderr.
func stderrJSON(err error) {
	msg := err.Error()
	msg = strings.ReplaceAll(msg, "\n", " ")
	fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg)
}
binary bytes. +func TestFsEditRange_BinaryContent(t *testing.T) { + bin := testutil.BuildTool(t, "fs_edit_range") + + // Arrange: repo-relative temporary directory and binary file + tmpDirAbs, err := os.MkdirTemp(".", "fsedit-bin-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "data.bin") + orig := []byte{0x00, 0x01, 0x02, 0xFF, 0x10, 0x11} + if err := os.WriteFile(fileRel, orig, 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + // Act: replace bytes [2:5) (0x02,0xFF,0x10) with {0xAA,0xBB} + repl := []byte{0xAA, 0xBB} + out, stderr, code := runFsEditRange(t, bin, map[string]any{ + "path": fileRel, + "startByte": 2, + "endByte": 5, + "replacementBase64": base64.StdEncoding.EncodeToString(repl), + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.BytesReplaced != len(repl) { + t.Fatalf("bytesReplaced mismatch: got %d want %d", out.BytesReplaced, len(repl)) + } + + // Assert: file content and reported SHA + got, err := os.ReadFile(fileRel) + if err != nil { + t.Fatalf("read back: %v", err) + } + want := []byte{0x00, 0x01, 0xAA, 0xBB, 0x11} + if !bytes.Equal(got, want) { + t.Fatalf("content mismatch: got %v want %v", got, want) + } + sum := sha256.Sum256(got) + wantHex := hex.EncodeToString(sum[:]) + if out.NewSha256 != wantHex { + t.Fatalf("newSha256 mismatch: got %q want %q", out.NewSha256, wantHex) + } +} diff --git a/tools/cmd/fs_edit_range/fs_edit_range_concurrency_test.go b/tools/cmd/fs_edit_range/fs_edit_range_concurrency_test.go new file mode 100644 index 0000000..4485089 --- /dev/null +++ b/tools/cmd/fs_edit_range/fs_edit_range_concurrency_test.go @@ -0,0 +1,76 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/base64" + "os" + 
"path/filepath" + "sync" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// TestFsEditRange_Concurrent_Serializes asserts two concurrent edits are serialized +// resulting in a state equivalent to some sequential execution. +// We choose equal-length replacements so byte indices remain stable. +func TestFsEditRange_Concurrent_Serializes(t *testing.T) { + bin := testutil.BuildTool(t, "fs_edit_range") + + // Arrange: seed repo-relative temp file + tmpDirAbs, err := os.MkdirTemp(".", "fsedit-conc-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "data.bin") + orig := []byte("abcdef") // 0..5 + if err := os.WriteFile(fileRel, orig, 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + // Edits: E1 [2:4) -> "XY"; E2 [4:6) -> "ZZ" (equal lengths) + // Sequentially applied, final should be "abXYZZ" regardless of order. 
+ var wg sync.WaitGroup + wg.Add(2) + var code1, code2 int + go func() { + defer wg.Done() + _, _, code1 = runFsEditRange(t, bin, map[string]any{ + "path": fileRel, + "startByte": 2, + "endByte": 4, + "replacementBase64": base64.StdEncoding.EncodeToString([]byte("XY")), + }) + }() + go func() { + defer wg.Done() + _, _, code2 = runFsEditRange(t, bin, map[string]any{ + "path": fileRel, + "startByte": 4, + "endByte": 6, + "replacementBase64": base64.StdEncoding.EncodeToString([]byte("ZZ")), + }) + }() + wg.Wait() + + if code1 != 0 || code2 != 0 { + t.Fatalf("expected both edits to succeed, got codes (%d,%d)", code1, code2) + } + + got, err := os.ReadFile(fileRel) + if err != nil { + t.Fatalf("read back: %v", err) + } + want := []byte("abXYZZ") + if !bytes.Equal(got, want) { + t.Fatalf("final content not serializable: got %q want %q", got, want) + } +} diff --git a/tools/cmd/fs_edit_range/fs_edit_range_test.go b/tools/cmd/fs_edit_range/fs_edit_range_test.go new file mode 100644 index 0000000..77c5f32 --- /dev/null +++ b/tools/cmd/fs_edit_range/fs_edit_range_test.go @@ -0,0 +1,214 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +type fsEditRangeOutput struct { + BytesReplaced int `json:"bytesReplaced"` + NewSha256 string `json:"newSha256"` +} + +// buildFsEditRangeTool builds ./tools/cmd/fs_edit_range into a temporary binary. +func buildFsEditRangeTool(t *testing.T) string { return testutil.BuildTool(t, "fs_edit_range") } + +// runFsEditRange executes the fs_edit_range tool with given JSON input. 
+func runFsEditRange(t *testing.T, bin string, input any) (fsEditRangeOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsEditRangeOutput + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + return out, stderr.String(), code +} + +// TestFsEditRange_MidFile_Splicing encodes the core contract: +// replace bytes in [startByte:endByte) with replacementBase64, atomically. +func TestFsEditRange_MidFile_Splicing(t *testing.T) { + bin := buildFsEditRangeTool(t) + + // Arrange: create a repo-relative temp file with known content + tmpDirAbs, err := os.MkdirTemp(".", "fsedit-mid-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "data.bin") + orig := []byte("abcdef") // indices: 0 1 2 3 4 5 + if err := os.WriteFile(fileRel, orig, 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + // Act: replace bytes [2:4) ("cd") with "XY" + repl := []byte("XY") + out, stderr, code := runFsEditRange(t, bin, map[string]any{ + "path": fileRel, + "startByte": 2, + "endByte": 4, + "replacementBase64": base64.StdEncoding.EncodeToString(repl), + }) + + // Assert expected success and correct output contract + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.BytesReplaced != len(repl) { + t.Fatalf("bytesReplaced mismatch: got %d want %d", 
out.BytesReplaced, len(repl)) + } + // Verify file content + got, err := os.ReadFile(fileRel) + if err != nil { + t.Fatalf("read back: %v", err) + } + want := []byte("abXYef") + if !bytes.Equal(got, want) { + t.Fatalf("content mismatch: got %q want %q", got, want) + } + // Verify reported SHA matches actual SHA-256 of new content (hex-encoded) + sum := sha256.Sum256(got) + wantHex := hex.EncodeToString(sum[:]) + if !strings.EqualFold(out.NewSha256, wantHex) { + t.Fatalf("newSha256 mismatch: got %q want %q", out.NewSha256, wantHex) + } +} + +// TestFsEditRange_Beginning_Splicing ensures replacement at the beginning [0:n). +func TestFsEditRange_Beginning_Splicing(t *testing.T) { + bin := buildFsEditRangeTool(t) + + // Arrange + tmpDirAbs, err := os.MkdirTemp(".", "fsedit-beg-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "data.bin") + orig := []byte("abcdef") + if err := os.WriteFile(fileRel, orig, 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + // Act: replace bytes [0:2) ("ab") with "ZZ" + repl := []byte("ZZ") + out, stderr, code := runFsEditRange(t, bin, map[string]any{ + "path": fileRel, + "startByte": 0, + "endByte": 2, + "replacementBase64": base64.StdEncoding.EncodeToString(repl), + }) + + // Assert + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.BytesReplaced != len(repl) { + t.Fatalf("bytesReplaced mismatch: got %d want %d", out.BytesReplaced, len(repl)) + } + got, err := os.ReadFile(fileRel) + if err != nil { + t.Fatalf("read back: %v", err) + } + want := []byte("ZZcdef") + if !bytes.Equal(got, want) { + t.Fatalf("content mismatch: got %q want %q", got, want) + } + sum := sha256.Sum256(got) + wantHex := hex.EncodeToString(sum[:]) + if !strings.EqualFold(out.NewSha256, wantHex) { + 
t.Fatalf("newSha256 mismatch: got %q want %q", out.NewSha256, wantHex) + } +} + +// TestFsEditRange_End_Splicing ensures replacement at the end [size-2:size). +func TestFsEditRange_End_Splicing(t *testing.T) { + bin := buildFsEditRangeTool(t) + + // Arrange + tmpDirAbs, err := os.MkdirTemp(".", "fsedit-end-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "data.bin") + orig := []byte("abcdef") + if err := os.WriteFile(fileRel, orig, 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + // Act: replace bytes [4:6) ("ef") with "ZZZ" + repl := []byte("ZZZ") + out, stderr, code := runFsEditRange(t, bin, map[string]any{ + "path": fileRel, + "startByte": 4, + "endByte": 6, + "replacementBase64": base64.StdEncoding.EncodeToString(repl), + }) + + // Assert + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.BytesReplaced != len(repl) { + t.Fatalf("bytesReplaced mismatch: got %d want %d", out.BytesReplaced, len(repl)) + } + got, err := os.ReadFile(fileRel) + if err != nil { + t.Fatalf("read back: %v", err) + } + want := []byte("abcdZZZ") + if !bytes.Equal(got, want) { + t.Fatalf("content mismatch: got %q want %q", got, want) + } + sum := sha256.Sum256(got) + wantHex := hex.EncodeToString(sum[:]) + if !strings.EqualFold(out.NewSha256, wantHex) { + t.Fatalf("newSha256 mismatch: got %q want %q", out.NewSha256, wantHex) + } +} diff --git a/tools/cmd/fs_listdir/fs_listdir.go b/tools/cmd/fs_listdir/fs_listdir.go new file mode 100644 index 0000000..7fc7414 --- /dev/null +++ b/tools/cmd/fs_listdir/fs_listdir.go @@ -0,0 +1,216 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" +) + +type listInput struct { + Path string `json:"path"` + 
package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"strings"
)

// listInput is the JSON request read from stdin.
type listInput struct {
	Path          string   `json:"path"`                    // repo-relative directory to list
	Recursive     bool     `json:"recursive,omitempty"`     // walk the tree instead of one level
	Globs         []string `json:"globs,omitempty"`         // simplified glob filters; default matches all
	IncludeHidden bool     `json:"includeHidden,omitempty"` // include dot-prefixed entries
	MaxResults    int      `json:"maxResults,omitempty"`    // cap on entries; <=0 uses the default
}

// entry describes one listed filesystem object.
type entry struct {
	Path      string `json:"path"`
	Type      string `json:"type"` // "file", "dir", or "symlink"
	SizeBytes int64  `json:"sizeBytes"`
	ModeOctal string `json:"modeOctal"`
	ModTime   string `json:"modTime"` // UTC, RFC3339 layout
}

// listOutput is the JSON response written to stdout.
type listOutput struct {
	Entries   []entry `json:"entries"`
	Truncated bool    `json:"truncated"`
}

// main reads a listInput from stdin, lists matching entries, and writes a
// listOutput to stdout. Errors go to stderr as one-line JSON.
func main() {
	in, err := readInput(os.Stdin)
	if err != nil {
		stderrJSON(err)
		os.Exit(1)
	}
	if err := validatePath(in.Path); err != nil {
		stderrJSON(err)
		os.Exit(1)
	}
	out, err := list(in)
	if err != nil {
		stderrJSON(err)
		os.Exit(1)
	}
	if err := json.NewEncoder(os.Stdout).Encode(out); err != nil {
		stderrJSON(fmt.Errorf("encode json: %w", err))
		os.Exit(1)
	}
}

// readInput decodes the JSON request from r and checks required fields.
func readInput(r io.Reader) (listInput, error) {
	var in listInput
	b, err := io.ReadAll(bufio.NewReader(r))
	if err != nil {
		return in, fmt.Errorf("read stdin: %w", err)
	}
	if err := json.Unmarshal(b, &in); err != nil {
		return in, fmt.Errorf("parse json: %w", err)
	}
	if strings.TrimSpace(in.Path) == "" {
		return in, fmt.Errorf("path is required")
	}
	return in, nil
}

// validatePath rejects absolute paths and any path that escapes the working
// directory. The bare ".." path is rejected explicitly: it cleans to ".."
// with no "../" slash, so the prefix/contains checks alone miss it.
func validatePath(p string) error {
	if filepath.IsAbs(p) {
		return fmt.Errorf("ABSOLUTE_PATH: %s", p)
	}
	clean := filepath.ToSlash(filepath.Clean(p))
	if clean == ".." || strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") {
		return fmt.Errorf("PATH_ESCAPE: %s", p)
	}
	return nil
}

// list enumerates entries under in.Path, applying hidden/glob filtering and a
// result cap, and returns them dirs-first in lexicographic order.
// nolint:gocyclo // Traversal + filtering logic increases complexity; validated by tests.
func list(in listInput) (listOutput, error) {
	var entries []entry
	wildcards := in.Globs
	if len(wildcards) == 0 {
		wildcards = []string{"**/*"} // match everything by default
	}
	max := in.MaxResults
	if max <= 0 {
		max = 10000 // default result cap
	}
	visit := func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Skip unreadable entries rather than failing the whole listing.
			return nil
		}
		// Hidden filtering. The root path itself is exempt: previously a walk
		// rooted at "." (or any dot-directory given explicitly) was aborted
		// because filepath.Base(".") == "." looked hidden and returned SkipDir.
		base := filepath.Base(path)
		if path != in.Path && !in.IncludeHidden && strings.HasPrefix(base, ".") {
			if d.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}
		// Glob filtering (very simplified). Non-matching directories are not
		// skipped so that matching children can still be found.
		matched := false
		for _, g := range wildcards {
			if matchSimpleGlob(path, g) {
				matched = true
				break
			}
		}
		if !matched {
			return nil
		}
		info, infoErr := d.Info()
		if infoErr != nil {
			// If we cannot stat the entry, skip it silently
			return nil
		}
		mode := info.Mode()
		var etype string
		switch {
		case d.IsDir():
			etype = "dir"
		case mode&os.ModeSymlink != 0:
			etype = "symlink"
		default:
			etype = "file"
		}
		entries = append(entries, entry{
			Path:      path,
			Type:      etype,
			SizeBytes: info.Size(),
			ModeOctal: fmt.Sprintf("%04o", mode.Perm()),
			ModTime:   info.ModTime().UTC().Format("2006-01-02T15:04:05Z07:00"),
		})
		if len(entries) >= max {
			// io.EOF is used internally as a sentinel to stop traversal early.
			return io.EOF
		}
		return nil
	}
	if in.Recursive {
		if err := filepath.WalkDir(in.Path, visit); err != nil && !errors.Is(err, io.EOF) {
			return listOutput{}, err
		}
	} else {
		de, err := os.ReadDir(in.Path)
		if err != nil {
			return listOutput{}, err
		}
		for _, d := range de {
			if err := visit(filepath.Join(in.Path, d.Name()), d, nil); err != nil {
				if errors.Is(err, io.EOF) {
					break
				}
				if errors.Is(err, filepath.SkipDir) {
					// In non-recursive mode, skipping a directory is equivalent to ignoring it.
					continue
				}
				return listOutput{}, err
			}
		}
	}
	// Stable ordering: dirs first, then files, lexicographic.
	sort.Slice(entries, func(i, j int) bool {
		if entries[i].Type == entries[j].Type {
			return entries[i].Path < entries[j].Path
		}
		if entries[i].Type == "dir" {
			return true
		}
		if entries[j].Type == "dir" {
			return false
		}
		return entries[i].Path < entries[j].Path
	})
	// NOTE(review): Truncated is best-effort — it reports true whenever the
	// cap was reached, even if no further entries existed beyond it.
	return listOutput{Entries: entries, Truncated: len(entries) >= max}, nil
}

// matchSimpleGlob supports only the patterns the tool documents: "**/*",
// "**", "*", "**/<suffix>", "*.<ext>", and exact-path match.
func matchSimpleGlob(path, pattern string) bool {
	pattern = filepath.ToSlash(pattern)
	path = filepath.ToSlash(path)
	if pattern == "**/*" || pattern == "**" || pattern == "*" {
		return true
	}
	if strings.HasPrefix(pattern, "**/") {
		suffix := strings.TrimPrefix(pattern, "**/")
		if strings.HasPrefix(suffix, "*.") {
			ext := strings.TrimPrefix(suffix, "*")
			return strings.HasSuffix(path, ext)
		}
		return strings.HasSuffix(path, suffix)
	}
	if strings.HasPrefix(pattern, "*.") {
		ext := strings.TrimPrefix(pattern, "*")
		return strings.HasSuffix(path, ext)
	}
	return path == pattern
}

// stderrJSON writes err as a single-line {"error": ...} JSON object to stderr.
func stderrJSON(err error) {
	msg := err.Error()
	msg = strings.ReplaceAll(msg, "\n", " ")
	fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg)
}
+func TestFsListdir_Globs_NonRecursive(t *testing.T) { + // Arrange + tmpDirAbs, err := os.MkdirTemp(".", "fslistdir-glob-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + + if err := os.Mkdir(filepath.Join(tmpDirAbs, "subdir"), 0o755); err != nil { + t.Fatalf("mkdir subdir: %v", err) + } + if err := os.WriteFile(filepath.Join(tmpDirAbs, "a.txt"), []byte("x"), 0o644); err != nil { + t.Fatalf("write a.txt: %v", err) + } + if err := os.WriteFile(filepath.Join(tmpDirAbs, "b.md"), []byte("y"), 0o644); err != nil { + t.Fatalf("write b.md: %v", err) + } + + bin := testutil.BuildTool(t, "fs_listdir") + + // Act: globs should only include *.txt at top level in non-recursive mode + out, stderr, code := runFsListdir(t, bin, map[string]any{ + "path": base, + "recursive": false, + "includeHidden": false, + "globs": []string{"**/*.txt"}, + "maxResults": 100, + }) + + // Assert + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.Truncated { + t.Fatalf("should not be truncated") + } + if len(out.Entries) != 1 { + t.Fatalf("expected 1 entry (*.txt), got %d: %+v", len(out.Entries), out.Entries) + } + if filepath.Base(out.Entries[0].Path) != "a.txt" || out.Entries[0].Type != "file" { + t.Fatalf("expected a.txt file, got: %+v", out.Entries[0]) + } +} diff --git a/tools/cmd/fs_listdir/fs_listdir_suffix_test.go b/tools/cmd/fs_listdir/fs_listdir_suffix_test.go new file mode 100644 index 0000000..9ca751a --- /dev/null +++ b/tools/cmd/fs_listdir/fs_listdir_suffix_test.go @@ -0,0 +1,23 @@ +package main + +import ( + "runtime" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// TestBuildToolSuffix_Representative validates the suffix behavior via the shared helper. 
+func TestBuildToolSuffix_Representative(t *testing.T) { + p := testutil.BuildTool(t, "fs_listdir") + if runtime.GOOS == "windows" { + if !strings.HasSuffix(p, ".exe") { + t.Fatalf("expected Windows suffix .exe in path: %q", p) + } + } else { + if strings.HasSuffix(p, ".exe") { + t.Fatalf("unexpected .exe suffix on non-Windows: %q", p) + } + } +} diff --git a/tools/cmd/fs_listdir/fs_listdir_symlink_test.go b/tools/cmd/fs_listdir/fs_listdir_symlink_test.go new file mode 100644 index 0000000..457f44b --- /dev/null +++ b/tools/cmd/fs_listdir/fs_listdir_symlink_test.go @@ -0,0 +1,63 @@ +package main + +import ( + "os" + "path/filepath" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// TestFsListdir_Symlink_EntryPresent verifies non-recursive listing includes symlink entries +// with type="symlink" and respects includeHidden=false. +func TestFsListdir_Symlink_EntryPresent(t *testing.T) { + // Arrange: create a repo-relative temp dir with a file and a symlink + tmpDirAbs, err := os.MkdirTemp(".", "fslistdir-symlink-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + + // Create target file and symlink next to it + target := filepath.Join(tmpDirAbs, "target.txt") + if err := os.WriteFile(target, []byte("data"), 0o644); err != nil { + t.Fatalf("write target: %v", err) + } + symlinkPath := filepath.Join(tmpDirAbs, "link.txt") + if err := os.Symlink("target.txt", symlinkPath); err != nil { + t.Fatalf("symlink: %v", err) + } + + bin := testutil.BuildTool(t, "fs_listdir") + + // Act: list with recursive=false + out, stderr, code := runFsListdir(t, bin, map[string]any{ + "path": base, + "recursive": false, + "includeHidden": false, + "maxResults": 100, + }) + + // Assert + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + foundFile := 
false + foundLink := false + for _, e := range out.Entries { + if filepath.Base(e.Path) == "target.txt" && e.Type == "file" { + foundFile = true + } + if filepath.Base(e.Path) == "link.txt" && e.Type == "symlink" { + foundLink = true + } + } + if !foundFile || !foundLink { + t.Fatalf("expected both file and symlink entries, got: %+v", out.Entries) + } +} diff --git a/tools/cmd/fs_listdir/fs_listdir_test.go b/tools/cmd/fs_listdir/fs_listdir_test.go new file mode 100644 index 0000000..78f1f52 --- /dev/null +++ b/tools/cmd/fs_listdir/fs_listdir_test.go @@ -0,0 +1,176 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +type fsListdirEntry struct { + Path string `json:"path"` + Type string `json:"type"` + SizeBytes int64 `json:"sizeBytes"` + ModeOctal string `json:"modeOctal"` + ModTime string `json:"modTime"` +} + +type fsListdirOutput struct { + Entries []fsListdirEntry `json:"entries"` + Truncated bool `json:"truncated"` +} + +// Build via shared helper in tools/testutil. + +// runFsListdir executes the fs_listdir tool with given JSON input. +func runFsListdir(t *testing.T, bin string, input any) (fsListdirOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." 
+ cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsListdirOutput + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + return out, stderr.String(), code +} + +// TestFsListdir_EmptyDir_NonRecursive verifies empty directory returns no entries and not truncated. +func TestFsListdir_EmptyDir_NonRecursive(t *testing.T) { + // Arrange: create a repo-relative empty temp dir + tmpDirAbs, err := os.MkdirTemp(".", "fslistdir-empty-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + + bin := testutil.BuildTool(t, "fs_listdir") + + // Act: list with recursive=false, includeHidden=false + out, stderr, code := runFsListdir(t, bin, map[string]any{ + "path": base, + "recursive": false, + "includeHidden": false, + "maxResults": 100, + }) + + // Assert + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.Truncated { + t.Fatalf("should not be truncated for empty dir") + } + if len(out.Entries) != 0 { + t.Fatalf("expected 0 entries, got %d: %+v", len(out.Entries), out.Entries) + } +} + +// TestFsListdir_FilesDirsOrder_HiddenFiltering verifies non-recursive listing orders +// directories before files lexicographically and excludes hidden entries when includeHidden=false. 
+func TestFsListdir_FilesDirsOrder_HiddenFiltering(t *testing.T) { + // Arrange: create a repo-relative temp dir with a dir, a file, and hidden entries + tmpDirAbs, err := os.MkdirTemp(".", "fslistdir-mix-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + + // Visible directory and file + if err := os.Mkdir(filepath.Join(tmpDirAbs, "z_dir"), 0o755); err != nil { + t.Fatalf("mkdir child dir: %v", err) + } + if err := os.WriteFile(filepath.Join(tmpDirAbs, "a.txt"), []byte("hi"), 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + // Hidden directory and file + if err := os.Mkdir(filepath.Join(tmpDirAbs, ".hdir"), 0o755); err != nil { + t.Fatalf("mkdir hidden dir: %v", err) + } + if err := os.WriteFile(filepath.Join(tmpDirAbs, ".secret"), []byte("x"), 0o644); err != nil { + t.Fatalf("write hidden file: %v", err) + } + + bin := testutil.BuildTool(t, "fs_listdir") + + // Act: list with recursive=false, includeHidden=false + out, stderr, code := runFsListdir(t, bin, map[string]any{ + "path": base, + "recursive": false, + "includeHidden": false, + "maxResults": 100, + }) + + // Assert + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.Truncated { + t.Fatalf("should not be truncated for small dir") + } + if len(out.Entries) != 2 { + t.Fatalf("expected 2 entries (dir+file), got %d: %+v", len(out.Entries), out.Entries) + } + // Expect directory first regardless of name + if out.Entries[0].Type != "dir" || !strings.HasSuffix(out.Entries[0].Path, "/z_dir") { + t.Fatalf("expected first entry to be dir z_dir, got: %+v", out.Entries[0]) + } + if out.Entries[1].Type != "file" || !strings.HasSuffix(out.Entries[1].Path, "/a.txt") { + t.Fatalf("expected second entry to be file a.txt, got: %+v", out.Entries[1]) + } +} + +func 
TestFsListdir_ErrorJSON_PathRequired(t *testing.T) { + bin := testutil.BuildTool(t, "fs_listdir") + // Provide empty JSON to trigger validation error: path is required + cmd := exec.Command(bin) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + cmd.Stdin = bytes.NewBufferString("{}") + err := cmd.Run() + if err == nil { + t.Fatalf("expected non-zero exit for missing path; stderr=%q", stderr.String()) + } + // Stderr must be single-line JSON: {"error":"..."} + var payload map[string]any + if jerr := json.Unmarshal(bytes.TrimSpace(stderr.Bytes()), &payload); jerr != nil { + t.Fatalf("stderr is not valid JSON: %v; got %q", jerr, stderr.String()) + } + if _, ok := payload["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' field: %v", payload) + } +} diff --git a/tools/cmd/fs_listdir/fslistdir-glob-3412044139/a.txt b/tools/cmd/fs_listdir/fslistdir-glob-3412044139/a.txt new file mode 100644 index 0000000..c1b0730 --- /dev/null +++ b/tools/cmd/fs_listdir/fslistdir-glob-3412044139/a.txt @@ -0,0 +1 @@ +x \ No newline at end of file diff --git a/tools/cmd/fs_listdir/fslistdir-glob-3412044139/b.md b/tools/cmd/fs_listdir/fslistdir-glob-3412044139/b.md new file mode 100644 index 0000000..e25f181 --- /dev/null +++ b/tools/cmd/fs_listdir/fslistdir-glob-3412044139/b.md @@ -0,0 +1 @@ +y \ No newline at end of file diff --git a/tools/cmd/fs_mkdirp/fs_mkdirp.go b/tools/cmd/fs_mkdirp/fs_mkdirp.go new file mode 100644 index 0000000..1b4f082 --- /dev/null +++ b/tools/cmd/fs_mkdirp/fs_mkdirp.go @@ -0,0 +1,91 @@ +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +type mkdirpInput struct { + Path string `json:"path"` + ModeOctal string `json:"modeOctal,omitempty"` +} + +type mkdirpOutput struct { + Created bool `json:"created"` +} + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := validatePath(in.Path); err != nil { + 
stderrJSON(err) + os.Exit(1) + } + mode := os.FileMode(0o755) + if strings.TrimSpace(in.ModeOctal) != "" { + if m, perr := parseOctalMode(in.ModeOctal); perr == nil { + mode = m + } + } + created := false + if _, err := os.Stat(in.Path); os.IsNotExist(err) { + created = true + } + if err := os.MkdirAll(in.Path, mode); err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := json.NewEncoder(os.Stdout).Encode(mkdirpOutput{Created: created}); err != nil { + stderrJSON(fmt.Errorf("encode json: %w", err)) + os.Exit(1) + } +} + +func readInput(r io.Reader) (mkdirpInput, error) { + var in mkdirpInput + b, err := io.ReadAll(bufio.NewReader(r)) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } + if err := json.Unmarshal(b, &in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.Path) == "" { + return in, fmt.Errorf("path is required") + } + return in, nil +} + +func validatePath(p string) error { + if filepath.IsAbs(p) { + return fmt.Errorf("ABSOLUTE_PATH: %s", p) + } + clean := filepath.ToSlash(filepath.Clean(p)) + if strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") { + return fmt.Errorf("PATH_ESCAPE: %s", p) + } + return nil +} + +func parseOctalMode(s string) (os.FileMode, error) { + var m uint32 + _, err := fmt.Sscanf(s, "%o", &m) + if err != nil { + return 0, err + } + return os.FileMode(m), nil +} + +func stderrJSON(err error) { + msg := err.Error() + msg = strings.ReplaceAll(msg, "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) +} diff --git a/tools/cmd/fs_mkdirp/fs_mkdirp_test.go b/tools/cmd/fs_mkdirp/fs_mkdirp_test.go new file mode 100644 index 0000000..9be44b6 --- /dev/null +++ b/tools/cmd/fs_mkdirp/fs_mkdirp_test.go @@ -0,0 +1,110 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +type 
fsMkdirpOutput struct { + Created bool `json:"created"` +} + +// runFsMkdirp runs the built fs_mkdirp tool with the given JSON input. +func runFsMkdirp(t *testing.T, bin string, input any) (fsMkdirpOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsMkdirpOutput + if code == 0 { + if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + } + return out, stderr.String(), code +} + +func TestFsMkdirp_DeepCreateAndIdempotence(t *testing.T) { + bin := testutil.BuildTool(t, "fs_mkdirp") + + dir := testutil.MakeRepoRelTempDir(t, "fsmkdirp-") + deep := filepath.Join(dir, "a", "b", "c") + + // First call should create directories + out1, stderr1, code1 := runFsMkdirp(t, bin, map[string]any{ + "path": deep, + "modeOctal": "0755", + }) + if code1 != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code1, stderr1) + } + if !out1.Created { + t.Fatalf("expected created=true on first call") + } + if info, err := os.Stat(deep); err != nil || !info.IsDir() { + t.Fatalf("expected directory to exist, err=%v", err) + } + + // Second call should be idempotent (created=false) + out2, stderr2, code2 := runFsMkdirp(t, bin, map[string]any{ + "path": deep, + "modeOctal": "0755", + }) + if code2 != 0 { + t.Fatalf("expected success on second call, got exit=%d stderr=%q", code2, stderr2) + } + if out2.Created { + t.Fatalf("expected created=false on second call") + } +} + +// TestFsMkdirp_ErrorJSON verifies the standardized error contract: on failure, +// the tool must write a single-line JSON object to stderr 
with an "error" key +// and exit non-zero. Use an absolute path to trigger validation failure. +func TestFsMkdirp_ErrorJSON(t *testing.T) { + bin := testutil.BuildTool(t, "fs_mkdirp") + + // Absolute path should be rejected per repo-relative constraint. + abs := string(os.PathSeparator) + filepath.Join("tmp", "mkabs") + + _, stderr, code := runFsMkdirp(t, bin, map[string]any{ + "path": abs, + }) + if code == 0 { + t.Fatalf("expected non-zero exit on invalid absolute path") + } + // Must be single-line JSON with {"error":...} + line := strings.TrimSpace(stderr) + var obj map[string]any + if err := json.Unmarshal([]byte(line), &obj); err != nil { + t.Fatalf("stderr is not JSON: %q err=%v", line, err) + } + if _, ok := obj["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' key: %v", obj) + } +} diff --git a/tools/cmd/fs_move/fs_move.go b/tools/cmd/fs_move/fs_move.go new file mode 100644 index 0000000..954ad5e --- /dev/null +++ b/tools/cmd/fs_move/fs_move.go @@ -0,0 +1,130 @@ +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +type moveInput struct { + From string `json:"from"` + To string `json:"to"` + Overwrite bool `json:"overwrite,omitempty"` +} + +type moveOutput struct { + Moved bool `json:"moved"` +} + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := validatePath(in.From); err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := validatePath(in.To); err != nil { + stderrJSON(err) + os.Exit(1) + } + moved, err := move(in.From, in.To, in.Overwrite) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := json.NewEncoder(os.Stdout).Encode(moveOutput{Moved: moved}); err != nil { + stderrJSON(fmt.Errorf("encode output: %w", err)) + os.Exit(1) + } +} + +func readInput(r io.Reader) (moveInput, error) { + var in moveInput + b, err := io.ReadAll(bufio.NewReader(r)) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } 
+ if err := json.Unmarshal(b, &in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.From) == "" || strings.TrimSpace(in.To) == "" { + return in, fmt.Errorf("from and to are required") + } + return in, nil +} + +func validatePath(p string) error { + if filepath.IsAbs(p) { + return fmt.Errorf("ABSOLUTE_PATH: %s", p) + } + clean := filepath.ToSlash(filepath.Clean(p)) + if strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") { + return fmt.Errorf("PATH_ESCAPE: %s", p) + } + return nil +} + +func move(from, to string, overwrite bool) (bool, error) { + // If destination exists + if st, err := os.Lstat(to); err == nil { + if !overwrite { + return false, fmt.Errorf("DEST_EXISTS: %s", to) + } + // Remove destination (file or dir) + if st.IsDir() { + if err := os.RemoveAll(to); err != nil { + return false, err + } + } else { + if err := os.Remove(to); err != nil { + return false, err + } + } + } + // Try simple rename first + if err := os.Rename(from, to); err == nil { + return true, nil + } + // Copy+remove + src, err := os.Open(from) + if err != nil { + return false, err + } + defer func() { + if cerr := src.Close(); cerr != nil { + fmt.Fprintf(os.Stderr, "close src: %v\n", cerr) + } + }() + if err := os.MkdirAll(filepath.Dir(to), 0o755); err != nil { + return false, err + } + dst, err := os.Create(to) + if err != nil { + return false, err + } + defer func() { + if cerr := dst.Close(); cerr != nil { + fmt.Fprintf(os.Stderr, "close dst: %v\n", cerr) + } + }() + if _, err := io.Copy(dst, src); err != nil { + return false, err + } + if err := os.RemoveAll(from); err != nil { + return false, err + } + return true, nil +} + +func stderrJSON(err error) { + msg := err.Error() + msg = strings.ReplaceAll(msg, "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) +} diff --git a/tools/cmd/fs_move/fs_move_test.go b/tools/cmd/fs_move/fs_move_test.go new file mode 100644 index 0000000..2873d0e --- /dev/null +++ 
b/tools/cmd/fs_move/fs_move_test.go @@ -0,0 +1,179 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +type fsMoveOutput struct { + Moved bool `json:"moved"` +} + +// buildFsMoveTool builds the fs_move tool using shared helper. +func buildFsMoveTool(t *testing.T) string { + return testutil.BuildTool(t, "fs_move") +} + +// runFsMove runs the built fs_move tool with the given JSON input and decodes stdout. +func runFsMove(t *testing.T, bin string, input any) (fsMoveOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsMoveOutput + if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &out); err != nil && code == 0 { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + return out, stderr.String(), code +} + +// TestFsMove_RenameSimple_NoOverwrite expresses the basic contract: renaming a file +// within the same filesystem should succeed when destination does not exist. The tool +// exits 0, outputs {"moved":true}, and the source disappears while destination appears +// with identical contents. 
+func TestFsMove_RenameSimple_NoOverwrite(t *testing.T) { + // Build (will fail until fs_move is implemented) + bin := buildFsMoveTool(t) + + dir := testutil.MakeRepoRelTempDir(t, "fsmove-basic-") + src := filepath.Join(dir, "a.txt") + dst := filepath.Join(dir, "b.txt") + content := []byte("hello") + if err := os.WriteFile(src, content, 0o644); err != nil { + t.Fatalf("seed write: %v", err) + } + + out, stderr, code := runFsMove(t, bin, map[string]any{ + "from": src, + "to": dst, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if !out.Moved { + t.Fatalf("expected moved=true, got false") + } + if _, err := os.Stat(src); !os.IsNotExist(err) { + t.Fatalf("expected source removed, stat err=%v", err) + } + got, err := os.ReadFile(dst) + if err != nil { + t.Fatalf("read dest: %v", err) + } + if string(got) != string(content) { + t.Fatalf("content mismatch: got %q want %q", string(got), string(content)) + } +} + +// TestFsMove_DestinationExists_OverwriteFalse ensures the tool refuses to +// clobber an existing destination when overwrite is false or omitted. +func TestFsMove_DestinationExists_OverwriteFalse(t *testing.T) { + bin := buildFsMoveTool(t) + dir := testutil.MakeRepoRelTempDir(t, "fsmove-overlap-") + src := filepath.Join(dir, "a.txt") + dst := filepath.Join(dir, "b.txt") + if err := os.WriteFile(src, []byte("one"), 0o644); err != nil { + t.Fatalf("seed src: %v", err) + } + if err := os.WriteFile(dst, []byte("two"), 0o644); err != nil { + t.Fatalf("seed dst: %v", err) + } + + // No overwrite flag provided + _, stderr, code := runFsMove(t, bin, map[string]any{ + "from": src, + "to": dst, + }) + if code == 0 { + t.Fatalf("expected non-zero exit due to destination exists, got 0; stderr=%q", stderr) + } +} + +// TestFsMove_DestinationExists_OverwriteTrue ensures the tool replaces an +// existing destination when overwrite is true. 
+func TestFsMove_DestinationExists_OverwriteTrue(t *testing.T) { + bin := buildFsMoveTool(t) + dir := testutil.MakeRepoRelTempDir(t, "fsmove-overwrite-") + src := filepath.Join(dir, "a.txt") + dst := filepath.Join(dir, "b.txt") + if err := os.WriteFile(src, []byte("new"), 0o644); err != nil { + t.Fatalf("seed src: %v", err) + } + if err := os.WriteFile(dst, []byte("old"), 0o644); err != nil { + t.Fatalf("seed dst: %v", err) + } + + out, stderr, code := runFsMove(t, bin, map[string]any{ + "from": src, + "to": dst, + "overwrite": true, + }) + if code != 0 { + t.Fatalf("expected success with overwrite, got exit=%d stderr=%q", code, stderr) + } + if !out.Moved { + t.Fatalf("expected moved=true, got false") + } + if _, err := os.Stat(src); !os.IsNotExist(err) { + t.Fatalf("expected source removed, stat err=%v", err) + } + got, err := os.ReadFile(dst) + if err != nil { + t.Fatalf("read dest: %v", err) + } + if string(got) != "new" { + t.Fatalf("expected destination content 'new', got %q", string(got)) + } +} + +// TestFsMove_ErrorJSONContract verifies standardized error contract: on failure, +// the tool writes a single-line JSON object with an "error" key to stderr and +// exits non-zero. This aligns with L91 standardization. +func TestFsMove_ErrorJSONContract(t *testing.T) { + bin := buildFsMoveTool(t) + // Missing required fields triggers an error + var stdout, stderr bytes.Buffer + cmd := exec.Command(bin) + cmd.Dir = "." 
+ cmd.Stdin = bytes.NewReader([]byte(`{}`)) + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + if err == nil { + t.Fatalf("expected non-zero exit for invalid input; stderr=%q", stderr.String()) + } + // Stderr must be a single-line JSON containing an "error" field + line := strings.TrimSpace(stderr.String()) + var obj map[string]any + if jerr := json.Unmarshal([]byte(line), &obj); jerr != nil { + t.Fatalf("stderr is not JSON: %q err=%v", line, jerr) + } + if _, ok := obj["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' key: %v", obj) + } +} diff --git a/tools/cmd/fs_read_file/fs_read_file.go b/tools/cmd/fs_read_file/fs_read_file.go new file mode 100644 index 0000000..6f69259 --- /dev/null +++ b/tools/cmd/fs_read_file/fs_read_file.go @@ -0,0 +1,140 @@ +package main + +import ( + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +// inputSpec models the stdin JSON contract for fs_read_file. +// {"path":"string","offsetBytes?:int,"maxBytes?:int} +type inputSpec struct { + Path string `json:"path"` + OffsetBytes int64 `json:"offsetBytes"` + MaxBytes int64 `json:"maxBytes"` +} + +// outputSpec is the stdout JSON contract on success. +// {"contentBase64":"string","sizeBytes":int,"eof":bool} +type outputSpec struct { + ContentBase64 string `json:"contentBase64"` + SizeBytes int64 `json:"sizeBytes"` + EOF bool `json:"eof"` +} + +func main() { + if err := run(); err != nil { + // Standardized error JSON contract: single-line {"error":"..."} to stderr + // Preserve NOT_FOUND marker prefix when applicable for deterministic tests. 
+ msg := strings.TrimSpace(err.Error()) + if errors.Is(err, os.ErrNotExist) || strings.Contains(strings.ToUpper(msg), "NOT_FOUND") { + // Ensure NOT_FOUND appears in the message for existing tests + msg = "NOT_FOUND: " + msg + } + if encErr := json.NewEncoder(os.Stderr).Encode(map[string]string{"error": msg}); encErr != nil { + // Fallback to raw stderr write if JSON encoding fails + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + } + os.Exit(1) + } +} + +// nolint:gocyclo // IO validation + ranged read; complexity slightly above threshold and covered by tests. +func run() error { + inBytes, err := io.ReadAll(os.Stdin) + if err != nil { + return fmt.Errorf("read stdin: %w", err) + } + if len(strings.TrimSpace(string(inBytes))) == 0 { + return fmt.Errorf("missing json input") + } + var in inputSpec + if err := json.Unmarshal(inBytes, &in); err != nil { + return fmt.Errorf("bad json: %w", err) + } + if strings.TrimSpace(in.Path) == "" { + return fmt.Errorf("path is required") + } + // Enforce repo-relative paths: disallow absolute and path escape above CWD. + if filepath.IsAbs(in.Path) { + return fmt.Errorf("path must be relative to repository root") + } + clean := filepath.Clean(in.Path) + if strings.HasPrefix(clean, "..") { + return fmt.Errorf("path escapes repository root") + } + if in.OffsetBytes < 0 { + return fmt.Errorf("offsetBytes must be >= 0") + } + // Open and stat to determine file size. + f, err := os.Open(clean) + if err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("NOT_FOUND: %s", clean) + } + return fmt.Errorf("open file: %w", err) + } + // Do not ignore close errors; close explicitly before emitting output + + info, err := f.Stat() + if err != nil { + return fmt.Errorf("stat file: %w", err) + } + size := info.Size() + + // If offset beyond end, return empty content with eof=true. 
+ if in.OffsetBytes >= size { + out := outputSpec{ContentBase64: "", SizeBytes: size, EOF: true} + return writeJSON(out) + } + + if _, err := f.Seek(in.OffsetBytes, io.SeekStart); err != nil { + return fmt.Errorf("seek: %w", err) + } + + // Determine how many bytes to read. + var toRead int64 = size - in.OffsetBytes + if in.MaxBytes > 0 && in.MaxBytes < toRead { + toRead = in.MaxBytes + } + if toRead < 0 { + toRead = 0 + } + + // Read the requested range. + buf := make([]byte, toRead) + var readTotal int64 + for readTotal < toRead { + n, rerr := f.Read(buf[readTotal:]) + if n > 0 { + readTotal += int64(n) + } + if rerr != nil { + if errors.Is(rerr, io.EOF) { + break + } + return fmt.Errorf("read: %w", rerr) + } + } + // Close the file and surface errors before writing JSON to stdout + if cerr := f.Close(); cerr != nil { + return fmt.Errorf("close: %w", cerr) + } + eof := in.OffsetBytes+readTotal >= size + out := outputSpec{ContentBase64: base64.StdEncoding.EncodeToString(buf[:readTotal]), SizeBytes: size, EOF: eof} + return writeJSON(out) +} + +func writeJSON(v any) error { + b, err := json.Marshal(v) + if err != nil { + return fmt.Errorf("marshal: %w", err) + } + fmt.Println(string(b)) + return nil +} diff --git a/tools/cmd/fs_read_file/fs_read_file_test.go b/tools/cmd/fs_read_file/fs_read_file_test.go new file mode 100644 index 0000000..7693538 --- /dev/null +++ b/tools/cmd/fs_read_file/fs_read_file_test.go @@ -0,0 +1,183 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/hyperifyio/goagent/tools/testutil" +) + +type fsReadOutput struct { + ContentBase64 string `json:"contentBase64"` + SizeBytes int64 `json:"sizeBytes"` + EOF bool `json:"eof"` +} + +// build via tools/testutil.BuildTool after migration to tools/cmd/fs_read_file + +// runFsRead runs the built fs_read_file tool with the given JSON input and 
decodes stdout. +func runFsRead(t *testing.T, bin string, input any) (fsReadOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsReadOutput + if code == 0 { + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + } + return out, stderr.String(), code +} + +func makeRepoRelTempFile(t *testing.T, dirPrefix string, data []byte) (relPath string) { + t.Helper() + // Create a temp directory under repo root (current directory). + tmpAbs, err := os.MkdirTemp(".", dirPrefix) + if err != nil { + t.Fatalf("mkdir temp under repo: %v", err) + } + base := filepath.Base(tmpAbs) + fileRel := filepath.Join(base, "file.bin") + if err := os.WriteFile(fileRel, data, 0o644); err != nil { + t.Fatalf("write temp file: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(base); err != nil { + t.Logf("cleanup remove %s: %v", base, err) + } + }) + return fileRel +} + +func TestFsRead_TextFile(t *testing.T) { + bin := testutil.BuildTool(t, "fs_read_file") + content := []byte("hello world\n") + path := makeRepoRelTempFile(t, "fsread-text-", content) + out, stderr, code := runFsRead(t, bin, map[string]any{ + "path": path, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.SizeBytes != int64(len(content)) { + t.Fatalf("sizeBytes mismatch: got %d want %d", out.SizeBytes, len(content)) + } + if !out.EOF { + t.Fatalf("expected EOF=true") + } + decoded, err := base64.StdEncoding.DecodeString(out.ContentBase64) + if err != nil { + t.Fatalf("base64 
decode: %v", err) + } + if !bytes.Equal(decoded, content) { + t.Fatalf("content mismatch: got %q want %q", decoded, content) + } +} + +func TestFsRead_BinaryRoundTrip(t *testing.T) { + bin := testutil.BuildTool(t, "fs_read_file") + data := []byte{0x00, 0x10, 0xFF, 0x42, 0x00} + path := makeRepoRelTempFile(t, "fsread-bin-", data) + out, stderr, code := runFsRead(t, bin, map[string]any{"path": path}) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + decoded, err := base64.StdEncoding.DecodeString(out.ContentBase64) + if err != nil { + t.Fatalf("decode: %v", err) + } + if !bytes.Equal(decoded, data) { + t.Fatalf("binary mismatch: got %v want %v", decoded, data) + } +} + +func TestFsRead_Ranges(t *testing.T) { + bin := testutil.BuildTool(t, "fs_read_file") + data := []byte("abcdefg") + path := makeRepoRelTempFile(t, "fsread-range-", data) + // offset=2, max=3 -> cde, eof=false + out1, stderr1, code1 := runFsRead(t, bin, map[string]any{"path": path, "offsetBytes": 2, "maxBytes": 3}) + if code1 != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code1, stderr1) + } + b1, err := base64.StdEncoding.DecodeString(out1.ContentBase64) + if err != nil { + t.Fatalf("decode b1: %v", err) + } + if string(b1) != "cde" || out1.EOF { + t.Fatalf("unexpected range1: content=%q eof=%v", string(b1), out1.EOF) + } + // offset=5, max=10 -> fg, eof=true + out2, stderr2, code2 := runFsRead(t, bin, map[string]any{"path": path, "offsetBytes": 5, "maxBytes": 10}) + if code2 != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code2, stderr2) + } + b2, err := base64.StdEncoding.DecodeString(out2.ContentBase64) + if err != nil { + t.Fatalf("decode b2: %v", err) + } + if string(b2) != "fg" || !out2.EOF { + t.Fatalf("unexpected range2: content=%q eof=%v", string(b2), out2.EOF) + } +} + +func TestFsRead_NotFound(t *testing.T) { + bin := testutil.BuildTool(t, "fs_read_file") + _, stderr, code := runFsRead(t, bin, map[string]any{"path": 
"this/does/not/exist.txt"}) + if code == 0 { + t.Fatalf("expected non-zero exit for missing file") + } + if !strings.Contains(strings.ToUpper(stderr), "NOT_FOUND") { + t.Fatalf("stderr should contain NOT_FOUND, got %q", stderr) + } +} + +// TestFsRead_ErrorJSON verifies standardized error contract: on failure, +// the tool writes a single-line JSON object to stderr with an "error" key +// and exits non-zero. +func TestFsRead_ErrorJSON(t *testing.T) { + bin := testutil.BuildTool(t, "fs_read_file") + + // Use an absolute path to trigger validation failure (repo-relative enforced). + abs := string(os.PathSeparator) + filepath.Join("tmp", "fsread-abs.txt") + + _, stderr, code := runFsRead(t, bin, map[string]any{ + "path": abs, + }) + if code == 0 { + t.Fatalf("expected non-zero exit on invalid absolute path") + } + line := strings.TrimSpace(stderr) + var obj map[string]any + if err := json.Unmarshal([]byte(line), &obj); err != nil { + t.Fatalf("stderr is not JSON: %q err=%v", line, err) + } + if _, ok := obj["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' key: %v", obj) + } +} diff --git a/tools/cmd/fs_read_lines/fs_read_lines.go b/tools/cmd/fs_read_lines/fs_read_lines.go new file mode 100644 index 0000000..05d410c --- /dev/null +++ b/tools/cmd/fs_read_lines/fs_read_lines.go @@ -0,0 +1,118 @@ +package main + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +type readLinesInput struct { + Path string `json:"path"` + StartLine int `json:"startLine"` + EndLine int `json:"endLine"` + MaxBytes int `json:"maxBytes,omitempty"` +} + +type readLinesOutput struct { + Content string `json:"content"` + StartLine int `json:"startLine"` + EndLine int `json:"endLine"` + EOF bool `json:"eof"` +} + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := validatePath(in.Path); err != nil { + stderrJSON(err) + os.Exit(1) + } + content, eof, err := readRange(in.Path, 
in.StartLine, in.EndLine, in.MaxBytes) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := json.NewEncoder(os.Stdout).Encode(readLinesOutput{ + Content: content, + StartLine: in.StartLine, + EndLine: in.EndLine, + EOF: eof, + }); err != nil { + stderrJSON(fmt.Errorf("encode json: %w", err)) + os.Exit(1) + } +} + +func readInput(r io.Reader) (readLinesInput, error) { + var in readLinesInput + b, err := io.ReadAll(bufio.NewReader(r)) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } + if err := json.Unmarshal(b, &in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.Path) == "" { + return in, fmt.Errorf("path is required") + } + if in.StartLine < 1 || in.EndLine < in.StartLine { + return in, fmt.Errorf("invalid range") + } + return in, nil +} + +func validatePath(p string) error { + if filepath.IsAbs(p) { + return fmt.Errorf("ABSOLUTE_PATH: %s", p) + } + clean := filepath.ToSlash(filepath.Clean(p)) + if strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") { + return fmt.Errorf("PATH_ESCAPE: %s", p) + } + return nil +} + +func readRange(path string, start, end, maxBytes int) (string, bool, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", false, err + } + // Normalize CRLF to LF + norm := bytes.ReplaceAll(data, []byte("\r\n"), []byte("\n")) + lines := bytes.Split(norm, []byte("\n")) + // lines includes an empty last element if file ends with LF; that is fine + idxStart := start - 1 + idxEnd := end - 1 // exclusive line index in zero-based terms for slicing content + if idxStart < 0 { + idxStart = 0 + } + if idxEnd > len(lines) { + idxEnd = len(lines) + } + selected := lines[idxStart:idxEnd] + content := bytes.Join(selected, []byte("\n")) + if len(selected) > 0 && idxEnd <= len(lines) { + // When slicing between lines, add trailing LF + content = append(content, '\n') + } + if maxBytes > 0 && len(content) > maxBytes { + content = content[:maxBytes] + } + // EOF 
is true only if end exceeds file end + eof := end > len(lines) + return string(content), eof, nil +} + +func stderrJSON(err error) { + msg := err.Error() + msg = strings.ReplaceAll(msg, "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) +} diff --git a/tools/cmd/fs_read_lines/fs_read_lines_maxbytes_test.go b/tools/cmd/fs_read_lines/fs_read_lines_maxbytes_test.go new file mode 100644 index 0000000..4d34cea --- /dev/null +++ b/tools/cmd/fs_read_lines/fs_read_lines_maxbytes_test.go @@ -0,0 +1,59 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "os" + "path/filepath" + "testing" + + "github.com/hyperifyio/goagent/tools/testutil" +) + +// TestFsReadLines_MaxBytes_Truncates verifies output is truncated at maxBytes boundary +// without claiming EOF when the file likely continues. +func TestFsReadLines_MaxBytes_Truncates(t *testing.T) { + bin := testutil.BuildTool(t, "fs_read_lines") + + // Arrange: create a repo-relative temp file with simple ASCII lines + tmpDirAbs, err := os.MkdirTemp(".", "fsread-maxbytes-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "data.txt") + content := "aa\nbb\ncc\n" // total 9 bytes + if err := os.WriteFile(fileRel, []byte(content), 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + // Act: request [1,3) but cap output to 4 bytes + out, stderr, code := runFsReadLines(t, bin, map[string]any{ + "path": fileRel, + "startLine": 1, + "endLine": 3, + "maxBytes": 4, + }) + + // Assert: success, content truncated to exactly 4 bytes, EOF=false + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if len(out.Content) != 4 { + t.Fatalf("content length mismatch: got %d want %d", len(out.Content), 4) + } + if out.Content != "aa\nb" { // would be "aa\nbb\n" 
without truncation + t.Fatalf("content mismatch: got %q want %q", out.Content, "aa\nb") + } + if out.EOF { + t.Fatalf("unexpected EOF=true when truncated by maxBytes") + } + if out.StartLine != 1 || out.EndLine != 3 { + t.Fatalf("range echoed mismatch: got (%d,%d) want (1,3)", out.StartLine, out.EndLine) + } +} diff --git a/tools/cmd/fs_read_lines/fs_read_lines_test.go b/tools/cmd/fs_read_lines/fs_read_lines_test.go new file mode 100644 index 0000000..99c990d --- /dev/null +++ b/tools/cmd/fs_read_lines/fs_read_lines_test.go @@ -0,0 +1,175 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/hyperifyio/goagent/tools/testutil" +) + +// Contract output for fs_read_lines +type fsReadLinesOutput struct { + Content string `json:"content"` + StartLine int `json:"startLine"` + EndLine int `json:"endLine"` + EOF bool `json:"eof"` +} + +// buildFsReadLinesTool builds the fs_read_lines tool binary using canonical paths. +func buildFsReadLinesTool(t *testing.T) string { + t.Helper() + return testutil.BuildTool(t, "fs_read_lines") +} + +// runFsReadLines executes the fs_read_lines tool with given JSON input. +func runFsReadLines(t *testing.T, bin string, input any) (fsReadLinesOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." 
+ cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsReadLinesOutput + if code == 0 { + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + } + return out, stderr.String(), code +} + +// TestFsReadLines_LF_Simple verifies extracting a subset of LF-delimited lines. +func TestFsReadLines_LF_Simple(t *testing.T) { + bin := buildFsReadLinesTool(t) + + // Arrange: create a repo-relative temp file with 5 LF lines + tmpDirAbs, err := os.MkdirTemp(".", "fsread-lines-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "data.txt") + content := "l1\nl2\nl3\nl4\nl5\n" + if err := os.WriteFile(fileRel, []byte(content), 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + // Act: request [startLine=2, endLine=4] (1-based inclusive start, exclusive end) + out, stderr, code := runFsReadLines(t, bin, map[string]any{ + "path": fileRel, + "startLine": 2, + "endLine": 4, + }) + + // Assert expected success and correct output contract + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.StartLine != 2 || out.EndLine != 4 { + t.Fatalf("range echoed mismatch: got (%d,%d) want (2,4)", out.StartLine, out.EndLine) + } + if out.Content != "l2\nl3\n" { + t.Fatalf("content mismatch: got %q want %q", out.Content, "l2\nl3\n") + } + if out.EOF { + t.Fatalf("unexpected EOF=true for partial read") + } +} + +// TestFsReadLines_CRLF_Normalize verifies CRLF input is normalized to LF in output. 
+func TestFsReadLines_CRLF_Normalize(t *testing.T) { + bin := buildFsReadLinesTool(t) + + // Arrange: create a repo-relative temp file with CRLF line endings + tmpDirAbs, err := os.MkdirTemp(".", "fsread-crlf-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "data.txt") + // 5 lines with CRLF endings + content := "l1\r\nl2\r\nl3\r\nl4\r\nl5\r\n" + if err := os.WriteFile(fileRel, []byte(content), 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + // Act: request [startLine=2, endLine=5) + out, stderr, code := runFsReadLines(t, bin, map[string]any{ + "path": fileRel, + "startLine": 2, + "endLine": 5, + }) + + // Assert: success, LF-normalized content, EOF=false for partial range + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.StartLine != 2 || out.EndLine != 5 { + t.Fatalf("range echoed mismatch: got (%d,%d) want (2,5)", out.StartLine, out.EndLine) + } + // Output must use LF even though input used CRLF + if out.Content != "l2\nl3\nl4\n" { + t.Fatalf("content mismatch: got %q want %q", out.Content, "l2\nl3\nl4\n") + } + if out.EOF { + t.Fatalf("unexpected EOF=true for partial read") + } +} + +// TestFsReadLines_ErrorJSON verifies the standardized stderr JSON error contract +// for fs_read_lines: on invalid input, the tool must write a single-line JSON +// object with an "error" key to stderr and exit non-zero. +func TestFsReadLines_ErrorJSON(t *testing.T) { + bin := buildFsReadLinesTool(t) + + // Use an absolute path to trigger validation failure. 
+ abs := string(os.PathSeparator) + filepath.Join("tmp", "fsread-abs.txt") + + _, stderr, code := runFsReadLines(t, bin, map[string]any{ + "path": abs, + "startLine": 1, + "endLine": 2, + }) + if code == 0 { + t.Fatalf("expected non-zero exit on invalid absolute path") + } + line := strings.TrimSpace(stderr) + var obj map[string]any + if err := json.Unmarshal([]byte(line), &obj); err != nil { + t.Fatalf("stderr is not JSON: %q err=%v", line, err) + } + if _, ok := obj["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' key: %v", obj) + } +} diff --git a/tools/cmd/fs_rm/fs_rm.go b/tools/cmd/fs_rm/fs_rm.go new file mode 100644 index 0000000..37f189d --- /dev/null +++ b/tools/cmd/fs_rm/fs_rm.go @@ -0,0 +1,94 @@ +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +type rmInput struct { + Path string `json:"path"` + Recursive bool `json:"recursive,omitempty"` + Force bool `json:"force,omitempty"` +} + +type rmOutput struct { + Removed bool `json:"removed"` +} + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := validatePath(in.Path); err != nil { + stderrJSON(err) + os.Exit(1) + } + removed, err := removePath(in.Path, in.Recursive, in.Force) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := json.NewEncoder(os.Stdout).Encode(rmOutput{Removed: removed}); err != nil { + stderrJSON(fmt.Errorf("encode json: %w", err)) + os.Exit(1) + } +} + +func readInput(r io.Reader) (rmInput, error) { + var in rmInput + b, err := io.ReadAll(bufio.NewReader(r)) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } + if err := json.Unmarshal(b, &in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.Path) == "" { + return in, fmt.Errorf("path is required") + } + return in, nil +} + +func validatePath(p string) error { + if filepath.IsAbs(p) { + return fmt.Errorf("ABSOLUTE_PATH: %s", p) + } + clean := 
filepath.ToSlash(filepath.Clean(p))
	if strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") {
		return fmt.Errorf("PATH_ESCAPE: %s", p)
	}
	return nil
}

// removePath deletes path and reports whether anything was removed.
// Semantics:
//   - missing path: (false, nil) when force is true, NOT_FOUND error otherwise
//   - directory: requires recursive=true, else IS_DIR error
//   - Lstat (not Stat) is used so a symlink is removed itself, never its target
//
// On a removal I/O failure the error is returned alongside removed=true; the
// caller treats any non-nil error as fatal before reading the flag.
func removePath(path string, recursive, force bool) (bool, error) {
	info, err := os.Lstat(path)
	if err != nil {
		if os.IsNotExist(err) {
			if force {
				return false, nil
			}
			return false, fmt.Errorf("NOT_FOUND: %s", path)
		}
		return false, err
	}
	if info.IsDir() {
		if !recursive {
			return false, fmt.Errorf("IS_DIR: %s", path)
		}
		return true, os.RemoveAll(path)
	}
	return true, os.Remove(path)
}

// stderrJSON writes err to stderr as a single-line JSON object {"error": ...},
// flattening embedded newlines to keep the one-object-per-line contract.
func stderrJSON(err error) {
	msg := err.Error()
	msg = strings.ReplaceAll(msg, "\n", " ")
	fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg)
}
diff --git a/tools/cmd/fs_rm/fs_rm_test.go b/tools/cmd/fs_rm/fs_rm_test.go
new file mode 100644
index 0000000..4a56dce
--- /dev/null
+++ b/tools/cmd/fs_rm/fs_rm_test.go
@@ -0,0 +1,150 @@
package main

// https://github.com/hyperifyio/goagent/issues/1

import (
	"bytes"
	"encoding/json"
	"os"
	"os/exec"
	"path/filepath"
	"testing"

	"github.com/hyperifyio/goagent/tools/testutil"
)

type fsRmOutput struct {
	Removed bool `json:"removed"`
}

// runFsRm runs the built fs_rm tool with the given JSON input and decodes stdout.
func runFsRm(t *testing.T, bin string, input any) (fsRmOutput, string, int) {
	t.Helper()
	data, err := json.Marshal(input)
	if err != nil {
		t.Fatalf("marshal input: %v", err)
	}
	cmd := exec.Command(bin)
	cmd.Dir = "."
+ cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsRmOutput + if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + return out, stderr.String(), code +} + +// Use shared helper from tools/testutil instead of local duplicate. + +// TestFsRm_DeleteFile expresses the contract: deleting a regular file succeeds, +// tool exits 0, outputs {"removed":true}, and the file no longer exists. +func TestFsRm_DeleteFile(t *testing.T) { + bin := testutil.BuildTool(t, "fs_rm") + + dir := testutil.MakeRepoRelTempDir(t, "fsrm-") + path := filepath.Join(dir, "target.txt") + if err := os.WriteFile(path, []byte("data"), 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + out, stderr, code := runFsRm(t, bin, map[string]any{ + "path": path, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if !out.Removed { + t.Fatalf("expected removed=true, got false") + } + if _, err := os.Stat(path); !os.IsNotExist(err) { + t.Fatalf("expected file to be removed, stat err=%v", err) + } +} + +// TestFsRm_DeleteDirRecursive expresses the next contract: deleting a directory +// tree with recursive=true succeeds, tool exits 0, outputs {"removed":true}, +// and the directory no longer exists. 
+func TestFsRm_DeleteDirRecursive(t *testing.T) { + bin := testutil.BuildTool(t, "fs_rm") + + dir := testutil.MakeRepoRelTempDir(t, "fsrm-dir-") + deep := filepath.Join(dir, "a", "b") + if err := os.MkdirAll(deep, 0o755); err != nil { + t.Fatalf("mkdir tree: %v", err) + } + if err := os.WriteFile(filepath.Join(deep, "file.txt"), []byte("x"), 0o644); err != nil { + t.Fatalf("seed file: %v", err) + } + + out, stderr, code := runFsRm(t, bin, map[string]any{ + "path": dir, + "recursive": true, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if !out.Removed { + t.Fatalf("expected removed=true, got false") + } + if _, err := os.Stat(dir); !os.IsNotExist(err) { + t.Fatalf("expected directory to be removed, stat err=%v", err) + } +} + +// TestFsRm_ErrorJSON_PathRequired verifies that errors are reported as single-line +// JSON to stderr with an "error" field when required input is missing. +func TestFsRm_ErrorJSON_PathRequired(t *testing.T) { + bin := testutil.BuildTool(t, "fs_rm") + + cmd := exec.Command(bin) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + cmd.Stdin = bytes.NewBufferString("{}") + err := cmd.Run() + if err == nil { + t.Fatalf("expected non-zero exit for missing path; stderr=%q", stderr.String()) + } + var payload map[string]any + if jerr := json.Unmarshal(bytes.TrimSpace(stderr.Bytes()), &payload); jerr != nil { + t.Fatalf("stderr is not valid JSON: %v; got %q", jerr, stderr.String()) + } + if _, ok := payload["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' field: %v", payload) + } +} + +// TestFsRm_ForceOnMissing verifies force=true on a missing path exits 0, +// returns {"removed":false}, and the path remains absent. 
+func TestFsRm_ForceOnMissing(t *testing.T) { + bin := testutil.BuildTool(t, "fs_rm") + + dir := testutil.MakeRepoRelTempDir(t, "fsrm-missing-") + path := filepath.Join(dir, "absent.txt") + + out, stderr, code := runFsRm(t, bin, map[string]any{ + "path": path, + "force": true, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.Removed { + t.Fatalf("expected removed=false for missing path with force=true") + } + if _, err := os.Stat(path); !os.IsNotExist(err) { + t.Fatalf("expected path to be absent, stat err=%v", err) + } +} diff --git a/tools/cmd/fs_search/fs_search.go b/tools/cmd/fs_search/fs_search.go new file mode 100644 index 0000000..f9a4efd --- /dev/null +++ b/tools/cmd/fs_search/fs_search.go @@ -0,0 +1,200 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "sort" + "strings" +) + +type searchInput struct { + Query string `json:"query"` + Regex bool `json:"regex,omitempty"` + Globs []string `json:"globs,omitempty"` + MaxResults int `json:"maxResults,omitempty"` +} + +type match struct { + Path string `json:"path"` + Line int `json:"line"` + Col int `json:"col"` + Preview string `json:"preview"` +} + +type searchOutput struct { + Matches []match `json:"matches"` + Truncated bool `json:"truncated"` +} + +// maxFileBytes bounds the size of any single file that will be scanned to +// prevent excessive memory and CPU usage on large repositories. 
+const maxFileBytes = 1 << 20 // 1 MiB + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + matches, truncated, err := search(in) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := json.NewEncoder(os.Stdout).Encode(searchOutput{Matches: matches, Truncated: truncated}); err != nil { + stderrJSON(fmt.Errorf("encode json: %w", err)) + os.Exit(1) + } +} + +func readInput(r io.Reader) (searchInput, error) { + var in searchInput + b, err := io.ReadAll(bufio.NewReader(r)) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } + if err := json.Unmarshal(b, &in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.Query) == "" { + return in, errors.New("query is required") + } + return in, nil +} + +// nolint:gocyclo // Coordinating walk, filter, and scan raises complexity; covered by tests. +func search(in searchInput) ([]match, bool, error) { + var rx *regexp.Regexp + if in.Regex { + var err error + rx, err = regexp.Compile(in.Query) + if err != nil { + return nil, false, fmt.Errorf("BAD_REGEX: %w", err) + } + } + globs := in.Globs + if len(globs) == 0 { + globs = []string{"**/*"} + } + // Walk repo and include only files matching any provided glob suffix pattern. + // We implement a simplified matcher: support patterns like "**/*.txt" and "*.md". 
+ var files []string + walkErr := filepath.WalkDir(".", func(path string, d os.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() { + // Skip VCS metadata and known binary/output directories to bound scanning cost + if path == ".git" || strings.HasPrefix(path, ".git/") { + return filepath.SkipDir + } + if path == "bin" || path == "logs" || path == filepath.ToSlash(filepath.Join("tools", "bin")) { + return filepath.SkipDir + } + return nil + } + // crude hidden filter: skip .git files + if strings.Contains(path, string(os.PathSeparator)+".git"+string(os.PathSeparator)) { + return nil + } + // Match any glob suffix + for _, g := range globs { + if matchSimpleGlob(path, g) { + files = append(files, path) + break + } + } + return nil + }) + if walkErr != nil { + return nil, false, walkErr + } + max := in.MaxResults + if max <= 0 { + max = 1000 + } + var matches []match + for _, f := range files { + // Enforce per-file size limit with a clear error + fi, err := os.Stat(f) + if err != nil { + // best-effort: skip unreadable files silently + continue + } + if fi.Size() > maxFileBytes { + return nil, false, fmt.Errorf("FILE_TOO_LARGE: %s (%d bytes) exceeds limit %d bytes", f, fi.Size(), maxFileBytes) + } + data, err := os.ReadFile(f) + if err != nil { + continue + } + lines := strings.Split(string(data), "\n") + for i, line := range lines { + idx := -1 + if in.Regex { + loc := rx.FindStringIndex(line) + if loc != nil { + idx = loc[0] + } + } else { + idx = strings.Index(line, in.Query) + } + if idx >= 0 { + m := match{Path: f, Line: i + 1, Col: idx + 1, Preview: line} + matches = append(matches, m) + if len(matches) >= max { + return matches, true, nil + } + } + } + } + // stable ordering + sort.Slice(matches, func(i, j int) bool { + if matches[i].Path == matches[j].Path { + if matches[i].Line == matches[j].Line { + return matches[i].Col < matches[j].Col + } + return matches[i].Line < matches[j].Line + } + return matches[i].Path < 
matches[j].Path + }) + return matches, false, nil +} + +// matchSimpleGlob performs minimal glob matching sufficient for tests: +// supports patterns like "**/*.ext", "*.ext", and exact filenames. +func matchSimpleGlob(path, pattern string) bool { + pattern = filepath.ToSlash(pattern) + path = filepath.ToSlash(path) + if pattern == "**/*" || pattern == "**" || pattern == "*" { + return true + } + // no-op: pattern already normalized by ToSlash + if strings.HasPrefix(pattern, "**/") { + suffix := strings.TrimPrefix(pattern, "**/") + // e.g., suffix "*.txt" + if strings.HasPrefix(suffix, "*.") { + ext := strings.TrimPrefix(suffix, "*") // -> ".txt" + return strings.HasSuffix(path, ext) + } + return strings.HasSuffix(path, suffix) + } + if strings.HasPrefix(pattern, "*.") { + ext := strings.TrimPrefix(pattern, "*") + return strings.HasSuffix(path, ext) + } + // Fallback exact match + return path == pattern +} + +func stderrJSON(err error) { + msg := err.Error() + msg = strings.ReplaceAll(msg, "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) +} diff --git a/tools/cmd/fs_search/fs_search_test.go b/tools/cmd/fs_search/fs_search_test.go new file mode 100644 index 0000000..352c441 --- /dev/null +++ b/tools/cmd/fs_search/fs_search_test.go @@ -0,0 +1,404 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +type fsSearchMatch struct { + Path string `json:"path"` + Line int `json:"line"` + Col int `json:"col"` + Preview string `json:"preview"` +} + +type fsSearchOutput struct { + Matches []fsSearchMatch `json:"matches"` + Truncated bool `json:"truncated"` +} + +// build via shared helper in tools/testutil +func buildFsSearch(t *testing.T) string { return testutil.BuildTool(t, "fs_search") } + +// runFsSearch executes the fs_search tool with given JSON input. 
+func runFsSearch(t *testing.T, bin string, input any) (fsSearchOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsSearchOutput + if code == 0 { + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + } + return out, stderr.String(), code +} + +// TestFsSearch_Skips_BinaryDirs ensures the walker skips known binary/output directories. +func TestFsSearch_Skips_BinaryDirs(t *testing.T) { + tmpDirAbs, err := os.MkdirTemp(".", "fssearch-skip-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + + // Create directories that should be skipped + for _, dir := range []string{"bin", "logs", filepath.Join("tools", "bin")} { + if err := os.MkdirAll(filepath.Join(base, dir), 0o755); err != nil { + t.Fatalf("mkdir %s: %v", dir, err) + } + // Put a file inside that would match the query if not skipped + file := filepath.Join(base, dir, "skipme.txt") + if err := os.WriteFile(file, []byte("SHOULD_NOT_BE_SCANNED"), 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + } + + // Create a normal file that should be scanned + goodFile := filepath.Join(base, "ok.txt") + if err := os.WriteFile(goodFile, []byte("needle here"), 0o644); err != nil { + t.Fatalf("write ok.txt: %v", err) + } + + bin := buildFsSearch(t) + out, stderr, code := runFsSearch(t, bin, map[string]any{ + "query": "needle", + "regex": false, + "globs": 
[]string{"**/*.txt"}, + "maxResults": 10, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + for _, m := range out.Matches { + if strings.Contains(m.Path, "/bin/") || strings.Contains(m.Path, "/logs/") || strings.Contains(m.Path, "/tools/bin/") { + t.Fatalf("unexpected match in skipped dir: %q", m.Path) + } + } + // Ensure we did find the good file + sawGood := false + for _, m := range out.Matches { + if m.Path == goodFile { + sawGood = true + break + } + } + if !sawGood { + t.Fatalf("expected a match in %s, got %+v", goodFile, out.Matches) + } +} + +// TestFsSearch_Literal_SingleFile creates a small file and searches for a literal string. +func TestFsSearch_Literal_SingleFile(t *testing.T) { + // Arrange: create a repo-relative temp file with known content + tmpDirAbs, err := os.MkdirTemp(".", "fssearch-lit-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "a.txt") + content := "alpha\nbravo charlie\nalpha bravo\n" + if err := os.WriteFile(fileRel, []byte(content), 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + + bin := buildFsSearch(t) + + // Act: literal search for "bravo" + out, stderr, code := runFsSearch(t, bin, map[string]any{ + "query": "bravo", + "regex": false, + "globs": []string{"**/*.txt"}, + "maxResults": 10, + }) + + // Assert: should succeed (exit 0), have at least one match in our file, not truncated + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.Truncated { + t.Fatalf("should not be truncated for small input") + } + // Find a match in our file + found := false + for _, m := range out.Matches { + if m.Path == fileRel { + if m.Line <= 0 || m.Col <= 0 { + t.Fatalf("invalid line/col: line=%d col=%d", m.Line, m.Col) + } + if 
!strings.Contains(m.Preview, "bravo") { + t.Fatalf("preview should contain query, got %q", m.Preview) + } + found = true + break + } + } + if !found { + t.Fatalf("expected at least one match in %s, got %+v", fileRel, out.Matches) + } +} + +// TestFsSearch_Regex_SingleFile adds a failing test to define regex behavior. +// It expects the tool to support regex queries when {"regex":true}. +// https://github.com/hyperifyio/goagent/issues/1 +func TestFsSearch_Regex_SingleFile(t *testing.T) { + // Arrange: create a repo-relative temp file with known content + tmpDirAbs, err := os.MkdirTemp(".", "fssearch-regex-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + fileRel := filepath.Join(base, "r.txt") + content := "alpha\nbravo charlie\nalpha bravo\n" + if err := os.WriteFile(fileRel, []byte(content), 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + + bin := buildFsSearch(t) + + // Act: regex search for lines starting with "alpha" + out, stderr, code := runFsSearch(t, bin, map[string]any{ + "query": "^alpha", + "regex": true, + "globs": []string{"**/*.txt"}, + "maxResults": 10, + }) + + // Assert: should succeed and find at least one match in our file + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.Truncated { + t.Fatalf("should not be truncated for small input") + } + found := false + for _, m := range out.Matches { + if m.Path == fileRel { + if m.Line <= 0 || m.Col <= 0 { + t.Fatalf("invalid line/col: line=%d col=%d", m.Line, m.Col) + } + if !strings.HasPrefix(m.Preview, "alpha") { + t.Fatalf("preview should start with 'alpha', got %q", m.Preview) + } + found = true + break + } + } + if !found { + t.Fatalf("expected at least one match in %s, got %+v", fileRel, out.Matches) + } +} + +// TestFsSearch_Globs_Filter verifies glob filtering 
limits files considered. +// It expects that only files matching the provided globs are searched. +// https://github.com/hyperifyio/goagent/issues/1 +func TestFsSearch_Globs_Filter(t *testing.T) { + tmpDirAbs, err := os.MkdirTemp(".", "fssearch-glob-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + + txtRel := filepath.Join(base, "note.txt") + mdRel := filepath.Join(base, "note.md") + if err := os.WriteFile(txtRel, []byte("needle in txt\n"), 0o644); err != nil { + t.Fatalf("write txt: %v", err) + } + if err := os.WriteFile(mdRel, []byte("needle in md\n"), 0o644); err != nil { + t.Fatalf("write md: %v", err) + } + + bin := buildFsSearch(t) + + // Act: literal search with globs restricting to only .md files + out, stderr, code := runFsSearch(t, bin, map[string]any{ + "query": "needle", + "regex": false, + "globs": []string{"**/*.md"}, + "maxResults": 10, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.Truncated { + t.Fatalf("should not be truncated for small input") + } + + // Assert: should contain match for mdRel and not for txtRel + var sawMD, sawTXT bool + for _, m := range out.Matches { + if m.Path == mdRel { + sawMD = true + } + if m.Path == txtRel { + sawTXT = true + } + } + if !sawMD { + t.Fatalf("expected a match in %s, got %+v", mdRel, out.Matches) + } + if sawTXT { + t.Fatalf("did not expect a match in %s due to globs filter", txtRel) + } +} + +// TestFsSearch_Truncation verifies that when maxResults is reached, the tool +// stops early, sets Truncated=true, and returns exactly maxResults matches. 
+// https://github.com/hyperifyio/goagent/issues/1 +func TestFsSearch_Truncation(t *testing.T) { + // Arrange: create a repo-relative temp dir with a file containing many matches + tmpDirAbs, err := os.MkdirTemp(".", "fssearch-trunc-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + + fileRel := filepath.Join(base, "many.txt") + // Create a line with multiple occurrences and multiple lines to ensure >2 matches + content := "x x x x x\nxx xx\n" + if err := os.WriteFile(fileRel, []byte(content), 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + + bin := buildFsSearch(t) + + // Act: literal search for "x" with maxResults=2 + out, stderr, code := runFsSearch(t, bin, map[string]any{ + "query": "x", + "regex": false, + "globs": []string{"**/*.txt"}, + "maxResults": 2, + }) + + // Assert + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if !out.Truncated { + t.Fatalf("expected truncated=true when reaching maxResults, got false") + } + if len(out.Matches) != 2 { + t.Fatalf("expected exactly 2 matches, got %d: %+v", len(out.Matches), out.Matches) + } + for _, m := range out.Matches { + if m.Path != fileRel { + t.Fatalf("unexpected path %q (want %q)", m.Path, fileRel) + } + if m.Line <= 0 || m.Col <= 0 { + t.Fatalf("invalid line/col: line=%d col=%d", m.Line, m.Col) + } + if !strings.Contains(m.Preview, "x") { + t.Fatalf("preview should contain 'x', got %q", m.Preview) + } + } +} + +// TestFsSearch_FileSizeLimit ensures files larger than the configured cap are rejected +// with a clear error and non-zero exit to bound scanning cost. 
+func TestFsSearch_FileSizeLimit(t *testing.T) { + tmpDirAbs, err := os.MkdirTemp(".", "fssearch-big-") + if err != nil { + t.Fatalf("mkdir temp: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(tmpDirAbs); err != nil { + t.Logf("cleanup remove %s: %v", tmpDirAbs, err) + } + }) + base := filepath.Base(tmpDirAbs) + + // Create a file just over 1MiB + big := filepath.Join(base, "big.bin") + const oneMiB = 1 << 20 + buf := bytes.Repeat([]byte{'A'}, oneMiB+1) + if err := os.WriteFile(big, buf, 0o644); err != nil { + t.Fatalf("write big: %v", err) + } + + bin := buildFsSearch(t) + // Limit globs to exactly the oversized file to ensure deterministic behavior + _, stderr, code := runFsSearch(t, bin, map[string]any{ + "query": "A", + "regex": false, + "globs": []string{big}, + "maxResults": 1, + }) + if code == 0 { + t.Fatalf("expected non-zero exit for oversized file") + } + if !strings.Contains(stderr, "FILE_TOO_LARGE") { + t.Fatalf("expected FILE_TOO_LARGE in stderr, got %q", stderr) + } +} + +// TestFsSearch_ErrorJSON_QueryRequired verifies standardized stderr JSON error +// contract: when required input is missing (empty query), the tool must write +// a single-line JSON object with an "error" key to stderr and exit non-zero. 
+func TestFsSearch_ErrorJSON_QueryRequired(t *testing.T) { + bin := buildFsSearch(t) + + // Omit required field to trigger validation error in readInput + _, stderr, code := runFsSearch(t, bin, map[string]any{}) + if code == 0 { + t.Fatalf("expected non-zero exit for missing query") + } + line := strings.TrimSpace(stderr) + var obj map[string]any + if err := json.Unmarshal([]byte(line), &obj); err != nil { + t.Fatalf("stderr is not JSON: %q err=%v", line, err) + } + if _, ok := obj["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' key: %v", obj) + } +} diff --git a/tools/cmd/fs_stat/fs_stat.go b/tools/cmd/fs_stat/fs_stat.go new file mode 100644 index 0000000..641ebb7 --- /dev/null +++ b/tools/cmd/fs_stat/fs_stat.go @@ -0,0 +1,122 @@ +package main + +import ( + "bufio" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +type statInput struct { + Path string `json:"path"` + FollowSymlinks bool `json:"followSymlinks,omitempty"` + Hash string `json:"hash,omitempty"` +} + +type statOutput struct { + Exists bool `json:"exists"` + Type string `json:"type"` + SizeBytes int64 `json:"sizeBytes"` + ModeOctal string `json:"modeOctal"` + ModTime string `json:"modTime"` + Sha256 string `json:"sha256,omitempty"` +} + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := validatePath(in.Path); err != nil { + stderrJSON(err) + os.Exit(1) + } + out, err := statPath(in) + if err != nil { + stderrJSON(err) + os.Exit(1) + } + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + stderrJSON(fmt.Errorf("encode json: %w", err)) + os.Exit(1) + } +} + +func readInput(r io.Reader) (statInput, error) { + var in statInput + b, err := io.ReadAll(bufio.NewReader(r)) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } + if err := json.Unmarshal(b, &in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.Path) 
== "" { + return in, fmt.Errorf("path is required") + } + return in, nil +} + +func validatePath(p string) error { + if filepath.IsAbs(p) { + return fmt.Errorf("ABSOLUTE_PATH: %s", p) + } + clean := filepath.ToSlash(filepath.Clean(p)) + if strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") { + return fmt.Errorf("PATH_ESCAPE: %s", p) + } + return nil +} + +func statPath(in statInput) (statOutput, error) { + var out statOutput + var fi os.FileInfo + var err error + if in.FollowSymlinks { + fi, err = os.Stat(in.Path) + } else { + fi, err = os.Lstat(in.Path) + } + if err != nil { + if os.IsNotExist(err) { + return statOutput{Exists: false}, nil + } + return statOutput{}, err + } + mode := fi.Mode() + typeStr := "other" + if mode.IsRegular() { + typeStr = "file" + } else if mode.IsDir() { + typeStr = "dir" + } else if mode&os.ModeSymlink != 0 { + typeStr = "symlink" + } + out = statOutput{ + Exists: true, + Type: typeStr, + SizeBytes: fi.Size(), + ModeOctal: fmt.Sprintf("%04o", mode.Perm()), + ModTime: fi.ModTime().UTC().Format("2006-01-02T15:04:05Z07:00"), + } + if in.Hash == "sha256" && typeStr == "file" { + data, err := os.ReadFile(in.Path) + if err == nil { + h := sha256.Sum256(data) + out.Sha256 = hex.EncodeToString(h[:]) + } + } + return out, nil +} + +func stderrJSON(err error) { + msg := err.Error() + msg = strings.ReplaceAll(msg, "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) +} diff --git a/tools/cmd/fs_stat/fs_stat_test.go b/tools/cmd/fs_stat/fs_stat_test.go new file mode 100644 index 0000000..c072043 --- /dev/null +++ b/tools/cmd/fs_stat/fs_stat_test.go @@ -0,0 +1,235 @@ +package main + +// https://github.com/hyperifyio/goagent/issues/1 + +import ( + "bytes" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/hyperifyio/goagent/tools/testutil" +) + +type fsStatOutput struct { + Exists bool `json:"exists"` + Type string `json:"type"` + SizeBytes int64 `json:"sizeBytes"` + ModeOctal 
string `json:"modeOctal"` + ModTime string `json:"modTime"` + SHA256 string `json:"sha256,omitempty"` +} + +// build via tools/testutil.BuildTool after migration to tools/cmd/fs_stat + +// runFsStat runs the built fs_stat tool with the given JSON input and decodes stdout. +func runFsStat(t *testing.T, bin string, input any) (fsStatOutput, string, int) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Dir = "." + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsStatOutput + if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + return out, stderr.String(), code +} + +// makeRepoRelTempFile creates a temporary file under the repository root and +// returns its repo-relative path. The file is cleaned up automatically. +func makeRepoRelTempFile(t *testing.T, dirPrefix string, data []byte) (relPath string) { + t.Helper() + tmpAbs, err := os.MkdirTemp(".", dirPrefix) + if err != nil { + t.Fatalf("mkdir temp under repo: %v", err) + } + base := filepath.Base(tmpAbs) + fileRel := filepath.Join(base, "file.bin") + if err := os.WriteFile(fileRel, data, 0o644); err != nil { + t.Fatalf("write temp file: %v", err) + } + t.Cleanup(func() { + if err := os.RemoveAll(base); err != nil { + t.Logf("cleanup remove %s: %v", base, err) + } + }) + return fileRel +} + +// TestFsStat_File expresses the minimal contract: for an existing regular file, +// the tool exits 0 and reports exists=true, type="file", and sizeBytes. 
+func TestFsStat_File(t *testing.T) { + bin := testutil.BuildTool(t, "fs_stat") + + content := []byte("hello-fsstat") + path := makeRepoRelTempFile(t, "fsstat-file-", content) + + out, stderr, code := runFsStat(t, bin, map[string]any{ + "path": path, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if !out.Exists { + t.Fatalf("expected exists=true, got false") + } + if out.Type != "file" { + t.Fatalf("expected type=file, got %q", out.Type) + } + if out.SizeBytes != int64(len(content)) { + t.Fatalf("sizeBytes mismatch: got %d want %d", out.SizeBytes, len(content)) + } +} + +// TestFsStat_MissingPath verifies that a non-existent path is handled +// gracefully: exit code 0 and exists=false in the JSON output. +func TestFsStat_MissingPath(t *testing.T) { + bin := testutil.BuildTool(t, "fs_stat") + + // Use a path name that is very unlikely to exist under repo root. + missing := filepath.Join("fsstat-missing-", "no-such-file.bin") + + out, stderr, code := runFsStat(t, bin, map[string]any{ + "path": missing, + }) + if code != 0 { + t.Fatalf("expected success (exit 0) for missing path, got exit=%d stderr=%q", code, stderr) + } + if out.Exists { + t.Fatalf("expected exists=false for missing path") + } +} + +// TestFsStat_Symlink_NoFollow verifies that when followSymlinks=false, a symlink +// is reported with type="symlink" (not the target type). +func TestFsStat_Symlink_NoFollow(t *testing.T) { + bin := testutil.BuildTool(t, "fs_stat") + + content := []byte("hello-symlink") + target := makeRepoRelTempFile(t, "fsstat-symlink-target-", content) + + // Create a symlink alongside the target within repo root. + link := target + ".lnk" + // Use a relative target name so resolution is relative to link's directory. 
+ if err := os.Symlink(filepath.Base(target), link); err != nil { + t.Fatalf("symlink: %v", err) + } + t.Cleanup(func() { + if err := os.Remove(link); err != nil { + t.Logf("cleanup remove %s: %v", link, err) + } + }) + + out, stderr, code := runFsStat(t, bin, map[string]any{ + "path": link, + "followSymlinks": false, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if !out.Exists { + t.Fatalf("expected exists=true, got false") + } + if out.Type != "symlink" { + t.Fatalf("expected type=symlink when not following, got %q", out.Type) + } +} + +// TestFsStat_Symlink_Follow verifies that when followSymlinks=true, a symlink to +// a regular file reports the target type and size. +func TestFsStat_Symlink_Follow(t *testing.T) { + bin := testutil.BuildTool(t, "fs_stat") + + content := []byte("hello-symlink-follow") + target := makeRepoRelTempFile(t, "fsstat-symlink-follow-", content) + link := target + ".lnk" + if err := os.Symlink(filepath.Base(target), link); err != nil { + t.Fatalf("symlink: %v", err) + } + t.Cleanup(func() { + if err := os.Remove(link); err != nil { + t.Logf("cleanup remove %s: %v", link, err) + } + }) + + out, stderr, code := runFsStat(t, bin, map[string]any{ + "path": link, + "followSymlinks": true, + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if !out.Exists { + t.Fatalf("expected exists=true, got false") + } + if out.Type != "file" { + t.Fatalf("expected type=file when following, got %q", out.Type) + } + if out.SizeBytes != int64(len(content)) { + t.Fatalf("sizeBytes mismatch: got %d want %d", out.SizeBytes, len(content)) + } +} + +// TestFsStat_SHA256 verifies that when hash="sha256" and the path is a regular +// file, the tool includes the SHA256 hex digest in the output. 
+func TestFsStat_SHA256(t *testing.T) { + bin := testutil.BuildTool(t, "fs_stat") + + content := []byte("sha256-content\n") + path := makeRepoRelTempFile(t, "fsstat-sha256-", content) + + out, stderr, code := runFsStat(t, bin, map[string]any{ + "path": path, + "hash": "sha256", + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.SHA256 == "" { + t.Fatalf("expected sha256 present") + } +} + +// TestFsStat_ErrorJSONContract verifies standardized error contract: on failure, +// the tool writes a single-line JSON object with an "error" key to stderr and +// exits non-zero (umbrella L177). +func TestFsStat_ErrorJSONContract(t *testing.T) { + bin := testutil.BuildTool(t, "fs_stat") + var stdout, stderr bytes.Buffer + cmd := exec.Command(bin) + cmd.Dir = "." + cmd.Stdin = bytes.NewReader([]byte(`{}`)) + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + if err == nil { + t.Fatalf("expected non-zero exit for invalid input; stderr=%q", stderr.String()) + } + line := strings.TrimSpace(stderr.String()) + var obj map[string]any + if jerr := json.Unmarshal([]byte(line), &obj); jerr != nil { + t.Fatalf("stderr is not JSON: %q err=%v", line, jerr) + } + if _, ok := obj["error"]; !ok { + t.Fatalf("stderr JSON missing 'error' key: %v", obj) + } +} diff --git a/tools/cmd/fs_write_file/fs_write_file.go b/tools/cmd/fs_write_file/fs_write_file.go new file mode 100644 index 0000000..6dc3676 --- /dev/null +++ b/tools/cmd/fs_write_file/fs_write_file.go @@ -0,0 +1,128 @@ +package main + +import ( + "bufio" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +type writeInput struct { + Path string `json:"path"` + ContentBase64 string `json:"contentBase64"` + CreateModeOctal string `json:"createModeOctal,omitempty"` +} + +type writeOutput struct { + BytesWritten int `json:"bytesWritten"` +} + +func main() { + in, err := readInput(os.Stdin) + if err != nil { + stderrJSON(err) 
// validatePath rejects paths that could escape the working directory:
// absolute paths and any cleaned path that still points above the
// current directory. filepath.Clean("..") is ".." with no trailing
// separator, so the bare ".." must be checked explicitly — the "../"
// prefix test alone lets it through.
func validatePath(p string) error {
	if filepath.IsAbs(p) {
		return fmt.Errorf("ABSOLUTE_PATH: %s", p)
	}
	clean := filepath.ToSlash(filepath.Clean(p))
	if clean == ".." || strings.HasPrefix(clean, "../") || strings.Contains(clean, "/../") {
		return fmt.Errorf("PATH_ESCAPE: %s", p)
	}
	return nil
}

// parseOctalMode parses an octal permission string such as "0644" or
// "755" into an os.FileMode.
func parseOctalMode(s string) (os.FileMode, error) {
	var m uint32
	if _, err := fmt.Sscanf(s, "%o", &m); err != nil {
		return 0, err
	}
	return os.FileMode(m), nil
}

// atomicWriteFile writes data to path by writing a uniquely named
// temporary file in the destination directory and renaming it into
// place, so readers never observe a partially written file. Using
// os.CreateTemp avoids the collision risk of a fixed "<path>.tmp"
// name when multiple writers target the same path concurrently.
func atomicWriteFile(path string, data []byte, mode os.FileMode) error {
	f, err := os.CreateTemp(filepath.Dir(path), filepath.Base(path)+".tmp-*")
	if err != nil {
		return err
	}
	tmp := f.Name()
	// Best-effort removal of the temp file on any failure below.
	cleanup := func() { _ = os.Remove(tmp) }
	if _, err := f.Write(data); err != nil {
		_ = f.Close() // already failing; surface the write error
		cleanup()
		return err
	}
	if err := f.Close(); err != nil {
		cleanup()
		return err
	}
	// CreateTemp creates the file 0600; apply the requested mode
	// before publishing it at the final path.
	if err := os.Chmod(tmp, mode); err != nil {
		cleanup()
		return err
	}
	if err := os.Rename(tmp, path); err != nil {
		cleanup()
		return err
	}
	return nil
}

// stderrJSON writes a single-line {"error": "..."} object to stderr,
// flattening newlines so the contract of one JSON line holds.
func stderrJSON(err error) {
	msg := err.Error()
	msg = strings.ReplaceAll(msg, "\n", " ")
	fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg)
}
+ cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + code := 0 + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + var out fsWriteOutput + if code == 0 { + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout.String())), &out); err != nil { + t.Fatalf("unmarshal stdout: %v; raw=%q", err, stdout.String()) + } + } + return out, stderr.String(), code +} + +// makeRepoRelTempDir is now provided by tools/testutil.MakeRepoRelTempDir. + +func TestFsWrite_CreateText(t *testing.T) { + bin := testutil.BuildTool(t, "fs_write_file") + dir := testutil.MakeRepoRelTempDir(t, "fswrite-text-") + path := filepath.Join(dir, "hello.txt") + content := []byte("hello world\n") + out, stderr, code := runFsWrite(t, bin, map[string]any{ + "path": path, + "contentBase64": base64.StdEncoding.EncodeToString(content), + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.BytesWritten != len(content) { + t.Fatalf("bytesWritten mismatch: got %d want %d", out.BytesWritten, len(content)) + } + readBack, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read back: %v", err) + } + if !bytes.Equal(readBack, content) { + t.Fatalf("content mismatch: got %q want %q", readBack, content) + } +} + +func TestFsWrite_Overwrite(t *testing.T) { + bin := testutil.BuildTool(t, "fs_write_file") + dir := testutil.MakeRepoRelTempDir(t, "fswrite-over-") + path := filepath.Join(dir, "data.bin") + // Seed with initial content + if err := os.WriteFile(path, []byte("old"), 0o644); err != nil { + t.Fatalf("seed write: %v", err) + } + newContent := []byte("new-content") + out, stderr, code := runFsWrite(t, bin, map[string]any{ + "path": path, + "contentBase64": base64.StdEncoding.EncodeToString(newContent), + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.BytesWritten 
!= len(newContent) { + t.Fatalf("bytesWritten mismatch: got %d want %d", out.BytesWritten, len(newContent)) + } + readBack, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read back: %v", err) + } + if !bytes.Equal(readBack, newContent) { + t.Fatalf("overwrite failed: got %q want %q", readBack, newContent) + } +} + +func TestFsWrite_Binary(t *testing.T) { + bin := testutil.BuildTool(t, "fs_write_file") + dir := testutil.MakeRepoRelTempDir(t, "fswrite-bin-") + path := filepath.Join(dir, "bytes.bin") + data := []byte{0x00, 0x10, 0xFF, 0x42, 0x00} + out, stderr, code := runFsWrite(t, bin, map[string]any{ + "path": path, + "contentBase64": base64.StdEncoding.EncodeToString(data), + }) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.BytesWritten != len(data) { + t.Fatalf("bytesWritten mismatch: got %d want %d", out.BytesWritten, len(data)) + } + readBack, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read back: %v", err) + } + if !bytes.Equal(readBack, data) { + t.Fatalf("binary mismatch: got %v want %v", readBack, data) + } +} + +func TestFsWrite_MissingParent(t *testing.T) { + bin := testutil.BuildTool(t, "fs_write_file") + path := filepath.Join("no_such_parent_dir", "x", "file.txt") + _, stderr, code := runFsWrite(t, bin, map[string]any{ + "path": path, + "contentBase64": base64.StdEncoding.EncodeToString([]byte("x")), + }) + if code == 0 { + t.Fatalf("expected non-zero exit for missing parent") + } + if !strings.Contains(strings.ToUpper(stderr), "MISSING_PARENT") { + t.Fatalf("stderr should contain MISSING_PARENT, got %q", stderr) + } +} + +// TestFsWrite_ErrorJSON_PathRequired verifies standardized error contract on missing required fields. 
+func TestFsWrite_ErrorJSON_PathRequired(t *testing.T) { + bin := testutil.BuildTool(t, "fs_write_file") + // Omit path to trigger validation error in readInput + _, stderr, code := runFsWrite(t, bin, map[string]any{ + "contentBase64": base64.StdEncoding.EncodeToString([]byte("hello")), + }) + if code == 0 { + t.Fatalf("expected non-zero exit code for missing path") + } + s := strings.TrimSpace(stderr) + if s == "" || !strings.Contains(s, "\"error\"") { + t.Fatalf("stderr should contain JSON with 'error' field, got: %q", stderr) + } +} diff --git a/tools/cmd/get_time/get_time.go b/tools/cmd/get_time/get_time.go new file mode 100644 index 0000000..5dcda2f --- /dev/null +++ b/tools/cmd/get_time/get_time.go @@ -0,0 +1,68 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "os" + "strings" + "time" +) + +type input struct { + Timezone string `json:"timezone"` + // Backward-compatible alias + TZ string `json:"tz"` +} + +type output struct { + Timezone string `json:"timezone"` + ISO8601 string `json:"iso8601"` +} + +func main() { + inBytes, err := io.ReadAll(os.Stdin) + if err != nil { + fmt.Fprintf(os.Stderr, "{\"error\":\"read stdin: %s\"}\n", escape(err.Error())) + os.Exit(1) + } + var in input + if len(strings.TrimSpace(string(inBytes))) == 0 { + inBytes = []byte("{}") + } + if err := json.Unmarshal(inBytes, &in); err != nil { + fmt.Fprintf(os.Stderr, "{\"error\":\"bad json: %s\"}\n", escape(err.Error())) + os.Exit(1) + } + // Prefer canonical 'timezone'; allow backward-compatible alias 'tz' + if strings.TrimSpace(in.Timezone) == "" && strings.TrimSpace(in.TZ) != "" { + in.Timezone = in.TZ + } + if strings.TrimSpace(in.Timezone) == "" { + fmt.Fprintf(os.Stderr, "{\"error\":\"missing timezone\"}\n") + os.Exit(1) + } + loc, err := time.LoadLocation(in.Timezone) + if err != nil { + fmt.Fprintf(os.Stderr, "{\"error\":\"invalid timezone: %s\"}\n", escape(err.Error())) + os.Exit(1) + } + now := time.Now().In(loc).Format(time.RFC3339) + out := output{Timezone: 
// escape returns s encoded for embedding inside a double-quoted JSON
// string. It delegates to encoding/json so every character JSON
// requires to be escaped (quotes, backslashes, and control characters
// such as \n, \r, \t) is handled and the stderr error object stays a
// single line of valid JSON. The previous hand-rolled version only
// handled backslash, quote, and newline, so a tab or carriage return
// in an error message produced invalid JSON.
func escape(s string) string {
	b, err := json.Marshal(s)
	if err != nil {
		// Marshaling a string cannot realistically fail; keep a
		// defensive fallback that at least escapes quotes.
		return strings.ReplaceAll(s, `"`, `\"`)
	}
	// Strip the surrounding quotes that Marshal adds.
	return string(b[1 : len(b)-1])
}
out.ISO8601 == "" { + t.Fatalf("unexpected output: %+v", out) + } + if _, err := time.Parse(time.RFC3339, out.ISO8601); err != nil { + t.Fatalf("iso8601 not RFC3339: %v", err) + } +} + +func TestTimeCLI_AcceptsAliasTZ(t *testing.T) { + bin := testutil.BuildTool(t, "get_time") + out, stderr, code := runTimeTool(t, bin, map[string]any{"tz": "Europe/Helsinki"}) + if code != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", code, stderr) + } + if out.Timezone != "Europe/Helsinki" || out.ISO8601 == "" { + t.Fatalf("unexpected output: %+v", out) + } + if _, err := time.Parse(time.RFC3339, out.ISO8601); err != nil { + t.Fatalf("iso8601 not RFC3339: %v", err) + } +} + +func TestTimeCLI_MissingTimezone_ErrorContract(t *testing.T) { + bin := testutil.BuildTool(t, "get_time") + out, stderr, code := runTimeTool(t, bin, map[string]any{}) + if code == 0 { + t.Fatalf("expected non-zero exit for missing timezone, got 0; stderr=%q", stderr) + } + if out.Timezone != "" || out.ISO8601 != "" { + t.Fatalf("stdout should be empty on error, got: %+v", out) + } + s := strings.TrimSpace(stderr) + if s == "" || !strings.Contains(s, "\"error\"") { + t.Fatalf("stderr should contain JSON error, got: %q", stderr) + } +} + +func TestTimeCLI_InvalidTimezone_ErrorContract(t *testing.T) { + bin := testutil.BuildTool(t, "get_time") + out, stderr, code := runTimeTool(t, bin, map[string]any{"timezone": "Not/AZone"}) + if code == 0 { + t.Fatalf("expected non-zero exit for invalid timezone, got 0; stderr=%q", stderr) + } + if out.Timezone != "" || out.ISO8601 != "" { + t.Fatalf("stdout should be empty on error, got: %+v", out) + } + s := strings.TrimSpace(stderr) + if s == "" || !strings.Contains(s, "\"error\"") { + t.Fatalf("stderr should contain JSON error, got: %q", stderr) + } +} + +func TestToolbeltDiagramExists(t *testing.T) { + if _, err := os.Stat("../../../docs/diagrams/toolbelt-seq.md"); err != nil { + t.Fatalf("missing docs/diagrams/toolbelt-seq.md: %v", err) + } +} diff --git 
a/tools/cmd/github_search/github_search.go b/tools/cmd/github_search/github_search.go new file mode 100644 index 0000000..b10ec4b --- /dev/null +++ b/tools/cmd/github_search/github_search.go @@ -0,0 +1,437 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + "time" +) + +type input struct { + Q string `json:"q"` + Type string `json:"type"` + PerPage int `json:"per_page"` +} + +type rateInfo struct { + Remaining int `json:"remaining"` + Reset int `json:"reset"` +} + +type output struct { + Results []map[string]any `json:"results"` + Rate rateInfo `json:"rate"` +} + +func main() { + if err := run(); err != nil { + var he *hintedError + if errors.As(err, &he) { + msg := strings.ReplaceAll(he.err.Error(), "\n", " ") + hint := strings.ReplaceAll(he.hint, "\n", " ") + if hint != "" { + fmt.Fprintf(os.Stderr, "{\"error\":%q,\"hint\":%q}\n", msg, hint) + } else { + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + } + } else { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + } + os.Exit(1) + } +} + +func run() error { + in, err := decodeInput() + if err != nil { + return err + } + t := strings.ToLower(strings.TrimSpace(in.Type)) + switch t { + case "repositories", "code", "issues", "commits": + default: + return errors.New("type must be one of: repositories, code, issues, commits") + } + if strings.TrimSpace(in.Q) == "" { + return errors.New("q is required") + } + + baseURL, reqURL, err := prepareURLs(t, in.Q, in.PerPage) + if err != nil { + return err + } + + client := newHTTPClient(resolveTimeout()) + var lastStatus int + var retries int + start := time.Now() + var body map[string]any + for attempt := 0; attempt < 2; attempt++ { + if err := ssrfGuard(baseURL); err != nil { + return err + } + req, err := http.NewRequest(http.MethodGet, reqURL.String(), nil) + if err != nil { + return fmt.Errorf("new request: %w", 
err) + } + req.Header.Set("User-Agent", "agentcli-github-search/0.1") + if tok := strings.TrimSpace(os.Getenv("GITHUB_TOKEN")); tok != "" { + req.Header.Set("Authorization", "Bearer "+tok) + } + req.Header.Set("Accept", "application/vnd.github+json") + + resp, err := client.Do(req) + if err != nil { + if isTimeout(err) && attempt == 0 { + retries++ + backoffSleep(0, attempt) + continue + } + return fmt.Errorf("http: %w", err) + } + lastStatus = resp.StatusCode + + rate := parseRate(resp.Header) + + dec := json.NewDecoder(bufio.NewReader(resp.Body)) + if resp.StatusCode >= 500 && attempt == 0 { + _ = resp.Body.Close() //nolint:errcheck + retries++ + backoffSleep(0, attempt) + continue + } + if err := dec.Decode(&body); err != nil { + _ = resp.Body.Close() //nolint:errcheck + if resp.StatusCode >= 500 && attempt == 0 { + retries++ + backoffSleep(0, attempt) + continue + } + return fmt.Errorf("decode json: %w", err) + } + _ = resp.Body.Close() //nolint:errcheck + + if rate.Remaining == 0 { + return hinted(errors.New("RATE_LIMITED"), "use GITHUB_TOKEN") + } + + var items []any + if v, ok := body["items"].([]any); ok { + items = v + } + results := mapResults(t, items) + out := output{Results: results, Rate: rate} + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + _ = appendAudit(map[string]any{ //nolint:errcheck + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "github_search", + "url_host": baseURL.Hostname(), + "type": t, + "status": lastStatus, + "ms": time.Since(start).Milliseconds(), + "retries": retries, + "query": truncateQuery(in.Q), + }) + return nil + } + return fmt.Errorf("unexpected retry exhaustion; last status %d", lastStatus) +} + +func decodeInput() (input, error) { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + return in, nil +} + +func prepareURLs(t string, q 
// mapRepoItem projects a GitHub repository search item onto the
// compact row shape emitted by this tool: full_name, url, description,
// and stars. Fields that are absent or of an unexpected type are
// simply omitted from the row.
func mapRepoItem(m map[string]any) map[string]any {
	row := map[string]any{}
	copyString := func(src, dst string) {
		if s, ok := m[src].(string); ok {
			row[dst] = s
		}
	}
	copyString("full_name", "full_name")
	copyString("html_url", "url")
	copyString("description", "description")
	// JSON numbers decode as float64; surface star counts as ints.
	if stars, ok := m["stargazers_count"].(float64); ok {
		row["stars"] = int(stars)
	}
	return row
}
// resolveTimeout returns the HTTP client timeout. HTTP_TIMEOUT_MS,
// when set to a positive integer number of milliseconds, overrides
// the 8 second default; blank, non-numeric, or non-positive values
// are ignored.
func resolveTimeout() time.Duration {
	if v := strings.TrimSpace(os.Getenv("HTTP_TIMEOUT_MS")); v != "" {
		// Parse as a plain integer. The previous approach of
		// concatenating "ms" and calling time.ParseDuration also
		// accepted odd inputs such as "100.5" for an _MS variable.
		if ms, err := strconv.Atoi(v); err == nil && ms > 0 {
			return time.Duration(ms) * time.Millisecond
		}
	}
	return 8 * time.Second
}
// isPrivateIP reports whether ip is in a loopback, link-local, or
// private range. It delegates to the net package's classifiers, which
// cover exactly the ranges the previous hand-rolled byte comparisons
// did — 10/8, 172.16/12, 192.168/16, 169.254/16, 127/8, ::1,
// fe80::/10, and the IPv6 ULA block fc00::/7 (RFC 1918 / RFC 4193 per
// net.IP.IsPrivate) — and, unlike the byte-indexing version, does not
// panic on a nil or short IP.
func isPrivateIP(ip net.IP) bool {
	return ip.IsLoopback() ||
		ip.IsLinkLocalUnicast() ||
		ip.IsLinkLocalMulticast() ||
		ip.IsPrivate()
}
+ } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} + +type hintedError struct { + err error + hint string +} + +func (h *hintedError) Error() string { return h.err.Error() } +func hinted(err error, hint string) error { return &hintedError{err: err, hint: hint} } + +func truncateQuery(q string) any { + if len(q) <= 256 { + return q + } + return map[string]any{"prefix": q[:256], "query_truncated": true} +} diff --git a/tools/cmd/github_search/github_search_test.go b/tools/cmd/github_search/github_search_test.go new file mode 100644 index 0000000..7aaa214 --- /dev/null +++ b/tools/cmd/github_search/github_search_test.go @@ -0,0 +1,116 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "strings" + "sync/atomic" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +func runTool(t *testing.T, bin string, env []string, input any) (string, string, error) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + cmd.Env = env + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + runErr := cmd.Run() + return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), runErr +} + +func TestGithubSearch_RepositoriesSuccess(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/search/repositories" { + http.Error(w, "bad path", http.StatusBadRequest) + return + } + if r.URL.Query().Get("q") == "" { + http.Error(w, "missing q", http.StatusBadRequest) + return + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("X-RateLimit-Remaining", "4999") + w.Header().Set("X-RateLimit-Reset", "12345") + if _, err := 
w.Write([]byte(`{"items":[{"full_name":"foo/bar","html_url":"https://github.com/foo/bar","description":"desc","stargazers_count":42}]}`)); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "github_search") + env := append(os.Environ(), "GITHUB_BASE_URL="+srv.URL, "GITHUB_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "golang", "type": "repositories"}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"full_name\":\"foo/bar\"") { + t.Fatalf("missing repo in output: %s", outStr) + } + if !strings.Contains(outStr, "\"rate\":{") || !strings.Contains(outStr, "\"remaining\":4999") { + t.Fatalf("missing rate info: %s", outStr) + } +} + +func TestGithubSearch_RateLimited(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("X-RateLimit-Remaining", "0") + w.Header().Set("X-RateLimit-Reset", "12345") + if _, err := w.Write([]byte(`{"items":[]}`)); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "github_search") + env := append(os.Environ(), "GITHUB_BASE_URL="+srv.URL, "GITHUB_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "x", "type": "code"}) + if err == nil { + t.Fatalf("expected error due to rate limiting, got ok: %s", outStr) + } + if !strings.Contains(errStr, "RATE_LIMITED") || !strings.Contains(errStr, "use GITHUB_TOKEN") { + t.Fatalf("expected RATE_LIMITED with hint, got: %s", errStr) + } +} + +func TestGithubSearch_Retry5xxThenSuccess(t *testing.T) { + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + c := atomic.AddInt32(&calls, 1) + if c == 1 { + w.WriteHeader(http.StatusBadGateway) + return + } + w.Header().Set("Content-Type", "application/json") + 
w.Header().Set("X-RateLimit-Remaining", "10") + if _, err := w.Write([]byte(`{"items":[{"full_name":"a/b","html_url":"https://github.com/a/b"}]}`)); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "github_search") + env := append(os.Environ(), "GITHUB_BASE_URL="+srv.URL, "GITHUB_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "q", "type": "repositories"}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if atomic.LoadInt32(&calls) < 2 { + t.Fatalf("expected at least 2 calls, got %d", calls) + } + if !strings.Contains(outStr, "\"full_name\":\"a/b\"") { + t.Fatalf("missing repo: %s", outStr) + } +} diff --git a/tools/cmd/http_fetch/http_fetch.go b/tools/cmd/http_fetch/http_fetch.go new file mode 100644 index 0000000..1cb1cb3 --- /dev/null +++ b/tools/cmd/http_fetch/http_fetch.go @@ -0,0 +1,297 @@ +package main + +import ( + "bufio" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "time" +) + +type input struct { + URL string `json:"url"` + Method string `json:"method"` + MaxBytes int `json:"max_bytes"` + TimeoutMs int `json:"timeout_ms"` + Decompress *bool `json:"decompress"` +} + +type output struct { + Status int `json:"status"` + Headers map[string]string `json:"headers"` + BodyBase64 string `json:"body_base64,omitempty"` + Truncated bool `json:"truncated"` +} + +func main() { + if err := run(); err != nil { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } +} + +func run() error { + in, err := decodeInput() + if err != nil { + return err + } + method, u, maxBytes, timeout, decompress, err := prepareRequestParams(in) + if err != nil { + return err + } + client := newHTTPClient(timeout, decompress) + req, err := http.NewRequest(method, in.URL, nil) + if err != nil { + return fmt.Errorf("new 
request: %w", err) + } + req.Header.Set("User-Agent", "agentcli-http-fetch/0.1") + + start := time.Now() + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("http: %w", err) + } + defer func() { + _ = resp.Body.Close() //nolint:errcheck + }() + + headers := collectHeaders(resp.Header) + bodyB64, truncated, bodyBytes, err := maybeReadBody(method, resp.Body, maxBytes) + if err != nil { + return err + } + + out := output{Status: resp.StatusCode, Headers: headers, BodyBase64: bodyB64, Truncated: truncated} + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + // Best-effort audit. Failures are ignored. + _ = appendAudit(map[string]any{ //nolint:errcheck + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "http_fetch", + "url_host": u.Hostname(), + "status": resp.StatusCode, + "bytes": bodyBytes, + "truncated": truncated, + "ms": time.Since(start).Milliseconds(), + }) + return nil +} + +func decodeInput() (input, error) { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + return in, nil +} + +func prepareRequestParams(in input) (method string, u *url.URL, maxBytes int, timeout time.Duration, decompress bool, err error) { + if strings.TrimSpace(in.URL) == "" { + return "", nil, 0, 0, false, errors.New("url is required") + } + u, err = url.Parse(in.URL) + if err != nil || (u.Scheme != "http" && u.Scheme != "https") { + return "", nil, 0, 0, false, errors.New("only http/https are allowed") + } + method = strings.ToUpper(strings.TrimSpace(in.Method)) + if method == "" { + method = http.MethodGet + } + if method != http.MethodGet && method != http.MethodHead { + return "", nil, 0, 0, false, errors.New("method must be GET or HEAD") + } + maxBytes = in.MaxBytes + if maxBytes <= 0 { + maxBytes = 1 << 20 // default 1 MiB + } + timeout = resolveTimeout(in.TimeoutMs) + // Enforce SSRF 
guard before any request and on every redirect target. + if err = ssrfGuard(u); err != nil { + return "", nil, 0, 0, false, err + } + decompress = true + if in.Decompress != nil { + decompress = *in.Decompress + } + return method, u, maxBytes, timeout, decompress, nil +} + +func resolveTimeout(timeoutMs int) time.Duration { + timeout := time.Duration(timeoutMs) * time.Millisecond + if timeout > 0 { + return timeout + } + if v := strings.TrimSpace(os.Getenv("HTTP_TIMEOUT_MS")); v != "" { + if ms, perr := time.ParseDuration(v + "ms"); perr == nil { + timeout = ms + } + } + if timeout <= 0 { + timeout = 10 * time.Second + } + return timeout +} + +func newHTTPClient(timeout time.Duration, decompress bool) *http.Client { + tr := &http.Transport{DisableCompression: !decompress} + return &http.Client{Timeout: timeout, Transport: tr, CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 5 { + return errors.New("too many redirects") + } + return ssrfGuard(req.URL) + }} +} + +func collectHeaders(h http.Header) map[string]string { + headers := make(map[string]string, len(h)) + for k, v := range h { + if len(v) > 0 { + headers[k] = v[0] + } else { + headers[k] = "" + } + } + return headers +} + +func maybeReadBody(method string, r io.Reader, maxBytes int) (bodyB64 string, truncated bool, bodyBytes int, err error) { + if method == http.MethodHead { + return "", false, 0, nil + } + limited := io.LimitedReader{R: r, N: int64(maxBytes) + 1} + data, rerr := io.ReadAll(&limited) + if rerr != nil { + return "", false, 0, fmt.Errorf("read body: %w", rerr) + } + if int64(len(data)) > int64(maxBytes) { + truncated = true + data = data[:maxBytes] + } + bodyBytes = len(data) + bodyB64 = base64.StdEncoding.EncodeToString(data) + return bodyB64, truncated, bodyBytes, nil +} + +// ssrfGuard blocks requests to loopback, RFC1918, link-local, and ULA addresses, +// unless HTTP_FETCH_ALLOW_LOCAL=1 is set (only used in tests). 
+func ssrfGuard(u *url.URL) error { + host := u.Hostname() + if host == "" { + return errors.New("invalid host") + } + if strings.HasSuffix(strings.ToLower(host), ".onion") { + return errors.New("SSRF blocked: onion domains are not allowed") + } + if os.Getenv("HTTP_FETCH_ALLOW_LOCAL") == "1" { + return nil + } + ips, err := net.LookupIP(host) + if err != nil || len(ips) == 0 { + // If DNS fails, be conservative and block + return errors.New("SSRF blocked: cannot resolve host") + } + for _, ip := range ips { + if isPrivateIP(ip) { + return errors.New("SSRF blocked: private or loopback address") + } + } + return nil +} + +func isPrivateIP(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + // Normalize to 16-byte form for IPv4 + if v4 := ip.To4(); v4 != nil { + ip = v4 + // 10.0.0.0/8 + if v4[0] == 10 { + return true + } + // 172.16.0.0/12 + if v4[0] == 172 && v4[1]&0xf0 == 16 { + return true + } + // 192.168.0.0/16 + if v4[0] == 192 && v4[1] == 168 { + return true + } + // 169.254.0.0/16 link-local + if v4[0] == 169 && v4[1] == 254 { + return true + } + // 127.0.0.0/8 loopback handled by IsLoopback but keep explicit + if v4[0] == 127 { + return true + } + return false + } + // IPv6 ranges: ::1 (loopback), fe80::/10 (link-local), fc00::/7 (ULA) + if ip.Equal(net.ParseIP("::1")) { + return true + } + // fe80::/10 + if ip[0] == 0xfe && (ip[1]&0xc0) == 0x80 { + return true + } + // fc00::/7 + if ip[0]&0xfe == 0xfc { + return true + } + return false +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. 
+func appendAudit(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { _ = f.Close() }() //nolint:errcheck + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." + } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} diff --git a/tools/cmd/http_fetch/http_fetch_test.go b/tools/cmd/http_fetch/http_fetch_test.go new file mode 100644 index 0000000..5853e7e --- /dev/null +++ b/tools/cmd/http_fetch/http_fetch_test.go @@ -0,0 +1,268 @@ +package main_test + +import ( + "bytes" + "compress/gzip" + "encoding/base64" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +type fetchOutput struct { + Status int `json:"status"` + Headers map[string]string `json:"headers"` + BodyBase64 string `json:"body_base64,omitempty"` + Truncated bool `json:"truncated"` +} + +// TestMain enables local SSRF allowance for most tests that rely on httptest servers. 
+func TestMain(m *testing.M) { + if err := os.Setenv("HTTP_FETCH_ALLOW_LOCAL", "1"); err != nil { + panic(err) + } + os.Exit(m.Run()) +} + +func runFetch(t *testing.T, bin string, input any) (fetchOutput, string) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal input: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + t.Fatalf("http_fetch failed to run: %v, stderr=%s", err, stderr.String()) + } + out := strings.TrimSpace(stdout.String()) + var parsed fetchOutput + if err := json.Unmarshal([]byte(out), &parsed); err != nil { + t.Fatalf("failed to parse http_fetch output JSON: %v; raw=%q", err, out) + } + return parsed, stderr.String() +} + +// TestHttpFetch_Get200_Basic verifies a simple GET returns status, headers, and base64 body without truncation. +func TestHttpFetch_Get200_Basic(t *testing.T) { + // Arrange a test server that returns plain text + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.Header().Set("ETag", "\"abc123\"") + if _, err := w.Write([]byte("hello world")); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "http_fetch") + + out, _ := runFetch(t, bin, map[string]any{ + "url": srv.URL, + "max_bytes": 1 << 20, // 1 MiB cap + "timeout_ms": 2000, + "decompress": true, + }) + + if out.Status != 200 { + t.Fatalf("expected status 200, got %d", out.Status) + } + if out.Truncated { + t.Fatalf("expected truncated=false") + } + if ct := out.Headers["Content-Type"]; !strings.HasPrefix(ct, "text/plain") { + t.Fatalf("unexpected content-type: %q", ct) + } + body, err := 
base64.StdEncoding.DecodeString(out.BodyBase64) + if err != nil { + t.Fatalf("body_base64 not valid base64: %v", err) + } + if string(body) != "hello world" { + t.Fatalf("unexpected body: %q", string(body)) + } +} + +// TestHttpFetch_HeadRequest ensures no body is returned and headers are present. +func TestHttpFetch_HeadRequest(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodHead { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + w.Header().Set("Last-Modified", "Mon, 02 Jan 2006 15:04:05 GMT") + w.WriteHeader(204) + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "http_fetch") + out, _ := runFetch(t, bin, map[string]any{ + "url": srv.URL, + "method": "HEAD", + }) + if out.Status != 204 { + t.Fatalf("expected 204, got %d", out.Status) + } + if out.BodyBase64 != "" { + t.Fatalf("expected empty body for HEAD, got %q", out.BodyBase64) + } + if out.Headers["Last-Modified"] == "" { + t.Fatalf("expected Last-Modified header present") + } +} + +// TestHttpFetch_Redirects_Limited ensures redirects are followed up to 5 and then fail. 
+func TestHttpFetch_Redirects_Limited(t *testing.T) { + // Chain of 6 redirects + mux := http.NewServeMux() + for i := 0; i < 6; i++ { + idx := i + path := fmt.Sprintf("/r%c", 'a'+i) + next := fmt.Sprintf("/r%c", 'a'+i+1) + mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) { + if idx == 5 { + w.WriteHeader(200) + if _, err := w.Write([]byte("ok")); err != nil { + t.Fatalf("write: %v", err) + } + return + } + http.Redirect(w, r, next, http.StatusFound) + }) + } + srv := httptest.NewServer(mux) + defer srv.Close() + + bin := testutil.BuildTool(t, "http_fetch") + // Expect error due to >5 redirects + cmd := exec.Command(bin) + in := map[string]any{"url": srv.URL + "/ra", "timeout_ms": 2000} + data, err := json.Marshal(in) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + if err == nil { + t.Fatalf("expected error after too many redirects") + } + if !strings.Contains(stderr.String(), "too many redirects") { + t.Fatalf("expected too many redirects error, got %q", stderr.String()) + } +} + +// TestHttpFetch_GzipDecompress checks automatic gzip decoding by default. 
+func TestHttpFetch_GzipDecompress(t *testing.T) { + gz := func(s string) []byte { + var buf bytes.Buffer + zw := gzip.NewWriter(&buf) + if _, err := zw.Write([]byte(s)); err != nil { + t.Fatalf("gzip write: %v", err) + } + if err := zw.Close(); err != nil { + t.Fatalf("gzip close: %v", err) + } + return buf.Bytes() + } + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Encoding", "gzip") + if _, err := w.Write(gz("zipper")); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "http_fetch") + + // Default: decompress=true + out, _ := runFetch(t, bin, map[string]any{"url": srv.URL}) + body, err := base64.StdEncoding.DecodeString(out.BodyBase64) + if err != nil { + t.Fatalf("decode base64: %v", err) + } + if string(body) != "zipper" { + t.Fatalf("expected decompressed body, got %q", string(body)) + } + + // With decompress=false, expect raw gzip bytes + out, _ = runFetch(t, bin, map[string]any{"url": srv.URL, "decompress": false}) + body, err = base64.StdEncoding.DecodeString(out.BodyBase64) + if err != nil { + t.Fatalf("decode base64: %v", err) + } + if string(body) == "zipper" { + t.Fatalf("expected raw gzip bytes when decompress=false") + } +} + +// TestHttpFetch_Truncation enforces max_bytes cap. 
+func TestHttpFetch_Truncation(t *testing.T) { + data := strings.Repeat("A", 1024) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if _, err := w.Write([]byte(data)); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "http_fetch") + out, _ := runFetch(t, bin, map[string]any{"url": srv.URL, "max_bytes": 100}) + if !out.Truncated { + t.Fatalf("expected truncated=true") + } + body, err := base64.StdEncoding.DecodeString(out.BodyBase64) + if err != nil { + t.Fatalf("decode base64: %v", err) + } + if len(body) != 100 { + t.Fatalf("expected 100 bytes, got %d", len(body)) + } +} + +// TestHttpFetch_SSRF_Block_Localhost ensures SSRF guard blocks localhost by default. +func TestHttpFetch_SSRF_Block_Localhost(t *testing.T) { + bin := testutil.BuildTool(t, "http_fetch") + cmd := exec.Command(bin) + in := map[string]any{"url": "http://127.0.0.1:9"} + data, err := json.Marshal(in) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + // Ensure guard is active + // Inherit env but explicitly remove HTTP_FETCH_ALLOW_LOCAL to enforce guard + var env []string + for _, e := range os.Environ() { + if strings.HasPrefix(e, "HTTP_FETCH_ALLOW_LOCAL=") { + continue + } + env = append(env, e) + } + cmd.Env = env + err = cmd.Run() + if err == nil { + t.Fatalf("expected SSRF block error") + } + if !strings.Contains(stderr.String(), "SSRF blocked") { + t.Fatalf("expected SSRF blocked error, got %q", stderr.String()) + } +} diff --git a/tools/cmd/img_create/img_create.go b/tools/cmd/img_create/img_create.go new file mode 100644 index 0000000..7268ef7 --- /dev/null +++ b/tools/cmd/img_create/img_create.go @@ -0,0 +1,388 @@ +package main + +import ( + "bytes" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + 
"path/filepath" + "regexp" + "strconv" + "strings" + "time" +) + +type inputSpec struct { + Prompt string `json:"prompt"` + N int `json:"n"` + Size string `json:"size"` + Model string `json:"model"` + ReturnB64 bool `json:"return_b64"` + // Optional extras that are shallow-merged into the request body + // after validation as string->primitive. Unknown or non-primitive + // values are dropped. Core keys (model, prompt, n, size, response_format) + // are never overridden by extras. + Extras map[string]any `json:"extras"` + Save *struct { + Dir string `json:"dir"` + Basename string `json:"basename"` + Ext string `json:"ext"` + } `json:"save"` +} + +var sizeRe = regexp.MustCompile(`^\d{3,4}x\d{3,4}$`) + +func main() { + if err := run(); err != nil { + msg := strings.TrimSpace(err.Error()) + // Best-effort error reporting to stderr in JSON; ignore encode errors + // nolint below: best-effort reporting; failures are non-fatal as we exit non-zero + _ = json.NewEncoder(os.Stderr).Encode(map[string]string{"error": msg}) //nolint:errcheck + os.Exit(1) + } +} + +func run() error { + // Parse and validate input + in, err := parseInput(os.Stdin) + if err != nil { + return err + } + // Build request body + bodyBytes, err := buildRequestBody(in) + if err != nil { + return err + } + // Perform HTTP request with limited retries + respBody, model, err := doRequest(bodyBytes) + if err != nil { + return err + } + // Format output (either save files or return b64) + return produceOutput(in, respBody, model) +} + +// parseInput reads JSON from r and returns a validated inputSpec. 
+func parseInput(r io.Reader) (inputSpec, error) { + var in inputSpec + data, err := io.ReadAll(r) + if err != nil { + return in, fmt.Errorf("read stdin: %w", err) + } + if len(strings.TrimSpace(string(data))) == 0 { + return in, errors.New("missing json input") + } + if err := json.Unmarshal(data, &in); err != nil { + return in, fmt.Errorf("bad json: %w", err) + } + if strings.TrimSpace(in.Prompt) == "" { + return in, errors.New("prompt is required") + } + if in.N == 0 { + in.N = 1 + } + if in.N < 1 || in.N > 4 { + return in, errors.New("n must be between 1 and 4") + } + if in.Size == "" { + in.Size = "1024x1024" + } + if !sizeRe.MatchString(in.Size) { + return in, errors.New("size must match ^\\d{3,4}x\\d{3,4}$") + } + if in.Model == "" { + in.Model = "gpt-image-1" + } + if !in.ReturnB64 { + if in.Save == nil || strings.TrimSpace(in.Save.Dir) == "" { + return in, errors.New("save.dir is required when return_b64=false") + } + if filepath.IsAbs(in.Save.Dir) { + return in, errors.New("save.dir must be repo-relative") + } + clean := filepath.Clean(in.Save.Dir) + if strings.HasPrefix(clean, "..") { + return in, errors.New("save.dir escapes repository root") + } + if in.Save.Basename == "" { + in.Save.Basename = "img" + } + if in.Save.Ext == "" { + in.Save.Ext = "png" + } + if in.Save.Ext != "png" { + return in, errors.New("ext must be 'png'") + } + } + return in, nil +} + +// buildRequestBody creates the JSON body for the Images API. 
+func buildRequestBody(in inputSpec) ([]byte, error) { + reqBody := map[string]any{ + "model": in.Model, + "prompt": in.Prompt, + "n": in.N, + "size": in.Size, + "response_format": "b64_json", + } + if len(in.Extras) > 0 { + safe := sanitizeExtras(in.Extras) + for k, v := range safe { + switch k { + case "model", "prompt", "n", "size", "response_format": + default: + reqBody[k] = v + } + } + } + b, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("marshal request: %w", err) + } + return b, nil +} + +// doRequest posts to the Images API with retries and returns body and model. +func doRequest(bodyBytes []byte) ([]byte, string, error) { + baseURL := strings.TrimRight(firstNonEmpty(os.Getenv("OAI_IMAGE_BASE_URL"), os.Getenv("OAI_BASE_URL"), ""), "/") + if baseURL == "" { + return nil, "", errors.New("missing OAI_IMAGE_BASE_URL or OAI_BASE_URL") + } + url := baseURL + "/v1/images/generations" + client := &http.Client{Timeout: httpTimeout()} + var lastErr error + var resp *http.Response + for attempt := 0; attempt < 3; attempt++ { + req, err := http.NewRequest("POST", url, bytes.NewReader(bodyBytes)) + if err != nil { + return nil, "", fmt.Errorf("new request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + if key := strings.TrimSpace(os.Getenv("OAI_API_KEY")); key != "" { + req.Header.Set("Authorization", "Bearer "+key) + } + resp, err = client.Do(req) + if err != nil { + lastErr = err + } else { + // For retry-able statuses, drain and retry + if shouldRetryStatus(resp.StatusCode) && attempt < 2 { + _, _ = io.Copy(io.Discard, resp.Body) //nolint:errcheck + _ = resp.Body.Close() //nolint:errcheck + time.Sleep(backoffDelay(attempt)) + continue + } + break + } + if attempt < 2 { + time.Sleep(backoffDelay(attempt)) + } + } + if resp == nil { + return nil, "", fmt.Errorf("http error: %v", lastErr) + } + defer func() { _ = resp.Body.Close() }() //nolint:errcheck + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, 
"", fmt.Errorf("read response: %w", err) + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + var obj map[string]any + if json.Unmarshal(body, &obj) == nil { + if msg, ok := obj["error"].(string); ok && msg != "" { + return nil, "", errors.New(msg) + } + if errobj, ok := obj["error"].(map[string]any); ok { + if m, ok2 := errobj["message"].(string); ok2 && m != "" { + return nil, "", errors.New(m) + } + } + } + return nil, "", fmt.Errorf("api status %d", resp.StatusCode) + } + // Success + return body, resp.Header.Get("OpenAI-Model"), nil +} + +// produceOutput formats and writes output based on inputSpec. +func produceOutput(in inputSpec, body []byte, model string) error { + var apiResp struct { + Data []struct { + B64 string `json:"b64_json"` + } `json:"data"` + Model string `json:"model,omitempty"` + } + if err := json.Unmarshal(body, &apiResp); err != nil { + return fmt.Errorf("parse response: %w", err) + } + if len(apiResp.Data) == 0 { + return errors.New("no images returned") + } + _ = model // reserved for future use; apiResp.Model may already include it + + if in.ReturnB64 { + debug := isTruthyEnv("IMG_CREATE_DEBUG_B64") || isTruthyEnv("DEBUG_B64") + type img struct { + B64 string `json:"b64"` + Hint string `json:"hint,omitempty"` + } + out := struct { + Images []img `json:"images"` + }{Images: make([]img, 0, len(apiResp.Data))} + for _, d := range apiResp.Data { + if debug { + out.Images = append(out.Images, img{B64: d.B64}) + } else { + out.Images = append(out.Images, img{B64: "", Hint: "b64 elided"}) + } + } + return writeJSON(out) + } + + // Save to disk + dir := filepath.Clean(in.Save.Dir) + if err := os.MkdirAll(dir, 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", dir, err) + } + if strings.Contains(in.Save.Basename, "/") || strings.Contains(in.Save.Basename, string(filepath.Separator)) { + return errors.New("basename must not contain path separators") + } + saved := make([]struct { + Path string `json:"path"` + Bytes int 
`json:"bytes"` + Sha256 string `json:"sha256"` + }, 0, len(apiResp.Data)) + for i, d := range apiResp.Data { + imgBytes, decErr := decodeStdB64(d.B64) + if decErr != nil { + return fmt.Errorf("decode b64 image %d: %w", i+1, decErr) + } + fname := fmt.Sprintf("%s_%03d.%s", in.Save.Basename, i+1, in.Save.Ext) + finalPath := filepath.Join(dir, fname) + tmpPath := filepath.Join(dir, ".tmp-"+fname+"-"+strconv.FormatInt(time.Now().UnixNano(), 10)) + if err := os.WriteFile(tmpPath, imgBytes, 0o644); err != nil { + return fmt.Errorf("write temp file: %w", err) + } + if err := os.Rename(tmpPath, finalPath); err != nil { + _ = os.Remove(tmpPath) //nolint:errcheck + return fmt.Errorf("rename: %w", err) + } + sum := sha256.Sum256(imgBytes) + saved = append(saved, struct { + Path string `json:"path"` + Bytes int `json:"bytes"` + Sha256 string `json:"sha256"` + }{Path: finalPath, Bytes: len(imgBytes), Sha256: hex.EncodeToString(sum[:])}) + } + out := struct { + Saved []struct { + Path string `json:"path"` + Bytes int `json:"bytes"` + Sha256 string `json:"sha256"` + } `json:"saved"` + N int `json:"n"` + Size string `json:"size"` + Model string `json:"model"` + }{Saved: saved, N: len(saved), Size: in.Size, Model: in.Model} + return writeJSON(out) +} + +func httpTimeout() time.Duration { + to := strings.TrimSpace(os.Getenv("OAI_HTTP_TIMEOUT")) + if to == "" { + return 120 * time.Second + } + if d, err := time.ParseDuration(to); err == nil { + return d + } + return 120 * time.Second +} + +func firstNonEmpty(values ...string) string { + for _, v := range values { + if strings.TrimSpace(v) != "" { + return v + } + } + return "" +} + +func shouldRetryStatus(code int) bool { + if code == 429 { + return true + } + if code >= 500 { + return true + } + return false +} + +func backoffDelay(attempt int) time.Duration { + switch attempt { + case 0: + return 250 * time.Millisecond + case 1: + return 500 * time.Millisecond + default: + return 1 * time.Second + } +} + +func isTruthyEnv(key 
string) bool { + v := strings.ToLower(strings.TrimSpace(os.Getenv(key))) + switch v { + case "1", "true", "yes", "y", "on": + return true + default: + return false + } +} + +func decodeStdB64(s string) ([]byte, error) { + return base64.StdEncoding.DecodeString(s) +} + +func writeJSON(v any) error { + b, err := json.Marshal(v) + if err != nil { + return fmt.Errorf("marshal: %w", err) + } + fmt.Println(string(b)) + return nil +} + +// sanitizeExtras filters a map to only include string keys with primitive +// JSON types: string, float64 (numbers), bool. It also allows nulls and +// rejects nested arrays/objects to keep the request predictable. +func sanitizeExtras(in map[string]any) map[string]any { + out := make(map[string]any, len(in)) + for k, v := range in { + if strings.TrimSpace(k) == "" { + continue + } + switch tv := v.(type) { + case string: + out[k] = tv + case bool: + out[k] = tv + case float64: + // json.Unmarshal decodes all numbers into float64 by default + out[k] = tv + case int, int32, int64, uint, uint32, uint64: + // In practice, numbers arrive as float64, but accept ints as well + out[k] = tv + case nil: + out[k] = nil + default: + // drop arrays, maps, and unknown types + } + } + return out +} diff --git a/tools/cmd/img_create/img_create_test.go b/tools/cmd/img_create/img_create_test.go new file mode 100644 index 0000000..120598c --- /dev/null +++ b/tools/cmd/img_create/img_create_test.go @@ -0,0 +1,378 @@ +//nolint:errcheck // Tests elide error checks on JSON encoders/decoders where not relevant to the assertion under test. 
+package main + +import ( + "bytes" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/hyperifyio/goagent/tools/testutil" +) + +func buildTool(t *testing.T) string { + // Build this package into a temp binary + bin := filepath.Join(t.TempDir(), "img_create-test-bin") + cmd := exec.Command("go", "build", "-o", bin, ".") + cmd.Dir = "." + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("build: %v\n%s", err, string(out)) + } + return bin +} + +func runTool(t *testing.T, bin string, in any, env map[string]string) (stdout, stderr string, code int) { + data, _ := json.Marshal(in) + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + var outBuf, errBuf bytes.Buffer + cmd.Stdout = &outBuf + cmd.Stderr = &errBuf + if env != nil { + e := os.Environ() + for k, v := range env { + e = append(e, k+"="+v) + } + cmd.Env = e + } + err := cmd.Run() + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + code = ee.ExitCode() + } else { + code = 1 + } + } + return outBuf.String(), errBuf.String(), code +} + +func TestMissingPrompt(t *testing.T) { + bin := buildTool(t) + _, stderr, code := runTool(t, bin, map[string]any{}, nil) + if code == 0 { + t.Fatalf("expected non-zero exit") + } + if !strings.Contains(stderr, "prompt is required") { + t.Fatalf("expected prompt error, got %q", stderr) + } +} + +func TestHappyPath_SaveOnePNG(t *testing.T) { + // 1x1 transparent PNG + png1x1 := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO9cFmgAAAAASUVORK5CYII=" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/v1/images/generations" { + t.Fatalf("unexpected request: %s %s", r.Method, r.URL.Path) + } + var req struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + N int `json:"n"` + Size 
string `json:"size"` + RespFmt string `json:"response_format"` + Background string `json:"background"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + if req.Model != "gpt-image-1" || req.Prompt != "tiny-pixel" || req.N != 1 || req.Size != "1024x1024" || req.RespFmt != "b64_json" { + t.Fatalf("unexpected payload: %+v", req) + } + if req.Background != "transparent" { + t.Fatalf("expected extras merged: background=transparent, got %q", req.Background) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": []map[string]any{{"b64_json": png1x1}}, + "model": "gpt-image-1", + }) + })) + defer srv.Close() + + bin := buildTool(t) + outDir := testutil.MakeRepoRelTempDir(t, "imgcreate-out-") + stdout, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny-pixel", + "save": map[string]any{"dir": outDir, "basename": "img", "ext": "png"}, + "extras": map[string]any{"background": "transparent"}, + }, map[string]string{ + "OAI_IMAGE_BASE_URL": srv.URL, + "OAI_API_KEY": "test-123", + }) + if code != 0 { + t.Fatalf("unexpected failure: %s", stderr) + } + var obj struct { + Saved []struct { + Path string `json:"path"` + Bytes int `json:"bytes"` + Sha256 string `json:"sha256"` + } `json:"saved"` + N int `json:"n"` + Size string `json:"size"` + Model string `json:"model"` + } + if err := json.Unmarshal([]byte(strings.TrimSpace(stdout)), &obj); err != nil { + t.Fatalf("bad stdout json: %v; raw=%q", err, stdout) + } + if obj.N != 1 || len(obj.Saved) != 1 { + t.Fatalf("unexpected saved count: %+v", obj) + } + // Verify file exists and bytes match decoded b64 + got, err := os.ReadFile(obj.Saved[0].Path) + if err != nil { + t.Fatalf("read saved file: %v", err) + } + want, _ := base64.StdEncoding.DecodeString(png1x1) + if len(got) != len(want) { + t.Fatalf("bytes mismatch: got %d want %d", len(got), len(want)) + } +} + +func TestExtras_DoNotOverrideCoreKeys_AndSanitize(t *testing.T) { + // Server returns a trivial 
valid image + png1x1 := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO9cFmgAAAAASUVORK5CYII=" + var captured map[string]any + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req map[string]any + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("bad json: %v", err) + } + captured = req + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": []map[string]any{{"b64_json": png1x1}}, + }) + })) + defer srv.Close() + + bin := buildTool(t) + outDir := testutil.MakeRepoRelTempDir(t, "imgcreate-out-") + _, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny", + "n": 2, + "size": "512x512", + "model": "gpt-image-1", + "save": map[string]any{"dir": outDir}, + "extras": map[string]any{ + "prompt": "OVERRIDE-ATTEMPT", + "n": 99, + "size": "2048x2048", + "response_format": "raw", + "ok_string": "yes", + "ok_number": 1.5, + "ok_bool": true, + "drop_obj": map[string]any{"x": 1}, + "drop_arr": []any{1, 2, 3}, + }, + }, map[string]string{ + "OAI_IMAGE_BASE_URL": srv.URL, + }) + if code != 0 { + t.Fatalf("unexpected failure: %s", stderr) + } + // Core keys must remain as provided in top-level fields + if captured["prompt"] != "tiny" || captured["n"].(float64) != 2 || captured["size"] != "512x512" || captured["response_format"] != "b64_json" { + t.Fatalf("core keys overridden by extras: %+v", captured) + } + if captured["ok_string"] != "yes" || captured["ok_bool"] != true { + t.Fatalf("expected sanitized primitives present: %+v", captured) + } + if _, ok := captured["drop_obj"]; ok { + t.Fatalf("unexpected object in extras: %+v", captured) + } + if _, ok := captured["drop_arr"]; ok { + t.Fatalf("unexpected array in extras: %+v", captured) + } +} + +func TestMissingSaveDir_WhenReturnB64False(t *testing.T) { + bin := buildTool(t) + // Default return_b64 is false; omit save to trigger validation error + _, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny", + }, 
nil) + if code == 0 { + t.Fatalf("expected non-zero exit") + } + if !strings.Contains(stderr, "save.dir is required when return_b64=false") { + t.Fatalf("expected save.dir error, got %q", stderr) + } +} + +func TestInvalidSizePattern(t *testing.T) { + bin := buildTool(t) + // Provide an invalid size and set return_b64 to bypass save requirements + _, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny", + "size": "big", + "return_b64": true, + }, nil) + if code == 0 { + t.Fatalf("expected non-zero exit") + } + if !strings.Contains(stderr, "size must match") { + t.Fatalf("expected size pattern error, got %q", stderr) + } +} + +func TestAPI400_JSONErrorIsSurfaced(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]any{ + "error": map[string]any{"message": "bad prompt"}, + }) + })) + defer srv.Close() + + bin := buildTool(t) + outDir := testutil.MakeRepoRelTempDir(t, "imgcreate-out-") + _, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny", + "save": map[string]any{"dir": outDir, "basename": "img", "ext": "png"}, + }, map[string]string{ + "OAI_IMAGE_BASE_URL": srv.URL, + "OAI_API_KEY": "test-123", + }) + if code == 0 { + t.Fatalf("expected non-zero exit") + } + if !strings.Contains(stderr, "bad prompt") { + t.Fatalf("expected API error message surfaced, got %q", stderr) + } +} + +func TestSaveDir_AbsolutePathRejected(t *testing.T) { + bin := buildTool(t) + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + abs := filepath.Join(wd, "imgcreate-abs-out") + // Ensure absolute path + if !filepath.IsAbs(abs) { + t.Fatalf("expected absolute path, got %q", abs) + } + _, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny", + "save": map[string]any{"dir": abs}, + }, map[string]string{ + "OAI_IMAGE_BASE_URL": "http://127.0.0.1:9", // invalid to avoid real network if it would 
try + }) + if code == 0 { + t.Fatalf("expected non-zero exit for absolute save.dir") + } + if !strings.Contains(stderr, "repo-relative") { + t.Fatalf("expected repo-relative error, got %q", stderr) + } +} + +func TestSaveDir_EscapeOutsideRepoRejected(t *testing.T) { + bin := buildTool(t) + _, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny", + "save": map[string]any{"dir": filepath.Clean(filepath.Join(".."))}, + }, map[string]string{ + "OAI_IMAGE_BASE_URL": "http://127.0.0.1:9", + }) + if code == 0 { + t.Fatalf("expected non-zero exit for escape path") + } + if !strings.Contains(stderr, "escapes repository root") { + t.Fatalf("expected escape error, got %q", stderr) + } +} + +func TestSaveDir_CleansRelativeAndCreatesNested_WithSHA256(t *testing.T) { + // 1x1 transparent PNG bytes and known SHA256 + png1x1 := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO9cFmgAAAAASUVORK5CYII=" + wantBytes, _ := base64.StdEncoding.DecodeString(png1x1) + sum := sha256.Sum256(wantBytes) + wantSHA := hex.EncodeToString(sum[:]) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": []map[string]any{{"b64_json": png1x1}}, + "model": "gpt-image-1", + }) + })) + defer srv.Close() + + bin := buildTool(t) + base := testutil.MakeRepoRelTempDir(t, "imgcreate-nested-") + // Provide a dir that cleans to a simple child (e.g., a/../b -> b) + nested := filepath.Join(base, "a", "..", "b") + stdout, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny-pixel", + "save": map[string]any{"dir": nested, "basename": "img"}, + }, map[string]string{ + "OAI_IMAGE_BASE_URL": srv.URL, + "OAI_API_KEY": "test-123", + }) + if code != 0 { + t.Fatalf("unexpected failure: %s", stderr) + } + var obj struct { + Saved []struct { + Path string `json:"path"` + Bytes int `json:"bytes"` + Sha256 string `json:"sha256"` + } `json:"saved"` + } + if err := 
json.Unmarshal([]byte(strings.TrimSpace(stdout)), &obj); err != nil { + t.Fatalf("bad stdout json: %v; raw=%q", err, stdout) + } + if len(obj.Saved) != 1 { + t.Fatalf("expected one saved file, got %d", len(obj.Saved)) + } + if obj.Saved[0].Sha256 != wantSHA { + t.Fatalf("sha256 mismatch: got %s want %s", obj.Saved[0].Sha256, wantSHA) + } + // Path should be repo-relative (not absolute) + if filepath.IsAbs(obj.Saved[0].Path) { + t.Fatalf("expected relative saved path, got absolute: %q", obj.Saved[0].Path) + } + // Ensure nested directories were created; file exists + if _, err := os.Stat(obj.Saved[0].Path); err != nil { + t.Fatalf("stat saved file: %v", err) + } +} + +func TestBasename_MustNotContainPathSeparators(t *testing.T) { + // Need a server so the tool reaches save-path validation after decoding + png1x1 := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO9cFmgAAAAASUVORK5CYII=" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": []map[string]any{{"b64_json": png1x1}}, + }) + })) + defer srv.Close() + + bin := buildTool(t) + outDir := testutil.MakeRepoRelTempDir(t, "imgcreate-out-") + badBase := "bad/name" + // On Windows also try using backslash explicitly + if runtime.GOOS == "windows" { + badBase = "bad\\name" + } + _, stderr, code := runTool(t, bin, map[string]any{ + "prompt": "tiny", + "save": map[string]any{"dir": outDir, "basename": badBase}, + "return_b64": false, + }, map[string]string{ + "OAI_IMAGE_BASE_URL": srv.URL, + }) + if code == 0 { + t.Fatalf("expected non-zero exit for basename with separator") + } + if !strings.Contains(stderr, "basename must not contain path separators") { + t.Fatalf("expected basename separator error, got %q", stderr) + } +} diff --git a/tools/cmd/metadata_extract/metadata_extract.go b/tools/cmd/metadata_extract/metadata_extract.go new file mode 100644 index 0000000..4fbe9d3 --- /dev/null +++ 
b/tools/cmd/metadata_extract/metadata_extract.go @@ -0,0 +1,212 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "net/url" + "os" + "path/filepath" + "strings" + "time" +) + +type input struct { + HTML string `json:"html"` + BaseURL string `json:"base_url"` +} + +type output struct { + OpenGraph map[string]any `json:"opengraph"` + Twitter map[string]any `json:"twitter"` + JSONLD []any `json:"jsonld"` +} + +func main() { + if err := run(); err != nil { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } +} + +func run() error { + in, err := decodeInput() + if err != nil { + return err + } + if strings.TrimSpace(in.HTML) == "" { + return errors.New("html is required") + } + if strings.TrimSpace(in.BaseURL) == "" { + return errors.New("base_url is required") + } + if u, perr := url.Parse(in.BaseURL); perr != nil || u.Scheme == "" || u.Host == "" { + return errors.New("base_url must be an absolute URL") + } + + // Minimal implementation that scans meta tags and JSON-LD scripts. + og, tw, ld := extractMetadata(in.HTML) + + out := output{OpenGraph: og, Twitter: tw, JSONLD: ld} + enc := json.NewEncoder(os.Stdout) + enc.SetEscapeHTML(false) + if err := enc.Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + _ = appendAudit(map[string]any{ //nolint:errcheck + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "metadata_extract", + "ms": 0, + }) + return nil +} + +func decodeInput() (input, error) { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + return in, nil +} + +func extractMetadata(html string) (map[string]any, map[string]any, []any) { + og := map[string]any{} + tw := map[string]any{} + var ld []any + + // Very basic parsing without external deps: regex-free naive scans. 
+ lower := strings.ToLower(html) + // Extract <meta property="og:..." content="..."> + idx := 0 + for { + i := strings.Index(lower[idx:], "<meta") + if i < 0 { + break + } + i += idx + end := strings.Index(lower[i:], ">") + if end < 0 { + break + } + tag := html[i : i+end+1] + p := attrValue(tag, "property") + n := attrValue(tag, "name") + c := attrValue(tag, "content") + if strings.HasPrefix(strings.ToLower(p), "og:") && c != "" { + og[p] = c + } + if strings.HasPrefix(strings.ToLower(n), "twitter:") && c != "" { + tw[n] = c + } + idx = i + end + 1 + } + // Extract <script type="application/ld+json"> ... </script> + idx = 0 + for { + i := strings.Index(lower[idx:], "<script") + if i < 0 { + break + } + i += idx + closeTag := strings.Index(lower[i:], ">") + if closeTag < 0 { + break + } + tag := html[i : i+closeTag+1] + t := attrValue(tag, "type") + if strings.EqualFold(strings.TrimSpace(t), "application/ld+json") { + // find </script> + rest := html[i+closeTag+1:] + end := strings.Index(strings.ToLower(rest), "</script>") + if end >= 0 { + payload := strings.TrimSpace(rest[:end]) + var v any + if err := json.Unmarshal([]byte(payload), &v); err == nil { + switch vv := v.(type) { + case []any: + ld = append(ld, vv...) 
// attrValue extracts the value of an HTML attribute written as key="value"
// or key='value' inside a single tag string. Matching is case-insensitive.
//
// Unlike a bare substring search for `key=`, this rejects matches that are
// the tail of a longer attribute name (e.g. `data-property=` must not
// satisfy a lookup for `property`) and keeps scanning past unquoted or
// malformed matches instead of giving up on the first one.
func attrValue(tag string, key string) string {
	lower := strings.ToLower(tag)
	needle := strings.ToLower(key) + "="
	from := 0
	for {
		j := strings.Index(lower[from:], needle)
		if j < 0 {
			return ""
		}
		j += from
		// Boundary check: the byte before the key must not be part of an
		// attribute name, otherwise this is a longer attribute ending in key.
		if j > 0 && isAttrNameByte(lower[j-1]) {
			from = j + len(needle)
			continue
		}
		start := j + len(needle)
		if start >= len(tag) {
			return ""
		}
		quote := tag[start]
		if quote != '"' && quote != '\'' {
			// Unquoted or malformed value: skip this match and keep scanning.
			from = start
			continue
		}
		start++
		end := strings.IndexByte(tag[start:], quote)
		if end < 0 {
			// Unterminated quoted value; nothing usable remains.
			return ""
		}
		return tag[start : start+end]
	}
}

// isAttrNameByte reports whether b (already lowercased) can be part of an
// HTML attribute name: letters, digits, '-', '_', or ':'.
func isAttrNameByte(b byte) bool {
	return b == '-' || b == '_' || b == ':' ||
		(b >= 'a' && b <= 'z') || (b >= '0' && b <= '9')
}
+ } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} diff --git a/tools/cmd/metadata_extract/metadata_extract_test.go b/tools/cmd/metadata_extract/metadata_extract_test.go new file mode 100644 index 0000000..48d92a1 --- /dev/null +++ b/tools/cmd/metadata_extract/metadata_extract_test.go @@ -0,0 +1,60 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "os/exec" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +func runTool(t *testing.T, bin string, input any) (string, string, error) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err = cmd.Run() + return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), err +} + +func TestMetadataExtract_ParsesOGTwitterJSONLD(t *testing.T) { + bin := testutil.BuildTool(t, "metadata_extract") + html := `<!doctype html><html><head> + <meta property="og:title" content="OG Title"> + <meta name="twitter:card" content="summary_large_image"> + <script type="application/ld+json">{"@context":"https://schema.org","@type":"Article","headline":"LD Headline"}</script> + </head><body>hi</body></html>` + in := map[string]any{"html": html, "base_url": "https://example.org/page"} + outStr, errStr, err := runTool(t, bin, in) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"opengraph\"") { + t.Fatalf("expected opengraph in output: %s", outStr) + } + if !strings.Contains(outStr, "\"twitter\"") { + t.Fatalf("expected twitter in output: %s", outStr) + } + if !strings.Contains(outStr, "\"jsonld\"") { + t.Fatalf("expected jsonld in output: %s", outStr) + } +} + +func 
TestMetadataExtract_RequiresInputs(t *testing.T) { + bin := testutil.BuildTool(t, "metadata_extract") + _, errStr, err := runTool(t, bin, map[string]any{"html": "", "base_url": ""}) + if err == nil { + t.Fatalf("expected error for missing inputs") + } + if !strings.Contains(errStr, "required") { + t.Fatalf("expected required error, got: %s", errStr) + } +} diff --git a/tools/cmd/openalex_search/openalex_search.go b/tools/cmd/openalex_search/openalex_search.go new file mode 100644 index 0000000..b9b35da --- /dev/null +++ b/tools/cmd/openalex_search/openalex_search.go @@ -0,0 +1,352 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + "time" +) + +// input defines the expected stdin JSON for the tool. +type input struct { + Q string `json:"q"` + From string `json:"from"` + To string `json:"to"` + PerPage int `json:"per_page"` +} + +// outputResult is the normalized result row produced by this tool. +type outputResult struct { + Title string `json:"title"` + DOI string `json:"doi,omitempty"` + PublicationYear int `json:"publication_year"` + OpenAccessURL string `json:"open_access_url,omitempty"` + // Authorships carries through as an opaque list to avoid schema churn. + Authorships []any `json:"authorships"` + CitedByCount int `json:"cited_by_count"` +} + +// output is the stdout JSON envelope produced by the tool. 
+type output struct { + Results []outputResult `json:"results"` + NextCursor string `json:"next_cursor,omitempty"` +} + +func main() { + if err := run(); err != nil { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } +} + +func run() error { + in, err := decodeInput() + if err != nil { + return err + } + if strings.TrimSpace(in.Q) == "" { + return errors.New("q is required") + } + baseURL, reqURL, err := prepareURLs(in) + if err != nil { + return err + } + client := newHTTPClient(resolveTimeout()) + start := time.Now() + raw, status, retries, err := fetchWithRetry(client, baseURL, reqURL) + if err != nil { + return err + } + out := output{Results: mapResults(raw.Results)} + if v := strings.TrimSpace(raw.NextCursor); v != "" { + out.NextCursor = v + } + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + // Best-effort audit; ignore errors. + _ = appendAudit(map[string]any{ //nolint:errcheck + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "openalex_search", + "url_host": baseURL.Hostname(), + "status": status, + "ms": time.Since(start).Milliseconds(), + "retries": retries, + }) + return nil +} + +func decodeInput() (input, error) { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + return in, nil +} + +func prepareURLs(in input) (*url.URL, *url.URL, error) { + base := strings.TrimSpace(os.Getenv("OPENALEX_BASE_URL")) + if base == "" { + base = "https://api.openalex.org" + } + baseURL, err := url.Parse(base) + if err != nil || (baseURL.Scheme != "http" && baseURL.Scheme != "https") { + return nil, nil, errors.New("OPENALEX_BASE_URL must be a valid http/https URL") + } + if err := ssrfGuard(baseURL); err != nil { + return nil, nil, err + } + reqURL, err := url.Parse(baseURL.String()) + if err != nil { + return nil, 
nil, err + } + // Build: /works?search=...&per-page=...&from_publication_date=...&to_publication_date=... + reqURL.Path = strings.TrimRight(reqURL.Path, "/") + "/works" + q := reqURL.Query() + // The OpenAlex API supports "search" as a generic text search; stick to that. + q.Set("search", in.Q) + if in.PerPage > 0 { + if in.PerPage > 50 { + // OpenAlex allows up to 200, but keep a conservative cap here + in.PerPage = 50 + } + q.Set("per-page", strconv.Itoa(in.PerPage)) + } else { + q.Set("per-page", "10") + } + if strings.TrimSpace(in.From) != "" { + q.Set("from_publication_date", in.From) + } + if strings.TrimSpace(in.To) != "" { + q.Set("to_publication_date", in.To) + } + reqURL.RawQuery = q.Encode() + return baseURL, reqURL, nil +} + +type openalexResponse struct { + Results []map[string]any `json:"results"` + NextCursor string `json:"next_cursor"` +} + +func fetchWithRetry(client *http.Client, baseURL *url.URL, reqURL *url.URL) (openalexResponse, int, int, error) { + var out openalexResponse + var lastStatus int + var retries int + for attempt := 0; attempt < 2; attempt++ { + if err := ssrfGuard(baseURL); err != nil { + return openalexResponse{}, 0, retries, err + } + req, err := http.NewRequest(http.MethodGet, reqURL.String(), nil) + if err != nil { + return openalexResponse{}, 0, retries, fmt.Errorf("new request: %w", err) + } + req.Header.Set("User-Agent", "agentcli-openalex/0.1") + resp, err := client.Do(req) + if err != nil { + if isTimeout(err) && attempt == 0 { + retries++ + backoffSleep(0, attempt) + continue + } + return openalexResponse{}, 0, retries, fmt.Errorf("http: %w", err) + } + lastStatus = resp.StatusCode + dec := json.NewDecoder(bufio.NewReader(resp.Body)) + if resp.StatusCode >= 500 && attempt == 0 { + _ = resp.Body.Close() //nolint:errcheck + retries++ + backoffSleep(0, attempt) + continue + } + if err := dec.Decode(&out); err != nil { + _ = resp.Body.Close() //nolint:errcheck + if resp.StatusCode >= 500 && attempt == 0 { + retries++ + 
backoffSleep(0, attempt) + continue + } + return openalexResponse{}, lastStatus, retries, fmt.Errorf("decode json: %w", err) + } + _ = resp.Body.Close() //nolint:errcheck + break + } + return out, lastStatus, retries, nil +} + +func mapResults(rows []map[string]any) []outputResult { + out := make([]outputResult, 0, len(rows)) + for _, r := range rows { + var res outputResult + if v, ok := r["display_name"].(string); ok { + res.Title = v + } + if v, ok := r["title"].(string); ok && res.Title == "" { + res.Title = v + } + if v, ok := r["doi"].(string); ok { + res.DOI = v + } + if v, ok := r["publication_year"].(float64); ok { + res.PublicationYear = int(v) + } else if v, ok := r["publication_year"].(int); ok { + res.PublicationYear = v + } + if oa, ok := r["open_access"].(map[string]any); ok { + if v, ok := oa["oa_url"].(string); ok { + res.OpenAccessURL = v + } + } + if v, ok := r["authorships"].([]any); ok { + res.Authorships = v + } + if v, ok := r["cited_by_count"].(float64); ok { + res.CitedByCount = int(v) + } else if v, ok := r["cited_by_count"].(int); ok { + res.CitedByCount = v + } + out = append(out, res) + } + return out +} + +func resolveTimeout() time.Duration { + // 8s default per spec, can be overridden via HTTP_TIMEOUT_MS + if v := strings.TrimSpace(os.Getenv("HTTP_TIMEOUT_MS")); v != "" { + if ms, err := time.ParseDuration(v + "ms"); err == nil && ms > 0 { + return ms + } + } + return 8 * time.Second +} + +func newHTTPClient(timeout time.Duration) *http.Client { + tr := &http.Transport{} + return &http.Client{Timeout: timeout, Transport: tr, CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 5 { + return errors.New("too many redirects") + } + return ssrfGuard(req.URL) + }} +} + +func isTimeout(err error) bool { + var ne net.Error + return errors.As(err, &ne) && ne.Timeout() +} + +func backoffSleep(_ int64, attempt int) { + time.Sleep(time.Duration(100*(attempt+1)) * time.Millisecond) +} + +// ssrfGuard blocks 
loopback, RFC1918, link-local, ULA, and .onion unless OPENALEX_ALLOW_LOCAL=1 +func ssrfGuard(u *url.URL) error { + host := u.Hostname() + if host == "" { + return errors.New("invalid host") + } + if strings.HasSuffix(strings.ToLower(host), ".onion") { + return errors.New("SSRF blocked: onion domains are not allowed") + } + if os.Getenv("OPENALEX_ALLOW_LOCAL") == "1" { + return nil + } + ips, err := net.LookupIP(host) + if err != nil || len(ips) == 0 { + return errors.New("SSRF blocked: cannot resolve host") + } + for _, ip := range ips { + if isPrivateIP(ip) { + return errors.New("SSRF blocked: private or loopback address") + } + } + return nil +} + +func isPrivateIP(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + if v4 := ip.To4(); v4 != nil { + ip = v4 + if v4[0] == 10 { + return true + } + if v4[0] == 172 && v4[1]&0xf0 == 16 { + return true + } + if v4[0] == 192 && v4[1] == 168 { + return true + } + if v4[0] == 169 && v4[1] == 254 { + return true + } + if v4[0] == 127 { + return true + } + return false + } + if ip.Equal(net.ParseIP("::1")) { + return true + } + if ip[0] == 0xfe && (ip[1]&0xc0) == 0x80 { + return true + } + if ip[0]&0xfe == 0xfc { + return true + } + return false +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. 
+func appendAudit(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { _ = f.Close() }() //nolint:errcheck + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." + } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} diff --git a/tools/cmd/openalex_search/openalex_search_test.go b/tools/cmd/openalex_search/openalex_search_test.go new file mode 100644 index 0000000..ad229fd --- /dev/null +++ b/tools/cmd/openalex_search/openalex_search_test.go @@ -0,0 +1,76 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// no exported test-only types + +func TestMain(m *testing.M) { + if err := os.Setenv("OPENALEX_ALLOW_LOCAL", "1"); err != nil { + panic(err) + } + os.Exit(m.Run()) +} + +func runTool(t *testing.T, bin string, env []string, input any) (string, string, error) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + cmd.Env = env + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + runErr := cmd.Run() + return 
strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), runErr +} + +func TestOpenAlexSearch_Success(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/works" { + http.Error(w, "bad path", http.StatusBadRequest) + return + } + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"results":[{"display_name":"A title","doi":"10.1/x","publication_year":2024,"authorships":[{"a":1}],"cited_by_count":3,"open_access":{"oa_url":"https://oa"}}]}`)) //nolint:errcheck + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "openalex_search") + env := append(os.Environ(), "OPENALEX_BASE_URL="+srv.URL, "OPENALEX_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "golang", "per_page": 5}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"results\":[") { + t.Fatalf("missing results: %s", outStr) + } + if !strings.Contains(outStr, "A title") { + t.Fatalf("missing mapped title: %s", outStr) + } +} + +func TestOpenAlexSearch_SSRFBlocked(t *testing.T) { + bin := testutil.BuildTool(t, "openalex_search") + env := []string{"OPENALEX_BASE_URL=http://127.0.0.1:9"} + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "x"}) + if err == nil { + t.Fatalf("expected error, got ok: %s", outStr) + } + if !strings.Contains(errStr, "SSRF blocked") { + t.Fatalf("expected SSRF blocked error, got: %s", errStr) + } +} diff --git a/tools/cmd/pdf_extract/pdf_extract.go b/tools/cmd/pdf_extract/pdf_extract.go new file mode 100644 index 0000000..f22f098 --- /dev/null +++ b/tools/cmd/pdf_extract/pdf_extract.go @@ -0,0 +1,298 @@ +package main + +import ( + "bufio" + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + pdf "github.com/ledongthuc/pdf" +) + +type input struct { + PDFBase64 string `json:"pdf_base64"` + 
Pages []int `json:"pages"` +} + +type pageOut struct { + Index int `json:"index"` + Text string `json:"text"` +} + +type output struct { + PageCount int `json:"page_count"` + Pages []pageOut `json:"pages"` +} + +const maxPDFSizeBytes = 20 * 1024 * 1024 // 20 MiB + +func main() { + if err := run(); err != nil { + // Special-case OCR availability to emit a stable error code + if errors.Is(err, errOCRUnavailable) || strings.EqualFold(strings.TrimSpace(err.Error()), "ocr unavailable") { + fmt.Fprintln(os.Stderr, "{\"error\":\"OCR_UNAVAILABLE\"}") + os.Exit(1) + } + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } +} + +// nolint:gocyclo // Structured similarly to sibling tools; complexity acceptable for this CLI wrapper +func run() error { // refactor candidates exist; keep within lint threshold + in, err := decodeInput() + if err != nil { + return err + } + if strings.TrimSpace(in.PDFBase64) == "" { + return errors.New("pdf_base64 is required") + } + data, err := base64.StdEncoding.DecodeString(in.PDFBase64) + if err != nil { + return fmt.Errorf("decode base64: %w", err) + } + if len(data) > maxPDFSizeBytes { + return fmt.Errorf("pdf too large: %d bytes (limit %d)", len(data), maxPDFSizeBytes) + } + + // Write to a temp file for the parser and potential OCR tools + tmpDir, err := os.MkdirTemp("", "pdf_extract_*") + if err != nil { + return fmt.Errorf("mktemp dir: %w", err) + } + defer func() { _ = os.RemoveAll(tmpDir) }() //nolint:errcheck // best-effort cleanup + pdfPath := filepath.Join(tmpDir, "in.pdf") + if err := os.WriteFile(pdfPath, data, 0o600); err != nil { + return fmt.Errorf("write temp pdf: %w", err) + } + + // Parse PDF + f, r, err := pdf.Open(pdfPath) + if err != nil { + return fmt.Errorf("open pdf: %w", err) + } + defer func() { _ = f.Close() }() //nolint:errcheck // best-effort close + + totalPages := r.NumPage() + targetPages, err := normalizePages(in.Pages, totalPages) + if err != 
nil { + return err + } + + texts := make([]string, totalPages) + emptyFlags := make([]bool, totalPages) + for i := 1; i <= totalPages; i++ { // 1-based + p := r.Page(i) + if p.V.IsNull() { + texts[i-1] = "" + emptyFlags[i-1] = true + continue + } + content := p.Content() + var b strings.Builder + // The library exposes extracted text spans under Content.Text ([]pdf.Text) + for _, span := range content.Text { + s := span.S + if s != "" { + b.WriteString(s) + b.WriteString("\n") + } + } + txt := strings.TrimSpace(b.String()) + texts[i-1] = txt + emptyFlags[i-1] = (txt == "") + } + + // Optional OCR for empty pages when enabled + if isOCREnabled() && anyEmptyRequested(emptyFlags, targetPages) { + ocrTexts, ocrErr := runTesseractOCR(pdfPath, totalPages, countEmptyRequested(emptyFlags, targetPages)) + if ocrErr != nil { + return ocrErr + } + for _, idx := range targetPages { + if idx >= 0 && idx < len(texts) && strings.TrimSpace(texts[idx]) == "" { + if idx < len(ocrTexts) { + texts[idx] = strings.TrimSpace(ocrTexts[idx]) + } + } + } + } + + var outPages []pageOut + for _, idx := range targetPages { + if idx < 0 || idx >= totalPages { + continue + } + outPages = append(outPages, pageOut{Index: idx, Text: texts[idx]}) + } + + start := time.Now() + out := output{PageCount: totalPages, Pages: outPages} + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + _ = appendAudit(map[string]any{ //nolint:errcheck + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "pdf_extract", + "page_count": totalPages, + "ms": time.Since(start).Milliseconds(), + }) + return nil +} + +func decodeInput() (input, error) { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + return in, nil +} + +func normalizePages(pages []int, total int) ([]int, error) { + if total < 0 { + total = 0 + } + if len(pages) == 0 { + out := 
make([]int, total) + for i := 0; i < total; i++ { + out[i] = i + } + return out, nil + } + seen := make(map[int]struct{}, len(pages)) + out := make([]int, 0, len(pages)) + for _, p := range pages { + if p < 0 || p >= total { + return nil, fmt.Errorf("page index out of range: %d (total %d)", p, total) + } + if _, ok := seen[p]; ok { + continue + } + seen[p] = struct{}{} + out = append(out, p) + } + return out, nil +} + +func anyEmptyRequested(empty []bool, targets []int) bool { + for _, idx := range targets { + if idx >= 0 && idx < len(empty) && empty[idx] { + return true + } + } + return false +} + +func countEmptyRequested(empty []bool, targets []int) int { + c := 0 + for _, idx := range targets { + if idx >= 0 && idx < len(empty) && empty[idx] { + c++ + } + } + if c == 0 { + return 1 + } + return c +} + +func isOCREnabled() bool { + v := strings.TrimSpace(os.Getenv("ENABLE_OCR")) + if v == "" { + return false + } + v = strings.ToLower(v) + return v == "1" || v == "true" || v == "yes" +} + +var errOCRUnavailable = errors.New("ocr unavailable") + +func runTesseractOCR(pdfPath string, pageCount int, emptyRequested int) ([]string, error) { + if _, err := exec.LookPath("tesseract"); err != nil { + return nil, errOCRUnavailable + } + if pageCount <= 0 { + return []string{}, nil + } + timeout := 10 * time.Second * time.Duration(emptyRequested) + if timeout < 10*time.Second { + timeout = 10 * time.Second + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + cmd := exec.CommandContext(ctx, "tesseract", pdfPath, "stdout") + out, err := cmd.Output() + if ctx.Err() == context.DeadlineExceeded { + return nil, fmt.Errorf("ocr timeout after %s", timeout) + } + if err != nil { + var ee *exec.ExitError + if errors.As(err, &ee) { + return nil, fmt.Errorf("ocr failed: %s", strings.TrimSpace(string(ee.Stderr))) + } + return nil, fmt.Errorf("ocr failed: %v", err) + } + parts := strings.Split(string(out), "\f") + if len(parts) < pageCount { + tmp 
:= make([]string, pageCount) + copy(tmp, parts) + parts = tmp + } + for i := range parts { + parts[i] = strings.TrimSpace(parts[i]) + } + return parts, nil +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. +func appendAudit(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { _ = f.Close() }() //nolint:errcheck + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." 
	}
	dir := cwd
	// Ascend parent directories until go.mod is found or the root is hit.
	for {
		if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil {
			return dir
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			return cwd
		}
		dir = parent
	}
}
diff --git a/tools/cmd/pdf_extract/pdf_extract_test.go b/tools/cmd/pdf_extract/pdf_extract_test.go
new file mode 100644
index 0000000..c0e947b
--- /dev/null
+++ b/tools/cmd/pdf_extract/pdf_extract_test.go
@@ -0,0 +1,167 @@
package main_test

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"testing"

	testutil "github.com/hyperifyio/goagent/tools/testutil"
	gofpdf "github.com/jung-kurt/gofpdf"
)

// no output struct needed for the negative oversize test

// TestPdfExtract_OversizeReject verifies that a decoded payload above the
// 20 MiB limit makes the tool exit non-zero.
func TestPdfExtract_OversizeReject(t *testing.T) {
	bin := testutil.BuildTool(t, "pdf_extract")
	// Create >20 MiB decoded payload; base64 will be larger but limit checks decoded bytes
	raw := bytes.Repeat([]byte{'A'}, 20*1024*1024+1)
	b64 := base64.StdEncoding.EncodeToString(raw)
	in := map[string]any{"pdf_base64": b64}
	data, err := json.Marshal(in)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	cmd := exec.Command(bin)
	cmd.Stdin = bytes.NewReader(data)
	if err := cmd.Run(); err == nil {
		t.Fatalf("expected oversize rejection error")
	}
}

// outPage mirrors the tool's per-page JSON output for decoding in tests.
type outPage struct {
	Index int    `json:"index"`
	Text  string `json:"text"`
}

// outPayload mirrors the tool's top-level JSON output.
type outPayload struct {
	PageCount int       `json:"page_count"`
	Pages     []outPage `json:"pages"`
}

// buildPDF generates a one-page in-memory PDF: with text when withText is
// true, otherwise with only vector content (a rectangle) so no text extracts.
func buildPDF(t *testing.T, withText bool) []byte {
	t.Helper()
	pdf := gofpdf.New("P", "mm", "A4", "")
	pdf.AddPage()
	if withText {
		pdf.SetFont("Arial", "", 16)
		pdf.Cell(40, 10, "Hello PDF")
	} else {
		// Draw a rectangle to create non-text content
		pdf.SetLineWidth(1)
		pdf.Rect(10, 10, 50, 30, "D")
	}
	var buf bytes.Buffer
	if err := pdf.Output(&buf); err != nil {
		t.Fatalf("generate pdf: %v", err)
	}
	return buf.Bytes()
}

// runTool executes the built binary with a JSON request on stdin and an
// optional extra environment, returning exit code, decoded output and stderr.
func runTool(t *testing.T, bin string, b64 string, env map[string]string) (int, outPayload, string, error) {
	t.Helper()
	in := map[string]any{"pdf_base64": b64}
	data, err := json.Marshal(in)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	cmd := exec.Command(bin)
	cmd.Stdin = bytes.NewReader(data)
	if env != nil {
		cmd.Env = os.Environ()
		for k, v := range env {
			cmd.Env = append(cmd.Env, k+"="+v)
		}
	}
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	err = cmd.Run()
	if err != nil {
		return 0, outPayload{}, stderr.String(), err
	}
	var out outPayload
	if uerr := json.Unmarshal(stdout.Bytes(), &out); uerr != nil {
		t.Fatalf("decode output: %v; raw=%s", uerr, stdout.String())
	}
	return cmd.ProcessState.ExitCode(), out, stderr.String(), nil
}

// TestPdfExtract_TextPDF_Extracts checks that text content is recovered from
// a generated PDF (tolerating whitespace/case differences in extraction).
func TestPdfExtract_TextPDF_Extracts(t *testing.T) {
	bin := testutil.BuildTool(t, "pdf_extract")
	pdfBytes := buildPDF(t, true)
	b64 := base64.StdEncoding.EncodeToString(pdfBytes)
	code, out, stderr, err := runTool(t, bin, b64, nil)
	if err != nil || code != 0 {
		t.Fatalf("run failed: code=%d err=%v stderr=%s", code, err, stderr)
	}
	if out.PageCount < 1 || len(out.Pages) < 1 {
		t.Fatalf("expected at least 1 page, got %+v", out)
	}
	norm := strings.ToLower(strings.ReplaceAll(out.Pages[0].Text, "\n", ""))
	norm = strings.ReplaceAll(norm, " ", "")
	if !strings.Contains(norm, "hellopdf") && !strings.Contains(norm, "hello") {
		t.Fatalf("expected extracted text to contain 'hello', got %q (norm=%q)", out.Pages[0].Text, norm)
	}
}

// TestPdfExtract_ImageOnly_NoOCR checks that a page with no text yields an
// empty string when OCR is not enabled.
func TestPdfExtract_ImageOnly_NoOCR(t *testing.T) {
	bin := testutil.BuildTool(t, "pdf_extract")
	pdfBytes := buildPDF(t, false)
	b64 := base64.StdEncoding.EncodeToString(pdfBytes)
	code, out, stderr, err := runTool(t, bin, b64, nil)
	if err != nil || code != 0 {
		t.Fatalf("run failed: code=%d err=%v stderr=%s", code, err, stderr)
	}
	if out.PageCount < 1 || len(out.Pages) < 1 {
		t.Fatalf("expected at least 1 page, got %+v", out)
	}
	if strings.TrimSpace(out.Pages[0].Text) != "" {
		t.Fatalf("expected empty text without OCR, got %q", out.Pages[0].Text)
	}
}

// TestPdfExtract_ImageOnly_WithOCR_Mock substitutes a fake tesseract binary
// on PATH and verifies its output is used for empty pages when ENABLE_OCR=true.
func TestPdfExtract_ImageOnly_WithOCR_Mock(t *testing.T) {
	bin := testutil.BuildTool(t, "pdf_extract")
	pdfBytes := buildPDF(t, false)
	b64 := base64.StdEncoding.EncodeToString(pdfBytes)

	// Create a mock 'tesseract' in PATH
	mockDir := testutil.MakeRepoRelTempDir(t, "mockbin_")
	exeName := "tesseract"
	if runtime.GOOS == "windows" {
		exeName += ".bat"
	}
	mockPath := filepath.Join(mockDir, exeName)
	script := "#!/bin/sh\necho 'HELLO OCR'\n"
	if runtime.GOOS == "windows" {
		script = "@echo HELLO OCR\r\n"
	}
	if err := os.WriteFile(mockPath, []byte(script), 0o755); err != nil {
		t.Fatalf("write mock tesseract: %v", err)
	}

	absMock, err := filepath.Abs(mockDir)
	if err != nil {
		t.Fatalf("abs mockdir: %v", err)
	}
	env := map[string]string{
		"PATH":       absMock,
		"ENABLE_OCR": "true",
	}
	code, out, stderr, err := runTool(t, bin, b64, env)
	if err != nil || code != 0 {
		t.Fatalf("run failed: code=%d err=%v stderr=%s", code, err, stderr)
	}
	if out.PageCount < 1 || len(out.Pages) < 1 {
		t.Fatalf("expected at least 1 page, got %+v", out)
	}
	if strings.TrimSpace(out.Pages[0].Text) != "HELLO OCR" {
		t.Fatalf("expected OCR text 'HELLO OCR', got %q", out.Pages[0].Text)
	}
}
diff --git a/tools/cmd/readability_extract/readability_extract.go b/tools/cmd/readability_extract/readability_extract.go
new file mode 100644
index 0000000..b8d68eb
--- /dev/null
+++ b/tools/cmd/readability_extract/readability_extract.go
@@ -0,0 +1,137 @@
package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"time"

	readability "github.com/go-shiori/go-readability"
)

// input is the JSON request read from stdin.
type input struct {
	HTML    string `json:"html"`
	BaseURL string `json:"base_url"`
}

// output is the JSON result written to stdout.
type output struct {
	Title       string `json:"title"`
	Byline      string `json:"byline,omitempty"`
	Text        string `json:"text"`
	ContentHTML string `json:"content_html"`
	Length      int    `json:"length"`
}

// maxHTMLBytes caps the accepted HTML payload size.
const maxHTMLBytes = 5 << 20 // 5 MiB

func main() {
	if err := run(); err != nil {
		msg := strings.ReplaceAll(err.Error(), "\n", " ")
		fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg)
		os.Exit(1)
	}
}

// run validates the request, runs readability extraction, emits the JSON
// result, and appends a best-effort audit record.
func run() error {
	in, err := decodeInput()
	if err != nil {
		return err
	}
	if strings.TrimSpace(in.HTML) == "" {
		return errors.New("html is required")
	}
	if strings.TrimSpace(in.BaseURL) == "" {
		return errors.New("base_url is required")
	}
	// Parse base URL to the type expected by go-readability
	parsedBase, perr := url.Parse(in.BaseURL)
	if perr != nil || parsedBase.Scheme == "" || parsedBase.Host == "" {
		return errors.New("base_url must be an absolute URL")
	}
	if len(in.HTML) > maxHTMLBytes {
		return fmt.Errorf("html too large: limit %d bytes", maxHTMLBytes)
	}

	start := time.Now()
	art, err := readability.FromReader(strings.NewReader(in.HTML), parsedBase)
	if err != nil {
		return fmt.Errorf("readability extract: %w", err)
	}

	out := output{
		Title:       art.Title,
		Byline:      art.Byline,
		Text:        art.TextContent,
		ContentHTML: art.Content,
		Length:      art.Length,
	}
	enc := json.NewEncoder(os.Stdout)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(out); err != nil {
		return fmt.Errorf("encode json: %w", err)
	}
	_ = appendAudit(map[string]any{ //nolint:errcheck
		"ts":     time.Now().UTC().Format(time.RFC3339Nano),
		"tool":   "readability_extract",
		"length": art.Length,
		"ms":     time.Since(start).Milliseconds(),
	})
	return nil
}

// decodeInput reads and parses the JSON request from stdin.
func decodeInput() (input, error) {
	var in input
	dec := json.NewDecoder(bufio.NewReader(os.Stdin))
	if err := dec.Decode(&in); err != nil {
		return in, fmt.Errorf("parse json: %w", err)
	}
	return in, nil
}

// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root.
func appendAudit(entry any) error {
	b, err := json.Marshal(entry)
	if err != nil {
		return err
	}
	root := moduleRoot()
	dir := filepath.Join(root, ".goagent", "audit")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}
	fname := time.Now().UTC().Format("20060102") + ".log"
	path := filepath.Join(dir, fname)
	f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	defer func() { _ = f.Close() }() //nolint:errcheck
	if _, err := f.Write(append(b, '\n')); err != nil {
		return err
	}
	return nil
}

// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD.
func moduleRoot() string {
	cwd, err := os.Getwd()
	if err != nil || cwd == "" {
		return "."
	}
	dir := cwd
	for {
		if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil {
			return dir
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			return cwd
		}
		dir = parent
	}
}
diff --git a/tools/cmd/readability_extract/readability_extract_test.go b/tools/cmd/readability_extract/readability_extract_test.go
new file mode 100644
index 0000000..57305ab
--- /dev/null
+++ b/tools/cmd/readability_extract/readability_extract_test.go
@@ -0,0 +1,69 @@
package main_test

import (
	"bytes"
	"encoding/json"
	"os/exec"
	"strings"
	"testing"

	testutil "github.com/hyperifyio/goagent/tools/testutil"
)

// runTool executes the built binary with the JSON-encoded input on stdin and
// returns trimmed stdout, trimmed stderr, and the run error.
func runTool(t *testing.T, bin string, input any) (string, string, error) {
	t.Helper()
	data, err := json.Marshal(input)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	cmd := exec.Command(bin)
	cmd.Stdin = bytes.NewReader(data)
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	err = cmd.Run()
	return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), err
}

// TestReadabilityExtract_Simple feeds a minimal page and checks that a title
// and the body text appear in the output.
func TestReadabilityExtract_Simple(t *testing.T) {
	bin := testutil.BuildTool(t, "readability_extract")
	html := `<!doctype html><html><head><title>Example

My Title

Hello world.

` + input := map[string]any{"html": html, "base_url": "https://example.org/page"} + outStr, errStr, err := runTool(t, bin, input) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"title\":") { + t.Fatalf("missing title in output: %s", outStr) + } + if !strings.Contains(outStr, "Hello") { + t.Fatalf("expected extracted text to include 'Hello': %s", outStr) + } +} + +func TestReadabilityExtract_NavHeavy(t *testing.T) { + bin := testutil.BuildTool(t, "readability_extract") + html := `

Article Heading

Core content here.

`
	outStr, errStr, err := runTool(t, bin, map[string]any{"html": html, "base_url": "https://example.org/x"})
	if err != nil {
		t.Fatalf("run error: %v, stderr=%s", err, errStr)
	}
	if !strings.Contains(outStr, "Article Heading") {
		t.Fatalf("expected heading present: %s", outStr)
	}
	if !strings.Contains(outStr, "Core content here") {
		t.Fatalf("expected article text present: %s", outStr)
	}
}

// TestReadabilityExtract_LargeRejected checks the 5 MiB input cap.
func TestReadabilityExtract_LargeRejected(t *testing.T) {
	bin := testutil.BuildTool(t, "readability_extract")
	big := strings.Repeat("A", (5<<20)+1)
	outStr, errStr, err := runTool(t, bin, map[string]any{"html": big, "base_url": "https://e/x"})
	if err == nil {
		t.Fatalf("expected error for oversized html, got ok: %s", outStr)
	}
	if !strings.Contains(errStr, "html too large") {
		t.Fatalf("expected size error, got: %s", errStr)
	}
}
diff --git a/tools/cmd/robots_check/robots_check.go b/tools/cmd/robots_check/robots_check.go
new file mode 100644
index 0000000..30092e6
--- /dev/null
+++ b/tools/cmd/robots_check/robots_check.go
@@ -0,0 +1,296 @@
package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"
)

// input is the JSON request read from stdin.
type input struct {
	URL       string `json:"url"`
	UserAgent string `json:"user_agent,omitempty"`
}

// output is the JSON verdict written to stdout.
type output struct {
	Allowed      bool     `json:"allowed"`
	CrawlDelayMS int      `json:"crawl_delay_ms,omitempty"`
	GroupRules   []string `json:"group_rules"`
}

// robotsGroup is one user-agent group parsed from robots.txt.
type robotsGroup struct {
	userAgents    []string
	lines         []string
	allowPaths    []string
	disallowPaths []string
	crawlDelayMS  int
}

func main() {
	var in input
	if err := json.NewDecoder(os.Stdin).Decode(&in); err != nil {
		errorJSON(fmt.Errorf("invalid input: %w", err))
		os.Exit(2)
	}
	if in.URL == "" {
		errorJSON(errors.New("missing url"))
		os.Exit(2)
	}
	if in.UserAgent == "" {
		in.UserAgent = "agentcli"
	}

	u, err := url.Parse(in.URL)
	if err != nil {
		errorJSON(fmt.Errorf("bad url: %v", err))
		os.Exit(2)
	}
	if blocked, reason := ssrfBlocked(u.Host); blocked {
		errorJSON(fmt.Errorf("SSRF blocked: %s", reason))
		os.Exit(1)
	}

	resp, err := fetchRobots(u)
	if err != nil {
		errorJSON(err)
		os.Exit(1)
	}
	defer func() {
		if cerr := resp.Body.Close(); cerr != nil {
			// surface close error
			_, _ = os.Stderr.WriteString(fmt.Sprintf("{\"error\":\"close body: %v\"}", cerr)) //nolint:errcheck
		}
	}()

	groups, err := parseRobots(resp.Body)
	if err != nil {
		errorJSON(err)
		os.Exit(1)
	}
	grp, ok := matchGroup(groups, in.UserAgent)
	if !ok {
		// No applicable group: everything is allowed by default.
		emitOrExit(output{Allowed: true})
		return
	}
	allowed := evaluateAllowed(grp, u.EscapedPath())
	out := output{Allowed: allowed, GroupRules: append([]string(nil), grp.lines...)}
	if grp.crawlDelayMS > 0 {
		out.CrawlDelayMS = grp.crawlDelayMS
	}
	emitOrExit(out)
}

// fetchRobots GETs <origin>/robots.txt with a 5s timeout; same-origin
// redirects are followed, cross-origin ones stop at the last response.
func fetchRobots(u *url.URL) (*http.Response, error) {
	origin := (&url.URL{Scheme: u.Scheme, Host: u.Host}).String()
	client := &http.Client{Timeout: 5 * time.Second, CheckRedirect: func(req *http.Request, via []*http.Request) error {
		if len(via) > 0 {
			if req.URL.Host != u.Host || req.URL.Scheme != u.Scheme {
				return http.ErrUseLastResponse
			}
		}
		return nil
	}}
	req, err := http.NewRequest(http.MethodGet, origin+"/robots.txt", nil)
	if err != nil {
		return nil, fmt.Errorf("request: %w", err)
	}
	req.Header.Set("User-Agent", "agentcli-robots-check/0.1")
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fetch robots.txt: %w", err)
	}
	if resp.StatusCode >= 400 {
		// ensure body drained before close
		if _, cerr := io.Copy(io.Discard, resp.Body); cerr != nil {
			_, _ = os.Stderr.WriteString("{\"error\":\"drain body\"}") //nolint:errcheck
		}
		if cerr := resp.Body.Close(); cerr != nil {
			_, _ = os.Stderr.WriteString("{\"error\":\"close body\"}") //nolint:errcheck
		}
		return nil, fmt.Errorf("robots.txt status %d", resp.StatusCode)
	}
	return resp, nil
}

// parseRobots scans robots.txt into user-agent groups, stripping comments
// and splitting a group when a User-agent line follows rule lines.
func parseRobots(r io.Reader) ([]robotsGroup, error) {
	scanner := bufio.NewScanner(r)
	var groups []robotsGroup
	cur := robotsGroup{}
	flush := func() {
		if len(cur.userAgents) > 0 {
			groups = append(groups, cur)
		}
		cur = robotsGroup{}
	}
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if i := strings.IndexByte(line, '#'); i >= 0 {
			line = strings.TrimSpace(line[:i])
		}
		if line == "" {
			continue
		}
		lower := strings.ToLower(line)
		switch {
		case strings.HasPrefix(lower, "user-agent:"):
			val := strings.TrimSpace(line[len("User-agent:"):])
			if len(cur.userAgents) > 0 && (len(cur.allowPaths) > 0 || len(cur.disallowPaths) > 0 || cur.crawlDelayMS > 0) {
				flush()
			}
			cur.userAgents = append(cur.userAgents, strings.ToLower(val))
			cur.lines = append(cur.lines, line)
		case strings.HasPrefix(lower, "allow:"):
			val := strings.TrimSpace(line[len("Allow:"):])
			cur.allowPaths = append(cur.allowPaths, val)
			cur.lines = append(cur.lines, line)
		case strings.HasPrefix(lower, "disallow:"):
			val := strings.TrimSpace(line[len("Disallow:"):])
			cur.disallowPaths = append(cur.disallowPaths, val)
			cur.lines = append(cur.lines, line)
		case strings.HasPrefix(lower, "crawl-delay:"):
			val := strings.TrimSpace(line[len("Crawl-delay:"):])
			// Parsed as seconds (e.g. "2" -> 2s -> 2000 ms).
			if d, err := time.ParseDuration(val + "s"); err == nil {
				cur.crawlDelayMS = int(d / time.Millisecond)
			}
			cur.lines = append(cur.lines, line)
		default:
			// ignore others
		}
	}
	flush()
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("read robots.txt: %w", err)
	}
	return groups, nil
}

// matchGroup returns the group matching userAgent exactly (case-insensitive),
// falling back to a wildcard "*" group, else reports no match.
func matchGroup(groups []robotsGroup, userAgent string) (robotsGroup, bool) {
	uaLower := strings.ToLower(userAgent)
	for _, g := range groups {
		for _, ua := range g.userAgents {
			if ua == uaLower {
				return g, true
			}
		}
	}
	for _, g := range groups {
		for _, ua := range g.userAgents {
			if ua == "*" {
				return g, true
			}
		}
	}
	return robotsGroup{}, false
}

func evaluateAllowed(g
robotsGroup, path string) bool { + allowed := true + for _, d := range g.disallowPaths { + if d == "" { + continue + } + if strings.HasPrefix(path, d) { + allowed = false + } + } + for _, a := range g.allowPaths { + if a == "" { + continue + } + if strings.HasPrefix(path, a) { + allowed = true + } + } + return allowed +} + +func emitJSON(v any) error { + enc := json.NewEncoder(os.Stdout) + enc.SetEscapeHTML(false) + return enc.Encode(v) +} + +func emitOrExit(v any) { + if err := emitJSON(v); err != nil { + // best-effort stderr message; ignore write error explicitly + _, _ = os.Stderr.WriteString(fmt.Sprintf("{\"error\":\"encode output: %v\"}", err)) //nolint:errcheck + os.Exit(1) + } +} + +func errorJSON(err error) { + b, jerr := json.Marshal(map[string]string{"error": err.Error()}) + if jerr != nil { + _, _ = os.Stderr.WriteString("{\"error\":\"internal error\"}") //nolint:errcheck + return + } + _, _ = os.Stderr.Write(b) //nolint:errcheck +} + +func ssrfBlocked(hostport string) (bool, string) { + // Allow override for tests + if os.Getenv("ROBOTS_CHECK_ALLOW_LOCAL") == "1" { + return false, "allow local for tests" + } + host, _, err := net.SplitHostPort(hostport) + if err != nil { + host = hostport + } + ip := net.ParseIP(host) + if ip != nil { + if ip.IsLoopback() || ip.IsUnspecified() { + return true, "loopback" + } + if isPrivateIP(ip) { + return true, "private address" + } + return false, "" + } + addrs, err := net.LookupIP(host) + if err != nil { + return true, "dns lookup failed" + } + for _, a := range addrs { + if a.IsLoopback() || a.IsUnspecified() || isPrivateIP(a) { + return true, "resolved to private/loopback" + } + } + return false, "" +} + +var privateBlocks []*net.IPNet + +func init() { + cidrs := []string{ + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", + "127.0.0.0/8", + "::1/128", + "fc00::/7", + "fe80::/10", + } + for _, cidr := range cidrs { + _, block, err := net.ParseCIDR(cidr) + if err != nil { + panic("invalid CIDR literal: " + 
cidr)
		}
		privateBlocks = append(privateBlocks, block)
	}
}

// isPrivateIP reports whether ip falls inside any configured private block.
func isPrivateIP(ip net.IP) bool {
	for _, block := range privateBlocks {
		if block.Contains(ip) {
			return true
		}
	}
	return false
}
diff --git a/tools/cmd/robots_check/robots_check_test.go b/tools/cmd/robots_check/robots_check_test.go
new file mode 100644
index 0000000..5728bac
--- /dev/null
+++ b/tools/cmd/robots_check/robots_check_test.go
@@ -0,0 +1,121 @@
package main_test

import (
	"bytes"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"os"
	"os/exec"
	"strings"
	"testing"

	testutil "github.com/hyperifyio/goagent/tools/testutil"
)

// rcOutput intentionally omitted; tests validate via substring checks.

// Allow local httptest origins during tests
func TestMain(m *testing.M) {
	if err := os.Setenv("ROBOTS_CHECK_ALLOW_LOCAL", "1"); err != nil {
		panic(err)
	}
	os.Exit(m.Run())
}

// runRobots executes the built binary with the JSON input on stdin and
// returns trimmed stdout, trimmed stderr, and the run error.
func runRobots(t *testing.T, bin string, input any) (string, string, error) {
	data, err := json.Marshal(input)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	cmd := exec.Command(bin)
	cmd.Stdin = bytes.NewReader(data)
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	err = cmd.Run()
	return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), err
}

// UA-specific rules must take precedence over wildcard.
func TestRobotsCheck_UAPrecedence_DenySpecific(t *testing.T) {
	mux := http.NewServeMux()
	mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		// RFC 9309: specific UA group applies when matched
		// Specific denies /private, while wildcard allows all
		if _, err := w.Write([]byte("User-agent: agentcli\nDisallow: /private\n\nUser-agent: *\nAllow: /\n")); err != nil {
			t.Fatalf("write: %v", err)
		}
	})
	srv := httptest.NewServer(mux)
	defer srv.Close()

	bin := testutil.BuildTool(t, "robots_check")
	outStr, errStr, err := runRobots(t, bin, map[string]any{
		"url":        srv.URL + "/private/page",
		"user_agent": "agentcli",
	})
	if err != nil {
		t.Fatalf("run error: %v, stderr=%s", err, errStr)
	}
	if !strings.Contains(outStr, "\"allowed\":false") {
		t.Fatalf("expected allowed=false, got: %s", outStr)
	}
	if !strings.Contains(outStr, "group_rules") {
		t.Fatalf("expected group_rules in output")
	}
}

// Crawl-delay should be exposed in milliseconds when present in matched group.
func TestRobotsCheck_CrawlDelay_Parsed(t *testing.T) {
	mux := http.NewServeMux()
	mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		if _, err := w.Write([]byte("User-agent: agentcli\nCrawl-delay: 2\nAllow: /\n")); err != nil {
			t.Fatalf("write: %v", err)
		}
	})
	srv := httptest.NewServer(mux)
	defer srv.Close()

	bin := testutil.BuildTool(t, "robots_check")
	outStr, errStr, err := runRobots(t, bin, map[string]any{
		"url":        srv.URL + "/anything",
		"user_agent": "agentcli",
	})
	if err != nil {
		t.Fatalf("run error: %v, stderr=%s", err, errStr)
	}
	if !strings.Contains(outStr, "\"allowed\":true") {
		t.Fatalf("expected allowed=true, got: %s", outStr)
	}
	if !strings.Contains(outStr, "\"crawl_delay_ms\":2000") {
		t.Fatalf("expected crawl_delay_ms=2000, got: %s", outStr)
	}
}

// Wildcard-only group should apply when UA does not match a specific group.
func TestRobotsCheck_WildcardFallback_DenyAll(t *testing.T) {
	mux := http.NewServeMux()
	mux.HandleFunc("/robots.txt", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		if _, err := w.Write([]byte("User-agent: *\nDisallow: /\n")); err != nil {
			t.Fatalf("write: %v", err)
		}
	})
	srv := httptest.NewServer(mux)
	defer srv.Close()

	bin := testutil.BuildTool(t, "robots_check")
	outStr, errStr, err := runRobots(t, bin, map[string]any{
		"url":        srv.URL + "/blocked",
		"user_agent": "otherbot",
	})
	if err != nil {
		t.Fatalf("run error: %v, stderr=%s", err, errStr)
	}
	if !strings.Contains(outStr, "\"allowed\":false") {
		t.Fatalf("expected allowed=false, got: %s", outStr)
	}
}
diff --git a/tools/cmd/rss_fetch/rss_fetch.go b/tools/cmd/rss_fetch/rss_fetch.go
new file mode 100644
index 0000000..e163b02
--- /dev/null
+++ b/tools/cmd/rss_fetch/rss_fetch.go
@@ -0,0 +1,226 @@
package main

import (
	"bufio"
	"encoding/json"
	"encoding/xml"
	"errors"
	"fmt"
"io" + "net" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +type input struct { + URL string `json:"url"` + IfModifiedSince string `json:"if_modified_since"` +} + +type output struct { + Feed map[string]string `json:"feed"` + Items []map[string]any `json:"items"` +} + +func main() { + if err := run(); err != nil { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } +} + +func run() error { + var in input + if err := json.NewDecoder(bufio.NewReader(os.Stdin)).Decode(&in); err != nil { + return fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.URL) == "" { + return errors.New("url is required") + } + u, err := url.Parse(in.URL) + if err != nil || (u.Scheme != "http" && u.Scheme != "https") { + return errors.New("only http/https are allowed") + } + if err := ssrfGuard(u); err != nil { + return err + } + client := &http.Client{Timeout: 5 * time.Second, CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 5 { + return errors.New("too many redirects") + } + return ssrfGuard(req.URL) + }} + req, err := http.NewRequest(http.MethodGet, in.URL, nil) + if err != nil { + return fmt.Errorf("new request: %w", err) + } + req.Header.Set("User-Agent", "agentcli-rss-fetch/0.1") + if strings.TrimSpace(in.IfModifiedSince) != "" { + req.Header.Set("If-Modified-Since", in.IfModifiedSince) + } + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("http: %w", err) + } + defer func() { + if cerr := resp.Body.Close(); cerr != nil { + // best-effort log; ignore secondary write error intentionally + _, _ = os.Stderr.WriteString(fmt.Sprintf("{\"error\":\"close body: %v\"}", cerr)) //nolint:errcheck + } + }() + if resp.StatusCode == http.StatusNotModified { + // Emit empty items on 304 (caller may infer not modified) + return emit(output{Feed: map[string]string{"title": "", "link": ""}, Items: []map[string]any{}}) + } + if resp.StatusCode >= 400 { + // Drain 
then error + if _, derr := io.Copy(io.Discard, resp.Body); derr != nil { + _, _ = os.Stderr.WriteString("{\"error\":\"drain body\"}") //nolint:errcheck + } + return fmt.Errorf("status %d", resp.StatusCode) + } + data, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("read body: %w", err) + } + // Try RSS first + if out, ok := parseRSS(data); ok { + return emit(out) + } + if out, ok := parseAtom(data); ok { + return emit(out) + } + return errors.New("unsupported feed format") +} + +func emit(v any) error { + enc := json.NewEncoder(os.Stdout) + enc.SetEscapeHTML(false) + return enc.Encode(v) +} + +// Minimal RSS 2.0 representation +type rssDoc struct { + XMLName xml.Name `xml:"rss"` + Channel rssChannel `xml:"channel"` +} + +type rssChannel struct { + Title string `xml:"title"` + Link string `xml:"link"` + Items []rssItem `xml:"item"` +} + +type rssItem struct { + Title string `xml:"title"` + Link string `xml:"link"` + PubDate string `xml:"pubDate"` + Desc string `xml:"description"` +} + +func parseRSS(data []byte) (output, bool) { + var doc rssDoc + if err := xml.Unmarshal(data, &doc); err != nil || doc.XMLName.Local != "rss" { + return output{}, false + } + out := output{Feed: map[string]string{"title": doc.Channel.Title, "link": doc.Channel.Link}, Items: make([]map[string]any, 0, len(doc.Channel.Items))} + for _, it := range doc.Channel.Items { + item := map[string]any{"title": it.Title, "url": it.Link} + if it.PubDate != "" { + item["published_at"] = it.PubDate + } + if it.Desc != "" { + item["summary"] = it.Desc + } + out.Items = append(out.Items, item) + } + return out, true +} + +// Minimal Atom 1.0 representation +type atomLink struct { + Href string `xml:"href,attr"` +} +type atomDoc struct { + XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"` + Title string `xml:"title"` + Link atomLink `xml:"link"` + Entries []struct { + Title string `xml:"title"` + Link atomLink `xml:"link"` + Updated string `xml:"updated"` + Published string 
`xml:"published"` + Summary string `xml:"summary"` + } `xml:"entry"` +} + +func parseAtom(data []byte) (output, bool) { + var doc atomDoc + if err := xml.Unmarshal(data, &doc); err != nil || doc.XMLName.Local != "feed" { + return output{}, false + } + link := doc.Link.Href + out := output{Feed: map[string]string{"title": doc.Title, "link": link}} + for _, e := range doc.Entries { + item := map[string]any{"title": e.Title, "url": e.Link.Href} + if e.Updated != "" { + item["published_at"] = e.Updated + } else if e.Published != "" { + item["published_at"] = e.Published + } + if e.Summary != "" { + item["summary"] = e.Summary + } + out.Items = append(out.Items, item) + } + return out, true +} + +func ssrfGuard(u *url.URL) error { + host := u.Hostname() + if host == "" { + return errors.New("invalid host") + } + if strings.HasSuffix(strings.ToLower(host), ".onion") { + return errors.New("SSRF blocked: onion domains are not allowed") + } + if os.Getenv("RSS_FETCH_ALLOW_LOCAL") == "1" { + return nil + } + ips, err := net.LookupIP(host) + if err != nil || len(ips) == 0 { + return errors.New("SSRF blocked: cannot resolve host") + } + for _, ip := range ips { + if isPrivateIP(ip) { + return errors.New("SSRF blocked: private or loopback address") + } + } + return nil +} + +func isPrivateIP(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + if v4 := ip.To4(); v4 != nil { + if v4[0] == 10 || (v4[0] == 172 && v4[1]&0xf0 == 16) || (v4[0] == 192 && v4[1] == 168) || (v4[0] == 169 && v4[1] == 254) || v4[0] == 127 { + return true + } + return false + } + if ip.Equal(net.ParseIP("::1")) { + return true + } + if ip[0] == 0xfe && (ip[1]&0xc0) == 0x80 { + return true + } + if ip[0]&0xfe == 0xfc { + return true + } + return false +} diff --git a/tools/cmd/rss_fetch/rss_fetch_test.go b/tools/cmd/rss_fetch/rss_fetch_test.go new file mode 100644 index 0000000..c2e3f99 --- /dev/null +++ b/tools/cmd/rss_fetch/rss_fetch_test.go 
@@ -0,0 +1,142 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// Allow local SSRF for tests +func TestMain(m *testing.M) { + if err := os.Setenv("RSS_FETCH_ALLOW_LOCAL", "1"); err != nil { + panic(err) + } + os.Exit(m.Run()) +} + +func runTool(t *testing.T, bin string, env []string, input any) (string, string, error) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + cmd.Env = env + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + runErr := cmd.Run() + return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), runErr +} + +func TestRSSFetch_RSS(t *testing.T) { + rss := `thttps://ex/Ahttps://ex/aMon, 02 Jan 2006 15:04:05 MSTda` + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/rss+xml") + if _, err := w.Write([]byte(rss)); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "rss_fetch") + env := append(os.Environ(), "RSS_FETCH_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"url": srv.URL}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"feed\":") || !strings.Contains(outStr, "\"items\":") { + t.Fatalf("unexpected output: %s", outStr) + } + if !strings.Contains(outStr, "https://ex/a") { + t.Fatalf("missing item url: %s", outStr) + } +} + +func TestRSSFetch_Atom(t *testing.T) { + atom := `ExampleB2006-01-02T15:04:05Zsb` + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/atom+xml") + if _, err := w.Write([]byte(atom)); err != nil { + 
t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "rss_fetch") + env := append(os.Environ(), "RSS_FETCH_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"url": srv.URL}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"feed\":") || !strings.Contains(outStr, "\"items\":") { + t.Fatalf("unexpected output: %s", outStr) + } + if !strings.Contains(outStr, "https://ex/b") { + t.Fatalf("missing item url: %s", outStr) + } +} + +func TestRSSFetch_304NotModified(t *testing.T) { + etag := "W/\"abc\"" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("If-Modified-Since") != "" || r.Header.Get("If-None-Match") != "" { + w.WriteHeader(http.StatusNotModified) + return + } + w.Header().Set("ETag", etag) + w.Header().Set("Content-Type", "application/rss+xml") + if _, err := w.Write([]byte("")); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "rss_fetch") + env := append(os.Environ(), "RSS_FETCH_ALLOW_LOCAL=1") + // First fetch + _, errStr, err := runTool(t, bin, env, map[string]any{"url": srv.URL}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + // Second fetch with If-Modified-Since triggers 304 + outStr2, _, err2 := runTool(t, bin, env, map[string]any{"url": srv.URL, "if_modified_since": "Mon, 02 Jan 2006 15:04:05 GMT"}) + if err2 != nil { + t.Fatalf("second run error: %v", err2) + } + if !strings.Contains(outStr2, "\"items\":[]") { + t.Fatalf("expected empty items on 304: %s", outStr2) + } +} + +func TestRSSFetch_SSRFBlocked(t *testing.T) { + bin := testutil.BuildTool(t, "rss_fetch") + env := []string{"RSS_FETCH_ALLOW_LOCAL=0"} + _, errStr, err := runTool(t, bin, env, map[string]any{"url": "http://127.0.0.1:9"}) + if err == nil { + t.Fatalf("expected error but got ok") + } + if !strings.Contains(errStr, "SSRF") 
{ + t.Fatalf("expected SSRF error, got: %s", errStr) + } +} + +func TestRSSFetch_BadInput(t *testing.T) { + bin := testutil.BuildTool(t, "rss_fetch") + env := append(os.Environ(), "RSS_FETCH_ALLOW_LOCAL=1") + _, errStr, err := runTool(t, bin, env, map[string]any{"url": ":bad"}) + if err == nil { + t.Fatalf("expected error for bad url") + } + if !strings.Contains(errStr, "http/https") { + t.Fatalf("unexpected stderr: %s", errStr) + } +} diff --git a/tools/cmd/searxng_search/searxng_search.go b/tools/cmd/searxng_search/searxng_search.go new file mode 100644 index 0000000..97aaac1 --- /dev/null +++ b/tools/cmd/searxng_search/searxng_search.go @@ -0,0 +1,405 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + "time" +) + +type input struct { + Q string `json:"q"` + TimeRange string `json:"time_range"` + Categories []string `json:"categories"` + Engines []string `json:"engines"` + Language string `json:"language"` + Page int `json:"page"` + Size int `json:"size"` +} + +type result struct { + Title string `json:"title"` + URL string `json:"url"` + Snippet string `json:"snippet"` + Engine string `json:"engine"` + PublishedAt string `json:"published_at,omitempty"` +} + +type output struct { + Query string `json:"query"` + Results []result `json:"results"` +} + +func main() { + if err := run(); err != nil { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + // Optional hint exposure + var he *hintedError + if errors.As(err, &he) && he.hint != "" { + fmt.Fprintf(os.Stderr, "{\"error\":%q,\"hint\":%q}\n", he.err.Error(), he.hint) + } else { + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + } + os.Exit(1) + } +} + +// run orchestrates input parsing, request building, HTTP execution with retries, +// and audit emission. Keep this wrapper thin to satisfy lint complexity. 
+func run() error { + in, err := decodeInput() + if err != nil { + return err + } + if strings.TrimSpace(in.Q) == "" { + return errors.New("q is required") + } + baseURL, reqURL, err := prepareURLs(in) + if err != nil { + return err + } + timeout := resolveTimeout() + client := newHTTPClient(timeout) + start := time.Now() + raw, lastStatus, retries, err := fetchWithRetries(client, baseURL, reqURL) + if err != nil { + return err + } + out := output{Query: in.Q, Results: parseResults(raw.Results)} + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + entry := makeAudit(baseURL, in.Q, lastStatus, time.Since(start).Milliseconds(), retries) + _ = appendAudit(entry) //nolint:errcheck + return nil +} + +func prepareURLs(in input) (*url.URL, *url.URL, error) { + base := strings.TrimSpace(os.Getenv("SEARXNG_BASE_URL")) + if base == "" { + return nil, nil, hinted(fmt.Errorf("SEARXNG_BASE_URL is required"), "export SEARXNG_BASE_URL=http://localhost:8888") + } + baseURL, err := url.Parse(base) + if err != nil || (baseURL.Scheme != "http" && baseURL.Scheme != "https") { + return nil, nil, errors.New("SEARXNG_BASE_URL must be a valid http/https URL") + } + if err := ssrfGuard(baseURL); err != nil { + return nil, nil, err + } + reqURL, err := url.Parse(baseURL.String()) + if err != nil { + return nil, nil, err + } + reqURL.Path = strings.TrimRight(reqURL.Path, "/") + "/search" + q := reqURL.Query() + q.Set("format", "json") + q.Set("q", in.Q) + if in.TimeRange != "" { + switch in.TimeRange { + case "day", "week", "month", "year": + q.Set("time_range", in.TimeRange) + default: + return nil, nil, errors.New("time_range must be one of: day, week, month, year") + } + } + if len(in.Categories) > 0 { + q.Set("categories", strings.Join(in.Categories, ",")) + } + if len(in.Engines) > 0 { + q.Set("engines", strings.Join(in.Engines, ",")) + } + if in.Language != "" { + q.Set("language", in.Language) + } + if in.Page > 0 { + 
q.Set("pageno", strconv.Itoa(in.Page)) + } + if in.Size > 0 { + if in.Size > 50 { + return nil, nil, errors.New("size must be <= 50") + } + q.Set("results", strconv.Itoa(in.Size)) + } + reqURL.RawQuery = q.Encode() + return baseURL, reqURL, nil +} + +type searxRaw struct { + Query string `json:"query"` + Results []map[string]any `json:"results"` +} + +func fetchWithRetries(client *http.Client, baseURL *url.URL, reqURL *url.URL) (searxRaw, int, int, error) { + var retries int + var lastStatus int + var raw searxRaw + for attempt := 0; attempt < 3; attempt++ { + if attempt > 0 { + retries++ + } + if err := ssrfGuard(baseURL); err != nil { + return searxRaw{}, 0, retries, err + } + req, err := http.NewRequest(http.MethodGet, reqURL.String(), nil) + if err != nil { + return searxRaw{}, 0, retries, fmt.Errorf("new request: %w", err) + } + req.Header.Set("User-Agent", "agentcli-searxng/0.1") + resp, err := client.Do(req) + if err != nil { + if isTimeout(err) && attempt < 2 { + backoffSleep(0, attempt) + continue + } + return searxRaw{}, 0, retries, fmt.Errorf("http: %w", err) + } + lastStatus = resp.StatusCode + if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500 { + if attempt < 2 { + sleepMs := retryAfterMs(resp.Header.Get("Retry-After")) + _ = resp.Body.Close() //nolint:errcheck + backoffSleep(sleepMs, attempt) + continue + } + } + dec := json.NewDecoder(bufio.NewReader(resp.Body)) + if err := dec.Decode(&raw); err != nil { + _ = resp.Body.Close() //nolint:errcheck + if resp.StatusCode >= 500 && attempt < 2 { + backoffSleep(0, attempt) + continue + } + return searxRaw{}, lastStatus, retries, hinted(fmt.Errorf("decode json: %w", err), "verify SEARXNG_BASE_URL and that /search?format=json is reachable") + } + _ = resp.Body.Close() //nolint:errcheck + break + } + return raw, lastStatus, retries, nil +} + +func parseResults(rows []map[string]any) []result { + var out []result + for _, r := range rows { + res := result{} + if v, ok := 
r["title"].(string); ok { + res.Title = v + } + if v, ok := r["url"].(string); ok { + res.URL = v + } + if v, ok := r["content"].(string); ok { + res.Snippet = v + } + if v, ok := r["snippet"].(string); ok && res.Snippet == "" { + res.Snippet = v + } + if v, ok := r["engine"].(string); ok { + res.Engine = v + } + if v, ok := r["publishedDate"].(string); ok { + res.PublishedAt = v + } + if v, ok := r["published_at"].(string); ok && res.PublishedAt == "" { + res.PublishedAt = v + } + out = append(out, res) + } + return out +} + +func makeAudit(baseURL *url.URL, q string, status int, ms int64, retries int) map[string]any { + entry := map[string]any{ + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "searxng_search", + "url_host": baseURL.Hostname(), + "status": status, + "ms": ms, + "retries": retries, + } + if len(q) <= 256 { + entry["query"] = q + } else { + entry["query"] = q[:256] + entry["query_truncated"] = true + } + return entry +} + +func decodeInput() (input, error) { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return in, fmt.Errorf("parse json: %w", err) + } + return in, nil +} + +func resolveTimeout() time.Duration { + if v := strings.TrimSpace(os.Getenv("HTTP_TIMEOUT_MS")); v != "" { + if ms, err := time.ParseDuration(v + "ms"); err == nil && ms > 0 { + return ms + } + } + return 10 * time.Second +} + +func newHTTPClient(timeout time.Duration) *http.Client { + tr := &http.Transport{} + return &http.Client{Timeout: timeout, Transport: tr, CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 5 { + return errors.New("too many redirects") + } + return ssrfGuard(req.URL) + }} +} + +func isTimeout(err error) bool { + var ne net.Error + return errors.As(err, &ne) && ne.Timeout() +} + +func retryAfterMs(h string) int64 { + if h == "" { + return 0 + } + // Try integer seconds first + if n, err := strconv.Atoi(strings.TrimSpace(h)); err == nil && n >= 0 { + 
return int64(n) * 1000 + } + // Try HTTP-date + if t, err := http.ParseTime(h); err == nil { + d := time.Until(t) + if d > 0 { + return d.Milliseconds() + } + } + return 0 +} + +func backoffSleep(retryAfterMs int64, attempt int) { + // jittered backoff: base 100ms * (attempt+1) + d := time.Duration(100*(attempt+1)) * time.Millisecond + if retryAfterMs > 0 { + d = time.Duration(retryAfterMs) * time.Millisecond + } + time.Sleep(d) +} + +// SSRF guard similar to http_fetch, with opt-out for local during tests via SEARXNG_ALLOW_LOCAL=1 +func ssrfGuard(u *url.URL) error { + host := u.Hostname() + if host == "" { + return errors.New("invalid host") + } + if strings.HasSuffix(strings.ToLower(host), ".onion") { + return errors.New("SSRF blocked: onion domains are not allowed") + } + if os.Getenv("SEARXNG_ALLOW_LOCAL") == "1" { + return nil + } + ips, err := net.LookupIP(host) + if err != nil || len(ips) == 0 { + return errors.New("SSRF blocked: cannot resolve host") + } + for _, ip := range ips { + if isPrivateIP(ip) { + return errors.New("SSRF blocked: private or loopback address") + } + } + return nil +} + +func isPrivateIP(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + if v4 := ip.To4(); v4 != nil { + ip = v4 + if v4[0] == 10 { + return true + } + if v4[0] == 172 && v4[1]&0xf0 == 16 { + return true + } + if v4[0] == 192 && v4[1] == 168 { + return true + } + if v4[0] == 169 && v4[1] == 254 { + return true + } + if v4[0] == 127 { + return true + } + return false + } + if ip.Equal(net.ParseIP("::1")) { + return true + } + if ip[0] == 0xfe && (ip[1]&0xc0) == 0x80 { + return true + } + if ip[0]&0xfe == 0xfc { + return true + } + return false +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. 
+func appendAudit(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { _ = f.Close() }() //nolint:errcheck + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." + } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} + +type hintedError struct { + err error + hint string +} + +func (h *hintedError) Error() string { return h.err.Error() } + +func hinted(err error, hint string) error { return &hintedError{err: err, hint: hint} } diff --git a/tools/cmd/searxng_search/searxng_search_test.go b/tools/cmd/searxng_search/searxng_search_test.go new file mode 100644 index 0000000..77c6f2b --- /dev/null +++ b/tools/cmd/searxng_search/searxng_search_test.go @@ -0,0 +1,153 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "strings" + "sync/atomic" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// no exported types required here + +// TestMain allows local SSRF to reach httptest.Server +func TestMain(m *testing.M) { + if err := os.Setenv("SEARXNG_ALLOW_LOCAL", "1"); err != nil { + panic(err) + } + os.Exit(m.Run()) +} + +func runTool(t *testing.T, bin string, env []string, input any) (string, string, error) { + t.Helper() + data, err := 
json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + // Use provided env exactly; caller can pass os.Environ()-derived slice when needed. + cmd.Env = env + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + runErr := cmd.Run() + return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), runErr +} + +func TestSearxngSearch_Success(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/search" || r.URL.Query().Get("format") != "json" { + http.Error(w, "bad", http.StatusBadRequest) + return + } + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`{"query":"golang","results":[{"title":"Go","url":"https://golang.org","content":"The Go Programming Language","engine":"duckduckgo"}]}`)); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "searxng_search") + env := append(os.Environ(), "SEARXNG_BASE_URL="+srv.URL, "SEARXNG_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "golang"}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"query\":\"golang\"") { + t.Fatalf("missing query in output: %s", outStr) + } + if !strings.Contains(outStr, "golang.org") { + t.Fatalf("missing result url: %s", outStr) + } +} + +func TestSearxngSearch_Retry429(t *testing.T) { + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + c := atomic.AddInt32(&calls, 1) + if c == 1 { + w.Header().Set("Retry-After", "0") + w.WriteHeader(http.StatusTooManyRequests) + return + } + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`{"query":"q","results":[{"title":"A","url":"https://a","content":"s","engine":"e"}]}`)); err != nil { + 
t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "searxng_search") + env := append(os.Environ(), "SEARXNG_BASE_URL="+srv.URL, "SEARXNG_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "q"}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if atomic.LoadInt32(&calls) < 2 { + t.Fatalf("expected at least 2 calls, got %d", calls) + } + if !strings.Contains(outStr, "\"results\":[") { + t.Fatalf("missing results: %s", outStr) + } +} + +func TestSearxngSearch_Retry5xxThenSuccess(t *testing.T) { + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + c := atomic.AddInt32(&calls, 1) + if c == 1 { + w.WriteHeader(http.StatusBadGateway) + return + } + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`{"query":"q","results":[{"title":"B","url":"https://b","content":"s","engine":"e"}]}`)); err != nil { + t.Fatalf("write: %v", err) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "searxng_search") + env := append(os.Environ(), "SEARXNG_BASE_URL="+srv.URL, "SEARXNG_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "q"}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if atomic.LoadInt32(&calls) < 2 { + t.Fatalf("expected at least 2 calls, got %d", calls) + } + if !strings.Contains(outStr, "\"results\":[") { + t.Fatalf("missing results: %s", outStr) + } +} + +func TestSearxngSearch_SSRFBlocked(t *testing.T) { + bin := testutil.BuildTool(t, "searxng_search") + // Force no local bypass and set private base URL + env := []string{"SEARXNG_BASE_URL=http://127.0.0.1:9"} + outStr, errStr, err := runTool(t, bin, env, map[string]any{"q": "x"}) + if err == nil { + t.Fatalf("expected error, got ok: %s", outStr) + } + if !strings.Contains(errStr, "SSRF blocked") { + t.Fatalf("expected SSRF blocked error, got: %s", errStr) + } +} + 
+func TestSearxngSearch_BadBaseURL(t *testing.T) { + bin := testutil.BuildTool(t, "searxng_search") + env := []string{"SEARXNG_BASE_URL=:bad"} + _, errStr, err := runTool(t, bin, env, map[string]any{"q": "x"}) + if err == nil { + t.Fatalf("expected error for bad base url") + } + if !strings.Contains(errStr, "SEARXNG_BASE_URL") { + t.Fatalf("expected base url error, got: %s", errStr) + } +} diff --git a/tools/cmd/wayback_lookup/wayback_lookup.go b/tools/cmd/wayback_lookup/wayback_lookup.go new file mode 100644 index 0000000..7f7220d --- /dev/null +++ b/tools/cmd/wayback_lookup/wayback_lookup.go @@ -0,0 +1,242 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "time" +) + +type input struct { + URL string `json:"url"` + Save bool `json:"save"` +} + +type output struct { + ClosestURL string `json:"closest_url,omitempty"` + Timestamp string `json:"timestamp,omitempty"` + Saved bool `json:"saved,omitempty"` +} + +func main() { + if err := run(); err != nil { + msg := strings.ReplaceAll(err.Error(), "\n", " ") + fmt.Fprintf(os.Stderr, "{\"error\":%q}\n", msg) + os.Exit(1) + } +} + +func run() error { + var in input + dec := json.NewDecoder(bufio.NewReader(os.Stdin)) + if err := dec.Decode(&in); err != nil { + return fmt.Errorf("parse json: %w", err) + } + if strings.TrimSpace(in.URL) == "" { + return errors.New("url is required") + } + base := strings.TrimSpace(os.Getenv("WAYBACK_BASE_URL")) + if base == "" { + return errors.New("WAYBACK_BASE_URL is required") + } + baseURL, err := url.Parse(base) + if err != nil || (baseURL.Scheme != "http" && baseURL.Scheme != "https") { + return errors.New("WAYBACK_BASE_URL must be a valid http/https URL") + } + if err := ssrfGuard(baseURL); err != nil { + return err + } + + // Build available URL: /available?url= + availURL, err := url.Parse(baseURL.String()) + if err != nil { + return err + } + availURL.Path = 
strings.TrimRight(availURL.Path, "/") + "/available" + q := availURL.Query() + q.Set("url", in.URL) + availURL.RawQuery = q.Encode() + + client := &http.Client{Timeout: 3 * time.Second} + start := time.Now() + resp, err := getWithRetry(client, availURL.String()) + if err != nil { + return fmt.Errorf("http: %w", err) + } + defer func() { _ = resp.Body.Close() }() //nolint:errcheck + + var raw struct { + ArchivedSnapshots struct { + Closest struct { + Available bool `json:"available"` + URL string `json:"url"` + Timestamp string `json:"timestamp"` + } `json:"closest"` + } `json:"archived_snapshots"` + } + if err := json.NewDecoder(bufio.NewReader(resp.Body)).Decode(&raw); err != nil { + return fmt.Errorf("decode json: %w", err) + } + + out := output{} + saved := false + if raw.ArchivedSnapshots.Closest.Available { + out.ClosestURL = raw.ArchivedSnapshots.Closest.URL + out.Timestamp = raw.ArchivedSnapshots.Closest.Timestamp + } else if in.Save { + // Trigger save + saveURL, perr := url.Parse(baseURL.String()) + if perr != nil { + return perr + } + saveURL.Path = strings.TrimRight(saveURL.Path, "/") + "/save/" + qs := saveURL.Query() + qs.Set("url", in.URL) + saveURL.RawQuery = qs.Encode() + // Re-guard + if err := ssrfGuard(saveURL); err != nil { + return err + } + resp2, herr := getWithRetry(client, saveURL.String()) + if herr == nil { + if resp2.StatusCode >= 200 && resp2.StatusCode < 300 { + saved = true + out.Saved = true + } + _ = resp2.Body.Close() //nolint:errcheck + } + } + if err := json.NewEncoder(os.Stdout).Encode(out); err != nil { + return fmt.Errorf("encode json: %w", err) + } + _ = appendAudit(map[string]any{ //nolint:errcheck + "ts": time.Now().UTC().Format(time.RFC3339Nano), + "tool": "wayback_lookup", + "ms": time.Since(start).Milliseconds(), + "saved": saved, + }) + return nil +} + +// ssrfGuard blocks loopback, RFC1918, link-local, and ULA unless WAYBACK_ALLOW_LOCAL=1 +func ssrfGuard(u *url.URL) error { + host := u.Hostname() + if host == "" { + 
return errors.New("invalid host") + } + if strings.HasSuffix(strings.ToLower(host), ".onion") { + return errors.New("SSRF blocked: onion domains are not allowed") + } + if os.Getenv("WAYBACK_ALLOW_LOCAL") == "1" { + return nil + } + ips, err := net.LookupIP(host) + if err != nil || len(ips) == 0 { + return errors.New("SSRF blocked: cannot resolve host") + } + for _, ip := range ips { + if isPrivateIP(ip) { + return errors.New("SSRF blocked: private or loopback address") + } + } + return nil +} + +func isPrivateIP(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + if v4 := ip.To4(); v4 != nil { + ip = v4 + if v4[0] == 10 { + return true + } + if v4[0] == 172 && v4[1]&0xf0 == 16 { + return true + } + if v4[0] == 192 && v4[1] == 168 { + return true + } + if v4[0] == 169 && v4[1] == 254 { + return true + } + if v4[0] == 127 { + return true + } + return false + } + if ip.Equal(net.ParseIP("::1")) { + return true + } + if ip[0] == 0xfe && (ip[1]&0xc0) == 0x80 { + return true + } + if ip[0]&0xfe == 0xfc { + return true + } + return false +} + +// getWithRetry performs a GET with one retry on 5xx using a small backoff. +func getWithRetry(client *http.Client, url string) (*http.Response, error) { + resp, err := client.Get(url) + if err != nil { + return nil, err + } + if resp.StatusCode >= 500 { + _ = resp.Body.Close() //nolint:errcheck + time.Sleep(150 * time.Millisecond) + return client.Get(url) + } + return resp, nil +} + +// appendAudit writes an NDJSON line under .goagent/audit/YYYYMMDD.log at the repo root. 
+func appendAudit(entry any) error { + b, err := json.Marshal(entry) + if err != nil { + return err + } + root := moduleRoot() + dir := filepath.Join(root, ".goagent", "audit") + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + fname := time.Now().UTC().Format("20060102") + ".log" + path := filepath.Join(dir, fname) + f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + if err != nil { + return err + } + defer func() { _ = f.Close() }() //nolint:errcheck + if _, err := f.Write(append(b, '\n')); err != nil { + return err + } + return nil +} + +// moduleRoot walks upward from CWD to the directory containing go.mod; falls back to CWD. +func moduleRoot() string { + cwd, err := os.Getwd() + if err != nil || cwd == "" { + return "." + } + dir := cwd + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + return cwd + } + dir = parent + } +} diff --git a/tools/cmd/wayback_lookup/wayback_lookup_test.go b/tools/cmd/wayback_lookup/wayback_lookup_test.go new file mode 100644 index 0000000..53ca160 --- /dev/null +++ b/tools/cmd/wayback_lookup/wayback_lookup_test.go @@ -0,0 +1,124 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "strings" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +// Allow local SSRF for tests targeting httptest.Server +func TestMain(m *testing.M) { + if err := os.Setenv("WAYBACK_ALLOW_LOCAL", "1"); err != nil { + panic(err) + } + os.Exit(m.Run()) +} + +func runTool(t *testing.T, bin string, env []string, input any) (string, string, error) { + t.Helper() + data, err := json.Marshal(input) + if err != nil { + t.Fatalf("marshal: %v", err) + } + cmd := exec.Command(bin) + cmd.Stdin = bytes.NewReader(data) + cmd.Env = env + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + runErr := cmd.Run() + return 
strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), runErr +} + +func TestWaybackLookup_SuccessLookup(t *testing.T) { + // Mock Wayback API + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/available": + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`{"archived_snapshots":{"closest":{"available":true,"url":"http://web.archive.org/web/20200101000000/http://example.com/","timestamp":"20200101000000"}}}`)); err != nil { + t.Fatalf("write: %v", err) + } + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "wayback_lookup") + env := append(os.Environ(), "WAYBACK_BASE_URL="+srv.URL, "WAYBACK_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"url": "http://example.com"}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !strings.Contains(outStr, "\"closest_url\":") || !strings.Contains(outStr, "web.archive.org/web/20200101000000") { + t.Fatalf("unexpected output: %s", outStr) + } + if !strings.Contains(outStr, "\"timestamp\":\"20200101000000\"") { + t.Fatalf("missing timestamp: %s", outStr) + } +} + +func TestWaybackLookup_SaveTrue(t *testing.T) { + var saveCalled bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/available": + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`{"archived_snapshots":{"closest":{"available":false}}}`)); err != nil { + t.Fatalf("write: %v", err) + } + case r.URL.Path == "/save/": + saveCalled = true + w.WriteHeader(200) + default: + http.NotFound(w, r) + } + })) + defer srv.Close() + + bin := testutil.BuildTool(t, "wayback_lookup") + env := append(os.Environ(), "WAYBACK_BASE_URL="+srv.URL, "WAYBACK_ALLOW_LOCAL=1") + outStr, errStr, err := runTool(t, bin, env, map[string]any{"url": "http://example.com", 
"save": true}) + if err != nil { + t.Fatalf("run error: %v, stderr=%s", err, errStr) + } + if !saveCalled { + t.Fatalf("expected /save/ to be called") + } + if !strings.Contains(outStr, "\"saved\":true") { + t.Fatalf("expected saved=true in output: %s", outStr) + } +} + +func TestWaybackLookup_SSRFBlocked(t *testing.T) { + bin := testutil.BuildTool(t, "wayback_lookup") + // Force no local bypass and set private base URL + env := []string{"WAYBACK_BASE_URL=http://127.0.0.1:9"} + _, errStr, err := runTool(t, bin, env, map[string]any{"url": "http://example.com"}) + if err == nil { + t.Fatalf("expected error, got ok") + } + if !strings.Contains(errStr, "SSRF blocked") { + t.Fatalf("expected SSRF blocked error, got: %s", errStr) + } +} + +func TestWaybackLookup_BadBaseURL(t *testing.T) { + bin := testutil.BuildTool(t, "wayback_lookup") + env := []string{"WAYBACK_BASE_URL=:bad"} + _, errStr, err := runTool(t, bin, env, map[string]any{"url": "http://example.com"}) + if err == nil { + t.Fatalf("expected error for bad base url") + } + if !strings.Contains(errStr, "WAYBACK_BASE_URL") { + t.Fatalf("expected base url error, got: %s", errStr) + } +} diff --git a/tools/cmd/wiki_query/wiki_query.go b/tools/cmd/wiki_query/wiki_query.go new file mode 100644 index 0000000..ebebc33 --- /dev/null +++ b/tools/cmd/wiki_query/wiki_query.go @@ -0,0 +1,216 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "os" + "sort" + "strings" + "time" +) + +type input struct { + Titles string `json:"titles,omitempty"` + Search string `json:"search,omitempty"` + Language string `json:"language,omitempty"` +} + +type page struct { + Title string `json:"title"` + URL string `json:"url"` + Extract string `json:"extract"` +} + +type output struct { + Pages []page `json:"pages"` +} + +func main() { + if err := run(); err != nil { + encErr(os.Stderr, err) + os.Exit(1) + } +} + +func run() error { + in, err := readInput() + if err != nil { + return 
err + } + // Enforce mutual exclusivity between 'titles' and 'search' + if (in.Titles == "" && in.Search == "") || (in.Titles != "" && in.Search != "") { + return errors.New("provide exactly one of 'titles' or 'search'") + } + base := os.Getenv("MEDIAWIKI_BASE_URL") + if base == "" { + base = fmt.Sprintf("https://%s.wikipedia.org", langOrDefault(in.Language)) + } + if err := ssrfGuard(base); err != nil { + return err + } + client := &http.Client{Timeout: 5 * time.Second} + var pages []page + if in.Titles != "" { + pages, err = fetchExtracts(client, base, langOrDefault(in.Language), in.Titles) + } else { + pages, err = fetchOpenSearch(client, base, in.Search) + } + if err != nil { + return err + } + out := output{Pages: pages} + enc := json.NewEncoder(os.Stdout) + enc.SetEscapeHTML(false) + return enc.Encode(out) +} + +func readInput() (input, error) { + var in input + s := bufio.NewScanner(os.Stdin) + s.Buffer(make([]byte, 0, 64*1024), 5*1024*1024) + var b strings.Builder + for s.Scan() { + b.Write(s.Bytes()) + } + if err := s.Err(); err != nil { + return in, err + } + if err := json.Unmarshal([]byte(b.String()), &in); err != nil { + return in, err + } + return in, nil +} + +func langOrDefault(l string) string { + if l == "" { + return "en" + } + return l +} + +func ssrfGuard(base string) error { + if os.Getenv("WIKI_QUERY_ALLOW_LOCAL") == "1" { + return nil + } + u, err := url.Parse(base) + if err != nil { + return fmt.Errorf("MEDIAWIKI_BASE_URL invalid: %w", err) + } + host := u.Hostname() + if host == "" { + host = u.Host + } + ips, err := net.LookupIP(host) + if err != nil { + return fmt.Errorf("DNS error: %w", err) + } + for _, ip := range ips { + if ip.IsLoopback() || ip.IsPrivate() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return errors.New("SSRF blocked: private address") + } + } + return nil +} + +func fetchExtracts(c *http.Client, base, lang, titles string) ([]page, error) { + q := url.Values{} + q.Set("action", "query") + 
q.Set("format", "json") + q.Set("prop", "extracts") + q.Set("exintro", "1") + q.Set("explaintext", "1") + q.Set("redirects", "1") + q.Set("titles", titles) + q.Set("uselang", lang) + reqURL := strings.TrimRight(base, "/") + "/w/api.php?" + q.Encode() + resp, err := c.Get(reqURL) + if err != nil { + return nil, err + } + defer func() { _ = resp.Body.Close() }() //nolint:errcheck + var raw struct { + Query struct { + Pages map[string]struct { + Title string `json:"title"` + Extract string `json:"extract"` + } `json:"pages"` + } `json:"query"` + } + if err := json.NewDecoder(resp.Body).Decode(&raw); err != nil { + return nil, err + } + var pages []page + for _, v := range raw.Query.Pages { + pages = append(pages, page{ + Title: v.Title, + URL: fmt.Sprintf("https://%s.wikipedia.org/wiki/%s", lang, url.PathEscape(strings.ReplaceAll(v.Title, " ", "_"))), + Extract: v.Extract, + }) + } + sort.Slice(pages, func(i, j int) bool { return pages[i].Title < pages[j].Title }) + return pages, nil +} + +func fetchOpenSearch(c *http.Client, base, query string) ([]page, error) { + v := url.Values{} + v.Set("action", "opensearch") + v.Set("format", "json") + v.Set("search", query) + v.Set("limit", "10") + reqURL := strings.TrimRight(base, "/") + "/w/api.php?" 
+ v.Encode() + resp, err := c.Get(reqURL) + if err != nil { + return nil, err + } + defer func() { _ = resp.Body.Close() }() //nolint:errcheck + var arr []any + if err := json.NewDecoder(resp.Body).Decode(&arr); err != nil { + return nil, err + } + if len(arr) < 4 { + return nil, errors.New("unexpected opensearch response") + } + titlesAny, snippetsAny, urlsAny := arr[1], arr[2], arr[3] + titles, _ := toStringSlice(titlesAny) + snippets, _ := toStringSlice(snippetsAny) + urls, _ := toStringSlice(urlsAny) + n := min(len(titles), min(len(snippets), len(urls))) + pages := make([]page, 0, n) + for i := 0; i < n; i++ { + pages = append(pages, page{Title: titles[i], URL: urls[i], Extract: snippets[i]}) + } + return pages, nil +} + +func toStringSlice(v any) ([]string, bool) { + i, ok := v.([]any) + if !ok { + return nil, false + } + s := make([]string, 0, len(i)) + for _, e := range i { + if str, ok := e.(string); ok { + s = append(s, str) + } + } + return s, true +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func encErr(w *os.File, err error) { + if e := json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}); e != nil { + // best-effort stderr encoding + _, _ = w.Write([]byte("{\"error\":\"internal encode error\"}\n")) //nolint:errcheck + } +} diff --git a/tools/cmd/wiki_query/wiki_query_test.go b/tools/cmd/wiki_query/wiki_query_test.go new file mode 100644 index 0000000..6036a5d --- /dev/null +++ b/tools/cmd/wiki_query/wiki_query_test.go @@ -0,0 +1,154 @@ +package main_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "testing" + + testutil "github.com/hyperifyio/goagent/tools/testutil" +) + +func TestWikiQuery_TitlesSuccess(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/w/api.php", func(w http.ResponseWriter, r *http.Request) { + q := r.URL.Query() + if q.Get("action") != "query" || q.Get("prop") != "extracts" { + w.WriteHeader(http.StatusBadRequest) + return + 
} + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`{ + "batchcomplete":"", + "query":{ + "pages":{ + "123":{ + "pageid":123, + "ns":0, + "title":"Golang", + "extract":"Go is an open source programming language..." + } + } + } + }`)); err != nil { + t.Fatalf("write: %v", err) + } + }) + ts := httptest.NewServer(mux) + defer ts.Close() + + bin := testutil.BuildTool(t, "wiki_query") + stdin := []byte(`{"titles":"Golang","language":"en"}`) + cmd := exec.Command(bin) + cmd.Env = append(os.Environ(), "MEDIAWIKI_BASE_URL="+ts.URL, "WIKI_QUERY_ALLOW_LOCAL=1") + cmd.Stdin = bytes.NewReader(stdin) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + t.Fatalf("tool error: %v, stderr=%s", err, stderr.String()) + } + + var out struct { + Pages []struct { + Title string `json:"title"` + URL string `json:"url"` + Extract string `json:"extract"` + } + } + if err := json.Unmarshal(stdout.Bytes(), &out); err != nil { + t.Fatalf("bad json: %v: %s", err, stdout.String()) + } + if len(out.Pages) != 1 || out.Pages[0].Title != "Golang" { + t.Fatalf("unexpected pages: %+v", out.Pages) + } + if out.Pages[0].Extract == "" { + t.Fatalf("missing extract") + } +} + +func TestWikiQuery_SearchSuccess(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/w/api.php", func(w http.ResponseWriter, r *http.Request) { + q := r.URL.Query() + if q.Get("action") != "opensearch" { + w.WriteHeader(http.StatusBadRequest) + return + } + w.Header().Set("Content-Type", "application/json") + if _, err := w.Write([]byte(`[ + "golang", + ["Go (programming language)", "Gopher"], + ["Go is an open source...", "Mascot of Go"], + [ + "https://en.wikipedia.org/wiki/Go_(programming_language)", + "https://en.wikipedia.org/wiki/Gopher_(programming_language)" + ] + ]`)); err != nil { + t.Fatalf("write: %v", err) + } + }) + ts := httptest.NewServer(mux) + defer ts.Close() + + bin := testutil.BuildTool(t, 
"wiki_query") + stdin := []byte(`{"search":"golang"}`) + cmd := exec.Command(bin) + cmd.Env = append(os.Environ(), "MEDIAWIKI_BASE_URL="+ts.URL, "WIKI_QUERY_ALLOW_LOCAL=1") + cmd.Stdin = bytes.NewReader(stdin) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + t.Fatalf("tool error: %v, stderr=%s", err, stderr.String()) + } + var out struct { + Pages []struct{ Title, URL, Extract string } + } + if err := json.Unmarshal(stdout.Bytes(), &out); err != nil { + t.Fatalf("bad json: %v: %s", err, stdout.String()) + } + if len(out.Pages) != 2 { + t.Fatalf("unexpected results: %d", len(out.Pages)) + } + if out.Pages[0].Title == "" || out.Pages[0].URL == "" { + t.Fatalf("missing fields in first result: %+v", out.Pages[0]) + } +} + +func TestWikiQuery_BothParamsError(t *testing.T) { + bin := testutil.BuildTool(t, "wiki_query") + stdin := []byte(`{"titles":"Go","search":"golang"}`) + cmd := exec.Command(bin) + // No network needed; validation happens before requests + cmd.Stdin = bytes.NewReader(stdin) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err == nil { + t.Fatalf("expected error when both 'titles' and 'search' provided; stdout=%s", stdout.String()) + } + if !bytes.Contains(stderr.Bytes(), []byte("provide exactly one of 'titles' or 'search'")) { + t.Fatalf("unexpected stderr: %s", stderr.String()) + } +} + +func TestWikiQuery_MissingParamsError(t *testing.T) { + bin := testutil.BuildTool(t, "wiki_query") + stdin := []byte(`{}`) + cmd := exec.Command(bin) + // No network needed; validation happens before requests + cmd.Stdin = bytes.NewReader(stdin) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err == nil { + t.Fatalf("expected error when neither 'titles' nor 'search' provided; stdout=%s", stdout.String()) + } + if !bytes.Contains(stderr.Bytes(), []byte("provide exactly one of 'titles' or 
'search'")) { + t.Fatalf("unexpected stderr: %s", stderr.String()) + } +} diff --git a/tools/testutil/buildtool.go b/tools/testutil/buildtool.go new file mode 100644 index 0000000..b8a3456 --- /dev/null +++ b/tools/testutil/buildtool.go @@ -0,0 +1,86 @@ +package testutil + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "testing" +) + +// BuildTool builds the named tool binary into a test-scoped temporary +// directory and returns the absolute path to the produced executable. +// +// Source discovery (absolute paths used to satisfy repository path hygiene +// rules in linters/tests): +// - tools/cmd/ (canonical layout only) +func BuildTool(t *testing.T, name string) string { + t.Helper() + + repoRoot, err := findRepoRoot() + if err != nil { + t.Fatalf("find repo root: %v", err) + } + + // Determine binary name with OS suffix + binName := name + if runtime.GOOS == "windows" { + binName += ".exe" + } + outPath := filepath.Join(t.TempDir(), binName) + + // Candidate source locations (canonical layout only) + var candidates []string + candidates = append(candidates, filepath.Join(repoRoot, "tools", "cmd", name)) + + var srcPath string + for _, c := range candidates { + if fi, statErr := os.Stat(c); statErr == nil { + // Accept directories and regular files + if fi.IsDir() || fi.Mode().IsRegular() { + srcPath = c + break + } + } + } + if srcPath == "" { + t.Fatalf("tool sources not found for %q under %s", name, filepath.Join(repoRoot, "tools")) + } + + cmd := exec.Command("go", "build", "-o", outPath, srcPath) + cmd.Dir = repoRoot + // Inherit environment; ensure CGO disabled for determinism + cmd.Env = append(os.Environ(), "CGO_ENABLED=0") + if output, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("build %s from %s failed: %v\n%s", name, relOrSame(repoRoot, srcPath), err, string(output)) + } + return outPath +} + +func findRepoRoot() (string, error) { + // Start from CWD and walk up until go.mod is found + start, err := os.Getwd() 
+ if err != nil || start == "" { + return "", errors.New("cannot determine working directory") + } + dir := start + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir, nil + } + parent := filepath.Dir(dir) + if parent == dir { + return "", fmt.Errorf("go.mod not found from %s upward", start) + } + dir = parent + } +} + +func relOrSame(base, target string) string { + if rel, err := filepath.Rel(base, target); err == nil { + return rel + } + return target +} diff --git a/tools/testutil/buildtool_test.go b/tools/testutil/buildtool_test.go new file mode 100644 index 0000000..53618d3 --- /dev/null +++ b/tools/testutil/buildtool_test.go @@ -0,0 +1,21 @@ +package testutil + +import ( + "runtime" + "strings" + "testing" +) + +func TestBuildTool_WindowsSuffix(t *testing.T) { + // Use a real tool name to ensure build succeeds across environments. + path := BuildTool(t, "fs_listdir") + if runtime.GOOS == "windows" { + if !strings.HasSuffix(path, ".exe") { + t.Fatalf("expected .exe suffix on Windows, got %q", path) + } + } else { + if strings.HasSuffix(path, ".exe") { + t.Fatalf("did not expect .exe suffix on non-Windows, got %q", path) + } + } +} diff --git a/tools/testutil/tempdir.go b/tools/testutil/tempdir.go new file mode 100644 index 0000000..9fd3717 --- /dev/null +++ b/tools/testutil/tempdir.go @@ -0,0 +1,25 @@ +package testutil + +import ( + "os" + "path/filepath" + "testing" +) + +// MakeRepoRelTempDir creates a temporary directory under the current +// package working directory and returns its relative path (basename). +// The directory is removed at test cleanup. +func MakeRepoRelTempDir(t *testing.T, prefix string) string { + t.Helper() + tmpAbs, err := os.MkdirTemp(".", prefix) + if err != nil { + t.Fatalf("mkdir temp under repo: %v", err) + } + base := filepath.Base(tmpAbs) + t.Cleanup(func() { + if err := os.RemoveAll(base); err != nil { + t.Logf("cleanup remove %s: %v", base, err) + } + }) + return base +}