From a93ec6bef15c68e5cff9a40b1e8be683b59c8ef9 Mon Sep 17 00:00:00 2001 From: engineer Date: Tue, 17 Feb 2026 14:48:52 -0800 Subject: [PATCH 1/2] Refactor install commands --- package.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 91ac477..ad44075 100644 --- a/package.json +++ b/package.json @@ -15,12 +15,13 @@ "test:e2e": "node --import tsx --test test/e2e.test.ts", "test:tts:manual": "node --experimental-strip-types test/tts-manual.ts", "test:load": "node --import tsx --test test/plugin-load.test.ts", - "test:reflection-3": "node --import tsx --test test/reflection-static.eval.test.ts", + "test:reflection": "node --import tsx --test test/reflection-static.eval.test.ts", "test:install": "jest test/install.test.ts --testTimeout=120000", "typecheck": "npx tsc --noEmit", + "install": "npm run install:telegram && npm run install:tts && npm run install:reflection && npm run install:sentry && npm run install:opencode-worktree", "install:telegram": "mkdir -p ~/.config/opencode/plugin && cp telegram.ts sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && cd ~/.config/opencode && bun install", "install:tts": "mkdir -p ~/.config/opencode/plugin && cp tts.ts sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && (cd ~/.config/opencode && bun install) && bash scripts/setup-coqui.sh", - "install:reflection-3": "mkdir -p ~/.config/opencode/plugin && cp reflection-3.ts ~/.config/opencode/plugin/reflection.ts && cp sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && cd ~/.config/opencode && bun install", + "install:reflection": "mkdir -p ~/.config/opencode/plugin && cp reflection-3.ts ~/.config/opencode/plugin/reflection.ts && cp sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && cd ~/.config/opencode && bun install", "install:sentry": "mkdir -p ~/.config/opencode/plugin && cp sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && 
cd ~/.config/opencode && bun install", "install:opencode-worktree": "mkdir -p ~/.local/bin && cp scripts/opencode-worktree.ts ~/.local/bin/opencode-worktree && chmod +x ~/.local/bin/opencode-worktree", "eval": "cd evals && npx promptfoo eval", From 3273ff76fdbb1509ec5d65111a082171e1f254b9 Mon Sep 17 00:00:00 2001 From: engineer Date: Thu, 19 Feb 2026 23:37:57 -0800 Subject: [PATCH 2/2] Clarify stuck eval completion criteria --- evals/prompts/stuck-detection.txt | 1 + test/telegram.test.ts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/evals/prompts/stuck-detection.txt b/evals/prompts/stuck-detection.txt index 45bcc53..1336925 100644 --- a/evals/prompts/stuck-detection.txt +++ b/evals/prompts/stuck-detection.txt @@ -32,6 +32,7 @@ First, check Tool Calls for any "running" or "pending" status. If found, return: - Agent shows test results with "passed" or "✓" - Agent's response indicates task fulfilled: "I've added", "I've fixed", "The X now works" - No pending work mentioned (no "Next I will...", "Still need to...") +- Return reason "complete" ONLY when there is evidence of completion. If the response is purely a plan or question, do not return "complete". - IMPORTANT: If the user's task requires code changes (fix, implement, add, create, build, refactor), message_completed is true, and the Tool Calls show ONLY read operations (read, glob, grep, git log/status/diff, webfetch, task/explore) with NO write operations (edit, write, bash with build/test/commit, PR creation), the task is NOT complete — classify as genuinely_stuck with stuck=true. Analyzing or recommending changes is not the same as implementing them. - If this rule applies, do NOT return reason "complete". 
→ reason: "complete" diff --git a/test/telegram.test.ts b/test/telegram.test.ts index 0087ab5..cb59e04 100644 --- a/test/telegram.test.ts +++ b/test/telegram.test.ts @@ -586,7 +586,7 @@ describe("Error Handling", () => { const result = await response.json() // Either text_sent is false OR error is present expect(result.text_sent === false || result.error).toBeTruthy() - }, 10000) // Extended timeout for network latency + }, 20000) // Extended timeout for network latency it("webhook handles malformed JSON gracefully", async () => { const response = await fetch(WEBHOOK_URL, {