Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions evals/prompts/stuck-detection.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ First, check Tool Calls for any "running" or "pending" status. If found, return:
- Agent shows test results with "passed" or "✓"
- Agent's response indicates task fulfilled: "I've added", "I've fixed", "The X now works"
- No pending work mentioned (no "Next I will...", "Still need to...")
- Use ONLY when there is evidence of completion. If the response is purely a plan or question, do not return "complete".
- IMPORTANT: If the user's task requires code changes (fix, implement, add, create, build, refactor), message_completed is true, and the Tool Calls show ONLY read operations (read, glob, grep, git log/status/diff, webfetch, task/explore) with NO write operations (edit, write, bash with build/test/commit, PR creation), the task is NOT complete — classify as genuinely_stuck with stuck=true. Analyzing or recommending changes is not the same as implementing them.
- If this rule applies, do NOT return reason "complete".
→ reason: "complete"
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@
"test:e2e": "node --import tsx --test test/e2e.test.ts",
"test:tts:manual": "node --experimental-strip-types test/tts-manual.ts",
"test:load": "node --import tsx --test test/plugin-load.test.ts",
"test:reflection-3": "node --import tsx --test test/reflection-static.eval.test.ts",
"test:reflection": "node --import tsx --test test/reflection-static.eval.test.ts",
"test:install": "jest test/install.test.ts --testTimeout=120000",
"typecheck": "npx tsc --noEmit",
"install": "npm run install:telegram && npm run install:tts && npm run install:reflection && npm run install:sentry && npm run install:opencode-worktree",
"install:telegram": "mkdir -p ~/.config/opencode/plugin && cp telegram.ts sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && cd ~/.config/opencode && bun install",
"install:tts": "mkdir -p ~/.config/opencode/plugin && cp tts.ts sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && (cd ~/.config/opencode && bun install) && bash scripts/setup-coqui.sh",
"install:reflection-3": "mkdir -p ~/.config/opencode/plugin && cp reflection-3.ts ~/.config/opencode/plugin/reflection.ts && cp sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && cd ~/.config/opencode && bun install",
"install:reflection": "mkdir -p ~/.config/opencode/plugin && cp reflection-3.ts ~/.config/opencode/plugin/reflection.ts && cp sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && cd ~/.config/opencode && bun install",
"install:sentry": "mkdir -p ~/.config/opencode/plugin && cp sentry.ts ~/.config/opencode/plugin/ && node scripts/ensure-deps.js && cd ~/.config/opencode && bun install",
"install:opencode-worktree": "mkdir -p ~/.local/bin && cp scripts/opencode-worktree.ts ~/.local/bin/opencode-worktree && chmod +x ~/.local/bin/opencode-worktree",
"eval": "cd evals && npx promptfoo eval",
Expand Down
2 changes: 1 addition & 1 deletion test/telegram.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ describe("Error Handling", () => {
const result = await response.json()
// Either text_sent is false OR error is present
expect(result.text_sent === false || result.error).toBeTruthy()
- }, 10000) // Extended timeout for network latency
+ }, 20000) // Extended timeout for network latency

it("webhook handles malformed JSON gracefully", async () => {
const response = await fetch(WEBHOOK_URL, {
Expand Down
Loading