diff --git a/README.md b/README.md index 0a3dc53..0b8b9a4 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ The loop happens **across agent turns**, explicitly controlled by the extension' 4. **Loop Continuation**: The hook evaluates state (max iterations, promises) and instructs the CLI to start a new turn using the **original prompt** and clears the agent's memory from the previous turn. 5. **Repeat**: This continues autonomously until completion (max iterations, promises) or user interruption. -The `AfterAgent` hook in `hooks/stop-hook.sh` creates a **self-referential feedback loop** where: +The `AfterAgent` hook in `hooks/stop-hook.js` creates a **self-referential feedback loop** where: - **Stable Context & No Compaction**: The prompt never changes between iterations, and the **previous turn's conversational context is cleared**. This forces the agent to rely on the current state of the files rather than potentially stale or "compacted" chat history, ensuring maximum focus and reliability. - **Persistent State**: The agent's previous work persists in files and git history. - **Autonomous Improvement**: Each iteration allows the agent to see the current state of the codebase and improve upon its past work. @@ -43,7 +43,7 @@ To use Ralph, you must enable hooks and preview features in your `~/.gemini/sett } ``` -> **Note**: `includeDirectories` is required so that the Gemini CLI can access and execute Ralph's internal scripts (`setup.sh`, `cancel.sh`) and hook logic located in the extension's installation directory. +> **Note**: `includeDirectories` is required so that the Gemini CLI can access and execute Ralph's internal scripts (`setup.js`, `cancel.js`) and hook logic located in the extension's installation directory. ## Usage diff --git a/commands/ralph/cancel.toml b/commands/ralph/cancel.toml index 7d201ef..df3f317 100644 --- a/commands/ralph/cancel.toml +++ b/commands/ralph/cancel.toml @@ -4,6 +4,6 @@ You are stopping the Ralph loop. Run the cancel script to deactivate the stop hook and clean up state: ```bash -bash "${extensionPath}/scripts/cancel.sh" +node "${extensionPath}/scripts/cancel.js" ``` """ diff --git a/commands/ralph/loop.toml b/commands/ralph/loop.toml index 0d3ea8a..555fec9 100644 --- a/commands/ralph/loop.toml +++ b/commands/ralph/loop.toml @@ -5,10 +5,10 @@ You are starting the Ralph loop. **Step 1: Initialization** Run the setup script to initialize the loop state: ```bash -bash "${extensionPath}/scripts/setup.sh" $ARGUMENTS +node "${extensionPath}/scripts/setup.js" $ARGUMENTS ``` -**Supported Arguments for setup.sh:** +**Supported Arguments for setup.js:** - `--max-iterations `: Maximum number of loop iterations. - `--completion-promise `: A text token that must be output to finish. diff --git a/hooks/hooks.json b/hooks/hooks.json index 5298890..a3b95d2 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -7,7 +7,7 @@ { "name": "ralph-loop", "type": "command", - "command": "${extensionPath}/hooks/stop-hook.sh", + "command": "node ${extensionPath}/hooks/stop-hook.js", "description": "The Ralph infinite loop mechanism" } ] diff --git a/hooks/stop-hook.js b/hooks/stop-hook.js new file mode 100644 index 0000000..ae7d8de --- /dev/null +++ b/hooks/stop-hook.js @@ -0,0 +1,160 @@ +#!/usr/bin/env node +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +const fs = require('fs'); +const path = require('path'); + +const STATE_DIR = path.join('.gemini', 'ralph'); +const STATE_FILE = path.join(STATE_DIR, 'state.json'); + +function log(message) { + console.error(`Ralph: ${message}`); +} + +function cleanupStateDir() { + if (fs.existsSync(STATE_DIR)) { + try { + fs.rmdirSync(STATE_DIR); + } catch { + // Directory not empty, ignore + } + } +} + +function readStdin() { + return new Promise((resolve) => { + let data = ''; + process.stdin.setEncoding('utf8'); + process.stdin.on('data', (chunk) => { + data += chunk; + }); + process.stdin.on('end', () => { + resolve(data); + }); + }); +} + +async function main() { + const input = await readStdin(); + let lastMessage = ''; + let currentPrompt = ''; + + try { + const parsed = JSON.parse(input); + lastMessage = parsed.prompt_response || ''; + currentPrompt = parsed.prompt || ''; + } catch { + // Ignore parse errors + } + + // Check if loop is active + if (!fs.existsSync(STATE_FILE)) { + console.log(JSON.stringify({ decision: 'allow' })); + process.exit(0); + } + + // Load state + let state; + try { + state = JSON.parse(fs.readFileSync(STATE_FILE, 'utf8')); + } catch { + console.log(JSON.stringify({ decision: 'allow' })); + process.exit(0); + } + + const originalPrompt = state.original_prompt || ''; + + // Validate that this turn belongs to the Ralph loop + if (currentPrompt !== originalPrompt) { + // Normalize prompts for comparison by stripping prefix and extra whitespace + const cleanCurrent = currentPrompt + .replace(/^\/ralph:loop\s+/, '') + .replace(/--max-iterations\s+\S+\s*/g, '') + .replace(/--completion-promise\s+\S+\s*/g, '') + .trim(); + const cleanOriginal = originalPrompt.trim(); + + // Only perform mismatch check if a prompt was actually provided. + // Automated retries (like loop iterations) often have an empty prompt in the hook input. + if (cleanCurrent !== '' && cleanCurrent !== cleanOriginal) { + fs.unlinkSync(STATE_FILE); + cleanupStateDir(); + console.log(JSON.stringify({ + decision: 'allow', + systemMessage: `🚨 Ralph detected a prompt mismatch.\nExpected: '${cleanOriginal}'\nGot: '${cleanCurrent}'` + })); + process.exit(0); + } + } + + const active = state.active; + + if (active !== true) { + console.log(JSON.stringify({ decision: 'allow' })); + process.exit(0); + } + + // Check for completion promise BEFORE incrementing/continuing + const completionPromise = state.completion_promise || ''; + if (completionPromise && lastMessage.includes(`${completionPromise}`)) { + fs.unlinkSync(STATE_FILE); + cleanupStateDir(); + log(`I found a shiny penny! It says ${completionPromise}. The computer is sleeping now.`); + console.log(JSON.stringify({ + decision: 'allow', + continue: false, + stopReason: `✅ Ralph found the completion promise: ${completionPromise}`, + systemMessage: `✅ Ralph found the completion promise: ${completionPromise}` + })); + process.exit(0); + } + + const currentIteration = state.current_iteration || 0; + const maxIterations = state.max_iterations || 5; + + // Check for max iterations + if (currentIteration >= maxIterations) { + fs.unlinkSync(STATE_FILE); + cleanupStateDir(); + log(`I'm tired. I've gone around ${currentIteration} times. The computer is sleeping now.`); + console.log(JSON.stringify({ + decision: 'allow', + continue: false, + stopReason: '✅ Ralph has reached the iteration limit.', + systemMessage: '✅ Ralph has reached the iteration limit.' + })); + process.exit(0); + } + + // Increment iteration + const newIteration = currentIteration + 1; + state.current_iteration = newIteration; + fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2)); + + // Log progress (persona) + log(`I'm doing a circle! Iteration ${currentIteration} is done.`); + + // Maintain the loop by forcing a retry with the original prompt + console.log(JSON.stringify({ + decision: 'deny', + reason: originalPrompt, + systemMessage: `🔄 Ralph is starting iteration ${newIteration}...`, + clearContext: true + })); + + process.exit(0); +} + +main(); diff --git a/hooks/stop-hook.sh b/hooks/stop-hook.sh deleted file mode 100755 index 78b8332..0000000 --- a/hooks/stop-hook.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Functions -log() { - echo "Ralph: $1" >&2 -} - -die() { - echo "❌ Error: $1" >&2 - exit 1 -} - -# Setup paths -STATE_DIR=".gemini/ralph" -STATE_FILE="$STATE_DIR/state.json" - -# Read hook input from stdin -INPUT=$(cat) -LAST_MESSAGE=$(echo "$INPUT" | jq -r '.prompt_response') -CURRENT_PROMPT=$(echo "$INPUT" | jq -r '.prompt') - -# Check if loop is active -if [[ ! -f "$STATE_FILE" ]]; then - echo '{"decision": "allow"}' - exit 0 -fi - -# Validate that this turn belongs to the Ralph loop -ORIGINAL_PROMPT=$(jq -r '.original_prompt' "$STATE_FILE") - -if [[ "$CURRENT_PROMPT" != "$ORIGINAL_PROMPT" ]]; then - # Normalize prompts for comparison by stripping prefix and extra whitespace - # 1. Strip "/ralph:loop" prefix if present - # 2. Strip common flags like --max-iterations and --completion-promise - # 3. Trim whitespace - CLEAN_CURRENT=$(echo "$CURRENT_PROMPT" | sed -E 's/^\/ralph:loop[[:space:]]+//' | sed -E 's/--max-iterations[[:space:]]+[^[:space:]]+[[:space:]]*//' | sed -E 's/--completion-promise[[:space:]]+[^[:space:]]+[[:space:]]*//' | xargs) - CLEAN_ORIGINAL=$(echo "$ORIGINAL_PROMPT" | xargs) - - # Only perform mismatch check if a prompt was actually provided. - # Automated retries (like loop iterations) often have an empty prompt in the hook input. - if [[ -z "$CLEAN_CURRENT" ]]; then - : # Allow empty prompts (likely a Ralph-triggered iteration) - elif [[ "$CLEAN_CURRENT" != "$CLEAN_ORIGINAL" ]]; then - rm -f "$STATE_FILE" - # Only remove directory if it is empty - if [[ -d "$STATE_DIR" ]]; then - rmdir "$STATE_DIR" 2>/dev/null || true - fi - echo "{\"decision\": \"allow\", \"systemMessage\": \"🚨 Ralph detected a prompt mismatch.\\nExpected: '$CLEAN_ORIGINAL'\\nGot: '$CLEAN_CURRENT'\"}" - exit 0 - fi -fi - -ACTIVE=$(jq -r '.active' "$STATE_FILE") - -if [[ "$ACTIVE" != "true" ]]; then - echo '{"decision": "allow"}' - exit 0 -fi - -# Check for completion promise BEFORE incrementing/continuing -COMPLETION_PROMISE=$(jq -r '.completion_promise' "$STATE_FILE") -if [[ -n "$COMPLETION_PROMISE" ]] && [[ "$LAST_MESSAGE" == *"$COMPLETION_PROMISE"* ]]; then - rm -f "$STATE_FILE" - # Only remove directory if it is empty - if [[ -d "$STATE_DIR" ]]; then - rmdir "$STATE_DIR" 2>/dev/null || true - fi - log "I found a shiny penny! It says $COMPLETION_PROMISE. The computer is sleeping now." - echo '{"decision": "allow", "continue": false, "stopReason": "✅ Ralph found the completion promise: '"$COMPLETION_PROMISE"'", "systemMessage": "✅ Ralph found the completion promise: '"$COMPLETION_PROMISE"'"}' - exit 0 -fi - -# Load state -STATE=$(cat "$STATE_FILE") -CURRENT_ITERATION=$(echo "$STATE" | jq -r '.current_iteration') -MAX_ITERATIONS=$(echo "$STATE" | jq -r '.max_iterations') - -# Check for max iterations -if [[ $CURRENT_ITERATION -ge $MAX_ITERATIONS ]]; then - rm -f "$STATE_FILE" - # Only remove directory if it is empty - if [[ -d "$STATE_DIR" ]]; then - rmdir "$STATE_DIR" 2>/dev/null || true - fi - log "I'm tired. I've gone around $CURRENT_ITERATION times. The computer is sleeping now." - echo '{"decision": "allow", "continue": false, "stopReason": "✅ Ralph has reached the iteration limit.", "systemMessage": "✅ Ralph has reached the iteration limit."}' - exit 0 -fi - -# Increment iteration -NEW_ITERATION=$((CURRENT_ITERATION + 1)) -TMP_STATE=$(mktemp) -jq ".current_iteration = $NEW_ITERATION" "$STATE_FILE" > "$TMP_STATE" || die "Failed to increment iteration" -mv "$TMP_STATE" "$STATE_FILE" - -# Log progress (persona) -log "I'm doing a circle! Iteration $CURRENT_ITERATION is done." - -# Maintain the loop by forcing a retry with the original prompt -ORIGINAL_PROMPT=$(jq -r '.original_prompt' "$STATE_FILE") - -# Clear conversation history (LLM memory) -cat <&2 -else - echo "Ralph: I wasn't doing anything anyway!" >&2 -fi - -# Only remove directory if it is empty -if [[ -d "$STATE_DIR" ]]; then - rmdir "$STATE_DIR" 2>/dev/null || true -fi diff --git a/scripts/setup.js b/scripts/setup.js new file mode 100644 index 0000000..63dd35b --- /dev/null +++ b/scripts/setup.js @@ -0,0 +1,112 @@ +#!/usr/bin/env node +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +const fs = require('fs'); +const path = require('path'); + +const STATE_DIR = path.join('.gemini', 'ralph'); +const STATE_FILE = path.join(STATE_DIR, 'state.json'); + +function die(message) { + console.error(`❌ Error: ${message}`); + process.exit(1); +} + +// Ensure directory exists +try { + fs.mkdirSync(STATE_DIR, { recursive: true }); +} catch (err) { + die(`Could not create state directory: ${STATE_DIR}`); +} + +// Defaults +let maxIterations = 5; +let completionPromise = ''; +const promptArgs = []; + +// Parse arguments +// Workaround for LLM tool invocation passing all args as a single string +let args = process.argv.slice(2); +if (args.length === 1 && (args[0].startsWith('-') || args[0].includes(' --'))) { + // Split the single string into arguments, respecting quotes + args = args[0].match(/(?:[^\s"]+|"[^"]*")+/g) || []; + args = args.map(arg => arg.replace(/^"|"$/g, '')); +} + +for (let i = 0; i < args.length; i++) { + const arg = args[i]; + if (arg === '--max-iterations') { + const value = args[++i]; + if (!/^\d+$/.test(value)) { + die(`Invalid iteration limit: '${value || ''}'`); + } + maxIterations = parseInt(value, 10); + } else if (arg === '--completion-promise') { + const value = args[++i]; + if (!value) { + die('Missing promise text.'); + } + completionPromise = value; + } else { + promptArgs.push(arg); + } +} + +const prompt = promptArgs.join(' '); + +// Ensure a prompt was provided +if (!prompt) { + die('No task specified. Run /ralph:help for usage.'); +} + +// Initialize state.json +const state = { + active: true, + current_iteration: 1, + max_iterations: maxIterations, + completion_promise: completionPromise, + original_prompt: prompt, + started_at: new Date().toISOString() +}; + +try { + fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2)); +} catch (err) { + die(`Failed to initialize state file: ${STATE_FILE}`); +} + +// Ralph-style summary for the user and agent +console.log(''); +console.log(`Ralph is helping! I'm going in a circle! + +>> Config: + - Max Iterations: ${maxIterations} + - Completion Promise: ${completionPromise} + - Original Prompt: ${prompt} + +I'm starting now! I hope I don't run out of paste! + +⚠️ WARNING: This loop will continue until the task is complete, + the iteration limit (${maxIterations}) is reached, or a promise is fulfilled.`); + +if (completionPromise) { + console.log(''); + console.log('⚠️ RALPH IS LISTENING FOR A PROMISE TO EXIT'); + console.log(` You must OUTPUT: ${completionPromise}`); +} + +// Output for persona (stderr) +console.log(''); +console.error("Ralph is helping! I'm setting up my toys."); diff --git a/scripts/setup.sh b/scripts/setup.sh deleted file mode 100755 index 15a6276..0000000 --- a/scripts/setup.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Functions -die() { - echo "❌ Error: $1" >&2 - exit 1 -} - -# Setup paths -STATE_DIR=".gemini/ralph" -STATE_FILE="$STATE_DIR/state.json" - -# Ensure directory exists -mkdir -p "$STATE_DIR" || die "Could not create state directory: $STATE_DIR" - -# Defaults -MAX_ITERATIONS=5 -COMPLETION_PROMISE="" -PROMPT="" - -# Workaround for LLM tool invocation passing all args as a single string -if [[ $# -eq 1 ]]; then - if [[ "$1" =~ ^- ]] || [[ "$1" =~ " --" ]]; then - eval set -- "$1" - fi -fi - -# Parse arguments -while [[ $# -gt 0 ]]; do - case "$1" in - --max-iterations) - [[ "${2:-}" =~ ^[0-9]+$ ]] || die "Invalid iteration limit: '${2:-}'" - MAX_ITERATIONS="$2" - shift 2 - ;; - --completion-promise) - [[ -n "${2:-}" ]] || die "Missing promise text." - COMPLETION_PROMISE="$2" - shift 2 - ;; - *) - PROMPT_ARGS+=("$1") - shift - ;; - esac -done -PROMPT="${PROMPT_ARGS[*]:-}" - -# Ensure a prompt was provided -[[ -n "$PROMPT" ]] || die "No task specified. Run /ralph:help for usage." - -# Initialize state.json -jq -n \ - --arg max "$MAX_ITERATIONS" \ - --arg promise "$COMPLETION_PROMISE" \ - --arg prompt "$PROMPT" \ - --arg started_at "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \ - '{ - active: true, - current_iteration: 1, - max_iterations: ($max | tonumber), - completion_promise: $promise, - original_prompt: $prompt, - started_at: $started_at - }' > "$STATE_FILE" || die "Failed to initialize state file: $STATE_FILE" - -# Ralph-style summary for the user and agent -echo "" -cat <> Config: - - Max Iterations: $MAX_ITERATIONS - - Completion Promise: $COMPLETION_PROMISE - - Original Prompt: $PROMPT - -I'm starting now! I hope I don't run out of paste! - -⚠️ WARNING: This loop will continue until the task is complete, - the iteration limit ($MAX_ITERATIONS) is reached, or a promise is fulfilled. -EOF - -if [[ -n "$COMPLETION_PROMISE" ]]; then - echo "" - echo "⚠️ RALPH IS LISTENING FOR A PROMISE TO EXIT" - echo " You must OUTPUT: $COMPLETION_PROMISE" -fi - -# Output for persona (stderr) -echo "" -echo "Ralph is helping! I'm setting up my toys." >&2 diff --git a/tests/hook_test.js b/tests/hook_test.js new file mode 100644 index 0000000..d82104a --- /dev/null +++ b/tests/hook_test.js @@ -0,0 +1,162 @@ +#!/usr/bin/env node +// Copyright 2026 Google LLC +// Licensed under the Apache License, Version 2.0 + +const fs = require('fs'); +const path = require('path'); +const { execSync } = require('child_process'); + +const STATE_DIR = path.join('.gemini', 'ralph'); +const STATE_FILE = path.join(STATE_DIR, 'state.json'); +const HOOK = 'node hooks/stop-hook.js'; + +function setup() { + fs.mkdirSync(STATE_DIR, { recursive: true }); + const state = { + active: true, + current_iteration: 1, + max_iterations: 5, + completion_promise: '', + original_prompt: 'Task', + started_at: '2026-01-27T12:00:00Z' + }; + fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2)); +} + +function cleanup() { + if (fs.existsSync(STATE_FILE)) { + fs.unlinkSync(STATE_FILE); + } + if (fs.existsSync(STATE_DIR)) { + try { + fs.rmdirSync(STATE_DIR); + } catch { + // Directory not empty, ignore + } + } +} + +function assertJsonValue(key, expected) { + const state = JSON.parse(fs.readFileSync(STATE_FILE, 'utf8')); + const keys = key.replace(/^\./, '').split('.'); + let actual = state; + for (const k of keys) { + actual = actual[k]; + } + if (String(actual) !== String(expected)) { + console.log(`FAIL: Expected ${key} to be ${expected}, but got ${actual}`); + process.exit(1); + } +} + +function assertExists(filePath) { + if (!fs.existsSync(filePath)) { + console.log(`FAIL: ${filePath} does not exist`); + process.exit(1); + } +} + +function assertNotExists(filePath) { + if (fs.existsSync(filePath)) { + console.log(`FAIL: ${filePath} still exists`); + process.exit(1); + } +} + +function runHook(input) { + const result = execSync(HOOK, { + input: JSON.stringify(input), + encoding: 'utf8' + }); + return JSON.parse(result.trim()); +} + +function updateState(updates) { + const state = JSON.parse(fs.readFileSync(STATE_FILE, 'utf8')); + Object.assign(state, updates); + fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2)); +} + +// Ensure cleanup on exit +process.on('exit', cleanup); +process.on('SIGINT', () => { cleanup(); process.exit(1); }); +process.on('SIGTERM', () => { cleanup(); process.exit(1); }); + +try { + console.log('Running Test 1: Iteration increment...'); + setup(); + // Simulate initial command invocation with flags (Iteration 1) + let response = runHook({ prompt_response: 'Some response', prompt: '/ralph:loop --max-iterations 5 Task' }); + assertExists(STATE_FILE); + assertJsonValue('.current_iteration', 2); + if (response.systemMessage !== '🔄 Ralph is starting iteration 2...') { + console.log(`FAIL: Expected systemMessage to be '🔄 Ralph is starting iteration 2...', but got '${response.systemMessage}'`); + process.exit(1); + } + + console.log('Running Test 2: Termination (Max Iterations)...'); + setup(); + // Set current_iteration to 5, max_iterations is 5 + updateState({ current_iteration: 5 }); + // Subsequent iterations use the exact ORIGINAL_PROMPT + response = runHook({ prompt_response: 'Last response', prompt: 'Task' }); + assertNotExists(STATE_FILE); + if (response.decision !== 'allow') { + console.log("FAIL: Expected decision to be 'allow' upon termination"); + process.exit(1); + } + + console.log('Running Test 3: Termination (Completion Promise)...'); + setup(); + // Set completion_promise + updateState({ completion_promise: 'DONE' }); + // Agent provides the promise + response = runHook({ prompt_response: 'I am finished. DONE', prompt: 'Task' }); + assertNotExists(STATE_FILE); + if (response.decision !== 'allow') { + console.log("FAIL: Expected decision to be 'allow' upon promise fulfillment"); + process.exit(1); + } + + console.log('Running Test 4: Ghost Loop Cleanup (Unrelated Prompt)...'); + setup(); + // User asks something else while a loop is technically "active" on disk + response = runHook({ prompt_response: 'Paris', prompt: 'What is the capital of France?' }); + assertNotExists(STATE_FILE); + if (response.decision !== 'allow') { + console.log("FAIL: Expected decision to be 'allow' for unrelated prompt"); + process.exit(1); + } + const expectedMsg = "🚨 Ralph detected a prompt mismatch.\nExpected: 'Task'\nGot: 'What is the capital of France?'"; + if (response.systemMessage !== expectedMsg) { + console.log('FAIL: Ghost loop cleanup should show mismatch message'); + process.exit(1); + } + + console.log('Running Test 5: Hijack Prevention (Different Loop Command)...'); + setup(); + // state.json contains "Task" (from an orphaned loop A) + // User now runs a NEW loop B with a different prompt + response = runHook({ prompt_response: 'New Task response', prompt: '/ralph:loop Different Task' }); + assertNotExists(STATE_FILE); + if (response.decision !== 'allow') { + console.log("FAIL: Expected decision to be 'allow' when a different loop command is detected"); + process.exit(1); + } + + console.log('Running Test 6: Automated Retry (Empty Prompt)...'); + setup(); + // Iteration 2+ often has an empty prompt in the hook input + response = runHook({ prompt_response: 'Iteration 2 response', prompt: '' }); + assertExists(STATE_FILE); + assertJsonValue('.current_iteration', 2); + if (response.decision !== 'deny') { + console.log("FAIL: Expected decision to be 'deny' to continue the loop"); + process.exit(1); + } + + console.log('PASS: All tests passed!'); +} catch (err) { + console.error('Test failed with error:', err.message); + process.exit(1); +} diff --git a/tests/hook_test.sh b/tests/hook_test.sh deleted file mode 100755 index 3caa462..0000000 --- a/tests/hook_test.sh +++ /dev/null @@ -1,121 +0,0 @@ -#!/bin/bash -# Copyright 2026 Google LLC -# Licensed under the Apache License, Version 2.0 - -STATE_FILE=".gemini/ralph/state.json" -STATE_DIR=".gemini/ralph" -HOOK="./hooks/stop-hook.sh" - -setup() { - mkdir -p "$STATE_DIR" - jq -n '{active: true, current_iteration: 1, max_iterations: 5, completion_promise: "", original_prompt: "Task", started_at: "2026-01-27T12:00:00Z"}' > "$STATE_FILE" -} - -cleanup() { - rm -f "$STATE_FILE" - # Only remove directory if it is empty - if [[ -d "$STATE_DIR" ]]; then - rmdir "$STATE_DIR" 2>/dev/null || true - fi -} - -trap cleanup EXIT - -assert_json_value() { - local key="$1" - local expected="$2" - local actual=$(jq -r "$key" "$STATE_FILE") - if [[ "$actual" != "$expected" ]]; then - echo "FAIL: Expected $key to be $expected, but got $actual" - exit 1 - fi -} - -assert_exists() { - if [[ ! -f "$1" ]]; then - echo "FAIL: $1 does not exist" - exit 1 - fi -} - -assert_not_exists() { - if [[ -f "$1" ]]; then - echo "FAIL: $1 still exists" - exit 1 - fi -} - -echo "Running Test 1: Iteration increment..." -setup -# Simulate initial command invocation with flags (Iteration 1) -RESPONSE=$(echo '{"prompt_response": "Some response", "prompt": "/ralph:loop --max-iterations 5 Task"}' | "$HOOK") -assert_exists "$STATE_FILE" -assert_json_value ".current_iteration" "2" -if [[ $(echo "$RESPONSE" | jq -r '.systemMessage') != "🔄 Ralph is starting iteration 2..." ]]; then - echo "FAIL: Expected systemMessage to be '🔄 Ralph is starting iteration 2...', but got '$(echo "$RESPONSE" | jq -r '.systemMessage')'" - exit 1 -fi - -echo "Running Test 2: Termination (Max Iterations)..." -setup -# Set current_iteration to 5, max_iterations is 5 -jq '.current_iteration = 5' "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE" -# Subsequent iterations use the exact ORIGINAL_PROMPT -RESPONSE=$(echo '{"prompt_response": "Last response", "prompt": "Task"}' | "$HOOK") -assert_not_exists "$STATE_FILE" -if [[ $(echo "$RESPONSE" | jq -r '.decision') != "allow" ]]; then - echo "FAIL: Expected decision to be 'allow' upon termination" - exit 1 -fi - -echo "Running Test 3: Termination (Completion Promise)..." -setup -# Set completion_promise -jq '.completion_promise = "DONE"' "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE" -# Agent provides the promise -RESPONSE=$(echo '{"prompt_response": "I am finished. DONE", "prompt": "Task"}' | "$HOOK") -assert_not_exists "$STATE_FILE" -if [[ $(echo "$RESPONSE" | jq -r '.decision') != "allow" ]]; then - echo "FAIL: Expected decision to be 'allow' upon promise fulfillment" - exit 1 -fi - -echo "Running Test 4: Ghost Loop Cleanup (Unrelated Prompt)..." -setup -# User asks something else while a loop is technically "active" on disk -RESPONSE=$(echo '{"prompt_response": "Paris", "prompt": "What is the capital of France?"}' | "$HOOK") -assert_not_exists "$STATE_FILE" -if [[ $(echo "$RESPONSE" | jq -r '.decision') != "allow" ]]; then - echo "FAIL: Expected decision to be 'allow' for unrelated prompt" - exit 1 -fi -if [[ $(echo "$RESPONSE" | jq -r '.systemMessage') != "🚨 Ralph detected a prompt mismatch. -Expected: 'Task' -Got: 'What is the capital of France?'" ]]; then - echo "FAIL: Ghost loop cleanup should show mismatch message" - exit 1 -fi - -echo "Running Test 5: Hijack Prevention (Different Loop Command)..." -setup -# state.json contains "Task" (from an orphaned loop A) -# User now runs a NEW loop B with a different prompt -RESPONSE=$(echo '{"prompt_response": "New Task response", "prompt": "/ralph:loop Different Task"}' | "$HOOK") -assert_not_exists "$STATE_FILE" -if [[ $(echo "$RESPONSE" | jq -r '.decision') != "allow" ]]; then - echo "FAIL: Expected decision to be 'allow' when a different loop command is detected" - exit 1 -fi - -echo "Running Test 6: Automated Retry (Empty Prompt)..." -setup -# Iteration 2+ often has an empty prompt in the hook input -RESPONSE=$(echo '{"prompt_response": "Iteration 2 response", "prompt": ""}' | "$HOOK") -assert_exists "$STATE_FILE" -assert_json_value ".current_iteration" "2" -if [[ $(echo "$RESPONSE" | jq -r '.decision') != "deny" ]]; then - echo "FAIL: Expected decision to be 'deny' to continue the loop" - exit 1 -fi - -echo "PASS: All tests passed!" diff --git a/tests/setup_test.js b/tests/setup_test.js new file mode 100644 index 0000000..c2fec87 --- /dev/null +++ b/tests/setup_test.js @@ -0,0 +1,97 @@ +#!/usr/bin/env node +// Copyright 2026 Google LLC +// Licensed under the Apache License, Version 2.0 + +const fs = require('fs'); +const path = require('path'); +const { execSync } = require('child_process'); + +const STATE_DIR = path.join('.gemini', 'ralph'); +const STATE_FILE = path.join(STATE_DIR, 'state.json'); + +function setup() { + if (fs.existsSync(STATE_FILE)) { + fs.unlinkSync(STATE_FILE); + } +} + +function cleanup() { + if (fs.existsSync(STATE_FILE)) { + fs.unlinkSync(STATE_FILE); + } + if (fs.existsSync(STATE_DIR)) { + try { + fs.rmdirSync(STATE_DIR); + } catch { + // Directory not empty, ignore + } + } +} + +function assertExists(filePath) { + if (!fs.existsSync(filePath)) { + console.log(`FAIL: ${filePath} does not exist`); + process.exit(1); + } +} + +function assertJsonValue(key, expected) { + const state = JSON.parse(fs.readFileSync(STATE_FILE, 'utf8')); + // Navigate to nested key (e.g., ".active" -> state.active) + const keys = key.replace(/^\./, '').split('.'); + let actual = state; + for (const k of keys) { + actual = actual[k]; + } + if (String(actual) !== String(expected)) { + console.log(`FAIL: Expected ${key} to be ${expected}, but got ${actual}`); + process.exit(1); + } +} + +function runSetup(args) { + execSync(`node scripts/setup.js ${args}`, { stdio: 'inherit' }); +} + +// Ensure cleanup on exit +process.on('exit', cleanup); +process.on('SIGINT', () => { cleanup(); process.exit(1); }); +process.on('SIGTERM', () => { cleanup(); process.exit(1); }); + +try { + console.log('Running Test 1: Basic setup...'); + setup(); + runSetup('"Task"'); + assertExists(STATE_FILE); + assertJsonValue('.active', true); + assertJsonValue('.current_iteration', 1); + // Check if started_at is a valid ISO 8601 timestamp + const state1 = JSON.parse(fs.readFileSync(STATE_FILE, 'utf8')); + if (!/^\d{4}-\d{2}-\d{2}T/.test(state1.started_at)) { + console.log('FAIL: started_at is missing or not a valid ISO 8601 timestamp'); + process.exit(1); + } + + console.log('Running Test 2: Argument parsing (individual)...'); + setup(); + runSetup('"Task" --max-iterations 5 --completion-promise "DONE"'); + assertJsonValue('.max_iterations', 5); + assertJsonValue('.completion_promise', 'DONE'); + + console.log('Running Test 3: Argument parsing (combined string workaround)...'); + setup(); + runSetup('"/ralph:loop Task --max-iterations 10 --completion-promise FINISHED"'); + assertJsonValue('.max_iterations', 10); + assertJsonValue('.completion_promise', 'FINISHED'); + + console.log('Running Test 4: Complex prompt with spaces and quotes...'); + setup(); + runSetup(`"\\"Solve 'The Riddle'\\" --max-iterations 3"`); + assertJsonValue('.original_prompt', "Solve 'The Riddle'"); + assertJsonValue('.max_iterations', 3); + + console.log('PASS: All tests passed!'); +} catch (err) { + console.error('Test failed with error:', err.message); + process.exit(1); +} diff --git a/tests/setup_test.sh b/tests/setup_test.sh deleted file mode 100755 index e1cd1f2..0000000 --- a/tests/setup_test.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -# Copyright 2026 Google LLC -# Licensed under the Apache License, Version 2.0 - -STATE_FILE=".gemini/ralph/state.json" -STATE_DIR=".gemini/ralph" - -setup() { - rm -f "$STATE_FILE" -} - -cleanup() { - rm -f "$STATE_FILE" - # Only remove directory if it is empty - if [[ -d "$STATE_DIR" ]]; then - rmdir "$STATE_DIR" 2>/dev/null || true - fi -} - -trap cleanup EXIT - -assert_exists() { - if [[ ! -f "$1" ]]; then - echo "FAIL: $1 does not exist" - exit 1 - fi -} - -assert_json_value() { - local key="$1" - local expected="$2" - local actual=$(jq -r "$key" "$STATE_FILE") - if [[ "$actual" != "$expected" ]]; then - echo "FAIL: Expected $key to be $expected, but got $actual" - exit 1 - fi -} - -echo "Running Test 1: Basic setup..." -setup -./scripts/setup.sh "Task" -assert_exists "$STATE_FILE" -assert_json_value ".active" "true" -assert_json_value ".current_iteration" "1" -# Check if started_at is a valid ISO 8601 timestamp -if ! jq -r ".started_at" "$STATE_FILE" | grep -qE '^[0-9]{4}-[0-9]{2}-[0-9]{2}T'; then - echo "FAIL: started_at is missing or not a valid ISO 8601 timestamp" - exit 1 -fi - -echo "Running Test 2: Argument parsing (individual)..." -setup -./scripts/setup.sh "Task" --max-iterations 5 --completion-promise "DONE" -assert_json_value ".max_iterations" "5" -assert_json_value ".completion_promise" "DONE" - -echo "Running Test 3: Argument parsing (combined string workaround)..." -setup -./scripts/setup.sh "/ralph:loop Task --max-iterations 10 --completion-promise FINISHED" -assert_json_value ".max_iterations" "10" -assert_json_value ".completion_promise" "FINISHED" - -echo "Running Test 4: Complex prompt with spaces and quotes..." -setup -./scripts/setup.sh "/ralph:loop \"Solve 'The Riddle'\" --max-iterations 3" -assert_json_value ".original_prompt" "Solve 'The Riddle'" -assert_json_value ".max_iterations" "3" - -echo "Running Test 5: Equality flag parsing (--flag=value)..." -setup -./scripts/setup.sh "Task" --max-iterations=15 --completion-promise=FINISH -assert_json_value ".max_iterations" "15" -assert_json_value ".completion_promise" "FINISH" - -echo "PASS: All tests passed!"