Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "ai-sidebar-extension",
"version": "0.6.1",
"version": "0.6.2",
"description": "Chrome extension AI sidebar with LLM providers and MCP support",
"private": true,
"type": "module",
Expand Down
60 changes: 60 additions & 0 deletions src/background/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,66 @@ chrome.runtime.onMessage.addListener((request: ExtensionMessage, sender, sendRes
return true; // Now async to support getBoundTabIdForSidebar
}

case 'EVAL_JUDGE': {
  // Eval judge: ask an LLM to verify the post-conditions of an eval scenario,
  // then reply with a normalized { verdict, score, reasoning } result.
  (async () => {
    try {
      const { agentId, prompt, assistantResponse, toolCalls, postConditions } = request;

      const systemPrompt = `You are an evaluation judge. Given a user prompt, assistant response, tool calls made, and expected post-conditions, determine whether the post-conditions were met.

Respond ONLY with a JSON object (no markdown fences, no extra text):
{"verdict": "pass" or "fail", "score": 0.0 to 1.0, "reasoning": "brief explanation"}`;

      // toolCalls may be absent on the message; guard before mapping so a
      // scenario with no tool activity doesn't crash the judge.
      const toolCallsSummary = (toolCalls ?? [])
        .map(
          (tc: { toolName: string; input: unknown; output: unknown; status: string }) =>
            `- ${tc.toolName}(${JSON.stringify(tc.input)}) → ${tc.status}: ${JSON.stringify(tc.output)}`
        )
        .join('\n');

      const userMsg = `## User Prompt
${prompt}

## Assistant Response
${assistantResponse}

## Tool Calls
${toolCallsSummary || '(none)'}

## Post-Conditions to Verify
${postConditions}`;

      // temperature 0: the judge should be as deterministic as the provider allows.
      const responseText = await aiClient.generateTextForAgent(agentId, systemPrompt, userMsg, {
        temperature: 0,
      });

      // Parse JSON from response (strip markdown fences if present).
      const cleaned = responseText
        .replace(/```(?:json)?\s*/g, '')
        .replace(/```\s*/g, '')
        .trim();
      const parsed = JSON.parse(cleaned);

      // Normalize the verdict: anything other than an explicit "pass" counts as
      // "fail", so unexpected judge output can never be mistaken for success.
      const verdict = parsed.verdict === 'pass' ? 'pass' : 'fail';
      // Clamp the score into [0, 1]; NaN/Infinity or a non-number falls back to 0.
      const rawScore =
        typeof parsed.score === 'number' && Number.isFinite(parsed.score) ? parsed.score : 0;
      const score = Math.min(1, Math.max(0, rawScore));

      sendResponse({
        success: true,
        verdict,
        score,
        reasoning: parsed.reasoning || '',
      });
    } catch (error) {
      log.error('[Background] EVAL_JUDGE error:', error);
      // success: true is deliberate here — a judge failure is reported as a
      // scored "fail" result rather than a transport error, so the eval runner
      // still records an outcome for the scenario.
      sendResponse({
        success: true,
        score: 0,
        verdict: 'fail',
        // The catch covers agent lookup, the LLM call, AND JSON parsing — keep
        // the message generic instead of claiming a parse failure.
        reasoning: `Judge error: ${error instanceof Error ? error.message : 'Unknown error'}`,
      });
    }
  })();
  return true; // Keep the message channel open for the async sendResponse.
}

default:
log.debug('Unknown message type:', request.type);
return false; // No async response needed
Expand Down
39 changes: 38 additions & 1 deletion src/lib/ai/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import log from '../logger';
import { createOpenAI } from '@ai-sdk/openai';
import { createAnthropic } from '@ai-sdk/anthropic';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { streamText, type LanguageModel, CoreMessage, type JSONValue } from 'ai';
import { streamText, generateText, type LanguageModel, CoreMessage, type JSONValue } from 'ai';
import type { AgentConfig } from '../storage/config';
import type { AIProvider, ToolCall } from '../../types';
import { ConfigStorage } from '../storage/config';
Expand Down Expand Up @@ -758,6 +758,43 @@ export class AIClient {
this.abortController?.abort();
}

/**
* Non-streaming text generation for a specific agent.
* Used for eval judge and other non-interactive use cases.
*/
/**
 * Generate a single, non-streaming completion for the given agent.
 * Intended for non-interactive flows (e.g. the eval judge) where the full
 * response text is needed at once.
 *
 * @param agentId - ID of the configured agent to run the request against.
 * @param systemPrompt - System instructions for the model.
 * @param userMessage - Content of the single user-role message.
 * @param options - Optional generation settings; temperature defaults to 0.
 * @returns The model's complete text response.
 * @throws If the agent is unknown, or has neither an API key nor an endpoint.
 */
async generateTextForAgent(
  agentId: string,
  systemPrompt: string,
  userMessage: string,
  options?: { temperature?: number }
): Promise<string> {
  const agent = await this.configStorage.getAgent(agentId);
  if (!agent) {
    throw new Error(`Agent ${agentId} not found`);
  }
  if (!agent.apiKey && !agent.endpoint) {
    throw new Error(`No API key or endpoint configured for agent "${agent.name}"`);
  }

  // Build the provider-specific model instance for this agent.
  const model = this.createProviderForAgent(agent)();

  const messages: CoreMessage[] = [
    { role: 'system', content: systemPrompt },
    { role: 'user', content: userMessage },
  ];

  const { text } = await generateText({
    model,
    messages,
    temperature: options?.temperature ?? 0,
    maxRetries: 2,
  });

  return text;
}

/**
* Test connection with provided agent details (for new agents before saving)
*/
Expand Down
6 changes: 6 additions & 0 deletions src/lib/commands/builtins.ts
Original file line number Diff line number Diff line change
Expand Up @@ -317,5 +317,11 @@ export function createBuiltinCommands(
// Clears the current chat transcript.
clear: () => {
  clearChat();
},

eval: () => {
  // Not a no-op: broadcasts 'start-eval' so listeners can react. The real
  // command handling is intercepted in the sidebar's handleSendMessage;
  // the command is registered here so /help lists it.
  window.dispatchEvent(new CustomEvent('start-eval'));
},
};
}
Loading