From a369e1df73531505230fbc963bb1428573d1e2c8 Mon Sep 17 00:00:00 2001 From: Ankur Goyal Date: Sat, 17 Jan 2026 16:32:32 -0800 Subject: [PATCH 1/7] initial commit --- js/index.ts | 8 + js/llm.ts | 46 +++++- js/render-messages.ts | 30 +++- js/thread-utils.ts | 337 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 416 insertions(+), 5 deletions(-) create mode 100644 js/thread-utils.ts diff --git a/js/index.ts b/js/index.ts index 49e7863..bf06db3 100644 --- a/js/index.ts +++ b/js/index.ts @@ -40,3 +40,11 @@ export * from "./ragas"; export * from "./value"; export { Evaluators } from "./manifest"; export { makePartial, ScorerWithPartial } from "./partial"; +export { + computeThreadTemplateVars, + formatMessageArrayAsText, + isLLMMessageArray, + isRoleContentMessage, + type LLMMessage, + type ThreadTemplateVars, +} from "./thread-utils"; diff --git a/js/llm.ts b/js/llm.ts index 28ec3d3..059e7d1 100644 --- a/js/llm.ts +++ b/js/llm.ts @@ -8,6 +8,18 @@ import { } from "openai/resources"; import { makePartial, ScorerWithPartial } from "./partial"; import { renderMessages } from "./render-messages"; +import { + computeThreadTemplateVars, + type ThreadTemplateVars, +} from "./thread-utils"; + +/** + * Minimal interface for a Trace object that can provide thread data. + * This is compatible with the Trace interface from the braintrust SDK. + */ +export interface TraceForScorer { + getThread(options?: { preprocessor?: string }): Promise; +} const NO_COT_SUFFIX = "Answer the question by calling `select_choice` with a single choice from {{__choices}}."; @@ -193,6 +205,12 @@ function parseResponse( export type LLMClassifierArgs = { model?: string; useCoT?: boolean; + /** + * Optional trace object for multi-turn scoring. + * When provided, thread template variables (thread_text, thread_count, etc.) + * are automatically computed and made available in the template. 
+ */ + trace?: TraceForScorer; } & LLMArgs & RenderArgs; @@ -217,6 +235,24 @@ export function LLMClassifierFromTemplate({ ) => { const useCoT = runtimeArgs.useCoT ?? useCoTArg ?? true; + // Compute thread template variables if trace is available + // These become available in templates as {{thread_text}}, {{thread_count}}, etc. + let threadVars: Record = {}; + if (runtimeArgs.trace) { + const thread = await runtimeArgs.trace.getThread(); + const computed = computeThreadTemplateVars(thread); + threadVars = { + thread: computed.thread, + thread_text: computed.thread_text, + thread_count: computed.thread_count, + first_message: computed.first_message, + last_message: computed.last_message, + user_messages: computed.user_messages, + assistant_messages: computed.assistant_messages, + human_ai_pairs: computed.human_ai_pairs, + }; + } + const prompt = promptTemplate + "\n" + (useCoT ? COT_SUFFIX : NO_COT_SUFFIX); @@ -228,7 +264,8 @@ export function LLMClassifierFromTemplate({ }, ]; - return await OpenAIClassifier({ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const classifierArgs: any = { name, messages, choiceScores, @@ -237,12 +274,15 @@ export function LLMClassifierFromTemplate({ maxTokens, temperature, __choices: choiceStrings, + // Thread template vars come first so explicit args can override + ...threadVars, ...runtimeArgs, - // Since the logic is a bit funky for computing this, include // it at the end to prevent overrides useCoT, - }); + }; + + return await OpenAIClassifier(classifierArgs); }; Object.defineProperty(ret, "name", { value: name, diff --git a/js/render-messages.ts b/js/render-messages.ts index ee29586..c111890 100644 --- a/js/render-messages.ts +++ b/js/render-messages.ts @@ -1,5 +1,32 @@ import mustache from "mustache"; import { ChatCompletionMessageParam } from "openai/resources"; +import { + isLLMMessageArray, + isRoleContentMessage, + formatMessageArrayAsText, +} from "./thread-utils"; + +/** + * Smart escape function for 
Mustache templates. + * - Strings are passed through unchanged + * - LLM message arrays are formatted as human-readable text + * - Single messages are formatted with role and content + * - Other values are JSON-stringified + */ +function escapeValue(v: unknown): string { + if (typeof v === "string") { + return v; + } + if (isLLMMessageArray(v)) { + return formatMessageArrayAsText(v); + } + if (isRoleContentMessage(v)) { + const content = + typeof v.content === "string" ? v.content : JSON.stringify(v.content); + return `${v.role}: ${content}`; + } + return JSON.stringify(v); +} export function renderMessages( messages: ChatCompletionMessageParam[], @@ -9,8 +36,7 @@ export function renderMessages( ...m, content: m.content ? mustache.render(m.content as string, renderArgs, undefined, { - escape: (v: unknown) => - typeof v === "string" ? v : JSON.stringify(v), + escape: escapeValue, }) : "", })); diff --git a/js/thread-utils.ts b/js/thread-utils.ts new file mode 100644 index 0000000..a523bc5 --- /dev/null +++ b/js/thread-utils.ts @@ -0,0 +1,337 @@ +/** + * Thread utilities for LLM-as-a-judge scorers. + * + * This module provides utilities for working with preprocessed conversation + * messages (threads) in LLM scorer templates. + */ + +/** + * A message with role and content fields (LLM chat message format). + */ +export interface LLMMessage { + role: string; + content: unknown; +} + +function isObject(value: unknown): value is { [key: string]: unknown } { + return value instanceof Object && !(value instanceof Array); +} + +/** + * Check if an item looks like an LLM message (has role and content). + */ +export function isRoleContentMessage(item: unknown): item is LLMMessage { + return isObject(item) && "role" in item && "content" in item; +} + +/** + * Check if a value is an array of LLM messages. 
+ */ +export function isLLMMessageArray(value: unknown): value is LLMMessage[] { + return Array.isArray(value) && value.every(isRoleContentMessage); +} + +function indent(text: string, prefix = " "): string { + return text + .split("\n") + .map((line) => (line ? prefix + line : prefix)) + .join("\n"); +} + +function truncateMiddle(text: string, maxLen: number): string { + if (text.length <= maxLen) return text; + const charsRemoved = text.length - maxLen + 30; + const ellipsis = ` [...${charsRemoved} chars truncated...] `; + const avail = maxLen - ellipsis.length; + if (avail <= 0) return text.slice(0, maxLen); + const left = Math.floor(avail / 2); + const right = avail - left; + return text.slice(0, left) + ellipsis + text.slice(-right); +} + +interface PendingToolCall { + name: string; + args: string; +} + +function isTypedPart( + part: unknown, +): part is { type: string; [key: string]: unknown } { + return isObject(part) && typeof part.type === "string"; +} + +function extractToolCalls(content: unknown[]): Map { + const toolCalls = new Map(); + + for (const part of content) { + if (!isTypedPart(part) || part.type !== "tool_call") continue; + + const id = typeof part.tool_call_id === "string" ? part.tool_call_id : ""; + if (!id) continue; + + const name = + typeof part.tool_name === "string" ? 
part.tool_name : "unknown"; + + let args = ""; + if (isObject(part.arguments)) { + const argsObj = part.arguments; + if (argsObj.type === "valid") { + args = JSON.stringify(argsObj.value); + } else if (typeof argsObj.value === "string") { + args = argsObj.value; + } else { + args = JSON.stringify(argsObj.value); + } + } + + toolCalls.set(id, { name, args }); + } + + return toolCalls; +} + +function unwrapContent(content: unknown): string { + if (typeof content === "string") { + try { + const parsed = JSON.parse(content); + return unwrapContent(parsed); + } catch { + const errorMatch = content.match(/^error:\s*'(.+)'$/s); + if (errorMatch) { + return errorMatch[1]; + } + return content; + } + } + + if (Array.isArray(content)) { + const textParts: string[] = []; + for (const item of content) { + if (isObject(item) && typeof item.text === "string") { + textParts.push(unwrapContent(item.text)); + } else if (typeof item === "string") { + textParts.push(unwrapContent(item)); + } + } + if (textParts.length > 0) { + return textParts.join("\n"); + } + } + + if (isObject(content) && typeof content.text === "string") { + return unwrapContent(content.text); + } + + return typeof content === "string" ? 
content : JSON.stringify(content); +} + +function formatToolResult( + toolCallId: string, + toolName: string, + output: unknown, + pendingToolCalls: Map, +): string { + const pendingCall = pendingToolCalls.get(toolCallId); + const name = toolName || pendingCall?.name || "tool"; + const args = pendingCall?.args || ""; + + const resultContent = unwrapContent(output); + const lines = [`Tool (${name}):`]; + + if (args) { + lines.push(` Args:`); + lines.push(` ${truncateMiddle(args, 500)}`); + } + + const isError = + resultContent.toLowerCase().includes("error:") || + resultContent.toLowerCase().includes('"error"') || + resultContent.toLowerCase().startsWith("error"); + + if (isError) { + lines.push(` Error:`); + lines.push(` ${truncateMiddle(resultContent, 500)}`); + } else { + lines.push(` Result:`); + lines.push(` ${truncateMiddle(resultContent, 500)}`); + } + + if (pendingCall) { + pendingToolCalls.delete(toolCallId); + } + + return lines.join("\n"); +} + +function formatToolResults( + content: unknown[], + pendingToolCalls: Map, +): string[] { + const results: string[] = []; + + for (const part of content) { + if (!isTypedPart(part) || part.type !== "tool_result") continue; + + const toolCallId = + typeof part.tool_call_id === "string" ? part.tool_call_id : ""; + const toolName = typeof part.tool_name === "string" ? part.tool_name : ""; + + results.push( + formatToolResult(toolCallId, toolName, part.output, pendingToolCalls), + ); + } + + return results; +} + +function extractTextContent(content: unknown): string { + if (typeof content === "string") { + return content.trim() ? 
content : ""; + } + + if (!Array.isArray(content)) { + return ""; + } + + const parts: string[] = []; + for (const part of content) { + if (typeof part === "string" && part.trim()) { + parts.push(part); + } else if (isTypedPart(part)) { + if (part.type === "text" && typeof part.text === "string") { + parts.push(part.text); + } else if (part.type === "reasoning" && typeof part.text === "string") { + parts.push(`[thinking: ${part.text.slice(0, 100)}...]`); + } + } else if (isObject(part) && typeof part.text === "string") { + parts.push(part.text); + } + } + + return parts.join("\n"); +} + +/** + * Format an array of LLM messages as human-readable text. + */ +export function formatMessageArrayAsText(messages: LLMMessage[]): string { + const pendingToolCalls = new Map(); + for (const msg of messages) { + if (msg.role === "assistant" && Array.isArray(msg.content)) { + const calls = extractToolCalls(msg.content); + for (const [id, call] of calls) { + pendingToolCalls.set(id, call); + } + } + } + + const parts: string[] = []; + for (const msg of messages) { + const role = msg.role; + const capitalizedRole = role.charAt(0).toUpperCase() + role.slice(1); + + if (role === "tool" && Array.isArray(msg.content)) { + const toolResults = formatToolResults(msg.content, pendingToolCalls); + parts.push(...toolResults); + } else { + const text = extractTextContent(msg.content); + if (text) { + parts.push(`${capitalizedRole}:\n${indent(text)}`); + } + } + } + + return parts.join("\n\n"); +} + +/** + * Template variables computed from a thread for use in LLM-as-a-judge scorers. + */ +export interface ThreadTemplateVars { + thread: unknown[]; + thread_text: string; + thread_count: number; + first_message: unknown | null; + last_message: unknown | null; + user_messages: unknown[]; + assistant_messages: unknown[]; + human_ai_pairs: Array<{ human: unknown; assistant: unknown }>; +} + +/** + * Compute template variables from a thread for use in mustache templates. 
+ * Uses lazy getters so expensive computations only run when accessed. + */ +export function computeThreadTemplateVars( + thread: unknown[], +): ThreadTemplateVars { + let _thread_text: string | undefined; + let _user_messages: unknown[] | undefined; + let _assistant_messages: unknown[] | undefined; + let _human_ai_pairs: + | Array<{ human: unknown; assistant: unknown }> + | undefined; + + return { + thread, + thread_count: thread.length, + + get thread_text(): string { + if (_thread_text === undefined) { + if (isLLMMessageArray(thread)) { + _thread_text = formatMessageArrayAsText(thread); + } else { + _thread_text = thread + .map((item) => + typeof item === "string" ? item : JSON.stringify(item), + ) + .join("\n"); + } + } + return _thread_text; + }, + + get first_message(): unknown | null { + return thread[0] ?? null; + }, + + get last_message(): unknown | null { + return thread[thread.length - 1] ?? null; + }, + + get user_messages(): unknown[] { + if (_user_messages === undefined) { + _user_messages = thread.filter( + (m) => isRoleContentMessage(m) && m.role === "user", + ); + } + return _user_messages; + }, + + get assistant_messages(): unknown[] { + if (_assistant_messages === undefined) { + _assistant_messages = thread.filter( + (m) => isRoleContentMessage(m) && m.role === "assistant", + ); + } + return _assistant_messages; + }, + + get human_ai_pairs(): Array<{ human: unknown; assistant: unknown }> { + if (_human_ai_pairs === undefined) { + _human_ai_pairs = []; + const users = thread.filter( + (m) => isRoleContentMessage(m) && m.role === "user", + ); + const assistants = thread.filter( + (m) => isRoleContentMessage(m) && m.role === "assistant", + ); + const pairCount = Math.min(users.length, assistants.length); + for (let i = 0; i < pairCount; i++) { + _human_ai_pairs.push({ human: users[i], assistant: assistants[i] }); + } + } + return _human_ai_pairs; + }, + }; +} From 468c1550cc063bccc377816cf55c2caf71f10b00 Mon Sep 17 00:00:00 2001 From: Ankur Goyal 
Date: Sat, 17 Jan 2026 16:45:22 -0800 Subject: [PATCH 2/7] add some tests --- js/llm.ts | 6 +- js/render-messages.test.ts | 146 +++++++++++++++++++++++++++++++++++++ js/thread-utils.ts | 24 ++---- 3 files changed, 156 insertions(+), 20 deletions(-) diff --git a/js/llm.ts b/js/llm.ts index 059e7d1..b072c5c 100644 --- a/js/llm.ts +++ b/js/llm.ts @@ -235,15 +235,15 @@ export function LLMClassifierFromTemplate({ ) => { const useCoT = runtimeArgs.useCoT ?? useCoTArg ?? true; - // Compute thread template variables if trace is available - // These become available in templates as {{thread_text}}, {{thread_count}}, etc. + // Compute thread template variables if trace is available. + // These become available in templates as {{thread}}, {{thread_count}}, etc. + // Note: {{thread}} automatically renders as human-readable text via smart escape. let threadVars: Record = {}; if (runtimeArgs.trace) { const thread = await runtimeArgs.trace.getThread(); const computed = computeThreadTemplateVars(thread); threadVars = { thread: computed.thread, - thread_text: computed.thread_text, thread_count: computed.thread_count, first_message: computed.first_message, last_message: computed.last_message, diff --git a/js/render-messages.test.ts b/js/render-messages.test.ts index 39e7000..3d41939 100644 --- a/js/render-messages.test.ts +++ b/js/render-messages.test.ts @@ -37,3 +37,149 @@ describe("renderMessages", () => { expect(rendered[0].content).toBe(""); }); }); + +describe("renderMessages with thread variables", () => { + const sampleThread = [ + { role: "user", content: "Hello, how are you?" }, + { role: "assistant", content: "I am doing well, thank you!" }, + { role: "user", content: "What is the weather like?" }, + { role: "assistant", content: "It is sunny and warm today." 
}, + ]; + + it("{{thread}} renders full conversation as human-readable text", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "{{thread}}" }, + ]; + const rendered = renderMessages(messages, { thread: sampleThread }); + + expect(rendered[0].content).toContain("User:"); + expect(rendered[0].content).toContain("Hello, how are you?"); + expect(rendered[0].content).toContain("Assistant:"); + expect(rendered[0].content).toContain("I am doing well, thank you!"); + expect(rendered[0].content).toContain("What is the weather like?"); + expect(rendered[0].content).toContain("It is sunny and warm today."); + }); + + it("{{thread.0}} renders first message as formatted text", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "First message: {{thread.0}}" }, + ]; + const rendered = renderMessages(messages, { thread: sampleThread }); + + expect(rendered[0].content).toBe( + "First message: user: Hello, how are you?", + ); + }); + + it("{{thread.1}} renders second message as formatted text", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "Second message: {{thread.1}}" }, + ]; + const rendered = renderMessages(messages, { thread: sampleThread }); + + expect(rendered[0].content).toBe( + "Second message: assistant: I am doing well, thank you!", + ); + }); + + it("{{first_message}} renders single message formatted", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "First: {{first_message}}" }, + ]; + const rendered = renderMessages(messages, { + first_message: sampleThread[0], + }); + + expect(rendered[0].content).toBe("First: user: Hello, how are you?"); + }); + + it("{{thread_count}} renders as a number", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "Count: {{thread_count}}" }, + ]; + const rendered = renderMessages(messages, { thread_count: 4 }); + + expect(rendered[0].content).toBe("Count: 4"); 
+ }); + + it("{{user_messages}} renders array of user messages", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "Users said: {{user_messages}}" }, + ]; + const userMessages = sampleThread.filter((m) => m.role === "user"); + const rendered = renderMessages(messages, { user_messages: userMessages }); + + expect(rendered[0].content).toContain("User:"); + expect(rendered[0].content).toContain("Hello, how are you?"); + expect(rendered[0].content).toContain("What is the weather like?"); + expect(rendered[0].content).not.toContain("Assistant:"); + }); + + it("{{user_messages.0}} renders first user message", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "First user: {{user_messages.0}}" }, + ]; + const userMessages = sampleThread.filter((m) => m.role === "user"); + const rendered = renderMessages(messages, { user_messages: userMessages }); + + expect(rendered[0].content).toBe("First user: user: Hello, how are you?"); + }); + + it("{{human_ai_pairs}} renders array of paired turns", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "Pairs: {{human_ai_pairs}}" }, + ]; + const pairs = [ + { human: sampleThread[0], assistant: sampleThread[1] }, + { human: sampleThread[2], assistant: sampleThread[3] }, + ]; + const rendered = renderMessages(messages, { human_ai_pairs: pairs }); + + // Pairs are objects, so they get JSON stringified + expect(rendered[0].content).toContain("Pairs:"); + expect(rendered[0].content).toContain("human"); + expect(rendered[0].content).toContain("assistant"); + }); + + it("{{#thread}}...{{/thread}} iterates over messages", () => { + const messages: ChatCompletionMessageParam[] = [ + { + role: "user", + content: "Messages:{{#thread}}\n- {{role}}: {{content}}{{/thread}}", + }, + ]; + const rendered = renderMessages(messages, { thread: sampleThread }); + + expect(rendered[0].content).toBe( + "Messages:\n- user: Hello, how are you?\n- assistant: 
I am doing well, thank you!\n- user: What is the weather like?\n- assistant: It is sunny and warm today.", + ); + }); + + it("handles empty thread gracefully", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "Thread: {{thread}}" }, + ]; + const rendered = renderMessages(messages, { thread: [] }); + + expect(rendered[0].content).toBe("Thread: "); + }); + + it("handles thread with complex content (arrays)", () => { + const complexThread = [ + { + role: "user", + content: [{ type: "text", text: "Hello with structured content" }], + }, + { role: "assistant", content: "Simple response" }, + ]; + const messages: ChatCompletionMessageParam[] = [ + { role: "user", content: "{{thread}}" }, + ]; + const rendered = renderMessages(messages, { thread: complexThread }); + + expect(rendered[0].content).toContain("User:"); + expect(rendered[0].content).toContain("Hello with structured content"); + expect(rendered[0].content).toContain("Assistant:"); + expect(rendered[0].content).toContain("Simple response"); + }); +}); diff --git a/js/thread-utils.ts b/js/thread-utils.ts index a523bc5..b999b62 100644 --- a/js/thread-utils.ts +++ b/js/thread-utils.ts @@ -246,10 +246,12 @@ export function formatMessageArrayAsText(messages: LLMMessage[]): string { /** * Template variables computed from a thread for use in LLM-as-a-judge scorers. + * + * Note: `thread` automatically renders as human-readable text in Mustache + * templates via the smart escape function. No need for a separate `thread_text`. */ export interface ThreadTemplateVars { thread: unknown[]; - thread_text: string; thread_count: number; first_message: unknown | null; last_message: unknown | null; @@ -261,11 +263,14 @@ export interface ThreadTemplateVars { /** * Compute template variables from a thread for use in mustache templates. * Uses lazy getters so expensive computations only run when accessed. 
+ * + * Note: `thread` (and other message variables) will automatically render as + * human-readable text when used in templates like `{{thread}}` due to the + * smart escape function in renderMessages. */ export function computeThreadTemplateVars( thread: unknown[], ): ThreadTemplateVars { - let _thread_text: string | undefined; let _user_messages: unknown[] | undefined; let _assistant_messages: unknown[] | undefined; let _human_ai_pairs: @@ -276,21 +281,6 @@ export function computeThreadTemplateVars( thread, thread_count: thread.length, - get thread_text(): string { - if (_thread_text === undefined) { - if (isLLMMessageArray(thread)) { - _thread_text = formatMessageArrayAsText(thread); - } else { - _thread_text = thread - .map((item) => - typeof item === "string" ? item : JSON.stringify(item), - ) - .join("\n"); - } - } - return _thread_text; - }, - get first_message(): unknown | null { return thread[0] ?? null; }, From fba639e2cb25f94def802aeb1f708232113d7be9 Mon Sep 17 00:00:00 2001 From: Ankur Goyal Date: Sun, 18 Jan 2026 09:55:06 -0800 Subject: [PATCH 3/7] check for thread variable names --- js/llm.ts | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/js/llm.ts b/js/llm.ts index b072c5c..d29a2fa 100644 --- a/js/llm.ts +++ b/js/llm.ts @@ -21,6 +21,33 @@ export interface TraceForScorer { getThread(options?: { preprocessor?: string }): Promise; } +// Thread-related template variable names that require preprocessor invocation +const THREAD_VARIABLE_NAMES = [ + "thread", + "thread_count", + "first_message", + "last_message", + "user_messages", + "assistant_messages", + "human_ai_pairs", +]; + +// Regex pattern to match thread variable usage in templates +// Matches {{thread}}, {{thread.0}}, {{thread_count}}, {%...thread...%}, etc. 
+const THREAD_VARIABLE_PATTERN = new RegExp( + `\\{\\{\\s*(${THREAD_VARIABLE_NAMES.join("|")})(\\.[^}]*)?\\s*\\}\\}|` + // Mustache: {{thread}}, {{thread.0}} + `\\{%[^%]*\\b(${THREAD_VARIABLE_NAMES.join("|")})\\b[^%]*%\\}|` + // Jinja block: {% for m in thread %} + `\\{\\{[^}]*\\b(${THREAD_VARIABLE_NAMES.join("|")})\\b[^}]*\\}\\}`, // Jinja expr: {{ thread[0] }} + "i", +); + +/** + * Check if a template string uses any thread-related template variables. + */ +function templateUsesThreadVariables(template: string): boolean { + return THREAD_VARIABLE_PATTERN.test(template); +} + const NO_COT_SUFFIX = "Answer the question by calling `select_choice` with a single choice from {{__choices}}."; @@ -235,11 +262,13 @@ export function LLMClassifierFromTemplate({ ) => { const useCoT = runtimeArgs.useCoT ?? useCoTArg ?? true; - // Compute thread template variables if trace is available. + // Compute thread template variables if trace is available AND the template uses them. // These become available in templates as {{thread}}, {{thread_count}}, etc. // Note: {{thread}} automatically renders as human-readable text via smart escape. + // Only call getThread() if the template actually uses thread variables to avoid + // creating unnecessary preprocessor spans. 
let threadVars: Record<string, unknown> = {};
-const THREAD_VARIABLE_PATTERN = new RegExp( - `\\{\\{\\s*(${THREAD_VARIABLE_NAMES.join("|")})(\\.[^}]*)?\\s*\\}\\}|` + // Mustache: {{thread}}, {{thread.0}} - `\\{%[^%]*\\b(${THREAD_VARIABLE_NAMES.join("|")})\\b[^%]*%\\}|` + // Jinja block: {% for m in thread %} - `\\{\\{[^}]*\\b(${THREAD_VARIABLE_NAMES.join("|")})\\b[^}]*\\}\\}`, // Jinja expr: {{ thread[0] }} - "i", -); - /** - * Check if a template string uses any thread-related template variables. + * Check if a template string might use thread-related template variables. + * This is a heuristic - if any variable name appears, we run the preprocessor. */ function templateUsesThreadVariables(template: string): boolean { - return THREAD_VARIABLE_PATTERN.test(template); + return THREAD_VARIABLE_NAMES.some((name) => template.includes(name)); } const NO_COT_SUFFIX = From f5496d07cfe76e98e30b2dd68899b249ae0c3bcd Mon Sep 17 00:00:00 2001 From: Ankur Goyal Date: Sun, 18 Jan 2026 10:04:35 -0800 Subject: [PATCH 6/7] bump --- js/llm.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/js/llm.ts b/js/llm.ts index 5c325b1..48ee242 100644 --- a/js/llm.ts +++ b/js/llm.ts @@ -32,12 +32,17 @@ const THREAD_VARIABLE_NAMES = [ "human_ai_pairs", ]; +// Pattern to match thread variables in template syntax: {{thread, {{ thread, {%...thread, etc. +const THREAD_VARIABLE_PATTERN = new RegExp( + `\\{[\\{%]\\s*(${THREAD_VARIABLE_NAMES.join("|")})`, +); + /** * Check if a template string might use thread-related template variables. - * This is a heuristic - if any variable name appears, we run the preprocessor. + * This is a heuristic - looks for variable names after {{ or {% syntax. 
*/ function templateUsesThreadVariables(template: string): boolean { - return THREAD_VARIABLE_NAMES.some((name) => template.includes(name)); + return THREAD_VARIABLE_PATTERN.test(template); } const NO_COT_SUFFIX = From 310c5cb21b59318291f964ddd58e98dba87f4ae7 Mon Sep 17 00:00:00 2001 From: Ankur Goyal Date: Sun, 18 Jan 2026 10:07:07 -0800 Subject: [PATCH 7/7] bump --- js/llm.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/js/llm.ts b/js/llm.ts index 48ee242..bc20874 100644 --- a/js/llm.ts +++ b/js/llm.ts @@ -22,7 +22,7 @@ export interface TraceForScorer { } // Thread-related template variable names that require preprocessor invocation -const THREAD_VARIABLE_NAMES = [ +export const THREAD_VARIABLE_NAMES = [ "thread", "thread_count", "first_message", @@ -33,7 +33,7 @@ const THREAD_VARIABLE_NAMES = [ ]; // Pattern to match thread variables in template syntax: {{thread, {{ thread, {%...thread, etc. -const THREAD_VARIABLE_PATTERN = new RegExp( +export const THREAD_VARIABLE_PATTERN = new RegExp( `\\{[\\{%]\\s*(${THREAD_VARIABLE_NAMES.join("|")})`, ); @@ -41,7 +41,7 @@ const THREAD_VARIABLE_PATTERN = new RegExp( * Check if a template string might use thread-related template variables. * This is a heuristic - looks for variable names after {{ or {% syntax. */ -function templateUsesThreadVariables(template: string): boolean { +export function templateUsesThreadVariables(template: string): boolean { return THREAD_VARIABLE_PATTERN.test(template); }