From 6e2e9abc4c9ba8a3dcb12bdf60830b57a310568b Mon Sep 17 00:00:00 2001 From: pelikhan <4175913+pelikhan@users.noreply.github.com> Date: Wed, 28 May 2025 00:24:48 +0000 Subject: [PATCH 001/641] [genai] generated blog posts --- ...and-custom-prompt-flows-in-genaiscript-.md | 224 ++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 docs/src/content/docs/blog/drafts/-from-idea-to-automation-interactive-user-input-and-custom-prompt-flows-in-genaiscript-.md diff --git a/docs/src/content/docs/blog/drafts/-from-idea-to-automation-interactive-user-input-and-custom-prompt-flows-in-genaiscript-.md b/docs/src/content/docs/blog/drafts/-from-idea-to-automation-interactive-user-input-and-custom-prompt-flows-in-genaiscript-.md new file mode 100644 index 0000000000..e9b7e7bcb0 --- /dev/null +++ b/docs/src/content/docs/blog/drafts/-from-idea-to-automation-interactive-user-input-and-custom-prompt-flows-in-genaiscript-.md @@ -0,0 +1,224 @@ +--- +title: '"From Idea to Automation: Interactive User Input and Custom Prompt Flows + in GenAIScript"' +date: 2025-05-28 +authors: genaiscript +tags: + - genaiscript + - user input + - automation + - prompt engineering + - scripting +draft: true + +--- + +# "From Idea to Automation: Interactive User Input and Custom Prompt Flows in GenAIScript" + +Giving automation scripts the ability to gather user input at runtime creates flexible, responsive, and delightful user experiences. In this guide, we’ll walk through a practical GenAIScript that demonstrates how to construct customizable prompt flows that adapt based on user choices. By the end, you’ll know how to write scripts that ask questions, branch logic, and generate AI prompts—all in response to your user's needs. + +Let’s break down the script line by line, highlighting the 'why' and 'how' for each step. 🚀 + +--- + +## Full Script Overview + +```javascript +// GenAIScript: Interactive User Input and Custom Prompt Flows +// This script demonstrates soliciting user input, branching, and dynamic prompt generation + +async function main() { + // Step 1: Greet the user and ask for their automation goal + const userGoal = await input.text({ + label: "What would you like to automate today?", + placeholder: "e.g., summarize a document, generate a report, refactor code..." + }); + + // Step 2: Offer a choice of automation type + const automationType = await input.choice({ + label: "Select the type of automation flow:", + choices: [ + { label: "Single-step (one prompt)", value: "single" }, + { label: "Multi-step (guided flow)", value: "multi" }, + { label: "Not sure (recommend for me)", value: "recommend" } + ] + }); + + let prompt; + let recommendation; + if (automationType === "recommend") { + // Step 3a: Recommend a flow based on the user's goal + prompt = `The user wants to automate: ${userGoal}.\nSuggest whether a single-step or multi-step automation is better and why.`; + recommendation = await ai.prompt(prompt); + output.print("Recommendation: " + recommendation.trim()); + } + + // Step 4: Adapt prompt flow based on user input + if (automationType === "single" || (automationType === "recommend" && recommendation && recommendation.includes("single"))) { + // Single-step: ask for details and run one prompt + const details = await input.text({ + label: "Provide any details or context for your automation:", + placeholder: "Paste relevant text or describe your input..." 
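+            // note: per this post, placeholder is only a UI hint; the resolved value is the text the user actually submits (possibly empty)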
+ }); + prompt = `Task: ${userGoal}\nDetails: ${details}\nPlease perform this automation in a single step.`; + const result = await ai.prompt(prompt); + output.print("Automation Result:\n" + result.trim()); + } else { + // Multi-step: guided flow example + output.print("Let's break down your automation into steps."); + const step1 = await input.text({ + label: "Step 1: Describe the first part of your automation:", + placeholder: "e.g., extract main ideas from text, select files, etc." + }); + const step2 = await input.text({ + label: "Step 2: Describe the next part:", + placeholder: "e.g., summarize, transform, analyze, etc." + }); + prompt = `Task: ${userGoal}\nStep 1: ${step1}\nStep 2: ${step2}\nGuide the user through this automation, providing output for each step.`; + const result = await ai.prompt(prompt); + output.print("Multi-step Automation Result:\n" + result.trim()); + } +} + +main(); +``` + +--- + +## Line-by-Line Walkthrough + +### 1. Script Entry Point + +```javascript +async function main() { +``` +Every GenAIScript script begins execution from a top-level function, often called `main`. Marking it `async` allows us to use `await` to handle user prompts and AI calls asynchronously. + +--- + +### 2. Ask for the User's Goal + +```javascript +const userGoal = await input.text({ + label: "What would you like to automate today?", + placeholder: "e.g., summarize a document, generate a report, refactor code..." +}); +``` +- `input.text`: Prompts for free-form text ([docs](https://microsoft.github.io/genaiscript/reference/apis/input/#text)). +- `label`: Appears as the prompt to guide the user. +- `placeholder`: A hint showing possible answers. + +This line invites the user to share their automation goal, letting the script adapt to unique workflows. + +--- + +### 3. Offer an Automation Type Choice + +```javascript +const automationType = await input.choice({ + label: "Select the type of automation flow:", + choices: [ + { label: "Single-step (one prompt)", value: "single" }, + { label: "Multi-step (guided flow)", value: "multi" }, + { label: "Not sure (recommend for me)", value: "recommend" } + ] +}); +``` +- `input.choice`: Lets users pick from a list ([docs](https://microsoft.github.io/genaiscript/reference/apis/input/#choice)). +- Each `choice` provides a `label` (user-facing) and a `value` (used in branching logic). + +This key interaction is your branch point: does the user want a quick automation, a guided multi-step process, or a recommendation? + +--- + +### 4. Handle the "Recommend" Path + +```javascript +let prompt; +let recommendation; +if (automationType === "recommend") { + prompt = `The user wants to automate: ${userGoal}.\nSuggest whether a single-step or multi-step automation is better and why.`; + recommendation = await ai.prompt(prompt); + output.print("Recommendation: " + recommendation.trim()); +} +``` +- If the user isn't sure, construct a prompt for the AI to suggest the best flow. +- `ai.prompt`: Send a dynamically constructed prompt to the AI model ([docs](https://microsoft.github.io/genaiscript/reference/apis/ai/#prompt)). +- `output.print`: Display AI suggestions to the user ([docs](https://microsoft.github.io/genaiscript/reference/apis/output/#print)). + +This approach harnesses AI to provide personalized guidance! + +--- + +### 5. 
Launch the Single-Step Flow + +```javascript +if (automationType === "single" || (automationType === "recommend" && recommendation && recommendation.includes("single"))) { + const details = await input.text({ + label: "Provide any details or context for your automation:", + placeholder: "Paste relevant text or describe your input..." + }); + prompt = `Task: ${userGoal}\nDetails: ${details}\nPlease perform this automation in a single step.`; + const result = await ai.prompt(prompt); + output.print("Automation Result:\n" + result.trim()); +} +``` +- If the user chose single-step, or the AI recommended it, gather more context. +- Generate a prompt tailored to this task and context. +- Submit to the AI and print the result. + +This shows how to create highly responsive, adaptive single-turn flows from user input. + +--- + +### 6. Run a Multi-Step, Guided Automation Flow + +```javascript +else { + output.print("Let's break down your automation into steps."); + const step1 = await input.text({ + label: "Step 1: Describe the first part of your automation:", + placeholder: "e.g., extract main ideas from text, select files, etc." + }); + const step2 = await input.text({ + label: "Step 2: Describe the next part:", + placeholder: "e.g., summarize, transform, analyze, etc." + }); + prompt = `Task: ${userGoal}\nStep 1: ${step1}\nStep 2: ${step2}\nGuide the user through this automation, providing output for each step.`; + const result = await ai.prompt(prompt); + output.print("Multi-step Automation Result:\n" + result.trim()); +} +``` +- For the multi-step path, guide the user through individual parts of their process, collecting input at each stage. +- Build a rich, structured prompt using these step descriptions. +- Ask the AI to process and guide through all provided steps. + +This section models how to break tasks down for the AI and generate a more nuanced automation flow. + +--- + +### 7. Script Execution + +```javascript +main(); +``` +- Trigger the `main` function to start; in GenAIScript, no need to import or run anything else! + +--- + +## Best Practices & Patterns + +- **Prompt Dynamically:** Use user responses to shape AI prompts (`Task: ${userGoal}`), making outcomes tailored and meaningful. +- **Use Choices for Branching:** Present choices for clear, guided flows (single vs. multi-step). +- **Leverage AI for Guidance:** When in doubt, let AI recommend the best way forward. +- **Keep Asking:** For complex automations, gather multiple pieces of info in sequence. + +For more details and API reference, see the [official GenAIScript documentation](https://microsoft.github.io/genaiscript/). + +--- + +## ✨ Summary + +Interactive input and custom prompt flows turn static scripts into powerful, user-responsive automation tools. By mixing `input.text`, `input.choice`, and dynamic AI calls, you can craft scripts that feel conversational, adapt to any scenario, and deliver rich automation experiences—entirely from the command line. + +Happy scripting! 
💡🤖 \ No newline at end of file From 29c354744095d81c47739e93f7c0ade56e6f75a8 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 28 May 2025 13:40:01 +0000 Subject: [PATCH 002/641] extend provider resolution --- packages/core/src/promptcontext.ts | 10 ++++++++-- packages/core/src/types/prompt_template.d.ts | 3 +++ poem.txt | 1 - 3 files changed, 11 insertions(+), 3 deletions(-) delete mode 100644 poem.txt diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index 81716e54bf..f58de327ca 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -302,12 +302,18 @@ export async function createPromptContext( hide: false, token: true, }) - if (provider.error) return undefined + if (provider.error) { + dbg(`Error resolving provider %s: %s`, id, provider.error) + return undefined + } return deleteUndefinedValues({ id: provider.provider, error: provider.error, models: provider.models || [], - }) satisfies LanguageModelProviderInfo + base: provider.base, + token: provider.token, + version: provider.version, + } satisfies LanguageModelProviderInfo) }, cache: async (name: string) => { const res = createCache(name, { type: "memory" }) diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index c00a4d709d..21932e83cb 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -6049,8 +6049,11 @@ interface LanguageModelInfo { interface LanguageModelProviderInfo { id: ModelProviderType + version?: string error?: string models: LanguageModelInfo[] + base?: string + token?: string // Optional token for the provider } interface LanguageModelHost { diff --git a/poem.txt b/poem.txt deleted file mode 100644 index 7b967f7bd4..0000000000 --- a/poem.txt +++ /dev/null @@ -1 +0,0 @@ -Sunlight dances through autumn leaves, painting shadows on morning frost. From 3eaf0379d92b9be9da37d9232e9f0b6bac3becc9 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 28 May 2025 08:10:16 -0700 Subject: [PATCH 003/641] mcp environment support (#1577) * support for env in mcp * genai: /docs [skip ci] * pr feedback * docs --------- Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../docs/reference/scripts/mcp-tools.mdx | 58 +++++++++++++------ .../content/docs/reference/scripts/system.mdx | 4 +- packages/cli/genaisrc/system.mcp.genai.mts | 4 +- packages/core/src/chat.ts | 13 ++++- packages/core/src/mcpclient.ts | 40 ++++++++++--- packages/core/src/runpromptcontext.ts | 30 +++++++++- packages/core/src/secretscanner.ts | 4 +- packages/sample/genaisrc/mcp-github.genai.mts | 23 ++++++++ 8 files changed, 137 insertions(+), 39 deletions(-) create mode 100644 packages/sample/genaisrc/mcp-github.genai.mts diff --git a/docs/src/content/docs/reference/scripts/mcp-tools.mdx b/docs/src/content/docs/reference/scripts/mcp-tools.mdx index 8eabfed98d..4e9d54d000 100644 --- a/docs/src/content/docs/reference/scripts/mcp-tools.mdx +++ b/docs/src/content/docs/reference/scripts/mcp-tools.mdx @@ -1,28 +1,28 @@ --- title: Model Context Protocol Tools sidebar: - order: 5.09 + order: 5.09 description: Learn how to configure and securely use Model Context Protocol - (MCP) tools and servers, including tool output validation, secret detection, - and security best practices for AI scripting. + (MCP) tools and servers, including tool output validation, secret detection, + and security best practices for AI scripting. 
keywords:
  - Model Context Protocol
  - MCP server configuration
  - tool output validation
  - AI tool security
  - prompt injection detection
hero:
  image:
    alt:
      A small, flat 8-bit style image displays two rectangular server blocks
      joined by a straight line. Around each server, small colored rectangles
      symbolize different tools. A shield, padlock, and magnifying glass icon
      appear next to the servers, representing security, secret detection, and
      validation respectively. A circular checkmark stands for signature
      validation, and a tiny folder icon denotes files. The scene uses five
      distinct, corporate-looking solid colors, with a simple, geometric,
      minimalistic design and no background or text.
  file: ./mcp-tools.png
---

import { Image } from "astro:assets"

@@ -123,6 +123,26 @@ script({
 })
 ```

### Environment variables

Setting the `env` field in the `mcpServers` or `mcpAgentServers` configuration
allows you to pass environment variables to the MCP server. Leave a value empty
and GenAIScript automatically injects the environment variable of the same name
from the current process.
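In the example below, the empty string value tells GenAIScript to copy `MY_ENV_VAR` from the host `process.env` into the server's environment when it starts: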
+ +```js 'MY_ENV_VAR: ""' +script({ + mcpServers: { + memory: { + command: "npx", + args: ["-y", "@modelcontextprotocol/server-memory"], + env: { + MY_ENV_VAR: "", + }, + }, + }, +}) +``` + ### `defTool` You can use [defTool](/genaiscript/reference/scripts/tools) to declare a set of server configurations, diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx index f65f9227cd..8d92d7267e 100644 --- a/docs/src/content/docs/reference/scripts/system.mdx +++ b/docs/src/content/docs/reference/scripts/system.mdx @@ -2847,7 +2847,7 @@ export default function (ctx: ChatGenerationContext) { "system.mcp.detectPromptInjection" ] as ContentSafetyOptions["detectPromptInjection"] const intent = vars["system.mcp.intent"] - + const _env = vars["system.mcp.env"] as Record | undefined if (!id) throw new Error("Missing required parameter: id") if (!command) throw new Error("Missing required parameter: command") @@ -2859,13 +2859,13 @@ export default function (ctx: ChatGenerationContext) { contentSafety, detectPromptInjection, intent, + env: _env, } satisfies Omit const toolOptions = { maxTokens, contentSafety, detectPromptInjection, } satisfies DefToolOptions - dbg(`loading %s %O %O`, id, config, toolOptions) const configs = { [id]: config, } satisfies McpServersConfig diff --git a/packages/cli/genaisrc/system.mcp.genai.mts b/packages/cli/genaisrc/system.mcp.genai.mts index dd6e793039..5fbfa9ae9e 100644 --- a/packages/cli/genaisrc/system.mcp.genai.mts +++ b/packages/cli/genaisrc/system.mcp.genai.mts @@ -70,7 +70,7 @@ export default function (ctx: ChatGenerationContext) { "system.mcp.detectPromptInjection" ] as ContentSafetyOptions["detectPromptInjection"] const intent = vars["system.mcp.intent"] - + const _env = vars["system.mcp.env"] as Record | undefined if (!id) throw new Error("Missing required parameter: id") if (!command) throw new Error("Missing required parameter: command") @@ -82,13 +82,13 @@ export default function (ctx: ChatGenerationContext) { contentSafety, detectPromptInjection, intent, + env: _env, } satisfies Omit const toolOptions = { maxTokens, contentSafety, detectPromptInjection, } satisfies DefToolOptions - dbg(`loading %s %O %O`, id, config, toolOptions) const configs = { [id]: config, } satisfies McpServersConfig diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index ff180605ca..6e2a4839ed 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -230,6 +230,7 @@ async function runToolCalls( let edits: Edits[] = [] if (!options.fallbackTools) { + dbgt(`fallback: appending tool calls to assistant message`) messages.push({ role: "assistant", tool_calls: resp.toolCalls.map((c) => ({ @@ -249,6 +250,7 @@ async function runToolCalls( // call tool and run again for (const call of resp.toolCalls) { checkCancelled(cancellationToken) + dbgt(`running tool call %s`, call.name) const toolTrace = trace.startTraceDetails(`📠 tool call ${call.name}`) try { await runToolCall( @@ -291,6 +293,7 @@ async function runToolCall( let todos: { tool: ToolCallback; args: any }[] if (call.name === "multi_tool_use.parallel") { + dbgt(`multi tool call`) // special undocumented openai hallucination, argument contains multiple tool calls // { // "id": "call_D48fudXi4oBxQ2rNeHhpwIKh", @@ -341,9 +344,14 @@ async function runToolCall( for (const todo of todos) { const { tool, args } = todo const dbgtt = dbgt.extend(tool.spec.name) - dbgtt(`running %O`, args) const { maxTokens: maxToolContentTokens = MAX_TOOL_CONTENT_TOKENS } = 
tool.options || {} + dbgtt( + `running %s maxt %d\n%O`, + tool.spec.name, + maxToolContentTokens, + args + ) const context: ToolCallContext = { log: (message: string) => { logInfo(message) @@ -359,6 +367,7 @@ async function runToolCall( let output: ToolCallOutput try { output = await tool.impl({ context, ...args }) + dbgtt(`output: %O`, output) } catch (e) { dbgtt(e) logWarn(`tool: ${tool.spec.name} error`) @@ -367,7 +376,7 @@ async function runToolCall( output = errorMessage(e) } if (output === undefined || output === null) - throw new Error(`error: tool ${tool.spec.name} raised an error`) + output = "no output from tool" let toolContent: string = undefined let toolEdits: Edits[] = undefined if (typeof output === "string") { diff --git a/packages/core/src/mcpclient.ts b/packages/core/src/mcpclient.ts index aeb476d59e..d3bb55bc51 100644 --- a/packages/core/src/mcpclient.ts +++ b/packages/core/src/mcpclient.ts @@ -48,6 +48,18 @@ function toolResultContentToText(res: any) { return text } +function resolveMcpEnv(_env: Record) { + if (!_env) return _env + const res = structuredClone(_env) + Object.entries(res) + .filter(([k, v]) => v === "") + .forEach(([key, value]) => { + dbg(`filling env var: %s`, key) + res[key] = process.env[key] || "" + }) + return res +} + export class McpClientManager extends EventTarget implements AsyncDisposable { private _clients: McpClientProxy[] = [] constructor() { @@ -70,8 +82,10 @@ export class McpClientManager extends EventTarget implements AsyncDisposable { tools: _toolsConfig, generator, intent, + env: unresolvedEnv, ...rest } = serverConfig + const mcpEnv = resolveMcpEnv(unresolvedEnv) const toolSpecs = arrayify(_toolsConfig).map(toMcpToolSpecification) const commonToolOptions = deleteUndefinedValues({ contentSafety, @@ -81,7 +95,6 @@ export class McpClientManager extends EventTarget implements AsyncDisposable { // genaiscript:mcp:id const dbgc = dbg.extend(id) dbgc(`starting`) - dbgc(`intent: %O`, intent) const trace = options.trace.startTraceDetails(`🪚 mcp ${id}`) try { const { Client } = await import( @@ -93,12 +106,22 @@ export class McpClientManager extends EventTarget implements AsyncDisposable { const progress: (msg: string) => ProgressCallback = (msg) => (ev) => dbgc(msg + " ", `${ev.progress || ""}/${ev.total || ""}`) const capabilities = { tools: {} } - let transport = new StdioClientTransport({ - ...rest, - stderr: "inherit", - }) + dbgc( + `creating transport %O`, + deleteUndefinedValues({ + ...rest, + env: mcpEnv ? 
Object.keys(mcpEnv) : undefined, + }) + ) + let transport = new StdioClientTransport( + deleteUndefinedValues({ + ...rest, + env: mcpEnv, + stderr: "inherit", + }) + ) let client = new Client({ name: id, version }, { capabilities }) - dbg(`connecting client to transport`) + dbgc(`connecting stdio transport`) await client.connect(transport) const ping: McpClient["ping"] = async () => { @@ -199,7 +222,6 @@ export class McpClientManager extends EventTarget implements AsyncDisposable { ...commonToolOptions, ...(toolSpec || {}), } satisfies DefToolOptions - dbgc(`tool options %O`, toolOptions) return { spec: { name: `${id}_${name}`, @@ -209,11 +231,11 @@ export class McpClientManager extends EventTarget implements AsyncDisposable { options: toolOptions, generator, impl: async (args: any) => { - const { context, ...rest } = args + const { context, ...restArgs } = args const res = await client.callTool( { name: name, - arguments: rest, + arguments: restArgs, }, undefined, { diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts index 8f568a0152..40285ff6c3 100644 --- a/packages/core/src/runpromptcontext.ts +++ b/packages/core/src/runpromptcontext.ts @@ -1,5 +1,3 @@ -import debug from "debug" -const dbg = debug("genaiscript:prompt:context") // cspell: disable import { PromptNode, @@ -108,7 +106,33 @@ import { dotGenaiscriptPath } from "./workdir" import { prettyBytes } from "./pretty" import { createCache } from "./cache" import { measure } from "./performance" +import { genaiscriptDebug } from "./debug" +import debug from "debug" +const dbg = genaiscriptDebug("prompt:context") +/** + * Creates a chat turn generation context object for building prompt nodes and utilities in a chat session. + * + * @param options - Generation options that configure prompt and model behaviors. + * @param trace - Trace logger for output and debugging; collects logs and tracing information for the turn. + * @param cancellationToken - Token used for supporting cancellation of asynchronous operations within this context. + * + * @returns Chat turn generation context with a prompt node for composition and methods: + * - node: The root prompt node for this chat turn. + * - writeText: Adds a text (or assistant/system) message node, with optional configuration. + * - assistant: Shortcut for adding a message as assistant. + * - $: Tagged template for string templates. Returns a PromptTemplateString for further configuration (setting priority, jinja/mustache transforms, roles, caching, etc.). + * - def: Defines a named prompt artifact (text, file, etc.) in the prompt context. + * - defImages: Defines image input(s) as prompt nodes, supports tiling and various source types. + * - defData: Defines structured data input as a prompt node. + * - defDiff: Defines a diff between two items and appends as a prompt node. + * - fence: Wraps body in a code fence and defines as a prompt artifact. + * - importTemplate: Imports and expands a prompt template. + * - console: Logging interface for messages, warnings, errors, and debugging within the context. + * + * This context is generally used by higher-level orchestration to build structured prompt data, + * images, and system messages suitable for multi-turn chat generations. 
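 *
 * @example
 * // illustrative sketch only (not from the source): compose a single turn
 * // using the methods documented above; `options`, `trace`, `cancellationToken`,
 * // and `file` are assumed to be in scope
 * const turn = createChatTurnGenerationContext(options, trace, cancellationToken)
 * turn.def("FILE", file)
 * turn.$`Summarize FILE in one paragraph.`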
+ */ export function createChatTurnGenerationContext( options: GenerationOptions, trace: MarkdownTrace, @@ -474,7 +498,7 @@ export function createChatGenerationContext( ) ) } else if (typeof name === "object") { - dbg(`mcp %O`, name) + dbg(`mcp: %o`, Object.keys(name)) for (const kv of Object.entries(name)) { const [id, def] = kv if ((def as McpServerConfig).command) { diff --git a/packages/core/src/secretscanner.ts b/packages/core/src/secretscanner.ts index 81340ea889..b30d0830b6 100644 --- a/packages/core/src/secretscanner.ts +++ b/packages/core/src/secretscanner.ts @@ -1,8 +1,8 @@ +import { genaiscriptDebug } from "./debug" import { runtimeHost } from "./host" import { TraceOptions } from "./trace" import { logWarn } from "./util" -import debug from "debug" -const dbg = debug("genaiscript:secrets") +const dbg = genaiscriptDebug("secrets") const cachedSecretScanners: Record = {} diff --git a/packages/sample/genaisrc/mcp-github.genai.mts b/packages/sample/genaisrc/mcp-github.genai.mts new file mode 100644 index 0000000000..a397ccd8e5 --- /dev/null +++ b/packages/sample/genaisrc/mcp-github.genai.mts @@ -0,0 +1,23 @@ +script({ + title: "Issue Report (MCP Force)", + description: "Force MCP GitHub tool calls for issue reporting", + systemSafety: true, + tools: ["mcp"], + mcpServers: { + github: { + command: "npx", + args: ["-y", "@modelcontextprotocol/server-github"], + env: { + HOME: "", + LOGNAME: "", + PATH: "", + SHELL: "", + TERM: "", + USER: "", + GITHUB_TOKEN: "", + }, + }, + }, +}) + +$`Find the latest issues in twitter bootstrap and summarize it, return it` From 608331ac2ec37e064cd9c07796190dc04171a7a4 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 28 May 2025 15:18:25 +0000 Subject: [PATCH 004/641] Release 1.140.1 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 5193ab2772..f9adafeada 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "genaiscript-workspace", "displayName": "GenAIScript", "description": "Programmatically assemble prompts for LLMs using JavaScript. 
Orchestrate LLMs, tools, and data in code.", - "version": "1.140.0", + "version": "1.140.1", "license": "MIT", "private": true, "author": "microsoft", From 991aefefda5e775098be6956aed78c1b827230ae Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 28 May 2025 15:18:46 +0000 Subject: [PATCH 005/641] [skip ci] updated version numbers --- docs/package.json | 2 +- packages/cli/package.json | 2 +- packages/core/package.json | 2 +- packages/modulesample/package.json | 2 +- packages/sample/package.json | 2 +- packages/vscode/package.json | 2 +- packages/web/package.json | 2 +- slides/package.json | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/package.json b/docs/package.json index ad20ae4e94..35d06e0f95 100644 --- a/docs/package.json +++ b/docs/package.json @@ -2,7 +2,7 @@ "name": "docs", "type": "module", "private": true, - "version": "1.140.0", + "version": "1.140.1", "license": "MIT", "scripts": { "install:force": "rm yarn.lock && yarn install", diff --git a/packages/cli/package.json b/packages/cli/package.json index 2fb28943c7..8625e7a392 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript", - "version": "1.140.0", + "version": "1.140.1", "main": "built/genaiscript.cjs", "type": "commonjs", "bin": { diff --git a/packages/core/package.json b/packages/core/package.json index 33bb8289c1..c033cbf31d 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript-core-internal", - "version": "1.140.0", + "version": "1.140.1", "main": "src/index.ts", "license": "MIT", "private": true, diff --git a/packages/modulesample/package.json b/packages/modulesample/package.json index 9563f67e5d..1b8fe5d20c 100644 --- a/packages/modulesample/package.json +++ b/packages/modulesample/package.json @@ -6,5 +6,5 @@ "genai": "node ../cli/built/genaiscript.cjs run", "test": "yarn genai poem-js-module" }, - "version": "1.140.0" + "version": "1.140.1" } diff --git a/packages/sample/package.json b/packages/sample/package.json index 56e9d33ba4..ef2bd040e4 100644 --- a/packages/sample/package.json +++ b/packages/sample/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript-sample", - "version": "1.140.0", + "version": "1.140.1", "license": "MIT", "private": true, "scripts": { diff --git a/packages/vscode/package.json b/packages/vscode/package.json index 55e3a79c0f..b4f4156bc3 100644 --- a/packages/vscode/package.json +++ b/packages/vscode/package.json @@ -7,7 +7,7 @@ }, "displayName": "GenAIScript Insiders", "description": "Generative AI Scripting.", - "version": "1.140.0", + "version": "1.140.1", "icon": "icon.png", "engines": { "vscode": "^1.98.0" diff --git a/packages/web/package.json b/packages/web/package.json index 3706f60907..00adedb94b 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript-web", - "version": "1.140.0", + "version": "1.140.1", "license": "MIT", "private": true, "scripts": { diff --git a/slides/package.json b/slides/package.json index b9c7b59459..1787845222 100644 --- a/slides/package.json +++ b/slides/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript-slides", - "version": "1.140.0", + "version": "1.140.1", "type": "module", "private": true, "npm": { From b051d20a74e75a1a4554361c83e4df27c32cca21 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 28 May 2025 15:41:14 +0000 Subject: [PATCH 006/641] =?UTF-8?q?=E2=9C=A8=20feat:=20enhance=20model=20p?= =?UTF-8?q?rovider=20configurations?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improved resolveLanguageModelProvider to support options like listModels. --- packages/core/src/config.ts | 1 + packages/core/src/promptcontext.ts | 8 ++++---- packages/core/src/types/prompt_template.d.ts | 8 +++++++- .../sample/genaisrc/gh-models-info.genai.mts | 6 +++++- .../sample/genaisrc/modelproviders.genai.mts | 20 +++++++++++++++++-- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index 24358ecb0d..99218b1327 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -254,6 +254,7 @@ export async function resolveLanguageModelConfigurations( type: conn.type, models: conn.models, error: listError, + token: conn.token, }) ) } diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index f58de327ca..e07b4fd78d 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -293,11 +293,11 @@ export async function createPromptContext( model: configuration?.model, } satisfies LanguageModelReference }, - resolveLanguageModelProvider: async (id) => { + resolveLanguageModelProvider: async (id, options) => { if (!id) throw new Error("provider id is required") const [provider] = await resolveLanguageModelConfigurations(id, { ...(options || {}), - models: true, + models: !!options?.listModels, error: false, hide: false, token: true, @@ -309,10 +309,10 @@ export async function createPromptContext( return deleteUndefinedValues({ id: provider.provider, error: provider.error, - models: provider.models || [], base: provider.base, - token: provider.token, version: provider.version, + token: options?.token ? provider.token : undefined, + models: options?.listModels ? 
provider.models || [] : undefined, } satisfies LanguageModelProviderInfo) }, cache: async (name: string) => { diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 21932e83cb..31e35a6a75 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -6067,7 +6067,13 @@ interface LanguageModelHost { * Returns the status of the model provider and list of models if available */ resolveLanguageModelProvider( - provider: ModelProviderType + provider: ModelProviderType, + options?: { + // If true, returns the list of models available in the provider + listModels?: boolean + // If true, return the token + token?: boolean + } ): Promise } diff --git a/packages/sample/genaisrc/gh-models-info.genai.mts b/packages/sample/genaisrc/gh-models-info.genai.mts index 4ff9410b4e..4f459b1a09 100644 --- a/packages/sample/genaisrc/gh-models-info.genai.mts +++ b/packages/sample/genaisrc/gh-models-info.genai.mts @@ -54,9 +54,13 @@ |Phi\-3\-medium\-128k\-instruct|32000| */ +script({ model: "echo" }) const { output } = env -const gh = await host.resolveLanguageModelProvider("github") +const gh = await host.resolveLanguageModelProvider("github", { + listModels: true, +}) +output.fence(gh, "yaml") const sizes: { model: string; size: number }[] = [] const length = 64000 const text = "😊".repeat(length) diff --git a/packages/sample/genaisrc/modelproviders.genai.mts b/packages/sample/genaisrc/modelproviders.genai.mts index 77e1814eaf..096489ef9d 100644 --- a/packages/sample/genaisrc/modelproviders.genai.mts +++ b/packages/sample/genaisrc/modelproviders.genai.mts @@ -1,6 +1,22 @@ script({ model: "echo", group: "commit", tests: {} }) -const gh = await host.resolveLanguageModelProvider("github_copilot_chat") +const az = await host.resolveLanguageModelProvider("azure", { + listModels: true, + token: true, +}) +console.log({ az }) +const gh = await host.resolveLanguageModelProvider("github", { + listModels: false, + token: true, +}) console.log({ gh }) -const oai = await host.resolveLanguageModelProvider("openai") +const ghc = await host.resolveLanguageModelProvider("github_copilot_chat", { + listModels: true, + token: true, +}) +console.log({ ghc }) +const oai = await host.resolveLanguageModelProvider("openai", { + listModels: true, + token: true, +}) console.log({ oai }) From 33f75da005b03665780dfdf5b2d6bb378ee6220d Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 28 May 2025 15:48:07 +0000 Subject: [PATCH 007/641] Release 1.140.2 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index f9adafeada..7e4e074f3d 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "genaiscript-workspace", "displayName": "GenAIScript", "description": "Programmatically assemble prompts for LLMs using JavaScript. 
Orchestrate LLMs, tools, and data in code.", - "version": "1.140.1", + "version": "1.140.2", "license": "MIT", "private": true, "author": "microsoft", From f25cd09d47c94945d026b49ba97c9b3d0e7ff30a Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 28 May 2025 15:48:28 +0000 Subject: [PATCH 008/641] [skip ci] updated version numbers --- docs/package.json | 2 +- packages/cli/package.json | 2 +- packages/core/package.json | 2 +- packages/modulesample/package.json | 2 +- packages/sample/package.json | 2 +- packages/vscode/package.json | 2 +- packages/web/package.json | 2 +- slides/package.json | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/package.json b/docs/package.json index 35d06e0f95..20e6080d22 100644 --- a/docs/package.json +++ b/docs/package.json @@ -2,7 +2,7 @@ "name": "docs", "type": "module", "private": true, - "version": "1.140.1", + "version": "1.140.2", "license": "MIT", "scripts": { "install:force": "rm yarn.lock && yarn install", diff --git a/packages/cli/package.json b/packages/cli/package.json index 8625e7a392..e6d3e3c089 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript", - "version": "1.140.1", + "version": "1.140.2", "main": "built/genaiscript.cjs", "type": "commonjs", "bin": { diff --git a/packages/core/package.json b/packages/core/package.json index c033cbf31d..bcbb67ed8c 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript-core-internal", - "version": "1.140.1", + "version": "1.140.2", "main": "src/index.ts", "license": "MIT", "private": true, diff --git a/packages/modulesample/package.json b/packages/modulesample/package.json index 1b8fe5d20c..d85beece12 100644 --- a/packages/modulesample/package.json +++ b/packages/modulesample/package.json @@ -6,5 +6,5 @@ "genai": "node ../cli/built/genaiscript.cjs run", "test": "yarn genai poem-js-module" }, - "version": "1.140.1" + "version": "1.140.2" } diff --git a/packages/sample/package.json b/packages/sample/package.json index ef2bd040e4..8c6c2213d0 100644 --- a/packages/sample/package.json +++ b/packages/sample/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript-sample", - "version": "1.140.1", + "version": "1.140.2", "license": "MIT", "private": true, "scripts": { diff --git a/packages/vscode/package.json b/packages/vscode/package.json index b4f4156bc3..59b1363240 100644 --- a/packages/vscode/package.json +++ b/packages/vscode/package.json @@ -7,7 +7,7 @@ }, "displayName": "GenAIScript Insiders", "description": "Generative AI Scripting.", - "version": "1.140.1", + "version": "1.140.2", "icon": "icon.png", "engines": { "vscode": "^1.98.0" diff --git a/packages/web/package.json b/packages/web/package.json index 00adedb94b..2c8f482075 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript-web", - "version": "1.140.1", + "version": "1.140.2", "license": "MIT", "private": true, "scripts": { diff --git a/slides/package.json b/slides/package.json index 1787845222..e7dd5c948b 100644 --- a/slides/package.json +++ b/slides/package.json @@ -1,6 +1,6 @@ { "name": "genaiscript-slides", - "version": "1.140.1", + "version": "1.140.2", "type": "module", "private": true, "npm": { From f65d8962d06647f41db4861557da66c54d703ee4 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Thu, 29 May 2025 08:01:33 -0700 Subject: [PATCH 009/641] Action generator (#1579) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit * skeleton * format * action.yaml generation * tweaking script * generate action files * generate files * updating dockerfile * readme * permissions * updated folder * updated readme * generate .gitignore * ✨: Add support for FFmpeg and Playwright in CLI actions Enhanced CLI to include options for FFmpeg and Playwright usage. * :sparkles: Add branding support for GitHub Actions Introduced branding options with color and icon for scripts. * add cli test * ✨ chore: improve action configuration and token consistency Refined default handling, playwright script, and token ordering. * add custom action test * add package lock option * title * typo in docker * ✨: Add read permission for contents in workflow - Included a new `contents: read` permission in the YAML file. * ✨ feat: enhance browser config and Docker customization Added support for multi-browser install, Docker image config changes. * update default image * update playwright option * more options * refresh deps * ✨: Add GitHub Action support to CLI and improve outputs Introduced a new --github-action flag and enhanced output options. * more docs --- .github/workflows/custom-action.yml | 20 ++ .vscode/settings.json | 2 + THIRD_PARTY_LICENSES.md | 131 +++++++- docs/package.json | 2 +- .../content/docs/reference/cli/commands.md | 48 ++- docs/yarn.lock | 42 +-- examples/action/.gitignore | 5 + examples/action/Dockerfile | 18 + examples/action/README.md | 87 +++++ examples/action/action.yml | 20 ++ examples/action/genaisrc/.gitignore | 3 + .../action/genaisrc/action-poem.genai.mts | 10 + examples/action/package.json | 18 + package.json | 4 +- packages/cli/package.json | 11 +- packages/cli/src/action.ts | 314 ++++++++++++++++++ packages/cli/src/cli.ts | 60 ++-- packages/cli/src/githubaction.ts | 7 + packages/cli/src/run.ts | 8 +- packages/core/package.json | 4 +- packages/core/src/constants.ts | 4 +- packages/core/src/types/prompt_template.d.ts | 288 ++++++++++++++++ packages/sample/.gitignore | 1 + packages/sample/genaisrc/foobar.genai.mts | 1 + packages/sample/package.json | 3 +- packages/sample/src/cli.test.ts | 10 + slides/yarn.lock | 184 +++++----- yarn.lock | 151 ++++++--- 28 files changed, 1245 insertions(+), 211 deletions(-) create mode 100644 .github/workflows/custom-action.yml create mode 100644 examples/action/.gitignore create mode 100644 examples/action/Dockerfile create mode 100644 examples/action/README.md create mode 100644 examples/action/action.yml create mode 100644 examples/action/genaisrc/.gitignore create mode 100644 examples/action/genaisrc/action-poem.genai.mts create mode 100644 examples/action/package.json create mode 100644 packages/cli/src/action.ts create mode 100644 packages/cli/src/githubaction.ts create mode 100644 packages/sample/genaisrc/foobar.genai.mts diff --git a/.github/workflows/custom-action.yml b/.github/workflows/custom-action.yml new file mode 100644 index 0000000000..e16633b40b --- /dev/null +++ b/.github/workflows/custom-action.yml @@ -0,0 +1,20 @@ +name: "Custom Action Example" +on: + workflow_dispatch: + push: +permissions: + contents: read + models: read +concurrency: + group: iat-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +jobs: + custom_action: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: ./examples/action + id: genai + with: + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.vscode/settings.json b/.vscode/settings.json index 5f5713031c..d34a1ced22 100644 --- a/.vscode/settings.json 
+++ b/.vscode/settings.json @@ -10,6 +10,7 @@ "ANYJS", "ANYTS", "Apim", + "apks", "argk", "argkv", "arrayify", @@ -110,6 +111,7 @@ "ghinfo", "gistfs", "gistpad", + "githubaction", "githubclient", "gitmoji", "gitmojis", diff --git a/THIRD_PARTY_LICENSES.md b/THIRD_PARTY_LICENSES.md index 0504ec678c..965a3fb2b4 100644 --- a/THIRD_PARTY_LICENSES.md +++ b/THIRD_PARTY_LICENSES.md @@ -3112,7 +3112,7 @@ Apache License The following npm package may be included in this product: - - mathjs@14.5.0 + - mathjs@14.5.1 This package contains the following license: @@ -3351,8 +3351,8 @@ The following npm packages may be included in this product: - @types/http-cache-semantics@4.0.4 - @types/node-fetch@2.6.12 - @types/node@16.9.1 - - @types/node@18.19.103 - - @types/node@22.15.21 + - @types/node@18.19.105 + - @types/node@22.15.24 - @types/sarif@2.1.7 - @types/trusted-types@2.0.7 - @types/turndown@5.0.5 @@ -3387,7 +3387,7 @@ MIT License The following npm package may be included in this product: - - genaiscript-vscode@1.139.0 + - genaiscript-vscode@1.140.2 This package contains the following license: @@ -4953,6 +4953,36 @@ The above copyright notice and this permission notice shall be included in all c ----------- +The following npm package may be included in this product: + + - @actions/http-client@2.2.3 + +This package contains the following license: + +Actions Http Client for Node.js + +Copyright (c) GitHub, Inc. + +All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +----------- + The following npm package may be included in this product: - package-json-from-dist@1.0.1 @@ -7090,7 +7120,7 @@ The following npm packages may be included in this product: - duck@0.1.12 - lop@0.4.2 - - mammoth@1.9.0 + - mammoth@1.9.1 - option@0.2.4 These packages each contain the following license: @@ -8935,6 +8965,34 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI ----------- +The following npm package may be included in this product: + + - @fastify/busboy@2.1.1 + +This package contains the following license: + +Copyright Brian White. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. + +----------- + The following npm package may be included in this product: - yaml@2.8.0 @@ -10621,10 +10679,10 @@ The following npm packages may be included in this product: - abstract-logging@2.0.1 - data-uri-to-buffer@4.0.1 - eastasianwidth@0.2.0 - - genaiscript-core-internal@1.139.0 - - genaiscript-sample@1.139.0 - - genaiscript-web@1.139.0 - - genaiscript@1.139.0 + - genaiscript-core-internal@1.140.2 + - genaiscript-sample@1.140.2 + - genaiscript-web@1.140.2 + - genaiscript@1.140.2 - isarray@1.0.0 - javascript-natural-sort@0.7.1 - keyv@4.5.4 @@ -13143,7 +13201,7 @@ SOFTWARE. The following npm package may be included in this product: - - zod@3.25.30 + - zod@3.25.36 This package contains the following license: @@ -13473,6 +13531,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI The following npm packages may be included in this product: - undici-types@6.21.0 + - undici@5.29.0 - undici@6.21.3 These packages each contain the following license: @@ -15607,6 +15666,36 @@ THE SOFTWARE. ----------- +The following npm package may be included in this product: + + - tunnel@0.0.6 + +This package contains the following license: + +The MIT License (MIT) + +Copyright (c) 2012 Koichi Kobayashi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +----------- + The following npm package may be included in this product: - xmlbuilder@10.1.1 @@ -17367,6 +17456,26 @@ THE SOFTWARE. 
----------- +The following npm packages may be included in this product: + + - @actions/core@1.11.1 + - @actions/exec@1.1.1 + - @actions/io@1.1.3 + +These packages each contain the following license: + +The MIT License (MIT) + +Copyright 2019 GitHub + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +----------- + The following npm packages may be included in this product: - @csstools/css-calc@2.1.4 @@ -17708,7 +17817,7 @@ For more information, please refer to The following npm package may be included in this product: - - protobufjs@7.4.0 + - protobufjs@7.5.3 This package contains the following license: diff --git a/docs/package.json b/docs/package.json index 20e6080d22..04b15be810 100644 --- a/docs/package.json +++ b/docs/package.json @@ -23,7 +23,7 @@ "devDependencies": { "@astrojs/check": "^0.9.4", "@astrojs/starlight": "^0.34.3", - "astro": "^5.8.0", + "astro": "^5.8.1", "astro-embed": "^0.9.0", "rehype-mermaid": "^3.0.0", "starlight-blog": "^0.23.2", diff --git a/docs/src/content/docs/reference/cli/commands.md b/docs/src/content/docs/reference/cli/commands.md index 6edd16efe7..349c0e4f78 100644 --- a/docs/src/content/docs/reference/cli/commands.md +++ b/docs/src/content/docs/reference/cli/commands.md @@ -83,6 +83,7 @@ Options: -rr, --run-retry number of retries for the entire run --no-run-trace disable automatic trace generation --no-output-trace disable automatic output generation + --github-action run as GitHub Action -h, --help display help for command ``` @@ -464,7 +465,7 @@ Options: ## `retrieval` ``` -Usage: genaiscript retrieval|retreival [options] [command] +Usage: genaiscript retrieval [options] [command] RAG support @@ -572,8 +573,9 @@ Usage: genaiscript mcp|mcps [options] Starts a Model Context Protocol server that exposes scripts as tools Options: - --groups Filter script by groups --ids Filter script by ids + -g, --groups groups to include or exclude. Use :! prefix + to exclude --startup Startup script id, executed after the server is started --remote Remote repository URL to serve @@ -615,7 +617,6 @@ Options: -c, --cors Enable CORS and sets the allowed origin. Use '*' to allow any origin. --route Route prefix, like /api - --groups Filter script by groups --ids Filter script by ids --startup Startup script id, executed after the server is started @@ -640,9 +641,50 @@ Options: -ma, --model-alias model alias as name=modelid -re, --reasoning-effort Reasoning effort for o* models (choices: "high", "medium", "low") + -g, --groups groups to include or exclude. Use :! 
prefix + to exclude -h, --help display help for command ``` +## `action` + +``` +Usage: genaiscript action [options] [command] + +GitHub Actions related command + +Options: + -h, --help display help for command + +Commands: + configure [options] - ` - - const filePath = join(__dirname, "index.html") - const html = ( - await readFile(filePath, { encoding: "utf8" }) - ).replace("", csp) - res.write(html) - res.statusCode = 200 - res.end() - } else if (method === "GET" && route === "/built/markdown.css") { - res.setHeader("Content-Type", "text/css") - res.statusCode = 200 - const filePath = join(__dirname, "markdown.css") - const stream = createReadStream(filePath) - stream.pipe(res) - } else if (method === "GET" && route === "/built/codicon.css") { - res.setHeader("Content-Type", "text/css") - res.statusCode = 200 - const filePath = join(__dirname, "codicon.css") - const stream = createReadStream(filePath) - stream.pipe(res) - } else if (method === "GET" && route === "/built/codicon.ttf") { - res.setHeader("Content-Type", "font/ttf") - res.statusCode = 200 - const filePath = join(__dirname, "codicon.ttf") - const stream = createReadStream(filePath) - stream.pipe(res) - } else if (method === "GET" && route === "/built/web.mjs") { - res.setHeader("Content-Type", "application/javascript") - res.statusCode = 200 - const filePath = join(__dirname, "web.mjs") - const stream = createReadStream(filePath) - stream.pipe(res) - } else if (method === "GET" && route === "/built/web.mjs.map") { - const filePath = join(__dirname, "web.mjs.map") - if (await exists(filePath)) { - res.setHeader("Content-Type", "text/json") - res.statusCode = 200 - const stream = createReadStream(filePath) - stream.pipe(res) - } else { - res.statusCode = 404 - res.end() - } - } else if (method === "GET" && route === "/favicon.svg") { - res.setHeader("Content-Type", "image/svg+xml") - res.statusCode = 200 - const filePath = join(__dirname, "favicon.svg") - const stream = createReadStream(filePath) - stream.pipe(res) - } else if (method === "GET" && imageRx.test(route)) { - const filePath = join(process.cwd(), route) - try { - const stream = createReadStream(filePath) - res.setHeader("Content-Type", "image/" + extname(route)) - res.statusCode = 200 - stream.pipe(res) - } catch (e) { - res.statusCode = 404 - res.end() - } - } else { - // api, validate apikey - if (!checkApiKey(req)) { - console.debug(`401: missing or invalid api-key`) - res.statusCode = 401 - res.end() - return - } - let response: ResponseStatus - if (method === "GET" && route === "/api/version") - response = serverVersion() - else if (method === "GET" && route === "/api/scripts") { - response = await scriptList() - } else if (method === "GET" && route === "/api/env") { - response = await serverEnv() - } else if (method === "GET" && route === "/api/runs") { - const runs = await collectRuns() - response = { - ok: true, - runs: runs.map( - ({ scriptId, runId, creationTme: creationTime }) => ({ - scriptId, - runId, - creationTime, - }) - ), - } - } else if (method === "POST" && route === "/v1/chat/completions") { - await openaiApiChatCompletions(req, res) - return - } else if (method === "GET" && route === "/v1/models") { - await openaiApiModels(req, res) - return - } else if (method === "GET" && runRx.test(route)) { - const { runId } = runRx.exec(route).groups - logVerbose(`run: get ${runId}`) - // shortcut to last run - if (runId === lastRunResult?.runId) - response = { - ok: true, - ...lastRunResult, - } - else { - const runs = await collectRuns() - const run = 
runs.find((r) => r.runId === runId) - if (run) { - const runResult = - (await tryReadJSON(join(run.dir, "res.json"))) || {} - const runTrace = - (await tryReadText( - join(run.dir, TRACE_FILENAME) - )) || "" - response = ({ - ok: true, - type: "script.end", - runId, - exitCode: runResult.exitCode, - result: runResult, - trace: runTrace, - }) as any - } - } - } - - if (response === undefined) { - console.debug(`404: ${method} ${url}`) - res.statusCode = 404 - res.end() - } else { - res.statusCode = 200 - res.setHeader("Content-Type", "application/json") - res.end(JSON.stringify(response)) - } + `; + + const filePath = join(__dirname, "index.html"); + const html = (await readFile(filePath, { encoding: "utf8" })).replace("", csp); + res.write(html); + res.statusCode = 200; + res.end(); + } else if (method === "GET" && route === "/built/markdown.css") { + res.setHeader("Content-Type", "text/css"); + res.statusCode = 200; + const filePath = join(__dirname, "markdown.css"); + const stream = createReadStream(filePath); + stream.pipe(res); + } else if (method === "GET" && route === "/built/codicon.css") { + res.setHeader("Content-Type", "text/css"); + res.statusCode = 200; + const filePath = join(__dirname, "codicon.css"); + const stream = createReadStream(filePath); + stream.pipe(res); + } else if (method === "GET" && route === "/built/codicon.ttf") { + res.setHeader("Content-Type", "font/ttf"); + res.statusCode = 200; + const filePath = join(__dirname, "codicon.ttf"); + const stream = createReadStream(filePath); + stream.pipe(res); + } else if (method === "GET" && route === "/built/web.mjs") { + res.setHeader("Content-Type", "application/javascript"); + res.statusCode = 200; + const filePath = join(__dirname, "web.mjs"); + const stream = createReadStream(filePath); + stream.pipe(res); + } else if (method === "GET" && route === "/built/web.mjs.map") { + const filePath = join(__dirname, "web.mjs.map"); + if (await exists(filePath)) { + res.setHeader("Content-Type", "text/json"); + res.statusCode = 200; + const stream = createReadStream(filePath); + stream.pipe(res); + } else { + res.statusCode = 404; + res.end(); + } + } else if (method === "GET" && route === "/favicon.svg") { + res.setHeader("Content-Type", "image/svg+xml"); + res.statusCode = 200; + const filePath = join(__dirname, "favicon.svg"); + const stream = createReadStream(filePath); + stream.pipe(res); + } else if (method === "GET" && imageRx.test(route)) { + const filePath = join(process.cwd(), route); + try { + const stream = createReadStream(filePath); + res.setHeader("Content-Type", "image/" + extname(route)); + res.statusCode = 200; + stream.pipe(res); + } catch (e) { + res.statusCode = 404; + res.end(); + } + } else { + // api, validate apikey + if (!checkApiKey(req)) { + console.debug(`401: missing or invalid api-key`); + res.statusCode = 401; + res.end(); + return; + } + let response: ResponseStatus; + if (method === "GET" && route === "/api/version") response = serverVersion(); + else if (method === "GET" && route === "/api/scripts") { + response = await scriptList(); + } else if (method === "GET" && route === "/api/env") { + response = await serverEnv(); + } else if (method === "GET" && route === "/api/runs") { + const runs = await collectRuns(); + response = { + ok: true, + runs: runs.map(({ scriptId, runId, creationTme: creationTime }) => ({ + scriptId, + runId, + creationTime, + })), + }; + } else if (method === "POST" && route === "/v1/chat/completions") { + await openaiApiChatCompletions(req, res); + return; + } else if 
(method === "GET" && route === "/v1/models") { + await openaiApiModels(req, res); + return; + } else if (method === "GET" && runRx.test(route)) { + const { runId } = runRx.exec(route).groups; + logVerbose(`run: get ${runId}`); + // shortcut to last run + if (runId === lastRunResult?.runId) + response = { + ok: true, + ...lastRunResult, + }; + else { + const runs = await collectRuns(); + const run = runs.find((r) => r.runId === runId); + if (run) { + const runResult = (await tryReadJSON(join(run.dir, "res.json"))) || {}; + const runTrace = (await tryReadText(join(run.dir, TRACE_FILENAME))) || ""; + response = ({ + ok: true, + type: "script.end", + runId, + exitCode: runResult.exitCode, + result: runResult, + trace: runTrace, + }) as any; + } } - }) - // Upgrade HTTP server to handle WebSocket connections on the /wss route. - httpServer.on("upgrade", (req, socket, head) => { - const pathname = new URL(req.url, `http://${req.headers.host}`).pathname - if (pathname === "/" && checkApiKey(req)) { - wss.handleUpgrade(req, socket, head, (ws) => { - wss.emit("connection", ws, req) - }) - } else socket.destroy() - }) - // Start the HTTP server on the specified port. - const serverHash = apiKey ? `#api-key:${encodeURIComponent(apiKey)}` : "" - httpServer.listen(port, serverHost, () => { - console.log(`GenAIScript server v${CORE_VERSION}`) - if (remote) - console.log( - `│ Remote: ${remote}${options.remoteBranch ? `#${options.remoteBranch}` : ""}` - ) - console.log(`│ Local http://${serverHost}:${port}/${serverHash}`) - if (options.network) { - console.log(`│ Host http://localhost:${port}/${serverHash}`) - const interfaces = networkInterfaces() - for (const ifaces of Object.values(interfaces)) { - for (const iface of ifaces) { - if (iface.family === "IPv4" && !iface.internal) { - console.log( - `│ Network http://${iface.address}:${port}/${serverHash}` - ) - } - } - } + } + + if (response === undefined) { + console.debug(`404: ${method} ${url}`); + res.statusCode = 404; + res.end(); + } else { + res.statusCode = 200; + res.setHeader("Content-Type", "application/json"); + res.end(JSON.stringify(response)); + } + } + }); + // Upgrade HTTP server to handle WebSocket connections on the /wss route. + httpServer.on("upgrade", (req, socket, head) => { + const pathname = new URL(req.url, `http://${req.headers.host}`).pathname; + if (pathname === "/" && checkApiKey(req)) { + wss.handleUpgrade(req, socket, head, (ws) => { + wss.emit("connection", ws, req); + }); + } else socket.destroy(); + }); + // Start the HTTP server on the specified port. + const serverHash = apiKey ? `#api-key:${encodeURIComponent(apiKey)}` : ""; + httpServer.listen(port, serverHost, () => { + console.log(`GenAIScript server v${CORE_VERSION}`); + if (remote) + console.log(`│ Remote: ${remote}${options.remoteBranch ? 
`#${options.remoteBranch}` : ""}`); + console.log(`│ Local http://${serverHost}:${port}/${serverHash}`); + if (options.network) { + console.log(`│ Host http://localhost:${port}/${serverHash}`); + const interfaces = networkInterfaces(); + for (const ifaces of Object.values(interfaces)) { + for (const iface of ifaces) { + if (iface.family === "IPv4" && !iface.internal) { + console.log(`│ Network http://${iface.address}:${port}/${serverHash}`); + } } - }) + } + } + }); } diff --git a/packages/cli/src/stdin.ts b/packages/cli/src/stdin.ts index b3b63f7d4b..e236756678 100644 --- a/packages/cli/src/stdin.ts +++ b/packages/cli/src/stdin.ts @@ -1,48 +1,48 @@ -import { toBase64 } from "../../core/src/base64" -import { isBinaryMimeType } from "../../core/src/binary" -import { deleteUndefinedValues, isEmptyString } from "../../core/src/cleaners" -import { fileTypeFromBuffer } from "../../core/src/filetype" -import { logVerbose } from "../../core/src/util" -import { STDIN_READ_TIMEOUT } from "../../core/src/constants" -import { prettyBytes } from "../../core/src/pretty" +import { toBase64 } from "../../core/src/base64"; +import { isBinaryMimeType } from "../../core/src/binary"; +import { deleteUndefinedValues, isEmptyString } from "../../core/src/cleaners"; +import { fileTypeFromBuffer } from "../../core/src/filetype"; +import { logVerbose } from "../../core/src/util"; +import { STDIN_READ_TIMEOUT } from "../../core/src/constants"; +import { prettyBytes } from "../../core/src/pretty"; function readStdinOrTimeout(): Promise { - return new Promise((resolve, reject) => { - let res: Buffer[] = [] - const { stdin } = process - if (!stdin || stdin.isTTY) { - resolve(undefined) - return - } + return new Promise((resolve, reject) => { + let res: Buffer[] = []; + const { stdin } = process; + if (!stdin || stdin.isTTY) { + resolve(undefined); + return; + } - const controller = new AbortController() - const timeoutId = setTimeout(() => { - controller.abort() - resolve(undefined) // Resolve without data when timed out - }, STDIN_READ_TIMEOUT) + const controller = new AbortController(); + const timeoutId = setTimeout(() => { + controller.abort(); + resolve(undefined); // Resolve without data when timed out + }, STDIN_READ_TIMEOUT); - const dataHandler = (data: Buffer) => { - clearTimeout(timeoutId) - res.push(data) - } + const dataHandler = (data: Buffer) => { + clearTimeout(timeoutId); + res.push(data); + }; - const errorHandler = (err: Error) => { - clearTimeout(timeoutId) - reject(err) - } + const errorHandler = (err: Error) => { + clearTimeout(timeoutId); + reject(err); + }; - stdin.on("data", dataHandler) - stdin.once("error", errorHandler) - stdin.once("end", () => { - clearTimeout(timeoutId) - resolve(Buffer.concat(res)) - }) + stdin.on("data", dataHandler); + stdin.once("error", errorHandler); + stdin.once("end", () => { + clearTimeout(timeoutId); + resolve(Buffer.concat(res)); + }); - if (controller.signal.aborted) { - stdin.removeListener("data", dataHandler) - stdin.removeListener("error", errorHandler) - } - }) + if (controller.signal.aborted) { + stdin.removeListener("data", dataHandler); + stdin.removeListener("error", errorHandler); + } + }); } /** @@ -54,25 +54,25 @@ function readStdinOrTimeout(): Promise { * @returns A `WorkspaceFile` object containing the parsed input data, or undefined if there is no data or if a timeout occurs. 
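 * A minimal usage sketch (hedged: the import path assumes a sibling module consuming "./stdin"):
 * @example
 * import { readStdIn } from "./stdin";
 * const file = await readStdIn();
 * if (file) console.log(`stdin captured as ${file.filename} (${file.size} bytes)`);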
*/ export async function readStdIn(): Promise { - const data = await readStdinOrTimeout() - if (!data?.length) return undefined + const data = await readStdinOrTimeout(); + if (!data?.length) return undefined; - let mime = await fileTypeFromBuffer(data) - const res = isBinaryMimeType(mime?.mime) - ? ({ - filename: `stdin.${mime?.ext || "bin"}`, - content: toBase64(data), - encoding: "base64", - size: data.length, - type: mime?.mime, - } satisfies WorkspaceFile) - : ({ - filename: `stdin.${mime?.ext || "md"}`, - content: data.toString("utf-8"), - size: data.length, - type: mime?.mime, - } satisfies WorkspaceFile) + let mime = await fileTypeFromBuffer(data); + const res = isBinaryMimeType(mime?.mime) + ? ({ + filename: `stdin.${mime?.ext || "bin"}`, + content: toBase64(data), + encoding: "base64", + size: data.length, + type: mime?.mime, + } satisfies WorkspaceFile) + : ({ + filename: `stdin.${mime?.ext || "md"}`, + content: data.toString("utf-8"), + size: data.length, + type: mime?.mime, + } satisfies WorkspaceFile); - logVerbose(`stdin: ${res.filename} (${prettyBytes(res.size)})`) - return deleteUndefinedValues(res) + logVerbose(`stdin: ${res.filename} (${prettyBytes(res.size)})`); + return deleteUndefinedValues(res); } diff --git a/packages/cli/src/test.ts b/packages/cli/src/test.ts index fc1b40423f..d6d27ea6ac 100644 --- a/packages/cli/src/test.ts +++ b/packages/cli/src/test.ts @@ -2,56 +2,53 @@ // listing, and viewing results. It handles configuration setup, execution logic, // and result processing. -import { buildProject } from "./build" -import { readFile, writeFile, appendFile } from "node:fs/promises" -import { execa } from "execa" -import { dirname, join, resolve } from "node:path" -import { emptyDir, exists } from "fs-extra" -import { PROMPTFOO_VERSION } from "./version" +import { buildProject } from "./build"; +import { readFile, writeFile, appendFile } from "node:fs/promises"; +import { execa } from "execa"; +import { dirname, join, resolve } from "node:path"; +import { emptyDir, exists } from "fs-extra"; +import { PROMPTFOO_VERSION } from "./version"; import { - PROMPTFOO_CACHE_PATH, - PROMPTFOO_CONFIG_DIR, - FILES_NOT_FOUND_ERROR_CODE, - GENAISCRIPT_FOLDER, - GENAI_ANY_REGEX, - EMOJI_SUCCESS, - EMOJI_FAIL, - TEST_RUNS_DIR_NAME, - PROMPTFOO_REMOTE_API_PORT, -} from "../../core/src/constants" -import { promptFooDriver } from "../../core/src/default_prompts" -import { serializeError } from "../../core/src/error" -import { runtimeHost } from "../../core/src/host" -import { JSON5TryParse } from "../../core/src/json5" -import { MarkdownTrace } from "../../core/src/trace" -import { logInfo, logVerbose, toStringList } from "../../core/src/util" -import { YAMLStringify } from "../../core/src/yaml" + PROMPTFOO_CACHE_PATH, + PROMPTFOO_CONFIG_DIR, + FILES_NOT_FOUND_ERROR_CODE, + GENAISCRIPT_FOLDER, + GENAI_ANY_REGEX, + EMOJI_SUCCESS, + EMOJI_FAIL, + TEST_RUNS_DIR_NAME, + PROMPTFOO_REMOTE_API_PORT, +} from "../../core/src/constants"; +import { promptFooDriver } from "../../core/src/default_prompts"; +import { serializeError } from "../../core/src/error"; +import { runtimeHost } from "../../core/src/host"; +import { JSON5TryParse } from "../../core/src/json5"; +import { MarkdownTrace } from "../../core/src/trace"; +import { logInfo, logVerbose, toStringList } from "../../core/src/util"; +import { YAMLStringify } from "../../core/src/yaml"; import { - PromptScriptTestRunOptions, - PromptScriptTestRunResponse, - PromptScriptTestResult, -} from "../../core/src/server/messages" -import { 
generatePromptFooConfiguration } from "../../core/src/promptfoo" -import { delay } from "es-toolkit" -import { resolveModelConnectionInfo } from "../../core/src/models" -import { filterScripts } from "../../core/src/ast" -import { link } from "../../core/src/mkmd" -import { applyModelOptions } from "../../core/src/modelalias" -import { arrayify, normalizeFloat, normalizeInt } from "../../core/src/cleaners" -import { ChatCompletionReasoningEffort } from "../../core/src/chattypes" + PromptScriptTestRunOptions, + PromptScriptTestRunResponse, + PromptScriptTestResult, +} from "../../core/src/server/messages"; +import { generatePromptFooConfiguration } from "../../core/src/promptfoo"; +import { delay } from "es-toolkit"; +import { resolveModelConnectionInfo } from "../../core/src/models"; +import { filterScripts } from "../../core/src/ast"; +import { link } from "../../core/src/mkmd"; +import { applyModelOptions } from "../../core/src/modelalias"; +import { arrayify, normalizeFloat, normalizeInt } from "../../core/src/cleaners"; +import { ChatCompletionReasoningEffort } from "../../core/src/chattypes"; +import { CancellationOptions, checkCancelled } from "../../core/src/cancellation"; +import { CORE_VERSION } from "../../core/src/version"; import { - CancellationOptions, - checkCancelled, -} from "../../core/src/cancellation" -import { CORE_VERSION } from "../../core/src/version" -import { - headersToMarkdownTableHead, - headersToMarkdownTableSeperator, - objectToMarkdownTableRow, -} from "../../core/src/csv" -import { roundWithPrecision } from "../../core/src/precision" -import { ensureDir } from "../../core/src/fs" -import { dotGenaiscriptPath } from "../../core/src/workdir" + headersToMarkdownTableHead, + headersToMarkdownTableSeperator, + objectToMarkdownTableRow, +} from "../../core/src/csv"; +import { roundWithPrecision } from "../../core/src/precision"; +import { ensureDir } from "../../core/src/fs"; +import { dotGenaiscriptPath } from "../../core/src/workdir"; /** * Parses model specifications from a string and returns a ModelOptions object. @@ -59,26 +56,26 @@ import { dotGenaiscriptPath } from "../../core/src/workdir" * @returns A ModelOptions object with model, temperature, and topP fields if applicable. */ function parseModelSpec(m: string): ModelOptions & ModelAliasesOptions { - const values = m - .split(/&/g) - .map((kv) => kv.split("=", 2)) - .reduce( - (acc, [key, value]) => { - acc[key] = decodeURIComponent(value) - return acc - }, - {} as Record - ) - if (Object.keys(values).length > 1) - return { - model: values["m"], - smallModel: values["s"], - visionModel: values["v"], - temperature: normalizeFloat(values["t"]), - topP: normalizeFloat(values["p"]), - reasoningEffort: values["r"] as ChatCompletionReasoningEffort, - } satisfies ModelOptions & ModelAliasesOptions - else return { model: m } + const values = m + .split(/&/g) + .map((kv) => kv.split("=", 2)) + .reduce( + (acc, [key, value]) => { + acc[key] = decodeURIComponent(value); + return acc; + }, + {} as Record, + ); + if (Object.keys(values).length > 1) + return { + model: values["m"], + smallModel: values["s"], + visionModel: values["v"], + temperature: normalizeFloat(values["t"]), + topP: normalizeFloat(values["p"]), + reasoningEffort: values["r"] as ChatCompletionReasoningEffort, + } satisfies ModelOptions & ModelAliasesOptions; + else return { model: m }; } /** @@ -86,16 +83,16 @@ function parseModelSpec(m: string): ModelOptions & ModelAliasesOptions { * @returns An environment object with necessary configurations. 
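 * A hedged sketch of typical use within this module (the promptfoo values shown are the defaults applied when unset):
 * @example
 * const env = createEnv();
 * // PROMPTFOO_DISABLE_TELEMETRY and PROMPTFOO_DISABLE_UPDATE default to "true"
 * // unless already present in process.env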
*/ function createEnv() { - const env = process.env - return { - ...process.env, - PROMPTFOO_CACHE_PATH: env.PROMPTFOO_CACHE_PATH ?? PROMPTFOO_CACHE_PATH, - PROMPTFOO_CONFIG_DIR: env.PROMPTFOO_CONFIG_DIR ?? PROMPTFOO_CONFIG_DIR, - PROMPTFOO_DISABLE_TELEMETRY: env.PROMPTFOO_DISABLE_TELEMETRY ?? "true", - PROMPTFOO_DISABLE_UPDATE: env.PROMPTFOO_DISABLE_UPDATE ?? "true", - PROMPTFOO_DISABLE_REDTEAM_REMOTE_GENERATION: - env.PROMPTFOO_DISABLE_REDTEAM_REMOTE_GENERATION ?? "true", - } + const env = process.env; + return { + ...process.env, + PROMPTFOO_CACHE_PATH: env.PROMPTFOO_CACHE_PATH ?? PROMPTFOO_CACHE_PATH, + PROMPTFOO_CONFIG_DIR: env.PROMPTFOO_CONFIG_DIR ?? PROMPTFOO_CONFIG_DIR, + PROMPTFOO_DISABLE_TELEMETRY: env.PROMPTFOO_DISABLE_TELEMETRY ?? "true", + PROMPTFOO_DISABLE_UPDATE: env.PROMPTFOO_DISABLE_UPDATE ?? "true", + PROMPTFOO_DISABLE_REDTEAM_REMOTE_GENERATION: + env.PROMPTFOO_DISABLE_REDTEAM_REMOTE_GENERATION ?? "true", + }; } /** @@ -105,64 +102,62 @@ function createEnv() { * @returns A Promise resolving to the test run response, including results, status, and error details if applicable. */ export async function runPromptScriptTests( - ids: string[], - options: PromptScriptTestRunOptions & { - out?: string - cli?: string - removeOut?: boolean - cache?: boolean - verbose?: boolean - write?: boolean - redteam?: boolean - promptfooVersion?: string - outSummary?: string - testDelay?: string - maxConcurrency?: string - testTimeout?: string - } & CancellationOptions + ids: string[], + options: PromptScriptTestRunOptions & { + out?: string; + cli?: string; + removeOut?: boolean; + cache?: boolean; + verbose?: boolean; + write?: boolean; + redteam?: boolean; + promptfooVersion?: string; + outSummary?: string; + testDelay?: string; + maxConcurrency?: string; + testTimeout?: string; + } & CancellationOptions, ): Promise { - applyModelOptions(options, "cli") - const { cancellationToken, redteam } = options || {} - const scripts = await listTests({ ids, ...(options || {}) }) - if (!scripts.length) - return { - ok: false, - status: FILES_NOT_FOUND_ERROR_CODE, - error: serializeError(new Error("no tests found")), - } + applyModelOptions(options, "cli"); + const { cancellationToken, redteam } = options || {}; + const scripts = await listTests({ ids, ...(options || {}) }); + if (!scripts.length) + return { + ok: false, + status: FILES_NOT_FOUND_ERROR_CODE, + error: serializeError(new Error("no tests found")), + }; - const cli = options.cli || resolve(__filename) - const out = options.out || join(GENAISCRIPT_FOLDER, "tests") - let outSummary = options.outSummary - ? resolve(options.outSummary) - : undefined - const provider = join(out, "provider.mjs") - const port = PROMPTFOO_REMOTE_API_PORT - const serverUrl = `http://127.0.0.1:${port}` - const testDelay = normalizeInt(options?.testDelay) - const maxConcurrency = normalizeInt(options?.maxConcurrency) - const timeout = normalizeInt(options?.testTimeout) * 1000 || undefined - const runStart = new Date() - logInfo(`writing tests to ${out}`) + const cli = options.cli || resolve(__filename); + const out = options.out || join(GENAISCRIPT_FOLDER, "tests"); + let outSummary = options.outSummary ? 
resolve(options.outSummary) : undefined; + const provider = join(out, "provider.mjs"); + const port = PROMPTFOO_REMOTE_API_PORT; + const serverUrl = `http://127.0.0.1:${port}`; + const testDelay = normalizeInt(options?.testDelay); + const maxConcurrency = normalizeInt(options?.maxConcurrency); + const timeout = normalizeInt(options?.testTimeout) * 1000 || undefined; + const runStart = new Date(); + logInfo(`writing tests to ${out}`); - if (options?.removeOut) await emptyDir(out) - await ensureDir(out) - await writeFile(provider, promptFooDriver) + if (options?.removeOut) await emptyDir(out); + await ensureDir(out); + await writeFile(provider, promptFooDriver); - if (!outSummary) { - outSummary = dotGenaiscriptPath( - TEST_RUNS_DIR_NAME, - `${new Date().toISOString().replace(/[:.]/g, "-")}.trace.md` - ) - } + if (!outSummary) { + outSummary = dotGenaiscriptPath( + TEST_RUNS_DIR_NAME, + `${new Date().toISOString().replace(/[:.]/g, "-")}.trace.md`, + ); + } - await ensureDir(PROMPTFOO_CACHE_PATH) - await ensureDir(PROMPTFOO_CONFIG_DIR) - if (outSummary) { - await ensureDir(dirname(outSummary)) - await appendFile( - outSummary, - `## GenAIScript Test Results + await ensureDir(PROMPTFOO_CACHE_PATH); + await ensureDir(PROMPTFOO_CONFIG_DIR); + if (outSummary) { + await ensureDir(dirname(outSummary)); + await appendFile( + outSummary, + `## GenAIScript Test Results - start: ${runStart.toISOString()} - Run this command to launch the promptfoo test viewer. @@ -171,190 +166,164 @@ export async function runPromptScriptTests( npx --yes genaiscript@${CORE_VERSION} test view \`\`\` -` - ) - logVerbose(`trace: ${outSummary}`) - } +`, + ); + logVerbose(`trace: ${outSummary}`); + } - // Prepare test configurations for each script - const optionsModels = Object.freeze(options.models?.map(parseModelSpec)) - const configurations: { script: PromptScript; configuration: string }[] = [] - for (const script of scripts) { - checkCancelled(cancellationToken) - const fn = out - ? join(out, `${script.id}.promptfoo.yaml`) - : script.filename.replace(GENAI_ANY_REGEX, ".promptfoo.yaml") - const { info: chatInfo } = await resolveModelConnectionInfo(script, { - model: runtimeHost.modelAliases.large.model, - }) - if (chatInfo.error) throw new Error(chatInfo.error) - let { info: embeddingsInfo } = await resolveModelConnectionInfo( - script, - { model: runtimeHost.modelAliases.embeddings.model } - ) - if (embeddingsInfo?.error) embeddingsInfo = undefined - const testModels = arrayify(script.testModels).map((m) => - typeof m === "string" ? parseModelSpec(m) : m - ) - const models = testModels?.length ? testModels : optionsModels?.slice(0) - const config = await generatePromptFooConfiguration(script, { - out, - cli, - models, - provider: "provider.mjs", - chatInfo, - embeddingsInfo, - redteam, - }) - const yaml = YAMLStringify(config) - await writeFile(fn, yaml) - configurations.push({ script, configuration: fn }) - } + // Prepare test configurations for each script + const optionsModels = Object.freeze(options.models?.map(parseModelSpec)); + const configurations: { script: PromptScript; configuration: string }[] = []; + for (const script of scripts) { + checkCancelled(cancellationToken); + const fn = out + ? 
join(out, `${script.id}.promptfoo.yaml`) + : script.filename.replace(GENAI_ANY_REGEX, ".promptfoo.yaml"); + const { info: chatInfo } = await resolveModelConnectionInfo(script, { + model: runtimeHost.modelAliases.large.model, + }); + if (chatInfo.error) throw new Error(chatInfo.error); + let { info: embeddingsInfo } = await resolveModelConnectionInfo(script, { + model: runtimeHost.modelAliases.embeddings.model, + }); + if (embeddingsInfo?.error) embeddingsInfo = undefined; + const testModels = arrayify(script.testModels).map((m) => + typeof m === "string" ? parseModelSpec(m) : m, + ); + const models = testModels?.length ? testModels : optionsModels?.slice(0); + const config = await generatePromptFooConfiguration(script, { + out, + cli, + models, + provider: "provider.mjs", + chatInfo, + embeddingsInfo, + redteam, + }); + const yaml = YAMLStringify(config); + await writeFile(fn, yaml); + configurations.push({ script, configuration: fn }); + } + + let stats = { + prompt: 0, + completion: 0, + total: 0, + }; + const headers = ["status", "script", "prompt", "completion", "total", "duration", "url"]; + if (outSummary) { + await appendFile( + outSummary, + [headersToMarkdownTableHead(headers), headersToMarkdownTableSeperator(headers)].join(""), + ); + } + const promptFooVersion = options.promptfooVersion || PROMPTFOO_VERSION; + const results: PromptScriptTestResult[] = []; + // Execute each configuration and gather results + for (const config of configurations) { + checkCancelled(cancellationToken); + const { script, configuration } = config; + logInfo( + `test ${script.id} (${results.length + 1}/${configurations.length}) - ${configuration}`, + ); + const testStart = new Date(); + const outJson = configuration.replace(/\.yaml$/, ".res.json"); + const cmd = "npx"; + const args = ["--yes", `promptfoo@${promptFooVersion}`]; + if (redteam) args.push("redteam", "run", "--force"); + else args.push("eval", "--no-progress-bar"); + args.push("--config", configuration); + if (!isNaN(maxConcurrency)) args.push("--max-concurrency", String(maxConcurrency)); - let stats = { - prompt: 0, - completion: 0, - total: 0, + if (options.cache) args.push("--cache"); + if (options.verbose) args.push("--verbose"); + args.push("--output", outJson); + logVerbose(` ${cmd} ${args.join(" ")}`); + const exec = execa(cmd, args, { + preferLocal: true, + cleanup: true, + stripFinalNewline: true, + buffer: false, + env: createEnv(), + stdio: "inherit", + timeout, + }); + let status: number; + let error: SerializedError; + let value: PromptScriptTestResult["value"] = undefined; + try { + const res = await exec; + status = res.exitCode; + } catch (e) { + status = e.errno ?? -1; + error = serializeError(e); } - const headers = [ - "status", - "script", - "prompt", - "completion", - "total", - "duration", - "url", - ] + if (await exists(outJson)) value = JSON5TryParse(await readFile(outJson, "utf8")); + const ok = status === 0; + stats.prompt += value?.results?.stats?.tokenUsage?.prompt || 0; + stats.completion += value?.results?.stats?.tokenUsage?.completion || 0; + stats.total += value?.results?.stats?.tokenUsage?.total || 0; + const testEnd = new Date(); if (outSummary) { - await appendFile( - outSummary, - [ - headersToMarkdownTableHead(headers), - headersToMarkdownTableSeperator(headers), - ].join("") - ) + const url = value?.evalId + ? " " + + link("result", `${serverUrl}/eval?evalId=${encodeURIComponent(value?.evalId)}`) + + " " + : ""; + const row = { + status: ok ? 
EMOJI_SUCCESS : EMOJI_FAIL, + script: script.id, + prompt: value?.results?.stats?.tokenUsage?.prompt, + completion: value?.results?.stats?.tokenUsage?.completion, + total: value?.results?.stats?.tokenUsage?.total, + duration: roundWithPrecision((testEnd.getTime() - testStart.getTime()) / 1000, 1), + url, + }; + await appendFile(outSummary, objectToMarkdownTableRow(row, headers, { skipEscape: true })); } - const promptFooVersion = options.promptfooVersion || PROMPTFOO_VERSION - const results: PromptScriptTestResult[] = [] - // Execute each configuration and gather results - for (const config of configurations) { - checkCancelled(cancellationToken) - const { script, configuration } = config - logInfo( - `test ${script.id} (${results.length + 1}/${configurations.length}) - ${configuration}` - ) - const testStart = new Date() - const outJson = configuration.replace(/\.yaml$/, ".res.json") - const cmd = "npx" - const args = ["--yes", `promptfoo@${promptFooVersion}`] - if (redteam) args.push("redteam", "run", "--force") - else args.push("eval", "--no-progress-bar") - args.push("--config", configuration) - if (!isNaN(maxConcurrency)) - args.push("--max-concurrency", String(maxConcurrency)) + results.push({ + status, + ok, + error, + script: script.id, + value, + }); - if (options.cache) args.push("--cache") - if (options.verbose) args.push("--verbose") - args.push("--output", outJson) - logVerbose(` ${cmd} ${args.join(" ")}`) - const exec = execa(cmd, args, { - preferLocal: true, - cleanup: true, - stripFinalNewline: true, - buffer: false, - env: createEnv(), - stdio: "inherit", - timeout, - }) - let status: number - let error: SerializedError - let value: PromptScriptTestResult["value"] = undefined - try { - const res = await exec - status = res.exitCode - } catch (e) { - status = e.errno ?? -1 - error = serializeError(e) - } - if (await exists(outJson)) - value = JSON5TryParse(await readFile(outJson, "utf8")) - const ok = status === 0 - stats.prompt += value?.results?.stats?.tokenUsage?.prompt || 0 - stats.completion += value?.results?.stats?.tokenUsage?.completion || 0 - stats.total += value?.results?.stats?.tokenUsage?.total || 0 - const testEnd = new Date() - if (outSummary) { - const url = value?.evalId - ? " " + - link( - "result", - `${serverUrl}/eval?evalId=${encodeURIComponent(value?.evalId)}` - ) + - " " - : "" - const row = { - status: ok ? 
EMOJI_SUCCESS : EMOJI_FAIL, - script: script.id, - prompt: value?.results?.stats?.tokenUsage?.prompt, - completion: value?.results?.stats?.tokenUsage?.completion, - total: value?.results?.stats?.tokenUsage?.total, - duration: roundWithPrecision( - (testEnd.getTime() - testStart.getTime()) / 1000, - 1 - ), - url, - } - await appendFile( - outSummary, - objectToMarkdownTableRow(row, headers, { skipEscape: true }) - ) - } - results.push({ - status, - ok, - error, - script: script.id, - value, - }) - - if (testDelay > 0) { - logVerbose(` waiting ${testDelay}s`) - await delay(testDelay * 1000) - } + if (testDelay > 0) { + logVerbose(` waiting ${testDelay}s`); + await delay(testDelay * 1000); } - const runEnd = new Date() + } + const runEnd = new Date(); - if (outSummary) { - await appendFile( - outSummary, - [ - objectToMarkdownTableRow( - { - status: results.filter((r) => r.ok).length, - prompt: stats.prompt, - completion: stats.completion, - total: stats.total, - duration: roundWithPrecision( - (runEnd.getTime() - runStart.getTime()) / 1000, - 1 - ), - }, - headers, - { skipEscape: true } - ), - "\n\n", - `- end: ${runEnd.toISOString()}\n`, - ].join("") - ) - } - if (outSummary) logVerbose(`trace: ${outSummary}`) - const ok = results.every((r) => !!r.ok) - return { - ok, - status: ok ? 0 : -1, - value: results, - error: results.find((r) => r.error)?.error, - } + if (outSummary) { + await appendFile( + outSummary, + [ + objectToMarkdownTableRow( + { + status: results.filter((r) => r.ok).length, + prompt: stats.prompt, + completion: stats.completion, + total: stats.total, + duration: roundWithPrecision((runEnd.getTime() - runStart.getTime()) / 1000, 1), + }, + headers, + { skipEscape: true }, + ), + "\n\n", + `- end: ${runEnd.toISOString()}\n`, + ].join(""), + ); + } + if (outSummary) logVerbose(`trace: ${outSummary}`); + const ok = results.every((r) => !!r.ok); + return { + ok, + status: ok ? 0 : -1, + value: results, + error: results.find((r) => r.error)?.error, + }; } /* @@ -362,18 +331,14 @@ npx --yes genaiscript@${CORE_VERSION} test view * @param options - Options to filter the test scripts by IDs or groups. * @returns A Promise resolving to an array of filtered scripts. */ -async function listTests(options: { - ids?: string[] - groups?: string[] - redteam?: boolean -}) { - const prj = await buildProject() - const scripts = filterScripts(prj.scripts, { - ...(options || {}), - test: options.redteam ? undefined : true, - redteam: options.redteam, - }) - return scripts +async function listTests(options: { ids?: string[]; groups?: string[]; redteam?: boolean }) { + const prj = await buildProject(); + const scripts = filterScripts(prj.scripts, { + ...(options || {}), + test: options.redteam ? undefined : true, + redteam: options.redteam, + }); + return scripts; } /** @@ -382,31 +347,31 @@ async function listTests(options: { * @param options - Options to configure the test run, including output paths, CLI settings, verbosity, caching, test delay, groups, concurrency settings, and redteam mode. 
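 * A hedged invocation sketch (the script id and output path are illustrative, not from this patch):
 * @example
 * // runs the matching test scripts, prints a summary trace, then calls process.exit
 * await scriptsTest(["my-script"], { out: ".genaiscript/tests", cache: true });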
*/ export async function scriptsTest( - ids: string[], - options: PromptScriptTestRunOptions & { - out?: string - cli?: string - removeOut?: boolean - cache?: boolean - verbose?: boolean - write?: boolean - redteam?: boolean - promptfooVersion?: string - outSummary?: string - testDelay?: string - groups?: string[] - maxConcurrency?: string - } + ids: string[], + options: PromptScriptTestRunOptions & { + out?: string; + cli?: string; + removeOut?: boolean; + cache?: boolean; + verbose?: boolean; + write?: boolean; + redteam?: boolean; + promptfooVersion?: string; + outSummary?: string; + testDelay?: string; + groups?: string[]; + maxConcurrency?: string; + }, ) { - const { status, value = [] } = await runPromptScriptTests(ids, options) - const trace = new MarkdownTrace() - trace.appendContent( - `\n\ntests: ${value.filter((r) => r.ok).length} success, ${value.filter((r) => !r.ok).length} failed\n\n` - ) - for (const result of value) trace.resultItem(result.ok, result.script) - console.log("") - console.log(trace.content) - process.exit(status) + const { status, value = [] } = await runPromptScriptTests(ids, options); + const trace = new MarkdownTrace(); + trace.appendContent( + `\n\ntests: ${value.filter((r) => r.ok).length} success, ${value.filter((r) => !r.ok).length} failed\n\n`, + ); + for (const result of value) trace.resultItem(result.ok, result.script); + console.log(""); + console.log(trace.content); + process.exit(status); } /** @@ -416,12 +381,9 @@ export async function scriptsTest( * @param options - Options to filter the scripts by groups or redteam flag. * Filters the scripts by groups and whether they are for redteam testing. */ -export async function scriptTestList(options: { - groups?: string[] - redteam?: boolean -}) { - const scripts = await listTests(options) - console.log(scripts.map((s) => toStringList(s.id, s.filename)).join("\n")) +export async function scriptTestList(options: { groups?: string[]; redteam?: boolean }) { + const scripts = await listTests(options); + console.log(scripts.map((s) => toStringList(s.id, s.filename)).join("\n")); } /** @@ -432,19 +394,19 @@ export async function scriptTestList(options: { * @param options - Options to specify the promptfoo version. 
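 * A minimal sketch (assumes network access so npx can fetch promptfoo):
 * @example
 * await scriptTestsView({ promptfooVersion: PROMPTFOO_VERSION });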
*/ export async function scriptTestsView(options: { promptfooVersion?: string }) { - await ensureDir(PROMPTFOO_CACHE_PATH) - await ensureDir(PROMPTFOO_CONFIG_DIR) - const cmd = `npx` - const args = [ - "--yes", - `promptfoo@${options.promptfooVersion || PROMPTFOO_VERSION}`, - "view", - "-y", - ] - console.debug(`launching promptfoo result server`) - await execa(cmd, args, { - cleanup: true, - env: createEnv(), - stdio: "inherit", - }) + await ensureDir(PROMPTFOO_CACHE_PATH); + await ensureDir(PROMPTFOO_CONFIG_DIR); + const cmd = `npx`; + const args = [ + "--yes", + `promptfoo@${options.promptfooVersion || PROMPTFOO_VERSION}`, + "view", + "-y", + ]; + console.debug(`launching promptfoo result server`); + await execa(cmd, args, { + cleanup: true, + env: createEnv(), + stdio: "inherit", + }); } diff --git a/packages/cli/src/trace.ts b/packages/cli/src/trace.ts index 39d36502ae..20109436f9 100644 --- a/packages/cli/src/trace.ts +++ b/packages/cli/src/trace.ts @@ -1,12 +1,12 @@ -import { ensureDir } from "fs-extra" -import { MarkdownTrace, TraceChunkEvent } from "../../core/src/trace" -import { logVerbose } from "../../core/src/util" -import { dirname } from "node:path" -import { writeFileSync, WriteStream } from "node:fs" -import { TRACE_CHUNK, TRACE_DETAILS } from "../../core/src/constants" -import { writeFile } from "node:fs/promises" -import { measure } from "../../core/src/performance" -import { createWriteStream } from "node:fs" +import { ensureDir } from "fs-extra"; +import { MarkdownTrace, TraceChunkEvent } from "../../core/src/trace"; +import { logVerbose } from "../../core/src/util"; +import { dirname } from "node:path"; +import { writeFileSync, WriteStream } from "node:fs"; +import { TRACE_CHUNK, TRACE_DETAILS } from "../../core/src/constants"; +import { writeFile } from "node:fs/promises"; +import { measure } from "../../core/src/performance"; +import { createWriteStream } from "node:fs"; /** * Sets up trace writing to a specified file by handling trace events. @@ -25,49 +25,49 @@ import { createWriteStream } from "node:fs" * the entire content to the file. 
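 * A hedged usage sketch (the trace file path is illustrative):
 * @example
 * const trace = new MarkdownTrace();
 * await setupTraceWriting(trace, "trace", ".genaiscript/runs/trace.md");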
*/ export async function setupTraceWriting( - trace: MarkdownTrace, - name: string, - filename: string, - options?: { ignoreInner?: boolean } + trace: MarkdownTrace, + name: string, + filename: string, + options?: { ignoreInner?: boolean }, ) { - const { ignoreInner } = options || {} - logVerbose(`${name}: ${filename}`) - await ensureDir(dirname(filename)) - await writeFile(filename, "", { encoding: "utf-8" }) + const { ignoreInner } = options || {}; + logVerbose(`${name}: ${filename}`); + await ensureDir(dirname(filename)); + await writeFile(filename, "", { encoding: "utf-8" }); - // Create a write stream for efficient buffered writes - let writeStream: WriteStream - trace.addEventListener( - TRACE_CHUNK, - (ev) => { - const tev = ev as TraceChunkEvent - if (ignoreInner && tev.inner) return - const m = measure("trace.chunk") - if (!writeStream) - writeStream = createWriteStream(filename, { - flags: "a", // 'a' for append mode - encoding: "utf8", - }) - writeStream.write(tev.chunk) // Non-blocking buffered write - m(`${tev.chunk.length} chars`) - }, - false - ) + // Create a write stream for efficient buffered writes + let writeStream: WriteStream; + trace.addEventListener( + TRACE_CHUNK, + (ev) => { + const tev = ev as TraceChunkEvent; + if (ignoreInner && tev.inner) return; + const m = measure("trace.chunk"); + if (!writeStream) + writeStream = createWriteStream(filename, { + flags: "a", // 'a' for append mode + encoding: "utf8", + }); + writeStream.write(tev.chunk); // Non-blocking buffered write + m(`${tev.chunk.length} chars`); + }, + false, + ); - trace.addEventListener(TRACE_DETAILS, (ev) => { - const m = measure("trace.details") - const content = trace.content + trace.addEventListener(TRACE_DETAILS, (ev) => { + const m = measure("trace.details"); + const content = trace.content; - // End the write stream to ensure all data is flushed - if (writeStream) { - writeStream.end() - writeStream = undefined - } + // End the write stream to ensure all data is flushed + if (writeStream) { + writeStream.end(); + writeStream = undefined; + } - // Write the full content - writeFileSync(filename, content, { encoding: "utf-8" }) - m(`${content.length} chars`) - }) + // Write the full content + writeFileSync(filename, content, { encoding: "utf-8" }); + m(`${content.length} chars`); + }); - return filename + return filename; } diff --git a/packages/cli/src/vars.ts b/packages/cli/src/vars.ts index fcbd564494..12b70fc136 100644 --- a/packages/cli/src/vars.ts +++ b/packages/cli/src/vars.ts @@ -1,8 +1,8 @@ -import { CLI_ENV_VAR_RX } from "../../core/src/constants" -import { genaiscriptDebug } from "../../core/src/debug" -import { parseKeyValuePair } from "../../core/src/fence" -import { camelCase } from "es-toolkit" -const dbg = genaiscriptDebug("cli:vars") +import { CLI_ENV_VAR_RX } from "../../core/src/constants"; +import { genaiscriptDebug } from "../../core/src/debug"; +import { parseKeyValuePair } from "../../core/src/fence"; +import { camelCase } from "es-toolkit"; +const dbg = genaiscriptDebug("cli:vars"); /** * Parses and combines variables from input and environment variables. @@ -12,19 +12,19 @@ const dbg = genaiscriptDebug("cli:vars") * @returns An object containing the merged key-value pairs from `vars` and environment variables whose keys match the regex, with their keys transformed to lowercase. */ export function parseOptionsVars( - vars: string[] | Record, - env: Record + vars: string[] | Record, + env: Record, ): Record { - const vals = Array.isArray(vars) - ? 
vars.reduce((acc, v) => ({ ...acc, ...parseKeyValuePair(v) }), {}) - : ((vars || {}) as Record) - dbg(`cli %O`, Object.keys(vals)) - const envVals = Object.keys(env) - .filter((k) => CLI_ENV_VAR_RX.test(k)) - .map((k) => ({ - [camelCase(k.replace(CLI_ENV_VAR_RX, ""))]: env[k], - })) - .reduce((acc, v) => ({ ...acc, ...v }), {}) - dbg(`env %O`, Object.keys(envVals)) - return { ...vals, ...envVals } + const vals = Array.isArray(vars) + ? vars.reduce((acc, v) => ({ ...acc, ...parseKeyValuePair(v) }), {}) + : ((vars || {}) as Record); + dbg(`cli %O`, Object.keys(vals)); + const envVals = Object.keys(env) + .filter((k) => CLI_ENV_VAR_RX.test(k)) + .map((k) => ({ + [camelCase(k.replace(CLI_ENV_VAR_RX, ""))]: env[k], + })) + .reduce((acc, v) => ({ ...acc, ...v }), {}); + dbg(`env %O`, Object.keys(envVals)); + return { ...vals, ...envVals }; } diff --git a/packages/cli/src/version.ts b/packages/cli/src/version.ts index 3cb52fd7d0..619c1505bc 100644 --- a/packages/cli/src/version.ts +++ b/packages/cli/src/version.ts @@ -1,4 +1,4 @@ -import packageJson from "../package.json" +import packageJson from "../package.json"; // This file exports specific versions of dependencies and engines from package.json @@ -6,27 +6,27 @@ import packageJson from "../package.json" * The minimum required Node.js version for this package. * Retrieved from the "engines" field in package.json. */ -export const NODE_MIN_VERSION = packageJson.engines.node +export const NODE_MIN_VERSION = packageJson.engines.node; /** * The version of the 'promptfoo' peer dependency. */ -export const PROMPTFOO_VERSION = "0.112.7" +export const PROMPTFOO_VERSION = "0.112.7"; /** * The version of the 'typescript' dependency. * Retrieved from the "dependencies" field in package.json. */ -export const TYPESCRIPT_VERSION = packageJson.dependencies.typescript +export const TYPESCRIPT_VERSION = packageJson.dependencies.typescript; /** * The version of the 'dockerode' dependency. * Retrieved from the "dependencies" field in package.json. */ -export const DOCKERODE_VERSION = packageJson.dependencies.dockerode +export const DOCKERODE_VERSION = packageJson.dependencies.dockerode; /** * The version of the 'playwright' dependency. * Retrieved from the "dependencies" field in package.json. */ -export const PLAYWRIGHT_VERSION = packageJson.optionalDependencies.playwright +export const PLAYWRIGHT_VERSION = packageJson.optionalDependencies.playwright; diff --git a/packages/cli/src/video.ts b/packages/cli/src/video.ts index 246e4c9d24..c2c69c961a 100644 --- a/packages/cli/src/video.ts +++ b/packages/cli/src/video.ts @@ -1,4 +1,4 @@ -import { FFmepgClient } from "../../core/src/ffmpeg" +import { FFmepgClient } from "../../core/src/ffmpeg"; /** * Extracts audio from a given media file. @@ -11,16 +11,16 @@ import { FFmepgClient } from "../../core/src/ffmpeg" * Logs the resulting audio file path upon completion. 
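 * A hedged sketch (the media path is illustrative):
 * @example
 * await extractAudio("media/recording.mp4", { force: false, transcription: true });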
*/ export async function extractAudio( - file: string, - options: { force: boolean; transcription: boolean } + file: string, + options: { force: boolean; transcription: boolean }, ) { - const { force, transcription } = options || {} - const ffmpeg = new FFmepgClient() - const fn = await ffmpeg.extractAudio(file, { - transcription, - forceConversion: force, - }) - console.log(fn) + const { force, transcription } = options || {}; + const ffmpeg = new FFmepgClient(); + const fn = await ffmpeg.extractAudio(file, { + transcription, + forceConversion: force, + }); + console.log(fn); } /** @@ -36,25 +36,25 @@ export async function extractAudio( * - sceneThreshold: A threshold value to detect scene changes for frame extraction. */ export async function extractVideoFrames( - file: string, - options: { - timestamps?: number[] - count?: number - size?: string - format?: string - keyframes?: boolean - sceneThreshold?: number - } + file: string, + options: { + timestamps?: number[]; + count?: number; + size?: string; + format?: string; + keyframes?: boolean; + sceneThreshold?: number; + }, ) { - const { ...rest } = options || {} - const ffmpeg = new FFmepgClient() - const frames = await ffmpeg.extractFrames(file, { - ...rest, - }) - for (let i = 0; i < frames.length; i++) { - const fn = frames[i] - console.log(`${fn}`) - } + const { ...rest } = options || {}; + const ffmpeg = new FFmepgClient(); + const frames = await ffmpeg.extractFrames(file, { + ...rest, + }); + for (let i = 0; i < frames.length; i++) { + const fn = frames[i]; + console.log(`${fn}`); + } } /** @@ -64,7 +64,7 @@ export async function extractVideoFrames( * Logs the metadata of the video file in JSON format. */ export async function probeVideo(file: string) { - const ffmpeg = new FFmepgClient() - const res = await ffmpeg.probe(file) - console.log(JSON.stringify(res, null, 2)) + const ffmpeg = new FFmepgClient(); + const res = await ffmpeg.probe(file); + console.log(JSON.stringify(res, null, 2)); } diff --git a/packages/cli/src/watch.ts b/packages/cli/src/watch.ts index 5ddccc5a5d..6d1a56557d 100644 --- a/packages/cli/src/watch.ts +++ b/packages/cli/src/watch.ts @@ -1,107 +1,107 @@ -import { FSWatcher, watch } from "chokidar" -import { basename, resolve } from "node:path" -import { CHANGE, CLOSE, GENAI_ANY_REGEX, OPEN } from "../../core/src/constants" -import { createGitIgnorer } from "../../core/src/gitignore" -import { Project } from "../../core/src/server/messages" -import { buildProject } from "./build" -import { filterScripts, ScriptFilterOptions } from "../../core/src/ast" -import { CancellationOptions, toSignal } from "../../core/src/cancellation" -import { logError } from "../../core/src/util" -import { genaiscriptDebug } from "../../core/src/debug" -const dbg = genaiscriptDebug("watch") +import { FSWatcher, watch } from "chokidar"; +import { basename, resolve } from "node:path"; +import { CHANGE, CLOSE, GENAI_ANY_REGEX, OPEN } from "../../core/src/constants"; +import { createGitIgnorer } from "../../core/src/gitignore"; +import { Project } from "../../core/src/server/messages"; +import { buildProject } from "./build"; +import { filterScripts, ScriptFilterOptions } from "../../core/src/ast"; +import { CancellationOptions, toSignal } from "../../core/src/cancellation"; +import { logError } from "../../core/src/util"; +import { genaiscriptDebug } from "../../core/src/debug"; +const dbg = genaiscriptDebug("watch"); interface ProjectWatcherOptions extends ScriptFilterOptions { - paths: ElementOrArray - cwd: string + paths: 
ElementOrArray; + cwd: string; } export class ProjectWatcher extends EventTarget { - private _watcher: FSWatcher - private _project: Project - private _scripts: PromptScript[] + private _watcher: FSWatcher; + private _project: Project; + private _scripts: PromptScript[]; - constructor(readonly options: ProjectWatcherOptions & CancellationOptions) { - super() - const signal = toSignal(this.options.cancellationToken) - signal?.addEventListener("abort", this.close.bind(this)) - } + constructor(readonly options: ProjectWatcherOptions & CancellationOptions) { + super(); + const signal = toSignal(this.options.cancellationToken); + signal?.addEventListener("abort", this.close.bind(this)); + } - get cwd() { - return this.options.cwd - } + get cwd() { + return this.options.cwd; + } - async open() { - if (this._watcher) return + async open() { + if (this._watcher) return; - dbg(`starting`) - await this.refresh() - const { paths, cwd } = this.options - const gitIgnorer = await createGitIgnorer() - // Initialize watcher. - this._watcher = watch(paths, { - ignored: (path, stats) => { - if (!stats) return false - if (stats.isDirectory()) { - const b = basename(path) - if (/^\./.test(b)) return true - } else if (stats.isFile() && !GENAI_ANY_REGEX.test(path)) { - return true - } - const filtered = gitIgnorer([path]) - if (filtered.length === 0) return true - return false - }, - persistent: false, - ignoreInitial: true, - awaitWriteFinish: { - stabilityThreshold: 2000, - pollInterval: 1000, - }, - atomic: true, - interval: 1000, - binaryInterval: 5000, - depth: 30, - cwd, - }) - const changed = () => { - dbg(`changed`) - this.dispatchEvent(new Event(CHANGE)) + dbg(`starting`); + await this.refresh(); + const { paths, cwd } = this.options; + const gitIgnorer = await createGitIgnorer(); + // Initialize watcher. 
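+        // The `ignored` callback below is the filter: dot-directories are skipped,
+        // files that do not match GENAI_ANY_REGEX are skipped, and any path the
+        // gitignore filter drops (an empty filtered result) is skipped as well.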
+ this._watcher = watch(paths, { + ignored: (path, stats) => { + if (!stats) return false; + if (stats.isDirectory()) { + const b = basename(path); + if (/^\./.test(b)) return true; + } else if (stats.isFile() && !GENAI_ANY_REGEX.test(path)) { + return true; } - this._watcher - .on("error", (error) => logError(`watch: ${error}`)) - .on("add", changed) - .on("change", changed) - .on("unlink", changed) - this.addEventListener(CHANGE, this.refresh.bind(this)) - this.dispatchEvent(new Event(OPEN)) - } + const filtered = gitIgnorer([path]); + if (filtered.length === 0) return true; + return false; + }, + persistent: false, + ignoreInitial: true, + awaitWriteFinish: { + stabilityThreshold: 2000, + pollInterval: 1000, + }, + atomic: true, + interval: 1000, + binaryInterval: 5000, + depth: 30, + cwd, + }); + const changed = () => { + dbg(`changed`); + this.dispatchEvent(new Event(CHANGE)); + }; + this._watcher + .on("error", (error) => logError(`watch: ${error}`)) + .on("add", changed) + .on("change", changed) + .on("unlink", changed); + this.addEventListener(CHANGE, this.refresh.bind(this)); + this.dispatchEvent(new Event(OPEN)); + } - private async refresh() { - this._project = undefined - } + private async refresh() { + this._project = undefined; + } - async project() { - if (!this._project) { - dbg(`building project`) - this._project = await buildProject() - } - return this._project + async project() { + if (!this._project) { + dbg(`building project`); + this._project = await buildProject(); } + return this._project; + } - async scripts() { - if (!this._scripts) { - const project = await this.project() - this._scripts = filterScripts(project.scripts, this.options) - } - return this._scripts?.slice(0) + async scripts() { + if (!this._scripts) { + const project = await this.project(); + this._scripts = filterScripts(project.scripts, this.options); } + return this._scripts?.slice(0); + } - async close() { - dbg(`closing`) - await this._watcher?.close() - this._watcher = undefined - this.dispatchEvent(new Event(CLOSE)) - } + async close() { + dbg(`closing`); + await this._watcher?.close(); + this._watcher = undefined; + this.dispatchEvent(new Event(CLOSE)); + } } /** @@ -115,13 +115,13 @@ export class ProjectWatcher extends EventTarget { * @returns An initialized ProjectWatcher instance. 
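 * A hedged usage sketch (the paths value is illustrative; CHANGE is the event-name constant imported above):
 * @example
 * const watcher = await startProjectWatcher({ paths: "genaisrc" });
 * watcher.addEventListener(CHANGE, () => console.log("project changed"));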
*/ export async function startProjectWatcher( - options?: ScriptFilterOptions & { - paths?: ElementOrArray - cwd?: string - } & CancellationOptions + options?: ScriptFilterOptions & { + paths?: ElementOrArray; + cwd?: string; + } & CancellationOptions, ) { - const { paths = ".", cwd = resolve("."), ...rest } = options || {} - const watcher = new ProjectWatcher({ paths, cwd, ...rest }) - await watcher.open() - return watcher + const { paths = ".", cwd = resolve("."), ...rest } = options || {}; + const watcher = new ProjectWatcher({ paths, cwd, ...rest }); + await watcher.open(); + return watcher; } diff --git a/packages/cli/src/worker.ts b/packages/cli/src/worker.ts index 791376dbf2..1cbfc63182 100644 --- a/packages/cli/src/worker.ts +++ b/packages/cli/src/worker.ts @@ -1,11 +1,11 @@ -import { workerData, parentPort } from "node:worker_threads" -import { runScriptInternal } from "./run" -import { NodeHost } from "./nodehost" -import { delay } from "es-toolkit" -import { overrideStdoutWithStdErr } from "../../core/src/stdio" -import { runtimeHost } from "../../core/src/host" -import { Resource } from "../../core/src/mcpresource" -import { RESOURCE_CHANGE } from "../../core/src/constants" +import { workerData, parentPort } from "node:worker_threads"; +import { runScriptInternal } from "./run"; +import { NodeHost } from "./nodehost"; +import { delay } from "es-toolkit"; +import { overrideStdoutWithStdErr } from "../../core/src/stdio"; +import { runtimeHost } from "../../core/src/host"; +import { Resource } from "../../core/src/mcpresource"; +import { RESOURCE_CHANGE } from "../../core/src/constants"; /** * Handles worker thread execution based on the provided data type. @@ -23,37 +23,37 @@ import { RESOURCE_CHANGE } from "../../core/src/constants" * - Ensures compatibility with Windows by setting the SystemRoot environment variable. 
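 * A hedged sketch of how a host thread might spawn this worker (the entry path and payload values are illustrative):
 * @example
 * import { Worker } from "node:worker_threads";
 * new Worker(__filename, {
 *   workerData: { type: "run", scriptId: "my-script", files: [], options: {} },
 * });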
*/ export async function worker() { - overrideStdoutWithStdErr() - const { type, ...data } = workerData as { - type: string - } - await NodeHost.install(undefined, undefined) // Install NodeHost with environment options - if (process.platform === "win32") { - // https://github.com/Azure/azure-sdk-for-js/issues/32374 - process.env.SystemRoot = process.env.SYSTEMROOT - } + overrideStdoutWithStdErr(); + const { type, ...data } = workerData as { + type: string; + }; + await NodeHost.install(undefined, undefined); // Install NodeHost with environment options + if (process.platform === "win32") { + // https://github.com/Azure/azure-sdk-for-js/issues/32374 + process.env.SystemRoot = process.env.SYSTEMROOT; + } - runtimeHost.resources.addEventListener(RESOURCE_CHANGE, (ev) => { - const cev = ev as CustomEvent - const { reference, content } = cev.detail - parentPort.postMessage({ - type: RESOURCE_CHANGE, - reference, - content, - } satisfies Resource & { type: string }) - }) + runtimeHost.resources.addEventListener(RESOURCE_CHANGE, (ev) => { + const cev = ev as CustomEvent; + const { reference, content } = cev.detail; + parentPort.postMessage({ + type: RESOURCE_CHANGE, + reference, + content, + } satisfies Resource & { type: string }); + }); - switch (type) { - case "run": { - const { scriptId, files, options } = data as { - scriptId: string - files: string[] - options: object - } - const { result } = await runScriptInternal(scriptId, files, options) - await delay(0) // flush streams - parentPort.postMessage({ type: "run", result }) - break - } + switch (type) { + case "run": { + const { scriptId, files, options } = data as { + scriptId: string; + files: string[]; + options: object; + }; + const { result } = await runScriptInternal(scriptId, files, options); + await delay(0); // flush streams + parentPort.postMessage({ type: "run", result }); + break; } + } } diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts index f0d1a3490e..2d6bc7d699 100644 --- a/packages/core/src/agent.ts +++ b/packages/core/src/agent.ts @@ -1,40 +1,30 @@ -import { createCache } from "./cache" -import { - AGENT_MEMORY_CACHE_NAME, - AGENT_MEMORY_FLEX_TOKENS, - TOKEN_NO_ANSWER, -} from "./constants" -import { errorMessage } from "./error" -import { GenerationOptions } from "./generation" -import { HTMLEscape } from "./htmlescape" -import { prettifyMarkdown } from "./markdown" -import { TraceOptions } from "./trace" -import { ellipse } from "./util" -import debug from "debug" -const dbg = debug("agent:memory") +import { createCache } from "./cache"; +import { AGENT_MEMORY_CACHE_NAME, AGENT_MEMORY_FLEX_TOKENS, TOKEN_NO_ANSWER } from "./constants"; +import { errorMessage } from "./error"; +import { GenerationOptions } from "./generation"; +import { HTMLEscape } from "./htmlescape"; +import { prettifyMarkdown } from "./markdown"; +import { TraceOptions } from "./trace"; +import { ellipse } from "./util"; +import debug from "debug"; +const dbg = debug("agent:memory"); -export type AgentMemoryCacheKey = { agent: string; query: string } +export type AgentMemoryCacheKey = { agent: string; query: string }; export type AgentMemoryCacheValue = AgentMemoryCacheKey & { - answer: string - createdAt: number -} -export type AgentMemoryCache = WorkspaceFileCache< - AgentMemoryCacheKey, - AgentMemoryCacheValue -> + answer: string; + createdAt: number; +}; +export type AgentMemoryCache = WorkspaceFileCache; export function agentCreateCache( - options: Pick & { lookupOnly?: boolean } + options: Pick & { lookupOnly?: boolean }, ): 
AgentMemoryCache { - const cache = createCache( - AGENT_MEMORY_CACHE_NAME, - { - type: "memory", - userState: options.userState, - lookupOnly: options.lookupOnly, - } - ) - return cache + const cache = createCache(AGENT_MEMORY_CACHE_NAME, { + type: "memory", + userState: options.userState, + lookupOnly: options.lookupOnly, + }); + return cache; } /** @@ -52,45 +42,42 @@ export function agentCreateCache( * @returns Memory answer or undefined if no relevant memories are retrieved. */ export async function agentQueryMemory( - cache: AgentMemoryCache, - ctx: ChatGenerationContext, - query: string, - options: Required + cache: AgentMemoryCache, + ctx: ChatGenerationContext, + query: string, + options: Required, ) { - if (!query) return undefined + if (!query) return undefined; - const memories = await loadMemories(cache) - if (!memories?.length) return undefined + const memories = await loadMemories(cache); + if (!memories?.length) return undefined; - let memoryAnswer: string | undefined - // always pre-query memory with cheap model - dbg(`query: ${query}`) - const res = await ctx.runPrompt( - async (_) => { - _.$`Return the contextual information useful to answer from the content in . + let memoryAnswer: string | undefined; + // always pre-query memory with cheap model + dbg(`query: ${query}`); + const res = await ctx.runPrompt( + async (_) => { + _.$`Return the contextual information useful to answer from the content in . - Use MEMORY as the only source of information. - If you cannot find relevant information to answer , return ${TOKEN_NO_ANSWER}. DO NOT INVENT INFORMATION. - Be concise. Keep it short. The output is used by another LLM. - - Provide important details like identifiers and names.`.role( - "system" - ) - _.def("QUERY", query) - await defMemory(cache, _) - }, - { - model: "memory", - system: [], - flexTokens: AGENT_MEMORY_FLEX_TOKENS, - label: "agent memory query", - cache: "agent_memory", - } - ) - if (!res.error) - memoryAnswer = res.text.includes(TOKEN_NO_ANSWER) ? "" : res.text - else dbg(`error: ${errorMessage(res.error)}`) + - Provide important details like identifiers and names.`.role("system"); + _.def("QUERY", query); + await defMemory(cache, _); + }, + { + model: "memory", + system: [], + flexTokens: AGENT_MEMORY_FLEX_TOKENS, + label: "agent memory query", + cache: "agent_memory", + }, + ); + if (!res.error) memoryAnswer = res.text.includes(TOKEN_NO_ANSWER) ? "" : res.text; + else dbg(`error: ${errorMessage(res.error)}`); - dbg(`answer: ${ellipse(memoryAnswer, 128)}`) - return memoryAnswer + dbg(`answer: ${ellipse(memoryAnswer, 128)}`); + return memoryAnswer; } /** @@ -104,32 +91,32 @@ export async function agentQueryMemory( * @param options - Configuration options, including user state and tracing details. 
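 * A hedged sketch (the agent name, query, and answer are illustrative; `cache` and `trace` come from the caller):
 * @example
 * await agentAddMemory(cache, "docs-agent", "What is the build command?", "npm run build", { trace });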
*/ export async function agentAddMemory( - cache: AgentMemoryCache, - agent: string, - query: string, - text: string, - options: Required + cache: AgentMemoryCache, + agent: string, + query: string, + text: string, + options: Required, ) { - const { trace } = options || {} - const cacheKey: AgentMemoryCacheKey = { agent, query } - const cachedValue: AgentMemoryCacheValue = { - ...cacheKey, - answer: text, - createdAt: Date.now(), - } - dbg(`add ${agent}: ${ellipse(query, 80)} -> ${ellipse(text, 128)}`) - await cache.set(cacheKey, cachedValue) - trace.detailsFenced( - `🧠 agent memory: ${HTMLEscape(query)}`, - HTMLEscape(prettifyMarkdown(cachedValue.answer)), - "markdown" - ) + const { trace } = options || {}; + const cacheKey: AgentMemoryCacheKey = { agent, query }; + const cachedValue: AgentMemoryCacheValue = { + ...cacheKey, + answer: text, + createdAt: Date.now(), + }; + dbg(`add ${agent}: ${ellipse(query, 80)} -> ${ellipse(text, 128)}`); + await cache.set(cacheKey, cachedValue); + trace.detailsFenced( + `🧠 agent memory: ${HTMLEscape(query)}`, + HTMLEscape(prettifyMarkdown(cachedValue.answer)), + "markdown", + ); } async function loadMemories(cache: AgentMemoryCache) { - const memories = await cache?.values() - memories?.sort((l, r) => l.createdAt - r.createdAt) - return memories + const memories = await cache?.values(); + memories?.sort((l, r) => l.createdAt - r.createdAt); + return memories; } /** @@ -144,46 +131,43 @@ async function loadMemories(cache: AgentMemoryCache) { * or visualizing the memory contents in a readable format. */ export async function traceAgentMemory( - options: Pick & Required + options: Pick & Required, ) { - const { trace } = options || {} - const cache = agentCreateCache({ - userState: options.userState, - lookupOnly: true, - }) - const memories = await loadMemories(cache) - if (memories?.length) { - try { - trace.startDetails("🧠 agent memory") - memories - .reverse() - .forEach(({ agent, query, answer }) => - trace.detailsFenced( - `👤 ${agent}: ${HTMLEscape(query)}`, - HTMLEscape(prettifyMarkdown(answer)), - "markdown" - ) - ) - } finally { - trace.endDetails() - } + const { trace } = options || {}; + const cache = agentCreateCache({ + userState: options.userState, + lookupOnly: true, + }); + const memories = await loadMemories(cache); + if (memories?.length) { + try { + trace.startDetails("🧠 agent memory"); + memories + .reverse() + .forEach(({ agent, query, answer }) => + trace.detailsFenced( + `👤 ${agent}: ${HTMLEscape(query)}`, + HTMLEscape(prettifyMarkdown(answer)), + "markdown", + ), + ); + } finally { + trace.endDetails(); } + } } -async function defMemory( - cache: AgentMemoryCache, - ctx: ChatTurnGenerationContext -) { - const memories = await cache.values() - memories.reverse().forEach(({ agent, query, answer }, index) => - ctx.def( - "MEMORY", - `${agent}> ${query}? +async function defMemory(cache: AgentMemoryCache, ctx: ChatTurnGenerationContext) { + const memories = await cache.values(); + memories.reverse().forEach(({ agent, query, answer }, index) => + ctx.def( + "MEMORY", + `${agent}> ${query}? 
${answer} `, - { - flex: memories.length - index, - } - ) - ) + { + flex: memories.length - index, + }, + ), + ); } diff --git a/packages/core/src/annotations.test.ts b/packages/core/src/annotations.test.ts index bb80e89882..26ab515f7f 100644 --- a/packages/core/src/annotations.test.ts +++ b/packages/core/src/annotations.test.ts @@ -1,168 +1,166 @@ -import test, { beforeEach, describe } from "node:test" +import test, { beforeEach, describe } from "node:test"; import { - convertAnnotationsToItems, - convertDiagnosticToGitHubActionCommand, - convertGithubMarkdownAnnotationsToItems, - parseAnnotations, -} from "./annotations" -import assert from "assert/strict" -import { TestHost } from "./testhost" -import { EMOJI_WARNING, EMOJI_FAIL } from "./constants" + convertAnnotationsToItems, + convertDiagnosticToGitHubActionCommand, + convertGithubMarkdownAnnotationsToItems, + parseAnnotations, +} from "./annotations"; +import assert from "assert/strict"; +import { TestHost } from "./testhost"; +import { EMOJI_WARNING, EMOJI_FAIL } from "./constants"; describe("annotations", () => { - beforeEach(() => { - TestHost.install() - }) - test("github", () => { - const output = ` + beforeEach(() => { + TestHost.install(); + }); + test("github", () => { + const output = ` ::error file=packages/core/src/github.ts,line=71,endLine=71,code=concatenation_override::The change on line 71 may lead to the original \`text\` content being overridden instead of appending the footer. Consider using \`text = appendGeneratedComment(script, info, text)\` to ensure the original text is preserved and the footer is appended. 😇 ::error file=packages/core/src/github.ts,line=161,endLine=161,code=concatenation_override::Similarly to the change on line 71, the change on line 161 could override the original \`body\` content. It's safer to use \`body = appendGeneratedComment(script, info, body)\` to append the footer while keeping the existing content intact. 🤔 ::error file=packages/core/src/github.ts,line=140,endLine=141,code=unused_code::The removal of the footer in the \`appendGeneratedComment\` function on lines 140-141 results in unused code. Since \`generatedByFooter\` is now being used to append the footer, the original lines that added the footer in \`appendGeneratedComment\` should be removed to clean up the code. 🧹 - ` - - const diags = parseAnnotations(output) - // console.log(diags) - assert.strictEqual(diags.length, 3) - assert.strictEqual(diags[0].severity, "error") - assert.strictEqual(diags[0].filename, "packages/core/src/github.ts") - assert.strictEqual(diags[0].range[0][0], 70) - assert.strictEqual(diags[0].range[1][0], 70) - assert.strictEqual(diags[0].code, "concatenation_override") - assert.strictEqual( - diags[0].message, - "The change on line 71 may lead to the original `text` content being overridden instead of appending the footer. Consider using `text = appendGeneratedComment(script, info, text)` to ensure the original text is preserved and the footer is appended. 
😇" - ) - }) - - test("github:suggestions", () => { - const output = ` + `; + + const diags = parseAnnotations(output); + // console.log(diags) + assert.strictEqual(diags.length, 3); + assert.strictEqual(diags[0].severity, "error"); + assert.strictEqual(diags[0].filename, "packages/core/src/github.ts"); + assert.strictEqual(diags[0].range[0][0], 70); + assert.strictEqual(diags[0].range[1][0], 70); + assert.strictEqual(diags[0].code, "concatenation_override"); + assert.strictEqual( + diags[0].message, + "The change on line 71 may lead to the original `text` content being overridden instead of appending the footer. Consider using `text = appendGeneratedComment(script, info, text)` to ensure the original text is preserved and the footer is appended. 😇", + ); + }); + + test("github:suggestions", () => { + const output = ` ::warning file=packages/sample/src/fib.ts,line=1,endLine=4,code=unimplemented_function::The fibonacci function is unimplemented and currently always returns 0.::function fibonacci(n: number): number { if (n <= 1) return n; return fibonacci(n - 1) + fibonacci(n - 2); } -` - const diags = parseAnnotations(output) - assert.strictEqual(diags.length, 1) - assert.strictEqual( - diags[0].suggestion, - "function fibonacci(n: number): number { if (n <= 1) return n; return fibonacci(n - 1) + fibonacci(n - 2); }" - ) - }) - - test("tsc", () => { - const output = ` +`; + const diags = parseAnnotations(output); + assert.strictEqual(diags.length, 1); + assert.strictEqual( + diags[0].suggestion, + "function fibonacci(n: number): number { if (n <= 1) return n; return fibonacci(n - 1) + fibonacci(n - 2); }", + ); + }); + + test("tsc", () => { + const output = ` $ /workspaces/genaiscript/node_modules/.bin/tsc --noEmit --pretty false -p src src/annotations.ts:11:28 - error TS1005: ',' expected. - ` - - const diags = parseAnnotations(output) - // console.log(diags) - assert.strictEqual(diags.length, 1) - assert.strictEqual(diags[0].severity, "error") - assert.strictEqual(diags[0].filename, "src/annotations.ts") - assert.strictEqual(diags[0].range[0][0], 10) - assert.strictEqual(diags[0].range[1][0], 27) - assert.strictEqual(diags[0].code, "TS1005") - assert.strictEqual(diags[0].message, "',' expected.") - }) - - test("tsc2", () => { - const output = ` + `; + + const diags = parseAnnotations(output); + // console.log(diags) + assert.strictEqual(diags.length, 1); + assert.strictEqual(diags[0].severity, "error"); + assert.strictEqual(diags[0].filename, "src/annotations.ts"); + assert.strictEqual(diags[0].range[0][0], 10); + assert.strictEqual(diags[0].range[1][0], 27); + assert.strictEqual(diags[0].code, "TS1005"); + assert.strictEqual(diags[0].message, "',' expected."); + }); + + test("tsc2", () => { + const output = ` $ /workspaces/genaiscript/node_modules/.bin/tsc --noEmit --pretty false -p src src/connection.ts(69,9): error TS1005: ')' expected. src/connection.ts(71,5): error TS1128: Declaration or statement expected. src/connection.ts(71,6): error TS1128: Declaration or statement expected. info Visit https://yarnpkg.com/en/docs/cli/run for documentation about this command. 
- ` - const diags = parseAnnotations(output) - assert.strictEqual(diags.length, 3) - assert.strictEqual(diags[0].severity, "error") - assert.strictEqual(diags[0].filename, "src/connection.ts") - assert.strictEqual(diags[0].range[0][0], 68) - assert.strictEqual(diags[0].code, "TS1005") - assert.strictEqual(diags[0].message, "')' expected.") - assert.strictEqual(diags[1].severity, "error") - assert.strictEqual(diags[1].filename, "src/connection.ts") - assert.strictEqual(diags[1].range[0][0], 70) - }) - - test("convertAnnotationsToItems", () => { - const input = ` + `; + const diags = parseAnnotations(output); + assert.strictEqual(diags.length, 3); + assert.strictEqual(diags[0].severity, "error"); + assert.strictEqual(diags[0].filename, "src/connection.ts"); + assert.strictEqual(diags[0].range[0][0], 68); + assert.strictEqual(diags[0].code, "TS1005"); + assert.strictEqual(diags[0].message, "')' expected."); + assert.strictEqual(diags[1].severity, "error"); + assert.strictEqual(diags[1].filename, "src/connection.ts"); + assert.strictEqual(diags[1].range[0][0], 70); + }); + + test("convertAnnotationsToItems", () => { + const input = ` ::warning file=src/greeter.ts,line=2,endLine=2,code=missing_semicolon::Missing semicolon after property declaration. ::warning file=src/greeter.ts,line=5,endLine=5,code=missing_semicolon::Missing semicolon after assignment. ::warning file=src/greeter.ts,line=9,endLine=9,code=missing_semicolon::Missing semicolon after return statement. ::warning file=src/greeter.ts,line=18,endLine=18,code=empty_function::The function 'hello' is empty and should contain logic or be removed if not needed. ::warning file=src/greeter.ts,line=20,endLine=20,code=missing_semicolon::Missing semicolon after variable declaration. - ` - const output = convertAnnotationsToItems(input) - console.log(output) - }) - - test("convertDiagnosticToGitHubActionCommand", () => { - const testCases = [ - { - diagnostic: { - severity: "info", - filename: "src/test.ts", - range: [ - [10, 0], - [10, 25], - ], - message: "This is an informational message", - }, - expected: - "::notice file=src/test.ts, line=10, endLine=10::This is an informational message", - }, - { - diagnostic: { - severity: "warning", - filename: "src/component.tsx", - range: [ - [5, 2], - [8, 15], - ], - message: "Consider using a more specific type", - }, - expected: - "::warning file=src/component.tsx, line=5, endLine=8::Consider using a more specific type", - }, - { - diagnostic: { - severity: "error", - filename: "packages/core/utils.js", - range: [ - [42, 0], - [42, 30], - ], - code: "TS2322", - message: "Type 'string' is not assignable to type 'number'", - }, - expected: - "::error file=packages/core/utils.js, line=42, endLine=42::Type 'string' is not assignable to type 'number'", - }, - ] - - for (const { diagnostic, expected } of testCases) { - const result = convertDiagnosticToGitHubActionCommand( - diagnostic as Diagnostic - ) - assert.strictEqual(result, expected) - } - }) - - test("convertGithubMarkdownAnnotationsToItemsCaution", () => { - const input = `> [!CAUTION] + `; + const output = convertAnnotationsToItems(input); + console.log(output); + }); + + test("convertDiagnosticToGitHubActionCommand", () => { + const testCases = [ + { + diagnostic: { + severity: "info", + filename: "src/test.ts", + range: [ + [10, 0], + [10, 25], + ], + message: "This is an informational message", + }, + expected: + "::notice file=src/test.ts, line=10, endLine=10::This is an informational message", + }, + { + diagnostic: { + severity: 
"warning", + filename: "src/component.tsx", + range: [ + [5, 2], + [8, 15], + ], + message: "Consider using a more specific type", + }, + expected: + "::warning file=src/component.tsx, line=5, endLine=8::Consider using a more specific type", + }, + { + diagnostic: { + severity: "error", + filename: "packages/core/utils.js", + range: [ + [42, 0], + [42, 30], + ], + code: "TS2322", + message: "Type 'string' is not assignable to type 'number'", + }, + expected: + "::error file=packages/core/utils.js, line=42, endLine=42::Type 'string' is not assignable to type 'number'", + }, + ]; + + for (const { diagnostic, expected } of testCases) { + const result = convertDiagnosticToGitHubActionCommand(diagnostic as Diagnostic); + assert.strictEqual(result, expected); + } + }); + + test("convertGithubMarkdownAnnotationsToItemsCaution", () => { + const input = `> [!CAUTION] > This operation cannot be undone. -` +`; - const expected = `- ${EMOJI_FAIL} This operation cannot be undone. -` + const expected = `- ${EMOJI_FAIL} This operation cannot be undone. +`; - const result = convertGithubMarkdownAnnotationsToItems(input) - assert.strictEqual(result, expected) - }) + const result = convertGithubMarkdownAnnotationsToItems(input); + assert.strictEqual(result, expected); + }); - test("convertGithubMarkdownAnnotationsToItems", () => { - const input = ` + test("convertGithubMarkdownAnnotationsToItems", () => { + const input = ` > [!WARNING] > This component will be deprecated in the next major version. @@ -170,20 +168,20 @@ Some normal text here. > [!NOTE] > Remember to update your dependencies. -` +`; - const expected = `- ${EMOJI_WARNING} This component will be deprecated in the next major version. + const expected = `- ${EMOJI_WARNING} This component will be deprecated in the next major version. Some normal text here. - ℹ️ Remember to update your dependencies. -` +`; - const result = convertGithubMarkdownAnnotationsToItems(input) - assert.strictEqual(result, expected) - }) + const result = convertGithubMarkdownAnnotationsToItems(input); + assert.strictEqual(result, expected); + }); - test("convertGithubMarkdownAnnotationsToItems2", () => { - const input = ` + test("convertGithubMarkdownAnnotationsToItems2", () => { + const input = ` > [!WARNING] > This component will be deprecated in the next major version. @@ -194,16 +192,16 @@ Some normal text here. > [!CAUTION] > This operation cannot be undone. -` +`; - const expected = `- ${EMOJI_WARNING} This component will be deprecated in the next major version. + const expected = `- ${EMOJI_WARNING} This component will be deprecated in the next major version. Some normal text here. - ℹ️ Remember to update your dependencies. - ${EMOJI_FAIL} This operation cannot be undone. -` +`; - const result = convertGithubMarkdownAnnotationsToItems(input) - assert.strictEqual(result, expected) - }) -}) + const result = convertGithubMarkdownAnnotationsToItems(input); + assert.strictEqual(result, expected); + }); +}); diff --git a/packages/core/src/annotations.ts b/packages/core/src/annotations.ts index d2c5476303..6babddca2d 100644 --- a/packages/core/src/annotations.ts +++ b/packages/core/src/annotations.ts @@ -4,61 +4,61 @@ * of annotations into different formats for integration with CI/CD tools. 
*/

-import { deleteUndefinedValues } from "./cleaners"
-import { EMOJI_FAIL, EMOJI_WARNING } from "./constants"
-import { unfence } from "./unwrappers"
+import { deleteUndefinedValues } from "./cleaners";
+import { EMOJI_FAIL, EMOJI_WARNING } from "./constants";
+import { unfence } from "./unwrappers";

// Regular expression for matching GitHub Actions annotations.
// Example: ::error file=foo.js,line=10,endLine=11::Something went wrong.
const GITHUB_ANNOTATIONS_RX =
-    /^\s*::(?<severity>notice|warning|error)\s*file=(?<file>[^,]+),\s*line=(?<line>\d+),\s*endLine=(?<endLine>\d+)\s*(,\s*code=(?<code>[^,:]+)?\s*)?::(?<message>.*?)(?:::(?<suggestion>.*?))?$/gim
+  /^\s*::(?<severity>notice|warning|error)\s*file=(?<file>[^,]+),\s*line=(?<line>\d+),\s*endLine=(?<endLine>\d+)\s*(,\s*code=(?<code>[^,:]+)?\s*)?::(?<message>.*?)(?:::(?<suggestion>.*?))?$/gim;

// Regular expression for matching Azure DevOps annotations.
// Example: ##vso[task.logissue type=warning;sourcepath=foo.cs;linenumber=1;]Found something.
const AZURE_DEVOPS_ANNOTATIONS_RX =
-    /^\s*##vso\[task.logissue\s+type=(?<severity>error|warning);sourcepath=(?<file>);linenumber=(?<line>\d+)(;code=(?<code>\d+);)?[^\]]*\](?<message>.*)$/gim
+  /^\s*##vso\[task.logissue\s+type=(?<severity>error|warning);sourcepath=(?<file>);linenumber=(?<line>\d+)(;code=(?<code>\d+);)?[^\]]*\](?<message>.*)$/gim;

// Regular expression for matching TypeScript build annotations.
// Example:
// foo.ts:10:error TS1005: ';' expected.
const TYPESCRIPT_ANNOTATIONS_RX =
-    /^(?<file>[^:\s\n].+?):(?<line>\d+)(?::(?<endLine>\d+))?(?::\d+)?\s+-\s+(?<severity>error|warning)\s+(?<code>[^:]+)\s*:\s*(?<message>.*)$/gim
+  /^(?<file>[^:\s\n].+?):(?<line>\d+)(?::(?<endLine>\d+))?(?::\d+)?\s+-\s+(?<severity>error|warning)\s+(?<code>[^:]+)\s*:\s*(?<message>.*)$/gim;

// Regular expression for matching GitHub Flavored Markdown style warnings.
// Example: > [!WARNING]
// > This is a warning message.
const GITHUB_MARKDOWN_WARNINGS_RX =
-    /^\s*>\s*\[!(?<severity>NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]\s*\n>\s*(?<message>.+)(?:\s*\n>\s*.*?)*?$/gim
+  /^\s*>\s*\[!(?<severity>NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]\s*\n>\s*(?<message>.+)(?:\s*\n>\s*.*?)*?$/gim;

// Regular expression for TypeScript compiler errors with parentheses format
// Example: src/connection.ts(71,5): error TS1128: Declaration or statement expected.
// src/connection.ts(71,5): error TS1128: Declaration or statement expected.
const TYPESCRIPT_PARENTHESES_ANNOTATIONS_RX =
-    /^(?<file>[^\(\n]+)\((?<line>\d+),(?<endLine>\d+)\):\s+(?<severity>error|warning)\s+(?<code>TS\d+):\s+(?<message>.+)$/gim
+  /^(?<file>[^\(\n]+)\((?<line>\d+),(?<endLine>\d+)\):\s+(?<severity>error|warning)\s+(?<code>TS\d+):\s+(?<message>.+)$/gim;

const ANNOTATIONS_RX = [
-    TYPESCRIPT_PARENTHESES_ANNOTATIONS_RX,
-    TYPESCRIPT_ANNOTATIONS_RX,
-    GITHUB_ANNOTATIONS_RX,
-    AZURE_DEVOPS_ANNOTATIONS_RX,
-]
+  TYPESCRIPT_PARENTHESES_ANNOTATIONS_RX,
+  TYPESCRIPT_ANNOTATIONS_RX,
+  GITHUB_ANNOTATIONS_RX,
+  AZURE_DEVOPS_ANNOTATIONS_RX,
+];

// Maps severity strings to `DiagnosticSeverity`.
const SEV_MAP: Record<string, DiagnosticSeverity> = Object.freeze({
-    ["info"]: "info",
-    ["tip"]: "info",
-    ["notice"]: "info", // Maps 'notice' to 'info' severity
-    ["note"]: "info",
-    ["warning"]: "warning",
-    ["caution"]: "error",
-    ["error"]: "error",
-})
+  ["info"]: "info",
+  ["tip"]: "info",
+  ["notice"]: "info", // Maps 'notice' to 'info' severity
+  ["note"]: "info",
+  ["warning"]: "warning",
+  ["caution"]: "error",
+  ["error"]: "error",
+});

const SEV_EMOJI_MAP: Record<string, string> = Object.freeze({
-    ["info"]: "ℹ️",
-    ["notice"]: "ℹ️", // Maps 'notice' to 'info' severity
-    ["warning"]: EMOJI_WARNING,
-    ["error"]: EMOJI_FAIL,
-})
+  ["info"]: "ℹ️",
+  ["notice"]: "ℹ️", // Maps 'notice' to 'info' severity
+  ["warning"]: EMOJI_WARNING,
+  ["error"]: EMOJI_FAIL,
+});

/**
 * Parses annotations from TypeScript, GitHub Actions, and Azure DevOps.
@@ -68,36 +68,35 @@ const SEV_EMOJI_MAP: Record<string, string> = Object.freeze({
 * @returns Array of unique Diagnostic objects extracted from the input text.
 */
 export function parseAnnotations(text: string): Diagnostic[] {
-    if (!text) return []
+  if (!text) return [];

-    // Helper function to add an annotation to the set.
-    // Extracts groups from the regex match and constructs a `Diagnostic` object.
-    const addAnnotation = (m: RegExpMatchArray) => {
-        const { file, line, endLine, severity, code, message, suggestion } =
-            m.groups
-        const annotation: Diagnostic = {
-            severity: SEV_MAP[severity?.toLowerCase()] ?? "info", // Default to "info" if severity is missing
-            filename: file,
-            range: [
-                [parseInt(line) - 1, 0], // Start of range, 0-based index
-                [parseInt(endLine) - 1, Number.MAX_VALUE], // End of range, max value for columns
-            ],
-            message: unfence(message, ["markdown", "md", "text"]),
-            code,
-            suggestion,
-        }
-        annotations.add(annotation) // Add the constructed annotation to the set
-    }
+  // Helper function to add an annotation to the set.
+  // Extracts groups from the regex match and constructs a `Diagnostic` object.
+  const addAnnotation = (m: RegExpMatchArray) => {
+    const { file, line, endLine, severity, code, message, suggestion } = m.groups;
+    const annotation: Diagnostic = {
+      severity: SEV_MAP[severity?.toLowerCase()] ?? "info", // Default to "info" if severity is missing
+      filename: file,
+      range: [
+        [parseInt(line) - 1, 0], // Start of range, 0-based index
+        [parseInt(endLine) - 1, Number.MAX_VALUE], // End of range, max value for columns
+      ],
+      message: unfence(message, ["markdown", "md", "text"]),
+      code,
+      suggestion,
+    };
+    annotations.add(annotation); // Add the constructed annotation to the set
+  };

-    // Set to store unique annotations.
-    const annotations = new Set<Diagnostic>()
+  // Set to store unique annotations.
+  const annotations = new Set<Diagnostic>();

-    // Match against TypeScript, GitHub, and Azure DevOps regex patterns.
-    for (const rx of ANNOTATIONS_RX) {
-        for (const m of text.matchAll(rx)) addAnnotation(m)
-    }
+  // Match against TypeScript, GitHub, and Azure DevOps regex patterns.
+  for (const rx of ANNOTATIONS_RX) {
+    for (const m of text.matchAll(rx)) addAnnotation(m);
+  }

-    return Array.from(annotations.values()) // Convert the set to an array
+  return Array.from(annotations.values()); // Convert the set to an array
 }

 /**
@@ -110,7 +109,7 @@ export function parseAnnotations(text: string): Diagnostic[] {
 * @returns A new string with all annotations stripped from the input text.
 */
 export function eraseAnnotations(text: string) {
-    return ANNOTATIONS_RX.reduce((t, rx) => t.replace(rx, ""), text)
+  return ANNOTATIONS_RX.reduce((t, rx) => t.replace(rx, ""), text);
 }

 /**
@@ -127,56 +126,48 @@ export function eraseAnnotations(text: string)
 * @returns A string where matched annotations are replaced with formatted items.
 */
 export function convertAnnotationsToItems(text: string) {
-    return convertGithubMarkdownAnnotationsToItems(
-        ANNOTATIONS_RX.reduce(
-            (t, rx) =>
-                t.replace(rx, (s, ...args) => {
-                    const groups = args.at(-1)
-                    const {
-                        file,
-                        line,
-                        endLine,
-                        severity,
-                        code,
-                        message,
-                        suggestion,
-                    } = groups
-                    const d = deleteUndefinedValues({
-                        severity: SEV_MAP[severity?.toLowerCase()] ??
"info", - filename: file, - range: [ - [parseInt(line) - 1, 0], // Start of range, 0-based index - [parseInt(endLine) - 1, Number.MAX_VALUE], // End of range, max value for columns - ], - code, - message, - suggestion, - }) satisfies Diagnostic - return convertAnnotationToItem(d) - }), - text - ) - ) -} - -export function convertGithubMarkdownAnnotationsToItems(text: string) { - return text?.replace(GITHUB_MARKDOWN_WARNINGS_RX, (s, ...args) => { - const groups = args.at(-1) - const { severity, message, suggestion } = groups - const sev = SEV_MAP[severity?.toLowerCase()] ?? "info" - const d = deleteUndefinedValues({ - severity: sev, - filename: "", + return convertGithubMarkdownAnnotationsToItems( + ANNOTATIONS_RX.reduce( + (t, rx) => + t.replace(rx, (s, ...args) => { + const groups = args.at(-1); + const { file, line, endLine, severity, code, message, suggestion } = groups; + const d = deleteUndefinedValues({ + severity: SEV_MAP[severity?.toLowerCase()] ?? "info", + filename: file, range: [ - [0, 0], // Start of range, 0-based index - [0, Number.MAX_VALUE], // End of range, max value for columns + [parseInt(line) - 1, 0], // Start of range, 0-based index + [parseInt(endLine) - 1, Number.MAX_VALUE], // End of range, max value for columns ], - code: "", + code, message, suggestion, - }) satisfies Diagnostic - return convertAnnotationToItem(d) - }) + }) satisfies Diagnostic; + return convertAnnotationToItem(d); + }), + text, + ), + ); +} + +export function convertGithubMarkdownAnnotationsToItems(text: string) { + return text?.replace(GITHUB_MARKDOWN_WARNINGS_RX, (s, ...args) => { + const groups = args.at(-1); + const { severity, message, suggestion } = groups; + const sev = SEV_MAP[severity?.toLowerCase()] ?? "info"; + const d = deleteUndefinedValues({ + severity: sev, + filename: "", + range: [ + [0, 0], // Start of range, 0-based index + [0, Number.MAX_VALUE], // End of range, max value for columns + ], + code: "", + message, + suggestion, + }) satisfies Diagnostic; + return convertAnnotationToItem(d); + }); } /** @@ -192,9 +183,9 @@ export function convertGithubMarkdownAnnotationsToItems(text: string) { * @returns A formatted string representing the Diagnostic as a list item. */ export function convertAnnotationToItem(d: Diagnostic) { - const { severity, message, filename, code, range } = d - const line = range?.[0]?.[0] - return `- ${SEV_EMOJI_MAP[severity?.toLowerCase()] ?? "info"} ${message}${filename ? ` (\`${filename}${line ? `#L${line}` : ""}\`)` : ""}` + const { severity, message, filename, code, range } = d; + const line = range?.[0]?.[0]; + return `- ${SEV_EMOJI_MAP[severity?.toLowerCase()] ?? "info"} ${message}${filename ? ` (\`${filename}${line ? `#L${line}` : ""}\`)` : ""}`; } /** @@ -205,15 +196,15 @@ export function convertAnnotationToItem(d: Diagnostic) { * @returns A formatted GitHub Action command string including severity, filename, line, endLine, and message. */ export function convertDiagnosticToGitHubActionCommand(d: Diagnostic) { - // Maps DiagnosticSeverity to GitHub Action severity strings. - const sevMap: Record = { - ["info"]: "notice", // Maps 'info' to 'notice' - ["warning"]: "warning", - ["error"]: "error", - } + // Maps DiagnosticSeverity to GitHub Action severity strings. + const sevMap: Record = { + ["info"]: "notice", // Maps 'info' to 'notice' + ["warning"]: "warning", + ["error"]: "error", + }; - // Construct GitHub Action command string with necessary details. 
- return `::${sevMap[d.severity] || d.severity} file=${d.filename}, line=${d.range[0][0]}, endLine=${d.range[1][0]}::${d.message}` + // Construct GitHub Action command string with necessary details. + return `::${sevMap[d.severity] || d.severity} file=${d.filename}, line=${d.range[0][0]}, endLine=${d.range[1][0]}::${d.message}`; } /** @@ -223,31 +214,31 @@ export function convertDiagnosticToGitHubActionCommand(d: Diagnostic) { * @returns Formatted Azure DevOps command string for warnings and errors. For "info" severity, returns a debug message with filename and message. */ export function convertDiagnosticToAzureDevOpsCommand(d: Diagnostic) { - // Handle 'info' severity separately with a debug message. - if (d.severity === "info") return `##[debug]${d.message} at ${d.filename}` - // Construct Azure DevOps command string with necessary details. - else - return `##vso[task.logissue type=${d.severity};sourcepath=${d.filename};linenumber=${d.range[0][0]}]${d.message}` + // Handle 'info' severity separately with a debug message. + if (d.severity === "info") return `##[debug]${d.message} at ${d.filename}`; + // Construct Azure DevOps command string with necessary details. + else + return `##vso[task.logissue type=${d.severity};sourcepath=${d.filename};linenumber=${d.range[0][0]}]${d.message}`; } const severities: Record = { - error: "CAUTION", - warning: "WARNING", - notice: "NOTE", -} + error: "CAUTION", + warning: "WARNING", + notice: "NOTE", +}; export function diagnosticToGitHubMarkdown( - info: { owner: string; repo: string; commitSha?: string }, - d: Diagnostic + info: { owner: string; repo: string; commitSha?: string }, + d: Diagnostic, ) { - const { owner, repo, commitSha } = info - const { severity, message, filename, suggestion, code, range } = d - const file = filename - const line = range?.[0]?.[0] - return `> [!${severities[severity] || severity}] + const { owner, repo, commitSha } = info; + const { severity, message, filename, suggestion, code, range } = d; + const file = filename; + const line = range?.[0]?.[0]; + return `> [!${severities[severity] || severity}] > ${message} > [${file}#L${line}](/${owner}/${repo}/blob/${commitSha}/${file}#L${line})${code ? ` \`${code}\`` : ""} ${suggestion ? `\`\`\`suggestion\n${suggestion}\n\`\`\`\n` : ""} -` +`; } /** @@ -259,37 +250,34 @@ ${suggestion ? `\`\`\`suggestion\n${suggestion}\n\`\`\`\n` : ""} * @returns Formatted Markdown string with severity levels mapped to admonitions, including file, line references, and optional codes. */ export function convertAnnotationsToMarkdown(text: string): string { - // Maps severity levels to Markdown admonition types. - const severities: Record = { - error: "CAUTION", - warning: "WARNING", - notice: "NOTE", - } - // Replace GitHub and Azure DevOps annotations with Markdown format. - return text - ?.replace( - GITHUB_ANNOTATIONS_RX, - ( - _, - severity, - file, - line, - endLine, - __, - code, - message, - suggestion - ) => `> [!${severities[severity] || severity}] + // Maps severity levels to Markdown admonition types. + const severities: Record = { + error: "CAUTION", + warning: "WARNING", + notice: "NOTE", + }; + // Replace GitHub and Azure DevOps annotations with Markdown format. + return text + ?.replace( + GITHUB_ANNOTATIONS_RX, + ( + _, + severity, + file, + line, + endLine, + __, + code, + message, + suggestion, + ) => `> [!${severities[severity] || severity}] > ${message} (${file}#L${line} ${code || ""}) ${suggestion ? 
`\`\`\`suggestion\n${suggestion}\n\`\`\`\n` : ""} -` - ) - ?.replace( - AZURE_DEVOPS_ANNOTATIONS_RX, - (_, severity, file, line, __, code, message) => { - return `> [!${severities[severity] || severity}] ${message} +`, + ) + ?.replace(AZURE_DEVOPS_ANNOTATIONS_RX, (_, severity, file, line, __, code, message) => { + return `> [!${severities[severity] || severity}] ${message} > ${message} (${file}#L${line} ${code || ""}) -` - } - ) +`; + }); } diff --git a/packages/core/src/anthropic.ts b/packages/core/src/anthropic.ts index a1f97b5a63..05b9cff97d 100644 --- a/packages/core/src/anthropic.ts +++ b/packages/core/src/anthropic.ts @@ -1,619 +1,568 @@ +import { ChatCompletionHandler, LanguageModel, ListModelsFunction } from "./chat"; import { - ChatCompletionHandler, - LanguageModel, - ListModelsFunction, -} from "./chat" -import { - ANTHROPIC_MAX_TOKEN, - MODEL_PROVIDER_ANTHROPIC, - MODEL_PROVIDER_ANTHROPIC_BEDROCK, -} from "./constants" -import { parseModelIdentifier } from "./models" -import { NotSupportedError, serializeError } from "./error" -import { approximateTokens } from "./tokens" -import { resolveTokenEncoder } from "./encoders" -import type { Anthropic } from "@anthropic-ai/sdk" + ANTHROPIC_MAX_TOKEN, + MODEL_PROVIDER_ANTHROPIC, + MODEL_PROVIDER_ANTHROPIC_BEDROCK, +} from "./constants"; +import { parseModelIdentifier } from "./models"; +import { NotSupportedError, serializeError } from "./error"; +import { approximateTokens } from "./tokens"; +import { resolveTokenEncoder } from "./encoders"; +import type { Anthropic } from "@anthropic-ai/sdk"; import { - ChatCompletionResponse, - ChatCompletionToolCall, - ChatCompletionUsage, - ChatCompletionMessageParam, - ChatCompletionAssistantMessageParam, - ChatCompletionUserMessageParam, - ChatCompletionTool, - ChatFinishReason, - ChatCompletionContentPartImage, - ChatCompletionSystemMessageParam, - ChatCompletionToolMessageParam, - ChatCompletionContentPart, - ChatCompletionContentPartRefusal, - ChatCompletionsProgressReport, -} from "./chattypes" + ChatCompletionResponse, + ChatCompletionToolCall, + ChatCompletionUsage, + ChatCompletionMessageParam, + ChatCompletionAssistantMessageParam, + ChatCompletionUserMessageParam, + ChatCompletionTool, + ChatFinishReason, + ChatCompletionContentPartImage, + ChatCompletionSystemMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionContentPart, + ChatCompletionContentPartRefusal, + ChatCompletionsProgressReport, +} from "./chattypes"; -import { logError } from "./util" -import { resolveHttpProxyAgent } from "./proxy" -import { ProxyAgent } from "undici" -import { MarkdownTrace } from "./trace" -import { createFetch, FetchType } from "./fetch" -import { JSONLLMTryParse } from "./json5" -import { LanguageModelConfiguration } from "./server/messages" -import { deleteUndefinedValues } from "./cleaners" -import debug from "debug" -import { providerFeatures } from "./features" -const dbg = debug("genaiscript:anthropic") -const dbgMessages = debug("genaiscript:anthropic:msg") +import { logError } from "./util"; +import { resolveHttpProxyAgent } from "./proxy"; +import { ProxyAgent } from "undici"; +import { MarkdownTrace } from "./trace"; +import { createFetch, FetchType } from "./fetch"; +import { JSONLLMTryParse } from "./json5"; +import { LanguageModelConfiguration } from "./server/messages"; +import { deleteUndefinedValues } from "./cleaners"; +import debug from "debug"; +import { providerFeatures } from "./features"; +const dbg = debug("genaiscript:anthropic"); +const dbgMessages = 
debug("genaiscript:anthropic:msg"); -const convertFinishReason = ( - stopReason: Anthropic.Message["stop_reason"] -): ChatFinishReason => { - switch (stopReason) { - case "end_turn": - return "stop" - case "max_tokens": - return "length" - case "stop_sequence": - return "stop" - case "tool_use": - return "tool_calls" - default: - return undefined - } -} +const convertFinishReason = (stopReason: Anthropic.Message["stop_reason"]): ChatFinishReason => { + switch (stopReason) { + case "end_turn": + return "stop"; + case "max_tokens": + return "length"; + case "stop_sequence": + return "stop"; + case "tool_use": + return "tool_calls"; + default: + return undefined; + } +}; const convertUsage = ( - usage: Anthropic.Messages.Usage | undefined + usage: Anthropic.Messages.Usage | undefined, ): ChatCompletionUsage | undefined => { - if (!usage) return undefined - const res = { - prompt_tokens: - usage.input_tokens + - (usage.cache_creation_input_tokens || 0) + - (usage.cache_read_input_tokens || 0), - completion_tokens: usage.output_tokens, - total_tokens: usage.input_tokens + usage.output_tokens, - } as ChatCompletionUsage - if (usage.cache_read_input_tokens) - res.prompt_tokens_details = { - cached_tokens: usage.cache_read_input_tokens, - } - return res -} + if (!usage) return undefined; + const res = { + prompt_tokens: + usage.input_tokens + + (usage.cache_creation_input_tokens || 0) + + (usage.cache_read_input_tokens || 0), + completion_tokens: usage.output_tokens, + total_tokens: usage.input_tokens + usage.output_tokens, + } as ChatCompletionUsage; + if (usage.cache_read_input_tokens) + res.prompt_tokens_details = { + cached_tokens: usage.cache_read_input_tokens, + }; + return res; +}; const adjustUsage = ( - usage: ChatCompletionUsage, - outputTokens: Anthropic.MessageDeltaUsage + usage: ChatCompletionUsage, + outputTokens: Anthropic.MessageDeltaUsage, ): ChatCompletionUsage => { - return { - ...usage, - completion_tokens: usage.completion_tokens + outputTokens.output_tokens, - total_tokens: usage.total_tokens + outputTokens.output_tokens, - } -} + return { + ...usage, + completion_tokens: usage.completion_tokens + outputTokens.output_tokens, + total_tokens: usage.total_tokens + outputTokens.output_tokens, + }; +}; const convertMessages = ( - messages: ChatCompletionMessageParam[], - emitThinking: boolean + messages: ChatCompletionMessageParam[], + emitThinking: boolean, ): Anthropic.MessageParam[] => { - const res: Anthropic.MessageParam[] = [] - dbgMessages(`converting %d messages`, messages.length) - for (let i = 0; i < messages.length; ++i) { - const message = messages[i] - const msg = convertSingleMessage(message, emitThinking) - if (msg.content === "") { - dbgMessages(`empty message`, msg) - continue // no message - } - const last = res.at(-1) - if (last?.role !== msg.role) res.push(msg) - else { - if (typeof last.content === "string") - last.content = [ - { - type: "text", - text: last.content, - }, - ] - if (typeof msg.content === "string") - last.content.push({ type: "text", text: msg.content }) - else last.content.push(...msg.content) - } + const res: Anthropic.MessageParam[] = []; + dbgMessages(`converting %d messages`, messages.length); + for (let i = 0; i < messages.length; ++i) { + const message = messages[i]; + const msg = convertSingleMessage(message, emitThinking); + if (msg.content === "") { + dbgMessages(`empty message`, msg); + continue; // no message } + const last = res.at(-1); + if (last?.role !== msg.role) res.push(msg); + else { + if (typeof last.content === "string") 
+ last.content = [ + { + type: "text", + text: last.content, + }, + ]; + if (typeof msg.content === "string") last.content.push({ type: "text", text: msg.content }); + else last.content.push(...msg.content); + } + } - // filter out empty text messages - return res.filter((msg) => - Array.isArray(msg.content) ? msg.content.length > 0 : msg.content !== "" - ) -} + // filter out empty text messages + return res.filter((msg) => + Array.isArray(msg.content) ? msg.content.length > 0 : msg.content !== "", + ); +}; const convertSingleMessage = ( - msg: ChatCompletionMessageParam, - emitThinking: boolean + msg: ChatCompletionMessageParam, + emitThinking: boolean, ): Anthropic.MessageParam => { - const { role } = msg - if (!role) { - return { - role: "user", - content: [{ type: "text", text: JSON.stringify(msg) }], - } - } else if (msg.role === "assistant") { - return convertAssistantMessage(msg, emitThinking) - } else if (role === "tool") { - return convertToolResultMessage(msg) - } else if (role === "function") - throw new NotSupportedError("function message not supported") + const { role } = msg; + if (!role) { + return { + role: "user", + content: [{ type: "text", text: JSON.stringify(msg) }], + }; + } else if (msg.role === "assistant") { + return convertAssistantMessage(msg, emitThinking); + } else if (role === "tool") { + return convertToolResultMessage(msg); + } else if (role === "function") throw new NotSupportedError("function message not supported"); - return convertStandardMessage(msg) -} + return convertStandardMessage(msg); +}; function toCacheControl(msg: ChatCompletionMessageParam): { - type: "ephemeral" + type: "ephemeral"; } { - return msg.cacheControl === "ephemeral" ? { type: "ephemeral" } : undefined + return msg.cacheControl === "ephemeral" ? { type: "ephemeral" } : undefined; } const convertAssistantMessage = ( - msg: ChatCompletionAssistantMessageParam, - emitThinking: boolean + msg: ChatCompletionAssistantMessageParam, + emitThinking: boolean, ): Anthropic.MessageParam => { - return { - role: "assistant", - content: [ - msg.reasoning_content && emitThinking - ? ({ - type: "thinking", - thinking: msg.reasoning_content, - signature: msg.signature, - } satisfies Anthropic.ThinkingBlockParam) - : undefined, - ...((convertStandardMessage(msg)?.content || []) as any), - ...(msg.tool_calls || []).map( - (tool) => - deleteUndefinedValues({ - type: "tool_use", - id: tool.id, - input: JSONLLMTryParse(tool.function.arguments), - name: tool.function.name, - cache_control: toCacheControl(msg), - }) satisfies Anthropic.ToolUseBlockParam - ), - ].filter((x) => !!x), - } -} + return { + role: "assistant", + content: [ + msg.reasoning_content && emitThinking + ? 
({ + type: "thinking", + thinking: msg.reasoning_content, + signature: msg.signature, + } satisfies Anthropic.ThinkingBlockParam) + : undefined, + ...((convertStandardMessage(msg)?.content || []) as any), + ...(msg.tool_calls || []).map( + (tool) => + deleteUndefinedValues({ + type: "tool_use", + id: tool.id, + input: JSONLLMTryParse(tool.function.arguments), + name: tool.function.name, + cache_control: toCacheControl(msg), + }) satisfies Anthropic.ToolUseBlockParam, + ), + ].filter((x) => !!x), + }; +}; -const convertToolResultMessage = ( - msg: ChatCompletionToolMessageParam -): Anthropic.MessageParam => { - return { - role: "user", - content: [ - deleteUndefinedValues({ - type: "tool_result", - tool_use_id: msg.tool_call_id, - content: msg.content, - cache_control: toCacheControl(msg), - } satisfies Anthropic.ToolResultBlockParam), - ], - } -} +const convertToolResultMessage = (msg: ChatCompletionToolMessageParam): Anthropic.MessageParam => { + return { + role: "user", + content: [ + deleteUndefinedValues({ + type: "tool_result", + tool_use_id: msg.tool_call_id, + content: msg.content, + cache_control: toCacheControl(msg), + } satisfies Anthropic.ToolResultBlockParam), + ], + }; +}; const convertBlockParam = ( - block: ChatCompletionContentPart | ChatCompletionContentPartRefusal, - cache_control?: { type: "ephemeral" } + block: ChatCompletionContentPart | ChatCompletionContentPartRefusal, + cache_control?: { type: "ephemeral" }, ) => { - if (typeof block === "string") { - return { - type: "text", - text: block, - cache_control, - } satisfies Anthropic.TextBlockParam - } else if (block.type === "text") { - if (!block.text) return undefined - return { - type: "text", - text: block.text, - cache_control, - } satisfies Anthropic.TextBlockParam - } else if (block.type === "image_url") { - return convertImageUrlBlock(block) - } - // audio? - // Handle other types or return a default - else - return { - type: "text", - text: JSON.stringify(block), - } satisfies Anthropic.TextBlockParam -} + if (typeof block === "string") { + return { + type: "text", + text: block, + cache_control, + } satisfies Anthropic.TextBlockParam; + } else if (block.type === "text") { + if (!block.text) return undefined; + return { + type: "text", + text: block.text, + cache_control, + } satisfies Anthropic.TextBlockParam; + } else if (block.type === "image_url") { + return convertImageUrlBlock(block); + } + // audio? + // Handle other types or return a default + else + return { + type: "text", + text: JSON.stringify(block), + } satisfies Anthropic.TextBlockParam; +}; const convertStandardMessage = ( - msg: - | ChatCompletionSystemMessageParam - | ChatCompletionAssistantMessageParam - | ChatCompletionUserMessageParam + msg: + | ChatCompletionSystemMessageParam + | ChatCompletionAssistantMessageParam + | ChatCompletionUserMessageParam, ): Anthropic.MessageParam => { - const role = msg.role === "assistant" ? "assistant" : "user" - let res: Anthropic.MessageParam - if (Array.isArray(msg.content)) { - const cache_control = toCacheControl(msg) - res = { - role, - content: msg.content - .map((block) => convertBlockParam(block, cache_control)) - .filter((t) => !!t) - .map(deleteUndefinedValues), - } - } else if (typeof msg.content === "string") { - res = { - role, - content: [ - deleteUndefinedValues({ - type: "text", - text: msg.content, - cache_control: toCacheControl(msg), - }) satisfies Anthropic.TextBlockParam, - ], - } - } + const role = msg.role === "assistant" ? 
"assistant" : "user"; + let res: Anthropic.MessageParam; + if (Array.isArray(msg.content)) { + const cache_control = toCacheControl(msg); + res = { + role, + content: msg.content + .map((block) => convertBlockParam(block, cache_control)) + .filter((t) => !!t) + .map(deleteUndefinedValues), + }; + } else if (typeof msg.content === "string") { + res = { + role, + content: [ + deleteUndefinedValues({ + type: "text", + text: msg.content, + cache_control: toCacheControl(msg), + }) satisfies Anthropic.TextBlockParam, + ], + }; + } - return res -} + return res; +}; -const convertImageUrlBlock = ( - block: ChatCompletionContentPartImage -): Anthropic.ImageBlockParam => { - return { - type: "image", - source: { - type: "base64", - media_type: block.image_url.url.startsWith("data:image/png") - ? "image/png" - : "image/jpeg", - data: block.image_url.url.split(",")[1], - }, - } -} +const convertImageUrlBlock = (block: ChatCompletionContentPartImage): Anthropic.ImageBlockParam => { + return { + type: "image", + source: { + type: "base64", + media_type: block.image_url.url.startsWith("data:image/png") ? "image/png" : "image/jpeg", + data: block.image_url.url.split(",")[1], + }, + }; +}; -const convertTools = ( - tools?: ChatCompletionTool[] -): Anthropic.Messages.Tool[] | undefined => { - if (!tools) return undefined - return tools.map( - (tool) => - ({ - name: tool.function.name, - description: tool.function.description, - input_schema: { - type: "object", - ...tool.function.parameters, - }, - }) satisfies Anthropic.Messages.Tool - ) -} +const convertTools = (tools?: ChatCompletionTool[]): Anthropic.Messages.Tool[] | undefined => { + if (!tools) return undefined; + return tools.map( + (tool) => + ({ + name: tool.function.name, + description: tool.function.description, + input_schema: { + type: "object", + ...tool.function.parameters, + }, + }) satisfies Anthropic.Messages.Tool, + ); +}; const completerFactory = ( - resolver: ( - trace: MarkdownTrace, - cfg: LanguageModelConfiguration, - httpAgent: ProxyAgent, - fetch: FetchType - ) => Promise> + resolver: ( + trace: MarkdownTrace, + cfg: LanguageModelConfiguration, + httpAgent: ProxyAgent, + fetch: FetchType, + ) => Promise>, ) => { - const completion: ChatCompletionHandler = async ( - req, - cfg, - options, - trace - ) => { - const { - requestOptions, - partialCb, - cancellationToken, - inner, - retry, - maxDelay, - retryDelay, - } = options - const { headers } = requestOptions || {} - const { provider, model, reasoningEffort } = parseModelIdentifier( - req.model - ) - const { encode: encoder } = await resolveTokenEncoder(model) + const completion: ChatCompletionHandler = async (req, cfg, options, trace) => { + const { requestOptions, partialCb, cancellationToken, inner, retry, maxDelay, retryDelay } = + options; + const { headers } = requestOptions || {}; + const { provider, model, reasoningEffort } = parseModelIdentifier(req.model); + const { encode: encoder } = await resolveTokenEncoder(model); - const fetch = await createFetch({ - trace, - retries: retry, - retryDelay, - maxDelay, - cancellationToken, - }) - // https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#how-to-implement-prompt-caching - const caching = - /sonnet|haiku|opus/i.test(model) && - req.messages.some((m) => m.cacheControl === "ephemeral") - const httpAgent = resolveHttpProxyAgent() - const messagesApi = await resolver(trace, cfg, httpAgent, fetch) - dbg("caching", caching) - trace.itemValue(`caching`, caching) + const fetch = await createFetch({ + trace, + 
retries: retry, + retryDelay, + maxDelay, + cancellationToken, + }); + // https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#how-to-implement-prompt-caching + const caching = + /sonnet|haiku|opus/i.test(model) && req.messages.some((m) => m.cacheControl === "ephemeral"); + const httpAgent = resolveHttpProxyAgent(); + const messagesApi = await resolver(trace, cfg, httpAgent, fetch); + dbg("caching", caching); + trace.itemValue(`caching`, caching); - let numTokens = 0 - let chatResp = "" - let reasoningChatResp = "" - let signature = "" - let finishReason: ChatCompletionResponse["finishReason"] - let usage: ChatCompletionResponse["usage"] | undefined - const toolCalls: ChatCompletionToolCall[] = [] - const tools = convertTools(req.tools) + let numTokens = 0; + let chatResp = ""; + let reasoningChatResp = ""; + let signature = ""; + let finishReason: ChatCompletionResponse["finishReason"]; + let usage: ChatCompletionResponse["usage"] | undefined; + const toolCalls: ChatCompletionToolCall[] = []; + const tools = convertTools(req.tools); - let temperature = req.temperature - let top_p = req.top_p - let tool_choice: Anthropic.Beta.MessageCreateParams["tool_choice"] = - req.tool_choice === "auto" - ? { type: "auto" } - : req.tool_choice === "none" - ? { type: "none" } - : req.tool_choice !== "required" && - typeof req.tool_choice === "object" - ? { - type: "tool", - name: req.tool_choice.function.name, - } - : undefined - let thinking: Anthropic.ThinkingConfigParam = undefined - const reasoningEfforts = providerFeatures(provider)?.reasoningEfforts - const budget_tokens = - reasoningEfforts[req.reasoning_effort || reasoningEffort] - let max_tokens = req.max_tokens - if (budget_tokens && (!max_tokens || max_tokens < budget_tokens)) - max_tokens = budget_tokens + ANTHROPIC_MAX_TOKEN - max_tokens = max_tokens || ANTHROPIC_MAX_TOKEN - if (budget_tokens) { - temperature = undefined - top_p = undefined - thinking = { - type: "enabled", - budget_tokens, - } - } - const messages = convertMessages(req.messages, !!thinking) - const mreq: Anthropic.Beta.MessageCreateParams = deleteUndefinedValues({ - model, - tools, - messages, - max_tokens, - temperature, - top_p, - tool_choice, - thinking, - stream: true, - }) - // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#extended-output-capabilities-beta - if (/claude-3-7-sonnet/.test(model) && max_tokens >= 128000) { - dbg("enabling 128k output") - mreq.betas = ["output-128k-2025-02-19"] - } + let temperature = req.temperature; + let top_p = req.top_p; + let tool_choice: Anthropic.Beta.MessageCreateParams["tool_choice"] = + req.tool_choice === "auto" + ? { type: "auto" } + : req.tool_choice === "none" + ? { type: "none" } + : req.tool_choice !== "required" && typeof req.tool_choice === "object" + ? 
{ + type: "tool", + name: req.tool_choice.function.name, + } + : undefined; + let thinking: Anthropic.ThinkingConfigParam = undefined; + const reasoningEfforts = providerFeatures(provider)?.reasoningEfforts; + const budget_tokens = reasoningEfforts[req.reasoning_effort || reasoningEffort]; + let max_tokens = req.max_tokens; + if (budget_tokens && (!max_tokens || max_tokens < budget_tokens)) + max_tokens = budget_tokens + ANTHROPIC_MAX_TOKEN; + max_tokens = max_tokens || ANTHROPIC_MAX_TOKEN; + if (budget_tokens) { + temperature = undefined; + top_p = undefined; + thinking = { + type: "enabled", + budget_tokens, + }; + } + const messages = convertMessages(req.messages, !!thinking); + const mreq: Anthropic.Beta.MessageCreateParams = deleteUndefinedValues({ + model, + tools, + messages, + max_tokens, + temperature, + top_p, + tool_choice, + thinking, + stream: true, + }); + // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#extended-output-capabilities-beta + if (/claude-3-7-sonnet/.test(model) && max_tokens >= 128000) { + dbg("enabling 128k output"); + mreq.betas = ["output-128k-2025-02-19"]; + } - dbgMessages(`messages: %O`, messages) - trace.detailsFenced("✉️ body", mreq, "json") - trace.appendContent("\n") + dbgMessages(`messages: %O`, messages); + trace.detailsFenced("✉️ body", mreq, "json"); + trace.appendContent("\n"); - try { - const stream = messagesApi.stream({ ...mreq, ...headers }) - for await (const chunk of stream) { - if (cancellationToken?.isCancellationRequested) { - finishReason = "cancel" - break - } - dbg(chunk.type) - dbgMessages(`%O`, chunk) - let chunkContent = "" - let reasoningContent = "" - switch (chunk.type) { - case "message_start": - usage = convertUsage( - chunk.message.usage as Anthropic.Usage - ) - break - - case "content_block_start": - if (chunk.content_block.type === "tool_use") { - toolCalls[chunk.index] = { - id: chunk.content_block.id, - name: chunk.content_block.name, - arguments: "", - } - } - break + try { + const stream = messagesApi.stream({ ...mreq, ...headers }); + for await (const chunk of stream) { + if (cancellationToken?.isCancellationRequested) { + finishReason = "cancel"; + break; + } + dbg(chunk.type); + dbgMessages(`%O`, chunk); + let chunkContent = ""; + let reasoningContent = ""; + switch (chunk.type) { + case "message_start": + usage = convertUsage(chunk.message.usage as Anthropic.Usage); + break; - case "content_block_delta": - switch (chunk.delta.type) { - case "signature_delta": - signature = chunk.delta.signature - break - case "thinking_delta": - reasoningContent = chunk.delta.thinking - trace.appendToken(reasoningContent) - reasoningChatResp += reasoningContent - trace.appendToken(chunkContent) - break - case "text_delta": - if (!chunk.delta.text) - dbg(`empty text_delta`, chunk) - else { - chunkContent = chunk.delta.text - numTokens += approximateTokens( - chunkContent, - { encoder } - ) - chatResp += chunkContent - trace.appendToken(chunkContent) - } - break + case "content_block_start": + if (chunk.content_block.type === "tool_use") { + toolCalls[chunk.index] = { + id: chunk.content_block.id, + name: chunk.content_block.name, + arguments: "", + }; + } + break; - case "input_json_delta": - toolCalls[chunk.index].arguments += - chunk.delta.partial_json - } - break - case "content_block_stop": { - break - } - case "message_delta": - if (chunk.delta.stop_reason) { - finishReason = convertFinishReason( - chunk.delta.stop_reason - ) - } - if (chunk.usage) { - usage = adjustUsage(usage, chunk.usage) - } - break - 
case "message_stop": { - break - } + case "content_block_delta": + switch (chunk.delta.type) { + case "signature_delta": + signature = chunk.delta.signature; + break; + case "thinking_delta": + reasoningContent = chunk.delta.thinking; + trace.appendToken(reasoningContent); + reasoningChatResp += reasoningContent; + trace.appendToken(chunkContent); + break; + case "text_delta": + if (!chunk.delta.text) dbg(`empty text_delta`, chunk); + else { + chunkContent = chunk.delta.text; + numTokens += approximateTokens(chunkContent, { encoder }); + chatResp += chunkContent; + trace.appendToken(chunkContent); } + break; - if (chunkContent || reasoningContent) { - const progress = deleteUndefinedValues({ - responseSoFar: chatResp, - reasoningSoFar: reasoningContent, - tokensSoFar: numTokens, - responseChunk: chunkContent, - reasoningChunk: reasoningContent, - inner, - } satisfies ChatCompletionsProgressReport) - partialCb?.(progress) - } + case "input_json_delta": + toolCalls[chunk.index].arguments += chunk.delta.partial_json; } - } catch (e) { - finishReason = "fail" - logError(e) - trace.error("error while processing event", serializeError(e)) + break; + case "content_block_stop": { + break; + } + case "message_delta": + if (chunk.delta.stop_reason) { + finishReason = convertFinishReason(chunk.delta.stop_reason); + } + if (chunk.usage) { + usage = adjustUsage(usage, chunk.usage); + } + break; + case "message_stop": { + break; + } } - trace.appendContent("\n\n") - trace.itemValue(`🏁 finish reason`, finishReason) - if (usage?.total_tokens) { - trace.itemValue( - `🪙 tokens`, - `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion` - ) + if (chunkContent || reasoningContent) { + const progress = deleteUndefinedValues({ + responseSoFar: chatResp, + reasoningSoFar: reasoningContent, + tokensSoFar: numTokens, + responseChunk: chunkContent, + reasoningChunk: reasoningContent, + inner, + } satisfies ChatCompletionsProgressReport); + partialCb?.(progress); } - return { - text: chatResp, - reasoning: reasoningChatResp, - signature, - finishReason, - usage, - model, - toolCalls: toolCalls.filter((x) => x !== undefined), - } satisfies ChatCompletionResponse + } + } catch (e) { + finishReason = "fail"; + logError(e); + trace.error("error while processing event", serializeError(e)); } - return completion -} + + trace.appendContent("\n\n"); + trace.itemValue(`🏁 finish reason`, finishReason); + if (usage?.total_tokens) { + trace.itemValue( + `🪙 tokens`, + `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion`, + ); + } + return { + text: chatResp, + reasoning: reasoningChatResp, + signature, + finishReason, + usage, + model, + toolCalls: toolCalls.filter((x) => x !== undefined), + } satisfies ChatCompletionResponse; + }; + return completion; +}; const listModels: ListModelsFunction = async (cfg, options) => { - try { - const Anthropic = (await import("@anthropic-ai/sdk")).default - const anthropic = new Anthropic({ - baseURL: cfg.base, - apiKey: cfg.token, - fetch, - }) + try { + const Anthropic = (await import("@anthropic-ai/sdk")).default; + const anthropic = new Anthropic({ + baseURL: cfg.base, + apiKey: cfg.token, + fetch, + }); - // Parse and format the response into LanguageModelInfo objects - const res = await anthropic.models.list({ limit: 999 }) - return { - ok: true, - models: res.data - .filter(({ type }) => type === "model") - .map( - (model) => - ({ - id: model.id, - details: model.display_name, - }) satisfies 
LanguageModelInfo - ), - } - } catch (e) { - return { ok: false, error: serializeError(e) } - } -} + // Parse and format the response into LanguageModelInfo objects + const res = await anthropic.models.list({ limit: 999 }); + return { + ok: true, + models: res.data + .filter(({ type }) => type === "model") + .map( + (model) => + ({ + id: model.id, + details: model.display_name, + }) satisfies LanguageModelInfo, + ), + }; + } catch (e) { + return { ok: false, error: serializeError(e) }; + } +}; export const AnthropicModel = Object.freeze({ - completer: completerFactory(async (trace, cfg, httpAgent, fetch) => { - const Anthropic = (await import("@anthropic-ai/sdk")).default - const anthropic = new Anthropic({ - baseURL: cfg.base, - apiKey: cfg.token, - fetch, - fetchOptions: { - dispatcher: httpAgent, - } as RequestInit as any, - }) - if (anthropic.baseURL) - trace.itemValue( - `url`, - `[${anthropic.baseURL}](${anthropic.baseURL})` - ) - const messagesApi = anthropic.beta.messages - return messagesApi - }), - id: MODEL_PROVIDER_ANTHROPIC, - listModels, -}) + completer: completerFactory(async (trace, cfg, httpAgent, fetch) => { + const Anthropic = (await import("@anthropic-ai/sdk")).default; + const anthropic = new Anthropic({ + baseURL: cfg.base, + apiKey: cfg.token, + fetch, + fetchOptions: { + dispatcher: httpAgent, + } as RequestInit as any, + }); + if (anthropic.baseURL) trace.itemValue(`url`, `[${anthropic.baseURL}](${anthropic.baseURL})`); + const messagesApi = anthropic.beta.messages; + return messagesApi; + }), + id: MODEL_PROVIDER_ANTHROPIC, + listModels, +}); export const AnthropicBedrockModel = Object.freeze({ - completer: completerFactory(async (trace, cfg, httpAgent, fetch) => { - const AnthropicBedrock = (await import("@anthropic-ai/bedrock-sdk")) - .AnthropicBedrock - const anthropic = new AnthropicBedrock({ - baseURL: cfg.base, - fetch, - fetchOptions: { - dispatcher: httpAgent, - } as RequestInit as any, - }) - if (anthropic.baseURL) - trace.itemValue( - `url`, - `[${anthropic.baseURL}](${anthropic.baseURL})` - ) - return anthropic.beta.messages - }), - id: MODEL_PROVIDER_ANTHROPIC_BEDROCK, - listModels: async () => { - return { - ok: true, - models: [ - { - id: "anthropic.claude-3-7-sonnet-20250219-v1:0", - details: "Claude 3.7 Sonnet", - }, - { - id: "anthropic.claude-3-5-haiku-20241022-v1:0", - details: "Claude 3.5 Haiku", - }, - { - id: "anthropic.claude-3-5-sonnet-20241022-v2:0", - details: "Claude 3.5 Sonnet v2", - }, - { - id: "anthropic.claude-3-5-sonnet-20240620-v1:0", - details: "Claude 3.5 Sonnet", - }, - { - id: "anthropic.claude-3-opus-20240229-v1:0", - details: "Claude 3 Opus", - }, - { - id: "anthropic.claude-3-sonnet-20240229-v1:0", - details: "Claude 3 Sonnet", - }, - { - id: "anthropic.claude-3-haiku-20240307-v1:0", - details: "Claude 3 Haiku", - }, - ], - } - }, -}) + completer: completerFactory(async (trace, cfg, httpAgent, fetch) => { + const AnthropicBedrock = (await import("@anthropic-ai/bedrock-sdk")).AnthropicBedrock; + const anthropic = new AnthropicBedrock({ + baseURL: cfg.base, + fetch, + fetchOptions: { + dispatcher: httpAgent, + } as RequestInit as any, + }); + if (anthropic.baseURL) trace.itemValue(`url`, `[${anthropic.baseURL}](${anthropic.baseURL})`); + return anthropic.beta.messages; + }), + id: MODEL_PROVIDER_ANTHROPIC_BEDROCK, + listModels: async () => { + return { + ok: true, + models: [ + { + id: "anthropic.claude-3-7-sonnet-20250219-v1:0", + details: "Claude 3.7 Sonnet", + }, + { + id: "anthropic.claude-3-5-haiku-20241022-v1:0", + 
details: "Claude 3.5 Haiku", + }, + { + id: "anthropic.claude-3-5-sonnet-20241022-v2:0", + details: "Claude 3.5 Sonnet v2", + }, + { + id: "anthropic.claude-3-5-sonnet-20240620-v1:0", + details: "Claude 3.5 Sonnet", + }, + { + id: "anthropic.claude-3-opus-20240229-v1:0", + details: "Claude 3 Opus", + }, + { + id: "anthropic.claude-3-sonnet-20240229-v1:0", + details: "Claude 3 Sonnet", + }, + { + id: "anthropic.claude-3-haiku-20240307-v1:0", + details: "Claude 3 Haiku", + }, + ], + }; + }, +}); diff --git a/packages/core/src/assert.ts b/packages/core/src/assert.ts index 623412e958..b24a1327a9 100644 --- a/packages/core/src/assert.ts +++ b/packages/core/src/assert.ts @@ -1,5 +1,5 @@ -import debug from "debug" -const dbg = debug("genaiscript:assert") +import debug from "debug"; +const dbg = debug("genaiscript:assert"); /** * Asserts a condition and throws an error if the condition is false. @@ -13,18 +13,18 @@ const dbg = debug("genaiscript:assert") * Triggers the debugger if enabled in the runtime environment. */ export function assert( - cond: boolean, - msg = "Assertion failed", - // eslint-disable-next-line @typescript-eslint/no-explicit-any - debugData?: any + cond: boolean, + msg = "Assertion failed", + // eslint-disable-next-line @typescript-eslint/no-explicit-any + debugData?: any, ) { - if (!cond) { - if (debugData) { - dbg("assertion failed, debug data: %O", debugData) - console.error(msg || `assertion failed`, debugData) - } - // eslint-disable-next-line no-debugger - debugger - throw new Error(msg) + if (!cond) { + if (debugData) { + dbg("assertion failed, debug data: %O", debugData); + console.error(msg || `assertion failed`, debugData); } + // eslint-disable-next-line no-debugger + debugger; + throw new Error(msg); + } } diff --git a/packages/core/src/ast.ts b/packages/core/src/ast.ts index 08c72a5fc2..be160003d7 100644 --- a/packages/core/src/ast.ts +++ b/packages/core/src/ast.ts @@ -1,19 +1,15 @@ /// // Import necessary regular expressions for file type detection and host utilities -import { - GENAI_ANYJS_REGEX, - GENAI_ANYTS_REGEX, - PROMPTY_REGEX, -} from "./constants" -import { Project } from "./server/messages" -import { arrayify } from "./cleaners" -import { tagFilter } from "./tags" -import { dirname, resolve } from "node:path" +import { GENAI_ANYJS_REGEX, GENAI_ANYTS_REGEX, PROMPTY_REGEX } from "./constants"; +import { Project } from "./server/messages"; +import { arrayify } from "./cleaners"; +import { tagFilter } from "./tags"; +import { dirname, resolve } from "node:path"; // Interface representing a file reference, with a name and filename property export interface FileReference { - name: string - filename: string + name: string; + filename: string; } /** @@ -24,19 +20,19 @@ export interface FileReference { * @returns CSV string with each diagnostic entry on a new line. 
*/ export function diagnosticsToCSV(diagnostics: Diagnostic[], sep: string) { - return diagnostics - .map( - ({ severity, filename, range, code, message }) => - [ - severity, // Severity level of the diagnostic - filename, // Filename where the diagnostic occurred - range[0][0], // Start line of the diagnostic range - range[1][0], // End line of the diagnostic range - code || "", // Diagnostic code, if available; empty string if not - message, // Diagnostic message explaining the issue - ].join(sep) // Join fields with the specified separator - ) - .join("\n") // Join each CSV line with a newline character + return diagnostics + .map( + ({ severity, filename, range, code, message }) => + [ + severity, // Severity level of the diagnostic + filename, // Filename where the diagnostic occurred + range[0][0], // Start line of the diagnostic range + range[1][0], // End line of the diagnostic range + code || "", // Diagnostic code, if available; empty string if not + message, // Diagnostic message explaining the issue + ].join(sep), // Join fields with the specified separator + ) + .join("\n"); // Join each CSV line with a newline character } /** @@ -45,16 +41,16 @@ export function diagnosticsToCSV(diagnostics: Diagnostic[], sep: string) { * @returns The group name of the template. Returns the group property if defined, "system" if the ID starts with "system", or "unassigned" if no group is set or determined. */ export function templateGroup(template: PromptScript) { - return ( - template.group || // Return the group if already set - (/^system/i.test(template.id) ? "system" : "") || // Check if the template ID indicates it's a system template - "unassigned" // Default to "unassigned" if no group is determined - ) + return ( + template.group || // Return the group if already set + (/^system/i.test(template.id) ? "system" : "") || // Check if the template ID indicates it's a system template + "unassigned" // Default to "unassigned" if no group is determined + ); } // Constants representing special character positions within a file -export const eolPosition = 0x3fffffff // End of line position, a large constant -export const eofPosition: CharPosition = [0x3fffffff, 0] // End of file position, a tuple with a large constant +export const eolPosition = 0x3fffffff; // End of line position, a large constant +export const eofPosition: CharPosition = [0x3fffffff, 0]; // End of file position, a tuple with a large constant /** * Collects and organizes templates by their directory, identifying the presence of JavaScript or TypeScript files in each directory. @@ -63,26 +59,23 @@ export const eofPosition: CharPosition = [0x3fffffff, 0] // End of file position * @returns An array of directory objects with their names and flags indicating JavaScript and TypeScript file presence. 
 */
 export function collectFolders(
-    prj: Project,
-    options?: { force?: boolean }
+  prj: Project,
+  options?: { force?: boolean },
 ): { dirname: string; js?: boolean; ts?: boolean }[] {
-    const { force } = options || {}
-    const { systemDir } = prj
-    const folders: Record<
-        string,
-        { dirname: string; js?: boolean; ts?: boolean }
-    > = {}
-    for (const t of Object.values(prj.scripts).filter(
-        // must have a filename and not prompty
-        (t) => t.filename && !PROMPTY_REGEX.test(t.filename)
-    )) {
-        const dir = dirname(t.filename) // Get directory name from the filename
-        if (!force && resolve(dir) === systemDir) continue
-        const folder = folders[dir] || (folders[dir] = { dirname: dir })
-        folder.js = folder.js || GENAI_ANYJS_REGEX.test(t.filename) // Check for presence of JS files
-        folder.ts = folder.ts || GENAI_ANYTS_REGEX.test(t.filename) // Check for presence of TS files
-    }
-    return Object.values(folders) // Return an array of folders with their properties
+  const { force } = options || {};
+  const { systemDir } = prj;
+  const folders: Record<string, { dirname: string; js?: boolean; ts?: boolean }> = {};
+  for (const t of Object.values(prj.scripts).filter(
+    // must have a filename and not prompty
+    (t) => t.filename && !PROMPTY_REGEX.test(t.filename),
+  )) {
+    const dir = dirname(t.filename); // Get directory name from the filename
+    if (!force && resolve(dir) === systemDir) continue;
+    const folder = folders[dir] || (folders[dir] = { dirname: dir });
+    folder.js = folder.js || GENAI_ANYJS_REGEX.test(t.filename); // Check for presence of JS files
+    folder.ts = folder.ts || GENAI_ANYTS_REGEX.test(t.filename); // Check for presence of TS files
+  }
+  return Object.values(folders); // Return an array of folders with their properties
 }

 /**
@@ -93,15 +86,15 @@ export function collectFolders(
  * @returns The script with the matching ID, or undefined if no match is found.
  */
 export function resolveScript(prj: Project, system: SystemPromptInstance) {
-    return prj?.scripts?.find((t) => t.id == system.id) // Find and return the template with the matching ID
+  return prj?.scripts?.find((t) => t.id == system.id); // Find and return the template with the matching ID
 }

 export interface ScriptFilterOptions {
-    ids?: string[]
-    groups?: string[]
-    test?: boolean
-    redteam?: boolean
-    unlisted?: boolean
+  ids?: string[];
+  groups?: string[];
+  test?: boolean;
+  redteam?: boolean;
+  unlisted?: boolean;
 }

 /**
@@ -116,15 +109,12 @@ export interface ScriptFilterOptions {
  * - unlisted: If true, includes unlisted scripts; otherwise excludes them.
  * @returns A filtered list of scripts matching the given criteria.
*/ -export function filterScripts( - scripts: PromptScript[], - options: ScriptFilterOptions -) { - const { ids, groups, test, redteam, unlisted } = options || {} - return scripts - .filter((t) => !test || arrayify(t.tests)?.length) - .filter((t) => !redteam || t.redteam) - .filter((t) => !ids?.length || ids.includes(t.id)) - .filter((t) => unlisted || !t.unlisted) - .filter((t) => tagFilter(groups, t.group)) +export function filterScripts(scripts: PromptScript[], options: ScriptFilterOptions) { + const { ids, groups, test, redteam, unlisted } = options || {}; + return scripts + .filter((t) => !test || arrayify(t.tests)?.length) + .filter((t) => !redteam || t.redteam) + .filter((t) => !ids?.length || ids.includes(t.id)) + .filter((t) => unlisted || !t.unlisted) + .filter((t) => tagFilter(groups, t.group)); } diff --git a/packages/core/src/astgrep.test.ts b/packages/core/src/astgrep.test.ts index c9db340ee3..bc2dd0ddb3 100644 --- a/packages/core/src/astgrep.test.ts +++ b/packages/core/src/astgrep.test.ts @@ -1,46 +1,42 @@ -import { beforeEach, describe, test } from "node:test" -import assert from "node:assert/strict" -import { astGrepFindFiles, astGrepParse } from "./astgrep" -import { TestHost } from "./testhost" -import { dedent } from "./indent" +import { beforeEach, describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { astGrepFindFiles, astGrepParse } from "./astgrep"; +import { TestHost } from "./testhost"; +import { dedent } from "./indent"; describe("astgrep", () => { - beforeEach(() => { - TestHost.install() - }) + beforeEach(() => { + TestHost.install(); + }); - test("finds matches in files", async () => { - console.log("Hello, world!") - const result = await astGrepFindFiles( - "ts", - "src/astgrep.test.ts", - "console.log($GREETING)" - ) - assert.equal(result.files, 1) - assert(result.matches.length > 0) - }) - test("parses a JavaScript file", async () => { - const file: WorkspaceFile = { - filename: "test.js", - content: "const x = 1;", - } - const result = await astGrepParse(file, { lang: "js" }) - assert(result) - }) + test("finds matches in files", async () => { + console.log("Hello, world!"); + const result = await astGrepFindFiles("ts", "src/astgrep.test.ts", "console.log($GREETING)"); + assert.equal(result.files, 1); + assert(result.matches.length > 0); + }); + test("parses a JavaScript file", async () => { + const file: WorkspaceFile = { + filename: "test.js", + content: "const x = 1;", + }; + const result = await astGrepParse(file, { lang: "js" }); + assert(result); + }); - test("returns undefined for binary file", async () => { - const file: WorkspaceFile = { - filename: "test.bin", - encoding: "base64", - } - const result = await astGrepParse(file, { lang: "js" }) - assert.equal(result, undefined) - }) + test("returns undefined for binary file", async () => { + const file: WorkspaceFile = { + filename: "test.bin", + encoding: "base64", + }; + const result = await astGrepParse(file, { lang: "js" }); + assert.equal(result, undefined); + }); - test("parse C++ file", async () => { - const file: WorkspaceFile = { - filename: "test.cpp", - content: dedent` + test("parse C++ file", async () => { + const file: WorkspaceFile = { + filename: "test.cpp", + content: dedent` #include int main() { @@ -48,32 +44,32 @@ describe("astgrep", () => { return 0; } `, - } - const result = await astGrepParse(file) - assert(result) - }) - test("parse TypeScript file", async () => { - const file: WorkspaceFile = { - filename: "test.ts", - content: "const x: 
number = 1;",
-        }
-        const result = await astGrepParse(file)
-        assert(result)
-    })
-    test("parse python file", async () => {
-        const file: WorkspaceFile = {
-            filename: "test.py",
-            content: "x = 1",
-        }
-        const result = await astGrepParse(file)
-        assert(result)
-    })
-    test("parse C file", async () => {
-        const file: WorkspaceFile = {
-            filename: "test.c",
-            content: "#include <stdio.h>",
-        }
-        const result = await astGrepParse(file)
-        assert(result)
-    })
-})
+    };
+    const result = await astGrepParse(file);
+    assert(result);
+  });
+  test("parse TypeScript file", async () => {
+    const file: WorkspaceFile = {
+      filename: "test.ts",
+      content: "const x: number = 1;",
+    };
+    const result = await astGrepParse(file);
+    assert(result);
+  });
+  test("parse python file", async () => {
+    const file: WorkspaceFile = {
+      filename: "test.py",
+      content: "x = 1",
+    };
+    const result = await astGrepParse(file);
+    assert(result);
+  });
+  test("parse C file", async () => {
+    const file: WorkspaceFile = {
+      filename: "test.c",
+      content: "#include <stdio.h>",
+    };
+    const result = await astGrepParse(file);
+    assert(result);
+  });
+});
diff --git a/packages/core/src/astgrep.ts b/packages/core/src/astgrep.ts
index 6663e7617e..41f4de0bd8 100644
--- a/packages/core/src/astgrep.ts
+++ b/packages/core/src/astgrep.ts
@@ -1,52 +1,49 @@
-import { CancellationOptions, checkCancelled } from "./cancellation"
-import { CancelError, errorMessage } from "./error"
-import { resolveFileContent } from "./file"
-import { host } from "./host"
-import { uniq } from "es-toolkit"
-import { readText, writeText } from "./fs"
-import { extname } from "node:path"
-import { diffFindChunk, diffResolve } from "./diff"
-import { genaiscriptDebug } from "./debug"
-const dbg = genaiscriptDebug("astgrep")
-const dbgLang = dbg.extend("lang")
+import { CancellationOptions, checkCancelled } from "./cancellation";
+import { CancelError, errorMessage } from "./error";
+import { resolveFileContent } from "./file";
+import { host } from "./host";
+import { uniq } from "es-toolkit";
+import { readText, writeText } from "./fs";
+import { extname } from "node:path";
+import { diffFindChunk, diffResolve } from "./diff";
+import { genaiscriptDebug } from "./debug";
+const dbg = genaiscriptDebug("astgrep");
+const dbgLang = dbg.extend("lang");

 class SgChangeSetImpl implements SgChangeSet {
-    private pending: Record<string, { root: SgRoot; edits: SgEdit[] }> = {}
-
-    toString() {
-        return `changeset ${this.count} edits`
-    }
-
-    get count(): number {
-        return Object.values(this.pending).reduce(
-            (acc, { edits }) => acc + edits.length,
-            0
-        )
-    }
-
-    replace(node: SgNode, text: string) {
-        const edit = node.replace(text)
-        const root = node.getRoot()
-        let rootEdits = this.pending[root.filename()]
-        if (rootEdits) {
-            if (rootEdits.root !== root) {
-                throw new Error(
-                    `node ${node} belongs to a different root ${root} than the pending edits ${rootEdits.root}`
-                )
-            }
-        } else rootEdits = this.pending[root.filename()] = { root, edits: [] }
-        rootEdits.edits.push(edit)
-        return edit
-    }
-    commit() {
-        const files: WorkspaceFile[] = []
-        for (const { root, edits } of Object.values(this.pending)) {
-            const filename = root.filename()
-            const content = root.root().commitEdits(edits)
-            files.push({ filename, content })
-        }
-        return files
+  private pending: Record<string, { root: SgRoot; edits: SgEdit[] }> = {};
+
+  toString() {
+    return `changeset ${this.count} edits`;
+  }
+
+  get count(): number {
+    return Object.values(this.pending).reduce((acc, { edits }) => acc + edits.length, 0);
+  }
+
+  replace(node: SgNode, text: string) {
+    const edit = node.replace(text);
+ const root = node.getRoot(); + let rootEdits = this.pending[root.filename()]; + if (rootEdits) { + if (rootEdits.root !== root) { + throw new Error( + `node ${node} belongs to a different root ${root} than the pending edits ${rootEdits.root}`, + ); + } + } else rootEdits = this.pending[root.filename()] = { root, edits: [] }; + rootEdits.edits.push(edit); + return edit; + } + commit() { + const files: WorkspaceFile[] = []; + for (const { root, edits } of Object.values(this.pending)) { + const filename = root.filename(); + const content = root.root().commitEdits(edits); + files.push({ filename, content }); } + return files; + } } /** @@ -59,7 +56,7 @@ class SgChangeSetImpl implements SgChangeSet { * @returns A new change set instance to handle AST edits. */ export function astGrepCreateChangeSet(): SgChangeSet { - return new SgChangeSetImpl() + return new SgChangeSetImpl(); } /** @@ -80,108 +77,99 @@ export function astGrepCreateChangeSet(): SgChangeSet { * @throws An error if `glob` or `matcher` is not provided. */ export async function astGrepFindFiles( - lang: SgLang, - glob: ElementOrArray, - matcher: string | SgMatcher, - options?: SgSearchOptions & CancellationOptions + lang: SgLang, + glob: ElementOrArray, + matcher: string | SgMatcher, + options?: SgSearchOptions & CancellationOptions, ): ReturnType { - const { cancellationToken, diff } = options || {} - if (!glob) { - throw new Error("glob is required") - } - if (!matcher) { - throw new Error("matcher is required") - } - const diffFiles = diffResolve(diff) - - dbg(`search %O`, matcher) - if (diffFiles?.length) dbg(`diff files: ${diffFiles.length}`) - const { findInFiles } = await import("@ast-grep/napi") - checkCancelled(cancellationToken) - - let paths = await host.findFiles(glob, options) + const { cancellationToken, diff } = options || {}; + if (!glob) { + throw new Error("glob is required"); + } + if (!matcher) { + throw new Error("matcher is required"); + } + const diffFiles = diffResolve(diff); + + dbg(`search %O`, matcher); + if (diffFiles?.length) dbg(`diff files: ${diffFiles.length}`); + const { findInFiles } = await import("@ast-grep/napi"); + checkCancelled(cancellationToken); + + let paths = await host.findFiles(glob, options); + if (!paths?.length) { + dbg(`no files found for glob`, glob); + return { + files: 0, + matches: [], + }; + } + dbg(`found ${paths.length} files`, paths); + + if (diffFiles?.length) { + const diffFilesSet = new Set(diffFiles.filter((f) => f.to).map((f) => f.to)); + paths = paths.filter((p) => diffFilesSet.has(p)); + dbg(`filtered files by diff: ${paths.length}`); if (!paths?.length) { - dbg(`no files found for glob`, glob) - return { - files: 0, - matches: [], - } + return { + files: 0, + matches: [], + }; } - dbg(`found ${paths.length} files`, paths) - - if (diffFiles?.length) { - const diffFilesSet = new Set( - diffFiles.filter((f) => f.to).map((f) => f.to) - ) - paths = paths.filter((p) => diffFilesSet.has(p)) - dbg(`filtered files by diff: ${paths.length}`) - if (!paths?.length) { - return { - files: 0, - matches: [], - } + } + + let matches: SgNode[] = []; + const p = new Promise(async (resolve, reject) => { + let i = 0; + let n: number = undefined; + const sglang = await resolveLang(lang); + n = await findInFiles( + sglang, + { + paths, + matcher: typeof matcher === "string" ? 
{ rule: { pattern: matcher } } : matcher, + }, + (err, nodes) => { + if (err) { + dbg(`error occurred: ${err}`); + throw err; } - } - - let matches: SgNode[] = [] - const p = new Promise(async (resolve, reject) => { - let i = 0 - let n: number = undefined - const sglang = await resolveLang(lang) - n = await findInFiles( - sglang, - { - paths, - matcher: - typeof matcher === "string" - ? { rule: { pattern: matcher } } - : matcher, - }, - (err, nodes) => { - if (err) { - dbg(`error occurred: ${err}`) - throw err - } - dbg(`nodes found: ${nodes.length}`) - matches.push(...nodes) - if (cancellationToken?.isCancellationRequested) { - reject(new CancelError("cancelled")) - } - if (++i === n) { - dbg(`resolving promise with count: ${n}`) - resolve(n) - } - } - ) - if (n === i) { - dbg("resolving promise as callbacks might be ahead") - // we might be ahead of the callbacks - resolve(n) + dbg(`nodes found: ${nodes.length}`); + matches.push(...nodes); + if (cancellationToken?.isCancellationRequested) { + reject(new CancelError("cancelled")); + } + if (++i === n) { + dbg(`resolving promise with count: ${n}`); + resolve(n); } - }) - const scanned = await p - dbg(`files scanned: ${scanned}, matches found: ${matches.length}`) - checkCancelled(cancellationToken) - - // apply diff - if (diffFiles?.length) { - matches = matches.filter((m) => { - const range: [number, number] = [ - m.range().start.line, - m.range().end.line, - ] - const { chunk } = - diffFindChunk(m.getRoot().filename(), range, diffFiles) || {} - if (chunk) - dbg( - `diff overlap at (${range[0]},${range[1]}) x (${chunk.newStart},${chunk.newStart + chunk.newLines})` - ) - return chunk - }) - dbg(`matches filtered by diff: ${matches.length}`) + }, + ); + if (n === i) { + dbg("resolving promise as callbacks might be ahead"); + // we might be ahead of the callbacks + resolve(n); } - - return { files: scanned, matches } + }); + const scanned = await p; + dbg(`files scanned: ${scanned}, matches found: ${matches.length}`); + checkCancelled(cancellationToken); + + // apply diff + if (diffFiles?.length) { + matches = matches.filter((m) => { + const range: [number, number] = [m.range().start.line, m.range().end.line]; + const { chunk } = diffFindChunk(m.getRoot().filename(), range, diffFiles) || {}; + if (chunk) + dbg( + `diff overlap at (${range[0]},${range[1]}) x (${chunk.newStart},${chunk.newStart + chunk.newLines})`, + ); + return chunk; + }); + dbg(`matches filtered by diff: ${matches.length}`); + } + + return { files: scanned, matches }; } /** @@ -193,26 +181,23 @@ export async function astGrepFindFiles( * The function iterates through the unique roots of the provided nodes, checks for file content differences, * and writes updated content to the respective files if changes are detected. If a file does not have a filename, it is skipped. 
*/ -export async function astGrepWriteRootEdits( - nodes: SgNode[], - options?: CancellationOptions -) { - const { cancellationToken } = options || {} - const roots = uniq(nodes.map((n) => n.getRoot())) - dbg(`writing edits to roots: ${roots.length}`) - for (const root of roots) { - checkCancelled(cancellationToken) - - const filename = root.filename() - if (!filename) continue - - const existing = await readText(filename) - const updated = root.root().text() - if (existing !== updated) { - dbg(`writing changes to root: ${filename}`) - await writeText(filename, updated) - } +export async function astGrepWriteRootEdits(nodes: SgNode[], options?: CancellationOptions) { + const { cancellationToken } = options || {}; + const roots = uniq(nodes.map((n) => n.getRoot())); + dbg(`writing edits to roots: ${roots.length}`); + for (const root of roots) { + checkCancelled(cancellationToken); + + const filename = root.filename(); + if (!filename) continue; + + const existing = await readText(filename); + const updated = root.root().text(); + if (existing !== updated) { + dbg(`writing changes to root: ${filename}`); + await writeText(filename, updated); } + } } /** @@ -231,129 +216,125 @@ export async function astGrepWriteRootEdits( * - Uses the library "@ast-grep/napi" for parsing. */ export async function astGrepParse( - file: WorkspaceFile, - options?: { lang?: SgLang | Record } & CancellationOptions + file: WorkspaceFile, + options?: { lang?: SgLang | Record } & CancellationOptions, ): Promise { - const { cancellationToken } = options || {} - if (file.encoding) { - dbg("ignore binary file") - return undefined - } // binary file - - await resolveFileContent(file) - checkCancelled(cancellationToken) - const { filename, encoding, content } = file - if (encoding) { - dbg("ignore binary file") - return undefined - } // binary file - - dbg(`parsing file: ${filename}`) - const { parseAsync } = await import("@ast-grep/napi") - const lang = await resolveLang(options?.lang, filename) - if (!lang) { - return undefined - } - dbg("parsing file content") - const root = await parseAsync(lang, content) - checkCancelled(cancellationToken) - return root + const { cancellationToken } = options || {}; + if (file.encoding) { + dbg("ignore binary file"); + return undefined; + } // binary file + + await resolveFileContent(file); + checkCancelled(cancellationToken); + const { filename, encoding, content } = file; + if (encoding) { + dbg("ignore binary file"); + return undefined; + } // binary file + + dbg(`parsing file: ${filename}`); + const { parseAsync } = await import("@ast-grep/napi"); + const lang = await resolveLang(options?.lang, filename); + if (!lang) { + return undefined; + } + dbg("parsing file content"); + const root = await parseAsync(lang, content); + checkCancelled(cancellationToken); + return root; } -async function resolveLang( - lang: SgLang | Record, - filename?: string -) { - const { Lang } = await import("@ast-grep/napi") - - const norm = (l: string) => l.toLowerCase().replace(/^\./, "") - - // pre-compiled with ast-grep - const builtins: any = { - html: Lang.Html, - htm: Lang.Html, - cjs: Lang.JavaScript, - mjs: Lang.JavaScript, - js: Lang.JavaScript, - cts: Lang.TypeScript, - mts: Lang.TypeScript, - ts: Lang.TypeScript, - typescript: Lang.TypeScript, - javascript: Lang.JavaScript, - jsx: Lang.Tsx, - tsx: Lang.Tsx, - css: Lang.Css, - } - - const dynamics: any = { - h: "c", - c: "c", - cpp: "cpp", - hpp: "cpp", - hxx: "cpp", - cxx: "cpp", - cs: "csharp", - py: "python", - sql: "sql", - yml: "yaml", - 
yaml: "yaml", - } - - const forbidden = ["bin", "exe", "dll"] - - // user provided a string - if (typeof lang === "string") { - lang = norm(lang) - dbgLang(`resolving language ${lang}`) - const builtin = builtins[lang] - if (builtin) return builtin - else return await loadDynamicLanguage(lang) - } - - if (!filename) { - dbgLang(`filename not provided`) - throw new Error("filename is required to resolve language") - } - - if (filename) { - const ext = norm(extname(filename)) - dbgLang(`resolving language for ${ext}`) - - // known builtins - const builtin = builtins[ext] - if (builtin) return builtin - - // known dynamics - const dynamic = dynamics[ext] - if (dynamic) return await loadDynamicLanguage(dynamic) - - if (forbidden.includes(ext)) return undefined - - // try our luck - return await loadDynamicLanguage(ext) - } - - dbgLang(`language not resolved`, { lang, filename }) - throw new Error("language not resolved") +async function resolveLang(lang: SgLang | Record, filename?: string) { + const { Lang } = await import("@ast-grep/napi"); + + const norm = (l: string) => l.toLowerCase().replace(/^\./, ""); + + // pre-compiled with ast-grep + const builtins: any = { + html: Lang.Html, + htm: Lang.Html, + cjs: Lang.JavaScript, + mjs: Lang.JavaScript, + js: Lang.JavaScript, + cts: Lang.TypeScript, + mts: Lang.TypeScript, + ts: Lang.TypeScript, + typescript: Lang.TypeScript, + javascript: Lang.JavaScript, + jsx: Lang.Tsx, + tsx: Lang.Tsx, + css: Lang.Css, + }; + + const dynamics: any = { + h: "c", + c: "c", + cpp: "cpp", + hpp: "cpp", + hxx: "cpp", + cxx: "cpp", + cs: "csharp", + py: "python", + sql: "sql", + yml: "yaml", + yaml: "yaml", + }; + + const forbidden = ["bin", "exe", "dll"]; + + // user provided a string + if (typeof lang === "string") { + lang = norm(lang); + dbgLang(`resolving language ${lang}`); + const builtin = builtins[lang]; + if (builtin) return builtin; + else return await loadDynamicLanguage(lang); + } + + if (!filename) { + dbgLang(`filename not provided`); + throw new Error("filename is required to resolve language"); + } + + if (filename) { + const ext = norm(extname(filename)); + dbgLang(`resolving language for ${ext}`); + + // known builtins + const builtin = builtins[ext]; + if (builtin) return builtin; + + // known dynamics + const dynamic = dynamics[ext]; + if (dynamic) return await loadDynamicLanguage(dynamic); + + if (forbidden.includes(ext)) return undefined; + + // try our luck + return await loadDynamicLanguage(ext); + } + + dbgLang(`language not resolved`, { lang, filename }); + throw new Error("language not resolved"); } -const loadedDynamicLanguages = new Set() +const loadedDynamicLanguages = new Set(); async function loadDynamicLanguage(langName: string) { - if (!loadedDynamicLanguages.has(langName)) { - dbgLang(`loading language: ${langName}`) - const { registerDynamicLanguage } = await import("@ast-grep/napi") - try { - const dynamicLang = (await import(`@ast-grep/lang-${langName}`)) - .default - registerDynamicLanguage({ [langName]: dynamicLang }) - loadedDynamicLanguages.add(langName) - dbgLang(`language ${langName} registered `) - } catch (err) { - dbgLang(`error loading language ${langName}: ${errorMessage(err)}`) - throw Error( - `@ast-grep/lang-${langName} package failed to load, please install it using 'npm install -D @ast-grep/lang-${langName}'` - ) - } + if (!loadedDynamicLanguages.has(langName)) { + dbgLang(`loading language: ${langName}`); + const { registerDynamicLanguage } = await import("@ast-grep/napi"); + try { + const dynamicLang = (await 
import(`@ast-grep/lang-${langName}`)).default; + registerDynamicLanguage({ [langName]: dynamicLang }); + loadedDynamicLanguages.add(langName); + dbgLang(`language ${langName} registered `); + } catch (err) { + dbgLang(`error loading language ${langName}: ${errorMessage(err)}`); + throw Error( + `@ast-grep/lang-${langName} package failed to load, please install it using 'npm install -D @ast-grep/lang-${langName}'`, + ); } - return langName + } + return langName; } diff --git a/packages/core/src/azureaiinference.ts b/packages/core/src/azureaiinference.ts index 6223bcca4c..f2c8fb816e 100644 --- a/packages/core/src/azureaiinference.ts +++ b/packages/core/src/azureaiinference.ts @@ -1,49 +1,49 @@ -import { LanguageModel } from "./chat" -import { MODEL_PROVIDER_AZURE_AI_INFERENCE } from "./constants" -import { OpenAIChatCompletion, OpenAIEmbedder } from "./openai" +import { LanguageModel } from "./chat"; +import { MODEL_PROVIDER_AZURE_AI_INFERENCE } from "./constants"; +import { OpenAIChatCompletion, OpenAIEmbedder } from "./openai"; export const AzureAIInferenceModel = Object.freeze({ - id: MODEL_PROVIDER_AZURE_AI_INFERENCE, - completer: OpenAIChatCompletion, - embedder: OpenAIEmbedder, - listModels: async () => { - return { - ok: true, - models: [ - { - id: "o3", - }, - { - id: "o3-mini", - }, - { - id: "o4-mini", - }, - { - id: "gpt-4.1", - }, - { - id: "gpt-4.1-mini", - }, - { - id: "gpt-4.1-nano", - }, - { - id: "gpt-4o", - }, - { - id: "gpt-4o-mini", - }, - { - id: "o1", - }, - { - id: "o1-preview", - }, - { - id: "o3-mini", - }, - ], - } - }, -}) + id: MODEL_PROVIDER_AZURE_AI_INFERENCE, + completer: OpenAIChatCompletion, + embedder: OpenAIEmbedder, + listModels: async () => { + return { + ok: true, + models: [ + { + id: "o3", + }, + { + id: "o3-mini", + }, + { + id: "o4-mini", + }, + { + id: "gpt-4.1", + }, + { + id: "gpt-4.1-mini", + }, + { + id: "gpt-4.1-nano", + }, + { + id: "gpt-4o", + }, + { + id: "gpt-4o-mini", + }, + { + id: "o1", + }, + { + id: "o1-preview", + }, + { + id: "o3-mini", + }, + ], + }; + }, +}); diff --git a/packages/core/src/azureaisearch.ts b/packages/core/src/azureaisearch.ts index 10d31a4c42..f22cff6a8f 100644 --- a/packages/core/src/azureaisearch.ts +++ b/packages/core/src/azureaisearch.ts @@ -1,267 +1,251 @@ -import { - CancellationOptions, - checkCancelled, - toSignal, -} from "../../core/src/cancellation" -import { - EmbeddingFunction, - WorkspaceFileIndexCreator, -} from "../../core/src/chat" -import { arrayify } from "../../core/src/cleaners" -import { runtimeHost } from "../../core/src/host" -import { TraceOptions } from "../../core/src/trace" -import { logVerbose } from "./util" -import type { TokenCredential, KeyCredential } from "@azure/core-auth" -import { resolveFileContent } from "./file" -import { hash } from "./crypto" -import { LanguageModelConfiguration } from "./server/messages" -import { chunk } from "./encoders" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("azureaisearch") +import { CancellationOptions, checkCancelled, toSignal } from "../../core/src/cancellation"; +import { EmbeddingFunction, WorkspaceFileIndexCreator } from "../../core/src/chat"; +import { arrayify } from "../../core/src/cleaners"; +import { runtimeHost } from "../../core/src/host"; +import { TraceOptions } from "../../core/src/trace"; +import { logVerbose } from "./util"; +import type { TokenCredential, KeyCredential } from "@azure/core-auth"; +import { resolveFileContent } from "./file"; +import { hash } from "./crypto"; +import { 
LanguageModelConfiguration } from "./server/messages"; +import { chunk } from "./encoders"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("azureaisearch"); -const HASH_LENGTH = 64 +const HASH_LENGTH = 64; export const azureAISearchIndex: WorkspaceFileIndexCreator = async ( - indexName: string, - cfg: LanguageModelConfiguration, - embedder: EmbeddingFunction, - options?: VectorIndexOptions & TraceOptions & CancellationOptions + indexName: string, + cfg: LanguageModelConfiguration, + embedder: EmbeddingFunction, + options?: VectorIndexOptions & TraceOptions & CancellationOptions, ) => { - // https://learn.microsoft.com/en-us/azure/search/search-security-rbac?tabs=roles-portal-admin%2Croles-portal%2Croles-portal-query%2Ctest-portal%2Ccustom-role-portal - const { - trace, - cancellationToken, - deleteIfExists, - chunkOverlap = 128, - chunkSize = 512, - vectorSize = 1536, - } = options || {} - const abortSignal = toSignal(cancellationToken) - const { SearchClient, SearchIndexClient, AzureKeyCredential } = - await import("@azure/search-documents") + // https://learn.microsoft.com/en-us/azure/search/search-security-rbac?tabs=roles-portal-admin%2Croles-portal%2Croles-portal-query%2Ctest-portal%2Ccustom-role-portal + const { + trace, + cancellationToken, + deleteIfExists, + chunkOverlap = 128, + chunkSize = 512, + vectorSize = 1536, + } = options || {}; + const abortSignal = toSignal(cancellationToken); + const { SearchClient, SearchIndexClient, AzureKeyCredential } = await import( + "@azure/search-documents" + ); - const endPoint = process.env.AZURE_AI_SEARCH_ENDPOINT - if (!endPoint) { - dbg(`checking if AZURE_AI_SEARCH_ENDPOINT is configured`) - throw new Error("AZURE_AI_SEARCH_ENDPOINT is not configured.") - } - let credential: TokenCredential | KeyCredential - const apiKey = process.env.AZURE_AI_SEARCH_API_KEY - if (apiKey) { - dbg(`using AzureKeyCredential with apiKey`) - credential = new AzureKeyCredential(apiKey) - } else { - dbg(`fetching Azure token credential`) - const { token } = await runtimeHost.azureToken.token("default", { - cancellationToken, - }) - checkCancelled(cancellationToken) - if (!token) { - dbg(`validating Azure token`) - throw new Error( - "Azure AI Search requires a valid Azure token credential." 
- ) - } - credential = token.credential + const endPoint = process.env.AZURE_AI_SEARCH_ENDPOINT; + if (!endPoint) { + dbg(`checking if AZURE_AI_SEARCH_ENDPOINT is configured`); + throw new Error("AZURE_AI_SEARCH_ENDPOINT is not configured."); + } + let credential: TokenCredential | KeyCredential; + const apiKey = process.env.AZURE_AI_SEARCH_API_KEY; + if (apiKey) { + dbg(`using AzureKeyCredential with apiKey`); + credential = new AzureKeyCredential(apiKey); + } else { + dbg(`fetching Azure token credential`); + const { token } = await runtimeHost.azureToken.token("default", { + cancellationToken, + }); + checkCancelled(cancellationToken); + if (!token) { + dbg(`validating Azure token`); + throw new Error("Azure AI Search requires a valid Azure token credential."); } + credential = token.credential; + } - logVerbose( - `azure ai search: ${indexName}, embedder ${cfg.provider}:${cfg.model}, ${vectorSize} dimensions` - ) - const indexClient = new SearchIndexClient(endPoint, credential, {}) - if (deleteIfExists) { - dbg(`deleting existing index ${indexName}`) - await indexClient.deleteIndex(indexName, { abortSignal }) - } - dbg(`creating or updating index ${indexName}`) - const created = await indexClient.createOrUpdateIndex({ - name: indexName, - fields: [ - { name: "id", type: "Edm.String", key: true }, - { - name: "filename", - type: "Edm.String", - searchable: true, - filterable: true, - sortable: true, - }, - { name: "lineStart", type: "Edm.Int32", filterable: true }, - { name: "lineEnd", type: "Edm.Int32", filterable: true }, - { name: "content", type: "Edm.String", searchable: true }, - { - name: "contentVector", - type: "Collection(Edm.Single)", - searchable: true, - vectorSearchDimensions: vectorSize, - vectorSearchProfileName: "content-vector-profile", - }, - ], - vectorSearch: { - profiles: [ - { - name: "content-vector-profile", - algorithmConfigurationName: "content-vector-algorithm", - }, - ], - algorithms: [ - { - name: "content-vector-algorithm", - kind: "hnsw", - parameters: { - m: 4, - efConstruction: 400, - efSearch: 500, - metric: "cosine", - }, - }, - ], + logVerbose( + `azure ai search: ${indexName}, embedder ${cfg.provider}:${cfg.model}, ${vectorSize} dimensions`, + ); + const indexClient = new SearchIndexClient(endPoint, credential, {}); + if (deleteIfExists) { + dbg(`deleting existing index ${indexName}`); + await indexClient.deleteIndex(indexName, { abortSignal }); + } + dbg(`creating or updating index ${indexName}`); + const created = await indexClient.createOrUpdateIndex({ + name: indexName, + fields: [ + { name: "id", type: "Edm.String", key: true }, + { + name: "filename", + type: "Edm.String", + searchable: true, + filterable: true, + sortable: true, + }, + { name: "lineStart", type: "Edm.Int32", filterable: true }, + { name: "lineEnd", type: "Edm.Int32", filterable: true }, + { name: "content", type: "Edm.String", searchable: true }, + { + name: "contentVector", + type: "Collection(Edm.Single)", + searchable: true, + vectorSearchDimensions: vectorSize, + vectorSearchProfileName: "content-vector-profile", + }, + ], + vectorSearch: { + profiles: [ + { + name: "content-vector-profile", + algorithmConfigurationName: "content-vector-algorithm", }, - }) - dbg(`tracing details of created index`) - trace?.detailsFenced(`azure ai search ${indexName}`, created, "json") + ], + algorithms: [ + { + name: "content-vector-algorithm", + kind: "hnsw", + parameters: { + m: 4, + efConstruction: 400, + efSearch: 500, + metric: "cosine", + }, + }, + ], + }, + }); + dbg(`tracing 
details of created index`); + trace?.detailsFenced(`azure ai search ${indexName}`, created, "json"); - type TextChunkEntry = TextChunk & { id: string; contentVector: number[] } - const client = new SearchClient( - endPoint, - indexName, - credential, - {} - ) + type TextChunkEntry = TextChunk & { id: string; contentVector: number[] }; + const client = new SearchClient(endPoint, indexName, credential, {}); - const chunkId = async (chunk: TextChunk) => - await hash( - [chunk.filename ?? chunk.content, chunk.lineEnd, chunk.lineEnd], - { length: HASH_LENGTH } - ) + const chunkId = async (chunk: TextChunk) => + await hash([chunk.filename ?? chunk.content, chunk.lineEnd, chunk.lineEnd], { + length: HASH_LENGTH, + }); - return Object.freeze({ - name: indexName, - insertOrUpdate: async (file: ElementOrArray) => { - const files = arrayify(file) - const outdated: TextChunkEntry[] = [] - const docs: TextChunkEntry[] = [] - for (const file of files) { - dbg(`resolving file content for ${file.filename}`) - await resolveFileContent(file, { cancellationToken }) - if (file.encoding) { - continue - } + return Object.freeze({ + name: indexName, + insertOrUpdate: async (file: ElementOrArray) => { + const files = arrayify(file); + const outdated: TextChunkEntry[] = []; + const docs: TextChunkEntry[] = []; + for (const file of files) { + dbg(`resolving file content for ${file.filename}`); + await resolveFileContent(file, { cancellationToken }); + if (file.encoding) { + continue; + } - dbg(`chunking file ${file.filename}`) - const newChunks = await chunk(file, { - chunkSize, - chunkOverlap, - }) - const oldChunks = await client.search(undefined, { - filter: `filename eq '${file.filename}'`, - }) - for await (const result of oldChunks.results) { - const oldChunk = result.document - const index = newChunks.findIndex( - (c) => - c.lineStart === oldChunk.lineStart && - c.lineEnd === oldChunk.lineEnd && - c.content === oldChunk.content - ) - if (index > -1) { - newChunks.splice(index, 1) - } else { - dbg(`adding outdated chunk`) - outdated.push(oldChunk) - } - } + dbg(`chunking file ${file.filename}`); + const newChunks = await chunk(file, { + chunkSize, + chunkOverlap, + }); + const oldChunks = await client.search(undefined, { + filter: `filename eq '${file.filename}'`, + }); + for await (const result of oldChunks.results) { + const oldChunk = result.document; + const index = newChunks.findIndex( + (c) => + c.lineStart === oldChunk.lineStart && + c.lineEnd === oldChunk.lineEnd && + c.content === oldChunk.content, + ); + if (index > -1) { + newChunks.splice(index, 1); + } else { + dbg(`adding outdated chunk`); + outdated.push(oldChunk); + } + } - // new chunks - for (const chunk of newChunks) { - dbg(`embedding new chunk content`) - const vector = await embedder(chunk.content, cfg, options) - checkCancelled(cancellationToken) - dbg(`validating embedding vector status`) - if (vector.status !== "success") { - throw new Error(vector.error || vector.status) - } - docs.push({ - id: await chunkId(chunk), - ...chunk, - contentVector: vector.data[0], - }) - } - } + // new chunks + for (const chunk of newChunks) { + dbg(`embedding new chunk content`); + const vector = await embedder(chunk.content, cfg, options); + checkCancelled(cancellationToken); + dbg(`validating embedding vector status`); + if (vector.status !== "success") { + throw new Error(vector.error || vector.status); + } + docs.push({ + id: await chunkId(chunk), + ...chunk, + contentVector: vector.data[0], + }); + } + } - logVerbose( - `azure ai search: 
${indexName} index ${outdated.length} outdated, ${docs.length} updated` - ) - if (outdated.length) { - dbg(`deleting outdated documents`) - const res = await client.deleteDocuments(outdated, { - abortSignal, - throwOnAnyFailure: false, - }) - for (const r of res.results) { - if (!r.succeeded) { - logVerbose( - ` ${r.key} ${r.errorMessage} (${r.statusCode})` - ) - } - } - } + logVerbose( + `azure ai search: ${indexName} index ${outdated.length} outdated, ${docs.length} updated`, + ); + if (outdated.length) { + dbg(`deleting outdated documents`); + const res = await client.deleteDocuments(outdated, { + abortSignal, + throwOnAnyFailure: false, + }); + for (const r of res.results) { + if (!r.succeeded) { + logVerbose(` ${r.key} ${r.errorMessage} (${r.statusCode})`); + } + } + } - dbg(`checking if there are no new documents`) - if (!docs.length) { - return - } + dbg(`checking if there are no new documents`); + if (!docs.length) { + return; + } - dbg(`merging or uploading new documents`) - const res = await client.mergeOrUploadDocuments(docs, { - abortSignal, - throwOnAnyFailure: false, - }) - for (const r of res.results) { - if (!r.succeeded) { - logVerbose(` ${r.key} ${r.errorMessage} (${r.statusCode})`) - } - } - }, - search: async (query: string, options?: VectorSearchOptions) => { - dbg(`embedding search query`) - const { topK, minScore = 0 } = options || {} + dbg(`merging or uploading new documents`); + const res = await client.mergeOrUploadDocuments(docs, { + abortSignal, + throwOnAnyFailure: false, + }); + for (const r of res.results) { + if (!r.succeeded) { + logVerbose(` ${r.key} ${r.errorMessage} (${r.statusCode})`); + } + } + }, + search: async (query: string, options?: VectorSearchOptions) => { + dbg(`embedding search query`); + const { topK, minScore = 0 } = options || {}; - const vector = await embedder(query, cfg, { - trace, - cancellationToken, - }) - checkCancelled(cancellationToken) - dbg(`validating embedding vector status`) - if (vector.status !== "success") { - throw new Error(vector.error || vector.status) - } + const vector = await embedder(query, cfg, { + trace, + cancellationToken, + }); + checkCancelled(cancellationToken); + dbg(`validating embedding vector status`); + if (vector.status !== "success") { + throw new Error(vector.error || vector.status); + } - dbg(`searching documents with query ${query}`) - const docs = await client.search(query, { - searchMode: "all", - vectorSearchOptions: { - queries: [ - { - kind: "vector", - vector: vector.data[0], - fields: ["contentVector"], - kNearestNeighborsCount: 3, - }, - ], - }, - }) - const res: WorkspaceFileWithScore[] = [] - dbg(`iterating over search results`) - for await (const doc of docs.results) { - if (doc.score < minScore) { - continue - } - res.push({ ...doc.document, score: doc.score }) - dbg(`checking if result length exceeds topK`) - if (res.length >= topK) { - break - } - } - return res + dbg(`searching documents with query ${query}`); + const docs = await client.search(query, { + searchMode: "all", + vectorSearchOptions: { + queries: [ + { + kind: "vector", + vector: vector.data[0], + fields: ["contentVector"], + kNearestNeighborsCount: 3, + }, + ], }, - } satisfies WorkspaceFileIndex) -} + }); + const res: WorkspaceFileWithScore[] = []; + dbg(`iterating over search results`); + for await (const doc of docs.results) { + if (doc.score < minScore) { + continue; + } + res.push({ ...doc.document, score: doc.score }); + dbg(`checking if result length exceeds topK`); + if (res.length >= topK) { + break; + } + } + 
return res; + }, + } satisfies WorkspaceFileIndex); +}; diff --git a/packages/core/src/azurecontentsafety.ts b/packages/core/src/azurecontentsafety.ts index a3fbbcba4a..b11a11eeec 100644 --- a/packages/core/src/azurecontentsafety.ts +++ b/packages/core/src/azurecontentsafety.ts @@ -1,259 +1,243 @@ -import { createFetch, statusToMessage } from "./fetch" -import { TraceOptions } from "./trace" -import { arrayify } from "./util" +import { createFetch, statusToMessage } from "./fetch"; +import { TraceOptions } from "./trace"; +import { arrayify } from "./util"; import { - AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH, - DOCS_CONFIGURATION_CONTENT_SAFETY_URL, -} from "./constants" -import { runtimeHost } from "./host" -import { CancellationOptions } from "./cancellation" -import { YAMLStringify } from "./yaml" -import { AzureCredentialsType } from "./server/messages" -import { trimTrailingSlash } from "./cleaners" -import { chunkString } from "./chunkers" -import { createCache } from "./cache" -import { traceFetchPost } from "./fetchtext" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("contentsafety:azure") + AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH, + DOCS_CONFIGURATION_CONTENT_SAFETY_URL, +} from "./constants"; +import { runtimeHost } from "./host"; +import { CancellationOptions } from "./cancellation"; +import { YAMLStringify } from "./yaml"; +import { AzureCredentialsType } from "./server/messages"; +import { trimTrailingSlash } from "./cleaners"; +import { chunkString } from "./chunkers"; +import { createCache } from "./cache"; +import { traceFetchPost } from "./fetchtext"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("contentsafety:azure"); interface AzureContentSafetyRequest { - userPrompt?: string - documents?: string[] + userPrompt?: string; + documents?: string[]; } interface AzureContentSafetyResponse { - userPromptAnalysis: { - attackDetected: boolean - } - documentsAnalysis: { - attackDetected: boolean - }[] + userPromptAnalysis: { + attackDetected: boolean; + }; + documentsAnalysis: { + attackDetected: boolean; + }[]; } class AzureContentSafetyClient implements ContentSafety { - readonly id: "azure" - private readonly cache: WorkspaceFileCache< - { route: string; body: object; options: object }, - object - > - constructor(readonly options?: TraceOptions & CancellationOptions) { - this.cache = createCache("azurecontentsafety", { - ...(options || {}), - type: "fs", - }) - } + readonly id: "azure"; + private readonly cache: WorkspaceFileCache< + { route: string; body: object; options: object }, + object + >; + constructor(readonly options?: TraceOptions & CancellationOptions) { + this.cache = createCache("azurecontentsafety", { + ...(options || {}), + type: "fs", + }); + } - async detectHarmfulContent( - content: Awaitable, - options?: { - maxAllowedSeverity?: number - } - ): Promise<{ - harmfulContentDetected: boolean - filename?: string - chunk?: string - }> { - const { trace } = this.options || {} - const { maxAllowedSeverity = 0 } = options || {} - const route = "text:analyze" + async detectHarmfulContent( + content: Awaitable, + options?: { + maxAllowedSeverity?: number; + }, + ): Promise<{ + harmfulContentDetected: boolean; + filename?: string; + chunk?: string; + }> { + const { trace } = this.options || {}; + const { maxAllowedSeverity = 0 } = options || {}; + const route = "text:analyze"; - try { - dbg(`detecting harmful content`) - trace?.startDetails("🛡️ content safety: detecting harmful content") + try { + 
dbg(`detecting harmful content`); + trace?.startDetails("🛡️ content safety: detecting harmful content"); - const fetcher = await this.createClient(route) - const analyze = async (text: string) => { - trace?.fence(YAMLStringify(text), "yaml") - const body = { text } - const cached = await this.cache.get({ route, body, options }) - if (cached) { - trace?.itemValue("cached", YAMLStringify(cached)) - return cached as { harmfulContentDetected: boolean } - } - - const res = await fetcher(body) - if (!res.ok) { - dbg(statusToMessage(res)) - throw new Error( - `Azure Content Safety API failed with status ${res.status}` - ) - } - const resBody = (await res.json()) as { - blockslistMath: string[] - categoriesAnalysis: { category: string; severity: number }[] - } - const harmfulContentDetected = resBody.categoriesAnalysis?.some( - ({ severity }) => severity > maxAllowedSeverity - ) - const r = { harmfulContentDetected, ...resBody } - await this.cache.set({ route, body, options }, r) - return r - } + const fetcher = await this.createClient(route); + const analyze = async (text: string) => { + trace?.fence(YAMLStringify(text), "yaml"); + const body = { text }; + const cached = await this.cache.get({ route, body, options }); + if (cached) { + trace?.itemValue("cached", YAMLStringify(cached)); + return cached as { harmfulContentDetected: boolean }; + } - const inputs = arrayify(await content) - for (const input of inputs) { - const text = typeof input === "string" ? input : input.content - const filename = - typeof input === "string" ? undefined : input.filename - for (const chunk of chunkString( - text, - AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH - )) { - const res = await analyze(chunk) - if (res.harmfulContentDetected) - return { - ...res, - filename, - chunk, - } - } - } + const res = await fetcher(body); + if (!res.ok) { + dbg(statusToMessage(res)); + throw new Error(`Azure Content Safety API failed with status ${res.status}`); + } + const resBody = (await res.json()) as { + blockslistMath: string[]; + categoriesAnalysis: { category: string; severity: number }[]; + }; + const harmfulContentDetected = resBody.categoriesAnalysis?.some( + ({ severity }) => severity > maxAllowedSeverity, + ); + const r = { harmfulContentDetected, ...resBody }; + await this.cache.set({ route, body, options }, r); + return r; + }; - trace?.item("no harmful content detected") - dbg(`no harmful content detected`) - return { harmfulContentDetected: false } - } finally { - trace?.endDetails() + const inputs = arrayify(await content); + for (const input of inputs) { + const text = typeof input === "string" ? input : input.content; + const filename = typeof input === "string" ? 
undefined : input.filename; + for (const chunk of chunkString(text, AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH)) { + const res = await analyze(chunk); + if (res.harmfulContentDetected) + return { + ...res, + filename, + chunk, + }; } + } + + trace?.item("no harmful content detected"); + dbg(`no harmful content detected`); + return { harmfulContentDetected: false }; + } finally { + trace?.endDetails(); } + } - async detectPromptInjection( - content: Awaitable< - ElementOrArray | ElementOrArray - > - ): Promise<{ attackDetected: boolean; filename?: string; chunk?: string }> { - const options = {} - const { trace } = this.options || {} - const route = "text:shieldPrompt" + async detectPromptInjection( + content: Awaitable | ElementOrArray>, + ): Promise<{ attackDetected: boolean; filename?: string; chunk?: string }> { + const options = {}; + const { trace } = this.options || {}; + const route = "text:shieldPrompt"; - try { - dbg(`detecting prompt injection`) - trace?.startDetails("🛡️ content safety: detecting prompt injection") + try { + dbg(`detecting prompt injection`); + trace?.startDetails("🛡️ content safety: detecting prompt injection"); - const input = arrayify(await content) - const userPrompts = input.filter((i) => typeof i === "string") - const documents = input.filter((i) => typeof i === "object") + const input = arrayify(await content); + const userPrompts = input.filter((i) => typeof i === "string"); + const documents = input.filter((i) => typeof i === "object"); - const fetcher = await this.createClient(route) - const shieldPrompt = async (body: AzureContentSafetyRequest) => { - trace?.fence(YAMLStringify(body), "yaml") - const cached = await this.cache.get({ route, body, options }) - if (cached) { - trace?.itemValue("cached", YAMLStringify(cached)) - return cached as { attackDetected: boolean } - } - const res = await fetcher(body) - if (!res.ok) { - dbg(statusToMessage(res)) - throw new Error( - `Azure Content Safety API failed with status ${res.status}` - ) - } - const resBody = (await res.json()) as AzureContentSafetyResponse - const attackDetected = - !!resBody.userPromptAnalysis?.attackDetected || - resBody.documentsAnalysis?.some((doc) => doc.attackDetected) - const r = { attackDetected } - await this.cache.set({ route, body, options: {} }, r) - return r - } + const fetcher = await this.createClient(route); + const shieldPrompt = async (body: AzureContentSafetyRequest) => { + trace?.fence(YAMLStringify(body), "yaml"); + const cached = await this.cache.get({ route, body, options }); + if (cached) { + trace?.itemValue("cached", YAMLStringify(cached)); + return cached as { attackDetected: boolean }; + } + const res = await fetcher(body); + if (!res.ok) { + dbg(statusToMessage(res)); + throw new Error(`Azure Content Safety API failed with status ${res.status}`); + } + const resBody = (await res.json()) as AzureContentSafetyResponse; + const attackDetected = + !!resBody.userPromptAnalysis?.attackDetected || + resBody.documentsAnalysis?.some((doc) => doc.attackDetected); + const r = { attackDetected }; + await this.cache.set({ route, body, options: {} }, r); + return r; + }; - for (const userPrompt of userPrompts) { - for (const chunk of chunkString( - userPrompt, - AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH - )) { - const res = await shieldPrompt({ - userPrompt: chunk, - documents: [], - }) - if (res.attackDetected) - return { - ...res, - chunk, - } - } - } - for (const document of documents) { - for (const chunk of chunkString( - document.content, - 
AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH - )) { - const res = await shieldPrompt({ - userPrompt: "", - documents: [chunk], - }) - if (res.attackDetected) - return { - ...res, - filename: document.filename, - chunk, - } - } - } - trace.item("no attack detected") - dbg(`no attack detected`) - return { attackDetected: false } - } finally { - trace?.endDetails() + for (const userPrompt of userPrompts) { + for (const chunk of chunkString( + userPrompt, + AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH, + )) { + const res = await shieldPrompt({ + userPrompt: chunk, + documents: [], + }); + if (res.attackDetected) + return { + ...res, + chunk, + }; + } + } + for (const document of documents) { + for (const chunk of chunkString( + document.content, + AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH, + )) { + const res = await shieldPrompt({ + userPrompt: "", + documents: [chunk], + }); + if (res.attackDetected) + return { + ...res, + filename: document.filename, + chunk, + }; } + } + trace.item("no attack detected"); + dbg(`no attack detected`); + return { attackDetected: false }; + } finally { + trace?.endDetails(); } + } - private async createClient(route: string, options?: CancellationOptions) { - const { trace } = this.options || {} - const endpoint = trimTrailingSlash( - process.env.AZURE_CONTENT_SAFETY_ENDPOINT || - process.env.AZURE_CONTENT_SAFETY_API_ENDPOINT - ) - const credentialsType = (( - process.env.AZURE_CONTENT_SAFETY_CREDENTIALS_TYPE || - process.env.AZURE_CONTENT_SAFETY_API_CREDENTIALS_TYPE - ) - ?.toLowerCase() - ?.trim() || "default") as AzureCredentialsType - let apiKey = - process.env.AZURE_CONTENT_SAFETY_KEY || - process.env.AZURE_CONTENT_SAFETY_API_KEY - let apiToken: string - if (!apiKey) { - dbg(`requesting Azure token`) - const { token, error } = await runtimeHost.azureToken.token( - credentialsType, - options - ) - apiToken = token.token - } - const version = process.env.AZURE_CONTENT_SAFETY_VERSION || "2024-09-01" - dbg(`azure version: %s`, version) + private async createClient(route: string, options?: CancellationOptions) { + const { trace } = this.options || {}; + const endpoint = trimTrailingSlash( + process.env.AZURE_CONTENT_SAFETY_ENDPOINT || process.env.AZURE_CONTENT_SAFETY_API_ENDPOINT, + ); + const credentialsType = (( + process.env.AZURE_CONTENT_SAFETY_CREDENTIALS_TYPE || + process.env.AZURE_CONTENT_SAFETY_API_CREDENTIALS_TYPE + ) + ?.toLowerCase() + ?.trim() || "default") as AzureCredentialsType; + let apiKey = process.env.AZURE_CONTENT_SAFETY_KEY || process.env.AZURE_CONTENT_SAFETY_API_KEY; + let apiToken: string; + if (!apiKey) { + dbg(`requesting Azure token`); + const { token, error } = await runtimeHost.azureToken.token(credentialsType, options); + apiToken = token.token; + } + const version = process.env.AZURE_CONTENT_SAFETY_VERSION || "2024-09-01"; + dbg(`azure version: %s`, version); - if (!endpoint) - throw new Error( - `AZURE_CONTENT_SAFETY_ENDPOINT is not set. See ${DOCS_CONFIGURATION_CONTENT_SAFETY_URL} for help.` - ) - if (!apiKey && !apiToken) - throw new Error( - `AZURE_CONTENT_SAFETY_KEY is not set or not signed in with Azure. See ${DOCS_CONFIGURATION_CONTENT_SAFETY_URL} for help.` - ) + if (!endpoint) + throw new Error( + `AZURE_CONTENT_SAFETY_ENDPOINT is not set. See ${DOCS_CONFIGURATION_CONTENT_SAFETY_URL} for help.`, + ); + if (!apiKey && !apiToken) + throw new Error( + `AZURE_CONTENT_SAFETY_KEY is not set or not signed in with Azure. 
See ${DOCS_CONFIGURATION_CONTENT_SAFETY_URL} for help.`, + ); - const headers: Record<string, string> = { - "Content-Type": "application/json", - "User-Agent": "genaiscript", - } - if (apiKey) headers["Ocp-Apim-Subscription-Key"] = apiKey - if (apiToken) headers["Authorization"] = `Bearer ${apiToken}` + const headers: Record<string, string> = { + "Content-Type": "application/json", + "User-Agent": "genaiscript", + }; + if (apiKey) headers["Ocp-Apim-Subscription-Key"] = apiKey; + if (apiToken) headers["Authorization"] = `Bearer ${apiToken}`; - const fetch = await createFetch(this.options) - const url = `${endpoint}/contentsafety/${route}?api-version=${version}` - const fetcher = async (body: any) => { - traceFetchPost(trace, url, headers, body) - return await fetch(url, { - method: "POST", - headers, - body: JSON.stringify(body), - }) - } - return fetcher - } + const fetch = await createFetch(this.options); + const url = `${endpoint}/contentsafety/${route}?api-version=${version}`; + const fetcher = async (body: any) => { + traceFetchPost(trace, url, headers, body); + return await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(body), + }); + }; + return fetcher; + } } /** @@ -268,11 +252,10 @@ class AzureContentSafetyClient implements ContentSafety { * The function trims trailing slashes from the endpoint before validation. */ export function isAzureContentSafetyClientConfigured() { - const endpoint = trimTrailingSlash( - process.env.AZURE_CONTENT_SAFETY_ENDPOINT || - process.env.AZURE_CONTENT_SAFETY_API_ENDPOINT - ) - return !!endpoint + const endpoint = trimTrailingSlash( + process.env.AZURE_CONTENT_SAFETY_ENDPOINT || process.env.AZURE_CONTENT_SAFETY_API_ENDPOINT, + ); + return !!endpoint; } /** @@ -287,12 +270,12 @@ export function isAzureContentSafetyClientConfigured() { * - `detectPromptInjection`: Analyzes text or documents for prompt injection attacks.
*/ export function createAzureContentSafetyClient( - options: CancellationOptions & TraceOptions + options: CancellationOptions & TraceOptions, ): ContentSafety { - const client = new AzureContentSafetyClient(options) - return { - id: client.id, - detectHarmfulContent: client.detectHarmfulContent.bind(client), - detectPromptInjection: client.detectPromptInjection.bind(client), - } satisfies ContentSafety + const client = new AzureContentSafetyClient(options); + return { + id: client.id, + detectHarmfulContent: client.detectHarmfulContent.bind(client), + detectPromptInjection: client.detectPromptInjection.bind(client), + } satisfies ContentSafety; } diff --git a/packages/core/src/azuredevops.ts b/packages/core/src/azuredevops.ts index 381757838c..5c664e32c4 100644 --- a/packages/core/src/azuredevops.ts +++ b/packages/core/src/azuredevops.ts @@ -1,20 +1,20 @@ -import { createFetch, tryReadText } from "./fetch" -import { generatedByFooter, mergeDescription } from "./githubclient" -import { prettifyMarkdown } from "./markdown" -import { logError, logVerbose } from "./util" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("azuredevops") +import { createFetch, tryReadText } from "./fetch"; +import { generatedByFooter, mergeDescription } from "./githubclient"; +import { prettifyMarkdown } from "./markdown"; +import { logError, logVerbose } from "./util"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("azuredevops"); // https://learn.microsoft.com/en-us/rest/api/azure/devops/git/pull-requests/update?view=azure-devops-rest-7.1 export interface AzureDevOpsEnv { - fork: boolean - accessToken: string - collectionUri: string - teamProject: string - repositoryId: string - apiVersion: string - sourceBranch: string - runUrl?: string + fork: boolean; + accessToken: string; + collectionUri: string; + teamProject: string; + repositoryId: string; + apiVersion: string; + sourceBranch: string; + runUrl?: string; } /** @@ -32,66 +32,55 @@ export interface AzureDevOpsEnv { * - BUILD_SOURCEBRANCH: The source branch for the build. * - apiVersion: The API version used for Azure DevOps requests. 
*/ -export async function azureDevOpsParseEnv( - env: Record<string, string> -): Promise<AzureDevOpsEnv> { - const fork = env.SYSTEM_PULLREQUEST_ISFORK !== "False" - const accessToken = env.SYSTEM_ACCESSTOKEN - const collectionUri = env.SYSTEM_COLLECTIONURI // https://dev.azure.com/msresearch/ - const teamProject = env.SYSTEM_TEAMPROJECT - const repositoryId = env.BUILD_REPOSITORY_NAME // build_repositoryid is a guid - const sourceBranch = env.BUILD_SOURCEBRANCH - const apiVersion = "7.1" +export async function azureDevOpsParseEnv(env: Record<string, string>): Promise<AzureDevOpsEnv> { + const fork = env.SYSTEM_PULLREQUEST_ISFORK !== "False"; + const accessToken = env.SYSTEM_ACCESSTOKEN; + const collectionUri = env.SYSTEM_COLLECTIONURI; // https://dev.azure.com/msresearch/ + const teamProject = env.SYSTEM_TEAMPROJECT; + const repositoryId = env.BUILD_REPOSITORY_NAME; // build_repositoryid is a guid + const sourceBranch = env.BUILD_SOURCEBRANCH; + const apiVersion = "7.1"; - return { - fork, - accessToken, - collectionUri, - teamProject, - repositoryId, - apiVersion, - sourceBranch, - } + return { + fork, + accessToken, + collectionUri, + teamProject, + repositoryId, + apiVersion, + sourceBranch, + }; } async function findPullRequest(info: AzureDevOpsEnv) { - const { - accessToken, - collectionUri, - sourceBranch, - teamProject, - repositoryId, - apiVersion, - } = info + const { accessToken, collectionUri, sourceBranch, teamProject, repositoryId, apiVersion } = info; - // query pull request - const Authorization = `Bearer ${accessToken}` - const searchUrl = `${collectionUri}${teamProject}/_apis/git/pullrequests/?searchCriteria.repositoryId=${repositoryId}&searchCriteria.sourceRefName=${sourceBranch}&api-version=${apiVersion}` - const fetch = await createFetch({ retryOn: [] }) - const resGet = await fetch(searchUrl, { - method: "GET", - headers: { - Authorization, - }, - }) - if (resGet.status !== 200) { - logError( - `pull request search failed, ${resGet.status}: ${resGet.statusText}` - ) - return undefined - } - const resGetJson = (await resGet.json()) as { - value: { - pullRequestId: number - description: string - }[] - } - const pr = resGetJson?.value?.[0] - if (!pr) { - logError(`pull request not found`) - return undefined - } - return pr + // query pull request + const Authorization = `Bearer ${accessToken}`; + const searchUrl = `${collectionUri}${teamProject}/_apis/git/pullrequests/?searchCriteria.repositoryId=${repositoryId}&searchCriteria.sourceRefName=${sourceBranch}&api-version=${apiVersion}`; + const fetch = await createFetch({ retryOn: [] }); + const resGet = await fetch(searchUrl, { + method: "GET", + headers: { + Authorization, + }, + }); + if (resGet.status !== 200) { + logError(`pull request search failed, ${resGet.status}: ${resGet.statusText}`); + return undefined; + } + const resGetJson = (await resGet.json()) as { + value: { + pullRequestId: number; + description: string; + }[]; + }; + const pr = resGetJson?.value?.[0]; + if (!pr) { + logError(`pull request not found`); + return undefined; + } + return pr; } /** @@ -106,41 +95,34 @@ async function findPullRequest(info: AzureDevOpsEnv) { * to update the pull request description in Azure DevOps. Logs errors if the operation fails.
*/ export async function azureDevOpsUpdatePullRequestDescription( - script: PromptScript, - info: AzureDevOpsEnv, - text: string, - commentTag: string + script: PromptScript, + info: AzureDevOpsEnv, + text: string, + commentTag: string, ) { - const { - accessToken, - collectionUri, - teamProject, - repositoryId, - apiVersion, - } = info + const { accessToken, collectionUri, teamProject, repositoryId, apiVersion } = info; - // query pull request - const pr = await findPullRequest(info) - if (!pr) return - let { pullRequestId, description } = pr + // query pull request + const pr = await findPullRequest(info); + if (!pr) return; + let { pullRequestId, description } = pr; - text = prettifyMarkdown(text) - text += generatedByFooter(script, info) - description = mergeDescription(commentTag, description, text) + text = prettifyMarkdown(text); + text += generatedByFooter(script, info); + description = mergeDescription(commentTag, description, text); - const url = `${collectionUri}${teamProject}/_apis/git/repositories/${repositoryId}/pullrequests/${pullRequestId}?api-version=${apiVersion}` - const fetch = await createFetch({ retryOn: [] }) - const res = await fetch(url, { - method: "PATCH", - body: JSON.stringify({ description }), - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${accessToken}`, - }, - }) - if (res.status !== 200) - logError(`pull request update failed, ${res.status}: ${res.statusText}`) - else logVerbose(`pull request updated`) + const url = `${collectionUri}${teamProject}/_apis/git/repositories/${repositoryId}/pullrequests/${pullRequestId}?api-version=${apiVersion}`; + const fetch = await createFetch({ retryOn: [] }); + const res = await fetch(url, { + method: "PATCH", + body: JSON.stringify({ description }), + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${accessToken}`, + }, + }); + if (res.status !== 200) logError(`pull request update failed, ${res.status}: ${res.statusText}`); + else logVerbose(`pull request updated`); } /** @@ -155,92 +137,81 @@ export async function azureDevOpsUpdatePullRequestDescription( * Retrieves the relevant pull request, appends a footer to the comment body, and creates a new comment thread. 
*/ export async function azureDevOpsCreateIssueComment( - script: PromptScript, - info: AzureDevOpsEnv, - body: string, - commentTag: string + script: PromptScript, + info: AzureDevOpsEnv, + body: string, + commentTag: string, ) { - const { - apiVersion, - accessToken, - collectionUri, - teamProject, - repositoryId, - } = info + const { apiVersion, accessToken, collectionUri, teamProject, repositoryId } = info; - const { pullRequestId } = (await findPullRequest(info)) || {} - if (isNaN(pullRequestId)) return + const { pullRequestId } = (await findPullRequest(info)) || {}; + if (isNaN(pullRequestId)) return; - const fetch = await createFetch({ retryOn: [] }) - body += generatedByFooter(script, info) + const fetch = await createFetch({ retryOn: [] }); + body += generatedByFooter(script, info); - const Authorization = `Bearer ${accessToken}` - const urlThreads = `${collectionUri}${teamProject}/_apis/git/repositories/${repositoryId}/pullRequests/${pullRequestId}/threads` - const url = `${urlThreads}?api-version=${apiVersion}` - if (commentTag) { - const tag = `` - body = `${body}\n\n${tag}\n\n` - // https://learn.microsoft.com/en-us/rest/api/azure/devops/git/pull-request-threads/list?view=azure-devops-rest-7.1&tabs=HTTP - // GET https://dev.azure.com/{organization}/{project}/_apis/git/repositories/{repositoryId}/pullRequests/{pullRequestId}/threads?api-version=7.1-preview.1 - const resThreads = await fetch(url, { - method: "GET", - headers: { - Accept: "application/json", - Authorization, - }, - }) - if (resThreads.status !== 200) return - const threads = (await resThreads.json()) as { - value: { - id: string - status: string - comments: { content: string }[] - }[] - } - const openThreads = - threads.value?.filter( - (c) => - c.status === "active" && - c.comments?.some((c) => c.content.includes(tag)) - ) || [] - for (const thread of openThreads) { - logVerbose(`pull request closing old comment thread ${thread.id}`) - await fetch( - `${urlThreads}/${thread.id}?api-version=${apiVersion}`, - { - method: "PATCH", - body: JSON.stringify({ - status: "closed", - }), - headers: { - "Content-Type": "application/json", - Authorization, - }, - } - ) - } + const Authorization = `Bearer ${accessToken}`; + const urlThreads = `${collectionUri}${teamProject}/_apis/git/repositories/${repositoryId}/pullRequests/${pullRequestId}/threads`; + const url = `${urlThreads}?api-version=${apiVersion}`; + if (commentTag) { + const tag = ``; + body = `${body}\n\n${tag}\n\n`; + // https://learn.microsoft.com/en-us/rest/api/azure/devops/git/pull-request-threads/list?view=azure-devops-rest-7.1&tabs=HTTP + // GET https://dev.azure.com/{organization}/{project}/_apis/git/repositories/{repositoryId}/pullRequests/{pullRequestId}/threads?api-version=7.1-preview.1 + const resThreads = await fetch(url, { + method: "GET", + headers: { + Accept: "application/json", + Authorization, + }, + }); + if (resThreads.status !== 200) return; + const threads = (await resThreads.json()) as { + value: { + id: string; + status: string; + comments: { content: string }[]; + }[]; + }; + const openThreads = + threads.value?.filter( + (c) => c.status === "active" && c.comments?.some((c) => c.content.includes(tag)), + ) || []; + for (const thread of openThreads) { + logVerbose(`pull request closing old comment thread ${thread.id}`); + await fetch(`${urlThreads}/${thread.id}?api-version=${apiVersion}`, { + method: "PATCH", + body: JSON.stringify({ + status: "closed", + }), + headers: { + "Content-Type": "application/json", + Authorization, + }, + }); } + 
} - // https://learn.microsoft.com/en-us/rest/api/azure/devops/git/pull-request-threads/create?view=azure-devops-rest-7.1&tabs=HTTP - // POST https://dev.azure.com/{organization}/{project}/_apis/git/repositories/{repositoryId}/pullRequests/{pullRequestId}/threads?api-version=7.1-preview.1 - const res = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization, + // https://learn.microsoft.com/en-us/rest/api/azure/devops/git/pull-request-threads/create?view=azure-devops-rest-7.1&tabs=HTTP + // POST https://dev.azure.com/{organization}/{project}/_apis/git/repositories/{repositoryId}/pullRequests/{pullRequestId}/threads?api-version=7.1-preview.1 + const res = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization, + }, + body: JSON.stringify({ + status: "active", + comments: [ + { + content: body, + commentType: "text", }, - body: JSON.stringify({ - status: "active", - comments: [ - { - content: body, - commentType: "text", - }, - ], - }), - }) - if (res.status !== 200) { - logError(`pull request comment creation failed, ${res.statusText}`) - dbg(await tryReadText(res)) - } else logVerbose(`pull request comment created`) + ], + }), + }); + if (res.status !== 200) { + logError(`pull request comment creation failed, ${res.statusText}`); + dbg(await tryReadText(res)); + } else logVerbose(`pull request comment created`); } diff --git a/packages/core/src/azureopenai.ts b/packages/core/src/azureopenai.ts index 581e9b786a..d3f6f3003e 100644 --- a/packages/core/src/azureopenai.ts +++ b/packages/core/src/azureopenai.ts @@ -1,141 +1,125 @@ -import debug from "debug" -const dbg = debug("genaiscript:azureopenai") -import { LanguageModel, ListModelsFunction } from "./chat" +import debug from "debug"; +const dbg = debug("genaiscript:azureopenai"); +import { LanguageModel, ListModelsFunction } from "./chat"; +import { AZURE_MANAGEMENT_API_VERSION, MODEL_PROVIDER_AZURE_OPENAI } from "./constants"; +import { errorMessage, serializeError } from "./error"; +import { createFetch } from "./fetch"; import { - AZURE_MANAGEMENT_API_VERSION, - MODEL_PROVIDER_AZURE_OPENAI, -} from "./constants" -import { errorMessage, serializeError } from "./error" -import { createFetch } from "./fetch" -import { - OpenAIChatCompletion, - OpenAIEmbedder, - OpenAIImageGeneration, - OpenAIListModels, - OpenAISpeech, - OpenAITranscribe, -} from "./openai" -import { runtimeHost } from "./host" + OpenAIChatCompletion, + OpenAIEmbedder, + OpenAIImageGeneration, + OpenAIListModels, + OpenAISpeech, + OpenAITranscribe, +} from "./openai"; +import { runtimeHost } from "./host"; -const azureManagementOrOpenAIListModels: ListModelsFunction = async ( - cfg, - options -) => { - const modelsApi = process.env.AZURE_OPENAI_API_MODELS_TYPE - if (modelsApi === "openai") { - dbg("using OpenAI API for model listing") - return await OpenAIListModels(cfg, options) - } else { - dbg("using Azure Management API for model listing") - return await azureManagementListModels(cfg, options) - } -} +const azureManagementOrOpenAIListModels: ListModelsFunction = async (cfg, options) => { + const modelsApi = process.env.AZURE_OPENAI_API_MODELS_TYPE; + if (modelsApi === "openai") { + dbg("using OpenAI API for model listing"); + return await OpenAIListModels(cfg, options); + } else { + dbg("using Azure Management API for model listing"); + return await azureManagementListModels(cfg, options); + } +}; const azureManagementListModels: ListModelsFunction = async (cfg, options) => 
{ - try { - // Create a fetch instance to make HTTP requests - const { base } = cfg - const subscriptionId = process.env.AZURE_OPENAI_SUBSCRIPTION_ID - let resourceGroupName = process.env.AZURE_OPENAI_RESOURCE_GROUP - const accountName = /^https:\/\/([^\.]+)\./.exec(base)[1] - - if (!subscriptionId || !accountName) { - dbg( - "subscriptionId or accountName is missing, returning an empty model list" - ) - return { ok: true, models: [] } - } - const token = await runtimeHost.azureManagementToken.token( - "default", - options - ) - if (!token) throw new Error("Azure management token is missing") - if (token.error) { - dbg( - "error occurred while fetching Azure management token: %s", - token.error - ) - throw new Error(errorMessage(token.error)) - } - - const fetch = await createFetch({ retries: 0, ...options }) - const get = async (url: string) => { - const res = await fetch(url, { - method: "GET", - headers: { - Accept: "application/json", - Authorization: `Bearer ${token.token.token}`, - }, - }) - if (res.status !== 200) { - return { - ok: false, - status: res.status, - error: serializeError(res.statusText), - } - } - return await res.json() - } + try { + // Create a fetch instance to make HTTP requests + const { base } = cfg; + const subscriptionId = process.env.AZURE_OPENAI_SUBSCRIPTION_ID; + let resourceGroupName = process.env.AZURE_OPENAI_RESOURCE_GROUP; + const accountName = /^https:\/\/([^\.]+)\./.exec(base)[1]; - if (!resourceGroupName) { - dbg("resourceGroupName is missing, fetching resource details") - const resources: { - value: { - id: string - name: string - type: "OpenAI" - }[] - } = await get( - `https://management.azure.com/subscriptions/${subscriptionId}/resources?api-version=2021-04-01` - ) - const resource = resources.value.find((r) => r.name === accountName) - resourceGroupName = /\/resourceGroups\/([^\/]+)\/providers\//.exec( - resource?.id - )[1] - if (!resourceGroupName) { - dbg("unable to extract resource group name from resource id") - throw new Error("Resource group not found") - } - } + if (!subscriptionId || !accountName) { + dbg("subscriptionId or accountName is missing, returning an empty model list"); + return { ok: true, models: [] }; + } + const token = await runtimeHost.azureManagementToken.token("default", options); + if (!token) throw new Error("Azure management token is missing"); + if (token.error) { + dbg("error occurred while fetching Azure management token: %s", token.error); + throw new Error(errorMessage(token.error)); + } - // https://learn.microsoft.com/en-us/rest/api/aiservices/accountmanagement/deployments/list-skus?view=rest-aiservices-accountmanagement-2024-10-01&tabs=HTTP - const deployments: { - value: { - id: string - name: string - properties: { - model: { - format: string - name: string - version: string - } - } - }[] - } = await get( - `https://management.azure.com/subscriptions/${subscriptionId}/resourceGroups/${resourceGroupName}/providers/Microsoft.CognitiveServices/accounts/${accountName}/deployments/?api-version=${AZURE_MANAGEMENT_API_VERSION}` - ) + const fetch = await createFetch({ retries: 0, ...options }); + const get = async (url: string) => { + const res = await fetch(url, { + method: "GET", + headers: { + Accept: "application/json", + Authorization: `Bearer ${token.token.token}`, + }, + }); + if (res.status !== 200) { return { - ok: true, - models: deployments.value.map((model) => ({ - id: model.name, - family: model.properties.model.name, - details: `${model.properties.model.format} ${model.properties.model.name}`, - url: 
`https://ai.azure.com/resource/deployments/${encodeURIComponent(model.id)}`, - version: model.properties.model.version, - })), - } - } catch (e) { - return { ok: false, error: serializeError(e) } + ok: false, + status: res.status, + error: serializeError(res.statusText), + }; + } + return await res.json(); + }; + + if (!resourceGroupName) { + dbg("resourceGroupName is missing, fetching resource details"); + const resources: { + value: { + id: string; + name: string; + type: "OpenAI"; + }[]; + } = await get( + `https://management.azure.com/subscriptions/${subscriptionId}/resources?api-version=2021-04-01`, + ); + const resource = resources.value.find((r) => r.name === accountName); + resourceGroupName = /\/resourceGroups\/([^\/]+)\/providers\//.exec(resource?.id)[1]; + if (!resourceGroupName) { + dbg("unable to extract resource group name from resource id"); + throw new Error("Resource group not found"); + } } -} + + // https://learn.microsoft.com/en-us/rest/api/aiservices/accountmanagement/deployments/list-skus?view=rest-aiservices-accountmanagement-2024-10-01&tabs=HTTP + const deployments: { + value: { + id: string; + name: string; + properties: { + model: { + format: string; + name: string; + version: string; + }; + }; + }[]; + } = await get( + `https://management.azure.com/subscriptions/${subscriptionId}/resourceGroups/${resourceGroupName}/providers/Microsoft.CognitiveServices/accounts/${accountName}/deployments/?api-version=${AZURE_MANAGEMENT_API_VERSION}`, + ); + return { + ok: true, + models: deployments.value.map((model) => ({ + id: model.name, + family: model.properties.model.name, + details: `${model.properties.model.format} ${model.properties.model.name}`, + url: `https://ai.azure.com/resource/deployments/${encodeURIComponent(model.id)}`, + version: model.properties.model.version, + })), + }; + } catch (e) { + return { ok: false, error: serializeError(e) }; + } +}; // Define the Azure OpenAI model with its completion handler and model listing function export const AzureOpenAIModel = Object.freeze({ - id: MODEL_PROVIDER_AZURE_OPENAI, - completer: OpenAIChatCompletion, - listModels: azureManagementOrOpenAIListModels, - transcriber: OpenAITranscribe, - speaker: OpenAISpeech, - imageGenerator: OpenAIImageGeneration, - embedder: OpenAIEmbedder, -}) + id: MODEL_PROVIDER_AZURE_OPENAI, + completer: OpenAIChatCompletion, + listModels: azureManagementOrOpenAIListModels, + transcriber: OpenAITranscribe, + speaker: OpenAISpeech, + imageGenerator: OpenAIImageGeneration, + embedder: OpenAIEmbedder, +}); diff --git a/packages/core/src/azuretoken.ts b/packages/core/src/azuretoken.ts index a8aa1421e9..fb0f518674 100644 --- a/packages/core/src/azuretoken.ts +++ b/packages/core/src/azuretoken.ts @@ -1,22 +1,18 @@ -import debug from "debug" -const dbg = debug("genaiscript:azuretoken") +import debug from "debug"; +const dbg = debug("genaiscript:azuretoken"); -import { AZURE_TOKEN_EXPIRATION } from "../../core/src/constants" +import { AZURE_TOKEN_EXPIRATION } from "../../core/src/constants"; import { - AuthenticationToken, - AzureTokenResolver, - isAzureTokenExpired, - runtimeHost, -} from "../../core/src/host" -import { logError, logVerbose } from "../../core/src/util" -import type { TokenCredential } from "@azure/identity" -import { serializeError } from "../../core/src/error" -import { - CancellationOptions, - CancellationToken, - toSignal, -} from "../../core/src/cancellation" -import { AzureCredentialsType } from "../../core/src/server/messages" + AuthenticationToken, + AzureTokenResolver, +
isAzureTokenExpired, + runtimeHost, +} from "../../core/src/host"; +import { logError, logVerbose } from "../../core/src/util"; +import type { TokenCredential } from "@azure/identity"; +import { serializeError } from "../../core/src/error"; +import { CancellationOptions, CancellationToken, toSignal } from "../../core/src/cancellation"; +import { AzureCredentialsType } from "../../core/src/server/messages"; /** * This module provides functions to handle Azure authentication tokens, @@ -33,158 +29,152 @@ import { AzureCredentialsType } from "../../core/src/server/messages" * Logs the expiration time of the token for debugging or informational purposes. */ async function createAzureToken( - scopes: readonly string[], - credentialsType: AzureCredentialsType, - cancellationToken?: CancellationToken + scopes: readonly string[], + credentialsType: AzureCredentialsType, + cancellationToken?: CancellationToken, ): Promise { - // Dynamically import DefaultAzureCredential from the Azure SDK - dbg("dynamically importing Azure SDK credentials") - const { - DefaultAzureCredential, - EnvironmentCredential, - AzureCliCredential, - ManagedIdentityCredential, - AzurePowerShellCredential, - AzureDeveloperCliCredential, - WorkloadIdentityCredential, - ChainedTokenCredential, - } = await import("@azure/identity") + // Dynamically import DefaultAzureCredential from the Azure SDK + dbg("dynamically importing Azure SDK credentials"); + const { + DefaultAzureCredential, + EnvironmentCredential, + AzureCliCredential, + ManagedIdentityCredential, + AzurePowerShellCredential, + AzureDeveloperCliCredential, + WorkloadIdentityCredential, + ChainedTokenCredential, + } = await import("@azure/identity"); - let credential: TokenCredential - switch (credentialsType) { - case "cli": - dbg("credentialsType is cli") - credential = new AzureCliCredential() - break - case "env": - dbg("credentialsType is env") - credential = new EnvironmentCredential() - break - case "powershell": - dbg("credentialsType is powershell") - credential = new AzurePowerShellCredential() - break - case "devcli": - dbg("credentialsType is devcli") - credential = new AzureDeveloperCliCredential() - break - case "managedidentity": - dbg("credentialsType is managedidentity") - credential = new ManagedIdentityCredential() - break - case "workloadidentity": - dbg("credentialsType is workloadidentity") - credential = new WorkloadIdentityCredential() - break - case "default": - dbg("credentialsType is default") - credential = new DefaultAzureCredential() // CodeQL [SM05139] The user explicitly requested this credential type so the user has a good reason to use it. - break - default: - // Check if the environment is local/development - // also: https://nodejs.org/en/learn/getting-started/nodejs-the-difference-between-development-and-production - if (process.env.NODE_ENV === "development") { - dbg("node_env development: credentialsType is default") - credential = new DefaultAzureCredential() // CodeQL [SM05139] Okay use of DefaultAzureCredential as it is only used in development........................................ 
- } else { - dbg( - `node_env unspecified: credentialsType is env, cli, devcli, powershell` - ) - credential = new ChainedTokenCredential( - new EnvironmentCredential(), - new AzureCliCredential(), - new AzureDeveloperCliCredential(), - new AzurePowerShellCredential() - ) - } - break - } + let credential: TokenCredential; + switch (credentialsType) { + case "cli": + dbg("credentialsType is cli"); + credential = new AzureCliCredential(); + break; + case "env": + dbg("credentialsType is env"); + credential = new EnvironmentCredential(); + break; + case "powershell": + dbg("credentialsType is powershell"); + credential = new AzurePowerShellCredential(); + break; + case "devcli": + dbg("credentialsType is devcli"); + credential = new AzureDeveloperCliCredential(); + break; + case "managedidentity": + dbg("credentialsType is managedidentity"); + credential = new ManagedIdentityCredential(); + break; + case "workloadidentity": + dbg("credentialsType is workloadidentity"); + credential = new WorkloadIdentityCredential(); + break; + case "default": + dbg("credentialsType is default"); + credential = new DefaultAzureCredential(); // CodeQL [SM05139] The user explicitly requested this credential type so the user has a good reason to use it. + break; + default: + // Check if the environment is local/development + // also: https://nodejs.org/en/learn/getting-started/nodejs-the-difference-between-development-and-production + if (process.env.NODE_ENV === "development") { + dbg("node_env development: credentialsType is default"); + credential = new DefaultAzureCredential(); // CodeQL [SM05139] Okay use of DefaultAzureCredential as it is only used in development. + } else { + dbg(`node_env unspecified: credentialsType is env, cli, devcli, powershell`); + credential = new ChainedTokenCredential( + new EnvironmentCredential(), + new AzureCliCredential(), + new AzureDeveloperCliCredential(), + new AzurePowerShellCredential(), + ); + } + break; + } - // Obtain the Azure token - const abortSignal = toSignal(cancellationToken) - dbg(`get token for %o`, scopes) - const azureToken = await credential.getToken(scopes.slice(), { - abortSignal, - }) + // Obtain the Azure token + const abortSignal = toSignal(cancellationToken); + dbg(`get token for %o`, scopes); + const azureToken = await credential.getToken(scopes.slice(), { + abortSignal, + }); - // Prepare the result token object with the token and expiration timestamp - const res = { - credential, - token: azureToken.token, - // Use provided expiration timestamp or default to a constant expiration time - expiresOnTimestamp: azureToken.expiresOnTimestamp - ?
azureToken.expiresOnTimestamp + : Date.now() + AZURE_TOKEN_EXPIRATION, + }; - return res + return res; } class AzureTokenResolverImpl implements AzureTokenResolver { - _token: AuthenticationToken - _error: any - _resolver: Promise<{ token?: AuthenticationToken; error?: SerializedError }> + _token: AuthenticationToken; + _error: any; + _resolver: Promise<{ token?: AuthenticationToken; error?: SerializedError }>; - constructor( - public readonly name: string, - public readonly envName: string, - public readonly scopes: readonly string[] - ) {} + constructor( + public readonly name: string, + public readonly envName: string, + public readonly scopes: readonly string[], + ) {} - get error(): SerializedError { - return this._error - } + get error(): SerializedError { + return this._error; + } - async token( - credentialsType: AzureCredentialsType, - options?: CancellationOptions - ): Promise<{ token?: AuthenticationToken; error?: SerializedError }> { - if (this._resolver) { - return this._resolver - } + async token( + credentialsType: AzureCredentialsType, + options?: CancellationOptions, + ): Promise<{ token?: AuthenticationToken; error?: SerializedError }> { + if (this._resolver) { + return this._resolver; + } - // cached - const { cancellationToken } = options || {} + // cached + const { cancellationToken } = options || {}; - if (isAzureTokenExpired(this._token)) { - dbg("azure token expired") - this._token = undefined - this._error = undefined - } - if (this._token || this._error) { - dbg("returning cached token or error") - return { token: this._token, error: this._error } - } - if (!this._resolver) { - const scope = await runtimeHost.readSecret(this.envName) - dbg(`reading secret for envName: ${this.envName}`) - const scopes = scope ? scope.split(",") : this.scopes - this._resolver = createAzureToken( - scopes, - credentialsType, - cancellationToken - ) - .then((res) => { - this._token = res - this._error = undefined - this._resolver = undefined + if (isAzureTokenExpired(this._token)) { + dbg("azure token expired"); + this._token = undefined; + this._error = undefined; + } + if (this._token || this._error) { + dbg("returning cached token or error"); + return { token: this._token, error: this._error }; + } + if (!this._resolver) { + const scope = await runtimeHost.readSecret(this.envName); + dbg(`reading secret for envName: ${this.envName}`); + const scopes = scope ? 
scope.split(",") : this.scopes; + this._resolver = createAzureToken(scopes, credentialsType, cancellationToken) + .then((res) => { + this._token = res; + this._error = undefined; + this._resolver = undefined; - dbg( - `${this.name}: ${credentialsType || ""} token (${scopes.join(",")}) expires on ${new Date(res.expiresOnTimestamp).toUTCString()}` - ) - return { token: this._token, error: this._error } - }) - .catch((err) => { - dbg(`error occurred: ${err}`) - logError(err) - this._resolver = undefined - this._token = undefined - this._error = serializeError(err) - return { token: this._token, error: this._error } - }) - } - return this._resolver + dbg( + `${this.name}: ${credentialsType || ""} token (${scopes.join(",")}) expires on ${new Date(res.expiresOnTimestamp).toUTCString()}`, + ); + return { token: this._token, error: this._error }; + }) + .catch((err) => { + dbg(`error occurred: ${err}`); + logError(err); + this._resolver = undefined; + this._token = undefined; + this._error = serializeError(err); + return { token: this._token, error: this._error }; + }); } + return this._resolver; + } } /** @@ -196,9 +186,9 @@ class AzureTokenResolverImpl implements AzureTokenResolver { * @returns An instance of AzureTokenResolver for managing token retrieval and caching. */ export function createAzureTokenResolver( - name: string, - envName: string, - scopes: readonly string[] + name: string, + envName: string, + scopes: readonly string[], ): AzureTokenResolver { - return new AzureTokenResolverImpl(name, envName, scopes) + return new AzureTokenResolverImpl(name, envName, scopes); } diff --git a/packages/core/src/base64.test.ts b/packages/core/src/base64.test.ts index 811947c132..7afcd91a96 100644 --- a/packages/core/src/base64.test.ts +++ b/packages/core/src/base64.test.ts @@ -1,17 +1,17 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { fromBase64, toBase64 } from "./base64" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { fromBase64, toBase64 } from "./base64"; describe("Base64", async () => { - const testString = "Hello World!" 
- const testBase64 = "SGVsbG8gV29ybGQh" + const testString = "Hello World!"; + const testBase64 = "SGVsbG8gV29ybGQh"; - await test("toBase64 encodes string to base64", () => { - const result = toBase64(testString) - assert.equal(result, testBase64) - }) + await test("toBase64 encodes string to base64", () => { + const result = toBase64(testString); + assert.equal(result, testBase64); + }); - await test("fromBase64 throws on invalid base64", () => { - assert.throws(() => fromBase64("invalid base64!")) - }) -}) + await test("fromBase64 throws on invalid base64", () => { + assert.throws(() => fromBase64("invalid base64!")); + }); +}); diff --git a/packages/core/src/base64.ts b/packages/core/src/base64.ts index 7c4fb6080e..b694e01a15 100644 --- a/packages/core/src/base64.ts +++ b/packages/core/src/base64.ts @@ -1,7 +1,4 @@ -import { - fromBase64 as _fromBase64, - toBase64 as _toBase64, -} from "@smithy/util-base64" +import { fromBase64 as _fromBase64, toBase64 as _toBase64 } from "@smithy/util-base64"; -export const fromBase64 = _fromBase64 -export const toBase64 = _toBase64 +export const fromBase64 = _fromBase64; +export const toBase64 = _toBase64; diff --git a/packages/core/src/binary.test.ts b/packages/core/src/binary.test.ts index d280eb63a3..3e6bfa8854 100644 --- a/packages/core/src/binary.test.ts +++ b/packages/core/src/binary.test.ts @@ -1,41 +1,39 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { isBinaryMimeType } from "./binary" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { isBinaryMimeType } from "./binary"; describe("isBinaryMimeType", () => { - test("should identify common binary types", () => { - assert(isBinaryMimeType("image/jpeg")) - assert(isBinaryMimeType("image/png")) - assert(isBinaryMimeType("audio/mp3")) - assert(isBinaryMimeType("video/mp4")) - }) + test("should identify common binary types", () => { + assert(isBinaryMimeType("image/jpeg")); + assert(isBinaryMimeType("image/png")); + assert(isBinaryMimeType("audio/mp3")); + assert(isBinaryMimeType("video/mp4")); + }); - test("should identify document binary types", () => { - assert(isBinaryMimeType("application/pdf")) - assert(isBinaryMimeType("application/msword")) - assert( - isBinaryMimeType( - "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - ) - ) - }) + test("should identify document binary types", () => { + assert(isBinaryMimeType("application/pdf")); + assert(isBinaryMimeType("application/msword")); + assert( + isBinaryMimeType("application/vnd.openxmlformats-officedocument.wordprocessingml.document"), + ); + }); - test("should identify archive binary types", () => { - assert(isBinaryMimeType("application/zip")) - assert(isBinaryMimeType("application/x-rar-compressed")) - assert(isBinaryMimeType("application/x-7z-compressed")) - }) + test("should identify archive binary types", () => { + assert(isBinaryMimeType("application/zip")); + assert(isBinaryMimeType("application/x-rar-compressed")); + assert(isBinaryMimeType("application/x-7z-compressed")); + }); - test("should identify executable binary types", () => { - assert(isBinaryMimeType("application/octet-stream")) - assert(isBinaryMimeType("application/x-msdownload")) - assert(isBinaryMimeType("application/java-archive")) - }) + test("should identify executable binary types", () => { + assert(isBinaryMimeType("application/octet-stream")); + assert(isBinaryMimeType("application/x-msdownload")); + 
assert(isBinaryMimeType("application/java-archive")); + }); - test("should return false for non-binary types", () => { - assert.equal(isBinaryMimeType("text/plain"), false) - assert.equal(isBinaryMimeType("text/html"), false) - assert.equal(isBinaryMimeType("application/json"), false) - assert.equal(isBinaryMimeType("text/css"), false) - }) -}) + test("should return false for non-binary types", () => { + assert.equal(isBinaryMimeType("text/plain"), false); + assert.equal(isBinaryMimeType("text/html"), false); + assert.equal(isBinaryMimeType("application/json"), false); + assert.equal(isBinaryMimeType("text/css"), false); + }); +}); diff --git a/packages/core/src/binary.ts b/packages/core/src/binary.ts index 9dae56e73e..13b364c52e 100644 --- a/packages/core/src/binary.ts +++ b/packages/core/src/binary.ts @@ -5,41 +5,41 @@ * @returns True if the MIME type is binary, otherwise false. */ export function isBinaryMimeType(mimeType: string) { - return ( - /^(image|audio|video)\//.test(mimeType) || // Common binary types - BINARY_MIME_TYPES.includes(mimeType) // Additional specified binary types - ) + return ( + /^(image|audio|video)\//.test(mimeType) || // Common binary types + BINARY_MIME_TYPES.includes(mimeType) // Additional specified binary types + ); } // List of known binary MIME types const BINARY_MIME_TYPES = [ - // Documents - "application/pdf", - "application/msword", - "application/vnd.ms-excel", - "application/vnd.ms-powerpoint", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", // .docx - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // .xlsx - "application/vnd.openxmlformats-officedocument.presentationml.presentation", // .pptx + // Documents + "application/pdf", + "application/msword", + "application/vnd.ms-excel", + "application/vnd.ms-powerpoint", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", // .docx + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // .xlsx + "application/vnd.openxmlformats-officedocument.presentationml.presentation", // .pptx - // Archives - "application/zip", - "application/x-rar-compressed", - "application/x-7z-compressed", - "application/x-tar", - "application/x-bzip", - "application/x-bzip2", - "application/x-gzip", + // Archives + "application/zip", + "application/x-rar-compressed", + "application/x-7z-compressed", + "application/x-tar", + "application/x-bzip", + "application/x-bzip2", + "application/x-gzip", - // Executables and binaries - "application/octet-stream", // General binary type (often default for unknown binary files) - "application/x-msdownload", // Executables - "application/x-shockwave-flash", // SWF - "application/java-archive", // JAR (Java) + // Executables and binaries + "application/octet-stream", // General binary type (often default for unknown binary files) + "application/x-msdownload", // Executables + "application/x-shockwave-flash", // SWF + "application/java-archive", // JAR (Java) - // Others - "application/vnd.google-earth.kml+xml", // KML (though XML based, often treated as binary in context of HTTP) - "application/vnd.android.package-archive", // APK (Android package) - "application/x-iso9660-image", // ISO images - "application/vnd.apple.installer+xml", // Apple Installer Package (though XML, often handled as binary) -] + // Others + "application/vnd.google-earth.kml+xml", // KML (though XML based, often treated as binary in context of HTTP) + "application/vnd.android.package-archive", // APK (Android package) + 
"application/x-iso9660-image", // ISO images + "application/vnd.apple.installer+xml", // Apple Installer Package (though XML, often handled as binary) +]; diff --git a/packages/core/src/bufferlike.test.ts b/packages/core/src/bufferlike.test.ts index a19e9f63d8..be9145bcbe 100644 --- a/packages/core/src/bufferlike.test.ts +++ b/packages/core/src/bufferlike.test.ts @@ -1,78 +1,78 @@ -import assert from "node:assert/strict" -import test, { describe } from "node:test" -import { resolveBufferLike, BufferToBlob } from "./bufferlike" -import fs from "fs/promises" -import { ReadableStream } from "node:stream/web" +import assert from "node:assert/strict"; +import test, { describe } from "node:test"; +import { resolveBufferLike, BufferToBlob } from "./bufferlike"; +import fs from "fs/promises"; +import { ReadableStream } from "node:stream/web"; describe("resolveBufferLike", () => { - test("should resolve a string URL to a Buffer", async () => { - const url = "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" - const buffer = await resolveBufferLike(url) - assert.strictEqual(buffer.toString(), "Hello, World!") - }) + test("should resolve a string URL to a Buffer", async () => { + const url = "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ=="; + const buffer = await resolveBufferLike(url); + assert.strictEqual(buffer.toString(), "Hello, World!"); + }); - test("should resolve a Blob to a Buffer", async () => { - const blob = new Blob(["Hello, World!"], { type: "text/plain" }) - const buffer = await resolveBufferLike(blob) - assert.strictEqual(buffer.toString(), "Hello, World!") - }) + test("should resolve a Blob to a Buffer", async () => { + const blob = new Blob(["Hello, World!"], { type: "text/plain" }); + const buffer = await resolveBufferLike(blob); + assert.strictEqual(buffer.toString(), "Hello, World!"); + }); - test("should resolve a ReadableStream to a Buffer", async () => { - const stream = new ReadableStream({ - start(controller) { - controller.enqueue(new TextEncoder().encode("Hello, World!")) - controller.close() - }, - }) - const buffer = await resolveBufferLike(stream) - assert.strictEqual(buffer.toString(), "Hello, World!") - }) + test("should resolve a ReadableStream to a Buffer", async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode("Hello, World!")); + controller.close(); + }, + }); + const buffer = await resolveBufferLike(stream); + assert.strictEqual(buffer.toString(), "Hello, World!"); + }); - test("should resolve an ArrayBuffer to a Buffer", async () => { - const arrayBuffer = new TextEncoder().encode("Hello, World!").buffer - const buffer = await resolveBufferLike(arrayBuffer) - assert.strictEqual(buffer.toString(), "Hello, World!") - }) + test("should resolve an ArrayBuffer to a Buffer", async () => { + const arrayBuffer = new TextEncoder().encode("Hello, World!").buffer; + const buffer = await resolveBufferLike(arrayBuffer); + assert.strictEqual(buffer.toString(), "Hello, World!"); + }); - test("should resolve a Uint8Array to a Buffer", async () => { - const uint8Array = new TextEncoder().encode("Hello, World!") - const buffer = await resolveBufferLike(uint8Array) - assert.strictEqual(buffer.toString(), "Hello, World!") - }) -}) + test("should resolve a Uint8Array to a Buffer", async () => { + const uint8Array = new TextEncoder().encode("Hello, World!"); + const buffer = await resolveBufferLike(uint8Array); + assert.strictEqual(buffer.toString(), "Hello, World!"); + }); +}); describe("BufferToBlob", () => { - test("should 
create a Blob from a Buffer with default mime type", async () => { - const buffer = Buffer.from("Hello, World!") - const blob = await BufferToBlob(buffer) - assert.strictEqual(blob.type, "application/octet-stream") - const text = await blob.text() - assert.strictEqual(text, "Hello, World!") - }) + test("should create a Blob from a Buffer with default mime type", async () => { + const buffer = Buffer.from("Hello, World!"); + const blob = await BufferToBlob(buffer); + assert.strictEqual(blob.type, "application/octet-stream"); + const text = await blob.text(); + assert.strictEqual(text, "Hello, World!"); + }); - test("should create a Blob from a Buffer with provided mime type", async () => { - const buffer = Buffer.from("Hello, World!") - const mime = "text/plain" - const blob = await BufferToBlob(buffer, mime) - assert.strictEqual(blob.type, mime) - const text = await blob.text() - assert.strictEqual(text, "Hello, World!") - }) + test("should create a Blob from a Buffer with provided mime type", async () => { + const buffer = Buffer.from("Hello, World!"); + const mime = "text/plain"; + const blob = await BufferToBlob(buffer, mime); + assert.strictEqual(blob.type, mime); + const text = await blob.text(); + assert.strictEqual(text, "Hello, World!"); + }); - test("should create a Blob from a Uint8Array with default mime type", async () => { - const uint8Array = new TextEncoder().encode("Hello, World!") - const blob = await BufferToBlob(uint8Array) - assert.strictEqual(blob.type, "application/octet-stream") - const text = await blob.text() - assert.strictEqual(text, "Hello, World!") - }) + test("should create a Blob from a Uint8Array with default mime type", async () => { + const uint8Array = new TextEncoder().encode("Hello, World!"); + const blob = await BufferToBlob(uint8Array); + assert.strictEqual(blob.type, "application/octet-stream"); + const text = await blob.text(); + assert.strictEqual(text, "Hello, World!"); + }); - test("should create a Blob from a Uint8Array with provided mime type", async () => { - const uint8Array = new TextEncoder().encode("Hello, World!") - const mime = "text/plain" - const blob = await BufferToBlob(uint8Array, mime) - assert.strictEqual(blob.type, mime) - const text = await blob.text() - assert.strictEqual(text, "Hello, World!") - }) -}) + test("should create a Blob from a Uint8Array with provided mime type", async () => { + const uint8Array = new TextEncoder().encode("Hello, World!"); + const mime = "text/plain"; + const blob = await BufferToBlob(uint8Array, mime); + assert.strictEqual(blob.type, mime); + const text = await blob.text(); + assert.strictEqual(text, "Hello, World!"); + }); +}); diff --git a/packages/core/src/bufferlike.ts b/packages/core/src/bufferlike.ts index fbaef94cdd..f60cc0b61e 100644 --- a/packages/core/src/bufferlike.ts +++ b/packages/core/src/bufferlike.ts @@ -1,18 +1,16 @@ -import { resolveFileBytes } from "./file" -import { TraceOptions } from "./trace" -import { fileTypeFromBuffer } from "./filetype" -import { extname } from "node:path" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("buffer") +import { resolveFileBytes } from "./file"; +import { TraceOptions } from "./trace"; +import { fileTypeFromBuffer } from "./filetype"; +import { extname } from "node:path"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("buffer"); -async function bufferTryFrom( - data: Uint8Array | Buffer | ArrayBuffer | SharedArrayBuffer -) { - if (data === undefined) return undefined - if (data instanceof 
Buffer) return data - if (data instanceof ArrayBuffer) return Buffer.from(data) - if (data instanceof SharedArrayBuffer) return Buffer.from(data) - return Buffer.from(data) +async function bufferTryFrom(data: Uint8Array | Buffer | ArrayBuffer | SharedArrayBuffer) { + if (data === undefined) return undefined; + if (data instanceof Buffer) return data; + if (data instanceof ArrayBuffer) return Buffer.from(data); + if (data instanceof SharedArrayBuffer) return Buffer.from(data); + return Buffer.from(data); } /** @@ -24,54 +22,48 @@ async function bufferTryFrom( * @throws Error if the input type is unsupported. */ export async function resolveBufferLike( - bufferLike: BufferLike, - options?: TraceOptions + bufferLike: BufferLike, + options?: TraceOptions, ): Promise { - if (bufferLike === undefined) return undefined - if (typeof bufferLike === "string") - return bufferTryFrom(await resolveFileBytes(bufferLike, options)) - else if (bufferLike instanceof Blob) - return bufferTryFrom(await bufferLike.arrayBuffer()) - else if (bufferLike instanceof ReadableStream) { - const stream: ReadableStream = bufferLike - return bufferTryFrom(await new Response(stream).arrayBuffer()) - } else if (bufferLike instanceof ArrayBuffer) - return bufferTryFrom(bufferLike) - else if (bufferLike instanceof SharedArrayBuffer) - return bufferTryFrom(bufferLike) - else if (bufferLike instanceof Uint8Array) return bufferTryFrom(bufferLike) - else if ( - typeof bufferLike === "object" && - typeof (bufferLike as WorkspaceFile).filename === "string" - ) { - return Buffer.from( - await resolveFileBytes(bufferLike as WorkspaceFile, options) - ) - } - dbg(`unsupported: ${typeof bufferLike}`) - throw new Error(`Unsupported buffer-like object ${typeof bufferLike}`) + if (bufferLike === undefined) return undefined; + if (typeof bufferLike === "string") + return bufferTryFrom(await resolveFileBytes(bufferLike, options)); + else if (bufferLike instanceof Blob) return bufferTryFrom(await bufferLike.arrayBuffer()); + else if (bufferLike instanceof ReadableStream) { + const stream: ReadableStream = bufferLike; + return bufferTryFrom(await new Response(stream).arrayBuffer()); + } else if (bufferLike instanceof ArrayBuffer) return bufferTryFrom(bufferLike); + else if (bufferLike instanceof SharedArrayBuffer) return bufferTryFrom(bufferLike); + else if (bufferLike instanceof Uint8Array) return bufferTryFrom(bufferLike); + else if ( + typeof bufferLike === "object" && + typeof (bufferLike as WorkspaceFile).filename === "string" + ) { + return Buffer.from(await resolveFileBytes(bufferLike as WorkspaceFile, options)); + } + dbg(`unsupported: ${typeof bufferLike}`); + throw new Error(`Unsupported buffer-like object ${typeof bufferLike}`); } export async function resolveBufferLikeAndExt( - bufferLike: BufferLike, - options?: TraceOptions + bufferLike: BufferLike, + options?: TraceOptions, ): Promise<{ bytes: Buffer; ext: string }> { - const bytes = await resolveBufferLike(bufferLike, options) - if (!bytes) return { bytes, ext: undefined } - const ext = await fileTypeFromBuffer(bytes) - if (ext) return { bytes, ext: ext.ext } - else if ( - typeof bufferLike === "object" && - typeof (bufferLike as WorkspaceFile).filename === "string" && - (bufferLike as WorkspaceFile).filename - ) { - return { - bytes, - ext: extname((bufferLike as WorkspaceFile).filename), - } - } else if (typeof bufferLike === "string") - return { bytes, ext: extname(bufferLike) } - return { bytes, ext: ".bin" } + const bytes = await resolveBufferLike(bufferLike, options); + 
if (!bytes) return { bytes, ext: undefined }; + const ext = await fileTypeFromBuffer(bytes); + if (ext) return { bytes, ext: ext.ext }; + else if ( + typeof bufferLike === "object" && + typeof (bufferLike as WorkspaceFile).filename === "string" && + (bufferLike as WorkspaceFile).filename + ) { + return { + bytes, + ext: extname((bufferLike as WorkspaceFile).filename), + }; + } else if (typeof bufferLike === "string") return { bytes, ext: extname(bufferLike) }; + return { bytes, ext: ".bin" }; } /** @@ -82,8 +74,8 @@ export async function resolveBufferLikeAndExt( * @returns A Blob object constructed from the input data. */ export async function BufferToBlob(buffer: Buffer | Uint8Array, mime?: string) { - const type = await fileTypeFromBuffer(buffer) - return new Blob([buffer], { - type: mime || type?.mime || "application/octet-stream", - }) + const type = await fileTypeFromBuffer(buffer); + return new Blob([buffer], { + type: mime || type?.mime || "application/octet-stream", + }); } diff --git a/packages/core/src/cache.test.ts b/packages/core/src/cache.test.ts index a6fe481f93..8ef6ba08f9 100644 --- a/packages/core/src/cache.test.ts +++ b/packages/core/src/cache.test.ts @@ -1,90 +1,90 @@ -import { describe, test, beforeEach } from "node:test" -import assert from "node:assert/strict" -import * as fs from "node:fs/promises" -import * as path from "node:path" -import { TestHost } from "./testhost" -import { JSONLineCache } from "./jsonlinecache" -import { createCache } from "./cache" +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import * as fs from "node:fs/promises"; +import * as path from "node:path"; +import { TestHost } from "./testhost"; +import { JSONLineCache } from "./jsonlinecache"; +import { createCache } from "./cache"; -const tempDir = path.join(".genaiscript", "temp") +const tempDir = path.join(".genaiscript", "temp"); for (const type of ["memory", "jsonl", "fs"]) { - describe(`cache.${type}`, () => { - beforeEach(async () => { - TestHost.install() - await fs.mkdir(tempDir, { recursive: true }) - }) - test("instance creation with byName", async () => { - const cache = createCache("testCache", { - type: type as any, - }) - assert.ok(!!cache) - }) - test("set key-value pair", async () => { - const cache = createCache("testCache", { - type: type as any, - }) - await cache.set("anotherKey", 99) - const value = await cache.get("anotherKey") - assert.strictEqual(value, 99) - }) + describe(`cache.${type}`, () => { + beforeEach(async () => { + TestHost.install(); + await fs.mkdir(tempDir, { recursive: true }); + }); + test("instance creation with byName", async () => { + const cache = createCache("testCache", { + type: type as any, + }); + assert.ok(!!cache); + }); + test("set key-value pair", async () => { + const cache = createCache("testCache", { + type: type as any, + }); + await cache.set("anotherKey", 99); + const value = await cache.get("anotherKey"); + assert.strictEqual(value, 99); + }); - test("getSha computation", async () => { - const cache = createCache("testCache", { - type: type as any, - }) - const sha = await cache.getSha("testKey") - assert.ok(sha) - assert.strictEqual(typeof sha, "string") - }) + test("getSha computation", async () => { + const cache = createCache("testCache", { + type: type as any, + }); + const sha = await cache.getSha("testKey"); + assert.ok(sha); + assert.strictEqual(typeof sha, "string"); + }); - test("keySHA generates SHA256 hash from a key", async () => { - const cache = 
createCache("testCache", { - type: type as any, - }) - const sha = await cache.getSha("testKey") - assert.ok(sha) - assert.strictEqual(typeof sha, "string") - }) - test(`${type} getOrUpdate retrieves existing value`, async () => { - const cache = createCache("testCache", { - type: type as any, - }) - await cache.set("existingKey", 42) - const value = await cache.getOrUpdate( - "existingKey", - async () => 99, - () => true - ) - assert.strictEqual(value.value, 42) - }) + test("keySHA generates SHA256 hash from a key", async () => { + const cache = createCache("testCache", { + type: type as any, + }); + const sha = await cache.getSha("testKey"); + assert.ok(sha); + assert.strictEqual(typeof sha, "string"); + }); + test(`${type} getOrUpdate retrieves existing value`, async () => { + const cache = createCache("testCache", { + type: type as any, + }); + await cache.set("existingKey", 42); + const value = await cache.getOrUpdate( + "existingKey", + async () => 99, + () => true, + ); + assert.strictEqual(value.value, 42); + }); - test("getOrUpdate updates with new value if key does not exist", async () => { - const cache = createCache("testCache", { - type: type as any, - }) - const value = await cache.getOrUpdate( - "newKey", - async () => 99, - () => true - ) - assert.strictEqual(value.value, 99) - const cachedValue = await cache.get("newKey") - assert.strictEqual(cachedValue, 99) - }) + test("getOrUpdate updates with new value if key does not exist", async () => { + const cache = createCache("testCache", { + type: type as any, + }); + const value = await cache.getOrUpdate( + "newKey", + async () => 99, + () => true, + ); + assert.strictEqual(value.value, 99); + const cachedValue = await cache.get("newKey"); + assert.strictEqual(cachedValue, 99); + }); - test("values() retrieves all stored values", async () => { - const cache = createCache("testCache", { - type: type as any, - }) - await cache.set("key1", 10) - await cache.set("key2", 20) - await cache.set("key3", 30) + test("values() retrieves all stored values", async () => { + const cache = createCache("testCache", { + type: type as any, + }); + await cache.set("key1", 10); + await cache.set("key2", 20); + await cache.set("key3", 30); - const values = await cache.values() - assert(values.includes(10)) - assert(values.includes(20)) - assert(values.includes(30)) - }) - }) + const values = await cache.values(); + assert(values.includes(10)); + assert(values.includes(20)); + assert(values.includes(30)); + }); + }); } diff --git a/packages/core/src/cache.ts b/packages/core/src/cache.ts index e334f4be01..7892054997 100644 --- a/packages/core/src/cache.ts +++ b/packages/core/src/cache.ts @@ -1,12 +1,12 @@ -import { FsCache } from "./fscache" -import { JSONLineCache } from "./jsonlinecache" -import { MemoryCache } from "./memcache" -import { host } from "./host" -import { NotSupportedError } from "./error" -import { CancellationOptions } from "./cancellation" -import debug from "debug" -import { sanitizeFilename } from "./sanitize" -const dbg = debug("genaiscript:cache") +import { FsCache } from "./fscache"; +import { JSONLineCache } from "./jsonlinecache"; +import { MemoryCache } from "./memcache"; +import { host } from "./host"; +import { NotSupportedError } from "./error"; +import { CancellationOptions } from "./cancellation"; +import debug from "debug"; +import { sanitizeFilename } from "./sanitize"; +const dbg = debug("genaiscript:cache"); /** * Represents a cache entry with a hashed identifier (`sha`), `key`, and `val`. 
@@ -14,52 +14,50 @@ const dbg = debug("genaiscript:cache") * @template V - Type of the value */ export interface CacheEntry { - sha: string - val: V + sha: string; + val: V; } export interface CacheOptions { - type: "memory" | "jsonl" | "fs" - userState?: Record - lookupOnly?: boolean + type: "memory" | "jsonl" | "fs"; + userState?: Record; + lookupOnly?: boolean; } function cacheNormalizeName(name: string) { - return name - ? sanitizeFilename(name.replace(/[^a-z0-9_]/gi, "_")) - : undefined // Sanitize name + return name ? sanitizeFilename(name.replace(/[^a-z0-9_]/gi, "_")) : undefined; // Sanitize name } export function createCache( - name: string, - options: CacheOptions & CancellationOptions + name: string, + options: CacheOptions & CancellationOptions, ): WorkspaceFileCache { - name = cacheNormalizeName(name) // Sanitize name - if (!name) { - dbg(`empty cache name`) - return undefined - } + name = cacheNormalizeName(name); // Sanitize name + if (!name) { + dbg(`empty cache name`); + return undefined; + } - const type = options?.type || "fs" - const key = `cache:${type}:${name}` - const userState = options?.userState || host.userState - if (userState[key]) return userState[key] // Return if exists - if (options?.lookupOnly) return undefined + const type = options?.type || "fs"; + const key = `cache:${type}:${name}`; + const userState = options?.userState || host.userState; + if (userState[key]) return userState[key]; // Return if exists + if (options?.lookupOnly) return undefined; - dbg(`creating ${name} ${type}`) - let r: WorkspaceFileCache - switch (type) { - case "memory": - r = new MemoryCache(name) - break - case "jsonl": - r = new JSONLineCache(name) - break - default: - r = new FsCache(name) - break - } + dbg(`creating ${name} ${type}`); + let r: WorkspaceFileCache; + switch (type) { + case "memory": + r = new MemoryCache(name); + break; + case "jsonl": + r = new JSONLineCache(name); + break; + default: + r = new FsCache(name); + break; + } - userState[key] = r - return r + userState[key] = r; + return r; } diff --git a/packages/core/src/cancellation.test.ts b/packages/core/src/cancellation.test.ts index 5285960b0f..5f4b5ee034 100644 --- a/packages/core/src/cancellation.test.ts +++ b/packages/core/src/cancellation.test.ts @@ -1,93 +1,93 @@ -import { describe, test, beforeEach } from "node:test" -import assert from "node:assert/strict" +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; import { - CancellationToken, - AbortSignalCancellationToken, - toSignal, - AbortSignalCancellationController, - checkCancelled, -} from "./cancellation" -import { CancelError } from "./error" + CancellationToken, + AbortSignalCancellationToken, + toSignal, + AbortSignalCancellationController, + checkCancelled, +} from "./cancellation"; +import { CancelError } from "./error"; describe("CancellationToken", () => { - test("should implement isCancellationRequested", () => { - const token: CancellationToken = { isCancellationRequested: true } - assert.strictEqual(token.isCancellationRequested, true) + test("should implement isCancellationRequested", () => { + const token: CancellationToken = { isCancellationRequested: true }; + assert.strictEqual(token.isCancellationRequested, true); - token.isCancellationRequested = false - assert.strictEqual(token.isCancellationRequested, false) - }) -}) + token.isCancellationRequested = false; + assert.strictEqual(token.isCancellationRequested, false); + }); +}); describe("AbortSignalCancellationToken", () => { - let 
controller: AbortController - let token: AbortSignalCancellationToken + let controller: AbortController; + let token: AbortSignalCancellationToken; - beforeEach(() => { - controller = new AbortController() - token = new AbortSignalCancellationToken(controller.signal) - }) + beforeEach(() => { + controller = new AbortController(); + token = new AbortSignalCancellationToken(controller.signal); + }); - test("should initialize with an AbortSignal", () => { - assert.ok(token) - }) + test("should initialize with an AbortSignal", () => { + assert.ok(token); + }); - test("should return false when signal is not aborted", () => { - assert.strictEqual(token.isCancellationRequested, false) - }) + test("should return false when signal is not aborted", () => { + assert.strictEqual(token.isCancellationRequested, false); + }); - test("should return true when signal is aborted", () => { - controller.abort() - assert.strictEqual(token.isCancellationRequested, true) - }) -}) + test("should return true when signal is aborted", () => { + controller.abort(); + assert.strictEqual(token.isCancellationRequested, true); + }); +}); describe("toSignal", () => { - test("should return the signal if token is compatible", () => { - const controller = new AbortController() - const token = new AbortSignalCancellationToken(controller.signal) - assert.strictEqual(toSignal(token), controller.signal) - }) - - test("should return undefined if token is not compatible", () => { - const token: CancellationToken = { isCancellationRequested: false } - assert.strictEqual(toSignal(token), undefined) - }) -}) + test("should return the signal if token is compatible", () => { + const controller = new AbortController(); + const token = new AbortSignalCancellationToken(controller.signal); + assert.strictEqual(toSignal(token), controller.signal); + }); + + test("should return undefined if token is not compatible", () => { + const token: CancellationToken = { isCancellationRequested: false }; + assert.strictEqual(toSignal(token), undefined); + }); +}); describe("AbortSignalCancellationController", () => { - let controller: AbortSignalCancellationController - - beforeEach(() => { - controller = new AbortSignalCancellationController() - }) - - test("should initialize with an AbortController and token", () => { - assert.ok(controller.controller) - assert.ok(controller.token) - assert.strictEqual(controller.token.isCancellationRequested, false) - }) - - test("should abort the signal and set token isCancellationRequested to true", () => { - controller.abort() - assert.strictEqual(controller.token.isCancellationRequested, true) - }) - - test("should abort the signal with a reason", () => { - const reason = "Operation cancelled" - controller.abort(reason) - assert.strictEqual(controller.token.isCancellationRequested, true) - }) -}) + let controller: AbortSignalCancellationController; + + beforeEach(() => { + controller = new AbortSignalCancellationController(); + }); + + test("should initialize with an AbortController and token", () => { + assert.ok(controller.controller); + assert.ok(controller.token); + assert.strictEqual(controller.token.isCancellationRequested, false); + }); + + test("should abort the signal and set token isCancellationRequested to true", () => { + controller.abort(); + assert.strictEqual(controller.token.isCancellationRequested, true); + }); + + test("should abort the signal with a reason", () => { + const reason = "Operation cancelled"; + controller.abort(reason); + assert.strictEqual(controller.token.isCancellationRequested, 
true); + }); +}); describe("checkCancelled", () => { - test("should throw CancelError if cancellation is requested", () => { - const token: CancellationToken = { isCancellationRequested: true } - assert.throws(() => checkCancelled(token), CancelError) - }) - - test("should not throw if cancellation is not requested", () => { - const token: CancellationToken = { isCancellationRequested: false } - assert.doesNotThrow(() => checkCancelled(token)) - }) -}) + test("should throw CancelError if cancellation is requested", () => { + const token: CancellationToken = { isCancellationRequested: true }; + assert.throws(() => checkCancelled(token), CancelError); + }); + + test("should not throw if cancellation is not requested", () => { + const token: CancellationToken = { isCancellationRequested: false }; + assert.doesNotThrow(() => checkCancelled(token)); + }); +}); diff --git a/packages/core/src/cancellation.ts b/packages/core/src/cancellation.ts index e76f5935c3..d43e215793 100644 --- a/packages/core/src/cancellation.ts +++ b/packages/core/src/cancellation.ts @@ -1,5 +1,5 @@ // Import the CancelError class from the error module -import { CancelError } from "./error" +import { CancelError } from "./error"; /** * A cancellation token is passed to an asynchronous or long running @@ -10,11 +10,11 @@ import { CancelError } from "./error" * {@link CancellationTokenSource}. */ export interface CancellationToken { - /** - * Is `true` when the token has been cancelled, `false` otherwise. - * This flag should be checked by operations to decide if they should terminate. - */ - isCancellationRequested: boolean + /** + * Is `true` when the token has been cancelled, `false` otherwise. + * This flag should be checked by operations to decide if they should terminate. + */ + isCancellationRequested: boolean; } /** @@ -22,13 +22,13 @@ export interface CancellationToken { * to track the cancellation state. */ export class AbortSignalCancellationToken implements CancellationToken { - // Constructor takes an AbortSignal to track cancellation - constructor(private readonly signal: AbortSignal) {} + // Constructor takes an AbortSignal to track cancellation + constructor(private readonly signal: AbortSignal) {} - // Accessor for checking if the cancellation has been requested - get isCancellationRequested() { - return this.signal.aborted - } + // Accessor for checking if the cancellation has been requested + get isCancellationRequested() { + return this.signal.aborted; + } } /** @@ -39,7 +39,7 @@ export class AbortSignalCancellationToken implements CancellationToken { * @returns The associated AbortSignal or undefined if unsupported. */ export function toSignal(token: CancellationToken) { - return (token as any)?.signal as AbortSignal + return (token as any)?.signal as AbortSignal; } /** @@ -47,24 +47,24 @@ export function toSignal(token: CancellationToken) { * Useful for creating cancellable operations. 
*/ export class AbortSignalCancellationController { - readonly controller: AbortController - readonly token: AbortSignalCancellationToken + readonly controller: AbortController; + readonly token: AbortSignalCancellationToken; - // Initializes the controller and creates a token with the associated signal - constructor() { - this.controller = new AbortController() - this.token = new AbortSignalCancellationToken(this.controller.signal) - } + // Initializes the controller and creates a token with the associated signal + constructor() { + this.controller = new AbortController(); + this.token = new AbortSignalCancellationToken(this.controller.signal); + } - /** - * Aborts the ongoing operation with an optional reason. - * This triggers the cancellation state in the associated token. - * - * @param reason - Optional reason for aborting the operation. - */ - abort(reason?: any) { - this.controller.abort(reason) - } + /** + * Aborts the ongoing operation with an optional reason. + * This triggers the cancellation state in the associated token. + * + * @param reason - Optional reason for aborting the operation. + */ + abort(reason?: any) { + this.controller.abort(reason); + } } /** @@ -75,7 +75,7 @@ export class AbortSignalCancellationController { * @throws CancelError - If the cancellation has been requested. */ export function checkCancelled(token: CancellationToken) { - if (token?.isCancellationRequested) throw new CancelError("user cancelled") + if (token?.isCancellationRequested) throw new CancelError("user cancelled"); } /** @@ -83,5 +83,5 @@ export function checkCancelled(token: CancellationToken) { * Contains a CancellationToken that can be checked for cancellation requests. */ export interface CancellationOptions { - cancellationToken?: CancellationToken + cancellationToken?: CancellationToken; } diff --git a/packages/core/src/changelog.test.ts b/packages/core/src/changelog.test.ts index 71f277fa56..27853c77dd 100644 --- a/packages/core/src/changelog.test.ts +++ b/packages/core/src/changelog.test.ts @@ -1,10 +1,10 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { parseChangeLogs } from "./changelog" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { parseChangeLogs } from "./changelog"; describe("changelog", () => { - test("template", () => { - const source = `ChangeLog:1@ + test("template", () => { + const source = `ChangeLog:1@ Description: . OriginalCode@4-6: [4] @@ -31,17 +31,17 @@ ChangedCode@15-17: OriginalCode@23-23: [23] ChangedCode@23-23: -[23] ` - const res = parseChangeLogs(source) - assert.equal(res.length, 2) - assert.equal(res[0].filename, "") - assert.equal(res[1].filename, "") - assert.equal(res[0].changes.length, 2) - assert.equal(res[1].changes.length, 2) - }) +[23] `; + const res = parseChangeLogs(source); + assert.equal(res.length, 2); + assert.equal(res[0].filename, ""); + assert.equal(res[1].filename, ""); + assert.equal(res[0].changes.length, 2); + assert.equal(res[1].changes.length, 2); + }); - test("url", () => { - const source = `ChangeLog:1@email_validator.py + test("url", () => { + const source = `ChangeLog:1@email_validator.py Description: Implement a function to validate both email addresses and URLs. 
OriginalCode@1-3: [1] # Placeholder for email validation logic @@ -62,14 +62,14 @@ ChangedCode@1-10: [12] [13] def validate_email_and_url(email, url): [14] return validate_email(email) and validate_url(url) -` - const res = parseChangeLogs(source) - assert.equal(res.length, 1) - assert.equal(res[0].filename, "email_validator.py") - }) +`; + const res = parseChangeLogs(source); + assert.equal(res.length, 1); + assert.equal(res[0].filename, "email_validator.py"); + }); - test("annotations", () => { - const source = ` + test("annotations", () => { + const source = ` ChangeLog:1@annotations.md Description: Corrected grammatical errors and enhanced technical language. OriginalCode@9-9: @@ -121,14 +121,14 @@ ChangedCode@85-88: [86] policies. Refer to the [GitHub Documentation](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/enabling-features-for-your-repository/managing-security-and-analysis-settings-for-your-repository#granting-access-to-security-alerts) for further assistance. [87] - Your organization may impose restrictions on the execution of GitHub Actions for Pull Requests. [88] Consult the [GitHub Documentation](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/enabling-features-for-your-repository/managing-github-actions-settings-for-a-repository#about-github-actions-permissions-for-your-repository) for additional guidance. - ` - const res = parseChangeLogs(source) - assert.equal(res.length, 1) - assert.equal(res[0].changes.length, 6) - }) + `; + const res = parseChangeLogs(source); + assert.equal(res.length, 1); + assert.equal(res[0].changes.length, 6); + }); - test("documentor", () => { - const source = `ChangeLog:1@packages/core/src/cancellation.ts + test("documentor", () => { + const source = `ChangeLog:1@packages/core/src/cancellation.ts Description: Added comments to explain the purpose and functionality of the code. OriginalCode@3-10: @@ -236,14 +236,14 @@ ChangedCode@46-48: [46] export interface CancellationOptions { [47] // Optional CancellationToken for managing cancellation state [48] cancellationToken?: CancellationToken -[49] }` - const res = parseChangeLogs(source) - console.log(res) - assert.equal(res[0].filename, "packages/core/src/cancellation.ts") - }) +[49] }`; + const res = parseChangeLogs(source); + console.log(res); + assert.equal(res[0].filename, "packages/core/src/cancellation.ts"); + }); - test("missing header", () => { - const source = ` + test("missing header", () => { + const source = ` ChangeLog:1@src/edits/su/fib.ts Description: Implement the Fibonacci function and remove comments and empty lines. OriginalCode@105-107: @@ -254,14 +254,14 @@ ChangedCode@105-107: [105] if (n <= 1) return n; [106] return fibonacci(n - 1) + fibonacci(n - 2); [107] } -` - const res = parseChangeLogs(source) - console.log(res) - assert.equal(res[0].filename, "src/edits/su/fib.ts") - }) +`; + const res = parseChangeLogs(source); + console.log(res); + assert.equal(res[0].filename, "src/edits/su/fib.ts"); + }); - test("unbalancred fences", async () => { - const source = `\`\`\`\`\`changelog + test("unbalanced fences", async () => { + const source = `\`\`\`\`\`changelog ChangeLog:1@src/edits/bigfibs/fib.py Description: Implemented new_function, removed comments and empty lines.
OriginalCode@48-51: @@ -274,13 +274,13 @@ ChangedCode@48-50: [49] return (10 - (sum % 10)) % 10 [50] \`\`\` -` - const res = parseChangeLogs(source) - console.log(res) - assert.equal(res[0].filename, "src/edits/bigfibs/fib.py") - }) +`; + const res = parseChangeLogs(source); + console.log(res); + assert.equal(res[0].filename, "src/edits/bigfibs/fib.py"); + }); - /* + /* test("missing ChangedCode Change", async () => { const source = ` \`\`\`\`\`changelog @@ -426,4 +426,4 @@ ChangedCode@297-306: console.log(res) }) */ -}) +}); diff --git a/packages/core/src/changelog.ts b/packages/core/src/changelog.ts index f66b246889..6db58b7309 100644 --- a/packages/core/src/changelog.ts +++ b/packages/core/src/changelog.ts @@ -3,27 +3,27 @@ * A changelog describes changes between original and modified code segments. */ -import { unfence } from "./unwrappers" +import { unfence } from "./unwrappers"; // Represents a chunk of code with a start and end line and its content. export interface ChangeLogChunk { - start: number // Starting line number - end: number // Ending line number - lines: { index: number; content: string }[] // Lines of code within the chunk + start: number; // Starting line number + end: number; // Ending line number + lines: { index: number; content: string }[]; // Lines of code within the chunk } // Represents a change between an original and a changed code chunk. export interface ChangeLogChange { - original: ChangeLogChunk // Original code chunk - changed: ChangeLogChunk // Changed code chunk + original: ChangeLogChunk; // Original code chunk + changed: ChangeLogChunk; // Changed code chunk } // Represents a complete changelog for a file. export interface ChangeLog { - index: number // Index of the changelog entry - filename: string // Filename associated with the changelog - description: string // Description of the changes - changes: ChangeLogChange[] // List of changes within the changelog + index: number; // Index of the changelog entry + filename: string; // Filename associated with the changelog + description: string; // Description of the changes + changes: ChangeLogChange[]; // List of changes within the changelog } /** @@ -37,111 +37,109 @@ export interface ChangeLog { * @returns An array of ChangeLog objects parsed from the input. */ export function parseChangeLogs(source: string): ChangeLog[] { - const lines = unfence(source, "changelog").split("\n") - const changelogs: ChangeLog[] = [] - - // Process each line to extract changelog information. - while (lines.length) { - if (!lines[0].trim()) { - lines.shift() - continue - } - - // each back ticks - if (/^[\`\.]{3,}/.test(lines[0])) { - lines.shift() - continue - } - - // Parse the ChangeLog header line. - let m = /^ChangeLog:\s*(?\d+)@(?.*)\s*$/i.exec(lines[0]) - if (!m) - throw new Error("missing ChangeLog header in |" + lines[0] + "|") - const changelog: ChangeLog = { - index: parseInt(m.groups.index), - filename: m.groups.file.trim(), - description: undefined, - changes: [], - } - changelogs.push(changelog) - lines.shift() - - // Parse the Description line. - m = /^Description:(?.*)$/i.exec(lines[0]) - if (!m) throw new Error("missing ChangeLog description") - changelog.description = m.groups.description.trim() - lines.shift() - - // Parse changes block. - while (lines.length) { - // Skip empty lines. 
- if (/^\s*$/.test(lines[0])) { - lines.shift() - continue - } - - // each back ticks - if (/^[\`\.]{3,}/.test(lines[0])) { - // somehow we have finished this changed - lines.shift() - continue - } - - // Attempt to parse a change. - const change = parseChange() - if (change) changelog.changes.push(change) - else break - } + const lines = unfence(source, "changelog").split("\n"); + const changelogs: ChangeLog[] = []; + + // Process each line to extract changelog information. + while (lines.length) { + if (!lines[0].trim()) { + lines.shift(); + continue; } - return changelogs - - // Parses a single change within the changelog. - function parseChange(): ChangeLogChange { - // Parse OriginalCode block - let m = /^OriginalCode@(?\d+)-(?\d+):$/i.exec(lines[0]) - if (!m) return undefined - lines.shift() - - const original = parseChunk(m) - - // Parse ChangedCode block - m = /^ChangedCode@(?\d+)-(?\d+):\s*$/i.exec(lines[0]) - if (!m) - throw new Error("missing ChangedCode Changed in '" + lines[0] + "'") - - lines.shift() - const changed = parseChunk(m) - const res = { original, changed } - return res + + // skip fence lines made of backticks or dots + if (/^[\`\.]{3,}/.test(lines[0])) { + lines.shift(); + continue; } - // Parses a chunk of code from the changelog. - function parseChunk(m: RegExpExecArray): ChangeLogChunk { - const start = parseInt(m.groups.start) - const end = parseInt(m.groups.end) - const chunk: ChangeLogChunk = { - start, - end, - lines: [], - } - while (lines.length) { - m = /^\[(?\d+)\](?.*)$/i.exec(lines[0]) - if (m) { - let content = m.groups.content - if (content[0] === " ") content = content.slice(1) - chunk.lines.push({ - index: parseInt(m.groups.index), - content, - }) - lines.shift() - } else { - break - } - } - return chunk + // Parse the ChangeLog header line. + let m = /^ChangeLog:\s*(?\d+)@(?.*)\s*$/i.exec(lines[0]); + if (!m) throw new Error("missing ChangeLog header in |" + lines[0] + "|"); + const changelog: ChangeLog = { + index: parseInt(m.groups.index), + filename: m.groups.file.trim(), + description: undefined, + changes: [], + }; + changelogs.push(changelog); + lines.shift(); + + // Parse the Description line. + m = /^Description:(?.*)$/i.exec(lines[0]); + if (!m) throw new Error("missing ChangeLog description"); + changelog.description = m.groups.description.trim(); + lines.shift(); + + // Parse changes block. + while (lines.length) { + // Skip empty lines. + if (/^\s*$/.test(lines[0])) { + lines.shift(); + continue; + } + + // skip fence lines made of backticks or dots + if (/^[\`\.]{3,}/.test(lines[0])) { + // a fence here means this change block has ended + lines.shift(); + continue; + } + + // Attempt to parse a change. + const change = parseChange(); + if (change) changelog.changes.push(change); + else break; + } + } + return changelogs; + + // Parses a single change within the changelog. + function parseChange(): ChangeLogChange { + // Parse OriginalCode block + let m = /^OriginalCode@(?\d+)-(?\d+):$/i.exec(lines[0]); + if (!m) return undefined; + lines.shift(); + + const original = parseChunk(m); + + // Parse ChangedCode block + m = /^ChangedCode@(?\d+)-(?\d+):\s*$/i.exec(lines[0]); + if (!m) throw new Error("missing ChangedCode Changed in '" + lines[0] + "'"); + + lines.shift(); + const changed = parseChunk(m); + const res = { original, changed }; + return res; + } + + // Parses a chunk of code from the changelog.
+ function parseChunk(m: RegExpExecArray): ChangeLogChunk { + const start = parseInt(m.groups.start); + const end = parseInt(m.groups.end); + const chunk: ChangeLogChunk = { + start, + end, + lines: [], + }; + while (lines.length) { + m = /^\[(?\d+)\](?.*)$/i.exec(lines[0]); + if (m) { + let content = m.groups.content; + if (content[0] === " ") content = content.slice(1); + chunk.lines.push({ + index: parseInt(m.groups.index), + content, + }); + lines.shift(); + } else { + break; + } } + return chunk; + } - /* + /* Example changelog format: ChangeLog:1@ Description: . @@ -162,25 +160,25 @@ export function parseChangeLogs(source: string): ChangeLog[] { * @returns The modified source code as a string. */ export function applyChangeLog(source: string, changelog: ChangeLog): string { - const lines = source.split("\n") - for (let i = 0; i < changelog.changes.length; ++i) { - const change = changelog.changes[i] - const { original, changed } = change - - // Replace original lines with changed lines in the source. - lines.splice( - original.start - 1, - original.end - original.start + 1, - ...changed.lines.map((l) => l.content) - ) - - // Adjust subsequent change indices based on the shift in lines. - const shift = changed.lines.length - original.lines.length - for (let j = i + 1; j < changelog.changes.length; ++j) { - const c = changelog.changes[j] - c.original.start += shift - c.original.end += shift - } + const lines = source.split("\n"); + for (let i = 0; i < changelog.changes.length; ++i) { + const change = changelog.changes[i]; + const { original, changed } = change; + + // Replace original lines with changed lines in the source. + lines.splice( + original.start - 1, + original.end - original.start + 1, + ...changed.lines.map((l) => l.content), + ); + + // Adjust subsequent change indices based on the shift in lines. 
+ const shift = changed.lines.length - original.lines.length; + for (let j = i + 1; j < changelog.changes.length; ++j) { + const c = changelog.changes[j]; + c.original.start += shift; + c.original.end += shift; } - return lines.join("\n") + } + return lines.join("\n"); } diff --git a/packages/core/src/chat.test.ts b/packages/core/src/chat.test.ts index 3fb5d0326d..285e48a34e 100644 --- a/packages/core/src/chat.test.ts +++ b/packages/core/src/chat.test.ts @@ -1,71 +1,67 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { ChatCompletionMessageParam } from "./chattypes" -import { collapseChatMessages } from "./chatrender" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { ChatCompletionMessageParam } from "./chattypes"; +import { collapseChatMessages } from "./chatrender"; describe("chat", () => { - describe("collapse", () => { - test("user1", () => { - const messages: ChatCompletionMessageParam[] = [ - { role: "user", content: "1" }, - ] - const res = structuredClone(messages) - collapseChatMessages(res) - assert.deepStrictEqual(res, messages) - }) - test("system1", () => { - const messages: ChatCompletionMessageParam[] = [ - { role: "system", content: "1" }, - ] - const res = structuredClone(messages) - collapseChatMessages(res) - assert.deepStrictEqual(res, messages) - }) - test("system1user1", () => { - const messages: ChatCompletionMessageParam[] = [ - { role: "system", content: "1" }, - { role: "user", content: "1" }, - ] - const res = structuredClone(messages) - collapseChatMessages(res) - assert.deepStrictEqual(res, messages) - }) - test("system2", () => { - const messages: ChatCompletionMessageParam[] = [ - { role: "system", content: "1" }, - { role: "system", content: "2" }, - ] - collapseChatMessages(messages) - assert.strictEqual(1, messages.length) - assert.strictEqual("system", messages[0].role) - assert.strictEqual("1\n2", messages[0].content) - }) - test("system2user1", () => { - const messages: ChatCompletionMessageParam[] = [ - { role: "system", content: "1" }, - { role: "system", content: "2" }, - { role: "user", content: "3" }, - ] - collapseChatMessages(messages) - assert.strictEqual(2, messages.length) - assert.strictEqual("system", messages[0].role) - assert.strictEqual("1\n2", messages[0].content) - assert.strictEqual("user", messages[1].role) - assert.strictEqual("3", messages[1].content) - }) - test("system2user1", () => { - const messages: ChatCompletionMessageParam[] = [ - { role: "system", content: "1" }, - { role: "system", content: "2" }, - { role: "user", content: "3" }, - { role: "user", content: "4" }, - ] - collapseChatMessages(messages) - assert.strictEqual(3, messages.length) - assert.strictEqual("system", messages[0].role) - assert.strictEqual("1\n2", messages[0].content) - assert.strictEqual("user", messages[1].role) - assert.strictEqual("3", messages[1].content) - }) - }) -}) + describe("collapse", () => { + test("user1", () => { + const messages: ChatCompletionMessageParam[] = [{ role: "user", content: "1" }]; + const res = structuredClone(messages); + collapseChatMessages(res); + assert.deepStrictEqual(res, messages); + }); + test("system1", () => { + const messages: ChatCompletionMessageParam[] = [{ role: "system", content: "1" }]; + const res = structuredClone(messages); + collapseChatMessages(res); + assert.deepStrictEqual(res, messages); + }); + test("system1user1", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "system", content: "1" 
}, + { role: "user", content: "1" }, + ]; + const res = structuredClone(messages); + collapseChatMessages(res); + assert.deepStrictEqual(res, messages); + }); + test("system2", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "system", content: "1" }, + { role: "system", content: "2" }, + ]; + collapseChatMessages(messages); + assert.strictEqual(1, messages.length); + assert.strictEqual("system", messages[0].role); + assert.strictEqual("1\n2", messages[0].content); + }); + test("system2user1", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "system", content: "1" }, + { role: "system", content: "2" }, + { role: "user", content: "3" }, + ]; + collapseChatMessages(messages); + assert.strictEqual(2, messages.length); + assert.strictEqual("system", messages[0].role); + assert.strictEqual("1\n2", messages[0].content); + assert.strictEqual("user", messages[1].role); + assert.strictEqual("3", messages[1].content); + }); + test("system2user2", () => { + const messages: ChatCompletionMessageParam[] = [ + { role: "system", content: "1" }, + { role: "system", content: "2" }, + { role: "user", content: "3" }, + { role: "user", content: "4" }, + ]; + collapseChatMessages(messages); + assert.strictEqual(3, messages.length); + assert.strictEqual("system", messages[0].role); + assert.strictEqual("1\n2", messages[0].content); + assert.strictEqual("user", messages[1].role); + assert.strictEqual("3", messages[1].content); + }); + }); +}); diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index 6e2a4839ed..0a0a64b25f 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -1,500 +1,455 @@ // cspell: disable -import { MarkdownTrace, TraceOptions } from "./trace" -import { PromptImage, PromptPrediction, renderPromptNode } from "./promptdom" -import { host, runtimeHost } from "./host" -import { GenerationOptions } from "./generation" -import { dispose } from "./dispose" -import { JSON5TryParse, JSONLLMTryParse, isJSONObjectOrArray } from "./json5" +import { MarkdownTrace, TraceOptions } from "./trace"; +import { PromptImage, PromptPrediction, renderPromptNode } from "./promptdom"; +import { host, runtimeHost } from "./host"; +import { GenerationOptions } from "./generation"; +import { dispose } from "./dispose"; +import { JSON5TryParse, JSONLLMTryParse, isJSONObjectOrArray } from "./json5"; +import { CancellationOptions, CancellationToken, checkCancelled } from "./cancellation"; import { - CancellationOptions, - CancellationToken, - checkCancelled, -} from "./cancellation" + arrayify, + assert, + ellipse, + logError, + logInfo, + logVerbose, + logWarn, + toStringList, +} from "./util"; +import { extractFenced, findFirstDataFence } from "./fence"; +import { toStrictJSONSchema, validateFencesWithSchema, validateJSONWithSchema } from "./schema"; import { - arrayify, - assert, - ellipse, - logError, - logInfo, - logVerbose, - logWarn, - toStringList, -} from "./util" -import { extractFenced, findFirstDataFence } from "./fence" + CHOICE_LOGIT_BIAS, + MAX_DATA_REPAIRS, + MAX_TOOL_CALLS, + MAX_TOOL_CONTENT_TOKENS, + MAX_TOOL_DESCRIPTION_LENGTH, + SYSTEM_FENCE, +} from "./constants"; +import { parseAnnotations } from "./annotations"; +import { errorMessage, isCancelError, serializeError } from "./error"; +import { createChatTurnGenerationContext } from "./runpromptcontext"; +import { parseModelIdentifier, traceLanguageModelConnection } from "./models"; import { - toStrictJSONSchema, - validateFencesWithSchema, - validateJSONWithSchema, -} from
"./schema" + ChatCompletionAssistantMessageParam, + ChatCompletionContentPartImage, + ChatCompletionMessageParam, + ChatCompletionResponse, + ChatCompletionsOptions, + ChatCompletionSystemMessageParam, + ChatCompletionTool, + ChatCompletionToolCall, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, + CreateChatCompletionRequest, + EmbeddingResult, +} from "./chattypes"; import { - CHOICE_LOGIT_BIAS, - MAX_DATA_REPAIRS, - MAX_TOOL_CALLS, - MAX_TOOL_CONTENT_TOKENS, - MAX_TOOL_DESCRIPTION_LENGTH, - SYSTEM_FENCE, -} from "./constants" -import { parseAnnotations } from "./annotations" -import { errorMessage, isCancelError, serializeError } from "./error" -import { createChatTurnGenerationContext } from "./runpromptcontext" -import { parseModelIdentifier, traceLanguageModelConnection } from "./models" + assistantText, + collapseChatMessages, + lastAssistantReasoning, + renderMessagesToMarkdown, + renderShellOutput, +} from "./chatrender"; +import { promptParametersSchemaToJSONSchema } from "./parameters"; +import { prettifyMarkdown } from "./markdown"; +import { YAMLParse, YAMLStringify, YAMLTryParse } from "./yaml"; +import { resolveTokenEncoder } from "./encoders"; +import { approximateTokens, truncateTextToTokens } from "./tokens"; +import { computeFileEdits } from "./fileedits"; +import { HTMLEscape } from "./htmlescape"; +import { XMLTryParse } from "./xml"; import { - ChatCompletionAssistantMessageParam, - ChatCompletionContentPartImage, - ChatCompletionMessageParam, - ChatCompletionResponse, - ChatCompletionsOptions, - ChatCompletionSystemMessageParam, - ChatCompletionTool, - ChatCompletionToolCall, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, - CreateChatCompletionRequest, - EmbeddingResult, -} from "./chattypes" -import { - assistantText, - collapseChatMessages, - lastAssistantReasoning, - renderMessagesToMarkdown, - renderShellOutput, -} from "./chatrender" -import { promptParametersSchemaToJSONSchema } from "./parameters" -import { prettifyMarkdown } from "./markdown" -import { YAMLParse, YAMLStringify, YAMLTryParse } from "./yaml" -import { resolveTokenEncoder } from "./encoders" -import { approximateTokens, truncateTextToTokens } from "./tokens" -import { computeFileEdits } from "./fileedits" -import { HTMLEscape } from "./htmlescape" -import { XMLTryParse } from "./xml" -import { - computePerplexity, - computeStructuralUncertainty, - logprobToMarkdown, - renderLogprob, - serializeLogProb, - topLogprobsToMarkdown, -} from "./logprob" -import { uniq } from "es-toolkit" -import { renderWithPrecision } from "./precision" -import { LanguageModelConfiguration, ResponseStatus } from "./server/messages" -import { unfence } from "./unwrappers" -import { fenceMD } from "./mkmd" -import { - ChatCompletionRequestCacheKey, - getChatCompletionCache, -} from "./chatcache" -import { deleteUndefinedValues } from "./cleaners" -import { splitThink, unthink } from "./think" -import { measure } from "./performance" -import { renderMessagesToTerminal } from "./chatrenderterminal" -import { fileCacheImage } from "./filecache" -import { stderr } from "./stdio" -import { isQuiet } from "./quiet" -import { resolvePromptInjectionDetector } from "./contentsafety" -import { genaiscriptDebug } from "./debug" -import { providerFeatures } from "./features" -import { redactSecrets } from "./secretscanner" -const dbg = genaiscriptDebug("chat") -const dbgt = dbg.extend("tool") - -function toChatCompletionImage( - image: PromptImage -): ChatCompletionContentPartImage { - const 
{ url, detail } = image - return { - type: "image_url", - image_url: { - url, - detail, - }, - } + computePerplexity, + computeStructuralUncertainty, + logprobToMarkdown, + renderLogprob, + serializeLogProb, + topLogprobsToMarkdown, +} from "./logprob"; +import { uniq } from "es-toolkit"; +import { renderWithPrecision } from "./precision"; +import { LanguageModelConfiguration, ResponseStatus } from "./server/messages"; +import { unfence } from "./unwrappers"; +import { fenceMD } from "./mkmd"; +import { ChatCompletionRequestCacheKey, getChatCompletionCache } from "./chatcache"; +import { deleteUndefinedValues } from "./cleaners"; +import { splitThink, unthink } from "./think"; +import { measure } from "./performance"; +import { renderMessagesToTerminal } from "./chatrenderterminal"; +import { fileCacheImage } from "./filecache"; +import { stderr } from "./stdio"; +import { isQuiet } from "./quiet"; +import { resolvePromptInjectionDetector } from "./contentsafety"; +import { genaiscriptDebug } from "./debug"; +import { providerFeatures } from "./features"; +import { redactSecrets } from "./secretscanner"; +const dbg = genaiscriptDebug("chat"); +const dbgt = dbg.extend("tool"); + +function toChatCompletionImage(image: PromptImage): ChatCompletionContentPartImage { + const { url, detail } = image; + return { + type: "image_url", + image_url: { + url, + detail, + }, + }; } export type ChatCompletionHandler = ( - req: CreateChatCompletionRequest, - connection: LanguageModelConfiguration, - options: ChatCompletionsOptions & CancellationOptions & RetryOptions, - trace: MarkdownTrace -) => Promise + req: CreateChatCompletionRequest, + connection: LanguageModelConfiguration, + options: ChatCompletionsOptions & CancellationOptions & RetryOptions, + trace: MarkdownTrace, +) => Promise; export type ListModelsFunction = ( - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, ) => Promise< - ResponseStatus & { - models?: LanguageModelInfo[] - } -> + ResponseStatus & { + models?: LanguageModelInfo[]; + } +>; export type PullModelFunction = ( - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions -) => Promise + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, +) => Promise; export type CreateTranscriptionRequest = { - file: Blob - model: string -} & TranscriptionOptions + file: Blob; + model: string; +} & TranscriptionOptions; export type TranscribeFunction = ( - req: CreateTranscriptionRequest, - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions -) => Promise + req: CreateTranscriptionRequest, + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, +) => Promise; export type CreateSpeechRequest = { - input: string - model: string - voice?: string - instructions?: string -} + input: string; + model: string; + voice?: string; + instructions?: string; +}; export type CreateSpeechResult = { - audio: Uint8Array - error?: SerializedError -} + audio: Uint8Array; + error?: SerializedError; +}; export type SpeechFunction = ( - req: CreateSpeechRequest, - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions -) => Promise + req: CreateSpeechRequest, + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, +) => Promise; export 
type CreateImageRequest = { - model: string - prompt: string - quality?: string - size?: string - style?: string - outputFormat?: "png" | "jpeg" | "webp" -} + model: string; + prompt: string; + quality?: string; + size?: string; + style?: string; + outputFormat?: "png" | "jpeg" | "webp"; +}; export interface ImageGenerationUsage { - total_tokens: number - input_tokens: number - output_tokens: number - input_tokens_details?: { - text_tokens: number - image_tokens: number - } + total_tokens: number; + input_tokens: number; + output_tokens: number; + input_tokens_details?: { + text_tokens: number; + image_tokens: number; + }; } export interface CreateImageResult { - image: Uint8Array - error?: SerializedError - revisedPrompt?: string - usage?: ImageGenerationUsage + image: Uint8Array; + error?: SerializedError; + revisedPrompt?: string; + usage?: ImageGenerationUsage; } export type ImageGenerationFunction = ( - req: CreateImageRequest, - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions -) => Promise + req: CreateImageRequest, + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, +) => Promise; export type EmbeddingFunction = ( - input: string, - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions -) => Promise + input: string, + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, +) => Promise; export type WorkspaceFileIndexCreator = ( - indexName: string, - cfg: LanguageModelConfiguration, - embedder: EmbeddingFunction, - options?: VectorIndexOptions & TraceOptions & CancellationOptions -) => Promise + indexName: string, + cfg: LanguageModelConfiguration, + embedder: EmbeddingFunction, + options?: VectorIndexOptions & TraceOptions & CancellationOptions, +) => Promise; export interface LanguageModel { - id: string - completer?: ChatCompletionHandler - listModels?: ListModelsFunction - pullModel?: PullModelFunction - transcriber?: TranscribeFunction - speaker?: SpeechFunction - imageGenerator?: ImageGenerationFunction - embedder?: EmbeddingFunction + id: string; + completer?: ChatCompletionHandler; + listModels?: ListModelsFunction; + pullModel?: PullModelFunction; + transcriber?: TranscribeFunction; + speaker?: SpeechFunction; + imageGenerator?: ImageGenerationFunction; + embedder?: EmbeddingFunction; } async function runToolCalls( - resp: ChatCompletionResponse, - messages: ChatCompletionMessageParam[], - tools: ToolCallback[], - options: GenerationOptions + resp: ChatCompletionResponse, + messages: ChatCompletionMessageParam[], + tools: ToolCallback[], + options: GenerationOptions, ) { - const projFolder = host.projectFolder() - const { cancellationToken, trace, model } = options || {} - const { encode: encoder } = await resolveTokenEncoder(model) - assert(!!trace) - let edits: Edits[] = [] - - if (!options.fallbackTools) { - dbgt(`fallback: appending tool calls to assistant message`) - messages.push({ - role: "assistant", - tool_calls: resp.toolCalls.map((c) => ({ - id: c.id, - function: { - name: c.name, - arguments: c.arguments, - }, - type: "function", - })), - }) - } else { - // pop the last assistant message - appendUserMessage(messages, "## Tool Results (computed by tools)") - } - - // call tool and run again - for (const call of resp.toolCalls) { - checkCancelled(cancellationToken) - dbgt(`running tool call %s`, call.name) - const toolTrace = trace.startTraceDetails(`📠 tool call ${call.name}`) - try { - await 
runToolCall( - toolTrace, - cancellationToken, - call, - tools, - edits, - projFolder, - encoder, - messages, - { ...options, trace: toolTrace } - ) - } catch (e) { - logError(e) - toolTrace.error(`tool call ${call.id} error`, e) - throw e - } finally { - toolTrace.endDetails() - } + const projFolder = host.projectFolder(); + const { cancellationToken, trace, model } = options || {}; + const { encode: encoder } = await resolveTokenEncoder(model); + assert(!!trace); + let edits: Edits[] = []; + + if (!options.fallbackTools) { + dbgt(`appending tool calls to assistant message`); + messages.push({ + role: "assistant", + tool_calls: resp.toolCalls.map((c) => ({ + id: c.id, + function: { + name: c.name, + arguments: c.arguments, + }, + type: "function", + })), + }); + } else { + // fallback tools: surface tool results through a user message instead of tool messages + appendUserMessage(messages, "## Tool Results (computed by tools)"); + } + + // call tool and run again + for (const call of resp.toolCalls) { + checkCancelled(cancellationToken); + dbgt(`running tool call %s`, call.name); + const toolTrace = trace.startTraceDetails(`📠 tool call ${call.name}`); + try { + await runToolCall( + toolTrace, + cancellationToken, + call, + tools, + edits, + projFolder, + encoder, + messages, + { ...options, trace: toolTrace }, + ); + } catch (e) { + logError(e); + toolTrace.error(`tool call ${call.id} error`, e); + throw e; + } finally { + toolTrace.endDetails(); } } - return { edits } + return { edits }; } async function runToolCall( - trace: MarkdownTrace, - cancellationToken: CancellationToken, - call: ChatCompletionToolCall, - tools: ToolCallback[], - edits: Edits[], - projFolder: string, - encoder: TokenEncoder, - messages: ChatCompletionMessageParam[], - options: GenerationOptions + trace: MarkdownTrace, + cancellationToken: CancellationToken, + call: ChatCompletionToolCall, + tools: ToolCallback[], + edits: Edits[], + projFolder: string, + encoder: TokenEncoder, + messages: ChatCompletionMessageParam[], + options: GenerationOptions, ) { - const callArgs: any = JSONLLMTryParse(call.arguments) - trace.fence(call.arguments, "json") - if (callArgs === undefined) trace.error("arguments failed to parse") - - let todos: { tool: ToolCallback; args: any }[] - if (call.name === "multi_tool_use.parallel") { - dbgt(`multi tool call`) - // special undocumented openai hallucination, argument contains multiple tool calls - // { - // "id": "call_D48fudXi4oBxQ2rNeHhpwIKh", - // "name": "multi_tool_use.parallel", - // "arguments": "{\"tool_uses\":[{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/content/docs/**/*.md\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/content/docs/**/*.mdx\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"../packages/sample/src/*.genai.{js,mjs}\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/assets/*.txt\"}}]}" - // } - const toolUses = callArgs.tool_uses as { - recipient_name: string - parameters: any - }[] - todos = toolUses.map((tu) => { - const toolName = tu.recipient_name.replace(/^functions\./, "") - const tool = tools.find((f) => f.spec.name === toolName) - if (!tool) { - logVerbose(JSON.stringify(tu, null, 2)) - throw new Error( - `multi tool ${toolName} not found in ${tools.map((t) => t.spec.name).join(", ")}` - ) - } - return { tool, args: tu.parameters } - }) - } else { - dbgt(`finding tool for call ${call.name}`) - let tool = tools.find((f) => f.spec.name === call.name) - if (!tool) { -
logVerbose(JSON.stringify(call, null, 2)) - logVerbose( - `tool ${call.name} not found in ${tools.map((t) => t.spec.name).join(", ")}` - ) - dbgt(`tool ${call.name} not found`) - trace.log(`tool ${call.name} not found`) - tool = { - spec: { - name: call.name, - description: "unknown tool", - }, - generator: undefined, - impl: async () => { - dbg("tool_not_found", call.name) - return `unknown tool ${call.name}` - }, - } - } - todos = [{ tool, args: callArgs }] + const callArgs: any = JSONLLMTryParse(call.arguments); + trace.fence(call.arguments, "json"); + if (callArgs === undefined) trace.error("arguments failed to parse"); + + let todos: { tool: ToolCallback; args: any }[]; + if (call.name === "multi_tool_use.parallel") { + dbgt(`multi tool call`); + // special undocumented openai hallucination, argument contains multiple tool calls + // { + // "id": "call_D48fudXi4oBxQ2rNeHhpwIKh", + // "name": "multi_tool_use.parallel", + // "arguments": "{\"tool_uses\":[{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/content/docs/**/*.md\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/content/docs/**/*.mdx\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"../packages/sample/src/*.genai.{js,mjs}\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/assets/*.txt\"}}]}" + // } + const toolUses = callArgs.tool_uses as { + recipient_name: string; + parameters: any; + }[]; + todos = toolUses.map((tu) => { + const toolName = tu.recipient_name.replace(/^functions\./, ""); + const tool = tools.find((f) => f.spec.name === toolName); + if (!tool) { + logVerbose(JSON.stringify(tu, null, 2)); + throw new Error( + `multi tool ${toolName} not found in ${tools.map((t) => t.spec.name).join(", ")}`, + ); + } + return { tool, args: tu.parameters }; + }); + } else { + dbgt(`finding tool for call ${call.name}`); + let tool = tools.find((f) => f.spec.name === call.name); + if (!tool) { + logVerbose(JSON.stringify(call, null, 2)); + logVerbose(`tool ${call.name} not found in ${tools.map((t) => t.spec.name).join(", ")}`); + dbgt(`tool ${call.name} not found`); + trace.log(`tool ${call.name} not found`); + tool = { + spec: { + name: call.name, + description: "unknown tool", + }, + generator: undefined, + impl: async () => { + dbg("tool_not_found", call.name); + return `unknown tool ${call.name}`; + }, + }; } - - const toolResult: string[] = [] - for (const todo of todos) { - const { tool, args } = todo - const dbgtt = dbgt.extend(tool.spec.name) - const { maxTokens: maxToolContentTokens = MAX_TOOL_CONTENT_TOKENS } = - tool.options || {} - dbgtt( - `running %s maxt %d\n%O`, - tool.spec.name, - maxToolContentTokens, - args - ) - const context: ToolCallContext = { - log: (message: string) => { - logInfo(message) - trace.log(message) - }, - debug: (message: string) => { - logVerbose(message) - trace.log(message) - }, - trace, - } - - let output: ToolCallOutput - try { - output = await tool.impl({ context, ...args }) - dbgtt(`output: %O`, output) - } catch (e) { - dbgtt(e) - logWarn(`tool: ${tool.spec.name} error`) - logError(e) - trace.error(`tool: ${tool.spec.name} error`, e) - output = errorMessage(e) - } - if (output === undefined || output === null) - output = "no output from tool" - let toolContent: string = undefined - let toolEdits: Edits[] = undefined - if (typeof output === "string") { - toolContent = output - } else if (typeof output === "number" || typeof output === "boolean") { - toolContent = 
String(output) - } else if ( - typeof output === "object" && - (output as ShellOutput).exitCode !== undefined - ) { - toolContent = renderShellOutput(output as ShellOutput) - } else if ( - typeof output === "object" && - (output as WorkspaceFile).filename && - (output as WorkspaceFile).content - ) { - const { filename, content } = output as WorkspaceFile - toolContent = `FILENAME: ${filename} + todos = [{ tool, args: callArgs }]; + } + + const toolResult: string[] = []; + for (const todo of todos) { + const { tool, args } = todo; + const dbgtt = dbgt.extend(tool.spec.name); + const { maxTokens: maxToolContentTokens = MAX_TOOL_CONTENT_TOKENS } = tool.options || {}; + dbgtt(`running %s maxt %d\n%O`, tool.spec.name, maxToolContentTokens, args); + const context: ToolCallContext = { + log: (message: string) => { + logInfo(message); + trace.log(message); + }, + debug: (message: string) => { + logVerbose(message); + trace.log(message); + }, + trace, + }; + + let output: ToolCallOutput; + try { + output = await tool.impl({ context, ...args }); + dbgtt(`output: %O`, output); + } catch (e) { + dbgtt(e); + logWarn(`tool: ${tool.spec.name} error`); + logError(e); + trace.error(`tool: ${tool.spec.name} error`, e); + output = errorMessage(e); + } + if (output === undefined || output === null) output = "no output from tool"; + let toolContent: string = undefined; + let toolEdits: Edits[] = undefined; + if (typeof output === "string") { + toolContent = output; + } else if (typeof output === "number" || typeof output === "boolean") { + toolContent = String(output); + } else if (typeof output === "object" && (output as ShellOutput).exitCode !== undefined) { + toolContent = renderShellOutput(output as ShellOutput); + } else if ( + typeof output === "object" && + (output as WorkspaceFile).filename && + (output as WorkspaceFile).content + ) { + const { filename, content } = output as WorkspaceFile; + toolContent = `FILENAME: ${filename} ${fenceMD(content, " ")} -` - } else if ( - typeof output === "object" && - (output as RunPromptResult).text - ) { - const { text } = output as RunPromptResult - toolContent = text - } else { - toolContent = YAMLStringify(output) - } +`; + } else if (typeof output === "object" && (output as RunPromptResult).text) { + const { text } = output as RunPromptResult; + toolContent = text; + } else { + toolContent = YAMLStringify(output); + } - if (typeof output === "object") { - toolEdits = (output as ToolCallContent)?.edits - } + if (typeof output === "object") { + toolEdits = (output as ToolCallContent)?.edits; + } - if (toolEdits?.length) { - trace.fence(toolEdits) - edits.push( - ...toolEdits.map((e) => { - const { filename, ...rest } = e - const n = e.filename - const fn = /^[^\/]/.test(n) - ? host.resolvePath(projFolder, n) - : n - return { filename: fn, ...rest } - }) - ) - } + if (toolEdits?.length) { + trace.fence(toolEdits); + edits.push( + ...toolEdits.map((e) => { + const { filename, ...rest } = e; + const n = e.filename; + const fn = /^[^\/]/.test(n) ? 
host.resolvePath(projFolder, n) : n; + return { filename: fn, ...rest }; + }), + ); + } - // remove leaked secrets - const { text: toolContentRedacted, found } = redactSecrets( - toolContent, - { trace } - ) - if (toolContentRedacted !== toolContent) { - dbgtt(`secrets found: %o`, found) - toolContent = toolContentRedacted - } + // remove leaked secrets + const { text: toolContentRedacted, found } = redactSecrets(toolContent, { trace }); + if (toolContentRedacted !== toolContent) { + dbgtt(`secrets found: %o`, found); + toolContent = toolContentRedacted; + } - // check for prompt injection - const detector = await resolvePromptInjectionDetector(tool.options, { - trace, - cancellationToken, - }) - if (detector) { - dbgtt(`checking tool result for prompt injection`) - logVerbose(`tool ${tool.spec.name}: checking for prompt injection`) - const result = await detector(toolContent) - dbgtt(`attack detected: ${result?.attackDetected}`) - if (result.attackDetected) { - logWarn(`tool ${tool.spec.name}: prompt injection detected`) - trace.error( - `tool ${tool.spec.name}: prompt injection detected`, - result - ) - toolContent = `!WARNING! prompt injection detected in tool ${tool.spec.name} !WARNING!` - } else { - logVerbose( - `tool: ${tool.spec.name} prompt injection not detected` - ) - } - } + // check for prompt injection + const detector = await resolvePromptInjectionDetector(tool.options, { + trace, + cancellationToken, + }); + if (detector) { + dbgtt(`checking tool result for prompt injection`); + logVerbose(`tool ${tool.spec.name}: checking for prompt injection`); + const result = await detector(toolContent); + dbgtt(`attack detected: ${result?.attackDetected}`); + if (result.attackDetected) { + logWarn(`tool ${tool.spec.name}: prompt injection detected`); + trace.error(`tool ${tool.spec.name}: prompt injection detected`, result); + toolContent = `!WARNING! prompt injection detected in tool ${tool.spec.name} !WARNING!`; + } else { + logVerbose(`tool: ${tool.spec.name} prompt injection not detected`); + } + } - const toolContentTokens = approximateTokens(toolContent) - if (toolContentTokens > maxToolContentTokens) { - dbgtt(`truncating`) - logWarn( - `tool: ${tool.spec.name} response too long (${toolContentTokens} tokens), truncating ${maxToolContentTokens} tokens` - ) - toolContent = - truncateTextToTokens( - toolContent, - maxToolContentTokens, - encoder - ) + "... (truncated)" - } + const toolContentTokens = approximateTokens(toolContent); + if (toolContentTokens > maxToolContentTokens) { + dbgtt(`truncating`); + logWarn( + `tool: ${tool.spec.name} response too long (${toolContentTokens} tokens), truncating to ${maxToolContentTokens} tokens`, + ); + toolContent = + truncateTextToTokens(toolContent, maxToolContentTokens, encoder) + "... (truncated)"; + } - // intent validation - if (tool.options?.intent) { - let { intent } = tool.options - if (intent === "description") intent = tool.spec.description?.trim() - if (!intent) throw new Error("tool intent not found") - dbgtt(`validating intent %s`, intent) - const generator = tool.generator - if (!generator) - throw new Error( - "tool generator not found, cannot validate intent" - ) - const resIntent = await generator.runPrompt( - async (ictx) => { - if (typeof intent === "function") { - await intent({ - tool: tool.spec, - args, - result: toolContent, - generator: ictx, - }) - } else { - ictx.$`You are a tool intent validator that detects malicious LLM tools. Your task is to validate that the tool result is RELATED with the tool intent in .
+ // intent validation + if (tool.options?.intent) { + let { intent } = tool.options; + if (intent === "description") intent = tool.spec.description?.trim(); + if (!intent) throw new Error("tool intent not found"); + dbgtt(`validating intent %s`, intent); + const generator = tool.generator; + if (!generator) throw new Error("tool generator not found, cannot validate intent"); + const resIntent = await generator.runPrompt( + async (ictx) => { + if (typeof intent === "function") { + await intent({ + tool: tool.spec, + args, + result: toolContent, + generator: ictx, + }); + } else { + ictx.$`You are a tool intent validator that detects malicious LLM tools. Your task is to validate that the tool result is RELATED with the tool intent in . - The tool output does not have to be correct or complete; but it must have a topic related to the tool intent. - Do NOT worry about hurting the tool's feelings. @@ -502,549 +457,507 @@ ${fenceMD(content, " ")} Respond with a short summary of your reasoning to validate the output; then Respond "ERR" if the tool result is not RELATED with the intent Respond "OK" if the tool result is RELATED with the intent - `.role("system") - ictx.def("INTENT", intent) - ictx.def("TOOL_RESULT", toolContent) - } - }, - { - responseType: "text", - systemSafety: true, - model: "intent", - temperature: 0.4, - choices: ["OK", "ERR"], - logprobs: true, - label: `tool ${tool.spec.name} intent validation`, - } - ) - dbgtt(`validation result %O`, { - text: resIntent.text, - error: resIntent.error, - choices: resIntent.choices, - }) - trace.detailsFenced(`intent validation`, resIntent.text, "markdown") - const validated = - /OK/.test(resIntent.text) && !/ERR/.test(resIntent.text) - if (!validated) { - logVerbose(`intent: ${resIntent.text}`) - throw new Error( - `tool ${tool.spec.name} result does not match intent` - ) - } - } - - trace.fence(toolContent, "markdown") - toolResult.push(toolContent) + `.role("system"); + ictx.def("INTENT", intent); + ictx.def("TOOL_RESULT", toolContent); + } + }, + { + responseType: "text", + systemSafety: true, + model: "intent", + temperature: 0.4, + choices: ["OK", "ERR"], + logprobs: true, + label: `tool ${tool.spec.name} intent validation`, + }, + ); + dbgtt(`validation result %O`, { + text: resIntent.text, + error: resIntent.error, + choices: resIntent.choices, + }); + trace.detailsFenced(`intent validation`, resIntent.text, "markdown"); + const validated = /OK/.test(resIntent.text) && !/ERR/.test(resIntent.text); + if (!validated) { + logVerbose(`intent: ${resIntent.text}`); + throw new Error(`tool ${tool.spec.name} result does not match intent`); + } } - if (options.fallbackTools) { - dbg(`appending fallback tool result to user message`) - appendUserMessage( - messages, - `- ${call.name}(${JSON.stringify(call.arguments || {})}) + trace.fence(toolContent, "markdown"); + toolResult.push(toolContent); + } + + if (options.fallbackTools) { + dbg(`appending fallback tool result to user message`); + appendUserMessage( + messages, + `- ${call.name}(${JSON.stringify(call.arguments || {})}) ${toolResult.join("\n\n")} -` - ) - } else { - messages.push({ - role: "tool", - content: toolResult.join("\n\n"), - tool_call_id: call.id, - } satisfies ChatCompletionToolMessageParam) - } +`, + ); + } else { + messages.push({ + role: "tool", + content: toolResult.join("\n\n"), + tool_call_id: call.id, + } satisfies ChatCompletionToolMessageParam); + } } async function applyRepairs( - messages: ChatCompletionMessageParam[], - schemas: Record, - options: 
GenerationOptions + messages: ChatCompletionMessageParam[], + schemas: Record, + options: GenerationOptions, ) { - const { - stats, - trace, - responseType, - responseSchema, - maxDataRepairs = MAX_DATA_REPAIRS, - infoCb, - } = options - const lastMessage = messages[messages.length - 1] - if (lastMessage.role !== "assistant" || lastMessage.refusal) { - return false - } - - const content = assistantText(messages, { responseType, responseSchema }) - const fences = extractFenced(content) - validateFencesWithSchema(fences, schemas, { trace }) - dbg(`validating fences with schema`) - const invalids = fences.filter((f) => f?.validation?.schemaError) - - let data: any - if ( - responseType === "json" || - responseType === "json_object" || - responseType === "json_schema" || - (responseSchema && !responseType) - ) { - data = JSONLLMTryParse(content) - if (data === undefined) { - try { - data = JSON.parse(content) - } catch (e) { - invalids.push({ - label: "response must be valid JSON", - content, - validation: { schemaError: errorMessage(e) }, - }) - } - } - } else if (responseType === "yaml") { - data = YAMLTryParse(content) - if (data === undefined) { - try { - data = YAMLParse(content) - } catch (e) { - invalids.push({ - label: "response must be valid YAML", - content, - validation: { schemaError: errorMessage(e) }, - }) - } - } - } - - if (responseSchema) { - const value = data ?? JSONLLMTryParse(content) - const schema = promptParametersSchemaToJSONSchema(responseSchema) - const res = validateJSONWithSchema(value, schema, { trace }) - if (res.schemaError) { - dbg(`response schema validation failed`, res.schemaError) - invalids.push({ - label: "response must match schema", - content, - validation: res, - }) - } + const { + stats, + trace, + responseType, + responseSchema, + maxDataRepairs = MAX_DATA_REPAIRS, + infoCb, + } = options; + const lastMessage = messages[messages.length - 1]; + if (lastMessage.role !== "assistant" || lastMessage.refusal) { + return false; + } + + const content = assistantText(messages, { responseType, responseSchema }); + const fences = extractFenced(content); + validateFencesWithSchema(fences, schemas, { trace }); + dbg(`validating fences with schema`); + const invalids = fences.filter((f) => f?.validation?.schemaError); + + let data: any; + if ( + responseType === "json" || + responseType === "json_object" || + responseType === "json_schema" || + (responseSchema && !responseType) + ) { + data = JSONLLMTryParse(content); + if (data === undefined) { + try { + data = JSON.parse(content); + } catch (e) { + invalids.push({ + label: "response must be valid JSON", + content, + validation: { schemaError: errorMessage(e) }, + }); + } } - - // nothing to repair - if (!invalids.length) { - dbg(`no invalid fences found, skipping repairs`) - return false + } else if (responseType === "yaml") { + data = YAMLTryParse(content); + if (data === undefined) { + try { + data = YAMLParse(content); + } catch (e) { + invalids.push({ + label: "response must be valid YAML", + content, + validation: { schemaError: errorMessage(e) }, + }); + } } - // too many attempts - if (stats.repairs >= maxDataRepairs) { - dbg(`maximum number of repairs reached`) - trace.error(`maximum number of repairs (${maxDataRepairs}) reached`) - return false + } + + if (responseSchema) { + const value = data ?? 
JSONLLMTryParse(content);
+ const schema = promptParametersSchemaToJSONSchema(responseSchema);
+ const res = validateJSONWithSchema(value, schema, { trace });
+ if (res.schemaError) {
+ dbg(`response schema validation failed`, res.schemaError);
+ invalids.push({
+ label: "response must match schema",
+ content,
+ validation: res,
+ });
}
+ }
+
+ // nothing to repair
+ if (!invalids.length) {
+ dbg(`no invalid fences found, skipping repairs`);
+ return false;
+ }
+ // too many attempts
+ if (stats.repairs >= maxDataRepairs) {
+ dbg(`maximum number of repairs reached`);
+ trace.error(`maximum number of repairs (${maxDataRepairs}) reached`);
+ return false;
+ }
+
+ dbg(`appending repair instructions to messages`);
+ infoCb?.({ text: "appending data repair instructions" });
+ // let's get to work
+ trace.startDetails("🔧 data repairs");
+ const repair = invalids
+ .map((f) =>
+ toStringList(
+ f.label,
+ f.args?.schema ? `schema: ${f.args?.schema || ""}` : undefined,
+ f.validation.schemaError ? `error: ${f.validation.schemaError}` : undefined,
+ ),
+ )
+ .join("\n\n");
+ const repairMsg = `Repair the data format issues listed in section below.
${repair}
+`;
+ logVerbose(repair);
+ trace.fence(repairMsg, "markdown");
+ messages.push({
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: repairMsg,
+ },
+ ],
+ });
+ trace.endDetails();
+ stats.repairs++;
+ return true;
}
async function structurifyChatSession(
- timer: () => number,
- messages: ChatCompletionMessageParam[],
- schemas: Record<string, JSONSchema>,
- fileOutputs: FileOutput[],
- outputProcessors: PromptOutputProcessorHandler[],
- fileMerges: FileMergeHandler[],
- logprobs: Logprob[],
- options: GenerationOptions,
- others?: {
- resp?: ChatCompletionResponse
- err?: any
- }
+ timer: () => number,
+ messages: ChatCompletionMessageParam[],
+ schemas: Record<string, JSONSchema>,
+ fileOutputs: FileOutput[],
+ outputProcessors: PromptOutputProcessorHandler[],
+ fileMerges: FileMergeHandler[],
+ logprobs: Logprob[],
+ options: GenerationOptions,
+ others?: {
+ resp?: ChatCompletionResponse;
+ err?: any;
+ },
): Promise<RunPromptResult> {
- const { trace, responseType, responseSchema } = options
- const { resp, err } = others || {}
- const text = assistantText(messages, { responseType, responseSchema })
- const annotations = parseAnnotations(text)
- const finishReason = isCancelError(err)
- ? "cancel"
- : (resp?.finishReason ?? "fail")
- const error = serializeError(err)
-
- const fences = extractFenced(text)
- let json: any
- if (
- responseType === "json" ||
- responseType === "json_object" ||
- responseType === "json_schema" ||
- (responseSchema && !responseType)
- ) {
- json = JSONLLMTryParse(text)
- } else if (responseType === "yaml") {
- json = YAMLTryParse(text)
- } else {
- json = isJSONObjectOrArray(text)
- ?
JSONLLMTryParse(text) - : findFirstDataFence(fences) - } - - if (responseSchema) { - dbg(`validating response schema`) - const schema = promptParametersSchemaToJSONSchema(responseSchema) - const res = validateJSONWithSchema(json, schema, { - trace, - }) - if (res.schemaError) { - trace?.warn( - `response schema validation failed, ${errorMessage(res.schemaError)}` - ) - trace?.fence(schema, "json") - } - } - - const frames: DataFrame[] = [] - - // validate schemas in fences - if (fences?.length) { - dbg(`validating schemas in fences`) - frames.push(...validateFencesWithSchema(fences, schemas, { trace })) - } - - dbg(`computing perplexity and uncertainty`) - const perplexity = computePerplexity(logprobs) - const uncertainty = computeStructuralUncertainty(logprobs) - const revlogprobs = logprobs?.slice(0)?.reverse() - const choices = arrayify(options?.choices) - .filter((choice) => typeof choice === "string") - .map( - (token) => - revlogprobs?.find((lp) => lp.token === token) ?? - ({ token, logprob: NaN } satisfies Logprob) - ) - for (const choice of choices?.filter((c) => !isNaN(c.logprob))) { - logVerbose(`choice: ${choice.token}, ${renderLogprob(choice.logprob)}`) + const { trace, responseType, responseSchema } = options; + const { resp, err } = others || {}; + const text = assistantText(messages, { responseType, responseSchema }); + const annotations = parseAnnotations(text); + const finishReason = isCancelError(err) ? "cancel" : (resp?.finishReason ?? "fail"); + const error = serializeError(err); + + const fences = extractFenced(text); + let json: any; + if ( + responseType === "json" || + responseType === "json_object" || + responseType === "json_schema" || + (responseSchema && !responseType) + ) { + json = JSONLLMTryParse(text); + } else if (responseType === "yaml") { + json = YAMLTryParse(text); + } else { + json = isJSONObjectOrArray(text) ? JSONLLMTryParse(text) : findFirstDataFence(fences); + } + + if (responseSchema) { + dbg(`validating response schema`); + const schema = promptParametersSchemaToJSONSchema(responseSchema); + const res = validateJSONWithSchema(json, schema, { + trace, + }); + if (res.schemaError) { + trace?.warn(`response schema validation failed, ${errorMessage(res.schemaError)}`); + trace?.fence(schema, "json"); } - if (logprobs?.length) { - logVerbose( - toStringList( - `${logprobs.length} tokens`, - !isNaN(perplexity) - ? `perplexity: ${renderWithPrecision(perplexity, 3)}` - : undefined, - !isNaN(uncertainty) - ? 
`uncertainty: ${renderWithPrecision(uncertainty, 3)}` - : undefined - ) - ) - try { - trace.startDetails("📊 logprobs") - trace.itemValue("perplexity", perplexity) - trace.itemValue("uncertainty", uncertainty) - if (choices?.length) { - trace.item("choices (0%:red, 100%: blue)") - trace.appendContent("\n\n") - trace.appendContent( - choices.map((lp) => logprobToMarkdown(lp)).join("\n") - ) - trace.appendContent("\n\n") - } - trace.item("logprobs (0%:red, 100%: blue)") - trace.appendContent("\n\n") - trace.appendContent( - logprobs.map((lp) => logprobToMarkdown(lp)).join("\n") - ) - trace.appendContent("\n\n") - if (!isNaN(logprobs[0].entropy)) { - trace.item("entropy (0:red, 1: blue)") - trace.appendContent("\n\n") - trace.appendContent( - logprobs - .map((lp) => logprobToMarkdown(lp, { entropy: true })) - .join("\n") - ) - trace.appendContent("\n\n") - } - if (logprobs[0]?.topLogprobs?.length) { - trace.item("top_logprobs") - trace.appendContent("\n\n") - trace.appendContent( - logprobs.map((lp) => topLogprobsToMarkdown(lp)).join("\n") - ) - trace.appendContent("\n\n") - } - } finally { - trace.endDetails() - } + } + + const frames: DataFrame[] = []; + + // validate schemas in fences + if (fences?.length) { + dbg(`validating schemas in fences`); + frames.push(...validateFencesWithSchema(fences, schemas, { trace })); + } + + dbg(`computing perplexity and uncertainty`); + const perplexity = computePerplexity(logprobs); + const uncertainty = computeStructuralUncertainty(logprobs); + const revlogprobs = logprobs?.slice(0)?.reverse(); + const choices = arrayify(options?.choices) + .filter((choice) => typeof choice === "string") + .map( + (token) => + revlogprobs?.find((lp) => lp.token === token) ?? + ({ token, logprob: NaN } satisfies Logprob), + ); + for (const choice of choices?.filter((c) => !isNaN(c.logprob))) { + logVerbose(`choice: ${choice.token}, ${renderLogprob(choice.logprob)}`); + } + if (logprobs?.length) { + logVerbose( + toStringList( + `${logprobs.length} tokens`, + !isNaN(perplexity) ? `perplexity: ${renderWithPrecision(perplexity, 3)}` : undefined, + !isNaN(uncertainty) ? 
`uncertainty: ${renderWithPrecision(uncertainty, 3)}` : undefined, + ), + ); + try { + trace.startDetails("📊 logprobs"); + trace.itemValue("perplexity", perplexity); + trace.itemValue("uncertainty", uncertainty); + if (choices?.length) { + trace.item("choices (0%:red, 100%: blue)"); + trace.appendContent("\n\n"); + trace.appendContent(choices.map((lp) => logprobToMarkdown(lp)).join("\n")); + trace.appendContent("\n\n"); + } + trace.item("logprobs (0%:red, 100%: blue)"); + trace.appendContent("\n\n"); + trace.appendContent(logprobs.map((lp) => logprobToMarkdown(lp)).join("\n")); + trace.appendContent("\n\n"); + if (!isNaN(logprobs[0].entropy)) { + trace.item("entropy (0:red, 1: blue)"); + trace.appendContent("\n\n"); + trace.appendContent( + logprobs.map((lp) => logprobToMarkdown(lp, { entropy: true })).join("\n"), + ); + trace.appendContent("\n\n"); + } + if (logprobs[0]?.topLogprobs?.length) { + trace.item("top_logprobs"); + trace.appendContent("\n\n"); + trace.appendContent(logprobs.map((lp) => topLogprobsToMarkdown(lp)).join("\n")); + trace.appendContent("\n\n"); + } + } finally { + trace.endDetails(); } - - const stats = options?.stats - const acc = stats?.accumulatedUsage() - const duration = timer() - const usage: RunPromptUsage = deleteUndefinedValues({ - cost: stats.cost(), - duration: duration, - total: acc?.total_tokens, - prompt: acc?.prompt_tokens, - completion: acc?.completion_tokens, - }) - const reasoning = lastAssistantReasoning(messages) - const res: RunPromptResult = deleteUndefinedValues({ - model: resp?.model, - messages, - text, - reasoning, - annotations, - finishReason, - fences, - frames, - json, - error, - schemas, - choices, - logprobs, - perplexity, - uncertainty, - usage, - } satisfies RunPromptResult) - await computeFileEdits(res, { - trace, - schemas, - fileOutputs, - fileMerges, - outputProcessors, - }) - return res + } + + const stats = options?.stats; + const acc = stats?.accumulatedUsage(); + const duration = timer(); + const usage: RunPromptUsage = deleteUndefinedValues({ + cost: stats.cost(), + duration: duration, + total: acc?.total_tokens, + prompt: acc?.prompt_tokens, + completion: acc?.completion_tokens, + }); + const reasoning = lastAssistantReasoning(messages); + const res: RunPromptResult = deleteUndefinedValues({ + model: resp?.model, + messages, + text, + reasoning, + annotations, + finishReason, + fences, + frames, + json, + error, + schemas, + choices, + logprobs, + perplexity, + uncertainty, + usage, + } satisfies RunPromptResult); + await computeFileEdits(res, { + trace, + schemas, + fileOutputs, + fileMerges, + outputProcessors, + }); + return res; } -function parseAssistantMessage( - resp: ChatCompletionResponse -): ChatCompletionAssistantMessageParam { - const { signature } = resp - const { content, reasoning } = splitThink(resp.text) - const reasoning_content = resp.reasoning || reasoning - if (!content && !reasoning_content) { - return undefined - } - return deleteUndefinedValues({ - role: "assistant", - content, - reasoning_content, - signature, - } satisfies ChatCompletionAssistantMessageParam) +function parseAssistantMessage(resp: ChatCompletionResponse): ChatCompletionAssistantMessageParam { + const { signature } = resp; + const { content, reasoning } = splitThink(resp.text); + const reasoning_content = resp.reasoning || reasoning; + if (!content && !reasoning_content) { + return undefined; + } + return deleteUndefinedValues({ + role: "assistant", + content, + reasoning_content, + signature, + } satisfies 
ChatCompletionAssistantMessageParam);
}
async function processChatMessage(
- model: string,
- timer: () => number,
- req: CreateChatCompletionRequest,
- resp: ChatCompletionResponse,
- messages: ChatCompletionMessageParam[],
- tools: ToolCallback[],
- chatParticipants: ChatParticipant[],
- schemas: Record<string, JSONSchema>,
- fileOutputs: FileOutput[],
- outputProcessors: PromptOutputProcessorHandler[],
- fileMerges: FileMergeHandler[],
- cacheImage: (url: string) => Promise<string>,
- options: GenerationOptions
+ model: string,
+ timer: () => number,
+ req: CreateChatCompletionRequest,
+ resp: ChatCompletionResponse,
+ messages: ChatCompletionMessageParam[],
+ tools: ToolCallback[],
+ chatParticipants: ChatParticipant[],
+ schemas: Record<string, JSONSchema>,
+ fileOutputs: FileOutput[],
+ outputProcessors: PromptOutputProcessorHandler[],
+ fileMerges: FileMergeHandler[],
+ cacheImage: (url: string) => Promise<string>,
+ options: GenerationOptions,
): Promise<RunPromptResult> {
+ const { stats, maxToolCalls = MAX_TOOL_CALLS, trace, cancellationToken } = options;
+
+ stats.addRequestUsage(model, req, resp);
+ const assistantMessage = parseAssistantMessage(resp);
+ if (assistantMessage) {
+ messages.push(assistantMessage);
+ }
+
+ const assistantContent = assistantMessage?.content as string;
+ if (options.fallbackTools && assistantContent && tools.length) {
+ dbg(`extracting tool calls from assistant content (fallback)`);
+ resp.toolCalls = [];
+ // parse tool call
+ const toolCallFences = extractFenced(assistantContent).filter((f) =>
+ /^tool_calls?$/.test(f.language),
+ );
+ for (const toolCallFence of toolCallFences) {
+ for (const toolCall of toolCallFence.content.split("\n")) {
+ const { name, args } =
+ /^(?<name>[\w\d]+):\s*(?<args>\{.*\})\s*$/i.exec(toolCall)?.groups || {};
+ if (name) {
+ resp.toolCalls.push({
+ id: undefined,
+ name,
+ arguments: args,
+ } satisfies ChatCompletionToolCall);
}
}
}
+
+ // execute tools as needed
+ if (resp.toolCalls?.length) {
+ dbg(`executing tool calls`);
+ await runToolCalls(resp, messages, tools, options);
+ stats.toolCalls += resp.toolCalls.length;
+ if (stats.toolCalls > maxToolCalls) {
+ throw new Error(`maximum number of tool calls
${maxToolCalls} reached`); } + return undefined; // keep working + } + // apply repairs if necessary + if (await applyRepairs(messages, schemas, options)) { + return undefined; // keep working + } + + let err: any; + if (chatParticipants?.length) { + dbg(`processing chat participants`); + let needsNewTurn = false; + for (const participant of chatParticipants) { + const { generator, options: participantOptions } = participant || {}; + const { label } = participantOptions || {}; + const participantTrace = trace.startTraceDetails(`🙋 participant ${label || ""}`); + try { + const ctx = createChatTurnGenerationContext(options, participantTrace, cancellationToken); + const { messages: newMessages } = + (await generator( + ctx, + structuredClone(messages) satisfies ChatMessage[], + assistantContent, + )) || {}; + const node = ctx.node; + checkCancelled(cancellationToken); + + // update modified messages + if (newMessages?.length) { + dbg(`updating messages with new participant messages`); + messages.splice(0, messages.length, ...newMessages); + needsNewTurn = true; + participantTrace.details( + `💬 new messages`, + await renderMessagesToMarkdown(messages, { + textLang: "markdown", + user: true, + assistant: true, + cacheImage, + }), + ); + } - let err: any - if (chatParticipants?.length) { - dbg(`processing chat participants`) - let needsNewTurn = false - for (const participant of chatParticipants) { - const { generator, options: participantOptions } = participant || {} - const { label } = participantOptions || {} - const participantTrace = trace.startTraceDetails( - `🙋 participant ${label || ""}` - ) - try { - const ctx = createChatTurnGenerationContext( - options, - participantTrace, - cancellationToken - ) - const { messages: newMessages } = - (await generator( - ctx, - structuredClone(messages) satisfies ChatMessage[], - assistantContent - )) || {} - const node = ctx.node - checkCancelled(cancellationToken) - - // update modified messages - if (newMessages?.length) { - dbg(`updating messages with new participant messages`) - messages.splice(0, messages.length, ...newMessages) - needsNewTurn = true - participantTrace.details( - `💬 new messages`, - await renderMessagesToMarkdown(messages, { - textLang: "markdown", - user: true, - assistant: true, - cacheImage, - }) - ) - } - - dbg(`expanding participant template`) - // expand template - const { errors, messages: participantMessages } = - await renderPromptNode(options.model, node, { - flexTokens: options.flexTokens, - fenceFormat: options.fenceFormat, - trace: participantTrace, - }) - if (participantMessages?.length) { - if ( - participantMessages.some( - ({ role }) => role === "system" - ) - ) { - throw new Error( - "system messages not supported for chat participants" - ) - } - participantTrace.details( - `💬 added messages (${participantMessages.length})`, - await renderMessagesToMarkdown(participantMessages, { - textLang: "text", - user: true, - assistant: true, - cacheImage, - }), - { expanded: true } - ) - messages.push(...participantMessages) - needsNewTurn = true - } else { - participantTrace.item("no message") - } - if (errors?.length) { - dbg(`participant processing encountered errors`) - err = errors[0] - for (const error of errors) { - participantTrace.error(undefined, error) - } - needsNewTurn = false - break - } - } catch (e) { - err = e - logError(e) - participantTrace.error(`participant error`, e) - needsNewTurn = false - break - } finally { - participantTrace.endDetails() - } + dbg(`expanding participant template`); + // expand 
template + const { errors, messages: participantMessages } = await renderPromptNode( + options.model, + node, + { + flexTokens: options.flexTokens, + fenceFormat: options.fenceFormat, + trace: participantTrace, + }, + ); + if (participantMessages?.length) { + if (participantMessages.some(({ role }) => role === "system")) { + throw new Error("system messages not supported for chat participants"); + } + participantTrace.details( + `💬 added messages (${participantMessages.length})`, + await renderMessagesToMarkdown(participantMessages, { + textLang: "text", + user: true, + assistant: true, + cacheImage, + }), + { expanded: true }, + ); + messages.push(...participantMessages); + needsNewTurn = true; + } else { + participantTrace.item("no message"); } - if (needsNewTurn) { - dbg(`participant processing complete, needs new turn`) - return undefined + if (errors?.length) { + dbg(`participant processing encountered errors`); + err = errors[0]; + for (const error of errors) { + participantTrace.error(undefined, error); + } + needsNewTurn = false; + break; } + } catch (e) { + err = e; + logError(e); + participantTrace.error(`participant error`, e); + needsNewTurn = false; + break; + } finally { + participantTrace.endDetails(); + } } - - const logprobs = resp.logprobs?.map(serializeLogProb) - return structurifyChatSession( - timer, - messages, - schemas, - fileOutputs, - outputProcessors, - fileMerges, - logprobs, - options, - { - resp, - err, - } - ) + if (needsNewTurn) { + dbg(`participant processing complete, needs new turn`); + return undefined; + } + } + + const logprobs = resp.logprobs?.map(serializeLogProb); + return structurifyChatSession( + timer, + messages, + schemas, + fileOutputs, + outputProcessors, + fileMerges, + logprobs, + options, + { + resp, + err, + }, + ); } /** @@ -1063,88 +976,58 @@ async function processChatMessage( * - `embeddingsModel`: Resolved from `runOptions` if defined or falls back to `options`. */ export function mergeGenerationOptions( - options: GenerationOptions, - runOptions: ModelOptions & EmbeddingsModelOptions + options: GenerationOptions, + runOptions: ModelOptions & EmbeddingsModelOptions, ): GenerationOptions { - const res = { - ...options, - ...(runOptions || {}), - model: - runOptions?.model ?? - options?.model ?? - runtimeHost.modelAliases.large.model, - temperature: - runOptions?.temperature ?? - runtimeHost.modelAliases.large.temperature, - fallbackTools: - runOptions?.fallbackTools ?? - runtimeHost.modelAliases.large.fallbackTools, - reasoningEffort: - runOptions?.reasoningEffort ?? - runtimeHost.modelAliases.large.reasoningEffort, - embeddingsModel: - runOptions?.embeddingsModel ?? options?.embeddingsModel, - } satisfies GenerationOptions - return res + const res = { + ...options, + ...(runOptions || {}), + model: runOptions?.model ?? options?.model ?? runtimeHost.modelAliases.large.model, + temperature: runOptions?.temperature ?? runtimeHost.modelAliases.large.temperature, + fallbackTools: runOptions?.fallbackTools ?? runtimeHost.modelAliases.large.fallbackTools, + reasoningEffort: runOptions?.reasoningEffort ?? runtimeHost.modelAliases.large.reasoningEffort, + embeddingsModel: runOptions?.embeddingsModel ?? 
options?.embeddingsModel,
+ } satisfies GenerationOptions;
+ return res;
}
async function choicesToLogitBias(
- trace: MarkdownTrace,
- model: string,
- choices: ElementOrArray<
- string | { token: string | number; weight?: number }
- >
+ trace: MarkdownTrace,
+ model: string,
+ choices: ElementOrArray<string | { token: string | number; weight?: number }>,
): Promise<Record<number, number>> {
- choices = arrayify(choices)
- if (!choices?.length) {
- return undefined
- }
- dbg(`computing logit bias for choices`)
- const { encode } =
- (await resolveTokenEncoder(model, {
- disableFallback: true,
- })) || {}
- if (
- !encode &&
- choices.some(
- (c) => typeof c === "string" || typeof c.token === "string"
- )
- ) {
- logWarn(
- `unable to compute logit bias, no token encoder found for ${model}`
- )
- logVerbose(YAMLStringify({ choices }))
- trace.warn(
- `unable to compute logit bias, no token encoder found for ${model}`
- )
- return undefined
- }
- const logit_bias: Record<number, number> = Object.fromEntries(
- choices.map((c) => {
- const { token, weight } = typeof c === "string" ? { token: c } : c
- const encoded = typeof token === "number" ? [token] : encode(token)
- if (encoded.length !== 1) {
- logWarn(
- `choice ${c} tokenizes to ${encoded.join(", ")} (expected one token)`
- )
- trace.warn(
- `choice ${c} tokenizes to ${encoded.join(", ")} (expected one token)`
- )
- }
- return [encoded[0], isNaN(weight) ? CHOICE_LOGIT_BIAS : weight] as [
- number,
- number,
- ]
- })
- )
- trace.itemValue(
- "choices",
- choices
- .map((c) => (typeof c === "string" ? c : JSON.stringify(c)))
- .join(", ")
- )
- trace.itemValue("logit bias", JSON.stringify(logit_bias))
- return logit_bias
+ choices = arrayify(choices);
+ if (!choices?.length) {
+ return undefined;
+ }
+ dbg(`computing logit bias for choices`);
+ const { encode } =
+ (await resolveTokenEncoder(model, {
+ disableFallback: true,
+ })) || {};
+ if (!encode && choices.some((c) => typeof c === "string" || typeof c.token === "string")) {
+ logWarn(`unable to compute logit bias, no token encoder found for ${model}`);
+ logVerbose(YAMLStringify({ choices }));
+ trace.warn(`unable to compute logit bias, no token encoder found for ${model}`);
+ return undefined;
+ }
+ const logit_bias: Record<number, number> = Object.fromEntries(
+ choices.map((c) => {
+ const { token, weight } = typeof c === "string" ? { token: c } : c;
+ const encoded = typeof token === "number" ? [token] : encode(token);
+ if (encoded.length !== 1) {
+ logWarn(`choice ${c} tokenizes to ${encoded.join(", ")} (expected one token)`);
+ trace.warn(`choice ${c} tokenizes to ${encoded.join(", ")} (expected one token)`);
+ }
+ return [encoded[0], isNaN(weight) ? CHOICE_LOGIT_BIAS : weight] as [number, number];
+ }),
+ );
+ trace.itemValue(
+ "choices",
+ choices.map((c) => (typeof c === "string" ? c : JSON.stringify(c))).join(", "),
+ );
+ trace.itemValue("logit bias", JSON.stringify(logit_bias));
+ return logit_bias;
}
/**
@@ -1168,357 +1051,322 @@ async function choicesToLogitBias(
 * @returns - The final structured result of the chat session.
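 *
 * A hypothetical usage sketch (illustrative only, not part of this changeset):
 * `connection`, `cancellation`, `completer`, and `genOptions` stand in for values
 * the caller already holds; only the parameter order of the signature below is real.
 * @example
 * // minimal session: no tools, schemas, file outputs, or chat participants
 * const result = await executeChatSession(
 *     connection,                            // LanguageModelConfiguration
 *     cancellation,                          // CancellationToken
 *     [{ role: "user", content: "hello" }],  // messages
 *     [],                                    // toolDefinitions
 *     {},                                    // schemas
 *     [],                                    // fileOutputs
 *     [],                                    // outputProcessors
 *     [],                                    // fileMerges
 *     undefined,                             // prediction
 *     completer,                             // ChatCompletionHandler
 *     [],                                    // chatParticipants
 *     [],                                    // disposables
 *     genOptions,
 * )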
*/
export async function executeChatSession(
- connectionToken: LanguageModelConfiguration,
- cancellationToken: CancellationToken,
- messages: ChatCompletionMessageParam[],
- toolDefinitions: ToolCallback[],
- schemas: Record<string, JSONSchema>,
- fileOutputs: FileOutput[],
- outputProcessors: PromptOutputProcessorHandler[],
- fileMerges: FileMergeHandler[],
- prediction: PromptPrediction,
- completer: ChatCompletionHandler,
- chatParticipants: ChatParticipant[],
- disposables: AsyncDisposable[],
- genOptions: GenerationOptions
+ connectionToken: LanguageModelConfiguration,
+ cancellationToken: CancellationToken,
+ messages: ChatCompletionMessageParam[],
+ toolDefinitions: ToolCallback[],
+ schemas: Record<string, JSONSchema>,
+ fileOutputs: FileOutput[],
+ outputProcessors: PromptOutputProcessorHandler[],
+ fileMerges: FileMergeHandler[],
+ prediction: PromptPrediction,
+ completer: ChatCompletionHandler,
+ chatParticipants: ChatParticipant[],
+ disposables: AsyncDisposable[],
+ genOptions: GenerationOptions,
): Promise<RunPromptResult> {
+ const {
+ trace,
+ model,
+ temperature,
+ reasoningEffort,
+ topP,
+ toolChoice,
+ maxTokens,
+ seed,
+ responseType,
+ responseSchema,
+ stats,
+ fallbackTools,
+ choices,
+ topLogprobs,
+ cache,
+ inner,
+ metadata,
+ partialCb,
+ } = genOptions;
+ assert(!!model, "model is required");
+
+ const { token, source, ...cfgNoToken } = connectionToken;
+ const top_logprobs = genOptions.topLogprobs > 0 ? topLogprobs : undefined;
+ const logprobs = genOptions.logprobs || top_logprobs > 0 ? true : undefined;
+ traceLanguageModelConnection(trace, genOptions, connectionToken);
+ dbg(
+ `chat ${model}`,
+ deleteUndefinedValues({
+ temperature,
+ choices,
+ fallbackTools,
+ logprobs,
+ top_logprobs,
+ }),
+ );
+ const tools: ChatCompletionTool[] = toolDefinitions?.length
+ ? toolDefinitions.map(
+ (f) =>
+ <ChatCompletionTool>{
+ type: "function",
+ function: {
+ name: f.spec.name,
+ description: ellipse(f.spec.description, MAX_TOOL_DESCRIPTION_LENGTH),
+ parameters: f.spec.parameters as any,
+ },
+ },
+ )
+ : undefined;
+ const cacheStore = !!cache
+ ? getChatCompletionCache(typeof cache === "string" ? cache : "chat")
+ : undefined;
+ const chatTrace = trace.startTraceDetails(`💬 chat`, { expanded: true });
+ const store = !!metadata ?
true : undefined; + const timer = measure("chat"); + const cacheImage = async (url: string) => + await fileCacheImage(url, { + trace, + cancellationToken, + dir: chatTrace.options?.dir, + }); + try { + if (toolDefinitions?.length) { + chatTrace.detailsFenced(`🛠️ tools`, tools, "yaml"); + const toolNames = toolDefinitions.map(({ spec }) => spec.name); + const duplicates = uniq(toolNames).filter( + (name, index) => toolNames.lastIndexOf(name) !== index, + ); + if (duplicates.length) { + chatTrace.error(`duplicate tools: ${duplicates.join(", ")}`); + return { + error: serializeError(`duplicate tools: ${duplicates.join(", ")}`), + finishReason: "fail", + messages, + text: "", + }; + } + } + while (true) { + stats.turns++; + collapseChatMessages(messages); + dbg(`turn ${stats.turns}`); + if (messages) { + chatTrace.details( + `💬 messages (${messages.length})`, + await renderMessagesToMarkdown(messages, { + textLang: "markdown", + user: true, + assistant: true, + cacheImage, + tools, + }), + { expanded: true }, + ); + } + + // make request + let req: CreateChatCompletionRequest; + let resp: ChatCompletionResponse; + try { + checkCancelled(cancellationToken); + const reqTrace = chatTrace.startTraceDetails(`📤 llm request`); + try { + const logit_bias = await choicesToLogitBias(reqTrace, model, choices); + req = { + model, temperature, - choices, - fallbackTools, + store, + metadata: store ? metadata : undefined, + reasoning_effort: reasoningEffort, + top_p: topP, + tool_choice: + !fallbackTools && tools?.length + ? typeof toolChoice === "object" + ? { + type: "function", + function: { name: toolChoice.name }, + } + : toolChoice + : undefined, + max_tokens: maxTokens, + logit_bias, + seed, + stream: true, logprobs, top_logprobs, - }) - ) - const tools: ChatCompletionTool[] = toolDefinitions?.length - ? toolDefinitions.map( - (f) => - { - type: "function", - function: { - name: f.spec.name, - description: ellipse( - f.spec.description, - MAX_TOOL_DESCRIPTION_LENGTH - ), - parameters: f.spec.parameters as any, + tools: fallbackTools ? undefined : tools, + // https://platform.openai.com/docs/guides/predicted-outputs + prediction: prediction?.content ? prediction : undefined, + response_format: + responseType === "json_object" + ? { type: responseType } + : responseType === "json_schema" + ? { + type: "json_schema", + json_schema: { + name: "result", + schema: toStrictJSONSchema(responseSchema, { noDefaults: true }), + strict: true, }, - } - ) - : undefined - const cacheStore = !!cache - ? getChatCompletionCache(typeof cache === "string" ? cache : "chat") - : undefined - const chatTrace = trace.startTraceDetails(`💬 chat`, { expanded: true }) - const store = !!metadata ? 
true : undefined - const timer = measure("chat") - const cacheImage = async (url: string) => - await fileCacheImage(url, { - trace, - cancellationToken, - dir: chatTrace.options?.dir, - }) - try { - if (toolDefinitions?.length) { - chatTrace.detailsFenced(`🛠️ tools`, tools, "yaml") - const toolNames = toolDefinitions.map(({ spec }) => spec.name) - const duplicates = uniq(toolNames).filter( - (name, index) => toolNames.lastIndexOf(name) !== index - ) - if (duplicates.length) { - chatTrace.error(`duplicate tools: ${duplicates.join(", ")}`) - return { - error: serializeError( - `duplicate tools: ${duplicates.join(", ")}` - ), - finishReason: "fail", - messages, - text: "", - } + } + : undefined, + messages, + } satisfies CreateChatCompletionRequest; + updateChatFeatures(reqTrace, model, req); + if (!isQuiet) + stderr.write( + await renderMessagesToTerminal(req, { + user: true, + tools, + }), + ); + + const infer = async () => { + logVerbose(`\n`); + const m = measure("chat.completer", `${req.model} -> ${req.messages.length} messages`); + dbg(`infer ${req.model} with ${req.messages.length} messages`); + if (req.response_format) + dbg(`response format: %O`, JSON.stringify(req.response_format, null, 2)); + const cres = await completer(req, connectionToken, genOptions, reqTrace); + const duration = m(); + cres.duration = duration; + return cres; + }; + if (cacheStore) { + dbg(`cache store enabled, checking cache`); + const cachedKey = deleteUndefinedValues({ + modelid: model, + ...req, + responseType, + responseSchema, + ...cfgNoToken, + }) satisfies ChatCompletionRequestCacheKey; + const validator = (value: ChatCompletionResponse) => { + const ok = value?.finishReason === "stop"; + return ok; + }; + const cacheRes = await cacheStore.getOrUpdate(cachedKey, infer, validator); + logVerbose("\n"); + resp = cacheRes.value; + resp.cached = cacheRes.cached; + reqTrace.itemValue("cache", cacheStore.name); + reqTrace.itemValue("cache_key", cacheRes.key); + dbg( + `cache ${resp.cached ? "hit" : "miss"} (${cacheStore.name}/${cacheRes.key.slice(0, 7)})`, + ); + if (resp.cached) { + if (cacheRes.value.text) { + partialCb( + deleteUndefinedValues({ + responseSoFar: cacheRes.value.text, + tokensSoFar: 0, + responseChunk: cacheRes.value.text, + responseTokens: cacheRes.value.logprobs, + reasoningSoFar: cacheRes.value.reasoning, + inner, + }), + ); + } } + } else { + resp = await infer(); + } + } finally { + logVerbose("\n"); + reqTrace.endDetails(); } - while (true) { - stats.turns++ - collapseChatMessages(messages) - dbg(`turn ${stats.turns}`) - if (messages) { - chatTrace.details( - `💬 messages (${messages.length})`, - await renderMessagesToMarkdown(messages, { - textLang: "markdown", - user: true, - assistant: true, - cacheImage, - tools, - }), - { expanded: true } - ) - } - // make request - let req: CreateChatCompletionRequest - let resp: ChatCompletionResponse - try { - checkCancelled(cancellationToken) - const reqTrace = chatTrace.startTraceDetails(`📤 llm request`) - try { - const logit_bias = await choicesToLogitBias( - reqTrace, - model, - choices - ) - req = { - model, - temperature, - store, - metadata: store ? metadata : undefined, - reasoning_effort: reasoningEffort, - top_p: topP, - tool_choice: - !fallbackTools && tools?.length - ? typeof toolChoice === "object" - ? { - type: "function", - function: { name: toolChoice.name }, - } - : toolChoice - : undefined, - max_tokens: maxTokens, - logit_bias, - seed, - stream: true, - logprobs, - top_logprobs, - tools: fallbackTools ? 
undefined : tools, - // https://platform.openai.com/docs/guides/predicted-outputs - prediction: prediction?.content - ? prediction - : undefined, - response_format: - responseType === "json_object" - ? { type: responseType } - : responseType === "json_schema" - ? { - type: "json_schema", - json_schema: { - name: "result", - schema: toStrictJSONSchema( - responseSchema, - { noDefaults: true } - ), - strict: true, - }, - } - : undefined, - messages, - } satisfies CreateChatCompletionRequest - updateChatFeatures(reqTrace, model, req) - if (!isQuiet) - stderr.write( - await renderMessagesToTerminal(req, { - user: true, - tools, - }) - ) - - const infer = async () => { - logVerbose(`\n`) - const m = measure( - "chat.completer", - `${req.model} -> ${req.messages.length} messages` - ) - dbg( - `infer ${req.model} with ${req.messages.length} messages` - ) - if (req.response_format) - dbg( - `response format: %O`, - JSON.stringify(req.response_format, null, 2) - ) - const cres = await completer( - req, - connectionToken, - genOptions, - reqTrace - ) - const duration = m() - cres.duration = duration - return cres - } - if (cacheStore) { - dbg(`cache store enabled, checking cache`) - const cachedKey = deleteUndefinedValues({ - modelid: model, - ...req, - responseType, - responseSchema, - ...cfgNoToken, - }) satisfies ChatCompletionRequestCacheKey - const validator = (value: ChatCompletionResponse) => { - const ok = value?.finishReason === "stop" - return ok - } - const cacheRes = await cacheStore.getOrUpdate( - cachedKey, - infer, - validator - ) - logVerbose("\n") - resp = cacheRes.value - resp.cached = cacheRes.cached - reqTrace.itemValue("cache", cacheStore.name) - reqTrace.itemValue("cache_key", cacheRes.key) - dbg( - `cache ${resp.cached ? "hit" : "miss"} (${cacheStore.name}/${cacheRes.key.slice(0, 7)})` - ) - if (resp.cached) { - if (cacheRes.value.text) { - partialCb( - deleteUndefinedValues({ - responseSoFar: cacheRes.value.text, - tokensSoFar: 0, - responseChunk: cacheRes.value.text, - responseTokens: cacheRes.value.logprobs, - reasoningSoFar: - cacheRes.value.reasoning, - inner, - }) - ) - } - } - } else { - resp = await infer() - } - } finally { - logVerbose("\n") - reqTrace.endDetails() - } - - const output = await processChatMessage( - model, - timer, - req, - resp, - messages, - toolDefinitions, - chatParticipants, - schemas, - fileOutputs, - outputProcessors, - fileMerges, - cacheImage, - genOptions - ) - if (output) { - return output - } - } catch (err) { - return structurifyChatSession( - timer, - messages, - schemas, - fileOutputs, - outputProcessors, - fileMerges, - [], - genOptions, - { resp, err } - ) - } + const output = await processChatMessage( + model, + timer, + req, + resp, + messages, + toolDefinitions, + chatParticipants, + schemas, + fileOutputs, + outputProcessors, + fileMerges, + cacheImage, + genOptions, + ); + if (output) { + return output; } - } finally { - await dispose(disposables, { trace: chatTrace }) - stats.trace(chatTrace) - chatTrace.endDetails() + } catch (err) { + return structurifyChatSession( + timer, + messages, + schemas, + fileOutputs, + outputProcessors, + fileMerges, + [], + genOptions, + { resp, err }, + ); + } } + } finally { + await dispose(disposables, { trace: chatTrace }); + stats.trace(chatTrace); + chatTrace.endDetails(); + } } function updateChatFeatures( - trace: MarkdownTrace, - modelid: string, - req: CreateChatCompletionRequest + trace: MarkdownTrace, + modelid: string, + req: CreateChatCompletionRequest, ) { - const { provider, model } 
= parseModelIdentifier(modelid) - const features = providerFeatures(provider) - - if (!isNaN(req.seed) && features?.seed === false) { - dbg(`seed: disabled, not supported by ${provider}`) - trace.itemValue(`seed`, `disabled`) - delete req.seed // some providers do not support seed - } - if (req.logit_bias && features?.logitBias === false) { - dbg(`logit_bias: disabled, not supported by ${provider}`) - trace.itemValue(`logit_bias`, `disabled`) - delete req.logit_bias // some providers do not support logit_bias - } - if (!isNaN(req.top_p) && features?.topP === false) { - dbg(`top_p: disabled, not supported by ${provider}`) - trace.itemValue(`top_p`, `disabled`) - delete req.top_p - } - if (req.tool_choice && features?.toolChoice === false) { - dbg(`tool_choice: disabled, not supported by ${provider}`) - trace.itemValue(`tool_choice`, `disabled`) - delete req.tool_choice - } - if (req.logprobs && features?.logprobs === false) { - dbg(`logprobs: disabled, not supported by ${provider}`) - trace.itemValue(`logprobs`, `disabled`) - delete req.logprobs - delete req.top_logprobs - } - if (req.prediction && features?.prediction === false) { - dbg(`prediction: disabled, not supported by ${provider}`) - delete req.prediction - } - if ( - req.top_logprobs && - (features?.logprobs === false || features?.topLogprobs === false) - ) { - dbg(`top_logprobs: disabled, not supported by ${provider}`) - trace.itemValue(`top_logprobs`, `disabled`) - delete req.top_logprobs - } - if (/^o1/i.test(model) && !req.max_completion_tokens) { - dbg(`max_tokens: renamed to max_completion_tokens`) - req.max_completion_tokens = req.max_tokens - delete req.max_tokens - } - if (req.store && !features?.metadata) { - dbg(`metadata: disabled, not supported by ${provider}`) - delete req.metadata - delete req.store - } - - deleteUndefinedValues(req) + const { provider, model } = parseModelIdentifier(modelid); + const features = providerFeatures(provider); + + if (!isNaN(req.seed) && features?.seed === false) { + dbg(`seed: disabled, not supported by ${provider}`); + trace.itemValue(`seed`, `disabled`); + delete req.seed; // some providers do not support seed + } + if (req.logit_bias && features?.logitBias === false) { + dbg(`logit_bias: disabled, not supported by ${provider}`); + trace.itemValue(`logit_bias`, `disabled`); + delete req.logit_bias; // some providers do not support logit_bias + } + if (!isNaN(req.top_p) && features?.topP === false) { + dbg(`top_p: disabled, not supported by ${provider}`); + trace.itemValue(`top_p`, `disabled`); + delete req.top_p; + } + if (req.tool_choice && features?.toolChoice === false) { + dbg(`tool_choice: disabled, not supported by ${provider}`); + trace.itemValue(`tool_choice`, `disabled`); + delete req.tool_choice; + } + if (req.logprobs && features?.logprobs === false) { + dbg(`logprobs: disabled, not supported by ${provider}`); + trace.itemValue(`logprobs`, `disabled`); + delete req.logprobs; + delete req.top_logprobs; + } + if (req.prediction && features?.prediction === false) { + dbg(`prediction: disabled, not supported by ${provider}`); + delete req.prediction; + } + if (req.top_logprobs && (features?.logprobs === false || features?.topLogprobs === false)) { + dbg(`top_logprobs: disabled, not supported by ${provider}`); + trace.itemValue(`top_logprobs`, `disabled`); + delete req.top_logprobs; + } + if (/^o1/i.test(model) && !req.max_completion_tokens) { + dbg(`max_tokens: renamed to max_completion_tokens`); + req.max_completion_tokens = req.max_tokens; + delete req.max_tokens; + } + if 
(req.store && !features?.metadata) { + dbg(`metadata: disabled, not supported by ${provider}`); + delete req.metadata; + delete req.store; + } + + deleteUndefinedValues(req); } /** @@ -1532,30 +1380,28 @@ function updateChatFeatures( * Outputs in Markdown format are further prettified for improved readability in the logs and appended as escaped HTML content. */ export function tracePromptResult( - trace: MarkdownTrace, - resp: { text?: string; reasoning?: string } + trace: MarkdownTrace, + resp: { text?: string; reasoning?: string }, ) { - const { text, reasoning } = resp || {} - - if (reasoning) { - trace.detailsFenced(`🤔 reasoning`, reasoning, "markdown") - } - // try to sniff the output type - if (text) { - const language = JSON5TryParse(text) - ? "json" - : XMLTryParse(text) - ? "xml" - : /^(-|\*|#+|```)\s/im.test(text) - ? "markdown" - : "text" - trace.detailsFenced(`🔠 output`, text, language, { expanded: true }) - if (language === "markdown") { - trace.appendContent( - "\n\n" + HTMLEscape(prettifyMarkdown(text)) + "\n\n" - ) - } + const { text, reasoning } = resp || {}; + + if (reasoning) { + trace.detailsFenced(`🤔 reasoning`, reasoning, "markdown"); + } + // try to sniff the output type + if (text) { + const language = JSON5TryParse(text) + ? "json" + : XMLTryParse(text) + ? "xml" + : /^(-|\*|#+|```)\s/im.test(text) + ? "markdown" + : "text"; + trace.detailsFenced(`🔠 output`, text, language, { expanded: true }); + if (language === "markdown") { + trace.appendContent("\n\n" + HTMLEscape(prettifyMarkdown(text)) + "\n\n"); } + } } /** @@ -1574,44 +1420,42 @@ export function tracePromptResult( * - If the last message content is a string, it is converted to an array when adding an image. */ export function appendUserMessage( - messages: ChatCompletionMessageParam[], - content: string | PromptImage, - options?: ContextExpansionOptions + messages: ChatCompletionMessageParam[], + content: string | PromptImage, + options?: ContextExpansionOptions, ) { - if (!content) { - return + if (!content) { + return; + } + const { cacheControl } = options || {}; + let last = messages.at(-1) as ChatCompletionUserMessageParam; + if (last?.role !== "user" || options?.cacheControl !== last?.cacheControl) { + last = { + role: "user", + content: "", + } satisfies ChatCompletionUserMessageParam; + if (cacheControl) { + last.cacheControl = cacheControl; } - const { cacheControl } = options || {} - let last = messages.at(-1) as ChatCompletionUserMessageParam - if (last?.role !== "user" || options?.cacheControl !== last?.cacheControl) { - last = { - role: "user", - content: "", - } satisfies ChatCompletionUserMessageParam - if (cacheControl) { - last.cacheControl = cacheControl - } - messages.push(last) - } - if (typeof content === "string") { - if (last.content) { - if (typeof last.content === "string") { - last.content += "\n" + content - } else { - last.content.push({ type: "text", text: content }) - } - } else { - last.content = content - } + messages.push(last); + } + if (typeof content === "string") { + if (last.content) { + if (typeof last.content === "string") { + last.content += "\n" + content; + } else { + last.content.push({ type: "text", text: content }); + } } else { - // add image - if (typeof last.content === "string") { - last.content = last.content - ? [{ type: "text", text: last.content }] - : [] - } - last.content.push(toChatCompletionImage(content)) + last.content = content; + } + } else { + // add image + if (typeof last.content === "string") { + last.content = last.content ? 
[{ type: "text", text: last.content }] : []; } + last.content.push(toChatCompletionImage(content)); + } } /** @@ -1628,37 +1472,34 @@ export function appendUserMessage( * @param options - Optional context settings for the message, such as cache control. */ export function appendAssistantMessage( - messages: ChatCompletionMessageParam[], - content: string, - options?: ContextExpansionOptions + messages: ChatCompletionMessageParam[], + content: string, + options?: ContextExpansionOptions, ) { - if (!content) { - return - } - const { cacheControl } = options || {} - let last = messages.at(-1) as ChatCompletionAssistantMessageParam - if ( - last?.role !== "assistant" || - options?.cacheControl !== last?.cacheControl - ) { - last = { - role: "assistant", - content: "", - } satisfies ChatCompletionAssistantMessageParam - if (cacheControl) { - last.cacheControl = cacheControl - } - messages.push(last) + if (!content) { + return; + } + const { cacheControl } = options || {}; + let last = messages.at(-1) as ChatCompletionAssistantMessageParam; + if (last?.role !== "assistant" || options?.cacheControl !== last?.cacheControl) { + last = { + role: "assistant", + content: "", + } satisfies ChatCompletionAssistantMessageParam; + if (cacheControl) { + last.cacheControl = cacheControl; } - if (last.content) { - if (typeof last.content === "string") { - last.content += "\n" + content - } else { - last.content.push({ type: "text", text: content }) - } + messages.push(last); + } + if (last.content) { + if (typeof last.content === "string") { + last.content += "\n" + content; } else { - last.content = content + last.content.push({ type: "text", text: content }); } + } else { + last.content = content; + } } /** @@ -1678,38 +1519,35 @@ export function appendAssistantMessage( * If the system message content is empty, the new content is directly assigned. */ export function appendSystemMessage( - messages: ChatCompletionMessageParam[], - content: string, - options?: ContextExpansionOptions + messages: ChatCompletionMessageParam[], + content: string, + options?: ContextExpansionOptions, ) { - if (!content) { - return - } - const { cacheControl } = options || {} - - let last = messages[0] as ChatCompletionSystemMessageParam - if ( - last?.role !== "system" || - options?.cacheControl !== last?.cacheControl - ) { - last = { - role: "system", - content: "", - } as ChatCompletionSystemMessageParam - if (cacheControl) { - last.cacheControl = cacheControl - } - messages.unshift(last) + if (!content) { + return; + } + const { cacheControl } = options || {}; + + let last = messages[0] as ChatCompletionSystemMessageParam; + if (last?.role !== "system" || options?.cacheControl !== last?.cacheControl) { + last = { + role: "system", + content: "", + } as ChatCompletionSystemMessageParam; + if (cacheControl) { + last.cacheControl = cacheControl; } - if (last.content) { - if (typeof last.content === "string") { - last.content += SYSTEM_FENCE + content - } else { - last.content.push({ type: "text", text: content }) - } + messages.unshift(last); + } + if (last.content) { + if (typeof last.content === "string") { + last.content += SYSTEM_FENCE + content; } else { - last.content = content + last.content.push({ type: "text", text: content }); } + } else { + last.content = content; + } } /** @@ -1723,16 +1561,16 @@ export function appendSystemMessage( * and included in the system message. 
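 *
 * A hypothetical call sketch (illustrative only, not part of this changeset);
 * `toolCallbacks` stands in for an existing ToolCallback[].
 * @example
 * const messages: ChatCompletionMessageParam[] = []
 * addToolDefinitionsMessage(messages, toolCallbacks)
 * // messages[0] is now a system message embedding the YAML-serialized tool specs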
*/
export function addToolDefinitionsMessage(
- messages: ChatCompletionMessageParam[],
- tools: ToolCallback[]
+ messages: ChatCompletionMessageParam[],
+ tools: ToolCallback[],
) {
- dbg(`adding tool definitions to messages`)
- appendSystemMessage(
- messages,
- `
+ dbg(`adding tool definitions to messages`);
+ appendSystemMessage(
+ messages,
+ `
${YAMLStringify(tools.map((t) => t.spec))}
-`
- )
+`,
+ );
}
diff --git a/packages/core/src/chatcache.ts b/packages/core/src/chatcache.ts
index 7c6d3694ff..98ea485463 100644
--- a/packages/core/src/chatcache.ts
+++ b/packages/core/src/chatcache.ts
@@ -1,31 +1,25 @@
-import { createCache } from "./cache"
-import type {
- ChatCompletionResponse,
- CreateChatCompletionRequest,
-} from "./chattypes"
-import { CHAT_CACHE } from "./constants"
-import type { LanguageModelConfiguration } from "./server/messages"
+import { createCache } from "./cache";
+import type { ChatCompletionResponse, CreateChatCompletionRequest } from "./chattypes";
+import { CHAT_CACHE } from "./constants";
+import type { LanguageModelConfiguration } from "./server/messages";
// Define the type for a cache key, which combines chat completion request
// with additional model options, excluding "token" and "source" from the language model configuration.
export type ChatCompletionRequestCacheKey = CreateChatCompletionRequest &
- Omit<LanguageModelConfiguration, "token" | "source">
+ Omit<LanguageModelConfiguration, "token" | "source">;
// Define a JSON line cache type that maps cache keys to cache values.
// This cache stores chat completion requests and their associated responses.
export type ChatCompletationRequestCache = WorkspaceFileCache<
- ChatCompletionRequestCacheKey,
- ChatCompletionResponse
->
+ ChatCompletionRequestCacheKey,
+ ChatCompletionResponse
+>;
// Function to retrieve a chat completion cache.
// It uses a default cache name if none is provided.
// This function ensures consistent access to cached chat completions.
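// Hypothetical usage sketch (illustrative, not part of this diff): callers such as
// executeChatSession resolve entries through getOrUpdate, where `infer` recomputes a
// fresh response on a cache miss and `validator` decides whether a stored value is
// still acceptable (see the cache handling in chat.ts above).
//
//   const cache = getChatCompletionCache()
//   const { value, cached } = await cache.getOrUpdate(cachedKey, infer, validator)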
-export function getChatCompletionCache( - name?: string -): ChatCompletationRequestCache { - return createCache( - name || CHAT_CACHE, - { type: "fs" } - ) +export function getChatCompletionCache(name?: string): ChatCompletationRequestCache { + return createCache(name || CHAT_CACHE, { + type: "fs", + }); } diff --git a/packages/core/src/chatrender.test.ts b/packages/core/src/chatrender.test.ts index 45479b9a54..9efcc33236 100644 --- a/packages/core/src/chatrender.test.ts +++ b/packages/core/src/chatrender.test.ts @@ -1,219 +1,208 @@ -import assert from "node:assert/strict" -import test, { describe } from "node:test" +import assert from "node:assert/strict"; +import test, { describe } from "node:test"; import { - renderShellOutput, - renderMessageContent, - lastAssistantReasoning, - renderMessagesToMarkdown, - collapseChatMessages, - assistantText, -} from "./chatrender" -import { - ChatCompletionAssistantMessageParam, - ChatCompletionUserMessageParam, -} from "./chattypes" -import { ChatCompletionSystemMessageParam } from "openai/resources/index.mjs" + renderShellOutput, + renderMessageContent, + lastAssistantReasoning, + renderMessagesToMarkdown, + collapseChatMessages, + assistantText, +} from "./chatrender"; +import { ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam } from "./chattypes"; +import { ChatCompletionSystemMessageParam } from "openai/resources/index.mjs"; describe("renderShellOutput", () => { - test("should return stdout if exit code is 0", () => { - const output = { exitCode: 0, stdout: "success", stderr: "" } - const result = renderShellOutput(output) - assert.equal(result, "success") - }) -}) + test("should return stdout if exit code is 0", () => { + const output = { exitCode: 0, stdout: "success", stderr: "" }; + const result = renderShellOutput(output); + assert.equal(result, "success"); + }); +}); describe("renderMessageContent", () => { - test("should return the string content directly", async () => { - const msg: ChatCompletionUserMessageParam = { - role: "user", - content: "hello world", - } - const result = await renderMessageContent(msg, { textLang: "raw" }) - assert.equal(result, "hello world") - }) -}) + test("should return the string content directly", async () => { + const msg: ChatCompletionUserMessageParam = { + role: "user", + content: "hello world", + }; + const result = await renderMessageContent(msg, { textLang: "raw" }); + assert.equal(result, "hello world"); + }); +}); describe("lastAssistantReasoning", () => { - test("should return reasoning content of the last assistant message", () => { - const messages = [ - { - role: "user", - content: "hi", - } satisfies ChatCompletionUserMessageParam, - { - role: "assistant", - reasoning_content: "thinking process", - } satisfies ChatCompletionAssistantMessageParam, - ] - const result = lastAssistantReasoning(messages) - assert.equal(result, "thinking process") - }) + test("should return reasoning content of the last assistant message", () => { + const messages = [ + { + role: "user", + content: "hi", + } satisfies ChatCompletionUserMessageParam, + { + role: "assistant", + reasoning_content: "thinking process", + } satisfies ChatCompletionAssistantMessageParam, + ]; + const result = lastAssistantReasoning(messages); + assert.equal(result, "thinking process"); + }); - test("should return undefined if no assistant reasoning content exists", () => { - const messages = [ - { - role: "user", - content: "hi", - } satisfies ChatCompletionUserMessageParam, - { - role: "assistant", - content: "hello", 
- } satisfies ChatCompletionAssistantMessageParam, - ] - const result = lastAssistantReasoning(messages) - assert.equal(result, undefined) - }) -}) + test("should return undefined if no assistant reasoning content exists", () => { + const messages = [ + { + role: "user", + content: "hi", + } satisfies ChatCompletionUserMessageParam, + { + role: "assistant", + content: "hello", + } satisfies ChatCompletionAssistantMessageParam, + ]; + const result = lastAssistantReasoning(messages); + assert.equal(result, undefined); + }); +}); describe("renderMessagesToMarkdown", () => { - test("should format messages to markdown", async () => { - const messages = [ - { - role: "system", - content: "system message", - } satisfies ChatCompletionSystemMessageParam, - { - role: "user", - content: "user message", - } satisfies ChatCompletionUserMessageParam, - { - role: "assistant", - content: "assistant message", - reasoning_content: "reasoning", - } satisfies ChatCompletionAssistantMessageParam, - ] - const result = await renderMessagesToMarkdown(messages) - assert.ok(result.includes("system message")) - assert.ok(result.includes("user message")) - assert.ok(result.includes("assistant message")) - assert.ok(result.includes("reasoning")) - }) -}) + test("should format messages to markdown", async () => { + const messages = [ + { + role: "system", + content: "system message", + } satisfies ChatCompletionSystemMessageParam, + { + role: "user", + content: "user message", + } satisfies ChatCompletionUserMessageParam, + { + role: "assistant", + content: "assistant message", + reasoning_content: "reasoning", + } satisfies ChatCompletionAssistantMessageParam, + ]; + const result = await renderMessagesToMarkdown(messages); + assert.ok(result.includes("system message")); + assert.ok(result.includes("user message")); + assert.ok(result.includes("assistant message")); + assert.ok(result.includes("reasoning")); + }); +}); describe("collapseChatMessages", () => { - test("should collapse system messages", () => { - const messages = [ - { - role: "system", - content: "system message 1", - } satisfies ChatCompletionSystemMessageParam, - { - role: "system", - content: "system message 2", - } satisfies ChatCompletionSystemMessageParam, - { - role: "user", - content: "user message", - } satisfies ChatCompletionUserMessageParam, - ] - collapseChatMessages(messages) - assert.equal(messages[0].content, "system message 1\nsystem message 2") - assert.equal(messages.length, 2) - }) + test("should collapse system messages", () => { + const messages = [ + { + role: "system", + content: "system message 1", + } satisfies ChatCompletionSystemMessageParam, + { + role: "system", + content: "system message 2", + } satisfies ChatCompletionSystemMessageParam, + { + role: "user", + content: "user message", + } satisfies ChatCompletionUserMessageParam, + ]; + collapseChatMessages(messages); + assert.equal(messages[0].content, "system message 1\nsystem message 2"); + assert.equal(messages.length, 2); + }); - test("should remove empty text contents from user messages", () => { - const messages = [ - { - role: "user", - content: [ - { type: "text", text: "" }, - { type: "text", text: "hello" }, - ], - } satisfies ChatCompletionUserMessageParam, - ] - collapseChatMessages(messages) - assert.deepEqual(messages[0].content, [{ type: "text", text: "hello" }]) - }) - describe("assistantText", () => { - test("should concatenate string contents from consecutive assistant messages", () => { - const messages = [ - { role: "user", content: "hi" }, - { role: 
"assistant", content: "first" }, - { role: "assistant", content: "second" }, - ] - const result = assistantText(messages as any) - assert.equal(result, "firstsecond") - }) + test("should remove empty text contents from user messages", () => { + const messages = [ + { + role: "user", + content: [ + { type: "text", text: "" }, + { type: "text", text: "hello" }, + ], + } satisfies ChatCompletionUserMessageParam, + ]; + collapseChatMessages(messages); + assert.deepEqual(messages[0].content, [{ type: "text", text: "hello" }]); + }); + describe("assistantText", () => { + test("should concatenate string contents from consecutive assistant messages", () => { + const messages = [ + { role: "user", content: "hi" }, + { role: "assistant", content: "first" }, + { role: "assistant", content: "second" }, + ]; + const result = assistantText(messages as any); + assert.equal(result, "firstsecond"); + }); - test("should concatenate text parts from array content in assistant messages", () => { - const messages = [ - { - role: "assistant", - content: [ - { type: "text", text: "foo" }, - { type: "text", text: "bar" }, - ], - }, - ] - const result = assistantText(messages as any) - assert.strictEqual(result, "foobar") - }) + test("should concatenate text parts from array content in assistant messages", () => { + const messages = [ + { + role: "assistant", + content: [ + { type: "text", text: "foo" }, + { type: "text", text: "bar" }, + ], + }, + ]; + const result = assistantText(messages as any); + assert.strictEqual(result, "foobar"); + }); - test("should prepend refusal text if present in content array", () => { - const messages = [ - { - role: "assistant", - content: [ - { type: "refusal", refusal: "not allowed" }, - { type: "text", text: "text" }, - ], - }, - ] - const result = assistantText(messages as any) - assert.strictEqual(result, "refusal: not allowed\n") - }) + test("should prepend refusal text if present in content array", () => { + const messages = [ + { + role: "assistant", + content: [ + { type: "refusal", refusal: "not allowed" }, + { type: "text", text: "text" }, + ], + }, + ]; + const result = assistantText(messages as any); + assert.strictEqual(result, "refusal: not allowed\n"); + }); - test("should stop at last non-assistant message", () => { - const messages = [ - { role: "assistant", content: "ignore" }, - { role: "user", content: "stop" }, - { role: "assistant", content: "keep" }, - ] - const result = assistantText(messages as any) - assert.equal(result, "keep") - }) + test("should stop at last non-assistant message", () => { + const messages = [ + { role: "assistant", content: "ignore" }, + { role: "user", content: "stop" }, + { role: "assistant", content: "keep" }, + ]; + const result = assistantText(messages as any); + assert.equal(result, "keep"); + }); - test("should unfence markdown by default", () => { - const messages = [ - { role: "assistant", content: "```markdown\nfoo\n```" }, - ] - const result = assistantText(messages as any) - assert.equal(result.trim(), "foo") - }) + test("should unfence markdown by default", () => { + const messages = [{ role: "assistant", content: "```markdown\nfoo\n```" }]; + const result = assistantText(messages as any); + assert.equal(result.trim(), "foo"); + }); - test("should unfence yaml if responseType is 'yaml'", () => { - const messages = [ - { role: "assistant", content: "```yaml\nfoo: bar\n```" }, - ] - const result = assistantText(messages as any, { - responseType: "yaml", - }) - assert.equal(result.trim(), "foo: bar") - }) + test("should 
unfence yaml if responseType is 'yaml'", () => { + const messages = [{ role: "assistant", content: "```yaml\nfoo: bar\n```" }]; + const result = assistantText(messages as any, { + responseType: "yaml", + }); + assert.equal(result.trim(), "foo: bar"); + }); - test("should unfence json if responseType starts with 'json'", () => { - const messages = [ - { role: "assistant", content: '```json\n{"a":1}\n```' }, - ] - const result = assistantText(messages as any, { - responseType: "json", - }) - assert.equal(result.trim(), '{"a":1}') - }) + test("should unfence json if responseType starts with 'json'", () => { + const messages = [{ role: "assistant", content: '```json\n{"a":1}\n```' }]; + const result = assistantText(messages as any, { + responseType: "json", + }); + assert.equal(result.trim(), '{"a":1}'); + }); - test("should unfence text if responseType is 'text'", () => { - const messages = [ - { role: "assistant", content: "```text\nplain\n```" }, - ] - const result = assistantText(messages as any, { - responseType: "text", - }) - assert.equal(result.trim(), "plain") - }) + test("should unfence text if responseType is 'text'", () => { + const messages = [{ role: "assistant", content: "```text\nplain\n```" }]; + const result = assistantText(messages as any, { + responseType: "text", + }); + assert.equal(result.trim(), "plain"); + }); - test("should handle empty messages gracefully", () => { - const result = assistantText([]) - assert.equal(result, "") - }) - }) -}) + test("should handle empty messages gracefully", () => { + const result = assistantText([]); + assert.equal(result, ""); + }); + }); +}); diff --git a/packages/core/src/chatrender.ts b/packages/core/src/chatrender.ts index d674633e17..0ec1622702 100644 --- a/packages/core/src/chatrender.ts +++ b/packages/core/src/chatrender.ts @@ -1,29 +1,29 @@ // Import statements for various message parameters used in chat rendering. import type { - ChatCompletionAssistantMessageParam, - ChatCompletionMessageParam, - ChatCompletionSystemMessageParam, - ChatCompletionTool, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, -} from "./chattypes" -import { collapseNewlines } from "./cleaners" + ChatCompletionAssistantMessageParam, + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ChatCompletionTool, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +} from "./chattypes"; +import { collapseNewlines } from "./cleaners"; // Import utility functions for JSON5 parsing, markdown formatting, and YAML stringification. 
-import { JSONLLMTryParse } from "./json5" -import { details, fenceMD } from "./mkmd" -import { stringify as YAMLStringify } from "yaml" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { unthink } from "./think" -import { unfence } from "./unwrappers" +import { JSONLLMTryParse } from "./json5"; +import { details, fenceMD } from "./mkmd"; +import { stringify as YAMLStringify } from "yaml"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { unthink } from "./think"; +import { unfence } from "./unwrappers"; export interface ChatRenderOptions extends CancellationOptions { - textLang?: "markdown" | "text" | "json" | "raw" - system?: boolean - user?: boolean - assistant?: boolean - cacheImage?: (url: string) => Promise - tools?: ChatCompletionTool[] + textLang?: "markdown" | "text" | "json" | "raw"; + system?: boolean; + user?: boolean; + assistant?: boolean; + cacheImage?: (url: string) => Promise; + tools?: ChatCompletionTool[]; } /** @@ -32,23 +32,23 @@ export interface ChatRenderOptions extends CancellationOptions { * @returns A formatted string summarizing the shell output. Includes exit code if non-zero, stdout formatted as text, and stderr formatted as text, separated by double newlines. Returns stdout directly if the exit code is zero. */ export function renderShellOutput(output: ShellOutput) { - // Destructure the output object to retrieve exitCode, stdout, and stderr. - const { exitCode, stdout, stderr } = output - if (exitCode === 0) return stdout - return ( - [ - // Include exit code in the output only if it's non-zero. - exitCode !== 0 ? `EXIT_CODE: ${exitCode}` : undefined, - // Include stdout if it exists, formatted as text. - stdout ? `STDOUT:${fenceMD(stdout, "text")}` : undefined, - // Include stderr if it exists, formatted as text. - stderr ? `STDERR:${fenceMD(stderr, "text")}` : undefined, - ] - // Filter out undefined values from the array. - .filter((s) => s) - // Join the elements with two newlines for separation. - .join("\n\n") - ) + // Destructure the output object to retrieve exitCode, stdout, and stderr. + const { exitCode, stdout, stderr } = output; + if (exitCode === 0) return stdout; + return ( + [ + // Include exit code in the output only if it's non-zero. + exitCode !== 0 ? `EXIT_CODE: ${exitCode}` : undefined, + // Include stdout if it exists, formatted as text. + stdout ? `STDOUT:${fenceMD(stdout, "text")}` : undefined, + // Include stderr if it exists, formatted as text. + stderr ? `STDERR:${fenceMD(stderr, "text")}` : undefined, + ] + // Filter out undefined values from the array. + .filter((s) => s) + // Join the elements with two newlines for separation. + .join("\n\n") + ); } /** @@ -62,49 +62,47 @@ export function renderShellOutput(output: ShellOutput) { * @returns A formatted string representation of the message content, or undefined if the content is invalid or unsupported. 
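 *
 * @example
 * // Illustrative sketch mirroring the unit tests; not part of this change.
 * // With textLang "raw", plain string content is returned unchanged.
 * const text = await renderMessageContent(
 *   { role: "user", content: "hello world" },
 *   { textLang: "raw" },
 * );
 * // text === "hello world"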
*/ export async function renderMessageContent( - msg: - | ChatCompletionAssistantMessageParam - | ChatCompletionSystemMessageParam - | ChatCompletionUserMessageParam - | ChatCompletionToolMessageParam, - options?: ChatRenderOptions + msg: + | ChatCompletionAssistantMessageParam + | ChatCompletionSystemMessageParam + | ChatCompletionUserMessageParam + | ChatCompletionToolMessageParam, + options?: ChatRenderOptions, ): Promise { - const { cacheImage, textLang } = options || {} - const content = msg.content + const { cacheImage, textLang } = options || {}; + const content = msg.content; - // Return the content directly if it's a simple string. - if (typeof content === "string") { - if (textLang === "raw") return content - else return fenceMD(content, textLang) + // Return the content directly if it's a simple string. + if (typeof content === "string") { + if (textLang === "raw") return content; + else return fenceMD(content, textLang); + } + // If the content is an array, process each element based on its type. + else if (Array.isArray(content)) { + const res: string[] = []; + for (const c of content) { + switch (c.type) { + case "text": + if (textLang === "raw") res.push(c.text); + else res.push(fenceMD(c.text, textLang)); + break; + case "image_url": + res.push(`\n\n![image](${(await cacheImage?.(c.image_url.url)) || c.image_url.url})\n\n`); + break; + case "input_audio": + res.push(`🔊 [audio](${c.input_audio})`); + break; + case "refusal": + res.push(`refused: ${c.refusal}`); + break; + default: + res.push(`unknown message`); + } } - // If the content is an array, process each element based on its type. - else if (Array.isArray(content)) { - const res: string[] = [] - for (const c of content) { - switch (c.type) { - case "text": - if (textLang === "raw") res.push(c.text) - else res.push(fenceMD(c.text, textLang)) - break - case "image_url": - res.push( - `\n\n![image](${(await cacheImage?.(c.image_url.url)) || c.image_url.url})\n\n` - ) - break - case "input_audio": - res.push(`🔊 [audio](${c.input_audio})`) - break - case "refusal": - res.push(`refused: ${c.refusal}`) - break - default: - res.push(`unknown message`) - } - } - return res.join(" ") - } - // Return undefined if the content is neither a string nor an array. - return undefined + return res.join(" "); + } + // Return undefined if the content is neither a string nor an array. + return undefined; } /** @@ -114,8 +112,8 @@ export async function renderMessageContent( * @returns The reasoning content of the last assistant message, or undefined if none is found. */ export function lastAssistantReasoning(messages: ChatCompletionMessageParam[]) { - const last = messages.at(-1) - return last?.role === "assistant" && last.reasoning_content + const last = messages.at(-1); + return last?.role === "assistant" && last.reasoning_content; } /** @@ -126,128 +124,110 @@ export function lastAssistantReasoning(messages: ChatCompletionMessageParam[]) { * @returns A markdown string representation of the chat messages. */ export async function renderMessagesToMarkdown( - messages: ChatCompletionMessageParam[], - options?: ChatRenderOptions + messages: ChatCompletionMessageParam[], + options?: ChatRenderOptions, ) { - // Set default options for filtering message roles. - const { - textLang = "markdown", - system = undefined, // Include system messages unless explicitly set to false. - user = undefined, // Include user messages unless explicitly set to false. - assistant = true, // Include assistant messages by default. 
- cancellationToken, - tools, - } = options || {} - options = { - textLang, - system, - user, - assistant, - cancellationToken, - tools, - } - const optionsMarkdown: ChatRenderOptions = { - textLang: "markdown", - system, - user, - assistant, - cancellationToken, - tools, - } + // Set default options for filtering message roles. + const { + textLang = "markdown", + system = undefined, // Include system messages unless explicitly set to false. + user = undefined, // Include user messages unless explicitly set to false. + assistant = true, // Include assistant messages by default. + cancellationToken, + tools, + } = options || {}; + options = { + textLang, + system, + user, + assistant, + cancellationToken, + tools, + }; + const optionsMarkdown: ChatRenderOptions = { + textLang: "markdown", + system, + user, + assistant, + cancellationToken, + tools, + }; - const res: string[] = [] + const res: string[] = []; - if (tools?.length) { - res.push( - details( - `🔧 tools (${tools.length})`, - tools - .map( - (tool) => - `- \`${tool.function.name}\`: ${tool.function.description || ""}` - ) - .join("\n") - ) - ) - } + if (tools?.length) { + res.push( + details( + `🔧 tools (${tools.length})`, + tools + .map((tool) => `- \`${tool.function.name}\`: ${tool.function.description || ""}`) + .join("\n"), + ), + ); + } - for (const msg of messages?.filter((msg) => { - // Filter messages based on their roles. - switch (msg.role) { - case "system": - return system !== false - case "user": - return user !== false - case "assistant": - return assistant !== false - default: - return true - } - })) { - checkCancelled(cancellationToken) - const { role } = msg - switch (role) { - case "system": - res.push( - details( - "📙 system", - await renderMessageContent(msg, optionsMarkdown), - false - ) - ) - break - case "user": - res.push( - details( - `👤 user`, - await renderMessageContent(msg, options), - user === true - ) - ) - break - case "assistant": - res.push( - details( - `🤖 assistant ${msg.name ? msg.name : ""}`, - [ - msg.reasoning_content - ? details( - "🤔 reasoning", - fenceMD(msg.reasoning_content, "markdown") - ) - : undefined, - await renderMessageContent(msg, optionsMarkdown), - ...(msg.tool_calls?.map((tc) => - details( - `📠 tool call ${tc.function.name} (${tc.id})`, - renderToolArguments(tc.function.arguments) - ) - ) || []), - ] - .filter((s) => !!s) - .join("\n\n"), - assistant === true - ) - ) - break - case "tool": - res.push( - details( - `🛠️ tool output ${msg.tool_call_id}`, - await renderMessageContent(msg, { - ...(options || {}), - textLang: "json", - }) - ) - ) - break - default: - res.push(role, fenceMD(JSON.stringify(msg, null, 2), "json")) - break - } + for (const msg of messages?.filter((msg) => { + // Filter messages based on their roles. + switch (msg.role) { + case "system": + return system !== false; + case "user": + return user !== false; + case "assistant": + return assistant !== false; + default: + return true; } - // Join the result array into a single markdown string. - return collapseNewlines(res.filter((s) => s !== undefined).join("\n")) + })) { + checkCancelled(cancellationToken); + const { role } = msg; + switch (role) { + case "system": + res.push(details("📙 system", await renderMessageContent(msg, optionsMarkdown), false)); + break; + case "user": + res.push(details(`👤 user`, await renderMessageContent(msg, options), user === true)); + break; + case "assistant": + res.push( + details( + `🤖 assistant ${msg.name ? msg.name : ""}`, + [ + msg.reasoning_content + ? 
details("🤔 reasoning", fenceMD(msg.reasoning_content, "markdown")) + : undefined, + await renderMessageContent(msg, optionsMarkdown), + ...(msg.tool_calls?.map((tc) => + details( + `📠 tool call ${tc.function.name} (${tc.id})`, + renderToolArguments(tc.function.arguments), + ), + ) || []), + ] + .filter((s) => !!s) + .join("\n\n"), + assistant === true, + ), + ); + break; + case "tool": + res.push( + details( + `🛠️ tool output ${msg.tool_call_id}`, + await renderMessageContent(msg, { + ...(options || {}), + textLang: "json", + }), + ), + ); + break; + default: + res.push(role, fenceMD(JSON.stringify(msg, null, 2), "json")); + break; + } + } + // Join the result array into a single markdown string. + return collapseNewlines(res.filter((s) => s !== undefined).join("\n")); } /** @@ -256,10 +236,10 @@ export async function renderMessagesToMarkdown( * @returns A formatted string in YAML or JSON. */ function renderToolArguments(args: string) { - const js = JSONLLMTryParse(args) - // Convert arguments to YAML if possible, otherwise keep as JSON. - if (js) return fenceMD(YAMLStringify(js), "yaml") - else return fenceMD(args, "json") + const js = JSONLLMTryParse(args); + // Convert arguments to YAML if possible, otherwise keep as JSON. + if (js) return fenceMD(YAMLStringify(js), "yaml"); + else return fenceMD(args, "json"); } /** @@ -273,34 +253,32 @@ function renderToolArguments(args: string) { * - Removes empty text content from "user" messages. For array-based content, filters out "text" types with no content. */ export function collapseChatMessages(messages: ChatCompletionMessageParam[]) { - // concat the content of system messages at the start of the messages into a single message - const startSystem = messages.findIndex((m) => m.role === "system") - if (startSystem > -1) { - let endSystem = - startSystem + - messages - .slice(startSystem) - .findIndex((m) => m.role !== "system" || m.cacheControl) - if (endSystem < 0) endSystem = messages.length - if (endSystem > startSystem + 1) { - const systemContent = messages - .slice(startSystem, endSystem) - .map((m) => m.content) - .join("\n") - messages.splice(startSystem, endSystem - startSystem, { - role: "system", - content: systemContent, - }) - } + // concat the content of system messages at the start of the messages into a single message + const startSystem = messages.findIndex((m) => m.role === "system"); + if (startSystem > -1) { + let endSystem = + startSystem + + messages.slice(startSystem).findIndex((m) => m.role !== "system" || m.cacheControl); + if (endSystem < 0) endSystem = messages.length; + if (endSystem > startSystem + 1) { + const systemContent = messages + .slice(startSystem, endSystem) + .map((m) => m.content) + .join("\n"); + messages.splice(startSystem, endSystem - startSystem, { + role: "system", + content: systemContent, + }); } + } - // remove empty text contents - messages - .filter((m) => m.role === "user") - .forEach((m) => { - if (typeof m.content !== "string") - m.content = m.content.filter((c) => c.type !== "text" || c.text) - }) + // remove empty text contents + messages + .filter((m) => m.role === "user") + .forEach((m) => { + if (typeof m.content !== "string") + m.content = m.content.filter((c) => c.type !== "text" || c.text); + }); } /** @@ -314,45 +292,45 @@ export function collapseChatMessages(messages: ChatCompletionMessageParam[]) { * @returns The concatenated and post-processed output text from the most recent assistant messages. 
*/ export function assistantText( - messages: ChatCompletionMessageParam[], - options?: { - responseType?: PromptTemplateResponseType - responseSchema?: PromptParametersSchema | JSONSchema - } + messages: ChatCompletionMessageParam[], + options?: { + responseType?: PromptTemplateResponseType; + responseSchema?: PromptParametersSchema | JSONSchema; + }, ) { - const { responseType, responseSchema } = options || {} - let text = "" - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i] - if (msg.role !== "assistant") { - break - } - let content: string = "" - if (typeof msg.content === "string") { - content = msg.content - } else if (Array.isArray(msg.content)) { - for (const part of msg.content) { - if (part.type === "text") { - content = content + part.text - } else if (part.type === "refusal") { - content = `refusal: ${part.refusal}\n` + content - break - } - } + const { responseType, responseSchema } = options || {}; + let text = ""; + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg.role !== "assistant") { + break; + } + let content: string = ""; + if (typeof msg.content === "string") { + content = msg.content; + } else if (Array.isArray(msg.content)) { + for (const part of msg.content) { + if (part.type === "text") { + content = content + part.text; + } else if (part.type === "refusal") { + content = `refusal: ${part.refusal}\n` + content; + break; } - text = content + text + } } + text = content + text; + } - text = unthink(text) - if ((!responseType && !responseSchema) || responseType === "markdown") { - text = unfence(text, ["markdown", "md"]) - } else if (responseType === "yaml") { - text = unfence(text, ["yaml", "yml"]) - } else if (/^json/.test(responseType)) { - text = unfence(text, ["json", "json5"]) - } else if (responseType === "text") { - text = unfence(text, ["text", "txt"]) - } + text = unthink(text); + if ((!responseType && !responseSchema) || responseType === "markdown") { + text = unfence(text, ["markdown", "md"]); + } else if (responseType === "yaml") { + text = unfence(text, ["yaml", "yml"]); + } else if (/^json/.test(responseType)) { + text = unfence(text, ["json", "json5"]); + } else if (responseType === "text") { + text = unfence(text, ["text", "txt"]); + } - return text + return text; } diff --git a/packages/core/src/chatrenderterminal.ts b/packages/core/src/chatrenderterminal.ts index ed139944a5..f2482446f0 100644 --- a/packages/core/src/chatrenderterminal.ts +++ b/packages/core/src/chatrenderterminal.ts @@ -1,144 +1,140 @@ import { - ChatCompletionAssistantMessageParam, - ChatCompletionMessageParam, - ChatCompletionMessageToolCall, - ChatCompletionSystemMessageParam, - ChatCompletionTool, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, - CreateChatCompletionRequest, -} from "./chattypes" -import { renderImageToTerminal } from "./image" -import { terminalSize } from "./terminal" -import { ellipse } from "./util" -import { YAMLStringify } from "./yaml" -import { dataUriToBuffer } from "./file" -import { wrapColor } from "./consolecolor" + ChatCompletionAssistantMessageParam, + ChatCompletionMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionSystemMessageParam, + ChatCompletionTool, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, + CreateChatCompletionRequest, +} from "./chattypes"; +import { renderImageToTerminal } from "./image"; +import { terminalSize } from "./terminal"; +import { ellipse } from "./util"; +import { YAMLStringify } from "./yaml"; +import 
{ dataUriToBuffer } from "./file"; +import { wrapColor } from "./consolecolor"; import { - BOX_DOWN_AND_RIGHT, - BOX_DOWN_UP_AND_RIGHT, - BOX_RIGHT, - BOX_UP_AND_DOWN, - BOX_UP_AND_RIGHT, - CHAR_ENVELOPE, - CONSOLE_COLOR_DEBUG, - CONTROL_CHAT_COLLAPSED, - CONTROL_CHAT_EXPANDED, - CONTROL_CHAT_LAST, -} from "./constants" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { prettyTemperature, prettyTokens } from "./pretty" -import { genaiscriptDebug } from "./debug" -import { JSONSchemaToFunctionParameters } from "./schema" -const dbg = genaiscriptDebug("chat:render") + BOX_DOWN_AND_RIGHT, + BOX_DOWN_UP_AND_RIGHT, + BOX_RIGHT, + BOX_UP_AND_DOWN, + BOX_UP_AND_RIGHT, + CHAR_ENVELOPE, + CONSOLE_COLOR_DEBUG, + CONTROL_CHAT_COLLAPSED, + CONTROL_CHAT_EXPANDED, + CONTROL_CHAT_LAST, +} from "./constants"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { prettyTemperature, prettyTokens } from "./pretty"; +import { genaiscriptDebug } from "./debug"; +import { JSONSchemaToFunctionParameters } from "./schema"; +const dbg = genaiscriptDebug("chat:render"); function renderTrimmed(s: string, rows: number, width: number) { - const lines = s.split(/\n/g).filter((l) => !!l) - let trimmed = lines.slice(0) - if (lines.length > rows) { - const head = Math.min(rows >> 1, lines.length - 1) - const tail = rows - head - trimmed = lines.slice(0, head) - if (tail) { - const hidden = lines.length - head - tail - if (hidden === 1) trimmed.push(lines.at(-tail - 1)) - else if (hidden > 0) trimmed.push(`... (${hidden} lines)`) - trimmed.push(...lines.slice(-tail)) - } + const lines = s.split(/\n/g).filter((l) => !!l); + let trimmed = lines.slice(0); + if (lines.length > rows) { + const head = Math.min(rows >> 1, lines.length - 1); + const tail = rows - head; + trimmed = lines.slice(0, head); + if (tail) { + const hidden = lines.length - head - tail; + if (hidden === 1) trimmed.push(lines.at(-tail - 1)); + else if (hidden > 0) trimmed.push(`... (${hidden} lines)`); + trimmed.push(...lines.slice(-tail)); } - const res = trimmed.map((l, i) => - wrapColor( - CONSOLE_COLOR_DEBUG, - BOX_UP_AND_DOWN + ellipse(l, width) + "\n" - ) - ) - return res + } + const res = trimmed.map((l, i) => + wrapColor(CONSOLE_COLOR_DEBUG, BOX_UP_AND_DOWN + ellipse(l, width) + "\n"), + ); + return res; } async function renderMessageContent( - modelId: string, - msg: - | string - | ChatCompletionAssistantMessageParam - | ChatCompletionSystemMessageParam - | ChatCompletionUserMessageParam - | ChatCompletionToolMessageParam, - options: { - columns: number - rows: number - } & CancellationOptions + modelId: string, + msg: + | string + | ChatCompletionAssistantMessageParam + | ChatCompletionSystemMessageParam + | ChatCompletionUserMessageParam + | ChatCompletionToolMessageParam, + options: { + columns: number; + rows: number; + } & CancellationOptions, ): Promise { - const { columns, rows, cancellationToken } = options - const content = typeof msg === "string" ? msg : msg.content - const margin = 2 - const width = columns - margin + const { columns, rows, cancellationToken } = options; + const content = typeof msg === "string" ? msg : msg.content; + const margin = 2; + const width = columns - margin; - const render = (s: string) => renderTrimmed(s, rows, width) + const render = (s: string) => renderTrimmed(s, rows, width); - // Return the content directly if it's a simple string. 
- if (typeof content === "string") return render(content) - // If the content is an array, process each element based on its type. - else if (Array.isArray(content)) { - const res: string[] = [] - for (const c of content) { - checkCancelled(cancellationToken) - switch (c.type) { - case "text": - res.push(...render(c.text)) - break - case "image_url": - res.push( - await renderImageToTerminal( - dataUriToBuffer(c.image_url.url), - { columns, rows, cancellationToken, modelId } - ) - ) - break - case "input_audio": - res.push(...render(`🔊 audio`)) - break - case "refusal": - res.push(...render(`🚫 ` + c.refusal)) - break - default: - res.push(...render(`unknown`)) - } - } - return res - } else return [] + // Return the content directly if it's a simple string. + if (typeof content === "string") return render(content); + // If the content is an array, process each element based on its type. + else if (Array.isArray(content)) { + const res: string[] = []; + for (const c of content) { + checkCancelled(cancellationToken); + switch (c.type) { + case "text": + res.push(...render(c.text)); + break; + case "image_url": + res.push( + await renderImageToTerminal(dataUriToBuffer(c.image_url.url), { + columns, + rows, + cancellationToken, + modelId, + }), + ); + break; + case "input_audio": + res.push(...render(`🔊 audio`)); + break; + case "refusal": + res.push(...render(`🚫 ` + c.refusal)); + break; + default: + res.push(...render(`unknown`)); + } + } + return res; + } else return []; } -function renderToolCall( - call: ChatCompletionMessageToolCall, - options: { columns: number } -): string { - const { columns } = options - const width = columns - 2 - return wrapColor( - CONSOLE_COLOR_DEBUG, - ellipse( - `${BOX_DOWN_UP_AND_RIGHT}${BOX_RIGHT}${BOX_RIGHT}📠 tool ${call.function.name} (${call.id})`, - columns - 2 - ) + - `\n` + - (call.function.arguments - ? wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_UP_AND_DOWN} ${ellipse(call.function.arguments, width)}\n` - ) - : "") - ) +function renderToolCall(call: ChatCompletionMessageToolCall, options: { columns: number }): string { + const { columns } = options; + const width = columns - 2; + return wrapColor( + CONSOLE_COLOR_DEBUG, + ellipse( + `${BOX_DOWN_UP_AND_RIGHT}${BOX_RIGHT}${BOX_RIGHT}📠 tool ${call.function.name} (${call.id})`, + columns - 2, + ) + + `\n` + + (call.function.arguments + ? wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_UP_AND_DOWN} ${ellipse(call.function.arguments, width)}\n`, + ) + : ""), + ); } function renderMetadata(call: CreateChatCompletionRequest) { - const { metadata } = call - if (!metadata) return "" - return wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_UP_AND_RIGHT}${BOX_RIGHT}📊 ${Object.entries(metadata) - .map(([k, v]) => `${k}: ${v}`) - .join(", ")}\n` - ) + const { metadata } = call; + if (!metadata) return ""; + return wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_DOWN_UP_AND_RIGHT}${BOX_RIGHT}📊 ${Object.entries(metadata) + .map(([k, v]) => `${k}: ${v}`) + .join(", ")}\n`, + ); } /** @@ -154,173 +150,150 @@ function renderMetadata(call: CreateChatCompletionRequest) { * @returns The formatted string output for terminal rendering. 
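 *
 * @example
 * // Hedged sketch: "my-model" is a placeholder id, and the request is cast
 * // because unrelated streaming fields are omitted for brevity.
 * const rendered = await renderMessagesToTerminal({
 *   model: "my-model",
 *   messages: [{ role: "user", content: "hello" }],
 * } as CreateChatCompletionRequest);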
*/ export async function renderMessagesToTerminal( - request: CreateChatCompletionRequest, - options?: { - system?: boolean - user?: boolean - assistant?: boolean - tools?: ChatCompletionTool[] - } + request: CreateChatCompletionRequest, + options?: { + system?: boolean; + user?: boolean; + assistant?: boolean; + tools?: ChatCompletionTool[]; + }, ) { - const { model, temperature, metadata, response_format } = request - let messages = request.messages.slice(0) - const { - system = undefined, // Include system messages unless explicitly set to false. - user = undefined, // Include user messages unless explicitly set to false. - assistant = true, // Include assistant messages by default. - tools, - } = options || {} + const { model, temperature, metadata, response_format } = request; + let messages = request.messages.slice(0); + const { + system = undefined, // Include system messages unless explicitly set to false. + user = undefined, // Include user messages unless explicitly set to false. + assistant = true, // Include assistant messages by default. + tools, + } = options || {}; - const { columns } = terminalSize() - dbg(`render %O`, messages) + const { columns } = terminalSize(); + dbg(`render %O`, messages); - const msgRows = (msg: ChatCompletionMessageParam, visibility: boolean) => - msg === messages.at(-1) - ? CONTROL_CHAT_LAST - : visibility === true - ? CONTROL_CHAT_EXPANDED - : CONTROL_CHAT_COLLAPSED + const msgRows = (msg: ChatCompletionMessageParam, visibility: boolean) => + msg === messages.at(-1) + ? CONTROL_CHAT_LAST + : visibility === true + ? CONTROL_CHAT_EXPANDED + : CONTROL_CHAT_COLLAPSED; - messages = messages.filter((msg) => { - // Filter messages based on their roles. - switch (msg.role) { - case "system": - return system !== false - case "user": - return user !== false - case "assistant": - return assistant !== false - default: - return true - } - }) - const res: string[] = [] - if (model) { - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}💬 ${model} ${CHAR_ENVELOPE} ${messages.length} ${prettyTemperature(temperature)}\n` - ) - ) - } - if (response_format) { - const { type } = response_format - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_UP_AND_RIGHT}${BOX_RIGHT}📦 ${type}\n` - ) - ) - if (type === "json_schema") { - const { json_schema } = response_format - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_UP_AND_DOWN} ${JSONSchemaToFunctionParameters(json_schema.schema as any)}\n` - ) - ) - } + messages = messages.filter((msg) => { + // Filter messages based on their roles. 
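+    // Note: `system` and `user` default to undefined (kept), so only an
+    // explicit `false` hides those roles; `assistant` defaults to true.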
+ switch (msg.role) { + case "system": + return system !== false; + case "user": + return user !== false; + case "assistant": + return assistant !== false; + default: + return true; } - if (tools?.length) { - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_UP_AND_RIGHT}${BOX_RIGHT}🔧 tools (${tools.length})\n` - ), - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_UP_AND_DOWN} ${tools.map((tool) => tool.function.name).join(", ")}` - ), - "\n" - ) + }); + const res: string[] = []; + if (model) { + res.push( + wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}💬 ${model} ${CHAR_ENVELOPE} ${messages.length} ${prettyTemperature(temperature)}\n`, + ), + ); + } + if (response_format) { + const { type } = response_format; + res.push(wrapColor(CONSOLE_COLOR_DEBUG, `${BOX_DOWN_UP_AND_RIGHT}${BOX_RIGHT}📦 ${type}\n`)); + if (type === "json_schema") { + const { json_schema } = response_format; + res.push( + wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_UP_AND_DOWN} ${JSONSchemaToFunctionParameters(json_schema.schema as any)}\n`, + ), + ); } + } + if (tools?.length) { + res.push( + wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_DOWN_UP_AND_RIGHT}${BOX_RIGHT}🔧 tools (${tools.length})\n`, + ), + wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_UP_AND_DOWN} ${tools.map((tool) => tool.function.name).join(", ")}`, + ), + "\n", + ); + } - if (metadata) res.push(renderMetadata(request)) + if (metadata) res.push(renderMetadata(request)); - for (const msg of messages) { - const { role } = msg - switch (role) { - case "system": - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}📙 system\n` - ), - ...(await renderMessageContent(model, msg, { - columns, - rows: msgRows(msg, system), - })) - ) - break - case "user": - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}👤 user\n` - ) - ) - res.push( - ...(await renderMessageContent(model, msg, { - columns, - rows: msgRows(msg, user), - })) - ) - break - case "assistant": - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}🤖 assistant ${msg.name ? 
msg.name : ""}\n` - ) - ) - if (msg.reasoning_content) - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_UP_AND_DOWN}${BOX_RIGHT}🤔 reasoning\n` - ), - msg.reasoning_content, - "\n" - ) - res.push( - ...(await renderMessageContent(model, msg, { - columns, - rows: msgRows(msg, assistant), - })) - ) - if (msg.tool_calls?.length) - res.push( - ...msg.tool_calls.map((call) => - renderToolCall(call, { columns }) - ) - ) - break - case "tool": - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}🔧 tool ${msg.tool_call_id || ""}\n` - ), - ...(await renderMessageContent(model, msg, { - columns, - rows: msgRows(msg, undefined), - })) - ) - break - default: - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}${role}\n` - ), - ...(await renderMessageContent(model, YAMLStringify(msg), { - columns, - rows: msgRows(msg, undefined), - })) - ) - break - } + for (const msg of messages) { + const { role } = msg; + switch (role) { + case "system": + res.push( + wrapColor(CONSOLE_COLOR_DEBUG, `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}📙 system\n`), + ...(await renderMessageContent(model, msg, { + columns, + rows: msgRows(msg, system), + })), + ); + break; + case "user": + res.push(wrapColor(CONSOLE_COLOR_DEBUG, `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}👤 user\n`)); + res.push( + ...(await renderMessageContent(model, msg, { + columns, + rows: msgRows(msg, user), + })), + ); + break; + case "assistant": + res.push( + wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}🤖 assistant ${msg.name ? msg.name : ""}\n`, + ), + ); + if (msg.reasoning_content) + res.push( + wrapColor(CONSOLE_COLOR_DEBUG, `${BOX_UP_AND_DOWN}${BOX_RIGHT}🤔 reasoning\n`), + msg.reasoning_content, + "\n", + ); + res.push( + ...(await renderMessageContent(model, msg, { + columns, + rows: msgRows(msg, assistant), + })), + ); + if (msg.tool_calls?.length) + res.push(...msg.tool_calls.map((call) => renderToolCall(call, { columns }))); + break; + case "tool": + res.push( + wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}🔧 tool ${msg.tool_call_id || ""}\n`, + ), + ...(await renderMessageContent(model, msg, { + columns, + rows: msgRows(msg, undefined), + })), + ); + break; + default: + res.push( + wrapColor(CONSOLE_COLOR_DEBUG, `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}${role}\n`), + ...(await renderMessageContent(model, YAMLStringify(msg), { + columns, + rows: msgRows(msg, undefined), + })), + ); + break; } - // Join the result array into a single markdown string. - return res.filter((s) => s !== undefined).join("") + } + // Join the result array into a single markdown string. 
+ return res.filter((s) => s !== undefined).join(""); } diff --git a/packages/core/src/chattypes.ts b/packages/core/src/chattypes.ts index f8030f6140..92b7863f37 100644 --- a/packages/core/src/chattypes.ts +++ b/packages/core/src/chattypes.ts @@ -5,203 +5,191 @@ * Tags: TypeScript, OpenAI, Chat, Types, Interfaces */ -import OpenAI from "openai" +import OpenAI from "openai"; -export type ChatModel = OpenAI.Models.Model +export type ChatModel = OpenAI.Models.Model; export type ChatModels = { - object: "list" - data: Partial[] -} -export type ChatCompletionToolChoiceOption = - OpenAI.Chat.ChatCompletionToolChoiceOption -export type ChatCompletionNamedToolChoice = - OpenAI.Chat.ChatCompletionNamedToolChoice -export type ChatCompletionReasoningEffort = OpenAI.ReasoningEffort + object: "list"; + data: Partial[]; +}; +export type ChatCompletionToolChoiceOption = OpenAI.Chat.ChatCompletionToolChoiceOption; +export type ChatCompletionNamedToolChoice = OpenAI.Chat.ChatCompletionNamedToolChoice; +export type ChatCompletionReasoningEffort = OpenAI.ReasoningEffort; // Aliases for OpenAI chat completion types export type ChatCompletionUsage = OpenAI.Completions.CompletionUsage & { - duration?: number -} + duration?: number; +}; export type ChatCompletionUsageCompletionTokensDetails = - OpenAI.Completions.CompletionUsage.CompletionTokensDetails + OpenAI.Completions.CompletionUsage.CompletionTokensDetails; export type ChatCompletionUsagePromptTokensDetails = - OpenAI.Completions.CompletionUsage.PromptTokensDetails + OpenAI.Completions.CompletionUsage.PromptTokensDetails; -export type ImageGenerationResponse = OpenAI.Images.ImagesResponse +export type ImageGenerationResponse = OpenAI.Images.ImagesResponse; // Text content part of a chat completion -export type ChatCompletionContentPartText = - OpenAI.Chat.Completions.ChatCompletionContentPartText +export type ChatCompletionContentPartText = OpenAI.Chat.Completions.ChatCompletionContentPartText; // General content part of a chat completion -export type ChatCompletionContentPart = - OpenAI.Chat.Completions.ChatCompletionContentPart +export type ChatCompletionContentPart = OpenAI.Chat.Completions.ChatCompletionContentPart; export type ChatCompletionContentPartRefusal = - OpenAI.Chat.Completions.ChatCompletionContentPartRefusal + OpenAI.Chat.Completions.ChatCompletionContentPartRefusal; export type ChatCompletionContentPartInputAudio = - OpenAI.Chat.Completions.ChatCompletionContentPartInputAudio + OpenAI.Chat.Completions.ChatCompletionContentPartInputAudio; // Tool used in a chat completion -export type ChatCompletionTool = OpenAI.Chat.Completions.ChatCompletionTool +export type ChatCompletionTool = OpenAI.Chat.Completions.ChatCompletionTool; // Chunk of a chat completion response -export type ChatCompletionChunk = OpenAI.Chat.Completions.ChatCompletionChunk -export type ChatCompletionChunkChoice = - OpenAI.Chat.Completions.ChatCompletionChunk.Choice & { - delta?: ChatCompletionMessageReasoningContentParam - } +export type ChatCompletionChunk = OpenAI.Chat.Completions.ChatCompletionChunk; +export type ChatCompletionChunkChoice = OpenAI.Chat.Completions.ChatCompletionChunk.Choice & { + delta?: ChatCompletionMessageReasoningContentParam; +}; -export type ChatCompletionTokenLogprob = OpenAI.ChatCompletionTokenLogprob +export type ChatCompletionTokenLogprob = OpenAI.ChatCompletionTokenLogprob; -export type ChatCompletion = OpenAI.Chat.Completions.ChatCompletion -export type ChatCompletionChoice = - OpenAI.Chat.Completions.ChatCompletion.Choice & { - message: 
ChatCompletionMessage - } +export type ChatCompletion = OpenAI.Chat.Completions.ChatCompletion; +export type ChatCompletionChoice = OpenAI.Chat.Completions.ChatCompletion.Choice & { + message: ChatCompletionMessage; +}; export interface ChatCompletionMessageParamCacheControl { - cacheControl?: PromptCacheControlType + cacheControl?: PromptCacheControlType; } -export type ChatCompletionMessage = - OpenAI.Chat.Completions.ChatCompletionMessage & - ChatCompletionMessageReasoningContentParam +export type ChatCompletionMessage = OpenAI.Chat.Completions.ChatCompletionMessage & + ChatCompletionMessageReasoningContentParam; // Parameters for a system message in a chat completion export type ChatCompletionSystemMessageParam = - OpenAI.Chat.Completions.ChatCompletionSystemMessageParam & - ChatCompletionMessageParamCacheControl + OpenAI.Chat.Completions.ChatCompletionSystemMessageParam & ChatCompletionMessageParamCacheControl; // Parameters for a tool message in a chat completion export type ChatCompletionToolMessageParam = - OpenAI.Chat.Completions.ChatCompletionToolMessageParam & - ChatCompletionMessageParamCacheControl + OpenAI.Chat.Completions.ChatCompletionToolMessageParam & ChatCompletionMessageParamCacheControl; export type ChatCompletionFunctionMessageParam = - OpenAI.Chat.Completions.ChatCompletionFunctionMessageParam & - ChatCompletionMessageParamCacheControl + OpenAI.Chat.Completions.ChatCompletionFunctionMessageParam & + ChatCompletionMessageParamCacheControl; /** * Type representing parameters for chat completion messages. */ export type ChatCompletionMessageParam = - | ChatCompletionSystemMessageParam - | ChatCompletionUserMessageParam - | ChatCompletionAssistantMessageParam - | ChatCompletionToolMessageParam - | ChatCompletionFunctionMessageParam + | ChatCompletionSystemMessageParam + | ChatCompletionUserMessageParam + | ChatCompletionAssistantMessageParam + | ChatCompletionToolMessageParam + | ChatCompletionFunctionMessageParam; /** * Type representing a request to create a chat completion, extending from OpenAI's * streaming parameters minus the 'messages' property. */ export type CreateChatCompletionRequest = Omit< - OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, - "messages" + OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, + "messages" > & { - /** - * A list of messages comprising the conversation so far. - */ - messages: ChatCompletionMessageParam[] -} + /** + * A list of messages comprising the conversation so far. 
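+ *
+ * @example
+ * // Illustrative only: a minimal conversation payload.
+ * const messages: ChatCompletionMessageParam[] = [
+ *   { role: "system", content: "You are terse." },
+ *   { role: "user", content: "hello" },
+ * ];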
+ */ + messages: ChatCompletionMessageParam[]; +}; export interface ChatCompletionMessageReasoningContentParam { - reasoning_content?: string - signature?: string + reasoning_content?: string; + signature?: string; } // Parameters for an assistant message in a chat completion export type ChatCompletionAssistantMessageParam = - OpenAI.Chat.Completions.ChatCompletionAssistantMessageParam & - ChatCompletionMessageParamCacheControl & - ChatCompletionMessageReasoningContentParam + OpenAI.Chat.Completions.ChatCompletionAssistantMessageParam & + ChatCompletionMessageParamCacheControl & + ChatCompletionMessageReasoningContentParam; export type ChatCompletionChunkChoiceChoiceDelta = - OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & - ChatCompletionMessageReasoningContentParam + OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & + ChatCompletionMessageReasoningContentParam; // Parameters for a user message in a chat completion export type ChatCompletionUserMessageParam = - OpenAI.Chat.Completions.ChatCompletionUserMessageParam & - ChatCompletionMessageParamCacheControl + OpenAI.Chat.Completions.ChatCompletionUserMessageParam & ChatCompletionMessageParamCacheControl; // Image content part of a chat completion -export type ChatCompletionContentPartImage = - OpenAI.Chat.Completions.ChatCompletionContentPartImage +export type ChatCompletionContentPartImage = OpenAI.Chat.Completions.ChatCompletionContentPartImage; -export type ChatCompletionMessageToolCall = - OpenAI.Chat.Completions.ChatCompletionMessageToolCall +export type ChatCompletionMessageToolCall = OpenAI.Chat.Completions.ChatCompletionMessageToolCall; // Parameters for creating embeddings -export type EmbeddingCreateParams = OpenAI.Embeddings.EmbeddingCreateParams +export type EmbeddingCreateParams = OpenAI.Embeddings.EmbeddingCreateParams; // Response type for creating embeddings -export type EmbeddingCreateResponse = OpenAI.Embeddings.CreateEmbeddingResponse +export type EmbeddingCreateResponse = OpenAI.Embeddings.CreateEmbeddingResponse; export interface EmbeddingResult { - data?: number[][] - model?: string - error?: string - status: "success" | "error" | "rate_limited" | "cancelled" + data?: number[][]; + model?: string; + error?: string; + status: "success" | "error" | "rate_limited" | "cancelled"; } /** * Interface representing a call to a chat completion tool. */ export interface ChatCompletionToolCall { - id: string // Unique identifier for the tool call - name: string // Tool name being called - arguments?: string // Optional arguments for the tool + id: string; // Unique identifier for the tool call + name: string; // Tool name being called + arguments?: string; // Optional arguments for the tool } /** * Interface representing a response from chat completion. 
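 *
 * @example
 * // Illustrative consumer of the shape; makes no provider assumptions.
 * function summarizeResponse(res: ChatCompletionResponse): string {
 *   if (res.error) return "completion failed";
 *   return `${res.model ?? "?"} (${res.finishReason}): ${res.text ?? ""}`;
 * }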
*/ export interface ChatCompletionResponse { - text?: string // Optional text response - reasoning?: string // Optional reasoning content - signature?: string // cryptographic signature of the response - cached?: boolean // Indicates if the response was cached - variables?: Record // Optional variables associated with the response - toolCalls?: ChatCompletionToolCall[] // List of tool calls made during the response - finishReason?: // Reason why the chat completion finished - "stop" | "length" | "tool_calls" | "content_filter" | "cancel" | "fail" - usage?: ChatCompletionUsage // Usage information for the completion - model?: string // Model used for the completion - error?: SerializedError - logprobs?: ChatCompletionTokenLogprob[] - duration?: number // Duration of the completion in milliseconds + text?: string; // Optional text response + reasoning?: string; // Optional reasoning content + signature?: string; // cryptographic signature of the response + cached?: boolean; // Indicates if the response was cached + variables?: Record; // Optional variables associated with the response + toolCalls?: ChatCompletionToolCall[]; // List of tool calls made during the response + finishReason?: // Reason why the chat completion finished + "stop" | "length" | "tool_calls" | "content_filter" | "cancel" | "fail"; + usage?: ChatCompletionUsage; // Usage information for the completion + model?: string; // Model used for the completion + error?: SerializedError; + logprobs?: ChatCompletionTokenLogprob[]; + duration?: number; // Duration of the completion in milliseconds } -export type ChatFinishReason = ChatCompletionResponse["finishReason"] +export type ChatFinishReason = ChatCompletionResponse["finishReason"]; // Alias for OpenAI's API error type -export const ModelError = OpenAI.APIError +export const ModelError = OpenAI.APIError; /** * Interface representing a progress report for chat completions. */ export interface ChatCompletionsProgressReport { - tokensSoFar: number // Number of tokens processed so far - responseSoFar: string // Partial response generated so far - responseChunk: string // Current chunk of response being processed - responseTokens?: Logprob[] // Tokens in the current response chunk - reasoningTokens?: Logprob[] // Tokens in the current reasoning content - reasoningSoFar?: string // Partial reasoning content generated so far - reasoningChunk?: string // Current chunk of reasoning content being processed - inner: boolean // Indicates if this is an inner report + tokensSoFar: number; // Number of tokens processed so far + responseSoFar: string; // Partial response generated so far + responseChunk: string; // Current chunk of response being processed + responseTokens?: Logprob[]; // Tokens in the current response chunk + reasoningTokens?: Logprob[]; // Tokens in the current reasoning content + reasoningSoFar?: string; // Partial reasoning content generated so far + reasoningChunk?: string; // Current chunk of reasoning content being processed + inner: boolean; // Indicates if this is an inner report } /** * Interface representing options for chat completions. 
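 *
 * @example
 * // Sketch with assumed values; `inner` is the only required field.
 * const opts: ChatCompletionsOptions = {
 *   partialCb: (p) => process.stdout.write(p.responseChunk),
 *   cache: true,
 *   retry: 3,
 *   inner: false,
 * };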
*/ export interface ChatCompletionsOptions { - partialCb?: (progress: ChatCompletionsProgressReport) => void // Callback for partial responses - requestOptions?: Partial> // Custom request options - maxCachedTemperature?: number // Max temperature for caching responses - maxCachedTopP?: number // Max top-p for caching responses - cache?: boolean | string // Cache setting or cache name - retry?: number // Number of retries for failed requests - retryDelay?: number // Delay between retries - maxDelay?: number // Maximum delay for retry attempts - inner: boolean // Indicates if the option is for inner processing + partialCb?: (progress: ChatCompletionsProgressReport) => void; // Callback for partial responses + requestOptions?: Partial>; // Custom request options + maxCachedTemperature?: number; // Max temperature for caching responses + maxCachedTopP?: number; // Max top-p for caching responses + cache?: boolean | string; // Cache setting or cache name + retry?: number; // Number of retries for failed requests + retryDelay?: number; // Delay between retries + maxDelay?: number; // Maximum delay for retry attempts + inner: boolean; // Indicates if the option is for inner processing } diff --git a/packages/core/src/chunkers.tests.ts b/packages/core/src/chunkers.tests.ts index 2a604f69ce..f1b2a1b859 100644 --- a/packages/core/src/chunkers.tests.ts +++ b/packages/core/src/chunkers.tests.ts @@ -1,78 +1,68 @@ -import { describe, it } from "node:test" -import assert from "node:assert/strict" -import { chunkString, chunkLines } from "./chunkers" +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { chunkString, chunkLines } from "./chunkers"; describe("chunkers", () => { - describe("chunkString", () => { - it("should return empty array for empty string", () => { - assert.deepEqual(chunkString(""), []) - assert.deepEqual(chunkString(null as any), []) - assert.deepEqual(chunkString(undefined as any), []) - }) + describe("chunkString", () => { + it("should return empty array for empty string", () => { + assert.deepEqual(chunkString(""), []); + assert.deepEqual(chunkString(null as any), []); + assert.deepEqual(chunkString(undefined as any), []); + }); - it("should return the string as is if smaller than chunk size", () => { - const str = "hello world" - assert.deepEqual(chunkString(str, 20), [str]) - }) + it("should return the string as is if smaller than chunk size", () => { + const str = "hello world"; + assert.deepEqual(chunkString(str, 20), [str]); + }); - it("should chunk string into equal parts", () => { - const str = "abcdefghijklmnopqrstuvwxyz" - assert.deepEqual(chunkString(str, 10), [ - "abcdefghij", - "klmnopqrst", - "uvwxyz", - ]) - }) + it("should chunk string into equal parts", () => { + const str = "abcdefghijklmnopqrstuvwxyz"; + assert.deepEqual(chunkString(str, 10), ["abcdefghij", "klmnopqrst", "uvwxyz"]); + }); - it("should use the default chunk size if not provided", () => { - const longString = "a".repeat(2 << 15) // Longer than default chunk size - const chunks = chunkString(longString) - assert(chunks.length > 1) - assert(chunks[0].length === 2 << 14) - }) - }) + it("should use the default chunk size if not provided", () => { + const longString = "a".repeat(2 << 15); // Longer than default chunk size + const chunks = chunkString(longString); + assert(chunks.length > 1); + assert(chunks[0].length === 2 << 14); + }); + }); - describe("chunkLines", () => { - it("should return empty array for empty string", () => { - 
assert.deepEqual(chunkLines(""), []) - assert.deepEqual(chunkLines(null as any), []) - assert.deepEqual(chunkLines(undefined as any), []) - }) + describe("chunkLines", () => { + it("should return empty array for empty string", () => { + assert.deepEqual(chunkLines(""), []); + assert.deepEqual(chunkLines(null as any), []); + assert.deepEqual(chunkLines(undefined as any), []); + }); - it("should return the string as is if smaller than chunk size", () => { - const str = "hello world" - assert.deepEqual(chunkLines(str, 20), [str]) - }) + it("should return the string as is if smaller than chunk size", () => { + const str = "hello world"; + assert.deepEqual(chunkLines(str, 20), [str]); + }); - it("should preserve line breaks when chunking", () => { - const str = "line1\nline2\nline3\nline4" - assert.deepEqual(chunkLines(str, 12), [ - "line1\nline2\n", - "line3\nline4\n", - ]) - }) + it("should preserve line breaks when chunking", () => { + const str = "line1\nline2\nline3\nline4"; + assert.deepEqual(chunkLines(str, 12), ["line1\nline2\n", "line3\nline4\n"]); + }); - it("should handle CRLF line endings", () => { - const str = "line1\r\nline2\r\nline3\r\nline4" - assert.deepEqual(chunkLines(str, 14), [ - "line1\nline2\n", - "line3\nline4\n", - ]) - }) + it("should handle CRLF line endings", () => { + const str = "line1\r\nline2\r\nline3\r\nline4"; + assert.deepEqual(chunkLines(str, 14), ["line1\nline2\n", "line3\nline4\n"]); + }); - it("should keep lines together even if they exceed chunk size", () => { - const str = "short\nvery_long_line_exceeding_chunk_size\nshort" - const chunks = chunkLines(str, 10) - assert.equal(chunks.length, 3) - assert.equal(chunks[0], "short\n") - assert.equal(chunks[1], "very_long_line_exceeding_chunk_size\n") - assert.equal(chunks[2], "short\n") - }) + it("should keep lines together even if they exceed chunk size", () => { + const str = "short\nvery_long_line_exceeding_chunk_size\nshort"; + const chunks = chunkLines(str, 10); + assert.equal(chunks.length, 3); + assert.equal(chunks[0], "short\n"); + assert.equal(chunks[1], "very_long_line_exceeding_chunk_size\n"); + assert.equal(chunks[2], "short\n"); + }); - it("should use the default chunk size if not provided", () => { - const longString = "line\n".repeat(2 << 13) // Longer than default chunk size - const chunks = chunkLines(longString) - assert(chunks.length > 1) - }) - }) -}) + it("should use the default chunk size if not provided", () => { + const longString = "line\n".repeat(2 << 13); // Longer than default chunk size + const chunks = chunkLines(longString); + assert(chunks.length > 1); + }); + }); +}); diff --git a/packages/core/src/chunkers.ts b/packages/core/src/chunkers.ts index cf8d9ae4b6..5b22610006 100644 --- a/packages/core/src/chunkers.ts +++ b/packages/core/src/chunkers.ts @@ -1,4 +1,4 @@ -import { assert } from "./assert" +import { assert } from "./assert"; /** * Splits a string into chunks of specified size. @@ -9,15 +9,15 @@ import { assert } from "./assert" * - Array of string chunks. Each chunk's length is <= n. 
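 *
 * @example
 * // Mirrors the unit test: equal 10-character chunks plus the remainder.
 * chunkString("abcdefghijklmnopqrstuvwxyz", 10);
 * // => ["abcdefghij", "klmnopqrst", "uvwxyz"]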
*/ export function chunkString(s: string, n: number = 2 << 14) { - if (!s?.length) return [] - if (s.length <= n) return [s] + if (!s?.length) return []; + if (s.length <= n) return [s]; - const r: string[] = [] - for (let i = 0; i < s.length; i += n) { - r.push(s.slice(i, i + n)) - assert(r[r.length - 1].length <= n) - } - return r + const r: string[] = []; + for (let i = 0; i < s.length; i += n) { + r.push(s.slice(i, i + n)); + assert(r[r.length - 1].length <= n); + } + return r; } /** @@ -28,14 +28,14 @@ export function chunkString(s: string, n: number = 2 << 14) { * @returns Array of string chunks, where each chunk consists of complete lines and has a size <= n. */ export function chunkLines(s: string, n: number = 2 << 14) { - if (!s?.length) return [] - if (s.length <= n) return [s] + if (!s?.length) return []; + if (s.length <= n) return [s]; - const r: string[] = [""] - const lines = s.split(/\r?\n/) - for (const line of lines) { - if (r[r.length - 1].length + line.length > n) r.push("") - r[r.length - 1] += line + "\n" - } - return r + const r: string[] = [""]; + const lines = s.split(/\r?\n/); + for (const line of lines) { + if (r[r.length - 1].length + line.length > n) r.push(""); + r[r.length - 1] += line + "\n"; + } + return r; } diff --git a/packages/core/src/ci.ts b/packages/core/src/ci.ts index b47d5ed8e7..a5ab681edf 100644 --- a/packages/core/src/ci.ts +++ b/packages/core/src/ci.ts @@ -1,5 +1,5 @@ -import _ci from "ci-info" +import _ci from "ci-info"; -export const ci = _ci +export const ci = _ci; -export const isCI = _ci.isCI +export const isCI = _ci.isCI; diff --git a/packages/core/src/cleaners.test.ts b/packages/core/src/cleaners.test.ts index 02e61ca4a8..2e6368cec2 100644 --- a/packages/core/src/cleaners.test.ts +++ b/packages/core/src/cleaners.test.ts @@ -1,127 +1,127 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; import { - deleteUndefinedValues, - deleteEmptyValues, - normalizeString, - normalizeFloat, - normalizeInt, - trimTrailingSlash, - normalizeVarKey, - unmarkdown, - collapseNewlines, - arrayify, - encodeIDs, - isEmptyString, -} from "./cleaners" + deleteUndefinedValues, + deleteEmptyValues, + normalizeString, + normalizeFloat, + normalizeInt, + trimTrailingSlash, + normalizeVarKey, + unmarkdown, + collapseNewlines, + arrayify, + encodeIDs, + isEmptyString, +} from "./cleaners"; describe("cleaners", () => { - test("deleteUndefinedValues", () => { - const input: any = { a: 1, b: undefined, c: "test" } - const expected = { a: 1, c: "test" } - assert.deepEqual(deleteUndefinedValues(input), expected) - }) - - test("deleteEmptyValues", () => { - const input: any = { a: 1, b: undefined, c: "", d: [], e: null } - const expected = { a: 1 } - assert.deepEqual(deleteEmptyValues(input), expected) - }) - - test("normalizeString", () => { - assert.equal(normalizeString("test"), "test") - assert.equal(normalizeString(123), "123") - assert.equal(normalizeString(true), "true") - assert.equal(normalizeString({ a: 1 }), '{"a":1}') - }) - - test("normalizeFloat", () => { - assert.equal(normalizeFloat("123.45"), 123.45) - assert.equal(normalizeFloat(123.45), 123.45) - assert.equal(normalizeFloat(true), 1) - assert.equal(normalizeFloat({}), 0) - assert.equal(normalizeFloat("invalid"), undefined) - }) - - test("normalizeInt", () => { - assert.equal(normalizeInt("123"), 123) - assert.equal(normalizeInt(123.45), 123.45) - assert.equal(normalizeInt(true), 1) - 
assert.equal(normalizeInt({}), 0) - assert.equal(normalizeInt("invalid"), undefined) - }) - - test("trimTrailingSlash", () => { - assert.equal(trimTrailingSlash("test/"), "test") - assert.equal(trimTrailingSlash("test///"), "test") - assert.equal(trimTrailingSlash("test"), "test") - }) - - test("normalizeVarKey", () => { - assert.equal(normalizeVarKey("Test-Key_123"), "testkey123") - assert.equal(normalizeVarKey("test.key"), "test.key") - }) - - test("unmarkdown", () => { - assert.equal(unmarkdown("[link](http://test.com)"), "link") - assert.equal(unmarkdown("bold"), "bold") - }) - - test("collapseNewlines", () => { - assert.equal(collapseNewlines("line1\n\n\n\nline2"), "line1\n\nline2") - assert.equal(collapseNewlines("line1\n\nline2"), "line1\n\nline2") - }) - test("arrayify", () => { - // Basic array case - assert.deepEqual(arrayify([1, 2, 3]), [1, 2, 3]) - - // Non-array conversion - assert.deepEqual(arrayify(42), [42]) - - // undefined case - assert.deepEqual(arrayify(undefined), []) - - // With filterEmpty option - assert.deepEqual( - arrayify([1, null, 2, undefined, 3, false, ""], { - filterEmpty: true, - }), - [1, 2, 3] - ) - assert.deepEqual(arrayify([1, 2, 3], { filterEmpty: false }), [1, 2, 3]) - }) - - test("isEmptyString", () => { - assert.equal(isEmptyString(""), true) - assert.equal(isEmptyString(null), true) - assert.equal(isEmptyString(undefined), true) - assert.equal(isEmptyString("text"), false) - assert.equal(isEmptyString(" "), false) - }) - - test("encodeIDs", () => { - const uuid = "123e4567-e89b-12d3-a456-426614174000" - const text = `User ${uuid} logged in.` - - const result = encodeIDs(text) - - // Test encoded text - assert.notEqual(result.encoded, text) - // Test decode function - assert.equal(result.decode(result.encoded), text) - - // Test with custom matcher - const customText = "User ABC-123-XYZ logged in." 
- const customResult = encodeIDs(customText, { - matcher: /ABC-\d+-XYZ/g, - }) - - assert.notEqual(customResult.encoded, customText) - assert.equal(customResult.decode(customResult.encoded), customText) - - // Test with multiple matches - const multiText = `${uuid} and ${uuid}` - const multiResult = encodeIDs(multiText) - assert.equal(multiResult.decode(multiResult.encoded), multiText) - }) -}) + test("deleteUndefinedValues", () => { + const input: any = { a: 1, b: undefined, c: "test" }; + const expected = { a: 1, c: "test" }; + assert.deepEqual(deleteUndefinedValues(input), expected); + }); + + test("deleteEmptyValues", () => { + const input: any = { a: 1, b: undefined, c: "", d: [], e: null }; + const expected = { a: 1 }; + assert.deepEqual(deleteEmptyValues(input), expected); + }); + + test("normalizeString", () => { + assert.equal(normalizeString("test"), "test"); + assert.equal(normalizeString(123), "123"); + assert.equal(normalizeString(true), "true"); + assert.equal(normalizeString({ a: 1 }), '{"a":1}'); + }); + + test("normalizeFloat", () => { + assert.equal(normalizeFloat("123.45"), 123.45); + assert.equal(normalizeFloat(123.45), 123.45); + assert.equal(normalizeFloat(true), 1); + assert.equal(normalizeFloat({}), 0); + assert.equal(normalizeFloat("invalid"), undefined); + }); + + test("normalizeInt", () => { + assert.equal(normalizeInt("123"), 123); + assert.equal(normalizeInt(123.45), 123.45); + assert.equal(normalizeInt(true), 1); + assert.equal(normalizeInt({}), 0); + assert.equal(normalizeInt("invalid"), undefined); + }); + + test("trimTrailingSlash", () => { + assert.equal(trimTrailingSlash("test/"), "test"); + assert.equal(trimTrailingSlash("test///"), "test"); + assert.equal(trimTrailingSlash("test"), "test"); + }); + + test("normalizeVarKey", () => { + assert.equal(normalizeVarKey("Test-Key_123"), "testkey123"); + assert.equal(normalizeVarKey("test.key"), "test.key"); + }); + + test("unmarkdown", () => { + assert.equal(unmarkdown("[link](http://test.com)"), "link"); + assert.equal(unmarkdown("bold"), "bold"); + }); + + test("collapseNewlines", () => { + assert.equal(collapseNewlines("line1\n\n\n\nline2"), "line1\n\nline2"); + assert.equal(collapseNewlines("line1\n\nline2"), "line1\n\nline2"); + }); + test("arrayify", () => { + // Basic array case + assert.deepEqual(arrayify([1, 2, 3]), [1, 2, 3]); + + // Non-array conversion + assert.deepEqual(arrayify(42), [42]); + + // undefined case + assert.deepEqual(arrayify(undefined), []); + + // With filterEmpty option + assert.deepEqual( + arrayify([1, null, 2, undefined, 3, false, ""], { + filterEmpty: true, + }), + [1, 2, 3], + ); + assert.deepEqual(arrayify([1, 2, 3], { filterEmpty: false }), [1, 2, 3]); + }); + + test("isEmptyString", () => { + assert.equal(isEmptyString(""), true); + assert.equal(isEmptyString(null), true); + assert.equal(isEmptyString(undefined), true); + assert.equal(isEmptyString("text"), false); + assert.equal(isEmptyString(" "), false); + }); + + test("encodeIDs", () => { + const uuid = "123e4567-e89b-12d3-a456-426614174000"; + const text = `User ${uuid} logged in.`; + + const result = encodeIDs(text); + + // Test encoded text + assert.notEqual(result.encoded, text); + // Test decode function + assert.equal(result.decode(result.encoded), text); + + // Test with custom matcher + const customText = "User ABC-123-XYZ logged in."; + const customResult = encodeIDs(customText, { + matcher: /ABC-\d+-XYZ/g, + }); + + assert.notEqual(customResult.encoded, customText); + 
assert.equal(customResult.decode(customResult.encoded), customText); + + // Test with multiple matches + const multiText = `${uuid} and ${uuid}`; + const multiResult = encodeIDs(multiText); + assert.equal(multiResult.decode(multiResult.encoded), multiText); + }); +}); diff --git a/packages/core/src/cleaners.ts b/packages/core/src/cleaners.ts index 15c90cbe62..c282aca7e6 100644 --- a/packages/core/src/cleaners.ts +++ b/packages/core/src/cleaners.ts @@ -6,20 +6,17 @@ * @param options.filterEmpty - If true, removes falsy values from the resulting array. * @returns An array derived from the input. If the input is undefined, returns an empty array. If the input is already an array, returns a shallow copy of it. */ -export function arrayify( - a: T | T[], - options?: { filterEmpty?: boolean } -): T[] { - const { filterEmpty } = options || {} +export function arrayify(a: T | T[], options?: { filterEmpty?: boolean }): T[] { + const { filterEmpty } = options || {}; - let r: T[] - if (a === undefined) r = [] - else if (Array.isArray(a)) r = a.slice(0) - else r = [a] + let r: T[]; + if (a === undefined) r = []; + else if (Array.isArray(a)) r = a.slice(0); + else r = [a]; - if (filterEmpty) return r.filter((f) => !!f) + if (filterEmpty) return r.filter((f) => !!f); - return r + return r; } /** @@ -32,14 +29,13 @@ export function arrayify( * @returns The object with `undefined` values removed. */ export function deleteUndefinedValues>(o: T): T { - if (typeof o === "object" && Object.isFrozen(o)) { - const res: any = {} - for (const k in o) if (o[k] !== undefined) res[k] = o[k] - return res as T - } - if (typeof o === "object") - for (const k in o) if (o[k] === undefined) delete o[k] - return o + if (typeof o === "object" && Object.isFrozen(o)) { + const res: any = {}; + for (const k in o) if (o[k] !== undefined) res[k] = o[k]; + return res as T; + } + if (typeof o === "object") for (const k in o) if (o[k] === undefined) delete o[k]; + return o; } /** @@ -49,18 +45,12 @@ export function deleteUndefinedValues>(o: T): T { * @returns The object with empty values removed. */ export function deleteEmptyValues>(o: T): T { - if (typeof o === "object") - for (const k in o) { - const v = o[k] - if ( - v === undefined || - v === null || - v === "" || - (Array.isArray(v) && !v.length) - ) - delete o[k] - } - return o + if (typeof o === "object") + for (const k in o) { + const v = o[k]; + if (v === undefined || v === null || v === "" || (Array.isArray(v) && !v.length)) delete o[k]; + } + return o; } /** @@ -75,11 +65,11 @@ export function deleteEmptyValues>(o: T): T { * @returns The normalized string representation of the input value, or undefined if the input value type is unsupported. */ export function normalizeString(s: string | number | boolean | object): string { - if (typeof s === "string") return s - else if (typeof s === "number") return s.toLocaleString() - else if (typeof s === "boolean") return s ? "true" : "false" - else if (typeof s === "object") return JSON.stringify(s) - else return undefined + if (typeof s === "string") return s; + else if (typeof s === "number") return s.toLocaleString(); + else if (typeof s === "boolean") return s ? "true" : "false"; + else if (typeof s === "object") return JSON.stringify(s); + else return undefined; } /** @@ -94,13 +84,13 @@ export function normalizeString(s: string | number | boolean | object): string { * @returns The floating-point representation of the input or undefined if conversion is not possible. 
*/ export function normalizeFloat(s: string | number | boolean | object): number { - if (typeof s === "string") { - const f = parseFloat(s) - return isNaN(f) ? undefined : f - } else if (typeof s === "number") return s - else if (typeof s === "boolean") return s ? 1 : 0 - else if (typeof s === "object") return 0 - else return undefined + if (typeof s === "string") { + const f = parseFloat(s); + return isNaN(f) ? undefined : f; + } else if (typeof s === "number") return s; + else if (typeof s === "boolean") return s ? 1 : 0; + else if (typeof s === "object") return 0; + else return undefined; } /** @@ -115,14 +105,14 @@ export function normalizeFloat(s: string | number | boolean | object): number { * @returns The converted integer or undefined if conversion is not possible. */ export function normalizeInt(s: string | number | boolean | object): number { - if (s === undefined) return undefined - if (typeof s === "string") { - const f = parseInt(s) - return isNaN(f) ? undefined : f - } else if (typeof s === "number") return s - else if (typeof s === "boolean") return s ? 1 : 0 - else if (typeof s === "object") return 0 - else return undefined + if (s === undefined) return undefined; + if (typeof s === "string") { + const f = parseInt(s); + return isNaN(f) ? undefined : f; + } else if (typeof s === "number") return s; + else if (typeof s === "boolean") return s ? 1 : 0; + else if (typeof s === "object") return 0; + else return undefined; } /** @@ -135,11 +125,11 @@ export function normalizeInt(s: string | number | boolean | object): number { * or `undefined` if the input does not match either. */ export function normalizeBoolean(s: string) { - return /^\s*(y|yes|true|ok)\s*$/i.test(s) - ? true - : /^\s*(n|no|false|ok)\s*$/i.test(s) - ? false - : undefined + return /^\s*(y|yes|true|ok)\s*$/i.test(s) + ? true + : /^\s*(n|no|false|ok)\s*$/i.test(s) + ? false + : undefined; } /** @@ -149,12 +139,12 @@ export function normalizeBoolean(s: string) { * @returns The input string with trailing slashes removed, or the original string if no trailing slashes are present. */ export function trimTrailingSlash(s: string) { - return s?.replace(/\/{1,10}$/, "") + return s?.replace(/\/{1,10}$/, ""); } export function ensureHeadSlash(s: string) { - if (s?.startsWith("/")) return s - return "/" + s + if (s?.startsWith("/")) return s; + return "/" + s; } /** @@ -165,7 +155,7 @@ export function ensureHeadSlash(s: string) { * @returns The normalized variable name as a string. */ export function normalizeVarKey(key: string) { - return key.toLowerCase().replace(/[^a-z0-9\.]/g, "") + return key.toLowerCase().replace(/[^a-z0-9\.]/g, ""); } /** @@ -177,9 +167,7 @@ export function normalizeVarKey(key: string) { * and HTML tags removed. */ export function unmarkdown(text: string) { - return text - ?.replace(/\[([^\]]+)\]\([^)]+\)/g, (m, n) => n) - ?.replace(/<\/?([^>]+)>/g, "") + return text?.replace(/\[([^\]]+)\]\([^)]+\)/g, (m, n) => n)?.replace(/<\/?([^>]+)>/g, ""); } /** @@ -189,7 +177,7 @@ export function unmarkdown(text: string) { * @returns The processed string with collapsed newlines. */ export function collapseNewlines(res: string): string { - return res?.replace(/(\r?\n){3,}/g, "\n\n") + return res?.replace(/(\r?\n){3,}/g, "\n\n"); } /** @@ -199,11 +187,11 @@ export function collapseNewlines(res: string): string { * @returns True if the string is null, undefined, or an empty string; otherwise, false. 
*/ export function isEmptyString(s: string) { - return s === null || s === undefined || s === "" + return s === null || s === undefined || s === ""; } function escapeRegExp(str: string): string { - return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") // $& means the whole matched string + return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string } /** @@ -213,36 +201,32 @@ function escapeRegExp(str: string): string { * @returns An object containing the encoded text, original text, a decode function to revert encoded IDs, the matcher regex, and a mapping of encoded IDs to original values. The decode function replaces encoded IDs with their original values. */ export function encodeIDs( - text: string, - options?: EncodeIDsOptions + text: string, + options?: EncodeIDsOptions, ): { - encoded: string - text: string - decode: (text: string) => string - matcher: RegExp - ids: Record + encoded: string; + text: string; + decode: (text: string) => string; + matcher: RegExp; + ids: Record; } { - const { - matcher = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, - prefix = "id", - open = "{|", - close = "|}", - } = options || {} - - const ids: Record = {} - let idCounter = 0 - const encoded = text?.replace(matcher, (match, id) => { - const encoded = `${open}${prefix}${idCounter++}${close}` - ids[encoded] = match - return encoded - }) - - const drx = new RegExp( - `${escapeRegExp(open)}${prefix}(\\d+)${escapeRegExp(close)}`, - "g" - ) - const decode = (text: string) => - text?.replace(drx, (encoded) => ids[encoded]) - - return { text, encoded, decode, matcher, ids } + const { + matcher = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, + prefix = "id", + open = "{|", + close = "|}", + } = options || {}; + + const ids: Record = {}; + let idCounter = 0; + const encoded = text?.replace(matcher, (match, id) => { + const encoded = `${open}${prefix}${idCounter++}${close}`; + ids[encoded] = match; + return encoded; + }); + + const drx = new RegExp(`${escapeRegExp(open)}${prefix}(\\d+)${escapeRegExp(close)}`, "g"); + const decode = (text: string) => text?.replace(drx, (encoded) => ids[encoded]); + + return { text, encoded, decode, matcher, ids }; } diff --git a/packages/core/src/clihelp.ts b/packages/core/src/clihelp.ts index 0fcf038f4a..0c6e09e209 100644 --- a/packages/core/src/clihelp.ts +++ b/packages/core/src/clihelp.ts @@ -1,10 +1,10 @@ -import { NPM_CLI_PACKAGE } from "./constants" -import { GenerationOptions } from "./generation" -import { MarkdownTrace } from "./trace" -import { arrayify, relativePath } from "./util" -import { CORE_VERSION } from "./version" -import { host } from "./host" -import { isCI } from "./ci" +import { NPM_CLI_PACKAGE } from "./constants"; +import { GenerationOptions } from "./generation"; +import { MarkdownTrace } from "./trace"; +import { arrayify, relativePath } from "./util"; +import { CORE_VERSION } from "./version"; +import { host } from "./host"; +import { isCI } from "./ci"; /** * Generates command-line arguments for executing or batching a CLI prompt template. @@ -28,38 +28,30 @@ import { isCI } from "./ci" * - CLI utilizes the latest compatible version of the CLI package defined in constants. 
*/ export function generateCliArguments( - template: PromptScript, - options: GenerationOptions, - command: "run" | "batch" + template: PromptScript, + options: GenerationOptions, + command: "run" | "batch", ) { - const { - model, - temperature, - reasoningEffort, - fallbackTools, - topP, - seed, - cliInfo, - } = options - const { files = [] } = cliInfo || {} + const { model, temperature, reasoningEffort, fallbackTools, topP, seed, cliInfo } = options; + const { files = [] } = cliInfo || {}; - const cli = [ - "npx", - "--yes", - `${NPM_CLI_PACKAGE}@^${CORE_VERSION}`, - command, - template.id, - ...files.map((f) => `"${relativePath(host.projectFolder(), f)}"`), - "--apply-edits", - ] - if (model) cli.push(`--model`, model) - if (!isNaN(temperature)) cli.push(`--temperature`, temperature + "") - if (!isNaN(topP)) cli.push(`--top-p`, topP + "") - if (!isNaN(seed)) cli.push("--seed", seed + "") - if (reasoningEffort) cli.push("--reasoning-effort", reasoningEffort) - if (fallbackTools) cli.push("--fallback-tools") + const cli = [ + "npx", + "--yes", + `${NPM_CLI_PACKAGE}@^${CORE_VERSION}`, + command, + template.id, + ...files.map((f) => `"${relativePath(host.projectFolder(), f)}"`), + "--apply-edits", + ]; + if (model) cli.push(`--model`, model); + if (!isNaN(temperature)) cli.push(`--temperature`, temperature + ""); + if (!isNaN(topP)) cli.push(`--top-p`, topP + ""); + if (!isNaN(seed)) cli.push("--seed", seed + ""); + if (reasoningEffort) cli.push("--reasoning-effort", reasoningEffort); + if (fallbackTools) cli.push("--fallback-tools"); - return cli.join(" ") + return cli.join(" "); } /** @@ -75,15 +67,15 @@ export function generateCliArguments( * - If applicable, the CLI command for testing the template if associated tests are defined. */ export function traceCliArgs( - trace: MarkdownTrace, - template: PromptScript, - options: GenerationOptions + trace: MarkdownTrace, + template: PromptScript, + options: GenerationOptions, ) { - if (isCI) return + if (isCI) return; - trace.details( - "🤖 automation", - `Use the command line interface [run](https://microsoft.github.io/genaiscript/reference/cli/run/) to automate this task: + trace.details( + "🤖 automation", + `Use the command line interface [run](https://microsoft.github.io/genaiscript/reference/cli/run/) to automate this task: \`\`\`bash ${generateCliArguments(template, options, "run")} @@ -92,19 +84,19 @@ ${generateCliArguments(template, options, "run")} - You will need to install [Node.js LTS](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm). - The cli uses the same secrets in the \`.env\` file. 
-` - ) +`, + ); - if (arrayify(template.tests)?.length) { - trace.details( - "🧪 testing", - ` + if (arrayify(template.tests)?.length) { + trace.details( + "🧪 testing", + ` Use the command line interface [test](https://microsoft.github.io/genaiscript/reference/cli/test) to run the tests for this script: \`\`\`sh npx --yes genaiscript test ${template.id} \`\`\` -` - ) - } +`, + ); + } } diff --git a/packages/core/src/clone.test.ts b/packages/core/src/clone.test.ts index 9ce7a17ded..ece7daefdc 100644 --- a/packages/core/src/clone.test.ts +++ b/packages/core/src/clone.test.ts @@ -1,21 +1,21 @@ -import { cleanedClone } from "./clone" -import { describe, test } from "node:test" -import assert from "node:assert/strict" +import { cleanedClone } from "./clone"; +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; describe("cleanedClone", () => { - test("clones and cleans simple object", () => { - const input: any = { - a: 1, - b: "", - c: null, - d: undefined, - e: 0, - } - const expected = { - a: 1, - e: 0, - } - const result = cleanedClone(input) - assert.deepStrictEqual(result, expected) - }) -}) + test("clones and cleans simple object", () => { + const input: any = { + a: 1, + b: "", + c: null, + d: undefined, + e: 0, + }; + const expected = { + a: 1, + e: 0, + }; + const result = cleanedClone(input); + assert.deepStrictEqual(result, expected); + }); +}); diff --git a/packages/core/src/clone.ts b/packages/core/src/clone.ts index c918b4f4a1..6388529103 100644 --- a/packages/core/src/clone.ts +++ b/packages/core/src/clone.ts @@ -1,4 +1,4 @@ -import { deleteEmptyValues } from "./cleaners" +import { deleteEmptyValues } from "./cleaners"; /** * Creates a deep clone of the input object and removes any properties with empty values. @@ -7,7 +7,7 @@ import { deleteEmptyValues } from "./cleaners" * @returns A cleaned, deep-cloned version of the input object with empty values removed. */ export function cleanedClone(o: any) { - const c = structuredClone(o) - deleteEmptyValues(c) - return c + const c = structuredClone(o); + deleteEmptyValues(c); + return c; } diff --git a/packages/core/src/concurrency.ts b/packages/core/src/concurrency.ts index c7e50a704d..d33347bede 100644 --- a/packages/core/src/concurrency.ts +++ b/packages/core/src/concurrency.ts @@ -1,9 +1,9 @@ -import pLimit, { LimitFunction } from "p-limit" -import { runtimeHost } from "./host" -import { normalizeInt } from "./cleaners" -import { PROMISE_QUEUE_CONCURRENCY_DEFAULT } from "./constants" +import pLimit, { LimitFunction } from "p-limit"; +import { runtimeHost } from "./host"; +import { normalizeInt } from "./cleaners"; +import { PROMISE_QUEUE_CONCURRENCY_DEFAULT } from "./constants"; -export type ConcurrentLimitFunction = LimitFunction +export type ConcurrentLimitFunction = LimitFunction; /** * Creates or retrieves a concurrency-limited function for managing asynchronous operations. @@ -13,53 +13,44 @@ export type ConcurrentLimitFunction = LimitFunction * Will be normalized to a minimum value of 1. * @returns A concurrency-limited function. 
*/ -export function concurrentLimit( - id: string, - concurrency: number -): ConcurrentLimitFunction { - concurrency = Math.max(1, normalizeInt(concurrency)) - let limit = runtimeHost.userState["limit:" + id] - if (!limit) { - limit = pLimit(concurrency) - runtimeHost.userState["limit:" + id] = limit - } else if (limit.concurrency > 0) limit.concurrency = concurrency - return limit +export function concurrentLimit(id: string, concurrency: number): ConcurrentLimitFunction { + concurrency = Math.max(1, normalizeInt(concurrency)); + let limit = runtimeHost.userState["limit:" + id]; + if (!limit) { + limit = pLimit(concurrency); + runtimeHost.userState["limit:" + id] = limit; + } else if (limit.concurrency > 0) limit.concurrency = concurrency; + return limit; } export class PLimitPromiseQueue implements PromiseQueue { - private queue: LimitFunction - constructor(concurrency?: number) { - const c = isNaN(concurrency) - ? PROMISE_QUEUE_CONCURRENCY_DEFAULT - : concurrency - this.queue = pLimit(Math.max(1, c)) - } + private queue: LimitFunction; + constructor(concurrency?: number) { + const c = isNaN(concurrency) ? PROMISE_QUEUE_CONCURRENCY_DEFAULT : concurrency; + this.queue = pLimit(Math.max(1, c)); + } - async mapAll( - values: T[], - fn: (value: T, ...arguments_: Arguments) => Awaitable, - ...arguments_: Arguments - ): Promise { - return await Promise.all( - values.map((value) => this.queue(fn, value, ...arguments_)) - ) - } + async mapAll( + values: T[], + fn: (value: T, ...arguments_: Arguments) => Awaitable, + ...arguments_: Arguments + ): Promise { + return await Promise.all(values.map((value) => this.queue(fn, value, ...arguments_))); + } - async all(fns: (() => Awaitable)[]): Promise { - return await Promise.all(fns.map((fn) => this.queue(fn))) - } + async all(fns: (() => Awaitable)[]): Promise { + return await Promise.all(fns.map((fn) => this.queue(fn))); + } - add( - function_: ( - ...arguments_: Arguments - ) => PromiseLike | ReturnType, - ...arguments_: Arguments - ): Promise { - const res = this.queue(function_, ...arguments_) - return res - } + add( + function_: (...arguments_: Arguments) => PromiseLike | ReturnType, + ...arguments_: Arguments + ): Promise { + const res = this.queue(function_, ...arguments_); + return res; + } - clear() { - this.queue.clearQueue() - } + clear() { + this.queue.clearQueue(); + } } diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index 38adf232c1..0ea8d77b8b 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -1,147 +1,120 @@ -import dotenv from "dotenv" -import { homedir } from "os" -import { YAMLTryParse } from "./yaml" -import { JSON5TryParse } from "./json5" +import dotenv from "dotenv"; +import { homedir } from "os"; +import { YAMLTryParse } from "./yaml"; +import { JSON5TryParse } from "./json5"; import { - DOT_ENV_FILENAME, - DOT_ENV_GENAISCRIPT_FILENAME, - MODEL_PROVIDERS, - TOOL_ID, -} from "./constants" -import { join, resolve } from "path" -import { validateJSONWithSchema } from "./schema" -import { HostConfiguration } from "./hostconfiguration" -import { structuralMerge } from "./merge" -import { - LanguageModelConfiguration, - ResolvedLanguageModelConfiguration, -} from "./server/messages" -import { resolveLanguageModel } from "./lm" -import { arrayify, deleteEmptyValues } from "./cleaners" -import { errorMessage } from "./error" -import schema from "../../../docs/public/schemas/config.json" -import defaultConfig from "./config.json" -import { CancellationOptions } from "./cancellation" 
-import { host } from "./host" -import { uniq } from "es-toolkit" -import { expandHomeDir, tryReadText, tryStat } from "./fs" -import { parseDefaultsFromEnv } from "./env" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("config") + DOT_ENV_FILENAME, + DOT_ENV_GENAISCRIPT_FILENAME, + MODEL_PROVIDERS, + TOOL_ID, +} from "./constants"; +import { join, resolve } from "path"; +import { validateJSONWithSchema } from "./schema"; +import { HostConfiguration } from "./hostconfiguration"; +import { structuralMerge } from "./merge"; +import { LanguageModelConfiguration, ResolvedLanguageModelConfiguration } from "./server/messages"; +import { resolveLanguageModel } from "./lm"; +import { arrayify, deleteEmptyValues } from "./cleaners"; +import { errorMessage } from "./error"; +import schema from "../../../docs/public/schemas/config.json"; +import defaultConfig from "./config.json"; +import { CancellationOptions } from "./cancellation"; +import { host } from "./host"; +import { uniq } from "es-toolkit"; +import { expandHomeDir, tryReadText, tryStat } from "./fs"; +import { parseDefaultsFromEnv } from "./env"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("config"); export function mergeHostConfigs( - config: HostConfiguration, - parsed: HostConfiguration + config: HostConfiguration, + parsed: HostConfiguration, ): HostConfiguration { - if (!config && !parsed) return undefined - if (!parsed) return config - return deleteEmptyValues({ - include: structuralMerge(config?.include || [], parsed?.include || []), - envFile: [...arrayify(parsed?.envFile), ...arrayify(config?.envFile)], - ignoreCurrentWorkspace: - config?.ignoreCurrentWorkspace || parsed?.ignoreCurrentWorkspace, - modelAliases: structuralMerge( - config?.modelAliases || {}, - parsed?.modelAliases || {} - ), - modelEncodings: structuralMerge( - config?.modelEncodings || {}, - parsed?.modelEncodings || {} - ), - secretScanners: structuralMerge( - config?.secretPatterns || {}, - parsed?.secretPatterns || {} - ), - }) + if (!config && !parsed) return undefined; + if (!parsed) return config; + return deleteEmptyValues({ + include: structuralMerge(config?.include || [], parsed?.include || []), + envFile: [...arrayify(parsed?.envFile), ...arrayify(config?.envFile)], + ignoreCurrentWorkspace: config?.ignoreCurrentWorkspace || parsed?.ignoreCurrentWorkspace, + modelAliases: structuralMerge(config?.modelAliases || {}, parsed?.modelAliases || {}), + modelEncodings: structuralMerge(config?.modelEncodings || {}, parsed?.modelEncodings || {}), + secretScanners: structuralMerge(config?.secretPatterns || {}, parsed?.secretPatterns || {}), + }); } async function resolveGlobalConfiguration( - dotEnvPaths: string[], - hostConfig: HostConfiguration + dotEnvPaths: string[], + hostConfig: HostConfiguration, ): Promise { - const dirs = [homedir(), "."] - const exts = ["yml", "yaml", "json"] + const dirs = [homedir(), "."]; + const exts = ["yml", "yaml", "json"]; - dbg("starting to resolve global configuration") - // import and merge global local files - let config: HostConfiguration = structuredClone(defaultConfig) - delete (config as any)["$schema"] - dbg(`loaded defaultConfig: %O`, config) + dbg("starting to resolve global configuration"); + // import and merge global local files + let config: HostConfiguration = structuredClone(defaultConfig); + delete (config as any)["$schema"]; + dbg(`loaded defaultConfig: %O`, config); - // merge host configuration - if (hostConfig && Object.keys(hostConfig).length > 0) { - 
dbg(`merging host configuration %O`, hostConfig) - config = mergeHostConfigs(config, hostConfig) - } + // merge host configuration + if (hostConfig && Object.keys(hostConfig).length > 0) { + dbg(`merging host configuration %O`, hostConfig); + config = mergeHostConfigs(config, hostConfig); + } - for (const dir of dirs) { - for (const ext of exts) { - const filename = resolve(dir, `${TOOL_ID}.config.${ext}`) - dbg(`checking file: ${filename}`) - const stat = await tryStat(filename) - if (!stat) continue - if (!stat.isFile()) { - dbg(`skipping ${filename}, not a file`) - throw new Error(`config: ${filename} is a not a file`) - } - const fileContent = await tryReadText(filename) - if (!fileContent) { - dbg(`skipping ${filename}, no content`) - continue - } - dbg(`loading ${filename}`) - const parsed: HostConfiguration = - ext === "yml" || ext === "yaml" - ? YAMLTryParse(fileContent) - : JSON5TryParse(fileContent) - if (!parsed) { - dbg(`failed to parse ${filename}`) - throw new Error(`config: failed to parse ${filename}`) - } - dbg("validating config schema") - const validation = validateJSONWithSchema( - parsed, - schema as JSONSchema - ) - if (validation.schemaError) { - dbg( - `validation error for ${filename}: ${validation.schemaError}` - ) - throw new Error(`config: ` + validation.schemaError) - } - dbg(`merging parsed configuration %O`, parsed) - config = mergeHostConfigs(config, parsed) - } + for (const dir of dirs) { + for (const ext of exts) { + const filename = resolve(dir, `${TOOL_ID}.config.${ext}`); + dbg(`checking file: ${filename}`); + const stat = await tryStat(filename); + if (!stat) continue; + if (!stat.isFile()) { + dbg(`skipping ${filename}, not a file`); + throw new Error(`config: ${filename} is a not a file`); + } + const fileContent = await tryReadText(filename); + if (!fileContent) { + dbg(`skipping ${filename}, no content`); + continue; + } + dbg(`loading ${filename}`); + const parsed: HostConfiguration = + ext === "yml" || ext === "yaml" ? 
YAMLTryParse(fileContent) : JSON5TryParse(fileContent); + if (!parsed) { + dbg(`failed to parse ${filename}`); + throw new Error(`config: failed to parse ${filename}`); + } + dbg("validating config schema"); + const validation = validateJSONWithSchema(parsed, schema as JSONSchema); + if (validation.schemaError) { + dbg(`validation error for ${filename}: ${validation.schemaError}`); + throw new Error(`config: ` + validation.schemaError); + } + dbg(`merging parsed configuration %O`, parsed); + config = mergeHostConfigs(config, parsed); } + } - if (process.env.GENAISCRIPT_ENV_FILE) { - dbg( - `adding env file from environment variable: '${process.env.GENAISCRIPT_ENV_FILE}'` - ) - config.envFile = [ - ...(config.envFile || []), - process.env.GENAISCRIPT_ENV_FILE, - ] - } - if (dotEnvPaths?.length) { - dbg(`adding env files from CLI: '${dotEnvPaths.join(", ")}'`) - config.envFile = [...(config.envFile || []), ...dotEnvPaths] - } + if (process.env.GENAISCRIPT_ENV_FILE) { + dbg(`adding env file from environment variable: '${process.env.GENAISCRIPT_ENV_FILE}'`); + config.envFile = [...(config.envFile || []), process.env.GENAISCRIPT_ENV_FILE]; + } + if (dotEnvPaths?.length) { + dbg(`adding env files from CLI: '${dotEnvPaths.join(", ")}'`); + config.envFile = [...(config.envFile || []), ...dotEnvPaths]; + } - if (!config.envFile?.length) { - dbg("no env files found, using defaults") - config.envFile = [ - join(homedir(), DOT_ENV_GENAISCRIPT_FILENAME), - DOT_ENV_GENAISCRIPT_FILENAME, - DOT_ENV_FILENAME, - ] - } - dbg("resolving env file paths") - config.envFile = uniq( - arrayify(config.envFile).map((f) => expandHomeDir(resolve(f))) - ) - dbg(`resolved env files: ${config.envFile.join(", ")}`) - return config + if (!config.envFile?.length) { + dbg("no env files found, using defaults"); + config.envFile = [ + join(homedir(), DOT_ENV_GENAISCRIPT_FILENAME), + DOT_ENV_GENAISCRIPT_FILENAME, + DOT_ENV_FILENAME, + ]; + } + dbg("resolving env file paths"); + config.envFile = uniq(arrayify(config.envFile).map((f) => expandHomeDir(resolve(f)))); + dbg(`resolved env files: ${config.envFile.join(", ")}`); + return config; } /** @@ -162,34 +135,34 @@ async function resolveGlobalConfiguration( * @throws An error if any provided `.env` file is invalid, unreadable, or not a file. 
*/ export async function readHostConfig( - dotEnvPaths: string[], - hostConfig: HostConfiguration + dotEnvPaths: string[], + hostConfig: HostConfiguration, ): Promise { - dbg(`reading configuration`) - const config = await resolveGlobalConfiguration(dotEnvPaths, hostConfig) - const { envFile } = config - for (const dotEnv of arrayify(envFile)) { - dbg(`.env: ${dotEnv}`) - const stat = await tryStat(dotEnv) - if (!stat) { - dbg(`ignored ${dotEnv}, not found`) - } else { - if (!stat.isFile()) { - throw new Error(`.env: ${dotEnv} is not a file`) - } - dbg(`loading ${dotEnv}`) - const res = dotenv.config({ - path: dotEnv, - debug: /dotenv/.test(process.env.DEBUG), - override: true, - }) - if (res.error) { - throw res.error - } - } + dbg(`reading configuration`); + const config = await resolveGlobalConfiguration(dotEnvPaths, hostConfig); + const { envFile } = config; + for (const dotEnv of arrayify(envFile)) { + dbg(`.env: ${dotEnv}`); + const stat = await tryStat(dotEnv); + if (!stat) { + dbg(`ignored ${dotEnv}, not found`); + } else { + if (!stat.isFile()) { + throw new Error(`.env: ${dotEnv} is not a file`); + } + dbg(`loading ${dotEnv}`); + const res = dotenv.config({ + path: dotEnv, + debug: /dotenv/.test(process.env.DEBUG), + override: true, + }); + if (res.error) { + throw res.error; + } } - await parseDefaultsFromEnv(process.env) - return config + } + await parseDefaultsFromEnv(process.env); + return config; } /** @@ -205,83 +178,68 @@ export async function readHostConfig( * @throws An error if there is an issue retrieving or processing configurations for a provider. */ export async function resolveLanguageModelConfigurations( - provider: string, - options?: { - token?: boolean - error?: boolean - models?: boolean - hide?: boolean - } & CancellationOptions + provider: string, + options?: { + token?: boolean; + error?: boolean; + models?: boolean; + hide?: boolean; + } & CancellationOptions, ): Promise { - const { token, error, models, hide } = options || {} - const res: ResolvedLanguageModelConfiguration[] = [] - dbg("starting to resolve language model configurations") + const { token, error, models, hide } = options || {}; + const res: ResolvedLanguageModelConfiguration[] = []; + dbg("starting to resolve language model configurations"); - for (const modelProvider of MODEL_PROVIDERS.filter( - (mp) => (!provider || mp.id === provider) && (!hide || !mp.hidden) - )) { - dbg(`processing model provider: ${modelProvider.id}, token: ${token}`) - try { - const conn: LanguageModelConfiguration & { - models?: LanguageModelInfo[] - } = await host.getLanguageModelConfiguration( - modelProvider.id + ":*", - options - ) - if (conn) { - dbg( - `retrieved connection configuration for provider: ${modelProvider.id}` - ) - let listError = "" - if (models && token) { - dbg(`listing models for provider: ${modelProvider.id}`) - const lm = await resolveLanguageModel(modelProvider.id) - if (lm.listModels) { - const models = await lm.listModels(conn, options) - if (models.ok) { - dbg( - `successfully listed models for provider: ${modelProvider.id}` - ) - conn.models = models.models - } else { - listError = - errorMessage(models.error) || - "failed to list models" - dbg( - `error listing models for provider ${modelProvider.id}: ${listError}` - ) - } - } - } - if (!token && conn.token) conn.token = "***" - if (!listError || error || provider) { - dbg( - `adding resolved configuration for provider: ${modelProvider.id}` - ) - res.push( - deleteEmptyValues({ - provider: conn.provider, - source: conn.source, - base: 
conn.base, - type: conn.type, - models: conn.models, - error: listError, - token: conn.token, - }) - ) - } + for (const modelProvider of MODEL_PROVIDERS.filter( + (mp) => (!provider || mp.id === provider) && (!hide || !mp.hidden), + )) { + dbg(`processing model provider: ${modelProvider.id}, token: ${token}`); + try { + const conn: LanguageModelConfiguration & { + models?: LanguageModelInfo[]; + } = await host.getLanguageModelConfiguration(modelProvider.id + ":*", options); + if (conn) { + dbg(`retrieved connection configuration for provider: ${modelProvider.id}`); + let listError = ""; + if (models && token) { + dbg(`listing models for provider: ${modelProvider.id}`); + const lm = await resolveLanguageModel(modelProvider.id); + if (lm.listModels) { + const models = await lm.listModels(conn, options); + if (models.ok) { + dbg(`successfully listed models for provider: ${modelProvider.id}`); + conn.models = models.models; + } else { + listError = errorMessage(models.error) || "failed to list models"; + dbg(`error listing models for provider ${modelProvider.id}: ${listError}`); } - } catch (e) { - dbg( - `error resolving configuration for provider ${modelProvider.id}: ${e}` - ) - if (error || provider) - res.push({ - provider: modelProvider.id, - error: errorMessage(e), - }) + } + } + if (!token && conn.token) conn.token = "***"; + if (!listError || error || provider) { + dbg(`adding resolved configuration for provider: ${modelProvider.id}`); + res.push( + deleteEmptyValues({ + provider: conn.provider, + source: conn.source, + base: conn.base, + type: conn.type, + models: conn.models, + error: listError, + token: conn.token, + }), + ); } + } + } catch (e) { + dbg(`error resolving configuration for provider ${modelProvider.id}: ${e}`); + if (error || provider) + res.push({ + provider: modelProvider.id, + error: errorMessage(e), + }); } - dbg("returning sorted resolved configurations") - return res.sort((l, r) => l.provider.localeCompare(r.provider)) + } + dbg("returning sorted resolved configurations"); + return res.sort((l, r) => l.provider.localeCompare(r.provider)); } diff --git a/packages/core/src/consolecolor.ts b/packages/core/src/consolecolor.ts index 9c5da2a869..06c769bce5 100644 --- a/packages/core/src/consolecolor.ts +++ b/packages/core/src/consolecolor.ts @@ -1,8 +1,8 @@ -import { stdout } from "./stdio" +import { stdout } from "./stdio"; // Boolean indicating if console supports colors // Determines if the console supports color output based on terminal capability -export let consoleColors = !!stdout.isTTY +export let consoleColors = !!stdout.isTTY; /** * Enables or disables console color output. @@ -10,7 +10,7 @@ export let consoleColors = !!stdout.isTTY * @param enabled - Whether to enable or disable color output. */ export function setConsoleColors(enabled: boolean) { - consoleColors = !!enabled + consoleColors = !!enabled; } /** @@ -26,8 +26,8 @@ export function setConsoleColors(enabled: boolean) { * @param message - The message to wrap. Returns the original message if colors are disabled. */ export function wrapColor(n: number | string, message: string) { - if (consoleColors) return `\x1B[${n}m${message}\x1B[0m` - else return message + if (consoleColors) return `\x1B[${n}m${message}\x1B[0m`; + else return message; } //for (let i = 0; i < 255; ++i) @@ -40,16 +40,12 @@ export function wrapColor(n: number | string, message: string) { * @param background - Optional. If true, applies the color to the background. 
*/ -export function wrapRgbColor( - rgb: number, - text: string, - background?: boolean -): string { - if (!consoleColors) return text - const r = (rgb >> 16) & 0xff - const g = (rgb >> 8) & 0xff - const b = rgb & 0xff - const rgbColorCode = `\x1b[${background ? "48" : "38"};2;${r};${g};${b}m` - const resetCode = `\x1b[0m` - return `${rgbColorCode}${text}${resetCode}` +export function wrapRgbColor(rgb: number, text: string, background?: boolean): string { + if (!consoleColors) return text; + const r = (rgb >> 16) & 0xff; + const g = (rgb >> 8) & 0xff; + const b = rgb & 0xff; + const rgbColorCode = `\x1b[${background ? "48" : "38"};2;${r};${g};${b}m`; + const resetCode = `\x1b[0m`; + return `${rgbColorCode}${text}${resetCode}`; } diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index ff174b9710..67806bffb3 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -1,464 +1,452 @@ -import CONFIGURATION_DATA from "./llms.json" -export const CHANGE = "change" -export const RESOURCE_CHANGE = "resourceChange" -export const TRACE_CHUNK = "traceChunk" -export const TRACE_DETAILS = "traceDetails" -export const RECONNECT = "reconnect" -export const OPEN = "open" -export const CLOSE = "close" -export const READY = "ready" -export const MESSAGE = "message" -export const ERROR = "error" -export const CONNECT = "connect" -export const LOG = "log" -export const QUEUE_SCRIPT_START = "queueScriptStart" -export const MAX_TOOL_CALLS = 10000 -export const MAX_TOOL_DESCRIPTION_LENGTH = 1000 +import CONFIGURATION_DATA from "./llms.json"; +export const CHANGE = "change"; +export const RESOURCE_CHANGE = "resourceChange"; +export const TRACE_CHUNK = "traceChunk"; +export const TRACE_DETAILS = "traceDetails"; +export const RECONNECT = "reconnect"; +export const OPEN = "open"; +export const CLOSE = "close"; +export const READY = "ready"; +export const MESSAGE = "message"; +export const ERROR = "error"; +export const CONNECT = "connect"; +export const LOG = "log"; +export const QUEUE_SCRIPT_START = "queueScriptStart"; +export const MAX_TOOL_CALLS = 10000; +export const MAX_TOOL_DESCRIPTION_LENGTH = 1000; // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference // https://github.com/Azure/azure-rest-api-specs/blob/main/specification/cognitiveservices/data-plane/AzureOpenAI/inference/stable/2024-02-01/inference.yaml // https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation -export const AZURE_OPENAI_API_VERSION = "2025-01-01-preview" -export const AZURE_MANAGEMENT_API_VERSION = "2024-10-01" +export const AZURE_OPENAI_API_VERSION = "2025-01-01-preview"; +export const AZURE_MANAGEMENT_API_VERSION = "2024-10-01"; export const AZURE_COGNITIVE_SERVICES_TOKEN_SCOPES = Object.freeze([ - "https://cognitiveservices.azure.com/.default", -]) + "https://cognitiveservices.azure.com/.default", +]); // https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation -export const AZURE_AI_INFERENCE_VERSION = "2025-03-01-preview" -export const AZURE_AI_INFERENCE_TOKEN_SCOPES = Object.freeze([ - "https://ml.azure.com/.default", -]) +export const AZURE_AI_INFERENCE_VERSION = "2025-03-01-preview"; +export const AZURE_AI_INFERENCE_TOKEN_SCOPES = Object.freeze(["https://ml.azure.com/.default"]); export const AZURE_MANAGEMENT_TOKEN_SCOPES = Object.freeze([ - "https://management.azure.com/.default", -]) -export const AZURE_TOKEN_EXPIRATION = 59 * 60_000 // 59 minutes - -export const DOCS_URL = 
"https://microsoft.github.io/genaiscript" -export const TOOL_URL = DOCS_URL -export const TOOL_ID = "genaiscript" -export const GENAISCRIPT_FOLDER = "." + TOOL_ID -export const CLI_JS = TOOL_ID + ".cjs" -export const GENAI_SRC = "genaisrc" -export const GENAI_JS_EXT = ".genai.js" -export const GENAI_MJS_EXT = ".genai.mjs" -export const GENAI_MTS_EXT = ".genai.mts" -export const GENAI_MD_EXT = ".genai.md" -export const GENAI_ANYJS_GLOB = - "**/*{.genai.js,.genai.mjs,.genai.ts,.genai.mts,.prompty}" -export const NEGATIVE_GLOB_REGEX = /^!/ -export const GENAI_ANY_REGEX = /\.(genai\.(ts|mts|mjs|js)|prompty)$/i -export const GENAI_ANYJS_REGEX = /\.genai\.js$/i -export const GENAI_ANYTS_REGEX = /\.genai\.(ts|mts|mjs)$/i -export const HTTP_OR_S_REGEX = /^https?:\/\//i -export const HTTPS_REGEX = /^https:\/\//i -export const CSV_REGEX = /\.(t|c)sv$/i -export const YAML_REGEX = /\.yaml$/i -export const INI_REGEX = /\.ini$/i -export const TOML_REGEX = /\.toml$/i -export const XLSX_REGEX = /\.xlsx$/i -export const XML_REGEX = /\.xml$/i -export const DOCX_REGEX = /\.docx$/i -export const PDF_REGEX = /\.pdf$/i -export const MD_REGEX = /\.md$/i -export const MDX_REGEX = /\.mdx$/i -export const MJS_REGEX = /\.mjs$/i -export const MJTS_REGEX = /\.m(j|t)s$/i -export const JS_REGEX = /\.js$/i -export const JSON5_REGEX = /\.json5?$/i -export const JSONL_REGEX = /\.jsonl$/i -export const PROMPTY_REGEX = /\.prompty$/i -export const TOOL_NAME = "GenAIScript" -export const SERVER_PORT = 8003 -export const OPENAPI_SERVER_PORT = 3000 -export const CLIENT_RECONNECT_DELAY = 3000 -export const CLIENT_RECONNECT_MAX_ATTEMPTS = 20 -export const RETRIEVAL_PERSIST_DIR = "retrieval" -export const HIGHLIGHT_LENGTH = 4000 -export const SMALL_MODEL_ID = "small" -export const LARGE_MODEL_ID = "large" -export const VISION_MODEL_ID = "vision" -export const TRANSCRIPTION_MODEL_ID = "transcription" -export const SPEECH_MODEL_ID = "speech" -export const IMAGE_GENERATION_MODEL_ID = "image" -export const EMBEDDINGS_MODEL_ID = "embeddings" -export const DEFAULT_FENCE_FORMAT: FenceFormat = "xml" -export const DEFAULT_TEMPERATURE = 0.8 -export const TRACE_NODE_PREFIX = "genaiscript/trace/" -export const EXTENSION_ID = "genaiscript.genaiscript-vscode" -export const COPILOT_CHAT_PARTICIPANT_ID = TOOL_ID -export const COPILOT_CHAT_PARTICIPANT_SCRIPT_ID = "copilotchat" - -export const BING_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search" -export const TAVILY_ENDPOINT = "https://api.tavily.com/search" - -export const SYSTEM_FENCE = "\n" -export const MAX_DATA_REPAIRS = 1 -export const NPM_CLI_PACKAGE = "genaiscript" -export const ICON_LOGO_NAME = "genaiscript-logo" -export const SARIFF_RULEID_PREFIX = "genaiscript/" -export const SARIFF_BUILDER_URL = "https://github.com/microsoft/genaiscript/" -export const SARIFF_BUILDER_TOOL_DRIVER_NAME = TOOL_ID -export const FETCH_RETRY_DEFAULT = 6 -export const FETCH_RETRY_DEFAULT_DEFAULT = 2000 -export const FETCH_RETRY_MAX_DELAY_DEFAULT = 120000 -export const FETCH_RETRY_GROWTH_FACTOR = 1.5 -export const FETCH_RETRY_ON_DEFAULT = [408, 429, 500, 504] -export const EXEC_MAX_BUFFER = 64 -export const DOT_ENV_FILENAME = ".env" -export const DOT_ENV_GENAISCRIPT_FILENAME = ".env.genaiscript" - -export const SUCCESS_ERROR_CODE = 0 -export const UNHANDLED_ERROR_CODE = -1 -export const ANNOTATION_ERROR_CODE = -2 -export const FILES_NOT_FOUND_ERROR_CODE = -3 -export const GENERATION_ERROR_CODE = -4 -export const RUNTIME_ERROR_CODE = -5 -export const CONNECTION_CONFIGURATION_ERROR_CODE = -6 -export 
const USER_CANCELLED_ERROR_CODE = -7 -export const CONFIGURATION_ERROR_CODE = -8 + "https://management.azure.com/.default", +]); +export const AZURE_TOKEN_EXPIRATION = 59 * 60_000; // 59 minutes + +export const DOCS_URL = "https://microsoft.github.io/genaiscript"; +export const TOOL_URL = DOCS_URL; +export const TOOL_ID = "genaiscript"; +export const GENAISCRIPT_FOLDER = "." + TOOL_ID; +export const CLI_JS = TOOL_ID + ".cjs"; +export const GENAI_SRC = "genaisrc"; +export const GENAI_JS_EXT = ".genai.js"; +export const GENAI_MJS_EXT = ".genai.mjs"; +export const GENAI_MTS_EXT = ".genai.mts"; +export const GENAI_MD_EXT = ".genai.md"; +export const GENAI_ANYJS_GLOB = "**/*{.genai.js,.genai.mjs,.genai.ts,.genai.mts,.prompty}"; +export const NEGATIVE_GLOB_REGEX = /^!/; +export const GENAI_ANY_REGEX = /\.(genai\.(ts|mts|mjs|js)|prompty)$/i; +export const GENAI_ANYJS_REGEX = /\.genai\.js$/i; +export const GENAI_ANYTS_REGEX = /\.genai\.(ts|mts|mjs)$/i; +export const HTTP_OR_S_REGEX = /^https?:\/\//i; +export const HTTPS_REGEX = /^https:\/\//i; +export const CSV_REGEX = /\.(t|c)sv$/i; +export const YAML_REGEX = /\.yaml$/i; +export const INI_REGEX = /\.ini$/i; +export const TOML_REGEX = /\.toml$/i; +export const XLSX_REGEX = /\.xlsx$/i; +export const XML_REGEX = /\.xml$/i; +export const DOCX_REGEX = /\.docx$/i; +export const PDF_REGEX = /\.pdf$/i; +export const MD_REGEX = /\.md$/i; +export const MDX_REGEX = /\.mdx$/i; +export const MJS_REGEX = /\.mjs$/i; +export const MJTS_REGEX = /\.m(j|t)s$/i; +export const JS_REGEX = /\.js$/i; +export const JSON5_REGEX = /\.json5?$/i; +export const JSONL_REGEX = /\.jsonl$/i; +export const PROMPTY_REGEX = /\.prompty$/i; +export const TOOL_NAME = "GenAIScript"; +export const SERVER_PORT = 8003; +export const OPENAPI_SERVER_PORT = 3000; +export const CLIENT_RECONNECT_DELAY = 3000; +export const CLIENT_RECONNECT_MAX_ATTEMPTS = 20; +export const RETRIEVAL_PERSIST_DIR = "retrieval"; +export const HIGHLIGHT_LENGTH = 4000; +export const SMALL_MODEL_ID = "small"; +export const LARGE_MODEL_ID = "large"; +export const VISION_MODEL_ID = "vision"; +export const TRANSCRIPTION_MODEL_ID = "transcription"; +export const SPEECH_MODEL_ID = "speech"; +export const IMAGE_GENERATION_MODEL_ID = "image"; +export const EMBEDDINGS_MODEL_ID = "embeddings"; +export const DEFAULT_FENCE_FORMAT: FenceFormat = "xml"; +export const DEFAULT_TEMPERATURE = 0.8; +export const TRACE_NODE_PREFIX = "genaiscript/trace/"; +export const EXTENSION_ID = "genaiscript.genaiscript-vscode"; +export const COPILOT_CHAT_PARTICIPANT_ID = TOOL_ID; +export const COPILOT_CHAT_PARTICIPANT_SCRIPT_ID = "copilotchat"; + +export const BING_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"; +export const TAVILY_ENDPOINT = "https://api.tavily.com/search"; + +export const SYSTEM_FENCE = "\n"; +export const MAX_DATA_REPAIRS = 1; +export const NPM_CLI_PACKAGE = "genaiscript"; +export const ICON_LOGO_NAME = "genaiscript-logo"; +export const SARIFF_RULEID_PREFIX = "genaiscript/"; +export const SARIFF_BUILDER_URL = "https://github.com/microsoft/genaiscript/"; +export const SARIFF_BUILDER_TOOL_DRIVER_NAME = TOOL_ID; +export const FETCH_RETRY_DEFAULT = 6; +export const FETCH_RETRY_DEFAULT_DEFAULT = 2000; +export const FETCH_RETRY_MAX_DELAY_DEFAULT = 120000; +export const FETCH_RETRY_GROWTH_FACTOR = 1.5; +export const FETCH_RETRY_ON_DEFAULT = [408, 429, 500, 504]; +export const EXEC_MAX_BUFFER = 64; +export const DOT_ENV_FILENAME = ".env"; +export const DOT_ENV_GENAISCRIPT_FILENAME = ".env.genaiscript"; + +export const 
SUCCESS_ERROR_CODE = 0; +export const UNHANDLED_ERROR_CODE = -1; +export const ANNOTATION_ERROR_CODE = -2; +export const FILES_NOT_FOUND_ERROR_CODE = -3; +export const GENERATION_ERROR_CODE = -4; +export const RUNTIME_ERROR_CODE = -5; +export const CONNECTION_CONFIGURATION_ERROR_CODE = -6; +export const USER_CANCELLED_ERROR_CODE = -7; +export const CONFIGURATION_ERROR_CODE = -8; export const UNRECOVERABLE_ERROR_CODES = Object.freeze([ - CONNECTION_CONFIGURATION_ERROR_CODE, - USER_CANCELLED_ERROR_CODE, - FILES_NOT_FOUND_ERROR_CODE, - ANNOTATION_ERROR_CODE, -]) - -export const DOT_ENV_REGEX = /\.env(\.[^\/]+)?$/i -export const PROMPT_FENCE = "```" -export const MARKDOWN_PROMPT_FENCE = "`````" - -export const OPENAI_API_BASE = "https://api.openai.com/v1" -export const OLLAMA_DEFAULT_PORT = 11434 -export const OLLAMA_API_BASE = `http://127.0.0.1:${OLLAMA_DEFAULT_PORT}/v1` -export const SGLANG_API_BASE = "http://127.0.0.1:30000/v1" -export const VLLM_API_BASE = "http://127.0.0.1:8000/v1" -export const LLAMAFILE_API_BASE = "http://127.0.0.1:8080/v1" -export const LOCALAI_API_BASE = "http://127.0.0.1:8080/v1" -export const LITELLM_API_BASE = "http://127.0.0.1:4000" -export const LMSTUDIO_API_BASE = "http://127.0.0.1:1234/v1" -export const JAN_API_BASE = "http://127.0.0.1:1337/v1" -export const ANTHROPIC_API_BASE = "https://api.anthropic.com" -export const HUGGINGFACE_API_BASE = "https://api-inference.huggingface.co/v1" -export const GOOGLE_API_BASE = - "https://generativelanguage.googleapis.com/v1beta/openai/" -export const ALIBABA_BASE = - "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" -export const MISTRAL_API_BASE = "https://api.mistral.ai/v1" -export const DEEPSEEK_API_BASE = "https://api.deepseek.com/v1" -export const WHISPERASR_API_BASE = "http://localhost:9000" -export const WINDOWS_AI_API_BASE = "http://127.0.0.1:5272/v1" -export const DOCKER_MODEL_RUNNER_API_BASE = - "http://model-runner.docker.internal/engines/v1/" - -export const PROMPTFOO_CACHE_PATH = ".genaiscript/cache/tests" -export const PROMPTFOO_CONFIG_DIR = ".genaiscript/config/tests" -export const PROMPTFOO_REMOTE_API_PORT = 15500 -export const PROMPTFOO_REDTEAM_NUM_TESTS = 5 -export const PROMPTFOO_TEST_MAX_CONCURRENCY = 1 - -export const TYPE_DEFINITION_BASENAME = "genaiscript.d.ts" -export const TYPE_DEFINITION_REFERENCE = `/// \n` - -export const RUNS_DIR_NAME = "runs" -export const CONVERTS_DIR_NAME = "converts" -export const TEST_RUNS_DIR_NAME = "test-runs" -export const STATS_DIR_NAME = "stats" -export const TRACE_FILENAME = "trace.md" -export const OUTPUT_FILENAME = "readme.md" - -export const EMOJI_SUCCESS = "✅" -export const EMOJI_FAIL = "❌" -export const EMOJI_WARNING = "⚠️" -export const EMOJI_UNDEFINED = "?" 
- -export const MODEL_PROVIDER_OPENAI = "openai" -export const MODEL_PROVIDER_GITHUB = "github" -export const MODEL_PROVIDER_AZURE_OPENAI = "azure" -export const MODEL_PROVIDER_GOOGLE = "google" -export const MODEL_PROVIDER_AZURE_AI_INFERENCE = "azure_ai_inference" -export const MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI = "azure_serverless" -export const MODEL_PROVIDER_AZURE_SERVERLESS_MODELS = "azure_serverless_models" -export const MODEL_PROVIDER_OLLAMA = "ollama" -export const MODEL_PROVIDER_LLAMAFILE = "llamafile" -export const MODEL_PROVIDER_LITELLM = "litellm" -export const MODEL_PROVIDER_GITHUB_COPILOT_CHAT = "github_copilot_chat" -export const MODEL_PROVIDER_ANTHROPIC = "anthropic" -export const MODEL_PROVIDER_ANTHROPIC_BEDROCK = "anthropic_bedrock" -export const MODEL_PROVIDER_HUGGINGFACE = "huggingface" -export const MODEL_PROVIDER_ALIBABA = "alibaba" -export const MODEL_PROVIDER_MISTRAL = "mistral" -export const MODEL_PROVIDER_LMSTUDIO = "lmstudio" -export const MODEL_PROVIDER_JAN = "jan" -export const MODEL_PROVIDER_SGLANG = "sglang" -export const MODEL_PROVIDER_VLLM = "vllm" -export const MODEL_PROVIDER_DEEPSEEK = "deepseek" -export const MODEL_PROVIDER_WHISPERASR = "whisperasr" -export const MODEL_PROVIDER_WINDOWS_AI = "windows" -export const MODEL_PROVIDER_DOCKER_MODEL_RUNNER = "docker" -export const MODEL_PROVIDER_ECHO = "echo" -export const MODEL_PROVIDER_NONE = "none" - -export const MODEL_GITHUB_COPILOT_CHAT_CURRENT = - MODEL_PROVIDER_GITHUB_COPILOT_CHAT + ":current" + CONNECTION_CONFIGURATION_ERROR_CODE, + USER_CANCELLED_ERROR_CODE, + FILES_NOT_FOUND_ERROR_CODE, + ANNOTATION_ERROR_CODE, +]); + +export const DOT_ENV_REGEX = /\.env(\.[^\/]+)?$/i; +export const PROMPT_FENCE = "```"; +export const MARKDOWN_PROMPT_FENCE = "`````"; + +export const OPENAI_API_BASE = "https://api.openai.com/v1"; +export const OLLAMA_DEFAULT_PORT = 11434; +export const OLLAMA_API_BASE = `http://127.0.0.1:${OLLAMA_DEFAULT_PORT}/v1`; +export const SGLANG_API_BASE = "http://127.0.0.1:30000/v1"; +export const VLLM_API_BASE = "http://127.0.0.1:8000/v1"; +export const LLAMAFILE_API_BASE = "http://127.0.0.1:8080/v1"; +export const LOCALAI_API_BASE = "http://127.0.0.1:8080/v1"; +export const LITELLM_API_BASE = "http://127.0.0.1:4000"; +export const LMSTUDIO_API_BASE = "http://127.0.0.1:1234/v1"; +export const JAN_API_BASE = "http://127.0.0.1:1337/v1"; +export const ANTHROPIC_API_BASE = "https://api.anthropic.com"; +export const HUGGINGFACE_API_BASE = "https://api-inference.huggingface.co/v1"; +export const GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta/openai/"; +export const ALIBABA_BASE = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"; +export const MISTRAL_API_BASE = "https://api.mistral.ai/v1"; +export const DEEPSEEK_API_BASE = "https://api.deepseek.com/v1"; +export const WHISPERASR_API_BASE = "http://localhost:9000"; +export const WINDOWS_AI_API_BASE = "http://127.0.0.1:5272/v1"; +export const DOCKER_MODEL_RUNNER_API_BASE = "http://model-runner.docker.internal/engines/v1/"; + +export const PROMPTFOO_CACHE_PATH = ".genaiscript/cache/tests"; +export const PROMPTFOO_CONFIG_DIR = ".genaiscript/config/tests"; +export const PROMPTFOO_REMOTE_API_PORT = 15500; +export const PROMPTFOO_REDTEAM_NUM_TESTS = 5; +export const PROMPTFOO_TEST_MAX_CONCURRENCY = 1; + +export const TYPE_DEFINITION_BASENAME = "genaiscript.d.ts"; +export const TYPE_DEFINITION_REFERENCE = `/// \n`; + +export const RUNS_DIR_NAME = "runs"; +export const CONVERTS_DIR_NAME = "converts"; +export const 
TEST_RUNS_DIR_NAME = "test-runs"; +export const STATS_DIR_NAME = "stats"; +export const TRACE_FILENAME = "trace.md"; +export const OUTPUT_FILENAME = "readme.md"; + +export const EMOJI_SUCCESS = "✅"; +export const EMOJI_FAIL = "❌"; +export const EMOJI_WARNING = "⚠️"; +export const EMOJI_UNDEFINED = "?"; + +export const MODEL_PROVIDER_OPENAI = "openai"; +export const MODEL_PROVIDER_GITHUB = "github"; +export const MODEL_PROVIDER_AZURE_OPENAI = "azure"; +export const MODEL_PROVIDER_GOOGLE = "google"; +export const MODEL_PROVIDER_AZURE_AI_INFERENCE = "azure_ai_inference"; +export const MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI = "azure_serverless"; +export const MODEL_PROVIDER_AZURE_SERVERLESS_MODELS = "azure_serverless_models"; +export const MODEL_PROVIDER_OLLAMA = "ollama"; +export const MODEL_PROVIDER_LLAMAFILE = "llamafile"; +export const MODEL_PROVIDER_LITELLM = "litellm"; +export const MODEL_PROVIDER_GITHUB_COPILOT_CHAT = "github_copilot_chat"; +export const MODEL_PROVIDER_ANTHROPIC = "anthropic"; +export const MODEL_PROVIDER_ANTHROPIC_BEDROCK = "anthropic_bedrock"; +export const MODEL_PROVIDER_HUGGINGFACE = "huggingface"; +export const MODEL_PROVIDER_ALIBABA = "alibaba"; +export const MODEL_PROVIDER_MISTRAL = "mistral"; +export const MODEL_PROVIDER_LMSTUDIO = "lmstudio"; +export const MODEL_PROVIDER_JAN = "jan"; +export const MODEL_PROVIDER_SGLANG = "sglang"; +export const MODEL_PROVIDER_VLLM = "vllm"; +export const MODEL_PROVIDER_DEEPSEEK = "deepseek"; +export const MODEL_PROVIDER_WHISPERASR = "whisperasr"; +export const MODEL_PROVIDER_WINDOWS_AI = "windows"; +export const MODEL_PROVIDER_DOCKER_MODEL_RUNNER = "docker"; +export const MODEL_PROVIDER_ECHO = "echo"; +export const MODEL_PROVIDER_NONE = "none"; + +export const MODEL_GITHUB_COPILOT_CHAT_CURRENT = MODEL_PROVIDER_GITHUB_COPILOT_CHAT + ":current"; export const MODEL_PROVIDER_OPENAI_HOSTS = Object.freeze([ - MODEL_PROVIDER_OPENAI, - MODEL_PROVIDER_GITHUB, - MODEL_PROVIDER_AZURE_OPENAI, - MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, -]) + MODEL_PROVIDER_OPENAI, + MODEL_PROVIDER_GITHUB, + MODEL_PROVIDER_AZURE_OPENAI, + MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, +]); -export const TRACE_FILE_PREVIEW_MAX_LENGTH = 240 +export const TRACE_FILE_PREVIEW_MAX_LENGTH = 240; -export const OPENROUTER_API_CHAT_URL = - "https://openrouter.ai/api/v1/chat/completions" -export const OPENROUTER_SITE_URL_HEADER = "HTTP-Referer" -export const OPENROUTER_SITE_NAME_HEADER = "X-Title" +export const OPENROUTER_API_CHAT_URL = "https://openrouter.ai/api/v1/chat/completions"; +export const OPENROUTER_SITE_URL_HEADER = "HTTP-Referer"; +export const OPENROUTER_SITE_NAME_HEADER = "X-Title"; -export const GITHUB_MODELS_BASE = - "https://models.github.ai/inference/chat/completions" +export const GITHUB_MODELS_BASE = "https://models.github.ai/inference/chat/completions"; export const DOCS_CONFIGURATION_URL = - "https://microsoft.github.io/genaiscript/getting-started/configuration/" + "https://microsoft.github.io/genaiscript/getting-started/configuration/"; export const DOCS_CONFIGURATION_CONTENT_SAFETY_URL = - "https://microsoft.github.io/genaiscript/reference/scripts/content-safety" + "https://microsoft.github.io/genaiscript/reference/scripts/content-safety"; export const DOCS_DEF_FILES_IS_EMPTY_URL = - "https://microsoft.github.io/genaiscript/reference/scripts/context/#empty-files" + "https://microsoft.github.io/genaiscript/reference/scripts/context/#empty-files"; export const DOCS_WEB_SEARCH_URL = - "https://microsoft.github.io/genaiscript/reference/scripts/web-search/" 
+ "https://microsoft.github.io/genaiscript/reference/scripts/web-search/"; export const DOCS_WEB_SEARCH_BING_SEARCH_URL = - "https://microsoft.github.io/genaiscript/reference/scripts/web-search/#bingn" + "https://microsoft.github.io/genaiscript/reference/scripts/web-search/#bingn"; export const DOCS_WEB_SEARCH_TAVILY_URL = - "https://microsoft.github.io/genaiscript/reference/scripts/web-search/#tavily" + "https://microsoft.github.io/genaiscript/reference/scripts/web-search/#tavily"; export const MODEL_PROVIDERS = Object.freeze< - { - id: string - detail: string - url?: string - seed?: boolean - logitBias?: boolean - tools?: boolean - logprobs?: boolean - topLogprobs?: boolean - topP?: boolean - toolChoice?: boolean - prediction?: boolean - bearerToken?: boolean - listModels?: boolean - transcribe?: boolean - speech?: boolean - tokenless?: boolean - hidden?: boolean - imageGeneration?: boolean - singleModel?: boolean - metadata?: boolean - responseType?: "json" | "json_object" | "json_schema" - reasoningEfforts?: Record - aliases?: Record - models?: Record - env?: Record< - string, - { - description?: string - secret?: boolean - required?: boolean - format?: string - enum?: string[] - } - > - }[] ->(CONFIGURATION_DATA.providers) + { + id: string; + detail: string; + url?: string; + seed?: boolean; + logitBias?: boolean; + tools?: boolean; + logprobs?: boolean; + topLogprobs?: boolean; + topP?: boolean; + toolChoice?: boolean; + prediction?: boolean; + bearerToken?: boolean; + listModels?: boolean; + transcribe?: boolean; + speech?: boolean; + tokenless?: boolean; + hidden?: boolean; + imageGeneration?: boolean; + singleModel?: boolean; + metadata?: boolean; + responseType?: "json" | "json_object" | "json_schema"; + reasoningEfforts?: Record; + aliases?: Record; + models?: Record; + env?: Record< + string, + { + description?: string; + secret?: boolean; + required?: boolean; + format?: string; + enum?: string[]; + } + >; + }[] +>(CONFIGURATION_DATA.providers); export const MODEL_PRICINGS = Object.freeze< - Record< - string, - { - price_per_million_input_tokens: number - price_per_million_output_tokens: number - input_cache_token_rebate?: number - } - > ->(CONFIGURATION_DATA.pricings) + Record< + string, + { + price_per_million_input_tokens: number; + price_per_million_output_tokens: number; + input_cache_token_rebate?: number; + } + > +>(CONFIGURATION_DATA.pricings); export const NEW_SCRIPT_TEMPLATE = `$\`Write a short poem in code.\` -` -export const PDF_SCALE = 4 -export const PDF_HASH_LENGTH = 22 -export const DOCX_HASH_LENGTH = 22 -export const VECTOR_INDEX_HASH_LENGTH = 22 -export const RESOURCE_HASH_LENGTH = 22 -export const FILE_HASH_LENGTH = 64 - -export const PDF_MIME_TYPE = "application/pdf" +`; +export const PDF_SCALE = 4; +export const PDF_HASH_LENGTH = 22; +export const DOCX_HASH_LENGTH = 22; +export const VECTOR_INDEX_HASH_LENGTH = 22; +export const RESOURCE_HASH_LENGTH = 22; +export const FILE_HASH_LENGTH = 64; + +export const PDF_MIME_TYPE = "application/pdf"; export const DOCX_MIME_TYPE = - "application/vnd.openxmlformats-officedocument.wordprocessingml.document" -export const XLSX_MIME_TYPE = - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" -export const JSON_MIME_TYPE = "application/json" -export const JSON_SCHEMA_MIME_TYPE = "application/schema+json" -export const JAVASCRIPT_MIME_TYPE = "application/javascript" -export const MARKDOWN_MIME_TYPE = "text/markdown" -export const YAML_MIME_TYPE = "application/yaml" - -export const JSON_META_SCHEMA_URI = - 
"https://json-schema.org/draft/2020-12/schema" - -export const SHELL_EXEC_TIMEOUT = 300000 -export const DOCKER_DEFAULT_IMAGE = "python:alpine" -export const DOCKER_VOLUMES_DIR = "containers" -export const DOCKER_CONTAINER_VOLUME = "app" - -export const CLI_RUN_FILES_FOLDER = "files" - -export const GITHUB_API_VERSION = "2022-11-28" -export const GITHUB_TOKENS = ["GITHUB_TOKEN", "GH_TOKEN", "INPUT_GITHUB_TOKEN"] - -export const AI_REQUESTS_CACHE = "airaireequests" -export const CHAT_CACHE = "chat" -export const GITHUB_PULL_REQUEST_REVIEWS_CACHE = "prr" -export const GITHUB_PULL_REQUEST_REVIEW_COMMENT_LINE_DISTANCE = 5 -export const GITHUB_ASSET_BRANCH = "genai-assets" - -export const PLACEHOLDER_API_BASE = "" -export const PLACEHOLDER_API_KEY = "" - -export const VSCODE_CONFIG_CLI_VERSION = "cli.version" -export const VSCODE_CONFIG_CLI_PATH = "cli.path" -export const VSCODE_CONFIG_CLI_PACKAGE_MANAGER = "cli.packageManager" - -export const CONSOLE_COLOR_INFO = 32 -export const CONSOLE_COLOR_DEBUG = 90 -export const CONSOLE_COLOR_REASONING = "38;5;17" -export const CONSOLE_COLOR_PERFORMANCE = "38;5;17" -export const CONSOLE_COLOR_WARNING = 95 -export const CONSOLE_COLOR_ERROR = 91 -export const CONSOLE_TOKEN_COLORS = [90, 37] + "application/vnd.openxmlformats-officedocument.wordprocessingml.document"; +export const XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"; +export const JSON_MIME_TYPE = "application/json"; +export const JSON_SCHEMA_MIME_TYPE = "application/schema+json"; +export const JAVASCRIPT_MIME_TYPE = "application/javascript"; +export const MARKDOWN_MIME_TYPE = "text/markdown"; +export const YAML_MIME_TYPE = "application/yaml"; + +export const JSON_META_SCHEMA_URI = "https://json-schema.org/draft/2020-12/schema"; + +export const SHELL_EXEC_TIMEOUT = 300000; +export const DOCKER_DEFAULT_IMAGE = "python:alpine"; +export const DOCKER_VOLUMES_DIR = "containers"; +export const DOCKER_CONTAINER_VOLUME = "app"; + +export const CLI_RUN_FILES_FOLDER = "files"; + +export const GITHUB_API_VERSION = "2022-11-28"; +export const GITHUB_TOKENS = ["GITHUB_TOKEN", "GH_TOKEN", "INPUT_GITHUB_TOKEN"]; + +export const AI_REQUESTS_CACHE = "airaireequests"; +export const CHAT_CACHE = "chat"; +export const GITHUB_PULL_REQUEST_REVIEWS_CACHE = "prr"; +export const GITHUB_PULL_REQUEST_REVIEW_COMMENT_LINE_DISTANCE = 5; +export const GITHUB_ASSET_BRANCH = "genai-assets"; + +export const PLACEHOLDER_API_BASE = ""; +export const PLACEHOLDER_API_KEY = ""; + +export const VSCODE_CONFIG_CLI_VERSION = "cli.version"; +export const VSCODE_CONFIG_CLI_PATH = "cli.path"; +export const VSCODE_CONFIG_CLI_PACKAGE_MANAGER = "cli.packageManager"; + +export const CONSOLE_COLOR_INFO = 32; +export const CONSOLE_COLOR_DEBUG = 90; +export const CONSOLE_COLOR_REASONING = "38;5;17"; +export const CONSOLE_COLOR_PERFORMANCE = "38;5;17"; +export const CONSOLE_COLOR_WARNING = 95; +export const CONSOLE_COLOR_ERROR = 91; +export const CONSOLE_TOKEN_COLORS = [90, 37]; //export const CONSOLE_TOKEN_COLORS = [97, 93] -export const CONSOLE_TOKEN_INNER_COLORS = [90, 37] +export const CONSOLE_TOKEN_INNER_COLORS = [90, 37]; -export const PLAYWRIGHT_DEFAULT_BROWSER = "chromium" -export const MAX_TOKENS_ELLIPSE = "..." 
-export const ESTIMATE_TOKEN_OVERHEAD = 2 +export const PLAYWRIGHT_DEFAULT_BROWSER = "chromium"; +export const MAX_TOKENS_ELLIPSE = "..."; +export const ESTIMATE_TOKEN_OVERHEAD = 2; -export const DEDENT_INSPECT_MAX_DEPTH = 3 +export const DEDENT_INSPECT_MAX_DEPTH = 3; -export const OPENAI_MAX_RETRY_DELAY = 10000 -export const OPENAI_MAX_RETRY_COUNT = 10 -export const OPENAI_RETRY_DEFAULT_DEFAULT = 1000 +export const OPENAI_MAX_RETRY_DELAY = 10000; +export const OPENAI_MAX_RETRY_COUNT = 10; +export const OPENAI_RETRY_DEFAULT_DEFAULT = 1000; -export const ANTHROPIC_MAX_TOKEN = 4096 -export const TEMPLATE_ARG_FILE_MAX_TOKENS = 4000 -export const TEMPLATE_ARG_DATA_SLICE_SAMPLE = 2000 +export const ANTHROPIC_MAX_TOKEN = 4096; +export const TEMPLATE_ARG_FILE_MAX_TOKENS = 4000; +export const TEMPLATE_ARG_DATA_SLICE_SAMPLE = 2000; -export const CHAT_REQUEST_PER_MODEL_CONCURRENT_LIMIT = 8 -export const PROMISE_QUEUE_CONCURRENCY_DEFAULT = 16 -export const FILE_READ_CONCURRENCY_DEFAULT = 16 +export const CHAT_REQUEST_PER_MODEL_CONCURRENT_LIMIT = 8; +export const PROMISE_QUEUE_CONCURRENCY_DEFAULT = 16; +export const FILE_READ_CONCURRENCY_DEFAULT = 16; -export const GITHUB_REST_API_CONCURRENCY_LIMIT = 8 -export const GITHUB_REST_PAGE_DEFAULT = 10 +export const GITHUB_REST_API_CONCURRENCY_LIMIT = 8; +export const GITHUB_REST_PAGE_DEFAULT = 10; -export const TOKEN_TRUNCATION_THRESHOLD = 16 +export const TOKEN_TRUNCATION_THRESHOLD = 16; -export const GIT_IGNORE = ".gitignore" -export const GIT_IGNORE_GENAI = ".gitignore.genai" -export const GENAISCRIPTIGNORE = ".genaiscriptignore" -export const CLI_ENV_VAR_RX = /^(genaiscript_var_|input_)/i +export const GIT_IGNORE = ".gitignore"; +export const GIT_IGNORE_GENAI = ".gitignore.genai"; +export const GENAISCRIPTIGNORE = ".genaiscriptignore"; +export const CLI_ENV_VAR_RX = /^(genaiscript_var_|input_)/i; -export const GIT_DIFF_MAX_TOKENS = 8000 -export const GIT_LOG_COUNT = 10 -export const MAX_TOOL_CONTENT_TOKENS = 8000 +export const GIT_DIFF_MAX_TOKENS = 8000; +export const GIT_LOG_COUNT = 10; +export const MAX_TOOL_CONTENT_TOKENS = 8000; -export const AGENT_MEMORY_CACHE_NAME = "agent_memory" -export const AGENT_MEMORY_FLEX_TOKENS = 20000 -export const TRANSCRIPTION_CACHE_NAME = "transcriptions" +export const AGENT_MEMORY_CACHE_NAME = "agent_memory"; +export const AGENT_MEMORY_FLEX_TOKENS = 20000; +export const TRANSCRIPTION_CACHE_NAME = "transcriptions"; -export const AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH = 9000 -export const AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_DOCUMENTS = 9000 +export const AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_LENGTH = 9000; +export const AZURE_CONTENT_SAFETY_PROMPT_SHIELD_MAX_DOCUMENTS = 9000; -export const TOKEN_MISSING_INFO = "<MISSING_INFO>" -export const TOKEN_NO_ANSWER = "<NO_ANSWER>" +export const TOKEN_MISSING_INFO = "<MISSING_INFO>"; +export const TOKEN_NO_ANSWER = "<NO_ANSWER>"; -export const CHOICE_LOGIT_BIAS = 5 +export const CHOICE_LOGIT_BIAS = 5; -export const SANITIZED_PROMPT_INJECTION = - "...prompt injection detected, content removed..." 
+export const SANITIZED_PROMPT_INJECTION = "...prompt injection detected, content removed..."; // https://platform.openai.com/docs/guides/vision/calculating-costs#managing-images -export const IMAGE_DETAIL_LOW_WIDTH = 512 -export const IMAGE_DETAIL_LOW_HEIGHT = 512 -export const IMAGE_DETAIL_HIGH_TILE_SIZE = 512 -export const IMAGE_DETAIL_HIGH_WIDTH = 2048 -export const IMAGE_DETAIL_HIGH_HEIGHT = 2048 -export const IMAGE_DETAIL_LONG_SIDE_LIMIT = 2000 -export const IMAGE_DETAIL_SHORT_SIDE_LIMIT = 768 +export const IMAGE_DETAIL_LOW_WIDTH = 512; +export const IMAGE_DETAIL_LOW_HEIGHT = 512; +export const IMAGE_DETAIL_HIGH_TILE_SIZE = 512; +export const IMAGE_DETAIL_HIGH_WIDTH = 2048; +export const IMAGE_DETAIL_HIGH_HEIGHT = 2048; +export const IMAGE_DETAIL_LONG_SIDE_LIMIT = 2000; +export const IMAGE_DETAIL_SHORT_SIDE_LIMIT = 768; -export const MIN_LINE_NUMBER_LENGTH = 10 +export const MIN_LINE_NUMBER_LENGTH = 10; -export const VSCODE_SERVER_MAX_RETRIES = 5 +export const VSCODE_SERVER_MAX_RETRIES = 5; -export const VIDEO_HASH_LENGTH = 18 -export const VIDEO_FRAMES_DIR_NAME = "frames" -export const VIDEO_CLIPS_DIR_NAME = "clips" -export const VIDEO_AUDIO_DIR_NAME = "audio" -export const VIDEO_PROBE_DIR_NAME = "probe" +export const VIDEO_HASH_LENGTH = 18; +export const VIDEO_FRAMES_DIR_NAME = "frames"; +export const VIDEO_CLIPS_DIR_NAME = "clips"; +export const VIDEO_AUDIO_DIR_NAME = "audio"; +export const VIDEO_PROBE_DIR_NAME = "probe"; -export const TRACE_MAX_FENCE_SIZE = 100 * 1024 // 100kb -export const TRACE_MAX_FILE_SIZE = 128 * 1024 // 128kb -export const TRACE_MAX_IMAGE_SIZE = 32 * 1024 // 32kb +export const TRACE_MAX_FENCE_SIZE = 100 * 1024; // 100kb +export const TRACE_MAX_FILE_SIZE = 128 * 1024; // 128kb +export const TRACE_MAX_IMAGE_SIZE = 32 * 1024; // 32kb -export const WS_MAX_FRAME_LENGTH = 1200000 -export const WS_MAX_FRAME_CHUNK_LENGTH = 1000000 +export const WS_MAX_FRAME_LENGTH = 1200000; +export const WS_MAX_FRAME_CHUNK_LENGTH = 1000000; -export const SCHEMA_DEFAULT_FORMAT = "json" -export const THINK_REGEX = /<think>(.*?)($|<\/think>)/gis -export const THINK_START_TOKEN_REGEX = /^<think>/ -export const THINK_END_TOKEN_REGEX = /<\/think>$/ +export const SCHEMA_DEFAULT_FORMAT = "json"; +export const THINK_REGEX = /<think>(.*?)($|<\/think>)/gis; +export const THINK_START_TOKEN_REGEX = /^<think>/; +export const THINK_END_TOKEN_REGEX = /<\/think>$/; -export const MAX_FILE_CONTENT_SIZE = 1024 * 1024 * 2 // 2MB -export const TEST_CSV_ENTRY_SEPARATOR = /[;|\n]/g +export const MAX_FILE_CONTENT_SIZE = 1024 * 1024 * 2; // 2MB +export const TEST_CSV_ENTRY_SEPARATOR = /[;|\n]/g; -export const INVALID_FILENAME_REGEX = /[<>:"/\\|?*\x00-\x1F]+/g +export const INVALID_FILENAME_REGEX = /[<>:"/\\|?*\x00-\x1F]+/g; -export const STDIN_READ_TIMEOUT = 50 +export const STDIN_READ_TIMEOUT = 50; -export const REASONING_START_MARKER = "\n🤔 \n" -export const REASONING_END_MARKER = "\n\n\n" +export const REASONING_START_MARKER = "\n🤔 \n"; +export const REASONING_END_MARKER = "\n\n\n"; -export const PROMPT_DOM_TRUNCATE_ATTEMPTS = 6 +export const PROMPT_DOM_TRUNCATE_ATTEMPTS = 6; -export const CONTROL_CHAT_COLLAPSED = 3 -export const CONTROL_CHAT_EXPANDED = 6 -export const CONTROL_CHAT_LAST = 12 +export const CONTROL_CHAT_COLLAPSED = 3; +export const CONTROL_CHAT_EXPANDED = 6; +export const CONTROL_CHAT_LAST = 12; -export const PROMPTDOM_PREVIEW_MAX_LENGTH = 512 +export const PROMPTDOM_PREVIEW_MAX_LENGTH = 512; -export const SERVER_LOCALHOST = "http://127.0.0.1" -export const CHAR_UP_ARROW = "↑" -export const CHAR_DOWN_ARROW = "↓" 
-export const CHAR_ENVELOPE = "✉" -export const CHAR_UP_DOWN_ARROWS = "⇅ " -export const CHAR_FLOPPY_DISK = "🖫 " -export const CHAR_TEMPERATURE = "°" +export const SERVER_LOCALHOST = "http://127.0.0.1"; +export const CHAR_UP_ARROW = "↑"; +export const CHAR_DOWN_ARROW = "↓"; +export const CHAR_ENVELOPE = "✉"; +export const CHAR_UP_DOWN_ARROWS = "⇅ "; +export const CHAR_FLOPPY_DISK = "🖫 "; +export const CHAR_TEMPERATURE = "°"; -export const DEBUG_SCRIPT_CATEGORY = "script" +export const DEBUG_SCRIPT_CATEGORY = "script"; -export const CACHE_FORMAT_VERSION = "1" -export const CACHE_SHA_LENGTH = 32 +export const CACHE_FORMAT_VERSION = "1"; +export const CACHE_SHA_LENGTH = 32; -export const MCP_RESOURCE_PROTOCOL = TOOL_ID +export const MCP_RESOURCE_PROTOCOL = TOOL_ID; -export const RESOURCE_MAX_SIZE = 1024 * 1024 * 10 // 10MB -export const MIN_NODE_VERSION_MAJOR = 22 +export const RESOURCE_MAX_SIZE = 1024 * 1024 * 10; // 10MB +export const MIN_NODE_VERSION_MAJOR = 22; -export const MAX_STRING_LENGTH_USE_TOKENIZER_FOR_APPROXIMATION = 10000 +export const MAX_STRING_LENGTH_USE_TOKENIZER_FOR_APPROXIMATION = 10000; -export const BOX_DOWN_AND_RIGHT = "╭" -export const BOX_RIGHT = "─" -export const BOX_UP_AND_RIGHT = "╰" -export const BOX_UP_AND_DOWN = "│" -export const BOX_DOWN_UP_AND_RIGHT = "├" -export const BOX_LEFT_AND_DOWN = "╮" -export const BOX_LEFT_AND_UP = "╯" +export const BOX_DOWN_AND_RIGHT = "╭"; +export const BOX_RIGHT = "─"; +export const BOX_UP_AND_RIGHT = "╰"; +export const BOX_UP_AND_DOWN = "│"; +export const BOX_DOWN_UP_AND_RIGHT = "├"; +export const BOX_LEFT_AND_DOWN = "╮"; +export const BOX_LEFT_AND_UP = "╯"; -export const GITHUB_ASSET_URL_RX = /^https:\/\/github\.com\/.*\/assets\/.*$/i +export const GITHUB_ASSET_URL_RX = /^https:\/\/github\.com\/.*\/assets\/.*$/i; diff --git a/packages/core/src/contentsafety.ts b/packages/core/src/contentsafety.ts index 4f7ad5dedc..2cd752a6d1 100644 --- a/packages/core/src/contentsafety.ts +++ b/packages/core/src/contentsafety.ts @@ -1,34 +1,29 @@ -import { CancellationOptions } from "./cancellation" -import { genaiscriptDebug } from "./debug" -import { runtimeHost } from "./host" -import { TraceOptions } from "./trace" -const dbg = genaiscriptDebug("contentsafety") +import { CancellationOptions } from "./cancellation"; +import { genaiscriptDebug } from "./debug"; +import { runtimeHost } from "./host"; +import { TraceOptions } from "./trace"; +const dbg = genaiscriptDebug("contentsafety"); export async function resolvePromptInjectionDetector( - safetyOptions: ContentSafetyOptions, - options: TraceOptions & CancellationOptions + safetyOptions: ContentSafetyOptions, + options: TraceOptions & CancellationOptions, ): Promise { - const services = await resolveContentSafety(safetyOptions, options) - return services?.detectPromptInjection + const services = await resolveContentSafety(safetyOptions, options); + return services?.detectPromptInjection; } export async function resolveContentSafety( - safetyOptions: ContentSafetyOptions, - options: TraceOptions & CancellationOptions + safetyOptions: ContentSafetyOptions, + options: TraceOptions & CancellationOptions, ): Promise> { - const { contentSafety, detectPromptInjection } = safetyOptions || {} - if (!detectPromptInjection) { - return {} - } - dbg(`resolving %s`, contentSafety) - const services = await runtimeHost.contentSafety(contentSafety, options) - if ( - !services && - (detectPromptInjection === true || detectPromptInjection === "always") - ) - throw new Error( - "Content safety provider not 
available or not configured." - ) - dbg(`resolved %s`, services?.id) - return services + const { contentSafety, detectPromptInjection } = safetyOptions || {}; + if (!detectPromptInjection) { + return {}; + } + dbg(`resolving %s`, contentSafety); + const services = await runtimeHost.contentSafety(contentSafety, options); + if (!services && (detectPromptInjection === true || detectPromptInjection === "always")) + throw new Error("Content safety provider not available or not configured."); + dbg(`resolved %s`, services?.id); + return services; } diff --git a/packages/core/src/copy.ts b/packages/core/src/copy.ts index 17864c7589..ce6bd687f1 100644 --- a/packages/core/src/copy.ts +++ b/packages/core/src/copy.ts @@ -2,9 +2,9 @@ // including constructing file paths and handling copy operations, // with optional forking functionality. -import { GENAI_MJS_EXT, GENAI_MTS_EXT, GENAI_SRC } from "./constants" // Import constants for file extensions and source directory -import { host } from "./host" // Import host module for file operations -import { fileExists, writeText } from "./fs" // Import file system utilities +import { GENAI_MJS_EXT, GENAI_MTS_EXT, GENAI_SRC } from "./constants"; // Import constants for file extensions and source directory +import { host } from "./host"; // Import host module for file operations +import { fileExists, writeText } from "./fs"; // Import file system utilities /** * Constructs the path to a prompt file. @@ -15,11 +15,11 @@ import { fileExists, writeText } from "./fs" // Import file system utilities * @returns The file path as a string */ function promptPath(id: string, options?: { javascript?: boolean }) { - const { javascript } = options || {} - const prompts = host.resolvePath(host.projectFolder(), GENAI_SRC) // Resolve base prompt directory - if (id === null) return prompts // Return base path if id is not provided - const ext = javascript ? GENAI_MJS_EXT : GENAI_MTS_EXT - return host.resolvePath(prompts, id + ext) // Construct full path if id is provided + const { javascript } = options || {}; + const prompts = host.resolvePath(host.projectFolder(), GENAI_SRC); // Resolve base prompt directory + if (id === null) return prompts; // Return base path if id is not provided + const ext = javascript ? GENAI_MJS_EXT : GENAI_MTS_EXT; + return host.resolvePath(prompts, id + ext); // Construct full path if id is provided } /** @@ -35,35 +35,35 @@ function promptPath(id: string, options?: { javascript?: boolean }) { * @throws If the file already exists in the target location. 
*/ export async function copyPrompt( - t: PromptScript, - options: { fork: boolean; name?: string; javascript?: boolean } + t: PromptScript, + options: { fork: boolean; name?: string; javascript?: boolean }, ) { - // Ensure the prompt directory exists - await host.createDirectory(promptPath(null)) + // Ensure the prompt directory exists + await host.createDirectory(promptPath(null)); - // Determine the name for the new prompt file - const n = options?.name || t.id // Use provided name or default to script id - let fn = promptPath(n) + // Determine the name for the new prompt file + const n = options?.name || t.id; // Use provided name or default to script id + let fn = promptPath(n); - // Handle forking logic by appending a suffix if needed - if (options.fork && (await fileExists(fn))) { - let suff = 2 - for (;;) { - fn = promptPath(n + "_" + suff, options) // Construct new name with suffix - if (await fileExists(fn)) { - // Check if file already exists - suff++ - continue // Increment suffix and retry if file exists - } - break // Exit loop if file does not exist - } + // Handle forking logic by appending a suffix if needed + if (options.fork && (await fileExists(fn))) { + let suff = 2; + for (;;) { + fn = promptPath(n + "_" + suff, options); // Construct new name with suffix + if (await fileExists(fn)) { + // Check if file already exists + suff++; + continue; // Increment suffix and retry if file exists + } + break; // Exit loop if file does not exist } + } - // Check if the file already exists, throw error if it does - if (await fileExists(fn)) throw new Error(`file ${fn} already exists`) + // Check if the file already exists, throw error if it does + if (await fileExists(fn)) throw new Error(`file ${fn} already exists`); - // Write the prompt script to the determined path - await writeText(fn, t.jsSource) + // Write the prompt script to the determined path + await writeText(fn, t.jsSource); - return fn // Return the path of the copied script + return fn; // Return the path of the copied script } diff --git a/packages/core/src/crypto.test.ts b/packages/core/src/crypto.test.ts index 628c8ac961..43a85f7969 100644 --- a/packages/core/src/crypto.test.ts +++ b/packages/core/src/crypto.test.ts @@ -1,98 +1,98 @@ -import assert from "node:assert/strict" -import test, { beforeEach, describe } from "node:test" -import { hash, randomHex } from "./crypto" -import { TestHost } from "./testhost" +import assert from "node:assert/strict"; +import test, { beforeEach, describe } from "node:test"; +import { hash, randomHex } from "./crypto"; +import { TestHost } from "./testhost"; describe("randomHex function", () => { - test("should generate a hex string of the correct length", () => { - const size = 16 - const hexString = randomHex(size) - assert.strictEqual(hexString.length, size * 2) - }) + test("should generate a hex string of the correct length", () => { + const size = 16; + const hexString = randomHex(size); + assert.strictEqual(hexString.length, size * 2); + }); - test("should ensure randomness in generated hex strings", () => { - const size = 16 - const hexString1 = randomHex(size) - const hexString2 = randomHex(size) - assert.notStrictEqual(hexString1, hexString2) - }) + test("should ensure randomness in generated hex strings", () => { + const size = 16; + const hexString1 = randomHex(size); + const hexString2 = randomHex(size); + assert.notStrictEqual(hexString1, hexString2); + }); - test("should handle the smallest valid size correctly", () => { - const size = 1 - const hexString = 
randomHex(size) - assert.strictEqual(hexString.length, 2) - }) + test("should handle the smallest valid size correctly", () => { + const size = 1; + const hexString = randomHex(size); + assert.strictEqual(hexString.length, 2); + }); - test("should handle a large size correctly", () => { - const size = 1024 - const hexString = randomHex(size) - assert.strictEqual(hexString.length, size * 2) - }) + test("should handle a large size correctly", () => { + const size = 1024; + const hexString = randomHex(size); + assert.strictEqual(hexString.length, size * 2); + }); - test("should return an empty string for size 0", () => { - const size = 0 - const hexString = randomHex(size) - assert.strictEqual(hexString, "") - }) -}) + test("should return an empty string for size 0", () => { + const size = 0; + const hexString = randomHex(size); + assert.strictEqual(hexString, ""); + }); +}); describe("hash function", () => { - beforeEach(async () => { - TestHost.install() - }) + beforeEach(async () => { + TestHost.install(); + }); - test("should generate a SHA-256 hash by default", async () => { - const value = "test" - const hashedValue = await hash(value) - }) + test("should generate a SHA-256 hash by default", async () => { + const value = "test"; + const hashedValue = await hash(value); + }); - test("should generate a hash with a specified algorithm", async () => { - const value = "test" - const hashedValue = await hash(value, { algorithm: "sha-256" }) - }) + test("should generate a hash with a specified algorithm", async () => { + const value = "test"; + const hashedValue = await hash(value, { algorithm: "sha-256" }); + }); - test("should generate a hash with a specified length", async () => { - const value = "test" - const options = { length: 32 } - const hashedValue = await hash(value, options) - assert.strictEqual(hashedValue.length, 32) - }) + test("should generate a hash with a specified length", async () => { + const value = "test"; + const options = { length: 32 }; + const hashedValue = await hash(value, options); + assert.strictEqual(hashedValue.length, 32); + }); - test("should include version in the hash when specified", async () => { - const value = "test" - const options = { version: true } - const hashedValue = await hash(value, options) - assert.strictEqual(hashedValue.length, 64) - }) + test("should include version in the hash when specified", async () => { + const value = "test"; + const options = { version: true }; + const hashedValue = await hash(value, options); + assert.strictEqual(hashedValue.length, 64); + }); - test("should handle null and undefined values correctly", async () => { - const value: any = null - const hashedValueNull = await hash(value) - const hashedValueUndefined = await hash(undefined) - assert.notStrictEqual(hashedValueNull, hashedValueUndefined) - }) + test("should handle null and undefined values correctly", async () => { + const value: any = null; + const hashedValueNull = await hash(value); + const hashedValueUndefined = await hash(undefined); + assert.notStrictEqual(hashedValueNull, hashedValueUndefined); + }); - test("should handle arrays correctly", async () => { - const value = [1, 2, 3] - const hashedValue = await hash(value) - }) + test("should handle arrays correctly", async () => { + const value = [1, 2, 3]; + const hashedValue = await hash(value); + }); - test("should handle objects correctly", async () => { - const value = { a: 1, b: 2 } - const hashedValue = await hash(value) - }) + test("should handle objects correctly", async () => { + const 
value = { a: 1, b: 2 }; + const hashedValue = await hash(value); + }); - test("should handle buffers correctly", async () => { - const value = Buffer.from("test") - const hashedValue = await hash(value) - }) + test("should handle buffers correctly", async () => { + const value = Buffer.from("test"); + const hashedValue = await hash(value); + }); - test("should handle ArrayBuffer correctly", async () => { - const value = new ArrayBuffer(8) - const hashedValue = await hash(value) - }) + test("should handle ArrayBuffer correctly", async () => { + const value = new ArrayBuffer(8); + const hashedValue = await hash(value); + }); - test("should handle Blobs correctly", async () => { - const value = new Blob(["test"]) - const hashedValue = await hash(value) - }) -}) + test("should handle Blobs correctly", async () => { + const value = new Blob(["test"]); + const hashedValue = await hash(value); + }); +}); diff --git a/packages/core/src/crypto.ts b/packages/core/src/crypto.ts index 522408a628..da7327f7de 100644 --- a/packages/core/src/crypto.ts +++ b/packages/core/src/crypto.ts @@ -1,28 +1,28 @@ -import { getRandomValues as cryptoGetRandomValues } from "crypto" +import { getRandomValues as cryptoGetRandomValues } from "crypto"; // crypto.ts - Provides cryptographic functions for secure operations // Importing the toHex function from the util module to convert byte arrays to hexadecimal strings -import { concatBuffers, toHex, utf8Encode } from "./util" -import { createReadStream } from "fs" -import { createHash } from "crypto" -import { CORE_VERSION } from "./version" +import { concatBuffers, toHex, utf8Encode } from "./util"; +import { createReadStream } from "fs"; +import { createHash } from "crypto"; +import { CORE_VERSION } from "./version"; function getRandomValues(bytes: Uint8Array) { - if (typeof self !== "undefined" && self.crypto) { - return self.crypto.getRandomValues(bytes) - } else { - return cryptoGetRandomValues(bytes) - } + if (typeof self !== "undefined" && self.crypto) { + return self.crypto.getRandomValues(bytes); + } else { + return cryptoGetRandomValues(bytes); + } } async function digest(algorithm: string, data: Uint8Array) { - algorithm = algorithm.toUpperCase() - if (typeof self !== "undefined" && self.crypto) { - return self.crypto.subtle.digest(algorithm, data) - } else { - const { subtle } = await import("crypto") - return subtle.digest(algorithm, data) - } + algorithm = algorithm.toUpperCase(); + if (typeof self !== "undefined" && self.crypto) { + return self.crypto.subtle.digest(algorithm, data); + } else { + const { subtle } = await import("crypto"); + return subtle.digest(algorithm, data); + } } /** @@ -32,14 +32,14 @@ async function digest(algorithm: string, data: Uint8Array) { * @returns Hexadecimal string representation of the random bytes. 
*/ export function randomHex(size: number) { - // Create a new Uint8Array with the specified size to hold random bytes - const bytes = new Uint8Array(size) + // Create a new Uint8Array with the specified size to hold random bytes + const bytes = new Uint8Array(size); - // Fill the array with cryptographically secure random values using the Web Crypto API - const res = getRandomValues(bytes) + // Fill the array with cryptographically secure random values using the Web Crypto API + const res = getRandomValues(bytes); - // Convert the random byte array to a hexadecimal string using the toHex function and return it - return toHex(res) + // Convert the random byte array to a hexadecimal string using the toHex function and return it + return toHex(res); } /** @@ -56,79 +56,74 @@ export function randomHex(size: number) { * @returns A promise resolving to the computed hash as a hexadecimal string. */ export async function hash(value: any, options?: HashOptions) { - const { - algorithm = "sha-256", - version, - length, - salt, - readWorkspaceFiles, - ...rest - } = options || {} - - const SEP = utf8Encode("|") - const UN = utf8Encode("undefined") - const NU = utf8Encode("null") - - const h: Uint8Array[] = [] - const append = async (v: any) => { - if (v === null) h.push(NU) - else if (v === undefined) h.push(UN) - else if ( - typeof v == "string" || - typeof v === "number" || - typeof v === "boolean" - ) - h.push(utf8Encode(String(v))) - else if (Array.isArray(v)) - for (const c of v) { - h.push(SEP) - await append(c) - } - else if (v instanceof Uint8Array) h.push(v) - else if (v instanceof Buffer) h.push(new Uint8Array(v)) - else if (v instanceof ArrayBuffer) h.push(new Uint8Array(v)) - else if (v instanceof Blob) - h.push(new Uint8Array(await v.arrayBuffer())) - else if (typeof v === "object") { - for (const c of Object.keys(v).sort()) { - h.push(SEP) - h.push(utf8Encode(c)) - h.push(SEP) - await append(v[c]) - } - if ( - readWorkspaceFiles && - typeof v.filename === "string" && - v.content === undefined && - !/^https?:\/\//i.test(v.filename) - ) { - try { - const h = await hashFile(v.filename) - await append(SEP) - await append(h) - } catch {} - } - } else if (typeof v === "function") h.push(utf8Encode(v.toString())) - else h.push(utf8Encode(JSON.stringify(v))) - } - - if (salt) { - await append(salt) - await append(SEP) - } - - if (version) { - await append(CORE_VERSION) - await append(SEP) - } - await append(value) - await append(SEP) - await append(rest) - - const buf = await digest(algorithm, concatBuffers(...h)) - let res = toHex(new Uint8Array(buf)) - if (length) res = res.slice(0, length) - return res + const { + algorithm = "sha-256", + version, + length, + salt, + readWorkspaceFiles, + ...rest + } = options || {}; + + const SEP = utf8Encode("|"); + const UN = utf8Encode("undefined"); + const NU = utf8Encode("null"); + + const h: Uint8Array[] = []; + const append = async (v: any) => { + if (v === null) h.push(NU); + else if (v === undefined) h.push(UN); + else if (typeof v == "string" || typeof v === "number" || typeof v === "boolean") + h.push(utf8Encode(String(v))); + else if (Array.isArray(v)) + for (const c of v) { + h.push(SEP); + await append(c); + } + else if (v instanceof Uint8Array) h.push(v); + else if (v instanceof Buffer) h.push(new Uint8Array(v)); + else if (v instanceof ArrayBuffer) h.push(new Uint8Array(v)); + else if (v instanceof Blob) h.push(new Uint8Array(await v.arrayBuffer())); + else if (typeof v === "object") { + for (const c of Object.keys(v).sort()) { + 
 h.push(SEP); + h.push(utf8Encode(c)); + h.push(SEP); + await append(v[c]); + } + if ( + readWorkspaceFiles && + typeof v.filename === "string" && + v.content === undefined && + !/^https?:\/\//i.test(v.filename) + ) { + try { + const h = await hashFile(v.filename); + await append(SEP); + await append(h); + } catch {} + } + } else if (typeof v === "function") h.push(utf8Encode(v.toString())); + else h.push(utf8Encode(JSON.stringify(v))); + }; + + if (salt) { + await append(salt); + await append(SEP); + } + + if (version) { + await append(CORE_VERSION); + await append(SEP); + } + await append(value); + await append(SEP); + await append(rest); + + const buf = await digest(algorithm, concatBuffers(...h)); + let res = toHex(new Uint8Array(buf)); + if (length) res = res.slice(0, length); + return res; } /** @@ -138,24 +133,21 @@ export async function hash(value: any, options?: HashOptions) { * @param algorithm - Hashing algorithm to use. Defaults to "sha-256". * @returns Promise resolving to the file's hash in hexadecimal format. */ -export async function hashFile( - filePath: string, - algorithm: string = "sha-256" -): Promise<string> { - return new Promise((resolve, reject) => { - const hash = createHash(algorithm) - const stream = createReadStream(filePath) - - stream.on("data", (chunk) => { - hash.update(chunk) - }) - - stream.on("end", () => { - resolve(hash.digest("hex")) - }) - - stream.on("error", (err) => { - reject(err) - }) - }) +export async function hashFile(filePath: string, algorithm: string = "sha-256"): Promise<string> { + return new Promise((resolve, reject) => { + const hash = createHash(algorithm); + const stream = createReadStream(filePath); + + stream.on("data", (chunk) => { + hash.update(chunk); + }); + + stream.on("end", () => { + resolve(hash.digest("hex")); + }); + + stream.on("error", (err) => { + reject(err); + }); + }); } diff --git a/packages/core/src/csv.test.ts b/packages/core/src/csv.test.ts index 12cd3fd26b..ea70f14ec3 100644 --- a/packages/core/src/csv.test.ts +++ b/packages/core/src/csv.test.ts @@ -1,143 +1,137 @@ -import { describe, test, beforeEach } from "node:test" -import assert from "node:assert/strict" -import { - CSVParse, - CSVTryParse, - dataToMarkdownTable, - CSVStringify, - CSVChunk, -} from "./csv" +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import { CSVParse, CSVTryParse, dataToMarkdownTable, CSVStringify, CSVChunk } from "./csv"; describe("CSVParse", () => { - test("parse values with quotes", () => { - const csv = `RuleID, TestID, TestInput, ExpectedOutput, Reasoning + test("parse values with quotes", () => { + const csv = `RuleID, TestID, TestInput, ExpectedOutput, Reasoning 1, 1, "The quick brown fox jumps over the lazy dog.;fox", "NN", "Tests if the word 'fox' is tagged correctly as a noun." 1, 2, "He runs quickly to the store.;quickly", "RB", "Tests if the word 'quickly' is tagged correctly as an adverb." 
-` - const result = CSVParse(csv) - console.log(result) - assert.equal(result.length, 2) - }) +`; + const result = CSVParse(csv); + console.log(result); + assert.equal(result.length, 2); + }); - test("Parse simple CSV data with default options", () => { - const csv = "name,age\nJohn,30\nJane,25" - const result = CSVParse(csv) - assert.deepEqual(result, [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ]) - }) + test("Parse simple CSV data with default options", () => { + const csv = "name,age\nJohn,30\nJane,25"; + const result = CSVParse(csv); + assert.deepEqual(result, [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]); + }); - test("Parse CSV data with custom delimiter", () => { - const csv = "name|age\nJohn|30\nJane|25" - const result = CSVParse(csv, { delimiter: "|" }) - assert.deepEqual(result, [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ]) - }) + test("Parse CSV data with custom delimiter", () => { + const csv = "name|age\nJohn|30\nJane|25"; + const result = CSVParse(csv, { delimiter: "|" }); + assert.deepEqual(result, [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]); + }); - test("Parse CSV data with specified headers", () => { - const csv = "John,30\nJane,25" - const result = CSVParse(csv, { headers: ["name", "age"] }) - assert.deepEqual(result, [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ]) - }) - test("Parse CSV data with invalid quotes", () => { - const csv = '"\\"John\\"",30\nJane,25' - const result = CSVParse(csv, { headers: ["name", "age"], repair: true }) - assert.deepEqual(result, [ - { name: '"John"', age: "30" }, - { name: "Jane", age: "25" }, - ]) - }) -}) + test("Parse CSV data with specified headers", () => { + const csv = "John,30\nJane,25"; + const result = CSVParse(csv, { headers: ["name", "age"] }); + assert.deepEqual(result, [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]); + }); + test("Parse CSV data with invalid quotes", () => { + const csv = '"\\"John\\"",30\nJane,25'; + const result = CSVParse(csv, { headers: ["name", "age"], repair: true }); + assert.deepEqual(result, [ + { name: '"John"', age: "30" }, + { name: "Jane", age: "25" }, + ]); + }); +}); describe("CSVTryParse", () => { - test("Try to parse valid CSV data", () => { - const csv = "name,age\nJohn,30\nJane,25" - const result = CSVTryParse(csv) - assert.deepEqual(result, [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ]) - }) -}) + test("Try to parse valid CSV data", () => { + const csv = "name,age\nJohn,30\nJane,25"; + const result = CSVTryParse(csv); + assert.deepEqual(result, [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]); + }); +}); describe("CSVToMarkdown", () => { - test("Convert parsed CSV data to markdown table", () => { - const csv = [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ] - const result = dataToMarkdownTable(csv) - const expected = `|name|age| + test("Convert parsed CSV data to markdown table", () => { + const csv = [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]; + const result = dataToMarkdownTable(csv); + const expected = `|name|age| |-|-| |John|30| |Jane|25| -`.replace(/[\t ]+/g, " ") - assert.equal(result, expected) - }) +`.replace(/[\t ]+/g, " "); + assert.equal(result, expected); + }); - test("Convert parsed CSV data to markdown table with custom headers", () => { - const csv = [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ] - const result = 
dataToMarkdownTable(csv, { headers: ["age", "name"] }) - const expected = `|age|name| + test("Convert parsed CSV data to markdown table with custom headers", () => { + const csv = [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]; + const result = dataToMarkdownTable(csv, { headers: ["age", "name"] }); + const expected = `|age|name| |-|-| |30|John| |25|Jane| -`.replace(/[\t ]+/g, " ") - assert.equal(result, expected) - }) +`.replace(/[\t ]+/g, " "); + assert.equal(result, expected); + }); - test("Handle empty CSV data input", () => { - const result = dataToMarkdownTable([]) - assert.equal(result, "") - }) -}) + test("Handle empty CSV data input", () => { + const result = dataToMarkdownTable([]); + assert.equal(result, ""); + }); +}); describe("CSVStringify", () => { - test("Stringify simple CSV data with default options", () => { - const csv = [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ] - const result = CSVStringify(csv) - const expected = "John,30\nJane,25\n" - assert.equal(result, expected) - }) - test("Stringify simple CSV data with headers", () => { - const csv = [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ] - const result = CSVStringify(csv, { header: true }) - const expected = "name,age\nJohn,30\nJane,25\n" - assert.equal(result, expected) - }) + test("Stringify simple CSV data with default options", () => { + const csv = [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]; + const result = CSVStringify(csv); + const expected = "John,30\nJane,25\n"; + assert.equal(result, expected); + }); + test("Stringify simple CSV data with headers", () => { + const csv = [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]; + const result = CSVStringify(csv, { header: true }); + const expected = "name,age\nJohn,30\nJane,25\n"; + assert.equal(result, expected); + }); - test("Stringify CSV data with custom delimiter", () => { - const csv = [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - ] - const result = CSVStringify(csv, { header: true, delimiter: "|" }) - const expected = "name|age\nJohn|30\nJane|25\n" - assert.equal(result, expected) - }) + test("Stringify CSV data with custom delimiter", () => { + const csv = [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + ]; + const result = CSVStringify(csv, { header: true, delimiter: "|" }); + const expected = "name|age\nJohn|30\nJane|25\n"; + assert.equal(result, expected); + }); - test("chunk", () => { - const csv = [ - { name: "John", age: "30" }, - { name: "Jane", age: "25" }, - { name: "Doe", age: "35" }, - { name: "Smith", age: "40" }, - ] - const result = CSVChunk(csv, 2) - assert.equal(result.length, 2) - }) -}) + test("chunk", () => { + const csv = [ + { name: "John", age: "30" }, + { name: "Jane", age: "25" }, + { name: "Doe", age: "35" }, + { name: "Smith", age: "40" }, + ]; + const result = CSVChunk(csv, 2); + assert.equal(result.length, 2); + }); +}); diff --git a/packages/core/src/csv.ts b/packages/core/src/csv.ts index 9987ff4944..6c67bb6d1b 100644 --- a/packages/core/src/csv.ts +++ b/packages/core/src/csv.ts @@ -1,12 +1,12 @@ // This module provides functions for parsing and converting CSV data, // including error handling and conversion to Markdown table format. 
-import { parse } from "csv-parse/sync" -import { TraceOptions } from "./trace" -import { stringify } from "csv-stringify/sync" -import { arrayify } from "./util" -import { chunk } from "es-toolkit" -import { filenameOrFileToContent } from "./unwrappers" +import { parse } from "csv-parse/sync"; +import { TraceOptions } from "./trace"; +import { stringify } from "csv-stringify/sync"; +import { arrayify } from "./util"; +import { chunk } from "es-toolkit"; +import { filenameOrFileToContent } from "./unwrappers"; /** * Parses a CSV string or file into an array of objects. @@ -19,37 +19,37 @@ import { filenameOrFileToContent } from "./unwrappers" * @returns An array of objects representing the parsed CSV data. Skips empty lines and records with errors. */ export function CSVParse( - text: string | WorkspaceFile, - options?: { - delimiter?: string - headers?: ElementOrArray - repair?: boolean - } + text: string | WorkspaceFile, + options?: { + delimiter?: string; + headers?: ElementOrArray; + repair?: boolean; + }, ): object[] { - text = filenameOrFileToContent(text) + text = filenameOrFileToContent(text); - // Destructure options or provide defaults - const { delimiter, headers, repair, ...rest } = options || {} - const columns = headers ? arrayify(headers) : true + // Destructure options or provide defaults + const { delimiter, headers, repair, ...rest } = options || {}; + const columns = headers ? arrayify(headers) : true; - // common LLM escape errors - if (repair && text) { - text = text.replace(/\\"/g, '""').replace(/""""/g, '""') - } - // Parse the CSV string based on the provided options - return parse(text, { - autoParse: true, // Automatically parse values to appropriate types - castDate: false, // Do not cast strings to dates - comment: "#", // Ignore comments starting with '#' - columns, // Use provided headers or infer from the first line - skipEmptyLines: true, // Skip empty lines in the CSV - skipRecordsWithError: true, // Skip records that cause errors - delimiter, // Use the provided delimiter - relaxQuotes: true, // Allow quotes to be relaxed - relaxColumnCount: true, // Allow rows to have different column counts - trim: true, // Trim whitespace from values - ...rest, - }) + // common LLM escape errors + if (repair && text) { + text = text.replace(/\\"/g, '""').replace(/""""/g, '""'); + } + // Parse the CSV string based on the provided options + return parse(text, { + autoParse: true, // Automatically parse values to appropriate types + castDate: false, // Do not cast strings to dates + comment: "#", // Ignore comments starting with '#' + columns, // Use provided headers or infer from the first line + skipEmptyLines: true, // Skip empty lines in the CSV + skipRecordsWithError: true, // Skip records that cause errors + delimiter, // Use the provided delimiter + relaxQuotes: true, // Allow quotes to be relaxed + relaxColumnCount: true, // Allow rows to have different column counts + trim: true, // Trim whitespace from values + ...rest, + }); } /** @@ -64,23 +64,23 @@ export function CSVParse( * @returns An array of objects representing the parsed CSV data, or undefined if an error occurs. 
*/ export function CSVTryParse( - text: string, - options?: { - delimiter?: string - headers?: ElementOrArray<string> - repair?: boolean - } & TraceOptions + text: string, + options?: { + delimiter?: string; + headers?: ElementOrArray<string>; + repair?: boolean; + } & TraceOptions, ): object[] | undefined { - const { trace } = options || {} - try { - if (!text) return [] // Return empty array if CSV is empty - // Attempt to parse the CSV - return CSVParse(text, options) - } catch (e) { - // Log error using trace function if provided - trace?.error("reading csv", e) - return undefined - } + const { trace } = options || {}; + try { + if (!text) return []; // Return empty array if CSV is empty + // Attempt to parse the CSV + return CSVParse(text, options); + } catch (e) { + // Log error using trace function if provided + trace?.error("reading csv", e); + return undefined; + } } /** @@ -91,9 +91,9 @@ export function CSVTryParse( * @returns A CSV formatted string representation of the input data. */ export function CSVStringify(csv: object[], options?: CSVStringifyOptions) { - if (!csv) return "" // Return empty string if CSV is empty - // Convert objects to CSV string using the provided options - return stringify(csv, options) + if (!csv) return ""; // Return empty string if CSV is empty + // Convert objects to CSV string using the provided options + return stringify(csv, options); } /** @@ -104,20 +104,17 @@ export function CSVStringify(csv: object[], options?: CSVStringifyOptions) { * @param options.headers - Headers for the table columns. If not provided, keys from the first object are used. If empty, defaults to object keys. Headers are escaped for Markdown. * @returns A Markdown table as a string, with rows and columns formatted and escaped for Markdown. Rows are joined without additional newlines. */ -export function dataToMarkdownTable( - csv: object[], - options?: { headers?: ElementOrArray<string> } -) { - if (!csv?.length) return "" // Return empty string if CSV is empty +export function dataToMarkdownTable(csv: object[], options?: { headers?: ElementOrArray<string> }) { + if (!csv?.length) return ""; // Return empty string if CSV is empty - const headers = arrayify(options?.headers) - if (headers.length === 0) headers.push(...Object.keys(csv[0])) // Use object keys as headers if not provided - const res: string[] = [ - headersToMarkdownTableHead(headers), // Create Markdown separator row - headersToMarkdownTableSeperator(headers), - ...csv.map((row) => objectToMarkdownTableRow(row, headers)), - ] - return res.join("") // Join rows with newline + const headers = arrayify(options?.headers); + if (headers.length === 0) headers.push(...Object.keys(csv[0])); // Use object keys as headers if not provided + const res: string[] = [ + headersToMarkdownTableHead(headers), // Create Markdown separator row + headersToMarkdownTableSeperator(headers), + ...csv.map((row) => objectToMarkdownTableRow(row, headers)), + ]; + return res.join(""); // Join rows with newline } /** @@ -127,7 +124,7 @@ export function dataToMarkdownTable( * @returns A string representing the Markdown table separator row. */ export function headersToMarkdownTableSeperator(headers: string[]) { - return `|${headers.map(() => "-").join("|")}|\n` + return `|${headers.map(() => "-").join("|")}|\n`; } /** @@ -137,7 +134,7 @@ export function headersToMarkdownTableSeperator(headers: string[]) { * @returns A string representing the header row of a Markdown table, with headers separated by pipes, ending with a newline. 
*/ export function headersToMarkdownTableHead(headers: string[]) { - return `|${headers.join("|")}|\n` + return `|${headers.join("|")}|\n`; } /** @@ -150,26 +147,25 @@ export function headersToMarkdownTableHead(headers: string[]) { * @returns A string representing the row formatted as a Markdown table row. */ export function objectToMarkdownTableRow( - row: object, - headers: string[], - options?: { skipEscape?: boolean } + row: object, + headers: string[], + options?: { skipEscape?: boolean }, ) { - const { skipEscape } = options || {} - return `|${headers - .map((key) => { - const v = (row as any)[key] - let s = v === undefined || v === null ? "" : String(v) - // Escape special Markdown characters and format cell content - s = s - .replace(/\s+$/, "") // Trim trailing whitespace - .replace(/</g, "&lt;") // Replace '<' with its HTML entity - .replace(/>/g, "&gt;") // Replace '>' with its HTML entity - .replace(/\r?\n/g, "<br/> ") // Replace newlines with <br/> - if (!skipEscape) - s = s.replace(/[\\`*_{}[\]()#+\-.!]/g, (m) => "\\" + m) // Escape special characters - return s || " " - }) - .join("|")}|\n` // Join columns with '|' + const { skipEscape } = options || {}; + return `|${headers + .map((key) => { + const v = (row as any)[key]; + let s = v === undefined || v === null ? "" : String(v); + // Escape special Markdown characters and format cell content + s = s + .replace(/\s+$/, "") // Trim trailing whitespace + .replace(/</g, "&lt;") // Replace '<' with its HTML entity + .replace(/>/g, "&gt;") // Replace '>' with its HTML entity + .replace(/\r?\n/g, "<br/> "); // Replace newlines with <br/>
+ if (!skipEscape) s = s.replace(/[\\`*_{}[\]()#+\-.!]/g, (m) => "\\" + m); // Escape special characters + return s || " "; + }) + .join("|")}|\n`; // Join columns with '|' } /** @@ -180,13 +176,11 @@ export function objectToMarkdownTableRow( * @returns Array of chunk objects, each containing a starting index and rows. */ export function CSVChunk( - rows: object[], - size: number + rows: object[], + size: number, ): { chunkStartIndex: number; rows: object[] }[] { - return chunk(rows || [], Math.max(1, size | 0)).map( - (rows, chunkStartIndex) => ({ - chunkStartIndex, - rows, - }) - ) + return chunk(rows || [], Math.max(1, size | 0)).map((rows, chunkStartIndex) => ({ + chunkStartIndex, + rows, + })); } diff --git a/packages/core/src/data.ts b/packages/core/src/data.ts index 9821b7e7d0..3ee7b0c92d 100644 --- a/packages/core/src/data.ts +++ b/packages/core/src/data.ts @@ -1,29 +1,29 @@ import { - XLSX_REGEX, - CSV_REGEX, - INI_REGEX, - TOML_REGEX, - JSON5_REGEX, - YAML_REGEX, - XML_REGEX, - MD_REGEX, - MDX_REGEX, - JSONL_REGEX, -} from "./constants" -import { CSVTryParse } from "./csv" -import { splitMarkdown } from "./frontmatter" -import { INITryParse } from "./ini" -import { JSON5TryParse } from "./json5" -import { TOMLTryParse } from "./toml" -import { XLSXParse } from "./xlsx" -import { XMLTryParse } from "./xml" -import { YAMLTryParse } from "./yaml" -import { resolveFileContent } from "./file" -import { TraceOptions } from "./trace" -import { host } from "./host" -import { fromBase64 } from "./base64" -import { JSONLTryParse } from "./jsonl" -import { tryValidateJSONWithSchema } from "./schema" + XLSX_REGEX, + CSV_REGEX, + INI_REGEX, + TOML_REGEX, + JSON5_REGEX, + YAML_REGEX, + XML_REGEX, + MD_REGEX, + MDX_REGEX, + JSONL_REGEX, +} from "./constants"; +import { CSVTryParse } from "./csv"; +import { splitMarkdown } from "./frontmatter"; +import { INITryParse } from "./ini"; +import { JSON5TryParse } from "./json5"; +import { TOMLTryParse } from "./toml"; +import { XLSXParse } from "./xlsx"; +import { XMLTryParse } from "./xml"; +import { YAMLTryParse } from "./yaml"; +import { resolveFileContent } from "./file"; +import { TraceOptions } from "./trace"; +import { host } from "./host"; +import { fromBase64 } from "./base64"; +import { JSONLTryParse } from "./jsonl"; +import { tryValidateJSONWithSchema } from "./schema"; /** * Attempts to parse the provided file's content based on its detected format. @@ -37,34 +37,31 @@ import { tryValidateJSONWithSchema } from "./schema" * @returns Parsed data in the appropriate format based on the file extension, or `undefined` if the format is unsupported. */ export async function dataTryParse( - file: WorkspaceFile, - options?: TraceOptions & XMLParseOptions & INIParseOptions & CSVParseOptions + file: WorkspaceFile, + options?: TraceOptions & XMLParseOptions & INIParseOptions & CSVParseOptions, ) { - await resolveFileContent(file) + await resolveFileContent(file); - const { filename, content, encoding } = file - let data: any - if (XLSX_REGEX.test(filename)) - data = await XLSXParse( - encoding === "base64" - ? fromBase64(content) - : await host.readFile(filename) - ) + const { filename, content, encoding } = file; + let data: any; + if (XLSX_REGEX.test(filename)) + data = await XLSXParse( + encoding === "base64" ? 
fromBase64(content) : await host.readFile(filename), + ); + else { + if (CSV_REGEX.test(filename)) data = CSVTryParse(content, options); + else if (INI_REGEX.test(filename)) data = INITryParse(content, options); + else if (TOML_REGEX.test(filename)) data = TOMLTryParse(content); + else if (JSON5_REGEX.test(filename)) data = JSON5TryParse(content, { repair: true }); + else if (YAML_REGEX.test(filename)) data = YAMLTryParse(content); + else if (XML_REGEX.test(filename)) data = XMLTryParse(content, options); + else if (JSONL_REGEX.test(filename)) data = JSONLTryParse(content); + else if (MD_REGEX.test(filename) || MDX_REGEX.test(filename)) + data = YAMLTryParse(splitMarkdown(content).frontmatter); else { - if (CSV_REGEX.test(filename)) data = CSVTryParse(content, options) - else if (INI_REGEX.test(filename)) data = INITryParse(content, options) - else if (TOML_REGEX.test(filename)) data = TOMLTryParse(content) - else if (JSON5_REGEX.test(filename)) - data = JSON5TryParse(content, { repair: true }) - else if (YAML_REGEX.test(filename)) data = YAMLTryParse(content) - else if (XML_REGEX.test(filename)) data = XMLTryParse(content, options) - else if (JSONL_REGEX.test(filename)) data = JSONLTryParse(content) - else if (MD_REGEX.test(filename) || MDX_REGEX.test(filename)) - data = YAMLTryParse(splitMarkdown(content).frontmatter) - else { - return undefined // unknown - } + return undefined; // unknown } + } - return tryValidateJSONWithSchema(data, options) + return tryValidateJSONWithSchema(data, options); } diff --git a/packages/core/src/debug.ts b/packages/core/src/debug.ts index 50fe7ff0de..e0a3d963eb 100644 --- a/packages/core/src/debug.ts +++ b/packages/core/src/debug.ts @@ -1,6 +1,6 @@ -import debug, { Debugger } from "debug" +import debug, { Debugger } from "debug"; -const _genaiscriptDebug = debug("genaiscript") +const _genaiscriptDebug = debug("genaiscript"); export function genaiscriptDebug(namespace: string): Debugger { - return _genaiscriptDebug.extend(namespace) + return _genaiscriptDebug.extend(namespace); } diff --git a/packages/core/src/diff.test.ts b/packages/core/src/diff.test.ts index 8da5793615..7ff112c7e6 100644 --- a/packages/core/src/diff.test.ts +++ b/packages/core/src/diff.test.ts @@ -1,16 +1,10 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { - diffParse, - tryDiffParse, - diffCreatePatch, - diffFindChunk, - diffResolve, -} from "./diff" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { diffParse, tryDiffParse, diffCreatePatch, diffFindChunk, diffResolve } from "./diff"; describe("diff", () => { - test("diffParse - valid input", () => { - const input = ` + test("diffParse - valid input", () => { + const input = ` diff --git a/file1.txt b/file1.txt index 83db48f..bf269f4 100644 --- a/file1.txt @@ -18,24 +12,20 @@ index 83db48f..bf269f4 100644 @@ -1,3 +1,3 @@ -Hello World +Hello Universe -` - const result = diffParse(input) - assert(result.length > 0, "Should parse diff into files") - assert(result[0].chunks.length > 0, "Should parse chunks") - }) +`; + const result = diffParse(input); + assert(result.length > 0, "Should parse diff into files"); + assert(result[0].chunks.length > 0, "Should parse chunks"); + }); - test("diffParse - empty input", () => { - const input = "" - const result = diffParse(input) - assert.deepEqual( - result, - [], - "Should return an empty array for empty input" - ) - }) + test("diffParse - empty input", () => { + const input = ""; + const result = 
diffParse(input); + assert.deepEqual(result, [], "Should return an empty array for empty input"); + }); - test("tryDiffParse - valid input", () => { - const input = ` + test("tryDiffParse - valid input", () => { + const input = ` diff --git a/file1.txt b/file1.txt index 83db48f..bf269f4 100644 --- a/file1.txt @@ -43,79 +33,69 @@ index 83db48f..bf269f4 100644 @@ -1,3 +1,3 @@ -Hello World +Hello Universe -` - const result = tryDiffParse(input) - assert(result, "Should parse diff successfully") - }) +`; + const result = tryDiffParse(input); + assert(result, "Should parse diff successfully"); + }); - test("diffCreatePatch - valid input", () => { - const left = { filename: "file1.txt", content: "Hello World\n" } - const right = { filename: "file1.txt", content: "Hello Universe\n" } - const patch = diffCreatePatch(left, right) - assert( - patch.includes("--- file1.txt"), - "Should include original file header" - ) - assert( - patch.includes("+++ file1.txt"), - "Should include modified file header" - ) - assert(patch.includes("-Hello World"), "Should include removed line") - assert(patch.includes("+Hello Universe"), "Should include added line") - }) + test("diffCreatePatch - valid input", () => { + const left = { filename: "file1.txt", content: "Hello World\n" }; + const right = { filename: "file1.txt", content: "Hello Universe\n" }; + const patch = diffCreatePatch(left, right); + assert(patch.includes("--- file1.txt"), "Should include original file header"); + assert(patch.includes("+++ file1.txt"), "Should include modified file header"); + assert(patch.includes("-Hello World"), "Should include removed line"); + assert(patch.includes("+Hello Universe"), "Should include added line"); + }); - test("diffFindChunk - find chunk by line", () => { - const diff = [ - { - to: "file1.txt", - chunks: [ - { - newStart: 1, - newLines: 3, - }, - ], - }, - ] - const result = diffFindChunk("file1.txt", 2, diff as any) - assert(result?.chunk, "Should find the chunk containing the line") - }) + test("diffFindChunk - find chunk by line", () => { + const diff = [ + { + to: "file1.txt", + chunks: [ + { + newStart: 1, + newLines: 3, + }, + ], + }, + ]; + const result = diffFindChunk("file1.txt", 2, diff as any); + assert(result?.chunk, "Should find the chunk containing the line"); + }); - test("diffFindChunk - file not found", () => { - const diff = [ - { - to: "file1.txt", - }, - ] - const result = diffFindChunk("file2.txt", 1, diff as any) - assert.strictEqual( - result, - undefined, - "Should return undefined if file is not found" - ) - }) + test("diffFindChunk - file not found", () => { + const diff = [ + { + to: "file1.txt", + }, + ]; + const result = diffFindChunk("file2.txt", 1, diff as any); + assert.strictEqual(result, undefined, "Should return undefined if file is not found"); + }); - test("diffFindChunk - line not in any chunk", () => { - const diff = [ - { - to: "file1.txt", - chunks: [ - { - newStart: 10, - newLines: 5, - }, - ], - }, - ] - const result = diffFindChunk("file1.txt", 2, diff as any) - assert(result?.file, "Should return the file even if no chunk matches") - assert.strictEqual( - result?.chunk, - undefined, - "Should not return a chunk if line is not in range" - ) - }) - test("diffResolve - string input", () => { - const input = ` + test("diffFindChunk - line not in any chunk", () => { + const diff = [ + { + to: "file1.txt", + chunks: [ + { + newStart: 10, + newLines: 5, + }, + ], + }, + ]; + const result = diffFindChunk("file1.txt", 2, diff as any); + assert(result?.file, "Should return 
the file even if no chunk matches"); + assert.strictEqual( + result?.chunk, + undefined, + "Should not return a chunk if line is not in range", + ); + }); + test("diffResolve - string input", () => { + const input = ` diff --git a/file1.txt b/file1.txt index 83db48f..bf269f4 100644 --- a/file1.txt @@ -123,62 +103,50 @@ index 83db48f..bf269f4 100644 @@ -1,3 +1,3 @@ -Hello World +Hello Universe -` - const result = diffResolve(input) - assert(Array.isArray(result), "Should return an array") - assert(result.length > 0, "Should parse diff into files") - assert(result[0].chunks.length > 0, "Should parse chunks") - }) +`; + const result = diffResolve(input); + assert(Array.isArray(result), "Should return an array"); + assert(result.length > 0, "Should parse diff into files"); + assert(result[0].chunks.length > 0, "Should parse chunks"); + }); - test("diffResolve - empty string input", () => { - const input = "" - const result = diffResolve(input) - assert.deepEqual( - result, - [], - "Should return an empty array for empty string input" - ) - }) + test("diffResolve - empty string input", () => { + const input = ""; + const result = diffResolve(input); + assert.deepEqual(result, [], "Should return an empty array for empty string input"); + }); - test("diffResolve - array input", () => { - const input = [ - { - to: "file1.txt", - chunks: [ - { - newStart: 1, - newLines: 3, - oldStart: 1, - oldLines: 3, - }, - ], - }, - ] - const result = diffResolve(input as DiffFile[]) - assert.deepEqual( - result, - input, - "Should return the same array when array is provided" - ) - }) + test("diffResolve - array input", () => { + const input = [ + { + to: "file1.txt", + chunks: [ + { + newStart: 1, + newLines: 3, + oldStart: 1, + oldLines: 3, + }, + ], + }, + ]; + const result = diffResolve(input as DiffFile[]); + assert.deepEqual(result, input, "Should return the same array when array is provided"); + }); - test("diffResolve - single object input", () => { - const input = { - to: "file1.txt", - chunks: [ - { - newStart: 1, - newLines: 3, - oldStart: 1, - oldLines: 3, - }, - ], - } - const result = diffResolve(input as DiffFile) - assert.deepEqual( - result, - [input], - "Should wrap single object in an array" - ) - }) -}) + test("diffResolve - single object input", () => { + const input = { + to: "file1.txt", + chunks: [ + { + newStart: 1, + newLines: 3, + oldStart: 1, + oldLines: 3, + }, + ], + }; + const result = diffResolve(input as DiffFile); + assert.deepEqual(result, [input], "Should wrap single object in an array"); + }); +}); diff --git a/packages/core/src/diff.ts b/packages/core/src/diff.ts index da7eeba849..e28ca729f0 100644 --- a/packages/core/src/diff.ts +++ b/packages/core/src/diff.ts @@ -1,10 +1,10 @@ -import parseDiff from "parse-diff" -import { arrayify, isEmptyString } from "./cleaners" -import debug from "debug" -import { errorMessage } from "./error" -import { createTwoFilesPatch } from "diff" -import { resolve } from "node:path" -const dbg = debug("genaiscript:diff") +import parseDiff from "parse-diff"; +import { arrayify, isEmptyString } from "./cleaners"; +import debug from "debug"; +import { errorMessage } from "./error"; +import { createTwoFilesPatch } from "diff"; +import { resolve } from "node:path"; +const dbg = debug("genaiscript:diff"); /** * Parses a diff string into a structured format. @@ -13,9 +13,9 @@ const dbg = debug("genaiscript:diff") * @returns An array of parsed file objects. If the input is empty or invalid, returns an empty array. 
 */
 export function diffParse(input: string) {
-    if (isEmptyString(input)) return []
-    const files = parseDiff(input)
-    return files
+    if (isEmptyString(input)) return [];
+    const files = parseDiff(input);
+    return files;
 }
 
 /**
@@ -24,11 +24,9 @@ export function diffParse(input: string) {
  * @param input - The input to resolve. Can be a diff string in valid format or an ElementOrArray of DiffFile objects.
  * @returns An array of DiffFile objects. If the input is a string, it is parsed into DiffFile objects using diffParse. If the input is already an ElementOrArray of DiffFile objects, it is converted to an array using arrayify.
  */
-export function diffResolve(
-    input: string | ElementOrArray<DiffFile>
-): DiffFile[] {
-    if (typeof input === "string") return diffParse(input)
-    else return arrayify(input)
+export function diffResolve(input: string | ElementOrArray<DiffFile>): DiffFile[] {
+    if (typeof input === "string") return diffParse(input);
+    else return arrayify(input);
 }
 
 /**
@@ -39,12 +37,12 @@ export function diffResolve(
  * @returns An array of parsed file objects if successful, or an empty array if parsing fails. Logs an error message if parsing fails.
  */
 export function tryDiffParse(diff: string) {
-    try {
-        return diffParse(diff)
-    } catch (e) {
-        dbg(`diff parsing failed: ${errorMessage(e)}`)
-        return []
-    }
+    try {
+        return diffParse(diff);
+    } catch (e) {
+        dbg(`diff parsing failed: ${errorMessage(e)}`);
+        return [];
+    }
 }
 
 /**
@@ -58,30 +56,30 @@ export function tryDiffParse(diff: string) {
  * @returns The diff as a string, with redundant headers removed. The diff is generated using createTwoFilesPatch.
  */
 export function diffCreatePatch(
-    left: string | WorkspaceFile,
-    right: string | WorkspaceFile,
-    options?: {
-        context?: number
-        ignoreCase?: boolean
-        ignoreWhitespace?: boolean
-    }
+    left: string | WorkspaceFile,
+    right: string | WorkspaceFile,
+    options?: {
+        context?: number;
+        ignoreCase?: boolean;
+        ignoreWhitespace?: boolean;
+    },
 ) {
-    if (typeof left === "string") left = { filename: "left", content: left }
-    if (typeof right === "string") right = { filename: "right", content: right }
-    const res = createTwoFilesPatch(
-        left?.filename || "",
-        right?.filename || "",
-        left?.content || "",
-        right?.content || "",
-        undefined,
-        undefined,
-        {
-            ignoreCase: true,
-            ignoreWhitespace: true,
-            ...(options ?? {}),
-        }
-    )
-    return res.replace(/^[^=]*={10,}\n/, "")
+    if (typeof left === "string") left = { filename: "left", content: left };
+    if (typeof right === "string") right = { filename: "right", content: right };
+    const res = createTwoFilesPatch(
+        left?.filename || "",
+        right?.filename || "",
+        left?.content || "",
+        right?.content || "",
+        undefined,
+        undefined,
+        {
+            ignoreCase: true,
+            ignoreWhitespace: true,
+            ...(options ?? {}),
+        },
+    );
+    return res.replace(/^[^=]*={10,}\n/, "");
 }
 
 /**
@@ -93,13 +91,8 @@ export function diffCreatePatch(
  * @param end2 - End of second range (inclusive).
  * @returns True if the ranges overlap, false otherwise.
  */
-function rangesOverlap(
-    start1: number,
-    end1: number,
-    start2: number,
-    end2: number
-): boolean {
-    return Math.max(start1, start2) <= Math.min(end1, end2)
+function rangesOverlap(start1: number, end1: number, start2: number, end2: number): boolean {
+    return Math.max(start1, start2) <= Math.min(end1, end2);
 }
 
 /**
@@ -111,37 +104,28 @@ function rangesOverlap(
  * @returns An object containing the matching file and the chunk if found, or an object with only the file if no chunk matches. Returns undefined if no file matches.
  */
 export function diffFindChunk(
-    file: string,
-    range: number | [number, number],
-    diff: ElementOrArray<DiffFile>
+    file: string,
+    range: number | [number, number],
+    diff: ElementOrArray<DiffFile>,
 ): { file?: DiffFile; chunk?: DiffChunk } | undefined {
-    // line is zero-based!
-    const fn = file ? resolve(file) : undefined
-    const df = arrayify(diff).find(
-        (f) => (!file && !f.to) || resolve(f.to) === fn
-    )
-    if (!df) return undefined // file not found in diff
+    // line is zero-based!
+    const fn = file ? resolve(file) : undefined;
+    const df = arrayify(diff).find((f) => (!file && !f.to) || resolve(f.to) === fn);
+    if (!df) return undefined; // file not found in diff
 
-    const { chunks } = df
-    const lines = arrayify(range)
-    if (lines.length === 0) return { file: df } // no lines to search for
-    if (lines.length === 1) lines[1] = lines[0] // if only one line, make it a range
-    if (lines[0] > lines[1]) {
-        // if the range is inverted, swap it
-        const tmp = lines[0]
-        lines[0] = lines[1]
-        lines[1] = tmp
-    }
-    for (const chunk of chunks) {
-        if (
-            rangesOverlap(
-                lines[0],
-                lines[1],
-                chunk.newStart,
-                chunk.newStart + chunk.newLines
-            )
-        )
-            return { file: df, chunk }
-    }
-    return { file: df }
+    const { chunks } = df;
+    const lines = arrayify(range);
+    if (lines.length === 0) return { file: df }; // no lines to search for
+    if (lines.length === 1) lines[1] = lines[0]; // if only one line, make it a range
+    if (lines[0] > lines[1]) {
+        // if the range is inverted, swap it
+        const tmp = lines[0];
+        lines[0] = lines[1];
+        lines[1] = tmp;
+    }
+    for (const chunk of chunks) {
+        if (rangesOverlap(lines[0], lines[1], chunk.newStart, chunk.newStart + chunk.newLines))
+            return { file: df, chunk };
+    }
+    return { file: df };
 }
diff --git a/packages/core/src/dispose.ts b/packages/core/src/dispose.ts
index c1ec7bbfc3..46dc7e5e0c 100644
--- a/packages/core/src/dispose.ts
+++ b/packages/core/src/dispose.ts
@@ -1,5 +1,5 @@
-import { TraceOptions } from "./trace"
-import { arrayify, logError } from "./util"
+import { TraceOptions } from "./trace";
+import { arrayify, logError } from "./util";
 
 /**
  * Disposes of the provided disposables by invoking their `Symbol.asyncDispose` method.
@@ -9,19 +9,16 @@ import { arrayify, logError } from "./util"
  *
  * Logs errors encountered during disposal using `logError` and the provided trace's error method.
*/ -export async function dispose( - disposables: ElementOrArray, - options: TraceOptions -) { - const { trace } = options || {} - for (const disposable of arrayify(disposables)) { - if (disposable !== undefined && disposable[Symbol.asyncDispose]) { - try { - await disposable[Symbol.asyncDispose]() - } catch (e) { - logError(e) - trace.error(e) - } - } +export async function dispose(disposables: ElementOrArray, options: TraceOptions) { + const { trace } = options || {}; + for (const disposable of arrayify(disposables)) { + if (disposable !== undefined && disposable[Symbol.asyncDispose]) { + try { + await disposable[Symbol.asyncDispose](); + } catch (e) { + logError(e); + trace.error(e); + } } + } } diff --git a/packages/core/src/docx.test.ts b/packages/core/src/docx.test.ts index a1485960a7..70e568b652 100644 --- a/packages/core/src/docx.test.ts +++ b/packages/core/src/docx.test.ts @@ -1,29 +1,29 @@ -import { beforeEach, describe, test } from "node:test" -import assert from "node:assert/strict" -import { DOCXTryParse } from "./docx" -import { TestHost } from "./testhost" +import { beforeEach, describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { DOCXTryParse } from "./docx"; +import { TestHost } from "./testhost"; describe("DOCXTryParse", () => { - beforeEach(() => { - TestHost.install() - }) + beforeEach(() => { + TestHost.install(); + }); - test("parse DOCX to markdown", async () => { - const file = "../sample/src/rag/Document.docx" - const result = await DOCXTryParse(file, { format: "markdown" }) - assert(result.file.content.includes("Microsoft")) - }) + test("parse DOCX to markdown", async () => { + const file = "../sample/src/rag/Document.docx"; + const result = await DOCXTryParse(file, { format: "markdown" }); + assert(result.file.content.includes("Microsoft")); + }); - test("parse DOCX to HTML", async () => { - const file = "../sample/src/rag/Document.docx" - const result = await DOCXTryParse(file, { format: "html" }) - assert(result.file.content.includes("Microsoft")) - }) + test("parse DOCX to HTML", async () => { + const file = "../sample/src/rag/Document.docx"; + const result = await DOCXTryParse(file, { format: "html" }); + assert(result.file.content.includes("Microsoft")); + }); - test("cache hit", async () => { - const file = "../sample/src/rag/Document.docx" - const result = await DOCXTryParse(file, { format: "text" }) - const result2 = await DOCXTryParse(file, { format: "text" }) - assert(result2.file.content === result.file.content) - }) -}) + test("cache hit", async () => { + const file = "../sample/src/rag/Document.docx"; + const result = await DOCXTryParse(file, { format: "text" }); + const result2 = await DOCXTryParse(file, { format: "text" }); + assert(result2.file.content === result.file.content); + }); +}); diff --git a/packages/core/src/docx.ts b/packages/core/src/docx.ts index a4e7a4f410..aff89034eb 100644 --- a/packages/core/src/docx.ts +++ b/packages/core/src/docx.ts @@ -1,31 +1,31 @@ -import { join } from "node:path" -import { DOCX_HASH_LENGTH } from "./constants" -import { hash } from "./crypto" -import { host } from "./host" -import { HTMLToMarkdown } from "./html" -import { TraceOptions } from "./trace" -import { logVerbose } from "./util" -import { readFile, writeFile } from "node:fs/promises" -import { YAMLStringify } from "./yaml" -import { errorMessage, serializeError } from "./error" -import { resolveFileBytes } from "./file" -import { filenameOrFileToFilename } from "./unwrappers" -import { ensureDir } from "fs-extra" 
-import { mark, measure } from "./performance" -import { dotGenaiscriptPath } from "./workdir" +import { join } from "node:path"; +import { DOCX_HASH_LENGTH } from "./constants"; +import { hash } from "./crypto"; +import { host } from "./host"; +import { HTMLToMarkdown } from "./html"; +import { TraceOptions } from "./trace"; +import { logVerbose } from "./util"; +import { readFile, writeFile } from "node:fs/promises"; +import { YAMLStringify } from "./yaml"; +import { errorMessage, serializeError } from "./error"; +import { resolveFileBytes } from "./file"; +import { filenameOrFileToFilename } from "./unwrappers"; +import { ensureDir } from "fs-extra"; +import { mark, measure } from "./performance"; +import { dotGenaiscriptPath } from "./workdir"; async function computeHashFolder( - filename: string, - content: Uint8Array, - options: TraceOptions & DocxParseOptions + filename: string, + content: Uint8Array, + options: TraceOptions & DocxParseOptions, ) { - const { trace, ...rest } = options || {} - const h = await hash([filename, content, rest], { - readWorkspaceFiles: true, - version: true, - length: DOCX_HASH_LENGTH, - }) - return dotGenaiscriptPath("cache", "docx", h) + const { trace, ...rest } = options || {}; + const h = await hash([filename, content, rest], { + readWorkspaceFiles: true, + version: true, + length: DOCX_HASH_LENGTH, + }); + return dotGenaiscriptPath("cache", "docx", h); } /** @@ -36,77 +36,72 @@ async function computeHashFolder( * @returns An object containing the parsed file content or an error message in case of failure. If caching is enabled and an error occurs, attempts to return cached results. */ export async function DOCXTryParse( - file: string | WorkspaceFile, - options?: TraceOptions & DocxParseOptions + file: string | WorkspaceFile, + options?: TraceOptions & DocxParseOptions, ): Promise<{ file?: WorkspaceFile; error?: string }> { - const { trace, cache, format = "markdown" } = options || {} - const filename = filenameOrFileToFilename(file) - const content = await resolveFileBytes(file, options) - const folder = await computeHashFolder(filename, content, options) - const resFilename = join(folder, "res.json") - const readCache = async () => { - if (cache === false) return undefined - try { - const res = JSON.parse( - await readFile(resFilename, { - encoding: "utf-8", - }) - ) - logVerbose(`docx: cache hit at ${folder}`) - return res - } catch { - return undefined - } + const { trace, cache, format = "markdown" } = options || {}; + const filename = filenameOrFileToFilename(file); + const content = await resolveFileBytes(file, options); + const folder = await computeHashFolder(filename, content, options); + const resFilename = join(folder, "res.json"); + const readCache = async () => { + if (cache === false) return undefined; + try { + const res = JSON.parse( + await readFile(resFilename, { + encoding: "utf-8", + }), + ); + logVerbose(`docx: cache hit at ${folder}`); + return res; + } catch { + return undefined; } + }; - { - // try cache hit - const cached = await readCache() - if (cached) return cached - } + { + // try cache hit + const cached = await readCache(); + if (cached) return cached; + } - const m = measure("parsers.docx") - try { - const { extractRawText, convertToHtml } = await import("mammoth") - const input = content - ? { buffer: Buffer.from(content) } - : { path: host.resolvePath(filename) } + const m = measure("parsers.docx"); + try { + const { extractRawText, convertToHtml } = await import("mammoth"); + const input = content ? 
{ buffer: Buffer.from(content) } : { path: host.resolvePath(filename) }; - let text: string - if (format === "html" || format === "markdown") { - const results = await convertToHtml(input) - if (format === "markdown") - text = await HTMLToMarkdown(results.value, { - trace, - disableGfm: true, - }) - else text = results.value - } else { - const results = await extractRawText(input) - text = results.value - } + let text: string; + if (format === "html" || format === "markdown") { + const results = await convertToHtml(input); + if (format === "markdown") + text = await HTMLToMarkdown(results.value, { + trace, + disableGfm: true, + }); + else text = results.value; + } else { + const results = await extractRawText(input); + text = results.value; + } - await ensureDir(folder) - await writeFile(join(folder, "content.txt"), text) - const res = { file: { filename, content: text } } - await writeFile(resFilename, JSON.stringify(res)) + await ensureDir(folder); + await writeFile(join(folder, "content.txt"), text); + const res = { file: { filename, content: text } }; + await writeFile(resFilename, JSON.stringify(res)); - return res - } catch (error) { - logVerbose(error) - { - // try cache hit - const cached = await readCache() - if (cached) return cached - } - trace?.error(`reading docx`, error) // Log error if tracing is enabled - await ensureDir(folder) - await writeFile( - join(folder, "error.txt"), - YAMLStringify(serializeError(error)) - ) - return { error: errorMessage(error) } - } finally { - m() + return res; + } catch (error) { + logVerbose(error); + { + // try cache hit + const cached = await readCache(); + if (cached) return cached; } + trace?.error(`reading docx`, error); // Log error if tracing is enabled + await ensureDir(folder); + await writeFile(join(folder, "error.txt"), YAMLStringify(serializeError(error))); + return { error: errorMessage(error) }; + } finally { + m(); + } } diff --git a/packages/core/src/dom.ts b/packages/core/src/dom.ts index e8f1fb596c..d4188f5403 100644 --- a/packages/core/src/dom.ts +++ b/packages/core/src/dom.ts @@ -1,22 +1,22 @@ -import { genaiscriptDebug } from "./debug" -import { resolveGlobal } from "./global" -const dbg = genaiscriptDebug("dom") +import { genaiscriptDebug } from "./debug"; +import { resolveGlobal } from "./global"; +const dbg = genaiscriptDebug("dom"); export async function installWindow() { - const glb = resolveGlobal() // Get the global context - if (glb.window) return + const glb = resolveGlobal(); // Get the global context + if (glb.window) return; - dbg(`installing window`) - const { JSDOM } = await import("jsdom") - const createDOMPurify = (await import("dompurify")).default + dbg(`installing window`); + const { JSDOM } = await import("jsdom"); + const createDOMPurify = (await import("dompurify")).default; - const { window } = new JSDOM("") - const DOMPurify = createDOMPurify(window) - glb.window = window - glb.DOMPurify = DOMPurify - glb.Element = window.Element + const { window } = new JSDOM(""); + const DOMPurify = createDOMPurify(window); + glb.window = window; + glb.DOMPurify = DOMPurify; + glb.Element = window.Element; - // mermaid workaround - createDOMPurify.addHook = DOMPurify.addHook.bind(DOMPurify) - createDOMPurify.sanitize = DOMPurify.sanitize.bind(DOMPurify) + // mermaid workaround + createDOMPurify.addHook = DOMPurify.addHook.bind(DOMPurify); + createDOMPurify.sanitize = DOMPurify.sanitize.bind(DOMPurify); } diff --git a/packages/core/src/dotenv.test.ts b/packages/core/src/dotenv.test.ts index 6c97904473..225fce51e8 
100644 --- a/packages/core/src/dotenv.test.ts +++ b/packages/core/src/dotenv.test.ts @@ -1,52 +1,52 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { dotEnvTryParse, dotEnvParse, dotEnvStringify } from "./dotenv" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { dotEnvTryParse, dotEnvParse, dotEnvStringify } from "./dotenv"; describe("dotenv", () => { - describe("dotEnvTryParse", () => { - test("should parse a valid dotenv string into a key-value object", () => { - const dotenvString = "KEY1=value1\nKEY2=value2" - const expectedResult = { KEY1: "value1", KEY2: "value2" } - const result = dotEnvTryParse(dotenvString) - assert.deepEqual(result, expectedResult) - }) + describe("dotEnvTryParse", () => { + test("should parse a valid dotenv string into a key-value object", () => { + const dotenvString = "KEY1=value1\nKEY2=value2"; + const expectedResult = { KEY1: "value1", KEY2: "value2" }; + const result = dotEnvTryParse(dotenvString); + assert.deepEqual(result, expectedResult); + }); - test("should return an empty object and log an error for an invalid dotenv string", () => { - const dotenvString = "KEY1value1\nKEY2value2" - const result = dotEnvTryParse(dotenvString) - assert.deepEqual(result, {}) // Assuming logError handles logging separately - }) - }) + test("should return an empty object and log an error for an invalid dotenv string", () => { + const dotenvString = "KEY1value1\nKEY2value2"; + const result = dotEnvTryParse(dotenvString); + assert.deepEqual(result, {}); // Assuming logError handles logging separately + }); + }); - describe("dotEnvParse", () => { - test("should parse a valid dotenv string into a key-value object", () => { - const dotenvString = "KEY1=value1\nKEY2=value2" - const expectedResult = { KEY1: "value1", KEY2: "value2" } - const result = dotEnvParse(dotenvString) - assert.deepEqual(result, expectedResult) - }) - }) + describe("dotEnvParse", () => { + test("should parse a valid dotenv string into a key-value object", () => { + const dotenvString = "KEY1=value1\nKEY2=value2"; + const expectedResult = { KEY1: "value1", KEY2: "value2" }; + const result = dotEnvParse(dotenvString); + assert.deepEqual(result, expectedResult); + }); + }); - describe("dotEnvStringify", () => { - test("should convert a key-value object into a dotenv-style string with proper formatting", () => { - const keyValueObject = { KEY1: "value1", KEY2: "value2" } - const expectedResult = "KEY1=value1\nKEY2=value2" - const result = dotEnvStringify(keyValueObject) - assert.equal(result, expectedResult) - }) + describe("dotEnvStringify", () => { + test("should convert a key-value object into a dotenv-style string with proper formatting", () => { + const keyValueObject = { KEY1: "value1", KEY2: "value2" }; + const expectedResult = "KEY1=value1\nKEY2=value2"; + const result = dotEnvStringify(keyValueObject); + assert.equal(result, expectedResult); + }); - test("should handle values with newlines or quotes properly", () => { - const keyValueObject = { KEY1: "value\n1", KEY2: 'value"2"' } - const expectedResult = 'KEY1="value\n1"\nKEY2="value\\"2\\""' - const result = dotEnvStringify(keyValueObject) - assert.equal(result, expectedResult) - }) + test("should handle values with newlines or quotes properly", () => { + const keyValueObject = { KEY1: "value\n1", KEY2: 'value"2"' }; + const expectedResult = 'KEY1="value\n1"\nKEY2="value\\"2\\""'; + const result = dotEnvStringify(keyValueObject); + assert.equal(result, 
expectedResult); + }); - test("should return an empty string for an empty record", () => { - const keyValueObject = {} - const expectedResult = "" - const result = dotEnvStringify(keyValueObject) - assert.equal(result, expectedResult) - }) - }) -}) + test("should return an empty string for an empty record", () => { + const keyValueObject = {}; + const expectedResult = ""; + const result = dotEnvStringify(keyValueObject); + assert.equal(result, expectedResult); + }); + }); +}); diff --git a/packages/core/src/dotenv.ts b/packages/core/src/dotenv.ts index 8d40bf656d..ee31016e1e 100644 --- a/packages/core/src/dotenv.ts +++ b/packages/core/src/dotenv.ts @@ -3,10 +3,10 @@ // Tags: dotenv, parsing, error handling // Import the 'parse' function from the 'dotenv' library to parse dotenv files -import { parse } from "dotenv" +import { parse } from "dotenv"; // Import a local utility function 'logError' for logging errors -import { logError } from "./util" +import { logError } from "./util"; /** * Safely parses a dotenv-style string into a key-value object. @@ -16,19 +16,19 @@ import { logError } from "./util" * @returns A record with key-value pairs from the dotenv file */ export function dotEnvTryParse(text: string): Record { - try { - // Try parsing the text using the 'parse' function - return parse(text) - } catch (e) { - // Log any parsing error encountered - logError(e) - // Return an empty object to indicate parsing failure - return {} - } + try { + // Try parsing the text using the 'parse' function + return parse(text); + } catch (e) { + // Log any parsing error encountered + logError(e); + // Return an empty object to indicate parsing failure + return {}; + } } // Export the 'parse' function directly so it can be used externally -export const dotEnvParse = parse +export const dotEnvParse = parse; /** * Converts a key-value record into a dotenv-style string. 
@@ -38,22 +38,22 @@ export const dotEnvParse = parse * @returns A dotenv-formatted string */ export function dotEnvStringify(record: Record): string { - return ( - Object.entries(record || {}) - .map(([key, value]) => { - // Ensure null or undefined values are treated as empty strings - if (value === undefined || value === null) value = "" + return ( + Object.entries(record || {}) + .map(([key, value]) => { + // Ensure null or undefined values are treated as empty strings + if (value === undefined || value === null) value = ""; - // Enclose in quotes if the value contains newlines or quotes, and escape quotes - if (value.includes("\n") || value.includes('"')) { - value = value.replace(/"/g, '\\"') // Escape existing quotes - return `${key}="${value}"` - } + // Enclose in quotes if the value contains newlines or quotes, and escape quotes + if (value.includes("\n") || value.includes('"')) { + value = value.replace(/"/g, '\\"'); // Escape existing quotes + return `${key}="${value}"`; + } - // Default key-value format without quotes - return `${key}=${value}` - }) - // Join all key-value pairs with newline characters for dotenv format - .join("\n") - ) + // Default key-value format without quotes + return `${key}=${value}`; + }) + // Join all key-value pairs with newline characters for dotenv format + .join("\n") + ); } diff --git a/packages/core/src/echomodel.ts b/packages/core/src/echomodel.ts index f46a35f985..f15b48c464 100644 --- a/packages/core/src/echomodel.ts +++ b/packages/core/src/echomodel.ts @@ -1,20 +1,20 @@ -import { LanguageModel } from "./chat" -import { renderMessagesToMarkdown } from "./chatrender" -import { deleteEmptyValues } from "./cleaners" -import { MODEL_PROVIDER_ECHO } from "./constants" +import { LanguageModel } from "./chat"; +import { renderMessagesToMarkdown } from "./chatrender"; +import { deleteEmptyValues } from "./cleaners"; +import { MODEL_PROVIDER_ECHO } from "./constants"; export const EchoModel = Object.freeze({ - id: MODEL_PROVIDER_ECHO, - completer: async (req, connection, options) => { - const { messages, model, ...rest } = req - const { partialCb, inner } = options - const text = `## Messages + id: MODEL_PROVIDER_ECHO, + completer: async (req, connection, options) => { + const { messages, model, ...rest } = req; + const { partialCb, inner } = options; + const text = `## Messages ${await renderMessagesToMarkdown(messages, { - textLang: "markdown", - assistant: true, - system: true, - user: true, + textLang: "markdown", + assistant: true, + system: true, + user: true, })} ## Request @@ -22,17 +22,17 @@ ${await renderMessagesToMarkdown(messages, { \`\`\`json ${JSON.stringify(deleteEmptyValues({ messages, ...rest }), null, 2)} \`\`\` -` - partialCb?.({ - responseChunk: text, - tokensSoFar: 0, - responseSoFar: text, - inner, - }) +`; + partialCb?.({ + responseChunk: text, + tokensSoFar: 0, + responseSoFar: text, + inner, + }); - return { - finishReason: "stop", - text, - } - }, -}) + return { + finishReason: "stop", + text, + }; + }, +}); diff --git a/packages/core/src/encoders.test.ts b/packages/core/src/encoders.test.ts index 16291b1b13..d0c89e5f7e 100644 --- a/packages/core/src/encoders.test.ts +++ b/packages/core/src/encoders.test.ts @@ -1,34 +1,34 @@ -import test, { describe } from "node:test" -import assert from "node:assert" -import { chunk, resolveTokenEncoder } from "./encoders" -import { dedent } from "./indent" +import test, { describe } from "node:test"; +import assert from "node:assert"; +import { chunk, resolveTokenEncoder } from "./encoders"; 
+import { dedent } from "./indent"; describe("resolveTokenEncoder", () => { - test("gpt-3.5-turbo", async () => { - const encoder = await resolveTokenEncoder("openai:gpt-3.5-turbo") - const result = encoder.encode("test line") - assert.deepEqual(result, [1985, 1584]) - }) - test("gpt-4o", async () => { - const encoder = await resolveTokenEncoder("openai:gpt-4o") - const result = encoder.encode("test line") - assert.deepEqual(result, [3190, 2543]) - }) - test("gpt-4o-mini", async () => { - const encoder = await resolveTokenEncoder("openai:gpt-4o-mini") - const result = encoder.encode("test line") - assert.deepEqual(result, [3190, 2543]) - }) - test("gpt-4o forbidden", async () => { - const encoder = await resolveTokenEncoder("openai:gpt-4o") - const result = encoder.encode("<|im_end|>") - assert.deepEqual(result, [27, 91, 321, 13707, 91, 29]) - }) - test("gpt-4o chunk", async () => { - const chunks = await chunk( - { - filename: "markdown.md", - content: dedent`--- + test("gpt-3.5-turbo", async () => { + const encoder = await resolveTokenEncoder("openai:gpt-3.5-turbo"); + const result = encoder.encode("test line"); + assert.deepEqual(result, [1985, 1584]); + }); + test("gpt-4o", async () => { + const encoder = await resolveTokenEncoder("openai:gpt-4o"); + const result = encoder.encode("test line"); + assert.deepEqual(result, [3190, 2543]); + }); + test("gpt-4o-mini", async () => { + const encoder = await resolveTokenEncoder("openai:gpt-4o-mini"); + const result = encoder.encode("test line"); + assert.deepEqual(result, [3190, 2543]); + }); + test("gpt-4o forbidden", async () => { + const encoder = await resolveTokenEncoder("openai:gpt-4o"); + const result = encoder.encode("<|im_end|>"); + assert.deepEqual(result, [27, 91, 321, 13707, 91, 29]); + }); + test("gpt-4o chunk", async () => { + const chunks = await chunk( + { + filename: "markdown.md", + content: dedent`--- title: What is Markdown? - Understanding Markdown Syntax description: Learn about Markdown, a lightweight markup language for formatting plain text, its syntax, and how it differs from WYSIWYG editors. keywords: Markdown, markup language, formatting, plain text, syntax @@ -48,15 +48,15 @@ Using Markdown is different than using a WYSIWYG editor. In an application like For example, to denote a heading, you add a number sign before it (e.g., # Heading One). Or to make a phrase bold, you add two asterisks before and after it (e.g., **this text is bold**). It may take a while to get used to seeing Markdown syntax in your text, especially if you’re accustomed to WYSIWYG applications. The screenshot below shows a Markdown file displayed in the Visual Studio Code text editor.... 
`, - }, - { - chunkSize: 128, - chunkOverlap: 16, - model: "openai:gpt-4o", - lineNumbers: true, - } - ) - // console.log(chunks) - assert.equal(chunks.length, 3) - }) -}) + }, + { + chunkSize: 128, + chunkOverlap: 16, + model: "openai:gpt-4o", + lineNumbers: true, + }, + ); + // console.log(chunks) + assert.equal(chunks.length, 3); + }); +}); diff --git a/packages/core/src/encoders.ts b/packages/core/src/encoders.ts index 682e8ad2fa..9e31ad3c1e 100644 --- a/packages/core/src/encoders.ts +++ b/packages/core/src/encoders.ts @@ -1,16 +1,16 @@ -import debug from "debug" -const dbg = debug("genaiscript:encoders") +import debug from "debug"; +const dbg = debug("genaiscript:encoders"); // Import the function to parse model identifiers -import { parseModelIdentifier } from "./models" -import { runtimeHost } from "./host" -import path from "node:path" -import { addLineNumbers, indexToLineNumber } from "./liner" -import { resolveFileContent } from "./file" -import type { EncodeOptions } from "gpt-tokenizer/GptEncoding" -import { assert } from "./util" -import { TextSplitter } from "./textsplitter" -import { errorMessage } from "./error" +import { parseModelIdentifier } from "./models"; +import { runtimeHost } from "./host"; +import path from "node:path"; +import { addLineNumbers, indexToLineNumber } from "./liner"; +import { resolveFileContent } from "./file"; +import type { EncodeOptions } from "gpt-tokenizer/GptEncoding"; +import { assert } from "./util"; +import { TextSplitter } from "./textsplitter"; +import { errorMessage } from "./error"; /** * Resolves the token encoder for a specified model identifier. @@ -19,65 +19,57 @@ import { errorMessage } from "./error" * @returns A Promise resolving to a Tokenizer object or undefined if fallback is disabled and resolution fails. 
*/ export async function resolveTokenEncoder( - modelId: string, - options?: { disableFallback?: boolean } + modelId: string, + options?: { disableFallback?: boolean }, ): Promise { - const { disableFallback } = options || {} + const { disableFallback } = options || {}; - // Parse the model identifier to extract the model information - if (!modelId) { - dbg(`modelId is empty, using default model alias`) - modelId = runtimeHost.modelAliases.large.model - } - let { model } = parseModelIdentifier(modelId) - if (/^gpt-4.1/i.test(model)) model = "gpt-4o" // same encoding - const module = model.toLowerCase() // Assign model to module for dynamic import path - - const { modelEncodings } = runtimeHost?.config || {} - const encoding = modelEncodings?.[modelId] || module + // Parse the model identifier to extract the model information + if (!modelId) { + dbg(`modelId is empty, using default model alias`); + modelId = runtimeHost.modelAliases.large.model; + } + let { model } = parseModelIdentifier(modelId); + if (/^gpt-4.1/i.test(model)) model = "gpt-4o"; // same encoding + const module = model.toLowerCase(); // Assign model to module for dynamic import path - const encoderOptions = { - disallowedSpecial: new Set(), - } satisfies EncodeOptions - try { - // Attempt to dynamically import the encoder module for the specified model - const { - encode, - decode, - default: api, - } = await import(`gpt-tokenizer/model/${encoding}`) - assert(!!encode) - const { modelName } = api - const size = - api.bytePairEncodingCoreProcessor?.mergeableBytePairRankCount + - (api.bytePairEncodingCoreProcessor?.specialTokenMapping?.size || 0) - return Object.freeze({ - model: modelName, - size, - encode: (line) => encode(line, encoderOptions), // Return the default encoder function - decode, - }) - } catch (e) { - if (disableFallback) { - dbg(`encoder fallback disabled for ${encoding}`) - return undefined - } + const { modelEncodings } = runtimeHost?.config || {}; + const encoding = modelEncodings?.[modelId] || module; - const { - encode, - decode, - default: api, - } = await import("gpt-tokenizer/model/gpt-4o") - assert(!!encode) - const { modelName, vocabularySize } = api - dbg(`fallback ${encoding} to gpt-4o encoder`) - return Object.freeze({ - model: modelName, - size: vocabularySize, - encode: (line) => encode(line, encoderOptions), // Return the default encoder function - decode, - }) + const encoderOptions = { + disallowedSpecial: new Set(), + } satisfies EncodeOptions; + try { + // Attempt to dynamically import the encoder module for the specified model + const { encode, decode, default: api } = await import(`gpt-tokenizer/model/${encoding}`); + assert(!!encode); + const { modelName } = api; + const size = + api.bytePairEncodingCoreProcessor?.mergeableBytePairRankCount + + (api.bytePairEncodingCoreProcessor?.specialTokenMapping?.size || 0); + return Object.freeze({ + model: modelName, + size, + encode: (line) => encode(line, encoderOptions), // Return the default encoder function + decode, + }); + } catch (e) { + if (disableFallback) { + dbg(`encoder fallback disabled for ${encoding}`); + return undefined; } + + const { encode, decode, default: api } = await import("gpt-tokenizer/model/gpt-4o"); + assert(!!encode); + const { modelName, vocabularySize } = api; + dbg(`fallback ${encoding} to gpt-4o encoder`); + return Object.freeze({ + model: modelName, + size: vocabularySize, + encode: (line) => encode(line, encoderOptions), // Return the default encoder function + decode, + }); + } } /** @@ -93,58 +85,51 @@ export 
async function resolveTokenEncoder( * @returns A Promise resolving to an array of text chunks. Each chunk includes content, filename, and start/end line numbers. */ export async function chunk( - file: Awaitable, - options?: TextChunkerConfig + file: Awaitable, + options?: TextChunkerConfig, ): Promise { - const f = await file - let filename: string - let content: string - if (typeof f === "string") { - content = f - } else if (typeof f === "object") { - await resolveFileContent(f) - if (f.encoding) { - dbg(`binary file detected, skip`) - return [] - } // binary file bail out - filename = f.filename - content = f.content - } else { - return [] - } + const f = await file; + let filename: string; + let content: string; + if (typeof f === "string") { + content = f; + } else if (typeof f === "object") { + await resolveFileContent(f); + if (f.encoding) { + dbg(`binary file detected, skip`); + return []; + } // binary file bail out + filename = f.filename; + content = f.content; + } else { + return []; + } - const { - model, - docType: optionsDocType, - lineNumbers, - ...rest - } = options || {} - const docType = ( - optionsDocType || (filename ? path.extname(filename) : undefined) - ) - ?.toLowerCase() - ?.replace(/^\./, "") - const tokenizer = await resolveTokenEncoder(model) - const ts = new TextSplitter({ - ...rest, - docType, - tokenizer, - keepSeparators: true, - }) - const chunksRaw = ts.split(content) - const chunks = chunksRaw.map(({ text, startPos, endPos }) => { - const lineStart = indexToLineNumber(content, startPos) - const lineEnd = indexToLineNumber(content, endPos) - if (lineNumbers) { - text = addLineNumbers(text, { startLine: lineStart }) - } - return { - content: text, - filename, - lineStart, - lineEnd, - } satisfies TextChunk - }) - dbg(`chunks ${chunks.length}`) - return chunks + const { model, docType: optionsDocType, lineNumbers, ...rest } = options || {}; + const docType = (optionsDocType || (filename ? 
path.extname(filename) : undefined)) + ?.toLowerCase() + ?.replace(/^\./, ""); + const tokenizer = await resolveTokenEncoder(model); + const ts = new TextSplitter({ + ...rest, + docType, + tokenizer, + keepSeparators: true, + }); + const chunksRaw = ts.split(content); + const chunks = chunksRaw.map(({ text, startPos, endPos }) => { + const lineStart = indexToLineNumber(content, startPos); + const lineEnd = indexToLineNumber(content, endPos); + if (lineNumbers) { + text = addLineNumbers(text, { startLine: lineStart }); + } + return { + content: text, + filename, + lineStart, + lineEnd, + } satisfies TextChunk; + }); + dbg(`chunks ${chunks.length}`); + return chunks; } diff --git a/packages/core/src/env.ts b/packages/core/src/env.ts index 6e36039486..3b2f793443 100644 --- a/packages/core/src/env.ts +++ b/packages/core/src/env.ts @@ -1,70 +1,66 @@ -import { normalizeFloat, trimTrailingSlash } from "./cleaners" +import { normalizeFloat, trimTrailingSlash } from "./cleaners"; import { - ANTHROPIC_API_BASE, - AZURE_OPENAI_API_VERSION, - GITHUB_MODELS_BASE, - LITELLM_API_BASE, - LLAMAFILE_API_BASE, - LOCALAI_API_BASE, - MODEL_PROVIDER_ANTHROPIC, - MODEL_PROVIDER_AZURE_OPENAI, - MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, - MODEL_PROVIDER_GITHUB_COPILOT_CHAT, - MODEL_PROVIDER_GITHUB, - MODEL_PROVIDER_LITELLM, - MODEL_PROVIDER_LLAMAFILE, - MODEL_PROVIDER_OLLAMA, - MODEL_PROVIDER_OPENAI, - OPENAI_API_BASE, - PLACEHOLDER_API_BASE, - PLACEHOLDER_API_KEY, - MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, - MODEL_PROVIDER_HUGGINGFACE, - HUGGINGFACE_API_BASE, - OLLAMA_API_BASE, - OLLAMA_DEFAULT_PORT, - MODEL_PROVIDER_GOOGLE, - GOOGLE_API_BASE, - MODEL_PROVIDER_ALIBABA, - ALIBABA_BASE, - MODEL_PROVIDER_MISTRAL, - MISTRAL_API_BASE, - MODEL_PROVIDER_LMSTUDIO, - LMSTUDIO_API_BASE, - MODEL_PROVIDER_JAN, - JAN_API_BASE, - MODEL_PROVIDER_ANTHROPIC_BEDROCK, - MODEL_PROVIDER_DEEPSEEK, - DEEPSEEK_API_BASE, - MODEL_PROVIDER_WHISPERASR, - WHISPERASR_API_BASE, - MODEL_PROVIDER_ECHO, - MODEL_PROVIDER_NONE, - MODEL_PROVIDER_AZURE_AI_INFERENCE, - MODEL_PROVIDER_WINDOWS_AI, - WINDOWS_AI_API_BASE, - MODEL_PROVIDER_SGLANG, - SGLANG_API_BASE, - MODEL_PROVIDER_VLLM, - VLLM_API_BASE, - GITHUB_TOKENS, - MODEL_PROVIDER_DOCKER_MODEL_RUNNER, - DOCKER_MODEL_RUNNER_API_BASE, -} from "./constants" -import { runtimeHost } from "./host" -import { parseModelIdentifier } from "./models" -import { - AzureCredentialsType, - LanguageModelConfiguration, - OpenAIAPIType, -} from "./server/messages" -import { arrayify, ellipse } from "./util" -import { URL } from "node:url" -import { uriTryParse } from "./url" -import { TraceOptions } from "./trace" -import { CancellationOptions } from "./cancellation" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("config:env") + ANTHROPIC_API_BASE, + AZURE_OPENAI_API_VERSION, + GITHUB_MODELS_BASE, + LITELLM_API_BASE, + LLAMAFILE_API_BASE, + LOCALAI_API_BASE, + MODEL_PROVIDER_ANTHROPIC, + MODEL_PROVIDER_AZURE_OPENAI, + MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, + MODEL_PROVIDER_GITHUB_COPILOT_CHAT, + MODEL_PROVIDER_GITHUB, + MODEL_PROVIDER_LITELLM, + MODEL_PROVIDER_LLAMAFILE, + MODEL_PROVIDER_OLLAMA, + MODEL_PROVIDER_OPENAI, + OPENAI_API_BASE, + PLACEHOLDER_API_BASE, + PLACEHOLDER_API_KEY, + MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, + MODEL_PROVIDER_HUGGINGFACE, + HUGGINGFACE_API_BASE, + OLLAMA_API_BASE, + OLLAMA_DEFAULT_PORT, + MODEL_PROVIDER_GOOGLE, + GOOGLE_API_BASE, + MODEL_PROVIDER_ALIBABA, + ALIBABA_BASE, + MODEL_PROVIDER_MISTRAL, + MISTRAL_API_BASE, + MODEL_PROVIDER_LMSTUDIO, + 
    LMSTUDIO_API_BASE,
+    MODEL_PROVIDER_JAN,
+    JAN_API_BASE,
+    MODEL_PROVIDER_ANTHROPIC_BEDROCK,
+    MODEL_PROVIDER_DEEPSEEK,
+    DEEPSEEK_API_BASE,
+    MODEL_PROVIDER_WHISPERASR,
+    WHISPERASR_API_BASE,
+    MODEL_PROVIDER_ECHO,
+    MODEL_PROVIDER_NONE,
+    MODEL_PROVIDER_AZURE_AI_INFERENCE,
+    MODEL_PROVIDER_WINDOWS_AI,
+    WINDOWS_AI_API_BASE,
+    MODEL_PROVIDER_SGLANG,
+    SGLANG_API_BASE,
+    MODEL_PROVIDER_VLLM,
+    VLLM_API_BASE,
+    GITHUB_TOKENS,
+    MODEL_PROVIDER_DOCKER_MODEL_RUNNER,
+    DOCKER_MODEL_RUNNER_API_BASE,
+} from "./constants";
+import { runtimeHost } from "./host";
+import { parseModelIdentifier } from "./models";
+import { AzureCredentialsType, LanguageModelConfiguration, OpenAIAPIType } from "./server/messages";
+import { arrayify, ellipse } from "./util";
+import { URL } from "node:url";
+import { uriTryParse } from "./url";
+import { TraceOptions } from "./trace";
+import { CancellationOptions } from "./cancellation";
+import { genaiscriptDebug } from "./debug";
+const dbg = genaiscriptDebug("config:env");
 
 /**
  * Parses the OLLAMA host environment variable and returns a standardized URL.
@@ -82,19 +78,14 @@ const dbg = genaiscriptDebug("config:env")
  * and returns a complete URL. Throws an error if the URL is invalid.
  */
 export function ollamaParseHostVariable(env: Record<string, string>) {
-    dbg(`ollamaParseHostVariable called with env: ${JSON.stringify(env)}`)
-    const s = (
-        env.OLLAMA_HOST ||
-        env.OLLAMA_API_BASE ||
-        OLLAMA_API_BASE
-    )?.trim()
-    const ipm =
-        /^(?<address>(localhost|\d+\.\d+\.\d+\.\d+))(:(?<port>\d+))?$/i.exec(s)
-    if (ipm) {
-        return `http://${ipm.groups.address}:${ipm.groups.port || OLLAMA_DEFAULT_PORT}`
-    }
-    const url = new URL(s)
-    return url.href
+    dbg(`ollamaParseHostVariable called with env: ${JSON.stringify(env)}`);
+    const s = (env.OLLAMA_HOST || env.OLLAMA_API_BASE || OLLAMA_API_BASE)?.trim();
+    const ipm = /^(?<address>(localhost|\d+\.\d+\.\d+\.\d+))(:(?<port>\d+))?$/i.exec(s);
+    if (ipm) {
+        return `http://${ipm.groups.address}:${ipm.groups.port || OLLAMA_DEFAULT_PORT}`;
+    }
+    const url = new URL(s);
+    return url.href;
 }
 
 /**
@@ -106,23 +97,20 @@ export function ollamaParseHostVariable(env: Record<string, string>) {
  * @returns An object containing the matched variable name and its value, or undefined if no match is found.
  */
 export function findEnvVar(
-    env: Record<string, string>,
-    prefixes: string | string[],
-    names: string[]
+    env: Record<string, string>,
+    prefixes: string | string[],
+    names: string[],
 ): { name: string; value: string } {
-    for (const prefix of arrayify(prefixes)) {
-        for (const name of names) {
-            const pname = prefix + name
-            const value =
-                env[pname] ||
-                env[pname.toLowerCase()] ||
-                env[pname.toUpperCase()]
-            if (value !== undefined) {
-                return { name: pname, value }
-            }
-        }
-    }
-    return undefined
+    for (const prefix of arrayify(prefixes)) {
+        for (const name of names) {
+            const pname = prefix + name;
+            const value = env[pname] || env[pname.toLowerCase()] || env[pname.toUpperCase()];
+            if (value !== undefined) {
+                return { name: pname, value };
+            }
+        }
+    }
+    return undefined;
 }
 
 /**
@@ -138,35 +126,35 @@ export function findEnvVar(
 * - GENAISCRIPT_DEFAULT_[ID]_MODEL or GENAISCRIPT_MODEL_[ID]: Configures aliases for specific model IDs.
 */
 export async function parseDefaultsFromEnv(env: Record<string, string>) {
-    dbg(`parsing process.env`)
-    // legacy
-    if (env.GENAISCRIPT_DEFAULT_MODEL) {
-        dbg(`found GENAISCRIPT_DEFAULT_MODEL: ${env.GENAISCRIPT_DEFAULT_MODEL}`)
-        runtimeHost.setModelAlias("env", "large", env.GENAISCRIPT_DEFAULT_MODEL)
-    }
-    // action
-    if (env.INPUT_MODEL) {
-        dbg(`found INPUT_MODEL = ${env.INPUT_MODEL}`)
-        runtimeHost.setModelAlias("env", "large", env.INPUT_MODEL)
-    }
-
-    const rx =
-        /^GENAISCRIPT(_DEFAULT)?_((?<id>[A-Z0-9_\-]+)_MODEL|(INPUT_)?MODEL_(?<id2>[A-Z0-9_\-]+))$/i
-    for (const kv of Object.entries(env)) {
-        const [k, v] = kv
-        const m = rx.exec(k)
-        if (!m) {
-            continue
-        }
-        const id = m.groups.id || m.groups.id2
-        dbg(`found ${k} = ${v}`)
-        runtimeHost.setModelAlias("env", id, v)
-    }
-    const t = normalizeFloat(env.GENAISCRIPT_DEFAULT_TEMPERATURE)
-    if (!isNaN(t)) {
-        dbg(`parsed GENAISCRIPT_DEFAULT_TEMPERATURE = ${t}`)
-        runtimeHost.setModelAlias("env", "large", { temperature: t })
-    }
+    dbg(`parsing process.env`);
+    // legacy
+    if (env.GENAISCRIPT_DEFAULT_MODEL) {
+        dbg(`found GENAISCRIPT_DEFAULT_MODEL: ${env.GENAISCRIPT_DEFAULT_MODEL}`);
+        runtimeHost.setModelAlias("env", "large", env.GENAISCRIPT_DEFAULT_MODEL);
+    }
+    // action
+    if (env.INPUT_MODEL) {
+        dbg(`found INPUT_MODEL = ${env.INPUT_MODEL}`);
+        runtimeHost.setModelAlias("env", "large", env.INPUT_MODEL);
+    }
+
+    const rx =
+        /^GENAISCRIPT(_DEFAULT)?_((?<id>[A-Z0-9_\-]+)_MODEL|(INPUT_)?MODEL_(?<id2>[A-Z0-9_\-]+))$/i;
+    for (const kv of Object.entries(env)) {
+        const [k, v] = kv;
+        const m = rx.exec(k);
+        if (!m) {
+            continue;
+        }
+        const id = m.groups.id || m.groups.id2;
+        dbg(`found ${k} = ${v}`);
+        runtimeHost.setModelAlias("env", id, v);
+    }
+    const t = normalizeFloat(env.GENAISCRIPT_DEFAULT_TEMPERATURE);
+    if (!isNaN(t)) {
+        dbg(`parsed GENAISCRIPT_DEFAULT_TEMPERATURE = ${t}`);
+        runtimeHost.setModelAlias("env", "large", { temperature: t });
+    }
 }
 
 /**
@@ -186,700 +174,652 @@ export async function parseDefaultsFromEnv(env: Record<string, string>) {
 * - Includes validation checks for URL formats and supported provider types.
*/ export async function parseTokenFromEnv( - env: Record, - modelId: string, - options: TraceOptions & CancellationOptions & { resolveToken?: boolean } + env: Record, + modelId: string, + options: TraceOptions & CancellationOptions & { resolveToken?: boolean }, ): Promise { - const { resolveToken } = options || {} - const { provider, model, tag } = parseModelIdentifier( - modelId ?? runtimeHost.modelAliases.large.model - ) - dbg(`parsing token for ${provider} ${model || ""} ${tag || ""}`) - const TOKEN_SUFFIX = ["_API_KEY", "_API_TOKEN", "_TOKEN", "_KEY"] - const BASE_SUFFIX = ["_API_BASE", "_API_ENDPOINT", "_BASE", "_ENDPOINT"] - - if (provider === MODEL_PROVIDER_OPENAI) { - dbg(`processing ${MODEL_PROVIDER_OPENAI}`) - const token = env.OPENAI_API_KEY ?? "" - let base = env.OPENAI_API_BASE - let type = (env.OPENAI_API_TYPE as OpenAIAPIType) || "openai" - const version = env.OPENAI_API_VERSION || parseAzureVersionFromUrl(base) - if ( - type !== "azure" && - type !== "openai" && - type !== "localai" && - type !== "azure_serverless" && - type !== "azure_serverless_models" - ) { - throw new Error( - "OPENAI_API_TYPE must be 'azure', 'azure_serverless', 'azure_serverless_models' or 'openai' or 'localai'" - ) - } - if (type === "openai" && !base) { - dbg(`setting default base for OPENAI_API_TYPE openai`) - base = OPENAI_API_BASE - } - if (type === "localai" && !base) { - base = LOCALAI_API_BASE - } - if ((type === "azure" || type === "azure_serverless") && !base) { - throw new Error("OPENAI_API_BASE must be set when type is 'azure'") - } - if (type === "azure") { - base = cleanAzureBase(base) - } - if (!token && !/^http:\/\//i.test(base)) { - // localhost typically requires no key - throw new Error("OPENAI_API_KEY missing") - } - if (token === PLACEHOLDER_API_KEY) { - throw new Error("OPENAI_API_KEY not configured") - } - if (base === PLACEHOLDER_API_BASE) { - throw new Error("OPENAI_API_BASE not configured") - } - if (base && !URL.canParse(base)) { - throw new Error("OPENAI_API_BASE must be a valid URL") - } - return { - provider, - model, - base, - type, - token, - source: "env: OPENAI_API_...", - version, - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_GITHUB) { - dbg(`processing ${MODEL_PROVIDER_GITHUB}`) - const res = findEnvVar(env, "", [ - "GITHUB_MODELS_TOKEN", - ...GITHUB_TOKENS, - ]) || { name: undefined, value: undefined } - if (!res?.value) { - if (resolveToken) { - const { exitCode, stdout } = await runtimeHost.exec( - undefined, - "gh", - ["auth", "token"], - options - ) - if (exitCode !== 0) - throw new Error("Failed to resolve GitHub token") - res.name = "gh auth token" - res.value = stdout.trim() - } - if (!res?.value) - throw new Error( - "GITHUB_MODELS_TOKEN, GITHUB_MODELS_TOKEN, GITHUB_TOKEN or GH_TOKEN must be set" - ) - } - const type = "github" - const base = GITHUB_MODELS_BASE - return { - provider, - model, - base, - token: res.value, - type, - source: `env: ${res.name}`, - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_AZURE_OPENAI) { - dbg(`processing ${MODEL_PROVIDER_AZURE_OPENAI}`) - const tokenVar = env.AZURE_OPENAI_API_KEY - ? 
"AZURE_OPENAI_API_KEY" - : "AZURE_API_KEY" - const token = env[tokenVar] - let base = trimTrailingSlash( - env.AZURE_OPENAI_ENDPOINT || - env.AZURE_OPENAI_API_BASE || - env.AZURE_API_BASE || - env.AZURE_OPENAI_API_ENDPOINT - ) - if (!token && !base) { - return undefined - } - //if (!token) - // throw new Error("AZURE_OPENAI_API_KEY or AZURE_API_KEY missing") - if (token === PLACEHOLDER_API_KEY) { - throw new Error("AZURE_OPENAI_API_KEY not configured") - } - if (!base) { - throw new Error( - "AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_BASE or AZURE_API_BASE missing" - ) - } - if (base === PLACEHOLDER_API_BASE) { - throw new Error("AZURE_OPENAI_API_ENDPOINT not configured") - } - const version = - env[`AZURE_OPENAI_API_VERSION_${model.toLocaleUpperCase()}`] || - env.AZURE_OPENAI_API_VERSION || - env.AZURE_API_VERSION || - parseAzureVersionFromUrl(base) - base = cleanAzureBase(base) - if (!URL.canParse(base)) { - throw new Error("AZURE_OPENAI_API_ENDPOINT must be a valid URL") - } - const azureCredentialsType = - env.AZURE_OPENAI_API_CREDENTIALS?.toLowerCase().trim() as AzureCredentialsType - return { - provider, - model, - base, - token, - type: "azure", - source: token - ? "env: AZURE_OPENAI_API_..." - : "env: AZURE_OPENAI_API_... + Entra ID", - version, - azureCredentialsType, - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI) { - dbg(`processing ${MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI}`) - const tokenVar = "AZURE_SERVERLESS_OPENAI_API_KEY" - dbg( - `retrieved AZURE_SERVERLESS_OPENAI_API_KEY: ${env.AZURE_SERVERLESS_OPENAI_API_KEY}` - ) - const token = env[tokenVar] - let base = trimTrailingSlash( - env.AZURE_SERVERLESS_OPENAI_ENDPOINT || - env.AZURE_SERVERLESS_OPENAI_API_ENDPOINT - ) - if (!token && !base) { - return undefined - } - if (token === PLACEHOLDER_API_KEY) { - throw new Error("AZURE_SERVERLESS_OPENAI_API_KEY not configured") - } - if (!base) { - throw new Error("AZURE_SERVERLESS_OPENAI_API_ENDPOINT missing") - } - if (base === PLACEHOLDER_API_BASE) { - throw new Error( - "AZURE_SERVERLESS_OPENAI_API_ENDPOINT not configured" - ) - } - base = cleanAzureBase(base) - if (!URL.canParse(base)) { - throw new Error( - "AZURE_SERVERLESS_OPENAI_API_ENDPOINT must be a valid URL" - ) - } - const version = - env.AZURE_SERVERLESS_OPENAI_API_VERSION || - env.AZURE_SERVERLESS_OPENAI_VERSION - const azureCredentialsType = - env.AZURE_SERVERLESS_OPENAI_API_CREDENTIALS?.toLowerCase().trim() as AzureCredentialsType - return { - provider, - model, - base, - token, - type: "azure_serverless", - source: token - ? "env: AZURE_SERVERLESS_OPENAI_API_..." - : "env: AZURE_SERVERLESS_OPENAI_API_... 
+ Entra ID", - version, - azureCredentialsType, - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_AZURE_AI_INFERENCE) { - dbg(`processing ${MODEL_PROVIDER_AZURE_AI_INFERENCE}`) - // https://github.com/Azure/azure-sdk-for-js/tree/@azure-rest/ai-inference_1.0.0-beta.2/sdk/ai/ai-inference-rest - dbg( - `retrieved AZURE_AI_INFERENCE_API_KEY: ${env.AZURE_AI_INFERENCE_API_KEY}` - ) - const tokenVar = "AZURE_AI_INFERENCE_API_KEY" - const token = env[tokenVar]?.trim() - let base = trimTrailingSlash( - env.AZURE_AI_INFERENCE_ENDPOINT || - env.AZURE_AI_INFERENCE_API_ENDPOINT - ) - if (!token && !base) { - return undefined - } - if (token === PLACEHOLDER_API_KEY) { - throw new Error("AZURE_AI_INFERENCE_API_KEY not configured") - } - if (!base) { - throw new Error("AZURE_AI_INFERENCE_API_ENDPOINT missing") - } - if (base === PLACEHOLDER_API_BASE) { - throw new Error("AZURE_AI_INFERENCE_API_ENDPOINT not configured") - } - base = trimTrailingSlash(base) - if (!URL.canParse(base)) { - throw new Error( - "AZURE_AI_INFERENCE_API_ENDPOINT must be a valid URL" - ) - } - const version = - env.AZURE_AI_INFERENCE_API_VERSION || env.AZURE_AI_INFERENCE_VERSION - return { - provider, - model, - base, - token, - type: "azure_ai_inference", - source: token - ? "env: AZURE_AI_INFERENCE_API_..." - : "env: AZURE_AI_INFERENCE_API_... + Entra ID", - version, - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_AZURE_SERVERLESS_MODELS) { - dbg(`processing ${MODEL_PROVIDER_AZURE_SERVERLESS_MODELS}`) - // https://github.com/Azure/azure-sdk-for-js/tree/@azure-rest/ai-inference_1.0.0-beta.2/sdk/ai/ai-inference-rest - const tokenVar = "AZURE_SERVERLESS_MODELS_API_KEY" - const token = env[tokenVar]?.trim() - let base = trimTrailingSlash( - env.AZURE_SERVERLESS_MODELS_ENDPOINT || - env.AZURE_SERVERLESS_MODELS_API_ENDPOINT - ) - if (!token && !base) { - return undefined - } - if (token === PLACEHOLDER_API_KEY) { - throw new Error("AZURE_SERVERLESS_MODELS_API_KEY not configured") - } - if (!base) { - throw new Error("AZURE_SERVERLESS_MODELS_API_ENDPOINT missing") - } - if (base === PLACEHOLDER_API_BASE) { - throw new Error( - "AZURE_SERVERLESS_MODELS_API_ENDPOINT not configured" - ) - } - base = trimTrailingSlash(base) - if (!URL.canParse(base)) { - throw new Error( - "AZURE_SERVERLESS_MODELS_API_ENDPOINT must be a valid URL" - ) - } - const version = - env.AZURE_SERVERLESS_MODELS_API_VERSION || - env.AZURE_SERVERLESS_MODELS_VERSION - return { - provider, - model, - base, - token, - type: "azure_serverless_models", - source: token - ? "env: AZURE_SERVERLESS_MODELS_API_..." - : "env: AZURE_SERVERLESS_MODELS_API_... 
+ Entra ID", - version, - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_GOOGLE) { - dbg(`processing ${MODEL_PROVIDER_GOOGLE}`) - const token = env.GEMINI_API_KEY || env.GOOGLE_API_KEY - if (!token) { - return undefined - } - if (token === PLACEHOLDER_API_KEY) { - throw new Error("GEMINI_API_KEY/GOOGLE_API_BASE not configured") - } - const base = - env.GEMINI_API_BASE || env.GOOGLE_API_BASE || GOOGLE_API_BASE - if (base === PLACEHOLDER_API_BASE) { - throw new Error("GEMINI_API_KEY/GOOGLE_API_BASE not configured") - } - return { - provider, - model, - base, - token, - type: "openai", - source: "env: GEMINI_API_...", - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_ANTHROPIC) { - dbg(`processing ${MODEL_PROVIDER_ANTHROPIC}`) - const modelKey = "ANTHROPIC_API_KEY" - dbg(`retrieved ANTHROPIC_API_KEY: ${env.ANTHROPIC_API_KEY}`) - const token = env[modelKey]?.trim() - if (token === undefined || token === PLACEHOLDER_API_KEY) { - throw new Error("ANTHROPIC_API_KEY not configured") - } - const base = - trimTrailingSlash(env.ANTHROPIC_API_BASE) || ANTHROPIC_API_BASE - const version = env.ANTHROPIC_API_VERSION || undefined - const source = "env: ANTHROPIC_API_..." - - return { - provider, - model, - token, - base, - version, - source, - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_ANTHROPIC_BEDROCK) { - dbg(`processing ${MODEL_PROVIDER_ANTHROPIC_BEDROCK}`) - return { - provider, - model, - source: "AWS SDK", - base: undefined, - token: MODEL_PROVIDER_ANTHROPIC_BEDROCK, - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_MISTRAL) { - dbg(`processing ${MODEL_PROVIDER_MISTRAL}`) - const base = env.MISTRAL_API_BASE || MISTRAL_API_BASE - const token = env.MISTRAL_API_KEY - if (!token) { - throw new Error("MISTRAL_API_KEY not configured") - } - return { - provider, - model, - token, - base, - source: "env: MISTRAL_API_...", - type: "openai", - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_ALIBABA) { - dbg(`processing ${MODEL_PROVIDER_ALIBABA}`) - const base = - env.ALIBABA_API_BASE || - env.DASHSCOPE_API_BASE || - env.DASHSCOPE_HTTP_BASE_URL || - ALIBABA_BASE - if (base === PLACEHOLDER_API_BASE) { - throw new Error("ALIBABA_API_BASE not configured") - } - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - const token = env.ALIBABA_API_KEY || env.DASHSCOPE_API_KEY - if (token === undefined || token === PLACEHOLDER_API_KEY) { - throw new Error("ALIBABA_API_KEY not configured") - } - return { - provider, - model, - base, - token, - type: "alibaba", - source: "env: ALIBABA_API_...", - } - } - - if (provider === MODEL_PROVIDER_OLLAMA) { - dbg(`processing ${MODEL_PROVIDER_OLLAMA}`) - const host = ollamaParseHostVariable(env) - const base = cleanApiBase(host) - return { - provider, - model, - base, - token: MODEL_PROVIDER_OLLAMA, - type: "openai", - source: "env: OLLAMA_HOST", - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_DOCKER_MODEL_RUNNER) { - dbg(`processing ${MODEL_PROVIDER_DOCKER_MODEL_RUNNER}`) - const base = - env.DOCKER_MODEL_RUNNER_API_BASE || DOCKER_MODEL_RUNNER_API_BASE - if (base === PLACEHOLDER_API_BASE) { - throw new Error("DOCKER_MODEL_RUNNER_API_BASE not configured") - } - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - return { - provider, - model, - base, - token: MODEL_PROVIDER_DOCKER_MODEL_RUNNER, - type: "openai", - source: "env: 
DOCKER_MODEL_RUNNER", - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_HUGGINGFACE) { - dbg(`processing ${MODEL_PROVIDER_HUGGINGFACE}`) - const prefixes = ["HUGGINGFACE", "HF"] - const token = findEnvVar(env, prefixes, TOKEN_SUFFIX) - const base = - findEnvVar(env, prefixes, BASE_SUFFIX)?.value || - HUGGINGFACE_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - if (!token?.value) { - throw new Error("HuggingFace token missing") - } - return { - base, - token: token?.value, - provider, - model, - type: "huggingface", - source: "env: HUGGINGFACE_API_...", - } satisfies LanguageModelConfiguration - } - - if (provider === MODEL_PROVIDER_DEEPSEEK) { - dbg(`processing ${MODEL_PROVIDER_DEEPSEEK}`) - const base = - findEnvVar(env, "DEEPSEEK", BASE_SUFFIX)?.value || DEEPSEEK_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - const token = env.DEEPSEEK_API_KEY - if (!token) { - throw new Error("DEEPSEEK_API_KEY not configured") - } - return { - provider, - model, - base, - token, - type: "openai", - source: "env: DEEPSEEK_API_...", - } - } - - if (provider === MODEL_PROVIDER_WHISPERASR) { - dbg(`processing ${MODEL_PROVIDER_WHISPERASR}`) - const base = - findEnvVar(env, "WHISPERASR", BASE_SUFFIX)?.value || - WHISPERASR_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - return { - provider, - model, - base, - token: undefined, - source: "env: WHISPERASR_API_...", - } - } - - if (provider === MODEL_PROVIDER_WINDOWS_AI) { - dbg(`processing ${MODEL_PROVIDER_WINDOWS_AI}`) - return { - provider, - model, - base: WINDOWS_AI_API_BASE, - token: MODEL_PROVIDER_WINDOWS_AI, - type: "openai", - source: "env", - } - } - - const prefixes = [ - tag ? `${provider}_${model}_${tag}` : undefined, - provider ? `${provider}_${model}` : undefined, - provider ? provider : undefined, + const { resolveToken } = options || {}; + const { provider, model, tag } = parseModelIdentifier( + modelId ?? runtimeHost.modelAliases.large.model, + ); + dbg(`parsing token for ${provider} ${model || ""} ${tag || ""}`); + const TOKEN_SUFFIX = ["_API_KEY", "_API_TOKEN", "_TOKEN", "_KEY"]; + const BASE_SUFFIX = ["_API_BASE", "_API_ENDPOINT", "_BASE", "_ENDPOINT"]; + + if (provider === MODEL_PROVIDER_OPENAI) { + dbg(`processing ${MODEL_PROVIDER_OPENAI}`); + const token = env.OPENAI_API_KEY ?? 
""; + let base = env.OPENAI_API_BASE; + let type = (env.OPENAI_API_TYPE as OpenAIAPIType) || "openai"; + const version = env.OPENAI_API_VERSION || parseAzureVersionFromUrl(base); + if ( + type !== "azure" && + type !== "openai" && + type !== "localai" && + type !== "azure_serverless" && + type !== "azure_serverless_models" + ) { + throw new Error( + "OPENAI_API_TYPE must be 'azure', 'azure_serverless', 'azure_serverless_models' or 'openai' or 'localai'", + ); + } + if (type === "openai" && !base) { + dbg(`setting default base for OPENAI_API_TYPE openai`); + base = OPENAI_API_BASE; + } + if (type === "localai" && !base) { + base = LOCALAI_API_BASE; + } + if ((type === "azure" || type === "azure_serverless") && !base) { + throw new Error("OPENAI_API_BASE must be set when type is 'azure'"); + } + if (type === "azure") { + base = cleanAzureBase(base); + } + if (!token && !/^http:\/\//i.test(base)) { + // localhost typically requires no key + throw new Error("OPENAI_API_KEY missing"); + } + if (token === PLACEHOLDER_API_KEY) { + throw new Error("OPENAI_API_KEY not configured"); + } + if (base === PLACEHOLDER_API_BASE) { + throw new Error("OPENAI_API_BASE not configured"); + } + if (base && !URL.canParse(base)) { + throw new Error("OPENAI_API_BASE must be a valid URL"); + } + return { + provider, + model, + base, + type, + token, + source: "env: OPENAI_API_...", + version, + } satisfies LanguageModelConfiguration; + } + + if (provider === MODEL_PROVIDER_GITHUB) { + dbg(`processing ${MODEL_PROVIDER_GITHUB}`); + const res = findEnvVar(env, "", ["GITHUB_MODELS_TOKEN", ...GITHUB_TOKENS]) || { + name: undefined, + value: undefined, + }; + if (!res?.value) { + if (resolveToken) { + const { exitCode, stdout } = await runtimeHost.exec( + undefined, + "gh", + ["auth", "token"], + options, + ); + if (exitCode !== 0) throw new Error("Failed to resolve GitHub token"); + res.name = "gh auth token"; + res.value = stdout.trim(); + } + if (!res?.value) + throw new Error( + "GITHUB_MODELS_TOKEN, GITHUB_MODELS_TOKEN, GITHUB_TOKEN or GH_TOKEN must be set", + ); + } + const type = "github"; + const base = GITHUB_MODELS_BASE; + return { + provider, + model, + base, + token: res.value, + type, + source: `env: ${res.name}`, + } satisfies LanguageModelConfiguration; + } + + if (provider === MODEL_PROVIDER_AZURE_OPENAI) { + dbg(`processing ${MODEL_PROVIDER_AZURE_OPENAI}`); + const tokenVar = env.AZURE_OPENAI_API_KEY ? 
"AZURE_OPENAI_API_KEY" : "AZURE_API_KEY"; + const token = env[tokenVar]; + let base = trimTrailingSlash( + env.AZURE_OPENAI_ENDPOINT || + env.AZURE_OPENAI_API_BASE || + env.AZURE_API_BASE || + env.AZURE_OPENAI_API_ENDPOINT, + ); + if (!token && !base) { + return undefined; + } + //if (!token) + // throw new Error("AZURE_OPENAI_API_KEY or AZURE_API_KEY missing") + if (token === PLACEHOLDER_API_KEY) { + throw new Error("AZURE_OPENAI_API_KEY not configured"); + } + if (!base) { + throw new Error("AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_BASE or AZURE_API_BASE missing"); + } + if (base === PLACEHOLDER_API_BASE) { + throw new Error("AZURE_OPENAI_API_ENDPOINT not configured"); + } + const version = + env[`AZURE_OPENAI_API_VERSION_${model.toLocaleUpperCase()}`] || + env.AZURE_OPENAI_API_VERSION || + env.AZURE_API_VERSION || + parseAzureVersionFromUrl(base); + base = cleanAzureBase(base); + if (!URL.canParse(base)) { + throw new Error("AZURE_OPENAI_API_ENDPOINT must be a valid URL"); + } + const azureCredentialsType = + env.AZURE_OPENAI_API_CREDENTIALS?.toLowerCase().trim() as AzureCredentialsType; + return { + provider, + model, + base, + token, + type: "azure", + source: token ? "env: AZURE_OPENAI_API_..." : "env: AZURE_OPENAI_API_... + Entra ID", + version, + azureCredentialsType, + } satisfies LanguageModelConfiguration; + } + + if (provider === MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI) { + dbg(`processing ${MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI}`); + const tokenVar = "AZURE_SERVERLESS_OPENAI_API_KEY"; + dbg(`retrieved AZURE_SERVERLESS_OPENAI_API_KEY: ${env.AZURE_SERVERLESS_OPENAI_API_KEY}`); + const token = env[tokenVar]; + let base = trimTrailingSlash( + env.AZURE_SERVERLESS_OPENAI_ENDPOINT || env.AZURE_SERVERLESS_OPENAI_API_ENDPOINT, + ); + if (!token && !base) { + return undefined; + } + if (token === PLACEHOLDER_API_KEY) { + throw new Error("AZURE_SERVERLESS_OPENAI_API_KEY not configured"); + } + if (!base) { + throw new Error("AZURE_SERVERLESS_OPENAI_API_ENDPOINT missing"); + } + if (base === PLACEHOLDER_API_BASE) { + throw new Error("AZURE_SERVERLESS_OPENAI_API_ENDPOINT not configured"); + } + base = cleanAzureBase(base); + if (!URL.canParse(base)) { + throw new Error("AZURE_SERVERLESS_OPENAI_API_ENDPOINT must be a valid URL"); + } + const version = env.AZURE_SERVERLESS_OPENAI_API_VERSION || env.AZURE_SERVERLESS_OPENAI_VERSION; + const azureCredentialsType = + env.AZURE_SERVERLESS_OPENAI_API_CREDENTIALS?.toLowerCase().trim() as AzureCredentialsType; + return { + provider, + model, + base, + token, + type: "azure_serverless", + source: token + ? "env: AZURE_SERVERLESS_OPENAI_API_..." + : "env: AZURE_SERVERLESS_OPENAI_API_... 
+ Entra ID", + version, + azureCredentialsType, + } satisfies LanguageModelConfiguration; + } + + if (provider === MODEL_PROVIDER_AZURE_AI_INFERENCE) { + dbg(`processing ${MODEL_PROVIDER_AZURE_AI_INFERENCE}`); + // https://github.com/Azure/azure-sdk-for-js/tree/@azure-rest/ai-inference_1.0.0-beta.2/sdk/ai/ai-inference-rest + dbg(`retrieved AZURE_AI_INFERENCE_API_KEY: ${env.AZURE_AI_INFERENCE_API_KEY}`); + const tokenVar = "AZURE_AI_INFERENCE_API_KEY"; + const token = env[tokenVar]?.trim(); + let base = trimTrailingSlash( + env.AZURE_AI_INFERENCE_ENDPOINT || env.AZURE_AI_INFERENCE_API_ENDPOINT, + ); + if (!token && !base) { + return undefined; + } + if (token === PLACEHOLDER_API_KEY) { + throw new Error("AZURE_AI_INFERENCE_API_KEY not configured"); + } + if (!base) { + throw new Error("AZURE_AI_INFERENCE_API_ENDPOINT missing"); + } + if (base === PLACEHOLDER_API_BASE) { + throw new Error("AZURE_AI_INFERENCE_API_ENDPOINT not configured"); + } + base = trimTrailingSlash(base); + if (!URL.canParse(base)) { + throw new Error("AZURE_AI_INFERENCE_API_ENDPOINT must be a valid URL"); + } + const version = env.AZURE_AI_INFERENCE_API_VERSION || env.AZURE_AI_INFERENCE_VERSION; + return { + provider, + model, + base, + token, + type: "azure_ai_inference", + source: token + ? "env: AZURE_AI_INFERENCE_API_..." + : "env: AZURE_AI_INFERENCE_API_... + Entra ID", + version, + } satisfies LanguageModelConfiguration; + } + + if (provider === MODEL_PROVIDER_AZURE_SERVERLESS_MODELS) { + dbg(`processing ${MODEL_PROVIDER_AZURE_SERVERLESS_MODELS}`); + // https://github.com/Azure/azure-sdk-for-js/tree/@azure-rest/ai-inference_1.0.0-beta.2/sdk/ai/ai-inference-rest + const tokenVar = "AZURE_SERVERLESS_MODELS_API_KEY"; + const token = env[tokenVar]?.trim(); + let base = trimTrailingSlash( + env.AZURE_SERVERLESS_MODELS_ENDPOINT || env.AZURE_SERVERLESS_MODELS_API_ENDPOINT, + ); + if (!token && !base) { + return undefined; + } + if (token === PLACEHOLDER_API_KEY) { + throw new Error("AZURE_SERVERLESS_MODELS_API_KEY not configured"); + } + if (!base) { + throw new Error("AZURE_SERVERLESS_MODELS_API_ENDPOINT missing"); + } + if (base === PLACEHOLDER_API_BASE) { + throw new Error("AZURE_SERVERLESS_MODELS_API_ENDPOINT not configured"); + } + base = trimTrailingSlash(base); + if (!URL.canParse(base)) { + throw new Error("AZURE_SERVERLESS_MODELS_API_ENDPOINT must be a valid URL"); + } + const version = env.AZURE_SERVERLESS_MODELS_API_VERSION || env.AZURE_SERVERLESS_MODELS_VERSION; + return { + provider, + model, + base, + token, + type: "azure_serverless_models", + source: token + ? "env: AZURE_SERVERLESS_MODELS_API_..." + : "env: AZURE_SERVERLESS_MODELS_API_... 
+ Entra ID",
+      version,
+    } satisfies LanguageModelConfiguration;
+  }
+
+  if (provider === MODEL_PROVIDER_GOOGLE) {
+    dbg(`processing ${MODEL_PROVIDER_GOOGLE}`);
+    const token = env.GEMINI_API_KEY || env.GOOGLE_API_KEY;
+    if (!token) {
+      return undefined;
+    }
+    if (token === PLACEHOLDER_API_KEY) {
+      throw new Error("GEMINI_API_KEY/GOOGLE_API_KEY not configured");
+    }
+    const base = env.GEMINI_API_BASE || env.GOOGLE_API_BASE || GOOGLE_API_BASE;
+    if (base === PLACEHOLDER_API_BASE) {
+      throw new Error("GEMINI_API_BASE/GOOGLE_API_BASE not configured");
+    }
+    return {
+      provider,
+      model,
+      base,
+      token,
+      type: "openai",
+      source: "env: GEMINI_API_...",
+    } satisfies LanguageModelConfiguration;
+  }
+
+  if (provider === MODEL_PROVIDER_ANTHROPIC) {
+    dbg(`processing ${MODEL_PROVIDER_ANTHROPIC}`);
+    const modelKey = "ANTHROPIC_API_KEY";
+    dbg(`retrieved ANTHROPIC_API_KEY: ${env.ANTHROPIC_API_KEY}`);
+    const token = env[modelKey]?.trim();
+    if (token === undefined || token === PLACEHOLDER_API_KEY) {
+      throw new Error("ANTHROPIC_API_KEY not configured");
+    }
+    const base = trimTrailingSlash(env.ANTHROPIC_API_BASE) || ANTHROPIC_API_BASE;
+    const version = env.ANTHROPIC_API_VERSION || undefined;
+    const source = "env: ANTHROPIC_API_...";
+
+    return {
+      provider,
+      model,
+      token,
+      base,
+      version,
+      source,
+    } satisfies LanguageModelConfiguration;
+  }
+
+  if (provider === MODEL_PROVIDER_ANTHROPIC_BEDROCK) {
+    dbg(`processing ${MODEL_PROVIDER_ANTHROPIC_BEDROCK}`);
+    return {
+      provider,
+      model,
+      source: "AWS SDK",
+      base: undefined,
+      token: MODEL_PROVIDER_ANTHROPIC_BEDROCK,
+    } satisfies LanguageModelConfiguration;
+  }
+
+  if (provider === MODEL_PROVIDER_MISTRAL) {
+    dbg(`processing ${MODEL_PROVIDER_MISTRAL}`);
+    const base = env.MISTRAL_API_BASE || MISTRAL_API_BASE;
+    const token = env.MISTRAL_API_KEY;
+    if (!token) {
+      throw new Error("MISTRAL_API_KEY not configured");
+    }
+    return {
+      provider,
+      model,
+      token,
+      base,
+      source: "env: MISTRAL_API_...",
+      type: "openai",
+    } satisfies LanguageModelConfiguration;
+  }
+
+  if (provider === MODEL_PROVIDER_ALIBABA) {
+    dbg(`processing ${MODEL_PROVIDER_ALIBABA}`);
+    const base =
+      env.ALIBABA_API_BASE || env.DASHSCOPE_API_BASE || env.DASHSCOPE_HTTP_BASE_URL || ALIBABA_BASE;
+    if (base === PLACEHOLDER_API_BASE) {
+      throw new Error("ALIBABA_API_BASE not configured");
+    }
+    if (!URL.canParse(base)) {
+      throw new Error(`${base} must be a valid URL`);
+    }
+    const token = env.ALIBABA_API_KEY || env.DASHSCOPE_API_KEY;
+    if (token === undefined || token === PLACEHOLDER_API_KEY) {
+      throw new Error("ALIBABA_API_KEY not configured");
+    }
+    return {
+      provider,
+      model,
+      base,
+      token,
+      type: "alibaba",
+      source: "env: ALIBABA_API_...",
+    };
+  }
+
+  if (provider === MODEL_PROVIDER_OLLAMA) {
+    dbg(`processing ${MODEL_PROVIDER_OLLAMA}`);
+    const host = ollamaParseHostVariable(env);
+    const base = cleanApiBase(host);
+    return {
+      provider,
+      model,
+      base,
+      token: MODEL_PROVIDER_OLLAMA,
+      type: "openai",
+      source: "env: OLLAMA_HOST",
+    } satisfies LanguageModelConfiguration;
+  }
+
+  if (provider === MODEL_PROVIDER_DOCKER_MODEL_RUNNER) {
+    dbg(`processing ${MODEL_PROVIDER_DOCKER_MODEL_RUNNER}`);
+    const base = env.DOCKER_MODEL_RUNNER_API_BASE || DOCKER_MODEL_RUNNER_API_BASE;
+    if (base === PLACEHOLDER_API_BASE) {
+      throw new Error("DOCKER_MODEL_RUNNER_API_BASE not configured");
+    }
+    if (!URL.canParse(base)) {
+      throw new Error(`${base} must be a valid URL`);
+    }
+    return {
+      provider,
+      model,
+      base,
+      token: MODEL_PROVIDER_DOCKER_MODEL_RUNNER,
+      type: 
"openai", + source: "env: DOCKER_MODEL_RUNNER", + } satisfies LanguageModelConfiguration; + } + + if (provider === MODEL_PROVIDER_HUGGINGFACE) { + dbg(`processing ${MODEL_PROVIDER_HUGGINGFACE}`); + const prefixes = ["HUGGINGFACE", "HF"]; + const token = findEnvVar(env, prefixes, TOKEN_SUFFIX); + const base = findEnvVar(env, prefixes, BASE_SUFFIX)?.value || HUGGINGFACE_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + if (!token?.value) { + throw new Error("HuggingFace token missing"); + } + return { + base, + token: token?.value, + provider, + model, + type: "huggingface", + source: "env: HUGGINGFACE_API_...", + } satisfies LanguageModelConfiguration; + } + + if (provider === MODEL_PROVIDER_DEEPSEEK) { + dbg(`processing ${MODEL_PROVIDER_DEEPSEEK}`); + const base = findEnvVar(env, "DEEPSEEK", BASE_SUFFIX)?.value || DEEPSEEK_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + const token = env.DEEPSEEK_API_KEY; + if (!token) { + throw new Error("DEEPSEEK_API_KEY not configured"); + } + return { + provider, + model, + base, + token, + type: "openai", + source: "env: DEEPSEEK_API_...", + }; + } + + if (provider === MODEL_PROVIDER_WHISPERASR) { + dbg(`processing ${MODEL_PROVIDER_WHISPERASR}`); + const base = findEnvVar(env, "WHISPERASR", BASE_SUFFIX)?.value || WHISPERASR_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + return { + provider, + model, + base, + token: undefined, + source: "env: WHISPERASR_API_...", + }; + } + + if (provider === MODEL_PROVIDER_WINDOWS_AI) { + dbg(`processing ${MODEL_PROVIDER_WINDOWS_AI}`); + return { + provider, + model, + base: WINDOWS_AI_API_BASE, + token: MODEL_PROVIDER_WINDOWS_AI, + type: "openai", + source: "env", + }; + } + + const prefixes = [ + tag ? `${provider}_${model}_${tag}` : undefined, + provider ? `${provider}_${model}` : undefined, + provider ? 
provider : undefined, + model, + ] + .filter((p) => p) + .map((p) => p.toUpperCase().replace(/[^a-z0-9]+/gi, "_")); + for (const prefix of prefixes) { + const modelKey = findEnvVar(env, prefix, TOKEN_SUFFIX); + const modelBase = findEnvVar(env, prefix, BASE_SUFFIX); + if (modelKey || modelBase) { + const token = modelKey?.value || ""; + const base = trimTrailingSlash(modelBase?.value); + const version = env[prefix + "_API_VERSION"]; + const source = `env: ${prefix}_API_...`; + const type: OpenAIAPIType = "openai"; + if (base && !URL.canParse(base)) { + throw new Error(`${modelBase} must be a valid URL`); + } + return { + provider, model, - ] - .filter((p) => p) - .map((p) => p.toUpperCase().replace(/[^a-z0-9]+/gi, "_")) - for (const prefix of prefixes) { - const modelKey = findEnvVar(env, prefix, TOKEN_SUFFIX) - const modelBase = findEnvVar(env, prefix, BASE_SUFFIX) - if (modelKey || modelBase) { - const token = modelKey?.value || "" - const base = trimTrailingSlash(modelBase?.value) - const version = env[prefix + "_API_VERSION"] - const source = `env: ${prefix}_API_...` - const type: OpenAIAPIType = "openai" - if (base && !URL.canParse(base)) { - throw new Error(`${modelBase} must be a valid URL`) - } - return { - provider, - model, - token, - base, - type, - version, - source, - } satisfies LanguageModelConfiguration - } - } - - if (provider === MODEL_PROVIDER_SGLANG) { - dbg(`processing MODEL_PROVIDER_SGLANG`) - const base = - findEnvVar(env, "SGLANG", BASE_SUFFIX)?.value || SGLANG_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - return { - provider, - model, - base, - token: MODEL_PROVIDER_SGLANG, - type: "openai", - source: "default", - } - } - - if (provider === MODEL_PROVIDER_VLLM) { - dbg(`processing MODEL_PROVIDER_VLLM`) - const base = - findEnvVar(env, "VLLM", BASE_SUFFIX)?.value || VLLM_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - return { - provider, - model, - base, - token: MODEL_PROVIDER_VLLM, - type: "openai", - source: "default", - } - } - - if (provider === MODEL_PROVIDER_LLAMAFILE) { - dbg(`processing MODEL_PROVIDER_LLAMAFILE`) - const base = - findEnvVar(env, "LLAMAFILE", BASE_SUFFIX)?.value || - LLAMAFILE_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - return { - provider, - model, - base, - token: MODEL_PROVIDER_LLAMAFILE, - type: "openai", - source: "default", - } - } - - if (provider === MODEL_PROVIDER_LITELLM) { - dbg(`processing MODEL_PROVIDER_LITELLM`) - const base = - findEnvVar(env, "LITELLM", BASE_SUFFIX)?.value || LITELLM_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - return { - provider, - model, - base, - token: MODEL_PROVIDER_LITELLM, - type: "openai", - source: "default", - } - } - - if (provider === MODEL_PROVIDER_LMSTUDIO) { - dbg(`processing MODEL_PROVIDER_LMSTUDIO`) - const base = - findEnvVar(env, "LMSTUDIO", BASE_SUFFIX)?.value || LMSTUDIO_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - return { - provider, - model, - base, - token: MODEL_PROVIDER_LMSTUDIO, - type: "openai", - source: "env: LMSTUDIO_API_...", - } - } - - if (provider === MODEL_PROVIDER_JAN) { - dbg(`processing MODEL_PROVIDER_JAN`) - const base = findEnvVar(env, "JAN", BASE_SUFFIX)?.value || JAN_API_BASE - if (!URL.canParse(base)) { - throw new Error(`${base} must be a valid URL`) - } - return { - provider, - model, - base, - token: MODEL_PROVIDER_JAN, - type: 
"openai", - source: "env: JAN_API_...", - } - } - - if (provider === MODEL_PROVIDER_GITHUB_COPILOT_CHAT) { - dbg(`processing MODEL_PROVIDER_GITHUB_COPILOT_CHAT`) - if (!runtimeHost.clientLanguageModel) { - throw new Error( - `${MODEL_PROVIDER_GITHUB_COPILOT_CHAT} requires Visual Studio Code and GitHub Copilot Chat` - ) - } - return { - provider, - model, - base: undefined, - token: MODEL_PROVIDER_GITHUB_COPILOT_CHAT, - } - } - - if (provider === MODEL_PROVIDER_ECHO || provider === MODEL_PROVIDER_NONE) { - dbg(`processing MODEL_PROVIDER_ECHO or MODEL_PROVIDER_NONE`) - return { - provider, - model, - base: undefined, - token: provider, - } - } - - return undefined - dbg(`no matching provider found, returning undefined`) - - function cleanAzureBase(b: string) { - if (!b) { - return b - } - b = - trimTrailingSlash(b.replace(/\/openai\/deployments.*$/, "")) + - `/openai/deployments` - return b - } - - function parseAzureVersionFromUrl(url: string) { - const uri = uriTryParse(url) - const v = uri?.searchParams.get("api-version") || undefined - // azure:gpt-4o_2024-11-20 - // {api-version} - if (v?.startsWith("{")) return undefined - - return v - } - - function cleanApiBase(b: string) { - if (!b) { - return b - } - b = trimTrailingSlash(b) - if (!/\/v1$/.test(b)) { - b += "/v1" - } - return b - } + token, + base, + type, + version, + source, + } satisfies LanguageModelConfiguration; + } + } + + if (provider === MODEL_PROVIDER_SGLANG) { + dbg(`processing MODEL_PROVIDER_SGLANG`); + const base = findEnvVar(env, "SGLANG", BASE_SUFFIX)?.value || SGLANG_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + return { + provider, + model, + base, + token: MODEL_PROVIDER_SGLANG, + type: "openai", + source: "default", + }; + } + + if (provider === MODEL_PROVIDER_VLLM) { + dbg(`processing MODEL_PROVIDER_VLLM`); + const base = findEnvVar(env, "VLLM", BASE_SUFFIX)?.value || VLLM_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + return { + provider, + model, + base, + token: MODEL_PROVIDER_VLLM, + type: "openai", + source: "default", + }; + } + + if (provider === MODEL_PROVIDER_LLAMAFILE) { + dbg(`processing MODEL_PROVIDER_LLAMAFILE`); + const base = findEnvVar(env, "LLAMAFILE", BASE_SUFFIX)?.value || LLAMAFILE_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + return { + provider, + model, + base, + token: MODEL_PROVIDER_LLAMAFILE, + type: "openai", + source: "default", + }; + } + + if (provider === MODEL_PROVIDER_LITELLM) { + dbg(`processing MODEL_PROVIDER_LITELLM`); + const base = findEnvVar(env, "LITELLM", BASE_SUFFIX)?.value || LITELLM_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + return { + provider, + model, + base, + token: MODEL_PROVIDER_LITELLM, + type: "openai", + source: "default", + }; + } + + if (provider === MODEL_PROVIDER_LMSTUDIO) { + dbg(`processing MODEL_PROVIDER_LMSTUDIO`); + const base = findEnvVar(env, "LMSTUDIO", BASE_SUFFIX)?.value || LMSTUDIO_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + return { + provider, + model, + base, + token: MODEL_PROVIDER_LMSTUDIO, + type: "openai", + source: "env: LMSTUDIO_API_...", + }; + } + + if (provider === MODEL_PROVIDER_JAN) { + dbg(`processing MODEL_PROVIDER_JAN`); + const base = findEnvVar(env, "JAN", BASE_SUFFIX)?.value || JAN_API_BASE; + if (!URL.canParse(base)) { + throw new Error(`${base} must be a valid URL`); + } + 
return {
+      provider,
+      model,
+      base,
+      token: MODEL_PROVIDER_JAN,
+      type: "openai",
+      source: "env: JAN_API_...",
+    };
+  }
+
+  if (provider === MODEL_PROVIDER_GITHUB_COPILOT_CHAT) {
+    dbg(`processing MODEL_PROVIDER_GITHUB_COPILOT_CHAT`);
+    if (!runtimeHost.clientLanguageModel) {
+      throw new Error(
+        `${MODEL_PROVIDER_GITHUB_COPILOT_CHAT} requires Visual Studio Code and GitHub Copilot Chat`,
+      );
+    }
+    return {
+      provider,
+      model,
+      base: undefined,
+      token: MODEL_PROVIDER_GITHUB_COPILOT_CHAT,
+    };
+  }
+
+  if (provider === MODEL_PROVIDER_ECHO || provider === MODEL_PROVIDER_NONE) {
+    dbg(`processing MODEL_PROVIDER_ECHO or MODEL_PROVIDER_NONE`);
+    return {
+      provider,
+      model,
+      base: undefined,
+      token: provider,
+    };
+  }
+
+  dbg(`no matching provider found, returning undefined`);
+  return undefined;
+
+  function cleanAzureBase(b: string) {
+    if (!b) {
+      return b;
+    }
+    b = trimTrailingSlash(b.replace(/\/openai\/deployments.*$/, "")) + `/openai/deployments`;
+    return b;
+  }
+
+  function parseAzureVersionFromUrl(url: string) {
+    const uri = uriTryParse(url);
+    const v = uri?.searchParams.get("api-version") || undefined;
+    // azure:gpt-4o_2024-11-20
+    // {api-version}
+    if (v?.startsWith("{")) return undefined;
+
+    return v;
+  }
+
+  function cleanApiBase(b: string) {
+    if (!b) {
+      return b;
+    }
+    b = trimTrailingSlash(b);
+    if (!/\/v1$/.test(b)) {
+      b += "/v1";
+    }
+    return b;
+  }
 }
diff --git a/packages/core/src/error.test.ts b/packages/core/src/error.test.ts
index fdbb3652cb..957205a16e 100644
--- a/packages/core/src/error.test.ts
+++ b/packages/core/src/error.test.ts
@@ -1,130 +1,124 @@
-import { strict as assert } from "node:assert"
-import { describe, it as test } from "node:test"
+import { strict as assert } from "node:assert";
+import { describe, it as test } from "node:test";
 import {
-    serializeError,
-    errorMessage,
-    CancelError,
-    NotSupportedError,
-    RequestError,
-    isCancelError,
-    isRequestError,
-} from "./error"
+  serializeError,
+  errorMessage,
+  CancelError,
+  NotSupportedError,
+  RequestError,
+  isCancelError,
+  isRequestError,
+} from "./error";

 describe("Error Utilities", () => {
-    describe("serializeError function", () => {
-        test("should return undefined for null or undefined input", () => {
-            assert.strictEqual(serializeError(null), undefined)
-            assert.strictEqual(serializeError(undefined), undefined)
-        })
+  describe("serializeError function", () => {
+    test("should return undefined for null or undefined input", () => {
+      assert.strictEqual(serializeError(null), undefined);
+      assert.strictEqual(serializeError(undefined), undefined);
+    });

-        test("should serialize an Error instance", () => {
-            const error = new Error("Test error")
-            const serialized = serializeError(error)
-            assert.strictEqual(serialized.message, "Test error")
-            assert.ok("stack" in serialized)
-        })
+    test("should serialize an Error instance", () => {
+      const error = new Error("Test error");
+      const serialized = serializeError(error);
+      assert.strictEqual(serialized.message, "Test error");
+      assert.ok("stack" in serialized);
+    });

-        test("should return the object as is for SerializedError input", () => {
-            const serializedError = {
-                message: "Serialized error",
-                stack: "stack trace",
-            }
-            const serialized = serializeError(serializedError)
-            assert.deepStrictEqual(serialized, serializedError)
-        })
+    test("should return the object as is for SerializedError input", () => {
+      const serializedError = {
+        message: "Serialized error",
+        stack: "stack trace",
+      };
+      const serialized = 
serializeError(serializedError); + assert.deepStrictEqual(serialized, serializedError); + }); - test("should return an object with message property for string input", () => { - const message = "Test message" - const serialized = serializeError(message) - assert.strictEqual(serialized.message, message) - }) + test("should return an object with message property for string input", () => { + const message = "Test message"; + const serialized = serializeError(message); + assert.strictEqual(serialized.message, message); + }); - test("should return an object with message property for number input", () => { - const number = 42 - const serialized = serializeError(number) - assert.strictEqual(serialized.message, "42") - }) - }) + test("should return an object with message property for number input", () => { + const number = 42; + const serialized = serializeError(number); + assert.strictEqual(serialized.message, "42"); + }); + }); - describe("errorMessage function", () => { - test("should return undefined for null or undefined input", () => { - assert.strictEqual(errorMessage(null), undefined) - assert.strictEqual(errorMessage(undefined), undefined) - }) + describe("errorMessage function", () => { + test("should return undefined for null or undefined input", () => { + assert.strictEqual(errorMessage(null), undefined); + assert.strictEqual(errorMessage(undefined), undefined); + }); - test("should return the error message if available", () => { - const error = new Error("Test error message") - assert.strictEqual(errorMessage(error), "Test error message") - }) + test("should return the error message if available", () => { + const error = new Error("Test error message"); + assert.strictEqual(errorMessage(error), "Test error message"); + }); - test("should return default value if no message or name on error", () => { - const error = {} // Empty error-like object - assert.strictEqual(errorMessage(error), "error") - }) - }) + test("should return default value if no message or name on error", () => { + const error = {}; // Empty error-like object + assert.strictEqual(errorMessage(error), "error"); + }); + }); - describe("CancelError class", () => { - test('should have a name property set to "CancelError"', () => { - const error = new CancelError("Cancellation happened") - assert.strictEqual(error.name, CancelError.NAME) - }) - }) + describe("CancelError class", () => { + test('should have a name property set to "CancelError"', () => { + const error = new CancelError("Cancellation happened"); + assert.strictEqual(error.name, CancelError.NAME); + }); + }); - describe("NotSupportedError class", () => { - test('should have a name property set to "NotSupportedError"', () => { - const error = new NotSupportedError("Not supported") - assert.strictEqual(error.name, NotSupportedError.NAME) - }) - }) + describe("NotSupportedError class", () => { + test('should have a name property set to "NotSupportedError"', () => { + const error = new NotSupportedError("Not supported"); + assert.strictEqual(error.name, NotSupportedError.NAME); + }); + }); - describe("RequestError class", () => { - test("should set instance properties correctly", () => { - const status = 404 - const statusText = "Not Found" - const body = { message: "Resource not found" } - const bodyText = "Error body text" - const retryAfter = 120 - const error = new RequestError( - status, - statusText, - body, - bodyText, - retryAfter - ) - assert.strictEqual(error.status, status) - assert.strictEqual(error.statusText, statusText) - 
assert.deepStrictEqual(error.body, body) - assert.strictEqual(error.bodyText, bodyText) - assert.strictEqual(error.retryAfter, retryAfter) - }) - }) + describe("RequestError class", () => { + test("should set instance properties correctly", () => { + const status = 404; + const statusText = "Not Found"; + const body = { message: "Resource not found" }; + const bodyText = "Error body text"; + const retryAfter = 120; + const error = new RequestError(status, statusText, body, bodyText, retryAfter); + assert.strictEqual(error.status, status); + assert.strictEqual(error.statusText, statusText); + assert.deepStrictEqual(error.body, body); + assert.strictEqual(error.bodyText, bodyText); + assert.strictEqual(error.retryAfter, retryAfter); + }); + }); - describe("isCancelError function", () => { - test("should return true for CancelError instances", () => { - const error = new CancelError("Cancellation") - assert.ok(isCancelError(error)) - }) + describe("isCancelError function", () => { + test("should return true for CancelError instances", () => { + const error = new CancelError("Cancellation"); + assert.ok(isCancelError(error)); + }); - test("should return true for AbortError", () => { - const error = new Error("Abort") - error.name = "AbortError" - assert.ok(isCancelError(error)) - }) - }) + test("should return true for AbortError", () => { + const error = new Error("Abort"); + error.name = "AbortError"; + assert.ok(isCancelError(error)); + }); + }); - describe("isRequestError function", () => { - test("should return true for RequestError instances with matching statusCode and code", () => { - const error = new RequestError(400, "Bad Request", { - code: "BadRequest", - }) - assert.ok(isRequestError(error, 400, "BadRequest")) - }) + describe("isRequestError function", () => { + test("should return true for RequestError instances with matching statusCode and code", () => { + const error = new RequestError(400, "Bad Request", { + code: "BadRequest", + }); + assert.ok(isRequestError(error, 400, "BadRequest")); + }); - test("should return true for RequestError instances with undefined statusCode or code", () => { - const error = new RequestError(400, "Bad Request", { - code: "BadRequest", - }) - assert.ok(isRequestError(error)) - }) - }) -}) + test("should return true for RequestError instances with undefined statusCode or code", () => { + const error = new RequestError(400, "Bad Request", { + code: "BadRequest", + }); + assert.ok(isRequestError(error)); + }); + }); +}); diff --git a/packages/core/src/error.ts b/packages/core/src/error.ts index c3e1161824..923afd1a4d 100644 --- a/packages/core/src/error.ts +++ b/packages/core/src/error.ts @@ -1,6 +1,6 @@ -import { serializeError as rawSerializeError } from "serialize-error" -import debug from "debug" -const dbg = debug("genaiscript:error") +import { serializeError as rawSerializeError } from "serialize-error"; +import debug from "debug"; +const dbg = debug("genaiscript:error"); /** * Serializes an error into a standardized format for easier handling. @@ -13,24 +13,22 @@ const dbg = debug("genaiscript:error") * - For other types, attempts to stringify and include as the `message` property. * @returns The serialized error with standardized properties or `undefined` for nullish input. 
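 *
 * An illustrative sketch of typical outputs (shapes indicative, not exact):
 * @example
 * serializeError(new Error("boom")) // => { name: "Error", message: "boom", stack: "..." }
 * serializeError("boom") // => { message: "boom" }
 * serializeError(null) // => undefined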
*/
-export function serializeError(
-    e: unknown | string | Error | SerializedError
-): SerializedError {
-    if (e === undefined || e === null) return undefined
-    else if (e instanceof Error) {
-        const err = rawSerializeError(e, { maxDepth: 3, useToJSON: false })
-        const m = /at eval.*:(\d+):(\d+)/.exec(err.stack)
-        if (m) {
-            err.line = parseInt(m[1])
-            err.column = parseInt(m[2])
-        }
-        dbg("%O", err)
-        return err
-    } else if (e instanceof Object) {
-        const obj = e as SerializedError
-        return obj
-    } else if (typeof e === "string") return { message: e }
-    else return { message: e.toString?.() }
+export function serializeError(e: unknown | string | Error | SerializedError): SerializedError {
+  if (e === undefined || e === null) return undefined;
+  else if (e instanceof Error) {
+    const err = rawSerializeError(e, { maxDepth: 3, useToJSON: false });
+    const m = /at eval.*:(\d+):(\d+)/.exec(err.stack);
+    if (m) {
+      err.line = parseInt(m[1]);
+      err.column = parseInt(m[2]);
+    }
+    dbg("%O", err);
+    return err;
+  } else if (e instanceof Object) {
+    const obj = e as SerializedError;
+    return obj;
+  } else if (typeof e === "string") return { message: e };
+  else return { message: e.toString?.() };
 }

 /**
@@ -41,47 +39,42 @@ export function serializeError(
  * @returns The extracted error message or the `defaultValue` if none is found.
  */
 export function errorMessage(e: any, defaultValue: string = "error"): string {
-    if (e === undefined || e === null) return undefined
-    if (typeof e.messsage === "string") return e.message
-    if (typeof e.error === "string") return e.error
-    if (typeof e.error === "object" && typeof e.error.message === "string")
-        return e.error.message
-    const ser = serializeError(e)
-    return ser?.message ?? ser?.name ?? defaultValue
+  if (e === undefined || e === null) return undefined;
+  if (typeof e.message === "string") return e.message;
+  if (typeof e.error === "string") return e.error;
+  if (typeof e.error === "object" && typeof e.error.message === "string") return e.error.message;
+  const ser = serializeError(e);
+  return ser?.message ?? ser?.name ?? defaultValue;
 }

 export class CancelError extends Error {
-    static readonly NAME = "CancelError"
-    constructor(message: string) {
-        super(message)
-        this.name = CancelError.NAME
-    }
+  static readonly NAME = "CancelError";
+  constructor(message: string) {
+    super(message);
+    this.name = CancelError.NAME;
+  }
 }

 export class NotSupportedError extends Error {
-    static readonly NAME = "NotSupportedError"
-    constructor(message: string) {
-        super(message)
-        this.name = NotSupportedError.NAME
-    }
+  static readonly NAME = "NotSupportedError";
+  constructor(message: string) {
+    super(message);
+    this.name = NotSupportedError.NAME;
+  }
 }

 export class RequestError extends Error {
-    static readonly NAME = "RequestError"
-    constructor(
-        public readonly status: number,
-        public readonly statusText: string,
-        public readonly body: any,
-        public readonly bodyText?: string,
-        readonly retryAfter?: number
-    ) {
-        super(
-            `LLM error (${status}): ${
-                body?.message ? body?.message : statusText
-            }`
-        )
-        this.name = "RequestError"
-    }
+  static readonly NAME = "RequestError";
+  constructor(
+    public readonly status: number,
+    public readonly statusText: string,
+    public readonly body: any,
+    public readonly bodyText?: string,
+    readonly retryAfter?: number,
+  ) {
+    super(`LLM error (${status}): ${body?.message ? 
body?.message : statusText}`); + this.name = "RequestError"; + } } /** @@ -93,8 +86,8 @@ export class RequestError extends Error { * @returns Boolean indicating whether the error is categorized as a cancellation error. */ export function isCancelError(e: Error | SerializedError) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return e?.name === CancelError.NAME || e?.name === "AbortError" + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return e?.name === CancelError.NAME || e?.name === "AbortError"; } /** @@ -106,9 +99,9 @@ export function isCancelError(e: Error | SerializedError) { * @returns True if the error is a RequestError and matches the optional status and code, otherwise false. */ export function isRequestError(e: Error, statusCode?: number, code?: string) { - return ( - e instanceof RequestError && - (statusCode === undefined || statusCode === e.status) && - (code === undefined || code === e.body?.code) - ) + return ( + e instanceof RequestError && + (statusCode === undefined || statusCode === e.status) && + (code === undefined || code === e.body?.code) + ); } diff --git a/packages/core/src/evalprompt.ts b/packages/core/src/evalprompt.ts index 7ca64aad24..6605c9348c 100644 --- a/packages/core/src/evalprompt.ts +++ b/packages/core/src/evalprompt.ts @@ -1,7 +1,7 @@ -import debug from "debug" -const dbg = debug("genaiscript:evalprompt") +import debug from "debug"; +const dbg = debug("genaiscript:evalprompt"); -import { host } from "./host" +import { host } from "./host"; /** * Evaluates a JavaScript prompt script with the provided context. @@ -15,47 +15,47 @@ import { host } from "./host" * @returns The result of evaluating the JavaScript prompt script. */ export async function evalPrompt( - ctx0: PromptContext, - r: PromptScript, - options?: { - sourceMaps?: boolean - logCb?: (msg: string) => void - } + ctx0: PromptContext, + r: PromptScript, + options?: { + sourceMaps?: boolean; + logCb?: (msg: string) => void; + }, ) { - const { sourceMaps } = options || {} - const ctx = Object.freeze({ - ...ctx0, - }) - const keys = Object.keys(ctx) - const prefix = "async (" + keys.join(",") + ") => { 'use strict';\n" - const suffix = "\n}" + const { sourceMaps } = options || {}; + const ctx = Object.freeze({ + ...ctx0, + }); + const keys = Object.keys(ctx); + const prefix = "async (" + keys.join(",") + ") => { 'use strict';\n"; + const suffix = "\n}"; - const jsSource = r.jsSource - let src: string = [prefix, jsSource, suffix].join("") - // source map - if (r.filename && sourceMaps) { - dbg("creating source map") - const MagicString = (await import("magic-string")).default - const s = new MagicString(jsSource) - s.prepend(prefix) - s.append(suffix) - dbg(`resolving path for ${r.filename}`) - const source = host.path.resolve(r.filename) - const map = s.generateMap({ - source, - includeContent: true, - hires: true, - }) - const mapURL: string = map.toUrl() - // split keywords as so that JS engine does not try to load "mapUrl" - src += "\n//# source" + "MappingURL=" + mapURL - dbg("appending sourceURL to source") - src += "\n//# source" + "URL=" + source - } + const jsSource = r.jsSource; + let src: string = [prefix, jsSource, suffix].join(""); + // source map + if (r.filename && sourceMaps) { + dbg("creating source map"); + const MagicString = (await import("magic-string")).default; + const s = new MagicString(jsSource); + s.prepend(prefix); + s.append(suffix); + dbg(`resolving path for ${r.filename}`); + const source = host.path.resolve(r.filename); + 
const map = s.generateMap({
+      source,
+      includeContent: true,
+      hires: true,
+    });
+    const mapURL: string = map.toUrl();
+    // split the keyword so that the JS engine does not try to load "mapURL"
+    src += "\n//# source" + "MappingURL=" + mapURL;
+    dbg("appending sourceURL to source");
+    src += "\n//# source" + "URL=" + source;
+  }

-    // in principle we could cache this function (but would have to do that based on hashed body or sth)
-    // but probably little point
-    const fn = (0, eval)(src)
-    dbg(`eval ${r.filename}`)
-    return await fn(...Object.values(ctx))
+  // in principle we could cache this function (but we would have to key it on a hash of the body or similar)
+  // but probably little point
+  const fn = (0, eval)(src);
+  dbg(`eval ${r.filename}`);
+  return await fn(...Object.values(ctx));
 }
diff --git a/packages/core/src/expander.ts b/packages/core/src/expander.ts
index 8814c499b2..b1a51ebd42 100644
--- a/packages/core/src/expander.ts
+++ b/packages/core/src/expander.ts
@@ -1,37 +1,29 @@
-import debug from "debug"
-const dbg = debug("genaiscript:expander")
+import debug from "debug";
+const dbg = debug("genaiscript:expander");

-import { resolveScript } from "./ast"
-import { assert } from "./util"
-import { MarkdownTrace } from "./trace"
-import { errorMessage, isCancelError, NotSupportedError } from "./error"
-import { JS_REGEX, MAX_TOOL_CALLS, PROMPTY_REGEX } from "./constants"
-import {
-    finalizeMessages,
-    PromptImage,
-    PromptPrediction,
-    renderPromptNode,
-} from "./promptdom"
-import { createPromptContext } from "./promptcontext"
-import { evalPrompt } from "./evalprompt"
-import { addToolDefinitionsMessage, appendSystemMessage } from "./chat"
-import { importPrompt } from "./importprompt"
-import { runtimeHost } from "./host"
-import { addFallbackToolSystems, resolveSystems } from "./systems"
-import { GenerationOptions } from "./generation"
-import {
-    ChatCompletionMessageParam,
-    ChatCompletionReasoningEffort,
-} from "./chattypes"
-import { GenerationStatus, Project } from "./server/messages"
-import { dispose } from "./dispose"
-import { normalizeFloat, normalizeInt } from "./cleaners"
-import { mergeEnvVarsWithSystem } from "./vars"
-import { installGlobalPromptContext } from "./globals"
-import { mark } from "./performance"
-import { nodeIsPackageTypeModule } from "./nodepackage"
-import { parseModelIdentifier } from "./models"
-import { metadataMerge } from "./metadata"
+import { resolveScript } from "./ast";
+import { assert } from "./util";
+import { MarkdownTrace } from "./trace";
+import { errorMessage, isCancelError, NotSupportedError } from "./error";
+import { JS_REGEX, MAX_TOOL_CALLS, PROMPTY_REGEX } from "./constants";
+import { finalizeMessages, PromptImage, PromptPrediction, renderPromptNode } from "./promptdom";
+import { createPromptContext } from "./promptcontext";
+import { evalPrompt } from "./evalprompt";
+import { addToolDefinitionsMessage, appendSystemMessage } from "./chat";
+import { importPrompt } from "./importprompt";
+import { runtimeHost } from "./host";
+import { addFallbackToolSystems, resolveSystems } from "./systems";
+import { GenerationOptions } from "./generation";
+import { ChatCompletionMessageParam, ChatCompletionReasoningEffort } from "./chattypes";
+import { GenerationStatus, Project } from "./server/messages";
+import { dispose } from "./dispose";
+import { normalizeFloat, normalizeInt } from "./cleaners";
+import { mergeEnvVarsWithSystem } from "./vars";
+import { installGlobalPromptContext } from "./globals";
+import { mark } from 
"./performance"; +import { nodeIsPackageTypeModule } from "./nodepackage"; +import { parseModelIdentifier } from "./models"; +import { metadataMerge } from "./metadata"; /** * Executes a prompt expansion process based on the provided prompt script, variables, and options. @@ -45,141 +37,133 @@ import { metadataMerge } from "./metadata" * @returns An object containing the status of the operation, generated messages, images, schema definitions, tools, logs, and other related outputs. */ export async function callExpander( - prj: Project, - r: PromptScript, - ev: ExpansionVariables, - trace: MarkdownTrace, - options: GenerationOptions, - installGlobally: boolean + prj: Project, + r: PromptScript, + ev: ExpansionVariables, + trace: MarkdownTrace, + options: GenerationOptions, + installGlobally: boolean, ) { - mark("prompt.expand.main") - assert(!!options.model) - const modelId = r.model ?? options.model - const ctx = await createPromptContext(prj, ev, trace, options, modelId) - if (installGlobally) installGlobalPromptContext(ctx) + mark("prompt.expand.main"); + assert(!!options.model); + const modelId = r.model ?? options.model; + const ctx = await createPromptContext(prj, ev, trace, options, modelId); + if (installGlobally) installGlobalPromptContext(ctx); - let status: GenerationStatus = undefined - let statusText: string = undefined - let logs = "" - let messages: ChatCompletionMessageParam[] = [] - let images: PromptImage[] = [] - let schemas: Record = {} - let functions: ToolCallback[] = [] - let fileMerges: FileMergeHandler[] = [] - let outputProcessors: PromptOutputProcessorHandler[] = [] - let chatParticipants: ChatParticipant[] = [] - let fileOutputs: FileOutput[] = [] - let disposables: AsyncDisposable[] = [] - let prediction: PromptPrediction + let status: GenerationStatus = undefined; + let statusText: string = undefined; + let logs = ""; + let messages: ChatCompletionMessageParam[] = []; + let images: PromptImage[] = []; + let schemas: Record = {}; + let functions: ToolCallback[] = []; + let fileMerges: FileMergeHandler[] = []; + let outputProcessors: PromptOutputProcessorHandler[] = []; + let chatParticipants: ChatParticipant[] = []; + let fileOutputs: FileOutput[] = []; + let disposables: AsyncDisposable[] = []; + let prediction: PromptPrediction; - const logCb = (msg: any) => { - logs += msg + "\n" - } + const logCb = (msg: any) => { + logs += msg + "\n"; + }; - // package.json { type: "module" } - const isModule = await nodeIsPackageTypeModule() - try { - if ( - r.filename && - (isModule || !JS_REGEX.test(r.filename)) && - !PROMPTY_REGEX.test(r.filename) - ) - await importPrompt(ctx, r, { logCb, trace }) - else { - await evalPrompt(ctx, r, { - sourceMaps: true, - logCb, - }) - } - const node = ctx.node - const { - messages: msgs, - images: imgs, - errors, - schemas: schs, - tools: fns, - fileMerges: fms, - outputProcessors: ops, - chatParticipants: cps, - fileOutputs: fos, - prediction: pred, - disposables: mcps, - } = await renderPromptNode(modelId, node, { - flexTokens: options.flexTokens, - fenceFormat: options.fenceFormat, - trace, - }) - messages = msgs - images = imgs - schemas = schs - functions = fns - fileMerges = fms - outputProcessors = ops - chatParticipants = cps - fileOutputs = fos - disposables = mcps - prediction = pred - if (errors?.length) { - for (const error of errors) trace.error(``, error) - status = "error" - statusText = errors.map((e) => errorMessage(e)).join("\n") - } else { - status = "success" - } - } catch (e) { - status = "error" - statusText = 
errorMessage(e) - if (isCancelError(e)) { - status = "cancelled" - trace.note(statusText) - } else { - trace.error(undefined, e) - } + // package.json { type: "module" } + const isModule = await nodeIsPackageTypeModule(); + try { + if (r.filename && (isModule || !JS_REGEX.test(r.filename)) && !PROMPTY_REGEX.test(r.filename)) + await importPrompt(ctx, r, { logCb, trace }); + else { + await evalPrompt(ctx, r, { + sourceMaps: true, + logCb, + }); + } + const node = ctx.node; + const { + messages: msgs, + images: imgs, + errors, + schemas: schs, + tools: fns, + fileMerges: fms, + outputProcessors: ops, + chatParticipants: cps, + fileOutputs: fos, + prediction: pred, + disposables: mcps, + } = await renderPromptNode(modelId, node, { + flexTokens: options.flexTokens, + fenceFormat: options.fenceFormat, + trace, + }); + messages = msgs; + images = imgs; + schemas = schs; + functions = fns; + fileMerges = fms; + outputProcessors = ops; + chatParticipants = cps; + fileOutputs = fos; + disposables = mcps; + prediction = pred; + if (errors?.length) { + for (const error of errors) trace.error(``, error); + status = "error"; + statusText = errors.map((e) => errorMessage(e)).join("\n"); + } else { + status = "success"; + } + } catch (e) { + status = "error"; + statusText = errorMessage(e); + if (isCancelError(e)) { + status = "cancelled"; + trace.note(statusText); + } else { + trace.error(undefined, e); } + } - return Object.freeze({ - logs, - status, - statusText, - messages, - images, - schemas, - functions: Object.freeze(functions), - fileMerges, - outputProcessors, - chatParticipants, - fileOutputs, - disposables, - prediction, - }) + return Object.freeze({ + logs, + status, + statusText, + messages, + images, + schemas, + functions: Object.freeze(functions), + fileMerges, + outputProcessors, + chatParticipants, + fileOutputs, + disposables, + prediction, + }); } -function traceEnv( - model: string, - trace: MarkdownTrace, - env: Partial -) { - trace.startDetails("🏡 env") - trace.files(env.files, { - title: "💾 files", - model, - skipIfEmpty: true, - secrets: env.secrets, - maxLength: 0, - }) - const vars = Object.entries(env.vars || {}) - if (vars.length) { - trace.startDetails("🧮 vars") - for (const [k, v] of vars) { - trace.itemValue(k, v) - } - trace.endDetails() - } - const secrets = Object.keys(env.secrets || {}) - if (secrets.length) { - trace.itemValue(`🔐 secrets`, secrets.join(", ")) +function traceEnv(model: string, trace: MarkdownTrace, env: Partial) { + trace.startDetails("🏡 env"); + trace.files(env.files, { + title: "💾 files", + model, + skipIfEmpty: true, + secrets: env.secrets, + maxLength: 0, + }); + const vars = Object.entries(env.vars || {}); + if (vars.length) { + trace.startDetails("🧮 vars"); + for (const [k, v] of vars) { + trace.itemValue(k, v); } - trace.endDetails() + trace.endDetails(); + } + const secrets = Object.keys(env.secrets || {}); + if (secrets.length) { + trace.itemValue(`🔐 secrets`, secrets.join(", ")); + } + trace.endDetails(); } /** @@ -202,252 +186,240 @@ function traceEnv( * * @param - has parameters/options i */ export async function expandTemplate( - prj: Project, - template: PromptScript, - options: GenerationOptions, - env: ExpansionVariables + prj: Project, + template: PromptScript, + options: GenerationOptions, + env: ExpansionVariables, ) { - mark("prompt.expand.script") - const trace = options.trace - const model = options.model - assert(!!trace) - assert(!!model) - const cancellationToken = options.cancellationToken - // update options - const 
lineNumbers = - options.lineNumbers ?? - template.lineNumbers ?? - resolveSystems(prj, template, undefined) - .map((s) => resolveScript(prj, s)) - .some((t) => t?.lineNumbers) - const temperature = - options.temperature ?? - normalizeFloat(env.vars["temperature"]) ?? - template.temperature ?? - runtimeHost.modelAliases.large.temperature - options.fallbackTools = - options.fallbackTools ?? - template.fallbackTools ?? - runtimeHost.modelAliases.large.fallbackTools - const reasoningEffort: ChatCompletionReasoningEffort = - options.reasoningEffort ?? - env.vars["reasoning_effort"] ?? - template.reasoningEffort ?? - runtimeHost.modelAliases.large.reasoningEffort - const topP = - options.topP ?? normalizeFloat(env.vars["top_p"]) ?? template.topP - const maxTokens = - options.maxTokens ?? - normalizeInt(env.vars["maxTokens"]) ?? - normalizeInt(env.vars["max_tokens"]) ?? - template.maxTokens - const maxToolCalls = - options.maxToolCalls ?? - normalizeInt(env.vars["maxToolCalls"]) ?? - normalizeInt(env.vars["max_tool_calls"]) ?? - template.maxToolCalls ?? - MAX_TOOL_CALLS - const flexTokens = - options.flexTokens ?? - normalizeInt(env.vars["flexTokens"]) ?? - normalizeInt(env.vars["flex_tokens"]) ?? - template.flexTokens - const fenceFormat = options.fenceFormat ?? template.fenceFormat - const cache = options.cache ?? template.cache - const metadata = metadataMerge(template, options.metadata) - let seed = options.seed ?? normalizeInt(env.vars["seed"]) ?? template.seed - if (seed !== undefined) seed = seed >> 0 - let logprobs = options.logprobs || template.logprobs - let topLogprobs = Math.max( - options.topLogprobs || 0, - template.topLogprobs || 0 - ) + mark("prompt.expand.script"); + const trace = options.trace; + const model = options.model; + assert(!!trace); + assert(!!model); + const cancellationToken = options.cancellationToken; + // update options + const lineNumbers = + options.lineNumbers ?? + template.lineNumbers ?? + resolveSystems(prj, template, undefined) + .map((s) => resolveScript(prj, s)) + .some((t) => t?.lineNumbers); + const temperature = + options.temperature ?? + normalizeFloat(env.vars["temperature"]) ?? + template.temperature ?? + runtimeHost.modelAliases.large.temperature; + options.fallbackTools = + options.fallbackTools ?? template.fallbackTools ?? runtimeHost.modelAliases.large.fallbackTools; + const reasoningEffort: ChatCompletionReasoningEffort = + options.reasoningEffort ?? + env.vars["reasoning_effort"] ?? + template.reasoningEffort ?? + runtimeHost.modelAliases.large.reasoningEffort; + const topP = options.topP ?? normalizeFloat(env.vars["top_p"]) ?? template.topP; + const maxTokens = + options.maxTokens ?? + normalizeInt(env.vars["maxTokens"]) ?? + normalizeInt(env.vars["max_tokens"]) ?? + template.maxTokens; + const maxToolCalls = + options.maxToolCalls ?? + normalizeInt(env.vars["maxToolCalls"]) ?? + normalizeInt(env.vars["max_tool_calls"]) ?? + template.maxToolCalls ?? + MAX_TOOL_CALLS; + const flexTokens = + options.flexTokens ?? + normalizeInt(env.vars["flexTokens"]) ?? + normalizeInt(env.vars["flex_tokens"]) ?? + template.flexTokens; + const fenceFormat = options.fenceFormat ?? template.fenceFormat; + const cache = options.cache ?? template.cache; + const metadata = metadataMerge(template, options.metadata); + let seed = options.seed ?? normalizeInt(env.vars["seed"]) ?? 
template.seed; + if (seed !== undefined) seed = seed >> 0; + let logprobs = options.logprobs || template.logprobs; + let topLogprobs = Math.max(options.topLogprobs || 0, template.topLogprobs || 0); - // finalize options - const { provider } = parseModelIdentifier(model) - env.meta.model = model - Object.freeze(env.meta) + // finalize options + const { provider } = parseModelIdentifier(model); + env.meta.model = model; + Object.freeze(env.meta); - trace.startDetails("💾 script", { expanded: true }) + trace.startDetails("💾 script", { expanded: true }); - traceEnv(model, trace, env) + traceEnv(model, trace, env); - trace.startDetails("🧬 prompt", { expanded: true }) - trace.detailsFenced("💻 script source", template.jsSource, "js") + trace.startDetails("🧬 prompt", { expanded: true }); + trace.detailsFenced("💻 script source", template.jsSource, "js"); - const prompt = await callExpander( - prj, - template, - env, - trace, - { - ...options, - maxTokens, - maxToolCalls, - flexTokens, - seed, - topP, - temperature, - reasoningEffort, - lineNumbers, - fenceFormat, - }, - true - ) + const prompt = await callExpander( + prj, + template, + env, + trace, + { + ...options, + maxTokens, + maxToolCalls, + flexTokens, + seed, + topP, + temperature, + reasoningEffort, + lineNumbers, + fenceFormat, + }, + true, + ); - const { status, statusText, messages } = prompt - const images = prompt.images.slice(0) - const schemas = structuredClone(prompt.schemas) - const tools = prompt.functions.slice(0) - const fileMerges = prompt.fileMerges.slice(0) - const outputProcessors = prompt.outputProcessors.slice(0) - const chatParticipants = prompt.chatParticipants.slice(0) - const fileOutputs = prompt.fileOutputs.slice(0) - const prediction = prompt.prediction - const disposables = prompt.disposables.slice(0) + const { status, statusText, messages } = prompt; + const images = prompt.images.slice(0); + const schemas = structuredClone(prompt.schemas); + const tools = prompt.functions.slice(0); + const fileMerges = prompt.fileMerges.slice(0); + const outputProcessors = prompt.outputProcessors.slice(0); + const chatParticipants = prompt.chatParticipants.slice(0); + const fileOutputs = prompt.fileOutputs.slice(0); + const prediction = prompt.prediction; + const disposables = prompt.disposables.slice(0); - if (prompt.logs?.length) trace.details("📝 console.log", prompt.logs) - trace.endDetails() + if (prompt.logs?.length) trace.details("📝 console.log", prompt.logs); + trace.endDetails(); - if (cancellationToken?.isCancellationRequested || status === "cancelled") { - await dispose(disposables, { trace }) - return { - status: "cancelled", - statusText: "user cancelled", - messages, - } - } + if (cancellationToken?.isCancellationRequested || status === "cancelled") { + await dispose(disposables, { trace }); + return { + status: "cancelled", + statusText: "user cancelled", + messages, + }; + } - if (status !== "success" || prompt.messages.length === 0) { - // cancelled - await dispose(disposables, { trace }) - return { - status, - statusText, - messages, - } - } + if (status !== "success" || prompt.messages.length === 0) { + // cancelled + await dispose(disposables, { trace }); + return { + status, + statusText, + messages, + }; + } - const addSystemMessage = (content: string) => { - appendSystemMessage(messages, content) - trace.fence(content, "markdown") - } + const addSystemMessage = (content: string) => { + appendSystemMessage(messages, content); + trace.fence(content, "markdown"); + }; - const systems = resolveSystems(prj, 
template, tools) - if (systems.length) - if (messages[0].role === "system") - // there's already a system message. add empty before - messages.unshift({ role: "system", content: "" }) + const systems = resolveSystems(prj, template, tools); + if (systems.length) + if (messages[0].role === "system") + // there's already a system message. add empty before + messages.unshift({ role: "system", content: "" }); - if (addFallbackToolSystems(systems, tools, template, options)) { - dbg("added fallback tools") - assert(!Object.isFrozen(options)) - options.fallbackTools = true - } + if (addFallbackToolSystems(systems, tools, template, options)) { + dbg("added fallback tools"); + assert(!Object.isFrozen(options)); + options.fallbackTools = true; + } - try { - trace.startDetails("👾 systems") - for (let i = 0; i < systems.length; ++i) { - if (cancellationToken?.isCancellationRequested) { - await dispose(disposables, { trace }) - return { - status: "cancelled", - statusText: "user cancelled", - messages, - } - } + try { + trace.startDetails("👾 systems"); + for (let i = 0; i < systems.length; ++i) { + if (cancellationToken?.isCancellationRequested) { + await dispose(disposables, { trace }); + return { + status: "cancelled", + statusText: "user cancelled", + messages, + }; + } - const systemId = systems[i] - dbg(`system ${systemId.id}`) - const system = resolveScript(prj, systemId) - if (!system) - throw new Error(`system template ${systemId.id} not found`) + const systemId = systems[i]; + dbg(`system ${systemId.id}`); + const system = resolveScript(prj, systemId); + if (!system) throw new Error(`system template ${systemId.id} not found`); - trace.startDetails(`👾 ${system.id}`) - const sysr = await callExpander( - prj, - system, - mergeEnvVarsWithSystem(env, systemId), - trace, - options, - false - ) + trace.startDetails(`👾 ${system.id}`); + const sysr = await callExpander( + prj, + system, + mergeEnvVarsWithSystem(env, systemId), + trace, + options, + false, + ); - if (sysr.images) images.push(...sysr.images) - if (sysr.schemas) Object.assign(schemas, sysr.schemas) - if (sysr.functions) tools.push(...sysr.functions) - if (sysr.fileMerges) fileMerges.push(...sysr.fileMerges) - if (sysr.outputProcessors) - outputProcessors.push(...sysr.outputProcessors) - if (sysr.chatParticipants) - chatParticipants.push(...sysr.chatParticipants) - if (sysr.fileOutputs) fileOutputs.push(...sysr.fileOutputs) - if (sysr.disposables?.length) disposables.push(...sysr.disposables) - if (sysr.logs?.length) trace.details("📝 console.log", sysr.logs) - for (const smsg of sysr.messages) { - if (smsg.role === "user" && typeof smsg.content === "string") { - addSystemMessage(smsg.content) - } else - throw new NotSupportedError( - "only string user messages supported in system" - ) - } - logprobs = logprobs || system.logprobs - topLogprobs = Math.max(topLogprobs, system.topLogprobs || 0) - trace.detailsFenced("💻 script source", system.jsSource, "js") - trace.endDetails() + if (sysr.images) images.push(...sysr.images); + if (sysr.schemas) Object.assign(schemas, sysr.schemas); + if (sysr.functions) tools.push(...sysr.functions); + if (sysr.fileMerges) fileMerges.push(...sysr.fileMerges); + if (sysr.outputProcessors) outputProcessors.push(...sysr.outputProcessors); + if (sysr.chatParticipants) chatParticipants.push(...sysr.chatParticipants); + if (sysr.fileOutputs) fileOutputs.push(...sysr.fileOutputs); + if (sysr.disposables?.length) disposables.push(...sysr.disposables); + if (sysr.logs?.length) trace.details("📝 console.log", sysr.logs); 
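+    // Illustrative sketch, not part of this change: a system script renders like any
+    // other prompt, e.g.
+    //   system({ title: "safety" })
+    //   $`Always refuse to write harmful code.`
+    // and the loop below accepts only its plain string user messages, re-emitting them
+    // as system messages via appendSystemMessage; anything else throws NotSupportedError.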
+ for (const smsg of sysr.messages) { + if (smsg.role === "user" && typeof smsg.content === "string") { + addSystemMessage(smsg.content); + } else throw new NotSupportedError("only string user messages supported in system"); + } + logprobs = logprobs || system.logprobs; + topLogprobs = Math.max(topLogprobs, system.topLogprobs || 0); + trace.detailsFenced("💻 script source", system.jsSource, "js"); + trace.endDetails(); - if (sysr.status !== "success") { - await dispose(disposables, options) - return { - status: sysr.status, - statusText: sysr.statusText, - messages, - } - } - } - } finally { - trace.endDetails() + if (sysr.status !== "success") { + await dispose(disposables, options); + return { + status: sysr.status, + statusText: sysr.statusText, + messages, + }; + } } + } finally { + trace.endDetails(); + } - if (options.fallbackTools) { - addToolDefinitionsMessage(messages, tools) - } + if (options.fallbackTools) { + addToolDefinitionsMessage(messages, tools); + } - const { responseType, responseSchema } = finalizeMessages(model, messages, { - ...template, - fileOutputs, - trace, - }) + const { responseType, responseSchema } = finalizeMessages(model, messages, { + ...template, + fileOutputs, + trace, + }); - trace.endDetails() + trace.endDetails(); - return { - cache, - messages, - images, - schemas, - tools, - status: status, - statusText: statusText, - model, - temperature, - reasoningEffort, - topP, - maxTokens, - maxToolCalls, - seed, - responseType, - responseSchema, - fileMerges, - prediction, - outputProcessors, - chatParticipants, - fileOutputs, - logprobs, - topLogprobs, - disposables, - metadata, - fallbackTools: options.fallbackTools, - } + return { + cache, + messages, + images, + schemas, + tools, + status: status, + statusText: statusText, + model, + temperature, + reasoningEffort, + topP, + maxTokens, + maxToolCalls, + seed, + responseType, + responseSchema, + fileMerges, + prediction, + outputProcessors, + chatParticipants, + fileOutputs, + logprobs, + topLogprobs, + disposables, + metadata, + fallbackTools: options.fallbackTools, + }; } diff --git a/packages/core/src/features.ts b/packages/core/src/features.ts index cf0243cafd..6adabedba9 100644 --- a/packages/core/src/features.ts +++ b/packages/core/src/features.ts @@ -1,6 +1,6 @@ -import { MODEL_PROVIDERS } from "./constants" +import { MODEL_PROVIDERS } from "./constants"; export function providerFeatures(provider: string) { - const features = MODEL_PROVIDERS.find(({ id }) => id === provider) - return features + const features = MODEL_PROVIDERS.find(({ id }) => id === provider); + return features; } diff --git a/packages/core/src/fence.test.ts b/packages/core/src/fence.test.ts index 35351a900e..e5dd3fa97d 100644 --- a/packages/core/src/fence.test.ts +++ b/packages/core/src/fence.test.ts @@ -1,44 +1,44 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { extractFenced } from "./fence" -import { unfence } from "./unwrappers" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { extractFenced } from "./fence"; +import { unfence } from "./unwrappers"; describe("fence", () => { - test("unfence", () => { - const source = ` + test("unfence", () => { + const source = ` \`\`\`python import re \`\`\` -` - const fenced = unfence(source, "python") - assert.equal(fenced, "import re") - }) +`; + const fenced = unfence(source, "python"); + assert.equal(fenced, "import re"); + }); - test("unfencenested", () => { - const source = ` + 
test("unfencenested", () => { + const source = ` \`\`\`\`\`md \`\`\` import re \`\`\` \`\`\`\`\` -` - const fenced = unfence(source, "md") - assert.equal(fenced, "\`\`\`\nimport re\n\`\`\`") - }) +`; + const fenced = unfence(source, "md"); + assert.equal(fenced, "\`\`\`\nimport re\n\`\`\`"); + }); - test("unbalanced", () => { - const source = ` + test("unbalanced", () => { + const source = ` \`\`\`\`\`md \`\`\` import re \`\`\`\`\` -` - const fenced = unfence(source, "md") - assert.equal(fenced, "\`\`\`\nimport re") - }) +`; + const fenced = unfence(source, "md"); + assert.equal(fenced, "\`\`\`\nimport re"); + }); - test("fence opt", () => { - const source = ` + test("fence opt", () => { + const source = ` The provided \`email_recognizer.py\` file contains a simple function that uses a regular expression to validate an email address. The time it takes to run this function depends on the complexity of the regular expression and the length of the input email string. However, without specific performance metrics or a larger context, it's not possible to provide an exact time for how long this function might take to run. The key candidate to speed up in this code is the regular expression matching operation within the \`is_valid_email\` function. Regular expressions can be slow, especially if they are complex and the input string is long. @@ -84,17 +84,17 @@ SUMMARY: Pre-compiled the regular expression to improve the performance of the is_valid_email function. \`\`\` -` +`; - const fenced = extractFenced(source) - assert.equal(fenced.length, 3) - assert.equal(fenced[0].label, "DIFF ./email_recognizer.py") - assert.equal(fenced[1].language, "python") - assert.equal(fenced[2].label, "SUMMARY") - }) + const fenced = extractFenced(source); + assert.equal(fenced.length, 3); + assert.equal(fenced[0].label, "DIFF ./email_recognizer.py"); + assert.equal(fenced[1].language, "python"); + assert.equal(fenced[2].label, "SUMMARY"); + }); - test("file arg", () => { - const source = ` + test("file arg", () => { + const source = ` lorem \`\`\`md file=./somefile.md @@ -103,15 +103,15 @@ lorem bla -` +`; - const fenced = extractFenced(source) - assert.equal(fenced.length, 1) - assert.equal(fenced[0].label, "FILE ./somefile.md") - }) + const fenced = extractFenced(source); + assert.equal(fenced.length, 1); + assert.equal(fenced[0].label, "FILE ./somefile.md"); + }); - test("file arg file quoted", () => { - const source = ` + test("file arg file quoted", () => { + const source = ` lorem \`\`\`md file="./somefile.md" @@ -120,15 +120,15 @@ lorem bla -` +`; - const fenced = extractFenced(source) - assert.equal(fenced.length, 1) - assert.equal(fenced[0].label, "FILE ./somefile.md") - }) + const fenced = extractFenced(source); + assert.equal(fenced.length, 1); + assert.equal(fenced[0].label, "FILE ./somefile.md"); + }); - test("data with schema", () => { - const source = ` + test("data with schema", () => { + const source = ` \`\`\`yaml schema=CITY_SCHEMA @@ -149,12 +149,12 @@ bla url: https://en.wikipedia.org/wiki/Paris \`\`\` - ` - - const fenced = extractFenced(source) - console.log(fenced) - assert.equal(fenced.length, 1) - assert.equal(fenced[0].args.schema, "CITY_SCHEMA") - assert.equal(fenced[0].language, "yaml") - }) -}) + `; + + const fenced = extractFenced(source); + console.log(fenced); + assert.equal(fenced.length, 1); + assert.equal(fenced[0].args.schema, "CITY_SCHEMA"); + assert.equal(fenced[0].language, "yaml"); + }); +}); diff --git a/packages/core/src/fence.ts b/packages/core/src/fence.ts index 
35bb57d83a..39b898372d 100644 --- a/packages/core/src/fence.ts +++ b/packages/core/src/fence.ts @@ -1,14 +1,13 @@ // Import necessary constants and functions from other modules -import { EMOJI_FAIL, EMOJI_SUCCESS, EMOJI_UNDEFINED } from "./constants" -import { JSON5TryParse } from "./json5" -import { removeLineNumbers } from "./liner" -import { unquote } from "./unwrappers" -import { arrayify } from "./util" -import { YAMLTryParse } from "./yaml" +import { EMOJI_FAIL, EMOJI_SUCCESS, EMOJI_UNDEFINED } from "./constants"; +import { JSON5TryParse } from "./json5"; +import { removeLineNumbers } from "./liner"; +import { unquote } from "./unwrappers"; +import { arrayify } from "./util"; +import { YAMLTryParse } from "./yaml"; // Regular expression for detecting the start of a code fence -const promptFenceStartRx = - /^(?<fence>`{3,})(?<language>[^=:]+)?(\s+(?<args>.*))?$/m +const promptFenceStartRx = /^(?<fence>`{3,})(?<language>[^=:]+)?(\s+(?<args>.*))?$/m; /** * Start parsing a fence from a given text line. @@ -16,13 +15,13 @@ const promptFenceStartRx = * @returns An object containing the fence, language, and arguments. */ function startFence(text: string) { - const m = promptFenceStartRx.exec(text) - const groups: Record<string, string> = m?.groups || {} - return { - fence: groups.fence, - language: unquote(groups.language), - args: parseKeyValuePairs(groups.args), - } + const m = promptFenceStartRx.exec(text); + const groups: Record<string, string> = m?.groups || {}; + return { + fence: groups.fence, + language: unquote(groups.language), + args: parseKeyValuePairs(groups.args), + }; } /** @@ -33,10 +32,8 @@ function startFence(text: string) { * @param text - The input string containing a key-value pair. */ export function parseKeyValuePair(text: string): Record<string, string> { - const m = /[=:]/.exec(text) - return m - ? { [text.slice(0, m.index)]: unquote(text.slice(m.index + 1)) } - : {} + const m = /[=:]/.exec(text); + return m ? { [text.slice(0, m.index)]: unquote(text.slice(m.index + 1)) } : {}; } /** @@ -46,16 +43,16 @@ export function parseKeyValuePair(text: string): Record<string, string> { * @returns An object with parsed key-value pairs as immutable data. */ export function parseKeyValuePairs(text: string | string[]) { - const res: Record<string, string> = {} - const chunks = arrayify(text) - chunks.forEach((chunk) => - chunk - ?.split(/\s+/g) - .map((kv) => kv.split(/[=:]/)) - .filter((m) => m.length == 2) - .forEach((m) => (res[m[0]] = unquote(m[1]))) - ) - return Object.freeze(res) + const res: Record<string, string> = {}; + const chunks = arrayify(text); + chunks.forEach((chunk) => + chunk + ?.split(/\s+/g) + .map((kv) => kv.split(/[=:]/)) + .filter((m) => m.length == 2) + .forEach((m) => (res[m[0]] = unquote(m[1]))), + ); + return Object.freeze(res); } /** @@ -70,111 +67,104 @@ export function parseKeyValuePairs(text: string | string[]) { * - args: Parsed key-value arguments from the fence. 
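 * @example
 * // illustrative sketch, mirroring the "file arg" unit test in fence.test.ts above:
 * const [block] = extractFenced('```md file="./somefile.md"\nbla\n```');
 * // block.label === "FILE ./somefile.md"; block.language === "md";
 * // block.args.file === "./somefile.md"; block.content === "bla\n"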
*/ export function extractFenced(text: string): Fenced[] { - if (!text) return [] - - let currLbl = "" // Current label for the fenced block - let currText = "" // Content of the current fenced block - let currLanguage = "" // Programming language of the fenced block - let currArgs: Record = {} // Arguments parsed from the fence - let currFence = "" // Current fence delimiter - const vars: Fenced[] = [] // Array to store the fenced blocks - const lines = text.split(/\r?\n/) // Split text into lines - - for (let i = 0; i < lines.length; ++i) { - const line = lines[i] - - if (currFence) { - // Handling the end of a fenced block - if (line.trimEnd() === currFence) { - currFence = "" - vars.push({ - label: currLbl, - content: normalize(currLbl, currText), - language: currLanguage, - args: currArgs, - }) - currText = "" - } else { - currText += line + "\n" - } - } else { - const fence = startFence(line) - if (fence.fence && fence.args["file"]) { - // Labeled fence with file - currLbl = "FILE " + fence.args["file"] - currFence = fence.fence - currLanguage = fence.language || "" - currArgs = fence.args - } else if (fence.fence) { - // Unlabeled fence - currLbl = "" - currFence = fence.fence - currLanguage = fence.language || "" - currArgs = fence.args - } else { - // Handling special case for labeled fences - const start = startFence(lines[i + 1]) - const m = /(\w+):\s+([^\s]+)/.exec(line) - if (start.fence && line.endsWith(":")) { - currLbl = ( - unquote(line.slice(0, -1)) + - " " + - (start.args["file"] || "") - ).trim() - currFence = start.fence - currLanguage = start.language || "" - currArgs = start.args - i++ - } else if (start.fence && m) { - currLbl = - unquote(m[1]) + - " " + - (start.args["file"] || unquote(m[2])) - currFence = start.fence - currLanguage = start.language || "" - currArgs = start.args - i++ - } - } - } - } - - // Push the last collected text block if any - if (currText != "") { + if (!text) return []; + + let currLbl = ""; // Current label for the fenced block + let currText = ""; // Content of the current fenced block + let currLanguage = ""; // Programming language of the fenced block + let currArgs: Record = {}; // Arguments parsed from the fence + let currFence = ""; // Current fence delimiter + const vars: Fenced[] = []; // Array to store the fenced blocks + const lines = text.split(/\r?\n/); // Split text into lines + + for (let i = 0; i < lines.length; ++i) { + const line = lines[i]; + + if (currFence) { + // Handling the end of a fenced block + if (line.trimEnd() === currFence) { + currFence = ""; vars.push({ - label: currLbl, - language: currLanguage, - content: normalize(currLbl, currText), - args: currArgs, - }) + label: currLbl, + content: normalize(currLbl, currText), + language: currLanguage, + args: currArgs, + }); + currText = ""; + } else { + currText += line + "\n"; + } + } else { + const fence = startFence(line); + if (fence.fence && fence.args["file"]) { + // Labeled fence with file + currLbl = "FILE " + fence.args["file"]; + currFence = fence.fence; + currLanguage = fence.language || ""; + currArgs = fence.args; + } else if (fence.fence) { + // Unlabeled fence + currLbl = ""; + currFence = fence.fence; + currLanguage = fence.language || ""; + currArgs = fence.args; + } else { + // Handling special case for labeled fences + const start = startFence(lines[i + 1]); + const m = /(\w+):\s+([^\s]+)/.exec(line); + if (start.fence && line.endsWith(":")) { + currLbl = (unquote(line.slice(0, -1)) + " " + (start.args["file"] || "")).trim(); + currFence = 
start.fence; + currLanguage = start.language || ""; + currArgs = start.args; + i++; + } else if (start.fence && m) { + currLbl = unquote(m[1]) + " " + (start.args["file"] || unquote(m[2])); + currFence = start.fence; + currLanguage = start.language || ""; + currArgs = start.args; + i++; + } + } } - - return vars - - /** - * Normalize content by removing unnecessary code fences. - * @param label - The label of the content. - * @param text - The content text. - * @returns The normalized text. - */ - function normalize(label: string, text: string) { - // remove extra line numbers - text = removeLineNumbers(text) - - /** handles situations like this: + } + + // Push the last collected text block if any + if (currText != "") { + vars.push({ + label: currLbl, + language: currLanguage, + content: normalize(currLbl, currText), + args: currArgs, + }); + } + + return vars; + + /** + * Normalize content by removing unnecessary code fences. + * @param label - The label of the content. + * @param text - The content text. + * @returns The normalized text. + */ + function normalize(label: string, text: string) { + // remove extra line numbers + text = removeLineNumbers(text); + + /** handles situations like this: ````` file=problem1.py ```python import re ... */ - if (/file=\w+\.\w+/.test(label)) { - const m = /^\s*\`{3,}\w*\r?\n((.|\s)*)\r?\n\`{3,}\s*$/.exec(text) - if (m) return m[1] - } - - return text + if (/file=\w+\.\w+/.test(label)) { + const m = /^\s*\`{3,}\w*\r?\n((.|\s)*)\r?\n\`{3,}\s*$/.exec(text); + if (m) return m[1]; } + + return text; + } } /** @@ -183,16 +173,13 @@ export function extractFenced(text: string): Fenced[] { * @returns Parsed content if a valid YAML or JSON block is found, otherwise undefined. */ export function findFirstDataFence(fences: Fenced[]): any { - const { content, language } = - fences?.find( - (f) => - f.content && - !f.label && - (f.language === "yaml" || f.language === "json") - ) || {} - if (language === "yaml" || language === "yml") return YAMLTryParse(content) - else if (language === "json") return JSON5TryParse(content) - return undefined + const { content, language } = + fences?.find( + (f) => f.content && !f.label && (f.language === "yaml" || f.language === "json"), + ) || {}; + if (language === "yaml" || language === "yml") return YAMLTryParse(content); + else if (language === "json") return JSON5TryParse(content); + return undefined; } /** @@ -201,10 +188,10 @@ export function findFirstDataFence(fences: Fenced[]): any { * @returns An object with parsed key-value pairs, or undefined if the input array is empty or null. */ export function parseVars(vars: string[]) { - if (!vars?.length) return undefined - const res: Record = {} - if (vars) for (const v of vars) Object.assign(res, parseKeyValuePairs(v)) - return Object.freeze(res) + if (!vars?.length) return undefined; + const res: Record = {}; + if (vars) for (const v of vars) Object.assign(res, parseKeyValuePairs(v)); + return Object.freeze(res); } /** @@ -219,35 +206,26 @@ export function parseVars(vars: string[]) { * @returns A formatted string representation of the fenced blocks. */ export function renderFencedVariables(vars: Fenced[]) { - return vars - .map( - ({ - label: k, - content: v, - validation, - args, - language, - }) => `- ${k ? `\`${k}\`` : ""} ${ - validation !== undefined - ? `${validation.schemaError ? EMOJI_UNDEFINED : validation.pathValid === false ? 
EMOJI_FAIL : EMOJI_SUCCESS}` - : "no label" - }\n + return vars + .map( + ({ label: k, content: v, validation, args, language }) => `- ${k ? `\`${k}\`` : ""} ${ + validation !== undefined + ? `${validation.schemaError ? EMOJI_UNDEFINED : validation.pathValid === false ? EMOJI_FAIL : EMOJI_SUCCESS}` + : "no label" + }\n \`\`\`\`\`${ - language ?? - (/^Note/.test(k) - ? "markdown" - : /^File [^\n]+.\.(\w+)$/m.exec(k)?.[1] || "") - } + language ?? (/^Note/.test(k) ? "markdown" : /^File [^\n]+.\.(\w+)$/m.exec(k)?.[1] || "") + } ${v} \`\`\`\`\` ${ - validation?.schemaError - ? `> [!CAUTION] + validation?.schemaError + ? `> [!CAUTION] > Schema ${args.schema} validation errors ${validation.schemaError.split("\n").join("\n> ")}` - : "" + : "" } -` - ) - .join("\n") +`, + ) + .join("\n"); } diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts index 4a2b653314..2b2c9b435f 100644 --- a/packages/core/src/fetch.ts +++ b/packages/core/src/fetch.ts @@ -1,27 +1,27 @@ -import wrapFetch from "fetch-retry" -import { TraceOptions } from "./trace" +import wrapFetch from "fetch-retry"; +import { TraceOptions } from "./trace"; import { - FETCH_RETRY_DEFAULT, - FETCH_RETRY_DEFAULT_DEFAULT, - FETCH_RETRY_GROWTH_FACTOR, - FETCH_RETRY_MAX_DELAY_DEFAULT, - FETCH_RETRY_ON_DEFAULT, -} from "./constants" -import { errorMessage } from "./error" -import { logVerbose } from "./util" -import { CancellationOptions, CancellationToken } from "./cancellation" -import { resolveHttpProxyAgent } from "./proxy" -import { host } from "./host" -import { renderWithPrecision } from "./precision" -import crossFetch from "cross-fetch" -import debug from "debug" -import { prettyStrings } from "./pretty" -const dbg = debug("genaiscript:fetch") + FETCH_RETRY_DEFAULT, + FETCH_RETRY_DEFAULT_DEFAULT, + FETCH_RETRY_GROWTH_FACTOR, + FETCH_RETRY_MAX_DELAY_DEFAULT, + FETCH_RETRY_ON_DEFAULT, +} from "./constants"; +import { errorMessage } from "./error"; +import { logVerbose } from "./util"; +import { CancellationOptions, CancellationToken } from "./cancellation"; +import { resolveHttpProxyAgent } from "./proxy"; +import { host } from "./host"; +import { renderWithPrecision } from "./precision"; +import crossFetch from "cross-fetch"; +import debug from "debug"; +import { prettyStrings } from "./pretty"; +const dbg = debug("genaiscript:fetch"); export type FetchType = ( - input: string | URL | globalThis.Request, - options?: FetchOptions & TraceOptions -) => Promise + input: string | URL | globalThis.Request, + options?: FetchOptions & TraceOptions, +) => Promise; /** * Creates a fetch function with retry logic. @@ -40,68 +40,64 @@ export type FetchType = ( * @returns A fetch function with retry and cancellation support. */ export async function createFetch( - options?: TraceOptions & CancellationOptions & RetryOptions + options?: TraceOptions & CancellationOptions & RetryOptions, ): Promise { - const { - retries = FETCH_RETRY_DEFAULT, - retryOn = FETCH_RETRY_ON_DEFAULT, - trace, - retryDelay = FETCH_RETRY_DEFAULT_DEFAULT, - maxDelay = FETCH_RETRY_MAX_DELAY_DEFAULT, - cancellationToken, - } = options || {} + const { + retries = FETCH_RETRY_DEFAULT, + retryOn = FETCH_RETRY_ON_DEFAULT, + trace, + retryDelay = FETCH_RETRY_DEFAULT_DEFAULT, + maxDelay = FETCH_RETRY_MAX_DELAY_DEFAULT, + cancellationToken, + } = options || {}; - // We create a proxy based on Node.js environment variables. - const agent = resolveHttpProxyAgent() + // We create a proxy based on Node.js environment variables. 
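+  // Sketch of the assumed behavior: resolveHttpProxyAgent inspects the usual Node.js
+  // proxy environment variables (e.g. HTTP_PROXY / HTTPS_PROXY) and returns a
+  // dispatcher when one is configured, or undefined for a direct connection.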
+ const agent = resolveHttpProxyAgent(); - // We enrich crossFetch with the proxy. - const crossFetchWithProxy: typeof fetch = agent - ? (url, options) => - crossFetch(url, { ...(options || {}), dispatcher: agent } as any) - : crossFetch + // We enrich crossFetch with the proxy. + const crossFetchWithProxy: typeof fetch = agent + ? (url, options) => crossFetch(url, { ...(options || {}), dispatcher: agent } as any) + : crossFetch; - // Return the default fetch if no retry status codes are specified - if (!retryOn?.length) { - dbg("no retry logic applied, using crossFetchWithProxy directly") - return crossFetchWithProxy - } + // Return the default fetch if no retry status codes are specified + if (!retryOn?.length) { + dbg("no retry logic applied, using crossFetchWithProxy directly"); + return crossFetchWithProxy; + } - // Create a fetch function with retry logic - const fetchRetry = wrapFetch(crossFetchWithProxy, { - retryOn, - retries, - retryDelay: (attempt, error, response) => { - const code: string = (error as any)?.code as string - dbg(`retry attempt: %d, error code: %s`, attempt, code) - if ( - code === "ECONNRESET" || - code === "ENOTFOUND" || - cancellationToken?.isCancellationRequested - ) { - dbg("fatal error or cancellation") - // Return undefined for fatal errors or cancellations to stop retries - return undefined - } + // Create a fetch function with retry logic + const fetchRetry = wrapFetch(crossFetchWithProxy, { + retryOn, + retries, + retryDelay: (attempt, error, response) => { + const code: string = (error as any)?.code as string; + dbg(`retry attempt: %d, error code: %s`, attempt, code); + if ( + code === "ECONNRESET" || + code === "ENOTFOUND" || + cancellationToken?.isCancellationRequested + ) { + dbg("fatal error or cancellation"); + // Return undefined for fatal errors or cancellations to stop retries + return undefined; + } - const message = errorMessage(error) - const status = statusToMessage(response) - const delay = - Math.min( - maxDelay, - Math.pow(FETCH_RETRY_GROWTH_FACTOR, attempt) * retryDelay - ) * - (1 + Math.random() / 20) // 5% jitter for delay randomization - const msg = prettyStrings( - `retry #${attempt + 1} in ${renderWithPrecision(Math.floor(delay) / 1000, 1)}s`, - message, - status - ) - logVerbose(msg) - trace?.resultItem(false, msg) - return delay - }, - }) - return fetchRetry + const message = errorMessage(error); + const status = statusToMessage(response); + const delay = + Math.min(maxDelay, Math.pow(FETCH_RETRY_GROWTH_FACTOR, attempt) * retryDelay) * + (1 + Math.random() / 20); // 5% jitter for delay randomization + const msg = prettyStrings( + `retry #${attempt + 1} in ${renderWithPrecision(Math.floor(delay) / 1000, 1)}s`, + message, + status, + ); + logVerbose(msg); + trace?.resultItem(false, msg); + return delay; + }, + }); + return fetchRetry; } /** @@ -121,19 +117,18 @@ export async function createFetch( * @returns A Promise resolving with the HTTP Response. 
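 * @example
 * // illustrative sketch; the option names are the RetryOptions fields handled above:
 * const res = await fetch("https://example.com/data.json", { retries: 3, retryDelay: 1000 });
 * if (res.ok) console.log(await res.json());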
*/ export async function fetch( - input: string | URL | globalThis.Request, - options?: FetchOptions & TraceOptions + input: string | URL | globalThis.Request, + options?: FetchOptions & TraceOptions, ): Promise { - const { retryOn, retries, retryDelay, maxDelay, trace, ...rest } = - options || {} - const f = await createFetch({ - retryOn, - retries, - retryDelay, - maxDelay, - trace, - }) - return f(input, rest) + const { retryOn, retries, retryDelay, maxDelay, trace, ...rest } = options || {}; + const f = await createFetch({ + retryOn, + retries, + retryDelay, + maxDelay, + trace, + }); + return f(input, rest); } /** @@ -144,50 +139,44 @@ export async function fetch( * @param res - The HTTP response object. Includes optional status and statusText fields. * @returns A list of strings containing the status and status text if provided. */ -export function statusToMessage(res?: { - status?: number - statusText?: string -}) { - const { status, statusText } = res || {} - return prettyStrings( - typeof status === "number" ? status + "" : undefined, - statusText - ) +export function statusToMessage(res?: { status?: number; statusText?: string }) { + const { status, statusText } = res || {}; + return prettyStrings(typeof status === "number" ? status + "" : undefined, statusText); } export async function tryReadText(res: Response, defaultValue?: string) { - try { - const text = await res.text() - return text - } catch (e) { - dbg(e) - return defaultValue - } + try { + const text = await res.text(); + return text; + } catch (e) { + dbg(e); + return defaultValue; + } } export async function* iterateBody( - r: Response, - options?: CancellationOptions + r: Response, + options?: CancellationOptions, ): AsyncGenerator { - const { cancellationToken } = options || {} - const decoder = host.createUTF8Decoder() // UTF-8 decoder for processing data - if (r.body.getReader) { - const reader = r.body.getReader() - while (!cancellationToken?.isCancellationRequested) { - const { done, value } = await reader.read() - if (done) { - break - } - const text = decoder.decode(value, { stream: true }) - yield text - } - } else { - for await (const value of r.body as any) { - if (cancellationToken?.isCancellationRequested) { - break - } - const text = decoder.decode(value, { stream: true }) - yield text - } + const { cancellationToken } = options || {}; + const decoder = host.createUTF8Decoder(); // UTF-8 decoder for processing data + if (r.body.getReader) { + const reader = r.body.getReader(); + while (!cancellationToken?.isCancellationRequested) { + const { done, value } = await reader.read(); + if (done) { + break; + } + const text = decoder.decode(value, { stream: true }); + yield text; + } + } else { + for await (const value of r.body as any) { + if (cancellationToken?.isCancellationRequested) { + break; + } + const text = decoder.decode(value, { stream: true }); + yield text; } + } } diff --git a/packages/core/src/fetchtext.test.ts b/packages/core/src/fetchtext.test.ts index d6d3a2eca1..8e063687e8 100644 --- a/packages/core/src/fetchtext.test.ts +++ b/packages/core/src/fetchtext.test.ts @@ -1,18 +1,16 @@ -import assert from "node:assert/strict" -import test, { beforeEach, describe } from "node:test" -import { TestHost } from "./testhost" -import { fetchText } from "./fetchtext" +import assert from "node:assert/strict"; +import test, { beforeEach, describe } from "node:test"; +import { TestHost } from "./testhost"; +import { fetchText } from "./fetchtext"; describe("fetch", () => { - beforeEach(async () => { - 
TestHost.install() - }) + beforeEach(async () => { + TestHost.install(); + }); - test("fetchText llms.txt", async () => { - const res = await fetchText( - "https://microsoft.github.io/genaiscript/llms.txt" - ) - assert(res.ok) - assert(res.text.includes("GenAIScript")) - }) -}) + test("fetchText llms.txt", async () => { + const res = await fetchText("https://microsoft.github.io/genaiscript/llms.txt"); + assert(res.ok); + assert(res.text.includes("GenAIScript")); + }); +}); diff --git a/packages/core/src/fetchtext.ts b/packages/core/src/fetchtext.ts index 1944a955bd..8fee69f321 100644 --- a/packages/core/src/fetchtext.ts +++ b/packages/core/src/fetchtext.ts @@ -1,17 +1,17 @@ -import { MarkdownTrace, TraceOptions } from "./trace" -import { logVerbose } from "./util" -import { CancellationOptions } from "./cancellation" -import { host } from "./host" -import { fileTypeFromBuffer } from "./filetype" -import { isBinaryMimeType } from "./binary" -import { toBase64 } from "./base64" -import { deleteUndefinedValues } from "./cleaners" -import { prettyBytes } from "./pretty" -import { uriRedact } from "./url" -import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html" -import { createFetch } from "./fetch" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("fetch:text") +import { MarkdownTrace, TraceOptions } from "./trace"; +import { logVerbose } from "./util"; +import { CancellationOptions } from "./cancellation"; +import { host } from "./host"; +import { fileTypeFromBuffer } from "./filetype"; +import { isBinaryMimeType } from "./binary"; +import { toBase64 } from "./base64"; +import { deleteUndefinedValues } from "./cleaners"; +import { prettyBytes } from "./pretty"; +import { uriRedact } from "./url"; +import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html"; +import { createFetch } from "./fetch"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("fetch:text"); /** * Fetches text content from a URL or file. @@ -31,105 +31,89 @@ const dbg = genaiscriptDebug("fetch:text") * @returns An object containing fetch status, content, metadata, and file details. 
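 * @example
 * // illustrative sketch: download a page and convert its HTML body to markdown
 * const { ok, text } = await fetchText("https://example.com/index.html", { convert: "markdown" });
 * if (ok) console.log(text);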
*/ export async function fetchText( - urlOrFile: string | WorkspaceFile, - fetchOptions?: FetchTextOptions & TraceOptions & CancellationOptions + urlOrFile: string | WorkspaceFile, + fetchOptions?: FetchTextOptions & TraceOptions & CancellationOptions, ) { - const { - retries, - retryDelay, - retryOn, - maxDelay, - trace, - convert, - cancellationToken, - ...rest - } = fetchOptions || {} - if (typeof urlOrFile === "string") { - urlOrFile = { - filename: urlOrFile, - content: "", - } + const { retries, retryDelay, retryOn, maxDelay, trace, convert, cancellationToken, ...rest } = + fetchOptions || {}; + if (typeof urlOrFile === "string") { + urlOrFile = { + filename: urlOrFile, + content: "", + }; + } + const url = urlOrFile.filename; + let ok = false; + let status = 404; + let statusText: string; + let bytes: Uint8Array; + if (/^https?:\/\//i.test(url)) { + dbg("requesting external URL: %s", uriRedact(url)); + const f = await createFetch({ + retries, + retryDelay, + retryOn, + maxDelay, + trace, + cancellationToken, + }); + const resp = await f(url, rest); + ok = resp.ok; + status = resp.status; + statusText = resp.statusText; + if (ok) { + dbg("status %d, %s", status, statusText); + const buf = await resp.arrayBuffer(); + bytes = new Uint8Array(buf); } - const url = urlOrFile.filename - let ok = false - let status = 404 - let statusText: string - let bytes: Uint8Array - if (/^https?:\/\//i.test(url)) { - dbg("requesting external URL: %s", uriRedact(url)) - const f = await createFetch({ - retries, - retryDelay, - retryOn, - maxDelay, - trace, - cancellationToken, - }) - const resp = await f(url, rest) - ok = resp.ok - status = resp.status - statusText = resp.statusText - if (ok) { - dbg("status %d, %s", status, statusText) - const buf = await resp.arrayBuffer() - bytes = new Uint8Array(buf) - } - } else { - dbg("reading file from local path: %s", url) - try { - bytes = await host.readFile(url) - } catch (e) { - logVerbose(e) - ok = false - status = 404 - } + } else { + dbg("reading file from local path: %s", url); + try { + bytes = await host.readFile(url); + } catch (e) { + logVerbose(e); + ok = false; + status = 404; } + } - let content: string - let encoding: "base64" - let type: string - const size = bytes?.length - const mime = await fileTypeFromBuffer(bytes) - if (isBinaryMimeType(mime?.mime)) { - dbg( - "binary mime type detected, content will be base64 encoded, mime: %o", - mime - ) - encoding = "base64" - content = toBase64(bytes) - } else { - dbg( - "text mime type detected, decoding content as UTF-8, mime: %o", - mime - ) - content = host.createUTF8Decoder().decode(bytes) - if (convert === "markdown") - content = await HTMLToMarkdown(content, { - trace, - cancellationToken, - }) - else if (convert === "text") - content = await HTMLToText(content, { trace, cancellationToken }) - else if (convert === "tables") - content = JSON.stringify(await HTMLTablesToJSON(content)) - } - ok = true - const file: WorkspaceFile = deleteUndefinedValues({ - filename: urlOrFile.filename, - encoding, - type, - content, - size, - }) + let content: string; + let encoding: "base64"; + let type: string; + const size = bytes?.length; + const mime = await fileTypeFromBuffer(bytes); + if (isBinaryMimeType(mime?.mime)) { + dbg("binary mime type detected, content will be base64 encoded, mime: %o", mime); + encoding = "base64"; + content = toBase64(bytes); + } else { + dbg("text mime type detected, decoding content as UTF-8, mime: %o", mime); + content = host.createUTF8Decoder().decode(bytes); + if (convert === 
"markdown") + content = await HTMLToMarkdown(content, { + trace, + cancellationToken, + }); + else if (convert === "text") content = await HTMLToText(content, { trace, cancellationToken }); + else if (convert === "tables") content = JSON.stringify(await HTMLTablesToJSON(content)); + } + ok = true; + const file: WorkspaceFile = deleteUndefinedValues({ + filename: urlOrFile.filename, + encoding, + type, + content, + size, + }); - return { - ok, - status, - statusText, - text: content, - bytes, - file, - } + return { + ok, + status, + statusText, + text: content, + bytes, + file, + }; } /** @@ -145,59 +129,57 @@ export async function fetchText( * @param options - Configuration for masking authorization headers. */ export function traceFetchPost( - trace: MarkdownTrace, - url: string, - headers: Record, - body: FormData | any, - options?: { showAuthorization?: boolean } + trace: MarkdownTrace, + url: string, + headers: Record, + body: FormData | any, + options?: { showAuthorization?: boolean }, ) { - if (!trace) { - return - } - const { showAuthorization } = options || {} - headers = { ...(headers || {}) } - if (!showAuthorization) { - Object.entries(headers) - .filter(([k]) => - /^(authorization|api-key|ocp-apim-subscription-key)$/i.test(k) - ) - .forEach( - ([k]) => - (headers[k] = /Bearer /i.test(headers[k]) - ? "Bearer ***" // Mask Bearer tokens - : "***") // Mask other authorization headers - ) - } + if (!trace) { + return; + } + const { showAuthorization } = options || {}; + headers = { ...(headers || {}) }; + if (!showAuthorization) { + Object.entries(headers) + .filter(([k]) => /^(authorization|api-key|ocp-apim-subscription-key)$/i.test(k)) + .forEach( + ([k]) => + (headers[k] = /Bearer /i.test(headers[k]) + ? "Bearer ***" // Mask Bearer tokens + : "***"), // Mask other authorization headers + ); + } - // Start building the HTTP request - let httpRequest = `POST ${url} HTTP/1.1\n` + // Start building the HTTP request + let httpRequest = `POST ${url} HTTP/1.1\n`; - // Add headers - Object.entries(headers).forEach(([key, value]) => { - httpRequest += `${key}: ${value}\n` - }) + // Add headers + Object.entries(headers).forEach(([key, value]) => { + httpRequest += `${key}: ${value}\n`; + }); - // Add body - if (body instanceof FormData) { - const boundary = "------------------------" + Date.now().toString(16) - httpRequest += `Content-Type: multipart/form-data; boundary=${boundary}\n\n` + // Add body + if (body instanceof FormData) { + const boundary = "------------------------" + Date.now().toString(16); + httpRequest += `Content-Type: multipart/form-data; boundary=${boundary}\n\n`; - body.forEach((value, key) => { - httpRequest += `--${boundary}\n` - httpRequest += `Content-Disposition: form-data; name="${key}"` - if (value instanceof File) { - httpRequest += `; filename="${value.name}"\n` - httpRequest += `Content-Type: ${value.type || "application/octet-stream"}\n\n` - httpRequest += `... (${prettyBytes(value.size)})\n` - } else { - httpRequest += "\n\n" + value + "\n" - } - }) - httpRequest += `--${boundary}--\n` - } else { - httpRequest += "\n" + JSON.stringify(body, null, 2) - } + body.forEach((value, key) => { + httpRequest += `--${boundary}\n`; + httpRequest += `Content-Disposition: form-data; name="${key}"`; + if (value instanceof File) { + httpRequest += `; filename="${value.name}"\n`; + httpRequest += `Content-Type: ${value.type || "application/octet-stream"}\n\n`; + httpRequest += `... 
(${prettyBytes(value.size)})\n`; + } else { + httpRequest += "\n\n" + value + "\n"; + } + }); + httpRequest += `--${boundary}--\n`; + } else { + httpRequest += "\n" + JSON.stringify(body, null, 2); + } - dbg(httpRequest) - if (trace) trace.detailsFenced(`🌐 fetch`, httpRequest, "http") + dbg(httpRequest); + if (trace) trace.detailsFenced(`🌐 fetch`, httpRequest, "http"); } diff --git a/packages/core/src/ffmpeg.ts b/packages/core/src/ffmpeg.ts index 728d700b4c..809579612d 100644 --- a/packages/core/src/ffmpeg.ts +++ b/packages/core/src/ffmpeg.ts @@ -1,520 +1,483 @@ -import debug from "debug" -const dbg = debug("genaiscript:ffmpeg") +import debug from "debug"; +const dbg = debug("genaiscript:ffmpeg"); -import { logVerbose } from "./util" -import { TraceOptions } from "./trace" -import { lookupMime } from "./mime" -import pLimit from "p-limit" -import { join, basename } from "node:path" -import { ensureDir } from "fs-extra" -import type { FfmpegCommand } from "fluent-ffmpeg" -import { hash } from "./crypto" -import { VIDEO_HASH_LENGTH } from "./constants" -import { writeFile, readFile } from "fs/promises" -import { errorMessage, serializeError } from "./error" -import { fromBase64 } from "./base64" -import { fileTypeFromBuffer } from "./filetype" -import { appendFile, readdir, stat } from "node:fs/promises" -import prettyBytes from "pretty-bytes" -import { filenameOrFileToFilename } from "./unwrappers" -import { Stats } from "node:fs" -import { roundWithPrecision } from "./precision" -import { parseTimestamps } from "./transcription" -import { mark } from "./performance" -import { dotGenaiscriptPath } from "./workdir" -import { arrayify } from "./cleaners" -import { tryStat } from "./fs" +import { logVerbose } from "./util"; +import { TraceOptions } from "./trace"; +import { lookupMime } from "./mime"; +import pLimit from "p-limit"; +import { join, basename } from "node:path"; +import { ensureDir } from "fs-extra"; +import type { FfmpegCommand } from "fluent-ffmpeg"; +import { hash } from "./crypto"; +import { VIDEO_HASH_LENGTH } from "./constants"; +import { writeFile, readFile } from "fs/promises"; +import { errorMessage, serializeError } from "./error"; +import { fromBase64 } from "./base64"; +import { fileTypeFromBuffer } from "./filetype"; +import { appendFile, readdir, stat } from "node:fs/promises"; +import prettyBytes from "pretty-bytes"; +import { filenameOrFileToFilename } from "./unwrappers"; +import { Stats } from "node:fs"; +import { roundWithPrecision } from "./precision"; +import { parseTimestamps } from "./transcription"; +import { mark } from "./performance"; +import { dotGenaiscriptPath } from "./workdir"; +import { arrayify } from "./cleaners"; +import { tryStat } from "./fs"; -const ffmpegLimit = pLimit(1) -const WILD_CARD = "%06d" +const ffmpegLimit = pLimit(1); +const WILD_CARD = "%06d"; type FFmpegCommandRenderer = ( - cmd: FfmpegCommand, - options: { input: string; dir: string } -) => Awaitable + cmd: FfmpegCommand, + options: { input: string; dir: string }, +) => Awaitable; interface FFmpegCommandResult { - filenames: string[] - data: any[] + filenames: string[]; + data: any[]; } async function ffmpegCommand(options?: { timeout?: number }) { - const m = await import("fluent-ffmpeg") - const cmd = m.default - return cmd(options) + const m = await import("fluent-ffmpeg"); + const cmd = m.default; + return cmd(options); } async function computeHashFolder( - filename: string | WorkspaceFile, - options: TraceOptions & FFmpegCommandOptions & { salt?: any } + filename: string 
| WorkspaceFile, + options: TraceOptions & FFmpegCommandOptions & { salt?: any }, ) { - const { trace, salt, ...rest } = options - const h = await hash( - [typeof filename === "string" ? { filename } : filename, rest], - { - readWorkspaceFiles: true, - version: true, - length: VIDEO_HASH_LENGTH, - salt, - } - ) - return dotGenaiscriptPath("cache", "ffmpeg", h) + const { trace, salt, ...rest } = options; + const h = await hash([typeof filename === "string" ? { filename } : filename, rest], { + readWorkspaceFiles: true, + version: true, + length: VIDEO_HASH_LENGTH, + salt, + }); + return dotGenaiscriptPath("cache", "ffmpeg", h); } -async function resolveInput( - filename: string | WorkspaceFile, - folder: string -): Promise { - if (typeof filename === "object") { - if (filename.content && filename.encoding === "base64") { - const bytes = fromBase64(filename.content) - const mime = await fileTypeFromBuffer(bytes) - filename = join(folder, "input." + mime.ext) - await writeFile(filename, bytes) - } else { - filename = filename.filename - } +async function resolveInput(filename: string | WorkspaceFile, folder: string): Promise { + if (typeof filename === "object") { + if (filename.content && filename.encoding === "base64") { + const bytes = fromBase64(filename.content); + const mime = await fileTypeFromBuffer(bytes); + filename = join(folder, "input." + mime.ext); + await writeFile(filename, bytes); + } else { + filename = filename.filename; } - return filename + } + return filename; } async function logFile(filename: string | WorkspaceFile, action: string) { - filename = filenameOrFileToFilename(filename) - const stats = await tryStat(filename) - logVerbose( - `ffmpeg: ${action} ${filename} (${stats ? prettyBytes(stats.size) : "0"})` - ) + filename = filenameOrFileToFilename(filename); + const stats = await tryStat(filename); + logVerbose(`ffmpeg: ${action} ${filename} (${stats ? 
prettyBytes(stats.size) : "0"})`); } export class FFmepgClient implements Ffmpeg { - constructor() {} + constructor() {} - async run( - input: string | WorkspaceFile, - builder: ( - cmd: FfmpegCommandBuilder, - options?: { input: string; dir: string } - ) => Awaitable, - options?: FFmpegCommandOptions & { salt?: any } - ): Promise { - await logFile(input, "input") - const { filenames } = await runFfmpeg(input, builder, options || {}) - for (const filename of filenames) { - await logFile(filename, "output") - } - return filenames + async run( + input: string | WorkspaceFile, + builder: ( + cmd: FfmpegCommandBuilder, + options?: { input: string; dir: string }, + ) => Awaitable, + options?: FFmpegCommandOptions & { salt?: any }, + ): Promise { + await logFile(input, "input"); + const { filenames } = await runFfmpeg(input, builder, options || {}); + for (const filename of filenames) { + await logFile(filename, "output"); } + return filenames; + } - async extractFrames( - filename: string | WorkspaceFile, - options?: VideoExtractFramesOptions - ): Promise { - if (!filename) { - throw new Error("filename is required") - } - mark("ffmpeg.extractFrames") - const { - transcript, - count, - cache = "frames", - ...soptions - } = options || {} - const format = options?.format || "jpg" - const size = options?.size + async extractFrames( + filename: string | WorkspaceFile, + options?: VideoExtractFramesOptions, + ): Promise { + if (!filename) { + throw new Error("filename is required"); + } + mark("ffmpeg.extractFrames"); + const { transcript, count, cache = "frames", ...soptions } = options || {}; + const format = options?.format || "jpg"; + const size = options?.size; - const applyOptions = (cmd: FfmpegCommand) => { - if (size) { - cmd.size(size) - cmd.autopad() - } - } + const applyOptions = (cmd: FfmpegCommand) => { + if (size) { + cmd.size(size); + cmd.autopad(); + } + }; - const renderers: FFmpegCommandRenderer[] = [] - if ( - soptions.keyframes || - (!count && - !soptions.timestamps?.length && - !(soptions.sceneThreshold > 0)) - ) { - renderers.push((cmd) => { - cmd.videoFilter("select='eq(pict_type,I)'") - cmd.outputOptions("-fps_mode vfr") - cmd.outputOptions("-frame_pts 1") - applyOptions(cmd) - return `keyframe_*.${format}` - }) - } else if (soptions.sceneThreshold > 0) { - renderers.push( - ((cmd) => { - cmd.frames(1) - applyOptions(cmd) - return `scenes_000000.${format}` - }) satisfies FFmpegCommandRenderer, - ((cmd) => { - cmd.videoFilter( - `select='gt(scene,${soptions.sceneThreshold})',showinfo` - ) - cmd.outputOptions("-fps_mode passthrough") - cmd.outputOptions("-frame_pts 1") - applyOptions(cmd) - return `scenes_*.${format}` - }) satisfies FFmpegCommandRenderer - ) + const renderers: FFmpegCommandRenderer[] = []; + if ( + soptions.keyframes || + (!count && !soptions.timestamps?.length && !(soptions.sceneThreshold > 0)) + ) { + renderers.push((cmd) => { + cmd.videoFilter("select='eq(pict_type,I)'"); + cmd.outputOptions("-fps_mode vfr"); + cmd.outputOptions("-frame_pts 1"); + applyOptions(cmd); + return `keyframe_*.${format}`; + }); + } else if (soptions.sceneThreshold > 0) { + renderers.push( + ((cmd) => { + cmd.frames(1); + applyOptions(cmd); + return `scenes_000000.${format}`; + }) satisfies FFmpegCommandRenderer, + ((cmd) => { + cmd.videoFilter(`select='gt(scene,${soptions.sceneThreshold})',showinfo`); + cmd.outputOptions("-fps_mode passthrough"); + cmd.outputOptions("-frame_pts 1"); + applyOptions(cmd); + return `scenes_*.${format}`; + }) satisfies FFmpegCommandRenderer, + ); + } 
else { + if (typeof transcript === "string") { + soptions.timestamps = parseTimestamps(transcript); + } else if ( + typeof transcript === "object" && + transcript?.segments?.length && + !soptions.timestamps?.length + ) { + soptions.timestamps = transcript.segments.map((s) => s.start); + } + if (count && !soptions.timestamps?.length) { + dbg(`calculating timestamps for count: ${count}`); + const info = await this.probeVideo(filename); + const duration = Number(info.duration); + if (count === 1) { + soptions.timestamps = [0]; } else { - if (typeof transcript === "string") { - soptions.timestamps = parseTimestamps(transcript) - } else if ( - typeof transcript === "object" && - transcript?.segments?.length && - !soptions.timestamps?.length - ) { - soptions.timestamps = transcript.segments.map((s) => s.start) - } - if (count && !soptions.timestamps?.length) { - dbg(`calculating timestamps for count: ${count}`) - const info = await this.probeVideo(filename) - const duration = Number(info.duration) - if (count === 1) { - soptions.timestamps = [0] - } else { - soptions.timestamps = Array(count) - .fill(0) - .map((_, i) => - roundWithPrecision( - Math.min( - (i * duration) / (count - 1), - duration - 0.1 - ), - 3 - ) - ) - } - } - if (!soptions.timestamps?.length) { - dbg(`timestamps not provided, defaulting to [0]`) - soptions.timestamps = [0] - } - renderers.push( - ...soptions.timestamps.map( - (ts) => - ((cmd) => { - cmd.seekInput(ts) - cmd.frames(1) - applyOptions(cmd) - return `frame-${String(ts).replace(":", "-").replace(".", "_")}.${format}` - }) satisfies FFmpegCommandRenderer - ) - ) + soptions.timestamps = Array(count) + .fill(0) + .map((_, i) => + roundWithPrecision(Math.min((i * duration) / (count - 1), duration - 0.1), 3), + ); } - - await logFile(filename, "input") - const { filenames } = await runFfmpeg(filename, renderers, { - ...soptions, - cache, - salt: { - transcript, - count, - format, - size, - }, - }) - logVerbose(`ffmpeg: extracted ${filenames.length} frames`) - for (const filename of filenames) { - await logFile(filename, "output") - } - return filenames + } + if (!soptions.timestamps?.length) { + dbg(`timestamps not provided, defaulting to [0]`); + soptions.timestamps = [0]; + } + renderers.push( + ...soptions.timestamps.map( + (ts) => + ((cmd) => { + cmd.seekInput(ts); + cmd.frames(1); + applyOptions(cmd); + return `frame-${String(ts).replace(":", "-").replace(".", "_")}.${format}`; + }) satisfies FFmpegCommandRenderer, + ), + ); } - async extractAudio( - filename: string | WorkspaceFile, - options?: VideoExtractAudioOptions - ): Promise { - if (!filename) { - throw new Error("filename is required") - } + await logFile(filename, "input"); + const { filenames } = await runFfmpeg(filename, renderers, { + ...soptions, + cache, + salt: { + transcript, + count, + format, + size, + }, + }); + logVerbose(`ffmpeg: extracted ${filenames.length} frames`); + for (const filename of filenames) { + await logFile(filename, "output"); + } + return filenames; + } - const { forceConversion, ...foptions } = options || {} - const { transcription = true } = foptions - if ( - !forceConversion && - !transcription && - typeof filename === "string" - ) { - const mime = lookupMime(filename) - if (/^audio/.test(mime)) { - dbg(`filename is already an audio file: ${filename}`) - return filename - } - } - const res = await this.run( - filename, - async (cmd, fopts) => { - cmd.noVideo() - if (transcription) { - // https://community.openai.com/t/whisper-api-increase-file-limit-25-mb/566754 - 
cmd.audioCodec("libopus") - cmd.audioChannels(1) - cmd.audioBitrate("12k") - cmd.outputOptions("-map_metadata -1") - cmd.outputOptions("-application voip") - cmd.toFormat("ogg") - return "audio.ogg" - } else { - cmd.toFormat("mp3") - return "audio.mp3" - } - }, - { - ...foptions, - cache: foptions.cache || "audio-voip", - salt: { - transcription, - }, - } - ) - return res[0] + async extractAudio( + filename: string | WorkspaceFile, + options?: VideoExtractAudioOptions, + ): Promise { + if (!filename) { + throw new Error("filename is required"); } - async extractClip( - filename: string | WorkspaceFile, - options: VideoExtractClipOptions - ): Promise { - if (!filename) { - throw new Error("filename is required") + const { forceConversion, ...foptions } = options || {}; + const { transcription = true } = foptions; + if (!forceConversion && !transcription && typeof filename === "string") { + const mime = lookupMime(filename); + if (/^audio/.test(mime)) { + dbg(`filename is already an audio file: ${filename}`); + return filename; + } + } + const res = await this.run( + filename, + async (cmd, fopts) => { + cmd.noVideo(); + if (transcription) { + // https://community.openai.com/t/whisper-api-increase-file-limit-25-mb/566754 + cmd.audioCodec("libopus"); + cmd.audioChannels(1); + cmd.audioBitrate("12k"); + cmd.outputOptions("-map_metadata -1"); + cmd.outputOptions("-application voip"); + cmd.toFormat("ogg"); + return "audio.ogg"; + } else { + cmd.toFormat("mp3"); + return "audio.mp3"; } + }, + { + ...foptions, + cache: foptions.cache || "audio-voip", + salt: { + transcription, + }, + }, + ); + return res[0]; + } - const { start, duration, end, ...rest } = options || {} - const res = await this.run( - filename, - async (cmd) => { - cmd.seekInput(start) - if (duration !== undefined) { - cmd.duration(duration) - } - if (end !== undefined) { - cmd.inputOptions(`-to ${end}`) - } - if (!options?.size) { - cmd.outputOptions("-c copy") - } - return `clip-${start}-${duration || end}.mp4` - }, - { - ...rest, - salt: { - start, - duration, - end, - }, - } - ) - return res[0] + async extractClip( + filename: string | WorkspaceFile, + options: VideoExtractClipOptions, + ): Promise { + if (!filename) { + throw new Error("filename is required"); } - async probe(filename: string | WorkspaceFile): Promise { - if (!filename) { - throw new Error("filename is required") + const { start, duration, end, ...rest } = options || {}; + const res = await this.run( + filename, + async (cmd) => { + cmd.seekInput(start); + if (duration !== undefined) { + cmd.duration(duration); } - const res = await runFfmpeg( - filename, - async (cmd) => { - const res = new Promise((resolve, reject) => { - cmd.ffprobe((err, data) => { - if (err) { - reject(err) - } else { - resolve(data as any as VideoProbeResult) - } - }) - }) - const meta = await res - return meta - }, - { cache: "probe" } - ) - return res.data[0] as VideoProbeResult - } + if (end !== undefined) { + cmd.inputOptions(`-to ${end}`); + } + if (!options?.size) { + cmd.outputOptions("-c copy"); + } + return `clip-${start}-${duration || end}.mp4`; + }, + { + ...rest, + salt: { + start, + duration, + end, + }, + }, + ); + return res[0]; + } - async probeVideo(filename: string | WorkspaceFile) { - const meta = await this.probe(filename) - const vstream = meta.streams.reduce((biggest, stream) => { - if ( - stream.codec_type === "video" && - stream.width && - stream.height && - (!biggest || - stream.width * stream.height > - biggest.width * biggest.height) - ) { - return stream + 
async probe(filename: string | WorkspaceFile): Promise { + if (!filename) { + throw new Error("filename is required"); + } + const res = await runFfmpeg( + filename, + async (cmd) => { + const res = new Promise((resolve, reject) => { + cmd.ffprobe((err, data) => { + if (err) { + reject(err); } else { - return biggest + resolve(data as any as VideoProbeResult); } - }) - return vstream - } + }); + }); + const meta = await res; + return meta; + }, + { cache: "probe" }, + ); + return res.data[0] as VideoProbeResult; + } + + async probeVideo(filename: string | WorkspaceFile) { + const meta = await this.probe(filename); + const vstream = meta.streams.reduce((biggest, stream) => { + if ( + stream.codec_type === "video" && + stream.width && + stream.height && + (!biggest || stream.width * stream.height > biggest.width * biggest.height) + ) { + return stream; + } else { + return biggest; + } + }); + return vstream; + } } async function runFfmpeg( - filename: string | WorkspaceFile, - renderer: FFmpegCommandRenderer | FFmpegCommandRenderer[], - options?: FFmpegCommandOptions & { salt?: any } + filename: string | WorkspaceFile, + renderer: FFmpegCommandRenderer | FFmpegCommandRenderer[], + options?: FFmpegCommandOptions & { salt?: any }, ): Promise { - if (!filename) { - throw new Error("filename is required") + if (!filename) { + throw new Error("filename is required"); + } + const { cache } = options || {}; + const folder = await computeHashFolder(filename, options); + const resFilename = join(folder, "res.json"); + const readCache = async () => { + if (cache === false) { + return undefined; } - const { cache } = options || {} - const folder = await computeHashFolder(filename, options) - const resFilename = join(folder, "res.json") - const readCache = async () => { - if (cache === false) { - return undefined - } - try { - dbg(`reading cache from: ${resFilename}`) - const res = JSON.parse( - await readFile(resFilename, { - encoding: "utf-8", - }) - ) - logVerbose(`ffmpeg: cache hit at ${folder}`) - return res - } catch { - return undefined - } + try { + dbg(`reading cache from: ${resFilename}`); + const res = JSON.parse( + await readFile(resFilename, { + encoding: "utf-8", + }), + ); + logVerbose(`ffmpeg: cache hit at ${folder}`); + return res; + } catch { + return undefined; } + }; - // try to hit cache before limit on ffmpeg - { - const cached = await readCache() - if (cached) { - return cached - } + // try to hit cache before limit on ffmpeg + { + const cached = await readCache(); + if (cached) { + return cached; } + } - return ffmpegLimit(async () => { - // try cache hit again - { - const cached = await readCache() - if (cached) { - return cached - } - } + return ffmpegLimit(async () => { + // try cache hit again + { + const cached = await readCache(); + if (cached) { + return cached; + } + } - await ensureDir(folder) - const input = await resolveInput(filename, folder) + await ensureDir(folder); + const input = await resolveInput(filename, folder); - const res: FFmpegCommandResult = { filenames: [], data: [] } - const renderers = arrayify(renderer) - for (const renderer of renderers) { - const cmd = await ffmpegCommand({}) - logCommand(folder, cmd) - const rres = await runFfmpegCommandUncached( - cmd, - input, - options, - folder, - renderer - ) - if (rres.filenames?.length) { - res.filenames.push(...rres.filenames) - } - if (rres.data?.length) { - res.data.push(...rres.data) - } - } - dbg(`writing ffmpeg result to cache: ${resFilename}`) - await writeFile(resFilename, JSON.stringify(res, null, 
2)) - return res - }) + const res: FFmpegCommandResult = { filenames: [], data: [] }; + const renderers = arrayify(renderer); + for (const renderer of renderers) { + const cmd = await ffmpegCommand({}); + logCommand(folder, cmd); + const rres = await runFfmpegCommandUncached(cmd, input, options, folder, renderer); + if (rres.filenames?.length) { + res.filenames.push(...rres.filenames); + } + if (rres.data?.length) { + res.data.push(...rres.data); + } + } + dbg(`writing ffmpeg result to cache: ${resFilename}`); + await writeFile(resFilename, JSON.stringify(res, null, 2)); + return res; + }); } async function runFfmpegCommandUncached( - cmd: FfmpegCommand, - input: string, - options: FFmpegCommandOptions, - folder: string, - renderer: FFmpegCommandRenderer + cmd: FfmpegCommand, + input: string, + options: FFmpegCommandOptions, + folder: string, + renderer: FFmpegCommandRenderer, ): Promise { - return await new Promise(async (resolve, reject) => { - const r: FFmpegCommandResult = { filenames: [], data: [] } - const end = () => resolve(r) + return await new Promise(async (resolve, reject) => { + const r: FFmpegCommandResult = { filenames: [], data: [] }; + const end = () => resolve(r); - let output: string - cmd.input(input) - if (options.size) { - cmd.size(options.size) - } - if (options.inputOptions) { - cmd.inputOptions(...arrayify(options.inputOptions)) - } - if (options.outputOptions) { - cmd.outputOption(...arrayify(options.outputOptions)) - } - dbg(`adding filenames listener`) - cmd.addListener("filenames", (fns: string[]) => { - r.filenames.push(...fns.map((f) => join(folder, f))) - }) - cmd.addListener("codeData", (data) => { - logVerbose(`ffmpeg: input audio ${data.audio}, video ${data.video}`) - }) - cmd.addListener("end", async () => { - dbg(`processing wildcard output: ${output}`) - if (output?.includes(WILD_CARD)) { - const [prefix, suffix] = output.split(WILD_CARD, 2) - const files = await readdir(folder) - const gen = files.filter( - (f) => f.startsWith(prefix) && f.endsWith(suffix) - ) - r.filenames.push(...gen.map((f) => join(folder, f))) - } - end() - }) - cmd.addListener("error", (err) => { - dbg(`ffmpeg command encountered an error`) - reject(err) - }) - try { - const rendering = await renderer(cmd, { - input, - dir: folder, - }) - if (typeof rendering === "string") { - output = rendering.replace(/\*/g, WILD_CARD) - const fo = join(folder, basename(output)) - cmd.output(fo) - cmd.run() - if (!output.includes(WILD_CARD)) { - r.filenames.push(fo) - } - } else if (typeof rendering === "object") { - r.data.push(rendering) - cmd.removeListener("end", end) - resolve(r) - } - } catch (err) { - reject(err) + let output: string; + cmd.input(input); + if (options.size) { + cmd.size(options.size); + } + if (options.inputOptions) { + cmd.inputOptions(...arrayify(options.inputOptions)); + } + if (options.outputOptions) { + cmd.outputOption(...arrayify(options.outputOptions)); + } + dbg(`adding filenames listener`); + cmd.addListener("filenames", (fns: string[]) => { + r.filenames.push(...fns.map((f) => join(folder, f))); + }); + cmd.addListener("codeData", (data) => { + logVerbose(`ffmpeg: input audio ${data.audio}, video ${data.video}`); + }); + cmd.addListener("end", async () => { + dbg(`processing wildcard output: ${output}`); + if (output?.includes(WILD_CARD)) { + const [prefix, suffix] = output.split(WILD_CARD, 2); + const files = await readdir(folder); + const gen = files.filter((f) => f.startsWith(prefix) && f.endsWith(suffix)); + r.filenames.push(...gen.map((f) => join(folder, 
f))); + } + end(); + }); + cmd.addListener("error", (err) => { + dbg(`ffmpeg command encountered an error`); + reject(err); + }); + try { + const rendering = await renderer(cmd, { + input, + dir: folder, + }); + if (typeof rendering === "string") { + output = rendering.replace(/\*/g, WILD_CARD); + const fo = join(folder, basename(output)); + cmd.output(fo); + cmd.run(); + if (!output.includes(WILD_CARD)) { + r.filenames.push(fo); } - }) + } else if (typeof rendering === "object") { + r.data.push(rendering); + cmd.removeListener("end", end); + resolve(r); + } + } catch (err) { + reject(err); + } + }); } function logCommand(folder: string, cmd: FfmpegCommand) { - // console logging - cmd.on("start", (commandLine) => logVerbose(commandLine)) - cmd.on("stderr", (s) => dbg(s)) + // console logging + cmd.on("start", (commandLine) => logVerbose(commandLine)); + cmd.on("stderr", (s) => dbg(s)); - // log to file - const log: string[] = [] - const writeLog = async () => { - const logFilename = join(folder, "log.txt") - logVerbose(`ffmpeg log: ${logFilename}`) - await appendFile(logFilename, log.join("\n"), { - encoding: "utf-8", - }) - } - cmd.on("stderr", (s) => log.push(s)) - cmd.on("end", writeLog) - cmd.on("error", async (err) => { - log.push(`error: ${errorMessage(err)}\n${serializeError(err)}`) - await writeLog() - }) + // log to file + const log: string[] = []; + const writeLog = async () => { + const logFilename = join(folder, "log.txt"); + logVerbose(`ffmpeg log: ${logFilename}`); + await appendFile(logFilename, log.join("\n"), { + encoding: "utf-8", + }); + }; + cmd.on("stderr", (s) => log.push(s)); + cmd.on("end", writeLog); + cmd.on("error", async (err) => { + log.push(`error: ${errorMessage(err)}\n${serializeError(err)}`); + await writeLog(); + }); } diff --git a/packages/core/src/file.ts b/packages/core/src/file.ts index 319048b575..0ffc137d1f 100644 --- a/packages/core/src/file.ts +++ b/packages/core/src/file.ts @@ -3,34 +3,34 @@ * and data URI conversion. It includes support for various file formats like * PDF, DOCX, XLSX, and CSV. 
*/ -import { DOCXTryParse } from "./docx" -import { readText, tryStat } from "./fs" -import { lookupMime } from "./mime" -import { isBinaryMimeType } from "./binary" -import { createFetch } from "./fetch" -import { fileTypeFromBuffer } from "./filetype" -import { fromBase64, toBase64 } from "./base64" -import { host } from "./host" -import { TraceOptions } from "./trace" -import { parsePdf } from "./pdf" -import { XLSXParse } from "./xlsx" -import { dataToMarkdownTable, CSVTryParse } from "./csv" +import { DOCXTryParse } from "./docx"; +import { readText, tryStat } from "./fs"; +import { lookupMime } from "./mime"; +import { isBinaryMimeType } from "./binary"; +import { createFetch } from "./fetch"; +import { fileTypeFromBuffer } from "./filetype"; +import { fromBase64, toBase64 } from "./base64"; +import { host } from "./host"; +import { TraceOptions } from "./trace"; +import { parsePdf } from "./pdf"; +import { XLSXParse } from "./xlsx"; +import { dataToMarkdownTable, CSVTryParse } from "./csv"; import { - CSV_REGEX, - DOCX_MIME_TYPE, - DOCX_REGEX, - MAX_FILE_CONTENT_SIZE, - PDF_MIME_TYPE, - PDF_REGEX, - XLSX_MIME_TYPE, - XLSX_REGEX, -} from "./constants" -import { tidyData } from "./tidy" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { prettyBytes } from "./pretty" -import { tryResolveResource } from "./resources" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("file") + CSV_REGEX, + DOCX_MIME_TYPE, + DOCX_REGEX, + MAX_FILE_CONTENT_SIZE, + PDF_MIME_TYPE, + PDF_REGEX, + XLSX_MIME_TYPE, + XLSX_REGEX, +} from "./constants"; +import { tidyData } from "./tidy"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { prettyBytes } from "./pretty"; +import { tryResolveResource } from "./resources"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("file"); /** * Resolves the content of a file by decoding, fetching, or parsing it based on its type or source. @@ -43,123 +43,119 @@ const dbg = genaiscriptDebug("file") * @returns The updated file object with resolved content or metadata. If the file cannot be resolved, it is returned as is. 
*/ export async function resolveFileContent( - file: WorkspaceFile, - options?: TraceOptions & { maxFileSize?: number } & CancellationOptions + file: WorkspaceFile, + options?: TraceOptions & { maxFileSize?: number } & CancellationOptions, ): Promise { - const { - trace, - cancellationToken, - maxFileSize = MAX_FILE_CONTENT_SIZE, - } = options || {} - if (!file) return file + const { trace, cancellationToken, maxFileSize = MAX_FILE_CONTENT_SIZE } = options || {}; + if (!file) return file; - checkCancelled(cancellationToken) + checkCancelled(cancellationToken); - const stats = await tryStat(file.filename) - if (stats && !stats.isFile()) { - dbg(`skip, not a file`) - return file // ignore, this is a directory - } + const stats = await tryStat(file.filename); + if (stats && !stats.isFile()) { + dbg(`skip, not a file`); + return file; // ignore, this is a directory + } - // decode known files - if (file.encoding === "base64") { - dbg(`decode base64`) - const bytes = fromBase64(file.content) - file.size = bytes.length - if (file.type === PDF_MIME_TYPE) { - dbg(`file type is PDF`) - const { content } = await parsePdf(bytes, options) - delete file.encoding - file.content = content - } else if (file.type === XLSX_MIME_TYPE) { - dbg(`file type is XLSX`) - const sheets = await XLSXParse(bytes) - delete file.encoding - file.content = JSON.stringify(sheets, null, 2) - } - return file + // decode known files + if (file.encoding === "base64") { + dbg(`decode base64`); + const bytes = fromBase64(file.content); + file.size = bytes.length; + if (file.type === PDF_MIME_TYPE) { + dbg(`file type is PDF`); + const { content } = await parsePdf(bytes, options); + delete file.encoding; + file.content = content; + } else if (file.type === XLSX_MIME_TYPE) { + dbg(`file type is XLSX`); + const sheets = await XLSXParse(bytes); + delete file.encoding; + file.content = JSON.stringify(sheets, null, 2); } + return file; + } - const { filename } = file - // If file content is already available or filename is missing, return the file as is. - if (file.content) { - return file - } - if (!filename) { - dbg(`file has no content and no filename`) - return file - } + const { filename } = file; + // If file content is already available or filename is missing, return the file as is. 
+ if (file.content) { + return file; + } + if (!filename) { + dbg(`file has no content and no filename`); + return file; + } - dbg(`resolving ${filename}`) - const res = await tryResolveResource(filename, { trace, cancellationToken }) - // Handle uris files - if (res) { - dbg(`resolved file uri`) - const resFile = res.files[0] - file.type = resFile.type - file.content = resFile.content - file.size = resFile.size - file.encoding = resFile.encoding - } - // Handle PDF files - else if (PDF_REGEX.test(filename)) { - dbg(`file is pdf`) - const stat = await tryStat(filename) - const { content } = await parsePdf(filename, options) - file.type = PDF_MIME_TYPE - file.content = content - file.size = stat?.size + dbg(`resolving ${filename}`); + const res = await tryResolveResource(filename, { trace, cancellationToken }); + // Handle uris files + if (res) { + dbg(`resolved file uri`); + const resFile = res.files[0]; + file.type = resFile.type; + file.content = resFile.content; + file.size = resFile.size; + file.encoding = resFile.encoding; + } + // Handle PDF files + else if (PDF_REGEX.test(filename)) { + dbg(`file is pdf`); + const stat = await tryStat(filename); + const { content } = await parsePdf(filename, options); + file.type = PDF_MIME_TYPE; + file.content = content; + file.size = stat?.size; + } + // Handle DOCX files + else if (DOCX_REGEX.test(filename)) { + dbg(`file is docx`); + const stat = await tryStat(filename); + const res = await DOCXTryParse(filename, options); + file.type = DOCX_MIME_TYPE; + file.content = res.file?.content; + file.size = res.file?.size || stat?.size; + } + // Handle XLSX files + else if (XLSX_REGEX.test(filename)) { + dbg(`file is xlsx`); + const stat = await tryStat(filename); + const bytes = await host.readFile(filename); + const sheets = await XLSXParse(bytes); + file.type = XLSX_MIME_TYPE; + file.content = JSON.stringify(sheets, null, 2); + file.size = stat?.size; + } + // Handle other file types + else { + const mime = file.type || lookupMime(filename); + const isBinary = isBinaryMimeType(mime); + dbg(`mime %s binary %s`, mime, isBinary); + file.type = mime; + const info = await tryStat(filename); + file.size = info?.size; + if (!info) { + dbg(`file not found: ${filename}`); + return file; } - // Handle DOCX files - else if (DOCX_REGEX.test(filename)) { - dbg(`file is docx`) - const stat = await tryStat(filename) - const res = await DOCXTryParse(filename, options) - file.type = DOCX_MIME_TYPE - file.content = res.file?.content - file.size = res.file?.size || stat?.size + if (!info.isFile()) { + dbg(`skip, not a file`); + return file; // ignore, this is a directory } - // Handle XLSX files - else if (XLSX_REGEX.test(filename)) { - dbg(`file is xlsx`) - const stat = await tryStat(filename) - const bytes = await host.readFile(filename) - const sheets = await XLSXParse(bytes) - file.type = XLSX_MIME_TYPE - file.content = JSON.stringify(sheets, null, 2) - file.size = stat?.size - } - // Handle other file types - else { - const mime = file.type || lookupMime(filename) - const isBinary = isBinaryMimeType(mime) - dbg(`mime %s binary %s`, mime, isBinary) - file.type = mime - const info = await tryStat(filename) - file.size = info?.size - if (!info) { - dbg(`file not found: ${filename}`) - return file - } - if (!info.isFile()) { - dbg(`skip, not a file`) - return file // ignore, this is a directory - } - if (!isBinary) { - dbg(`text ${prettyBytes(info.size)}`) - file.content = await readText(filename) - } else { - dbg(`binary ${prettyBytes(info?.size)}`) - if 
(!maxFileSize || info.size < maxFileSize) { - const bytes: Uint8Array = await host.readFile(filename) - file.encoding = "base64" - file.content = toBase64(bytes) - file.size = bytes.length - } - } + if (!isBinary) { + dbg(`text ${prettyBytes(info.size)}`); + file.content = await readText(filename); + } else { + dbg(`binary ${prettyBytes(info?.size)}`); + if (!maxFileSize || info.size < maxFileSize) { + const bytes: Uint8Array = await host.readFile(filename); + file.encoding = "base64"; + file.content = toBase64(bytes); + file.size = bytes.length; + } } + } - return file + return file; } /** @@ -168,9 +164,7 @@ export async function resolveFileContent( * @returns A WorkspaceFile object with the provided filename or the original WorkspaceFile object. */ export function toWorkspaceFile(fileOrFilename: string | WorkspaceFile) { - return typeof fileOrFilename === "string" - ? { filename: fileOrFilename } - : fileOrFilename + return typeof fileOrFilename === "string" ? { filename: fileOrFilename } : fileOrFilename; } /** @@ -182,14 +176,14 @@ export function toWorkspaceFile(fileOrFilename: string | WorkspaceFile) { * - trace - Object for logging and tracing operations. */ export async function resolveFileContents( - files: WorkspaceFile[], - options?: CancellationOptions & TraceOptions + files: WorkspaceFile[], + options?: CancellationOptions & TraceOptions, ) { - const { cancellationToken } = options || {} - for (const file of files) { - await resolveFileContent(file, options) - checkCancelled(cancellationToken) - } + const { cancellationToken } = options || {}; + for (const file of files) { + await resolveFileContent(file, options); + checkCancelled(cancellationToken); + } } /** @@ -200,37 +194,34 @@ export async function resolveFileContents( * @param options - Options for tracing operations and filtering the file data during rendering. Includes data transformation, markdown table generation, and optional sheet trimming for XLSX files. * @returns An object containing the filename and rendered content, or the original file object if rendering is not applicable. */ -export async function renderFileContent( - file: WorkspaceFile, - options: TraceOptions & DataFilter -) { - const { filename, content } = file +export async function renderFileContent(file: WorkspaceFile, options: TraceOptions & DataFilter) { + const { filename, content } = file; - // Render CSV content - if (content && CSV_REGEX.test(filename)) { - dbg(`rendering CSV content`) - let csv = CSVTryParse(content, options) - if (csv) { - csv = tidyData(csv, options) - return { filename, content: dataToMarkdownTable(csv, options) } - } + // Render CSV content + if (content && CSV_REGEX.test(filename)) { + dbg(`rendering CSV content`); + let csv = CSVTryParse(content, options); + if (csv) { + csv = tidyData(csv, options); + return { filename, content: dataToMarkdownTable(csv, options) }; } - // Render XLSX content - else if (content && XLSX_REGEX.test(filename)) { - dbg(`rendering XLSX content`) - const sheets = JSON.parse(content) as WorkbookSheet[] - const trimmed = sheets.length - ? sheets - .map( - ({ name, rows }) => `## ${name} + } + // Render XLSX content + else if (content && XLSX_REGEX.test(filename)) { + dbg(`rendering XLSX content`); + const sheets = JSON.parse(content) as WorkbookSheet[]; + const trimmed = sheets.length + ? 
sheets + .map( + ({ name, rows }) => `## ${name} ${dataToMarkdownTable(tidyData(rows, options))} -` - ) - .join("\n") - : dataToMarkdownTable(tidyData(sheets[0].rows, options)) - return { filename, content: trimmed } - } - return { ...file } +`, + ) + .join("\n") + : dataToMarkdownTable(tidyData(sheets[0].rows, options)); + return { filename, content: trimmed }; + } + return { ...file }; } /** @@ -241,16 +232,16 @@ ${dataToMarkdownTable(tidyData(rows, options))} * @throws Will throw an error if the data URI format is invalid. */ export function dataUriToBuffer(filename: string) { - if (/^data:/i.test(filename)) { - dbg(`converting data URI to buffer`) - const matches = filename.match(/^data:[^;]+;base64,(.*)$/i) - if (!matches) { - dbg(`invalid data URI format`) - throw new Error("Invalid data URI format") - } - return fromBase64(matches[1]) + if (/^data:/i.test(filename)) { + dbg(`converting data URI to buffer`); + const matches = filename.match(/^data:[^;]+;base64,(.*)$/i); + if (!matches) { + dbg(`invalid data URI format`); + throw new Error("Invalid data URI format"); } - return undefined + return fromBase64(matches[1]); + } + return undefined; } /** @@ -260,40 +251,38 @@ export function dataUriToBuffer(filename: string) { * @returns A Uint8Array containing the file content as bytes. */ export async function resolveFileBytes( - filename: string | WorkspaceFile, - options?: TraceOptions & CancellationOptions + filename: string | WorkspaceFile, + options?: TraceOptions & CancellationOptions, ): Promise { - if (typeof filename === "object") { - if (filename.encoding && filename.content) { - dbg(`resolving file bytes`) - return new Uint8Array( - Buffer.from(filename.content, filename.encoding) - ) - } - filename = filename.filename + if (typeof filename === "object") { + if (filename.encoding && filename.content) { + dbg(`resolving file bytes`); + return new Uint8Array(Buffer.from(filename.content, filename.encoding)); } + filename = filename.filename; + } - const i = dataUriToBuffer(filename) - if (i) { - return i - } + const i = dataUriToBuffer(filename); + if (i) { + return i; + } - // Fetch file from URL or data-uri - if (/^https?:\/\//i.test(filename)) { - dbg(`fetching file from URL: ${filename}`) - const fetch = await createFetch(options) - const resp = await fetch(filename) - const buffer = await resp.arrayBuffer() - return new Uint8Array(buffer) - } - // Read file from local storage - else { - dbg(`reading file %s`, filename) - const stat = await host.statFile(filename) - if (stat?.type !== "file") return undefined - const buf = await host.readFile(filename) - return new Uint8Array(buf) - } + // Fetch file from URL or data-uri + if (/^https?:\/\//i.test(filename)) { + dbg(`fetching file from URL: ${filename}`); + const fetch = await createFetch(options); + const resp = await fetch(filename); + const buffer = await resp.arrayBuffer(); + return new Uint8Array(buffer); + } + // Read file from local storage + else { + dbg(`reading file %s`, filename); + const stat = await host.statFile(filename); + if (stat?.type !== "file") return undefined; + const buf = await host.readFile(filename); + return new Uint8Array(buf); + } } /** @@ -303,22 +292,21 @@ export async function resolveFileBytes( * @returns A Data URI string if the MIME type is determined, otherwise undefined. 
*/ export async function resolveFileDataUri( - filename: string, - options?: TraceOptions & CancellationOptions & { mime?: string } + filename: string, + options?: TraceOptions & CancellationOptions & { mime?: string }, ) { - const { cancellationToken, mime } = options || {} - const bytes = await resolveFileBytes(filename, options) - checkCancelled(cancellationToken) - const uriMime = - mime || (await fileTypeFromBuffer(bytes))?.mime || lookupMime(filename) - if (!uriMime) { - dbg(`no mime type found for ${filename}`) - return undefined - } - const b64 = toBase64(bytes) - return { - uri: `data:${uriMime};base64,${b64}`, - mimeType: uriMime, - data: b64, - } + const { cancellationToken, mime } = options || {}; + const bytes = await resolveFileBytes(filename, options); + checkCancelled(cancellationToken); + const uriMime = mime || (await fileTypeFromBuffer(bytes))?.mime || lookupMime(filename); + if (!uriMime) { + dbg(`no mime type found for ${filename}`); + return undefined; + } + const b64 = toBase64(bytes); + return { + uri: `data:${uriMime};base64,${b64}`, + mimeType: uriMime, + data: b64, + }; } diff --git a/packages/core/src/filecache.test.ts b/packages/core/src/filecache.test.ts index 916659ff0e..7ea7af02cd 100644 --- a/packages/core/src/filecache.test.ts +++ b/packages/core/src/filecache.test.ts @@ -1,79 +1,79 @@ -import assert from "node:assert/strict" -import test, { beforeEach, describe } from "node:test" -import { dirname, join } from "node:path" -import { stat, readdir, rm } from "fs/promises" -import { existsSync } from "fs" +import assert from "node:assert/strict"; +import test, { beforeEach, describe } from "node:test"; +import { dirname, join } from "node:path"; +import { stat, readdir, rm } from "fs/promises"; +import { existsSync } from "fs"; import { - fileCacheImage, - fileWriteCached, - fileWriteCachedJSON, - patchCachedImages, -} from "./filecache" -import { TestHost } from "./testhost" -import { readFile } from "node:fs/promises" + fileCacheImage, + fileWriteCached, + fileWriteCachedJSON, + patchCachedImages, +} from "./filecache"; +import { TestHost } from "./testhost"; +import { readFile } from "node:fs/promises"; describe("fileWriteCached", () => { - const tempDir = join(dirname(__filename), "temp") + const tempDir = join(dirname(__filename), "temp"); - beforeEach(async () => { - TestHost.install() - if (existsSync(tempDir)) { - await rm(tempDir, { recursive: true, force: true }) - } - }) + beforeEach(async () => { + TestHost.install(); + if (existsSync(tempDir)) { + await rm(tempDir, { recursive: true, force: true }); + } + }); - test("should write buffer to cache and return correct filename", async () => { - const buffer: BufferLike = Buffer.from("test content") - const filePath = await fileWriteCached(tempDir, buffer) + test("should write buffer to cache and return correct filename", async () => { + const buffer: BufferLike = Buffer.from("test content"); + const filePath = await fileWriteCached(tempDir, buffer); - const files = await readdir(tempDir) - assert.equal(files.length, 1) - const writtenFile = join(tempDir, files[0]) + const files = await readdir(tempDir); + assert.equal(files.length, 1); + const writtenFile = join(tempDir, files[0]); - const stats = await stat(writtenFile) - assert(stats.isFile()) + const stats = await stat(writtenFile); + assert(stats.isFile()); - assert.equal(filePath, writtenFile) - }) - test("should write JSON to cache and return correct filename", async () => { - const testData = { test: "content" } - const filePath = await 
fileWriteCachedJSON(tempDir, testData) + assert.equal(filePath, writtenFile); + }); + test("should write JSON to cache and return correct filename", async () => { + const testData = { test: "content" }; + const filePath = await fileWriteCachedJSON(tempDir, testData); - const files = await readdir(tempDir) - assert.equal(files.length, 1) - const writtenFile = join(tempDir, files[0]) + const files = await readdir(tempDir); + assert.equal(files.length, 1); + const writtenFile = join(tempDir, files[0]); - const stats = await stat(writtenFile) - assert(stats.isFile()) - assert.equal(filePath, writtenFile) + const stats = await stat(writtenFile); + assert(stats.isFile()); + assert.equal(filePath, writtenFile); - const content = JSON.parse(await readFile(writtenFile, "utf-8")) - assert.deepEqual(content, testData) - }) + const content = JSON.parse(await readFile(writtenFile, "utf-8")); + assert.deepEqual(content, testData); + }); - test("fileCacheImage should return empty string for falsy input", async () => { - assert.equal(await fileCacheImage(""), "") - assert.equal(await fileCacheImage(null), "") - assert.equal(await fileCacheImage(undefined), "") - }) + test("fileCacheImage should return empty string for falsy input", async () => { + assert.equal(await fileCacheImage(""), ""); + assert.equal(await fileCacheImage(null), ""); + assert.equal(await fileCacheImage(undefined), ""); + }); - test("fileCacheImage should return URL unchanged when input is HTTPS URL", async () => { - const url = "https://example.com/image.jpg" - assert.equal(await fileCacheImage(url), url) - }) + test("fileCacheImage should return URL unchanged when input is HTTPS URL", async () => { + const url = "https://example.com/image.jpg"; + assert.equal(await fileCacheImage(url), url); + }); - test("fileCacheImage should cache local image and return relative path", async () => { - const imageBuffer = Buffer.from("fake image data") - const result = await fileCacheImage(imageBuffer, { dir: tempDir }) + test("fileCacheImage should cache local image and return relative path", async () => { + const imageBuffer = Buffer.from("fake image data"); + const result = await fileCacheImage(imageBuffer, { dir: tempDir }); - assert(result.startsWith("./")) - const files = await readdir(tempDir) - assert.equal(files.length, 1) - }) + assert(result.startsWith("./")); + const files = await readdir(tempDir); + assert.equal(files.length, 1); + }); - test("patchCachedImages should replace image paths", () => { - const input = "![alt](.genaiscript/images/test.jpg)" - const output = patchCachedImages(input, (url) => "newpath/" + url) - assert.equal(output, "![alt](newpath/.genaiscript/images/test.jpg)") - }) -}) + test("patchCachedImages should replace image paths", () => { + const input = "![alt](.genaiscript/images/test.jpg)"; + const output = patchCachedImages(input, (url) => "newpath/" + url); + assert.equal(output, "![alt](newpath/.genaiscript/images/test.jpg)"); + }); +}); diff --git a/packages/core/src/filecache.ts b/packages/core/src/filecache.ts index 49eddf3aad..f540f08fde 100644 --- a/packages/core/src/filecache.ts +++ b/packages/core/src/filecache.ts @@ -1,18 +1,18 @@ -import { resolveBufferLikeAndExt } from "./bufferlike" -import { hash } from "./crypto" -import { TraceOptions } from "./trace" -import { basename, dirname, join, relative } from "node:path" -import { stat, writeFile } from "fs/promises" -import { ensureDir } from "fs-extra" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { dotGenaiscriptPath } 
from "./workdir" -import { prettyBytes } from "./pretty" -import debug from "debug" -import { FILE_HASH_LENGTH, HTTPS_REGEX } from "./constants" -import { tryStat } from "./fs" -import { filenameOrFileToFilename } from "./unwrappers" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("cache") +import { resolveBufferLikeAndExt } from "./bufferlike"; +import { hash } from "./crypto"; +import { TraceOptions } from "./trace"; +import { basename, dirname, join, relative } from "node:path"; +import { stat, writeFile } from "fs/promises"; +import { ensureDir } from "fs-extra"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { dotGenaiscriptPath } from "./workdir"; +import { prettyBytes } from "./pretty"; +import debug from "debug"; +import { FILE_HASH_LENGTH, HTTPS_REGEX } from "./constants"; +import { tryStat } from "./fs"; +import { filenameOrFileToFilename } from "./unwrappers"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("cache"); /** * Caches a file by writing it to a specified directory. If the file exists, it simply returns the path. @@ -25,56 +25,53 @@ const dbg = genaiscriptDebug("cache") * @returns The path to the cached file. */ export async function fileWriteCached( - dir: string, - bufferLike: BufferLike, - options?: TraceOptions & - CancellationOptions & { - /** - * Generate file name extension - */ - ext?: string - } + dir: string, + bufferLike: BufferLike, + options?: TraceOptions & + CancellationOptions & { + /** + * Generate file name extension + */ + ext?: string; + }, ): Promise { - const { bytes, ext: sourceExt } = await resolveBufferLikeAndExt( - bufferLike, - options - ) - if (!bytes) { - // file empty - return undefined - } - const { cancellationToken, ext = sourceExt } = options || {} - checkCancelled(cancellationToken) - const filename = await hash(bytes, { length: FILE_HASH_LENGTH }) - checkCancelled(cancellationToken) - const f = filename + "." + ext.replace(/^\./, "") - dbg(`cache: %s`, f) - const fn = join(dir, f) - const r = await tryStat(fn) - if (r?.isFile()) { - dbg(`hit %s`, fn) - return fn - } + const { bytes, ext: sourceExt } = await resolveBufferLikeAndExt(bufferLike, options); + if (!bytes) { + // file empty + return undefined; + } + const { cancellationToken, ext = sourceExt } = options || {}; + checkCancelled(cancellationToken); + const filename = await hash(bytes, { length: FILE_HASH_LENGTH }); + checkCancelled(cancellationToken); + const f = filename + "." 
+ ext.replace(/^\./, ""); + dbg(`cache: %s`, f); + const fn = join(dir, f); + const r = await tryStat(fn); + if (r?.isFile()) { + dbg(`hit %s`, fn); + return fn; + } - dbg(`miss %s`, fn) - await ensureDir(dirname(fn)) - await writeFile(fn, bytes) + dbg(`miss %s`, fn); + await ensureDir(dirname(fn)); + await writeFile(fn, bytes); - return fn + return fn; } export async function fileWriteCachedJSON(dir: string, data: any) { - const bytes = Buffer.from(JSON.stringify(data, null, 2)) - const filename = await hash(bytes, { length: FILE_HASH_LENGTH }) - const fn = join(dir, filename + ".json") - const stat = await tryStat(fn) - if (stat && stat.isFile()) return fn + const bytes = Buffer.from(JSON.stringify(data, null, 2)); + const filename = await hash(bytes, { length: FILE_HASH_LENGTH }); + const fn = join(dir, filename + ".json"); + const stat = await tryStat(fn); + if (stat && stat.isFile()) return fn; - dbg(`json cache: ${fn} (${prettyBytes(bytes.length)})`) - await ensureDir(dirname(fn)) - await writeFile(fn, bytes) + dbg(`json cache: ${fn} (${prettyBytes(bytes.length)})`); + await ensureDir(dirname(fn)); + await writeFile(fn, bytes); - return fn + return fn; } /** @@ -89,39 +86,30 @@ export async function fileWriteCachedJSON(dir: string, data: any) { * @returns The relative path to the cached file or the original URL if it is a remote target. */ export async function fileCacheImage( - url: BufferLike, - options?: TraceOptions & CancellationOptions & { dir?: string } + url: BufferLike, + options?: TraceOptions & CancellationOptions & { dir?: string }, ): Promise { - if (!url) return "" + if (!url) return ""; - const filename = filenameOrFileToFilename(url as any) - if (typeof filename === "string" && HTTPS_REGEX.test(filename)) - return filename + const filename = filenameOrFileToFilename(url as any); + if (typeof filename === "string" && HTTPS_REGEX.test(filename)) return filename; - const { - dir = dotGenaiscriptPath("images"), - trace, - cancellationToken, - } = options || {} - const fn = await fileWriteCached( - dir, - url, - { trace, cancellationToken } // TODO: add trace - ) - if (!fn) { - dbg(`no file cached`) - return undefined - } - const res = options?.dir ? `./${basename(fn)}` : relative(process.cwd(), fn) - dbg(`image: ${res}`) - return res + const { dir = dotGenaiscriptPath("images"), trace, cancellationToken } = options || {}; + const fn = await fileWriteCached( + dir, + url, + { trace, cancellationToken }, // TODO: add trace + ); + if (!fn) { + dbg(`no file cached`); + return undefined; + } + const res = options?.dir ? 
`./${basename(fn)}` : relative(process.cwd(), fn); + dbg(`image: ${res}`); + return res; } -export function patchCachedImages( - text: string, - patcher: (url: string) => string -) { - const IMG_RX = - /\!\[(?[^\]]*)\]\((?\.genaiscript\/images\/[^)]+)\)/g - return text.replace(IMG_RX, (_, alt, url) => `![${alt}](${patcher(url)})`) +export function patchCachedImages(text: string, patcher: (url: string) => string) { + const IMG_RX = /\!\[(?[^\]]*)\]\((?\.genaiscript\/images\/[^)]+)\)/g; + return text.replace(IMG_RX, (_, alt, url) => `![${alt}](${patcher(url)})`); } diff --git a/packages/core/src/fileedits.ts b/packages/core/src/fileedits.ts index 39a5b34c4c..63f8eb17fc 100644 --- a/packages/core/src/fileedits.ts +++ b/packages/core/src/fileedits.ts @@ -1,19 +1,19 @@ -import { applyChangeLog, parseChangeLogs } from "./changelog" -import { dataToMarkdownTable } from "./csv" -import { applyLLMDiff, applyLLMPatch, parseLLMDiffs } from "./llmdiff" -import { errorMessage, isCancelError } from "./error" -import { unquote } from "./unwrappers" -import { fileExists, readText } from "./fs" -import { isGlobMatch } from "./glob" -import { runtimeHost } from "./host" -import { JSON5parse } from "./json5" -import { stringToPos } from "./parser" -import { validateJSONWithSchema } from "./schema" -import { MarkdownTrace, TraceOptions } from "./trace" -import { logError, logVerbose, relativePath } from "./util" -import { YAMLParse } from "./yaml" -import { writeText } from "./fs" -import { diffCreatePatch } from "./diff" +import { applyChangeLog, parseChangeLogs } from "./changelog"; +import { dataToMarkdownTable } from "./csv"; +import { applyLLMDiff, applyLLMPatch, parseLLMDiffs } from "./llmdiff"; +import { errorMessage, isCancelError } from "./error"; +import { unquote } from "./unwrappers"; +import { fileExists, readText } from "./fs"; +import { isGlobMatch } from "./glob"; +import { runtimeHost } from "./host"; +import { JSON5parse } from "./json5"; +import { stringToPos } from "./parser"; +import { validateJSONWithSchema } from "./schema"; +import { MarkdownTrace, TraceOptions } from "./trace"; +import { logError, logVerbose, relativePath } from "./util"; +import { YAMLParse } from "./yaml"; +import { writeText } from "./fs"; +import { diffCreatePatch } from "./diff"; /** * Computes file edits based on the specified runtime prompt result and processing options. @@ -39,199 +39,181 @@ import { diffCreatePatch } from "./diff" * - Logs details of the computation process, including errors and skipped files. 
*/ export async function computeFileEdits( - res: RunPromptResult, - options: TraceOptions & { - fileOutputs: FileOutput[] - schemas?: Record - fileMerges?: FileMergeHandler[] - outputProcessors?: PromptOutputProcessorHandler[] - } + res: RunPromptResult, + options: TraceOptions & { + fileOutputs: FileOutput[]; + schemas?: Record; + fileMerges?: FileMergeHandler[]; + outputProcessors?: PromptOutputProcessorHandler[]; + }, ): Promise { - const { trace, fileOutputs, fileMerges, outputProcessors, schemas } = - options || {} - const { fences, frames, messages, usage } = res - let text = res.text - let annotations = res.annotations?.slice(0) - const fileEdits: Record = {} - const changelogs: string[] = [] - const edits: Edits[] = [] - const projFolder = runtimeHost.projectFolder() + const { trace, fileOutputs, fileMerges, outputProcessors, schemas } = options || {}; + const { fences, frames, messages, usage } = res; + let text = res.text; + let annotations = res.annotations?.slice(0); + const fileEdits: Record = {}; + const changelogs: string[] = []; + const edits: Edits[] = []; + const projFolder = runtimeHost.projectFolder(); - // Helper function to get or create file edit object - const getFileEdit = async (fn: string) => { - fn = relativePath(projFolder, fn) - let fileEdit: FileUpdate = fileEdits[fn] - if (!fileEdit) { - let before: string = null - let after: string = undefined - if (await fileExists(fn)) before = await readText(fn) - else if (await fileExists(fn)) after = await readText(fn) - fileEdit = fileEdits[fn] = { before, after } - } - return fileEdit + // Helper function to get or create file edit object + const getFileEdit = async (fn: string) => { + fn = relativePath(projFolder, fn); + let fileEdit: FileUpdate = fileEdits[fn]; + if (!fileEdit) { + let before: string = null; + let after: string = undefined; + if (await fileExists(fn)) before = await readText(fn); + else if (await fileExists(fn)) after = await readText(fn); + fileEdit = fileEdits[fn] = { before, after }; } + return fileEdit; + }; - for (const fence of fences.filter( - ({ validation }) => !validation?.schemaError - )) { - const { label: name, content: val, language } = fence - const pm = /^((file|diff):?)\s+/i.exec(name) - if (pm) { - const kw = pm[1].toLowerCase() - const n = unquote(name.slice(pm[0].length).trim()) - const fn = /^[^\/]/.test(n) - ? runtimeHost.resolvePath(projFolder, n) - : n - const fileEdit = await getFileEdit(fn) - if (kw === "file") { - if (fileMerges.length) { - try { - for (const fileMerge of fileMerges) - fileEdit.after = - (await fileMerge( - fn, - "", // todo - fileEdit.after ?? fileEdit.before, - val - )) ?? val - } catch (e) { - logVerbose(e) - trace.error(`error custom merging diff in ${fn}`, e) - } - } else fileEdit.after = val - } else if (kw === "diff") { - const chunks = parseLLMDiffs(val) - try { - fileEdit.after = applyLLMPatch( - fileEdit.after || fileEdit.before, - chunks - ) - } catch (e) { - logVerbose(e) - trace.error(`error applying patch to ${fn}`, e) - try { - fileEdit.after = applyLLMDiff( - fileEdit.after || fileEdit.before, - chunks - ) - } catch (e) { - logVerbose(e) - trace.error(`error merging diff in ${fn}`, e) - } - } - } - } else if (/^changelog$/i.test(name) || /^changelog/i.test(language)) { - changelogs.push(val) - try { - const cls = parseChangeLogs(val) - for (const changelog of cls) { - const { filename } = changelog - const fn = /^[^\/]/.test(filename) // TODO - ? 
runtimeHost.resolvePath(projFolder, filename) - : filename - const fileEdit = await getFileEdit(fn) - fileEdit.after = applyChangeLog( - fileEdit.after || fileEdit.before || "", - changelog - ) - } - } catch (e) { - logError(e) - trace.error(`error parsing changelog`, e) - trace.detailsFenced(`changelog`, val, "text") - } + for (const fence of fences.filter(({ validation }) => !validation?.schemaError)) { + const { label: name, content: val, language } = fence; + const pm = /^((file|diff):?)\s+/i.exec(name); + if (pm) { + const kw = pm[1].toLowerCase(); + const n = unquote(name.slice(pm[0].length).trim()); + const fn = /^[^\/]/.test(n) ? runtimeHost.resolvePath(projFolder, n) : n; + const fileEdit = await getFileEdit(fn); + if (kw === "file") { + if (fileMerges.length) { + try { + for (const fileMerge of fileMerges) + fileEdit.after = + (await fileMerge( + fn, + "", // todo + fileEdit.after ?? fileEdit.before, + val, + )) ?? val; + } catch (e) { + logVerbose(e); + trace.error(`error custom merging diff in ${fn}`, e); + } + } else fileEdit.after = val; + } else if (kw === "diff") { + const chunks = parseLLMDiffs(val); + try { + fileEdit.after = applyLLMPatch(fileEdit.after || fileEdit.before, chunks); + } catch (e) { + logVerbose(e); + trace.error(`error applying patch to ${fn}`, e); + try { + fileEdit.after = applyLLMDiff(fileEdit.after || fileEdit.before, chunks); + } catch (e) { + logVerbose(e); + trace.error(`error merging diff in ${fn}`, e); + } } + } + } else if (/^changelog$/i.test(name) || /^changelog/i.test(language)) { + changelogs.push(val); + try { + const cls = parseChangeLogs(val); + for (const changelog of cls) { + const { filename } = changelog; + const fn = /^[^\/]/.test(filename) // TODO + ? runtimeHost.resolvePath(projFolder, filename) + : filename; + const fileEdit = await getFileEdit(fn); + fileEdit.after = applyChangeLog(fileEdit.after || fileEdit.before || "", changelog); + } + } catch (e) { + logError(e); + trace.error(`error parsing changelog`, e); + trace.detailsFenced(`changelog`, val, "text"); + } } + } - // Apply user-defined output processors - if (outputProcessors?.length) { - const opTrace = trace.startTraceDetails("🖨️ output processors") - try { - for (const outputProcessor of outputProcessors) { - const { - text: newText, - files, - annotations: oannotations, - } = (await outputProcessor({ - text, - fileEdits, - fences, - frames, - annotations, - schemas, - messages, - usage, - })) || {} - - if (newText !== undefined) { - text = newText - opTrace.detailsFenced(`📝 text`, text) - } + // Apply user-defined output processors + if (outputProcessors?.length) { + const opTrace = trace.startTraceDetails("🖨️ output processors"); + try { + for (const outputProcessor of outputProcessors) { + const { + text: newText, + files, + annotations: oannotations, + } = (await outputProcessor({ + text, + fileEdits, + fences, + frames, + annotations, + schemas, + messages, + usage, + })) || {}; - if (files) - for (const [n, content] of Object.entries(files)) { - const fn = runtimeHost.path.isAbsolute(n) - ? 
n - : runtimeHost.resolvePath(projFolder, n) - opTrace.detailsFenced(`📁 file ${fn}`, content) - const fileEdit = await getFileEdit(fn) - fileEdit.after = content - fileEdit.validation = { pathValid: true } - } - if (oannotations) annotations = oannotations.slice(0) - } - } catch (e) { - if (isCancelError(e)) throw e - logError(e) - opTrace.error(`output processor failed`, e) - } finally { - opTrace.endDetails() + if (newText !== undefined) { + text = newText; + opTrace.detailsFenced(`📝 text`, text); } + + if (files) + for (const [n, content] of Object.entries(files)) { + const fn = runtimeHost.path.isAbsolute(n) ? n : runtimeHost.resolvePath(projFolder, n); + opTrace.detailsFenced(`📁 file ${fn}`, content); + const fileEdit = await getFileEdit(fn); + fileEdit.after = content; + fileEdit.validation = { pathValid: true }; + } + if (oannotations) annotations = oannotations.slice(0); + } + } catch (e) { + if (isCancelError(e)) throw e; + logError(e); + opTrace.error(`output processor failed`, e); + } finally { + opTrace.endDetails(); } + } - // Validate and apply file outputs - validateFileOutputs(fileOutputs, trace, fileEdits, schemas) + // Validate and apply file outputs + validateFileOutputs(fileOutputs, trace, fileEdits, schemas); - // Convert file edits into structured edits - Object.entries(fileEdits) - .filter(([, { before, after }]) => before !== after) // ignore unchanged files - .forEach(([fn, { before, after, validation }]) => { - if (before) { - edits.push({ - label: `Update ${fn}`, - filename: fn, - type: "replace", - range: [[0, 0], stringToPos(after)], - text: after, - validated: - !validation?.schemaError && validation?.pathValid, - }) - } else { - edits.push({ - label: `Create ${fn}`, - filename: fn, - type: "createfile", - text: after, - overwrite: true, - validated: - !validation?.schemaError && validation?.pathValid, - }) - } - }) + // Convert file edits into structured edits + Object.entries(fileEdits) + .filter(([, { before, after }]) => before !== after) // ignore unchanged files + .forEach(([fn, { before, after, validation }]) => { + if (before) { + edits.push({ + label: `Update ${fn}`, + filename: fn, + type: "replace", + range: [[0, 0], stringToPos(after)], + text: after, + validated: !validation?.schemaError && validation?.pathValid, + }); + } else { + edits.push({ + label: `Create ${fn}`, + filename: fn, + type: "createfile", + text: after, + overwrite: true, + validated: !validation?.schemaError && validation?.pathValid, + }); + } + }); - if (edits.length) - trace.details( - "✏️ edits", - dataToMarkdownTable(edits, { - headers: ["type", "filename", "message", "validated"], - }) - ) + if (edits.length) + trace.details( + "✏️ edits", + dataToMarkdownTable(edits, { + headers: ["type", "filename", "message", "validated"], + }), + ); - res.text = text - res.fileEdits = fileEdits - res.changelogs = changelogs - res.annotations = annotations - res.edits = edits + res.text = text; + res.fileEdits = fileEdits; + res.changelogs = changelogs; + res.annotations = annotations; + res.edits = edits; } // Validate file outputs against specified schemas and patterns @@ -243,63 +225,57 @@ export async function computeFileEdits( * @param schemas The JSON schemas for validation. 
*/ function validateFileOutputs( - fileOutputs: FileOutput[], - trace: MarkdownTrace, - fileEdits: Record, - schemas: Record + fileOutputs: FileOutput[], + trace: MarkdownTrace, + fileEdits: Record, + schemas: Record, ) { - if (fileOutputs?.length && Object.keys(fileEdits || {}).length) { - trace.startDetails("🗂 file outputs") - try { - for (const fileEditName of Object.keys(fileEdits)) { - const fe = fileEdits[fileEditName] - for (const fileOutput of fileOutputs) { - const { pattern, options } = fileOutput - if (isGlobMatch(fileEditName, pattern)) { - try { - trace.startDetails(`📁 ${fileEditName}`) - trace.itemValue(`pattern`, pattern) - const { schema: schemaId } = options || {} - if (/\.(json|yaml)$/i.test(fileEditName)) { - const { after } = fileEdits[fileEditName] - const data = /\.json$/i.test(fileEditName) - ? JSON5parse(after) - : YAMLParse(after) - trace.detailsFenced("📝 data", data) - if (schemaId) { - const schema = schemas[schemaId] - if (!schema) - fe.validation = { - schemaError: `schema ${schemaId} not found`, - } - else - fe.validation = validateJSONWithSchema( - data, - schema, - { - trace, - } - ) - } - } else { - fe.validation = { pathValid: true } - } - } catch (e) { - trace.error(errorMessage(e)) - fe.validation = { - schemaError: errorMessage(e), - } - } finally { - trace.endDetails() - } - break - } + if (fileOutputs?.length && Object.keys(fileEdits || {}).length) { + trace.startDetails("🗂 file outputs"); + try { + for (const fileEditName of Object.keys(fileEdits)) { + const fe = fileEdits[fileEditName]; + for (const fileOutput of fileOutputs) { + const { pattern, options } = fileOutput; + if (isGlobMatch(fileEditName, pattern)) { + try { + trace.startDetails(`📁 ${fileEditName}`); + trace.itemValue(`pattern`, pattern); + const { schema: schemaId } = options || {}; + if (/\.(json|yaml)$/i.test(fileEditName)) { + const { after } = fileEdits[fileEditName]; + const data = /\.json$/i.test(fileEditName) ? JSON5parse(after) : YAMLParse(after); + trace.detailsFenced("📝 data", data); + if (schemaId) { + const schema = schemas[schemaId]; + if (!schema) + fe.validation = { + schemaError: `schema ${schemaId} not found`, + }; + else + fe.validation = validateJSONWithSchema(data, schema, { + trace, + }); } + } else { + fe.validation = { pathValid: true }; + } + } catch (e) { + trace.error(errorMessage(e)); + fe.validation = { + schemaError: errorMessage(e), + }; + } finally { + trace.endDetails(); } - } finally { - trace.endDetails() + break; + } } + } + } finally { + trace.endDetails(); } + } } /** @@ -311,46 +287,37 @@ function validateFileOutputs( * - trace: A trace object for logging details, including skipped files, changes, and diff information. 
*/ export async function writeFileEdits( - fileEdits: Record, // Contains the edits to be applied to files - options?: { applyEdits?: boolean } & TraceOptions + fileEdits: Record, // Contains the edits to be applied to files + options?: { applyEdits?: boolean } & TraceOptions, ) { - const { applyEdits, trace } = options || {} - // Iterate over each file edit entry - for (const fileEdit of Object.entries(fileEdits || {})) { - // Destructure the filename, before content, after content, and validation from the entry - const [fn, { before, after, validation }] = fileEdit + const { applyEdits, trace } = options || {}; + // Iterate over each file edit entry + for (const fileEdit of Object.entries(fileEdits || {})) { + // Destructure the filename, before content, after content, and validation from the entry + const [fn, { before, after, validation }] = fileEdit; - if (!applyEdits && !validation?.pathValid) { - // path not validated - continue - } + if (!applyEdits && !validation?.pathValid) { + // path not validated + continue; + } - // Skip writing if the edit is invalid and applyEdits is false - if (validation?.schemaError) { - trace.detailsFenced( - `skipping ${fn}, invalid schema`, - validation.schemaError, - "text" - ) - continue - } + // Skip writing if the edit is invalid and applyEdits is false + if (validation?.schemaError) { + trace.detailsFenced(`skipping ${fn}, invalid schema`, validation.schemaError, "text"); + continue; + } - // Check if there's a change between before and after content - if (after !== before) { - // Log whether the file is being updated or created - logVerbose( - `${before !== undefined ? `updating` : `creating`} ${fn}` - ) - trace.detailsFenced( - `updating ${fn}`, - diffCreatePatch( - { filename: fn, content: before }, - { filename: fn, content: after } - ), - "diff" - ) - // Write the new content to the file - await writeText(fn, after ?? before) // Write 'after' content if available, otherwise 'before' - } + // Check if there's a change between before and after content + if (after !== before) { + // Log whether the file is being updated or created + logVerbose(`${before !== undefined ? `updating` : `creating`} ${fn}`); + trace.detailsFenced( + `updating ${fn}`, + diffCreatePatch({ filename: fn, content: before }, { filename: fn, content: after }), + "diff", + ); + // Write the new content to the file + await writeText(fn, after ?? before); // Write 'after' content if available, otherwise 'before' } + } } diff --git a/packages/core/src/filetype.ts b/packages/core/src/filetype.ts index d834fe4064..b5f12d8eca 100644 --- a/packages/core/src/filetype.ts +++ b/packages/core/src/filetype.ts @@ -6,8 +6,8 @@ * @returns The detected file type object, or undefined if no buffer is provided or type cannot be determined. 
*/ export async function fileTypeFromBuffer(buffer: Uint8Array | ArrayBuffer) { - if (buffer === undefined) return undefined + if (buffer === undefined) return undefined; - const { fileTypeFromBuffer } = await import("file-type") - return fileTypeFromBuffer(buffer) + const { fileTypeFromBuffer } = await import("file-type"); + return fileTypeFromBuffer(buffer); } diff --git a/packages/core/src/frontmatter.test.ts b/packages/core/src/frontmatter.test.ts index cda6dffa3a..9d33ffe26f 100644 --- a/packages/core/src/frontmatter.test.ts +++ b/packages/core/src/frontmatter.test.ts @@ -1,84 +1,80 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { - frontmatterTryParse, - splitMarkdown, - updateFrontmatter, -} from "./frontmatter" -import { YAMLTryParse } from "./yaml" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { frontmatterTryParse, splitMarkdown, updateFrontmatter } from "./frontmatter"; +import { YAMLTryParse } from "./yaml"; describe("replace frontmatter", () => { - test("only", () => { - const actual = `--- + test("only", () => { + const actual = `--- foo: bar --- -` - const { value: res } = frontmatterTryParse(actual) - assert.deepEqual(res, { foo: "bar" }) - }) - test("mix", () => { - const actual = `--- +`; + const { value: res } = frontmatterTryParse(actual); + assert.deepEqual(res, { foo: "bar" }); + }); + test("mix", () => { + const actual = `--- foo: bar --- foo bar -` - const { value: res } = frontmatterTryParse(actual) - assert.deepEqual(res, { foo: "bar" }) - }) -}) +`; + const { value: res } = frontmatterTryParse(actual); + assert.deepEqual(res, { foo: "bar" }); + }); +}); describe("splitMarkdown", () => { - test("split markdown with yaml frontmatter", () => { - const markdown = `--- + test("split markdown with yaml frontmatter", () => { + const markdown = `--- title: Test --- -This is a test.` - const { frontmatter, content } = splitMarkdown(markdown) - assert.deepEqual(YAMLTryParse(frontmatter), { title: "Test" }) - assert.equal(content, "This is a test.") - }) +This is a test.`; + const { frontmatter, content } = splitMarkdown(markdown); + assert.deepEqual(YAMLTryParse(frontmatter), { title: "Test" }); + assert.equal(content, "This is a test."); + }); - test("split markdown with json frontmatter", () => { - const markdown = `--- + test("split markdown with json frontmatter", () => { + const markdown = `--- { "title": "Test" } --- -This is a test.` - const { frontmatter, content } = splitMarkdown(markdown) - assert.deepEqual(JSON.parse(frontmatter), { title: "Test" }) - assert.equal(content, "This is a test.") - }) -}) +This is a test.`; + const { frontmatter, content } = splitMarkdown(markdown); + assert.deepEqual(JSON.parse(frontmatter), { title: "Test" }); + assert.equal(content, "This is a test."); + }); +}); describe("updateFrontmatter", () => { - test("update yaml frontmatter", () => { - const markdown = `--- + test("update yaml frontmatter", () => { + const markdown = `--- title: Old Title foo: bar --- -This is a test.` - const newFrontmatter: any = { title: "New Title", foo: null } - const updatedMarkdown = updateFrontmatter(markdown, newFrontmatter) - const { frontmatter, content } = splitMarkdown(updatedMarkdown) - assert.deepEqual(YAMLTryParse(frontmatter), { title: "New Title" }) - assert.equal(content, "This is a test.") - }) +This is a test.`; + const newFrontmatter: any = { title: "New Title", foo: null }; + const updatedMarkdown = updateFrontmatter(markdown, 
newFrontmatter); + const { frontmatter, content } = splitMarkdown(updatedMarkdown); + assert.deepEqual(YAMLTryParse(frontmatter), { title: "New Title" }); + assert.equal(content, "This is a test."); + }); - test("update json frontmatter", () => { - const markdown = `--- + test("update json frontmatter", () => { + const markdown = `--- { "title": "Old Title", "foo": "bar" } --- -This is a test.` - const newFrontmatter: any = { title: "New Title", foo: null } - const updatedMarkdown = updateFrontmatter(markdown, newFrontmatter, { - format: "json", - }) - const { frontmatter, content } = splitMarkdown(updatedMarkdown) - assert.deepEqual(JSON.parse(frontmatter), { title: "New Title" }) - assert.equal(content, "This is a test.") - }) -}) +This is a test.`; + const newFrontmatter: any = { title: "New Title", foo: null }; + const updatedMarkdown = updateFrontmatter(markdown, newFrontmatter, { + format: "json", + }); + const { frontmatter, content } = splitMarkdown(updatedMarkdown); + assert.deepEqual(JSON.parse(frontmatter), { title: "New Title" }); + assert.equal(content, "This is a test."); + }); +}); diff --git a/packages/core/src/frontmatter.ts b/packages/core/src/frontmatter.ts index 7e2f4863ad..91bb723215 100644 --- a/packages/core/src/frontmatter.ts +++ b/packages/core/src/frontmatter.ts @@ -1,7 +1,7 @@ -import { filenameOrFileToContent } from "./unwrappers" -import { JSON5TryParse } from "./json5" -import { TOMLTryParse } from "./toml" -import { YAMLTryParse, YAMLStringify } from "./yaml" +import { filenameOrFileToContent } from "./unwrappers"; +import { JSON5TryParse } from "./json5"; +import { TOMLTryParse } from "./toml"; +import { YAMLTryParse, YAMLStringify } from "./yaml"; /** * Parses the frontmatter section of a text input and attempts to convert it into a structured format. @@ -17,31 +17,31 @@ import { YAMLTryParse, YAMLStringify } from "./yaml" * Returns `undefined` if no frontmatter is found. */ export function frontmatterTryParse( - text: string | WorkspaceFile, - options?: { format: "yaml" | "json" | "toml" | "text" } + text: string | WorkspaceFile, + options?: { format: "yaml" | "json" | "toml" | "text" }, ): { text: string; value: any; endLine?: number } | undefined { - text = filenameOrFileToContent(text) + text = filenameOrFileToContent(text); - const { format = "yaml" } = options || {} - const { frontmatter, endLine } = splitMarkdown(text) - if (!frontmatter) return undefined + const { format = "yaml" } = options || {}; + const { frontmatter, endLine } = splitMarkdown(text); + if (!frontmatter) return undefined; - let res: any - switch (format) { - case "text": - res = frontmatter - break - case "json": - res = JSON5TryParse(frontmatter) - break - case "toml": - res = TOMLTryParse(frontmatter) - break - default: - res = YAMLTryParse(frontmatter) - break - } - return { text: frontmatter, value: res, endLine } + let res: any; + switch (format) { + case "text": + res = frontmatter; + break; + case "json": + res = JSON5TryParse(frontmatter); + break; + case "toml": + res = TOMLTryParse(frontmatter); + break; + default: + res = YAMLTryParse(frontmatter); + break; + } + return { text: frontmatter, value: res, endLine }; } /** @@ -54,24 +54,24 @@ export function frontmatterTryParse( * - `content`: The remaining Markdown content after the frontmatter. 
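 *
 * A small illustration (values are hypothetical):
 *
 * @example
 * ```ts
 * const { frontmatter, content } = splitMarkdown("---\ntitle: Test\n---\nBody")
 * // frontmatter === "title: Test", content === "Body"
 * ```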
*/ export function splitMarkdown(text: string | WorkspaceFile): { - frontmatter?: string - endLine?: number - content: string + frontmatter?: string; + endLine?: number; + content: string; } { - text = filenameOrFileToContent(text) - if (!text) return { content: text } - const lines = text.split(/\r?\n/g) - const delimiter = "---" - if (lines[0] !== delimiter) return { content: text } - let end = 1 - while (end < lines.length) { - if (lines[end] === delimiter) break - end++ - } - if (end >= lines.length) return { frontmatter: text, content: "" } - const frontmatter = lines.slice(1, end).join("\n") - const content = lines.slice(end + 1).join("\n") - return { frontmatter, content, endLine: end } + text = filenameOrFileToContent(text); + if (!text) return { content: text }; + const lines = text.split(/\r?\n/g); + const delimiter = "---"; + if (lines[0] !== delimiter) return { content: text }; + let end = 1; + while (end < lines.length) { + if (lines[end] === delimiter) break; + end++; + } + if (end >= lines.length) return { frontmatter: text, content: "" }; + const frontmatter = lines.slice(1, end).join("\n"); + const content = lines.slice(end + 1).join("\n"); + return { frontmatter, content, endLine: end }; } /** @@ -88,35 +88,35 @@ export function splitMarkdown(text: string | WorkspaceFile): { * @throws An error if the specified format is unsupported. */ export function updateFrontmatter( - text: string, - newFrontmatter: any, - options?: { format: "yaml" | "json" } + text: string, + newFrontmatter: any, + options?: { format: "yaml" | "json" }, ): string { - const { content = "" } = splitMarkdown(text) - if (newFrontmatter === null) return content + const { content = "" } = splitMarkdown(text); + if (newFrontmatter === null) return content; - const frontmatter = frontmatterTryParse(text, options)?.value ?? {} + const frontmatter = frontmatterTryParse(text, options)?.value ?? {}; - // merge object - for (const [key, value] of Object.entries(newFrontmatter ?? {})) { - if (value === null) { - delete frontmatter[key] - } else if (value !== undefined) { - frontmatter[key] = value - } + // merge object + for (const [key, value] of Object.entries(newFrontmatter ?? 
{})) { + if (value === null) { + delete frontmatter[key]; + } else if (value !== undefined) { + frontmatter[key] = value; } + } - const { format = "yaml" } = options || {} - let fm: string - switch (format) { - case "json": - fm = JSON.stringify(frontmatter, null, 2) - break - case "yaml": - fm = YAMLStringify(frontmatter) - break - default: - throw new Error(`Unsupported format: ${format}`) - } - return `---\n${fm}\n---\n${content}` + const { format = "yaml" } = options || {}; + let fm: string; + switch (format) { + case "json": + fm = JSON.stringify(frontmatter, null, 2); + break; + case "yaml": + fm = YAMLStringify(frontmatter); + break; + default: + throw new Error(`Unsupported format: ${format}`); + } + return `---\n${fm}\n---\n${content}`; } diff --git a/packages/core/src/fs.test.ts b/packages/core/src/fs.test.ts index 391f402ce7..4fc5272df3 100644 --- a/packages/core/src/fs.test.ts +++ b/packages/core/src/fs.test.ts @@ -1,67 +1,53 @@ -import { describe, test, before, after } from "node:test" -import assert from "node:assert/strict" -import { tryReadText, tryStat } from "./fs" -import * as fs from "fs/promises" -import * as path from "path" +import { describe, test, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { tryReadText, tryStat } from "./fs"; +import * as fs from "fs/promises"; +import * as path from "path"; describe("fs", async () => { - const testDir = ".genaiscript/test-tryStat" - const testFile = path.join(testDir, "testfile.txt") - const content = "test content" - - before(async () => { - // Setup test directory and file - await fs.mkdir(testDir, { recursive: true }) - await fs.writeFile(testFile, content) - }) - - after(async () => { - // Cleanup - await fs.rm(testDir, { recursive: true, force: true }) - }) - - test("should return stat information for an existing file", async () => { - const stat = await tryStat(testFile) - assert( - stat !== undefined, - "Stat should not be undefined for existing file" - ) - assert(stat.isFile(), "Should be a file") - assert(stat.isFile(), "Should be a file") - }) - - test("should return stat information for an existing directory", async () => { - const stat = await tryStat(testDir) - assert( - stat !== undefined, - "Stat should not be undefined for existing directory" - ) - assert(stat.isDirectory(), "Should be a directory") - }) - - test("should return undefined for non-existent file", async () => { - const nonExistentFile = path.join(testDir, "nonexistent.txt") - const stat = await tryStat(nonExistentFile) - assert.equal( - stat, - undefined, - "Should return undefined for non-existent file" - ) - }) - - test("should return undefined for invalid path", async () => { - const stat = await tryStat("") - assert.equal( - stat, - undefined, - "Should return undefined for invalid path" - ) - }) - - test("should read workspace relative file path", async () => { - const relativePath = testFile - console.log(`relative path: ${relativePath}`) - const f = await tryReadText(relativePath) - assert.strictEqual(f, content, `failed to read file ${relativePath}`) - }) -}) + const testDir = ".genaiscript/test-tryStat"; + const testFile = path.join(testDir, "testfile.txt"); + const content = "test content"; + + before(async () => { + // Setup test directory and file + await fs.mkdir(testDir, { recursive: true }); + await fs.writeFile(testFile, content); + }); + + after(async () => { + // Cleanup + await fs.rm(testDir, { recursive: true, force: true }); + }); + + test("should return stat information for an existing 
file", async () => { + const stat = await tryStat(testFile); + assert(stat !== undefined, "Stat should not be undefined for existing file"); + assert(stat.isFile(), "Should be a file"); + assert(stat.isFile(), "Should be a file"); + }); + + test("should return stat information for an existing directory", async () => { + const stat = await tryStat(testDir); + assert(stat !== undefined, "Stat should not be undefined for existing directory"); + assert(stat.isDirectory(), "Should be a directory"); + }); + + test("should return undefined for non-existent file", async () => { + const nonExistentFile = path.join(testDir, "nonexistent.txt"); + const stat = await tryStat(nonExistentFile); + assert.equal(stat, undefined, "Should return undefined for non-existent file"); + }); + + test("should return undefined for invalid path", async () => { + const stat = await tryStat(""); + assert.equal(stat, undefined, "Should return undefined for invalid path"); + }); + + test("should read workspace relative file path", async () => { + const relativePath = testFile; + console.log(`relative path: ${relativePath}`); + const f = await tryReadText(relativePath); + assert.strictEqual(f, content, `failed to read file ${relativePath}`); + }); +}); diff --git a/packages/core/src/fs.ts b/packages/core/src/fs.ts index c0b2d3c7d8..f76cff84f9 100644 --- a/packages/core/src/fs.ts +++ b/packages/core/src/fs.ts @@ -1,11 +1,11 @@ -import { lstat, mkdir, writeFile, readFile, appendFile } from "fs/promises" -import { HTTPS_REGEX } from "./constants" -import { host } from "./host" -import { dirname } from "path" -import { JSON5TryParse } from "./json5" -import { homedir } from "os" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("fs") +import { lstat, mkdir, writeFile, readFile, appendFile } from "fs/promises"; +import { HTTPS_REGEX } from "./constants"; +import { host } from "./host"; +import { dirname } from "path"; +import { JSON5TryParse } from "./json5"; +import { homedir } from "os"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("fs"); /** * Changes the file extension of a given file name. @@ -15,11 +15,11 @@ const dbg = genaiscriptDebug("fs") * @returns The file name with the updated extension. */ export function changeext(filename: string, newext: string) { - dbg(`checking if newext starts with a dot`) - if (newext && !newext.startsWith(".")) { - newext = "." + newext - } - return filename.replace(/\.[^.]+$/, newext) + dbg(`checking if newext starts with a dot`); + if (newext && !newext.startsWith(".")) { + newext = "." + newext; + } + return filename.replace(/\.[^.]+$/, newext); } /** @@ -29,8 +29,8 @@ export function changeext(filename: string, newext: string) { * @returns The textual content of the file. */ export async function readText(fn: string) { - dbg(`reading file ${fn}`) - return readFile(fn, { encoding: "utf8" }) + dbg(`reading file ${fn}`); + return readFile(fn, { encoding: "utf8" }); } /** @@ -40,12 +40,12 @@ export async function readText(fn: string) { * @returns The content of the file as a string if successfully read, or undefined if an error occurs. 
*/ export async function tryReadText(fn: string) { - try { - dbg(`trying to read text from file ${fn}`) - return await readText(fn) - } catch { - return undefined - } + try { + dbg(`trying to read text from file ${fn}`); + return await readText(fn); + } catch { + return undefined; + } } /** @@ -55,19 +55,19 @@ export async function tryReadText(fn: string) { * @param dir - The path of the directory to ensure exists. */ export async function ensureDir(dir: string) { - dbg(`ensuring directory exists ${dir}`) - await mkdir(dir, { recursive: true }) + dbg(`ensuring directory exists ${dir}`); + await mkdir(dir, { recursive: true }); } /** * Expands homedir */ export function expandHomeDir(dir: string) { - if (dir?.startsWith("~/")) { - const home = homedir() - dir = host.path.join(home, dir.slice(2)) - } - return dir + if (dir?.startsWith("~/")) { + const home = homedir(); + dir = host.path.join(home, dir.slice(2)); + } + return dir; } /** @@ -77,11 +77,11 @@ export function expandHomeDir(dir: string) { * @param content - The textual content to write into the file. */ export async function writeText(fn: string, content: string) { - if (!fn) throw new Error("filename is required") - if (typeof content !== "string") throw new Error("content must be a string") - await ensureDir(dirname(fn)) - dbg(`writing text to file ${fn}`) - await writeFile(fn, content, { encoding: "utf8" }) + if (!fn) throw new Error("filename is required"); + if (typeof content !== "string") throw new Error("content must be a string"); + await ensureDir(dirname(fn)); + dbg(`writing text to file ${fn}`); + await writeFile(fn, content, { encoding: "utf8" }); } /** @@ -92,10 +92,10 @@ export async function writeText(fn: string, content: string) { * @throws Throws an error if the filename is not provided. */ export async function appendText(fn: string, content: string) { - if (!fn) throw new Error("filename is required") - await ensureDir(dirname(fn)) - dbg(`append text to file ${fn}`) - await appendFile(fn, content, { encoding: "utf8" }) + if (!fn) throw new Error("filename is required"); + await ensureDir(dirname(fn)); + dbg(`append text to file ${fn}`); + await appendFile(fn, content, { encoding: "utf8" }); } /** @@ -105,9 +105,9 @@ export async function appendText(fn: string, content: string) { * @returns A promise that resolves to `true` if the file exists and is a file, or `false` otherwise. */ export async function fileExists(fn: string) { - dbg(`checking if file exists ${fn}`) - const stat = await tryStat(fn) - return !!stat?.isFile() + dbg(`checking if file exists ${fn}`); + const stat = await tryStat(fn); + return !!stat?.isFile(); } /** @@ -118,13 +118,13 @@ export async function fileExists(fn: string) { * @returns The file status object if the file exists, or undefined if it does not. */ export async function tryStat(fn: string) { - try { - dbg(`getting file stats for ${fn}`) - if (!fn) return undefined - return await lstat(fn) - } catch { - return undefined - } + try { + dbg(`getting file stats for ${fn}`); + if (!fn) return undefined; + return await lstat(fn); + } catch { + return undefined; + } } /** @@ -135,9 +135,9 @@ export async function tryStat(fn: string) { * @throws Throws an error if the file cannot be read or parsed as JSON. 
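 *
 * A hedged example (assumes ./config.json exists and contains valid JSON):
 *
 * @example
 * ```ts
 * const config = await readJSON("./config.json")
 * console.log(Object.keys(config))
 * ```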
*/ export async function readJSON(fn: string) { - if (!fn) throw new Error("filename is required") - dbg(`reading JSON from file ${fn}`) - return JSON.parse(await readText(fn)) + if (!fn) throw new Error("filename is required"); + dbg(`reading JSON from file ${fn}`); + return JSON.parse(await readText(fn)); } /** @@ -147,20 +147,20 @@ export async function readJSON(fn: string) { * @returns The parsed JSON object if the operation succeeds, or `undefined` if an error occurs. */ export async function tryReadJSON(fn: string) { - try { - if (!fn) return undefined - return JSON.parse(await readText(fn)) - } catch { - return undefined - } + try { + if (!fn) return undefined; + return JSON.parse(await readText(fn)); + } catch { + return undefined; + } } export async function tryReadJSON5(fn: string) { - try { - return JSON5TryParse(await readText(fn)) - } catch { - return undefined - } + try { + return JSON5TryParse(await readText(fn)); + } catch { + return undefined; + } } /** @@ -170,9 +170,9 @@ export async function tryReadJSON5(fn: string) { * @param obj - The JSON object to be written to the file. */ export async function writeJSON(fn: string, obj: any) { - if (!fn) throw new Error("filename is required") - dbg(`writing JSON to file ${fn}`) - await writeText(fn, JSON.stringify(obj)) + if (!fn) throw new Error("filename is required"); + dbg(`writing JSON to file ${fn}`); + await writeText(fn, JSON.stringify(obj)); } /** @@ -186,47 +186,45 @@ export async function writeJSON(fn: string, obj: any) { * @returns An array of expanded file paths and URLs, filtered based on the given options. */ export async function expandFiles( - files: string[], - options?: { - excludedFiles?: string[] - accept?: string - applyGitIgnore?: boolean - } + files: string[], + options?: { + excludedFiles?: string[]; + accept?: string; + applyGitIgnore?: boolean; + }, ) { - const { excludedFiles = [], accept, applyGitIgnore } = options || {} - dbg(`no files to expand or accept is none`) - if (!files.length || accept === "none") { - return [] - } + const { excludedFiles = [], accept, applyGitIgnore } = options || {}; + dbg(`no files to expand or accept is none`); + if (!files.length || accept === "none") { + return []; + } - dbg(`filtering URLs from files`) - const urls = files - .filter((f) => HTTPS_REGEX.test(f)) - .filter((f) => !excludedFiles.includes(f)) - dbg(`finding other files`) - const others = await host.findFiles( - files.filter((f) => !HTTPS_REGEX.test(f)), - { - ignore: excludedFiles.filter((f) => !HTTPS_REGEX.test(f)), - applyGitIgnore, - } - ) + dbg(`filtering URLs from files`); + const urls = files.filter((f) => HTTPS_REGEX.test(f)).filter((f) => !excludedFiles.includes(f)); + dbg(`finding other files`); + const others = await host.findFiles( + files.filter((f) => !HTTPS_REGEX.test(f)), + { + ignore: excludedFiles.filter((f) => !HTTPS_REGEX.test(f)), + applyGitIgnore, + }, + ); - const res = new Set([...urls, ...others]) - dbg(`applying accept filter`) - if (accept) { - const exts = accept - .split(",") - .map((s) => s.trim().replace(/^\*\./, ".")) - .filter((s) => !!s) - for (const rf of res) { - dbg(`removing file ${rf} as it does not match accepted extensions`) - if (!exts.some((ext) => rf.endsWith(ext))) { - res.delete(rf) - } - } + const res = new Set([...urls, ...others]); + dbg(`applying accept filter`); + if (accept) { + const exts = accept + .split(",") + .map((s) => s.trim().replace(/^\*\./, ".")) + .filter((s) => !!s); + for (const rf of res) { + dbg(`removing file ${rf} as it does not match 
accepted extensions`); + if (!exts.some((ext) => rf.endsWith(ext))) { + res.delete(rf); + } } - return Array.from(res) + } + return Array.from(res); } /** @@ -241,28 +239,26 @@ export async function expandFiles( * through `expandFiles` to resolve all matching paths, and combines the results with the workspace file objects. */ export async function expandFileOrWorkspaceFiles( - files: (string | WorkspaceFile)[] + files: (string | WorkspaceFile)[], ): Promise { - dbg(`expanding file or workspace files`) - const filesPaths = await expandFiles( - files.filter((f) => typeof f === "string"), - { - applyGitIgnore: false, - } - ) - dbg(`filtering workspace files`) - const workspaceFiles = files.filter( - (f) => typeof f === "object" - ) as WorkspaceFile[] - return [ - ...filesPaths.map( - (filename) => - ({ - filename, - }) satisfies WorkspaceFile - ), - ...workspaceFiles, - ] + dbg(`expanding file or workspace files`); + const filesPaths = await expandFiles( + files.filter((f) => typeof f === "string"), + { + applyGitIgnore: false, + }, + ); + dbg(`filtering workspace files`); + const workspaceFiles = files.filter((f) => typeof f === "object") as WorkspaceFile[]; + return [ + ...filesPaths.map( + (filename) => + ({ + filename, + }) satisfies WorkspaceFile, + ), + ...workspaceFiles, + ]; } /** @@ -272,6 +268,6 @@ export async function expandFileOrWorkspaceFiles( * @returns The workspace-compatible file path or URL. */ export function filePathOrUrlToWorkspaceFile(f: string) { - dbg(`converting file path or URL to workspace file ${f}`) - return HTTPS_REGEX.test(f) || host.path.resolve(f) === f ? f : `./${f}` + dbg(`converting file path or URL to workspace file ${f}`); + return HTTPS_REGEX.test(f) || host.path.resolve(f) === f ? f : `./${f}`; } diff --git a/packages/core/src/fscache.ts b/packages/core/src/fscache.ts index bf22f06acc..c220367292 100644 --- a/packages/core/src/fscache.ts +++ b/packages/core/src/fscache.ts @@ -1,19 +1,15 @@ // Import necessary modules and types -import { host } from "./host" -import { writeText } from "./fs" -import { dotGenaiscriptPath } from "./workdir" -import { basename, join } from "node:path" -import debug, { Debugger } from "debug" -import { errorMessage } from "./error" -import { tryReadJSON } from "./fs" -import { rm, readdir } from "fs/promises" -import { - CACHE_FORMAT_VERSION, - CACHE_SHA_LENGTH, - FILE_READ_CONCURRENCY_DEFAULT, -} from "./constants" -import { hash } from "./crypto" -import pLimit from "p-limit" +import { host } from "./host"; +import { writeText } from "./fs"; +import { dotGenaiscriptPath } from "./workdir"; +import { basename, join } from "node:path"; +import debug, { Debugger } from "debug"; +import { errorMessage } from "./error"; +import { tryReadJSON } from "./fs"; +import { rm, readdir } from "fs/promises"; +import { CACHE_FORMAT_VERSION, CACHE_SHA_LENGTH, FILE_READ_CONCURRENCY_DEFAULT } from "./constants"; +import { hash } from "./crypto"; +import pLimit from "p-limit"; /** * A cache class stores each entry as a separate file in a directory. 
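 *
 * A usage sketch (cache name, key, and value are hypothetical):
 *
 * @example
 * ```ts
 * const cache = new FsCache("summaries")
 * await cache.set({ file: "a.md" }, { summary: "..." })
 * const hit = await cache.get({ file: "a.md" }) // undefined on a miss
 * ```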
@@ -22,87 +18,85 @@ import pLimit from "p-limit"
  * @template V - Type of the value
  */
 export class FsCache<K, V> implements WorkspaceFileCache<K, V> {
-    private hashOptions: HashOptions
-    private dbg: Debugger
+  private hashOptions: HashOptions;
+  private dbg: Debugger;
 
-    // Constructor is private to enforce the use of byName factory method
-    constructor(public readonly name: string) {
-        this.dbg = debug(`genaiscript:cache:${name}`)
-        this.hashOptions = {
-            salt: CACHE_FORMAT_VERSION,
-            length: CACHE_SHA_LENGTH,
-        }
-    }
+  // Creates a cache instance scoped by name; entries are stored as JSON files under the cache folder
+  constructor(public readonly name: string) {
+    this.dbg = debug(`genaiscript:cache:${name}`);
+    this.hashOptions = {
+      salt: CACHE_FORMAT_VERSION,
+      length: CACHE_SHA_LENGTH,
+    };
+  }
 
-    private cacheFilename(sha: string) {
-        return join(this.folder(), sha + ".json")
-    }
+  private cacheFilename(sha: string) {
+    return join(this.folder(), sha + ".json");
+  }
 
-    async get(key: any): Promise<any> {
-        if (key === undefined) return undefined // Handle undefined key
-        const sha = await this.getSha(key)
-        const fn = this.cacheFilename(sha)
-        const res = await tryReadJSON(fn)
-        this.dbg(`get ${sha}: ${res !== undefined ? "hit" : "miss"}`)
-        return res
-    }
-    async set(key: any, value: any): Promise<void> {
-        const sha = await this.getSha(key)
-        const fn = this.cacheFilename(sha)
-        try {
-            if (value === undefined) await rm(fn)
-            else await writeText(fn, JSON.stringify(value, null, 2))
-            this.dbg(`set ${sha}: updated`)
-        } catch (e) {
-            this.dbg(`set ${sha}: failed (${errorMessage(e)})`)
-        }
+  async get(key: any): Promise<any> {
+    if (key === undefined) return undefined; // Handle undefined key
+    const sha = await this.getSha(key);
+    const fn = this.cacheFilename(sha);
+    const res = await tryReadJSON(fn);
+    this.dbg(`get ${sha}: ${res !== undefined ?
"hit" : "miss"}`); + return res; + } + async set(key: any, value: any): Promise { + const sha = await this.getSha(key); + const fn = this.cacheFilename(sha); + try { + if (value === undefined) await rm(fn); + else await writeText(fn, JSON.stringify(value, null, 2)); + this.dbg(`set ${sha}: updated`); + } catch (e) { + this.dbg(`set ${sha}: failed (${errorMessage(e)})`); } - async values(): Promise { - try { - const dir = this.folder() - const files = await readdir(this.folder()) - const limit = pLimit(FILE_READ_CONCURRENCY_DEFAULT) - return await Promise.all( - files - .filter((f) => /\.json$/.test(f)) - .map((f) => limit(() => tryReadJSON(join(dir, f)))) - .filter((f) => f !== undefined) - ) - } catch (e) { - this.dbg( - `error while reading directory ${this.folder()}: ${errorMessage(e)}` - ) - return [] - } + } + async values(): Promise { + try { + const dir = this.folder(); + const files = await readdir(this.folder()); + const limit = pLimit(FILE_READ_CONCURRENCY_DEFAULT); + return await Promise.all( + files + .filter((f) => /\.json$/.test(f)) + .map((f) => limit(() => tryReadJSON(join(dir, f)))) + .filter((f) => f !== undefined), + ); + } catch (e) { + this.dbg(`error while reading directory ${this.folder()}: ${errorMessage(e)}`); + return []; } + } - async getOrUpdate( - key: K, - updater: () => Promise, - validator?: (val: V) => boolean - ): Promise<{ key: string; value: V; cached?: boolean }> { - const sha = await this.getSha(key) - const fn = this.cacheFilename(sha) - const res = await tryReadJSON(fn) - if (res) { - this.dbg(`getup ${sha}: hit`) - return { key: sha, value: res, cached: true } - } - const value = await updater() - if (validator && validator(value)) { - await this.set(key, value) - this.dbg(`getup ${sha}: update`) - } else this.dbg(`getup ${sha}: skip`) - return { key: sha, value, cached: false } + async getOrUpdate( + key: K, + updater: () => Promise, + validator?: (val: V) => boolean, + ): Promise<{ key: string; value: V; cached?: boolean }> { + const sha = await this.getSha(key); + const fn = this.cacheFilename(sha); + const res = await tryReadJSON(fn); + if (res) { + this.dbg(`getup ${sha}: hit`); + return { key: sha, value: res, cached: true }; } + const value = await updater(); + if (validator && validator(value)) { + await this.set(key, value); + this.dbg(`getup ${sha}: update`); + } else this.dbg(`getup ${sha}: skip`); + return { key: sha, value, cached: false }; + } - // Get the folder path for the cache storage - private folder() { - return dotGenaiscriptPath("cache", this.name) - } + // Get the folder path for the cache storage + private folder() { + return dotGenaiscriptPath("cache", this.name); + } - async getSha(key: K): Promise { - const sha = await hash(key, this.hashOptions) - return sha - } + async getSha(key: K): Promise { + const sha = await hash(key, this.hashOptions); + return sha; + } } diff --git a/packages/core/src/fuzzsearch.test.ts b/packages/core/src/fuzzsearch.test.ts index 28e6d9089f..e092b2a0dc 100644 --- a/packages/core/src/fuzzsearch.test.ts +++ b/packages/core/src/fuzzsearch.test.ts @@ -1,65 +1,61 @@ -import { describe, test, beforeEach } from "node:test" -import assert from "node:assert/strict" -import { fuzzSearch } from "./fuzzsearch" -import { resolveFileContent } from "./file" -import { TestHost } from "./testhost" +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import { fuzzSearch } from "./fuzzsearch"; +import { resolveFileContent } from "./file"; +import { TestHost } from 
"./testhost"; describe("fuzzSearch", () => { - beforeEach(() => { - TestHost.install() - }) - - test("should return correct search results with expected scores", async () => { - const query = "sample query" - const files: Partial[] = [ - { filename: "test1.md", content: "sample for test1 file content" }, - { - filename: "test2.md", - content: "example content for file test2 sample", - }, - ] - const options = { topK: 2 } - - const results = await fuzzSearch( - query, - files as WorkspaceFile[], - options - ) - - console.log("Test results:", results) // Debugging: log test results - - assert.equal(results.length, 2) - assert.equal(results[0].filename, "test1.md") - assert.equal(results[0].content, "sample for test1 file content") - assert.equal(typeof results[0].score, "number") - }) - - test("should handle empty file list", async () => { - const query = "sample query" - const files: WorkspaceFile[] = [] - - const results = await fuzzSearch(query, files) - - assert.equal(results.length, 0) - }) - - test("should perform correctly with no options provided", async () => { - const query = "sample query" - const files: Partial[] = [ - { filename: "test1.md", content: "sample for test1 file content" }, - { - filename: "test2.md", - content: "example content for file test2 sample", - }, - ] - - const results = await fuzzSearch(query, files as WorkspaceFile[]) - - console.log("Test results:", results) // Debugging: log test results - - assert.equal(results.length, 2) - assert.equal(results[0].filename, "test1.md") - assert.equal(results[0].content, "sample for test1 file content") - assert.equal(typeof results[0].score, "number") - }) -}) + beforeEach(() => { + TestHost.install(); + }); + + test("should return correct search results with expected scores", async () => { + const query = "sample query"; + const files: Partial[] = [ + { filename: "test1.md", content: "sample for test1 file content" }, + { + filename: "test2.md", + content: "example content for file test2 sample", + }, + ]; + const options = { topK: 2 }; + + const results = await fuzzSearch(query, files as WorkspaceFile[], options); + + console.log("Test results:", results); // Debugging: log test results + + assert.equal(results.length, 2); + assert.equal(results[0].filename, "test1.md"); + assert.equal(results[0].content, "sample for test1 file content"); + assert.equal(typeof results[0].score, "number"); + }); + + test("should handle empty file list", async () => { + const query = "sample query"; + const files: WorkspaceFile[] = []; + + const results = await fuzzSearch(query, files); + + assert.equal(results.length, 0); + }); + + test("should perform correctly with no options provided", async () => { + const query = "sample query"; + const files: Partial[] = [ + { filename: "test1.md", content: "sample for test1 file content" }, + { + filename: "test2.md", + content: "example content for file test2 sample", + }, + ]; + + const results = await fuzzSearch(query, files as WorkspaceFile[]); + + console.log("Test results:", results); // Debugging: log test results + + assert.equal(results.length, 2); + assert.equal(results[0].filename, "test1.md"); + assert.equal(results[0].content, "sample for test1 file content"); + assert.equal(typeof results[0].score, "number"); + }); +}); diff --git a/packages/core/src/fuzzsearch.ts b/packages/core/src/fuzzsearch.ts index 61ca4eb65a..dfbffbe22b 100644 --- a/packages/core/src/fuzzsearch.ts +++ b/packages/core/src/fuzzsearch.ts @@ -1,8 +1,8 @@ -import MiniSearch from "minisearch" -import { 
resolveFileContent } from "./file" -import { TraceOptions } from "./trace" -import { randomHex } from "./crypto" -import { CancellationOptions, checkCancelled } from "./cancellation" +import MiniSearch from "minisearch"; +import { resolveFileContent } from "./file"; +import { TraceOptions } from "./trace"; +import { randomHex } from "./crypto"; +import { CancellationOptions, checkCancelled } from "./cancellation"; /** * Performs a fuzzy search on a set of workspace files using a query. @@ -13,52 +13,49 @@ import { CancellationOptions, checkCancelled } from "./cancellation" * @returns A promise that resolves to an array of WorkspaceFileWithScore, containing the filename, content, and search score. */ export async function fuzzSearch( - query: string, - files: WorkspaceFile[], - options?: FuzzSearchOptions & TraceOptions & CancellationOptions + query: string, + files: WorkspaceFile[], + options?: FuzzSearchOptions & TraceOptions & CancellationOptions, ): Promise { - // Destructure options to extract trace and topK, with defaulting to an empty object - const { trace, topK, minScore, cancellationToken, ...otherOptions } = - options || {} - - // Load the content for all provided files asynchronously - for (const file of files) await resolveFileContent(file) - checkCancelled(cancellationToken) - - // assign ids - const filesWithId = files.map((f) => ({ - ...f, - id: randomHex(32), - })) - - // Initialize the MiniSearch instance with specified fields and options - const miniSearch = new MiniSearch({ - idField: "id", // Unique identifier for documents - fields: ["filename", "content"], // Fields to index for searching - storeFields: ["filename", "content"], // Fields to store in results - searchOptions: otherOptions, // Additional search options - }) - - // Add all files with content to the MiniSearch index - await miniSearch.addAllAsync( - filesWithId.filter((f) => !f.encoding && !!f.content) - ) - checkCancelled(cancellationToken) - - // Perform search using the provided query - let results = miniSearch.search(query) - - // Limit results to top K if specified - if (topK > 0) results = results.slice(0, topK) - if (minScore > 0) results = results.filter((r) => r.score >= minScore) - - // Map search results to WorkspaceFileWithScore structure - return results.map( - (r) => - { - filename: r.filename, // Map ID to filename - content: r.content, // Map content from search result - score: r.score, // Include the relevance score - } - ) + // Destructure options to extract trace and topK, with defaulting to an empty object + const { trace, topK, minScore, cancellationToken, ...otherOptions } = options || {}; + + // Load the content for all provided files asynchronously + for (const file of files) await resolveFileContent(file); + checkCancelled(cancellationToken); + + // assign ids + const filesWithId = files.map((f) => ({ + ...f, + id: randomHex(32), + })); + + // Initialize the MiniSearch instance with specified fields and options + const miniSearch = new MiniSearch({ + idField: "id", // Unique identifier for documents + fields: ["filename", "content"], // Fields to index for searching + storeFields: ["filename", "content"], // Fields to store in results + searchOptions: otherOptions, // Additional search options + }); + + // Add all files with content to the MiniSearch index + await miniSearch.addAllAsync(filesWithId.filter((f) => !f.encoding && !!f.content)); + checkCancelled(cancellationToken); + + // Perform search using the provided query + let results = miniSearch.search(query); + + // Limit 
results to top K if specified
+  if (topK > 0) results = results.slice(0, topK);
+  if (minScore > 0) results = results.filter((r) => r.score >= minScore);
+
+  // Map search results to WorkspaceFileWithScore structure
+  return results.map(
+    (r) =>
+      <WorkspaceFileWithScore>{
+        filename: r.filename, // Map ID to filename
+        content: r.content, // Map content from search result
+        score: r.score, // Include the relevance score
+      },
+  );
 }
diff --git a/packages/core/src/generation.ts b/packages/core/src/generation.ts
index 60d796a89d..301cc85826 100644
--- a/packages/core/src/generation.ts
+++ b/packages/core/src/generation.ts
@@ -1,37 +1,37 @@
 // Import necessary modules and interfaces
-import type { CancellationToken } from "./cancellation"
-import type { ChatCompletionsOptions } from "./chattypes"
-import { MarkdownTrace } from "./trace"
-import { GenerationStats } from "./usage"
+import type { CancellationToken } from "./cancellation";
+import type { ChatCompletionsOptions } from "./chattypes";
+import { MarkdownTrace } from "./trace";
+import { GenerationStats } from "./usage";
 
 // Represents a code fragment with associated files
 export interface Fragment {
-    files: string[] // Array of file paths or names
-    workspaceFiles?: WorkspaceFile[] // Array of workspace files
+  files: string[]; // Array of file paths or names
+  workspaceFiles?: WorkspaceFile[]; // Array of workspace files
 }
 
 // Options for configuring the generation process, extending multiple other options
 export interface GenerationOptions
-    extends ChatCompletionsOptions,
-        ModelOptions,
-        EmbeddingsModelOptions,
-        ContentSafetyOptions,
-        ScriptRuntimeOptions,
-        MetadataOptions {
-    inner: boolean // Indicates if the process is an inner operation
-    runId?: string
-    runDir?: string
-    cancellationToken?: CancellationToken // Token to cancel the operation
-    infoCb?: (partialResponse: { text: string }) => void // Callback for providing partial responses
-    trace: MarkdownTrace // Trace information for debugging or logging
-    outputTrace?: MarkdownTrace
-    maxCachedTemperature?: number // Maximum temperature for caching purposes
-    maxCachedTopP?: number // Maximum top-p value for caching
-    label?: string // Optional label for the operation
-    cliInfo?: {
-        files: string[] // Information about files in the CLI context
-    }
-    vars?: PromptParameters // Variables for prompt customization
-    stats: GenerationStats // Statistics of the generation
-    userState: Record<string, any>
+  extends ChatCompletionsOptions,
+    ModelOptions,
+    EmbeddingsModelOptions,
+    ContentSafetyOptions,
+    ScriptRuntimeOptions,
+    MetadataOptions {
+  inner: boolean; // Indicates if the process is an inner operation
+  runId?: string;
+  runDir?: string;
+  cancellationToken?: CancellationToken; // Token to cancel the operation
+  infoCb?: (partialResponse: { text: string }) => void; // Callback for providing partial responses
+  trace: MarkdownTrace; // Trace information for debugging or logging
+  outputTrace?: MarkdownTrace;
+  maxCachedTemperature?: number; // Maximum temperature for caching purposes
+  maxCachedTopP?: number; // Maximum top-p value for caching
+  label?: string; // Optional label for the operation
+  cliInfo?: {
+    files: string[]; // Information about files in the CLI context
+  };
+  vars?: PromptParameters; // Variables for prompt customization
+  stats: GenerationStats; // Statistics of the generation
+  userState: Record<string, any>;
 }
diff --git a/packages/core/src/git.ts b/packages/core/src/git.ts
index 56c6ac6fc1..921baaee15 100644
--- a/packages/core/src/git.ts
+++ b/packages/core/src/git.ts
@@ -1,526 +1,471 @@
 // This file contains the GitClient class, which provides methods to interact with Git repositories.
 // It includes functionality to find modified files, execute Git commands, and manage branches.
-import { uniq } from "es-toolkit"
-import {
-    GENAISCRIPTIGNORE,
-    GIT_DIFF_MAX_TOKENS,
-    GIT_IGNORE_GENAI,
-} from "./constants"
-import { llmifyDiff } from "./llmdiff"
-import { resolveFileContents } from "./file"
-import { tryReadText, tryStat } from "./fs"
-import { runtimeHost } from "./host"
-import { shellParse, shellQuote } from "./shell"
-import { arrayify, ellipse, logVerbose } from "./util"
-import { approximateTokens } from "./tokens"
-import { underscore } from "inflection"
-import { rm } from "node:fs/promises"
-import { packageResolveInstall } from "./packagemanagers"
-import { normalizeInt } from "./cleaners"
-import { dotGenaiscriptPath } from "./workdir"
-import { join } from "node:path"
-import { genaiscriptDebug } from "./debug"
-const dbg = genaiscriptDebug("git")
+import { uniq } from "es-toolkit";
+import { GENAISCRIPTIGNORE, GIT_DIFF_MAX_TOKENS, GIT_IGNORE_GENAI } from "./constants";
+import { llmifyDiff } from "./llmdiff";
+import { resolveFileContents } from "./file";
+import { tryReadText, tryStat } from "./fs";
+import { runtimeHost } from "./host";
+import { shellParse, shellQuote } from "./shell";
+import { arrayify, ellipse, logVerbose } from "./util";
+import { approximateTokens } from "./tokens";
+import { underscore } from "inflection";
+import { rm } from "node:fs/promises";
+import { packageResolveInstall } from "./packagemanagers";
+import { normalizeInt } from "./cleaners";
+import { dotGenaiscriptPath } from "./workdir";
+import { join } from "node:path";
+import { genaiscriptDebug } from "./debug";
+const dbg = genaiscriptDebug("git");
 
 async function checkDirectoryExists(directory: string): Promise<boolean> {
-    const stat = await tryStat(directory)
-    dbg(`directory exists: ${!!stat?.isDirectory()}`)
-    return !!stat?.isDirectory()
+  const stat = await tryStat(directory);
+  dbg(`directory exists: ${!!stat?.isDirectory()}`);
+  return !!stat?.isDirectory();
 }
 
-function appendExtras(
-    rest: Record<string, any>,
-    args: string[]
-) {
-    Object.entries(rest)
-        .filter(([, v]) => v !== undefined && typeof v !== "object")
-        .forEach(([k, v]) =>
-            args.push(
-                v === true ? `--${underscore(k)}` : `--${underscore(k)}=${v}`
-            )
-        )
+function appendExtras(rest: Record<string, any>, args: string[]) {
+  Object.entries(rest)
+    .filter(([, v]) => v !== undefined && typeof v !== "object")
+    .forEach(([k, v]) => args.push(v === true ? `--${underscore(k)}` : `--${underscore(k)}=${v}`));
 }
 
 /**
  * GitClient class provides an interface to interact with Git.
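 *
 * A hedged usage sketch (the glob path is illustrative):
 *
 * @example
 * ```ts
 * const git = GitClient.default()
 * const branch = await git.branch()
 * const changed = await git.listFiles("modified", { paths: ["src/**"] })
 * ```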
*/ export class GitClient implements Git { - private _cwd: string - readonly git = "git" // Git command identifier - private _defaultBranch: string // Stores the default branch name - private _requiresSafeDirectory: boolean = false // Indicates if the client requires a safe directory - - constructor(cwd: string) { - this._cwd = cwd || process.cwd() + private _cwd: string; + readonly git = "git"; // Git command identifier + private _defaultBranch: string; // Stores the default branch name + private _requiresSafeDirectory: boolean = false; // Indicates if the client requires a safe directory + + constructor(cwd: string) { + this._cwd = cwd || process.cwd(); + } + + private static _default: GitClient; + static default() { + if (!this._default) this._default = new GitClient(undefined); + return this._default; + } + + get cwd() { + return this._cwd; + } + + setGitHubWorkspace(cwd: string) { + if (cwd === this._cwd) return this; + dbg(`set github workspace mode: ${cwd}`); + this._cwd = cwd; + this._defaultBranch = undefined; // Reset default branch + this._requiresSafeDirectory = true; + + if (!process.env.GITHUB_TOKEN && process.env.INPUT_GITHUB_TOKEN) { + dbg(`setting GITHUB_TOKEN from INPUT_GITHUB_TOKEN`); + process.env.GITHUB_TOKEN = process.env.INPUT_GITHUB_TOKEN; } - - private static _default: GitClient - static default() { - if (!this._default) this._default = new GitClient(undefined) - return this._default + return this; + } + + private async configGlobalAddSafeDirectory() { + if (this._requiresSafeDirectory) { + this._requiresSafeDirectory = false; + dbg(`adding safe directory for git`); + await this.exec(`config --global --add safe.directory ${this.cwd}`); } - - get cwd() { - return this._cwd + } + + private async resolveExcludedPaths(options?: { + excludedPaths?: ElementOrArray; + }): Promise { + dbg(`resolving excluded paths`); + const { excludedPaths } = options || {}; + const ep = arrayify(excludedPaths, { filterEmpty: true }); + const dp = (await tryReadText(GIT_IGNORE_GENAI))?.split("\n"); + dbg(`reading GENAISCRIPTIGNORE file`); + const dp2 = (await tryReadText(GENAISCRIPTIGNORE))?.split("\n"); + const ps = [ + ...arrayify(ep, { filterEmpty: true }), + ...arrayify(dp, { filterEmpty: true }), + ...arrayify(dp2, { filterEmpty: true }), + ]; + return uniq(ps); + } + + /** + * Retrieves the default branch name. + * If not already set, it fetches from the Git remote. + * @returns {Promise} The default branch name. 
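+   *
+   * A hedged example (assumes an "origin" remote is configured):
+   * ```ts
+   * const name = await git.defaultBranch() // e.g. "main"
+   * ```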
+ */ + async defaultBranch(): Promise { + if (this._defaultBranch === undefined) { + dbg(`fetching default branch from remote`); + const res = await this.exec(["remote", "show", "origin"], { + valueOnError: "", + }); + this._defaultBranch = /^\s*HEAD branch:\s+(?.+)\s*$/m.exec(res)?.groups?.name || ""; } - - setGitHubWorkspace(cwd: string) { - if (cwd === this._cwd) return this - dbg(`set github workspace mode: ${cwd}`) - this._cwd = cwd - this._defaultBranch = undefined // Reset default branch - this._requiresSafeDirectory = true - - if (!process.env.GITHUB_TOKEN && process.env.INPUT_GITHUB_TOKEN) { - dbg(`setting GITHUB_TOKEN from INPUT_GITHUB_TOKEN`) - process.env.GITHUB_TOKEN = process.env.INPUT_GITHUB_TOKEN - } - return this + return this._defaultBranch; + } + + async fetch( + remote?: OptionsOrString<"origin">, + branchOrSha?: string, + options?: { + prune?: boolean; + all?: boolean; + }, + ): Promise { + const { prune, all, ...rest } = options || {}; + if (branchOrSha && !remote) throw new Error("remote is required when specifying branch or sha"); + const args = ["fetch", "--porcelain"]; + if (remote) args.push(remote); + if (branchOrSha) args.push(branchOrSha); + if (prune) args.push("--prune"); + if (all) args.push("--all"); + appendExtras(rest, args); + return await this.exec(args); + } + + /** + * Pull changes from the remote repository. + */ + async pull(options?: { + /** + * Whether to fast-forward the merge (`--ff`) + */ + ff?: boolean; + }): Promise { + const { ff, ...rest } = options || {}; + const args = ["pull"]; + if (ff) args.push("--ff"); + appendExtras(rest, args); + return await this.exec(args); + } + + /** + * Gets the current branch + * @returns + */ + async branch(): Promise { + dbg(`fetching current branch`); + const res = await this.exec(["branch", "--show-current"], { + valueOnError: "", + }); + return res.trim(); + } + + async listBranches(): Promise { + dbg(`listing all branches`); + const res = await this.exec(["branch", "--list"], { valueOnError: "" }); + return res + .split("\n") + .map((b) => b.trim()) + .filter((f) => !!f); + } + + /** + * Executes a Git command with given arguments. + * @param args Git command arguments. + * @param options Optional command options with a label. + * @returns {Promise} The standard output from the command. + */ + async exec( + args: string | string[], + options?: { label?: string; valueOnError?: string }, + ): Promise { + const { valueOnError } = options || {}; + + await this.configGlobalAddSafeDirectory(); + + const opts: ShellOptions = { + ...(options || {}), + cwd: this._cwd, + env: { + LC_ALL: "en_US", + }, + }; + const eargs = Array.isArray(args) ? args : shellParse(args); + dbg(`exec`, shellQuote(eargs)); + const res = await runtimeHost.exec(undefined, this.git, eargs, opts); + dbg(`exec: exit code ${res.exitCode}`); + if (res.stdout) dbg(res.stdout); + if (res.exitCode !== 0) { + dbg(`error: ${res.stderr}`); + if (valueOnError !== undefined) return valueOnError; + throw new Error(res.stderr); } - - private async configGlobalAddSafeDirectory() { - if (this._requiresSafeDirectory) { - this._requiresSafeDirectory = false - dbg(`adding safe directory for git`) - await this.exec(`config --global --add safe.directory ${this.cwd}`) + return res.stdout; + } + + /** + * Finds modified files in the Git repository based on the specified scope. + * @param scope The scope of modifications to find: "modified-base", "staged", or "modified". Default is "modified". 
+ * @param options Optional settings such as base branch, paths, and exclusions. + * @returns {Promise} List of modified files. + */ + async listFiles( + scope?: "modified-base" | "staged" | "modified", + options?: { + base?: string; + paths?: ElementOrArray; + excludedPaths?: ElementOrArray; + askStageOnEmpty?: boolean; + }, + ): Promise { + dbg(`listing files with scope: ${scope}`); + scope = scope || "modified"; + const { askStageOnEmpty } = options || {}; + const paths = arrayify(options?.paths, { filterEmpty: true }); + const excludedPaths = await this.resolveExcludedPaths(options); + + let filenames: string[]; + if (scope === "modified-base" || scope === "staged") { + dbg(`listing modified or staged files`); + const args = ["diff", "--name-only", "--diff-filter=AM"]; + if (scope === "modified-base") { + const base = options?.base || `origin/${await this.defaultBranch()}`; + dbg(`using base branch: %s`, base); + args.push(base); + } else { + dbg(`listing staged files`); + args.push("--cached"); + } + GitClient.addFileFilters(paths, excludedPaths, args); + const res = await this.exec(args, { + label: `git list modified files in ${scope}`, + }); + filenames = res.split("\n").filter((f) => f); + if (!filenames.length && scope == "staged" && askStageOnEmpty) { + dbg(`asking to stage all changes`); + // If no staged changes, optionally ask to stage all changes + const stage = await runtimeHost.confirm("No staged changes. Stage all changes?", { + default: true, + }); + if (stage) { + dbg(`staging all changes`); + await this.exec(["add", "."]); + filenames = (await this.exec(args)).split("\n").filter((f) => f); } + } + } else { + dbg(`listing modified files`); + // For "modified" scope, ignore deleted files + const rx = /^\s*(A|M|\?{1,2})\s+/gm; + const args = ["status", "--porcelain"]; + GitClient.addFileFilters(paths, excludedPaths, args); + dbg(`executing git status`); + const res = await this.exec(args, { + label: `git list modified files`, + }); + filenames = res + .split("\n") + .filter((f) => rx.test(f)) + .map((f) => f.replace(rx, "").trim()); } - private async resolveExcludedPaths(options?: { - excludedPaths?: ElementOrArray - }): Promise { - dbg(`resolving excluded paths`) - const { excludedPaths } = options || {} - const ep = arrayify(excludedPaths, { filterEmpty: true }) - const dp = (await tryReadText(GIT_IGNORE_GENAI))?.split("\n") - dbg(`reading GENAISCRIPTIGNORE file`) - const dp2 = (await tryReadText(GENAISCRIPTIGNORE))?.split("\n") - const ps = [ - ...arrayify(ep, { filterEmpty: true }), - ...arrayify(dp, { filterEmpty: true }), - ...arrayify(dp2, { filterEmpty: true }), - ] - return uniq(ps) + const files = filenames.map((filename) => ({ filename })); + await resolveFileContents(files); + return files; + } + + /** + * Adds file path filters to Git command arguments. + * @param paths Paths to include. + * @param excludedPaths Paths to exclude. + * @param args Git command arguments. + */ + private static addFileFilters(paths: string[], excludedPaths: string[], args: string[]) { + if (paths.length > 0 || excludedPaths.length > 0) { + args.push("--"); + if (!paths.length) { + args.push("."); + } else { + args.push(...paths); + } + args.push(...excludedPaths.map((p) => (p.startsWith(":!") ? p : ":!" + p))); } - - /** - * Retrieves the default branch name. - * If not already set, it fetches from the Git remote. - * @returns {Promise} The default branch name. 
- */ - async defaultBranch(): Promise { - if (this._defaultBranch === undefined) { - dbg(`fetching default branch from remote`) - const res = await this.exec(["remote", "show", "origin"], { - valueOnError: "", - }) - this._defaultBranch = - /^\s*HEAD branch:\s+(?.+)\s*$/m.exec(res)?.groups?.name || - "" - } - return this._defaultBranch + } + + async lastTag(): Promise { + dbg(`fetching last tag`); + const res = await this.exec(["describe", "--tags", "--abbrev=0", "HEAD^"]); + return res.split("\n")[0]; + } + + async lastCommitSha(): Promise { + dbg(`fetching last commit`); + const res = await this.exec(["rev-parse", "HEAD"]); + return res.split("\n")[0]; + } + + async log(options?: { + base?: string; + head?: string; + merges?: boolean; + author?: string; + until?: string; + after?: string; + count?: number; + excludedGrep?: string | RegExp; + paths?: ElementOrArray; + excludedPaths?: ElementOrArray; + }): Promise { + const { base, head, merges, excludedGrep, count, author, until, after } = options || {}; + const paths = arrayify(options?.paths, { filterEmpty: true }); + const excludedPaths = await this.resolveExcludedPaths(options); + + dbg(`building git log command arguments`); + const args = ["log", "--pretty=format:%h %ad %s", "--date=short"]; + if (!merges) { + args.push("--no-merges"); } - - async fetch( - remote?: OptionsOrString<"origin">, - branchOrSha?: string, - options?: { - prune?: boolean - all?: boolean - } - ): Promise { - const { prune, all, ...rest } = options || {} - if (branchOrSha && !remote) - throw new Error("remote is required when specifying branch or sha") - const args = ["fetch", "--porcelain"] - if (remote) args.push(remote) - if (branchOrSha) args.push(branchOrSha) - if (prune) args.push("--prune") - if (all) args.push("--all") - appendExtras(rest, args) - return await this.exec(args) + if (author) { + args.push(`--author`, author); } - - /** - * Pull changes from the remote repository. - */ - async pull(options?: { - /** - * Whether to fast-forward the merge (`--ff`) - */ - ff?: boolean - }): Promise { - const { ff, ...rest } = options || {} - const args = ["pull"] - if (ff) args.push("--ff") - appendExtras(rest, args) - return await this.exec(args) + if (until) { + args.push("--until", until); } - - /** - * Gets the current branch - * @returns - */ - async branch(): Promise { - dbg(`fetching current branch`) - const res = await this.exec(["branch", "--show-current"], { - valueOnError: "", - }) - return res.trim() + if (after) { + args.push("--after", after); } - - async listBranches(): Promise { - dbg(`listing all branches`) - const res = await this.exec(["branch", "--list"], { valueOnError: "" }) - return res - .split("\n") - .map((b) => b.trim()) - .filter((f) => !!f) + if (excludedGrep) { + dbg(`excluding grep pattern: ${excludedGrep}`); + const pattern = typeof excludedGrep === "string" ? excludedGrep : excludedGrep.source; + args.push(`--grep='${pattern}'`, "--invert-grep"); } - - /** - * Executes a Git command with given arguments. - * @param args Git command arguments. - * @param options Optional command options with a label. - * @returns {Promise} The standard output from the command. - */ - async exec( - args: string | string[], - options?: { label?: string; valueOnError?: string } - ): Promise { - const { valueOnError } = options || {} - - await this.configGlobalAddSafeDirectory() - - const opts: ShellOptions = { - ...(options || {}), - cwd: this._cwd, - env: { - LC_ALL: "en_US", - }, - } - const eargs = Array.isArray(args) ? 
args : shellParse(args) - dbg(`exec`, shellQuote(eargs)) - const res = await runtimeHost.exec(undefined, this.git, eargs, opts) - dbg(`exec: exit code ${res.exitCode}`) - if (res.stdout) dbg(res.stdout) - if (res.exitCode !== 0) { - dbg(`error: ${res.stderr}`) - if (valueOnError !== undefined) return valueOnError - throw new Error(res.stderr) - } - return res.stdout + if (!isNaN(count)) { + dbg(`limiting log to ${count} entries`); + args.push(`-n`, String(count)); } - - /** - * Finds modified files in the Git repository based on the specified scope. - * @param scope The scope of modifications to find: "modified-base", "staged", or "modified". Default is "modified". - * @param options Optional settings such as base branch, paths, and exclusions. - * @returns {Promise} List of modified files. - */ - async listFiles( - scope?: "modified-base" | "staged" | "modified", - options?: { - base?: string - paths?: ElementOrArray - excludedPaths?: ElementOrArray - askStageOnEmpty?: boolean - } - ): Promise { - dbg(`listing files with scope: ${scope}`) - scope = scope || "modified" - const { askStageOnEmpty } = options || {} - const paths = arrayify(options?.paths, { filterEmpty: true }) - const excludedPaths = await this.resolveExcludedPaths(options) - - let filenames: string[] - if (scope === "modified-base" || scope === "staged") { - dbg(`listing modified or staged files`) - const args = ["diff", "--name-only", "--diff-filter=AM"] - if (scope === "modified-base") { - const base = - options?.base || `origin/${await this.defaultBranch()}` - dbg(`using base branch: %s`, base) - args.push(base) - } else { - dbg(`listing staged files`) - args.push("--cached") - } - GitClient.addFileFilters(paths, excludedPaths, args) - const res = await this.exec(args, { - label: `git list modified files in ${scope}`, - }) - filenames = res.split("\n").filter((f) => f) - if (!filenames.length && scope == "staged" && askStageOnEmpty) { - dbg(`asking to stage all changes`) - // If no staged changes, optionally ask to stage all changes - const stage = await runtimeHost.confirm( - "No staged changes. Stage all changes?", - { - default: true, - } - ) - if (stage) { - dbg(`staging all changes`) - await this.exec(["add", "."]) - filenames = (await this.exec(args)) - .split("\n") - .filter((f) => f) - } - } - } else { - dbg(`listing modified files`) - // For "modified" scope, ignore deleted files - const rx = /^\s*(A|M|\?{1,2})\s+/gm - const args = ["status", "--porcelain"] - GitClient.addFileFilters(paths, excludedPaths, args) - dbg(`executing git status`) - const res = await this.exec(args, { - label: `git list modified files`, - }) - filenames = res - .split("\n") - .filter((f) => rx.test(f)) - .map((f) => f.replace(rx, "").trim()) - } - - const files = filenames.map((filename) => ({ filename })) - await resolveFileContents(files) - return files + if (base && head) { + dbg(`log range: ${base}..${head}`); + args.push(`${base}..${head}`); } - + GitClient.addFileFilters(paths, excludedPaths, args); + const res = await this.exec(args); + const commits = res + .split("\n") + .map( + (line) => + /^(?[a-z0-9]{6,40})\s+(?\d{4,4}-\d{2,2}-\d{2,2})\s+(?.*)$/.exec(line) + ?.groups, + ) + .filter((g) => !!g) + .map( + (g) => + { + sha: g?.sha, + date: g?.date, + message: g?.message, + }, + ); + return commits; + } + + /** + * Runs git blame in a file, line. 
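A minimal usage sketch for `blame`, assuming an existing `GitClient` instance named `git`; the path and line number are illustrative:

```ts
// Resolve the sha of the commit that last touched a given line.
// blame() returns "" when no sha can be parsed from the porcelain output.
const sha = await git.blame("packages/core/src/git.ts", 42);
if (sha) console.log(`line 42 last changed in commit ${sha}`);
```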
+   * @param filename File to blame.
+   * @param line Line number to annotate.
+   * @returns The commit sha for the line, or an empty string if unresolved.
+   */
+  async blame(filename: string, line: number): Promise<string> {
+    const args = ["blame", filename, "-p", "-L", "-w", "--minimal", `${line},${line}`];
+    const res = await this.exec(args);
+    // parse the git blame porcelain format
+    // The porcelain format includes the sha, line numbers, and original line
+    const match = /^(?<sha>[a-f0-9]{40})\s+.*$/m.exec(res);
+    return match?.groups?.sha || "";
+  }
+
+  /**
+   * Generates a diff of changes based on provided options.
+   * @param options Options such as staged flag, base, head, paths, and exclusions.
+   * @returns {Promise<string>} The diff output.
+   */
+  async diff(options?: {
+    staged?: boolean;
+    askStageOnEmpty?: boolean;
+    base?: string;
+    head?: string;
+    paths?: ElementOrArray<string>;
+    excludedPaths?: ElementOrArray<string>;
+    unified?: number;
+    nameOnly?: boolean;
+    llmify?: boolean;
+    algorithm?: "patience" | "minimal" | "histogram" | "myers";
+    extras?: string[];
     /**
-     * Adds file path filters to Git command arguments.
-     * @param paths Paths to include.
-     * @param excludedPaths Paths to exclude.
-     * @param args Git command arguments.
+     * Maximum number of tokens before returning a name-only diff
      */
-    private static addFileFilters(
-        paths: string[],
-        excludedPaths: string[],
-        args: string[]
-    ) {
-        if (paths.length > 0 || excludedPaths.length > 0) {
-            args.push("--")
-            if (!paths.length) {
-                args.push(".")
-            } else {
-                args.push(...paths)
-            }
-            args.push(
-                ...excludedPaths.map((p) => (p.startsWith(":!") ? p : ":!" + p))
-            )
-        }
+    maxTokensFullDiff?: number;
+  }): Promise<string> {
+    const paths = arrayify(options?.paths, { filterEmpty: true });
+    const excludedPaths = await this.resolveExcludedPaths(options);
+    const {
+      staged,
+      base,
+      head,
+      unified,
+      askStageOnEmpty,
+      nameOnly,
+      maxTokensFullDiff = GIT_DIFF_MAX_TOKENS,
+      llmify,
+      algorithm = "minimal",
+      extras,
+    } = options || {};
+    const args = ["diff"];
+    if (staged) {
+      dbg(`including staged changes`);
+      args.push("--staged");
     }
-
-    async lastTag(): Promise<string> {
-        dbg(`fetching last tag`)
-        const res = await this.exec([
-            "describe",
-            "--tags",
-            "--abbrev=0",
-            "HEAD^",
-        ])
-        return res.split("\n")[0]
+    if (unified > 0) {
+      args.push("--ignore-all-space");
+      args.push(`--unified=${unified}`);
     }
-
-    async lastCommitSha(): Promise<string> {
-        dbg(`fetching last commit`)
-        const res = await this.exec(["rev-parse", "HEAD"])
-        return res.split("\n")[0]
+    if (nameOnly) {
+      args.push("--name-only");
     }
-
-    async log(options?: {
-        base?: string
-        head?: string
-        merges?: boolean
-        author?: string
-        until?: string
-        after?: string
-        count?: number
-        excludedGrep?: string | RegExp
-        paths?: ElementOrArray<string>
-        excludedPaths?: ElementOrArray<string>
-    }): Promise {
-        const {
-            base,
-            head,
-            merges,
-            excludedGrep,
-            count,
-            author,
-            until,
-            after,
-        } = options || {}
-        const paths = arrayify(options?.paths, { filterEmpty: true })
-        const excludedPaths = await this.resolveExcludedPaths(options)
-
-        dbg(`building git log command arguments`)
-        const args = ["log", "--pretty=format:%h %ad %s", "--date=short"]
-        if (!merges) {
-            args.push("--no-merges")
-        }
-        if (author) {
-            args.push(`--author`, author)
-        }
-        if (until) {
-            args.push("--until", until)
-        }
-        if (after) {
-            args.push("--after", after)
-        }
-        if (excludedGrep) {
-            dbg(`excluding grep pattern: ${excludedGrep}`)
-            const pattern =
-                typeof excludedGrep === "string"
-                    ? 
excludedGrep - : excludedGrep.source - args.push(`--grep='${pattern}'`, "--invert-grep") - } - if (!isNaN(count)) { - dbg(`limiting log to ${count} entries`) - args.push(`-n`, String(count)) - } - if (base && head) { - dbg(`log range: ${base}..${head}`) - args.push(`${base}..${head}`) - } - GitClient.addFileFilters(paths, excludedPaths, args) - const res = await this.exec(args) - const commits = res - .split("\n") - .map( - (line) => - /^(?[a-z0-9]{6,40})\s+(?\d{4,4}-\d{2,2}-\d{2,2})\s+(?.*)$/.exec( - line - )?.groups - ) - .filter((g) => !!g) - .map( - (g) => - { - sha: g?.sha, - date: g?.date, - message: g?.message, - } - ) - return commits + if (algorithm) { + args.push(`--diff-algorithm=${algorithm}`); } - - /** - * Runs git blame in a file, line. - * @param filename - * @param line - * @returns - */ - async blame(filename: string, line: number): Promise { - const args = [ - "blame", - filename, - "-p", - "-L", - "-w", - "--minimal", - `${line},${line}`, - ] - const res = await this.exec(args) - // part git blame porcelain format - // The porcelain format includes the sha, line numbers, and original line - const match = /^(?[a-f0-9]{40})\s+.*$/m.exec(res) - return match?.groups?.sha || "" + if (extras?.length) { + args.push(...extras); } - - /** - * Generates a diff of changes based on provided options. - * @param options Options such as staged flag, base, head, paths, and exclusions. - * @returns {Promise} The diff output. - */ - async diff(options?: { - staged?: boolean - askStageOnEmpty?: boolean - base?: string - head?: string - paths?: ElementOrArray - excludedPaths?: ElementOrArray - unified?: number - nameOnly?: boolean - llmify?: boolean - algorithm?: "patience" | "minimal" | "histogram" | "myers" - extras?: string[] - /** - * Maximum of tokens before returning a name-only diff - */ - maxTokensFullDiff?: number - }): Promise { - const paths = arrayify(options?.paths, { filterEmpty: true }) - const excludedPaths = await this.resolveExcludedPaths(options) - const { - staged, - base, - head, - unified, - askStageOnEmpty, - nameOnly, - maxTokensFullDiff = GIT_DIFF_MAX_TOKENS, - llmify, - algorithm = "minimal", - extras, - } = options || {} - const args = ["diff"] - if (staged) { - dbg(`including staged changes`) - args.push("--staged") - } - if (unified > 0) { - args.push("--ignore-all-space") - args.push(`--unified=${unified}`) - } - if (nameOnly) { - args.push("--name-only") - } - if (algorithm) { - args.push(`--diff-algorithm=${algorithm}`) - } - if (extras?.length) { - args.push(...extras) - } - if (base && !head) { - dbg(`diff base: ${base}`) - args.push(base) - } else if (head && !base) { - dbg(`diff head: ${head}`) - args.push(`${head}^..${head}`) - } else if (base && head) { - dbg(`diff range: ${base}..${head}`) - args.push(`${base}..${head}`) - } - GitClient.addFileFilters(paths, excludedPaths, args) - let res = await this.exec(args) - dbg(`executing diff command`) - if (!res && staged && askStageOnEmpty) { - // If no staged changes, optionally ask to stage all changes - dbg(`asking to stage all changes`) - const stage = await runtimeHost.confirm( - "No staged changes. 
Stage all changes?", - { - default: true, - } - ) - if (stage) { - dbg(`staging all changes`) - await this.exec(["add", "."]) - res = await this.exec(args) - } - } - if (!nameOnly && llmify) { - dbg(`llmifying diff`) - res = llmifyDiff(res) - dbg(`encoding diff`) - const tokens = approximateTokens(res) - if (tokens > maxTokensFullDiff) { - dbg(`truncating diff due to token limit`) - res = `## Diff + if (base && !head) { + dbg(`diff base: ${base}`); + args.push(base); + } else if (head && !base) { + dbg(`diff head: ${head}`); + args.push(`${head}^..${head}`); + } else if (base && head) { + dbg(`diff range: ${base}..${head}`); + args.push(`${base}..${head}`); + } + GitClient.addFileFilters(paths, excludedPaths, args); + let res = await this.exec(args); + dbg(`executing diff command`); + if (!res && staged && askStageOnEmpty) { + // If no staged changes, optionally ask to stage all changes + dbg(`asking to stage all changes`); + const stage = await runtimeHost.confirm("No staged changes. Stage all changes?", { + default: true, + }); + if (stage) { + dbg(`staging all changes`); + await this.exec(["add", "."]); + res = await this.exec(args); + } + } + if (!nameOnly && llmify) { + dbg(`llmifying diff`); + res = llmifyDiff(res); + dbg(`encoding diff`); + const tokens = approximateTokens(res); + if (tokens > maxTokensFullDiff) { + dbg(`truncating diff due to token limit`); + res = `## Diff Truncated diff to large (${tokens} tokens). Diff files individually for details. ${ellipse(res, maxTokensFullDiff * 3)} @@ -528,104 +473,101 @@ ${ellipse(res, maxTokensFullDiff * 3)} ## Files ${await this.diff({ ...options, nameOnly: true })} -` - } - } - return res +`; + } } - - /** - * Create a shallow git clone - * @param repository URL of the remote repository - * @param options various clone options - */ - async shallowClone( - repository: string, - options?: { - /** - * branch to clone - */ - branch?: string - - /** - * Do not reuse previous clone - */ - force?: boolean - - /** - * Runs install command after cloning - */ - install?: boolean - - /** - * Number of commits to fetch - */ - depth?: number - /** - * Path to the directory to clone into - */ - directory?: string - } - ): Promise { - dbg(`cloning repository: ${repository}`) - let { branch, force, install, depth, directory, ...rest } = - options || {} - depth = normalizeInt(depth) - if (isNaN(depth)) depth = 1 - - // normalize short github url - // check if the repository is in the form of `owner/repo` - if (/^(\w|-)+\/(\w|-)+$/.test(repository)) { - repository = `https://github.com/${repository}` - } - const url = new URL(repository) - if (!directory) { - const sha = ( - await this.exec(["ls-remote", repository, branch || "HEAD"]) - ).split(/\s+/)[0] - directory = dotGenaiscriptPath( - "git", - ...url.pathname.split(/\//g).filter((s) => !!s), - branch || `HEAD`, - sha - ) - } - logVerbose(`git: shallow cloning ${repository} to ${directory}`) - if (await checkDirectoryExists(directory)) { - if (!force && !install) { - dbg(`directory already exists`) - return new GitClient(directory) - } - dbg(`removing existing directory`) - await rm(directory, { recursive: true, force: true }) - } - const args = ["clone", "--depth", String(Math.max(1, depth))] - if (branch) args.push("--branch", branch) - appendExtras(rest, args) - args.push(repository, directory) - await this.exec(args) - - if (install) { - dbg(`running install command after cloning`) - const { command, args } = await packageResolveInstall(directory) - if (command) { - const res = await 
runtimeHost.exec(undefined, command, args, { - cwd: directory, - }) - if (res.exitCode !== 0) { - throw new Error(res.stderr) - } - } + return res; + } + + /** + * Create a shallow git clone + * @param repository URL of the remote repository + * @param options various clone options + */ + async shallowClone( + repository: string, + options?: { + /** + * branch to clone + */ + branch?: string; + + /** + * Do not reuse previous clone + */ + force?: boolean; + + /** + * Runs install command after cloning + */ + install?: boolean; + + /** + * Number of commits to fetch + */ + depth?: number; + /** + * Path to the directory to clone into + */ + directory?: string; + }, + ): Promise { + dbg(`cloning repository: ${repository}`); + let { branch, force, install, depth, directory, ...rest } = options || {}; + depth = normalizeInt(depth); + if (isNaN(depth)) depth = 1; + + // normalize short github url + // check if the repository is in the form of `owner/repo` + if (/^(\w|-)+\/(\w|-)+$/.test(repository)) { + repository = `https://github.com/${repository}`; + } + const url = new URL(repository); + if (!directory) { + const sha = (await this.exec(["ls-remote", repository, branch || "HEAD"])).split(/\s+/)[0]; + directory = dotGenaiscriptPath( + "git", + ...url.pathname.split(/\//g).filter((s) => !!s), + branch || `HEAD`, + sha, + ); + } + logVerbose(`git: shallow cloning ${repository} to ${directory}`); + if (await checkDirectoryExists(directory)) { + if (!force && !install) { + dbg(`directory already exists`); + return new GitClient(directory); + } + dbg(`removing existing directory`); + await rm(directory, { recursive: true, force: true }); + } + const args = ["clone", "--depth", String(Math.max(1, depth))]; + if (branch) args.push("--branch", branch); + appendExtras(rest, args); + args.push(repository, directory); + await this.exec(args); + + if (install) { + dbg(`running install command after cloning`); + const { command, args } = await packageResolveInstall(directory); + if (command) { + const res = await runtimeHost.exec(undefined, command, args, { + cwd: directory, + }); + if (res.exitCode !== 0) { + throw new Error(res.stderr); } - - return new GitClient(directory) + } } - client(cwd: string) { - return new GitClient(cwd) - } + return new GitClient(directory); + } - toString() { - return `git ${this._cwd || ""}` - } + client(cwd: string) { + return new GitClient(cwd); + } + + toString() { + return `git ${this._cwd || ""}`; + } } diff --git a/packages/core/src/github.ts b/packages/core/src/github.ts index 514657b3dd..162c790d93 100644 --- a/packages/core/src/github.ts +++ b/packages/core/src/github.ts @@ -1,68 +1,63 @@ -import { MODEL_PROVIDER_GITHUB } from "./constants" -import { createFetch } from "./fetch" -import { LanguageModel, ListModelsFunction } from "./chat" -import { OpenAIChatCompletion, OpenAIEmbedder } from "./openai" -import { serializeError } from "./error" -import { genaiscriptDebug } from "./debug" -import { deleteUndefinedValues } from "./cleaners" -const dbg = genaiscriptDebug("github") +import { MODEL_PROVIDER_GITHUB } from "./constants"; +import { createFetch } from "./fetch"; +import { LanguageModel, ListModelsFunction } from "./chat"; +import { OpenAIChatCompletion, OpenAIEmbedder } from "./openai"; +import { serializeError } from "./error"; +import { genaiscriptDebug } from "./debug"; +import { deleteUndefinedValues } from "./cleaners"; +const dbg = genaiscriptDebug("github"); interface GitHubMarketplaceModel { - id: string - name: string - publisher: string - summary: 
string - rate_limit_tier: string - supported_input_modalities: ("text" | "image" | "audio")[] - supported_output_modalities: ("text" | "image" | "audio")[] - tags: string[] + id: string; + name: string; + publisher: string; + summary: string; + rate_limit_tier: string; + supported_input_modalities: ("text" | "image" | "audio")[]; + supported_output_modalities: ("text" | "image" | "audio")[]; + tags: string[]; } const listModels: ListModelsFunction = async (cfg, options) => { - const fetch = await createFetch({ retries: 0, ...options }) - try { - const modelsRes = await fetch( - "https://models.github.ai/catalog/models", - { - method: "GET", - headers: deleteUndefinedValues({ - Accept: "application/vnd.github+json", - Authorization: cfg.token - ? `Bearer ${cfg.token}` - : undefined, - "X-GitHub-Api-Version": "2022-11-28", - }), - } - ) - if (!modelsRes.ok) { - dbg(`failed to fetch models, status: ${modelsRes.status}`) - return { - ok: false, - status: modelsRes.status, - error: serializeError(modelsRes.statusText), - } - } - - const models = (await modelsRes.json()) as GitHubMarketplaceModel[] - return { - ok: true, - models: models.map( - (m) => - ({ - id: m.id, - details: `${m.name} - ${m.summary}`, - // url: `https://github.com/marketplace/models/${m.registryName}/${m.name}`, - }) satisfies LanguageModelInfo - ), - } - } catch (e) { - return { ok: false, error: serializeError(e) } + const fetch = await createFetch({ retries: 0, ...options }); + try { + const modelsRes = await fetch("https://models.github.ai/catalog/models", { + method: "GET", + headers: deleteUndefinedValues({ + Accept: "application/vnd.github+json", + Authorization: cfg.token ? `Bearer ${cfg.token}` : undefined, + "X-GitHub-Api-Version": "2022-11-28", + }), + }); + if (!modelsRes.ok) { + dbg(`failed to fetch models, status: ${modelsRes.status}`); + return { + ok: false, + status: modelsRes.status, + error: serializeError(modelsRes.statusText), + }; } -} + + const models = (await modelsRes.json()) as GitHubMarketplaceModel[]; + return { + ok: true, + models: models.map( + (m) => + ({ + id: m.id, + details: `${m.name} - ${m.summary}`, + // url: `https://github.com/marketplace/models/${m.registryName}/${m.name}`, + }) satisfies LanguageModelInfo, + ), + }; + } catch (e) { + return { ok: false, error: serializeError(e) }; + } +}; export const GitHubModel = Object.freeze({ - id: MODEL_PROVIDER_GITHUB, - completer: OpenAIChatCompletion, - listModels, - embedder: OpenAIEmbedder, -}) + id: MODEL_PROVIDER_GITHUB, + completer: OpenAIChatCompletion, + listModels, + embedder: OpenAIEmbedder, +}); diff --git a/packages/core/src/githubclient.test.ts b/packages/core/src/githubclient.test.ts index 2b4f8dd3a4..6a3aaa2427 100644 --- a/packages/core/src/githubclient.test.ts +++ b/packages/core/src/githubclient.test.ts @@ -1,145 +1,145 @@ -import { beforeEach, describe, test } from "node:test" -import assert from "node:assert/strict" -import { GitHubClient } from "./githubclient" -import { readFile } from "node:fs/promises" -import { fileURLToPath } from "node:url" -import { isCI } from "./ci" -import { TestHost } from "./testhost" -import { resolveBufferLike } from "./bufferlike" -import { tryResolveResource } from "./resources" +import { beforeEach, describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { GitHubClient } from "./githubclient"; +import { readFile } from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import { isCI } from "./ci"; +import { TestHost } from "./testhost"; +import { 
resolveBufferLike } from "./bufferlike"; +import { tryResolveResource } from "./resources"; describe("GitHubClient", async () => { - const client = GitHubClient.default() + const client = GitHubClient.default(); - beforeEach(() => { - TestHost.install() - }) + beforeEach(() => { + TestHost.install(); + }); - await test("info() returns GitHub options", async () => { - const info = await client.info() - assert(info.owner) - assert(info.repo) - }) + await test("info() returns GitHub options", async () => { + const info = await client.info(); + assert(info.owner); + assert(info.repo); + }); - await test("api() returns GitHub client", async () => { - const api = await client.api() - assert(api.client) - assert(api.owner) - assert(api.repo) - }) + await test("api() returns GitHub client", async () => { + const api = await client.api(); + assert(api.client); + assert(api.owner); + assert(api.repo); + }); - await test("listIssues()", async () => { - const issues = await client.listIssues({ count: 2 }) - assert(issues.length) - const issue = await client.getIssue(issues[0].number) - assert(issue?.number === issues[0].number) - assert(issue?.title) - }) + await test("listIssues()", async () => { + const issues = await client.listIssues({ count: 2 }); + assert(issues.length); + const issue = await client.getIssue(issues[0].number); + assert(issue?.number === issues[0].number); + assert(issue?.title); + }); - await test("listGists()", async () => { - const gists = await client.listGists({ count: 2 }) - assert(Array.isArray(gists)) - const gist = await client.getGist(gists[0].id) - assert(gist?.files) - }) + await test("listGists()", async () => { + const gists = await client.listGists({ count: 2 }); + assert(Array.isArray(gists)); + const gist = await client.getGist(gists[0].id); + assert(gist?.files); + }); - await test("listPullRequests()", async () => { - const prs = await client.listPullRequests({ count: 2 }) - assert(Array.isArray(prs)) - const pr = await client.getPullRequest(prs[0].number) - assert(pr?.number === prs[0].number) - }) - await test("listWorkflowRuns()", async () => { - if (isCI) return - const workflows = await client.listWorkflows({ count: 2 }) - assert(Array.isArray(workflows)) - const runs = await client.listWorkflowRuns(workflows[0].id) - assert(Array.isArray(runs)) - const jobs = await client.listWorkflowJobs(runs[0].id) - assert(Array.isArray(jobs)) - const log = await client.downloadWorkflowJobLog(jobs[0].id) - assert(typeof log === "string") - const artifacts = await client.listWorkflowRunArtifacts(runs[0].id) - assert(Array.isArray(artifacts)) - if (artifacts.length) { - const files = await client.downloadArtifactFiles(artifacts[0].id) - assert(files.length) - } - }) + await test("listPullRequests()", async () => { + const prs = await client.listPullRequests({ count: 2 }); + assert(Array.isArray(prs)); + const pr = await client.getPullRequest(prs[0].number); + assert(pr?.number === prs[0].number); + }); + await test("listWorkflowRuns()", async () => { + if (isCI) return; + const workflows = await client.listWorkflows({ count: 2 }); + assert(Array.isArray(workflows)); + const runs = await client.listWorkflowRuns(workflows[0].id); + assert(Array.isArray(runs)); + const jobs = await client.listWorkflowJobs(runs[0].id); + assert(Array.isArray(jobs)); + const log = await client.downloadWorkflowJobLog(jobs[0].id); + assert(typeof log === "string"); + const artifacts = await client.listWorkflowRunArtifacts(runs[0].id); + assert(Array.isArray(artifacts)); + if (artifacts.length) 
{ + const files = await client.downloadArtifactFiles(artifacts[0].id); + assert(files.length); + } + }); - await test("getFile() returns file content", async () => { - const file = await client.getFile("README.md", "main") - assert(file?.content) - }) - await test("searchCode() returns search results", async () => { - if (isCI) return - const results = await client.searchCode("writeText") - assert(Array.isArray(results)) - }) + await test("getFile() returns file content", async () => { + const file = await client.getFile("README.md", "main"); + assert(file?.content); + }); + await test("searchCode() returns search results", async () => { + if (isCI) return; + const results = await client.searchCode("writeText"); + assert(Array.isArray(results)); + }); - await test("listBranches() returns array of branches", async () => { - const branches = await client.listBranches() - assert(Array.isArray(branches)) - }) + await test("listBranches() returns array of branches", async () => { + const branches = await client.listBranches(); + assert(Array.isArray(branches)); + }); - await test("listRepositoryLanguages() returns language stats", async () => { - const langs = await client.listRepositoryLanguages() - assert(typeof langs === "object") - }) + await test("listRepositoryLanguages() returns language stats", async () => { + const langs = await client.listRepositoryLanguages(); + assert(typeof langs === "object"); + }); - await test("getRepositoryContent() returns repository files", async () => { - const files = await client.getRepositoryContent("packages/core/src") - assert(Array.isArray(files)) - }) - await test("getOrCreateRef()", async () => { - const client = GitHubClient.default() - const existingRef = await client.getOrCreateRef("test-ignore", { - orphaned: true, - }) - assert(existingRef) - assert(existingRef.ref === "refs/heads/test-ignore") - }) - await test("uploadAsset()", async () => { - if (isCI) return - const buffer = await readFile(fileURLToPath(import.meta.url)) - const client = GitHubClient.default() - const url = await client.uploadAsset(buffer) - assert(url) - const parsedUrl = new URL(url) - assert(parsedUrl.host === "raw.githubusercontent.com") + await test("getRepositoryContent() returns repository files", async () => { + const files = await client.getRepositoryContent("packages/core/src"); + assert(Array.isArray(files)); + }); + await test("getOrCreateRef()", async () => { + const client = GitHubClient.default(); + const existingRef = await client.getOrCreateRef("test-ignore", { + orphaned: true, + }); + assert(existingRef); + assert(existingRef.ref === "refs/heads/test-ignore"); + }); + await test("uploadAsset()", async () => { + if (isCI) return; + const buffer = await readFile(fileURLToPath(import.meta.url)); + const client = GitHubClient.default(); + const url = await client.uploadAsset(buffer); + assert(url); + const parsedUrl = new URL(url); + assert(parsedUrl.host === "raw.githubusercontent.com"); - // Test with undefined buffer - const un = await client.uploadAsset(undefined) - assert(un === undefined) - }) - await test("resolveAssetUrl -image", async () => { - const resolved = await client.resolveAssetUrl( - "https://github.com/user-attachments/assets/a6e1935a-868e-4cca-9531-ad0ccdb9eace" - ) - assert(resolved) - assert(resolved.includes("githubusercontent.com")) - }) - await test("resolveAssetUrl - mp4", async () => { - const resolved = await client.resolveAssetUrl( - "https://github.com/user-attachments/assets/f7881bef-931d-4f76-8f63-b4d12b1f021e" - ) - 
console.log(resolved) - assert(resolved.includes("githubusercontent.com")) - }) + // Test with undefined buffer + const un = await client.uploadAsset(undefined); + assert(un === undefined); + }); + await test("resolveAssetUrl -image", async () => { + const resolved = await client.resolveAssetUrl( + "https://github.com/user-attachments/assets/a6e1935a-868e-4cca-9531-ad0ccdb9eace", + ); + assert(resolved); + assert(resolved.includes("githubusercontent.com")); + }); + await test("resolveAssetUrl - mp4", async () => { + const resolved = await client.resolveAssetUrl( + "https://github.com/user-attachments/assets/f7881bef-931d-4f76-8f63-b4d12b1f021e", + ); + console.log(resolved); + assert(resolved.includes("githubusercontent.com")); + }); - await test("resolveAssetUrl - image - indirect", async () => { - const resolved = await tryResolveResource( - "https://github.com/user-attachments/assets/a6e1935a-868e-4cca-9531-ad0ccdb9eace" - ) - assert(resolved.files[0].content) - assert.strictEqual(resolved.files[0].type, "image/jpeg") - }) - await test("listLabels() returns array of labels", async () => { - const labels = await client.listIssueLabels() - assert(Array.isArray(labels)) - assert(labels.length > 0) - assert(labels[0].name) - assert(labels[0].color) - assert(labels[0].description !== undefined) - }) -}) + await test("resolveAssetUrl - image - indirect", async () => { + const resolved = await tryResolveResource( + "https://github.com/user-attachments/assets/a6e1935a-868e-4cca-9531-ad0ccdb9eace", + ); + assert(resolved.files[0].content); + assert.strictEqual(resolved.files[0].type, "image/jpeg"); + }); + await test("listLabels() returns array of labels", async () => { + const labels = await client.listIssueLabels(); + assert(Array.isArray(labels)); + assert(labels.length > 0); + assert(labels[0].name); + assert(labels[0].color); + assert(labels[0].description !== undefined); + }); +}); diff --git a/packages/core/src/githubclient.ts b/packages/core/src/githubclient.ts index 8f5a0dc601..94356d3a91 100644 --- a/packages/core/src/githubclient.ts +++ b/packages/core/src/githubclient.ts @@ -1,140 +1,127 @@ -import type { Octokit } from "@octokit/rest" -import type { PaginateInterface } from "@octokit/plugin-paginate-rest" +import type { Octokit } from "@octokit/rest"; +import type { PaginateInterface } from "@octokit/plugin-paginate-rest"; import { - GITHUB_API_VERSION, - GITHUB_ASSET_BRANCH, - GITHUB_ASSET_URL_RX, - GITHUB_PULL_REQUEST_REVIEW_COMMENT_LINE_DISTANCE, - GITHUB_REST_API_CONCURRENCY_LIMIT, - GITHUB_REST_PAGE_DEFAULT, - GITHUB_TOKENS, - TOOL_ID, -} from "./constants" -import { createFetch } from "./fetch" -import { runtimeHost } from "./host" -import { prettifyMarkdown } from "./markdown" -import { arrayify, assert, logError, logVerbose } from "./util" -import { shellRemoveAsciiColors } from "./shell" -import { isGlobMatch } from "./glob" -import { concurrentLimit } from "./concurrency" -import { llmifyDiff } from "./llmdiff" -import { JSON5TryParse } from "./json5" -import { link } from "./mkmd" -import { errorMessage } from "./error" -import { deleteUndefinedValues, normalizeInt } from "./cleaners" -import { diffCreatePatch } from "./diff" -import { GitClient } from "./git" -import { genaiscriptDebug } from "./debug" -import { fetch } from "./fetch" -import { resolveBufferLike } from "./bufferlike" -import { fileTypeFromBuffer } from "./filetype" -import { createHash } from "node:crypto" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { 
diagnosticToGitHubMarkdown } from "./annotations" -import { TraceOptions } from "./trace" -import { unzip } from "./zip" -import { uriRedact, uriTryParse } from "./url" -import { dedent } from "./indent" -import { tryReadJSON } from "./fs" -const dbg = genaiscriptDebug("github") + GITHUB_API_VERSION, + GITHUB_ASSET_BRANCH, + GITHUB_ASSET_URL_RX, + GITHUB_PULL_REQUEST_REVIEW_COMMENT_LINE_DISTANCE, + GITHUB_REST_API_CONCURRENCY_LIMIT, + GITHUB_REST_PAGE_DEFAULT, + GITHUB_TOKENS, + TOOL_ID, +} from "./constants"; +import { createFetch } from "./fetch"; +import { runtimeHost } from "./host"; +import { prettifyMarkdown } from "./markdown"; +import { arrayify, assert, logError, logVerbose } from "./util"; +import { shellRemoveAsciiColors } from "./shell"; +import { isGlobMatch } from "./glob"; +import { concurrentLimit } from "./concurrency"; +import { llmifyDiff } from "./llmdiff"; +import { JSON5TryParse } from "./json5"; +import { link } from "./mkmd"; +import { errorMessage } from "./error"; +import { deleteUndefinedValues, normalizeInt } from "./cleaners"; +import { diffCreatePatch } from "./diff"; +import { GitClient } from "./git"; +import { genaiscriptDebug } from "./debug"; +import { fetch } from "./fetch"; +import { resolveBufferLike } from "./bufferlike"; +import { fileTypeFromBuffer } from "./filetype"; +import { createHash } from "node:crypto"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { diagnosticToGitHubMarkdown } from "./annotations"; +import { TraceOptions } from "./trace"; +import { unzip } from "./zip"; +import { uriRedact, uriTryParse } from "./url"; +import { dedent } from "./indent"; +import { tryReadJSON } from "./fs"; +const dbg = genaiscriptDebug("github"); export interface GithubConnectionInfo { - token: string - apiUrl?: string - repository: string - owner: string - repo: string - ref?: string - refName?: string - sha?: string - issue?: number - runId?: string - runUrl?: string - commitSha?: string - eventName?: "push" | "pull_request" | "issue" | "issue_comment" - event?: unknown + token: string; + apiUrl?: string; + repository: string; + owner: string; + repo: string; + ref?: string; + refName?: string; + sha?: string; + issue?: number; + runId?: string; + runUrl?: string; + commitSha?: string; + eventName?: "push" | "pull_request" | "issue" | "issue_comment"; + event?: unknown; } function readGitHubToken(env: Record) { - let token: string - for (const envName of GITHUB_TOKENS) { - token = env[envName] - if (token) { - dbg(`found %s`, envName) - break - } - } - return token + let token: string; + for (const envName of GITHUB_TOKENS) { + token = env[envName]; + if (token) { + dbg(`found %s`, envName); + break; + } + } + return token; } -async function githubFromEnv( - env: Record -): Promise { - const token = readGitHubToken(env) - const apiUrl = env.GITHUB_API_URL || "https://api.github.com" - const repository = env.GITHUB_REPOSITORY - const [owner, repo] = repository?.split("/", 2) || [undefined, undefined] - const ref = env.GITHUB_REF - const refName = env.GITHUB_REF_NAME - const sha = env.GITHUB_SHA - const commitSha = env.GITHUB_COMMIT_SHA - const runId = env.GITHUB_RUN_ID - const serverUrl = env.GITHUB_SERVER_URL - const runUrl = - serverUrl && runId - ? `${serverUrl}/${repository}/actions/runs/${runId}` - : undefined - const eventName = env.GITHUB_EVENT_NAME as - | "push" - | "pull_request" - | "issue" - | "issue_comment" - const eventPath = env.GITHUB_EVENT_PATH - const event = eventPath ? 
await tryReadJSON(eventPath) : undefined - let issue = normalizeInt( - env.GITHUB_ISSUE ?? - env.INPUT_GITHUB_ISSUE ?? - /^refs\/pull\/(?\d+)\/merge$/.exec(ref || "")?.groups?.issue - ) - if (event && isNaN(issue)) { - dbg(`resolving issue/pull_request from event`) - issue = normalizeInt(event.issue?.number || event.pull_request?.number) - } - return deleteUndefinedValues({ - token, - apiUrl, - repository, - owner, - repo, - ref, - refName, - sha, - issue, - runId, - runUrl, - commitSha, - eventName, - event, - }) satisfies GithubConnectionInfo +async function githubFromEnv(env: Record): Promise { + const token = readGitHubToken(env); + const apiUrl = env.GITHUB_API_URL || "https://api.github.com"; + const repository = env.GITHUB_REPOSITORY; + const [owner, repo] = repository?.split("/", 2) || [undefined, undefined]; + const ref = env.GITHUB_REF; + const refName = env.GITHUB_REF_NAME; + const sha = env.GITHUB_SHA; + const commitSha = env.GITHUB_COMMIT_SHA; + const runId = env.GITHUB_RUN_ID; + const serverUrl = env.GITHUB_SERVER_URL; + const runUrl = + serverUrl && runId ? `${serverUrl}/${repository}/actions/runs/${runId}` : undefined; + const eventName = env.GITHUB_EVENT_NAME as "push" | "pull_request" | "issue" | "issue_comment"; + const eventPath = env.GITHUB_EVENT_PATH; + const event = eventPath ? await tryReadJSON(eventPath) : undefined; + let issue = normalizeInt( + env.GITHUB_ISSUE ?? + env.INPUT_GITHUB_ISSUE ?? + /^refs\/pull\/(?\d+)\/merge$/.exec(ref || "")?.groups?.issue, + ); + if (event && isNaN(issue)) { + dbg(`resolving issue/pull_request from event`); + issue = normalizeInt(event.issue?.number || event.pull_request?.number); + } + return deleteUndefinedValues({ + token, + apiUrl, + repository, + owner, + repo, + ref, + refName, + sha, + issue, + runId, + runUrl, + commitSha, + eventName, + event, + }) satisfies GithubConnectionInfo; } async function githubGetPullRequestNumber() { - const res = await runtimeHost.exec( - undefined, - "gh", - ["pr", "view", "--json", "number"], - { - label: "github: resolve current pull request number", - } - ) - if (res.failed) { - logVerbose(res.stderr) - return undefined - } - const resj = JSON5TryParse(res.stdout) as { number: number } - const id = resj?.number - logVerbose(`github: pull request number: ${isNaN(id) ? "not found" : id}`) - return id + const res = await runtimeHost.exec(undefined, "gh", ["pr", "view", "--json", "number"], { + label: "github: resolve current pull request number", + }); + if (res.failed) { + logVerbose(res.stderr); + return undefined; + } + const resj = JSON5TryParse(res.stdout) as { number: number }; + const id = resj?.number; + logVerbose(`github: pull request number: ${isNaN(id) ? "not found" : id}`); + return id; } /** @@ -154,85 +141,73 @@ async function githubGetPullRequestNumber() { * - Handles errors gracefully by logging verbose error messages but does not throw. 
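A hypothetical call site for `githubParseEnv`, using only the options it declares below:

```ts
// Resolve GitHub connection info from the environment; when the token or
// pull request number is missing, fall back to the gh CLI to supply them.
const info = await githubParseEnv(process.env, {
  resolveToken: true,
  resolveIssue: true,
});
console.log(`repository: ${info.repository}, issue: ${info.issue}`);
```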
*/ export async function githubParseEnv( - env: Record, - options?: { - issue?: number - resolveToken?: boolean - resolveIssue?: boolean - resolveCommit?: boolean - } & Partial> & - TraceOptions & - CancellationOptions + env: Record, + options?: { + issue?: number; + resolveToken?: boolean; + resolveIssue?: boolean; + resolveCommit?: boolean; + } & Partial> & + TraceOptions & + CancellationOptions, ): Promise { - dbg(`resolving connection info`) - const res = await githubFromEnv(env) - dbg(`found %O`, Object.keys(res).join(",")) - try { - if (options?.owner && options?.repo) { - res.owner = options.owner - dbg(`overriding owner with options.owner: ${options.owner}`) - res.repo = options.repo - dbg(`overriding repo with options.repo: ${options.repo}`) - res.repository = res.owner + "/" + res.repo - } - if (!isNaN(options?.issue)) { - dbg(`overriding issue with options.issue: ${options.issue}`) - res.issue = options.issue - } - if (!res.owner || !res.repo || !res.repository) { - dbg( - `owner, repo, or repository missing, attempting to resolve via gh CLI` - ) - const repoInfo = await runtimeHost.exec( - undefined, - "gh", - ["repo", "view", "--json", "url,name,owner"], - options - ) - if (repoInfo.failed) { - dbg(repoInfo.stderr) - } else if (!repoInfo.failed) { - const { name: repo, owner } = JSON.parse(repoInfo.stdout) - dbg(`retrieved repository info via gh CLI: ${repoInfo.stdout}`) - res.repo = repo - res.owner = owner.login - res.repository = res.owner + "/" + res.repo - } - } - if (isNaN(res.issue) && options?.resolveIssue) { - dbg(`attempting to resolve issue number`) - res.issue = await githubGetPullRequestNumber() - } - if (!res.commitSha && options?.resolveCommit) { - res.commitSha = await GitClient.default().lastCommitSha() - } - if (!res.token && options?.resolveToken) { - const auth = await runtimeHost.exec( - undefined, - "gh", - ["auth", "token"], - options - ) - if (!auth.failed) { - dbg( - `retrieved token via gh CLI: %s...`, - auth.stdout.slice(0, 3) - ) - res.token = auth.stdout.trim() - } - } - } catch (e) { - dbg(errorMessage(e)) - } - - deleteUndefinedValues(res) - dbg( - `resolved connection info: %O`, - Object.fromEntries( - Object.entries(res).map(([k, v]) => [k, k === "token" ? 
"***" : v]) - ) - ) - return Object.freeze(res) + dbg(`resolving connection info`); + const res = await githubFromEnv(env); + dbg(`found %O`, Object.keys(res).join(",")); + try { + if (options?.owner && options?.repo) { + res.owner = options.owner; + dbg(`overriding owner with options.owner: ${options.owner}`); + res.repo = options.repo; + dbg(`overriding repo with options.repo: ${options.repo}`); + res.repository = res.owner + "/" + res.repo; + } + if (!isNaN(options?.issue)) { + dbg(`overriding issue with options.issue: ${options.issue}`); + res.issue = options.issue; + } + if (!res.owner || !res.repo || !res.repository) { + dbg(`owner, repo, or repository missing, attempting to resolve via gh CLI`); + const repoInfo = await runtimeHost.exec( + undefined, + "gh", + ["repo", "view", "--json", "url,name,owner"], + options, + ); + if (repoInfo.failed) { + dbg(repoInfo.stderr); + } else if (!repoInfo.failed) { + const { name: repo, owner } = JSON.parse(repoInfo.stdout); + dbg(`retrieved repository info via gh CLI: ${repoInfo.stdout}`); + res.repo = repo; + res.owner = owner.login; + res.repository = res.owner + "/" + res.repo; + } + } + if (isNaN(res.issue) && options?.resolveIssue) { + dbg(`attempting to resolve issue number`); + res.issue = await githubGetPullRequestNumber(); + } + if (!res.commitSha && options?.resolveCommit) { + res.commitSha = await GitClient.default().lastCommitSha(); + } + if (!res.token && options?.resolveToken) { + const auth = await runtimeHost.exec(undefined, "gh", ["auth", "token"], options); + if (!auth.failed) { + dbg(`retrieved token via gh CLI: %s...`, auth.stdout.slice(0, 3)); + res.token = auth.stdout.trim(); + } + } + } catch (e) { + dbg(errorMessage(e)); + } + + deleteUndefinedValues(res); + dbg( + `resolved connection info: %O`, + Object.fromEntries(Object.entries(res).map(([k, v]) => [k, k === "token" ? "***" : v])), + ); + return Object.freeze(res); } /** @@ -249,69 +224,67 @@ export async function githubParseEnv( * - If the issue number is missing, the update will not proceed. */ export async function githubUpdatePullRequestDescription( - script: PromptScript, - info: GithubConnectionInfo, - text: string, - commentTag: string, - options?: CancellationOptions + script: PromptScript, + info: GithubConnectionInfo, + text: string, + commentTag: string, + options?: CancellationOptions, ) { - const { cancellationToken } = options ?? 
{} - const { apiUrl, repository, issue, token } = info - assert(!!commentTag) - - if (!issue) { - dbg(`missing issue number, cannot update pull request description`) - return { updated: false, statusText: "missing issue number" } - } - if (!token) { - dbg(`missing github token, cannot update pull request description`) - return { updated: false, statusText: "missing github token" } - } - - text = prettifyMarkdown(text) - text += generatedByFooter(script, info) - - const fetch = await createFetch({ retryOn: [], cancellationToken }) - const url = `${apiUrl}/repos/${repository}/pulls/${issue}` - dbg(`fetching pull request details from URL: ${url}`) - // get current body - const resGet = await fetch(url, { - method: "GET", - headers: { - Accept: "application/vnd.github+json", - Authorization: `Bearer ${token}`, - "X-GitHub-Api-Version": GITHUB_API_VERSION, - }, - }) - const resGetJson = (await resGet.json()) as { - body: string - html_url: string - } - const body = mergeDescription(commentTag, resGetJson.body, text) - dbg(`merging pull request description`) - const res = await fetch(url, { - method: "PATCH", - headers: { - Accept: "application/vnd.github+json", - Authorization: `Bearer ${token}`, - "X-GitHub-Api-Version": GITHUB_API_VERSION, - }, - body: JSON.stringify({ body }), - }) - const r = { - updated: res.status === 200, - statusText: res.statusText, - } - - if (!r.updated) { - logError( - `pull request ${resGetJson.html_url} update failed, ${r.statusText}` - ) - } else { - logVerbose(`pull request ${resGetJson.html_url} updated`) - } - - return r + const { cancellationToken } = options ?? {}; + const { apiUrl, repository, issue, token } = info; + assert(!!commentTag); + + if (!issue) { + dbg(`missing issue number, cannot update pull request description`); + return { updated: false, statusText: "missing issue number" }; + } + if (!token) { + dbg(`missing github token, cannot update pull request description`); + return { updated: false, statusText: "missing github token" }; + } + + text = prettifyMarkdown(text); + text += generatedByFooter(script, info); + + const fetch = await createFetch({ retryOn: [], cancellationToken }); + const url = `${apiUrl}/repos/${repository}/pulls/${issue}`; + dbg(`fetching pull request details from URL: ${url}`); + // get current body + const resGet = await fetch(url, { + method: "GET", + headers: { + Accept: "application/vnd.github+json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": GITHUB_API_VERSION, + }, + }); + const resGetJson = (await resGet.json()) as { + body: string; + html_url: string; + }; + const body = mergeDescription(commentTag, resGetJson.body, text); + dbg(`merging pull request description`); + const res = await fetch(url, { + method: "PATCH", + headers: { + Accept: "application/vnd.github+json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": GITHUB_API_VERSION, + }, + body: JSON.stringify({ body }), + }); + const r = { + updated: res.status === 200, + statusText: res.statusText, + }; + + if (!r.updated) { + logError(`pull request ${resGetJson.html_url} update failed, ${r.statusText}`); + } else { + logVerbose(`pull request ${resGetJson.html_url} updated`); + } + + return r; } /** @@ -325,31 +298,21 @@ export async function githubUpdatePullRequestDescription( * @param text - The new content to merge into the body. * @returns Updated body text with merged and formatted content. */ -export function mergeDescription( - commentTag: string, - body: string, - text: string -) { - body = body ?? 
"" - const tag = `` - const endTag = `` - const sep = "\n\n" - - const start = body.indexOf(tag) - const end = body.indexOf(endTag) - const header = "
" - if (start > -1 && end > -1 && start < end) { - body = - body.slice(0, start + tag.length) + - header + - sep + - text + - sep + - body.slice(end) - } else { - body = body + sep + tag + header + sep + text + sep + endTag + sep - } - return body +export function mergeDescription(commentTag: string, body: string, text: string) { + body = body ?? ""; + const tag = ``; + const endTag = ``; + const sep = "\n\n"; + + const start = body.indexOf(tag); + const end = body.indexOf(endTag); + const header = "
"; + if (start > -1 && end > -1 && start < end) { + body = body.slice(0, start + tag.length) + header + sep + text + sep + body.slice(end); + } else { + body = body + sep + tag + header + sep + text + sep + endTag + sep; + } + return body; } /** @@ -361,12 +324,8 @@ export function mergeDescription( * @param code - Optional identifier code to be appended to the footer. * @returns A formatted string serving as a footer, warning readers about the AI-generated content. */ -export function generatedByFooter( - script: PromptScript, - info: { runUrl?: string }, - code?: string -) { - return `\n\n> AI-generated content by ${link(script.id, info.runUrl)}${code ? ` \`${code}\` ` : ""} may be incorrect. Use reactions to eval.\n\n` +export function generatedByFooter(script: PromptScript, info: { runUrl?: string }, code?: string) { + return `\n\n> AI-generated content by ${link(script.id, info.runUrl)}${code ? ` \`${code}\` ` : ""} may be incorrect. Use reactions to eval.\n\n`; } /** @@ -382,193 +341,177 @@ export function generatedByFooter( * @returns A formatted Markdown string representing the AI-generated comment with a footer and diagnostic details. */ export function appendGeneratedComment( - script: PromptScript, - info: { runUrl?: string; owner: string; repo: string }, - annotation: Diagnostic + script: PromptScript, + info: { runUrl?: string; owner: string; repo: string }, + annotation: Diagnostic, ) { - const { message, code, severity, suggestion } = annotation - const text = prettifyMarkdown(message) - return ` + const { message, code, severity, suggestion } = annotation; + const text = prettifyMarkdown(message); + return ` ${text}${suggestion ? `\n\n\`\`\`suggestion\n${suggestion}\n\`\`\`\n` : ""} -${generatedByFooter(script, info, code)}` +${generatedByFooter(script, info, code)}`; } // https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment export async function githubCreateIssueComment( - script: PromptScript, - info: GithubConnectionInfo, - body: string, - commentTag: string, - options?: CancellationOptions + script: PromptScript, + info: GithubConnectionInfo, + body: string, + commentTag: string, + options?: CancellationOptions, ): Promise<{ created: boolean; statusText: string; html_url?: string }> { - const { cancellationToken } = options ?? 
{} - const { apiUrl, repository, issue, token } = info - - if (!issue) { - dbg(`missing issue number, cannot create issue comment`) - return { created: false, statusText: "missing issue number" } - } - if (!token) { - dbg(`missing github token, cannot create issue comment`) - return { created: false, statusText: "missing github token" } - } - - const fetch = await createFetch({ retryOn: [], cancellationToken }) - const url = `${apiUrl}/repos/${repository}/issues/${issue}/comments` - dbg(`creating issue comment at %s`, url) - - body = prettifyMarkdown(body) - body += generatedByFooter(script, info) - - dbg(`body:\n%s`, body) - - if (commentTag) { - const tag = `` - body = `${body}\n\n${tag}\n\n` - // try to find the existing comment - const resListComments = await fetch( - `${url}?per_page=100&sort=updated`, - { - headers: { - Accept: "application/vnd.github+json", - Authorization: `Bearer ${token}`, - "X-GitHub-Api-Version": GITHUB_API_VERSION, - }, - } - ) - if (resListComments.status !== 200) { - dbg(`failed to list existing comments`) - return { created: false, statusText: resListComments.statusText } - } - const comments = (await resListComments.json()) as { - id: string - body: string - }[] - dbg(`comments: %O`, comments) - const comment = comments.find((c) => c.body.includes(tag)) - if (comment) { - dbg(`found existing comment %s with tag, deleting it`, comment.id) - const delurl = `${apiUrl}/repos/${repository}/issues/comments/${comment.id}` - const resd = await fetch(delurl, { - method: "DELETE", - headers: { - Authorization: `Bearer ${token}`, - "X-GitHub-Api-Version": GITHUB_API_VERSION, - }, - }) - if (!resd.ok) { - logError(`issue comment delete failed, ` + resd.statusText) - } - } + const { cancellationToken } = options ?? {}; + const { apiUrl, repository, issue, token } = info; + + if (!issue) { + dbg(`missing issue number, cannot create issue comment`); + return { created: false, statusText: "missing issue number" }; + } + if (!token) { + dbg(`missing github token, cannot create issue comment`); + return { created: false, statusText: "missing github token" }; + } + + const fetch = await createFetch({ retryOn: [], cancellationToken }); + const url = `${apiUrl}/repos/${repository}/issues/${issue}/comments`; + dbg(`creating issue comment at %s`, url); + + body = prettifyMarkdown(body); + body += generatedByFooter(script, info); + + dbg(`body:\n%s`, body); + + if (commentTag) { + const tag = ``; + body = `${body}\n\n${tag}\n\n`; + // try to find the existing comment + const resListComments = await fetch(`${url}?per_page=100&sort=updated`, { + headers: { + Accept: "application/vnd.github+json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": GITHUB_API_VERSION, + }, + }); + if (resListComments.status !== 200) { + dbg(`failed to list existing comments`); + return { created: false, statusText: resListComments.statusText }; } - - const res = await fetch(url, { - method: "POST", + const comments = (await resListComments.json()) as { + id: string; + body: string; + }[]; + dbg(`comments: %O`, comments); + const comment = comments.find((c) => c.body.includes(tag)); + if (comment) { + dbg(`found existing comment %s with tag, deleting it`, comment.id); + const delurl = `${apiUrl}/repos/${repository}/issues/comments/${comment.id}`; + const resd = await fetch(delurl, { + method: "DELETE", headers: { - Accept: "application/vnd.github+json", - Authorization: `Bearer ${token}`, - "X-GitHub-Api-Version": GITHUB_API_VERSION, + Authorization: `Bearer ${token}`, + 
"X-GitHub-Api-Version": GITHUB_API_VERSION, }, - body: JSON.stringify({ body }), - }) - const resp: { id: string; html_url: string } = await res.json() - const r = { - created: res.status === 201, - statusText: res.statusText, - html_url: resp.html_url, - } - if (!r.created) { - logError( - `pull request ${issue} comment creation failed, ${r.statusText} (${res.status})` - ) - dbg(JSON.stringify(resp, null, 2)) - } else { - logVerbose(`pull request ${issue} comment created at ${r.html_url}`) - } - - return r + }); + if (!resd.ok) { + logError(`issue comment delete failed, ` + resd.statusText); + } + } + } + + const res = await fetch(url, { + method: "POST", + headers: { + Accept: "application/vnd.github+json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": GITHUB_API_VERSION, + }, + body: JSON.stringify({ body }), + }); + const resp: { id: string; html_url: string } = await res.json(); + const r = { + created: res.status === 201, + statusText: res.statusText, + html_url: resp.html_url, + }; + if (!r.created) { + logError(`pull request ${issue} comment creation failed, ${r.statusText} (${res.status})`); + dbg(JSON.stringify(resp, null, 2)); + } else { + logVerbose(`pull request ${issue} comment created at ${r.html_url}`); + } + + return r; } async function githubCreatePullRequestReview( - script: PromptScript, - info: Pick< - GithubConnectionInfo, - | "apiUrl" - | "repository" - | "issue" - | "runUrl" - | "commitSha" - | "owner" - | "repo" - >, - token: string, - annotation: Diagnostic, - existingComments: { - id: string - path: string - line: number - body: string - }[], - options?: CancellationOptions + script: PromptScript, + info: Pick< + GithubConnectionInfo, + "apiUrl" | "repository" | "issue" | "runUrl" | "commitSha" | "owner" | "repo" + >, + token: string, + annotation: Diagnostic, + existingComments: { + id: string; + path: string; + line: number; + body: string; + }[], + options?: CancellationOptions, ) { - assert(!!token) - const { cancellationToken } = options ?? {} - const { apiUrl, repository, issue, commitSha } = info - dbg(`creating pull request review comment`) - - const prettyMessage = prettifyMarkdown(annotation.message) - const line = annotation.range?.[1]?.[0] + 1 - const body = { - body: appendGeneratedComment(script, info, annotation), - commit_id: commitSha, - path: annotation.filename, - line: normalizeInt(line), - side: "RIGHT", - } - if ( - existingComments.find( - (c) => - c.path === body.path && - Math.abs(c.line - body.line) < - GITHUB_PULL_REQUEST_REVIEW_COMMENT_LINE_DISTANCE && - (annotation.code - ? 
c.body?.includes(annotation.code) - : c.body?.includes(prettyMessage)) - ) - ) { - logVerbose( - `pull request ${commitSha} comment creation already exists, skipping` - ) - return { created: false, statusText: "comment already exists" } - } - const fetch = await createFetch({ retryOn: [], cancellationToken }) - const url = `${apiUrl}/repos/${repository}/pulls/${issue}/comments` - dbg(`posting new pull request review comment at URL: ${url}`) - dbg(`%O`, body) - const res = await fetch(url, { - method: "POST", - headers: { - Accept: "application/vnd.github+json", - Authorization: `Bearer ${token}`, - "X-GitHub-Api-Version": GITHUB_API_VERSION, - }, - body: JSON.stringify(body), - }) - const resp: { id: string; html_url: string } = await res.json() - const r = { - created: res.status === 201, - statusText: res.statusText, - html_url: resp.html_url, - } - if (!r.created) { - logVerbose( - `pull request ${commitSha} comment creation failed, ${r.statusText} (${res.status})` - ) - dbg("prr comment creation failed %O", resp) - } else { - logVerbose(`pull request ${commitSha} comment created at ${r.html_url}`) - } - return r + assert(!!token); + const { cancellationToken } = options ?? {}; + const { apiUrl, repository, issue, commitSha } = info; + dbg(`creating pull request review comment`); + + const prettyMessage = prettifyMarkdown(annotation.message); + const line = annotation.range?.[1]?.[0] + 1; + const body = { + body: appendGeneratedComment(script, info, annotation), + commit_id: commitSha, + path: annotation.filename, + line: normalizeInt(line), + side: "RIGHT", + }; + if ( + existingComments.find( + (c) => + c.path === body.path && + Math.abs(c.line - body.line) < GITHUB_PULL_REQUEST_REVIEW_COMMENT_LINE_DISTANCE && + (annotation.code ? c.body?.includes(annotation.code) : c.body?.includes(prettyMessage)), + ) + ) { + logVerbose(`pull request ${commitSha} comment creation already exists, skipping`); + return { created: false, statusText: "comment already exists" }; + } + const fetch = await createFetch({ retryOn: [], cancellationToken }); + const url = `${apiUrl}/repos/${repository}/pulls/${issue}/comments`; + dbg(`posting new pull request review comment at URL: ${url}`); + dbg(`%O`, body); + const res = await fetch(url, { + method: "POST", + headers: { + Accept: "application/vnd.github+json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": GITHUB_API_VERSION, + }, + body: JSON.stringify(body), + }); + const resp: { id: string; html_url: string } = await res.json(); + const r = { + created: res.status === 201, + statusText: res.statusText, + html_url: resp.html_url, + }; + if (!r.created) { + logVerbose( + `pull request ${commitSha} comment creation failed, ${r.statusText} (${res.status})`, + ); + dbg("prr comment creation failed %O", resp); + } else { + logVerbose(`pull request ${commitSha} comment created at ${r.html_url}`); + } + return r; } /** @@ -586,1186 +529,1084 @@ async function githubCreatePullRequestReview( * - Fetches existing pull request comments to avoid duplication when creating review comments. */ export async function githubCreatePullRequestReviews( - script: PromptScript, - info: GithubConnectionInfo, - annotations: Diagnostic[], - options?: CancellationOptions + script: PromptScript, + info: GithubConnectionInfo, + annotations: Diagnostic[], + options?: CancellationOptions, ): Promise { - const { cancellationToken } = options ?? 
{} - const { repository, issue, commitSha, apiUrl, token } = info - - if (!annotations?.length) { - dbg(`no annotations provided, skipping pull request reviews`) - return true - } - if (!issue) { - dbg(`missing issue number, cannot create pull request reviews`) - return false - } - if (!commitSha) { - dbg(`missing commit sha, cannot create pull request reviews`) - return false - } - if (!token) { - dbg(`missing github token, cannot create pull request reviews`) - return false - } - - // query existing reviews - const fetch = await createFetch({ retryOn: [], cancellationToken }) - const url = `${apiUrl}/repos/${repository}/pulls/${issue}/comments` - dbg(`fetching existing pull request comments from URL: ${url}`) - const resListComments = await fetch(`${url}?per_page=100&sort=updated`, { - headers: { - Accept: "application/vnd.github+json", - Authorization: `Bearer ${token}`, - "X-GitHub-Api-Version": GITHUB_API_VERSION, - }, - }) - checkCancelled(cancellationToken) - if (resListComments.status !== 200) { - dbg(`failed to fetch existing pull request comments`) - return false - } - const comments = (await resListComments.json()) as { - id: string - path: string - line: number - body: string - }[] - dbg(`existing pull request comments: %O`, comments) - // code annotations - const failed: Diagnostic[] = [] - for (const annotation of annotations) { - dbg(`iterating over annotations to create pull request reviews`) - checkCancelled(cancellationToken) - const res = await githubCreatePullRequestReview( - script, - info, - token, - annotation, - comments - ) - if (!res.created) failed.push(annotation) - } - - if (failed.length) { - await githubCreateIssueComment( - script, - info, - failed.map((d) => diagnosticToGitHubMarkdown(info, d)).join("\n\n"), - script.id + "-prr", - options - ) - } - - return true + const { cancellationToken } = options ?? 
{};
+  const { repository, issue, commitSha, apiUrl, token } = info;
+
+  if (!annotations?.length) {
+    dbg(`no annotations provided, skipping pull request reviews`);
+    return true;
+  }
+  if (!issue) {
+    dbg(`missing issue number, cannot create pull request reviews`);
+    return false;
+  }
+  if (!commitSha) {
+    dbg(`missing commit sha, cannot create pull request reviews`);
+    return false;
+  }
+  if (!token) {
+    dbg(`missing github token, cannot create pull request reviews`);
+    return false;
+  }
+
+  // query existing reviews
+  const fetch = await createFetch({ retryOn: [], cancellationToken });
+  const url = `${apiUrl}/repos/${repository}/pulls/${issue}/comments`;
+  dbg(`fetching existing pull request comments from URL: ${url}`);
+  const resListComments = await fetch(`${url}?per_page=100&sort=updated`, {
+    headers: {
+      Accept: "application/vnd.github+json",
+      Authorization: `Bearer ${token}`,
+      "X-GitHub-Api-Version": GITHUB_API_VERSION,
+    },
+  });
+  checkCancelled(cancellationToken);
+  if (resListComments.status !== 200) {
+    dbg(`failed to fetch existing pull request comments`);
+    return false;
+  }
+  const comments = (await resListComments.json()) as {
+    id: string;
+    path: string;
+    line: number;
+    body: string;
+  }[];
+  dbg(`existing pull request comments: %O`, comments);
+  // code annotations
+  const failed: Diagnostic[] = [];
+  for (const annotation of annotations) {
+    dbg(`iterating over annotations to create pull request reviews`);
+    checkCancelled(cancellationToken);
+    const res = await githubCreatePullRequestReview(script, info, token, annotation, comments);
+    if (!res.created) failed.push(annotation);
+  }
+
+  if (failed.length) {
+    await githubCreateIssueComment(
+      script,
+      info,
+      failed.map((d) => diagnosticToGitHubMarkdown(info, d)).join("\n\n"),
+      script.id + "-prr",
+      options,
+    );
+  }
+
+  return true;
 }

 async function paginatorToArray<T, R>(
-    iterator: AsyncIterable<T>,
-    count: number,
-    iteratorItem: (item: T) => R[],
-    elementFilter?: (item: R) => boolean
+  iterator: AsyncIterable<T>,
+  count: number,
+  iteratorItem: (item: T) => R[],
+  elementFilter?: (item: R) => boolean,
 ): Promise<R[]> {
-    const result: R[] = []
-    for await (const item of await iterator) {
-        let r = iteratorItem(item)
-        if (elementFilter) {
-            r = r.filter(elementFilter)
-        }
-        result.push(...r)
-        if (result.length >= count) {
-            break
-        }
-    }
-    return result.slice(0, count)
+  const result: R[] = [];
+  for await (const item of await iterator) {
+    let r = iteratorItem(item);
+    if (elementFilter) {
+      r = r.filter(elementFilter);
+    }
+    result.push(...r);
+    if (result.length >= count) {
+      break;
+    }
+  }
+  return result.slice(0, count);
 }

 export class GitHubClient implements GitHub {
-    private readonly _info: Pick<GithubConnectionInfo, "owner" | "repo">
-    private _connection: Promise<GithubConnectionInfo>
-    private _client: Promise<
-        | ({
-              client: Octokit & {
-                  paginate: PaginateInterface
+  private readonly _info: Pick<GithubConnectionInfo, "owner" | "repo">;
+  private _connection: Promise<GithubConnectionInfo>;
+  private _client: Promise<
+    | ({
+        client: Octokit & {
+          paginate: PaginateInterface;
+        };
+      } & GithubConnectionInfo)
+    | undefined
+  >;
+
+  private static _default: GitHubClient;
+  static default() {
+    if (!this._default) this._default = new GitHubClient(undefined);
+    return this._default;
+  }
+
+  constructor(info: Pick<GithubConnectionInfo, "owner" | "repo">) {
+    this._info = info;
+  }
+
+  private connection(): Promise<GithubConnectionInfo> {
+    if (!this._connection) {
+      this._connection = githubParseEnv(process.env, {
+        ...this._info,
+        resolveToken: true,
+      });
+    }
+    return this._connection;
+  }
+
+  client(owner: string, repo: string) {
+    return new GitHubClient({ owner, repo });
+  }
+
+  async 
api() { + if (!this._client) { + this._client = new Promise(async (resolve) => { + const conn = await this.connection(); + const { token, apiUrl } = conn; + const { Octokit } = await import("@octokit/rest"); + const { throttling } = await import("@octokit/plugin-throttling"); + const { paginateRest } = await import("@octokit/plugin-paginate-rest"); + //const { retry } = await import("@octokit/plugin-retry") + const OctokitWithPlugins = Octokit.plugin(paginateRest).plugin(throttling); + // .plugin(retry) + const res = new OctokitWithPlugins({ + userAgent: TOOL_ID, + auth: token, + baseUrl: apiUrl, + request: { retries: 3 }, + throttle: { + onRateLimit: ( + retryAfter: number, + options: any, + octokit: Octokit, + retryCount: number, + ) => { + octokit.log.warn( + `Request quota exhausted for request ${options.method} ${options.url}`, + ); + if (retryCount < 1) { + // only retries once + octokit.log.info(`Retrying after ${retryAfter} seconds!`); + return true; } - } & GithubConnectionInfo) - | undefined - > - - private static _default: GitHubClient - static default() { - if (!this._default) this._default = new GitHubClient(undefined) - return this._default - } - - constructor(info: Pick) { - this._info = info - } - - private connection(): Promise { - if (!this._connection) { - this._connection = githubParseEnv(process.env, { - ...this._info, - resolveToken: true, - }) - } - return this._connection - } - - client(owner: string, repo: string) { - return new GitHubClient({ owner, repo }) - } - - async api() { - if (!this._client) { - this._client = new Promise(async (resolve) => { - const conn = await this.connection() - const { token, apiUrl } = conn - const { Octokit } = await import("@octokit/rest") - const { throttling } = await import( - "@octokit/plugin-throttling" - ) - const { paginateRest } = await import( - "@octokit/plugin-paginate-rest" - ) - //const { retry } = await import("@octokit/plugin-retry") - const OctokitWithPlugins = - Octokit.plugin(paginateRest).plugin(throttling) - // .plugin(retry) - const res = new OctokitWithPlugins({ - userAgent: TOOL_ID, - auth: token, - baseUrl: apiUrl, - request: { retries: 3 }, - throttle: { - onRateLimit: ( - retryAfter: number, - options: any, - octokit: Octokit, - retryCount: number - ) => { - octokit.log.warn( - `Request quota exhausted for request ${options.method} ${options.url}` - ) - if (retryCount < 1) { - // only retries once - octokit.log.info( - `Retrying after ${retryAfter} seconds!` - ) - return true - } - return false - }, - onSecondaryRateLimit: ( - retryAfter: number, - options: any, - octokit: Octokit - ) => { - octokit.log.warn( - `SecondaryRateLimit detected for request ${options.method} ${options.url}` - ) - }, - }, - }) - resolve({ - client: res, - ...conn, - }) - }) - } - return this._client - } - - async info(): Promise { - const { - apiUrl: baseUrl, - token: auth, - repo, - owner, - ref, - refName, - issue, - runId, - runUrl, - event, - eventName, - } = await this.connection() - return Object.freeze( - deleteUndefinedValues({ - baseUrl, - repo, - owner, - auth, - ref, - refName, - runId, - runUrl, - issueNumber: issue, - eventName, - event, - }) - ) - } - - async repo(): Promise<{ - name: string - full_name: string - default_branch: string - }> { - const { client, owner, repo } = await this.api() - const res = await client.rest.repos.get({ owner, repo }) - return res.data - } - - async getRef(branchName: string): Promise { - const { client, owner, repo } = await this.api() - try { - dbg(`get ref %s`, branchName) - const 
existing = await client.git.getRef({ - owner, - repo, - ref: `heads/${branchName}`, - }) - return existing.data - } catch (e) { - dbg(`ref not found`) - return undefined - } - } - - async getOrCreateRef( - branchName: string, - options?: { base?: string; orphaned?: boolean | string } - ): Promise { - const { client, owner, repo } = await this.api() - const { base, orphaned } = options ?? {} - if (!branchName) throw new Error("branchName is required") - - dbg(`checking if branch %s exists`, branchName) - const existing = await this.getRef(branchName) - if (existing) { - dbg(`branch %s already exists`, branchName) - return existing - } - - let sha: string - dbg(`creating branch %s`, branchName) - if (orphaned) { - dbg(`creating orphaned`) - // Step 0: Create a blob for the file content - const { data: blob } = await client.git.createBlob({ - owner, - repo, - content: Buffer.from( - typeof orphaned === orphaned - ? orphaned - : `Orphaned branch created by GenAIScript.` - ).toString("base64"), - encoding: "base64", - }) - - // Step 1: Create an empty tree - const { data: tree } = await client.git.createTree({ - owner, - repo, - tree: [ - { - path: "README.md", - mode: "100644", - type: "blob", - sha: blob.sha, - }, - ], - }) - dbg(`created tree %s`, tree.sha) - // Step 2: Create a commit with NO parents - const { data: commit } = await client.git.createCommit({ - owner, - repo, - message: "Initial commit on orphan branch", - tree: tree.sha, - parents: [], // <--- empty parent list = no history - }) - sha = commit.sha - dbg(`created commit %s`, commit.sha) - } else { - if (!base) { - dbg(`base is required for non-orphaned branch`) - const repo = await this.repo() - sha = repo.default_branch - } else sha = base - } - - // Step 3: Create a reference (branch) pointing to the commit - dbg(`creating reference %s <- %s`, branchName, sha) - const res = await client.git.createRef({ - owner, - repo, - ref: `refs/heads/${branchName}`, - sha, - }) - return res.data - } - - async uploadAsset( - file: BufferLike, - options?: { branchName?: string } - ): Promise { - const { branchName = GITHUB_ASSET_BRANCH } = options ?? {} - const { client, owner, repo } = await this.api() - if (!file) { - dbg(`no buffer provided, nothing to upload`) - return undefined - } - const buffer = await resolveBufferLike(file) - if (!buffer) { - dbg(`failed to resolve buffer, nothing to upload`) - return undefined - } - const base64Content = buffer.toString("base64") - const fileType = await fileTypeFromBuffer(buffer) - const hash = createHash("sha256") - hash.write(base64Content) - const hashId = hash.digest().toString("hex") - const uploadPath = hashId + (fileType ? `.${fileType.ext}` : ".txt") - const rawUrl = `https://raw.githubusercontent.com/${owner}/${repo}/refs/heads/${branchName}/${uploadPath}` - - // try to get file - dbg(`checking %s`, rawUrl) - const cached = await fetch(rawUrl, { method: "HEAD" }) - if (cached.status === 200) { - dbg(`asset already exists, skip upload`) - return rawUrl - } - - dbg(`uploading asset %s to branch %s`, uploadPath, branchName) - await this.getOrCreateRef(branchName, { orphaned: true }) - const { data: blob } = await client.git.createBlob({ - owner, - repo, - content: base64Content, - encoding: "base64", - }) - dbg(`created blob %s`, blob.sha) - - // 3. 
Get the latest commit (HEAD) of the branch - const { data: refData } = await client.git.getRef({ - owner, - repo, - ref: `heads/${branchName}`, - }) - const latestCommitSha = refData.object.sha - dbg(`head ref %s: %s`, refData.ref, latestCommitSha) - - // 4. Get the tree of the latest commit - const { data: commitData } = await client.git.getCommit({ - owner, - repo, - commit_sha: latestCommitSha, - }) - const baseTreeSha = commitData.tree.sha - dbg(`base tree sha %s`, baseTreeSha) - - // 5. Create a new tree adding the image - const { data: newTree } = await client.git.createTree({ - owner, - repo, - base_tree: baseTreeSha, - tree: [ - { - path: uploadPath, - mode: "100644", - type: "blob", - sha: blob.sha, - }, - ], - }) - - dbg("tree created %s", newTree.sha) - - // 6. Create a new commit with the new tree - const { data: newCommit } = await client.git.createCommit({ - owner, - repo, - message: `Upload asset ${uploadPath}`, - tree: newTree.sha, - parents: [latestCommitSha], - }) - dbg("commit created %s", newCommit.sha) - - // 7. Update the branch to point to the new commit - await client.git.updateRef({ - owner, - repo, - ref: `heads/${branchName}`, - sha: newCommit.sha, - force: false, // do not force push - }) - - return rawUrl - } - - async listIssues( - options?: { - state?: "open" | "closed" | "all" - labels?: string - sort?: "created" | "updated" | "comments" - direction?: "asc" | "desc" - creator?: string - assignee?: string - since?: string - mentioned?: string - } & GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`listing issues for repository`) - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {} - const ite = client.paginate.iterator(client.rest.issues.listForRepo, { - owner, - repo, - ...rest, - }) - const res = await paginatorToArray(ite, count, (i) => i.data) - return res - } - - async listGists( - options?: { - since?: string - filenameAsResources?: boolean - } & GitHubPaginationOptions - ): Promise { - const { client } = await this.api() - dbg(`listing gists for user`) - const { - count = GITHUB_REST_PAGE_DEFAULT, - filenameAsResources, - ...rest - } = options ?? {} - const ite = client.paginate.iterator(client.rest.gists.list, { - ...rest, - }) - const res = await paginatorToArray(ite, count, (i) => i.data) - return res.map( - (r) => - ({ - id: r.id, - description: r.description, - created_at: r.created_at, - files: Object.values(r.files).map( - ({ filename, size }) => - ({ - filename: filenameAsResources - ? `gist://${r.id}/${filename}` - : filename, - size, - }) satisfies WorkspaceFile - ), - }) satisfies GitHubGist - ) - } - - async getGist(gist_id?: string): Promise { - if (typeof gist_id === "string") { - gist_id = gist_id.trim() - } - const { client, owner } = await this.api() - dbg(`retrieving gist details for gist ID: ${gist_id}`) - if (!gist_id) { - return undefined - } - const { data } = await client.rest.gists.get({ - gist_id, - owner, - }) - const { files, id, description, created_at, ...rest } = data - if ( - Object.values(files || {}).some( - (f) => f.encoding !== "utf-8" && f.encoding != "base64" - ) - ) { - dbg(`unsupported encoding for gist files`) - return undefined - } - const res = { - id, - description, - created_at, - files: Object.values(files).map( - ({ filename, content, size, encoding }) => - deleteUndefinedValues({ - filename, - content, - encoding: - encoding === "utf-8" - ? undefined - : encoding === "base64" - ? 
"base64" - : undefined, - size, - }) satisfies WorkspaceFile - ), - } satisfies GitHubGist - - dbg(`gist: %d files, %s`, res.files.length, res.description || "") - return res - } - - async getIssue(issue_number?: number | string): Promise { - issue_number = normalizeInt(issue_number) - const { client, owner, repo } = await this.api() - dbg(`retrieving issue details for issue number: ${issue_number}`) - if (isNaN(issue_number)) { - issue_number = (await this._connection).issue - } - if (isNaN(issue_number)) { - return undefined - } - const { data } = await client.rest.issues.get({ - owner, - repo, - issue_number, - }) - return data - } - - async updateIssue( - issueNumber: number | string, - options?: GitHubIssueUpdateOptions - ): Promise { - issueNumber = normalizeInt(issueNumber) - const { client, owner, repo } = await this.api() - dbg(`updating issue number: ${issueNumber}`) - if (isNaN(issueNumber)) { - issueNumber = (await this._connection).issue - } - if (isNaN(issueNumber)) { - return undefined - } - const { data } = await client.rest.issues.update({ - owner, - repo, - issue_number: issueNumber, - ...options, - }) - return data - } - - async createIssueComment( - issue_number: number | string, - body: string - ): Promise { - issue_number = normalizeInt(issue_number) - const { client, owner, repo } = await this.api() - dbg(`creating comment for issue number: ${issue_number}`) - if (isNaN(issue_number)) { - issue_number = (await this._connection).issue - } - if (isNaN(issue_number)) { - return undefined - } - const { data } = await client.rest.issues.createComment({ - owner, - repo, - issue_number, - body: prettifyMarkdown(dedent(body)), - }) - dbg(`created comment %s`, data.id) - return data - } - - async updateIssueComment(comment_id: number | string, body: string) { - const { client, owner, repo } = await this.api() - dbg(`updating comment %s`, comment_id) - const { data } = await client.rest.issues.updateComment({ - owner, - repo, - comment_id: normalizeInt(comment_id), - body: prettifyMarkdown(dedent(body)), - }) - dbg(`updated comment %s`, data.id) - return data - } - - async listPullRequests( - options?: { - state?: "open" | "closed" | "all" - sort?: "created" | "updated" | "popularity" | "long-running" - direction?: "asc" | "desc" - } & GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`listing pull requests for repository`) - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {} - const ite = client.paginate.iterator(client.rest.pulls.list, { - owner, - repo, - ...rest, - }) - const res = await paginatorToArray(ite, count, (i) => i.data) - return res - } - - async getPullRequest( - pull_number?: number | string - ): Promise { - pull_number = normalizeInt(pull_number) - const { client, owner, repo } = await this.api() - dbg(`retrieving pull request details for pull number: ${pull_number}`) - if (isNaN(pull_number)) { - pull_number = (await this._connection).issue - } - if (isNaN(pull_number)) { - return undefined - } - - const { data } = await client.rest.pulls.get({ - owner, - repo, - pull_number, - }) - return data - } - - async listPullRequestReviewComments( - pull_number: number, - options?: GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`listing review comments for pull request number: ${pull_number}`) - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? 
{} - const ite = client.paginate.iterator( - client.rest.pulls.listReviewComments, - { - owner, - repo, - pull_number, - ...rest, - } - ) - const res = await paginatorToArray(ite, count, (i) => i.data) - return res - } - - async listIssueComments( - issue_number: number, - options?: { reactions?: boolean } & GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`listing comments for issue number: ${issue_number}`) - const { - reactions, - count = GITHUB_REST_PAGE_DEFAULT, - ...rest - } = options ?? {} - const ite = client.paginate.iterator(client.rest.issues.listComments, { - owner, - repo, - issue_number, - ...rest, - }) - const res = await paginatorToArray(ite, count, (i) => i.data) - return res - } - - async listReleases( - options?: GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`listing releases for repository`) - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {} - const ite = client.paginate.iterator(client.rest.repos.listReleases, { - owner, - repo, - ...rest, - }) - const res = await paginatorToArray(ite, count, (i) => i.data) - return res - } - - async workflowRun(runId: number | string): Promise { - const { client, owner, repo } = await this.api() - dbg(`retrieving workflow run details for run ID: ${runId}`) - const { data } = await client.rest.actions.getWorkflowRun({ - owner, - repo, - run_id: normalizeInt(runId), - }) - dbg(`workflow run: %O`, data) - return data - } - - async listWorkflowRuns( - workflowIdOrFilename: string | number, - options?: { - branch?: string - status?: GitHubWorkflowRunStatus - } & GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg( - `listing workflow runs for workflow ID or filename: ${workflowIdOrFilename}` - ) - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {} - const ite = client.paginate.iterator( - workflowIdOrFilename - ? client.rest.actions.listWorkflowRuns - : client.rest.actions.listWorkflowRunsForRepo, - { - owner, - repo, - workflow_id: workflowIdOrFilename, - per_page: 100, - ...rest, - } - ) - const res = await paginatorToArray( - ite, - count, - (i) => i.data, - ({ conclusion }) => conclusion !== "skipped" - ) - dbg(`workflow runs: %O`, res) - return res - } - - /** - * List artifacts for a given workflow run - * @param runId - */ - async listWorkflowRunArtifacts( - runId: number | string, - options?: GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`listing artifacts for workflow run ID: ${runId}`) - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? 
{} - const ite = client.paginate.iterator( - client.rest.actions.listWorkflowRunArtifacts, - { - owner, - repo, - run_id: normalizeInt(runId), - per_page: 100, - ...rest, - } - ) - const res = await paginatorToArray(ite, count, (i) => i.data) - dbg(`workflow run artifacts: %O`, res) - return res - } - - /** - * Gets the files of a GitHub Action workflow run artifact - * @param artifactId - */ - async artifact(artifactId: number | string): Promise { - const { client, owner, repo } = await this.api() - dbg(`retrieving artifact details for artifact ID: ${artifactId}`) - const { data } = await client.rest.actions.getArtifact({ - owner, - repo, - artifact_id: normalizeInt(artifactId), - }) - - return data - } - - async resolveAssetUrl(url: string) { - if (!uriTryParse(url)) return undefined // unknown format - if (!GITHUB_ASSET_URL_RX.test(url)) return undefined // not a github asset - const { client, owner, repo } = await this.api() - dbg(`asset: resolving url for %s`, uriRedact(url)) - const { data, status } = await client.rest.markdown.render({ - owner, - repo, - context: `${owner}/${repo}`, // force html with token - text: `![](${url})`, - mode: "gfm", - }) - dbg(`asset: resolution %s`, status) - const { resolved } = - / { - const { client, owner, repo } = await this.api() - dbg(`downloading artifact files for artifact ID: ${artifactId}`) - const { url } = await client.rest.actions.downloadArtifact({ - owner, - repo, - artifact_id: normalizeInt(artifactId), - archive_format: "zip", - }) - dbg(`received url, downloading...`) - const fetch = await createFetch() - const res = await fetch(url) - if (!res.ok) throw new Error(res.statusText) - const buffer = await res.arrayBuffer() - const files = await unzip(new Uint8Array(buffer)) - return files - } - - async listWorkflowJobs( - run_id: number, - options?: { filter?: "all" | "latest" } & GitHubPaginationOptions - ): Promise { - // Get the jobs for the specified workflow run - dbg(`listing jobs for workflow run ID: ${run_id}`) - const { client, owner, repo } = await this.api() - const { - filter, - count = GITHUB_REST_PAGE_DEFAULT, - ...rest - } = options ?? 
{} - const ite = client.paginate.iterator( - client.rest.actions.listJobsForWorkflowRun, - { - owner, - repo, - run_id, - filter, - } - ) - const jobs = await paginatorToArray(ite, count, (i) => i.data) - - const res: GitHubWorkflowJob[] = [] - dbg(`processing workflow jobs`) - for (const job of jobs) { - if ( - job.conclusion === "skipped" || - job.conclusion === "cancelled" - ) { - continue - } - const { url: logs_url } = - await client.rest.actions.downloadJobLogsForWorkflowRun({ - owner, - repo, - job_id: job.id, - }) - const logsRes = await fetch(logs_url) - const text = await logsRes.text() - res.push({ - ...job, - logs_url, - logs: text, - content: parseJobLog(text), - }) - } - dbg(`workflow jobs: %O`, res) - return res - } - - /** - * Downloads a GitHub Action workflow run log - * @param jobId - */ - async downloadWorkflowJobLog( - job_id: number, - options?: { llmify?: boolean } - ): Promise { - const { client, owner, repo } = await this.api() - const { url: logs_url } = - await client.rest.actions.downloadJobLogsForWorkflowRun({ - owner, - repo, - job_id, - }) - const logsRes = await fetch(logs_url) - let text = await logsRes.text() - if (options?.llmify) { - text = parseJobLog(text) - } - return text - } - - private async downloadJob(job_id: number) { - const { client, owner, repo } = await this.api() - dbg(`downloading job log for job ID: ${job_id}`) - const filename = `job-${job_id}.log` - const { url } = await client.rest.actions.downloadJobLogsForWorkflowRun( - { - owner, - repo, - job_id, - } - ) - const res = await fetch(url) - const content = await res.text() - return { filename, url, content } - } - - async diffWorkflowJobLogs(job_id: number, other_job_id: number) { - const job = await this.downloadJob(job_id) - dbg( - `diffing workflow job logs for job IDs: ${job_id} and ${other_job_id}` - ) - const other = await this.downloadJob(other_job_id) - const justDiff = diffCreatePatch(job, other) - - // try compressing - job.content = parseJobLog(job.content) - other.content = parseJobLog(other.content) - const parsedDiff = diffCreatePatch(job, other) - const diff = justDiff.length < parsedDiff.length ? 
justDiff : parsedDiff
-
-        return llmifyDiff(diff)
-    }
-
-    async getFile(filename: string, ref: string): Promise {
-        const { client, owner, repo } = await this.api()
-        dbg(`retrieving file content for filename: ${filename} and ref: ${ref}`)
-        const { data: content } = await client.rest.repos.getContent({
+              return false;
+            },
+            onSecondaryRateLimit: (retryAfter: number, options: any, octokit: Octokit) => {
+              octokit.log.warn(
+                `SecondaryRateLimit detected for request ${options.method} ${options.url}`,
+              );
+            },
+          },
+        });
+        resolve({
+          client: res,
+          ...conn,
+        });
+      });
+    }
+    return this._client;
+  }
+
+  async info(): Promise {
+    const {
+      apiUrl: baseUrl,
+      token: auth,
+      repo,
+      owner,
+      ref,
+      refName,
+      issue,
+      runId,
+      runUrl,
+      event,
+      eventName,
+    } = await this.connection();
+    return Object.freeze(
+      deleteUndefinedValues({
+        baseUrl,
+        repo,
+        owner,
+        auth,
+        ref,
+        refName,
+        runId,
+        runUrl,
+        issueNumber: issue,
+        eventName,
+        event,
+      }),
+    );
+  }
+
+  async repo(): Promise<{
+    name: string;
+    full_name: string;
+    default_branch: string;
+  }> {
+    const { client, owner, repo } = await this.api();
+    const res = await client.rest.repos.get({ owner, repo });
+    return res.data;
+  }
+
+  async getRef(branchName: string): Promise {
+    const { client, owner, repo } = await this.api();
+    try {
+      dbg(`get ref %s`, branchName);
+      const existing = await client.git.getRef({
+        owner,
+        repo,
+        ref: `heads/${branchName}`,
+      });
+      return existing.data;
+    } catch (e) {
+      dbg(`ref not found`);
+      return undefined;
+    }
+  }
+
+  async getOrCreateRef(
+    branchName: string,
+    options?: { base?: string; orphaned?: boolean | string },
+  ): Promise {
+    const { client, owner, repo } = await this.api();
+    const { base, orphaned } = options ?? {};
+    if (!branchName) throw new Error("branchName is required");
+
+    dbg(`checking if branch %s exists`, branchName);
+    const existing = await this.getRef(branchName);
+    if (existing) {
+      dbg(`branch %s already exists`, branchName);
+      return existing;
+    }
+
+    let sha: string;
+    dbg(`creating branch %s`, branchName);
+    if (orphaned) {
+      dbg(`creating orphaned`);
+      // Step 0: Create a blob for the file content
+      const { data: blob } = await client.git.createBlob({
+        owner,
+        repo,
+        content: Buffer.from(
+          typeof orphaned === "string" ? orphaned : `Orphaned branch created by GenAIScript.`,
+        ).toString("base64"),
+        encoding: "base64",
+      });
+
+      // Step 1: Create an empty tree
+      const { data: tree } = await client.git.createTree({
+        owner,
+        repo,
+        tree: [
+          {
+            path: "README.md",
+            mode: "100644",
+            type: "blob",
+            sha: blob.sha,
+          },
+        ],
+      });
+      dbg(`created tree %s`, tree.sha);
+      // Step 2: Create a commit with NO parents
+      const { data: commit } = await client.git.createCommit({
+        owner,
+        repo,
+        message: "Initial commit on orphan branch",
+        tree: tree.sha,
+        parents: [], // <--- empty parent list = no history
+      });
+      sha = commit.sha;
+      dbg(`created commit %s`, commit.sha);
+    } else {
+      if (!base) {
+        dbg(`base is required for non-orphaned branch`);
+        const repo = await this.repo();
+        sha = repo.default_branch;
+      } else sha = base;
+    }
+
+    // Step 3: Create a reference (branch) pointing to the commit
+    dbg(`creating reference %s <- %s`, branchName, sha);
+    const res = await client.git.createRef({
+      owner,
+      repo,
+      ref: `refs/heads/${branchName}`,
+      sha,
+    });
+    return res.data;
+  }
+
+  async uploadAsset(file: BufferLike, options?: { branchName?: string }): Promise {
+    const { branchName = GITHUB_ASSET_BRANCH } = options ?? 
{}; + const { client, owner, repo } = await this.api(); + if (!file) { + dbg(`no buffer provided, nothing to upload`); + return undefined; + } + const buffer = await resolveBufferLike(file); + if (!buffer) { + dbg(`failed to resolve buffer, nothing to upload`); + return undefined; + } + const base64Content = buffer.toString("base64"); + const fileType = await fileTypeFromBuffer(buffer); + const hash = createHash("sha256"); + hash.write(base64Content); + const hashId = hash.digest().toString("hex"); + const uploadPath = hashId + (fileType ? `.${fileType.ext}` : ".txt"); + const rawUrl = `https://raw.githubusercontent.com/${owner}/${repo}/refs/heads/${branchName}/${uploadPath}`; + + // try to get file + dbg(`checking %s`, rawUrl); + const cached = await fetch(rawUrl, { method: "HEAD" }); + if (cached.status === 200) { + dbg(`asset already exists, skip upload`); + return rawUrl; + } + + dbg(`uploading asset %s to branch %s`, uploadPath, branchName); + await this.getOrCreateRef(branchName, { orphaned: true }); + const { data: blob } = await client.git.createBlob({ + owner, + repo, + content: base64Content, + encoding: "base64", + }); + dbg(`created blob %s`, blob.sha); + + // 3. Get the latest commit (HEAD) of the branch + const { data: refData } = await client.git.getRef({ + owner, + repo, + ref: `heads/${branchName}`, + }); + const latestCommitSha = refData.object.sha; + dbg(`head ref %s: %s`, refData.ref, latestCommitSha); + + // 4. Get the tree of the latest commit + const { data: commitData } = await client.git.getCommit({ + owner, + repo, + commit_sha: latestCommitSha, + }); + const baseTreeSha = commitData.tree.sha; + dbg(`base tree sha %s`, baseTreeSha); + + // 5. Create a new tree adding the image + const { data: newTree } = await client.git.createTree({ + owner, + repo, + base_tree: baseTreeSha, + tree: [ + { + path: uploadPath, + mode: "100644", + type: "blob", + sha: blob.sha, + }, + ], + }); + + dbg("tree created %s", newTree.sha); + + // 6. Create a new commit with the new tree + const { data: newCommit } = await client.git.createCommit({ + owner, + repo, + message: `Upload asset ${uploadPath}`, + tree: newTree.sha, + parents: [latestCommitSha], + }); + dbg("commit created %s", newCommit.sha); + + // 7. Update the branch to point to the new commit + await client.git.updateRef({ + owner, + repo, + ref: `heads/${branchName}`, + sha: newCommit.sha, + force: false, // do not force push + }); + + return rawUrl; + } + + async listIssues( + options?: { + state?: "open" | "closed" | "all"; + labels?: string; + sort?: "created" | "updated" | "comments"; + direction?: "asc" | "desc"; + creator?: string; + assignee?: string; + since?: string; + mentioned?: string; + } & GitHubPaginationOptions, + ): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing issues for repository`); + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.issues.listForRepo, { + owner, + repo, + ...rest, + }); + const res = await paginatorToArray(ite, count, (i) => i.data); + return res; + } + + async listGists( + options?: { + since?: string; + filenameAsResources?: boolean; + } & GitHubPaginationOptions, + ): Promise { + const { client } = await this.api(); + dbg(`listing gists for user`); + const { count = GITHUB_REST_PAGE_DEFAULT, filenameAsResources, ...rest } = options ?? 
{}; + const ite = client.paginate.iterator(client.rest.gists.list, { + ...rest, + }); + const res = await paginatorToArray(ite, count, (i) => i.data); + return res.map( + (r) => + ({ + id: r.id, + description: r.description, + created_at: r.created_at, + files: Object.values(r.files).map( + ({ filename, size }) => + ({ + filename: filenameAsResources ? `gist://${r.id}/${filename}` : filename, + size, + }) satisfies WorkspaceFile, + ), + }) satisfies GitHubGist, + ); + } + + async getGist(gist_id?: string): Promise { + if (typeof gist_id === "string") { + gist_id = gist_id.trim(); + } + const { client, owner } = await this.api(); + dbg(`retrieving gist details for gist ID: ${gist_id}`); + if (!gist_id) { + return undefined; + } + const { data } = await client.rest.gists.get({ + gist_id, + owner, + }); + const { files, id, description, created_at, ...rest } = data; + if (Object.values(files || {}).some((f) => f.encoding !== "utf-8" && f.encoding != "base64")) { + dbg(`unsupported encoding for gist files`); + return undefined; + } + const res = { + id, + description, + created_at, + files: Object.values(files).map( + ({ filename, content, size, encoding }) => + deleteUndefinedValues({ + filename, + content, + encoding: + encoding === "utf-8" ? undefined : encoding === "base64" ? "base64" : undefined, + size, + }) satisfies WorkspaceFile, + ), + } satisfies GitHubGist; + + dbg(`gist: %d files, %s`, res.files.length, res.description || ""); + return res; + } + + async getIssue(issue_number?: number | string): Promise { + issue_number = normalizeInt(issue_number); + const { client, owner, repo } = await this.api(); + dbg(`retrieving issue details for issue number: ${issue_number}`); + if (isNaN(issue_number)) { + issue_number = (await this._connection).issue; + } + if (isNaN(issue_number)) { + return undefined; + } + const { data } = await client.rest.issues.get({ + owner, + repo, + issue_number, + }); + return data; + } + + async updateIssue( + issueNumber: number | string, + options?: GitHubIssueUpdateOptions, + ): Promise { + issueNumber = normalizeInt(issueNumber); + const { client, owner, repo } = await this.api(); + dbg(`updating issue number: ${issueNumber}`); + if (isNaN(issueNumber)) { + issueNumber = (await this._connection).issue; + } + if (isNaN(issueNumber)) { + return undefined; + } + const { data } = await client.rest.issues.update({ + owner, + repo, + issue_number: issueNumber, + ...options, + }); + return data; + } + + async createIssueComment(issue_number: number | string, body: string): Promise { + issue_number = normalizeInt(issue_number); + const { client, owner, repo } = await this.api(); + dbg(`creating comment for issue number: ${issue_number}`); + if (isNaN(issue_number)) { + issue_number = (await this._connection).issue; + } + if (isNaN(issue_number)) { + return undefined; + } + const { data } = await client.rest.issues.createComment({ + owner, + repo, + issue_number, + body: prettifyMarkdown(dedent(body)), + }); + dbg(`created comment %s`, data.id); + return data; + } + + async updateIssueComment(comment_id: number | string, body: string) { + const { client, owner, repo } = await this.api(); + dbg(`updating comment %s`, comment_id); + const { data } = await client.rest.issues.updateComment({ + owner, + repo, + comment_id: normalizeInt(comment_id), + body: prettifyMarkdown(dedent(body)), + }); + dbg(`updated comment %s`, data.id); + return data; + } + + async listPullRequests( + options?: { + state?: "open" | "closed" | "all"; + sort?: "created" | "updated" | 
"popularity" | "long-running"; + direction?: "asc" | "desc"; + } & GitHubPaginationOptions, + ): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing pull requests for repository`); + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.pulls.list, { + owner, + repo, + ...rest, + }); + const res = await paginatorToArray(ite, count, (i) => i.data); + return res; + } + + async getPullRequest(pull_number?: number | string): Promise { + pull_number = normalizeInt(pull_number); + const { client, owner, repo } = await this.api(); + dbg(`retrieving pull request details for pull number: ${pull_number}`); + if (isNaN(pull_number)) { + pull_number = (await this._connection).issue; + } + if (isNaN(pull_number)) { + return undefined; + } + + const { data } = await client.rest.pulls.get({ + owner, + repo, + pull_number, + }); + return data; + } + + async listPullRequestReviewComments( + pull_number: number, + options?: GitHubPaginationOptions, + ): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing review comments for pull request number: ${pull_number}`); + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.pulls.listReviewComments, { + owner, + repo, + pull_number, + ...rest, + }); + const res = await paginatorToArray(ite, count, (i) => i.data); + return res; + } + + async listIssueComments( + issue_number: number, + options?: { reactions?: boolean } & GitHubPaginationOptions, + ): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing comments for issue number: ${issue_number}`); + const { reactions, count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.issues.listComments, { + owner, + repo, + issue_number, + ...rest, + }); + const res = await paginatorToArray(ite, count, (i) => i.data); + return res; + } + + async listReleases(options?: GitHubPaginationOptions): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing releases for repository`); + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.repos.listReleases, { + owner, + repo, + ...rest, + }); + const res = await paginatorToArray(ite, count, (i) => i.data); + return res; + } + + async workflowRun(runId: number | string): Promise { + const { client, owner, repo } = await this.api(); + dbg(`retrieving workflow run details for run ID: ${runId}`); + const { data } = await client.rest.actions.getWorkflowRun({ + owner, + repo, + run_id: normalizeInt(runId), + }); + dbg(`workflow run: %O`, data); + return data; + } + + async listWorkflowRuns( + workflowIdOrFilename: string | number, + options?: { + branch?: string; + status?: GitHubWorkflowRunStatus; + } & GitHubPaginationOptions, + ): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing workflow runs for workflow ID or filename: ${workflowIdOrFilename}`); + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator( + workflowIdOrFilename + ? 
client.rest.actions.listWorkflowRuns + : client.rest.actions.listWorkflowRunsForRepo, + { + owner, + repo, + workflow_id: workflowIdOrFilename, + per_page: 100, + ...rest, + }, + ); + const res = await paginatorToArray( + ite, + count, + (i) => i.data, + ({ conclusion }) => conclusion !== "skipped", + ); + dbg(`workflow runs: %O`, res); + return res; + } + + /** + * List artifacts for a given workflow run + * @param runId + */ + async listWorkflowRunArtifacts( + runId: number | string, + options?: GitHubPaginationOptions, + ): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing artifacts for workflow run ID: ${runId}`); + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.actions.listWorkflowRunArtifacts, { + owner, + repo, + run_id: normalizeInt(runId), + per_page: 100, + ...rest, + }); + const res = await paginatorToArray(ite, count, (i) => i.data); + dbg(`workflow run artifacts: %O`, res); + return res; + } + + /** + * Gets the files of a GitHub Action workflow run artifact + * @param artifactId + */ + async artifact(artifactId: number | string): Promise { + const { client, owner, repo } = await this.api(); + dbg(`retrieving artifact details for artifact ID: ${artifactId}`); + const { data } = await client.rest.actions.getArtifact({ + owner, + repo, + artifact_id: normalizeInt(artifactId), + }); + + return data; + } + + async resolveAssetUrl(url: string) { + if (!uriTryParse(url)) return undefined; // unknown format + if (!GITHUB_ASSET_URL_RX.test(url)) return undefined; // not a github asset + const { client, owner, repo } = await this.api(); + dbg(`asset: resolving url for %s`, uriRedact(url)); + const { data, status } = await client.rest.markdown.render({ + owner, + repo, + context: `${owner}/${repo}`, // force html with token + text: `![](${url})`, + mode: "gfm", + }); + dbg(`asset: resolution %s`, status); + const { resolved } = / { + const { client, owner, repo } = await this.api(); + dbg(`downloading artifact files for artifact ID: ${artifactId}`); + const { url } = await client.rest.actions.downloadArtifact({ + owner, + repo, + artifact_id: normalizeInt(artifactId), + archive_format: "zip", + }); + dbg(`received url, downloading...`); + const fetch = await createFetch(); + const res = await fetch(url); + if (!res.ok) throw new Error(res.statusText); + const buffer = await res.arrayBuffer(); + const files = await unzip(new Uint8Array(buffer)); + return files; + } + + async listWorkflowJobs( + run_id: number, + options?: { filter?: "all" | "latest" } & GitHubPaginationOptions, + ): Promise { + // Get the jobs for the specified workflow run + dbg(`listing jobs for workflow run ID: ${run_id}`); + const { client, owner, repo } = await this.api(); + const { filter, count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? 
{}; + const ite = client.paginate.iterator(client.rest.actions.listJobsForWorkflowRun, { + owner, + repo, + run_id, + filter, + }); + const jobs = await paginatorToArray(ite, count, (i) => i.data); + + const res: GitHubWorkflowJob[] = []; + dbg(`processing workflow jobs`); + for (const job of jobs) { + if (job.conclusion === "skipped" || job.conclusion === "cancelled") { + continue; + } + const { url: logs_url } = await client.rest.actions.downloadJobLogsForWorkflowRun({ + owner, + repo, + job_id: job.id, + }); + const logsRes = await fetch(logs_url); + const text = await logsRes.text(); + res.push({ + ...job, + logs_url, + logs: text, + content: parseJobLog(text), + }); + } + dbg(`workflow jobs: %O`, res); + return res; + } + + /** + * Downloads a GitHub Action workflow run log + * @param jobId + */ + async downloadWorkflowJobLog(job_id: number, options?: { llmify?: boolean }): Promise { + const { client, owner, repo } = await this.api(); + const { url: logs_url } = await client.rest.actions.downloadJobLogsForWorkflowRun({ + owner, + repo, + job_id, + }); + const logsRes = await fetch(logs_url); + let text = await logsRes.text(); + if (options?.llmify) { + text = parseJobLog(text); + } + return text; + } + + private async downloadJob(job_id: number) { + const { client, owner, repo } = await this.api(); + dbg(`downloading job log for job ID: ${job_id}`); + const filename = `job-${job_id}.log`; + const { url } = await client.rest.actions.downloadJobLogsForWorkflowRun({ + owner, + repo, + job_id, + }); + const res = await fetch(url); + const content = await res.text(); + return { filename, url, content }; + } + + async diffWorkflowJobLogs(job_id: number, other_job_id: number) { + const job = await this.downloadJob(job_id); + dbg(`diffing workflow job logs for job IDs: ${job_id} and ${other_job_id}`); + const other = await this.downloadJob(other_job_id); + const justDiff = diffCreatePatch(job, other); + + // try compressing + job.content = parseJobLog(job.content); + other.content = parseJobLog(other.content); + const parsedDiff = diffCreatePatch(job, other); + const diff = justDiff.length < parsedDiff.length ? justDiff : parsedDiff; + + return llmifyDiff(diff); + } + + async getFile(filename: string, ref: string): Promise { + const { client, owner, repo } = await this.api(); + dbg(`retrieving file content for filename: ${filename} and ref: ${ref}`); + const { data: content } = await client.rest.repos.getContent({ + owner, + repo, + path: filename, + ref, + }); + if ("content" in content) { + return { + filename, + content: Buffer.from(content.content, "base64").toString("utf-8"), + }; + } else { + return undefined; + } + } + + async searchCode( + query: string, + options?: GitHubPaginationOptions, + ): Promise { + const { client, owner, repo } = await this.api(); + dbg(`searching code with query: ${query}`); + const q = query + `+repo:${owner}/${repo}`; + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.search.code, { + q, + ...(options ?? 
{}), + }); + const items = await paginatorToArray(ite, count, (i) => i.data); + return items.map(({ name, path, sha, html_url, score, repository }) => ({ + name, + path, + sha, + html_url, + score, + repository: repository.full_name, + })); + } + + async workflow(workflowId: number | string): Promise { + const { client, owner, repo } = await this.api(); + dbg(`retrieving workflow details for workflow ID: ${workflowId}`); + const { data } = await client.rest.actions.getWorkflow({ + owner, + repo, + workflow_id: workflowId, + }); + dbg(`workflow: %O`, data); + return data; + } + + async listWorkflows(options?: GitHubPaginationOptions): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing workflows for repository`); + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.actions.listRepoWorkflows, { + owner, + repo, + ...(options ?? {}), + }); + const workflows = await paginatorToArray(ite, count, (i) => i.data); + dbg(`workflows: %O`, workflows); + return workflows.map(({ id, name, path }) => ({ + id, + name, + path, + })); + } + + async listBranches(options?: GitHubPaginationOptions): Promise { + dbg(`listing branches for repository`); + const { client, owner, repo } = await this.api(); + const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {}; + const ite = client.paginate.iterator(client.rest.repos.listBranches, { + owner, + repo, + ...(options ?? {}), + }); + const branches = await paginatorToArray(ite, count, (i) => i.data); + return branches.map(({ name }) => name); + } + + async listRepositoryLanguages(): Promise> { + const { client, owner, repo } = await this.api(); + dbg(`listing languages for repository`); + const { data: languages } = await client.rest.repos.listLanguages({ + owner, + repo, + }); + dbg(`languages: %O`, languages); + return languages; + } + + async listIssueLabels(issueNumber?: string | number): Promise { + const { client, owner, repo } = await this.api(); + dbg(`listing labels for %o`, issueNumber); + const { data: labels } = + issueNumber === undefined + ? await client.rest.issues.listLabelsForRepo({ owner, repo, - path: filename, - ref, - }) - if ("content" in content) { - return { - filename, - content: Buffer.from(content.content, "base64").toString( - "utf-8" - ), - } - } else { - return undefined - } - } - - async searchCode( - query: string, - options?: GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`searching code with query: ${query}`) - const q = query + `+repo:${owner}/${repo}` - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {} - const ite = client.paginate.iterator(client.rest.search.code, { - q, - ...(options ?? 
{}), - }) - const items = await paginatorToArray(ite, count, (i) => i.data) - return items.map( - ({ name, path, sha, html_url, score, repository }) => ({ - name, - path, - sha, - html_url, - score, - repository: repository.full_name, - }) - ) - } - - async workflow(workflowId: number | string): Promise { - const { client, owner, repo } = await this.api() - dbg(`retrieving workflow details for workflow ID: ${workflowId}`) - const { data } = await client.rest.actions.getWorkflow({ + }) + : await client.rest.issues.listLabelsOnIssue({ owner, repo, - workflow_id: workflowId, - }) - dbg(`workflow: %O`, data) - return data - } - - async listWorkflows( - options?: GitHubPaginationOptions - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`listing workflows for repository`) - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {} - const ite = client.paginate.iterator( - client.rest.actions.listRepoWorkflows, - { + issue_number: normalizeInt(issueNumber), + }); + dbg(`labels: %O`, labels); + return labels satisfies GitHubLabel[]; + } + + async getRepositoryContent( + path: string, + options?: { + ref?: string; + glob?: string; + downloadContent?: boolean; + maxDownloadSize?: number; + type?: string; + }, + ): Promise { + const { client, owner, repo } = await this.api(); + dbg(`retrieving repository content for path: ${path}`); + const { ref, type, glob, downloadContent, maxDownloadSize } = options ?? {}; + const { data: contents } = await client.rest.repos.getContent({ + owner, + repo, + path, + ref, + }); + const res = arrayify(contents) + .filter((c) => !type || c.type === type) + .filter((c) => !glob || isGlobMatch(c.path, glob)) + .map((content) => ({ + filename: content.path, + type: content.type, + size: content.size, + content: + content.type === "file" && content.content + ? Buffer.from(content.content, "base64").toString("utf-8") + : undefined, + })); + if (downloadContent) { + const limit = concurrentLimit("github", GITHUB_REST_API_CONCURRENCY_LIMIT); + await Promise.all( + res + .filter((f) => f.type === "file" && !f.content) + .filter((f) => !maxDownloadSize || f.size <= maxDownloadSize) + .map((f) => { + const filename = f.filename; + return async () => { + const { data: fileContent } = await client.rest.repos.getContent({ owner, repo, - ...(options ?? {}), - } - ) - const workflows = await paginatorToArray(ite, count, (i) => i.data) - dbg(`workflows: %O`, workflows) - return workflows.map(({ id, name, path }) => ({ - id, - name, - path, - })) - } - - async listBranches(options?: GitHubPaginationOptions): Promise { - dbg(`listing branches for repository`) - const { client, owner, repo } = await this.api() - const { count = GITHUB_REST_PAGE_DEFAULT, ...rest } = options ?? {} - const ite = client.paginate.iterator(client.rest.repos.listBranches, { - owner, - repo, - ...(options ?? {}), - }) - const branches = await paginatorToArray(ite, count, (i) => i.data) - return branches.map(({ name }) => name) - } - - async listRepositoryLanguages(): Promise> { - const { client, owner, repo } = await this.api() - dbg(`listing languages for repository`) - const { data: languages } = await client.rest.repos.listLanguages({ - owner, - repo, - }) - dbg(`languages: %O`, languages) - return languages - } - - async listIssueLabels( - issueNumber?: string | number - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`listing labels for %o`, issueNumber) - const { data: labels } = - issueNumber === undefined - ? 
await client.rest.issues.listLabelsForRepo({ - owner, - repo, - }) - : await client.rest.issues.listLabelsOnIssue({ - owner, - repo, - issue_number: normalizeInt(issueNumber), - }) - dbg(`labels: %O`, labels) - return labels satisfies GitHubLabel[] - } - - async getRepositoryContent( - path: string, - options?: { - ref?: string - glob?: string - downloadContent?: boolean - maxDownloadSize?: number - type?: string - } - ): Promise { - const { client, owner, repo } = await this.api() - dbg(`retrieving repository content for path: ${path}`) - const { ref, type, glob, downloadContent, maxDownloadSize } = - options ?? {} - const { data: contents } = await client.rest.repos.getContent({ - owner, - repo, - path, - ref, - }) - const res = arrayify(contents) - .filter((c) => !type || c.type === type) - .filter((c) => !glob || isGlobMatch(c.path, glob)) - .map((content) => ({ - filename: content.path, - type: content.type, - size: content.size, - content: - content.type === "file" && content.content - ? Buffer.from(content.content, "base64").toString( - "utf-8" - ) - : undefined, - })) - if (downloadContent) { - const limit = concurrentLimit( - "github", - GITHUB_REST_API_CONCURRENCY_LIMIT - ) - await Promise.all( - res - .filter((f) => f.type === "file" && !f.content) - .filter( - (f) => !maxDownloadSize || f.size <= maxDownloadSize - ) - .map((f) => { - const filename = f.filename - return async () => { - const { data: fileContent } = - await client.rest.repos.getContent({ - owner, - repo, - path: filename, - ref, - }) - f.content = Buffer.from( - arrayify(fileContent)[0].content, - "base64" - ).toString("utf8") - } - }) - .map((p) => limit(p)) - ) - } - return res - } + path: filename, + ref, + }); + f.content = Buffer.from(arrayify(fileContent)[0].content, "base64").toString("utf8"); + }; + }) + .map((p) => limit(p)), + ); + } + return res; + } } function parseJobLog(text: string) { - const lines = cleanLog(text).split(/\r?\n/g) - const groups: { title: string; text: string }[] = [] - let current = groups[0] - for (const line of lines) { - if (line.startsWith("##[group]")) { - current = { - title: line.slice("##[group]".length), - text: "", - } - } else if (line.startsWith("##[endgroup]")) { - if (current) { - groups.push(current) - } - current = undefined - } else if (line.includes("Post job cleanup.")) { - break // ignore cleanup typically - } else { - if (!current) { - current = { title: "", text: "" } - } - current.text += line + "\n" - } - } - if (current) { - groups.push(current) - } - - const ignoreSteps = [ - "Runner Image", - "Fetching the repository", - "Checking out the ref", - "Setting up auth", - "Setting up auth for fetching submodules", - "Getting Git version info", - "Initializing the repository", - "Determining the checkout info", - "Persisting credentials for submodules", - ] - return groups - .filter(({ title }) => !ignoreSteps.includes(title)) - .map((f) => - f.title ? 
`##[group]${f.title}\n${f.text}\n##[endgroup]` : f.text - ) - .join("\n") + const lines = cleanLog(text).split(/\r?\n/g); + const groups: { title: string; text: string }[] = []; + let current = groups[0]; + for (const line of lines) { + if (line.startsWith("##[group]")) { + current = { + title: line.slice("##[group]".length), + text: "", + }; + } else if (line.startsWith("##[endgroup]")) { + if (current) { + groups.push(current); + } + current = undefined; + } else if (line.includes("Post job cleanup.")) { + break; // ignore cleanup typically + } else { + if (!current) { + current = { title: "", text: "" }; + } + current.text += line + "\n"; + } + } + if (current) { + groups.push(current); + } + + const ignoreSteps = [ + "Runner Image", + "Fetching the repository", + "Checking out the ref", + "Setting up auth", + "Setting up auth for fetching submodules", + "Getting Git version info", + "Initializing the repository", + "Determining the checkout info", + "Persisting credentials for submodules", + ]; + return groups + .filter(({ title }) => !ignoreSteps.includes(title)) + .map((f) => (f.title ? `##[group]${f.title}\n${f.text}\n##[endgroup]` : f.text)) + .join("\n"); } export function cleanLog(text: string) { - return shellRemoveAsciiColors( - text.replace( - // timestamps - /^?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{2,}Z /gm, - "" - ) - ) + return shellRemoveAsciiColors( + text.replace( + // timestamps + /^?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{2,}Z /gm, + "", + ), + ); } diff --git a/packages/core/src/gitignore.ts b/packages/core/src/gitignore.ts index b63cf66b8f..4378f76d88 100644 --- a/packages/core/src/gitignore.ts +++ b/packages/core/src/gitignore.ts @@ -1,13 +1,13 @@ // Import the 'ignore' library to handle .gitignore file parsing and filtering -import ignorer from "ignore" -import { tryReadText, writeText } from "./fs" -import { GENAISCRIPTIGNORE, GIT_IGNORE, GIT_IGNORE_GENAI } from "./constants" -import { host } from "./host" -import { logVerbose } from "./util" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("files:gitignore") +import ignorer from "ignore"; +import { tryReadText, writeText } from "./fs"; +import { GENAISCRIPTIGNORE, GIT_IGNORE, GIT_IGNORE_GENAI } from "./constants"; +import { host } from "./host"; +import { logVerbose } from "./util"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("files:gitignore"); -export type GitIgnorer = (files: string[]) => string[] +export type GitIgnorer = (files: string[]) => string[]; /** * Creates a function to filter files based on patterns defined in .gitignore files. @@ -17,23 +17,23 @@ export type GitIgnorer = (files: string[]) => string[] * @returns A function that takes a list of files and returns only the files not ignored. 
*/ export async function createGitIgnorer(): Promise { - const gitignores = [ - await tryReadText(GIT_IGNORE), - await tryReadText(GIT_IGNORE_GENAI), - await tryReadText(GENAISCRIPTIGNORE), - ].filter((g) => !!g) - if (!gitignores.length) { - dbg("no gitignore files found") - return (f) => f - } + const gitignores = [ + await tryReadText(GIT_IGNORE), + await tryReadText(GIT_IGNORE_GENAI), + await tryReadText(GENAISCRIPTIGNORE), + ].filter((g) => !!g); + if (!gitignores.length) { + dbg("no gitignore files found"); + return (f) => f; + } - // Create an ignorer instance and add the .gitignore patterns to it - dbg("creating ignorer instance") - const ig = ignorer({ allowRelativePaths: true }) - for (const gitignore of gitignores) { - ig.add(gitignore) - } - return (files: readonly string[]) => ig.filter(files) + // Create an ignorer instance and add the .gitignore patterns to it + dbg("creating ignorer instance"); + const ig = ignorer({ allowRelativePaths: true }); + for (const gitignore of gitignores) { + ig.add(gitignore); + } + return (files: readonly string[]) => ig.filter(files); } /** @@ -44,10 +44,10 @@ export async function createGitIgnorer(): Promise { * @returns An array of files that are not ignored according to the .gitignore patterns. */ export async function filterGitIgnore(files: string[]) { - const ignorer = await createGitIgnorer() - const newFiles = ignorer(files) - dbg(`files ${files.length} -> ${newFiles.length}`) - return newFiles + const ignorer = await createGitIgnorer(); + const newFiles = ignorer(files); + dbg(`files ${files.length} -> ${newFiles.length}`); + return newFiles; } /** @@ -58,23 +58,23 @@ export async function filterGitIgnore(files: string[]) { * @param entries - List of patterns or file paths to ensure are included in the .gitignore file. */ export async function gitIgnoreEnsure(dir: string, entries: string[]) { - const fn = host.path.join(dir, GIT_IGNORE) - dbg(`reading file ${fn}`) - let src = (await tryReadText(fn)) || "" - const oldsrc = src - const newline = /\r\n/.test(src) ? "\r\n" : "\n" - const lines = src.split(/\r?\n/g) - for (const entry of entries) { - dbg(`checking entry ${entry} in lines`) - if (!lines.some((l) => l.startsWith(entry))) { - if (src) { - src += newline - } - src += entry - } - } - if (oldsrc !== src) { - logVerbose(`updating ${fn}`) - await writeText(fn, src) + const fn = host.path.join(dir, GIT_IGNORE); + dbg(`reading file ${fn}`); + let src = (await tryReadText(fn)) || ""; + const oldsrc = src; + const newline = /\r\n/.test(src) ? 
"\r\n" : "\n"; + const lines = src.split(/\r?\n/g); + for (const entry of entries) { + dbg(`checking entry ${entry} in lines`); + if (!lines.some((l) => l.startsWith(entry))) { + if (src) { + src += newline; + } + src += entry; } + } + if (oldsrc !== src) { + logVerbose(`updating ${fn}`); + await writeText(fn, src); + } } diff --git a/packages/core/src/glob.test.ts b/packages/core/src/glob.test.ts index 1cb4f9edcb..6c4b35e69f 100644 --- a/packages/core/src/glob.test.ts +++ b/packages/core/src/glob.test.ts @@ -1,45 +1,33 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { isGlobMatch } from "./glob" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { isGlobMatch } from "./glob"; describe("glob", () => { - describe("isGlobMatch", () => { - test("matches single pattern", () => { - assert.equal(isGlobMatch("file.txt", "*.txt"), true) - assert.equal(isGlobMatch("file.jpg", "*.txt"), false) - }) + describe("isGlobMatch", () => { + test("matches single pattern", () => { + assert.equal(isGlobMatch("file.txt", "*.txt"), true); + assert.equal(isGlobMatch("file.jpg", "*.txt"), false); + }); - test("matches array of patterns", () => { - const patterns = ["*.txt", "*.md", "*.doc"] - assert.equal(isGlobMatch("readme.md", patterns), true) - assert.equal(isGlobMatch("image.png", patterns), false) - }) + test("matches array of patterns", () => { + const patterns = ["*.txt", "*.md", "*.doc"]; + assert.equal(isGlobMatch("readme.md", patterns), true); + assert.equal(isGlobMatch("image.png", patterns), false); + }); - test("handles Windows paths", () => { - assert.equal(isGlobMatch("folder\\file.txt", "**/*.txt"), true) - assert.equal( - isGlobMatch("folder\\subfolder\\file.txt", "**/*.txt"), - true - ) - }) + test("handles Windows paths", () => { + assert.equal(isGlobMatch("folder\\file.txt", "**/*.txt"), true); + assert.equal(isGlobMatch("folder\\subfolder\\file.txt", "**/*.txt"), true); + }); - test("handles matchBase option", () => { - assert.equal( - isGlobMatch("path/to/file.txt", "*.txt", { matchBase: true }), - true - ) - assert.equal( - isGlobMatch("path/to/file.txt", "*.txt", { matchBase: false }), - false - ) - }) + test("handles matchBase option", () => { + assert.equal(isGlobMatch("path/to/file.txt", "*.txt", { matchBase: true }), true); + assert.equal(isGlobMatch("path/to/file.txt", "*.txt", { matchBase: false }), false); + }); - test("handles exact matches", () => { - assert.equal(isGlobMatch("exact-file.txt", "exact-file.txt"), true) - assert.equal( - isGlobMatch("different-file.txt", "exact-file.txt"), - false - ) - }) - }) -}) + test("handles exact matches", () => { + assert.equal(isGlobMatch("exact-file.txt", "exact-file.txt"), true); + assert.equal(isGlobMatch("different-file.txt", "exact-file.txt"), false); + }); + }); +}); diff --git a/packages/core/src/glob.ts b/packages/core/src/glob.ts index 8475fa14b8..e3fe0c9319 100644 --- a/packages/core/src/glob.ts +++ b/packages/core/src/glob.ts @@ -1,8 +1,8 @@ // Import the 'minimatch' library for matching file paths against glob patterns -import { minimatch } from "minimatch" +import { minimatch } from "minimatch"; // Import the 'arrayify' utility function from the local 'util' module -import { arrayify } from "./util" +import { arrayify } from "./util"; /** * Checks if a given filename matches any of the provided glob patterns. 
@@ -12,18 +12,18 @@ import { arrayify } from "./util" * @returns A boolean indicating if the filename matches any of the patterns. */ export function isGlobMatch( - filename: string, - patterns: ElementOrArray, - options?: { matchBase?: boolean } + filename: string, + patterns: ElementOrArray, + options?: { matchBase?: boolean }, ) { - // Convert patterns to an array and check if any pattern matches the filename - return arrayify(patterns).some((pattern) => { - // Perform the match using minimatch with specific options - const match = minimatch(filename, pattern, { - // Option to handle Windows paths correctly by preventing escape character issues - windowsPathsNoEscape: true, - ...(options || {}), - }) - return match // Return true if a match is found - }) + // Convert patterns to an array and check if any pattern matches the filename + return arrayify(patterns).some((pattern) => { + // Perform the match using minimatch with specific options + const match = minimatch(filename, pattern, { + // Option to handle Windows paths correctly by preventing escape character issues + windowsPathsNoEscape: true, + ...(options || {}), + }); + return match; // Return true if a match is found + }); } diff --git a/packages/core/src/global.ts b/packages/core/src/global.ts index 40a4e2058d..bb4cfbda13 100644 --- a/packages/core/src/global.ts +++ b/packages/core/src/global.ts @@ -1,4 +1,4 @@ -export const originalConsole = resolveGlobal().console +export const originalConsole = resolveGlobal().console; /** * This file defines global utilities and installs them into the global context. @@ -12,10 +12,10 @@ export const originalConsole = resolveGlobal().console * @throws Will throw an error if the global context cannot be determined. */ export function resolveGlobal(): any { - if (typeof window !== "undefined") - return window // Browser environment - else if (typeof self !== "undefined") - return self // Web worker environment - else if (typeof global !== "undefined") return global // Node.js environment - throw new Error("Could not find global") // Error if no global context is found + if (typeof window !== "undefined") + return window; // Browser environment + else if (typeof self !== "undefined") + return self; // Web worker environment + else if (typeof global !== "undefined") return global; // Node.js environment + throw new Error("Could not find global"); // Error if no global context is found } diff --git a/packages/core/src/globals.ts b/packages/core/src/globals.ts index 3d038b1fb4..e12b1905c9 100644 --- a/packages/core/src/globals.ts +++ b/packages/core/src/globals.ts @@ -1,31 +1,27 @@ -import debug from "debug" -const dbg = debug("globals") +import debug from "debug"; +const dbg = debug("globals"); // Import various parsing and stringifying utilities -import { createYAML } from "./yaml" -import { CSVParse, dataToMarkdownTable, CSVStringify, CSVChunk } from "./csv" -import { INIParse, INIStringify } from "./ini" -import { XMLParse } from "./xml" -import { - frontmatterTryParse, - splitMarkdown, - updateFrontmatter, -} from "./frontmatter" -import { JSONLStringify, JSONLTryParse } from "./jsonl" -import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html" -import { CancelError } from "./error" -import { fetchText } from "./fetchtext" -import { GitHubClient } from "./githubclient" -import { GitClient } from "./git" -import { estimateTokens, truncateTextToTokens } from "./tokens" -import { chunk, resolveTokenEncoder } from "./encoders" -import { JSON5Stringify, JSON5TryParse } from "./json5" 
-import { JSONSchemaInfer } from "./schema" -import { FFmepgClient } from "./ffmpeg" -import { promptParametersSchemaToJSONSchema } from "./parameters" -import { chunkMarkdown } from "./mdchunk" -import { resolveGlobal } from "./global" -import { MarkdownStringify } from "./markdown" -import { diffCreatePatch, diffFindChunk, tryDiffParse } from "./diff" +import { createYAML } from "./yaml"; +import { CSVParse, dataToMarkdownTable, CSVStringify, CSVChunk } from "./csv"; +import { INIParse, INIStringify } from "./ini"; +import { XMLParse } from "./xml"; +import { frontmatterTryParse, splitMarkdown, updateFrontmatter } from "./frontmatter"; +import { JSONLStringify, JSONLTryParse } from "./jsonl"; +import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html"; +import { CancelError } from "./error"; +import { fetchText } from "./fetchtext"; +import { GitHubClient } from "./githubclient"; +import { GitClient } from "./git"; +import { estimateTokens, truncateTextToTokens } from "./tokens"; +import { chunk, resolveTokenEncoder } from "./encoders"; +import { JSON5Stringify, JSON5TryParse } from "./json5"; +import { JSONSchemaInfer } from "./schema"; +import { FFmepgClient } from "./ffmpeg"; +import { promptParametersSchemaToJSONSchema } from "./parameters"; +import { chunkMarkdown } from "./mdchunk"; +import { resolveGlobal } from "./global"; +import { MarkdownStringify } from "./markdown"; +import { diffCreatePatch, diffFindChunk, tryDiffParse } from "./diff"; /** * Installs global utilities for various data formats and operations. @@ -46,148 +42,139 @@ import { diffCreatePatch, diffFindChunk, tryDiffParse } from "./diff" * - Includes an ffmpeg client for multimedia operations. */ export function installGlobals() { - dbg("install") - const glb = resolveGlobal() // Get the global context - - // Freeze YAML utilities to prevent modification - glb.YAML = createYAML() - - // Freeze CSV utilities - glb.CSV = Object.freeze({ - parse: CSVParse, // Parse CSV string to objects - stringify: CSVStringify, // Convert objects to CSV string - markdownify: dataToMarkdownTable, // Convert CSV to Markdown format - chunk: CSVChunk, - }) - - // Freeze INI utilities - glb.INI = Object.freeze({ - parse: INIParse, // Parse INI string to objects - stringify: INIStringify, // Convert objects to INI string - }) - - // Freeze XML utilities - glb.XML = Object.freeze({ - parse: XMLParse, // Parse XML string to objects - }) - - // Freeze Markdown utilities with frontmatter operations - glb.MD = Object.freeze({ - stringify: MarkdownStringify, - frontmatter: (text, format) => - frontmatterTryParse(text, { format })?.value ?? 
{}, // Parse frontmatter from markdown - content: (text) => splitMarkdown(text)?.content, // Extract content from markdown - updateFrontmatter: (text, frontmatter, format): string => - updateFrontmatter(text, frontmatter, { format }), // Update frontmatter in markdown - chunk: async (text, options) => { - const encoding = await resolveTokenEncoder(options?.model, { - disableFallback: false, - }) - const res = chunkMarkdown( - text, - (text) => encoding.encode(text).length, - options - ) - return res + dbg("install"); + const glb = resolveGlobal(); // Get the global context + + // Freeze YAML utilities to prevent modification + glb.YAML = createYAML(); + + // Freeze CSV utilities + glb.CSV = Object.freeze({ + parse: CSVParse, // Parse CSV string to objects + stringify: CSVStringify, // Convert objects to CSV string + markdownify: dataToMarkdownTable, // Convert CSV to Markdown format + chunk: CSVChunk, + }); + + // Freeze INI utilities + glb.INI = Object.freeze({ + parse: INIParse, // Parse INI string to objects + stringify: INIStringify, // Convert objects to INI string + }); + + // Freeze XML utilities + glb.XML = Object.freeze({ + parse: XMLParse, // Parse XML string to objects + }); + + // Freeze Markdown utilities with frontmatter operations + glb.MD = Object.freeze({ + stringify: MarkdownStringify, + frontmatter: (text, format) => frontmatterTryParse(text, { format })?.value ?? {}, // Parse frontmatter from markdown + content: (text) => splitMarkdown(text)?.content, // Extract content from markdown + updateFrontmatter: (text, frontmatter, format): string => + updateFrontmatter(text, frontmatter, { format }), // Update frontmatter in markdown + chunk: async (text, options) => { + const encoding = await resolveTokenEncoder(options?.model, { + disableFallback: false, + }); + const res = chunkMarkdown(text, (text) => encoding.encode(text).length, options); + return res; + }, + }); + + // Freeze JSONL utilities + glb.JSONL = Object.freeze({ + parse: JSONLTryParse, // Parse JSONL string to objects + stringify: JSONLStringify, // Convert objects to JSONL string + }); + + glb.JSON5 = Object.freeze({ + parse: JSON5TryParse, + stringify: JSON5Stringify, + }); + + glb.JSONSchema = Object.freeze({ + infer: JSONSchemaInfer, + fromParameters: promptParametersSchemaToJSONSchema, + }); + + // Freeze HTML utilities + glb.HTML = Object.freeze({ + convertTablesToJSON: HTMLTablesToJSON, // Convert HTML tables to JSON + convertToMarkdown: HTMLToMarkdown, // Convert HTML to Markdown + convertToText: HTMLToText, // Convert HTML to plain text + }); + + /** + * Function to trigger cancellation with an error. + * Throws a CancelError with a specified reason or a default message. + * @param [reason] - Optional reason for cancellation. 
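+   * @example
+   * // illustrative: cancel("user aborted") throws CancelError("user aborted")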
+ */ + glb.cancel = (reason?: string) => { + dbg("cancel", reason); + throw new CancelError(reason || "user cancelled"); // Trigger cancel error + }; + + // Instantiate GitHub client + glb.github = GitHubClient.default(); + + // Instantiate Git client + glb.git = GitClient.default(); + + glb.tokenizers = Object.freeze({ + resolve: resolveTokenEncoder, + count: async (text, options) => { + const { encode: encoder } = await resolveTokenEncoder(options?.model); + const c = await estimateTokens(text, encoder); + return c; + }, + truncate: async (text, maxTokens, options) => { + const { encode: encoder } = await resolveTokenEncoder(options?.model); + return await truncateTextToTokens(text, maxTokens, encoder, options); + }, + chunk: chunk, + }); + + /** + * Asynchronous function to fetch text from a URL or file. + * Handles both HTTP(S) URLs and local workspace files. + * @param urlOrFile - URL or file descriptor. + * @param [fetchOptions] - Options for fetching. + * @returns Fetch result. + */ + glb.fetchText = fetchText; // Assign fetchText function to global + + // ffmpeg + glb.ffmpeg = new FFmepgClient(); + + glb.DIFF = Object.freeze({ + parse: tryDiffParse, + createPatch: diffCreatePatch, + findChunk: diffFindChunk, + }); + + // Polyfill for Object.groupBy if not available + if (!Object.groupBy) { + Object.groupBy = function ( + items: T[], + callback: (item: T, index: number, array: T[]) => K, + ): Record { + return items.reduce( + (acc, item, idx, arr) => { + const key = callback(item, idx, arr); + if (!acc[key]) acc[key] = []; + acc[key].push(item); + return acc; }, - }) - - // Freeze JSONL utilities - glb.JSONL = Object.freeze({ - parse: JSONLTryParse, // Parse JSONL string to objects - stringify: JSONLStringify, // Convert objects to JSONL string - }) - - glb.JSON5 = Object.freeze({ - parse: JSON5TryParse, - stringify: JSON5Stringify, - }) - - glb.JSONSchema = Object.freeze({ - infer: JSONSchemaInfer, - fromParameters: promptParametersSchemaToJSONSchema, - }) - - // Freeze HTML utilities - glb.HTML = Object.freeze({ - convertTablesToJSON: HTMLTablesToJSON, // Convert HTML tables to JSON - convertToMarkdown: HTMLToMarkdown, // Convert HTML to Markdown - convertToText: HTMLToText, // Convert HTML to plain text - }) - - /** - * Function to trigger cancellation with an error. - * Throws a CancelError with a specified reason or a default message. - * @param [reason] - Optional reason for cancellation. - */ - glb.cancel = (reason?: string) => { - dbg("cancel", reason) - throw new CancelError(reason || "user cancelled") // Trigger cancel error - } - - // Instantiate GitHub client - glb.github = GitHubClient.default() - - // Instantiate Git client - glb.git = GitClient.default() - - glb.tokenizers = Object.freeze({ - resolve: resolveTokenEncoder, - count: async (text, options) => { - const { encode: encoder } = await resolveTokenEncoder( - options?.model - ) - const c = await estimateTokens(text, encoder) - return c - }, - truncate: async (text, maxTokens, options) => { - const { encode: encoder } = await resolveTokenEncoder( - options?.model - ) - return await truncateTextToTokens(text, maxTokens, encoder, options) - }, - chunk: chunk, - }) - - /** - * Asynchronous function to fetch text from a URL or file. - * Handles both HTTP(S) URLs and local workspace files. - * @param urlOrFile - URL or file descriptor. - * @param [fetchOptions] - Options for fetching. - * @returns Fetch result. 
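+   * @example
+   * // illustrative only: const res = await fetchText("https://example.com"); see fetchText for the result shape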
- */ - glb.fetchText = fetchText // Assign fetchText function to global - - // ffmpeg - glb.ffmpeg = new FFmepgClient() - - glb.DIFF = Object.freeze({ - parse: tryDiffParse, - createPatch: diffCreatePatch, - findChunk: diffFindChunk, - }) - - // Polyfill for Object.groupBy if not available - if (!Object.groupBy) { - Object.groupBy = function ( - items: T[], - callback: (item: T, index: number, array: T[]) => K - ): Record { - return items.reduce( - (acc, item, idx, arr) => { - const key = callback(item, idx, arr) - if (!acc[key]) acc[key] = [] - acc[key].push(item) - return acc - }, - {} as Record - ) - } - } - - // these are overridden, ignored - glb.script = () => {} - glb.system = () => {} + {} as Record, + ); + }; + } + + // these are overridden, ignored + glb.script = () => {}; + glb.system = () => {}; } /** @@ -202,9 +189,9 @@ export function installGlobals() { * - Iterates over the keys of the provided context, mapping them into the global context. */ export function installGlobalPromptContext(ctx: PromptContext) { - const glb = resolveGlobal() // Get the global context + const glb = resolveGlobal(); // Get the global context - for (const field of Object.keys(ctx)) { - glb[field] = (ctx as any)[field] - } + for (const field of Object.keys(ctx)) { + glb[field] = (ctx as any)[field]; + } } diff --git a/packages/core/src/grep.test.ts b/packages/core/src/grep.test.ts index 70f89e4d20..f832560f9a 100644 --- a/packages/core/src/grep.test.ts +++ b/packages/core/src/grep.test.ts @@ -1,77 +1,67 @@ -import { beforeEach, describe, test } from "node:test" -import assert from "node:assert/strict" -import { grepSearch } from "./grep" -import { TestHost } from "./testhost" +import { beforeEach, describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { grepSearch } from "./grep"; +import { TestHost } from "./testhost"; // testmarker = aojkhsdfvfaweiojhfwqepiouiasdojhvfadshjoasdf describe("grepSearch (integration)", async () => { - beforeEach(() => { - TestHost.install() - }) + beforeEach(() => { + TestHost.install(); + }); - test("** glob", async () => { - const result = await grepSearch("draft-07", { - glob: "**/*.json", - debug: true, - }) - console.log(result) - assert.strict(result.files.length > 0, "found files") - assert(result.matches.some((m) => typeof m.filename === "string")) - }) + test("** glob", async () => { + const result = await grepSearch("draft-07", { + glob: "**/*.json", + debug: true, + }); + console.log(result); + assert.strict(result.files.length > 0, "found files"); + assert(result.matches.some((m) => typeof m.filename === "string")); + }); - test("should support RegExp pattern and ignoreCase", async () => { - const result = await grepSearch(/grep/i, { - glob: ["*.ts"], - path: "src", - }) - assert(result.files.some((f) => typeof f.filename === "string")) - assert(result.matches.some((m) => typeof m.filename === "string")) - }) + test("should support RegExp pattern and ignoreCase", async () => { + const result = await grepSearch(/grep/i, { + glob: ["*.ts"], + path: "src", + }); + assert(result.files.some((f) => typeof f.filename === "string")); + assert(result.matches.some((m) => typeof m.filename === "string")); + }); - test("should not read file content if readText is false", async () => { - const result = await grepSearch("grep", { - glob: "*.ts", - path: "src", - readText: false, - }) - assert(result.files.every((f) => !("content" in f))) - }) + test("should not read file content if readText is false", async () => { + const result = await 
grepSearch("grep", { + glob: "*.ts", + path: "src", + readText: false, + }); + assert(result.files.every((f) => !("content" in f))); + }); - test("should bypass .gitignore filtering if applyGitIgnore is false", async () => { - const result = await grepSearch( - "aojkhsdfvfaweiojhfwqepiouiasdojhvfadshjoasdf", - { - glob: "*.ts", - applyGitIgnore: false, - } - ) - assert(Array.isArray(result.files)) - }) + test("should bypass .gitignore filtering if applyGitIgnore is false", async () => { + const result = await grepSearch("aojkhsdfvfaweiojhfwqepiouiasdojhvfadshjoasdf", { + glob: "*.ts", + applyGitIgnore: false, + }); + assert(Array.isArray(result.files)); + }); - test("should return files and matches for string pattern", async () => { - const result = await grepSearch( - "aojkhsdfvfaweiojhfwqepiouiasdojhvfadshjoasdf", - { - glob: "*.ts", - path: "src", - } - ) - assert(Array.isArray(result.files), "found files") - assert(Array.isArray(result.matches), "found matches") - assert( - result.files.some((f) => typeof f.filename === "string"), - "files have names" - ) - assert( - result.matches.every( - (m) => - typeof m.filename === "string" && - typeof m.content === "string" - ), - "files have content" - ) - assert(result.files.length === 1, "found one file") - assert(result.files[0].filename === "src/grep.test.ts", "correct file") - }) -}) + test("should return files and matches for string pattern", async () => { + const result = await grepSearch("aojkhsdfvfaweiojhfwqepiouiasdojhvfadshjoasdf", { + glob: "*.ts", + path: "src", + }); + assert(Array.isArray(result.files), "found files"); + assert(Array.isArray(result.matches), "found matches"); + assert( + result.files.some((f) => typeof f.filename === "string"), + "files have names", + ); + assert( + result.matches.every((m) => typeof m.filename === "string" && typeof m.content === "string"), + "files have content", + ); + assert(result.files.length === 1, "found one file"); + assert(result.files[0].filename === "src/grep.test.ts", "correct file"); + }); +}); diff --git a/packages/core/src/grep.ts b/packages/core/src/grep.ts index 55211719cd..b6ed9ace86 100644 --- a/packages/core/src/grep.ts +++ b/packages/core/src/grep.ts @@ -1,44 +1,42 @@ -import { TraceOptions } from "./trace" -import { runtimeHost } from "./host" -import { JSONLTryParse } from "./jsonl" -import { resolveFileContent, resolveFileContents } from "./file" -import { uniq } from "es-toolkit" -import { addLineNumbers } from "./liner" -import { arrayify } from "./util" -import { filterGitIgnore } from "./gitignore" -import { genaiscriptDebug } from "./debug" -import { tryStat } from "./fs" -import { CancellationOptions, checkCancelled } from "./cancellation" -const dbg = genaiscriptDebug("grep") +import { TraceOptions } from "./trace"; +import { runtimeHost } from "./host"; +import { JSONLTryParse } from "./jsonl"; +import { resolveFileContent, resolveFileContents } from "./file"; +import { uniq } from "es-toolkit"; +import { addLineNumbers } from "./liner"; +import { arrayify } from "./util"; +import { filterGitIgnore } from "./gitignore"; +import { genaiscriptDebug } from "./debug"; +import { tryStat } from "./fs"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +const dbg = genaiscriptDebug("grep"); async function importRipGrep(options?: TraceOptions) { - const { trace } = options || {} - try { - const { rgPath } = await import("@lvce-editor/ripgrep") - dbg(`rg: %s`, rgPath) - const rgStat = await tryStat(rgPath) - if (!rgStat?.isFile()) - throw new Error( - 
`ripgrep not found at ${rgPath}. Please reinstall genaiscript.` - ) - return rgPath - } catch (e) { - dbg(`%O`, e) - trace?.error(`failed to ripgrep`, e) - throw e - } + const { trace } = options || {}; + try { + const { rgPath } = await import("@lvce-editor/ripgrep"); + dbg(`rg: %s`, rgPath); + const rgStat = await tryStat(rgPath); + if (!rgStat?.isFile()) + throw new Error(`ripgrep not found at ${rgPath}. Please reinstall genaiscript.`); + return rgPath; + } catch (e) { + dbg(`%O`, e); + trace?.error(`failed to ripgrep`, e); + throw e; + } } export type GrepResult = { - type: "match" | "context" | "begin" | "end" - data: { - path: { - text: string - } - lines: { text: string } - line_number: number - } -}[] + type: "match" | "context" | "begin" | "end"; + data: { + path: { + text: string; + }; + lines: { text: string }; + line_number: number; + }; +}[]; /** * Executes a grep-like search across the workspace using ripgrep. @@ -55,73 +53,64 @@ export type GrepResult = { * - `matches`: List of detailed matches including filenames and content with line numbers. */ export async function grepSearch( - pattern: string | RegExp, - options?: TraceOptions & CancellationOptions & WorkspaceGrepOptions + pattern: string | RegExp, + options?: TraceOptions & CancellationOptions & WorkspaceGrepOptions, ): Promise<{ - data: GrepResult - files: WorkspaceFile[] - matches: WorkspaceFile[] + data: GrepResult; + files: WorkspaceFile[]; + matches: WorkspaceFile[]; }> { - const { cancellationToken, trace } = options || {} - const rgPath = await importRipGrep() - let { - path: paths, - glob: globs, - readText, - applyGitIgnore, - debug, - } = options || {} - globs = arrayify(globs) - paths = arrayify(paths) - const args: string[] = ["--json", "--multiline", "--context", "3"] - if (debug) args.push("--debug") - if (typeof pattern === "string") { - args.push("--smart-case", pattern) - } else { - if (pattern.ignoreCase) args.push("--ignore-case") - args.push(pattern.source) - } - if (globs) - for (const glob of globs) { - args.push("--glob") - args.push(glob) - } - if (paths.length) args.push(...paths) - else if (globs?.length) args.push(".") - dbg(`args: %o`, args) - const res = await runtimeHost.exec(undefined, rgPath, args, options) - if (!res.stdout) { - dbg(`no output: %s`, res.stderr) - return { data: [], files: [], matches: [] } - } - const resl = JSONLTryParse(res.stdout || "") as GrepResult - checkCancelled(cancellationToken) - let filenames = uniq( - resl - .filter(({ type }) => type === "match") - .map(({ data }) => data.path.text) - ) - if (applyGitIgnore !== false) { - dbg(`apply git ignore`) - filenames = await filterGitIgnore(filenames) + const { cancellationToken, trace } = options || {}; + const rgPath = await importRipGrep(); + let { path: paths, glob: globs, readText, applyGitIgnore, debug } = options || {}; + globs = arrayify(globs); + paths = arrayify(paths); + const args: string[] = ["--json", "--multiline", "--context", "3"]; + if (debug) args.push("--debug"); + if (typeof pattern === "string") { + args.push("--smart-case", pattern); + } else { + if (pattern.ignoreCase) args.push("--ignore-case"); + args.push(pattern.source); + } + if (globs) + for (const glob of globs) { + args.push("--glob"); + args.push(glob); } + if (paths.length) args.push(...paths); + else if (globs?.length) args.push("."); + dbg(`args: %o`, args); + const res = await runtimeHost.exec(undefined, rgPath, args, options); + if (!res.stdout) { + dbg(`no output: %s`, res.stderr); + return { data: [], files: [], matches: [] 
}; + } + const resl = JSONLTryParse(res.stdout || "") as GrepResult; + checkCancelled(cancellationToken); + let filenames = uniq( + resl.filter(({ type }) => type === "match").map(({ data }) => data.path.text), + ); + if (applyGitIgnore !== false) { + dbg(`apply git ignore`); + filenames = await filterGitIgnore(filenames); + } - const files = filenames.map((filename) => ({ filename })) - const filesSet = new Set(filenames) - const matches = resl - .filter(({ type }) => type === "match") - .filter(({ data }) => filesSet.has(data.path.text)) - .map( - ({ data }) => - { - filename: data.path.text, - content: addLineNumbers(data.lines.text.trimEnd(), { - startLine: data.line_number, - }), - } - ) - dbg(`read text: `, readText) - if (readText !== false) - await resolveFileContents(files, { trace, cancellationToken }) - return { data: resl, files, matches } + const files = filenames.map((filename) => ({ filename })); + const filesSet = new Set(filenames); + const matches = resl + .filter(({ type }) => type === "match") + .filter(({ data }) => filesSet.has(data.path.text)) + .map( + ({ data }) => + { + filename: data.path.text, + content: addLineNumbers(data.lines.text.trimEnd(), { + startLine: data.line_number, + }), + }, + ); + dbg(`read text: `, readText); + if (readText !== false) await resolveFileContents(files, { trace, cancellationToken }); + return { data: resl, files, matches }; } diff --git a/packages/core/src/groq.test.ts b/packages/core/src/groq.test.ts index 171673600f..168d375a32 100644 --- a/packages/core/src/groq.test.ts +++ b/packages/core/src/groq.test.ts @@ -1,36 +1,36 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { GROQEvaluate } from "./groq" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { GROQEvaluate } from "./groq"; describe("GROQEvaluate", async () => { - test("simple query", async () => { - const data = { name: "test" } - const res = await GROQEvaluate("*", data) - assert.deepEqual(res, data) - }) + test("simple query", async () => { + const data = { name: "test" }; + const res = await GROQEvaluate("*", data); + assert.deepEqual(res, data); + }); - test("filtered query", async () => { - const data = [ - { id: 1, name: "first" }, - { id: 2, name: "second" }, - ] - const res = await GROQEvaluate("*[id == 1]", data) - assert.deepEqual(res, [{ id: 1, name: "first" }]) - }) + test("filtered query", async () => { + const data = [ + { id: 1, name: "first" }, + { id: 2, name: "second" }, + ]; + const res = await GROQEvaluate("*[id == 1]", data); + assert.deepEqual(res, [{ id: 1, name: "first" }]); + }); - test("query with params", async () => { - const data = [ - { id: 1, name: "first" }, - { id: 2, name: "second" }, - ] - const res = await GROQEvaluate("*[id == $id]", data, { - params: { id: 2 }, - }) - assert.deepEqual(res, [{ id: 2, name: "second" }]) - }) + test("query with params", async () => { + const data = [ + { id: 1, name: "first" }, + { id: 2, name: "second" }, + ]; + const res = await GROQEvaluate("*[id == $id]", data, { + params: { id: 2 }, + }); + assert.deepEqual(res, [{ id: 2, name: "second" }]); + }); - test("undefined dataset returns undefined", async () => { - const res = await GROQEvaluate("*", undefined) - assert.equal(res, undefined) - }) -}) + test("undefined dataset returns undefined", async () => { + const res = await GROQEvaluate("*", undefined); + assert.equal(res, undefined); + }); +}); diff --git a/packages/core/src/groq.ts b/packages/core/src/groq.ts 
index 24371230e5..5398a62149 100644 --- a/packages/core/src/groq.ts +++ b/packages/core/src/groq.ts @@ -1,4 +1,4 @@ -import { parse, evaluate } from "groq-js" +import { parse, evaluate } from "groq-js"; /** * Loads and applies a GROQ query transformation to the input dataset. * @param query GROQ query string to parse and evaluate. @@ -6,17 +6,17 @@ import { parse, evaluate } from "groq-js" * @param options Optional configurations such as root and query parameters. */ export async function GROQEvaluate( - query: string, - dataset: any, - options?: { - root?: any - params?: Record - } + query: string, + dataset: any, + options?: { + root?: any; + params?: Record; + }, ): Promise { - if (dataset === undefined) return dataset + if (dataset === undefined) return dataset; - const tree = parse(query) - const value = await evaluate(tree, { dataset, ...(options || {}) }) - const res = await value.get() - return res + const tree = parse(query); + const value = await evaluate(tree, { dataset, ...(options || {}) }); + const res = await value.get(); + return res; } diff --git a/packages/core/src/host.ts b/packages/core/src/host.ts index c4769e83c6..2dd21b8cf4 100644 --- a/packages/core/src/host.ts +++ b/packages/core/src/host.ts @@ -1,73 +1,73 @@ -import { CancellationOptions, CancellationToken } from "./cancellation" -import { LanguageModel } from "./chat" -import { Progress } from "./progress" -import { MarkdownTrace, TraceOptions } from "./trace" +import { CancellationOptions, CancellationToken } from "./cancellation"; +import { LanguageModel } from "./chat"; +import { Progress } from "./progress"; +import { MarkdownTrace, TraceOptions } from "./trace"; import { - AzureCredentialsType, - LanguageModelConfiguration, - LogLevel, - Project, - ResponseStatus, -} from "./server/messages" -import { HostConfiguration } from "./hostconfiguration" -import { LOG } from "./constants" -import type { TokenCredential } from "@azure/identity" -import { McpClientManager } from "./mcpclient" -import { ResourceManager } from "./mcpresource" + AzureCredentialsType, + LanguageModelConfiguration, + LogLevel, + Project, + ResponseStatus, +} from "./server/messages"; +import { HostConfiguration } from "./hostconfiguration"; +import { LOG } from "./constants"; +import type { TokenCredential } from "@azure/identity"; +import { McpClientManager } from "./mcpclient"; +import { ResourceManager } from "./mcpresource"; export class LogEvent extends Event { - static Name = "log" - constructor( - public readonly level: LogLevel, - public readonly message: string - ) { - super(LOG) - } + static Name = "log"; + constructor( + public readonly level: LogLevel, + public readonly message: string, + ) { + super(LOG); + } } // this is typically an instance of TextDecoder export interface UTF8Decoder { - decode( - input: Uint8Array, - options?: { - stream?: boolean | undefined - } - ): string + decode( + input: Uint8Array, + options?: { + stream?: boolean | undefined; + }, + ): string; } export interface UTF8Encoder { - encode(input: string): Uint8Array + encode(input: string): Uint8Array; } export interface RetrievalClientOptions { - progress?: Progress - token?: CancellationToken - trace?: MarkdownTrace + progress?: Progress; + token?: CancellationToken; + trace?: MarkdownTrace; } export interface RetrievalSearchOptions extends VectorSearchOptions {} export interface RetrievalSearchResponse extends ResponseStatus { - results: WorkspaceFileWithScore[] + results: WorkspaceFileWithScore[]; } export interface RetrievalService { - 
vectorSearch( - text: string, - files: WorkspaceFile[], - options?: RetrievalSearchOptions - ): Promise + vectorSearch( + text: string, + files: WorkspaceFile[], + options?: RetrievalSearchOptions, + ): Promise; } export interface ServerManager { - start(): Promise - close(): Promise + start(): Promise; + close(): Promise; } export interface AuthenticationToken { - token: string - expiresOnTimestamp: number - credential: TokenCredential + token: string; + expiresOnTimestamp: number; + credential: TokenCredential; } /** @@ -81,190 +81,184 @@ export interface AuthenticationToken { * of the current time, to account for potential timing discrepancies. */ export function isAzureTokenExpired(token: AuthenticationToken) { - // Consider the token expired 5 seconds before the actual expiration to avoid timing issues - return !token || token.expiresOnTimestamp < Date.now() - 5_000 + // Consider the token expired 5 seconds before the actual expiration to avoid timing issues + return !token || token.expiresOnTimestamp < Date.now() - 5_000; } export interface AzureTokenResolver { - token( - credentialsType: AzureCredentialsType, - options?: CancellationOptions - ): Promise<{ - token?: AuthenticationToken - error?: SerializedError - }> + token( + credentialsType: AzureCredentialsType, + options?: CancellationOptions, + ): Promise<{ + token?: AuthenticationToken; + error?: SerializedError; + }>; } export type ModelConfiguration = Readonly< - Pick< - ModelOptions, - "model" | "temperature" | "reasoningEffort" | "fallbackTools" - > & { - source: "cli" | "env" | "script" | "config" | "default" - candidates?: string[] - } -> + Pick & { + source: "cli" | "env" | "script" | "config" | "default"; + candidates?: string[]; + } +>; export type ModelConfigurations = { - large: ModelConfiguration - small: ModelConfiguration - vision: ModelConfiguration - embeddings: ModelConfiguration -} & Record + large: ModelConfiguration; + small: ModelConfiguration; + vision: ModelConfiguration; + embeddings: ModelConfiguration; +} & Record; export interface Host { - userState: Record - server: ServerManager - path: Path - - createUTF8Decoder(): UTF8Decoder - createUTF8Encoder(): UTF8Encoder - projectFolder(): string - resolvePath(...segments: string[]): string - - getLanguageModelConfiguration( - modelId: string, - options?: { token?: boolean } & CancellationOptions & TraceOptions - ): Promise - log(level: LogLevel, msg: string): void - - // fs - statFile(name: string): Promise<{ - size: number - type: "file" | "directory" | "symlink" - }> - readFile(name: string): Promise - writeFile(name: string, content: Uint8Array): Promise - deleteFile(name: string): Promise - findFiles( - pattern: string | string[], - options?: { - ignore?: string | string[] - applyGitIgnore?: boolean - } - ): Promise - - // This has mkdirp-semantics (parent directories are created and existing ignored) - createDirectory(name: string): Promise - deleteDirectory(name: string): Promise + userState: Record; + server: ServerManager; + path: Path; + + createUTF8Decoder(): UTF8Decoder; + createUTF8Encoder(): UTF8Encoder; + projectFolder(): string; + resolvePath(...segments: string[]): string; + + getLanguageModelConfiguration( + modelId: string, + options?: { token?: boolean } & CancellationOptions & TraceOptions, + ): Promise; + log(level: LogLevel, msg: string): void; + + // fs + statFile(name: string): Promise<{ + size: number; + type: "file" | "directory" | "symlink"; + }>; + readFile(name: string): Promise; + writeFile(name: string, content: Uint8Array): 
Promise; + deleteFile(name: string): Promise; + findFiles( + pattern: string | string[], + options?: { + ignore?: string | string[]; + applyGitIgnore?: boolean; + }, + ): Promise; + + // This has mkdirp-semantics (parent directories are created and existing ignored) + createDirectory(name: string): Promise; + deleteDirectory(name: string): Promise; } export interface RuntimeHost extends Host { - project: Project - workspace: Omit - - azureToken?: AzureTokenResolver - azureAIServerlessToken?: AzureTokenResolver - azureManagementToken?: AzureTokenResolver - microsoftGraphToken?: AzureTokenResolver - - modelAliases: Readonly - clientLanguageModel?: LanguageModel - - mcp: McpClientManager - resources: ResourceManager - - pullModel( - cfg: LanguageModelConfiguration, - options?: TraceOptions & CancellationOptions - ): Promise - - clearModelAlias(source: "cli" | "env" | "config" | "script"): void - setModelAlias( - source: "env" | "cli" | "config" | "script", - id: string, - value: string | Omit - ): void - - /** - * Reloads the configuration - */ - readConfig(): Promise - - /** - * Gets the current loaded configuration - */ - get config(): HostConfiguration - /** - * Reads a secret - * @param name - */ - readSecret(name: string): Promise - // executes a process - exec( - containerId: string, - command: string, - args: string[], - options: ShellOptions & TraceOptions & CancellationOptions - ): Promise - - /** - * Starts a container to execute sandboxed code - * @param options - */ - container(options: ContainerOptions & TraceOptions): Promise - - /** - * Instantiates a python evaluation environment - */ - python( - options?: PythonRuntimeOptions & TraceOptions & CancellationOptions - ): Promise - - /** - * Launches a browser page - * @param url - * @param options - */ - browse( - url: string, - options?: BrowseSessionOptions & TraceOptions - ): Promise - - /** - * Cleanup all temporary containers. - */ - removeContainers(): Promise - - /** - * Cleanup all temporary browsers. 
- */ - removeBrowsers(): Promise - - /** - * Asks the user to select between options - * @param message question to ask - * @param options options to select from - */ - select( - message: string, - choices: (string | ShellSelectChoice)[], - options?: ShellSelectOptions - ): Promise - - /** - * Asks the user to input a text - * @param message message to ask - */ - input(message: string, options?: ShellInputOptions): Promise - - /** - * Asks the user to confirm a message - * @param message message to ask - */ - confirm(message: string, options?: ShellConfirmOptions): Promise - - /** - * Instantiates a content safety client - * @param id - */ - contentSafety( - id?: ContentSafetyProvider, - options?: TraceOptions & CancellationOptions - ): Promise + project: Project; + workspace: Omit; + + azureToken?: AzureTokenResolver; + azureAIServerlessToken?: AzureTokenResolver; + azureManagementToken?: AzureTokenResolver; + microsoftGraphToken?: AzureTokenResolver; + + modelAliases: Readonly; + clientLanguageModel?: LanguageModel; + + mcp: McpClientManager; + resources: ResourceManager; + + pullModel( + cfg: LanguageModelConfiguration, + options?: TraceOptions & CancellationOptions, + ): Promise; + + clearModelAlias(source: "cli" | "env" | "config" | "script"): void; + setModelAlias( + source: "env" | "cli" | "config" | "script", + id: string, + value: string | Omit, + ): void; + + /** + * Reloads the configuration + */ + readConfig(): Promise; + + /** + * Gets the current loaded configuration + */ + get config(): HostConfiguration; + /** + * Reads a secret + * @param name + */ + readSecret(name: string): Promise; + // executes a process + exec( + containerId: string, + command: string, + args: string[], + options: ShellOptions & TraceOptions & CancellationOptions, + ): Promise; + + /** + * Starts a container to execute sandboxed code + * @param options + */ + container(options: ContainerOptions & TraceOptions): Promise; + + /** + * Instantiates a python evaluation environment + */ + python( + options?: PythonRuntimeOptions & TraceOptions & CancellationOptions, + ): Promise; + + /** + * Launches a browser page + * @param url + * @param options + */ + browse(url: string, options?: BrowseSessionOptions & TraceOptions): Promise; + + /** + * Cleanup all temporary containers. + */ + removeContainers(): Promise; + + /** + * Cleanup all temporary browsers. + */ + removeBrowsers(): Promise; + + /** + * Asks the user to select between options + * @param message question to ask + * @param options options to select from + */ + select( + message: string, + choices: (string | ShellSelectChoice)[], + options?: ShellSelectOptions, + ): Promise; + + /** + * Asks the user to input a text + * @param message message to ask + */ + input(message: string, options?: ShellInputOptions): Promise; + + /** + * Asks the user to confirm a message + * @param message message to ask + */ + confirm(message: string, options?: ShellConfirmOptions): Promise; + + /** + * Instantiates a content safety client + * @param id + */ + contentSafety( + id?: ContentSafetyProvider, + options?: TraceOptions & CancellationOptions, + ): Promise; } -export let host: Host +export let host: Host; /** * Assigns a Host implementation to the global `host` variable. * @@ -272,9 +266,9 @@ export let host: Host * with the provided Host functionality for further operations and services. 
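 * @example
 * // illustrative: setRuntimeHost (below) routes through setHost(h)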
*/ export function setHost(h: Host) { - host = h + host = h; } -export let runtimeHost: RuntimeHost +export let runtimeHost: RuntimeHost; /** * Sets the runtime host instance and updates the global host reference. * @@ -282,6 +276,6 @@ export let runtimeHost: RuntimeHost * This will also update the `host` to refer to the same instance. */ export function setRuntimeHost(h: RuntimeHost) { - setHost(h) - runtimeHost = h + setHost(h); + runtimeHost = h; } diff --git a/packages/core/src/hostconfiguration.ts b/packages/core/src/hostconfiguration.ts index 59c5092292..72e4dae1c1 100644 --- a/packages/core/src/hostconfiguration.ts +++ b/packages/core/src/hostconfiguration.ts @@ -1,42 +1,42 @@ -import { ModelConfiguration } from "./host" +import { ModelConfiguration } from "./host"; /** * Schema for a global configuration file */ export interface HostConfiguration { - /** - * Path to the .env file - */ - envFile?: string | string[] + /** + * Path to the .env file + */ + envFile?: string | string[]; - /** - * List of glob paths to scan for genai scripts - */ - include?: ( - | string - | { - pattern: string - ignoreGitIgnore?: boolean - } - )[] + /** + * List of glob paths to scan for genai scripts + */ + include?: ( + | string + | { + pattern: string; + ignoreGitIgnore?: boolean; + } + )[]; - /** - * Ignore scripts in the current workspace. - */ - ignoreCurrentWorkspace?: boolean + /** + * Ignore scripts in the current workspace. + */ + ignoreCurrentWorkspace?: boolean; - /** - * Configures a list of known aliases. Overridden by environment variables and CLI arguments - */ - modelAliases?: Record + /** + * Configures a list of known aliases. Overridden by environment variables and CLI arguments + */ + modelAliases?: Record; - /** - * Model identifier to encoding mapping - */ - modelEncodings?: Record + /** + * Model identifier to encoding mapping + */ + modelEncodings?: Record; - /** - * A map of secret name and their respective regex pattern - */ - secretPatterns?: Record + /** + * A map of secret name and their respective regex pattern + */ + secretPatterns?: Record; } diff --git a/packages/core/src/html-escaper.d.ts b/packages/core/src/html-escaper.d.ts index 22c3d8c74e..1496d16595 100644 --- a/packages/core/src/html-escaper.d.ts +++ b/packages/core/src/html-escaper.d.ts @@ -1,3 +1,3 @@ declare module "html-escaper" { - export function escape(html: string): string + export function escape(html: string): string; } diff --git a/packages/core/src/html.test.ts b/packages/core/src/html.test.ts index 0d252377d4..4c812facd4 100644 --- a/packages/core/src/html.test.ts +++ b/packages/core/src/html.test.ts @@ -1,10 +1,10 @@ -import test, { describe } from "node:test" -import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html" -import assert from "node:assert/strict" +import test, { describe } from "node:test"; +import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html"; +import assert from "node:assert/strict"; describe("html", () => { - test("convert HTML table to JSON", async () => { - const html = ` + test("convert HTML table to JSON", async () => { + const html = ` @@ -15,30 +15,30 @@ describe("html", () => {
         <table>
             <tr>
                 <th>Header 1</th>
@@ -15,30 +15,30 @@ describe("html", () => {
                 <td>Value 2</td>
             </tr>
         </table>
-        `
-        const expected = [{ "Header 1": "Value 1", "Header 2": "Value 2" }]
-        const result = (await HTMLTablesToJSON(html))[0]
-        assert.deepStrictEqual(result, expected)
-    })
-    test("converts HTML to text", async () => {
-        const html = "<p>Hello, world!</p>"
-        const expected = "Hello, world!"
-        const result = await HTMLToText(html)
-        assert(result === expected)
-    })
+        `;
+    const expected = [{ "Header 1": "Value 1", "Header 2": "Value 2" }];
+    const result = (await HTMLTablesToJSON(html))[0];
+    assert.deepStrictEqual(result, expected);
+  });
+  test("converts HTML to text", async () => {
+    const html = "<p>Hello, world!</p>";
+    const expected = "Hello, world!";
+    const result = await HTMLToText(html);
+    assert(result === expected);
+  });
 
-    describe("HTMLToMarkdown", async () => {
-        test("converts simple HTML to gfm", async () => {
-            const html = "<h1>Title</h1>"
-            const expected = "Title\n====="
-            const result = await HTMLToMarkdown(html)
-            assert.strictEqual(result, expected)
-        })
-        test("converts simple HTML to Markdown", async () => {
-            const html = "<h1>Title</h1>"
-            const expected = "Title\n====="
-            const result = await HTMLToMarkdown(html, { disableGfm: true })
-            assert.strictEqual(result, expected)
-        })
-    })
-})
+  describe("HTMLToMarkdown", async () => {
+    test("converts simple HTML to gfm", async () => {
+      const html = "<h1>Title</h1>";
+      const expected = "Title\n=====";
+      const result = await HTMLToMarkdown(html);
+      assert.strictEqual(result, expected);
+    });
+    test("converts simple HTML to Markdown", async () => {
+      const html = "<h1>Title</h1>";
+      const expected = "Title\n=====";
+      const result = await HTMLToMarkdown(html, { disableGfm: true });
+      assert.strictEqual(result, expected);
+    });
+  });
+});
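The assertions above double as a compact spec for the three HTML helpers whose diff follows. A minimal usage sketch, with inputs and outputs copied from those assertions (the relative import path is an assumption, for a caller sitting next to `html.ts` in `packages/core/src`):

```ts
import { HTMLTablesToJSON, HTMLToMarkdown, HTMLToText } from "./html";

async function demo() {
  // HTMLTablesToJSON yields one array per <table> element, one object per data row.
  const [rows] = await HTMLTablesToJSON(
    "<table><tr><th>Header 1</th></tr><tr><td>Value 1</td></tr></table>",
  );
  console.log(rows); // [{ "Header 1": "Value 1" }]

  console.log(await HTMLToText("<p>Hello, world!</p>")); // "Hello, world!"
  console.log(await HTMLToMarkdown("<h1>Title</h1>")); // "Title\n====="
}
```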
diff --git a/packages/core/src/html.ts b/packages/core/src/html.ts
index 70e2013b18..935da80465 100644
--- a/packages/core/src/html.ts
+++ b/packages/core/src/html.ts
@@ -2,8 +2,8 @@
 // It imports necessary libraries for HTML conversion and logging purposes.
 ///
-import { CancellationOptions, checkCancelled } from "./cancellation"
-import { TraceOptions } from "./trace" // Import TraceOptions for optional logging features
+import { CancellationOptions, checkCancelled } from "./cancellation";
+import { TraceOptions } from "./trace"; // Import TraceOptions for optional logging features
 
 /**
 * Converts HTML tables to JSON objects.
 *
@@ -12,13 +12,10 @@
 * @param options - Optional parameters for conversion.
 * @returns A 2D array of objects representing the table data.
 */
-export async function HTMLTablesToJSON(
-    html: string,
-    options?: {}
-): Promise<object[][]> {
-    const { tabletojson } = await import("tabletojson") // Import tabletojson for converting HTML tables to JSON
-    const res = tabletojson.convert(html, options) // Convert HTML tables to JSON using tabletojson library
-    return res
+export async function HTMLTablesToJSON(html: string, options?: {}): Promise<object[][]> {
+  const { tabletojson } = await import("tabletojson"); // Import tabletojson for converting HTML tables to JSON
+  const res = tabletojson.convert(html, options); // Convert HTML tables to JSON using tabletojson library
+  return res;
 }
 
 /**
@@ -29,22 +26,22 @@ export async function HTMLTablesToJSON(
 * @returns The plain text representation of the HTML.
 */
 export async function HTMLToText(
-    html: string,
-    options?: HTMLToTextOptions & TraceOptions & CancellationOptions
+  html: string,
+  options?: HTMLToTextOptions & TraceOptions & CancellationOptions,
 ): Promise<string> {
-    if (!html) return "" // Return empty string if no HTML content is provided
+  if (!html) return ""; // Return empty string if no HTML content is provided
 
-    const { trace, cancellationToken } = options || {} // Extract trace for logging if available
+  const { trace, cancellationToken } = options || {}; // Extract trace for logging if available
 
-    try {
-        const { convert: convertToText } = await import("html-to-text") // Import the convert function from html-to-text library
-        checkCancelled(cancellationToken) // Check for cancellation token
-        const text = convertToText(html, options) // Perform conversion to plain text
-        return text
-    } catch (e) {
-        trace?.error("HTML conversion failed", e) // Log error if conversion fails
-        return undefined
-    }
+  try {
+    const { convert: convertToText } = await import("html-to-text"); // Import the convert function from html-to-text library
+    checkCancelled(cancellationToken); // Check for cancellation token
+    const text = convertToText(html, options); // Perform conversion to plain text
+    return text;
+  } catch (e) {
+    trace?.error("HTML conversion failed", e); // Log error if conversion fails
+    return undefined;
+  }
 }
 
 /**
@@ -55,31 +52,31 @@ export async function HTMLToText(
 * @returns The Markdown representation of the HTML.
*/ export async function HTMLToMarkdown( - html: string, - options?: HTMLToMarkdownOptions & TraceOptions & CancellationOptions + html: string, + options?: HTMLToMarkdownOptions & TraceOptions & CancellationOptions, ): Promise { - if (!html) return html // Return original content if no HTML is provided - const { disableGfm, trace, cancellationToken } = options || {} // Extract trace for logging if available + if (!html) return html; // Return original content if no HTML is provided + const { disableGfm, trace, cancellationToken } = options || {}; // Extract trace for logging if available - try { - const Turndown = (await import("turndown")).default // Import Turndown library for HTML to Markdown conversion - checkCancelled(cancellationToken) // Check for cancellation token - const turndown = new Turndown() - turndown.remove("script") - turndown.remove("style") - turndown.remove("meta") - turndown.remove("link") - turndown.remove("head") - turndown.remove("title") - turndown.remove("noscript") - if (!disableGfm) { - const GFMPlugin: any = require("turndown-plugin-gfm") - turndown.use(GFMPlugin.gfm) // Use GFM plugin for GitHub Flavored Markdown - } - const res = turndown.turndown(html) // Use Turndown library to convert HTML to Markdown - return res - } catch (e) { - trace?.error("HTML conversion failed", e) // Log error if conversion fails - return undefined + try { + const Turndown = (await import("turndown")).default; // Import Turndown library for HTML to Markdown conversion + checkCancelled(cancellationToken); // Check for cancellation token + const turndown = new Turndown(); + turndown.remove("script"); + turndown.remove("style"); + turndown.remove("meta"); + turndown.remove("link"); + turndown.remove("head"); + turndown.remove("title"); + turndown.remove("noscript"); + if (!disableGfm) { + const GFMPlugin: any = require("turndown-plugin-gfm"); + turndown.use(GFMPlugin.gfm); // Use GFM plugin for GitHub Flavored Markdown } + const res = turndown.turndown(html); // Use Turndown library to convert HTML to Markdown + return res; + } catch (e) { + trace?.error("HTML conversion failed", e); // Log error if conversion fails + return undefined; + } } diff --git a/packages/core/src/htmlescape.ts b/packages/core/src/htmlescape.ts index ba76a6e38d..0b4a5f7ce2 100644 --- a/packages/core/src/htmlescape.ts +++ b/packages/core/src/htmlescape.ts @@ -1,6 +1,6 @@ // This module provides functions to convert HTML content into different formats such as JSON, plain text, and Markdown. // It imports necessary libraries for HTML conversion and logging purposes. /// -import { escape as HTMLEscape_ } from "html-escaper" +import { escape as HTMLEscape_ } from "html-escaper"; -export const HTMLEscape = HTMLEscape_ +export const HTMLEscape = HTMLEscape_; diff --git a/packages/core/src/id.ts b/packages/core/src/id.ts index c883663721..48fcb1118f 100644 --- a/packages/core/src/id.ts +++ b/packages/core/src/id.ts @@ -1,4 +1,4 @@ -import { nanoid } from "nanoid" +import { nanoid } from "nanoid"; /** * Generates a unique identifier. @@ -6,5 +6,5 @@ import { nanoid } from "nanoid" * @returns A unique identifier string. 
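 * @example
 * // illustrative: generateId() returns a nanoid string such as "V1StGXR8_Z5jdHi6B-myT"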
*/ export function generateId(): string { - return nanoid() + return nanoid(); } diff --git a/packages/core/src/image.ts b/packages/core/src/image.ts index 73c05caf9b..3025a76b5f 100644 --- a/packages/core/src/image.ts +++ b/packages/core/src/image.ts @@ -1,201 +1,199 @@ // Import necessary functions and types from other modules -import { resolveBufferLike } from "./bufferlike" +import { resolveBufferLike } from "./bufferlike"; import { - BOX_DOWN_AND_RIGHT, - BOX_LEFT_AND_DOWN, - BOX_LEFT_AND_UP, - BOX_RIGHT, - BOX_UP_AND_DOWN, - BOX_UP_AND_RIGHT, - CHAR_DOWN_ARROW, - CHAR_UP_ARROW, - CHAR_UP_DOWN_ARROWS, - CONSOLE_COLOR_DEBUG, - IMAGE_DETAIL_HIGH_HEIGHT, - IMAGE_DETAIL_HIGH_WIDTH, - IMAGE_DETAIL_LOW_HEIGHT, - IMAGE_DETAIL_LOW_WIDTH, -} from "./constants" -import { TraceOptions } from "./trace" -import { ellipse, logVerbose } from "./util" -import pLimit from "p-limit" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { wrapColor, wrapRgbColor } from "./consolecolor" -import { assert } from "console" -import { genaiscriptDebug } from "./debug" -import { ImageGenerationUsage } from "./chat" -import { estimateImageCost } from "./usage" -import { prettyCost } from "./pretty" -const dbg = genaiscriptDebug("image") + BOX_DOWN_AND_RIGHT, + BOX_LEFT_AND_DOWN, + BOX_LEFT_AND_UP, + BOX_RIGHT, + BOX_UP_AND_DOWN, + BOX_UP_AND_RIGHT, + CHAR_DOWN_ARROW, + CHAR_UP_ARROW, + CHAR_UP_DOWN_ARROWS, + CONSOLE_COLOR_DEBUG, + IMAGE_DETAIL_HIGH_HEIGHT, + IMAGE_DETAIL_HIGH_WIDTH, + IMAGE_DETAIL_LOW_HEIGHT, + IMAGE_DETAIL_LOW_WIDTH, +} from "./constants"; +import { TraceOptions } from "./trace"; +import { ellipse, logVerbose } from "./util"; +import pLimit from "p-limit"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { wrapColor, wrapRgbColor } from "./consolecolor"; +import { assert } from "console"; +import { genaiscriptDebug } from "./debug"; +import { ImageGenerationUsage } from "./chat"; +import { estimateImageCost } from "./usage"; +import { prettyCost } from "./pretty"; +const dbg = genaiscriptDebug("image"); async function prepare( - url: BufferLike, - options: ImageGenerationOptions & - TraceOptions & - CancellationOptions & { detail?: "high" | "low" | "original" } + url: BufferLike, + options: ImageGenerationOptions & + TraceOptions & + CancellationOptions & { detail?: "high" | "low" | "original" }, ) { - // Dynamically import the Jimp library and its alignment enums - let { - cancellationToken, - autoCrop, - maxHeight, - maxWidth, - scale, - rotate, - greyscale, - crop, - flip, - detail, - } = options - checkCancelled(cancellationToken) + // Dynamically import the Jimp library and its alignment enums + let { + cancellationToken, + autoCrop, + maxHeight, + maxWidth, + scale, + rotate, + greyscale, + crop, + flip, + detail, + } = options; + checkCancelled(cancellationToken); - dbg(`loading image`) - // https://platform.openai.com/docs/guides/vision/calculating-costs#managing-images - // If the URL is a string, resolve it to a data URI - const buffer = await resolveBufferLike(url) - checkCancelled(cancellationToken) + dbg(`loading image`); + // https://platform.openai.com/docs/guides/vision/calculating-costs#managing-images + // If the URL is a string, resolve it to a data URI + const buffer = await resolveBufferLike(url); + checkCancelled(cancellationToken); - // failed to resolve buffer - if (!buffer) { - dbg(`failed to resolve image`) - return undefined - } + // failed to resolve buffer + if (!buffer) { + dbg(`failed to resolve image`); + 
return undefined; + } - // Read the image using Jimp - const { Jimp, HorizontalAlign, VerticalAlign } = await import("jimp") - const img = await Jimp.read(buffer) - checkCancelled(cancellationToken) - const { width, height } = img - if (crop) { - dbg(`cropping image with provided dimensions`) - const x = Math.max(0, Math.min(width, crop.x ?? 0)) - const y = Math.max(0, Math.min(height, crop.y ?? 0)) - const w = Math.max(1, Math.min(width - x, crop.w ?? width)) - const h = Math.max(1, Math.min(height - y, crop.h ?? height)) - img.crop({ x, y, w, h }) - } + // Read the image using Jimp + const { Jimp, HorizontalAlign, VerticalAlign } = await import("jimp"); + const img = await Jimp.read(buffer); + checkCancelled(cancellationToken); + const { width, height } = img; + if (crop) { + dbg(`cropping image with provided dimensions`); + const x = Math.max(0, Math.min(width, crop.x ?? 0)); + const y = Math.max(0, Math.min(height, crop.y ?? 0)); + const w = Math.max(1, Math.min(width - x, crop.w ?? width)); + const h = Math.max(1, Math.min(height - y, crop.h ?? height)); + img.crop({ x, y, w, h }); + } - if (!isNaN(scale)) { - dbg(`scaling image by factor ${scale}`) - img.scale(scale) - } + if (!isNaN(scale)) { + dbg(`scaling image by factor ${scale}`); + img.scale(scale); + } - if (!isNaN(rotate)) { - dbg(`rotating image by ${rotate} degrees`) - img.rotate(rotate) - } + if (!isNaN(rotate)) { + dbg(`rotating image by ${rotate} degrees`); + img.rotate(rotate); + } - if (flip) { - dbg(`flipping image`, flip) - img.flip(flip) - } + if (flip) { + dbg(`flipping image`, flip); + img.flip(flip); + } - // Contain the image within specified max dimensions if provided - if (options.maxWidth ?? options.maxHeight) { - if (options.maxWidth && !options.maxHeight) { - if (img.width > options.maxWidth) { - dbg(`resize width to %d`, options.maxWidth) - img.resize({ - w: options.maxWidth, - h: Math.ceil((img.height / img.width) * options.maxWidth), - }) - } - } else if (options.maxHeight && !options.maxWidth) { - if (img.height > options.maxHeight) { - dbg(`resize height to %d`, options.maxHeight) - img.resize({ - h: options.maxHeight, - w: Math.ceil((img.width / img.height) * options.maxHeight), - }) - } - } else { - dbg( - `containing image within ${options.maxWidth || ""}x${options.maxHeight || ""}` - ) - contain( - img, - img.width > maxWidth ? maxWidth : img.width, - img.height > maxHeight ? maxHeight : img.height, - HorizontalAlign.CENTER | VerticalAlign.MIDDLE - ) - } + // Contain the image within specified max dimensions if provided + if (options.maxWidth ?? options.maxHeight) { + if (options.maxWidth && !options.maxHeight) { + if (img.width > options.maxWidth) { + dbg(`resize width to %d`, options.maxWidth); + img.resize({ + w: options.maxWidth, + h: Math.ceil((img.height / img.width) * options.maxWidth), + }); + } + } else if (options.maxHeight && !options.maxWidth) { + if (img.height > options.maxHeight) { + dbg(`resize height to %d`, options.maxHeight); + img.resize({ + h: options.maxHeight, + w: Math.ceil((img.width / img.height) * options.maxHeight), + }); + } + } else { + dbg(`containing image within ${options.maxWidth || ""}x${options.maxHeight || ""}`); + contain( + img, + img.width > maxWidth ? maxWidth : img.width, + img.height > maxHeight ? 
maxHeight : img.height, + HorizontalAlign.CENTER | VerticalAlign.MIDDLE, + ); } + } - // Auto-crop the image if required by options - if (autoCrop) { - dbg(`auto-cropping image`) - img.autocrop() - } + // Auto-crop the image if required by options + if (autoCrop) { + dbg(`auto-cropping image`); + img.autocrop(); + } - if (greyscale) { - dbg(`applying greyscale to image`) - img.greyscale() - } + if (greyscale) { + dbg(`applying greyscale to image`); + img.greyscale(); + } - checkCancelled(cancellationToken) + checkCancelled(cancellationToken); - // https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding#low-or-high-fidelity-image-understanding - if (detail === "low") { - dbg(`setting image detail to low`) - contain( - img, - Math.min(img.width, IMAGE_DETAIL_LOW_WIDTH), - Math.min(img.height, IMAGE_DETAIL_LOW_HEIGHT), - HorizontalAlign.CENTER | VerticalAlign.MIDDLE - ) - } else if (detail !== "original") { - dbg(`setting image detail to low`) - contain( - img, - IMAGE_DETAIL_HIGH_WIDTH, - IMAGE_DETAIL_HIGH_HEIGHT, - HorizontalAlign.CENTER | VerticalAlign.MIDDLE - ) - } - return img + // https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding#low-or-high-fidelity-image-understanding + if (detail === "low") { + dbg(`setting image detail to low`); + contain( + img, + Math.min(img.width, IMAGE_DETAIL_LOW_WIDTH), + Math.min(img.height, IMAGE_DETAIL_LOW_HEIGHT), + HorizontalAlign.CENTER | VerticalAlign.MIDDLE, + ); + } else if (detail !== "original") { + dbg(`setting image detail to low`); + contain( + img, + IMAGE_DETAIL_HIGH_WIDTH, + IMAGE_DETAIL_HIGH_HEIGHT, + HorizontalAlign.CENTER | VerticalAlign.MIDDLE, + ); + } + return img; } function contain( - img: { - width: number - height: number - contain: (arg0: { w: number; h: number; align: number }) => void - }, - width: number, - height: number, - align: number + img: { + width: number; + height: number; + contain: (arg0: { w: number; h: number; align: number }) => void; + }, + width: number, + height: number, + align: number, ) { - if (img.width > width || img.height > height) { - img.contain({ - w: Math.min(img.width, width), - h: Math.min(img.height, height), - align, - }) - } + if (img.width > width || img.height > height) { + img.contain({ + w: Math.min(img.width, width), + h: Math.min(img.height, height), + align, + }); + } } async function encode( - img: { - mime?: string - width: number - height: number - getBuffer(mime: string): Promise - }, - options: DefImagesOptions & TraceOptions + img: { + mime?: string; + width: number; + height: number; + getBuffer(mime: string): Promise; + }, + options: DefImagesOptions & TraceOptions, ) { - // Determine the output MIME type, defaulting to image/jpeg - const { detail, mime } = options || {} - const outputMime = mime || img.mime || ("image/jpeg" as any) - const buf = await img.getBuffer(outputMime) - const imageDataUri = `data:${outputMime};base64,${buf.toString("base64")}` - // Return the encoded image data URI - return { - width: img.width, - height: img.height, - type: outputMime, - url: imageDataUri, - detail, - } + // Determine the output MIME type, defaulting to image/jpeg + const { detail, mime } = options || {}; + const outputMime = mime || img.mime || ("image/jpeg" as any); + const buf = await img.getBuffer(outputMime); + const imageDataUri = `data:${outputMime};base64,${buf.toString("base64")}`; + // Return the encoded image data URI + return { + width: img.width, + height: img.height, + type: outputMime, + url: 
imageDataUri, + detail, + }; } /** @@ -218,14 +216,14 @@ async function encode( * @returns A Promise that resolves to the transformed image as a Buffer. */ export async function imageTransform( - url: BufferLike, - options: ImageTransformOptions & TraceOptions & CancellationOptions + url: BufferLike, + options: ImageTransformOptions & TraceOptions & CancellationOptions, ): Promise { - const { mime } = options || {} - const img = await prepare(url, { ...(options || {}), detail: "original" }) - const outputMime = mime || img.mime || ("image/jpeg" as any) - const buf = await img.getBuffer(outputMime) - return Buffer.from(buf) + const { mime } = options || {}; + const img = await prepare(url, { ...(options || {}), detail: "original" }); + const outputMime = mime || img.mime || ("image/jpeg" as any); + const buf = await img.getBuffer(outputMime); + return Buffer.from(buf); } /** @@ -236,12 +234,12 @@ export async function imageTransform( * @returns A promise that resolves to the image encoded as a data URI. */ export async function imageEncodeForLLM( - url: BufferLike, - options: DefImagesOptions & TraceOptions & CancellationOptions + url: BufferLike, + options: DefImagesOptions & TraceOptions & CancellationOptions, ) { - const img = await prepare(url, options) - if (!img) return undefined - return await encode(img, options) + const img = await prepare(url, options); + if (!img) return undefined; + return await encode(img, options); } /** @@ -261,48 +259,46 @@ export async function imageEncodeForLLM( * @returns A promise resolving to the tiled image encoded as a data URI or other specified format. */ export async function imageTileEncodeForLLM( - urls: BufferLike[], - options: DefImagesOptions & TraceOptions & CancellationOptions + urls: BufferLike[], + options: DefImagesOptions & TraceOptions & CancellationOptions, ) { - if (urls.length === 0) { - dbg(`no images provided for tiling`) - throw new Error("image: no images provided for tiling") - } + if (urls.length === 0) { + dbg(`no images provided for tiling`); + throw new Error("image: no images provided for tiling"); + } - const { cancellationToken } = options - const limit = pLimit(4) - const imgs = await Promise.all( - urls.map((url) => limit(() => prepare(url, options))) - ) - checkCancelled(cancellationToken) + const { cancellationToken } = options; + const limit = pLimit(4); + const imgs = await Promise.all(urls.map((url) => limit(() => prepare(url, options)))); + checkCancelled(cancellationToken); - logVerbose(`image: tiling ${imgs.length} images`) - const imgw = imgs.reduce((acc, img) => Math.max(acc, img.width), 0) - const imgh = imgs.reduce((acc, img) => Math.max(acc, img.height), 0) - const ncols = Math.ceil(Math.sqrt(imgs.length)) - const nrows = Math.ceil(imgs.length / ncols) - const width = ncols * imgw - const height = nrows * imgh + logVerbose(`image: tiling ${imgs.length} images`); + const imgw = imgs.reduce((acc, img) => Math.max(acc, img.width), 0); + const imgh = imgs.reduce((acc, img) => Math.max(acc, img.height), 0); + const ncols = Math.ceil(Math.sqrt(imgs.length)); + const nrows = Math.ceil(imgs.length / ncols); + const width = ncols * imgw; + const height = nrows * imgh; - const { Jimp, HorizontalAlign, VerticalAlign } = await import("jimp") - const canvas = new Jimp({ width, height }) + const { Jimp, HorizontalAlign, VerticalAlign } = await import("jimp"); + const canvas = new Jimp({ width, height }); - for (let i = 0; i < imgs.length; i++) { - const ci = Math.floor(i / nrows) - const ri = i % nrows - const x = ci * 
imgw - const y = ri * imgh - canvas.composite(imgs[i], x, y) - } + for (let i = 0; i < imgs.length; i++) { + const ci = Math.floor(i / nrows); + const ri = i % nrows; + const x = ci * imgw; + const y = ri * imgh; + canvas.composite(imgs[i], x, y); + } - contain( - canvas, - IMAGE_DETAIL_HIGH_WIDTH, - IMAGE_DETAIL_HIGH_HEIGHT, - HorizontalAlign.CENTER | VerticalAlign.MIDDLE - ) + contain( + canvas, + IMAGE_DETAIL_HIGH_WIDTH, + IMAGE_DETAIL_HIGH_HEIGHT, + HorizontalAlign.CENTER | VerticalAlign.MIDDLE, + ); - return await encode(canvas, { ...options, detail: undefined }) + return await encode(canvas, { ...options, detail: undefined }); } /** @@ -318,59 +314,59 @@ export async function imageTileEncodeForLLM( * @returns A string representation of the image formatted for terminal output. */ export async function renderImageToTerminal( - url: BufferLike, - options: { - columns: number - rows: number - label?: string - modelId?: string - usage?: ImageGenerationUsage - } & CancellationOptions + url: BufferLike, + options: { + columns: number; + rows: number; + label?: string; + modelId?: string; + usage?: ImageGenerationUsage; + } & CancellationOptions, ) { - assert(!!url, "image buffer") - const { columns, rows, label, usage, modelId } = options - const image = await prepare(url, { - maxWidth: Math.max(16, Math.min(126, (columns >> 1) - 2)), - maxHeight: Math.max(16, Math.min(126, rows - 4)), - }) - const { width, height } = image - const title = label ? ellipse(label, width * 2 - 2) : "" - const res: string[] = [ - wrapColor( - CONSOLE_COLOR_DEBUG, - `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}` + - title + - BOX_RIGHT.repeat(width * 2 - title.length - 1) + - `${BOX_LEFT_AND_DOWN}\n` - ), - ] - const wall = wrapColor(CONSOLE_COLOR_DEBUG, BOX_UP_AND_DOWN) - for (let y = 0; y < height; ++y) { - res.push(wall) - for (let x = 0; x < width; ++x) { - const c = image.getPixelColor(x, y) - const cc = c ? wrapRgbColor(c >> 8, " ", true) : " " - res.push(cc, cc) - } - res.push(wall, "\n") + assert(!!url, "image buffer"); + const { columns, rows, label, usage, modelId } = options; + const image = await prepare(url, { + maxWidth: Math.max(16, Math.min(126, (columns >> 1) - 2)), + maxHeight: Math.max(16, Math.min(126, rows - 4)), + }); + const { width, height } = image; + const title = label ? ellipse(label, width * 2 - 2) : ""; + const res: string[] = [ + wrapColor( + CONSOLE_COLOR_DEBUG, + `${BOX_DOWN_AND_RIGHT}${BOX_RIGHT}` + + title + + BOX_RIGHT.repeat(width * 2 - title.length - 1) + + `${BOX_LEFT_AND_DOWN}\n`, + ), + ]; + const wall = wrapColor(CONSOLE_COLOR_DEBUG, BOX_UP_AND_DOWN); + for (let y = 0; y < height; ++y) { + res.push(wall); + for (let x = 0; x < width; ++x) { + const c = image.getPixelColor(x, y); + const cc = c ? wrapRgbColor(c >> 8, " ", true) : " "; + res.push(cc, cc); } - const cost = estimateImageCost(modelId, usage) - const usageStr = usage - ? [ - `${CHAR_UP_DOWN_ARROWS}${usage.total_tokens}`, - `${CHAR_UP_ARROW}${usage.input_tokens}`, - `${CHAR_DOWN_ARROW}${usage.output_tokens}`, - prettyCost(cost), - ].join(" ") - : "" - res.push( - wrapColor( - CONSOLE_COLOR_DEBUG, - BOX_UP_AND_RIGHT + - usageStr + - BOX_RIGHT.repeat(width * 2 - usageStr.length) + - `${BOX_LEFT_AND_UP}\n` - ) - ) - return res.join("") + res.push(wall, "\n"); + } + const cost = estimateImageCost(modelId, usage); + const usageStr = usage + ? 
[ + `${CHAR_UP_DOWN_ARROWS}${usage.total_tokens}`, + `${CHAR_UP_ARROW}${usage.input_tokens}`, + `${CHAR_DOWN_ARROW}${usage.output_tokens}`, + prettyCost(cost), + ].join(" ") + : ""; + res.push( + wrapColor( + CONSOLE_COLOR_DEBUG, + BOX_UP_AND_RIGHT + + usageStr + + BOX_RIGHT.repeat(width * 2 - usageStr.length) + + `${BOX_LEFT_AND_UP}\n`, + ), + ); + return res.join(""); } diff --git a/packages/core/src/importprompt.ts b/packages/core/src/importprompt.ts index 3827e365eb..982f8ca59b 100644 --- a/packages/core/src/importprompt.ts +++ b/packages/core/src/importprompt.ts @@ -1,11 +1,11 @@ -import debug from "debug" -const dbg = debug("genaiscript:importprompt") +import debug from "debug"; +const dbg = debug("genaiscript:importprompt"); -import { host } from "./host" -import { logError } from "./util" -import { TraceOptions } from "./trace" -import { pathToFileURL } from "node:url" -import { mark } from "./performance" +import { host } from "./host"; +import { logError } from "./util"; +import { TraceOptions } from "./trace"; +import { pathToFileURL } from "node:url"; +import { mark } from "./performance"; /** * Dynamically imports a JavaScript module from a specified file. @@ -20,52 +20,49 @@ import { mark } from "./performance" * @throws An error if the `filename` is not provided or if the module import fails. */ export async function importFile( - filename: string, - options?: { - onImported?: (module: any) => Awaitable - logCb?: (msg: string) => void - } & TraceOptions + filename: string, + options?: { + onImported?: (module: any) => Awaitable; + logCb?: (msg: string) => void; + } & TraceOptions, ): Promise { - const { trace, onImported } = options || {} - if (!filename) { - throw new Error("filename is required") - } + const { trace, onImported } = options || {}; + if (!filename) { + throw new Error("filename is required"); + } - let unregister: () => void = undefined - try { - dbg(`resolving module path for filename: ${filename}`) - const modulePath = pathToFileURL( - host.path.isAbsolute(filename) - ? filename - : host.path.join(host.projectFolder(), filename) - ).toString() - const parentURL = - import.meta.url ?? - pathToFileURL(__filename ?? host.projectFolder()).toString() + let unregister: () => void = undefined; + try { + dbg(`resolving module path for filename: ${filename}`); + const modulePath = pathToFileURL( + host.path.isAbsolute(filename) ? filename : host.path.join(host.projectFolder(), filename), + ).toString(); + const parentURL = + import.meta.url ?? pathToFileURL(__filename ?? 
host.projectFolder()).toString(); - dbg(`importing module from path: ${modulePath}`) - const onImport = (file: string) => { - // trace?.itemValue("📦 import", fileURLToPath(file)) - } - onImport(modulePath) - const { tsImport, register } = await import("tsx/esm/api") - unregister = register({ onImport }) - const module = await tsImport(modulePath, { - parentURL, - //tsconfig: false, - onImport, - }) - const result = await onImported?.(module) - unregister?.() + dbg(`importing module from path: ${modulePath}`); + const onImport = (file: string) => { + // trace?.itemValue("📦 import", fileURLToPath(file)) + }; + onImport(modulePath); + const { tsImport, register } = await import("tsx/esm/api"); + unregister = register({ onImport }); + const module = await tsImport(modulePath, { + parentURL, + //tsconfig: false, + onImport, + }); + const result = await onImported?.(module); + unregister?.(); - return result - } catch (err) { - dbg("module imported failed") - unregister?.() - logError(err) - trace?.error(err) - throw err - } + return result; + } catch (err) { + dbg("module imported failed"); + unregister?.(); + logError(err); + trace?.error(err); + throw err; + } } /** @@ -81,27 +78,27 @@ export async function importFile( * @returns A promise that resolves when the function execution is complete. */ export async function importPrompt( - ctx0: PromptContext, - r: PromptScript, - options?: { - logCb?: (msg: string) => void - } & TraceOptions + ctx0: PromptContext, + r: PromptScript, + options?: { + logCb?: (msg: string) => void; + } & TraceOptions, ) { - mark("prompt.import") - const { filename } = r - dbg(`importing file: ${filename}`) - return await importFile(filename, { - ...(options || {}), - onImported: async (module) => { - const main = module.default - if (typeof main === "function") { - dbg(`found default export as function, calling`) - await main(ctx0) - } else if (r.isSystem) { - throw new Error( - "system prompt using esm JavaScript (mjs, mts) must have a default function." 
- ) - } - }, - }) + mark("prompt.import"); + const { filename } = r; + dbg(`importing file: ${filename}`); + return await importFile(filename, { + ...(options || {}), + onImported: async (module) => { + const main = module.default; + if (typeof main === "function") { + dbg(`found default export as function, calling`); + await main(ctx0); + } else if (r.isSystem) { + throw new Error( + "system prompt using esm JavaScript (mjs, mts) must have a default function.", + ); + } + }, + }); } diff --git a/packages/core/src/indent.test.ts b/packages/core/src/indent.test.ts index af3ccaf064..7cd8c9f506 100644 --- a/packages/core/src/indent.test.ts +++ b/packages/core/src/indent.test.ts @@ -1,50 +1,50 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { indent, dedent } from "./indent" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { indent, dedent } from "./indent"; describe("indent/dedent utils", async () => { - test("indent adds spaces to each line", () => { - const input = "line1\nline2\nline3" - const expected = " line1\n line2\n line3" - assert.equal(indent(input, " "), expected) - }) - - test("indent handles empty string", () => { - assert.equal(indent("", " "), "") - }) - - test("indent handles undefined", () => { - assert.equal(indent(undefined, " "), undefined) - }) - - test("indent handles single line", () => { - assert.equal(indent("single", " "), " single") - }) - - test("dedent removes common indentation", () => { - const input = ` + test("indent adds spaces to each line", () => { + const input = "line1\nline2\nline3"; + const expected = " line1\n line2\n line3"; + assert.equal(indent(input, " "), expected); + }); + + test("indent handles empty string", () => { + assert.equal(indent("", " "), ""); + }); + + test("indent handles undefined", () => { + assert.equal(indent(undefined, " "), undefined); + }); + + test("indent handles single line", () => { + assert.equal(indent("single", " "), " single"); + }); + + test("dedent removes common indentation", () => { + const input = ` first line second line third line - ` - const expected = "first line\nsecond line\nthird line" - assert.equal(dedent(input).trim(), expected) - }) - - test("dedent works with template literals", () => { - const value = "test" - const result = dedent` + `; + const expected = "first line\nsecond line\nthird line"; + assert.equal(dedent(input).trim(), expected); + }); + + test("dedent works with template literals", () => { + const value = "test"; + const result = dedent` Hello ${value} This is indented - ` - assert.equal(result.trim(), `Hello ${value}\nThis is indented`) - }) - - test("dedent handles undefined", () => { - assert.equal(dedent(undefined), undefined) - }) - - test("dedent handles null", () => { - assert.equal(dedent(null), null) - }) -}) + `; + assert.equal(result.trim(), `Hello ${value}\nThis is indented`); + }); + + test("dedent handles undefined", () => { + assert.equal(dedent(undefined), undefined); + }); + + test("dedent handles null", () => { + assert.equal(dedent(null), null); + }); +}); diff --git a/packages/core/src/indent.ts b/packages/core/src/indent.ts index 32c666842b..e448552777 100644 --- a/packages/core/src/indent.ts +++ b/packages/core/src/indent.ts @@ -1,4 +1,4 @@ -import tsDedent from "ts-dedent" +import tsDedent from "ts-dedent"; /** * Indents each line of a given text by a specified indentation string. 
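// A minimal usage sketch for the indent/dedent pair above, assuming the
// exports from "./indent" (sample values are hypothetical):
import { indent, dedent } from "./indent";

const block = dedent`
    first line
    second line
`; // ts-dedent strips the common leading indentation
const shifted = indent(block.trim(), "  "); // trim guards stray newlines, as the tests below do
console.log(shifted); // "  first line\n  second line"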
@@ -8,11 +8,11 @@ import tsDedent from "ts-dedent" * @returns The indented text or the original input if it is undefined, null, or empty. */ export function indent(text: string, indentation: string) { - if (text === undefined || text === null || text === "") return text - return text - ?.split(/\r?\n/g) - .map((line) => indentation + line) - .join("\n") + if (text === undefined || text === null || text === "") return text; + return text + ?.split(/\r?\n/g) + .map((line) => indentation + line) + .join("\n"); } /** @@ -21,11 +21,8 @@ export function indent(text: string, indentation: string) { * @param templ - Template or string to unindent. * @param values - Values to interpolate into the template. */ -export function dedent( - templ: TemplateStringsArray | string, - ...values: unknown[] -): string { - if (templ === undefined) return undefined - if (templ === null) return null - return tsDedent(templ, ...values) +export function dedent(templ: TemplateStringsArray | string, ...values: unknown[]): string { + if (templ === undefined) return undefined; + if (templ === null) return null; + return tsDedent(templ, ...values); } diff --git a/packages/core/src/inflection.test.ts b/packages/core/src/inflection.test.ts index 4400e1df7f..14876872cb 100644 --- a/packages/core/src/inflection.test.ts +++ b/packages/core/src/inflection.test.ts @@ -1,69 +1,69 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { splitalize, titleize, humanize } from "./inflection" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { splitalize, titleize, humanize } from "./inflection"; describe("inflection", () => { - describe("splitalize", () => { - test("should separate camelCase words with spaces", () => { - assert.equal(splitalize("camelCase"), "camel Case") - }) + describe("splitalize", () => { + test("should separate camelCase words with spaces", () => { + assert.equal(splitalize("camelCase"), "camel Case"); + }); - test("should separate PascalCase words with spaces", () => { - assert.equal(splitalize("PascalCase"), "Pascal Case") - }) + test("should separate PascalCase words with spaces", () => { + assert.equal(splitalize("PascalCase"), "Pascal Case"); + }); - test("should handle multiple camelCase words", () => { - assert.equal(splitalize("thisIsCamelCase"), "this Is Camel Case") - }) + test("should handle multiple camelCase words", () => { + assert.equal(splitalize("thisIsCamelCase"), "this Is Camel Case"); + }); - test("should not modify text without case transitions", () => { - assert.equal(splitalize("lowercase"), "lowercase") - assert.equal(splitalize("UPPERCASE"), "UPPERCASE") - }) + test("should not modify text without case transitions", () => { + assert.equal(splitalize("lowercase"), "lowercase"); + assert.equal(splitalize("UPPERCASE"), "UPPERCASE"); + }); - test("should handle null or undefined input", () => { - assert.equal(splitalize(undefined as unknown as string), undefined) - assert.equal(splitalize(null as unknown as string), null) - }) - }) + test("should handle null or undefined input", () => { + assert.equal(splitalize(undefined as unknown as string), undefined); + assert.equal(splitalize(null as unknown as string), null); + }); + }); - describe("titleize", () => { - test("should capitalize each word and separate camelCase", () => { - assert.equal(titleize("camelCase"), "Camel Case") - }) + describe("titleize", () => { + test("should capitalize each word and separate camelCase", () => { + 
assert.equal(titleize("camelCase"), "Camel Case"); + }); - test("should capitalize each word in a sentence", () => { - assert.equal(titleize("this is a test"), "This Is a Test") - }) + test("should capitalize each word in a sentence", () => { + assert.equal(titleize("this is a test"), "This Is a Test"); + }); - test("should handle PascalCase", () => { - assert.equal(titleize("PascalCaseTest"), "Pascal Case Test") - }) + test("should handle PascalCase", () => { + assert.equal(titleize("PascalCaseTest"), "Pascal Case Test"); + }); - test("should handle empty, null or undefined input", () => { - assert.equal(titleize(""), "") - assert.equal(titleize(null as unknown as string), null) - assert.equal(titleize(undefined as unknown as string), undefined) - }) - }) + test("should handle empty, null or undefined input", () => { + assert.equal(titleize(""), ""); + assert.equal(titleize(null as unknown as string), null); + assert.equal(titleize(undefined as unknown as string), undefined); + }); + }); - describe("humanize", () => { - test("should make text more human-readable by separating camelCase", () => { - assert.equal(humanize("camelCase"), "Camel case") - }) + describe("humanize", () => { + test("should make text more human-readable by separating camelCase", () => { + assert.equal(humanize("camelCase"), "Camel case"); + }); - test("should capitalize the first word only", () => { - assert.equal(humanize("this is a test"), "This is a test") - }) + test("should capitalize the first word only", () => { + assert.equal(humanize("this is a test"), "This is a test"); + }); - test("should handle PascalCase", () => { - assert.equal(humanize("PascalCaseTest"), "Pascal case test") - }) + test("should handle PascalCase", () => { + assert.equal(humanize("PascalCaseTest"), "Pascal case test"); + }); - test("should handle empty, null or undefined input", () => { - assert.equal(humanize(""), "") - assert.equal(humanize(null as unknown as string), null) - assert.equal(humanize(undefined as unknown as string), undefined) - }) - }) -}) + test("should handle empty, null or undefined input", () => { + assert.equal(humanize(""), ""); + assert.equal(humanize(null as unknown as string), null); + assert.equal(humanize(undefined as unknown as string), undefined); + }); + }); +}); diff --git a/packages/core/src/inflection.ts b/packages/core/src/inflection.ts index 6a02b2882e..cfc9c0f232 100644 --- a/packages/core/src/inflection.ts +++ b/packages/core/src/inflection.ts @@ -1,4 +1,4 @@ -import { titleize as _titlelize, humanize as _humanize } from "inflection" +import { titleize as _titlelize, humanize as _humanize } from "inflection"; /** * Splits camelCase or PascalCase text into separate words by inserting a space @@ -8,8 +8,8 @@ import { titleize as _titlelize, humanize as _humanize } from "inflection" * @returns The modified string with spaces added between camelCase or PascalCase boundaries, or the original value if empty. */ export function splitalize(text: string) { - if (!text) return text - return text?.replace(/([a-z])([A-Z])/g, "$1 $2") + if (!text) return text; + return text?.replace(/([a-z])([A-Z])/g, "$1 $2"); } /** @@ -22,8 +22,8 @@ export function splitalize(text: string) { * @returns The titleized version of the input string. 
*/ export function titleize(text: string) { - if (!text) return text - return _titlelize(splitalize(text)) + if (!text) return text; + return _titlelize(splitalize(text)); } /** @@ -34,6 +34,6 @@ export function titleize(text: string) { * @returns The humanized version of the input text. */ export function humanize(text: string) { - if (!text) return text - return _humanize(splitalize(text)) + if (!text) return text; + return _humanize(splitalize(text)); } diff --git a/packages/core/src/ini.test.ts b/packages/core/src/ini.test.ts index c64685187f..bcf262f578 100644 --- a/packages/core/src/ini.test.ts +++ b/packages/core/src/ini.test.ts @@ -1,26 +1,26 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { INIParse, INIStringify } from "./ini" -import { dedent } from "./indent" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { INIParse, INIStringify } from "./ini"; +import { dedent } from "./indent"; describe("ini", () => { - test("rountrip", () => { - const o = { a: "1", b: "foo" } - const text = INIStringify(o) - const r = INIParse(text) + test("rountrip", () => { + const o = { a: "1", b: "foo" }; + const text = INIStringify(o); + const r = INIParse(text); - assert.equal(JSON.stringify(r), JSON.stringify(o)) - }) - test("fenced", () => { - const o = { a: "1", b: "foo" } - const text = dedent` + assert.equal(JSON.stringify(r), JSON.stringify(o)); + }); + test("fenced", () => { + const o = { a: "1", b: "foo" }; + const text = dedent` \`\`\`ini ${INIStringify(o)} \`\`\` - ` - console.log(text) - const r = INIParse(text) + `; + console.log(text); + const r = INIParse(text); - assert.equal(JSON.stringify(r), JSON.stringify(o)) - }) -}) + assert.equal(JSON.stringify(r), JSON.stringify(o)); + }); +}); diff --git a/packages/core/src/ini.ts b/packages/core/src/ini.ts index e1d20628f8..66c3751a8a 100644 --- a/packages/core/src/ini.ts +++ b/packages/core/src/ini.ts @@ -2,14 +2,14 @@ // with error handling and utility support for cleaning up the input content. // Import the parse and stringify functions from the "ini" library -import { parse, stringify } from "ini" +import { parse, stringify } from "ini"; // Import a utility function to log errors -import { logError } from "./util" +import { logError } from "./util"; // Import a custom function to clean up INI content by removing any fencing -import { unfence } from "./unwrappers" -import { filenameOrFileToContent } from "./unwrappers" +import { unfence } from "./unwrappers"; +import { filenameOrFileToContent } from "./unwrappers"; /** * Parses an INI formatted string after cleaning it by removing fencing and resolving file content. 
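// A minimal round-trip sketch for the INI helpers in this file, assuming the
// exports above (the config object is hypothetical):
import { INIParse, INIStringify } from "./ini";

const cfg = { host: "localhost", port: "8080" };
const text = INIStringify(cfg); // e.g. "host=localhost\nport=8080\n"
const back = INIParse(text); // round-trips to an equivalent object
const fenced = INIParse("```ini\n" + text + "```"); // fencing is stripped before parsing
// INITryParse(text, defaultValue) wraps INIParse, logging and returning defaultValue on errors.
console.log(back, fenced);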
@@ -18,9 +18,9 @@ import { filenameOrFileToContent } from "./unwrappers" * @returns Parsed object */ export function INIParse(text: string) { - text = filenameOrFileToContent(text) - const cleaned = unfence(text, "ini") // Remove any fencing from the text - return parse(cleaned) // Parse the cleaned text into an object + text = filenameOrFileToContent(text); + const cleaned = unfence(text, "ini"); // Remove any fencing from the text + return parse(cleaned); // Parse the cleaned text into an object } /** @@ -31,12 +31,12 @@ export function INIParse(text: string) { * @returns The parsed object or the default value */ export function INITryParse(text: string, defaultValue?: any) { - try { - return INIParse(text) // Attempt to parse the text - } catch (e) { - logError(e) // Log any parsing errors - return defaultValue // Return the default value if parsing fails - } + try { + return INIParse(text); // Attempt to parse the text + } catch (e) { + logError(e); // Log any parsing errors + return defaultValue; // Return the default value if parsing fails + } } /** @@ -46,5 +46,5 @@ export function INITryParse(text: string, defaultValue?: any) { * @returns The INI formatted string */ export function INIStringify(o: any) { - return stringify(o) // Convert the object to an INI formatted string + return stringify(o); // Convert the object to an INI formatted string } diff --git a/packages/core/src/jinja.test.ts b/packages/core/src/jinja.test.ts index e4d5bf043d..c0cd076600 100644 --- a/packages/core/src/jinja.test.ts +++ b/packages/core/src/jinja.test.ts @@ -1,18 +1,18 @@ -import { jinjaRender } from "./jinja" -import { describe, test, beforeEach } from "node:test" -import assert from "node:assert/strict" +import { jinjaRender } from "./jinja"; +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; describe("jinjaRender", () => { - test("should correctly render template with values", () => { - // Given a template and values - const template = "Hello, {{ name }}! Today is {{ day }}." - const values = { name: "Alice", day: "Monday" } + test("should correctly render template with values", () => { + // Given a template and values + const template = "Hello, {{ name }}! Today is {{ day }}."; + const values = { name: "Alice", day: "Monday" }; - // When rendering the template - const result = jinjaRender(template, values) + // When rendering the template + const result = jinjaRender(template, values); - // Then the result should be as expected - const expected = "Hello, Alice! Today is Monday." - assert.strictEqual(result, expected) - }) -}) + // Then the result should be as expected + const expected = "Hello, Alice! Today is Monday."; + assert.strictEqual(result, expected); + }); +}); diff --git a/packages/core/src/jinja.ts b/packages/core/src/jinja.ts index 66f4b800c0..9e821e354c 100644 --- a/packages/core/src/jinja.ts +++ b/packages/core/src/jinja.ts @@ -1,7 +1,7 @@ // Import the Template class from the @huggingface/jinja package -import { Template } from "@huggingface/jinja" -import { ChatCompletionMessageParam } from "./chattypes" -import { collapseEmptyLines } from "./util" +import { Template } from "@huggingface/jinja"; +import { ChatCompletionMessageParam } from "./chattypes"; +import { collapseEmptyLines } from "./util"; /** * Renders a string template using the Jinja templating engine. @@ -14,18 +14,15 @@ import { collapseEmptyLines } from "./util" * @param values - An object with key-value pairs to replace in the template. 
* @returns The rendered string with values substituted. */ -export function jinjaRender( - template: string, - values: Record -): string { - // Create a new Template instance with the provided template string - const t = new Template(template) +export function jinjaRender(template: string, values: Record): string { + // Create a new Template instance with the provided template string + const t = new Template(template); - // Render the template using the provided values - const res = t.render(values) + // Render the template using the provided values + const res = t.render(values); - // Return the rendered string - return collapseEmptyLines(res) + // Return the rendered string + return collapseEmptyLines(res); } /** @@ -44,20 +41,15 @@ export function jinjaRender( * @returns The rendered string with the placeholders substituted using the * provided arguments. */ -export function jinjaRenderChatMessage( - msg: ChatCompletionMessageParam, - args: Record -) { - const { content } = msg - let template: string[] = [] - if (typeof content === "string") template.push(content) - else - for (const part of content) { - if (part.type === "text") template.push(part.text) - else if (part.type === "image_url") - template.push(`![](${part.image_url})`) - else if (part.type === "refusal") - template.push(`refusal: ${part.refusal}`) - } - return jinjaRender(template.join("\n"), args) +export function jinjaRenderChatMessage(msg: ChatCompletionMessageParam, args: Record) { + const { content } = msg; + let template: string[] = []; + if (typeof content === "string") template.push(content); + else + for (const part of content) { + if (part.type === "text") template.push(part.text); + else if (part.type === "image_url") template.push(`![](${part.image_url})`); + else if (part.type === "refusal") template.push(`refusal: ${part.refusal}`); + } + return jinjaRender(template.join("\n"), args); } diff --git a/packages/core/src/json5.test.ts b/packages/core/src/json5.test.ts index 19a0e32f50..0ece831ce0 100644 --- a/packages/core/src/json5.test.ts +++ b/packages/core/src/json5.test.ts @@ -1,69 +1,64 @@ -import { - isJSONObjectOrArray, - JSONrepair, - JSON5parse, - JSON5TryParse, -} from "./json5" -import { describe, test } from "node:test" -import assert from "node:assert/strict" +import { isJSONObjectOrArray, JSONrepair, JSON5parse, JSON5TryParse } from "./json5"; +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; describe("json5.ts", () => { - test("isJSONObjectOrArray should identify JSON objects or arrays", () => { - assert.strictEqual(isJSONObjectOrArray('{ "key": "value" }'), true) - assert.strictEqual(isJSONObjectOrArray("[1, 2, 3]"), true) - assert.strictEqual(isJSONObjectOrArray(' { "key": "value" }'), true) - assert.strictEqual(isJSONObjectOrArray("non-json-content"), false) - }) + test("isJSONObjectOrArray should identify JSON objects or arrays", () => { + assert.strictEqual(isJSONObjectOrArray('{ "key": "value" }'), true); + assert.strictEqual(isJSONObjectOrArray("[1, 2, 3]"), true); + assert.strictEqual(isJSONObjectOrArray(' { "key": "value" }'), true); + assert.strictEqual(isJSONObjectOrArray("non-json-content"), false); + }); - test("JSONrepair should repair broken JSON strings", () => { - const brokenJSON = '{"key": "value",}' - const repaired = JSONrepair(brokenJSON) - assert.strictEqual(repaired, '{"key": "value"}') - }) + test("JSONrepair should repair broken JSON strings", () => { + const brokenJSON = '{"key": "value",}'; + const repaired = JSONrepair(brokenJSON); 
+ assert.strictEqual(repaired, '{"key": "value"}'); + }); - test("JSON5parse should parse valid JSON5 strings", () => { - const json5 = '{ key: "value" }' - const parsed = JSON5parse(json5) - assert.deepStrictEqual(parsed, { key: "value" }) - }) + test("JSON5parse should parse valid JSON5 strings", () => { + const json5 = '{ key: "value" }'; + const parsed = JSON5parse(json5); + assert.deepStrictEqual(parsed, { key: "value" }); + }); - test("JSON5parse with repair option should repair and parse invalid JSON5 strings", () => { - const brokenJSON5 = '{ key: "value", }' - const parsed = JSON5parse(brokenJSON5, { repair: true }) - assert.deepStrictEqual(parsed, { key: "value" }) - }) + test("JSON5parse with repair option should repair and parse invalid JSON5 strings", () => { + const brokenJSON5 = '{ key: "value", }'; + const parsed = JSON5parse(brokenJSON5, { repair: true }); + assert.deepStrictEqual(parsed, { key: "value" }); + }); - test("JSON5parse with errorAsDefaultValue should return default value on error", () => { - const brokenJSON5 = '{ key: "value }' - const defaultValue = { key: "default" } - const parsed = JSON5parse(brokenJSON5, { - errorAsDefaultValue: true, - defaultValue, - }) - assert.deepStrictEqual(parsed, defaultValue) - }) + test("JSON5parse with errorAsDefaultValue should return default value on error", () => { + const brokenJSON5 = '{ key: "value }'; + const defaultValue = { key: "default" }; + const parsed = JSON5parse(brokenJSON5, { + errorAsDefaultValue: true, + defaultValue, + }); + assert.deepStrictEqual(parsed, defaultValue); + }); - test("JSON5parse should throw error on invalid JSON5 without options", () => { - const brokenJSON5 = '{ key: "value }' - assert.throws(() => { - JSON5parse(brokenJSON5) - }) - }) + test("JSON5parse should throw error on invalid JSON5 without options", () => { + const brokenJSON5 = '{ key: "value }'; + assert.throws(() => { + JSON5parse(brokenJSON5); + }); + }); - test("JSON5TryParse should handle undefined and null values", () => { - assert.strictEqual(JSON5TryParse(undefined), undefined) - assert.strictEqual(JSON5TryParse(null), null) - }) + test("JSON5TryParse should handle undefined and null values", () => { + assert.strictEqual(JSON5TryParse(undefined), undefined); + assert.strictEqual(JSON5TryParse(null), null); + }); - test("JSON5TryParse should parse valid JSON5 strings", () => { - const json5 = '{ key: "value" }' - const parsed = JSON5TryParse(json5) - assert.deepStrictEqual(parsed, { key: "value" }) - }) + test("JSON5TryParse should parse valid JSON5 strings", () => { + const json5 = '{ key: "value" }'; + const parsed = JSON5TryParse(json5); + assert.deepStrictEqual(parsed, { key: "value" }); + }); - test("JSON5TryParse should repair strings", () => { - const brokenJSON5 = '{ key: "value' - const parsed = JSON5TryParse(brokenJSON5) - assert.deepStrictEqual(parsed, { key: "value" }) - }) -}) + test("JSON5TryParse should repair strings", () => { + const brokenJSON5 = '{ key: "value'; + const parsed = JSON5TryParse(brokenJSON5); + assert.deepStrictEqual(parsed, { key: "value" }); + }); +}); diff --git a/packages/core/src/json5.ts b/packages/core/src/json5.ts index 06aad22145..73fb543812 100644 --- a/packages/core/src/json5.ts +++ b/packages/core/src/json5.ts @@ -8,14 +8,14 @@ */ // Importing parse and stringify functions from the json5 library. -import { parse, stringify } from "json5" +import { parse, stringify } from "json5"; // Importing jsonrepair function for fixing broken JSON strings. 
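// A minimal sketch of the parse-and-repair behavior exercised by the tests
// above, assuming the exports from this module:
import { isJSONObjectOrArray, JSON5parse, JSON5TryParse } from "./json5";

const strict = JSON5parse<{ key: string }>('{ key: "value" }');
const repaired = JSON5parse<{ key: string }>('{ key: "value", }', { repair: true });
const truncated = JSON5TryParse('{ key: "value'); // jsonrepair closes the truncated string
console.log(isJSONObjectOrArray("[1, 2, 3]")); // true
console.log(strict, repaired, truncated); // each is { key: "value" }, per the tests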
-import { jsonrepair } from "jsonrepair" +import { jsonrepair } from "jsonrepair"; // Importing unfence function to handle fenced code blocks. -import { unfence } from "./unwrappers" -import { unthink } from "./think" +import { unfence } from "./unwrappers"; +import { unthink } from "./think"; /** * Checks if the input text starts with '{' or '[', indicating a JSON object or array. @@ -24,8 +24,8 @@ import { unthink } from "./think" * @returns True if the string starts with '{' or '[', false otherwise. */ export function isJSONObjectOrArray(text: string) { - // Tests if the input string starts with '{' or '[' after removing any leading whitespace. - return /^\s*[\{\[]/.test(text) + // Tests if the input string starts with '{' or '[' after removing any leading whitespace. + return /^\s*[\{\[]/.test(text); } /** @@ -34,11 +34,11 @@ export function isJSONObjectOrArray(text: string) { * @returns The parsed object or undefined if parsing fails. */ export function JSONTryParse(text: string) { - try { - return JSON.parse(text) - } catch (e) { - return undefined - } + try { + return JSON.parse(text); + } catch (e) { + return undefined; + } } /** @@ -47,9 +47,9 @@ export function JSONTryParse(text: string) { * @returns The repaired JSON string. */ export function JSONrepair(text: string) { - // Uses jsonrepair to fix any issues in the JSON string. - const repaired = jsonrepair(text) - return repaired + // Uses jsonrepair to fix any issues in the JSON string. + const repaired = jsonrepair(text); + return repaired; } /** @@ -63,37 +63,37 @@ export function JSONrepair(text: string) { * @returns The parsed object, the default value, or undefined/null based on options. */ export function JSON5parse( - text: string, - options?: { - defaultValue?: T - errorAsDefaultValue?: boolean - repair?: boolean - } + text: string, + options?: { + defaultValue?: T; + errorAsDefaultValue?: boolean; + repair?: boolean; + }, ): T | undefined | null { - try { - // Remove fencing if present. - text = unfence(text, "json") - if (options?.repair) { - try { - // Attempt parsing without repairing first. - const res = parse(text) - return res as T - } catch { - // Repair and parse if initial parsing fails. - const repaired = JSONrepair(text) - const res = parse(repaired) - return (res as T) ?? options?.defaultValue - } - } else { - // Parse without repair if repair option is false. - const res = parse(text) - return res as T - } - } catch (e) { - // Return default value if error occurs and errorAsDefaultValue is true. - if (options?.errorAsDefaultValue) return options?.defaultValue - throw e + try { + // Remove fencing if present. + text = unfence(text, "json"); + if (options?.repair) { + try { + // Attempt parsing without repairing first. + const res = parse(text); + return res as T; + } catch { + // Repair and parse if initial parsing fails. + const repaired = JSONrepair(text); + const res = parse(repaired); + return (res as T) ?? options?.defaultValue; + } + } else { + // Parse without repair if repair option is false. + const res = parse(text); + return res as T; } + } catch (e) { + // Return default value if error occurs and errorAsDefaultValue is true. + if (options?.errorAsDefaultValue) return options?.defaultValue; + throw e; + } } /** @@ -110,17 +110,17 @@ export function JSON5parse( * @returns The parsed object, default value, or null/undefined based on input. 
*/ export function JSON5TryParse( - text: string | undefined | null, - defaultValue?: T + text: string | undefined | null, + defaultValue?: T, ): T | undefined | null { - if (text === undefined) return undefined - if (text === null) return null - // Uses JSON5parse with repair option and errorAsDefaultValue set to true. - return JSON5parse(text, { - defaultValue, - errorAsDefaultValue: true, - repair: true, - }) + if (text === undefined) return undefined; + if (text === null) return null; + // Uses JSON5parse with repair option and errorAsDefaultValue set to true. + return JSON5parse(text, { + defaultValue, + errorAsDefaultValue: true, + repair: true, + }); } /** @@ -131,12 +131,12 @@ export function JSON5TryParse( * @returns The parsed object, the original input, or an empty object if input is empty. */ export function JSONLLMTryParse(s: string): any { - if (s === undefined || s === null) return s - if (s === "") return {} - // Removes any fencing and then tries to parse the string. - const cleaned = unfence(unthink(s), "json") - return JSON5TryParse(cleaned) + if (s === undefined || s === null) return s; + if (s === "") return {}; + // Removes any fencing and then tries to parse the string. + const cleaned = unfence(unthink(s), "json"); + return JSON5TryParse(cleaned); } // Export the JSON5 stringify function directly for convenience. -export const JSON5Stringify = stringify +export const JSON5Stringify = stringify; diff --git a/packages/core/src/jsonl.test.ts b/packages/core/src/jsonl.test.ts index 32ba3a1b81..5c8b5d6332 100644 --- a/packages/core/src/jsonl.test.ts +++ b/packages/core/src/jsonl.test.ts @@ -1,50 +1,50 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { isJSONLFilename, JSONLTryParse, JSONLStringify } from "./jsonl" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { isJSONLFilename, JSONLTryParse, JSONLStringify } from "./jsonl"; describe("JSONL utils", async () => { - test("isJSONLFilename identifies JSONL files", () => { - assert.equal(isJSONLFilename("file.jsonl"), true) - assert.equal(isJSONLFilename("file.mdjson"), true) - assert.equal(isJSONLFilename("file.ldjson"), true) - assert.equal(isJSONLFilename("file.JSONL"), true) - assert.equal(isJSONLFilename("file.txt"), false) - assert.equal(isJSONLFilename("file.json"), false) - }) + test("isJSONLFilename identifies JSONL files", () => { + assert.equal(isJSONLFilename("file.jsonl"), true); + assert.equal(isJSONLFilename("file.mdjson"), true); + assert.equal(isJSONLFilename("file.ldjson"), true); + assert.equal(isJSONLFilename("file.JSONL"), true); + assert.equal(isJSONLFilename("file.txt"), false); + assert.equal(isJSONLFilename("file.json"), false); + }); - test("JSONLTryParse parses valid JSONL", () => { - const input = '{"a":1}\n{"b":2}\n{"c":3}' - const expected = [{ a: 1 }, { b: 2 }, { c: 3 }] - assert.deepEqual(JSONLTryParse(input), expected) - }) + test("JSONLTryParse parses valid JSONL", () => { + const input = '{"a":1}\n{"b":2}\n{"c":3}'; + const expected = [{ a: 1 }, { b: 2 }, { c: 3 }]; + assert.deepEqual(JSONLTryParse(input), expected); + }); - test("JSONLTryParse handles empty input", () => { - assert.deepEqual(JSONLTryParse(""), []) - assert.deepEqual(JSONLTryParse(null), []) - assert.deepEqual(JSONLTryParse(undefined), []) - }) + test("JSONLTryParse handles empty input", () => { + assert.deepEqual(JSONLTryParse(""), []); + assert.deepEqual(JSONLTryParse(null), []); + assert.deepEqual(JSONLTryParse(undefined), []); 
+ }); - test("JSONLTryParse skips invalid lines", () => { - const input = '{"a":1}\nin ; "valid\n{"c":3}' - const expected = [{ a: 1 }, { c: 3 }] - assert.deepEqual(JSONLTryParse(input), expected) - }) + test("JSONLTryParse skips invalid lines", () => { + const input = '{"a":1}\nin ; "valid\n{"c":3}'; + const expected = [{ a: 1 }, { c: 3 }]; + assert.deepEqual(JSONLTryParse(input), expected); + }); - test("JSONLStringify converts objects to JSONL", () => { - const input = [{ a: 1 }, { b: 2 }, { c: 3 }] - const expected = '{"a":1}\n{"b":2}\n{"c":3}\n' - assert.equal(JSONLStringify(input), expected) - }) + test("JSONLStringify converts objects to JSONL", () => { + const input = [{ a: 1 }, { b: 2 }, { c: 3 }]; + const expected = '{"a":1}\n{"b":2}\n{"c":3}\n'; + assert.equal(JSONLStringify(input), expected); + }); - test("JSONLStringify handles empty input", () => { - assert.equal(JSONLStringify([]), "") - assert.equal(JSONLStringify(null), "") - assert.equal(JSONLStringify(undefined), "") - }) + test("JSONLStringify handles empty input", () => { + assert.equal(JSONLStringify([]), ""); + assert.equal(JSONLStringify(null), ""); + assert.equal(JSONLStringify(undefined), ""); + }); - test("JSONLStringify skips null/undefined entries", () => { - const input = [{ a: 1 }, null, { c: 3 }, undefined] - const expected = '{"a":1}\n{"c":3}\n' - assert.equal(JSONLStringify(input), expected) - }) -}) + test("JSONLStringify skips null/undefined entries", () => { + const input = [{ a: 1 }, null, { c: 3 }, undefined]; + const expected = '{"a":1}\n{"c":3}\n'; + assert.equal(JSONLStringify(input), expected); + }); +}); diff --git a/packages/core/src/jsonl.ts b/packages/core/src/jsonl.ts index 38cee110ce..6a65076fa4 100644 --- a/packages/core/src/jsonl.ts +++ b/packages/core/src/jsonl.ts @@ -1,12 +1,12 @@ -import { host } from "./host" -import { JSON5TryParse } from "./json5" -import { concatBuffers, logVerbose, logWarn } from "./util" +import { host } from "./host"; +import { JSON5TryParse } from "./json5"; +import { concatBuffers, logVerbose, logWarn } from "./util"; function tryReadFile(fn: string) { - return host.readFile(fn).then( - (r) => r, - (_) => null - ) + return host.readFile(fn).then( + (r) => r, + (_) => null, + ); } /** @@ -16,7 +16,7 @@ function tryReadFile(fn: string) { * @returns True if the filename ends with .jsonl, .mdjson, or .ldjson (case-insensitive), otherwise false. */ export function isJSONLFilename(fn: string) { - return /\.(jsonl|mdjson|ldjson)$/i.test(fn) + return /\.(jsonl|mdjson|ldjson)$/i.test(fn); } /** @@ -29,19 +29,19 @@ export function isJSONLFilename(fn: string) { * @returns An array of parsed objects. Lines that fail parsing or are empty are skipped. */ export function JSONLTryParse( - text: string, - options?: { - repair?: boolean - } + text: string, + options?: { + repair?: boolean; + }, ): any[] { - if (!text) return [] - const res: any[] = [] - const lines = text.split("\n") - for (const line of lines.filter((l) => !!l.trim())) { - const obj = JSON5TryParse(line, options) - if (obj !== undefined && obj !== null) res.push(obj) - } - return res + if (!text) return []; + const res: any[] = []; + const lines = text.split("\n"); + for (const line of lines.filter((l) => !!l.trim())) { + const obj = JSON5TryParse(line, options); + if (obj !== undefined && obj !== null) res.push(obj); + } + return res; } /** @@ -51,28 +51,28 @@ export function JSONLTryParse( * @returns A string where each object in the array is serialized as a JSON string and separated by newlines. 
Returns an empty string if the input array is empty or null. */ export function JSONLStringify(objs: any[]) { - if (!objs?.length) return "" - const acc: string[] = [] - for (const o of objs.filter((o) => o !== undefined && o !== null)) { - const s = JSON.stringify(o) - acc.push(s) - } - return acc.join("\n") + "\n" + if (!objs?.length) return ""; + const acc: string[] = []; + for (const o of objs.filter((o) => o !== undefined && o !== null)) { + const s = JSON.stringify(o); + acc.push(s); + } + return acc.join("\n") + "\n"; } function serialize(objs: any[]) { - const acc = JSONLStringify(objs) - const buf = host.createUTF8Encoder().encode(acc) - return buf + const acc = JSONLStringify(objs); + const buf = host.createUTF8Encoder().encode(acc); + return buf; } async function writeJSONLCore(fn: string, objs: any[], append: boolean) { - let buf = serialize(objs) - if (append) { - const curr = await tryReadFile(fn) - if (curr) buf = concatBuffers(curr, buf) - } - await host.writeFile(fn, buf) + let buf = serialize(objs); + if (append) { + const curr = await tryReadFile(fn); + if (curr) buf = concatBuffers(curr, buf); + } + await host.writeFile(fn, buf); } /** @@ -82,7 +82,7 @@ async function writeJSONLCore(fn: string, objs: any[], append: boolean) { * @param objs - An array of objects to serialize and write to the file. */ export async function writeJSONL(fn: string, objs: any[]) { - await writeJSONLCore(fn, objs, false) + await writeJSONLCore(fn, objs, false); } /** @@ -93,11 +93,11 @@ export async function writeJSONL(fn: string, objs: any[]) { * @param meta - Optional metadata to include in each appended object under the `__meta` key. */ export async function appendJSONL(name: string, objs: T[], meta?: any) { - if (meta) - await writeJSONLCore( - name, - objs.map((obj) => ({ ...obj, __meta: meta })), - true - ) - else await writeJSONLCore(name, objs, true) + if (meta) + await writeJSONLCore( + name, + objs.map((obj) => ({ ...obj, __meta: meta })), + true, + ); + else await writeJSONLCore(name, objs, true); } diff --git a/packages/core/src/jsonlinecache.ts b/packages/core/src/jsonlinecache.ts index 0c6daa71d9..9479884fbe 100644 --- a/packages/core/src/jsonlinecache.ts +++ b/packages/core/src/jsonlinecache.ts @@ -1,10 +1,10 @@ // Import necessary modules and types -import { appendJSONL, JSONLTryParse, writeJSONL } from "./jsonl" -import { host } from "./host" -import { tryReadText } from "./fs" -import { dotGenaiscriptPath } from "./workdir" -import { CacheEntry } from "./cache" -import { MemoryCache } from "./memcache" +import { appendJSONL, JSONLTryParse, writeJSONL } from "./jsonl"; +import { host } from "./host"; +import { tryReadText } from "./fs"; +import { dotGenaiscriptPath } from "./workdir"; +import { CacheEntry } from "./cache"; +import { MemoryCache } from "./memcache"; /** * A cache class that manages entries stored in JSONL format. 
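// A minimal sketch of the JSONL round-trip asserted by the tests above,
// assuming the exports from "./jsonl":
import { isJSONLFilename, JSONLStringify, JSONLTryParse } from "./jsonl";

const rows = [{ a: 1 }, { b: 2 }, { c: 3 }];
const text = JSONLStringify(rows); // '{"a":1}\n{"b":2}\n{"c":3}\n'
const parsed = JSONLTryParse(text); // back to three objects; unparsable lines are skipped
console.log(isJSONLFilename("events.jsonl"), parsed.length); // true 3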
@@ -13,56 +13,56 @@ import { MemoryCache } from "./memcache" * @template V - Type of the value */ export class JSONLineCache<K, V> extends MemoryCache<K, V> { - // Constructor is private to enforce the use of byName factory method - constructor(public readonly name: string) { - super(name) // Initialize EventTarget - } + // Constructor is private to enforce the use of byName factory method + constructor(public readonly name: string) { + super(name); // Initialize EventTarget + } - // Get the folder path for the cache storage - private folder() { - return dotGenaiscriptPath("cache", this.name) - } + // Get the folder path for the cache storage + private folder() { + return dotGenaiscriptPath("cache", this.name); + } - // Get the full path to the cache file - private path() { - return host.resolvePath(this.folder(), "db.jsonl") - } + // Get the full path to the cache file + private path() { + return host.resolvePath(this.folder(), "db.jsonl"); + } - private _initializePromise: Promise<void> - /** - * Initialize the cache by loading entries from the file. - * Identifies duplicate entries and rewrites the file if necessary. - */ - override async initialize() { - if (this._entries) return - if (this._initializePromise) return await this._initializePromise + private _initializePromise: Promise<void>; + /** + * Initialize the cache by loading entries from the file. + * Identifies duplicate entries and rewrites the file if necessary. + */ + override async initialize() { + if (this._entries) return; + if (this._initializePromise) return await this._initializePromise; - this._initializePromise = (async () => { - await host.createDirectory(this.folder()) // Ensure directory exists - const content = await tryReadText(this.path()) - const entries: Record<string, CacheEntry<K, V>> = {} - const objs: CacheEntry<K, V>[] = (await JSONLTryParse(content)) ?? [] - let numdup = 0 // Counter for duplicates - for (const obj of objs) { - if (entries[obj.sha]) numdup++ // Count duplicates - entries[obj.sha] = obj - } - if (2 * numdup > objs.length) { - // Rewrite file if too many duplicates - await writeJSONL( - this.path(), - objs.filter((o) => entries[o.sha] === o) // Preserve order - ) - } - // success - super.initialize() - this._entries = entries - this._initializePromise = undefined - })() - return this._initializePromise - } + this._initializePromise = (async () => { + await host.createDirectory(this.folder()); // Ensure directory exists + const content = await tryReadText(this.path()); + const entries: Record<string, CacheEntry<K, V>> = {}; + const objs: CacheEntry<K, V>[] = (await JSONLTryParse(content)) ?? []; + let numdup = 0; // Counter for duplicates + for (const obj of objs) { + if (entries[obj.sha]) numdup++; // Count duplicates + entries[obj.sha] = obj; + } + if (2 * numdup > objs.length) { + // Rewrite file if too many duplicates + await writeJSONL( + this.path(), + objs.filter((o) => entries[o.sha] === o), // Preserve order + ); + } + // success + super.initialize(); + this._entries = entries; + this._initializePromise = undefined; + })(); + return this._initializePromise; + } - override async appendEntry(ent: CacheEntry<K, V>) { - await appendJSONL(this.path(), [ent]) // Append to file - } + override async appendEntry(ent: CacheEntry<K, V>) { + await appendJSONL(this.path(), [ent]); // Append to file + } } diff --git a/packages/core/src/liner.test.ts b/packages/core/src/liner.test.ts index f35cf0cb36..3955b44d9b 100644 --- a/packages/core/src/liner.test.ts +++ b/packages/core/src/liner.test.ts @@ -1,10 +1,10 @@ -import test, { describe } from "node:test" -import assert from "node:assert" -import { llmifyDiff } from "./llmdiff" +import test, { describe } from "node:test"; +import assert from "node:assert"; +import { llmifyDiff } from "./llmdiff"; describe("liner", function () { - test("diff test 1", function () { - const diff = `diff --git a/packages/core/src/liner.diff.txt b/packages/core/src/liner.diff.txt + test("diff test 1", function () { + const diff = `diff --git a/packages/core/src/liner.diff.txt b/packages/core/src/liner.diff.txt index 8cf2f17f..c3cfa4ae 100644 --- a/packages/core/src/liner.diff.txt +++ b/packages/core/src/liner.diff.txt @@ -12,20 +12,20 @@ index 8cf2f17f..c3cfa4ae 100644 line 1 -line 2 +new line 2 - line 3` - const expected = `--- packages/core/src/liner.diff.txt + line 3`; + const expected = `--- packages/core/src/liner.diff.txt +++ packages/core/src/liner.diff.txt @@ -1,3 +1,3 @@ [1] line 1 -line 2 [2] +new line 2 [3] line 3 -` - assertDiff(diff, expected) - }) +`; + assertDiff(diff, expected); + }); - test("diff test 2", function () { - const diff = `diff --git a/packages/core/src/liner.diff.txt b/packages/core/src/liner.diff.txt + test("diff test 2", function () { + const diff = `diff --git a/packages/core/src/liner.diff.txt b/packages/core/src/liner.diff.txt index 8cf2f17f..e17283d9 100644 --- a/packages/core/src/liner.diff.txt +++ b/packages/core/src/liner.diff.txt @@ -35,8 +35,8 @@ index 8cf2f17f..e17283d9 100644 -line 3 +new line 2 +new line 3 -+line 3` - const expected = `--- packages/core/src/liner.diff.txt ++line 3`; + const expected = `--- packages/core/src/liner.diff.txt +++ packages/core/src/liner.diff.txt @@ -1,3 +1,4 @@ [1] line 1 @@ -45,12 +45,12 @@ index 8cf2f17f..e17283d9 100644 [2] +new line 2 [3] +new line 3 [4] +line 3 -` - assertDiff(diff, expected) - }) +`; + assertDiff(diff, expected); + }); - test("diff test 3", function () { - const diff = `diff --git a/packages/core/src/liner.diff.txt b/packages/core/src/liner.diff.txt + test("diff test 3", function () { + const diff = `diff --git a/packages/core/src/liner.diff.txt b/packages/core/src/liner.diff.txt index 8cf2f17f..519f67a6 100644 --- a/packages/core/src/liner.diff.txt +++ b/packages/core/src/liner.diff.txt @@ -60,8 +60,8 @@ index 8cf2f17f..519f67a6 100644 -line 2 +line 2.5 line 3 -\ No newline at end of file` - const expected = `--- packages/core/src/liner.diff.txt +\ No newline at end of file`; + const expected = `--- packages/core/src/liner.diff.txt +++ packages/core/src/liner.diff.txt @@ -1,3 +1,4 @@ [1] +line 0 @@ -69,12 +69,12 @@ index 8cf2f17f..519f67a6 100644 -line 2 [3] +line 2.5
[4] line 3 -` - assertDiff(diff, expected) - }) +`; + assertDiff(diff, expected); + }); - test("diff test 4", function () { - const diff = `diff --git a/packages/core/src/liner.ts b/packages/core/src/liner.ts + test("diff test 4", function () { + const diff = `diff --git a/packages/core/src/liner.ts b/packages/core/src/liner.ts index 1215f7e7..385884e0 100644 --- a/packages/core/src/liner.ts +++ b/packages/core/src/liner.ts @@ -86,8 +86,8 @@ index 1215f7e7..385884e0 100644 + if (change.type === "del") continue ;(change as any).line = currentLineNumber currentLineNumber++ - }` - const expected = `--- packages/core/src/liner.ts + }`; + const expected = `--- packages/core/src/liner.ts +++ packages/core/src/liner.ts @@ -31,7 +31,7 @@ export function addLineNumbersToDiff(diff: string) { [31] for (const chunk of file.chunks) { @@ -98,25 +98,25 @@ index 1215f7e7..385884e0 100644 [35] ;(change as any).line = currentLineNumber [36] currentLineNumber++ [37] } -` - assertDiff(diff, expected) - }) - test("returns the original diff if it is empty", function () { - const diff = "" - const result = llmifyDiff(diff) - assert.strictEqual(result, diff) - }) -}) +`; + assertDiff(diff, expected); + }); + test("returns the original diff if it is empty", function () { + const diff = ""; + const result = llmifyDiff(diff); + assert.strictEqual(result, diff); + }); +}); function assertDiff(diff: string, expected: string) { - const result = llmifyDiff(diff) - try { - assert.strictEqual(result, expected) - } catch (e) { - console.log(diff) - console.log("\n> result") - console.log(result) - console.log("\n> expected") - console.log(expected) - throw e - } + const result = llmifyDiff(diff); + try { + assert.strictEqual(result, expected); + } catch (e) { + console.log(diff); + console.log("\n> result"); + console.log(result); + console.log("\n> expected"); + console.log(expected); + throw e; + } } diff --git a/packages/core/src/liner.ts b/packages/core/src/liner.ts index dd8740f2c7..c468b33c2a 100644 --- a/packages/core/src/liner.ts +++ b/packages/core/src/liner.ts @@ -1,9 +1,9 @@ // This module provides functions to add and remove line numbers from text. // It includes special handling for "diff" formatted text. -import { llmifyDiff } from "./llmdiff" -import { MIN_LINE_NUMBER_LENGTH } from "./constants" -import { tryDiffParse } from "./diff" +import { llmifyDiff } from "./llmdiff"; +import { MIN_LINE_NUMBER_LENGTH } from "./constants"; +import { tryDiffParse } from "./diff"; /** * Adds 1-based line numbers to each line of the input text. @@ -15,23 +15,20 @@ import { tryDiffParse } from "./diff" * - startLine: The starting line number for numbering (default is 1). * @returns The text with line numbers added, the original text if it is too small, or processed diff text if applicable. 
*/ -export function addLineNumbers( - text: string, - options?: { language?: string; startLine?: number } -) { - const { language, startLine = 1 } = options || {} - if (language === "diff" || tryDiffParse(text)) { - const diffed = llmifyDiff(text) // Process the text with a special function for diffs - if (diffed !== undefined) return diffed // Return processed text if diff handling was successful - } +export function addLineNumbers(text: string, options?: { language?: string; startLine?: number }) { + const { language, startLine = 1 } = options || {}; + if (language === "diff" || tryDiffParse(text)) { + const diffed = llmifyDiff(text); // Process the text with a special function for diffs + if (diffed !== undefined) return diffed; // Return processed text if diff handling was successful + } - // don't add line numbers for small files - const lines = text.split("\n") // Split text into lines - if (startLine === 1 && lines.length < MIN_LINE_NUMBER_LENGTH) return text + // don't add line numbers for small files + const lines = text.split("\n"); // Split text into lines + if (startLine === 1 && lines.length < MIN_LINE_NUMBER_LENGTH) return text; - return lines - .map((line, i) => `[${i + startLine}] ${line}`) // Add line numbers in the format "[line_number] " - .join("\n") // Join lines back into a single string + return lines + .map((line, i) => `[${i + startLine}] ${line}`) // Add line numbers in the format "[line_number] " + .join("\n"); // Join lines back into a single string } /** @@ -42,13 +39,13 @@ export function addLineNumbers( * @returns The text without line numbers, or the original text if no line numbers are found. */ export function removeLineNumbers(text: string) { - const rx = /^\[\d+\] / // Regular expression to match line numbers in the format "[number] " - const lines = text.split("\n") // Split text into lines + const rx = /^\[\d+\] /; // Regular expression to match line numbers in the format "[number] " + const lines = text.split("\n"); // Split text into lines - // Check the first 10 lines for the presence of line numbers - if (!lines.slice(0, 10).every((line) => rx.test(line))) return text // Return original text if not all lines have numbers + // Check the first 10 lines for the presence of line numbers + if (!lines.slice(0, 10).every((line) => rx.test(line))) return text; // Return original text if not all lines have numbers - return lines.map((line) => line.replace(rx, "")).join("\n") // Remove line numbers and join lines back + return lines.map((line) => line.replace(rx, "")).join("\n"); // Remove line numbers and join lines back } /** @@ -60,17 +57,14 @@ export function removeLineNumbers(text: string) { * - lineEnd: The 1-based ending line number of the range. * @returns The extracted range of text or the original text if no valid range is provided. 
*/ -export function extractRange( - text: string, - options?: { lineStart?: number; lineEnd?: number } -) { - const { lineStart, lineEnd } = options || {} - if (isNaN(lineStart) && isNaN(lineEnd)) return text +export function extractRange(text: string, options?: { lineStart?: number; lineEnd?: number }) { + const { lineStart, lineEnd } = options || {}; + if (isNaN(lineStart) && isNaN(lineEnd)) return text; - const lines = text.split("\n") - const startLine = lineStart || 1 - const endLine = lineEnd || lines.length - return lines.slice(startLine - 1, endLine).join("\n") + const lines = text.split("\n"); + const startLine = lineStart || 1; + const endLine = lineEnd || lines.length; + return lines.slice(startLine - 1, endLine).join("\n"); } /** @@ -80,19 +74,13 @@ export function extractRange( * @returns The line number corresponding to the position index, starting from 1. */ export function indexToLineNumber(text: string, index: number): number { - if ( - text === undefined || - text === null || - index < 0 || - index >= text.length - ) - return -1 - let lineNumber = 1 - const n = Math.min(index, text.length) - for (let i = 0; i < n; i++) { - if (text[i] === "\n") { - lineNumber++ - } + if (text === undefined || text === null || index < 0 || index >= text.length) return -1; + let lineNumber = 1; + const n = Math.min(index, text.length); + for (let i = 0; i < n; i++) { + if (text[i] === "\n") { + lineNumber++; } - return lineNumber + } + return lineNumber; } diff --git a/packages/core/src/llmdiff.test.ts b/packages/core/src/llmdiff.test.ts index 682c992d3c..cc17290676 100644 --- a/packages/core/src/llmdiff.test.ts +++ b/packages/core/src/llmdiff.test.ts @@ -1,11 +1,11 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { parseLLMDiffs } from "./llmdiff" -import { diffCreatePatch } from "./diff" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { parseLLMDiffs } from "./llmdiff"; +import { diffCreatePatch } from "./diff"; describe("llmdiff", () => { - test("is_valid_email", () => { - const source = `[1] import re + test("is_valid_email", () => { + const source = `[1] import re [2] [3] def is_valid_email(email): - [4] if re.fullmatch(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+", email): @@ -13,13 +13,13 @@ describe("llmdiff", () => { + [5] if pattern.fullmatch(email): [6] return True [7] else: -[8] return False` - const chunks = parseLLMDiffs(source) - assert.equal(chunks.length, 4) - }) +[8] return False`; + const chunks = parseLLMDiffs(source); + assert.equal(chunks.length, 4); + }); - test("missing line numbers", () => { - const source = ` + test("missing line numbers", () => { + const source = ` [10] CONSTANT - \* @type: XXX; + \* @type: Int; @@ -32,14 +32,14 @@ describe("llmdiff", () => { + \* @type: Int; - \* @type: XXX; + \* @type: Str; -` +`; - const chunks = parseLLMDiffs(source) - assert.equal(chunks.length, 12) - }) + const chunks = parseLLMDiffs(source); + assert.equal(chunks.length, 12); + }); - test("missing line numbers 2", () => { - const source = ` + test("missing line numbers 2", () => { + const source = ` [17] CONSTANTS - \* @type: ???; + \* @type: Int; @@ -60,14 +60,14 @@ describe("llmdiff", () => { - \* @type: ???; + \* @type: Set(<>); [34] sent -` +`; - const chunks = parseLLMDiffs(source) - assert.equal(chunks.length, 19) - }) + const chunks = parseLLMDiffs(source); + assert.equal(chunks.length, 19); + }); - test("source same as added", () => { - const source = `[9] Annotations 
are errors, warning or notes that can be added to the LLM output. They are extracted and injected in VSCode or your CI environment. + test("source same as added", () => { + const source = `[9] Annotations are errors, warning or notes that can be added to the LLM output. They are extracted and injected in VSCode or your CI environment. + Annotations are errors, warnings, or notes that can be added to the LLM output. They are extracted and injected into VSCode or your CI environment. [30] The \`system.annotations\` prompt automatically enables line number injection for all \`def\` section. This helps - [31] with the precision of the LLM answer and reduces hallucinations. @@ -84,13 +84,13 @@ describe("llmdiff", () => { + [87] - Your organization may restrict the execution of GitHub Actions on pull requests. [92] You can use the [defOutput](/genaiscript/reference/scripts/custom-output/) function - [93] to filter the annotations. -+ [93] to filter annotations.` - const chunks = parseLLMDiffs(source) - assert.equal(chunks.length, 18) - }) ++ [93] to filter annotations.`; + const chunks = parseLLMDiffs(source); + assert.equal(chunks.length, 18); + }); - test("start offset", () => { - const source = `[6] import { CORE_VERSION } from "./version" + test("start offset", () => { + const source = `[6] import { CORE_VERSION } from "./version" [7] [8] // Represents a cache entry with a hash (sha), key, and value [9] export type CacheEntry = { sha: string; key: K; val: V } @@ -188,13 +188,13 @@ describe("llmdiff", () => { [101] if (typeof key != "string") key = JSON.stringify(key) + CORE_VERSION [102] return await sha256string(key) [103] } -` - const chunks = parseLLMDiffs(source) - assert(chunks.length > 0) - }) +`; + const chunks = parseLLMDiffs(source); + assert(chunks.length > 0); + }); - test("insert after incorrect line description", () => { - const source = `[1] import { appendJSONL, readJSONL, writeJSONL } from "./jsonl" + test("insert after incorrect line description", () => { + const source = `[1] import { appendJSONL, readJSONL, writeJSONL } from "./jsonl" [2] import { host, runtimeHost } from "./host" [3] import { dotGenaiscriptPath, sha256string } from "./util" [4] import { CHANGE } from "./constants" @@ -283,38 +283,38 @@ describe("llmdiff", () => { [87] async function keySHA(key: any) { [88] if (typeof key != "string") key = JSON.stringify(key) + CORE_VERSION [89] return await sha256string(key) -[90] }` - const chunks = parseLLMDiffs(source) - assert.notEqual(chunks.length, 0) - }) -}) +[90] }`; + const chunks = parseLLMDiffs(source); + assert.notEqual(chunks.length, 0); + }); +}); test("createDiff with context", () => { - const left = { - filename: "file1.txt", - content: "line1\nline2\nline3\nline4\nline5\n", - } - const right = { - filename: "file1.txt", - content: "line1\nline2\nline3\nline4 modified\nline5\n", - } - const diff = diffCreatePatch(left, right, { context: 2 }) - assert(diff.includes("@@ -2,4 +2,4 @@")) - assert(diff.includes("-line4")) - assert(diff.includes("+line4 modified")) -}) + const left = { + filename: "file1.txt", + content: "line1\nline2\nline3\nline4\nline5\n", + }; + const right = { + filename: "file1.txt", + content: "line1\nline2\nline3\nline4 modified\nline5\n", + }; + const diff = diffCreatePatch(left, right, { context: 2 }); + assert(diff.includes("@@ -2,4 +2,4 @@")); + assert(diff.includes("-line4")); + assert(diff.includes("+line4 modified")); +}); test("createDiff without context", () => { - const left = { - filename: "file1.txt", - content: 
"line1\nline2\nline3\nline4\nline5\n", - } - const right = { - filename: "file1.txt", - content: "line1\nline2\nline3\nline4 modified\nline5\n", - } - const diff = diffCreatePatch(left, right) - console.log(diff) - assert(diff.includes("@@ -1,5 +1,5 @@")) - assert(diff.includes("-line4")) - assert(diff.includes("+line4 modified")) -}) + const left = { + filename: "file1.txt", + content: "line1\nline2\nline3\nline4\nline5\n", + }; + const right = { + filename: "file1.txt", + content: "line1\nline2\nline3\nline4 modified\nline5\n", + }; + const diff = diffCreatePatch(left, right); + console.log(diff); + assert(diff.includes("@@ -1,5 +1,5 @@")); + assert(diff.includes("-line4")); + assert(diff.includes("+line4 modified")); +}); diff --git a/packages/core/src/llmdiff.ts b/packages/core/src/llmdiff.ts index d3c6be6e4a..64d07684d4 100644 --- a/packages/core/src/llmdiff.ts +++ b/packages/core/src/llmdiff.ts @@ -1,13 +1,13 @@ -import { assert } from "./util" -import { tryDiffParse } from "./diff" +import { assert } from "./util"; +import { tryDiffParse } from "./diff"; /** * Represents a chunk of changes in a diff. */ export interface Chunk { - state: "existing" | "deleted" | "added" - lines: string[] - lineNumbers: number[] + state: "existing" | "deleted" | "added"; + lines: string[]; + lineNumbers: number[]; } /** @@ -20,120 +20,120 @@ export interface Chunk { * @returns An array of chunks representing the parsed diff, with each chunk containing its state, lines, and line numbers. */ export function parseLLMDiffs(text: string): Chunk[] { - const lines = text.split("\n") - const chunks: Chunk[] = [] - - // Initialize the first chunk - let chunk: Chunk = { state: "existing", lines: [], lineNumbers: [] } - chunks.push(chunk) - - let currentLine = Number.NaN - for (let i = 0; i < lines.length; ++i) { - let line = lines[i] - const diffM = /^(\[(\d+)\] )?(-|\+) (\[(\d+)\] )?/.exec(line) - - // Process lines that match the diff pattern - if (diffM) { - const l = line.substring(diffM[0].length) - let diffln = diffM ? parseInt(diffM[5] ?? diffM[2]) : Number.NaN - const op = diffM[3] - - // Adjust line numbers - if (isNaN(diffln) && !isNaN(currentLine)) { - currentLine++ - diffln = currentLine - if (op === "-") currentLine-- - } else { - currentLine = diffln - } - - // Handle added lines - if (op === "+") { - const l = line.substring(diffM[0].length) - if (lines[diffln] === l) { - // Skip duplicate line - continue - } - if (chunk.state === "added") { - chunk.lines.push(l) - chunk.lineNumbers.push(diffln) - } else { - chunk = { - state: "added", - lines: [l], - lineNumbers: [diffln], - } - chunks.push(chunk) - } - } else { - // Handle deleted lines - assert(op === "-") - if (chunk.state === "deleted") { - chunk.lines.push(l) - chunk.lineNumbers.push(diffln) - } else { - chunk = { - state: "deleted", - lines: [l], - lineNumbers: [diffln], - } - chunks.push(chunk) - } - } + const lines = text.split("\n"); + const chunks: Chunk[] = []; + + // Initialize the first chunk + let chunk: Chunk = { state: "existing", lines: [], lineNumbers: [] }; + chunks.push(chunk); + + let currentLine = Number.NaN; + for (let i = 0; i < lines.length; ++i) { + let line = lines[i]; + const diffM = /^(\[(\d+)\] )?(-|\+) (\[(\d+)\] )?/.exec(line); + + // Process lines that match the diff pattern + if (diffM) { + const l = line.substring(diffM[0].length); + let diffln = diffM ? parseInt(diffM[5] ?? 
diffM[2]) : Number.NaN; + const op = diffM[3]; + + // Adjust line numbers + if (isNaN(diffln) && !isNaN(currentLine)) { + currentLine++; + diffln = currentLine; + if (op === "-") currentLine--; + } else { + currentLine = diffln; + } + + // Handle added lines + if (op === "+") { + const l = line.substring(diffM[0].length); + if (lines[diffln] === l) { + // Skip duplicate line + continue; + } + if (chunk.state === "added") { + chunk.lines.push(l); + chunk.lineNumbers.push(diffln); } else { - // Handle existing lines - const lineM = /^\[(\d+)\] /.exec(line) - let lineNumber = lineM ? parseInt(lineM[1]) : Number.NaN - const l = line.substring(lineM ? lineM[0].length : 0) - if (isNaN(lineNumber) && !isNaN(currentLine)) { - currentLine++ - lineNumber = currentLine - } else { - currentLine = lineNumber - } - if (chunk.state === "existing") { - chunk.lines.push(l) - chunk.lineNumbers.push(lineNumber) - } else { - chunk = { - state: "existing", - lines: [l], - lineNumbers: [lineNumber], - } - chunks.push(chunk) - } + chunk = { + state: "added", + lines: [l], + lineNumbers: [diffln], + }; + chunks.push(chunk); } - } - - // Clean trailing empty lines in the last chunk - if (chunk.state === "existing") { - while (/^\s*$/.test(chunk.lines[chunk.lines.length - 1])) { - chunk.lines.pop() - chunk.lineNumbers.pop() + } else { + // Handle deleted lines + assert(op === "-"); + if (chunk.state === "deleted") { + chunk.lines.push(l); + chunk.lineNumbers.push(diffln); + } else { + chunk = { + state: "deleted", + lines: [l], + lineNumbers: [diffln], + }; + chunks.push(chunk); } - if (chunk.lines.length === 0) chunks.pop() + } + } else { + // Handle existing lines + const lineM = /^\[(\d+)\] /.exec(line); + let lineNumber = lineM ? parseInt(lineM[1]) : Number.NaN; + const l = line.substring(lineM ? lineM[0].length : 0); + if (isNaN(lineNumber) && !isNaN(currentLine)) { + currentLine++; + lineNumber = currentLine; + } else { + currentLine = lineNumber; + } + if (chunk.state === "existing") { + chunk.lines.push(l); + chunk.lineNumbers.push(lineNumber); + } else { + chunk = { + state: "existing", + lines: [l], + lineNumbers: [lineNumber], + }; + chunks.push(chunk); + } } + } - // Remove duplicate lines added without changes - for (let i = 0; i < chunks.length - 1; ++i) { - const current = chunks[i] - const next = chunks[i + 1] - if ( - current.lines.length === 1 && - next.lines.length === 1 && - current.state === "existing" && - next.state === "added" && - current.lines[0] === next.lines[0] - ) { - // Remove current, added line since it does not change the file - chunks.splice(i, 2) - } + // Clean trailing empty lines in the last chunk + if (chunk.state === "existing") { + while (/^\s*$/.test(chunk.lines[chunk.lines.length - 1])) { + chunk.lines.pop(); + chunk.lineNumbers.pop(); + } + if (chunk.lines.length === 0) chunks.pop(); + } + + // Remove duplicate lines added without changes + for (let i = 0; i < chunks.length - 1; ++i) { + const current = chunks[i]; + const next = chunks[i + 1]; + if ( + current.lines.length === 1 && + next.lines.length === 1 && + current.state === "existing" && + next.state === "added" && + current.lines[0] === next.lines[0] + ) { + // Remove current, added line since it does not change the file + chunks.splice(i, 2); } + } - return chunks + return chunks; } -const MIN_CHUNK_SIZE = 4 +const MIN_CHUNK_SIZE = 4; /** * Finds the starting position of a chunk in the given lines. 
@@ -143,31 +143,26 @@ const MIN_CHUNK_SIZE = 4 * @returns The index of the starting line of the chunk, or -1 if not found. */ function findChunk(lines: string[], chunk: Chunk, startLine: number): number { - const chunkLines = chunk.lines - if (chunkLines.length === 0) return startLine - const chunkStart = chunkLines[0].trim() - let linei = startLine - while (linei < lines.length) { - const line = lines[linei].trim() - if (line === chunkStart) { - let found = true - let i = 1 - for ( - ; - i < Math.min(MIN_CHUNK_SIZE, chunkLines.length) && - linei + i < lines.length; - ++i - ) { - if (lines[linei + i].trim() !== chunkLines[i].trim()) { - found = false - break - } - } - if (found && i === chunkLines.length) return linei + const chunkLines = chunk.lines; + if (chunkLines.length === 0) return startLine; + const chunkStart = chunkLines[0].trim(); + let linei = startLine; + while (linei < lines.length) { + const line = lines[linei].trim(); + if (line === chunkStart) { + let found = true; + let i = 1; + for (; i < Math.min(MIN_CHUNK_SIZE, chunkLines.length) && linei + i < lines.length; ++i) { + if (lines[linei + i].trim() !== chunkLines[i].trim()) { + found = false; + break; } - ++linei + } + if (found && i === chunkLines.length) return linei; } - return -1 + ++linei; + } + return -1; } /** @@ -179,63 +174,58 @@ function findChunk(lines: string[], chunk: Chunk, startLine: number): number { * @throws Error if the chunk sequence is invalid, unexpected states are encountered, or if chunk alignment fails. */ export function applyLLMDiff(source: string, chunks: Chunk[]): string { - if (!chunks?.length || !source) return source - - const lines = source.split("\n") - let current = 0 - let i = 0 - while (i + 1 < chunks.length) { - const chunk = chunks[i++] - if (chunk.state !== "existing") - throw new Error("expecting existing chunk") - - // Find location of existing chunk - const chunkStart = findChunk(lines, chunk, current) - if (chunkStart === -1) break - current = chunkStart + chunk.lines.length - - // Handle deleted chunk - if (chunks[i]?.state === "deleted") { - const deletedChunk = chunks[i++] - const chunkDel = findChunk(lines, deletedChunk, current) - if (chunkDel === current) { - lines.splice(current, deletedChunk.lines.length) - } - if (chunks[i]?.state === "existing") continue - } + if (!chunks?.length || !source) return source; + + const lines = source.split("\n"); + let current = 0; + let i = 0; + while (i + 1 < chunks.length) { + const chunk = chunks[i++]; + if (chunk.state !== "existing") throw new Error("expecting existing chunk"); + + // Find location of existing chunk + const chunkStart = findChunk(lines, chunk, current); + if (chunkStart === -1) break; + current = chunkStart + chunk.lines.length; + + // Handle deleted chunk + if (chunks[i]?.state === "deleted") { + const deletedChunk = chunks[i++]; + const chunkDel = findChunk(lines, deletedChunk, current); + if (chunkDel === current) { + lines.splice(current, deletedChunk.lines.length); + } + if (chunks[i]?.state === "existing") continue; + } - const addedChunk = chunks[i++] - if (!addedChunk) break - if (addedChunk?.state !== "added") - throw new Error("expecting added chunk") + const addedChunk = chunks[i++]; + if (!addedChunk) break; + if (addedChunk?.state !== "added") throw new Error("expecting added chunk"); - // Find the end of the next existing chunk - let nextChunk = chunks[i] - if (nextChunk && nextChunk.state !== "existing") - throw new Error("expecting existing chunk") - const chunkEnd = nextChunk - ? 
findChunk(lines, nextChunk, current) - : lines.length + // Find the end of the next existing chunk + let nextChunk = chunks[i]; + if (nextChunk && nextChunk.state !== "existing") throw new Error("expecting existing chunk"); + const chunkEnd = nextChunk ? findChunk(lines, nextChunk, current) : lines.length; - if (chunkEnd === -1) break + if (chunkEnd === -1) break; - // Finally, replace the lines with the added chunk - const toRemove = chunkEnd - current - lines.splice(current, toRemove, ...addedChunk.lines) + // Finally, replace the lines with the added chunk + const toRemove = chunkEnd - current; + lines.splice(current, toRemove, ...addedChunk.lines); - current += addedChunk.lines.length - toRemove - } + current += addedChunk.lines.length - toRemove; + } - return lines.join("\n") + return lines.join("\n"); } /** * Custom error class for handling diff-related errors. */ export class DiffError extends Error { - constructor(message: string) { - super(message) - } + constructor(message: string) { + super(message); + } } /** @@ -249,45 +239,41 @@ export class DiffError extends Error { * @throws DiffError if invalid or missing line numbers are encountered. */ export function applyLLMPatch(source: string, chunks: Chunk[]): string { - if (!chunks?.length || !source) return source - - const lines = source.split("\n") - - // Process modified and deleted chunks - chunks - .filter((c) => c.state !== "added") - .forEach((chunk) => { - for (let li = 0; li < chunk.lines.length; ++li) { - const line = - chunk.state === "deleted" ? undefined : chunk.lines[li] - const linei = chunk.lineNumbers[li] - 1 - if (isNaN(linei)) - throw new DiffError(`diff: missing or nan line number`) - if (linei < 0 || linei >= lines.length) - throw new DiffError( - `diff: invalid line number ${linei} in ${lines.length}` - ) - lines[linei] = line - } - }) - - // Insert added chunks after processing deletions and modifications - for (let ci = chunks.length - 1; ci > 0; ci--) { - const chunk = chunks[ci] - if (chunk.state !== "added") continue - let previ = ci - 1 - let prev = chunks[previ] - // Find the previous existing chunk - while (prev && prev.state !== "existing") { - prev = chunks[--previ] - } - if (!prev) throw new Error("missing previous chunk for added chunk") - const prevLinei = prev.lineNumbers[prev.lineNumbers.length - 1] - lines.splice(prevLinei, 0, ...chunk.lines) + if (!chunks?.length || !source) return source; + + const lines = source.split("\n"); + + // Process modified and deleted chunks + chunks + .filter((c) => c.state !== "added") + .forEach((chunk) => { + for (let li = 0; li < chunk.lines.length; ++li) { + const line = chunk.state === "deleted" ? 
undefined : chunk.lines[li]; + const linei = chunk.lineNumbers[li] - 1; + if (isNaN(linei)) throw new DiffError(`diff: missing or nan line number`); + if (linei < 0 || linei >= lines.length) + throw new DiffError(`diff: invalid line number ${linei} in ${lines.length}`); + lines[linei] = line; + } + }); + + // Insert added chunks after processing deletions and modifications + for (let ci = chunks.length - 1; ci > 0; ci--) { + const chunk = chunks[ci]; + if (chunk.state !== "added") continue; + let previ = ci - 1; + let prev = chunks[previ]; + // Find the previous existing chunk + while (prev && prev.state !== "existing") { + prev = chunks[--previ]; } + if (!prev) throw new Error("missing previous chunk for added chunk"); + const prevLinei = prev.lineNumbers[prev.lineNumbers.length - 1]; + lines.splice(prevLinei, 0, ...chunk.lines); + } - // Filter out undefined lines (deleted) - return lines.filter((l) => l !== undefined).join("\n") + // Filter out undefined lines (deleted) + return lines.filter((l) => l !== undefined).join("\n"); } /** @@ -300,37 +286,34 @@ export function applyLLMPatch(source: string, chunks: Chunk[]): string { * @returns The LLMDiff formatted string or undefined if parsing fails. */ export function llmifyDiff(diff: string) { - if (!diff) return diff - - const parsed = tryDiffParse(diff) - if (!parsed?.length) return undefined - - for (const file of parsed) { - for (const chunk of file.chunks) { - let currentLineNumber = chunk.newStart - for (const change of chunk.changes) { - if (change.type === "del") continue - ;(change as any).line = currentLineNumber - currentLineNumber++ - } - } + if (!diff) return diff; + + const parsed = tryDiffParse(diff); + if (!parsed?.length) return undefined; + + for (const file of parsed) { + for (const chunk of file.chunks) { + let currentLineNumber = chunk.newStart; + for (const change of chunk.changes) { + if (change.type === "del") continue; + (change as any).line = currentLineNumber; + currentLineNumber++; + } } - - // Convert back to unified diff format - let result = "" - for (const file of parsed) { - result += `--- ${file.from}\n+++ ${file.to}\n` - for (const chunk of file.chunks) { - result += `${chunk.content}\n` - for (const change of chunk.changes) { - const ln = - (change as any).line !== undefined - ? `[${(change as any).line}] ` - : "" - result += `${ln}${change.content}\n` - } - } + } + + // Convert back to unified diff format + let result = ""; + for (const file of parsed) { + result += `--- ${file.from}\n+++ ${file.to}\n`; + for (const chunk of file.chunks) { + result += `${chunk.content}\n`; + for (const change of chunk.changes) { + const ln = (change as any).line !== undefined ? 
`[${(change as any).line}] ` : ""; + result += `${ln}${change.content}\n`; + } } + } - return result + return result; } diff --git a/packages/core/src/llms.test.ts b/packages/core/src/llms.test.ts index 709239f56b..9de20aadae 100644 --- a/packages/core/src/llms.test.ts +++ b/packages/core/src/llms.test.ts @@ -1,34 +1,34 @@ -import { describe, test, beforeEach } from "node:test" -import assert from "node:assert/strict" -import { defaultModelConfigurations } from "./llms" -import { TestHost } from "./testhost" -import { LARGE_MODEL_ID, SMALL_MODEL_ID, VISION_MODEL_ID } from "./constants" +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import { defaultModelConfigurations } from "./llms"; +import { TestHost } from "./testhost"; +import { LARGE_MODEL_ID, SMALL_MODEL_ID, VISION_MODEL_ID } from "./constants"; describe("defaultModelConfigurations", () => { - beforeEach(async () => { - TestHost.install() - }) + beforeEach(async () => { + TestHost.install(); + }); - test("should return the expected model configurations", () => { - const modelConfigs = defaultModelConfigurations() - assert(modelConfigs) - assert.equal(typeof modelConfigs, "object") - // Further checks based on expected structure of modelConfigs - }) + test("should return the expected model configurations", () => { + const modelConfigs = defaultModelConfigurations(); + assert(modelConfigs); + assert.equal(typeof modelConfigs, "object"); + // Further checks based on expected structure of modelConfigs + }); - test("should process aliases correctly", () => { - const modelConfigs = defaultModelConfigurations() - const aliases = [ - LARGE_MODEL_ID, - SMALL_MODEL_ID, - VISION_MODEL_ID, - "vision_small", - "embeddings", - "reasoning", - "reasoning_small", - ] - aliases.forEach((alias) => { - assert(alias in modelConfigs) - }) - }) -}) + test("should process aliases correctly", () => { + const modelConfigs = defaultModelConfigurations(); + const aliases = [ + LARGE_MODEL_ID, + SMALL_MODEL_ID, + VISION_MODEL_ID, + "vision_small", + "embeddings", + "reasoning", + "reasoning_small", + ]; + aliases.forEach((alias) => { + assert(alias in modelConfigs); + }); + }); +}); diff --git a/packages/core/src/llms.ts b/packages/core/src/llms.ts index c98ced0b93..d55b1c7f5c 100644 --- a/packages/core/src/llms.ts +++ b/packages/core/src/llms.ts @@ -1,8 +1,8 @@ -import { LARGE_MODEL_ID, SMALL_MODEL_ID, VISION_MODEL_ID } from "./constants" -import { ModelConfiguration, ModelConfigurations } from "./host" -import LLMS from "./llms.json" -import { deleteEmptyValues } from "./cleaners" -import { uniq } from "es-toolkit" +import { LARGE_MODEL_ID, SMALL_MODEL_ID, VISION_MODEL_ID } from "./constants"; +import { ModelConfiguration, ModelConfigurations } from "./host"; +import LLMS from "./llms.json"; +import { deleteEmptyValues } from "./cleaners"; +import { uniq } from "es-toolkit"; /** * Generates default model configurations by aggregating model aliases and @@ -26,50 +26,45 @@ import { uniq } from "es-toolkit" * - Returns a structured clone of the final configurations object. 
*/ export function defaultModelConfigurations(): ModelConfigurations { - const aliases = collectAliases([ - LARGE_MODEL_ID, - SMALL_MODEL_ID, - VISION_MODEL_ID, - "vision_small", - "embeddings", - "reasoning", - "reasoning_small", - ]) - const res = { - ...(Object.fromEntries( - aliases.map<[string, ModelConfiguration]>((alias) => [ - alias, - readModelAlias(alias), - ]) - ) as ModelConfigurations), - ...Object.fromEntries( - Object.entries(LLMS.aliases).map<[string, ModelConfiguration]>( - ([id, model]) => [ - id, - { model, source: "default" } satisfies ModelConfiguration, - ] - ) - ), - } - return structuredClone(res) + const aliases = collectAliases([ + LARGE_MODEL_ID, + SMALL_MODEL_ID, + VISION_MODEL_ID, + "vision_small", + "embeddings", + "reasoning", + "reasoning_small", + ]); + const res = { + ...(Object.fromEntries( + aliases.map<[string, ModelConfiguration]>((alias) => [alias, readModelAlias(alias)]), + ) as ModelConfigurations), + ...Object.fromEntries( + Object.entries(LLMS.aliases).map<[string, ModelConfiguration]>(([id, model]) => [ + id, + { model, source: "default" } satisfies ModelConfiguration, + ]), + ), + }; + return structuredClone(res); - function collectAliases(ids: string[]): string[] { - const candidates = Object.values(LLMS.providers).flatMap( - ({ aliases }) => Object.keys(aliases || {}) - ) - return uniq([...ids, ...candidates]) - } - function readModelAlias(alias: string) { - const candidates = Object.values(LLMS.providers) - .map(({ id, aliases }) => { - const ref = (aliases as Record<string, string>)?.[alias] - return ref ? `${id}:${ref}` : undefined - }) - .filter((c) => !!c) - return deleteEmptyValues({ - model: candidates[0], - candidates, - source: "default", - } satisfies ModelConfiguration) - } + function collectAliases(ids: string[]): string[] { + const candidates = Object.values(LLMS.providers).flatMap(({ aliases }) => + Object.keys(aliases || {}), + ); + return uniq([...ids, ...candidates]); + } + function readModelAlias(alias: string) { + const candidates = Object.values(LLMS.providers) + .map(({ id, aliases }) => { + const ref = (aliases as Record<string, string>)?.[alias]; + return ref ?
`${id}:${ref}` : undefined; + }) + .filter((c) => !!c); + return deleteEmptyValues({ + model: candidates[0], + candidates, + source: "default", + } satisfies ModelConfiguration); + } } diff --git a/packages/core/src/lm.ts b/packages/core/src/lm.ts index 2cfe435a0e..d3ddda45b4 100644 --- a/packages/core/src/lm.ts +++ b/packages/core/src/lm.ts @@ -1,30 +1,30 @@ -import { AnthropicBedrockModel, AnthropicModel } from "./anthropic" -import { LanguageModel } from "./chat" +import { AnthropicBedrockModel, AnthropicModel } from "./anthropic"; +import { LanguageModel } from "./chat"; import { - MODEL_PROVIDER_ANTHROPIC, - MODEL_PROVIDER_ANTHROPIC_BEDROCK, - MODEL_PROVIDER_GITHUB_COPILOT_CHAT, - MODEL_PROVIDER_GITHUB, - MODEL_PROVIDER_LMSTUDIO, - MODEL_PROVIDER_OLLAMA, - MODEL_PROVIDER_WHISPERASR, - MODEL_PROVIDER_AZURE_OPENAI, - MODEL_PROVIDER_ECHO, - MODEL_PROVIDER_NONE, - MODEL_PROVIDER_AZURE_AI_INFERENCE, -} from "./constants" -import { runtimeHost } from "./host" -import { OllamaModel } from "./ollama" -import { LocalOpenAICompatibleModel } from "./openai" -import { GitHubModel } from "./github" -import { LMStudioModel } from "./lmstudio" -import { WhisperAsrModel } from "./whisperasr" -import { AzureOpenAIModel } from "./azureopenai" -import { EchoModel } from "./echomodel" -import { NoneModel } from "./nonemodel" -import { AzureAIInferenceModel } from "./azureaiinference" -import { providerFeatures } from "./features" -import { NotSupportedError } from "./error" + MODEL_PROVIDER_ANTHROPIC, + MODEL_PROVIDER_ANTHROPIC_BEDROCK, + MODEL_PROVIDER_GITHUB_COPILOT_CHAT, + MODEL_PROVIDER_GITHUB, + MODEL_PROVIDER_LMSTUDIO, + MODEL_PROVIDER_OLLAMA, + MODEL_PROVIDER_WHISPERASR, + MODEL_PROVIDER_AZURE_OPENAI, + MODEL_PROVIDER_ECHO, + MODEL_PROVIDER_NONE, + MODEL_PROVIDER_AZURE_AI_INFERENCE, +} from "./constants"; +import { runtimeHost } from "./host"; +import { OllamaModel } from "./ollama"; +import { LocalOpenAICompatibleModel } from "./openai"; +import { GitHubModel } from "./github"; +import { LMStudioModel } from "./lmstudio"; +import { WhisperAsrModel } from "./whisperasr"; +import { AzureOpenAIModel } from "./azureopenai"; +import { EchoModel } from "./echomodel"; +import { NoneModel } from "./nonemodel"; +import { AzureAIInferenceModel } from "./azureaiinference"; +import { providerFeatures } from "./features"; +import { NotSupportedError } from "./error"; /** * Resolves and returns a language model based on the provided model provider identifier. @@ -39,29 +39,27 @@ import { NotSupportedError } from "./error" * features derived from the MODEL_PROVIDERS configuration. 
*/ export function resolveLanguageModel(provider: string): LanguageModel { - if (provider === MODEL_PROVIDER_GITHUB_COPILOT_CHAT) { - const m = runtimeHost.clientLanguageModel - if (!m) throw new Error("Github Copilot Chat Models not available") - return m - } - if (provider === MODEL_PROVIDER_AZURE_OPENAI) return AzureOpenAIModel - if (provider === MODEL_PROVIDER_AZURE_AI_INFERENCE) - return AzureAIInferenceModel - if (provider === MODEL_PROVIDER_GITHUB) return GitHubModel - if (provider === MODEL_PROVIDER_OLLAMA) return OllamaModel - if (provider === MODEL_PROVIDER_ANTHROPIC) return AnthropicModel - if (provider === MODEL_PROVIDER_ANTHROPIC_BEDROCK) - return AnthropicBedrockModel - if (provider === MODEL_PROVIDER_LMSTUDIO) return LMStudioModel - if (provider === MODEL_PROVIDER_WHISPERASR) return WhisperAsrModel - if (provider === MODEL_PROVIDER_ECHO) return EchoModel - if (provider === MODEL_PROVIDER_NONE) return NoneModel + if (provider === MODEL_PROVIDER_GITHUB_COPILOT_CHAT) { + const m = runtimeHost.clientLanguageModel; + if (!m) throw new Error("Github Copilot Chat Models not available"); + return m; + } + if (provider === MODEL_PROVIDER_AZURE_OPENAI) return AzureOpenAIModel; + if (provider === MODEL_PROVIDER_AZURE_AI_INFERENCE) return AzureAIInferenceModel; + if (provider === MODEL_PROVIDER_GITHUB) return GitHubModel; + if (provider === MODEL_PROVIDER_OLLAMA) return OllamaModel; + if (provider === MODEL_PROVIDER_ANTHROPIC) return AnthropicModel; + if (provider === MODEL_PROVIDER_ANTHROPIC_BEDROCK) return AnthropicBedrockModel; + if (provider === MODEL_PROVIDER_LMSTUDIO) return LMStudioModel; + if (provider === MODEL_PROVIDER_WHISPERASR) return WhisperAsrModel; + if (provider === MODEL_PROVIDER_ECHO) return EchoModel; + if (provider === MODEL_PROVIDER_NONE) return NoneModel; - const features = providerFeatures(provider) - return LocalOpenAICompatibleModel(provider, { - listModels: features?.listModels !== false, - transcribe: features?.transcribe, - speech: features?.speech, - imageGeneration: features?.imageGeneration, - }) + const features = providerFeatures(provider); + return LocalOpenAICompatibleModel(provider, { + listModels: features?.listModels !== false, + transcribe: features?.transcribe, + speech: features?.speech, + imageGeneration: features?.imageGeneration, + }); } diff --git a/packages/core/src/lmstudio.ts b/packages/core/src/lmstudio.ts index f8ebe849a7..59c596b5dc 100644 --- a/packages/core/src/lmstudio.ts +++ b/packages/core/src/lmstudio.ts @@ -1,27 +1,23 @@ -import { LanguageModel, PullModelFunction } from "./chat" -import { MODEL_PROVIDER_LMSTUDIO, SUCCESS_ERROR_CODE } from "./constants" -import { - OpenAIChatCompletion, - OpenAIEmbedder, - OpenAIListModels, -} from "./openai" -import { execa } from "execa" -import { logVerbose } from "./util" +import { LanguageModel, PullModelFunction } from "./chat"; +import { MODEL_PROVIDER_LMSTUDIO, SUCCESS_ERROR_CODE } from "./constants"; +import { OpenAIChatCompletion, OpenAIEmbedder, OpenAIListModels } from "./openai"; +import { execa } from "execa"; +import { logVerbose } from "./util"; const pullModel: PullModelFunction = async (cfg, options) => { - const model = cfg.model - logVerbose(`lms get ${model} --yes`) - const res = await execa({ stdout: ["inherit"] })`lms get ${model} --yes` - return { - ok: res.exitCode === SUCCESS_ERROR_CODE, - } -} + const model = cfg.model; + logVerbose(`lms get ${model} --yes`); + const res = await execa({ stdout: ["inherit"] })`lms get ${model} --yes`; + return { + ok: res.exitCode === 
SUCCESS_ERROR_CODE, + }; +}; // Define the Ollama model with its completion handler and model listing function export const LMStudioModel = Object.freeze({ - id: MODEL_PROVIDER_LMSTUDIO, - completer: OpenAIChatCompletion, - listModels: OpenAIListModels, - pullModel, - embedder: OpenAIEmbedder, -}) + id: MODEL_PROVIDER_LMSTUDIO, + completer: OpenAIChatCompletion, + listModels: OpenAIListModels, + pullModel, + embedder: OpenAIEmbedder, +}); diff --git a/packages/core/src/logging.ts b/packages/core/src/logging.ts index a2f1229112..89c14952be 100644 --- a/packages/core/src/logging.ts +++ b/packages/core/src/logging.ts @@ -1,4 +1,4 @@ -import inspect from "object-inspect" +import inspect from "object-inspect"; /** * Formats an array of arguments into a single string for logging purposes. @@ -11,32 +11,32 @@ import inspect from "object-inspect" * @returns A string representation of the input arguments. */ export function consoleLogFormat(...args: any[]) { - let line = "" - for (let i = 0; i < args.length; ++i) { - if (i > 0) line += " " - const a = args[i] - switch (typeof a) { - case "bigint": - case "number": - case "boolean": - case "undefined": - line += a - break - case "string": - line += a - break - case "symbol": - line += a.toString() - break - case "object": - case "function": - line += inspect(a, { - indent: 2, - depth: 4, - maxStringLength: 2048, - }) - break - } + let line = ""; + for (let i = 0; i < args.length; ++i) { + if (i > 0) line += " "; + const a = args[i]; + switch (typeof a) { + case "bigint": + case "number": + case "boolean": + case "undefined": + line += a; + break; + case "string": + line += a; + break; + case "symbol": + line += a.toString(); + break; + case "object": + case "function": + line += inspect(a, { + indent: 2, + depth: 4, + maxStringLength: 2048, + }); + break; } - return line + } + return line; } diff --git a/packages/core/src/logprob.ts b/packages/core/src/logprob.ts index 7f182d9617..dd99af2264 100644 --- a/packages/core/src/logprob.ts +++ b/packages/core/src/logprob.ts @@ -1,12 +1,9 @@ // cspell: disable /// -import type { - ChatCompletionChunkChoice, - ChatCompletionTokenLogprob, -} from "./chattypes" -import { escape } from "html-escaper" -import { roundWithPrecision } from "./precision" -import { deleteUndefinedValues } from "./cleaners" +import type { ChatCompletionChunkChoice, ChatCompletionTokenLogprob } from "./chattypes"; +import { escape } from "html-escaper"; +import { roundWithPrecision } from "./precision"; +import { deleteUndefinedValues } from "./cleaners"; /** * Serializes a log probability object into a standardized format. @@ -25,17 +22,17 @@ import { deleteUndefinedValues } from "./cleaners" * - `entropy`: The normalized entropy based on top log probabilities. 
*/ export function serializeLogProb(content: ChatCompletionTokenLogprob): Logprob { - const { token, logprob, top_logprobs } = content - return deleteUndefinedValues({ - token, - logprob, - topLogprobs: top_logprobs?.map((tp) => ({ - token: tp.token, - logprob: tp.logprob, - })), - probPercent: logprobToPercent(logprob), - entropy: computeNormalizedEntropy(top_logprobs), - }) satisfies Logprob + const { token, logprob, top_logprobs } = content; + return deleteUndefinedValues({ + token, + logprob, + topLogprobs: top_logprobs?.map((tp) => ({ + token: tp.token, + logprob: tp.logprob, + })), + probPercent: logprobToPercent(logprob), + entropy: computeNormalizedEntropy(top_logprobs), + }) satisfies Logprob; } /** @@ -48,24 +45,22 @@ export function serializeLogProb(content: ChatCompletionTokenLogprob): Logprob { * @returns An array of Logprob objects. If `logprobs.content` exists, it maps each token to its Logprob. * Otherwise, returns a single Logprob with the token from `delta.content` and a NaN logprob value. */ -export function serializeChunkChoiceToLogProbs( - choice: ChatCompletionChunkChoice -): Logprob[] { - const { delta, logprobs } = choice - if (logprobs?.content) return logprobs.content.map(serializeLogProb) - else - return [ - { - token: delta.content || "", - logprob: Number.NaN, - } satisfies Logprob, - ] +export function serializeChunkChoiceToLogProbs(choice: ChatCompletionChunkChoice): Logprob[] { + const { delta, logprobs } = choice; + if (logprobs?.content) return logprobs.content.map(serializeLogProb); + else + return [ + { + token: delta.content || "", + logprob: Number.NaN, + } satisfies Logprob, + ]; } function logprobToPercent(value: number | undefined): number { - if (value === undefined) return NaN - const linearProbability = roundWithPrecision(Math.exp(value) * 100, 2) - return linearProbability + if (value === undefined) return NaN; + const linearProbability = roundWithPrecision(Math.exp(value) * 100, 2); + return linearProbability; } /** @@ -75,9 +70,9 @@ function logprobToPercent(value: number | undefined): number { * @returns A formatted string displaying the probability as a percentage (with two decimal places) and the raw log probability rounded to two decimal places. */ export function renderLogprob(logprob: number | undefined): string { - return logprob === undefined || isNaN(logprob) - ? `--` - : `${logprobToPercent(logprob)}% (${roundWithPrecision(logprob, 2)})` + return logprob === undefined || isNaN(logprob) + ? `--` + : `${logprobToPercent(logprob)}% (${roundWithPrecision(logprob, 2)})`; } /** @@ -91,19 +86,17 @@ export function renderLogprob(logprob: number | undefined): string { * @returns A 24-bit RGB color value where each 8 bits represent red, green, and blue channels respectively. */ export function logprobColor( - logprob: Logprob, - options?: { maxIntensity?: number; entropy?: boolean } + logprob: Logprob, + options?: { maxIntensity?: number; entropy?: boolean }, ): number { - const { maxIntensity = 210, entropy } = options || {} - // Normalize log probability for a red to blue gradient range - const alpha = entropy - ? 
1 - (logprob.entropy || 0) - : logprobToPercent(logprob.logprob) / 100 - const intensity = Math.round(maxIntensity * alpha) - const red = maxIntensity - intensity // Higher logProb gives less red, more blue - const blue = intensity // Higher logProb gives more blue - const green = 0 - return (red << 16) | (green << 8) | (blue << 0) + const { maxIntensity = 210, entropy } = options || {}; + // Normalize log probability for a red to blue gradient range + const alpha = entropy ? 1 - (logprob.entropy || 0) : logprobToPercent(logprob.logprob) / 100; + const intensity = Math.round(maxIntensity * alpha); + const red = maxIntensity - intensity; // Higher logProb gives less red, more blue + const blue = intensity; // Higher logProb gives more blue + const green = 0; + return (red << 16) | (green << 8) | (blue << 0); } /** @@ -115,9 +108,9 @@ export function logprobColor( * @returns A CSS color string in the format `rgb(r, g, b)`. Defaults to `#fff` if the input is not a valid number. */ export function rgbToCss(value: number): string { - return isNaN(value) - ? `#fff` - : `rgb(${(value >> 16) & 0xff}, ${(value >> 8) & 0xff}, ${value & 0xff})` + return isNaN(value) + ? `#fff` + : `rgb(${(value >> 16) & 0xff}, ${(value >> 8) & 0xff}, ${value & 0xff})`; } /** @@ -132,18 +125,16 @@ export function rgbToCss(value: number): string { * @returns A styled string representing the token with gradient-based log probability coloring. */ export function logprobToMarkdown( - value: Logprob, - options?: { maxIntensity?: number; entropy?: boolean; eatSpaces?: boolean } + value: Logprob, + options?: { maxIntensity?: number; entropy?: boolean; eatSpaces?: boolean }, ) { - const { token, logprob, entropy } = value - const c = rgbToCss(logprobColor(value, options)) - const title = options?.entropy - ? roundWithPrecision(entropy, 2) - : renderLogprob(logprob) - let text = escape(token).replace(/</g, "&lt;").replace(/>/g, "&gt;") - if (options?.eatSpaces) text = text.replace(/\n/g, " ") - else text = text.replace(/ /g, "&nbsp;").replace(/\n/g, "<br/>") - return `<span class="logprobs" title="${title}" style="background: ${c}; color: white; white-space: pre;">${text}</span>` + const { token, logprob, entropy } = value; + const c = rgbToCss(logprobColor(value, options)); + const title = options?.entropy ? roundWithPrecision(entropy, 2) : renderLogprob(logprob); + let text = escape(token).replace(/</g, "&lt;").replace(/>/g, "&gt;"); + if (options?.eatSpaces) text = text.replace(/\n/g, " "); + else text = text.replace(/ /g, "&nbsp;").replace(/\n/g, "<br/>"); + return `<span class="logprobs" title="${title}" style="background: ${c}; color: white; white-space: pre;">${text}</span>`; } /** @@ -155,13 +146,10 @@ export function logprobToMarkdown( * * @returns A string of HTML representing the top log probabilities in a styled table. */ -export function topLogprobsToMarkdown( - value: Logprob, - options?: { maxIntensity?: number } -) { - const { token, topLogprobs = [] } = value - const opts = { ...options, eatSpaces: true } - return `<table class="toplogprobs">${topLogprobs.map((tp) => `<tr><td>${logprobToMarkdown(tp, opts)}</td></tr>`).join("")}</table>${/\n/.test(token) ? "<br>" : ""}` +export function topLogprobsToMarkdown(value: Logprob, options?: { maxIntensity?: number }) { + const { token, topLogprobs = [] } = value; + const opts = { ...options, eatSpaces: true }; + return `<table class="toplogprobs">${topLogprobs.map((tp) => `<tr><td>${logprobToMarkdown(tp, opts)}</td></tr>`).join("")}</table>${/\n/.test(token) ? "<br>" : ""}`; } /** @@ -171,33 +159,26 @@ export function logprobToMarkdown( * @param logprobs - An array of log probability objects, where each object contains a log probability value. If the array is undefined or empty, the function returns undefined. * @returns The computed perplexity as a number, or undefined if the input array is undefined or empty. */ -export function computePerplexity( - logprobs: Logprob[] | undefined -): number | undefined { - if (!logprobs?.length) return undefined - const sum = logprobs.reduce((acc, { logprob }) => acc + logprob, 0) - return Math.exp(-sum / logprobs.length) +export function computePerplexity(logprobs: Logprob[] | undefined): number | undefined { + if (!logprobs?.length) return undefined; + const sum = logprobs.reduce((acc, { logprob }) => acc + logprob, 0); + return Math.exp(-sum / logprobs.length); } -function computeNormalizedEntropy( - logprobs: Logprob[] | undefined -): number | undefined { - if (!(logprobs?.length >= 2)) return undefined +function computeNormalizedEntropy(logprobs: Logprob[] | undefined): number | undefined { + if (!(logprobs?.length >= 2)) return undefined; - // Calculate entropy - // https://www.watchful.io/blog/decoding-llm-uncertainties-for-better-predictability - const entropy = -logprobs.reduce( - (acc, lp) => acc + Math.exp(lp.logprob) * lp.logprob, - 0 - ) + // Calculate entropy + // https://www.watchful.io/blog/decoding-llm-uncertainties-for-better-predictability + const entropy = -logprobs.reduce((acc, lp) => acc + Math.exp(lp.logprob) * lp.logprob, 0); - // Maximum possible entropy with vocab size N - const maxEntropy = Math.log(logprobs.length) + // Maximum possible entropy with vocab size N + const maxEntropy = Math.log(logprobs.length); - // Calculate normalized entropy - const normalizedEntropy = entropy / maxEntropy + // Calculate normalized entropy + const normalizedEntropy = entropy / maxEntropy; - return normalizedEntropy + return normalizedEntropy; } /** @@ -208,14 +189,12 @@ function computeNormalizedEntropy( * @param logprobs - Array of log probabilities to process. Each log probability must include a token and may include top probabilities. * @returns The average normalized entropy or undefined if no valid data exists.
*/ -export function computeStructuralUncertainty( - logprobs: Logprob[] | undefined -): number { - if (!logprobs?.length) return undefined - const vs = logprobs - .filter((lp) => lp.topLogprobs) - .map((logprob) => computeNormalizedEntropy(logprob.topLogprobs)) - .filter((v) => v !== undefined && !isNaN(v)) - if (!vs.length) return undefined - return vs.reduce((acc, v) => acc + v, 0) / vs.length +export function computeStructuralUncertainty(logprobs: Logprob[] | undefined): number { + if (!logprobs?.length) return undefined; + const vs = logprobs + .filter((lp) => lp.topLogprobs) + .map((logprob) => computeNormalizedEntropy(logprob.topLogprobs)) + .filter((v) => v !== undefined && !isNaN(v)); + if (!vs.length) return undefined; + return vs.reduce((acc, v) => acc + v, 0) / vs.length; } diff --git a/packages/core/src/markdown.test.ts b/packages/core/src/markdown.test.ts index e4a497ab44..b8c7260170 100644 --- a/packages/core/src/markdown.test.ts +++ b/packages/core/src/markdown.test.ts @@ -1,81 +1,78 @@ // cSpell: disable -import { describe, test } from "node:test" -import { MarkdownStringify, splitMarkdownTextImageParts } from "./markdown" -import assert from "node:assert/strict" -import { parseTraceTree } from "./traceparser" +import { describe, test } from "node:test"; +import { MarkdownStringify, splitMarkdownTextImageParts } from "./markdown"; +import assert from "node:assert/strict"; +import { parseTraceTree } from "./traceparser"; describe("trace tree", () => { - test("empty", () => { - const { root: res } = parseTraceTree(undefined) - delete res.id - assert.deepStrictEqual(res, { - type: "details", - label: "trace", - content: [""], - }) - }) - test("stringify", () => { - assert.strictEqual(MarkdownStringify({ a: 1 }), "\n- a: 1\n") - assert.strictEqual( - MarkdownStringify({ a: 1, b: 2 }), - "\n- a: 1\n- b: 2\n" - ) - assert.strictEqual( - MarkdownStringify({ a: "string" }, { quoteValues: true }), - "\n- a: `string`\n" - ) - assert.strictEqual(MarkdownStringify([1, 2, 3]), "\n- 1\n- 2\n- 3\n") - assert.strictEqual( - MarkdownStringify({ a: 1 }, { headings: 0, headingLevel: 3 }), - "\n### A\n1\n" - ) - }) - test("flat", () => { - const { root: res } = parseTraceTree(` + test("empty", () => { + const { root: res } = parseTraceTree(undefined); + delete res.id; + assert.deepStrictEqual(res, { + type: "details", + label: "trace", + content: [""], + }); + }); + test("stringify", () => { + assert.strictEqual(MarkdownStringify({ a: 1 }), "\n- a: 1\n"); + assert.strictEqual(MarkdownStringify({ a: 1, b: 2 }), "\n- a: 1\n- b: 2\n"); + assert.strictEqual( + MarkdownStringify({ a: "string" }, { quoteValues: true }), + "\n- a: `string`\n", + ); + assert.strictEqual(MarkdownStringify([1, 2, 3]), "\n- 1\n- 2\n- 3\n"); + assert.strictEqual( + MarkdownStringify({ a: 1 }, { headings: 0, headingLevel: 3 }), + "\n### A\n1\n", + ); + }); + test("flat", () => { + const { root: res } = parseTraceTree(` flat tree 2 3 -`) - delete res.id - assert.deepStrictEqual(res, { - type: "details", - label: "trace", - content: [ - ` +`); + delete res.id; + assert.deepStrictEqual(res, { + type: "details", + label: "trace", + content: [ + ` flat tree 2 3 `, - ], - }) - }) + ], + }); + }); - test("one node", () => { - const { root: res } = parseTraceTree(` + test("one node", () => { + const { root: res } = parseTraceTree(` flat tree
 <details>
 <summary>2</summary>
 2.5
 </details>
3 -`) - delete res.id - delete (res.content[1] as any).id - assert.deepStrictEqual(res, { - type: "details", - label: "trace", - content: [ - ` +`); + delete res.id; + delete (res.content[1] as any).id; + assert.deepStrictEqual(res, { + type: "details", + label: "trace", + content: [ + ` flat tree`, - { type: "details", label: "2", content: ["2.5"] }, - `3 + { type: "details", label: "2", content: ["2.5"] }, + `3 `, - ], - }) - }) + ], + }); + }); - test("multi node", () => { - const { root: res } = parseTraceTree(` + test("multi node", () => { + const { root: res } = parseTraceTree(` flat tree
@@ -84,24 +81,24 @@ flat tree 2.5
3 -`) - delete res.id - delete (res.content[1] as any).id - assert.deepStrictEqual(res, { - type: "details", - label: "trace", - content: [ - ` +`); + delete res.id; + delete (res.content[1] as any).id; + assert.deepStrictEqual(res, { + type: "details", + label: "trace", + content: [ + ` flat tree`, - { type: "details", label: "2", content: ["2.5"] }, - `3 + { type: "details", label: "2", content: ["2.5"] }, + `3 `, - ], - }) - }) + ], + }); + }); - test("nested node", () => { - const { root: res } = parseTraceTree(` + test("nested node", () => { + const { root: res } = parseTraceTree(` flat tree
@@ -115,43 +112,41 @@ flat tree
3 -`) - delete res.id - delete (res.content[1] as any).id - delete (res.content[1] as any).content[0].id - assert.deepStrictEqual(res, { - type: "details", - label: "trace", - content: [ - ` +`); + delete res.id; + delete (res.content[1] as any).id; + delete (res.content[1] as any).content[0].id; + assert.deepStrictEqual(res, { + type: "details", + label: "trace", + content: [ + ` flat tree`, - { - type: "details", - label: "2", - content: [ - { - type: "details", - label: "2.5", - content: ["2.5.5"], - }, - ], - }, - `3 + { + type: "details", + label: "2", + content: [ + { + type: "details", + label: "2.5", + content: ["2.5.5"], + }, + ], + }, + `3 `, - ], - }) - }) - test("splitMarkdownTextImageParts - only text", async () => { - const input = "This is a simple text block." - const parts = await splitMarkdownTextImageParts(input) - assert.deepStrictEqual(parts, [ - { type: "text", text: "This is a simple text block." }, - ]) - }) + ], + }); + }); + test("splitMarkdownTextImageParts - only text", async () => { + const input = "This is a simple text block."; + const parts = await splitMarkdownTextImageParts(input); + assert.deepStrictEqual(parts, [{ type: "text", text: "This is a simple text block." }]); + }); - test("splitMarkdownTextImageParts - empty string", async () => { - const input = "" - const parts = await splitMarkdownTextImageParts(input) - assert.deepStrictEqual(parts, []) - }) -}) + test("splitMarkdownTextImageParts - empty string", async () => { + const input = ""; + const parts = await splitMarkdownTextImageParts(input); + assert.deepStrictEqual(parts, []); + }); +}); diff --git a/packages/core/src/markdown.ts b/packages/core/src/markdown.ts index 8eadeebb4a..86441b2897 100644 --- a/packages/core/src/markdown.ts +++ b/packages/core/src/markdown.ts @@ -3,18 +3,18 @@ // converting annotations to markdown, wrapping text in fences, creating links and details blocks, // and working with trace trees. -import { titleize } from "./inflection" -import { convertAnnotationsToMarkdown } from "./annotations" -import { collapseNewlines } from "./cleaners" -import { fenceMD } from "./mkmd" -import { convertThinkToMarkdown } from "./think" -import { resolveFileDataUri } from "./file" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { HTTP_OR_S_REGEX } from "./constants" -import { genaiscriptDebug } from "./debug" -import { join, resolve } from "node:path" -import { unfence } from "./unwrappers" -const dbg = genaiscriptDebug("markdown") +import { titleize } from "./inflection"; +import { convertAnnotationsToMarkdown } from "./annotations"; +import { collapseNewlines } from "./cleaners"; +import { fenceMD } from "./mkmd"; +import { convertThinkToMarkdown } from "./think"; +import { resolveFileDataUri } from "./file"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { HTTP_OR_S_REGEX } from "./constants"; +import { genaiscriptDebug } from "./debug"; +import { join, resolve } from "node:path"; +import { unfence } from "./unwrappers"; +const dbg = genaiscriptDebug("markdown"); /** * Prettifies markdown content by converting annotations to markdown, processing "think" blocks, and collapsing excessive newlines. @@ -22,11 +22,11 @@ const dbg = genaiscriptDebug("markdown") * @returns The cleaned and formatted markdown string. 
*/ export function prettifyMarkdown(md: string) { - let res = unfence(md, ["markdown", "md", "text"]) - res = convertAnnotationsToMarkdown(res) // Convert annotations to markdown format - res = convertThinkToMarkdown(res) - res = collapseNewlines(res) // Clean up excessive newlines - return res + let res = unfence(md, ["markdown", "md", "text"]); + res = convertAnnotationsToMarkdown(res); // Convert annotations to markdown format + res = convertThinkToMarkdown(res); + res = collapseNewlines(res); // Clean up excessive newlines + return res; } /** @@ -38,71 +38,55 @@ export function prettifyMarkdown(md: string) { * @returns The markdown representation of the object. */ export function MarkdownStringify( - obj: any, - options?: { - quoteValues?: boolean - headings?: number - headingLevel?: number - } + obj: any, + options?: { + quoteValues?: boolean; + headings?: number; + headingLevel?: number; + }, ): string { - const seen = new Set() - const { quoteValues, headings = -1, headingLevel = 2 } = options || {} - const render = (obj: any, depth: number): string => { - if (obj === undefined || obj === null) return obj + const seen = new Set(); + const { quoteValues, headings = -1, headingLevel = 2 } = options || {}; + const render = (obj: any, depth: number): string => { + if (obj === undefined || obj === null) return obj; - const indent = depth - if (Array.isArray(obj)) { - if (seen.has(obj)) return "..." - seen.add(obj) - const items = obj - .map((o) => render(o, depth + 1)) - .filter((i) => i !== undefined && i !== "") - if (items.some((i) => i.includes("\n"))) - return `\n
<ul>\n${items.map((item) => `<li>\n${item}\n</li>\n`).join("\n")}\n</ul>
\n` - else { - const indentText = " ".repeat(indent) - return ( - "\n" + - items.map((item) => `${indentText}- ${item}`).join("\n") - ) - } - } else if (typeof obj === "object") { - if (seen.has(obj)) return "..." - seen.add(obj) + const indent = depth; + if (Array.isArray(obj)) { + if (seen.has(obj)) return "..."; + seen.add(obj); + const items = obj.map((o) => render(o, depth + 1)).filter((i) => i !== undefined && i !== ""); + if (items.some((i) => i.includes("\n"))) + return `\n
<ul>\n${items.map((item) => `<li>\n${item}\n</li>\n`).join("\n")}\n</ul>
\n`; + else { + const indentText = " ".repeat(indent); + return "\n" + items.map((item) => `${indentText}- ${item}`).join("\n"); + } + } else if (typeof obj === "object") { + if (seen.has(obj)) return "..."; + seen.add(obj); - const entries = Object.entries(obj) - .map((kv) => [kv[0], render(kv[1], depth + 1)]) - .filter((kv) => kv[1] !== undefined) - if (depth <= headings) { - return entries - .map( - (kv) => - `\n${"#".repeat(headingLevel + depth)} ${titleize(kv[0])}\n${kv[1]}` - ) - .join("\n") - } else if (entries.some((kv) => kv[1].includes("\n"))) - return `\n
<ul>\n${entries.map((kv) => `<li>\n${kv[0]}: ${kv[1]}\n</li>\n`).join("\n")}\n</ul>
\n` - else { - const indentText = " ".repeat(indent) - return ( - "\n" + - entries - .map((kv) => `${indentText}- ${kv[0]}: ${kv[1]}`) - .join("\n") - ) - } - } else if (typeof obj === "string") { - if (quoteValues) { - if (obj.includes("\n")) return fenceMD(obj) - return `\`${obj.replace(/`/g, "\\`")}\`` - } else return obj - } else - return quoteValues - ? `\`${String(obj).replace(/`/g, "\\`")}\`` - : String(obj) - } + const entries = Object.entries(obj) + .map((kv) => [kv[0], render(kv[1], depth + 1)]) + .filter((kv) => kv[1] !== undefined); + if (depth <= headings) { + return entries + .map((kv) => `\n${"#".repeat(headingLevel + depth)} ${titleize(kv[0])}\n${kv[1]}`) + .join("\n"); + } else if (entries.some((kv) => kv[1].includes("\n"))) + return `\n
<ul>\n${entries.map((kv) => `<li>\n${kv[0]}: ${kv[1]}\n</li>\n`).join("\n")}\n</ul>
\n`; + else { + const indentText = " ".repeat(indent); + return "\n" + entries.map((kv) => `${indentText}- ${kv[0]}: ${kv[1]}`).join("\n"); + } + } else if (typeof obj === "string") { + if (quoteValues) { + if (obj.includes("\n")) return fenceMD(obj); + return `\`${obj.replace(/`/g, "\\`")}\``; + } else return obj; + } else return quoteValues ? `\`${String(obj).replace(/`/g, "\\`")}\`` : String(obj); + }; - return render(obj, 0) + "\n" + return render(obj, 0) + "\n"; } /** @@ -112,70 +96,65 @@ export function MarkdownStringify( * @param markdown The markdown string to split. */ export async function splitMarkdownTextImageParts( - markdown: string, - options?: CancellationOptions & { - dir?: string - allowedDomains?: string[] - convertToDataUri?: boolean - } + markdown: string, + options?: CancellationOptions & { + dir?: string; + allowedDomains?: string[]; + convertToDataUri?: boolean; + }, ) { - const { - dir = "", - cancellationToken, - allowedDomains, - convertToDataUri, - } = options || {} - // remove \. for all images - const regex = /^!\[(?[^\]]*)\]\((?\.[^)]+)\)$/gm - const parts: ( - | { type: "text"; text: string } - | { type: "image"; data: string; mimeType: string } - )[] = [] - let lastIndex = 0 - let match: RegExpExecArray | null + const { dir = "", cancellationToken, allowedDomains, convertToDataUri } = options || {}; + // remove \. for all images + const regex = /^!\[(?[^\]]*)\]\((?\.[^)]+)\)$/gm; + const parts: ( + | { type: "text"; text: string } + | { type: "image"; data: string; mimeType: string } + )[] = []; + let lastIndex = 0; + let match: RegExpExecArray | null; - while ((match = regex.exec(markdown)) !== null) { - checkCancelled(cancellationToken) - if (match.index > lastIndex) { - const text = markdown.slice(lastIndex, match.index) - if (text) parts.push({ type: "text", text }) - } + while ((match = regex.exec(markdown)) !== null) { + checkCancelled(cancellationToken); + if (match.index > lastIndex) { + const text = markdown.slice(lastIndex, match.index); + if (text) parts.push({ type: "text", text }); + } - const { alt, imageUrl } = match.groups + const { alt, imageUrl } = match.groups; - let data: string - let mimeType: string - const isDataUri = /^datauri:\/\//.test(imageUrl) - if (isDataUri) { - // TODO - } else if (HTTP_OR_S_REGEX.test(imageUrl)) { - // TODO - } else if (/^\./.test(imageUrl)) { - dbg(`local image: %s`, imageUrl) - if (convertToDataUri) { - const filename = resolve(join(dir, imageUrl)) - dbg(`local file: %s`, filename) - try { - const res = await resolveFileDataUri(filename, options) - data = res.data - mimeType = res.mimeType - } catch (err) { - dbg(`%O`, err) - } - } - } - if (data && mimeType) { - parts.push({ type: "image", data, mimeType }) - } else { - const lastPart = parts.at(-1) - if (lastPart?.type === "text") lastPart.text += match[0] - else parts.push({ type: "text", text: match[0] }) + let data: string; + let mimeType: string; + const isDataUri = /^datauri:\/\//.test(imageUrl); + if (isDataUri) { + // TODO + } else if (HTTP_OR_S_REGEX.test(imageUrl)) { + // TODO + } else if (/^\./.test(imageUrl)) { + dbg(`local image: %s`, imageUrl); + if (convertToDataUri) { + const filename = resolve(join(dir, imageUrl)); + dbg(`local file: %s`, filename); + try { + const res = await resolveFileDataUri(filename, options); + data = res.data; + mimeType = res.mimeType; + } catch (err) { + dbg(`%O`, err); } - lastIndex = regex.lastIndex + } } - if (lastIndex < markdown.length) { - const text = markdown.slice(lastIndex) - if (text) parts.push({ type: 
"text", text }) + if (data && mimeType) { + parts.push({ type: "image", data, mimeType }); + } else { + const lastPart = parts.at(-1); + if (lastPart?.type === "text") lastPart.text += match[0]; + else parts.push({ type: "text", text: match[0] }); } - return parts + lastIndex = regex.lastIndex; + } + if (lastIndex < markdown.length) { + const text = markdown.slice(lastIndex); + if (text) parts.push({ type: "text", text }); + } + return parts; } diff --git a/packages/core/src/math.test.ts b/packages/core/src/math.test.ts index 660fb2e2d8..1a442a2a95 100644 --- a/packages/core/src/math.test.ts +++ b/packages/core/src/math.test.ts @@ -1,39 +1,39 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { MathTryEvaluate } from "./math" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { MathTryEvaluate } from "./math"; describe("MathTryEvaluate", async () => { - await test("evaluates a simple expression", async () => { - const result = await MathTryEvaluate("1 + 1") - assert.equal(result, 2) - }) + await test("evaluates a simple expression", async () => { + const result = await MathTryEvaluate("1 + 1"); + assert.equal(result, 2); + }); - await test("evaluates an expression with variables from scope", async () => { - const result = await MathTryEvaluate("x + y", { - scope: { x: 5, y: 3 }, - }) - assert.equal(result, 8) - }) + await test("evaluates an expression with variables from scope", async () => { + const result = await MathTryEvaluate("x + y", { + scope: { x: 5, y: 3 }, + }); + assert.equal(result, 8); + }); - await test("returns defaultValue for empty expression", async () => { - const result = await MathTryEvaluate("", { - defaultValue: 42, - }) - assert.equal(result, 42) - }) + await test("returns defaultValue for empty expression", async () => { + const result = await MathTryEvaluate("", { + defaultValue: 42, + }); + assert.equal(result, 42); + }); - await test("returns undefined for invalid expression", async () => { - const result = await MathTryEvaluate("1 +") - assert.equal(result, undefined) - }) + await test("returns undefined for invalid expression", async () => { + const result = await MathTryEvaluate("1 +"); + assert.equal(result, undefined); + }); - await test("returns undefined for expression with undefined variables", async () => { - const result = await MathTryEvaluate("x + y") - assert.equal(result, undefined) - }) + await test("returns undefined for expression with undefined variables", async () => { + const result = await MathTryEvaluate("x + y"); + assert.equal(result, undefined); + }); - await test("handles complex expressions", async () => { - const result = await MathTryEvaluate("sin(PI/2)") - assert.equal(result, 1) - }) -}) + await test("handles complex expressions", async () => { + const result = await MathTryEvaluate("sin(PI/2)"); + assert.equal(result, 1); + }); +}); diff --git a/packages/core/src/math.ts b/packages/core/src/math.ts index e691c4d796..869ac398fd 100644 --- a/packages/core/src/math.ts +++ b/packages/core/src/math.ts @@ -1,5 +1,5 @@ // Importing TraceOptions from the local "trace" module -import { TraceOptions } from "./trace" +import { TraceOptions } from "./trace"; /** * Asynchronously evaluates a mathematical expression. 
@@ -15,27 +15,27 @@ import { TraceOptions } from "./trace" * - undefined if evaluation fails */ export async function MathTryEvaluate( - expr: string, - options?: { scope?: object; defaultValue?: number } & TraceOptions + expr: string, + options?: { scope?: object; defaultValue?: number } & TraceOptions, ): Promise { - // Destructuring options with defaults - const { trace, defaultValue, scope = {} } = options || {} + // Destructuring options with defaults + const { trace, defaultValue, scope = {} } = options || {}; - try { - // Return defaultValue if expression is empty - if (!expr) return defaultValue + try { + // Return defaultValue if expression is empty + if (!expr) return defaultValue; - // Dynamically import the 'evaluate' function from 'mathjs' - const { evaluate } = await import("mathjs") + // Dynamically import the 'evaluate' function from 'mathjs' + const { evaluate } = await import("mathjs"); - // Evaluate the expression and return the result - const res = evaluate(expr, scope) - return res - } catch (e) { - // Log an error if tracing is enabled - trace?.error(e) + // Evaluate the expression and return the result + const res = evaluate(expr, scope); + return res; + } catch (e) { + // Log an error if tracing is enabled + trace?.error(e); - // Return undefined if evaluation fails - return undefined - } + // Return undefined if evaluation fails + return undefined; + } } diff --git a/packages/core/src/mcpclient.ts b/packages/core/src/mcpclient.ts index c8616b1189..cc685fca1f 100644 --- a/packages/core/src/mcpclient.ts +++ b/packages/core/src/mcpclient.ts @@ -1,376 +1,342 @@ -import { TraceOptions } from "./trace" -import { arrayify, logError, logVerbose } from "./util" +import { TraceOptions } from "./trace"; +import { arrayify, logError, logVerbose } from "./util"; import type { - TextContent, - ImageContent, - EmbeddedResource, -} from "@modelcontextprotocol/sdk/types.js" -import { errorMessage } from "./error" -import { CancellationOptions, toSignal } from "./cancellation" -import type { ProgressCallback } from "@modelcontextprotocol/sdk/shared/protocol.js" -import { deleteUndefinedValues } from "./cleaners" -import { hash } from "./crypto" -import { fileWriteCachedJSON } from "./filecache" -import { dotGenaiscriptPath } from "./workdir" -import { YAMLStringify } from "./yaml" -import { resolvePromptInjectionDetector } from "./contentsafety" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("mcp:client") + TextContent, + ImageContent, + EmbeddedResource, +} from "@modelcontextprotocol/sdk/types.js"; +import { errorMessage } from "./error"; +import { CancellationOptions, toSignal } from "./cancellation"; +import type { ProgressCallback } from "@modelcontextprotocol/sdk/shared/protocol.js"; +import { deleteUndefinedValues } from "./cleaners"; +import { hash } from "./crypto"; +import { fileWriteCachedJSON } from "./filecache"; +import { dotGenaiscriptPath } from "./workdir"; +import { YAMLStringify } from "./yaml"; +import { resolvePromptInjectionDetector } from "./contentsafety"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("mcp:client"); export interface McpClientProxy extends McpClient { - listToolCallbacks(): Promise + listToolCallbacks(): Promise; } function toolResultContentToText(res: any) { - const content = res.content as ( - | TextContent - | ImageContent - | EmbeddedResource - )[] - let text = arrayify(content) - ?.map((c) => { - switch (c.type) { - case "text": - return c.text || "" - case "image": - return c.data - 
case "resource": - return c.resource?.uri || "" - default: - return c - } - }) - .join("\n") - if (res.isError) { - dbg(`tool error: ${text}`) - text = `Tool Error:\n${text}` - } - return text + const content = res.content as (TextContent | ImageContent | EmbeddedResource)[]; + let text = arrayify(content) + ?.map((c) => { + switch (c.type) { + case "text": + return c.text || ""; + case "image": + return c.data; + case "resource": + return c.resource?.uri || ""; + default: + return c; + } + }) + .join("\n"); + if (res.isError) { + dbg(`tool error: ${text}`); + text = `Tool Error:\n${text}`; + } + return text; } function resolveMcpEnv(_env: Record) { - if (!_env) return _env - const res = structuredClone(_env) - Object.entries(res) - .filter(([k, v]) => v === "") - .forEach(([key, value]) => { - dbg(`filling env var: %s`, key) - res[key] = process.env[key] || "" - }) - return res + if (!_env) return _env; + const res = structuredClone(_env); + Object.entries(res) + .filter(([k, v]) => v === "") + .forEach(([key, value]) => { + dbg(`filling env var: %s`, key); + res[key] = process.env[key] || ""; + }); + return res; } export class McpClientManager extends EventTarget implements AsyncDisposable { - private _clients: McpClientProxy[] = [] - constructor() { - super() - } + private _clients: McpClientProxy[] = []; + constructor() { + super(); + } - async startMcpServer( - serverConfig: McpServerConfig, - options: Required & CancellationOptions - ): Promise { - const { cancellationToken } = options || {} - logVerbose(`mcp: starting ` + serverConfig.id) - const signal = toSignal(cancellationToken) - const { - id, - version = "1.0.0", - toolsSha, - detectPromptInjection, - contentSafety, - tools: _toolsConfig, - generator, - intent, - env: unresolvedEnv, - ...rest - } = serverConfig - const mcpEnv = resolveMcpEnv(unresolvedEnv) - const toolSpecs = arrayify(_toolsConfig).map(toMcpToolSpecification) - const commonToolOptions = deleteUndefinedValues({ - contentSafety, - detectPromptInjection, - intent, - }) satisfies DefToolOptions - // genaiscript:mcp:id - const dbgc = dbg.extend(id) - dbgc(`starting`) - const trace = options.trace.startTraceDetails(`🪚 mcp ${id}`) - try { - const { Client } = await import( - "@modelcontextprotocol/sdk/client/index.js" - ) - const { StdioClientTransport } = await import( - "@modelcontextprotocol/sdk/client/stdio.js" - ) - const progress: (msg: string) => ProgressCallback = (msg) => (ev) => - dbgc(msg + " ", `${ev.progress || ""}/${ev.total || ""}`) - const capabilities = { tools: {} } - dbgc( - `creating transport %O`, - deleteUndefinedValues({ - ...rest, - env: mcpEnv ? 
Object.keys(mcpEnv) : undefined, - }) - ) - let transport = new StdioClientTransport( - deleteUndefinedValues({ - ...rest, - env: mcpEnv, - stderr: "inherit", - }) - ) - let client = new Client({ name: id, version }, { capabilities }) - dbgc(`connecting stdio transport`) - await client.connect(transport) + async startMcpServer( + serverConfig: McpServerConfig, + options: Required & CancellationOptions, + ): Promise { + const { cancellationToken } = options || {}; + logVerbose(`mcp: starting ` + serverConfig.id); + const signal = toSignal(cancellationToken); + const { + id, + version = "1.0.0", + toolsSha, + detectPromptInjection, + contentSafety, + tools: _toolsConfig, + generator, + intent, + env: unresolvedEnv, + ...rest + } = serverConfig; + const mcpEnv = resolveMcpEnv(unresolvedEnv); + const toolSpecs = arrayify(_toolsConfig).map(toMcpToolSpecification); + const commonToolOptions = deleteUndefinedValues({ + contentSafety, + detectPromptInjection, + intent, + }) satisfies DefToolOptions; + // genaiscript:mcp:id + const dbgc = dbg.extend(id); + dbgc(`starting`); + const trace = options.trace.startTraceDetails(`🪚 mcp ${id}`); + try { + const { Client } = await import("@modelcontextprotocol/sdk/client/index.js"); + const { StdioClientTransport } = await import("@modelcontextprotocol/sdk/client/stdio.js"); + const progress: (msg: string) => ProgressCallback = (msg) => (ev) => + dbgc(msg + " ", `${ev.progress || ""}/${ev.total || ""}`); + const capabilities = { tools: {} }; + dbgc( + `creating transport %O`, + deleteUndefinedValues({ + ...rest, + env: mcpEnv ? Object.keys(mcpEnv) : undefined, + }), + ); + let transport = new StdioClientTransport( + deleteUndefinedValues({ + ...rest, + env: mcpEnv, + stderr: "inherit", + }), + ); + let client = new Client({ name: id, version }, { capabilities }); + dbgc(`connecting stdio transport`); + await client.connect(transport); - const ping: McpClient["ping"] = async () => { - dbgc(`ping`) - await client.ping({ signal }) - } - const listTools: McpClient["listTools"] = async () => { - dbgc(`listing tools`) - const { tools } = await client.listTools( - {}, - { signal, onprogress: progress("list tools") } - ) - return tools.map( - (t) => - ({ - name: t.name, - description: t.description, - inputSchema: t.inputSchema as any, - }) satisfies McpToolReference - ) - } - const listToolCallbacks: McpClientProxy["listToolCallbacks"] = - async () => { - // list tools - dbgc(`listing tools`) - let { tools: toolDefinitions } = await client.listTools( - {}, - { signal, onprogress: progress("list tools") } - ) - trace.fence( - toolDefinitions.map(({ name, description }) => ({ - name, - description, - })), - "json" - ) - const toolsFile = await fileWriteCachedJSON( - dotGenaiscriptPath("mcp", id, "tools"), - toolDefinitions - ) + const ping: McpClient["ping"] = async () => { + dbgc(`ping`); + await client.ping({ signal }); + }; + const listTools: McpClient["listTools"] = async () => { + dbgc(`listing tools`); + const { tools } = await client.listTools( + {}, + { signal, onprogress: progress("list tools") }, + ); + return tools.map( + (t) => + ({ + name: t.name, + description: t.description, + inputSchema: t.inputSchema as any, + }) satisfies McpToolReference, + ); + }; + const listToolCallbacks: McpClientProxy["listToolCallbacks"] = async () => { + // list tools + dbgc(`listing tools`); + let { tools: toolDefinitions } = await client.listTools( + {}, + { signal, onprogress: progress("list tools") }, + ); + trace.fence( + toolDefinitions.map(({ name, description }) => 
({ + name, + description, + })), + "json", + ); + const toolsFile = await fileWriteCachedJSON( + dotGenaiscriptPath("mcp", id, "tools"), + toolDefinitions, + ); - logVerbose(`mcp ${id}: tools: ${toolsFile}`) + logVerbose(`mcp ${id}: tools: ${toolsFile}`); - // apply filter - if (toolSpecs.length > 0) { - dbg(`filtering tools`) - trace.fence(toolSpecs, "json") - toolDefinitions = toolDefinitions.filter((tool) => - toolSpecs.some((s) => s.id === tool.name) - ) - dbg( - `filtered tools: %d`, - toolDefinitions.map((t) => t.name).join(", ") - ) - } + // apply filter + if (toolSpecs.length > 0) { + dbg(`filtering tools`); + trace.fence(toolSpecs, "json"); + toolDefinitions = toolDefinitions.filter((tool) => + toolSpecs.some((s) => s.id === tool.name), + ); + dbg(`filtered tools: %d`, toolDefinitions.map((t) => t.name).join(", ")); + } - const sha = await hash(JSON.stringify(toolDefinitions)) - trace.itemValue("tools sha", sha) - logVerbose(`mcp ${id}: tools sha: ${sha}`) - if (toolsSha !== undefined) { - if (sha === toolsSha) - logVerbose( - `mcp ${id}: tools signature validated successfully` - ) - else { - logError( - `mcp ${id}: tools signature changed, please review the tools and update 'toolsSha' in the mcp server configuration.` - ) - throw new Error(`mcp ${id} tools signature changed`) - } - } + const sha = await hash(JSON.stringify(toolDefinitions)); + trace.itemValue("tools sha", sha); + logVerbose(`mcp ${id}: tools sha: ${sha}`); + if (toolsSha !== undefined) { + if (sha === toolsSha) logVerbose(`mcp ${id}: tools signature validated successfully`); + else { + logError( + `mcp ${id}: tools signature changed, please review the tools and update 'toolsSha' in the mcp server configuration.`, + ); + throw new Error(`mcp ${id} tools signature changed`); + } + } - if (detectPromptInjection) { - const detector = await resolvePromptInjectionDetector( - serverConfig, - { - trace, - cancellationToken, - } - ) - const result = await detector( - YAMLStringify(toolDefinitions) - ) - if (result.attackDetected) { - dbgc("%O", result) - throw new Error( - `mcp ${id}: prompt injection detected in tools` - ) - } - } + if (detectPromptInjection) { + const detector = await resolvePromptInjectionDetector(serverConfig, { + trace, + cancellationToken, + }); + const result = await detector(YAMLStringify(toolDefinitions)); + if (result.attackDetected) { + dbgc("%O", result); + throw new Error(`mcp ${id}: prompt injection detected in tools`); + } + } - const tools = toolDefinitions.map( - ({ name, description, inputSchema }) => { - const toolSpec = toolSpecs.find( - ({ id }) => id === name - ) - const toolOptions = { - ...commonToolOptions, - ...(toolSpec || {}), - } satisfies DefToolOptions - return { - spec: { - name: `${id}_${name}`, - description, - parameters: inputSchema as any, - }, - options: toolOptions, - generator, - impl: async (args: any) => { - const { context, ...restArgs } = args - const res = await client.callTool( - { - name: name, - arguments: restArgs, - }, - undefined, - { - signal, - onprogress: progress( - `tool call ${name} ` - ), - } - ) - const text = - res?.text || - (res?.content as { text?: string }[]) - ?.map((c) => c?.text) - .filter(Boolean) - .join("\n") || - "" - return text - }, - } satisfies ToolCallback - } - ) - dbgc( - `tools (imported): %O`, - tools.map((t) => t.spec) - ) + const tools = toolDefinitions.map(({ name, description, inputSchema }) => { + const toolSpec = toolSpecs.find(({ id }) => id === name); + const toolOptions = { + ...commonToolOptions, + ...(toolSpec || 
{}), + } satisfies DefToolOptions; + return { + spec: { + name: `${id}_${name}`, + description, + parameters: inputSchema as any, + }, + options: toolOptions, + generator, + impl: async (args: any) => { + const { context, ...restArgs } = args; + const res = await client.callTool( + { + name: name, + arguments: restArgs, + }, + undefined, + { + signal, + onprogress: progress(`tool call ${name} `), + }, + ); + const text = + res?.text || + (res?.content as { text?: string }[]) + ?.map((c) => c?.text) + .filter(Boolean) + .join("\n") || + ""; + return text; + }, + } satisfies ToolCallback; + }); + dbgc( + `tools (imported): %O`, + tools.map((t) => t.spec), + ); - return tools - } - const readResource: McpClient["readResource"] = async ( - uri: string - ) => { - dbgc(`read resource ${uri}`) - const res = await client.readResource({ uri }) - const contents = res.contents - return contents?.map((content) => - deleteUndefinedValues({ - content: content.text - ? String(content.text) - : content.blob - ? Buffer.from(content.blob as any).toString( - "base64" - ) - : undefined, - encoding: content.blob ? "base64" : undefined, - filename: content.uri, - type: content.mimeType, - } satisfies WorkspaceFile) - ) - } - const listResources: McpClient["listResources"] = async () => { - const { resources } = await client.listResources( - {}, - { signal, onprogress: progress("list resources") } - ) - return resources.map((r) => ({ - name: r.name, - description: r.description, - uri: r.uri, - mimeType: r.mimeType, - })) - } + return tools; + }; + const readResource: McpClient["readResource"] = async (uri: string) => { + dbgc(`read resource ${uri}`); + const res = await client.readResource({ uri }); + const contents = res.contents; + return contents?.map((content) => + deleteUndefinedValues({ + content: content.text + ? String(content.text) + : content.blob + ? Buffer.from(content.blob as any).toString("base64") + : undefined, + encoding: content.blob ? 
"base64" : undefined, + filename: content.uri, + type: content.mimeType, + } satisfies WorkspaceFile), + ); + }; + const listResources: McpClient["listResources"] = async () => { + const { resources } = await client.listResources( + {}, + { signal, onprogress: progress("list resources") }, + ); + return resources.map((r) => ({ + name: r.name, + description: r.description, + uri: r.uri, + mimeType: r.mimeType, + })); + }; - const dispose = async () => { - dbgc(`disposing`) - const i = this._clients.indexOf(res) - if (i >= 0) this._clients.splice(i, 1) - try { - await client.close() - client = undefined - } catch (err) { - dbgc(`error closing client: ${errorMessage(err)}`) - } - try { - await transport.close() - transport = undefined - } catch (err) { - dbgc(`error closing transport: ${errorMessage(err)}`) - } - } + const dispose = async () => { + dbgc(`disposing`); + const i = this._clients.indexOf(res); + if (i >= 0) this._clients.splice(i, 1); + try { + await client.close(); + client = undefined; + } catch (err) { + dbgc(`error closing client: ${errorMessage(err)}`); + } + try { + await transport.close(); + transport = undefined; + } catch (err) { + dbgc(`error closing transport: ${errorMessage(err)}`); + } + }; - const callTool: McpClient["callTool"] = async (toolId, args) => { - const responseSchema: JSONSchema = undefined - const callRes = await client.callTool( - { - name: toolId, - arguments: args, - }, - responseSchema as any, - { - signal, - onprogress: progress(`tool call ${toolId} `), - } - ) - return deleteUndefinedValues({ - isError: callRes.isError as boolean, - content: callRes.content as McpServerToolResultPart[], - text: toolResultContentToText(callRes), - } satisfies McpServerToolResult) - } + const callTool: McpClient["callTool"] = async (toolId, args) => { + const responseSchema: JSONSchema = undefined; + const callRes = await client.callTool( + { + name: toolId, + arguments: args, + }, + responseSchema as any, + { + signal, + onprogress: progress(`tool call ${toolId} `), + }, + ); + return deleteUndefinedValues({ + isError: callRes.isError as boolean, + content: callRes.content as McpServerToolResultPart[], + text: toolResultContentToText(callRes), + } satisfies McpServerToolResult); + }; - const res = Object.freeze({ - config: Object.freeze({ ...serverConfig }), - ping, - listTools, - listToolCallbacks, - callTool, - listResources, - readResource, - dispose, - [Symbol.asyncDispose]: dispose, - } satisfies McpClientProxy) - this._clients.push(res) - return res - } finally { - trace.endDetails() - } + const res = Object.freeze({ + config: Object.freeze({ ...serverConfig }), + ping, + listTools, + listToolCallbacks, + callTool, + listResources, + readResource, + dispose, + [Symbol.asyncDispose]: dispose, + } satisfies McpClientProxy); + this._clients.push(res); + return res; + } finally { + trace.endDetails(); } + } - get clients(): McpClientProxy[] { - return this._clients.slice(0) - } + get clients(): McpClientProxy[] { + return this._clients.slice(0); + } - async dispose() { - const clients = this._clients.slice(0) - for (const client of clients) { - await client.dispose() - } + async dispose() { + const clients = this._clients.slice(0); + for (const client of clients) { + await client.dispose(); } + } - async [Symbol.asyncDispose](): Promise {} + async [Symbol.asyncDispose](): Promise {} } -function toMcpToolSpecification( - spec: string | McpToolSpecification -): McpToolSpecification { - if (typeof spec === "string") return { id: spec } - else return spec +function 
toMcpToolSpecification(spec: string | McpToolSpecification): McpToolSpecification { + if (typeof spec === "string") return { id: spec }; + else return spec; } diff --git a/packages/core/src/mcpresource.ts b/packages/core/src/mcpresource.ts index b11426affc..f11b9b4c30 100644 --- a/packages/core/src/mcpresource.ts +++ b/packages/core/src/mcpresource.ts @@ -1,167 +1,161 @@ -import { resolveBufferLike } from "./bufferlike" -import { CHANGE, MCP_RESOURCE_PROTOCOL, RESOURCE_CHANGE } from "./constants" -import debug from "debug" -import { fileTypeFromBuffer } from "./filetype" -import { TraceOptions } from "./trace" -import { hash } from "./crypto" -import { resolveFileContent } from "./file" -import { redactSecrets } from "./secretscanner" -const dbg = debug("genaiscript:resource") +import { resolveBufferLike } from "./bufferlike"; +import { CHANGE, MCP_RESOURCE_PROTOCOL, RESOURCE_CHANGE } from "./constants"; +import debug from "debug"; +import { fileTypeFromBuffer } from "./filetype"; +import { TraceOptions } from "./trace"; +import { hash } from "./crypto"; +import { resolveFileContent } from "./file"; +import { redactSecrets } from "./secretscanner"; +const dbg = debug("genaiscript:resource"); export interface ResourceReference { - uri: string // Unique identifier for the resource - name: string // Human-readable name - description?: string // Optional description - mimeType?: string // Optional MIME type + uri: string; // Unique identifier for the resource + name: string; // Human-readable name + description?: string; // Optional description + mimeType?: string; // Optional MIME type } export interface ResourceContent { - uri: string // The URI of the resource - mimeType?: string // Optional MIME type + uri: string; // The URI of the resource + mimeType?: string; // Optional MIME type - // One of: - text?: string // For text resources - blob?: string // For binary resources (base64 encoded) + // One of: + text?: string; // For text resources + blob?: string; // For binary resources (base64 encoded) } export interface ResourceContents { - contents: ResourceContent[] + contents: ResourceContent[]; } export interface Resource { - reference: ResourceReference - content: ResourceContents + reference: ResourceReference; + content: ResourceContents; } export class ResourceManager extends EventTarget { - private _resources: Record = {} - async resources(): Promise { - return Object.values(this._resources).map((r) => r.reference) - } - async readResource(uri: string): Promise { - dbg(`reading resource: ${uri}`) - const resource = this._resources[uri] - return resource?.content - } - async clear() { - this._resources = {} - this.dispatchEvent(new Event(CHANGE)) - } + private _resources: Record = {}; + async resources(): Promise { + return Object.values(this._resources).map((r) => r.reference); + } + async readResource(uri: string): Promise { + dbg(`reading resource: ${uri}`); + const resource = this._resources[uri]; + return resource?.content; + } + async clear() { + this._resources = {}; + this.dispatchEvent(new Event(CHANGE)); + } - async publishResource( - name: string, - body: BufferLike, - options?: Partial> & - TraceOptions & - SecretDetectionOptions - ) { - dbg(`publishing ${typeof body}`) - const res = await createResource(name, body, options) - await this.upsetResource(res.reference, res.content) - const { reference } = res - return reference.uri - } + async publishResource( + name: string, + body: BufferLike, + options?: Partial> & TraceOptions & SecretDetectionOptions, + ) { + 
dbg(`publishing ${typeof body}`); + const res = await createResource(name, body, options); + await this.upsetResource(res.reference, res.content); + const { reference } = res; + return reference.uri; + } - async upsetResource( - reference: ResourceReference, - content: ResourceContents | undefined - ): Promise { - dbg(`upsert ${reference.uri}`) - if (!reference?.uri) - throw new Error("Resource reference must have a uri") - const current = await hash(this._resources[reference.uri]) - if (!content) delete this._resources[reference.uri] - else this._resources[reference.uri] = { reference, content } - const update = await hash(this._resources[reference.uri]) - if (current !== update) { - dbg(`resource changed: ${reference.uri}`) - this.dispatchEvent( - new CustomEvent(RESOURCE_CHANGE, { - detail: { - reference, - content, - }, - }) - ) - } - this.dispatchEvent(new Event(CHANGE)) + async upsetResource( + reference: ResourceReference, + content: ResourceContents | undefined, + ): Promise { + dbg(`upsert ${reference.uri}`); + if (!reference?.uri) throw new Error("Resource reference must have a uri"); + const current = await hash(this._resources[reference.uri]); + if (!content) delete this._resources[reference.uri]; + else this._resources[reference.uri] = { reference, content }; + const update = await hash(this._resources[reference.uri]); + if (current !== update) { + dbg(`resource changed: ${reference.uri}`); + this.dispatchEvent( + new CustomEvent(RESOURCE_CHANGE, { + detail: { + reference, + content, + }, + }), + ); } + this.dispatchEvent(new Event(CHANGE)); + } } async function createResource( - name: string, - body: BufferLike, - options?: Partial> & - TraceOptions & - SecretDetectionOptions + name: string, + body: BufferLike, + options?: Partial> & TraceOptions & SecretDetectionOptions, ): Promise<{ reference: ResourceReference; content: ResourceContents }> { - const { description } = options || {} - if (!name) throw new Error("Resource name is required") - const content = await resolveResourceContents(body, options) - if (!content.uri) { - content.uri = `${MCP_RESOURCE_PROTOCOL}://resources/${await hash( - JSON.stringify(content), - { length: 32 } - )}` - } - const reference: ResourceReference = { - name, - description, - uri: content.uri, // may be undefined - mimeType: content.mimeType, - } - return { - reference, - content: { contents: [content] }, - } + const { description } = options || {}; + if (!name) throw new Error("Resource name is required"); + const content = await resolveResourceContents(body, options); + if (!content.uri) { + content.uri = `${MCP_RESOURCE_PROTOCOL}://resources/${await hash(JSON.stringify(content), { + length: 32, + })}`; + } + const reference: ResourceReference = { + name, + description, + uri: content.uri, // may be undefined + mimeType: content.mimeType, + }; + return { + reference, + content: { contents: [content] }, + }; } async function resolveResourceContents( - body: BufferLike, - options?: Partial & TraceOptions & SecretDetectionOptions + body: BufferLike, + options?: Partial & TraceOptions & SecretDetectionOptions, ): Promise { - const { trace, uri, mimeType, secretScanning } = options || {} - if (typeof body === "string") { - if (secretScanning !== false) { - const redacted = await redactSecrets(body, { trace }) - body = redacted.text - } - return { - uri, - mimeType: mimeType || "text/plain", - text: body, - } - } else if ( - typeof body === "object" && - ((body as WorkspaceFile).content || (body as WorkspaceFile).filename) - ) { - const file = 
body as WorkspaceFile - await resolveFileContent(file, options) - if (file.encoding) - return { - uri: uri || file.filename, - mimeType: file.type || "application/octet-stream", - blob: file.content, - } - else { - if (secretScanning !== false) { - const redacted = await redactSecrets(file.content, { trace }) - file.content = redacted.text - } + const { trace, uri, mimeType, secretScanning } = options || {}; + if (typeof body === "string") { + if (secretScanning !== false) { + const redacted = await redactSecrets(body, { trace }); + body = redacted.text; + } + return { + uri, + mimeType: mimeType || "text/plain", + text: body, + }; + } else if ( + typeof body === "object" && + ((body as WorkspaceFile).content || (body as WorkspaceFile).filename) + ) { + const file = body as WorkspaceFile; + await resolveFileContent(file, options); + if (file.encoding) + return { + uri: uri || file.filename, + mimeType: file.type || "application/octet-stream", + blob: file.content, + }; + else { + if (secretScanning !== false) { + const redacted = await redactSecrets(file.content, { trace }); + file.content = redacted.text; + } - return { - uri: uri || file.filename, - mimeType: file.type || "text/plain", - text: file.content, - } - } - } else { - const bytes = await resolveBufferLike(body as BufferLike, options) - const mime = await fileTypeFromBuffer(bytes) - return { - uri: uri, - mimeType: mimeType || mime?.mime || "application/octet-stream", - blob: bytes.toString("base64"), - } + return { + uri: uri || file.filename, + mimeType: file.type || "text/plain", + text: file.content, + }; } + } else { + const bytes = await resolveBufferLike(body as BufferLike, options); + const mime = await fileTypeFromBuffer(bytes); + return { + uri: uri, + mimeType: mimeType || mime?.mime || "application/octet-stream", + blob: bytes.toString("base64"), + }; + } } diff --git a/packages/core/src/mdchunk.test.ts b/packages/core/src/mdchunk.test.ts index b8c29366ae..f5b17f4ba4 100644 --- a/packages/core/src/mdchunk.test.ts +++ b/packages/core/src/mdchunk.test.ts @@ -1,127 +1,124 @@ -import { chunkMarkdown } from "./mdchunk" -import { beforeEach, describe, test } from "node:test" -import assert from "node:assert" -import { glob } from "glob" -import { readFile } from "node:fs/promises" -import { DOCXTryParse } from "./docx" -import { TestHost } from "./testhost" +import { chunkMarkdown } from "./mdchunk"; +import { beforeEach, describe, test } from "node:test"; +import assert from "node:assert"; +import { glob } from "glob"; +import { readFile } from "node:fs/promises"; +import { DOCXTryParse } from "./docx"; +import { TestHost } from "./testhost"; describe(`chunkMarkdown`, async () => { - const estimateTokens = (text: string) => text.split(/\s+/).length - beforeEach(() => { - TestHost.install() - }) - - test(`handles empty markdown string`, async () => { - const markdown = `` - const result = await chunkMarkdown(markdown, estimateTokens) - assert.strictEqual(result.map((r) => r.content).join("\n"), markdown) - - assert.deepStrictEqual(result, []) - }) - - test(`chunks markdown with single heading`, async () => { - const markdown = `# Heading 1 -Content under heading 1` - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens: 10, - }) - assert.strictEqual(result.map((r) => r.content).join("\n"), markdown) - - assert.deepStrictEqual( - result.map((r) => r.content), - [`# Heading 1\nContent under heading 1`] - ) - }) - - test(`chunks markdown with multiple headings`, async () => { - const markdown = `# 
Heading 1 + const estimateTokens = (text: string) => text.split(/\s+/).length; + beforeEach(() => { + TestHost.install(); + }); + + test(`handles empty markdown string`, async () => { + const markdown = ``; + const result = await chunkMarkdown(markdown, estimateTokens); + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); + + assert.deepStrictEqual(result, []); + }); + + test(`chunks markdown with single heading`, async () => { + const markdown = `# Heading 1 +Content under heading 1`; + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens: 10, + }); + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); + + assert.deepStrictEqual( + result.map((r) => r.content), + [`# Heading 1\nContent under heading 1`], + ); + }); + + test(`chunks markdown with multiple headings`, async () => { + const markdown = `# Heading 1 Content under heading 1 Content under heading 1.1 Content under heading 1.2 ## Heading 2 -Content under heading 2` - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens: 10, - }) - assert.strictEqual(result.map((r) => r.content).join("\n"), markdown) - - assert.deepStrictEqual( - result.map((r) => r.content), - [ - `# Heading 1 +Content under heading 2`; + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens: 10, + }); + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); + + assert.deepStrictEqual( + result.map((r) => r.content), + [ + `# Heading 1 Content under heading 1 Content under heading 1.1 Content under heading 1.2`, - `## Heading 2 + `## Heading 2 Content under heading 2`, - ] - ) - }) + ], + ); + }); - test(`chunks markdown with nested headings`, async () => { - const markdown = `# Heading 1 + test(`chunks markdown with nested headings`, async () => { + const markdown = `# Heading 1 Content under heading 1 abracadabra ## Heading 2 Content under heading 2 abracadabra ### Heading 3 -Content under heading 3 abracadabra` - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens: 5, - }) - assert.strictEqual(result.map((r) => r.content).join("\n"), markdown) - - assert.deepStrictEqual( - result.map((r) => r.content), - [ - `# Heading 1 +Content under heading 3 abracadabra`; + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens: 5, + }); + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); + + assert.deepStrictEqual( + result.map((r) => r.content), + [ + `# Heading 1 Content under heading 1 abracadabra`, - `## Heading 2 + `## Heading 2 Content under heading 2 abracadabra`, - `### Heading 3 + `### Heading 3 Content under heading 3 abracadabra`, - ] - ) - }) - - test(`chunks markdown with large content`, async () => { - const markdown = - `# Heading 1\n` + - `Content `.repeat(100) + - `\n## Heading 2\n` + - `Content `.repeat(100) - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens: 50, - }) - assert.strictEqual(result.map((r) => r.content).join("\n"), markdown) - - assert(result.length > 1) - }) - - test(`chunks markdown with backtracking`, async () => { - const markdown = `# Heading 1 + ], + ); + }); + + test(`chunks markdown with large content`, async () => { + const markdown = + `# Heading 1\n` + `Content `.repeat(100) + `\n## Heading 2\n` + `Content `.repeat(100); + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens: 50, + }); + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); + + assert(result.length > 1); + }); + + 
test(`chunks markdown with backtracking`, async () => { + const markdown = `# Heading 1 Content under heading 1 ## Heading 2 Content under heading 2 ### Heading 3 -Content under heading 3` - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens: 5, - }) - assert.strictEqual(result.map((r) => r.content).join("\n"), markdown) - - assert.deepStrictEqual( - result.map((r) => r.content), - [ - `# Heading 1\nContent under heading 1`, - `## Heading 2\nContent under heading 2`, - `### Heading 3\nContent under heading 3`, - ] - ) - }) - - test(`chunks markdown with large sections`, async () => { - const markdown = ` +Content under heading 3`; + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens: 5, + }); + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); + + assert.deepStrictEqual( + result.map((r) => r.content), + [ + `# Heading 1\nContent under heading 1`, + `## Heading 2\nContent under heading 2`, + `### Heading 3\nContent under heading 3`, + ], + ); + }); + + test(`chunks markdown with large sections`, async () => { + const markdown = ` # markdown What is Markdown? @@ -151,68 +148,56 @@ Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots ### Where can I get some? There are many variations of passages of Lorem Ipsum available, but the majority have suffered alteration in some form, by injected humour, or randomised words which don't look even slightly believable. If you are going to use a passage of Lorem Ipsum, you need to be sure there isn't anything embarrassing hidden in the middle of text. All the Lorem Ipsum generators on the Internet tend to repeat predefined chunks as necessary, making this the first true generator on the Internet. It uses a dictionary of over 200 Latin words, combined with a handful of model sentence structures, to generate Lorem Ipsum which looks reasonable. The generated Lorem Ipsum is therefore always free from repetition, injected humour, or non-characteristic words. 
- ` - for (let i = 0; i < 70; ++i) { - const maxTokens = i * 10 - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens, - }) - //console.log(`${maxTokens} => ${result.length}`) - assert.strictEqual( - result.map((r) => r.content).join("\n"), - markdown - ) - } - }) - - const docs = await glob("../../docs/src/content/**/*.md*") - for (const doc of docs) { - await test(`docs: chunks markdown from ${doc}`, async () => { - const markdown = await readFile(doc, { encoding: "utf-8" }) - assert(markdown) - for (let i = 0; i < 12; ++i) { - const maxTokens = 1 << i - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens, - }) - // console.log(`${maxTokens} => ${result.length}`) - assert.strictEqual( - result.map((r) => r.content).join("\n"), - markdown - ) - } - }) + `; + for (let i = 0; i < 70; ++i) { + const maxTokens = i * 10; + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens, + }); + //console.log(`${maxTokens} => ${result.length}`) + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); } - - await test(`word: chunks markdown from docx`, async () => { - const { file } = await DOCXTryParse( - "../../packages/sample/src/rag/Document.docx", - { - format: "markdown", - } - ) - const markdown = file.content - assert(markdown) - for (let i = 0; i < 12; ++i) { - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens: 1 << i, - }) - assert.strictEqual( - result.map((r) => r.content).join("\n"), - markdown - ) - } - }) - - await test(`chunk genaiscript/llms-full.txt`, async () => { - const markdown = await ( - await fetch("https://microsoft.github.io/genaiscript/llms-full.txt") - ).text() - for (let i = 0; i < 12; ++i) { - const result = await chunkMarkdown(markdown, estimateTokens, { - maxTokens: 1 << i, - }) - console.debug(`llms-full ${i} => ${result.length}`) - } - }) -}) + }); + + const docs = await glob("../../docs/src/content/**/*.md*"); + for (const doc of docs) { + await test(`docs: chunks markdown from ${doc}`, async () => { + const markdown = await readFile(doc, { encoding: "utf-8" }); + assert(markdown); + for (let i = 0; i < 12; ++i) { + const maxTokens = 1 << i; + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens, + }); + // console.log(`${maxTokens} => ${result.length}`) + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); + } + }); + } + + await test(`word: chunks markdown from docx`, async () => { + const { file } = await DOCXTryParse("../../packages/sample/src/rag/Document.docx", { + format: "markdown", + }); + const markdown = file.content; + assert(markdown); + for (let i = 0; i < 12; ++i) { + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens: 1 << i, + }); + assert.strictEqual(result.map((r) => r.content).join("\n"), markdown); + } + }); + + await test(`chunk genaiscript/llms-full.txt`, async () => { + const markdown = await ( + await fetch("https://microsoft.github.io/genaiscript/llms-full.txt") + ).text(); + for (let i = 0; i < 12; ++i) { + const result = await chunkMarkdown(markdown, estimateTokens, { + maxTokens: 1 << i, + }); + console.debug(`llms-full ${i} => ${result.length}`); + } + }); +}); diff --git a/packages/core/src/mdchunk.ts b/packages/core/src/mdchunk.ts index 6636201ab0..f191575357 100644 --- a/packages/core/src/mdchunk.ts +++ b/packages/core/src/mdchunk.ts @@ -8,123 +8,119 @@ * @returns Array of TextChunk objects representing the chunks, including metadata such as filename and line range. 
*/ export async function chunkMarkdown( - markdown: string | WorkspaceFile, - approximateTokens: (text: string) => number, - options?: { - maxTokens?: number - pageSeparator?: string - } + markdown: string | WorkspaceFile, + approximateTokens: (text: string) => number, + options?: { + maxTokens?: number; + pageSeparator?: string; + }, ): Promise { - const { maxTokens = 4096, pageSeparator = "======" } = options || {} - if (!markdown) return [] + const { maxTokens = 4096, pageSeparator = "======" } = options || {}; + if (!markdown) return []; - type Section = { heading: string; lines: string[]; level: number } + type Section = { heading: string; lines: string[]; level: number }; - const filename = typeof markdown === "object" ? markdown.filename : "" - if (typeof markdown === "object") { - if (markdown.encoding === "base64") - throw new Error("base64 encoding not supported") - markdown = markdown.content - } + const filename = typeof markdown === "object" ? markdown.filename : ""; + if (typeof markdown === "object") { + if (markdown.encoding === "base64") throw new Error("base64 encoding not supported"); + markdown = markdown.content; + } - const lines = markdown.split(/\r?\n/g) + const lines = markdown.split(/\r?\n/g); - const sections: Section[] = [] - let current: Section | null = null + const sections: Section[] = []; + let current: Section | null = null; - lines.forEach((line) => { - if (line.startsWith(pageSeparator)) { - if (current) sections.push(current) - current = null - return - } - const match = /^(\#{1,6})\s+(.*)/.exec(line) - if (match) { - if (current) sections.push(current) - current = { - heading: match[2], - lines: [line], - level: match[1].length, - } - return - } + lines.forEach((line) => { + if (line.startsWith(pageSeparator)) { + if (current) sections.push(current); + current = null; + return; + } + const match = /^(\#{1,6})\s+(.*)/.exec(line); + if (match) { + if (current) sections.push(current); + current = { + heading: match[2], + lines: [line], + level: match[1].length, + }; + return; + } - if (!current) current = { heading: "", lines: [], level: 0 } - current.lines.push(line) - }) - if (current) sections.push(current) + if (!current) current = { heading: "", lines: [], level: 0 }; + current.lines.push(line); + }); + if (current) sections.push(current); - const chunks: string[] = [] - let tempChunk: Section[] = [] - let tokenCount = 0 + const chunks: string[] = []; + let tempChunk: Section[] = []; + let tokenCount = 0; - for (let i = 0; i < sections.length; i++) { - const sectionTokens = sectionTokenCount(sections[i], approximateTokens) + for (let i = 0; i < sections.length; i++) { + const sectionTokens = sectionTokenCount(sections[i], approximateTokens); - if (sectionTokens > maxTokens) { - if (tempChunk.length) { - chunks.push(buildChunk(tempChunk)) - tempChunk = [] - tokenCount = 0 - } - chunks.push(buildChunk([sections[i]])) - continue - } + if (sectionTokens > maxTokens) { + if (tempChunk.length) { + chunks.push(buildChunk(tempChunk)); + tempChunk = []; + tokenCount = 0; + } + chunks.push(buildChunk([sections[i]])); + continue; + } - if (tokenCount + sectionTokens <= maxTokens) { - tempChunk.push(sections[i]) - tokenCount += sectionTokens - } else { - // Instead of discarding, gather removed sections and prepend them to the new chunk - const removedSections: Section[] = [] - let j = i - while ( - j > 0 && - sections[j].level > sections[j - 1].level && - tokenCount + sectionTokens > maxTokens && - tempChunk.length - ) { - const removed = tempChunk.pop() - if 
(removed) { - removedSections.unshift(removed) - tokenCount -= sectionTokenCount(removed, approximateTokens) - } - j-- - } - // Close off current chunk - if (tempChunk.length) { - chunks.push(buildChunk(tempChunk)) - } - // Start the new chunk with removed and current - tempChunk = [...removedSections, sections[i]] - tokenCount = tempChunk.reduce( - (acc, sec) => acc + sectionTokenCount(sec, approximateTokens), - 0 - ) + if (tokenCount + sectionTokens <= maxTokens) { + tempChunk.push(sections[i]); + tokenCount += sectionTokens; + } else { + // Instead of discarding, gather removed sections and prepend them to the new chunk + const removedSections: Section[] = []; + let j = i; + while ( + j > 0 && + sections[j].level > sections[j - 1].level && + tokenCount + sectionTokens > maxTokens && + tempChunk.length + ) { + const removed = tempChunk.pop(); + if (removed) { + removedSections.unshift(removed); + tokenCount -= sectionTokenCount(removed, approximateTokens); } + j--; + } + // Close off current chunk + if (tempChunk.length) { + chunks.push(buildChunk(tempChunk)); + } + // Start the new chunk with removed and current + tempChunk = [...removedSections, sections[i]]; + tokenCount = tempChunk.reduce( + (acc, sec) => acc + sectionTokenCount(sec, approximateTokens), + 0, + ); } - if (tempChunk.length) chunks.push(buildChunk(tempChunk)) + } + if (tempChunk.length) chunks.push(buildChunk(tempChunk)); - // convert into text chunk - let currentLine = 0 - return chunks.map( - (chunk, i) => - ({ - filename: filename + `#chunk${i}`, - lineStart: currentLine, - lineEnd: (currentLine += chunk.split(/\r?\n/g).length), - content: chunk, - }) satisfies TextChunk - ) + // convert into text chunk + let currentLine = 0; + return chunks.map( + (chunk, i) => + ({ + filename: filename + `#chunk${i}`, + lineStart: currentLine, + lineEnd: (currentLine += chunk.split(/\r?\n/g).length), + content: chunk, + }) satisfies TextChunk, + ); - function sectionTokenCount( - section: { lines: string[] }, - tokenCount: (txt: string) => number - ) { - return section.lines.reduce((acc, line) => acc + tokenCount(line), 0) - } + function sectionTokenCount(section: { lines: string[] }, tokenCount: (txt: string) => number) { + return section.lines.reduce((acc, line) => acc + tokenCount(line), 0); + } - function buildChunk(sections: { lines: string[] }[]) { - return sections.map((s) => s.lines.join("\n")).join("\n") - } + function buildChunk(sections: { lines: string[] }[]) { + return sections.map((s) => s.lines.join("\n")).join("\n"); + } } diff --git a/packages/core/src/mddiff.test.ts b/packages/core/src/mddiff.test.ts index 4ef84b8bbe..a071ad0603 100644 --- a/packages/core/src/mddiff.test.ts +++ b/packages/core/src/mddiff.test.ts @@ -1,32 +1,32 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { markdownDiff } from "./mddiff" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { markdownDiff } from "./mddiff"; describe("markdownDiff", () => { - test("should return fenced code block when oldStr is undefined", () => { - const result = markdownDiff(undefined, "test content", { lang: "ts" }) - assert.equal(result, "\n```ts\ntest content\n```\n") - }) + test("should return fenced code block when oldStr is undefined", () => { + const result = markdownDiff(undefined, "test content", { lang: "ts" }); + assert.equal(result, "\n```ts\ntest content\n```\n"); + }); - test("should handle empty strings", () => { - const result = markdownDiff("", "", { lang: 
"js" }) - assert.equal(result, "\n```diff\n\n```\n") - }) + test("should handle empty strings", () => { + const result = markdownDiff("", "", { lang: "js" }); + assert.equal(result, "\n```diff\n\n```\n"); + }); - test("should show additions with + prefix", () => { - const result = markdownDiff("line 1", "line 1\nline 2", { lang: "txt" }) - assert.equal(result, "\n```diff\n-line 1+line 1\nline 2\n```\n") - }) + test("should show additions with + prefix", () => { + const result = markdownDiff("line 1", "line 1\nline 2", { lang: "txt" }); + assert.equal(result, "\n```diff\n-line 1+line 1\nline 2\n```\n"); + }); - test("should show removals with - prefix", () => { - const result = markdownDiff("line 1\nline 2", "line 1", { lang: "txt" }) - assert.equal(result, "\n```diff\n-line 1\nline 2+line 1\n```\n") - }) + test("should show removals with - prefix", () => { + const result = markdownDiff("line 1\nline 2", "line 1", { lang: "txt" }); + assert.equal(result, "\n```diff\n-line 1\nline 2+line 1\n```\n"); + }); - test("should handle options.ignoreWhitespace", () => { - const result = markdownDiff("line 1", "line 1", { - ignoreWhitespace: true, - }) - assert.equal(result, "\n```diff\n-line 1+line 1\n```\n") - }) -}) + test("should handle options.ignoreWhitespace", () => { + const result = markdownDiff("line 1", "line 1", { + ignoreWhitespace: true, + }); + assert.equal(result, "\n```diff\n-line 1+line 1\n```\n"); + }); +}); diff --git a/packages/core/src/mddiff.ts b/packages/core/src/mddiff.ts index 192189ab9f..fa1ba832c3 100644 --- a/packages/core/src/mddiff.ts +++ b/packages/core/src/mddiff.ts @@ -1,5 +1,5 @@ -import { diffLines } from "diff" -import { fenceMD } from "./mkmd" +import { diffLines } from "diff"; +import { fenceMD } from "./mkmd"; /** * Generates a markdown-styled diff between two strings. @@ -12,20 +12,18 @@ import { fenceMD } from "./mkmd" * @returns A fenced markdown string representing the diff or the new string if oldStr is undefined. */ export function markdownDiff( - oldStr: string, - newStr: string, - options?: { - lang?: string - ignoreWhitespace?: boolean - } + oldStr: string, + newStr: string, + options?: { + lang?: string; + ignoreWhitespace?: boolean; + }, ) { - const { lang, ...rest } = options || {} + const { lang, ...rest } = options || {}; - if (oldStr === undefined) return fenceMD(newStr, lang) + if (oldStr === undefined) return fenceMD(newStr, lang); - const changes = diffLines(oldStr || "", newStr || "", rest) - const source = changes - .map((c) => `${c.added ? "+" : c.removed ? "-" : " "}${c.value}`) - .join("") - return fenceMD(source, "diff") + const changes = diffLines(oldStr || "", newStr || "", rest); + const source = changes.map((c) => `${c.added ? "+" : c.removed ? "-" : " "}${c.value}`).join(""); + return fenceMD(source, "diff"); } diff --git a/packages/core/src/memcache.ts b/packages/core/src/memcache.ts index e36f836d59..b4388a7592 100644 --- a/packages/core/src/memcache.ts +++ b/packages/core/src/memcache.ts @@ -1,8 +1,8 @@ // Import necessary modules and types -import { CACHE_FORMAT_VERSION, CACHE_SHA_LENGTH, CHANGE } from "./constants" -import { hash } from "./crypto" -import type { CacheEntry } from "./cache" -import debug, { Debugger } from "debug" +import { CACHE_FORMAT_VERSION, CACHE_SHA_LENGTH, CHANGE } from "./constants"; +import { hash } from "./crypto"; +import type { CacheEntry } from "./cache"; +import debug, { Debugger } from "debug"; /** * A cache class that manages entries stored in JSONL format. 
@@ -10,113 +10,109 @@ import debug, { Debugger } from "debug"
  * @template K - Type of the key
  * @template V - Type of the value
  */
-export class MemoryCache<K, V>
-    extends EventTarget
-    implements WorkspaceFileCache<K, V>
-{
-    protected _entries: Record<string, CacheEntry<V>>
-    private _pending: Record<string, Promise<V>>
-    private readonly hashOptions: HashOptions
-    protected dbg: Debugger
+export class MemoryCache<K, V> extends EventTarget implements WorkspaceFileCache<K, V> {
+  protected _entries: Record<string, CacheEntry<V>>;
+  private _pending: Record<string, Promise<V>>;
+  private readonly hashOptions: HashOptions;
+  protected dbg: Debugger;
 
-    // Constructor is private to enforce the use of byName factory method
-    constructor(public readonly name: string) {
-        super() // Initialize EventTarget
-        this.dbg = debug(`genaiscript:cache:${name}`) // Initialize debugger
-        this.hashOptions = {
-            salt: CACHE_FORMAT_VERSION,
-            length: CACHE_SHA_LENGTH,
-        } satisfies HashOptions
-    }
+  // Constructor is private to enforce the use of byName factory method
+  constructor(public readonly name: string) {
+    super(); // Initialize EventTarget
+    this.dbg = debug(`genaiscript:cache:${name}`); // Initialize debugger
+    this.hashOptions = {
+      salt: CACHE_FORMAT_VERSION,
+      length: CACHE_SHA_LENGTH,
+    } satisfies HashOptions;
+  }
 
-    protected async initialize() {
-        if (this._entries) return
-        this._entries = {}
-        this._pending = {}
-    }
+  protected async initialize() {
+    if (this._entries) return;
+    this._entries = {};
+    this._pending = {};
+  }
 
-    /**
-     * Retrieve all values from the cache.
-     * @returns
-     */
-    async values(): Promise<V[]> {
-        await this.initialize()
-        return Object.values(this._entries).map((kv) => kv.val)
-    }
+  /**
+   * Retrieve all values from the cache.
+   * @returns
+   */
+  async values(): Promise<V[]> {
+    await this.initialize();
+    return Object.values(this._entries).map((kv) => kv.val);
+  }
 
-    /**
-     * Get the value associated with a specific key.
-     * @param key - The key of the entry
-     * @returns A promise resolving to the value
-     */
-    async get(key: K): Promise<V> {
-        if (key === undefined) return undefined // Handle undefined key
-        await this.initialize()
-        const sha = await this.getSha(key)
-        const res = this._entries[sha]?.val
-        this.dbg(`get ${sha}: ${res !== undefined ? "hit" : "miss"}`)
-        return res
-    }
+  /**
+   * Get the value associated with a specific key.
+   * @param key - The key of the entry
+   * @returns A promise resolving to the value
+   */
+  async get(key: K): Promise<V> {
+    if (key === undefined) return undefined; // Handle undefined key
+    await this.initialize();
+    const sha = await this.getSha(key);
+    const res = this._entries[sha]?.val;
+    this.dbg(`get ${sha}: ${res !== undefined ? "hit" : "miss"}`);
+    return res;
+  }
 
-    async getOrUpdate(
-        key: K,
-        updater: () => Promise<V>,
-        validator?: (val: V) => boolean
-    ): Promise<{ key: string; value: V; cached?: boolean }> {
-        await this.initialize()
-        const sha = await this.getSha(key)
-        if (this._entries[sha]) {
-            this.dbg(`getup ${sha}: hit`)
-            return { key: sha, value: this._entries[sha].val, cached: true }
-        }
-        if (this._pending[sha]) {
-            this.dbg(`getup ${sha}: hit (pending)`)
-            return { key: sha, value: await this._pending[sha], cached: true }
-        }
+  async getOrUpdate(
+    key: K,
+    updater: () => Promise<V>,
+    validator?: (val: V) => boolean,
+  ): Promise<{ key: string; value: V; cached?: boolean }> {
+    await this.initialize();
+    const sha = await this.getSha(key);
+    if (this._entries[sha]) {
+      this.dbg(`getup ${sha}: hit`);
+      return { key: sha, value: this._entries[sha].val, cached: true };
+    }
+    if (this._pending[sha]) {
+      this.dbg(`getup ${sha}: hit (pending)`);
+      return { key: sha, value: await this._pending[sha], cached: true };
    }
 
-        try {
-            const p = updater()
-            this._pending[sha] = p
-            const value = await p
-            if (!validator || validator(value)) {
-                await this.set(key, value)
-                this.dbg(`set ${sha}: updated`)
-            }
-            return { key: sha, value, cached: false }
-        } finally {
-            delete this._pending[sha]
-        }
+    try {
+      const p = updater();
+      this._pending[sha] = p;
+      const value = await p;
+      if (!validator || validator(value)) {
+        await this.set(key, value);
+        this.dbg(`set ${sha}: updated`);
+      }
+      return { key: sha, value, cached: false };
+    } finally {
+      delete this._pending[sha];
     }
+  }
 
-    protected async appendEntry(entry: CacheEntry<V>) {}
+  protected async appendEntry(entry: CacheEntry<V>) {}
 
-    /**
-     * Set a key-value pair in the cache, triggering a change event.
-     * @param key - The key to set
-     * @param val - The value to set
-     * @param options - Optional trace options
-     */
-    async set(key: K, val: V) {
-        await this.initialize()
-        const sha = await this.getSha(key)
-        const ent = { sha, val } satisfies CacheEntry<V>
-        const ex = this._entries[sha]
-        if (ex !== undefined && JSON.stringify(ex) == JSON.stringify(ent))
-            return // No change
+  /**
+   * Set a key-value pair in the cache, triggering a change event.
+   * @param key - The key to set
+   * @param val - The value to set
+   * @param options - Optional trace options
+   */
+  async set(key: K, val: V) {
+    await this.initialize();
+    const sha = await this.getSha(key);
+    const ent = { sha, val } satisfies CacheEntry<V>;
+    const ex = this._entries[sha];
+    if (ex !== undefined && JSON.stringify(ex) == JSON.stringify(ent)) return; // No change
 
-        this._entries[sha] = ent
-        await this.appendEntry(ent)
-        this.dispatchEvent(new Event(CHANGE)) // Notify listeners
-        this.dbg(`set ${sha}: updated`)
-    }
+    this._entries[sha] = ent;
+    await this.appendEntry(ent);
+    this.dispatchEvent(new Event(CHANGE)); // Notify listeners
+    this.dbg(`set ${sha}: updated`);
+  }
 
-    /**
-     * Compute SHA for a given key.
-     * @param key - The key to compute SHA for
-     * @returns A promise resolving to the SHA string
-     */
-    async getSha(key: K) {
-        const sha = await hash(key, this.hashOptions)
-        return sha
-    }
+  /**
+   * Compute SHA for a given key.
+ * @param key - The key to compute SHA for + * @returns A promise resolving to the SHA string + */ + async getSha(key: K) { + const sha = await hash(key, this.hashOptions); + return sha; + } } diff --git a/packages/core/src/merge.ts b/packages/core/src/merge.ts index b23a247f59..cd432a2948 100644 --- a/packages/core/src/merge.ts +++ b/packages/core/src/merge.ts @@ -1,3 +1,3 @@ -import mergeDescriptors from "merge-descriptors" +import mergeDescriptors from "merge-descriptors"; -export const structuralMerge = mergeDescriptors +export const structuralMerge = mergeDescriptors; diff --git a/packages/core/src/mermaid.test.ts b/packages/core/src/mermaid.test.ts index 3daf7cc767..3615eae74a 100644 --- a/packages/core/src/mermaid.test.ts +++ b/packages/core/src/mermaid.test.ts @@ -1,37 +1,37 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { mermaidParse } from "./mermaid" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { mermaidParse } from "./mermaid"; describe("mermaidParse", () => { - test("parses a valid flowchart", async () => { - const input = `graph TD; A-->B;` - const res = await mermaidParse(input) - assert.strictEqual(res.error, undefined) - assert.strictEqual(res.diagramType, "flowchart-v2") - }) - - test("parses a valid sequence diagram", async () => { - const input = `sequenceDiagram\nAlice->>Bob: Hello Bob` - const res = await mermaidParse(input) - assert.strictEqual(res.error, undefined) - assert.strictEqual(res.diagramType, "sequence") - }) - - test("returns error for invalid diagram", async () => { - const input = `not a mermaid diagram` - const res = await mermaidParse(input) - assert.ok(res.error) - assert.strictEqual(res.diagramType, undefined) - }) - - test("returns error for empty input", async () => { - const input = `` - const res = await mermaidParse(input) - assert.ok(res.error) - assert.strictEqual(res.diagramType, undefined) - }) - test("class diagram", async () => { - const input = ` + test("parses a valid flowchart", async () => { + const input = `graph TD; A-->B;`; + const res = await mermaidParse(input); + assert.strictEqual(res.error, undefined); + assert.strictEqual(res.diagramType, "flowchart-v2"); + }); + + test("parses a valid sequence diagram", async () => { + const input = `sequenceDiagram\nAlice->>Bob: Hello Bob`; + const res = await mermaidParse(input); + assert.strictEqual(res.error, undefined); + assert.strictEqual(res.diagramType, "sequence"); + }); + + test("returns error for invalid diagram", async () => { + const input = `not a mermaid diagram`; + const res = await mermaidParse(input); + assert.ok(res.error); + assert.strictEqual(res.diagramType, undefined); + }); + + test("returns error for empty input", async () => { + const input = ``; + const res = await mermaidParse(input); + assert.ok(res.error); + assert.strictEqual(res.diagramType, undefined); + }); + test("class diagram", async () => { + const input = ` classDiagram class PromptNode class MarkdownTrace @@ -162,33 +162,33 @@ classDiagram %% Usage (no inheritance detected in file on these) PromptNode <-- "node prop" - %% Primitives and helpers not detailed for brevity` - const res = await mermaidParse(input) - assert(res.error) - }) + %% Primitives and helpers not detailed for brevity`; + const res = await mermaidParse(input); + assert(res.error); + }); - test("parses a valid state diagram with labels", async () => { - const input = `stateDiagram-v2 + test("parses a valid state diagram with labels", 
async () => { + const input = `stateDiagram-v2 [*] --> Still: Start Still --> Moving: Start Moving Moving --> Still: Stop Moving Moving --> Crash: Crash - Crash --> [*]: Reset` - const res = await mermaidParse(input) - assert.strictEqual(res.error, undefined) - assert.strictEqual(res.diagramType, "stateDiagram") - }) - - test("parses a valid c4 diagram with labels", async () => { - const input = `C4Context + Crash --> [*]: Reset`; + const res = await mermaidParse(input); + assert.strictEqual(res.error, undefined); + assert.strictEqual(res.diagramType, "stateDiagram"); + }); + + test("parses a valid c4 diagram with labels", async () => { + const input = `C4Context title System Context diagram for Internet Banking System Enterprise_Boundary(b0, "BankingSystem") { Person(customer, "Personal Banking Customer", "A customer of the bank") System(banking_system, "Internet Banking System", "Allows customers to check their accounts") System_Ext(mail_system, "E-mail system", "Delivers e-mails") - }` - const res = await mermaidParse(input) - assert.strictEqual(res.error, undefined) - assert.strictEqual(res.diagramType, "c4") - }) -}) + }`; + const res = await mermaidParse(input); + assert.strictEqual(res.error, undefined); + assert.strictEqual(res.diagramType, "c4"); + }); +}); diff --git a/packages/core/src/mermaid.ts b/packages/core/src/mermaid.ts index d928e4d843..d42f574cc3 100644 --- a/packages/core/src/mermaid.ts +++ b/packages/core/src/mermaid.ts @@ -1,32 +1,32 @@ -import { genaiscriptDebug } from "./debug" -import { installWindow } from "./dom" -import { errorMessage } from "./error" -import type { Mermaid } from "mermaid" -const dbg = genaiscriptDebug("mermaid") +import { genaiscriptDebug } from "./debug"; +import { installWindow } from "./dom"; +import { errorMessage } from "./error"; +import type { Mermaid } from "mermaid"; +const dbg = genaiscriptDebug("mermaid"); -let _mermaid: Promise +let _mermaid: Promise; async function importMermaid() { - if (_mermaid) return _mermaid + if (_mermaid) return _mermaid; - await installWindow() - dbg(`importing`) - const mermaid = (await import("mermaid")).default - mermaid.initialize({ startOnLoad: false }) - return mermaid + await installWindow(); + dbg(`importing`); + const mermaid = (await import("mermaid")).default; + mermaid.initialize({ startOnLoad: false }); + return mermaid; } export async function mermaidParse( - text: string + text: string, ): Promise<{ diagramType?: string; error?: string }> { - const mermaid = await importMermaid() - try { - dbg(`parsing %s`, text) - const res = await mermaid.parse(text, { suppressErrors: false }) - if (!res) return { error: "no result" } - return { diagramType: res.diagramType } - } catch (e) { - const m = errorMessage(e) - dbg(`mermaid error: %s`, m) - return { error: m } - } + const mermaid = await importMermaid(); + try { + dbg(`parsing %s`, text); + const res = await mermaid.parse(text, { suppressErrors: false }); + if (!res) return { error: "no result" }; + return { diagramType: res.diagramType }; + } catch (e) { + const m = errorMessage(e); + dbg(`mermaid error: %s`, m); + return { error: m }; + } } diff --git a/packages/core/src/metadata.ts b/packages/core/src/metadata.ts index 0ee6b429de..38801f56c5 100644 --- a/packages/core/src/metadata.ts +++ b/packages/core/src/metadata.ts @@ -1,49 +1,47 @@ -import { deleteUndefinedValues } from "./cleaners" -import { genaiscriptDebug } from "./debug" -import { ellipse } from "./util" -const dbg = genaiscriptDebug("metadata") +import { deleteUndefinedValues } 
from "./cleaners"; +import { genaiscriptDebug } from "./debug"; +import { ellipse } from "./util"; +const dbg = genaiscriptDebug("metadata"); export function metadataValidate( - metadata: Record + metadata: Record, ): Record | undefined { - if (!metadata) return undefined - const entries = Object.entries(metadata) - if (entries.length > 16) - throw new Error("Metadata can only have 16 entries") - // keep the first 16 entries - for (let [key, value] of entries) { - if (key.length > 64) - throw new Error("Invalid metadata key, key too long") - if (value === undefined) delete metadata[key] - if (typeof value !== "string") value = String(value) - if (value.length > 512) value = ellipse(value, 512) - } - dbg(`%O`, metadata) - return metadata + if (!metadata) return undefined; + const entries = Object.entries(metadata); + if (entries.length > 16) throw new Error("Metadata can only have 16 entries"); + // keep the first 16 entries + for (let [key, value] of entries) { + if (key.length > 64) throw new Error("Invalid metadata key, key too long"); + if (value === undefined) delete metadata[key]; + if (typeof value !== "string") value = String(value); + if (value.length > 512) value = ellipse(value, 512); + } + dbg(`%O`, metadata); + return metadata; } export function metadataMerge( - script: PromptScript, - options: Record + script: PromptScript, + options: Record, ): Record | undefined { - const update = script.metadata - const source = options - if (!source && !update) return undefined + const update = script.metadata; + const source = options; + if (!source && !update) return undefined; - const res = { - ...(source || {}), - ...(update || {}), - } - deleteUndefinedValues(res) - const extras = deleteUndefinedValues({ - script: script.id, - group: script.group, - title: script.title, - description: script.description, - }) - for (const [key, value] of Object.entries(extras)) { - if (Object.keys(res).length >= 16) break - if (res[key] === undefined) res[key] = ellipse(value, 512) - } - return metadataValidate(res) + const res = { + ...(source || {}), + ...(update || {}), + }; + deleteUndefinedValues(res); + const extras = deleteUndefinedValues({ + script: script.id, + group: script.group, + title: script.title, + description: script.description, + }); + for (const [key, value] of Object.entries(extras)) { + if (Object.keys(res).length >= 16) break; + if (res[key] === undefined) res[key] = ellipse(value, 512); + } + return metadataValidate(res); } diff --git a/packages/core/src/mime.test.ts b/packages/core/src/mime.test.ts index a8d2105fce..48595258c2 100644 --- a/packages/core/src/mime.test.ts +++ b/packages/core/src/mime.test.ts @@ -1,69 +1,69 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; import { - lookupMime, - TYPESCRIPT_MIME_TYPE, - CSHARP_MIME_TYPE, - PYTHON_MIME_TYPE, - ASTRO_MIME_TYPE, - MARKDOWN_MIME_TYPE, - FSTAR_MIME_TYPE, -} from "./mime" + lookupMime, + TYPESCRIPT_MIME_TYPE, + CSHARP_MIME_TYPE, + PYTHON_MIME_TYPE, + ASTRO_MIME_TYPE, + MARKDOWN_MIME_TYPE, + FSTAR_MIME_TYPE, +} from "./mime"; describe("mime", () => { - test("should return empty string for falsy input", () => { - assert.equal(lookupMime(""), "") - assert.equal(lookupMime(null as unknown as string), "") - assert.equal(lookupMime(undefined as unknown as string), "") - }) + test("should return empty string for falsy input", () => { + assert.equal(lookupMime(""), ""); + assert.equal(lookupMime(null 
as unknown as string), ""); + assert.equal(lookupMime(undefined as unknown as string), ""); + }); - test("should handle TypeScript files", () => { - assert.equal(lookupMime("file.ts"), TYPESCRIPT_MIME_TYPE) - assert.equal(lookupMime("path/to/file.ts"), TYPESCRIPT_MIME_TYPE) - assert.equal(lookupMime("FILE.TS"), TYPESCRIPT_MIME_TYPE) - }) + test("should handle TypeScript files", () => { + assert.equal(lookupMime("file.ts"), TYPESCRIPT_MIME_TYPE); + assert.equal(lookupMime("path/to/file.ts"), TYPESCRIPT_MIME_TYPE); + assert.equal(lookupMime("FILE.TS"), TYPESCRIPT_MIME_TYPE); + }); - test("should handle C# files", () => { - assert.equal(lookupMime("file.cs"), CSHARP_MIME_TYPE) - assert.equal(lookupMime("path/to/file.cs"), CSHARP_MIME_TYPE) - assert.equal(lookupMime("FILE.CS"), CSHARP_MIME_TYPE) - }) + test("should handle C# files", () => { + assert.equal(lookupMime("file.cs"), CSHARP_MIME_TYPE); + assert.equal(lookupMime("path/to/file.cs"), CSHARP_MIME_TYPE); + assert.equal(lookupMime("FILE.CS"), CSHARP_MIME_TYPE); + }); - test("should handle Python files", () => { - assert.equal(lookupMime("file.py"), PYTHON_MIME_TYPE) - assert.equal(lookupMime("path/to/file.py"), PYTHON_MIME_TYPE) - assert.equal(lookupMime("FILE.PY"), PYTHON_MIME_TYPE) - }) + test("should handle Python files", () => { + assert.equal(lookupMime("file.py"), PYTHON_MIME_TYPE); + assert.equal(lookupMime("path/to/file.py"), PYTHON_MIME_TYPE); + assert.equal(lookupMime("FILE.PY"), PYTHON_MIME_TYPE); + }); - test("should handle Astro files", () => { - assert.equal(lookupMime("file.astro"), ASTRO_MIME_TYPE) - assert.equal(lookupMime("path/to/file.astro"), ASTRO_MIME_TYPE) - assert.equal(lookupMime("FILE.ASTRO"), ASTRO_MIME_TYPE) - }) + test("should handle Astro files", () => { + assert.equal(lookupMime("file.astro"), ASTRO_MIME_TYPE); + assert.equal(lookupMime("path/to/file.astro"), ASTRO_MIME_TYPE); + assert.equal(lookupMime("FILE.ASTRO"), ASTRO_MIME_TYPE); + }); - test("should handle Markdown files", () => { - assert.equal(lookupMime("file.md"), MARKDOWN_MIME_TYPE) - assert.equal(lookupMime("path/to/file.md"), MARKDOWN_MIME_TYPE) - assert.equal(lookupMime("FILE.MD"), MARKDOWN_MIME_TYPE) - assert.equal(lookupMime("file.prompty"), MARKDOWN_MIME_TYPE) - assert.equal(lookupMime("FILE.PROMPTY"), MARKDOWN_MIME_TYPE) - }) + test("should handle Markdown files", () => { + assert.equal(lookupMime("file.md"), MARKDOWN_MIME_TYPE); + assert.equal(lookupMime("path/to/file.md"), MARKDOWN_MIME_TYPE); + assert.equal(lookupMime("FILE.MD"), MARKDOWN_MIME_TYPE); + assert.equal(lookupMime("file.prompty"), MARKDOWN_MIME_TYPE); + assert.equal(lookupMime("FILE.PROMPTY"), MARKDOWN_MIME_TYPE); + }); - test("should handle F* files", () => { - assert.equal(lookupMime("file.fst"), FSTAR_MIME_TYPE) - assert.equal(lookupMime("path/to/file.fsti"), FSTAR_MIME_TYPE) - assert.equal(lookupMime("FILE.FST"), FSTAR_MIME_TYPE) - assert.equal(lookupMime("FILE.FSTI"), FSTAR_MIME_TYPE) - }) + test("should handle F* files", () => { + assert.equal(lookupMime("file.fst"), FSTAR_MIME_TYPE); + assert.equal(lookupMime("path/to/file.fsti"), FSTAR_MIME_TYPE); + assert.equal(lookupMime("FILE.FST"), FSTAR_MIME_TYPE); + assert.equal(lookupMime("FILE.FSTI"), FSTAR_MIME_TYPE); + }); - test("should use mime.getType for other file types", () => { - assert.equal(lookupMime("file.json"), "application/json") - assert.equal(lookupMime("file.html"), "text/html") - assert.equal(lookupMime("file.css"), "text/css") - assert.equal(lookupMime("file.js"), "application/javascript") - }) + 
test("should use mime.getType for other file types", () => { + assert.equal(lookupMime("file.json"), "application/json"); + assert.equal(lookupMime("file.html"), "text/html"); + assert.equal(lookupMime("file.css"), "text/css"); + assert.equal(lookupMime("file.js"), "application/javascript"); + }); - test("should return empty string for unknown file types", () => { - assert.equal(lookupMime("file.unknown"), "") - }) -}) + test("should return empty string for unknown file types", () => { + assert.equal(lookupMime("file.unknown"), ""); + }); +}); diff --git a/packages/core/src/mime.ts b/packages/core/src/mime.ts index 58d814810b..79cfdaf1c6 100644 --- a/packages/core/src/mime.ts +++ b/packages/core/src/mime.ts @@ -1,14 +1,14 @@ // Import the 'lookup' function from the 'mime-types' library and rename it to 'mimeTypesLookup' -import mime from "mime" -import { JAVASCRIPT_MIME_TYPE } from "./constants" +import mime from "mime"; +import { JAVASCRIPT_MIME_TYPE } from "./constants"; // Define constant MIME types for specific programming languages -export const FSTAR_MIME_TYPE = "text/x-fstar" -export const TYPESCRIPT_MIME_TYPE = "text/x-typescript" -export const CSHARP_MIME_TYPE = "text/x-csharp" -export const PYTHON_MIME_TYPE = "text/x-python" -export const MARKDOWN_MIME_TYPE = "text/markdown" -export const ASTRO_MIME_TYPE = "text/x-astro" +export const FSTAR_MIME_TYPE = "text/x-fstar"; +export const TYPESCRIPT_MIME_TYPE = "text/x-typescript"; +export const CSHARP_MIME_TYPE = "text/x-csharp"; +export const PYTHON_MIME_TYPE = "text/x-python"; +export const MARKDOWN_MIME_TYPE = "text/markdown"; +export const ASTRO_MIME_TYPE = "text/x-astro"; // Define a function to look up the MIME type for a given filename /** @@ -21,13 +21,13 @@ export const ASTRO_MIME_TYPE = "text/x-astro" * If none match, it uses 'mimeTypesLookup' from the 'mime-types' library to find the MIME type. 
*/ export function lookupMime(filename: string) { - if (!filename) return "" // Return an empty string if the filename is falsy - if (/\.m?ts$/i.test(filename)) return TYPESCRIPT_MIME_TYPE - if (/\.(c|m)?js$/i.test(filename)) return JAVASCRIPT_MIME_TYPE - if (/\.cs$/i.test(filename)) return CSHARP_MIME_TYPE - if (/\.py$/i.test(filename)) return PYTHON_MIME_TYPE - if (/\.astro$/i.test(filename)) return ASTRO_MIME_TYPE - if (/\.(md|prompty)$/i.test(filename)) return MARKDOWN_MIME_TYPE - if (/\.(fst|fsti)$/i.test(filename)) return FSTAR_MIME_TYPE - return mime.getType(filename) || "" + if (!filename) return ""; // Return an empty string if the filename is falsy + if (/\.m?ts$/i.test(filename)) return TYPESCRIPT_MIME_TYPE; + if (/\.(c|m)?js$/i.test(filename)) return JAVASCRIPT_MIME_TYPE; + if (/\.cs$/i.test(filename)) return CSHARP_MIME_TYPE; + if (/\.py$/i.test(filename)) return PYTHON_MIME_TYPE; + if (/\.astro$/i.test(filename)) return ASTRO_MIME_TYPE; + if (/\.(md|prompty)$/i.test(filename)) return MARKDOWN_MIME_TYPE; + if (/\.(fst|fsti)$/i.test(filename)) return FSTAR_MIME_TYPE; + return mime.getType(filename) || ""; } diff --git a/packages/core/src/mkmd.test.ts b/packages/core/src/mkmd.test.ts index 2c598f50b4..ed9f691531 100644 --- a/packages/core/src/mkmd.test.ts +++ b/packages/core/src/mkmd.test.ts @@ -1,73 +1,73 @@ -import assert from "node:assert/strict" -import { describe, it } from "node:test" -import { fenceMD, link, details } from "./mkmd" +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { fenceMD, link, details } from "./mkmd"; describe("mkmd", () => { - describe("fenceMD", () => { - it("should wrap text in code fence", () => { - const result = fenceMD("test") - assert.equal(result, "\n```\ntest\n```\n") - }) + describe("fenceMD", () => { + it("should wrap text in code fence", () => { + const result = fenceMD("test"); + assert.equal(result, "\n```\ntest\n```\n"); + }); - it("should add content type to fence", () => { - const result = fenceMD("test", "typescript") - assert.equal(result, "\n```ts\ntest\n```\n") - }) + it("should add content type to fence", () => { + const result = fenceMD("test", "typescript"); + assert.equal(result, "\n```ts\ntest\n```\n"); + }); - it("should map content types appropriately", () => { - assert.equal(fenceMD("test", "markdown"), "\n```md\ntest\n```\n") - assert.equal(fenceMD("test", "prompty"), "\n```md\ntest\n```\n") - assert.equal(fenceMD("test", "javascript"), "\n```js\ntest\n```\n") - assert.equal(fenceMD("test", "yml"), "\n```yaml\ntest\n```\n") - }) + it("should map content types appropriately", () => { + assert.equal(fenceMD("test", "markdown"), "\n```md\ntest\n```\n"); + assert.equal(fenceMD("test", "prompty"), "\n```md\ntest\n```\n"); + assert.equal(fenceMD("test", "javascript"), "\n```js\ntest\n```\n"); + assert.equal(fenceMD("test", "yml"), "\n```yaml\ntest\n```\n"); + }); - it("should extend fence when content contains fence", () => { - const result = fenceMD("```test```") - assert.equal(result, "\n````\n```test```\n````\n") - }) + it("should extend fence when content contains fence", () => { + const result = fenceMD("```test```"); + assert.equal(result, "\n````\n```test```\n````\n"); + }); - it("should extend fence multiple times if needed", () => { - const result = fenceMD("````test````") - assert.equal(result, "\n`````\n````test````\n`````\n") - }) + it("should extend fence multiple times if needed", () => { + const result = fenceMD("````test````"); + assert.equal(result, 
"\n`````\n````test````\n`````\n"); + }); - it("should return undefined when input is undefined", () => { - assert.equal(fenceMD(undefined), undefined) - }) + it("should return undefined when input is undefined", () => { + assert.equal(fenceMD(undefined), undefined); + }); - it("should trim newlines from the input", () => { - const result = fenceMD("\ntest\n\n") - assert.equal(result, "\n```\ntest\n```\n") - }) - }) + it("should trim newlines from the input", () => { + const result = fenceMD("\ntest\n\n"); + assert.equal(result, "\n```\ntest\n```\n"); + }); + }); - describe("link", () => { - it("should create a markdown link when href is provided", () => { - const result = link("text", "https://example.com") - assert.equal(result, "[text](https://example.com)") - }) + describe("link", () => { + it("should create a markdown link when href is provided", () => { + const result = link("text", "https://example.com"); + assert.equal(result, "[text](https://example.com)"); + }); - it("should return plain text when href is not provided", () => { - const result = link("text", "") - assert.equal(result, "text") - }) - }) + it("should return plain text when href is not provided", () => { + const result = link("text", ""); + assert.equal(result, "text"); + }); + }); - describe("details", () => { - it("should create a markdown details block", () => { - const result = details("Summary", "Body content") - assert.equal( - result, - "\n
<details>\n<summary>Summary</summary>\n\nBody content\n\n</details>\n"
-            )
-        })
+  describe("details", () => {
+    it("should create a markdown details block", () => {
+      const result = details("Summary", "Body content");
+      assert.equal(
+        result,
+        "\n<details>\n<summary>Summary</summary>\n\nBody content\n\n</details>\n",
+      );
+    });
 
-        it("should create an open details block when specified", () => {
-            const result = details("Summary", "Body content", true)
-            assert.equal(
-                result,
-                "\n<details open>\n<summary>Summary</summary>\n\nBody content\n\n</details>\n"
-            )
-        })
-    })
-})
+    it("should create an open details block when specified", () => {
+      const result = details("Summary", "Body content", true);
+      assert.equal(
+        result,
+        "\n<details open>\n<summary>Summary</summary>\n\nBody content\n\n</details>\n",
+      );
+    });
+  });
+});
diff --git a/packages/core/src/mkmd.ts b/packages/core/src/mkmd.ts
index cb3a724833..4990a6ed49 100644
--- a/packages/core/src/mkmd.ts
+++ b/packages/core/src/mkmd.ts
@@ -1,12 +1,12 @@
-import { trimNewlines } from "./unwrappers"
+import { trimNewlines } from "./unwrappers";
 
 const contentTypes: Record<string, string> = {
-    markdown: "md",
-    prompty: "md",
-    javascript: "js",
-    typescript: "ts",
-    yml: "yaml",
-}
+  markdown: "md",
+  prompty: "md",
+  javascript: "js",
+  typescript: "ts",
+  yml: "yaml",
+};
 
 /**
  * Wraps text in a markdown code fence, extending the fence if the text contains existing fences.
@@ -15,11 +15,11 @@ const contentTypes: Record<string, string> = {
  * @returns The text wrapped in a code fence.
  */
 export function fenceMD(t: string, contentType?: string) {
-    if (t === undefined) return undefined
-    contentType = contentTypes[contentType] || contentType || ""
-    let f = "```"
-    while (t.includes(f) && f.length < 8) f += "`" // Extend fence if necessary
-    return `\n${f}${contentType}\n${trimNewlines(t)}\n${f}\n`
+  if (t === undefined) return undefined;
+  contentType = contentTypes[contentType] || contentType || "";
+  let f = "```";
+  while (t.includes(f) && f.length < 8) f += "`"; // Extend fence if necessary
+  return `\n${f}${contentType}\n${trimNewlines(t)}\n${f}\n`;
 }
 
 /**
@@ -29,7 +29,7 @@ export function fenceMD(t: string, contentType?: string) {
  * @returns A markdown link or plain text.
  */
 export function link(text: string, href: string) {
-    return href ? `[${text}](${href})` : text
+  return href ? `[${text}](${href})` : text;
 }
 
 /**
@@ -40,10 +40,10 @@ export function link(text: string, href: string) {
  * @returns A string representing a markdown details block.
  */
 export function details(summary: string, body: string, open?: boolean) {
-    return `\n<details${open ? " open" : ""}>
+  return `\n<details${open ? " open" : ""}>
 <summary>${summary}</summary>
 
 ${body}
 
-</details>\n`
+</details>\n`;
 }
diff --git a/packages/core/src/modelalias.ts b/packages/core/src/modelalias.ts
index 91513f51f3..5b502d470c 100644
--- a/packages/core/src/modelalias.ts
+++ b/packages/core/src/modelalias.ts
@@ -1,9 +1,9 @@
-import debug from "debug"
-const dbg = debug("genaiscript:modelalias")
-import { parseKeyValuePair } from "../../core/src/fence"
-import { runtimeHost } from "../../core/src/host"
-import { PromptScriptRunOptions } from "./server/messages"
-import { providerFeatures } from "./features"
+import debug from "debug";
+const dbg = debug("genaiscript:modelalias");
+import { parseKeyValuePair } from "../../core/src/fence";
+import { runtimeHost } from "../../core/src/host";
+import { PromptScriptRunOptions } from "./server/messages";
+import { providerFeatures } from "./features";
 
 /**
  * Configures model provider aliases based on the given provider ID and source type.
@@ -15,16 +15,13 @@ import { providerFeatures } from "./features"
 * Sets model aliases for the detected provider using the runtime host. If
 * the provider contains alias definitions, they are mapped and stored.
 */
-export function applyModelProviderAliases(
-    id: string,
-    source: "cli" | "env" | "config" | "script"
-) {
-    dbg(`apply provider ${id} from ${source}`)
-    if (!id) return
-    const provider = providerFeatures(id)
-    if (!provider) throw new Error(`Model provider not found: ${id}`)
-    for (const [key, value] of Object.entries(provider.aliases || {}))
-        runtimeHost.setModelAlias(source, key, provider.id + ":" + value)
+export function applyModelProviderAliases(id: string, source: "cli" | "env" | "config" | "script") {
+  dbg(`apply provider ${id} from ${source}`);
+  if (!id) return;
+  const provider = providerFeatures(id);
+  if (!provider) throw new Error(`Model provider not found: ${id}`);
+  for (const [key, value] of Object.entries(provider.aliases || {}))
+    runtimeHost.setModelAlias(source, key, provider.id + ":" + value);
 }
 
 /**
@@ -41,26 +38,21 @@ export function applyModelProviderAliases(
 * @param source - The origin of the configuration (e.g., `cli`, `env`, `config`, or `script`).
 */
 export function applyModelOptions(
-    options: Partial<
-        Pick<
-            PromptScriptRunOptions,
-            "model" | "smallModel" | "visionModel" | "modelAlias" | "provider"
-        >
-    >,
-    source: "cli" | "env" | "config" | "script"
+  options: Partial<
+    Pick<PromptScriptRunOptions, "model" | "smallModel" | "visionModel" | "modelAlias" | "provider">
+  >,
+  source: "cli" | "env" | "config" | "script",
 ) {
-    dbg(`apply model options from ${source}`, options)
-    if (options.provider) applyModelProviderAliases(options.provider, source)
-    if (options.model) runtimeHost.setModelAlias(source, "large", options.model)
-    if (options.smallModel)
-        runtimeHost.setModelAlias(source, "small", options.smallModel)
-    if (options.visionModel)
-        runtimeHost.setModelAlias(source, "vision", options.visionModel)
-    for (const kv of options.modelAlias || []) {
-        const aliases = parseKeyValuePair(kv)
-        for (const [key, value] of Object.entries(aliases))
-            runtimeHost.setModelAlias(source, key, value)
-    }
+  dbg(`apply model options from ${source}`, options);
+  if (options.provider) applyModelProviderAliases(options.provider, source);
+  if (options.model) runtimeHost.setModelAlias(source, "large", options.model);
+  if (options.smallModel) runtimeHost.setModelAlias(source, "small", options.smallModel);
+  if (options.visionModel) runtimeHost.setModelAlias(source, "vision", options.visionModel);
+  for (const kv of options.modelAlias || []) {
+    const aliases = parseKeyValuePair(kv);
+    for (const [key, value] of Object.entries(aliases))
+      runtimeHost.setModelAlias(source, key, value);
+  }
 }
 
 /**
@@ -76,11 +68,11 @@ export function applyModelOptions(
 * environment using `runtimeHost.setModelAlias`, where the alias name and value are registered.
 */
 export function applyScriptModelAliases(script: PromptScript) {
-    applyModelOptions(script, "script")
-    if (script.modelAliases)
-        Object.entries(script.modelAliases).forEach(([name, alias]) => {
-            runtimeHost.setModelAlias("script", name, alias)
-        })
+  applyModelOptions(script, "script");
+  if (script.modelAliases)
+    Object.entries(script.modelAliases).forEach(([name, alias]) => {
+      runtimeHost.setModelAlias("script", name, alias);
+    });
 }
 
 /**
@@ -90,11 +82,8 @@ export function applyScriptModelAliases(script: PromptScript) {
 * @param options.all - If true, logs all aliases, including those with the "default" source.
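 *
 * @example
 * logModelAliases({ all: true }); // also logs aliases whose source is "default"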
*/ export function logModelAliases(options?: { all?: boolean }) { - const { all } = options || {} - let aliases = Object.entries(runtimeHost.modelAliases) - if (!all) - aliases = aliases.filter(([, value]) => value.source !== "default") - aliases.forEach(([key, value]) => - dbg(`${key}: ${value.model} (${value.source})`) - ) + const { all } = options || {}; + let aliases = Object.entries(runtimeHost.modelAliases); + if (!all) aliases = aliases.filter(([, value]) => value.source !== "default"); + aliases.forEach(([key, value]) => dbg(`${key}: ${value.model} (${value.source})`)); } diff --git a/packages/core/src/models.test.ts b/packages/core/src/models.test.ts index 095bce7602..9e5f301ed2 100644 --- a/packages/core/src/models.test.ts +++ b/packages/core/src/models.test.ts @@ -1,87 +1,81 @@ -import test, { describe } from "node:test" -import { parseModelIdentifier } from "./models" -import assert from "node:assert" +import test, { describe } from "node:test"; +import { parseModelIdentifier } from "./models"; +import assert from "node:assert"; import { - MODEL_PROVIDER_GITHUB, - MODEL_PROVIDER_LLAMAFILE, - MODEL_PROVIDER_OLLAMA, - MODEL_PROVIDER_OPENAI, -} from "./constants" + MODEL_PROVIDER_GITHUB, + MODEL_PROVIDER_LLAMAFILE, + MODEL_PROVIDER_OLLAMA, + MODEL_PROVIDER_OPENAI, +} from "./constants"; // generate unit tests for parseModelIdentifier describe("parseModelIdentifier", () => { - test("ollama:phi3", () => { - const { provider, model, tag, family } = - parseModelIdentifier("ollama:phi3") - assert(provider === MODEL_PROVIDER_OLLAMA) - assert(model === "phi3") - assert(family === "phi3") - }) - test("ollama:gemma2:2b", () => { - const { provider, model, tag, family } = - parseModelIdentifier("ollama:gemma2:2b") - assert(provider === MODEL_PROVIDER_OLLAMA) - assert(model === "gemma2:2b") - assert(family === "gemma2") - }) - test("llamafile", () => { - const { provider, model, family } = parseModelIdentifier("llamafile") - assert(provider === MODEL_PROVIDER_LLAMAFILE) - assert(family === "*") - assert(model === "*") - }) - test("github:gpt4", () => { - const { provider, model, family } = parseModelIdentifier("github:gpt4") - assert(provider === MODEL_PROVIDER_GITHUB) - assert(model === "gpt4") - assert(family === "gpt4") - }) - test("openai:gpt4", () => { - const { provider, model, family } = parseModelIdentifier("openai:gpt4") - assert(provider === MODEL_PROVIDER_OPENAI) - assert(model === "gpt4") - assert(family === "gpt4") - }) - test("anthropic_bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0", () => { - const res = parseModelIdentifier( - "anthropic_bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0" - ) - assert.deepEqual(res, { - provider: "anthropic_bedrock", - family: "anthropic.claude-3-7-sonnet-20250219-v1", - model: "anthropic.claude-3-7-sonnet-20250219-v1:0", - tag: "0", - }) - }) - test("anthropic_bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0:high", () => { - const res = parseModelIdentifier( - "anthropic_bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0:high" - ) - assert.deepEqual(res, { - provider: "anthropic_bedrock", - family: "anthropic.claude-3-7-sonnet-20250219-v1", - model: "anthropic.claude-3-7-sonnet-20250219-v1:0", - tag: "0", - reasoningEffort: "high", - }) - }) - test("anthropic:claude-3-7-sonnet-latest", () => { - const res = parseModelIdentifier("anthropic:claude-3-7-sonnet-latest") - assert.deepEqual(res, { - provider: "anthropic", - family: "claude-3-7-sonnet-latest", - model: "claude-3-7-sonnet-latest", - }) - }) - 
test("anthropic:claude-3-7-sonnet-latest:high", () => { - const res = parseModelIdentifier( - "anthropic:claude-3-7-sonnet-latest:high" - ) - assert.deepEqual(res, { - provider: "anthropic", - family: "claude-3-7-sonnet-latest", - model: "claude-3-7-sonnet-latest", - reasoningEffort: "high", - }) - }) -}) + test("ollama:phi3", () => { + const { provider, model, tag, family } = parseModelIdentifier("ollama:phi3"); + assert(provider === MODEL_PROVIDER_OLLAMA); + assert(model === "phi3"); + assert(family === "phi3"); + }); + test("ollama:gemma2:2b", () => { + const { provider, model, tag, family } = parseModelIdentifier("ollama:gemma2:2b"); + assert(provider === MODEL_PROVIDER_OLLAMA); + assert(model === "gemma2:2b"); + assert(family === "gemma2"); + }); + test("llamafile", () => { + const { provider, model, family } = parseModelIdentifier("llamafile"); + assert(provider === MODEL_PROVIDER_LLAMAFILE); + assert(family === "*"); + assert(model === "*"); + }); + test("github:gpt4", () => { + const { provider, model, family } = parseModelIdentifier("github:gpt4"); + assert(provider === MODEL_PROVIDER_GITHUB); + assert(model === "gpt4"); + assert(family === "gpt4"); + }); + test("openai:gpt4", () => { + const { provider, model, family } = parseModelIdentifier("openai:gpt4"); + assert(provider === MODEL_PROVIDER_OPENAI); + assert(model === "gpt4"); + assert(family === "gpt4"); + }); + test("anthropic_bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0", () => { + const res = parseModelIdentifier("anthropic_bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0"); + assert.deepEqual(res, { + provider: "anthropic_bedrock", + family: "anthropic.claude-3-7-sonnet-20250219-v1", + model: "anthropic.claude-3-7-sonnet-20250219-v1:0", + tag: "0", + }); + }); + test("anthropic_bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0:high", () => { + const res = parseModelIdentifier( + "anthropic_bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0:high", + ); + assert.deepEqual(res, { + provider: "anthropic_bedrock", + family: "anthropic.claude-3-7-sonnet-20250219-v1", + model: "anthropic.claude-3-7-sonnet-20250219-v1:0", + tag: "0", + reasoningEffort: "high", + }); + }); + test("anthropic:claude-3-7-sonnet-latest", () => { + const res = parseModelIdentifier("anthropic:claude-3-7-sonnet-latest"); + assert.deepEqual(res, { + provider: "anthropic", + family: "claude-3-7-sonnet-latest", + model: "claude-3-7-sonnet-latest", + }); + }); + test("anthropic:claude-3-7-sonnet-latest:high", () => { + const res = parseModelIdentifier("anthropic:claude-3-7-sonnet-latest:high"); + assert.deepEqual(res, { + provider: "anthropic", + family: "claude-3-7-sonnet-latest", + model: "claude-3-7-sonnet-latest", + reasoningEffort: "high", + }); + }); +}); diff --git a/packages/core/src/models.ts b/packages/core/src/models.ts index b1235bf4c7..ec90d74ef2 100644 --- a/packages/core/src/models.ts +++ b/packages/core/src/models.ts @@ -1,24 +1,24 @@ -import debug from "debug" -const dbg = debug("genaiscript:models") +import debug from "debug"; +const dbg = debug("genaiscript:models"); -import { uniq } from "es-toolkit" -import { LARGE_MODEL_ID } from "./constants" -import { errorMessage } from "./error" -import { host, ModelConfiguration, runtimeHost } from "./host" -import { MarkdownTrace, TraceOptions } from "./trace" -import { arrayify, assert, logVerbose, toStringList } from "./util" -import { CancellationOptions } from "./cancellation" -import { LanguageModelConfiguration } from "./server/messages" -import { roundWithPrecision } from "./precision" 
-import { logModelAliases } from "./modelalias"
-import { ChatCompletionReasoningEffort } from "./chattypes"
+import { uniq } from "es-toolkit";
+import { LARGE_MODEL_ID } from "./constants";
+import { errorMessage } from "./error";
+import { host, ModelConfiguration, runtimeHost } from "./host";
+import { MarkdownTrace, TraceOptions } from "./trace";
+import { arrayify, assert, logVerbose, toStringList } from "./util";
+import { CancellationOptions } from "./cancellation";
+import { LanguageModelConfiguration } from "./server/messages";
+import { roundWithPrecision } from "./precision";
+import { logModelAliases } from "./modelalias";
+import { ChatCompletionReasoningEffort } from "./chattypes";
 
 export interface ParsedModelType {
-    provider: string
-    family: string
-    model: string
-    tag?: string
-    reasoningEffort?: ChatCompletionReasoningEffort
+  provider: string;
+  family: string;
+  model: string;
+  tag?: string;
+  reasoningEffort?: ChatCompletionReasoningEffort;
 }
 
 /**
@@ -37,38 +37,37 @@ export interface ParsedModelType {
 * - Error if the model identifier is not specified.
 */
 export function parseModelIdentifier(id: string): {
-    provider: string
-    family: string
-    model: string
-    tag?: string
-    reasoningEffort?: ChatCompletionReasoningEffort
+  provider: string;
+  family: string;
+  model: string;
+  tag?: string;
+  reasoningEffort?: ChatCompletionReasoningEffort;
 } {
-    if (!id) throw new Error("Model identifier not specified")
-    let reasoningEffort: ChatCompletionReasoningEffort
-    const parts = id.split(":")
-    if (/^(high|medium|low)$/.test(parts.at(-1)))
-        reasoningEffort = parts.pop() as ChatCompletionReasoningEffort
+  if (!id) throw new Error("Model identifier not specified");
+  let reasoningEffort: ChatCompletionReasoningEffort;
+  const parts = id.split(":");
+  if (/^(high|medium|low)$/.test(parts.at(-1)))
+    reasoningEffort = parts.pop() as ChatCompletionReasoningEffort;
 
-    let res: ParsedModelType
-    if (parts.length >= 3)
-        res = {
-            provider: parts[0],
-            family: parts[1],
-            tag: parts.slice(2).join(":"),
-            model: parts.slice(1).join(":"),
-        }
-    else if (parts.length === 2)
-        res = { provider: parts[0], family: parts[1], model: parts[1] }
-    else res = { provider: id, family: "*", model: "*" }
-    if (reasoningEffort) res.reasoningEffort = reasoningEffort
-    return res
+  let res: ParsedModelType;
+  if (parts.length >= 3)
+    res = {
+      provider: parts[0],
+      family: parts[1],
+      tag: parts.slice(2).join(":"),
+      model: parts.slice(1).join(":"),
+    };
+  else if (parts.length === 2) res = { provider: parts[0], family: parts[1], model: parts[1] };
+  else res = { provider: id, family: "*", model: "*" };
+  if (reasoningEffort) res.reasoningEffort = reasoningEffort;
+  return res;
 }
 
 export interface ModelConnectionInfo
-    extends ModelConnectionOptions,
-        Partial<LanguageModelConfiguration> {
-    error?: string
-    model: string
+  extends ModelConnectionOptions,
+    Partial<LanguageModelConfiguration> {
+  error?: string;
+  model: string;
 }
 
 /**
@@ -101,77 +100,72 @@ export interface ModelConnectionInfo
 * and configuration metadata from the runtime environment. Ensures detailed logs for better traceability.
*/ export function traceLanguageModelConnection( - trace: MarkdownTrace, - options: ModelOptions, - connectionToken: LanguageModelConfiguration + trace: MarkdownTrace, + options: ModelOptions, + connectionToken: LanguageModelConfiguration, ) { - const { - model, - temperature, - reasoningEffort, - fallbackTools, - topP, - maxTokens, - seed, - cache, - logprobs, - topLogprobs, - responseType, - responseSchema, - fenceFormat, - } = options - const choices = arrayify(options.choices) - const { base, type, version, source, provider } = connectionToken - trace.startDetails(`⚙️ configuration`) - try { - trace.itemValue(`model`, model) - trace.itemValue(`version`, version) - trace.itemValue(`source`, source) - trace.itemValue(`provider`, provider) - trace.itemValue(`temperature`, temperature) - trace.itemValue(`reasoningEffort`, reasoningEffort) - trace.itemValue(`fallbackTools`, fallbackTools) - trace.itemValue(`topP`, topP) - trace.itemValue(`maxTokens`, maxTokens) - trace.itemValue(`base`, base) - trace.itemValue(`type`, type) - trace.itemValue(`seed`, seed) - if (choices.length) - trace.itemValue( - `choices`, - choices - .map((c) => - typeof c === "string" - ? c - : `${c.token} - ${roundWithPrecision(c.weight, 2)}` - ) - .join(",") - ) - trace.itemValue(`logprobs`, logprobs) - if (topLogprobs) trace.itemValue(`topLogprobs`, topLogprobs) - trace.itemValue(`cache`, cache) - trace.itemValue(`fence format`, fenceFormat) - trace.itemValue(`response type`, responseType) - if (responseSchema) - trace.detailsFenced(`📦 response schema`, responseSchema, "json") + const { + model, + temperature, + reasoningEffort, + fallbackTools, + topP, + maxTokens, + seed, + cache, + logprobs, + topLogprobs, + responseType, + responseSchema, + fenceFormat, + } = options; + const choices = arrayify(options.choices); + const { base, type, version, source, provider } = connectionToken; + trace.startDetails(`⚙️ configuration`); + try { + trace.itemValue(`model`, model); + trace.itemValue(`version`, version); + trace.itemValue(`source`, source); + trace.itemValue(`provider`, provider); + trace.itemValue(`temperature`, temperature); + trace.itemValue(`reasoningEffort`, reasoningEffort); + trace.itemValue(`fallbackTools`, fallbackTools); + trace.itemValue(`topP`, topP); + trace.itemValue(`maxTokens`, maxTokens); + trace.itemValue(`base`, base); + trace.itemValue(`type`, type); + trace.itemValue(`seed`, seed); + if (choices.length) + trace.itemValue( + `choices`, + choices + .map((c) => + typeof c === "string" ? c : `${c.token} - ${roundWithPrecision(c.weight, 2)}`, + ) + .join(","), + ); + trace.itemValue(`logprobs`, logprobs); + if (topLogprobs) trace.itemValue(`topLogprobs`, topLogprobs); + trace.itemValue(`cache`, cache); + trace.itemValue(`fence format`, fenceFormat); + trace.itemValue(`response type`, responseType); + if (responseSchema) trace.detailsFenced(`📦 response schema`, responseSchema, "json"); - trace.startDetails(`🔗 model aliases`) - Object.entries(runtimeHost.modelAliases).forEach(([key, value]) => - trace.itemValue( - key, - toStringList( - `\`${value.model}\``, - isNaN(value.temperature) - ? undefined - : `temperature: \`${value.temperature}\``, - `source: \`${value.source}\`` - ) - ) - ) - trace.endDetails() - } finally { - trace.endDetails() - } + trace.startDetails(`🔗 model aliases`); + Object.entries(runtimeHost.modelAliases).forEach(([key, value]) => + trace.itemValue( + key, + toStringList( + `\`${value.model}\``, + isNaN(value.temperature) ? 
undefined : `temperature: \`${value.temperature}\``, + `source: \`${value.source}\``, + ), + ), + ); + trace.endDetails(); + } finally { + trace.endDetails(); + } } /** @@ -182,8 +176,8 @@ export function traceLanguageModelConnection( * @returns True if the given model identifier is an alias, otherwise false. */ export function isModelAlias(model: string): boolean { - const res = !!runtimeHost.modelAliases[model] - return res + const res = !!runtimeHost.modelAliases[model]; + return res; } /** @@ -200,24 +194,22 @@ export function isModelAlias(model: string): boolean { * - The fully resolved ModelConfiguration object, containing the final model identifier and its source. */ export function resolveModelAlias(model: string): ModelConfiguration { - if (!model) throw new Error("Model not specified") - const { modelAliases } = runtimeHost - const seen: string[] = [] - let res: ModelConfiguration = { - model, - source: "script", - } - while (modelAliases[res.model]) { - let next = modelAliases[res.model] - dbg(`alias ${res.model} -> ${next.model}`) - if (seen.includes(next.model)) - throw new Error( - `Circular model alias: ${next.model}, seen ${[...seen].join(",")}` - ) - seen.push(next.model) - res = next - } - return res + if (!model) throw new Error("Model not specified"); + const { modelAliases } = runtimeHost; + const seen: string[] = []; + let res: ModelConfiguration = { + model, + source: "script", + }; + while (modelAliases[res.model]) { + let next = modelAliases[res.model]; + dbg(`alias ${res.model} -> ${next.model}`); + if (seen.includes(next.model)) + throw new Error(`Circular model alias: ${next.model}, seen ${[...seen].join(",")}`); + seen.push(next.model); + res = next; + } + return res; } /** @@ -240,112 +232,100 @@ export function resolveModelAlias(model: string): ModelConfiguration { * Includes fallback handling for missing or invalid model configurations. */ export async function resolveModelConnectionInfo( - conn: ModelConnectionOptions, - options?: { - model?: string - defaultModel?: string - token?: boolean - } & TraceOptions & - CancellationOptions + conn: ModelConnectionOptions, + options?: { + model?: string; + defaultModel?: string; + token?: boolean; + } & TraceOptions & + CancellationOptions, ): Promise<{ - info: ModelConnectionInfo - configuration?: LanguageModelConfiguration + info: ModelConnectionInfo; + configuration?: LanguageModelConfiguration; }> { - const { - trace, - token: askToken, - defaultModel, - cancellationToken, - } = options || {} - const hint = options?.model || conn.model - dbg(`resolving model for '${hint || ""}'`) - // supports candidate if no model hint or hint is a model alias - const resolved = resolveModelAlias(hint || defaultModel) - if (!resolved) - return { - info: { error: "missing error information", model: undefined }, - } + const { trace, token: askToken, defaultModel, cancellationToken } = options || {}; + const hint = options?.model || conn.model; + dbg(`resolving model for '${hint || ""}'`); + // supports candidate if no model hint or hint is a model alias + const resolved = resolveModelAlias(hint || defaultModel); + if (!resolved) + return { + info: { error: "missing error information", model: undefined }, + }; - const supportsCandidates = !hint || isModelAlias(hint) - const modelId = resolved.model - let candidates = supportsCandidates ? resolved.candidates : undefined + const supportsCandidates = !hint || isModelAlias(hint); + const modelId = resolved.model; + let candidates = supportsCandidates ? 
resolved.candidates : undefined; - const resolveModel = async ( - model: string, - resolveOptions: { withToken: boolean; reportError: boolean } - ): Promise<{ - info: ModelConnectionInfo - configuration?: LanguageModelConfiguration - }> => { - try { - dbg(`resolving ${model}`) - const configuration = await host.getLanguageModelConfiguration( - model, - { - token: resolveOptions.withToken, - cancellationToken, - trace, - } - ) - if (!configuration) { - dbg(`configuration not found`) - return { info: { ...conn, model } } - } else { - const { token: theToken, ...rest } = configuration - return { - info: { - ...conn, - ...rest, - model, - token: theToken - ? resolveOptions.withToken - ? theToken - : "***" - : "", - }, - configuration, - } - } - } catch (e) { - dbg(`error resolving ${model}: ${e}`) - if (resolveOptions.reportError) trace?.error(undefined, e) - return { - info: { - ...conn, - model, - error: errorMessage(e), - }, - } - } + const resolveModel = async ( + model: string, + resolveOptions: { withToken: boolean; reportError: boolean }, + ): Promise<{ + info: ModelConnectionInfo; + configuration?: LanguageModelConfiguration; + }> => { + try { + dbg(`resolving ${model}`); + const configuration = await host.getLanguageModelConfiguration(model, { + token: resolveOptions.withToken, + cancellationToken, + trace, + }); + if (!configuration) { + dbg(`configuration not found`); + return { info: { ...conn, model } }; + } else { + const { token: theToken, ...rest } = configuration; + return { + info: { + ...conn, + ...rest, + model, + token: theToken ? (resolveOptions.withToken ? theToken : "***") : "", + }, + configuration, + }; + } + } catch (e) { + dbg(`error resolving ${model}: ${e}`); + if (resolveOptions.reportError) trace?.error(undefined, e); + return { + info: { + ...conn, + model, + error: errorMessage(e), + }, + }; } + }; - if (!supportsCandidates) { - dbg(`candidate ${modelId}`) - return await resolveModel(modelId, { - withToken: askToken, - reportError: true, - }) - } else { - candidates = uniq([modelId, ...(candidates || [])].filter((c) => !!c)) - dbg(`candidates: ${candidates?.join(", ")}`) - for (const candidate of candidates) { - const res = await resolveModel(candidate, { - withToken: askToken, - reportError: false, - }) - if (!res.info.error && res.info.token) { - dbg(`resolved ${candidate}`) - return res - } - } - debug(`no candidates resolved`) - return { - info: { - model: "?", - error: hint - ? `LLM provider not configured or refresh token expired for '${hint}'` - : "LLM provider not configured or refresh token expired", - }, - } + if (!supportsCandidates) { + dbg(`candidate ${modelId}`); + return await resolveModel(modelId, { + withToken: askToken, + reportError: true, + }); + } else { + candidates = uniq([modelId, ...(candidates || [])].filter((c) => !!c)); + dbg(`candidates: ${candidates?.join(", ")}`); + for (const candidate of candidates) { + const res = await resolveModel(candidate, { + withToken: askToken, + reportError: false, + }); + if (!res.info.error && res.info.token) { + dbg(`resolved ${candidate}`); + return res; + } } + debug(`no candidates resolved`); + return { + info: { + model: "?", + error: hint + ? 
`LLM provider not configured or refresh token expired for '${hint}'` + : "LLM provider not configured or refresh token expired", + }, + }; + } } diff --git a/packages/core/src/mustache.test.ts b/packages/core/src/mustache.test.ts index cb89ca8f59..dbd1dd2c5a 100644 --- a/packages/core/src/mustache.test.ts +++ b/packages/core/src/mustache.test.ts @@ -1,10 +1,10 @@ -import { describe, test, beforeEach } from "node:test" -import assert from "node:assert/strict" -import { interpolateVariables } from "./mustache" +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import { interpolateVariables } from "./mustache"; describe("interpolateVariables", () => { - test("should interpolate variables correctly in markdown", async () => { - const md = `--- + test("should interpolate variables correctly in markdown", async () => { + const md = `--- name: Basic Prompt description: A basic prompt that uses the chat API to answer questions model: @@ -26,84 +26,84 @@ You are an AI assistant who helps people find information. As the assistant, you answer questions briefly, succinctly. user: -{{question}}` - const expectedOutput = `Hello, John Doe. You are 30 years old.` // Assume this is the correct interpolation - const output = await interpolateVariables(md, { - question: "THE QUESTION", - }) - assert.strictEqual( - output, - ` +{{question}}`; + const expectedOutput = `Hello, John Doe. You are 30 years old.`; // Assume this is the correct interpolation + const output = await interpolateVariables(md, { + question: "THE QUESTION", + }); + assert.strictEqual( + output, + ` You are an AI assistant who helps people find information. As the assistant, you answer questions briefly, succinctly. -THE QUESTION` - ) - }) - test("should interpolate jinja variables when format is jinja", async () => { - const md = `--- +THE QUESTION`, + ); + }); + test("should interpolate jinja variables when format is jinja", async () => { + const md = `--- name: Jinja Template Test --- -Hello {{ name }}! Your age is {{ age }}.` +Hello {{ name }}! Your age is {{ age }}.`; - const output = await interpolateVariables( - md, - { - name: "Alice", - age: 25, - }, - { format: "jinja" } - ) + const output = await interpolateVariables( + md, + { + name: "Alice", + age: 25, + }, + { format: "jinja" }, + ); - assert.strictEqual(output, "Hello Alice! Your age is 25.") - }) + assert.strictEqual(output, "Hello Alice! 
Your age is 25."); + }); - test("should handle jinja conditionals", async () => { - const md = `--- + test("should handle jinja conditionals", async () => { + const md = `--- name: Jinja Conditional Test --- -{% if age >= 18 %}You are an adult.{% else %}You are a minor.{% endif %}` +{% if age >= 18 %}You are an adult.{% else %}You are a minor.{% endif %}`; - const adultOutput = await interpolateVariables( - md, - { - age: 25, - }, - { format: "jinja" } - ) + const adultOutput = await interpolateVariables( + md, + { + age: 25, + }, + { format: "jinja" }, + ); - assert.strictEqual(adultOutput, "You are an adult.") + assert.strictEqual(adultOutput, "You are an adult."); - const minorOutput = await interpolateVariables( - md, - { - age: 15, - }, - { format: "jinja" } - ) + const minorOutput = await interpolateVariables( + md, + { + age: 15, + }, + { format: "jinja" }, + ); - assert.strictEqual(minorOutput, "You are a minor.") - }) + assert.strictEqual(minorOutput, "You are a minor."); + }); - test("should handle jinja loops", async () => { - const md = `--- + test("should handle jinja loops", async () => { + const md = `--- name: Jinja Loop Test --- Items: {% for item in items %} - {{ item }} -{% endfor %}` +{% endfor %}`; - const output = await interpolateVariables( - md, - { - items: ["apple", "banana", "cherry"], - }, - { format: "jinja" } - ) + const output = await interpolateVariables( + md, + { + items: ["apple", "banana", "cherry"], + }, + { format: "jinja" }, + ); - assert.strictEqual(output, "Items:\n- apple\n- banana\n- cherry\n") - }) -}) + assert.strictEqual(output, "Items:\n- apple\n- banana\n- cherry\n"); + }); +}); diff --git a/packages/core/src/mustache.ts b/packages/core/src/mustache.ts index 050a4ab497..0f43e7a069 100644 --- a/packages/core/src/mustache.ts +++ b/packages/core/src/mustache.ts @@ -1,6 +1,6 @@ -import { splitMarkdown } from "./frontmatter" -import Mustache from "mustache" -import { jinjaRender } from "./jinja" +import { splitMarkdown } from "./frontmatter"; +import Mustache from "mustache"; +import { jinjaRender } from "./jinja"; /** * Processes a markdown string by applying Mustache or Jinja templating. @@ -11,27 +11,27 @@ import { jinjaRender } from "./jinja" * @returns The processed markdown string with interpolated variables. */ export async function interpolateVariables( - md: string, - data: Record, - options?: ImportTemplateOptions + md: string, + data: Record, + options?: ImportTemplateOptions, ): Promise { - if (!md || !data) return md - const { format } = options || {} - // remove frontmatter - let { content } = splitMarkdown(md) + if (!md || !data) return md; + const { format } = options || {}; + // remove frontmatter + let { content } = splitMarkdown(md); - // remove prompty roles - // https://github.com/microsoft/prompty/blob/main/runtime/prompty/prompty/parsers.py#L113C21-L113C77 - content = content.replace(/^\s*(system|user|assistant)\s*:\s*$/gim, "\n") + // remove prompty roles + // https://github.com/microsoft/prompty/blob/main/runtime/prompty/prompty/parsers.py#L113C21-L113C77 + content = content.replace(/^\s*(system|user|assistant)\s*:\s*$/gim, "\n"); - if (content) { - // remove xml tags - // https://humanloop.com/docs/prompt-file-format - if (format === "jinja") content = jinjaRender(content, data ?? {}) - else content = Mustache.render(content, data ?? {}) - } + if (content) { + // remove xml tags + // https://humanloop.com/docs/prompt-file-format + if (format === "jinja") content = jinjaRender(content, data ?? 
{}); + else content = Mustache.render(content, data ?? {}); + } - return content + return content; } -export const mustacheRender = Mustache.render +export const mustacheRender = Mustache.render; diff --git a/packages/core/src/net.ts b/packages/core/src/net.ts index d9f1240112..a5f7c60fec 100644 --- a/packages/core/src/net.ts +++ b/packages/core/src/net.ts @@ -4,15 +4,15 @@ * @returns A promise that resolves to an available port number. */ export function findRandomOpenPort(): Promise { - return new Promise((resolve, reject) => { - const server = require("net").createServer() - server.unref() - server.on("error", reject) - server.listen(0, () => { - const port = server.address().port - server.close(() => resolve(port)) - }) - }) + return new Promise((resolve, reject) => { + const server = require("net").createServer(); + server.unref(); + server.on("error", reject); + server.listen(0, () => { + const port = server.address().port; + server.close(() => resolve(port)); + }); + }); } /** @@ -22,18 +22,18 @@ export function findRandomOpenPort(): Promise { * @returns A promise that resolves to true if the port is in use, or false otherwise. */ export function isPortInUse(port: number): Promise { - return new Promise((resolve, reject) => { - const server = require("net").createServer() - server.once("error", (err: any) => { - if (err.code === "EADDRINUSE") { - resolve(true) - } else { - reject(err) - } - }) - server.once("listening", () => { - server.close(() => resolve(false)) - }) - server.listen(port) - }) + return new Promise((resolve, reject) => { + const server = require("net").createServer(); + server.once("error", (err: any) => { + if (err.code === "EADDRINUSE") { + resolve(true); + } else { + reject(err); + } + }); + server.once("listening", () => { + server.close(() => resolve(false)); + }); + server.listen(port); + }); } diff --git a/packages/core/src/nodepackage.ts b/packages/core/src/nodepackage.ts index b4b6aaa529..45e1e77776 100644 --- a/packages/core/src/nodepackage.ts +++ b/packages/core/src/nodepackage.ts @@ -1,31 +1,31 @@ -import debug from "debug" -const dbg = debug("genaiscript:node:package") -import { tryReadJSON } from "./fs" +import debug from "debug"; +const dbg = debug("genaiscript:node:package"); +import { tryReadJSON } from "./fs"; export interface NodePackage { - type?: string - name?: string - version?: string - description?: string - main?: string - scripts?: Record - dependencies?: Record - devDependencies?: Record - peerDependencies?: Record - optionalDependencies?: Record - bundledDependencies?: string[] - engines?: Record - os?: string[] - cpu?: string[] - private?: boolean - publishConfig?: Record - repository?: Record - author?: string - license?: string - bugs?: Record - homepage?: string - keywords?: string[] - displayName?: string + type?: string; + name?: string; + version?: string; + description?: string; + main?: string; + scripts?: Record; + dependencies?: Record; + devDependencies?: Record; + peerDependencies?: Record; + optionalDependencies?: Record; + bundledDependencies?: string[]; + engines?: Record; + os?: string[]; + cpu?: string[]; + private?: boolean; + publishConfig?: Record; + repository?: Record; + author?: string; + license?: string; + bugs?: Record; + homepage?: string; + keywords?: string[]; + displayName?: string; } /** @@ -35,7 +35,7 @@ export interface NodePackage { * If the file cannot be read or parsed, the promise may reject with an error. 
*/ export async function nodeTryReadPackage(): Promise { - return await tryReadJSON("package.json") + return await tryReadJSON("package.json"); } /** @@ -44,8 +44,8 @@ export async function nodeTryReadPackage(): Promise { * @returns A promise that resolves to a boolean indicating if the package type is "module". */ export async function nodeIsPackageTypeModule() { - const pkg = await nodeTryReadPackage() - dbg(`type: ${pkg?.type || ""}`) - const isModule = pkg?.type === "module" - return isModule + const pkg = await nodeTryReadPackage(); + dbg(`type: ${pkg?.type || ""}`); + const isModule = pkg?.type === "module"; + return isModule; } diff --git a/packages/core/src/nonemodel.ts b/packages/core/src/nonemodel.ts index e04921243f..860ba4efe5 100644 --- a/packages/core/src/nonemodel.ts +++ b/packages/core/src/nonemodel.ts @@ -1,13 +1,13 @@ -import { LanguageModel } from "./chat" -import { MODEL_PROVIDER_NONE } from "./constants" -import { serializeError } from "./error" +import { LanguageModel } from "./chat"; +import { MODEL_PROVIDER_NONE } from "./constants"; +import { serializeError } from "./error"; export const NoneModel = Object.freeze({ - id: MODEL_PROVIDER_NONE, - completer: async (req, connection, options) => { - return { - finishReason: "fail", - error: serializeError("No LLM execution allowed in this context."), - } - }, -}) + id: MODEL_PROVIDER_NONE, + completer: async (req, connection, options) => { + return { + finishReason: "fail", + error: serializeError("No LLM execution allowed in this context."), + }; + }, +}); diff --git a/packages/core/src/ollama.test.ts b/packages/core/src/ollama.test.ts index 8743628dac..d1d3c6780a 100644 --- a/packages/core/src/ollama.test.ts +++ b/packages/core/src/ollama.test.ts @@ -1,54 +1,54 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { ollamaParseHostVariable } from "./env" -import { OLLAMA_API_BASE, OLLAMA_DEFAULT_PORT } from "./constants" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { ollamaParseHostVariable } from "./env"; +import { OLLAMA_API_BASE, OLLAMA_DEFAULT_PORT } from "./constants"; describe("parseHostVariable", () => { - test("parses OLLAMA_HOST environment variable correctly", () => { - const env = { OLLAMA_HOST: "http://localhost:3000" } - const result = ollamaParseHostVariable(env) - assert.strictEqual(result, "http://localhost:3000/") - }) - - test("parses OLLAMA_API_BASE environment variable correctly", () => { - const env = { OLLAMA_API_BASE: "http://api.ollama.com" } - const result = ollamaParseHostVariable(env) - assert.strictEqual(result, "http://api.ollama.com/") - }) - - test("falls back to OLLAMA_API_BASE constant if no environment variable is set", () => { - const env = {} - const result = ollamaParseHostVariable(env) - assert.strictEqual(result, OLLAMA_API_BASE) - }) - - test("parses IP address with port correctly", () => { - const env = { OLLAMA_HOST: "192.168.1.1:8080" } - const result = ollamaParseHostVariable(env) - assert.strictEqual(result, "http://192.168.1.1:8080") - }) - - test("parses IP address without port correctly", () => { - const env = { OLLAMA_HOST: "192.168.1.1" } - const result = ollamaParseHostVariable(env) - assert.strictEqual(result, `http://192.168.1.1:${OLLAMA_DEFAULT_PORT}`) - }) - - test("parses 0.0.0.0 with port correctly", () => { - const env = { OLLAMA_HOST: "0.0.0.0:4000" } - const result = ollamaParseHostVariable(env) - assert.strictEqual(result, "http://0.0.0.0:4000") - }) - - test("parses 
localhost with port correctly", () => { - const env = { OLLAMA_HOST: "localhost:4000" } - const result = ollamaParseHostVariable(env) - assert.strictEqual(result, "http://localhost:4000") - }) - - test("parses 0.0.0.0 without port correctly", () => { - const env = { OLLAMA_HOST: "0.0.0.0" } - const result = ollamaParseHostVariable(env) - assert.strictEqual(result, `http://0.0.0.0:${OLLAMA_DEFAULT_PORT}`) - }) -}) + test("parses OLLAMA_HOST environment variable correctly", () => { + const env = { OLLAMA_HOST: "http://localhost:3000" }; + const result = ollamaParseHostVariable(env); + assert.strictEqual(result, "http://localhost:3000/"); + }); + + test("parses OLLAMA_API_BASE environment variable correctly", () => { + const env = { OLLAMA_API_BASE: "http://api.ollama.com" }; + const result = ollamaParseHostVariable(env); + assert.strictEqual(result, "http://api.ollama.com/"); + }); + + test("falls back to OLLAMA_API_BASE constant if no environment variable is set", () => { + const env = {}; + const result = ollamaParseHostVariable(env); + assert.strictEqual(result, OLLAMA_API_BASE); + }); + + test("parses IP address with port correctly", () => { + const env = { OLLAMA_HOST: "192.168.1.1:8080" }; + const result = ollamaParseHostVariable(env); + assert.strictEqual(result, "http://192.168.1.1:8080"); + }); + + test("parses IP address without port correctly", () => { + const env = { OLLAMA_HOST: "192.168.1.1" }; + const result = ollamaParseHostVariable(env); + assert.strictEqual(result, `http://192.168.1.1:${OLLAMA_DEFAULT_PORT}`); + }); + + test("parses 0.0.0.0 with port correctly", () => { + const env = { OLLAMA_HOST: "0.0.0.0:4000" }; + const result = ollamaParseHostVariable(env); + assert.strictEqual(result, "http://0.0.0.0:4000"); + }); + + test("parses localhost with port correctly", () => { + const env = { OLLAMA_HOST: "localhost:4000" }; + const result = ollamaParseHostVariable(env); + assert.strictEqual(result, "http://localhost:4000"); + }); + + test("parses 0.0.0.0 without port correctly", () => { + const env = { OLLAMA_HOST: "0.0.0.0" }; + const result = ollamaParseHostVariable(env); + assert.strictEqual(result, `http://0.0.0.0:${OLLAMA_DEFAULT_PORT}`); + }); +}); diff --git a/packages/core/src/ollama.ts b/packages/core/src/ollama.ts index 3e6c97ab5c..ca960e96dc 100644 --- a/packages/core/src/ollama.ts +++ b/packages/core/src/ollama.ts @@ -1,12 +1,12 @@ // Import necessary modules and types for handling chat completions and model management -import { LanguageModel, ListModelsFunction, PullModelFunction } from "./chat" -import { MODEL_PROVIDER_OLLAMA, TOOL_ID } from "./constants" -import { serializeError } from "./error" -import { createFetch, iterateBody } from "./fetch" -import { OpenAIChatCompletion, OpenAIEmbedder } from "./openai" -import { logError, logVerbose } from "./util" -import { JSONLTryParse } from "./jsonl" -import { stderr } from "./stdio" +import { LanguageModel, ListModelsFunction, PullModelFunction } from "./chat"; +import { MODEL_PROVIDER_OLLAMA, TOOL_ID } from "./constants"; +import { serializeError } from "./error"; +import { createFetch, iterateBody } from "./fetch"; +import { OpenAIChatCompletion, OpenAIEmbedder } from "./openai"; +import { logError, logVerbose } from "./util"; +import { JSONLTryParse } from "./jsonl"; +import { stderr } from "./stdio"; /** * Lists available models for the Ollama language model configuration. @@ -16,98 +16,98 @@ import { stderr } from "./stdio" * @returns A promise that resolves to an array of LanguageModelInfo objects. 
*/ const listModels: ListModelsFunction = async (cfg, options) => { - try { - // Create a fetch instance to make HTTP requests - const fetch = await createFetch({ retries: 0, ...options }) - // Fetch the list of models from the remote API - const res = await fetch(cfg.base.replace("/v1", "/api/tags"), { - method: "GET", - }) - if (res.status !== 200) - return { - ok: false, - status: res.status, - error: serializeError(res.statusText), - } - // Parse and format the response into LanguageModelInfo objects - const { models } = (await res.json()) as { - models: { - name: string - size: number - details: { - parameter_size: string - family: string - } - }[] - } - return { - ok: true, - models: models.map( - (m) => - ({ - id: m.name, - details: `${m.name}, ${m.details.parameter_size}`, - url: `https://ollama.com/library/${m.name}`, - }) satisfies LanguageModelInfo - ), - } - } catch (e) { - return { ok: false, error: serializeError(e) } - } -} + try { + // Create a fetch instance to make HTTP requests + const fetch = await createFetch({ retries: 0, ...options }); + // Fetch the list of models from the remote API + const res = await fetch(cfg.base.replace("/v1", "/api/tags"), { + method: "GET", + }); + if (res.status !== 200) + return { + ok: false, + status: res.status, + error: serializeError(res.statusText), + }; + // Parse and format the response into LanguageModelInfo objects + const { models } = (await res.json()) as { + models: { + name: string; + size: number; + details: { + parameter_size: string; + family: string; + }; + }[]; + }; + return { + ok: true, + models: models.map( + (m) => + ({ + id: m.name, + details: `${m.name}, ${m.details.parameter_size}`, + url: `https://ollama.com/library/${m.name}`, + }) satisfies LanguageModelInfo, + ), + }; + } catch (e) { + return { ok: false, error: serializeError(e) }; + } +}; const pullModel: PullModelFunction = async (cfg, options) => { - const { trace, cancellationToken } = options || {} - const { provider, model } = cfg - const fetch = await createFetch({ retries: 0, ...options }) - const base = cfg.base.replace(/\/v1$/i, "") - try { - // pull - logVerbose(`${provider}: pull ${model}`) - const resPull = await fetch(`${base}/api/pull`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "User-Agent": TOOL_ID, - }, - body: JSON.stringify({ model }), - }) - if (!resPull.ok) { - logError(`${provider}: failed to pull model ${model}`) - logVerbose(resPull.statusText) - return { ok: false, status: resPull.status } - } - let lastStatus = "" - for await (const chunk of iterateBody(resPull, { cancellationToken })) { - const cs = JSONLTryParse(chunk) as { - status?: string - error?: string - }[] - for (const c of cs) { - if (c?.error) { - return { - ok: false, - error: serializeError(c.error), - } - } - } - stderr.write(".") + const { trace, cancellationToken } = options || {}; + const { provider, model } = cfg; + const fetch = await createFetch({ retries: 0, ...options }); + const base = cfg.base.replace(/\/v1$/i, ""); + try { + // pull + logVerbose(`${provider}: pull ${model}`); + const resPull = await fetch(`${base}/api/pull`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "User-Agent": TOOL_ID, + }, + body: JSON.stringify({ model }), + }); + if (!resPull.ok) { + logError(`${provider}: failed to pull model ${model}`); + logVerbose(resPull.statusText); + return { ok: false, status: resPull.status }; + } + let lastStatus = ""; + for await (const chunk of iterateBody(resPull, { cancellationToken })) { + 
const cs = JSONLTryParse(chunk) as { + status?: string; + error?: string; + }[]; + for (const c of cs) { + if (c?.error) { + return { + ok: false, + error: serializeError(c.error), + }; } - stderr.write("\n") - logVerbose(`${provider}: pulled ${model}`) - return { ok: true } - } catch (e) { - logError(e) - trace.error(e) - return { ok: false, error: serializeError(e) } + } + stderr.write("."); } -} + stderr.write("\n"); + logVerbose(`${provider}: pulled ${model}`); + return { ok: true }; + } catch (e) { + logError(e); + trace.error(e); + return { ok: false, error: serializeError(e) }; + } +}; // Define the Ollama model with its completion handler and model listing function export const OllamaModel = Object.freeze({ - id: MODEL_PROVIDER_OLLAMA, - completer: OpenAIChatCompletion, - listModels, - pullModel, - embedder: OpenAIEmbedder, -}) + id: MODEL_PROVIDER_OLLAMA, + completer: OpenAIChatCompletion, + listModels, + pullModel, + embedder: OpenAIEmbedder, +}); diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts index 6d9b9b8299..cfe42ccfcf 100644 --- a/packages/core/src/openai.ts +++ b/packages/core/src/openai.ts @@ -1,77 +1,67 @@ -import { ellipse, logError, logInfo, logVerbose } from "./util" -import { host } from "./host" +import { ellipse, logError, logInfo, logVerbose } from "./util"; +import { host } from "./host"; import { - AZURE_AI_INFERENCE_VERSION, - AZURE_OPENAI_API_VERSION, - MODEL_PROVIDER_AZURE_OPENAI, - MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, - MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, - MODEL_PROVIDER_OPENAI_HOSTS, - OPENROUTER_API_CHAT_URL, - OPENROUTER_SITE_NAME_HEADER, - OPENROUTER_SITE_URL_HEADER, - THINK_END_TOKEN_REGEX, - THINK_START_TOKEN_REGEX, - TOOL_ID, - TOOL_NAME, - TOOL_URL, -} from "./constants" -import { approximateTokens } from "./tokens" + AZURE_AI_INFERENCE_VERSION, + AZURE_OPENAI_API_VERSION, + MODEL_PROVIDER_AZURE_OPENAI, + MODEL_PROVIDER_AZURE_SERVERLESS_MODELS, + MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, + MODEL_PROVIDER_OPENAI_HOSTS, + OPENROUTER_API_CHAT_URL, + OPENROUTER_SITE_NAME_HEADER, + OPENROUTER_SITE_URL_HEADER, + THINK_END_TOKEN_REGEX, + THINK_START_TOKEN_REGEX, + TOOL_ID, + TOOL_NAME, + TOOL_URL, +} from "./constants"; +import { approximateTokens } from "./tokens"; import { - ChatCompletionHandler, - CreateImageRequest, - CreateImageResult, - CreateSpeechRequest, - CreateSpeechResult, - CreateTranscriptionRequest, - LanguageModel, - ListModelsFunction, -} from "./chat" + ChatCompletionHandler, + CreateImageRequest, + CreateImageResult, + CreateSpeechRequest, + CreateSpeechResult, + CreateTranscriptionRequest, + LanguageModel, + ListModelsFunction, +} from "./chat"; +import { RequestError, errorMessage, isCancelError, serializeError } from "./error"; +import { createFetch } from "./fetch"; +import { parseModelIdentifier } from "./models"; +import { JSON5TryParse } from "./json5"; import { - RequestError, - errorMessage, - isCancelError, - serializeError, -} from "./error" -import { createFetch } from "./fetch" -import { parseModelIdentifier } from "./models" -import { JSON5TryParse } from "./json5" -import { - ChatCompletionToolCall, - ChatCompletionResponse, - ChatCompletionChunk, - ChatCompletionUsage, - ChatCompletion, - ChatCompletionChunkChoice, - ChatCompletionChoice, - CreateChatCompletionRequest, - ChatCompletionTokenLogprob, - EmbeddingCreateResponse, - EmbeddingCreateParams, - EmbeddingResult, - ImageGenerationResponse, -} from "./chattypes" -import { resolveTokenEncoder } from "./encoders" -import { 
CancellationOptions, checkCancelled } from "./cancellation"
-import { INITryParse } from "./ini"
-import { serializeChunkChoiceToLogProbs } from "./logprob"
-import { TraceOptions } from "./trace"
-import { LanguageModelConfiguration } from "./server/messages"
-import prettyBytes from "pretty-bytes"
-import {
-    deleteUndefinedValues,
-    isEmptyString,
-    normalizeInt,
-    trimTrailingSlash,
-} from "./cleaners"
-import { fromBase64 } from "./base64"
-import debug from "debug"
-import { traceFetchPost } from "./fetchtext"
-import { providerFeatures } from "./features"
-import { genaiscriptDebug } from "./debug"
-const dbg = genaiscriptDebug("openai")
-const dbgMessages = dbg.extend("msg")
-dbgMessages.enabled = false
+  ChatCompletionToolCall,
+  ChatCompletionResponse,
+  ChatCompletionChunk,
+  ChatCompletionUsage,
+  ChatCompletion,
+  ChatCompletionChunkChoice,
+  ChatCompletionChoice,
+  CreateChatCompletionRequest,
+  ChatCompletionTokenLogprob,
+  EmbeddingCreateResponse,
+  EmbeddingCreateParams,
+  EmbeddingResult,
+  ImageGenerationResponse,
+} from "./chattypes";
+import { resolveTokenEncoder } from "./encoders";
+import { CancellationOptions, checkCancelled } from "./cancellation";
+import { INITryParse } from "./ini";
+import { serializeChunkChoiceToLogProbs } from "./logprob";
+import { TraceOptions } from "./trace";
+import { LanguageModelConfiguration } from "./server/messages";
+import prettyBytes from "pretty-bytes";
+import { deleteUndefinedValues, isEmptyString, normalizeInt, trimTrailingSlash } from "./cleaners";
+import { fromBase64 } from "./base64";
+import debug from "debug";
+import { traceFetchPost } from "./fetchtext";
+import { providerFeatures } from "./features";
+import { genaiscriptDebug } from "./debug";
+const dbg = genaiscriptDebug("openai");
+const dbgMessages = dbg.extend("msg");
+dbgMessages.enabled = false;

 /**
  * Generates configuration headers for API requests based on the provided configuration object.
@@ -87,540 +77,484 @@ dbgMessages.enabled = false
 * - User-Agent: A constant user agent identifier for the tool.
 */
 export function getConfigHeaders(cfg: LanguageModelConfiguration) {
-    let { token, type, base, provider } = cfg
-    if (type === "azure_serverless_models") {
-        const keys = INITryParse(token)
-        if (keys && Object.keys(keys).length > 1) token = keys[cfg.model]
-    }
-    const features = providerFeatures(provider)
-    const useBearer = features?.bearerToken !== false
-    const isBearer = /^Bearer /i.test(cfg.token)
-    const Authorization = isBearer
-        ? token
-        : token && (useBearer || base === OPENROUTER_API_CHAT_URL)
-          ? `Bearer ${token}`
-          : undefined
-    const apiKey = Authorization ? undefined : token
-    const res: Record<string, string> = deleteUndefinedValues({
-        Authorization,
-        "api-key": apiKey,
-        "User-Agent": TOOL_ID,
-    })
-    return res
+  let { token, type, base, provider } = cfg;
+  if (type === "azure_serverless_models") {
+    const keys = INITryParse(token);
+    if (keys && Object.keys(keys).length > 1) token = keys[cfg.model];
+  }
+  const features = providerFeatures(provider);
+  const useBearer = features?.bearerToken !== false;
+  const isBearer = /^Bearer /i.test(cfg.token);
+  const Authorization = isBearer
+    ? token
+    : token && (useBearer || base === OPENROUTER_API_CHAT_URL)
+      ? `Bearer ${token}`
+      : undefined;
+  const apiKey = Authorization ? undefined : token;
+  const res: Record<string, string> = deleteUndefinedValues({
+    Authorization,
+    "api-key": apiKey,
+    "User-Agent": TOOL_ID,
+  });
+  return res;
 }

-export const OpenAIChatCompletion: ChatCompletionHandler = async (
-    req,
-    cfg,
-    options,
-    trace
-) => {
-    const {
-        requestOptions,
-        partialCb,
-        retry,
-        retryDelay,
-        maxDelay,
-        cancellationToken,
-        inner,
-    } = options
-    const { headers = {}, ...rest } = requestOptions || {}
-    const { provider, model, family, reasoningEffort } = parseModelIdentifier(
-        req.model
-    )
-    const features = providerFeatures(provider)
-    const { encode: encoder } = await resolveTokenEncoder(family)
-
-    const postReq = structuredClone({
-        ...req,
-        stream: true,
-        stream_options: { include_usage: true },
-        model,
-        messages: req.messages.map(({ cacheControl, ...rest }) => ({
-            ...rest,
-        })),
-    } satisfies CreateChatCompletionRequest)
-
-    // stream_options fails in some cases
-    if (family === "gpt-4-turbo-v" || /mistral/i.test(family)) {
-        dbg(`removing stream_options`)
-        delete postReq.stream_options
+export const OpenAIChatCompletion: ChatCompletionHandler = async (req, cfg, options, trace) => {
+  const { requestOptions, partialCb, retry, retryDelay, maxDelay, cancellationToken, inner } =
+    options;
+  const { headers = {}, ...rest } = requestOptions || {};
+  const { provider, model, family, reasoningEffort } = parseModelIdentifier(req.model);
+  const features = providerFeatures(provider);
+  const { encode: encoder } = await resolveTokenEncoder(family);
+
+  const postReq = structuredClone({
+    ...req,
+    stream: true,
+    stream_options: { include_usage: true },
+    model,
+    messages: req.messages.map(({ cacheControl, ...rest }) => ({
+      ...rest,
+    })),
+  } satisfies CreateChatCompletionRequest);
+
+  // stream_options fails in some cases
+  if (family === "gpt-4-turbo-v" || /mistral/i.test(family)) {
+    dbg(`removing stream_options`);
+    delete postReq.stream_options;
+  }
+
+  if (MODEL_PROVIDER_OPENAI_HOSTS.includes(provider)) {
+    if (/^o\d|gpt-4\.1/.test(family)) {
+      dbg(`changing max_tokens to max_completion_tokens`);
+      if (postReq.max_tokens) {
+        postReq.max_completion_tokens = postReq.max_tokens;
+        delete postReq.max_tokens;
+      }
     }

-    if (MODEL_PROVIDER_OPENAI_HOSTS.includes(provider)) {
-        if (/^o\d|gpt-4\.1/.test(family)) {
-            dbg(`changing max_tokens to max_completion_tokens`)
-            if (postReq.max_tokens) {
-                postReq.max_completion_tokens = postReq.max_tokens
-                delete postReq.max_tokens
-            }
-        }
+    if (/^o\d/.test(family)) {
+      dbg(`removing options to support o1/o3/o4`);
+      delete postReq.temperature;
+      delete postReq.top_p;
+      delete postReq.presence_penalty;
+      delete postReq.frequency_penalty;
+      delete postReq.logprobs;
+      delete postReq.top_logprobs;
+      delete postReq.logit_bias;
+      if (!postReq.reasoning_effort && reasoningEffort) {
+        postReq.model = family;
+        postReq.reasoning_effort = reasoningEffort;
+      }
+    }

-        if (/^o\d/.test(family)) {
-            dbg(`removing options to support o1/o3/o4`)
-            delete postReq.temperature
-            delete postReq.top_p
-            delete postReq.presence_penalty
-            delete postReq.frequency_penalty
-            delete postReq.logprobs
-            delete postReq.top_logprobs
-            delete postReq.logit_bias
-            if (!postReq.reasoning_effort && reasoningEffort) {
-                postReq.model = family
-                postReq.reasoning_effort = reasoningEffort
-            }
+    if (/^o1/.test(family)) {
+      dbg(`removing options to support o1`);
+      const preview = /^o1-(preview|mini)/i.test(family);
+      delete postReq.stream;
+      delete postReq.stream_options;
+      for (const msg of postReq.messages) {
+        if (msg.role === "system") {
+          (msg as any).role = preview ? "user" : "developer";
         }
-
-        if (/^o1/.test(family)) {
-            dbg(`removing options to support o1`)
-            const preview = /^o1-(preview|mini)/i.test(family)
-            delete postReq.stream
-            delete postReq.stream_options
-            for (const msg of postReq.messages) {
-                if (msg.role === "system") {
-                    ;(msg as any).role = preview ? "user" : "developer"
-                }
-            }
-        } else if (/^o3/i.test(family)) {
-            for (const msg of postReq.messages) {
-                if (msg.role === "system") {
-                    ;(msg as any).role = "developer"
-                }
-            }
+      }
+    } else if (/^o3/i.test(family)) {
+      for (const msg of postReq.messages) {
+        if (msg.role === "system") {
+          (msg as any).role = "developer";
         }
+      }
     }
+  }

-    const singleModel = !!features?.singleModel
-    if (singleModel) delete postReq.model
+  const singleModel = !!features?.singleModel;
+  if (singleModel) delete postReq.model;

-    let url = ""
-    const toolCalls: ChatCompletionToolCall[] = []
+  let url = "";
+  const toolCalls: ChatCompletionToolCall[] = [];

-    if (
-        cfg.type === "openai" ||
-        cfg.type === "localai" ||
-        cfg.type === "alibaba"
-    ) {
-        url = trimTrailingSlash(cfg.base) + "/chat/completions"
-        if (url === OPENROUTER_API_CHAT_URL) {
-            ;(headers as any)[OPENROUTER_SITE_URL_HEADER] =
-                process.env.OPENROUTER_SITE_URL || TOOL_URL
-            ;(headers as any)[OPENROUTER_SITE_NAME_HEADER] =
-                process.env.OPENROUTER_SITE_NAME || TOOL_NAME
-        }
-    } else if (cfg.type === "azure") {
-        delete postReq.model
-        const version = cfg.version || AZURE_OPENAI_API_VERSION
-        trace?.itemValue(`version`, version)
-        url =
-            trimTrailingSlash(cfg.base) +
-            "/" +
-            family +
-            `/chat/completions?api-version=${version}`
-    } else if (cfg.type === "azure_ai_inference") {
-        const version = cfg.version
-        trace?.itemValue(`version`, version)
-        url = trimTrailingSlash(cfg.base) + `/chat/completions`
-        if (version) url += `?api-version=${version}`
-        ;(headers as any)["extra-parameters"] = "pass-through"
-    } else if (cfg.type === "azure_serverless_models") {
-        const version = cfg.version || AZURE_AI_INFERENCE_VERSION
-        trace?.itemValue(`version`, version)
-        url =
-            trimTrailingSlash(cfg.base).replace(
-                /^https?:\/\/(?<deployment>[^\.]+)\.(?<region>[^\.]+)\.models\.ai\.azure\.com/i,
-                (m, deployment, region) =>
-                    `https://${postReq.model}.${region}.models.ai.azure.com`
-            ) + `/chat/completions?api-version=${version}`
-        ;(headers as any)["extra-parameters"] = "pass-through"
-        delete postReq.model
-        delete postReq.stream_options
-    } else if (cfg.type === "azure_serverless") {
-        const version = cfg.version || AZURE_AI_INFERENCE_VERSION
-        trace?.itemValue(`version`, version)
-        url =
-            trimTrailingSlash(cfg.base) +
-            "/" +
-            family +
-            `/chat/completions?api-version=${version}`
-        // https://learn.microsoft.com/en-us/azure/machine-learning/reference-model-inference-api?view=azureml-api-2&tabs=javascript#extensibility
-        ;(headers as any)["extra-parameters"] = "pass-through"
-        delete postReq.model
-    } else if (cfg.type === "github") {
-        url = cfg.base
-        const { prefix } =
-            /^(?<prefix>[^-]+)-([^\/]+)$/.exec(postReq.model)?.groups || {}
-        const patch = {
-            gpt: "openai",
-            o: "openai",
-            "text-embedding": "openai",
-            phi: "microsoft",
-            meta: "meta",
-            llama: "meta",
-            mistral: "mistral-ai",
-            deepseek: "deepseek",
-        }[prefix?.toLowerCase() || ""]
-        if (patch) {
-            postReq.model = `${patch}/${postReq.model}`
-            dbg(`updated model to ${postReq.model}`)
-        }
-    } else if (cfg.type === "huggingface") {
-        // https://github.com/huggingface/text-generation-inference/issues/2946
-        delete postReq.model
-        url =
-            trimTrailingSlash(cfg.base).replace(/\/v1$/, "") +
-            "/models/" +
-            family +
-            `/v1/chat/completions`
-    } else throw new Error(`api type ${cfg.type} not supported`)
-
-    trace?.itemValue(`url`, `[${url}](${url})`)
-    dbg(`url: ${url}`)
-
-    let numTokens = 0
-    let numReasoningTokens = 0
-    const fetchRetry = await createFetch({
-        trace,
-        retries: retry,
-        retryDelay,
-        maxDelay,
-        cancellationToken,
-    })
-    trace?.dispatchChange()
-
-    const fetchHeaders: HeadersInit = {
-        "Content-Type": "application/json",
-        ...getConfigHeaders(cfg),
-        ...(headers || {}),
+  if (cfg.type === "openai" || cfg.type === "localai" || cfg.type === "alibaba") {
+    url = trimTrailingSlash(cfg.base) + "/chat/completions";
+    if (url === OPENROUTER_API_CHAT_URL) {
+      (headers as any)[OPENROUTER_SITE_URL_HEADER] = process.env.OPENROUTER_SITE_URL || TOOL_URL;
+      (headers as any)[OPENROUTER_SITE_NAME_HEADER] = process.env.OPENROUTER_SITE_NAME || TOOL_NAME;
+    }
+  } else if (cfg.type === "azure") {
+    delete postReq.model;
+    const version = cfg.version || AZURE_OPENAI_API_VERSION;
+    trace?.itemValue(`version`, version);
+    url = trimTrailingSlash(cfg.base) + "/" + family + `/chat/completions?api-version=${version}`;
+  } else if (cfg.type === "azure_ai_inference") {
+    const version = cfg.version;
+    trace?.itemValue(`version`, version);
+    url = trimTrailingSlash(cfg.base) + `/chat/completions`;
+    if (version) url += `?api-version=${version}`;
+    (headers as any)["extra-parameters"] = "pass-through";
+  } else if (cfg.type === "azure_serverless_models") {
+    const version = cfg.version || AZURE_AI_INFERENCE_VERSION;
+    trace?.itemValue(`version`, version);
+    url =
+      trimTrailingSlash(cfg.base).replace(
+        /^https?:\/\/(?<deployment>[^\.]+)\.(?<region>[^\.]+)\.models\.ai\.azure\.com/i,
+        (m, deployment, region) => `https://${postReq.model}.${region}.models.ai.azure.com`,
+      ) + `/chat/completions?api-version=${version}`;
+    (headers as any)["extra-parameters"] = "pass-through";
+    delete postReq.model;
+    delete postReq.stream_options;
+  } else if (cfg.type === "azure_serverless") {
+    const version = cfg.version || AZURE_AI_INFERENCE_VERSION;
+    trace?.itemValue(`version`, version);
+    url = trimTrailingSlash(cfg.base) + "/" + family + `/chat/completions?api-version=${version}`;
+    // https://learn.microsoft.com/en-us/azure/machine-learning/reference-model-inference-api?view=azureml-api-2&tabs=javascript#extensibility
+    (headers as any)["extra-parameters"] = "pass-through";
+    delete postReq.model;
+  } else if (cfg.type === "github") {
+    url = cfg.base;
+    const { prefix } = /^(?<prefix>[^-]+)-([^\/]+)$/.exec(postReq.model)?.groups || {};
+    const patch = {
+      gpt: "openai",
+      o: "openai",
+      "text-embedding": "openai",
+      phi: "microsoft",
+      meta: "meta",
+      llama: "meta",
+      mistral: "mistral-ai",
+      deepseek: "deepseek",
+    }[prefix?.toLowerCase() || ""];
+    if (patch) {
+      postReq.model = `${patch}/${postReq.model}`;
+      dbg(`updated model to ${postReq.model}`);
    }
+  } else if (cfg.type === "huggingface") {
+    // https://github.com/huggingface/text-generation-inference/issues/2946
+    delete postReq.model;
+    url =
+      trimTrailingSlash(cfg.base).replace(/\/v1$/, "") +
+      "/models/" +
+      family +
+      `/v1/chat/completions`;
+  } else throw new Error(`api type ${cfg.type} not supported`);
+
+  trace?.itemValue(`url`, `[${url}](${url})`);
+  dbg(`url: ${url}`);
+
+  let numTokens = 0;
+  let numReasoningTokens = 0;
+  const fetchRetry = await createFetch({
+    trace,
+    retries: retry,
+    retryDelay,
+    maxDelay,
+    cancellationToken,
+  });
+  
trace?.dispatchChange(); + + const fetchHeaders: HeadersInit = { + "Content-Type": "application/json", + ...getConfigHeaders(cfg), + ...(headers || {}), + }; + traceFetchPost(trace, url, fetchHeaders as any, postReq); + const body = JSON.stringify(postReq); + let r: Response; + try { + r = await fetchRetry(url, { + headers: fetchHeaders, + body, + method: "POST", + ...(rest || {}), + }); + } catch (e) { + trace?.error(errorMessage(e), e); + throw e; + } + + trace?.itemValue(`status`, `${r.status} ${r.statusText}`); + dbg(`response: ${r.status} ${r.statusText}`); + if (r.status !== 200) { + let responseBody: string; try { - r = await fetchRetry(url, { - headers: fetchHeaders, - body, - method: "POST", - ...(rest || {}), - }) - } catch (e) { - trace?.error(errorMessage(e), e) - throw e + responseBody = await r.text(); + } catch (e) {} + if (!responseBody) responseBody; + trace?.fence(responseBody, "json"); + const errors = JSON5TryParse(responseBody, {}) as + | { + error: any; + message: string; + } + | { error: { message: string } }[] + | { error: { message: string } }; + const error = Array.isArray(errors) ? errors[0]?.error : errors; + throw new RequestError( + r.status, + errorMessage(error) || r.statusText, + errors, + responseBody, + normalizeInt(r.headers.get("retry-after")), + ); + } + + let done = false; + let finishReason: ChatCompletionResponse["finishReason"] = undefined; + let chatResp = ""; + let reasoningChatResp = ""; + let pref = ""; + let usage: ChatCompletionUsage; + let error: SerializedError; + let responseModel: string; + let lbs: ChatCompletionTokenLogprob[] = []; + + let reasoning = false; + + const doChoices = (json: string, tokens: Logprob[], reasoningTokens: Logprob[]) => { + const obj: ChatCompletionChunk | ChatCompletion = JSON.parse(json); + + if (!postReq.stream) trace?.detailsFenced(`📬 response`, obj, "json"); + dbgMessages(`%O`, obj); + + if (obj.usage) usage = obj.usage; + if (!responseModel && obj.model) { + responseModel = obj.model; + dbg(`model: ${responseModel}`); } - - trace?.itemValue(`status`, `${r.status} ${r.statusText}`) - dbg(`response: ${r.status} ${r.statusText}`) - if (r.status !== 200) { - let responseBody: string - try { - responseBody = await r.text() - } catch (e) {} - if (!responseBody) responseBody - trace?.fence(responseBody, "json") - const errors = JSON5TryParse(responseBody, {}) as - | { - error: any - message: string - } - | { error: { message: string } }[] - | { error: { message: string } } - const error = Array.isArray(errors) ? 
errors[0]?.error : errors
-        throw new RequestError(
-            r.status,
-            errorMessage(error) || r.statusText,
-            errors,
-            responseBody,
-            normalizeInt(r.headers.get("retry-after"))
-        )
+      responseBody = await r.text();
+    } catch (e) {}
+    if (!responseBody) responseBody;
+    trace?.fence(responseBody, "json");
+    const errors = JSON5TryParse(responseBody, {}) as
+      | {
+          error: any;
+          message: string;
+        }
+      | { error: { message: string } }[]
+      | { error: { message: string } };
+    const error = Array.isArray(errors) ? errors[0]?.error : errors;
+    throw new RequestError(
+      r.status,
+      errorMessage(error) || r.statusText,
+      errors,
+      responseBody,
+      normalizeInt(r.headers.get("retry-after")),
+    );
+  }
+
+  let done = false;
+  let finishReason: ChatCompletionResponse["finishReason"] = undefined;
+  let chatResp = "";
+  let reasoningChatResp = "";
+  let pref = "";
+  let usage: ChatCompletionUsage;
+  let error: SerializedError;
+  let responseModel: string;
+  let lbs: ChatCompletionTokenLogprob[] = [];
+
+  let reasoning = false;
+
+  const doChoices = (json: string, tokens: Logprob[], reasoningTokens: Logprob[]) => {
+    const obj: ChatCompletionChunk | ChatCompletion = JSON.parse(json);
+
+    if (!postReq.stream) trace?.detailsFenced(`📬 response`, obj, "json");
+    dbgMessages(`%O`, obj);
+
+    if (obj.usage) usage = obj.usage;
+    if (!responseModel && obj.model) {
+      responseModel = obj.model;
+      dbg(`model: ${responseModel}`);
    }
-
-    let done = false
-    let finishReason: ChatCompletionResponse["finishReason"] = undefined
-    let chatResp = ""
-    let reasoningChatResp = ""
-    let pref = ""
-    let usage: ChatCompletionUsage
-    let error: SerializedError
-    let responseModel: string
-    let lbs: ChatCompletionTokenLogprob[] = []
-
-    let reasoning = false
-
-    const doChoices = (
-        json: string,
-        tokens: Logprob[],
-        reasoningTokens: Logprob[]
-    ) => {
-        const obj: ChatCompletionChunk | ChatCompletion = JSON.parse(json)
-
-        if (!postReq.stream) trace?.detailsFenced(`📬 response`, obj, "json")
-        dbgMessages(`%O`, obj)
-
-        if (obj.usage) usage = obj.usage
-        if (!responseModel && obj.model) {
-            responseModel = obj.model
-            dbg(`model: ${responseModel}`)
+    if (!obj.choices?.length) return;
+    else if (obj.choices?.length != 1) throw new Error("too many choices in response");
+    const choice = obj.choices[0];
+    const { finish_reason } = choice;
+    if (finish_reason) {
+      dbg(`finish reason: ${finish_reason}`);
+      finishReason = finish_reason as any;
    }
+    if ((choice as ChatCompletionChunkChoice).delta) {
+      const { delta, logprobs } = choice as ChatCompletionChunkChoice;
+      if (logprobs?.content) lbs.push(...logprobs.content);
+      if (typeof delta?.content === "string" && delta.content !== "") {
+        let content = delta.content;
+        if (!reasoning && THINK_START_TOKEN_REGEX.test(content)) {
+          dbg(`entering <think>`);
+          reasoning = true;
+          content = content.replace(THINK_START_TOKEN_REGEX, "");
+        } else if (reasoning && THINK_END_TOKEN_REGEX.test(content)) {
+          dbg(`leaving </think>`);
+          reasoning = false;
+          content = content.replace(THINK_END_TOKEN_REGEX, "");
+        }

+        if (!isEmptyString(content)) {
+          if (reasoning) {
+            numReasoningTokens += approximateTokens(content, {
+              encoder,
+            });
+            reasoningChatResp += content;
+            reasoningTokens.push(
+              ...serializeChunkChoiceToLogProbs(choice as ChatCompletionChunkChoice),
+            );
+          } else {
+            numTokens += approximateTokens(content, { encoder });
+            chatResp += content;
+            tokens.push(...serializeChunkChoiceToLogProbs(choice as ChatCompletionChunkChoice));
+          }
+          trace?.appendToken(content);
        }
-        if (!obj.choices?.length) return
-        else if (obj.choices?.length != 1)
-            throw new Error("too many choices in response")
-        const choice = obj.choices[0]
-        const { finish_reason } = choice
-        if (finish_reason) {
-            dbg(`finish reason: ${finish_reason}`)
-            finishReason = finish_reason as any
+      }
+      if (typeof delta?.reasoning_content === "string" && delta.reasoning_content !== "") {
+        numTokens += approximateTokens(delta.reasoning_content, {
+          encoder,
+        });
+        reasoningChatResp += delta.reasoning_content;
+        reasoningTokens.push(
+          ...serializeChunkChoiceToLogProbs(choice as ChatCompletionChunkChoice),
+        );
+        trace?.appendToken(delta.reasoning_content);
+      }
+      if (Array.isArray(delta?.tool_calls)) {
+        const { tool_calls } = delta;
+        for (const call of tool_calls) {
+          const index = call.index ?? toolCalls.length;
+          const tc =
+            toolCalls[index] ||
+            (toolCalls[index] = {
+              id: call.id,
+              name: call.function.name,
+              arguments: "",
+            });
+          if (call.function.arguments) tc.arguments += call.function.arguments;
        }
-        if ((choice as ChatCompletionChunkChoice).delta) {
-            const { delta, logprobs } = choice as ChatCompletionChunkChoice
-            if (logprobs?.content) lbs.push(...logprobs.content)
-            if (typeof delta?.content === "string" && delta.content !== "") {
-                let content = delta.content
-                if (!reasoning && THINK_START_TOKEN_REGEX.test(content)) {
-                    dbg(`entering <think>`)
-                    reasoning = true
-                    content = content.replace(THINK_START_TOKEN_REGEX, "")
-                } else if (reasoning && THINK_END_TOKEN_REGEX.test(content)) {
-                    dbg(`leaving </think>`)
-                    reasoning = false
-                    content = content.replace(THINK_END_TOKEN_REGEX, "")
-                }
-
-                if (!isEmptyString(content)) {
-                    if (reasoning) {
-                        numReasoningTokens += approximateTokens(content, {
-                            encoder,
-                        })
-                        reasoningChatResp += content
-                        reasoningTokens.push(
-                            ...serializeChunkChoiceToLogProbs(
-                                choice as ChatCompletionChunkChoice
-                            )
-                        )
-                    } else {
-                        numTokens += approximateTokens(content, { encoder })
-                        chatResp += content
-                        tokens.push(
-                            ...serializeChunkChoiceToLogProbs(
-                                choice as ChatCompletionChunkChoice
-                            )
-                        )
-                    }
-                    trace?.appendToken(content)
-                }
-            }
-            if (
-                typeof delta?.reasoning_content === "string" &&
-                delta.reasoning_content !== ""
-            ) {
-                numTokens += approximateTokens(delta.reasoning_content, {
-                    encoder,
-                })
-                reasoningChatResp += delta.reasoning_content
-                reasoningTokens.push(
-                    ...serializeChunkChoiceToLogProbs(
-                        choice as ChatCompletionChunkChoice
-                    )
-                )
-                trace?.appendToken(delta.reasoning_content)
-            }
-            if (Array.isArray(delta?.tool_calls)) {
-                const { tool_calls } = delta
-                for (const call of tool_calls) {
-                    const index = call.index ?? toolCalls.length
-                    const tc =
-                        toolCalls[index] ||
-                        (toolCalls[index] = {
-                            id: call.id,
-                            name: call.function.name,
-                            arguments: "",
-                        })
-                    if (call.function.arguments)
-                        tc.arguments += call.function.arguments
-                }
-            }
-        } else if ((choice as ChatCompletionChoice).message) {
-            const { message } = choice as ChatCompletionChoice
-            chatResp = message.content
-            reasoningChatResp = message.reasoning_content
-            numTokens =
-                usage?.total_tokens ?? approximateTokens(chatResp, { encoder })
-            if (Array.isArray(message?.tool_calls)) {
-                const { tool_calls } = message
-                for (let calli = 0; calli < tool_calls.length; calli++) {
-                    const call = tool_calls[calli]
-                    const tc =
-                        toolCalls[calli] ||
-                        (toolCalls[calli] = {
-                            id: call.id,
-                            name: call.function.name,
-                            arguments: "",
-                        })
-                    if (call.function.arguments)
-                        tc.arguments += call.function.arguments
-                }
-            }
-            partialCb?.(
-                deleteUndefinedValues({
-                    responseSoFar: chatResp,
-                    reasoningSoFar: reasoningChatResp,
-                    tokensSoFar: numTokens,
-                    responseChunk: chatResp,
-                    reasoningChunk: reasoningChatResp,
-                    inner,
-                })
-            )
-        }
-
-        if (finish_reason === "function_call" || toolCalls.length > 0) {
-            finishReason = "tool_calls"
-        } else {
-            finishReason = finish_reason
+      }
+    } else if ((choice as ChatCompletionChoice).message) {
+      const { message } = choice as ChatCompletionChoice;
+      chatResp = message.content;
+      reasoningChatResp = message.reasoning_content;
+      numTokens = usage?.total_tokens ?? approximateTokens(chatResp, { encoder });
+      if (Array.isArray(message?.tool_calls)) {
+        const { tool_calls } = message;
+        for (let calli = 0; calli < tool_calls.length; calli++) {
+          const call = tool_calls[calli];
+          const tc =
+            toolCalls[calli] ||
+            (toolCalls[calli] = {
+              id: call.id,
+              name: call.function.name,
+              arguments: "",
+            });
+          if (call.function.arguments) tc.arguments += call.function.arguments;
        }
+      }
+      partialCb?.(
+        deleteUndefinedValues({
+          responseSoFar: chatResp,
+          reasoningSoFar: reasoningChatResp,
+          tokensSoFar: numTokens,
+          responseChunk: chatResp,
+          reasoningChunk: reasoningChatResp,
+          inner,
+        }),
+      );
    }

+    if (finish_reason === "function_call" || toolCalls.length > 0) {
+      finishReason = "tool_calls";
    } else {
+      finishReason = finish_reason;
+    }
+  };

-    trace?.appendContent("\n\n")
-    if (!postReq.stream) {
-        const responseBody = await r.text()
-        doChoices(responseBody, [], [])
+  trace?.appendContent("\n\n");
+  if (!postReq.stream) {
+    const responseBody = await r.text();
+    doChoices(responseBody, [], []);
    } else {
-        const decoder = host.createUTF8Decoder()
-        const doChunk = (value: Uint8Array) => {
-            // Massage and parse the chunk of data
-            const tokens: Logprob[] = []
-            const reasoningTokens: Logprob[] = []
-            let chunk = decoder.decode(value, { stream: true })
-
-            chunk = pref + chunk
-            const ch0 = chatResp
-            const rch0 = reasoningChatResp
-            chunk = chunk.replace(/^data:\s*(.*)[\r\n]+/gm, (_, json) => {
-                if (json === "[DONE]") {
-                    done = true
-                    return ""
-                }
-                try {
-                    doChoices(json, tokens, reasoningTokens)
-                } catch (e) {
-                    trace?.error(`error processing chunk`, e)
-                }
-                return ""
-            })
-            // end replace
-            const reasoningProgress = reasoningChatResp.slice(rch0.length)
-            const chatProgress = chatResp.slice(ch0.length)
-            if (
-                !isEmptyString(chatProgress) ||
-                !isEmptyString(reasoningProgress)
-            ) {
-                // logVerbose(`... ${progress.length} chars`);
-                partialCb?.(
-                    deleteUndefinedValues({
-                        responseSoFar: chatResp,
-                        reasoningSoFar: reasoningChatResp,
-                        reasoningChunk: reasoningProgress,
-                        tokensSoFar: numTokens,
-                        responseChunk: chatProgress,
-                        responseTokens: tokens,
-                        reasoningTokens,
-                        inner,
-                    })
-                )
-            }
-            pref = chunk
+      finishReason = finish_reason;
+    }
+  };
+
+  trace?.appendContent("\n\n");
+  if (!postReq.stream) {
+    const responseBody = await r.text();
+    doChoices(responseBody, [], []);
+  } else {
+    const decoder = host.createUTF8Decoder();
+    const doChunk = (value: Uint8Array) => {
+      // Massage and parse the chunk of data
+      const tokens: Logprob[] = [];
+      const reasoningTokens: Logprob[] = [];
+      let chunk = decoder.decode(value, { stream: true });
+
+      chunk = pref + chunk;
+      const ch0 = chatResp;
+      const rch0 = reasoningChatResp;
+      chunk = chunk.replace(/^data:\s*(.*)[\r\n]+/gm, (_, json) => {
+        if (json === "[DONE]") {
+          done = true;
+          return "";
        }
        try {
-            if (r.body.getReader) {
-                const reader = r.body.getReader()
-                while (!cancellationToken?.isCancellationRequested && !done) {
-                    const { done: readerDone, value } = await reader.read()
-                    if (readerDone) break
-                    doChunk(value)
-                }
-            } else {
-                for await (const value of r.body as any) {
-                    if (cancellationToken?.isCancellationRequested || done)
-                        break
-                    doChunk(value)
-                }
-            }
-            if (cancellationToken?.isCancellationRequested)
-                finishReason = "cancel"
-            else if (toolCalls?.length) finishReason = "tool_calls"
-            finishReason = finishReason || "stop" // some providers do not implement this final message
        } catch (e) {
-            finishReason = "fail"
-            error = serializeError(e)
+          doChoices(json, tokens, reasoningTokens);
        } catch (e) {
+          trace?.error(`error processing chunk`, e);
        }
-    }
-
-    trace?.appendContent("\n\n")
-    if (responseModel) trace?.itemValue(`model`, responseModel)
-    trace?.itemValue(`🏁 finish reason`, finishReason)
-    if (usage?.total_tokens) {
-        trace?.itemValue(
-            `🪙 tokens`,
-            `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion`
-        )
-    }
-
-    return deleteUndefinedValues({
-        text: chatResp,
-        reasoning: reasoningChatResp,
-        toolCalls,
-        finishReason,
-        usage,
-        error,
-        model: responseModel,
-        logprobs: lbs,
-    }) satisfies ChatCompletionResponse
-}
+        return "";
+      });
+      // end replace
+      const reasoningProgress = reasoningChatResp.slice(rch0.length);
+      const chatProgress = chatResp.slice(ch0.length);
+      if (!isEmptyString(chatProgress) || !isEmptyString(reasoningProgress)) {
+        // logVerbose(`... ${progress.length} chars`);
+        partialCb?.(
+          deleteUndefinedValues({
+            responseSoFar: chatResp,
+            reasoningSoFar: reasoningChatResp,
+            reasoningChunk: reasoningProgress,
+            tokensSoFar: numTokens,
+            responseChunk: chatProgress,
+            responseTokens: tokens,
+            reasoningTokens,
+            inner,
+          }),
+        );
+      }
+      pref = chunk;
+    };

    try {
+      if (r.body.getReader) {
+        const reader = r.body.getReader();
+        while (!cancellationToken?.isCancellationRequested && !done) {
+          const { done: readerDone, value } = await reader.read();
+          if (readerDone) break;
+          doChunk(value);
        }
+      } else {
+        for await (const value of r.body as any) {
+          if (cancellationToken?.isCancellationRequested || done) break;
+          doChunk(value);
        }
+      }
+      if (cancellationToken?.isCancellationRequested) finishReason = "cancel";
+      else if (toolCalls?.length) finishReason = "tool_calls";
+      finishReason = finishReason || "stop"; // some providers do not implement this final message
    } catch (e) {
+      finishReason = "fail";
+      error = serializeError(e);
    }
+  }
+
+  trace?.appendContent("\n\n");
+  if (responseModel) trace?.itemValue(`model`, responseModel);
+  trace?.itemValue(`🏁 finish reason`, 
finishReason) - if (usage?.total_tokens) { - trace?.itemValue( - `🪙 tokens`, - `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion` - ) - } - - return deleteUndefinedValues({ - text: chatResp, - reasoning: reasoningChatResp, - toolCalls, - finishReason, - usage, - error, - model: responseModel, - logprobs: lbs, - }) satisfies ChatCompletionResponse -} + return ""; + }); + // end replace + const reasoningProgress = reasoningChatResp.slice(rch0.length); + const chatProgress = chatResp.slice(ch0.length); + if (!isEmptyString(chatProgress) || !isEmptyString(reasoningProgress)) { + // logVerbose(`... ${progress.length} chars`); + partialCb?.( + deleteUndefinedValues({ + responseSoFar: chatResp, + reasoningSoFar: reasoningChatResp, + reasoningChunk: reasoningProgress, + tokensSoFar: numTokens, + responseChunk: chatProgress, + responseTokens: tokens, + reasoningTokens, + inner, + }), + ); + } + pref = chunk; + }; -export const OpenAIListModels: ListModelsFunction = async (cfg, options) => { try { - const fetch = await createFetch({ retries: 0, ...(options || {}) }) - let url = trimTrailingSlash(cfg.base) + "/models" - if (cfg.provider === MODEL_PROVIDER_AZURE_OPENAI) { - url = - trimTrailingSlash(cfg.base).replace(/deployments$/, "") + - "/models" - } - const res = await fetch(url, { - method: "GET", - headers: { - ...getConfigHeaders(cfg), - Accept: "application/json", - }, - }) - if (res.status !== 200) - return { - ok: false, - status: res.status, - error: serializeError(await res.json()), - } - const { data } = (await res.json()) as { - object: "list" - data: { - id: string - object: "model" - created: number - owned_by: string - }[] + if (r.body.getReader) { + const reader = r.body.getReader(); + while (!cancellationToken?.isCancellationRequested && !done) { + const { done: readerDone, value } = await reader.read(); + if (readerDone) break; + doChunk(value); } - return { - ok: true, - models: data.map( - (m) => - ({ - id: m.id, - details: `${m.id}, ${m.owned_by}`, - }) satisfies LanguageModelInfo - ), + } else { + for await (const value of r.body as any) { + if (cancellationToken?.isCancellationRequested || done) break; + doChunk(value); } + } + if (cancellationToken?.isCancellationRequested) finishReason = "cancel"; + else if (toolCalls?.length) finishReason = "tool_calls"; + finishReason = finishReason || "stop"; // some provider do not implement this final mesage } catch (e) { - return { ok: false, error: serializeError(e) } + finishReason = "fail"; + error = serializeError(e); } -} + } + + trace?.appendContent("\n\n"); + if (responseModel) trace?.itemValue(`model`, responseModel); + trace?.itemValue(`🏁 finish reason`, finishReason); + if (usage?.total_tokens) { + trace?.itemValue( + `🪙 tokens`, + `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion`, + ); + } + + return deleteUndefinedValues({ + text: chatResp, + reasoning: reasoningChatResp, + toolCalls, + finishReason, + usage, + error, + model: responseModel, + logprobs: lbs, + }) satisfies ChatCompletionResponse; +}; + +export const OpenAIListModels: ListModelsFunction = async (cfg, options) => { + try { + const fetch = await createFetch({ retries: 0, ...(options || {}) }); + let url = trimTrailingSlash(cfg.base) + "/models"; + if (cfg.provider === MODEL_PROVIDER_AZURE_OPENAI) { + url = trimTrailingSlash(cfg.base).replace(/deployments$/, "") + "/models"; + } + const res = await fetch(url, { + method: "GET", + headers: { + 
...getConfigHeaders(cfg), + Accept: "application/json", + }, + }); + if (res.status !== 200) + return { + ok: false, + status: res.status, + error: serializeError(await res.json()), + }; + const { data } = (await res.json()) as { + object: "list"; + data: { + id: string; + object: "model"; + created: number; + owned_by: string; + }[]; + }; + return { + ok: true, + models: data.map( + (m) => + ({ + id: m.id, + details: `${m.id}, ${m.owned_by}`, + }) satisfies LanguageModelInfo, + ), + }; + } catch (e) { + return { ok: false, error: serializeError(e) }; + } +}; /** * Transcribes an audio file using the specified language model configuration. @@ -644,51 +578,47 @@ export const OpenAIListModels: ListModelsFunction = async (cfg, options) => { * - `error`: Details of any error encountered. */ export async function OpenAITranscribe( - req: CreateTranscriptionRequest, - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions + req: CreateTranscriptionRequest, + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, ): Promise { - const { trace } = options || {} - try { - logVerbose( - `${cfg.provider}: transcribe ${req.file.type} ${prettyBytes(req.file.size)} with ${cfg.model}` - ) - const route = req.translate ? "translations" : "transcriptions" - const url = `${cfg.base}/audio/${route}` - trace?.itemValue(`url`, `[${url}](${url})`) - trace?.itemValue(`size`, req.file.size) - trace?.itemValue(`mime`, req.file.type) - const body = new FormData() - body.append("model", req.model) - body.append( - "response_format", - /whisper/.test(req.model) ? "verbose_json" : "json" - ) - if (req.temperature) - body.append("temperature", req.temperature.toString()) - if (req.language) body.append("language", req.language) - body.append("file", req.file) - - const freq = { - method: "POST", - headers: { - ...getConfigHeaders(cfg), - Accept: "application/json", - }, - body: body, - } - traceFetchPost(trace, url, freq.headers, freq.body) - // TODO: switch back to cross-fetch in the future - const res = await global.fetch(url, freq as any) - trace?.itemValue(`status`, `${res.status} ${res.statusText}`) - const j = await res.json() - if (!res.ok) return { text: undefined, error: j?.error } - else return j - } catch (e) { - logError(e) - trace?.error(e) - return { text: undefined, error: serializeError(e) } - } + const { trace } = options || {}; + try { + logVerbose( + `${cfg.provider}: transcribe ${req.file.type} ${prettyBytes(req.file.size)} with ${cfg.model}`, + ); + const route = req.translate ? "translations" : "transcriptions"; + const url = `${cfg.base}/audio/${route}`; + trace?.itemValue(`url`, `[${url}](${url})`); + trace?.itemValue(`size`, req.file.size); + trace?.itemValue(`mime`, req.file.type); + const body = new FormData(); + body.append("model", req.model); + body.append("response_format", /whisper/.test(req.model) ? 
"verbose_json" : "json"); + if (req.temperature) body.append("temperature", req.temperature.toString()); + if (req.language) body.append("language", req.language); + body.append("file", req.file); + + const freq = { + method: "POST", + headers: { + ...getConfigHeaders(cfg), + Accept: "application/json", + }, + body: body, + }; + traceFetchPost(trace, url, freq.headers, freq.body); + // TODO: switch back to cross-fetch in the future + const res = await global.fetch(url, freq as any); + trace?.itemValue(`status`, `${res.status} ${res.statusText}`); + const j = await res.json(); + if (!res.ok) return { text: undefined, error: j?.error }; + else return j; + } catch (e) { + logError(e); + trace?.error(e); + return { text: undefined, error: serializeError(e) }; + } } /** @@ -711,47 +641,46 @@ export async function OpenAITranscribe( * - error: Information about any error that occurred, or undefined if successful. */ export async function OpenAISpeech( - req: CreateSpeechRequest, - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions + req: CreateSpeechRequest, + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, ): Promise { - const { model, input, voice = "alloy", ...rest } = req - const { trace } = options || {} - const fetch = await createFetch(options) - try { - logVerbose(`${cfg.provider}: speak with ${cfg.model}`) - const url = `${cfg.base}/audio/speech` - trace?.itemValue(`url`, `[${url}](${url})`) - const body = { - model, - input, - voice, - ...rest, - } - const freq = { - method: "POST", - headers: { - ...getConfigHeaders(cfg), - "Content-Type": "application/json", - }, - body: JSON.stringify(body), - } - traceFetchPost(trace, url, freq.headers, body) - // TODO: switch back to cross-fetch in the future - const res = await fetch(url, freq as any) - trace?.itemValue(`status`, `${res.status} ${res.statusText}`) - if (!res.ok) - return { audio: undefined, error: (await res.json())?.error } - const j = await res.arrayBuffer() - return { audio: new Uint8Array(j) } satisfies CreateSpeechResult - } catch (e) { - logError(e) - trace?.error(e) - return { - audio: undefined, - error: serializeError(e), - } satisfies CreateSpeechResult - } + const { model, input, voice = "alloy", ...rest } = req; + const { trace } = options || {}; + const fetch = await createFetch(options); + try { + logVerbose(`${cfg.provider}: speak with ${cfg.model}`); + const url = `${cfg.base}/audio/speech`; + trace?.itemValue(`url`, `[${url}](${url})`); + const body = { + model, + input, + voice, + ...rest, + }; + const freq = { + method: "POST", + headers: { + ...getConfigHeaders(cfg), + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }; + traceFetchPost(trace, url, freq.headers, body); + // TODO: switch back to cross-fetch in the future + const res = await fetch(url, freq as any); + trace?.itemValue(`status`, `${res.status} ${res.statusText}`); + if (!res.ok) return { audio: undefined, error: (await res.json())?.error }; + const j = await res.arrayBuffer(); + return { audio: new Uint8Array(j) } satisfies CreateSpeechResult; + } catch (e) { + logError(e); + trace?.error(e); + return { + audio: undefined, + error: serializeError(e), + } satisfies CreateSpeechResult; + } } /** @@ -776,128 +705,110 @@ export async function OpenAISpeech( * @returns - A result containing either the generated image as a Uint8Array, the revised prompt, usage information, or an error message. 
 */
 export async function OpenAIImageGeneration(
-    req: CreateImageRequest,
-    cfg: LanguageModelConfiguration,
-    options: TraceOptions & CancellationOptions & RetryOptions
+  req: CreateImageRequest,
+  cfg: LanguageModelConfiguration,
+  options: TraceOptions & CancellationOptions & RetryOptions,
 ): Promise<CreateImageResult> {
-    const {
-        model,
-        prompt,
-        size = "1024x1024",
-        quality,
-        style,
-        outputFormat,
-        ...rest
-    } = req
-    const { trace } = options || {}
-    let url = `${cfg.base}/images/generations`
-
-    const isDallE = /^dall-e/i.test(model)
-    const isDallE2 = /^dall-e-2/i.test(model)
-    const isDallE3 = /^dall-e-3/i.test(model)
-    const isGpt = /^gpt-image/i.test(model)
-
-    const body: any = {
-        model,
-        prompt,
-        size,
-        quality,
-        style,
-        ...rest,
-    }
-
-    // auto is the default quality, so always delete it
-    if (body.quality === "auto" || isDallE2) delete body.quality
-    if (isDallE3) {
-        if (body.quality === "high") body.quality = "hd"
-        else delete body.quality
-    }
-    if (isGpt && body.quality === "hd") body.quality = "high"
-    if (!isDallE3) delete body.style
-    if (isDallE) body.response_format = "b64_json"
-
-    if (isDallE3) {
-        if (body.size === "portrait") body.size = "1024x1792"
-        else if (body.size === "landscape") body.size = "1792x1024"
-        else if (body.size === "square") body.size = "1024x1024"
-    } else if (isDallE2) {
-        if (
-            body.size === "portrait" ||
-            body.size === "landscape" ||
-            body.size === "square"
-        )
-            body.size = "1024x1024"
-    } else if (isGpt) {
-        if (body.size === "portrait") body.size = "1024x1536"
-        else if (body.size === "landscape") body.size = "1536x1024"
-        else if (body.size === "square") body.size = "1024x1024"
-        if (outputFormat) body.output_format = outputFormat
-    }
-
-    if (body.size === "auto") delete body.size
-
-    dbg("%o", {
-        quality: body.quality,
-        style: body.style,
-        response_format: body.response_format,
-        size: body.size,
-    })
-
-    if (cfg.type === "azure") {
-        const version = cfg.version || AZURE_OPENAI_API_VERSION
-        trace?.itemValue(`version`, version)
-        url =
-            trimTrailingSlash(cfg.base) +
-            "/" +
-            body.model +
-            `/images/generations?api-version=${version}`
-        delete body.model
-    }
-
-    const fetch = await createFetch(options)
-    try {
-        logInfo(
-            `generate image with ${cfg.provider}:${cfg.model} (this may take a while)`
-        )
-        const freq = {
-            method: "POST",
-            headers: {
-                ...getConfigHeaders(cfg),
-                "Content-Type": "application/json",
-            },
-            body: JSON.stringify(body),
-        }
-        // TODO: switch back to cross-fetch in the future
-        trace?.itemValue(`url`, `[${url}](${url})`)
-        traceFetchPost(trace, url, freq.headers, body)
-        const res = await fetch(url, freq as any)
-        dbg(`response: %d %s`, res.status, res.statusText)
-        trace?.itemValue(`status`, `${res.status} ${res.statusText}`)
-        if (!res.ok)
-            return {
-                image: undefined,
-                error: (await res.json())?.error || res.statusText,
-            }
-        const j: ImageGenerationResponse = await res.json()
-        dbg(`%O`, j)
-        const revisedPrompt = j.data[0]?.revised_prompt
-        if (revisedPrompt)
-            trace?.details(`📷 revised prompt`, j.data[0].revised_prompt)
-        const usage = j.usage
-        const buffer = fromBase64(j.data[0].b64_json)
-        return {
-            image: new Uint8Array(buffer),
-            revisedPrompt,
-            usage,
-        } satisfies CreateImageResult
-    } catch (e) {
-        logError(e)
-        trace?.error(e)
-        return {
-            image: undefined,
-            error: serializeError(e),
-        } satisfies CreateImageResult
-    }
+  const { model, prompt, size = "1024x1024", quality, style, outputFormat, ...rest } = req;
+  const { trace } = options || {};
+  let url =
`${cfg.base}/images/generations`; + + const isDallE = /^dall-e/i.test(model); + const isDallE2 = /^dall-e-2/i.test(model); + const isDallE3 = /^dall-e-3/i.test(model); + const isGpt = /^gpt-image/i.test(model); + + const body: any = { + model, + prompt, + size, + quality, + style, + ...rest, + }; + + // auto is the default quality, so always delete it + if (body.quality === "auto" || isDallE2) delete body.quality; + if (isDallE3) { + if (body.quality === "high") body.quality = "hd"; + else delete body.quality; + } + if (isGpt && body.quality === "hd") body.quality = "high"; + if (!isDallE3) delete body.style; + if (isDallE) body.response_format = "b64_json"; + + if (isDallE3) { + if (body.size === "portrait") body.size = "1024x1792"; + else if (body.size === "landscape") body.size = "1792x1024"; + else if (body.size === "square") body.size = "1024x1024"; + } else if (isDallE2) { + if (body.size === "portrait" || body.size === "landscape" || body.size === "square") + body.size = "1024x1024"; + } else if (isGpt) { + if (body.size === "portrait") body.size = "1024x1536"; + else if (body.size === "landscape") body.size = "1536x1024"; + else if (body.size === "square") body.size = "1024x1024"; + if (outputFormat) body.output_format = outputFormat; + } + + if (body.size === "auto") delete body.size; + + dbg("%o", { + quality: body.quality, + style: body.style, + response_format: body.response_format, + size: body.size, + }); + + if (cfg.type === "azure") { + const version = cfg.version || AZURE_OPENAI_API_VERSION; + trace?.itemValue(`version`, version); + url = + trimTrailingSlash(cfg.base) + "/" + body.model + `/images/generations?api-version=${version}`; + delete body.model; + } + + const fetch = await createFetch(options); + try { + logInfo(`generate image with ${cfg.provider}:${cfg.model} (this may take a while)`); + const freq = { + method: "POST", + headers: { + ...getConfigHeaders(cfg), + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }; + // TODO: switch back to cross-fetch in the future + trace?.itemValue(`url`, `[${url}](${url})`); + traceFetchPost(trace, url, freq.headers, body); + const res = await fetch(url, freq as any); + dbg(`response: %d %s`, res.status, res.statusText); + trace?.itemValue(`status`, `${res.status} ${res.statusText}`); + if (!res.ok) + return { + image: undefined, + error: (await res.json())?.error || res.statusText, + }; + const j: ImageGenerationResponse = await res.json(); + dbg(`%O`, j); + const revisedPrompt = j.data[0]?.revised_prompt; + if (revisedPrompt) trace?.details(`📷 revised prompt`, j.data[0].revised_prompt); + const usage = j.usage; + const buffer = fromBase64(j.data[0].b64_json); + return { + image: new Uint8Array(buffer), + revisedPrompt, + usage, + } satisfies CreateImageResult; + } catch (e) { + logError(e); + trace?.error(e); + return { + image: undefined, + error: serializeError(e), + } satisfies CreateImageResult; + } } /** @@ -912,73 +823,68 @@ export async function OpenAIImageGeneration( * for the given input. Handles response parsing, error checking, and supports cancellation. 
*/ export async function OpenAIEmbedder( - input: string, - cfg: LanguageModelConfiguration, - options: TraceOptions & CancellationOptions & RetryOptions + input: string, + cfg: LanguageModelConfiguration, + options: TraceOptions & CancellationOptions & RetryOptions, ): Promise { - const { trace, cancellationToken } = options || {} - const { base, provider, type, model } = cfg - try { - const route = "embeddings" - let url: string - const body: EmbeddingCreateParams = { input, model: cfg.model } - - // Determine the URL based on provider type - if ( - provider === MODEL_PROVIDER_AZURE_OPENAI || - provider === MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI || - type === "azure" || - type === "azure_serverless" - ) { - url = `${trimTrailingSlash(base)}/${model}/embeddings?api-version=${AZURE_OPENAI_API_VERSION}` - delete body.model - } else if (provider === MODEL_PROVIDER_AZURE_SERVERLESS_MODELS) { - url = base.replace(/^https?:\/\/([^/]+)\/?/, body.model) - delete body.model - } else { - url = `${base}/${route}` - } + const { trace, cancellationToken } = options || {}; + const { base, provider, type, model } = cfg; + try { + const route = "embeddings"; + let url: string; + const body: EmbeddingCreateParams = { input, model: cfg.model }; + + // Determine the URL based on provider type + if ( + provider === MODEL_PROVIDER_AZURE_OPENAI || + provider === MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI || + type === "azure" || + type === "azure_serverless" + ) { + url = `${trimTrailingSlash(base)}/${model}/embeddings?api-version=${AZURE_OPENAI_API_VERSION}`; + delete body.model; + } else if (provider === MODEL_PROVIDER_AZURE_SERVERLESS_MODELS) { + url = base.replace(/^https?:\/\/([^/]+)\/?/, body.model); + delete body.model; + } else { + url = `${base}/${route}`; + } - trace?.itemValue(`url`, `[${url}](${url})`) + trace?.itemValue(`url`, `[${url}](${url})`); - const freq = { - method: "POST", - headers: { - ...getConfigHeaders(cfg), - "Content-Type": "application/json", - Accept: "application/json", - }, - body: JSON.stringify(body), - } - // traceFetchPost(trace, url, freq.headers, body) - logVerbose( - `${type}: embedding ${ellipse(input, 44)} with ${provider}:${model}` - ) - const fetch = await createFetch(options) - checkCancelled(cancellationToken) - const res = await fetch(url, freq) - trace?.itemValue(`response`, `${res.status} ${res.statusText}`) - - if (res.status === 429) - return { error: "rate limited", status: "rate_limited" } - else if (res.status < 300) { - const data = (await res.json()) as EmbeddingCreateResponse - return { - status: "success", - data: data.data - .sort((a, b) => a.index - b.index) - .map((d) => d.embedding), - model: data.model, - } - } else { - return { error: res.statusText, status: "error" } - } - } catch (e) { - if (isCancelError(e)) return { status: "cancelled" } - logError(e) - trace?.error(e) - return { status: "error", error: errorMessage(e) } + const freq = { + method: "POST", + headers: { + ...getConfigHeaders(cfg), + "Content-Type": "application/json", + Accept: "application/json", + }, + body: JSON.stringify(body), + }; + // traceFetchPost(trace, url, freq.headers, body) + logVerbose(`${type}: embedding ${ellipse(input, 44)} with ${provider}:${model}`); + const fetch = await createFetch(options); + checkCancelled(cancellationToken); + const res = await fetch(url, freq); + trace?.itemValue(`response`, `${res.status} ${res.statusText}`); + + if (res.status === 429) return { error: "rate limited", status: "rate_limited" }; + else if (res.status < 300) { + const data = 
(await res.json()) as EmbeddingCreateResponse; + return { + status: "success", + data: data.data.sort((a, b) => a.index - b.index).map((d) => d.embedding), + model: data.model, + }; + } else { + return { error: res.statusText, status: "error" }; } + } catch (e) { + if (isCancelError(e)) return { status: "cancelled" }; + logError(e); + trace?.error(e); + return { status: "error", error: errorMessage(e) }; + } } /** @@ -994,25 +900,23 @@ export async function OpenAIEmbedder( * @returns A frozen object defining the language model with specified capabilities. */ export function LocalOpenAICompatibleModel( - providerId: string, - options: { - listModels?: boolean - transcribe?: boolean - speech?: boolean - imageGeneration?: boolean - } + providerId: string, + options: { + listModels?: boolean; + transcribe?: boolean; + speech?: boolean; + imageGeneration?: boolean; + }, ) { - return Object.freeze( - deleteUndefinedValues({ - completer: OpenAIChatCompletion, - id: providerId, - listModels: options?.listModels ? OpenAIListModels : undefined, - transcriber: options?.transcribe ? OpenAITranscribe : undefined, - speaker: options?.speech ? OpenAISpeech : undefined, - imageGenerator: options?.imageGeneration - ? OpenAIImageGeneration - : undefined, - embedder: OpenAIEmbedder, - }) - ) + return Object.freeze( + deleteUndefinedValues({ + completer: OpenAIChatCompletion, + id: providerId, + listModels: options?.listModels ? OpenAIListModels : undefined, + transcriber: options?.transcribe ? OpenAITranscribe : undefined, + speaker: options?.speech ? OpenAISpeech : undefined, + imageGenerator: options?.imageGeneration ? OpenAIImageGeneration : undefined, + embedder: OpenAIEmbedder, + }), + ); } diff --git a/packages/core/src/packagemanagers.ts b/packages/core/src/packagemanagers.ts index 20dae4ac7a..2857ccfc59 100644 --- a/packages/core/src/packagemanagers.ts +++ b/packages/core/src/packagemanagers.ts @@ -1,6 +1,6 @@ -import { resolveCommand, detect, Agent } from "package-manager-detector" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("pkg") +import { resolveCommand, detect, Agent } from "package-manager-detector"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("pkg"); /** * Resolves the install command for the detected package manager in a given directory. @@ -9,46 +9,46 @@ const dbg = genaiscriptDebug("pkg") * @returns The resolved command and arguments for a "frozen" install mode, or undefined if no package manager is detected. */ export async function packageResolveInstall(cwd: string) { - const pm = await detect({ cwd }) - if (!pm) return undefined + const pm = await detect({ cwd }); + if (!pm) return undefined; - const { command, args } = resolveCommand(pm.agent, "frozen", []) - return { command, args } + const { command, args } = resolveCommand(pm.agent, "frozen", []); + return { command, args }; } export async function packageResolveExecute( - cwd: string, - args: string[], - options?: { - agent?: "npm" | "yarn" | "pnpm" | "auto" - } + cwd: string, + args: string[], + options?: { + agent?: "npm" | "yarn" | "pnpm" | "auto"; + }, ): Promise<{ - command: string - args: string[] + command: string; + args: string[]; }> { - dbg(`resolving`) - args = args.filter((a) => a !== undefined) - let agent: Agent = options?.agent === "auto" ? 
undefined : options?.agent - if (!agent) { - const pm = await detect({ cwd }) - if ( - pm && - (pm.agent === "npm" || - pm.agent === "pnpm" || - pm.agent === "pnpm@6" || - pm.agent === "yarn" || - pm.agent === "yarn@berry") - ) - agent = pm.agent - } - agent = agent || "npm" - dbg(`agent: %s`, agent) - if (agent === "npm") args.unshift("--yes") - const resolved = resolveCommand( - agent, - "execute", - args.filter((a) => a !== undefined) + dbg(`resolving`); + args = args.filter((a) => a !== undefined); + let agent: Agent = options?.agent === "auto" ? undefined : options?.agent; + if (!agent) { + const pm = await detect({ cwd }); + if ( + pm && + (pm.agent === "npm" || + pm.agent === "pnpm" || + pm.agent === "pnpm@6" || + pm.agent === "yarn" || + pm.agent === "yarn@berry") ) - dbg(`resolved: %o`, resolved) - return resolved + agent = pm.agent; + } + agent = agent || "npm"; + dbg(`agent: %s`, agent); + if (agent === "npm") args.unshift("--yes"); + const resolved = resolveCommand( + agent, + "execute", + args.filter((a) => a !== undefined), + ); + dbg(`resolved: %o`, resolved); + return resolved; } diff --git a/packages/core/src/parameters.test.ts b/packages/core/src/parameters.test.ts index 593fc6871f..35bc75007e 100644 --- a/packages/core/src/parameters.test.ts +++ b/packages/core/src/parameters.test.ts @@ -1,140 +1,137 @@ -import { describe, test } from "node:test" -import assert from "node:assert" +import { describe, test } from "node:test"; +import assert from "node:assert"; -import { - promptParameterTypeToJSONSchema, - promptParametersSchemaToJSONSchema, -} from "./parameters" -import { parametersToVars, proxifyEnvVars } from "./vars" +import { promptParameterTypeToJSONSchema, promptParametersSchemaToJSONSchema } from "./parameters"; +import { parametersToVars, proxifyEnvVars } from "./vars"; describe("promptParameterTypeToJSONSchema", () => { - test("string type", () => { - const result = promptParameterTypeToJSONSchema("test") - assert.deepStrictEqual(result, { type: "string", default: "test" }) - }) - test("schema string type", () => { - const result = promptParameterTypeToJSONSchema({ - type: "string", - default: "test", - }) - assert.deepStrictEqual(result, { type: "string", default: "test" }) - }) + test("string type", () => { + const result = promptParameterTypeToJSONSchema("test"); + assert.deepStrictEqual(result, { type: "string", default: "test" }); + }); + test("schema string type", () => { + const result = promptParameterTypeToJSONSchema({ + type: "string", + default: "test", + }); + assert.deepStrictEqual(result, { type: "string", default: "test" }); + }); - test("schema string type", () => { - const result = promptParameterTypeToJSONSchema({ - type: "string", - required: true, - }) - assert.deepStrictEqual(result, { type: "string" }) - }) + test("schema string type", () => { + const result = promptParameterTypeToJSONSchema({ + type: "string", + required: true, + }); + assert.deepStrictEqual(result, { type: "string" }); + }); - test("number type", () => { - const result = promptParameterTypeToJSONSchema(42) - assert.deepStrictEqual(result, { type: "integer", default: 42 }) - }) + test("number type", () => { + const result = promptParameterTypeToJSONSchema(42); + assert.deepStrictEqual(result, { type: "integer", default: 42 }); + }); - test("boolean type", () => { - const result = promptParameterTypeToJSONSchema(true) - assert.deepStrictEqual(result, { type: "boolean", default: true }) - }) + test("boolean type", () => { + const result = 
promptParameterTypeToJSONSchema(true); + assert.deepStrictEqual(result, { type: "boolean", default: true }); + }); - test("array type", () => { - const result = promptParameterTypeToJSONSchema([42]) - assert.deepStrictEqual(result, { - type: "array", - items: { type: "integer", default: 42 }, - }) - }) + test("array type", () => { + const result = promptParameterTypeToJSONSchema([42]); + assert.deepStrictEqual(result, { + type: "array", + items: { type: "integer", default: 42 }, + }); + }); - test("object type", () => { - const result = promptParameterTypeToJSONSchema({ key: "value" }) - assert.deepStrictEqual(result, { - type: "object", - properties: { key: { type: "string", default: "value" } }, - required: [], - }) - }) + test("object type", () => { + const result = promptParameterTypeToJSONSchema({ key: "value" }); + assert.deepStrictEqual(result, { + type: "object", + properties: { key: { type: "string", default: "value" } }, + required: [], + }); + }); - test("object required type", () => { - const result = promptParameterTypeToJSONSchema({ - key: "value", - key2: { type: "string", required: true }, - }) - assert.deepStrictEqual(result, { - type: "object", - properties: { - key: { type: "string", default: "value" }, - key2: { type: "string" }, - }, - required: ["key2"], - }) - }) + test("object required type", () => { + const result = promptParameterTypeToJSONSchema({ + key: "value", + key2: { type: "string", required: true }, + }); + assert.deepStrictEqual(result, { + type: "object", + properties: { + key: { type: "string", default: "value" }, + key2: { type: "string" }, + }, + required: ["key2"], + }); + }); - test("unsupported type", () => { - assert.throws(() => promptParameterTypeToJSONSchema(() => {}), Error) - }) -}) + test("unsupported type", () => { + assert.throws(() => promptParameterTypeToJSONSchema(() => {}), Error); + }); +}); describe("promptParametersSchemaToJSONSchema", () => { - test("'value'", () => { - const parameters = { key: "value" } - const result = promptParametersSchemaToJSONSchema(parameters) - assert.deepStrictEqual(result, { - type: "object", - properties: { key: { type: "string", default: "value" } }, - required: [], - }) - }) - test("''", () => { - const parameters = { key: "" } - const result = promptParametersSchemaToJSONSchema(parameters) - assert.deepStrictEqual(result, { - type: "object", - properties: { key: { type: "string" } }, - required: ["key"], - }) - }) - test("123", () => { - const parameters = { key: 123 } - const result = promptParametersSchemaToJSONSchema(parameters) - assert.deepStrictEqual(result, { - type: "object", - properties: { key: { type: "integer", default: 123 } }, - required: [], - }) - }) - test("12.3", () => { - const parameters = { key: 12.3 } - const result = promptParametersSchemaToJSONSchema(parameters) - assert.deepStrictEqual(result, { - type: "object", - properties: { key: { type: "number", default: 12.3 } }, - required: [], - }) - }) - test("NaN", () => { - const parameters = { key: NaN } - const result = promptParametersSchemaToJSONSchema(parameters) - assert.deepStrictEqual(result, { - type: "object", - properties: { key: { type: "number" } }, - required: ["key"], - }) - }) -}) + test("'value'", () => { + const parameters = { key: "value" }; + const result = promptParametersSchemaToJSONSchema(parameters); + assert.deepStrictEqual(result, { + type: "object", + properties: { key: { type: "string", default: "value" } }, + required: [], + }); + }); + test("''", () => { + const parameters = { key: "" }; + const 
result = promptParametersSchemaToJSONSchema(parameters); + assert.deepStrictEqual(result, { + type: "object", + properties: { key: { type: "string" } }, + required: ["key"], + }); + }); + test("123", () => { + const parameters = { key: 123 }; + const result = promptParametersSchemaToJSONSchema(parameters); + assert.deepStrictEqual(result, { + type: "object", + properties: { key: { type: "integer", default: 123 } }, + required: [], + }); + }); + test("12.3", () => { + const parameters = { key: 12.3 }; + const result = promptParametersSchemaToJSONSchema(parameters); + assert.deepStrictEqual(result, { + type: "object", + properties: { key: { type: "number", default: 12.3 } }, + required: [], + }); + }); + test("NaN", () => { + const parameters = { key: NaN }; + const result = promptParametersSchemaToJSONSchema(parameters); + assert.deepStrictEqual(result, { + type: "object", + properties: { key: { type: "number" } }, + required: ["key"], + }); + }); +}); describe("proxifyVars", () => { - test("proxify variables", () => { - const res = { key: "value" } - const proxy = proxifyEnvVars(res) - assert.strictEqual(proxy.key, "value") - }) -}) + test("proxify variables", () => { + const res = { key: "value" }; + const proxy = proxifyEnvVars(res); + assert.strictEqual(proxy.key, "value"); + }); +}); describe("parametersToVars", () => { - test("convert parameters to vars", () => { - const parameters = { key: "value" } - const result = parametersToVars(parameters) - assert.deepStrictEqual(result, ["key=value"]) - }) -}) + test("convert parameters to vars", () => { + const parameters = { key: "value" }; + const result = parametersToVars(parameters); + assert.deepStrictEqual(result, ["key=value"]); + }); +}); diff --git a/packages/core/src/parameters.ts b/packages/core/src/parameters.ts index c1f7a88d79..ddd9234ce9 100644 --- a/packages/core/src/parameters.ts +++ b/packages/core/src/parameters.ts @@ -1,20 +1,20 @@ -import { deleteUndefinedValues } from "./cleaners" +import { deleteUndefinedValues } from "./cleaners"; function isJSONSchema(obj: any) { - if (typeof obj === "object" && obj.type === "object") return true - if (typeof obj === "object" && obj.type === "array") return true - return false + if (typeof obj === "object" && obj.type === "object") return true; + if (typeof obj === "object" && obj.type === "array") return true; + return false; } function isPromptParameterTypeRequired(t: PromptParameterType): boolean { - const ta = t as any - if (typeof t === "string" && t === "") return true - if (typeof t === "number" && isNaN(t)) return true - return !!ta?.required + const ta = t as any; + if (typeof t === "string" && t === "") return true; + if (typeof t === "number" && isNaN(t)) return true; + return !!ta?.required; } export interface PromptParametersSchemaConversionOptions { - noDefaults?: boolean + noDefaults?: boolean; } /** @@ -26,63 +26,48 @@ export interface PromptParametersSchemaConversionOptions { * @throws Will throw an error if the input type is not supported. */ export function promptParameterTypeToJSONSchema( - t: PromptParameterType | [PromptParameterType], - options?: PromptParametersSchemaConversionOptions -): - | JSONSchemaNumber - | JSONSchemaString - | JSONSchemaBoolean - | JSONSchemaObject - | JSONSchemaArray { - const { noDefaults } = options || {} - if (typeof t === "string") - return deleteUndefinedValues({ - type: "string", - default: noDefaults || t === "" ? 
undefined : t, - }) satisfies JSONSchemaString - else if (typeof t === "number") - return deleteUndefinedValues({ - type: Number.isInteger(t) ? "integer" : "number", - default: noDefaults || isNaN(t) ? undefined : t, - }) satisfies JSONSchemaNumber - else if (typeof t === "boolean") - return deleteUndefinedValues({ - type: "boolean", - default: noDefaults ? undefined : t, - }) satisfies JSONSchemaBoolean - else if (Array.isArray(t)) - return { - type: "array", - items: promptParameterTypeToJSONSchema(t[0], options), - } satisfies JSONSchemaArray - else if ( - typeof t === "object" && - ["number", "integer", "string", "boolean", "object"].includes( - (t as any).type - ) - ) { - const { required, ...rest } = t as any - return < - | JSONSchemaNumber - | JSONSchemaString - | JSONSchemaBoolean - | JSONSchemaObject - >{ ...rest } - } else if (typeof t === "object") { - const o = { - type: "object", - properties: Object.fromEntries( - Object.entries(t).map(([k, v]) => [ - k, - promptParameterTypeToJSONSchema(v, options), - ]) - ), - required: Object.entries(t) - .filter(([, v]) => isPromptParameterTypeRequired(v)) - .map(([k]) => k), - } satisfies JSONSchemaObject - return o - } else throw new Error(`prompt type ${typeof t} not supported`) + t: PromptParameterType | [PromptParameterType], + options?: PromptParametersSchemaConversionOptions, +): JSONSchemaNumber | JSONSchemaString | JSONSchemaBoolean | JSONSchemaObject | JSONSchemaArray { + const { noDefaults } = options || {}; + if (typeof t === "string") + return deleteUndefinedValues({ + type: "string", + default: noDefaults || t === "" ? undefined : t, + }) satisfies JSONSchemaString; + else if (typeof t === "number") + return deleteUndefinedValues({ + type: Number.isInteger(t) ? "integer" : "number", + default: noDefaults || isNaN(t) ? undefined : t, + }) satisfies JSONSchemaNumber; + else if (typeof t === "boolean") + return deleteUndefinedValues({ + type: "boolean", + default: noDefaults ? undefined : t, + }) satisfies JSONSchemaBoolean; + else if (Array.isArray(t)) + return { + type: "array", + items: promptParameterTypeToJSONSchema(t[0], options), + } satisfies JSONSchemaArray; + else if ( + typeof t === "object" && + ["number", "integer", "string", "boolean", "object"].includes((t as any).type) + ) { + const { required, ...rest } = t as any; + return { ...rest }; + } else if (typeof t === "object") { + const o = { + type: "object", + properties: Object.fromEntries( + Object.entries(t).map(([k, v]) => [k, promptParameterTypeToJSONSchema(v, options)]), + ), + required: Object.entries(t) + .filter(([, v]) => isPromptParameterTypeRequired(v)) + .map(([k]) => k), + } satisfies JSONSchemaObject; + return o; + } else throw new Error(`prompt type ${typeof t} not supported`); } /** @@ -93,26 +78,23 @@ export function promptParameterTypeToJSONSchema( * @returns A JSONSchema object or undefined if the input parameters are undefined. 
 */
 export function promptParametersSchemaToJSONSchema(
-    parameters: PromptParametersSchema | JSONSchema | undefined,
-    options?: PromptParametersSchemaConversionOptions
+  parameters: PromptParametersSchema | JSONSchema | undefined,
+  options?: PromptParametersSchemaConversionOptions,
 ): JSONSchema | undefined {
-    if (!parameters) return undefined
-    if (isJSONSchema(parameters)) return parameters as JSONSchema
+  if (!parameters) return undefined;
+  if (isJSONSchema(parameters)) return parameters as JSONSchema;
 
-    const res: Required<
-        Pick<JSONSchemaObject, "type" | "properties" | "required">
-    > = {
-        type: "object",
-        properties: {},
-        required: [],
-    }
+  const res: Required<Pick<JSONSchemaObject, "type" | "properties" | "required">> = {
+    type: "object",
+    properties: {},
+    required: [],
+  };
 
-    for (const [k, v] of Object.entries(parameters as PromptParametersSchema)) {
-        const t = promptParameterTypeToJSONSchema(v, options)
-        const required = isPromptParameterTypeRequired(v)
-        res.properties[k] = t
-        if (t.type !== "object" && t.type !== "array" && required)
-            res.required.push(k)
-    }
-    return res satisfies JSONSchemaObject
+  for (const [k, v] of Object.entries(parameters as PromptParametersSchema)) {
+    const t = promptParameterTypeToJSONSchema(v, options);
+    const required = isPromptParameterTypeRequired(v);
+    res.properties[k] = t;
+    if (t.type !== "object" && t.type !== "array" && required) res.required.push(k);
+  }
+  return res satisfies JSONSchemaObject;
 }
diff --git a/packages/core/src/parser.ts b/packages/core/src/parser.ts
index 497d0fb759..59a594189f 100644
--- a/packages/core/src/parser.ts
+++ b/packages/core/src/parser.ts
@@ -1,17 +1,17 @@
 // Importing utility functions and constants from other files
-import { logVerbose, logWarn, strcmp } from "./util" // String comparison function
-import { parsePromptScript } from "./template" // Function to parse scripts
-import { readText } from "./fs" // Function to read text from a file
-import { GENAI_ANYTS_REGEX } from "./constants" // Constants for MIME types and prefixes
-import { Project } from "./server/messages"
-import { resolveSystems } from "./systems"
-import { resolveScriptParametersSchema } from "./vars"
-import { dirname, join, resolve } from "node:path"
-import { fileURLToPath } from "node:url"
-import { readdir } from "node:fs/promises"
-import { uniq } from "es-toolkit"
-import { genaiscriptDebug } from "./debug"
-const dbg = genaiscriptDebug("parser")
+import { logVerbose, logWarn, strcmp } from "./util"; // String comparison function
+import { parsePromptScript } from "./template"; // Function to parse scripts
+import { readText } from "./fs"; // Function to read text from a file
+import { GENAI_ANYTS_REGEX } from "./constants"; // Constants for MIME types and prefixes
+import { Project } from "./server/messages";
+import { resolveSystems } from "./systems";
+import { resolveScriptParametersSchema } from "./vars";
+import { dirname, join, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+import { readdir } from "node:fs/promises";
+import { uniq } from "es-toolkit";
+import { genaiscriptDebug } from "./debug";
+const dbg = genaiscriptDebug("parser");
 
 /**
  * Converts a string to a character position represented as [row, column].
@@ -21,8 +21,8 @@ const dbg = genaiscriptDebug("parser")
  * @returns The position as [row, column].
 */
 export function stringToPos(str: string): CharPosition {
-    if (!str) return [0, 0] // Return default position if string is empty
-    return [str.replace(/[^\n]/g, "").length, str.replace(/[^]*\n/, "").length]
+  if (!str) return [0, 0]; // Return default position if string is empty
+  return [str.replace(/[^\n]/g, "").length, str.replace(/[^]*\n/, "").length];
 }
 
 /**
@@ -34,75 +34,68 @@ export function stringToPos(str: string): CharPosition {
  * @returns Project - The project with processed templates and diagnostics.
  */
 export async function parseProject(options: { scriptFiles: string[] }) {
-    const { scriptFiles } = options
-    const genaisrcDir = resolve(
-        join(
-            dirname(dirname(__filename ?? fileURLToPath(import.meta.url))),
-            "genaisrc"
-        )
-    ) // ignore esbuild warning
-    dbg(`genaisrc: %s`, genaisrcDir)
-    const prj: Project = {
-        systemDir: genaisrcDir,
-        scripts: [],
-        diagnostics: [],
-    }
-    const systemPrompts = await (
-        await readdir(genaisrcDir)
-    ).filter((f) => GENAI_ANYTS_REGEX.test(f))
-    dbg(`system prompts: %d`, systemPrompts.length)
-    // Process each script file, parsing its content and updating the project
-    const scripts: Record<string, PromptScript> = {}
-    for (const fn of systemPrompts) {
-        const f = join(genaisrcDir, fn)
-        const tmpl = await parsePromptScript(f, await readText(f))
-        if (!tmpl) {
-            logWarn(`skipping invalid system script: ${fn}`)
-            continue
-        } // Skip if no template is parsed
-        prj.scripts.push(tmpl) // Add to project templates
-        scripts[tmpl.id] = tmpl
-    }
+  const { scriptFiles } = options;
+  const genaisrcDir = resolve(
+    join(dirname(dirname(__filename ?? fileURLToPath(import.meta.url))), "genaisrc"),
+  ); // ignore esbuild warning
+  dbg(`genaisrc: %s`, genaisrcDir);
+  const prj: Project = {
+    systemDir: genaisrcDir,
+    scripts: [],
+    diagnostics: [],
+  };
+  const systemPrompts = await (await readdir(genaisrcDir)).filter((f) => GENAI_ANYTS_REGEX.test(f));
+  dbg(`system prompts: %d`, systemPrompts.length);
+  // Process each script file, parsing its content and updating the project
+  const scripts: Record<string, PromptScript> = {};
+  for (const fn of systemPrompts) {
+    const f = join(genaisrcDir, fn);
+    const tmpl = await parsePromptScript(f, await readText(f));
+    if (!tmpl) {
+      logWarn(`skipping invalid system script: ${fn}`);
+      continue;
+    } // Skip if no template is parsed
+    prj.scripts.push(tmpl); // Add to project templates
+    scripts[tmpl.id] = tmpl;
+  }
 
-    dbg(`user scripts: %d`, scriptFiles.length)
-    for (const f of uniq(scriptFiles).filter(
-        (f) => resolve(dirname(f)) !== genaisrcDir
-    )) {
-        const tmpl = await parsePromptScript(f, await readText(f))
-        if (!tmpl) {
-            logWarn(`skipping invalid script ${f}`)
-            continue
-        } // Skip if no template is parsed
-        if (scripts[tmpl.id]) {
-            logWarn(`duplicate script '${tmpl.id}' (${f})`)
-            logVerbose(` already defined in ${scripts[tmpl.id].filename}`)
-            continue
-        }
-        prj.scripts.push(tmpl) // Add t
-        scripts[tmpl.id] = tmpl
+  dbg(`user scripts: %d`, scriptFiles.length);
+  for (const f of uniq(scriptFiles).filter((f) => resolve(dirname(f)) !== genaisrcDir)) {
+    const tmpl = await parsePromptScript(f, await readText(f));
+    if (!tmpl) {
+      logWarn(`skipping invalid script ${f}`);
+      continue;
+    } // Skip if no template is parsed
+    if (scripts[tmpl.id]) {
+      logWarn(`duplicate script '${tmpl.id}' (${f})`);
+      logVerbose(` already defined in ${scripts[tmpl.id].filename}`);
+      continue;
    }
+    prj.scripts.push(tmpl); // Add t
+    scripts[tmpl.id] = tmpl;
+  }
 
-    /**
-     * Generates a sorting key for a PromptScript
-     * Determines priority based on whether a script is unlisted or has a filename.
-     * @param t - The PromptScript to generate the key for.
-     * @returns string - The sorting key.
-     */
-    function templKey(t: PromptScript) {
-        const pref = t.unlisted ? "Z" : t.filename ? "A" : "B" // Determine prefix for sorting
-        return pref + t.title + t.id // Concatenate for final sorting key
-    }
+  /**
+   * Generates a sorting key for a PromptScript
+   * Determines priority based on whether a script is unlisted or has a filename.
+   * @param t - The PromptScript to generate the key for.
+   * @returns string - The sorting key.
+   */
+  function templKey(t: PromptScript) {
+    const pref = t.unlisted ? "Z" : t.filename ? "A" : "B"; // Determine prefix for sorting
+    return pref + t.title + t.id; // Concatenate for final sorting key
+  }
 
-    // Sort templates by the generated key
-    prj.scripts.sort((a, b) => strcmp(templKey(a), templKey(b)))
+  // Sort templates by the generated key
+  prj.scripts.sort((a, b) => strcmp(templKey(a), templKey(b)));
 
-    // compute systems
-    prj.scripts
-        .filter((s) => !s.isSystem)
-        .forEach((s) => {
-            s.resolvedSystem = resolveSystems(prj, s)
-            s.inputSchema = resolveScriptParametersSchema(prj, s)
-        })
+  // compute systems
+  prj.scripts
+    .filter((s) => !s.isSystem)
+    .forEach((s) => {
+      s.resolvedSystem = resolveSystems(prj, s);
+      s.inputSchema = resolveScriptParametersSchema(prj, s);
+    });
 
-    return prj // Return the fully parsed project
+  return prj; // Return the fully parsed project
 }
diff --git a/packages/core/src/parsers.test.ts b/packages/core/src/parsers.test.ts
index 384ba204ef..1d4d59a786 100644
--- a/packages/core/src/parsers.test.ts
+++ b/packages/core/src/parsers.test.ts
@@ -1,195 +1,190 @@
-import { describe, beforeEach, test } from "node:test"
-import assert from "node:assert/strict"
-import { createParsers } from "./parsers"
-import { MarkdownTrace } from "./trace"
-import { XLSXParse } from "./xlsx"
-import { readFile } from "fs/promises"
-import { resolve } from "path"
-import { TestHost } from "./testhost"
-import { estimateTokens } from "./tokens"
-import { writeFile } from "fs/promises"
+import { describe, beforeEach, test } from "node:test";
+import assert from "node:assert/strict";
+import { createParsers } from "./parsers";
+import { MarkdownTrace } from "./trace";
+import { XLSXParse } from "./xlsx";
+import { readFile } from "fs/promises";
+import { resolve } from "path";
+import { TestHost } from "./testhost";
+import { estimateTokens } from "./tokens";
+import { writeFile } from "fs/promises";
 
 describe("parsers", async () => {
-    let trace: MarkdownTrace
-    let model: string
-    let parsers: Awaited<ReturnType<typeof createParsers>>
-
-    beforeEach(async () => {
-        trace = new MarkdownTrace({})
-        model = "test model"
-        parsers = await createParsers({ trace, model })
-        TestHost.install()
-    })
-
-    test("JSON5", () => {
-        const result = parsers.JSON5('{"key": "value"}')
-        assert.deepStrictEqual(result, { key: "value" })
-    })
-
-    test("JSONL", () => {
-        const result = parsers.JSONL('{"key": "value"}\n{"key2": "value2"}')
-        assert.deepStrictEqual(result[0], { key: "value" })
-        assert.deepStrictEqual(result[1], { key2: "value2" })
-    })
-
-    test("YAML", () => {
-        const result = parsers.YAML("key: value")
-        assert.deepStrictEqual(result, { key: "value" })
-    })
-
-    test("XML parser", () => {
-        const result = parsers.XML("<key>value</key>")
-        assert.deepStrictEqual(result, { key: "value" })
-    })
-
-    test("TOML", () => {
-        const result = parsers.TOML('key = "value"')
-        assert.equal(result.key, "value")
-    })
-
-    await test("PDF", async () => {
-        const result = await parsers.PDF({
-            filename: "../sample/src/rag/loremipsum.pdf",
-        })
-        assert(result.file.content.includes("Lorem"))
-    })
-
-    await test("prompty", async () => {
-        const result = await parsers.prompty({
-            filename: "../sample/src/chat.prompty",
-        })
-        assert(result)
-        assert(result.messages.length === 2)
-    })
-
-    await test("PDF-image", async () => {
-        const result = await parsers.PDF(
-            { filename: "../sample/src/rag/loremipsum.pdf" },
-            { renderAsImage: true }
-        )
-        let i = 1
-        for (const img of result.images) {
-            await writeFile(`./loremipsum.temp.${i++}.png`, img)
-        }
-        assert(result.file.content.includes("Lorem"))
-    })
-
-    await test("DOCX - markdown", async () => {
-        const result = await parsers.DOCX(
-            {
-                filename: "../sample/src/rag/Document.docx",
-            },
-            { format: "markdown" }
-        )
-        assert(result.file.content.includes("Microsoft"))
-    })
-    await test("DOCX - html", async () => {
-        const result = await parsers.DOCX(
-            {
-                filename: "../sample/src/rag/Document.docx",
-            },
-            { format: "html" }
-        )
-        assert(result.file.content.includes("Microsoft"))
-    })
-    await test("DOCX - text", async () => {
-        const result = await parsers.DOCX(
-            {
-                filename: "../sample/src/rag/Document.docx",
-            },
-            { format: "text" }
-        )
-        assert(result.file.content.includes("Microsoft"))
-    })
-
-    test("CSV", () => {
-        const result = parsers.CSV("key,value\n1,2")
-        assert.deepStrictEqual(result, [{ key: "1", value: "2" }])
-    })
-
-    test("XLSX", async () => {
-        const result = await XLSXParse(
-            await readFile(resolve("./src/parsers.test.xlsx"))
-        )
-        assert.deepStrictEqual(result, [
-            { name: "Sheet1", rows: [{ key: 1, value: 2 }] },
-        ])
-    })
-
-    test("frontmatter", () => {
-        const result = parsers.frontmatter("---\nkey: value\n---\n")
-        assert.deepStrictEqual(result, { key: "value" })
-    })
-
-    test("zip", async () => {
-        const result = await parsers.unzip(
-            {
-                filename: "./src/parsers.test.zip",
-                content: undefined,
-            },
-            { glob: "*.md" }
-        )
-        assert(result.find((f) => f.filename === "markdown.md"))
-        assert(!result.find((f) => f.filename === "loremipsum.pdf"))
-    })
-
-    test("math", async () => {
-        const res = await parsers.math("1 + 3")
-        assert.strictEqual(res, 4)
-    })
-
-    test("validateJSON", () => {
-        const res = parsers.validateJSON(
-            {
-                type: "object",
-                properties: {
-                    key: { type: "string" },
-                },
-                required: ["key"],
-            },
-            { key: "value" }
-        )
-        assert.strictEqual(res.pathValid, true)
-    })
-
-    // write test about hash
-    test("hash", async () => {
-        const result = await parsers.hash(
-            { test: "test string", arr: [1, 2, "32"], v: new Uint8Array(123) },
-            { length: 20, version: false }
-        )
-        assert.strictEqual(result, "43ebfdc72c65bbf157ff") // Example hash value
-    })
-
-    test("dedent", () => {
-        const indentedText = `
+  let trace: MarkdownTrace;
+  let model: string;
+  let parsers: Awaited<ReturnType<typeof createParsers>>;
+
+  beforeEach(async () => {
+    trace = new MarkdownTrace({});
+    model = "test model";
+    parsers = await createParsers({ trace, model });
+    TestHost.install();
+  });
+
+  test("JSON5", () => {
+    const result = parsers.JSON5('{"key": "value"}');
+    assert.deepStrictEqual(result, { key: "value" });
+  });
+
+  test("JSONL", () => {
+    const result = parsers.JSONL('{"key": "value"}\n{"key2": "value2"}');
+    assert.deepStrictEqual(result[0], { key: "value" });
+    assert.deepStrictEqual(result[1], { key2: "value2" });
+  });
+
+  test("YAML", () => {
+    const result = parsers.YAML("key: value");
+    assert.deepStrictEqual(result, { key: "value" });
+  });
+
+  test("XML parser", () => {
+    const result = parsers.XML("<key>value</key>");
assert.deepStrictEqual(result, { key: "value" }); + }); + + test("TOML", () => { + const result = parsers.TOML('key = "value"'); + assert.equal(result.key, "value"); + }); + + await test("PDF", async () => { + const result = await parsers.PDF({ + filename: "../sample/src/rag/loremipsum.pdf", + }); + assert(result.file.content.includes("Lorem")); + }); + + await test("prompty", async () => { + const result = await parsers.prompty({ + filename: "../sample/src/chat.prompty", + }); + assert(result); + assert(result.messages.length === 2); + }); + + await test("PDF-image", async () => { + const result = await parsers.PDF( + { filename: "../sample/src/rag/loremipsum.pdf" }, + { renderAsImage: true }, + ); + let i = 1; + for (const img of result.images) { + await writeFile(`./loremipsum.temp.${i++}.png`, img); + } + assert(result.file.content.includes("Lorem")); + }); + + await test("DOCX - markdown", async () => { + const result = await parsers.DOCX( + { + filename: "../sample/src/rag/Document.docx", + }, + { format: "markdown" }, + ); + assert(result.file.content.includes("Microsoft")); + }); + await test("DOCX - html", async () => { + const result = await parsers.DOCX( + { + filename: "../sample/src/rag/Document.docx", + }, + { format: "html" }, + ); + assert(result.file.content.includes("Microsoft")); + }); + await test("DOCX - text", async () => { + const result = await parsers.DOCX( + { + filename: "../sample/src/rag/Document.docx", + }, + { format: "text" }, + ); + assert(result.file.content.includes("Microsoft")); + }); + + test("CSV", () => { + const result = parsers.CSV("key,value\n1,2"); + assert.deepStrictEqual(result, [{ key: "1", value: "2" }]); + }); + + test("XLSX", async () => { + const result = await XLSXParse(await readFile(resolve("./src/parsers.test.xlsx"))); + assert.deepStrictEqual(result, [{ name: "Sheet1", rows: [{ key: 1, value: 2 }] }]); + }); + + test("frontmatter", () => { + const result = parsers.frontmatter("---\nkey: value\n---\n"); + assert.deepStrictEqual(result, { key: "value" }); + }); + + test("zip", async () => { + const result = await parsers.unzip( + { + filename: "./src/parsers.test.zip", + content: undefined, + }, + { glob: "*.md" }, + ); + assert(result.find((f) => f.filename === "markdown.md")); + assert(!result.find((f) => f.filename === "loremipsum.pdf")); + }); + + test("math", async () => { + const res = await parsers.math("1 + 3"); + assert.strictEqual(res, 4); + }); + + test("validateJSON", () => { + const res = parsers.validateJSON( + { + type: "object", + properties: { + key: { type: "string" }, + }, + required: ["key"], + }, + { key: "value" }, + ); + assert.strictEqual(res.pathValid, true); + }); + + // write test about hash + test("hash", async () => { + const result = await parsers.hash( + { test: "test string", arr: [1, 2, "32"], v: new Uint8Array(123) }, + { length: 20, version: false }, + ); + assert.strictEqual(result, "43ebfdc72c65bbf157ff"); // Example hash value + }); + + test("dedent", () => { + const indentedText = ` This is an indented line This is more indented Back to first level - ` - const result = parsers.dedent(indentedText) - assert.strictEqual( - result, - `This is an indented line + `; + const result = parsers.dedent(indentedText); + assert.strictEqual( + result, + `This is an indented line This is more indented -Back to first level` - ) - }) - - test("unthink", () => { - const text = - "I think the answer is 42. 
Actually, it should be 43" - const result = parsers.unthink(text) - assert.strictEqual(result, "I think the answer is 42. ") - }) - - test("tokens", () => { - const result = parsers.tokens("Hello world") - assert(typeof result === "number") - assert(result > 0) - }) - test("transcription", () => { - const vttContent = `WEBVTT +Back to first level`, + ); + }); + + test("unthink", () => { + const text = "I think the answer is 42. Actually, it should be 43"; + const result = parsers.unthink(text); + assert.strictEqual(result, "I think the answer is 42. "); + }); + + test("tokens", () => { + const result = parsers.tokens("Hello world"); + assert(typeof result === "number"); + assert(result > 0); + }); + test("transcription", () => { + const vttContent = `WEBVTT 1 00:00:00.000 --> 00:00:05.000 @@ -197,25 +192,25 @@ Hello world 2 00:00:05.500 --> 00:00:10.000 -This is a test` - - const result = parsers.transcription(vttContent) - assert.deepStrictEqual(result[0], { - id: "1", - start: 0, - end: 5000, - text: "Hello world", - }) - assert.deepStrictEqual(result[1], { - id: "2", - start: 5500, - end: 10000, - text: "This is a test", - }) - }) - test("unfence", () => { - const fencedText = '```json\n{"key": "value"}\n```' - const result = parsers.unfence(fencedText, "json") - assert.strictEqual(result, '{"key": "value"}') - }) -}) +This is a test`; + + const result = parsers.transcription(vttContent); + assert.deepStrictEqual(result[0], { + id: "1", + start: 0, + end: 5000, + text: "Hello world", + }); + assert.deepStrictEqual(result[1], { + id: "2", + start: 5500, + end: 10000, + text: "This is a test", + }); + }); + test("unfence", () => { + const fencedText = '```json\n{"key": "value"}\n```'; + const result = parsers.unfence(fencedText, "json"); + assert.strictEqual(result, '{"key": "value"}'); + }); +}); diff --git a/packages/core/src/parsers.ts b/packages/core/src/parsers.ts index 38e3e8369e..7143652699 100644 --- a/packages/core/src/parsers.ts +++ b/packages/core/src/parsers.ts @@ -1,45 +1,41 @@ -import { CSVTryParse } from "./csv" -import { - filenameOrFileToContent, - filenameOrFileToFilename, - unfence, -} from "./unwrappers" -import { JSON5TryParse, JSONLLMTryParse } from "./json5" -import { estimateTokens } from "./tokens" -import { TOMLTryParse } from "./toml" -import { TraceOptions } from "./trace" -import { YAMLTryParse } from "./yaml" -import { DOCXTryParse } from "./docx" -import { frontmatterTryParse } from "./frontmatter" -import { extractFenced } from "./fence" -import { parseAnnotations } from "./annotations" -import { dotEnvTryParse } from "./dotenv" -import { INITryParse } from "./ini" -import { XMLTryParse } from "./xml" -import { parsePdf } from "./pdf" -import { HTMLToMarkdown, HTMLToText } from "./html" -import { MathTryEvaluate } from "./math" -import { tryValidateJSONWithSchema, validateJSONWithSchema } from "./schema" -import { XLSXTryParse } from "./xlsx" -import { host } from "./host" -import { unzip } from "./zip" -import { JSONLTryParse } from "./jsonl" -import { resolveFileContent } from "./file" -import { resolveTokenEncoder } from "./encoders" -import { mustacheRender } from "./mustache" -import { jinjaRender } from "./jinja" -import { llmifyDiff } from "./llmdiff" -import { tidyData } from "./tidy" -import { hash } from "./crypto" -import { GROQEvaluate } from "./groq" -import { unthink } from "./think" -import { CancellationOptions } from "./cancellation" -import { dedent } from "./indent" -import { vttSrtParse } from "./transcription" -import { encodeIDs } from 
"./cleaners" -import { diffCreatePatch } from "./diff" -import { promptyParse } from "./prompty" -import { mermaidParse } from "./mermaid" +import { CSVTryParse } from "./csv"; +import { filenameOrFileToContent, filenameOrFileToFilename, unfence } from "./unwrappers"; +import { JSON5TryParse, JSONLLMTryParse } from "./json5"; +import { estimateTokens } from "./tokens"; +import { TOMLTryParse } from "./toml"; +import { TraceOptions } from "./trace"; +import { YAMLTryParse } from "./yaml"; +import { DOCXTryParse } from "./docx"; +import { frontmatterTryParse } from "./frontmatter"; +import { extractFenced } from "./fence"; +import { parseAnnotations } from "./annotations"; +import { dotEnvTryParse } from "./dotenv"; +import { INITryParse } from "./ini"; +import { XMLTryParse } from "./xml"; +import { parsePdf } from "./pdf"; +import { HTMLToMarkdown, HTMLToText } from "./html"; +import { MathTryEvaluate } from "./math"; +import { tryValidateJSONWithSchema, validateJSONWithSchema } from "./schema"; +import { XLSXTryParse } from "./xlsx"; +import { host } from "./host"; +import { unzip } from "./zip"; +import { JSONLTryParse } from "./jsonl"; +import { resolveFileContent } from "./file"; +import { resolveTokenEncoder } from "./encoders"; +import { mustacheRender } from "./mustache"; +import { jinjaRender } from "./jinja"; +import { llmifyDiff } from "./llmdiff"; +import { tidyData } from "./tidy"; +import { hash } from "./crypto"; +import { GROQEvaluate } from "./groq"; +import { unthink } from "./think"; +import { CancellationOptions } from "./cancellation"; +import { dedent } from "./indent"; +import { vttSrtParse } from "./transcription"; +import { encodeIDs } from "./cleaners"; +import { diffCreatePatch } from "./diff"; +import { promptyParse } from "./prompty"; +import { mermaidParse } from "./mermaid"; /** * Asynchronously creates a set of parsers for handling various file formats, data operations, @@ -85,136 +81,113 @@ import { mermaidParse } from "./mermaid" * - encodeIDs: Encodes identifiers for use in various operations. 
 */
 export async function createParsers(
-    options: {
-        model: string
-    } & TraceOptions &
-        CancellationOptions
+  options: {
+    model: string;
+  } & TraceOptions &
+    CancellationOptions,
 ): Promise<Parsers> {
-    const { trace, model, cancellationToken } = options
-    const { encode: encoder } = await resolveTokenEncoder(model)
-    return Object.freeze({
-        JSON5: (text, options) =>
-            tryValidateJSONWithSchema(
-                JSON5TryParse(
-                    filenameOrFileToContent(text),
-                    options?.defaultValue
-                ),
-                options
-            ),
-        JSONLLM: (text) => JSONLLMTryParse(text),
-        JSONL: (text) => JSONLTryParse(filenameOrFileToContent(text)),
-        YAML: (text, options) =>
-            tryValidateJSONWithSchema(
-                YAMLTryParse(
-                    filenameOrFileToContent(text),
-                    options?.defaultValue
-                ),
-                options
-            ),
-        XML: (text, options) => {
-            const { defaultValue, ...rest } = options || {}
-            return tryValidateJSONWithSchema(
-                XMLTryParse(filenameOrFileToContent(text), defaultValue, rest),
-                options
-            )
+  const { trace, model, cancellationToken } = options;
+  const { encode: encoder } = await resolveTokenEncoder(model);
+  return Object.freeze({
+    JSON5: (text, options) =>
+      tryValidateJSONWithSchema(
+        JSON5TryParse(filenameOrFileToContent(text), options?.defaultValue),
+        options,
+      ),
+    JSONLLM: (text) => JSONLLMTryParse(text),
+    JSONL: (text) => JSONLTryParse(filenameOrFileToContent(text)),
+    YAML: (text, options) =>
+      tryValidateJSONWithSchema(
+        YAMLTryParse(filenameOrFileToContent(text), options?.defaultValue),
+        options,
+      ),
+    XML: (text, options) => {
+      const { defaultValue, ...rest } = options || {};
+      return tryValidateJSONWithSchema(
+        XMLTryParse(filenameOrFileToContent(text), defaultValue, rest),
+        options,
+      );
+    },
+    TOML: (text, options) =>
+      tryValidateJSONWithSchema(TOMLTryParse(filenameOrFileToContent(text), options), options),
+    frontmatter: (text, options) =>
+      tryValidateJSONWithSchema(
+        frontmatterTryParse(filenameOrFileToContent(text), options)?.value,
+        options,
+      ),
+    CSV: (text, options) =>
+      tryValidateJSONWithSchema(CSVTryParse(filenameOrFileToContent(text), options), options),
+    XLSX: async (file, options) =>
+      await XLSXTryParse(await host.readFile(filenameOrFileToFilename(file)), options),
+    dotEnv: (text) => dotEnvTryParse(filenameOrFileToContent(text)),
+    INI: (text, options) =>
+      tryValidateJSONWithSchema(
+        INITryParse(filenameOrFileToContent(text), options?.defaultValue),
+        options,
+      ),
+    transcription: (text) => vttSrtParse(filenameOrFileToContent(text)),
+    unzip: async (file, options) => await unzip(await host.readFile(file.filename), options),
+    tokens: (text) => estimateTokens(filenameOrFileToContent(text), encoder),
+    fences: (text) => extractFenced(filenameOrFileToContent(text)),
+    annotations: (text) => parseAnnotations(filenameOrFileToContent(text)),
+    HTMLToText: (text, options) =>
+      HTMLToText(filenameOrFileToContent(text), {
+        ...(options || {}),
+        trace,
+        cancellationToken,
+      }),
+    HTMLToMarkdown: (text, options) =>
+      HTMLToMarkdown(filenameOrFileToContent(text), {
+        ...(options || {}),
+        trace,
+        cancellationToken,
+      }),
+    DOCX: async (file, options) => await DOCXTryParse(file, options),
+    PDF: async (file, options) => {
+      if (!file) return { file: undefined, pages: [], data: [] };
+      const opts = {
+        ...(options || {}),
+        trace,
+        cancellationToken,
+      };
+      const filename = typeof file === "string" ?
file : file.filename; + const { pages, content } = (await parsePdf(filename, opts)) || {}; + return { + file: { + filename, + content, }, - TOML: (text, options) => - tryValidateJSONWithSchema( - TOMLTryParse(filenameOrFileToContent(text), options), - options - ), - frontmatter: (text, options) => - tryValidateJSONWithSchema( - frontmatterTryParse(filenameOrFileToContent(text), options) - ?.value, - options - ), - CSV: (text, options) => - tryValidateJSONWithSchema( - CSVTryParse(filenameOrFileToContent(text), options), - options - ), - XLSX: async (file, options) => - await XLSXTryParse( - await host.readFile(filenameOrFileToFilename(file)), - options - ), - dotEnv: (text) => dotEnvTryParse(filenameOrFileToContent(text)), - INI: (text, options) => - tryValidateJSONWithSchema( - INITryParse( - filenameOrFileToContent(text), - options?.defaultValue - ), - options - ), - transcription: (text) => vttSrtParse(filenameOrFileToContent(text)), - unzip: async (file, options) => - await unzip(await host.readFile(file.filename), options), - tokens: (text) => - estimateTokens(filenameOrFileToContent(text), encoder), - fences: (text) => extractFenced(filenameOrFileToContent(text)), - annotations: (text) => parseAnnotations(filenameOrFileToContent(text)), - HTMLToText: (text, options) => - HTMLToText(filenameOrFileToContent(text), { - ...(options || {}), - trace, - cancellationToken, - }), - HTMLToMarkdown: (text, options) => - HTMLToMarkdown(filenameOrFileToContent(text), { - ...(options || {}), - trace, - cancellationToken, - }), - DOCX: async (file, options) => await DOCXTryParse(file, options), - PDF: async (file, options) => { - if (!file) return { file: undefined, pages: [], data: [] } - const opts = { - ...(options || {}), - trace, - cancellationToken, - } - const filename = typeof file === "string" ? 
file : file.filename - const { pages, content } = (await parsePdf(filename, opts)) || {} - return { - file: { - filename, - content, - }, - pages: pages?.map((p) => p.content), - images: pages?.map((p) => p.image), - data: pages, - } - }, - mermaid: async (file) => { - const f = filenameOrFileToContent(file) - const res = await mermaidParse(f) - return res - }, - math: async (expression, scope) => - await MathTryEvaluate(expression, { scope, trace }), - validateJSON: (schema, content) => - validateJSONWithSchema(content, schema, { trace }), - mustache: (file, args) => { - const f = filenameOrFileToContent(file) - return mustacheRender(f, args) - }, - jinja: (file, data) => { - const f = filenameOrFileToContent(file) - return jinjaRender(f, data) - }, - diff: (f1, f2) => llmifyDiff(diffCreatePatch(f1, f2)), - tidyData: (rows, options) => tidyData(rows, options), - hash: async (text, options) => await hash(text, options), - unfence: unfence, - GROQ: GROQEvaluate, - unthink: unthink, - dedent: dedent, - encodeIDs: encodeIDs, - prompty: async (file) => { - await resolveFileContent(file, { trace }) - return promptyParse(file.filename, file.content) - }, - }) + pages: pages?.map((p) => p.content), + images: pages?.map((p) => p.image), + data: pages, + }; + }, + mermaid: async (file) => { + const f = filenameOrFileToContent(file); + const res = await mermaidParse(f); + return res; + }, + math: async (expression, scope) => await MathTryEvaluate(expression, { scope, trace }), + validateJSON: (schema, content) => validateJSONWithSchema(content, schema, { trace }), + mustache: (file, args) => { + const f = filenameOrFileToContent(file); + return mustacheRender(f, args); + }, + jinja: (file, data) => { + const f = filenameOrFileToContent(file); + return jinjaRender(f, data); + }, + diff: (f1, f2) => llmifyDiff(diffCreatePatch(f1, f2)), + tidyData: (rows, options) => tidyData(rows, options), + hash: async (text, options) => await hash(text, options), + unfence: unfence, + GROQ: GROQEvaluate, + unthink: unthink, + dedent: dedent, + encodeIDs: encodeIDs, + prompty: async (file) => { + await resolveFileContent(file, { trace }); + return promptyParse(file.filename, file.content); + }, + }); } diff --git a/packages/core/src/path.ts b/packages/core/src/path.ts index 73e7d97335..f22b52857d 100644 --- a/packages/core/src/path.ts +++ b/packages/core/src/path.ts @@ -1,17 +1,17 @@ import { - dirname, - extname, - basename, - join, - normalize, - relative, - resolve, - isAbsolute, - parse, -} from "node:path" -import { changeext } from "../../core/src/fs" -import { fileURLToPath } from "node:url" -import { sanitizeFilename } from "../../core/src/sanitize" + dirname, + extname, + basename, + join, + normalize, + relative, + resolve, + isAbsolute, + parse, +} from "node:path"; +import { changeext } from "../../core/src/fs"; +import { fileURLToPath } from "node:url"; +import { sanitizeFilename } from "../../core/src/sanitize"; /** * Creates a frozen object that provides various path manipulation functions. @@ -19,22 +19,22 @@ import { sanitizeFilename } from "../../core/src/sanitize" * @returns A frozen object with methods for path handling. */ export function createNodePath(): Path { - // Return a frozen object containing path manipulation functions. - // These functions are imported from node:path and facilitate - // various operations on file paths. + // Return a frozen object containing path manipulation functions. 
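+ // Illustrative (hypothetical) use: createNodePath().join("docs", "a.md")
+ // yields "docs/a.md" on POSIX, and extname("a.md") yields ".md".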
+ // These functions are imported from node:path and facilitate + // various operations on file paths. - return Object.freeze({ - parse, - dirname, // Get the directory name of a path - extname, // Get the extension of a path - basename, // Get the basename of a path - join, // Join multiple path segments - normalize, // Normalize a path to remove redundant separators - relative, // Get the relative path between two paths - resolve, // Resolve a sequence of paths to an absolute path - isAbsolute, // Check if a path is absolute - changeext, - resolveFileURL: fileURLToPath, - sanitize: sanitizeFilename, - } satisfies Path) + return Object.freeze({ + parse, + dirname, // Get the directory name of a path + extname, // Get the extension of a path + basename, // Get the basename of a path + join, // Join multiple path segments + normalize, // Normalize a path to remove redundant separators + relative, // Get the relative path between two paths + resolve, // Resolve a sequence of paths to an absolute path + isAbsolute, // Check if a path is absolute + changeext, + resolveFileURL: fileURLToPath, + sanitize: sanitizeFilename, + } satisfies Path); } diff --git a/packages/core/src/pdf.ts b/packages/core/src/pdf.ts index 61ac97662b..933e313980 100644 --- a/packages/core/src/pdf.ts +++ b/packages/core/src/pdf.ts @@ -1,28 +1,28 @@ // Import necessary types and modules -import type { TextItem } from "pdfjs-dist/types/src/display/api" -import { host } from "./host" -import { TraceOptions } from "./trace" -import os from "os" -import { serializeError } from "./error" -import { logVerbose, logWarn } from "./util" -import { INVALID_FILENAME_REGEX, PDF_HASH_LENGTH, PDF_SCALE } from "./constants" -import { resolveGlobal } from "./global" -import { isUint8Array, isUint8ClampedArray } from "util/types" -import { hash } from "./crypto" -import { join } from "path" -import { readFile, writeFile } from "fs/promises" -import { ensureDir } from "fs-extra" -import { YAMLStringify } from "./yaml" -import { deleteUndefinedValues } from "./cleaners" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { measure } from "./performance" -import { dotGenaiscriptPath } from "./workdir" -import { genaiscriptDebug } from "./debug" -import type { Canvas } from "@napi-rs/canvas" -import { pathToFileURL } from "url" -const dbg = genaiscriptDebug("pdf") - -let standardFontDataUrl: string +import type { TextItem } from "pdfjs-dist/types/src/display/api"; +import { host } from "./host"; +import { TraceOptions } from "./trace"; +import os from "os"; +import { serializeError } from "./error"; +import { logVerbose, logWarn } from "./util"; +import { INVALID_FILENAME_REGEX, PDF_HASH_LENGTH, PDF_SCALE } from "./constants"; +import { resolveGlobal } from "./global"; +import { isUint8Array, isUint8ClampedArray } from "util/types"; +import { hash } from "./crypto"; +import { join } from "path"; +import { readFile, writeFile } from "fs/promises"; +import { ensureDir } from "fs-extra"; +import { YAMLStringify } from "./yaml"; +import { deleteUndefinedValues } from "./cleaners"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { measure } from "./performance"; +import { dotGenaiscriptPath } from "./workdir"; +import { genaiscriptDebug } from "./debug"; +import type { Canvas } from "@napi-rs/canvas"; +import { pathToFileURL } from "url"; +const dbg = genaiscriptDebug("pdf"); + +let standardFontDataUrl: string; /** * Attempts to import pdfjs and configure worker source @@ -31,144 +31,137 @@ 
let standardFontDataUrl: string * @returns A promise resolving to the pdfjs module */ async function tryImportPdfjs(options?: TraceOptions) { - const { trace } = options || {} - installPromiseWithResolversShim() // Ensure Promise.withResolvers is available - const pdfjs = await import("pdfjs-dist") - let workerSrc = require.resolve("pdfjs-dist/build/pdf.worker.min.mjs") - - // Adjust worker source path for Windows platform - if (os.platform() === "win32") { - dbg("detected Windows platform, adjusting workerSrc: %s", workerSrc) - workerSrc = "file://" + workerSrc.replace(/\\/g, "/") - } - - standardFontDataUrl = pathToFileURL( - workerSrc.replace("build/pdf.worker.min.mjs", "standard_fonts/") - ).toString() - dbg(`standardFontDataUrl: %s`, standardFontDataUrl) - pdfjs.GlobalWorkerOptions.workerSrc = workerSrc - return pdfjs + const { trace } = options || {}; + installPromiseWithResolversShim(); // Ensure Promise.withResolvers is available + const pdfjs = await import("pdfjs-dist"); + let workerSrc = require.resolve("pdfjs-dist/build/pdf.worker.min.mjs"); + + // Adjust worker source path for Windows platform + if (os.platform() === "win32") { + dbg("detected Windows platform, adjusting workerSrc: %s", workerSrc); + workerSrc = "file://" + workerSrc.replace(/\\/g, "/"); + } + + standardFontDataUrl = pathToFileURL( + workerSrc.replace("build/pdf.worker.min.mjs", "standard_fonts/"), + ).toString(); + dbg(`standardFontDataUrl: %s`, standardFontDataUrl); + pdfjs.GlobalWorkerOptions.workerSrc = workerSrc; + return pdfjs; } class CanvasFactory { - static createCanvas: (w: number, h: number) => Canvas + static createCanvas: (w: number, h: number) => Canvas; - constructor() {} + constructor() {} - create(width: number, height: number) { - if (width <= 0 || height <= 0) { - dbg("invalid canvas dimensions: width=%d, height=%d", width, height) - throw new Error("Invalid canvas size") - } - const canvas = this._createCanvas(width, height) - return { - canvas, - context: canvas.getContext("2d"), - } + create(width: number, height: number) { + if (width <= 0 || height <= 0) { + dbg("invalid canvas dimensions: width=%d, height=%d", width, height); + throw new Error("Invalid canvas size"); } - - reset(canvasAndContext: any, width: number, height: number) { - if (!canvasAndContext.canvas) { - dbg("reset called with missing canvas") - throw new Error("Canvas is not specified") - } - if (width <= 0 || height <= 0) { - dbg( - "reset called with invalid canvas size: width=%d, height=%d", - width, - height - ) - throw new Error("Invalid canvas size") - } - canvasAndContext.canvas.width = width - canvasAndContext.canvas.height = height + const canvas = this._createCanvas(width, height); + return { + canvas, + context: canvas.getContext("2d"), + }; + } + + reset(canvasAndContext: any, width: number, height: number) { + if (!canvasAndContext.canvas) { + dbg("reset called with missing canvas"); + throw new Error("Canvas is not specified"); } - - destroy(canvasAndContext: any) { - if (!canvasAndContext.canvas) { - dbg("destroy called with missing canvas") - throw new Error("Canvas is not specified") - } - // Zeroing the width and height cause Firefox to release graphics - // resources immediately, which can greatly reduce memory consumption. 
- canvasAndContext.canvas.width = 0 - canvasAndContext.canvas.height = 0 - canvasAndContext.canvas = null - canvasAndContext.context = null + if (width <= 0 || height <= 0) { + dbg("reset called with invalid canvas size: width=%d, height=%d", width, height); + throw new Error("Invalid canvas size"); } - - /** - * @ignore - */ - _createCanvas(width: number, height: number) { - return CanvasFactory.createCanvas(width, height) + canvasAndContext.canvas.width = width; + canvasAndContext.canvas.height = height; + } + + destroy(canvasAndContext: any) { + if (!canvasAndContext.canvas) { + dbg("destroy called with missing canvas"); + throw new Error("Canvas is not specified"); } + // Zeroing the width and height cause Firefox to release graphics + // resources immediately, which can greatly reduce memory consumption. + canvasAndContext.canvas.width = 0; + canvasAndContext.canvas.height = 0; + canvasAndContext.canvas = null; + canvasAndContext.context = null; + } + + /** + * @ignore + */ + _createCanvas(width: number, height: number) { + return CanvasFactory.createCanvas(width, height); + } } async function tryImportCanvas() { - if (CanvasFactory.createCanvas) { - return CanvasFactory.createCanvas - } - - try { - dbg(`initializing pdf canvas`) - const canvas = await import("@napi-rs/canvas") - const createCanvas = (w: number, h: number) => canvas.createCanvas(w, h) - const glob = resolveGlobal() - glob.ImageData ??= canvas.ImageData - glob.Path2D ??= canvas.Path2D - glob.Canvas ??= canvas.Canvas - glob.DOMMatrix ??= canvas.DOMMatrix - CanvasFactory.createCanvas = createCanvas - dbg(`pdf canvas initialized`) - return createCanvas - } catch (error) { - logWarn("Failed to import canvas") - logVerbose(error) - return undefined - } + if (CanvasFactory.createCanvas) { + return CanvasFactory.createCanvas; + } + + try { + dbg(`initializing pdf canvas`); + const canvas = await import("@napi-rs/canvas"); + const createCanvas = (w: number, h: number) => canvas.createCanvas(w, h); + const glob = resolveGlobal(); + glob.ImageData ??= canvas.ImageData; + glob.Path2D ??= canvas.Path2D; + glob.Canvas ??= canvas.Canvas; + glob.DOMMatrix ??= canvas.DOMMatrix; + CanvasFactory.createCanvas = createCanvas; + dbg(`pdf canvas initialized`); + return createCanvas; + } catch (error) { + logWarn("Failed to import canvas"); + logVerbose(error); + return undefined; + } } /** * Installs a shim for Promise.withResolvers if not available. */ function installPromiseWithResolversShim() { - ;(Promise as any).withResolvers || - ((Promise as any).withResolvers = function () { - let rs, - rj, - pm = new this((resolve: any, reject: any) => { - rs = resolve - rj = reject - }) - return { - resolve: rs, - reject: rj, - promise: pm, - } - }) + (Promise as any).withResolvers || + ((Promise as any).withResolvers = function () { + let rs, + rj, + pm = new this((resolve: any, reject: any) => { + rs = resolve; + rj = reject; + }); + return { + resolve: rs, + reject: rj, + promise: pm, + }; + }); } enum ImageKind { - GRAYSCALE_1BPP = 1, - RGB_24BPP = 2, - RGBA_32BPP = 3, + GRAYSCALE_1BPP = 1, + RGB_24BPP = 2, + RGBA_32BPP = 3, } async function computeHashFolder( - filename: string | WorkspaceFile, - options: TraceOptions & ParsePDFOptions & { content?: Uint8Array } + filename: string | WorkspaceFile, + options: TraceOptions & ParsePDFOptions & { content?: Uint8Array }, ) { - const { trace, content, ...rest } = options - const h = await hash( - [typeof filename === "string" ? 
{ filename } : filename, content, rest], - { - readWorkspaceFiles: true, - version: true, - length: PDF_HASH_LENGTH, - } - ) - return dotGenaiscriptPath("cache", "pdf", h) + const { trace, content, ...rest } = options; + const h = await hash([typeof filename === "string" ? { filename } : filename, content, rest], { + readWorkspaceFiles: true, + version: true, + length: PDF_HASH_LENGTH, + }); + return dotGenaiscriptPath("cache", "pdf", h); } /** @@ -179,262 +172,242 @@ async function computeHashFolder( * @returns An object indicating success or failure and the parsed pages */ async function PDFTryParse( - fileOrUrl: string, - content?: Uint8Array, - options?: ParsePDFOptions & TraceOptions & CancellationOptions + fileOrUrl: string, + content?: Uint8Array, + options?: ParsePDFOptions & TraceOptions & CancellationOptions, ) { - const { - cancellationToken, - disableCleanup, - trace, - renderAsImage, - scale = PDF_SCALE, - cache, - useSystemFonts, - } = options || {} - - const folder = await computeHashFolder(fileOrUrl, { - content, - ...(options || {}), - }) - const resFilename = join(folder, "res.json") - const readCache = async () => { - if (cache === false) { - dbg("cache is disabled, skipping cache read") - return undefined - } - try { - const res = JSON.parse( - await readFile(resFilename, { - encoding: "utf-8", - }) - ) - dbg(`cache hit at ${folder}`) - return res - } catch { - return undefined - } + const { + cancellationToken, + disableCleanup, + trace, + renderAsImage, + scale = PDF_SCALE, + cache, + useSystemFonts, + } = options || {}; + + const folder = await computeHashFolder(fileOrUrl, { + content, + ...(options || {}), + }); + const resFilename = join(folder, "res.json"); + const readCache = async () => { + if (cache === false) { + dbg("cache is disabled, skipping cache read"); + return undefined; } - - { - // try cache hit - const cached = await readCache() - if (cached) { - dbg("cache hit for pdf parsing, returning cached result") - return cached - } - } - - logVerbose(`pdf: decoding ${fileOrUrl || ""} in ${folder}`) - trace?.itemValue(`pdf: decoding ${fileOrUrl || ""}`, folder) - await ensureDir(folder) - const m = measure("parsers.pdf") try { - const createCanvas = await tryImportCanvas() - const pdfjs = await tryImportPdfjs(options) - checkCancelled(cancellationToken) - const { getDocument } = pdfjs - const data = content || (await host.readFile(fileOrUrl)) - // Check if we're running on Windows - const isWindows = os.platform() === "win32" - const loader = await getDocument({ - data, - useSystemFonts: useSystemFonts ?? !isWindows, - disableFontFace: true, - standardFontDataUrl, - CanvasFactory: createCanvas ? 
CanvasFactory : undefined, + const res = JSON.parse( + await readFile(resFilename, { + encoding: "utf-8", + }), + ); + dbg(`cache hit at ${folder}`); + return res; + } catch { + return undefined; + } + }; + + { + // try cache hit + const cached = await readCache(); + if (cached) { + dbg("cache hit for pdf parsing, returning cached result"); + return cached; + } + } + + logVerbose(`pdf: decoding ${fileOrUrl || ""} in ${folder}`); + trace?.itemValue(`pdf: decoding ${fileOrUrl || ""}`, folder); + await ensureDir(folder); + const m = measure("parsers.pdf"); + try { + const createCanvas = await tryImportCanvas(); + const pdfjs = await tryImportPdfjs(options); + checkCancelled(cancellationToken); + const { getDocument } = pdfjs; + const data = content || (await host.readFile(fileOrUrl)); + // Check if we're running on Windows + const isWindows = os.platform() === "win32"; + const loader = await getDocument({ + data, + useSystemFonts: useSystemFonts ?? !isWindows, + disableFontFace: true, + standardFontDataUrl, + CanvasFactory: createCanvas ? CanvasFactory : undefined, + }); + const doc = await loader.promise; + const pdfMetadata = await doc.getMetadata(); + const metadata = pdfMetadata + ? deleteUndefinedValues({ + info: deleteUndefinedValues({ + ...(pdfMetadata.info || {}), + }), }) - const doc = await loader.promise - const pdfMetadata = await doc.getMetadata() - const metadata = pdfMetadata - ? deleteUndefinedValues({ - info: deleteUndefinedValues({ - ...(pdfMetadata.info || {}), - }), - }) - : undefined - - const numPages = doc.numPages - const pages: PDFPage[] = [] - - // Iterate through each page and extract text content - for (let i = 0; i < numPages; i++) { - checkCancelled(cancellationToken) - const page = await doc.getPage(1 + i) // 1-indexed - const content = await page.getTextContent() - const items: TextItem[] = content.items.filter( - (item): item is TextItem => "str" in item - ) - let { lines } = parsePageItems(items) - - // Optionally clean up trailing spaces - if (!disableCleanup) { - dbg("trailing whitespace cleanup enabled for page lines") - lines = lines.map((line) => line.replace(/[\t ]+$/g, "")) + : undefined; + + const numPages = doc.numPages; + const pages: PDFPage[] = []; + + // Iterate through each page and extract text content + for (let i = 0; i < numPages; i++) { + checkCancelled(cancellationToken); + const page = await doc.getPage(1 + i); // 1-indexed + const content = await page.getTextContent(); + const items: TextItem[] = content.items.filter((item): item is TextItem => "str" in item); + let { lines } = parsePageItems(items); + + // Optionally clean up trailing spaces + if (!disableCleanup) { + dbg("trailing whitespace cleanup enabled for page lines"); + lines = lines.map((line) => line.replace(/[\t ]+$/g, "")); + } + + // Collapse trailing spaces + const p: PDFPage = { + index: i + 1, + content: lines.join("\n"), + }; + + await writeFile(join(folder, `page_${p.index}.txt`), p.content); + pages.push(p); + + if (createCanvas && renderAsImage) { + dbg("rendering page %d as PNG image", i + 1); + const viewport = page.getViewport({ scale }); + const canvas = await createCanvas(viewport.width, viewport.height); + const canvasContext = canvas.getContext("2d"); + const render = page.render({ + canvasContext: canvasContext as any, + viewport, + }); + await render.promise; + const buffer = canvas.toBuffer("image/png"); + p.image = join(folder, `page_${i + 1}.png`); + dbg(`writing page image %d to %s`, i + 1, p.image); + await writeFile(p.image, buffer); + } + + const 
opList = await page.getOperatorList(); + const figures: PDFPageImage[] = []; + for (let j = 0; j < opList.fnArray.length; j++) { + const fn = opList.fnArray[j]; + const args = opList.argsArray[j]; + if (fn === pdfjs.OPS.paintImageXObject && args) { + dbg("found image XObject in operator list at index %d", j); + const imageObj = args[0]; + if (imageObj) { + checkCancelled(cancellationToken); + const img = await new Promise((resolve, reject) => { + if (page.commonObjs.has(imageObj)) { + resolve(page.commonObjs.get(imageObj)); + } else if (page.objs.has(imageObj)) { + page.objs.get(imageObj, (r: any) => { + resolve(r); + }); + } else { + resolve(undefined); + } + }); + if (!img) { + continue; } - - // Collapse trailing spaces - const p: PDFPage = { - index: i + 1, - content: lines.join("\n"), - } - - await writeFile(join(folder, `page_${p.index}.txt`), p.content) - pages.push(p) - - if (createCanvas && renderAsImage) { - dbg("rendering page %d as PNG image", i + 1) - const viewport = page.getViewport({ scale }) - const canvas = await createCanvas( - viewport.width, - viewport.height - ) - const canvasContext = canvas.getContext("2d") - const render = page.render({ - canvasContext: canvasContext as any, - viewport, - }) - await render.promise - const buffer = canvas.toBuffer("image/png") - p.image = join(folder, `page_${i + 1}.png`) - dbg(`writing page image %d to %s`, i + 1, p.image) - await writeFile(p.image, buffer) + const fig = await decodeImage(p.index, img, createCanvas, imageObj, folder); + if (fig) { + figures.push(fig); } - - const opList = await page.getOperatorList() - const figures: PDFPageImage[] = [] - for (let j = 0; j < opList.fnArray.length; j++) { - const fn = opList.fnArray[j] - const args = opList.argsArray[j] - if (fn === pdfjs.OPS.paintImageXObject && args) { - dbg("found image XObject in operator list at index %d", j) - const imageObj = args[0] - if (imageObj) { - checkCancelled(cancellationToken) - const img = await new Promise( - (resolve, reject) => { - if (page.commonObjs.has(imageObj)) { - resolve(page.commonObjs.get(imageObj)) - } else if (page.objs.has(imageObj)) { - page.objs.get(imageObj, (r: any) => { - resolve(r) - }) - } else { - resolve(undefined) - } - } - ) - if (!img) { - continue - } - const fig = await decodeImage( - p.index, - img, - createCanvas, - imageObj, - folder - ) - if (fig) { - figures.push(fig) - } - } - } - } - p.figures = figures - - logVerbose( - `pdf: extracted ${fileOrUrl || ""} page ${i + 1} / ${numPages}, ${p.figures.length ? `${p.figures.length} figures` : ""}` - ) + } } + } + p.figures = figures; - const res = deleteUndefinedValues({ - metadata, - pages, - content: PDFPagesToString(pages), - }) - await writeFile(join(folder, "content.txt"), res.content) - await writeFile(resFilename, JSON.stringify(res)) - return res - } catch (error) { - logVerbose(error) - { - // try cache hit - const cached = await readCache() - if (cached) { - return cached - } - } - trace?.error(`reading pdf`, error) // Log error if tracing is enabled - await ensureDir(folder) - await writeFile( - join(folder, "error.txt"), - YAMLStringify(serializeError(error)) - ) - return { error: serializeError(error) } - } finally { - m() + logVerbose( + `pdf: extracted ${fileOrUrl || ""} page ${i + 1} / ${numPages}, ${p.figures.length ? 
`${p.figures.length} figures` : ""}`, + ); } - async function decodeImage( - pageIndex: number, - img: { - data: Uint8Array | Uint8ClampedArray - width: number - height: number - kind: ImageKind - }, - createCanvas: (w: number, h: number) => any, - imageObj: any, - folder: string - ) { - if (!isUint8ClampedArray(img?.data) && !isUint8Array(img?.data)) { - dbg( - "cannot decode—image data is not of type Uint8Array or Uint8ClampedArray" - ) - return undefined - } + const res = deleteUndefinedValues({ + metadata, + pages, + content: PDFPagesToString(pages), + }); + await writeFile(join(folder, "content.txt"), res.content); + await writeFile(resFilename, JSON.stringify(res)); + return res; + } catch (error) { + logVerbose(error); + { + // try cache hit + const cached = await readCache(); + if (cached) { + return cached; + } + } + trace?.error(`reading pdf`, error); // Log error if tracing is enabled + await ensureDir(folder); + await writeFile(join(folder, "error.txt"), YAMLStringify(serializeError(error))); + return { error: serializeError(error) }; + } finally { + m(); + } + + async function decodeImage( + pageIndex: number, + img: { + data: Uint8Array | Uint8ClampedArray; + width: number; + height: number; + kind: ImageKind; + }, + createCanvas: (w: number, h: number) => any, + imageObj: any, + folder: string, + ) { + if (!isUint8ClampedArray(img?.data) && !isUint8Array(img?.data)) { + dbg("cannot decode—image data is not of type Uint8Array or Uint8ClampedArray"); + return undefined; + } - const { width, height, data: _data, kind } = img - const imageData = new ImageData(width, height) - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const dstIdx = (y * width + x) * 4 - imageData.data[dstIdx + 3] = 255 // A - if (kind === ImageKind.GRAYSCALE_1BPP) { - const srcIdx = y * width + x - imageData.data[dstIdx + 0] = _data[srcIdx] // B - imageData.data[dstIdx + 1] = _data[srcIdx] // G - imageData.data[dstIdx + 2] = _data[srcIdx] // R - } else { - const srcIdx = - (y * width + x) * - (kind === ImageKind.RGBA_32BPP ? 4 : 3) - imageData.data[dstIdx + 0] = _data[srcIdx] // B - imageData.data[dstIdx + 1] = _data[srcIdx + 1] // G - imageData.data[dstIdx + 2] = _data[srcIdx + 2] // R - } - } + const { width, height, data: _data, kind } = img; + const imageData = new ImageData(width, height); + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const dstIdx = (y * width + x) * 4; + imageData.data[dstIdx + 3] = 255; // A + if (kind === ImageKind.GRAYSCALE_1BPP) { + const srcIdx = y * width + x; + imageData.data[dstIdx + 0] = _data[srcIdx]; // B + imageData.data[dstIdx + 1] = _data[srcIdx]; // G + imageData.data[dstIdx + 2] = _data[srcIdx]; // R + } else { + const srcIdx = (y * width + x) * (kind === ImageKind.RGBA_32BPP ? 
4 : 3); + imageData.data[dstIdx + 0] = _data[srcIdx]; // B + imageData.data[dstIdx + 1] = _data[srcIdx + 1]; // G + imageData.data[dstIdx + 2] = _data[srcIdx + 2]; // R } - const canvas = await createCanvas(width, height) - const ctx = canvas.getContext("2d") - ctx.putImageData(imageData, 0, 0) - const buffer = canvas.toBuffer("image/png") - const fn = join( - folder, - `page-${pageIndex}-${imageObj.replace(INVALID_FILENAME_REGEX, "")}.png` - ) - dbg(`writing image to %s`, fn) - await writeFile(fn, buffer) - - return { - id: imageObj, - width, - height, - type: "image/png", - size: buffer.length, - filename: fn, - } satisfies PDFPageImage + } } + const canvas = await createCanvas(width, height); + const ctx = canvas.getContext("2d"); + ctx.putImageData(imageData, 0, 0); + const buffer = canvas.toBuffer("image/png"); + const fn = join( + folder, + `page-${pageIndex}-${imageObj.replace(INVALID_FILENAME_REGEX, "")}.png`, + ); + dbg(`writing image to %s`, fn); + await writeFile(fn, buffer); + + return { + id: imageObj, + width, + height, + type: "image/png", + size: buffer.length, + filename: fn, + } satisfies PDFPageImage; + } } /** @@ -443,9 +416,7 @@ async function PDFTryParse( * @returns A single string representing the entire document */ function PDFPagesToString(pages: PDFPage[]) { - return pages - ?.map((p) => `-------- Page ${p.index} --------\n\n${p.content}`) - .join("\n\n") + return pages?.map((p) => `-------- Page ${p.index} --------\n\n${p.content}`).join("\n\n"); } /** @@ -455,29 +426,21 @@ function PDFPagesToString(pages: PDFPage[]) { * @returns A promise resolving to an object with parsed pages, concatenated content, and metadata. Returns empty pages and content if an error occurs. Metadata may be undefined if not present. */ export async function parsePdf( - filenameOrBuffer: string | Uint8Array, - options?: ParsePDFOptions & TraceOptions & CancellationOptions + filenameOrBuffer: string | Uint8Array, + options?: ParsePDFOptions & TraceOptions & CancellationOptions, ): Promise<{ - pages: PDFPage[] - content: string - metadata?: Record + pages: PDFPage[]; + content: string; + metadata?: Record; }> { - const filename = - typeof filenameOrBuffer === "string" ? filenameOrBuffer : undefined - const bytes = - typeof filenameOrBuffer === "string" - ? undefined - : (filenameOrBuffer as Uint8Array) - const { pages, metadata, content, error } = await PDFTryParse( - filename, - bytes, - options - ) - if (error) { - dbg("pdf parsing returned error: %O", error) - return { pages: [], content: "" } - } - return { pages, content, metadata } + const filename = typeof filenameOrBuffer === "string" ? filenameOrBuffer : undefined; + const bytes = typeof filenameOrBuffer === "string" ? 
undefined : (filenameOrBuffer as Uint8Array); + const { pages, metadata, content, error } = await PDFTryParse(filename, bytes, options); + if (error) { + dbg("pdf parsing returned error: %O", error); + return { pages: [], content: "" }; + } + return { pages, content, metadata }; } /** @@ -486,95 +449,90 @@ export async function parsePdf( * @returns An object containing parsed lines */ function parsePageItems(pdfItems: TextItem[]) { - const lineData: { [y: number]: TextItem[] } = {} - - // Group text items by their vertical position (y-coordinate) - for (let i = 0; i < pdfItems.length; i++) { - const item = pdfItems[i] - const y = item?.transform[5] - if (!lineData.hasOwnProperty(y)) { - //dbg("grouping text item at y=%d into new line", y) - lineData[y] = [] - } - // Ensure the item is valid before adding - /* istanbul ignore next */ - if (item) { - //dbg("adding item to lineData at y=%d: %o", y, item) - lineData[y]?.push(item) - } + const lineData: { [y: number]: TextItem[] } = {}; + + // Group text items by their vertical position (y-coordinate) + for (let i = 0; i < pdfItems.length; i++) { + const item = pdfItems[i]; + const y = item?.transform[5]; + if (!lineData.hasOwnProperty(y)) { + //dbg("grouping text item at y=%d into new line", y) + lineData[y] = []; } - - const yCoords = Object.keys(lineData) - .map((key) => Number(key)) - // Sort by descending y-coordinate - .sort((a, b) => b - a) - // Insert empty lines based on line height differences - .reduce((accum: number[], currentY, index, array) => { - const nextY = array[index + 1] - if (nextY != undefined) { - const currentLine = lineData[currentY]! - const currentLineHeight: number = currentLine.reduce( - (finalValue, current) => - finalValue > current.height - ? finalValue - : current.height, - -1 - ) - - // Check if a new line is needed based on height - if (Math.floor((currentY - nextY) / currentLineHeight) > 1) { - const newY = currentY - currentLineHeight - lineData[newY] = [] - return accum.concat(currentY, newY) - } - } - return accum.concat(currentY) - }, []) - - const lines: string[] = [] - for (let i = 0; i < yCoords.length; i++) { - const y = yCoords[i] - // Ensure y-coordinate is defined - /* istanbul ignore next */ - if (y == undefined) { - continue + // Ensure the item is valid before adding + /* istanbul ignore next */ + if (item) { + //dbg("adding item to lineData at y=%d: %o", y, item) + lineData[y]?.push(item); + } + } + + const yCoords = Object.keys(lineData) + .map((key) => Number(key)) + // Sort by descending y-coordinate + .sort((a, b) => b - a) + // Insert empty lines based on line height differences + .reduce((accum: number[], currentY, index, array) => { + const nextY = array[index + 1]; + if (nextY != undefined) { + const currentLine = lineData[currentY]!; + const currentLineHeight: number = currentLine.reduce( + (finalValue, current) => (finalValue > current.height ? finalValue : current.height), + -1, + ); + + // Check if a new line is needed based on height + if (Math.floor((currentY - nextY) / currentLineHeight) > 1) { + const newY = currentY - currentLineHeight; + lineData[newY] = []; + return accum.concat(currentY, newY); } - // Sort by x position within each line - const lineItems = lineData[y]!.sort( - (a, b) => a.transform[4] - b.transform[4] - ).filter((item) => !!item.str) - const firstLineItem = lineItems[0]! - let line = lineItems.length ? firstLineItem.str : "" - - // Concatenate text items into a single line - for (let j = 1; j < lineItems.length; j++) { - const item = lineItems[j]! 
- const lastItem = lineItems[j - 1]! - const xDiff = - item.transform[4] - (lastItem.transform[4] + lastItem.width) - - // Insert spaces for horizontally distant items - /* istanbul ignore next */ - if ( - item.height !== 0 && - lastItem.height !== 0 && - (xDiff > item.height || xDiff > lastItem.height) - ) { - const spaceCountA = Math.ceil(xDiff / item.height) - let spaceCount = spaceCountA - if (lastItem.height !== item.height) { - const spaceCountB = Math.ceil(xDiff / lastItem.height) - spaceCount = - spaceCountA > spaceCountB ? spaceCountA : spaceCountB - } - line += Array(spaceCount).fill("").join(" ") - } - line += item.str + } + return accum.concat(currentY); + }, []); + + const lines: string[] = []; + for (let i = 0; i < yCoords.length; i++) { + const y = yCoords[i]; + // Ensure y-coordinate is defined + /* istanbul ignore next */ + if (y == undefined) { + continue; + } + // Sort by x position within each line + const lineItems = lineData[y]!.sort((a, b) => a.transform[4] - b.transform[4]).filter( + (item) => !!item.str, + ); + const firstLineItem = lineItems[0]!; + let line = lineItems.length ? firstLineItem.str : ""; + + // Concatenate text items into a single line + for (let j = 1; j < lineItems.length; j++) { + const item = lineItems[j]!; + const lastItem = lineItems[j - 1]!; + const xDiff = item.transform[4] - (lastItem.transform[4] + lastItem.width); + + // Insert spaces for horizontally distant items + /* istanbul ignore next */ + if ( + item.height !== 0 && + lastItem.height !== 0 && + (xDiff > item.height || xDiff > lastItem.height) + ) { + const spaceCountA = Math.ceil(xDiff / item.height); + let spaceCount = spaceCountA; + if (lastItem.height !== item.height) { + const spaceCountB = Math.ceil(xDiff / lastItem.height); + spaceCount = spaceCountA > spaceCountB ? spaceCountA : spaceCountB; } - lines.push(line) + line += Array(spaceCount).fill("").join(" "); + } + line += item.str; } + lines.push(line); + } - return { - lines, - } + return { + lines, + }; } diff --git a/packages/core/src/perf.ts b/packages/core/src/perf.ts index 2ad4b6bd00..24b979249e 100644 --- a/packages/core/src/perf.ts +++ b/packages/core/src/perf.ts @@ -6,7 +6,7 @@ * @param end - The name of the ending performance mark. */ export function logMeasure(name: string, start: string, end: string) { - performance.mark(end) - const m = performance.measure(name, start, end) - console.debug(`⏲️ ${m.name}: ${m.duration | 0}ms`) + performance.mark(end); + const m = performance.measure(name, start, end); + console.debug(`⏲️ ${m.name}: ${m.duration | 0}ms`); } diff --git a/packages/core/src/performance.ts b/packages/core/src/performance.ts index c8c00f17f9..045b5e90c3 100644 --- a/packages/core/src/performance.ts +++ b/packages/core/src/performance.ts @@ -1,6 +1,6 @@ -import { performance, PerformanceObserver } from "perf_hooks" -import { logVerbose, toStringList } from "./util" -import prettyMilliseconds from "pretty-ms" +import { performance, PerformanceObserver } from "perf_hooks"; +import { logVerbose, toStringList } from "./util"; +import prettyMilliseconds from "pretty-ms"; /** * Marks a specific point in the application's performance timeline. @@ -8,7 +8,7 @@ import prettyMilliseconds from "pretty-ms" * @param id - The unique identifier for the performance mark. */ export function mark(id: string) { - performance.mark(id) + performance.mark(id); } /** @@ -23,18 +23,14 @@ export function mark(id: string) { * @returns The duration between the start and end marks in milliseconds. 
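 * @example
 * // Hypothetical usage sketch, inferred from the implementation below:
 * const done = measure("parsers.pdf", "file.pdf");
 * // ...timed work...
 * const elapsedMs = done("ok"); // milliseconds between the start and end marks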
*/ export function measure(id: string, detail?: string) { - const start = id + ".start" - const end = id + ".end" - const startm = performance.mark(start) - return (endDetail?: string) => { - const endm = performance.mark(end) - performance.measure( - `${id} ${toStringList(detail, endDetail)}`, - start, - end - ) - return endm.startTime - startm.startTime - } + const start = id + ".start"; + const end = id + ".end"; + const startm = performance.mark(start); + return (endDetail?: string) => { + const endm = performance.mark(end); + performance.measure(`${id} ${toStringList(detail, endDetail)}`, start, end); + return endm.startTime - startm.startTime; + }; } /** @@ -50,15 +46,15 @@ export function measure(id: string, detail?: string) { * - Logs the duration of each measurement and its cumulative total using `logVerbose`. */ export function logPerformance() { - const measures: Record = {} - const perfObserver = new PerformanceObserver((items) => { - items.getEntries().forEach((entry) => { - const total = (measures[entry.name] || 0) + entry.duration - measures[entry.name] = total - logVerbose( - `perf> ${entry.name} ${prettyMilliseconds(entry.duration)}/${prettyMilliseconds(total)}` - ) - }) - }) - perfObserver.observe({ entryTypes: ["measure"], buffered: true }) + const measures: Record = {}; + const perfObserver = new PerformanceObserver((items) => { + items.getEntries().forEach((entry) => { + const total = (measures[entry.name] || 0) + entry.duration; + measures[entry.name] = total; + logVerbose( + `perf> ${entry.name} ${prettyMilliseconds(entry.duration)}/${prettyMilliseconds(total)}`, + ); + }); + }); + perfObserver.observe({ entryTypes: ["measure"], buffered: true }); } diff --git a/packages/core/src/precision.test.ts b/packages/core/src/precision.test.ts index 1158eae6ba..822d6a59fc 100644 --- a/packages/core/src/precision.test.ts +++ b/packages/core/src/precision.test.ts @@ -1,53 +1,53 @@ -import { roundWithPrecision, renderWithPrecision } from "./precision" -import { describe, test } from "node:test" -import assert from "node:assert/strict" +import { roundWithPrecision, renderWithPrecision } from "./precision"; +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; describe("roundWithPrecision", () => { - test("returns NaN for undefined input", () => { - assert.ok(Number.isNaN(roundWithPrecision(undefined, 2))) - }) - - test("rounds to integer when digits is 0 or negative", () => { - assert.strictEqual(roundWithPrecision(5.678, 0), 6) - assert.strictEqual(roundWithPrecision(5.678, -1), 6) - }) - - test("returns 0 when input is 0", () => { - assert.strictEqual(roundWithPrecision(0, 2), 0) - }) - - test("rounds to specified digits", () => { - assert.strictEqual(roundWithPrecision(5.678, 2), 5.68) - assert.strictEqual(roundWithPrecision(5.678, 1), 5.7) - assert.strictEqual(roundWithPrecision(5.678, 3), 5.678) - }) - - test("uses provided rounding function", () => { - assert.strictEqual(roundWithPrecision(5.678, 1, Math.floor), 5.6) - assert.strictEqual(roundWithPrecision(5.678, 1, Math.ceil), 5.7) - }) -}) + test("returns NaN for undefined input", () => { + assert.ok(Number.isNaN(roundWithPrecision(undefined, 2))); + }); + + test("rounds to integer when digits is 0 or negative", () => { + assert.strictEqual(roundWithPrecision(5.678, 0), 6); + assert.strictEqual(roundWithPrecision(5.678, -1), 6); + }); + + test("returns 0 when input is 0", () => { + assert.strictEqual(roundWithPrecision(0, 2), 0); + }); + + test("rounds to specified digits", () => { + 
assert.strictEqual(roundWithPrecision(5.678, 2), 5.68); + assert.strictEqual(roundWithPrecision(5.678, 1), 5.7); + assert.strictEqual(roundWithPrecision(5.678, 3), 5.678); + }); + + test("uses provided rounding function", () => { + assert.strictEqual(roundWithPrecision(5.678, 1, Math.floor), 5.6); + assert.strictEqual(roundWithPrecision(5.678, 1, Math.ceil), 5.7); + }); +}); describe("renderWithPrecision", () => { - test("returns '?' for undefined input", () => { - assert.strictEqual(renderWithPrecision(undefined, 2), "?") - }) - - test("adds trailing zeros to match digit count", () => { - assert.strictEqual(renderWithPrecision(5, 2), "5.00") - assert.strictEqual(renderWithPrecision(5.6, 2), "5.60") - }) - - test("adds decimal point and zeros when no decimal", () => { - assert.strictEqual(renderWithPrecision(5, 3), "5.000") - }) - - test("uses provided rounding function", () => { - assert.strictEqual(renderWithPrecision(5.678, 1, Math.floor), "5.6") - assert.strictEqual(renderWithPrecision(5.678, 1, Math.ceil), "5.7") - }) - - test("doesn't add zeros when digits is 0", () => { - assert.strictEqual(renderWithPrecision(5.678, 0), "6") - }) -}) + test("returns '?' for undefined input", () => { + assert.strictEqual(renderWithPrecision(undefined, 2), "?"); + }); + + test("adds trailing zeros to match digit count", () => { + assert.strictEqual(renderWithPrecision(5, 2), "5.00"); + assert.strictEqual(renderWithPrecision(5.6, 2), "5.60"); + }); + + test("adds decimal point and zeros when no decimal", () => { + assert.strictEqual(renderWithPrecision(5, 3), "5.000"); + }); + + test("uses provided rounding function", () => { + assert.strictEqual(renderWithPrecision(5.678, 1, Math.floor), "5.6"); + assert.strictEqual(renderWithPrecision(5.678, 1, Math.ceil), "5.7"); + }); + + test("doesn't add zeros when digits is 0", () => { + assert.strictEqual(renderWithPrecision(5.678, 0), "6"); + }); +}); diff --git a/packages/core/src/precision.ts b/packages/core/src/precision.ts index 770d052b3c..9144bb8b86 100644 --- a/packages/core/src/precision.ts +++ b/packages/core/src/precision.ts @@ -7,21 +7,21 @@ * @returns The rounded number, or NaN if the input is undefined. */ export function roundWithPrecision( - x: number | undefined, - digits: number, - round = Math.round + x: number | undefined, + digits: number, + round = Math.round, ): number { - if (x === undefined) return NaN - digits = digits | 0 - // invalid digits input - if (digits <= 0) return round(x) - if (x === 0) return 0 - let r = 0 - while (r == 0 && digits < 21) { - const d = Math.pow(10, digits++) - r = round(x * d + Number.EPSILON) / d - } - return r + if (x === undefined) return NaN; + digits = digits | 0; + // invalid digits input + if (digits <= 0) return round(x); + if (x === 0) return 0; + let r = 0; + while (r == 0 && digits < 21) { + const d = Math.pow(10, digits++); + r = round(x * d + Number.EPSILON) / d; + } + return r; } /** @@ -33,20 +33,20 @@ export function roundWithPrecision( * @returns A string representing the number formatted with the specified precision. */ export function renderWithPrecision( - x: number | undefined, - digits: number, - round = Math.round + x: number | undefined, + digits: number, + round = Math.round, ): string { - if (x === undefined) return "?" - const r = roundWithPrecision(x, digits, round) - let rs = r.toLocaleString() - if (digits > 0) { - let doti = rs.indexOf(".") - if (doti < 0) { - rs += "." 
- doti = rs.length - 1 - } - while (rs.length - 1 - doti < digits) rs += "0" + if (x === undefined) return "?"; + const r = roundWithPrecision(x, digits, round); + let rs = r.toLocaleString(); + if (digits > 0) { + let doti = rs.indexOf("."); + if (doti < 0) { + rs += "."; + doti = rs.length - 1; } - return rs + while (rs.length - 1 - doti < digits) rs += "0"; + } + return rs; } diff --git a/packages/core/src/pretty.ts b/packages/core/src/pretty.ts index 2260b70d54..bd5d17042c 100644 --- a/packages/core/src/pretty.ts +++ b/packages/core/src/pretty.ts @@ -1,12 +1,7 @@ -import type { ChatCompletionUsage } from "./chattypes" -import _prettyBytes from "pretty-bytes" -import { - CHAR_DOWN_ARROW, - CHAR_TEMPERATURE, - CHAR_UP_ARROW, - CHAR_UP_DOWN_ARROWS, -} from "./constants" -import { roundWithPrecision } from "./precision" +import type { ChatCompletionUsage } from "./chattypes"; +import _prettyBytes from "pretty-bytes"; +import { CHAR_DOWN_ARROW, CHAR_TEMPERATURE, CHAR_UP_ARROW, CHAR_UP_DOWN_ARROWS } from "./constants"; +import { roundWithPrecision } from "./precision"; /** * Formats token usage into a human-readable string indicating tokens per second. @@ -17,8 +12,8 @@ import { roundWithPrecision } from "./precision" * @returns A string representing tokens per second, formatted as "X.XXt/s", or an empty string if input is invalid. */ export function prettyTokensPerSecond(usage: ChatCompletionUsage) { - if (!usage || !usage.duration || !usage.total_tokens) return "" - return `${(usage.total_tokens / (usage.duration / 1000)).toFixed(2)}t/s` + if (!usage || !usage.duration || !usage.total_tokens) return ""; + return `${(usage.total_tokens / (usage.duration / 1000)).toFixed(2)}t/s`; } /** @@ -30,26 +25,23 @@ export function prettyTokensPerSecond(usage: ChatCompletionUsage) { * "completion" for output tokens (adds "↓" as prefix). Defaults to no prefix. * @returns A formatted string with units "t" for tokens, "kt" for kilo-tokens, or "Mt" for mega-tokens. */ -export function prettyTokens( - n: number, - direction?: "prompt" | "completion" | "both" -) { - if (isNaN(n)) return "" - const prefix = - direction === "both" - ? CHAR_UP_DOWN_ARROWS - : direction === "prompt" - ? CHAR_UP_ARROW - : direction === "completion" - ? CHAR_DOWN_ARROW - : "" - if (n < 1000) return `${prefix}${n.toString()}t` - if (n < 1e6) return `${prefix}${(n / 1e3).toFixed(1)}kt` - return `${prefix}${(n / 1e6).toFixed(1)}Mt` +export function prettyTokens(n: number, direction?: "prompt" | "completion" | "both") { + if (isNaN(n)) return ""; + const prefix = + direction === "both" + ? CHAR_UP_DOWN_ARROWS + : direction === "prompt" + ? CHAR_UP_ARROW + : direction === "completion" + ? CHAR_DOWN_ARROW + : ""; + if (n < 1000) return `${prefix}${n.toString()}t`; + if (n < 1e6) return `${prefix}${(n / 1e3).toFixed(1)}kt`; + return `${prefix}${(n / 1e6).toFixed(1)}Mt`; } export function prettyParenthesized(value: any) { - return value !== undefined ? `(${value})` : "" + return value !== undefined ? `(${value})` : ""; } /** @@ -63,11 +55,11 @@ export function prettyParenthesized(value: any) { * @returns A formatted string representing the duration. 
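 * @example
 * // Hypothetical values, traced through the branches below:
 * prettyDuration(850)     // "850ms"
 * prettyDuration(12_500)  // "12.5s"
 * prettyDuration(90_000)  // "1.5m"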
*/ export function prettyDuration(ms: number) { - const prefix = "" - if (ms < 10000) return `${prefix}${Math.ceil(ms)}ms` - if (ms < 60 * 1000) return `${prefix}${(ms / 1000).toFixed(1)}s` - if (ms < 60 * 60 * 1000) return `${prefix}${(ms / 60 / 1000).toFixed(1)}m` - return `${prefix}${(ms / 60 / 60 / 1000).toFixed(1)}h` + const prefix = ""; + if (ms < 10000) return `${prefix}${Math.ceil(ms)}ms`; + if (ms < 60 * 1000) return `${prefix}${(ms / 1000).toFixed(1)}s`; + if (ms < 60 * 60 * 1000) return `${prefix}${(ms / 60 / 1000).toFixed(1)}m`; + return `${prefix}${(ms / 60 / 60 / 1000).toFixed(1)}h`; } /** @@ -77,12 +69,12 @@ export function prettyDuration(ms: number) { * @returns The formatted cost as a string, using cents or dollars. */ export function prettyCost(value: number) { - if (!value) return "" - return value <= 0.01 - ? `${(value * 100).toFixed(3)}¢` - : value <= 0.1 - ? `${(value * 100).toFixed(2)}¢` - : `${value.toFixed(2)}$` + if (!value) return ""; + return value <= 0.01 + ? `${(value * 100).toFixed(3)}¢` + : value <= 0.1 + ? `${(value * 100).toFixed(2)}¢` + : `${value.toFixed(2)}$`; } /** @@ -95,8 +87,8 @@ export function prettyCost(value: number) { * e.g., "1.2 kB", "3 MB". Returns an empty string for invalid input. */ export function prettyBytes(bytes: number) { - if (isNaN(bytes)) return "" - return _prettyBytes(bytes) + if (isNaN(bytes)) return ""; + return _prettyBytes(bytes); } /** @@ -106,23 +98,21 @@ export function prettyBytes(bytes: number) { * @returns A single string with valid input strings concatenated and separated by commas. */ export function prettyStrings(...token: string[]) { - const md = token - .filter((l) => l !== undefined && l !== null && l !== "") - .join(", ") - return md + const md = token.filter((l) => l !== undefined && l !== null && l !== "").join(", "); + return md; } export function prettyValue( - value: number | undefined, - options?: { emoji?: string; afterEmoji?: string; precision?: number } + value: number | undefined, + options?: { emoji?: string; afterEmoji?: string; precision?: number }, ) { - if (isNaN(value)) return "" - const { emoji = "", afterEmoji = "", precision = 2 } = options || {} - const v = roundWithPrecision(value, precision) - const s = `${emoji}${v}${afterEmoji}` - return s + if (isNaN(value)) return ""; + const { emoji = "", afterEmoji = "", precision = 2 } = options || {}; + const v = roundWithPrecision(value, precision); + const s = `${emoji}${v}${afterEmoji}`; + return s; } export function prettyTemperature(value: number) { - return prettyValue(value, { afterEmoji: CHAR_TEMPERATURE, precision: 1 }) + return prettyValue(value, { afterEmoji: CHAR_TEMPERATURE, precision: 1 }); } diff --git a/packages/core/src/progress.ts b/packages/core/src/progress.ts index f47c1c0aac..f830764d6b 100644 --- a/packages/core/src/progress.ts +++ b/packages/core/src/progress.ts @@ -2,23 +2,23 @@ * Defines a generalized way of reporting progress updates. 
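 *
 * A minimal concrete reporter might look like this (illustrative sketch only):
 *
 *   class ConsoleProgress extends Progress {
 *     report(value: { message?: string; count?: number; succeeded?: boolean }) {
 *       console.log(value.succeeded === false ? "✖" : "•", value.message ?? "");
 *     }
 *   }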
*/ export abstract class Progress { - abstract report(value: { - message?: string - count?: number - succeeded?: boolean | undefined - }): void + abstract report(value: { + message?: string; + count?: number; + succeeded?: boolean | undefined; + }): void; - start(message: string, count?: number) { - this.report({ message, count }) - } + start(message: string, count?: number) { + this.report({ message, count }); + } - succeed(message?: string) { - this.report({ message: message || "", succeeded: true }) - } + succeed(message?: string) { + this.report({ message: message || "", succeeded: true }); + } - fail(message?: string) { - this.report({ message: message || "", succeeded: false }) - } + fail(message?: string) { + this.report({ message: message || "", succeeded: false }); + } - stop() {} + stop() {} } diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index e07b4fd78d..7f165bde7c 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -1,46 +1,39 @@ // This file defines the creation of a prompt context, which includes various services // like file operations, web search, fuzzy search, vector search, and more. // The context is essential for executing prompts within a project environment. -import debug from "debug" -import { arrayify, assert } from "./util" -import { runtimeHost } from "./host" -import { MarkdownTrace } from "./trace" -import { createParsers } from "./parsers" -import { bingSearch, tavilySearch } from "./websearch" -import { - RunPromptContextNode, - createChatGenerationContext, -} from "./runpromptcontext" -import { GenerationOptions } from "./generation" -import { fuzzSearch } from "./fuzzsearch" -import { grepSearch } from "./grep" -import { resolveFileContents, toWorkspaceFile } from "./file" -import { vectorCreateIndex, vectorSearch } from "./vectorsearch" -import { Project } from "./server/messages" -import { shellParse } from "./shell" -import { PLimitPromiseQueue } from "./concurrency" -import { proxifyEnvVars } from "./vars" -import { HTMLEscape } from "./htmlescape" -import { hash } from "./crypto" -import { resolveModelConnectionInfo } from "./models" -import { DOCS_WEB_SEARCH_URL, VECTOR_INDEX_HASH_LENGTH } from "./constants" -import { fetch } from "./fetch" -import { fetchText } from "./fetchtext" -import { fileWriteCached } from "./filecache" -import { join } from "node:path" -import { createMicrosoftTeamsChannelClient } from "./teams" -import { dotGenaiscriptPath } from "./workdir" -import { - astGrepCreateChangeSet, - astGrepFindFiles, - astGrepParse, -} from "./astgrep" -import { createCache } from "./cache" -import { loadZ3Client } from "./z3" -import { genaiscriptDebug } from "./debug" -import { resolveLanguageModelConfigurations } from "./config" -import { deleteUndefinedValues } from "./cleaners" -const dbg = genaiscriptDebug("promptcontext") +import debug from "debug"; +import { arrayify, assert } from "./util"; +import { runtimeHost } from "./host"; +import { MarkdownTrace } from "./trace"; +import { createParsers } from "./parsers"; +import { bingSearch, tavilySearch } from "./websearch"; +import { RunPromptContextNode, createChatGenerationContext } from "./runpromptcontext"; +import { GenerationOptions } from "./generation"; +import { fuzzSearch } from "./fuzzsearch"; +import { grepSearch } from "./grep"; +import { resolveFileContents, toWorkspaceFile } from "./file"; +import { vectorCreateIndex, vectorSearch } from "./vectorsearch"; +import { Project } from "./server/messages"; 
+import { shellParse } from "./shell"; +import { PLimitPromiseQueue } from "./concurrency"; +import { proxifyEnvVars } from "./vars"; +import { HTMLEscape } from "./htmlescape"; +import { hash } from "./crypto"; +import { resolveModelConnectionInfo } from "./models"; +import { DOCS_WEB_SEARCH_URL, VECTOR_INDEX_HASH_LENGTH } from "./constants"; +import { fetch } from "./fetch"; +import { fetchText } from "./fetchtext"; +import { fileWriteCached } from "./filecache"; +import { join } from "node:path"; +import { createMicrosoftTeamsChannelClient } from "./teams"; +import { dotGenaiscriptPath } from "./workdir"; +import { astGrepCreateChangeSet, astGrepFindFiles, astGrepParse } from "./astgrep"; +import { createCache } from "./cache"; +import { loadZ3Client } from "./z3"; +import { genaiscriptDebug } from "./debug"; +import { resolveLanguageModelConfigurations } from "./config"; +import { deleteUndefinedValues } from "./cleaners"; +const dbg = genaiscriptDebug("promptcontext"); /** * Creates a prompt context for the specified project, variables, trace, options, and model. @@ -53,365 +46,339 @@ const dbg = genaiscriptDebug("promptcontext") * @returns A context object providing methods for file operations, web retrieval, searches, execution, container operations, caching, and other utilities. Includes workspace file system operations (read/write files, grep, find files), retrieval methods (web search, fuzzy search, vector search, index creation), and host operations (command execution, browsing, container management, resource publishing, server management, etc.). */ export async function createPromptContext( - prj: Project, - ev: ExpansionVariables, - trace: MarkdownTrace, - options: GenerationOptions, - model: string + prj: Project, + ev: ExpansionVariables, + trace: MarkdownTrace, + options: GenerationOptions, + model: string, ) { - const { cancellationToken } = options - const { generator, vars, dbg, output, ...varsNoGenerator } = ev + const { cancellationToken } = options; + const { generator, vars, dbg, output, ...varsNoGenerator } = ev; - // Clone variables to prevent modification of the original object - const env = { - generator, - vars, - output, - dbg, - ...structuredClone(varsNoGenerator), - } - assert(!!output, "missing output") - // Create parsers for the given trace and model - const parsers = await createParsers({ trace, cancellationToken, model }) - const path = runtimeHost.path - const runDir = ev.runDir - assert(!!runDir, "missing run directory") + // Clone variables to prevent modification of the original object + const env = { + generator, + vars, + output, + dbg, + ...structuredClone(varsNoGenerator), + }; + assert(!!output, "missing output"); + // Create parsers for the given trace and model + const parsers = await createParsers({ trace, cancellationToken, model }); + const path = runtimeHost.path; + const runDir = ev.runDir; + assert(!!runDir, "missing run directory"); - // Define the workspace file system operations - const workspace: WorkspaceFileSystem = { - readText: (f) => runtimeHost.workspace.readText(f), - readJSON: (f, o) => runtimeHost.workspace.readJSON(f, o), - readYAML: (f, o) => runtimeHost.workspace.readYAML(f, o), - readXML: (f, o) => runtimeHost.workspace.readXML(f, o), - readCSV: (f, o) => runtimeHost.workspace.readCSV(f, o), - readINI: (f, o) => runtimeHost.workspace.readINI(f, o), - readData: (f, o) => runtimeHost.workspace.readData(f, o), - writeText: (f, c) => runtimeHost.workspace.writeText(f, c), - appendText: (f, c) => 
runtimeHost.workspace.appendText(f, c), - writeCached: async (f, options) => { - const { scope } = options || {} - const dir = - scope === "run" - ? join(runDir, "files") - : dotGenaiscriptPath("cache", "files") - return await fileWriteCached(dir, f, { - ...(options || {}), - cancellationToken, - trace, - }) - }, - copyFile: (src, dest) => runtimeHost.workspace.copyFile(src, dest), - cache: (n) => runtimeHost.workspace.cache(n), - findFiles: async (pattern, options) => { - const res = await runtimeHost.workspace.findFiles(pattern, options) - return res - }, - stat: (filename) => runtimeHost.workspace.stat(filename), - writeFiles: (file) => runtimeHost.workspace.writeFiles(file), - grep: async ( - query, - grepOptions: string | WorkspaceGrepOptions, - grepOptions2?: WorkspaceGrepOptions - ) => { - if (typeof grepOptions === "string") { - const p = runtimeHost.path - .dirname(grepOptions) - .replace(/(^|\/)\*\*$/, "") - const g = runtimeHost.path.basename(grepOptions) - grepOptions = { - path: p || undefined, - glob: g || undefined, - ...(grepOptions2 || {}), - } as WorkspaceGrepOptions - } - const { path, glob, ...rest } = grepOptions || {} - const grepTrace = trace.startTraceDetails( - `🌐 grep ${HTMLEscape(typeof query === "string" ? query : query.source)} ${glob ? `--glob ${glob}` : ""} ${path || ""}` - ) - try { - const { files, matches } = await grepSearch(query, { - path, - glob, - ...rest, - trace: grepTrace, - cancellationToken, - }) - grepTrace.files(matches, { - model, - secrets: env.secrets, - maxLength: 0, - }) - return { files, matches } - } finally { - grepTrace.endDetails() - } - }, - } + // Define the workspace file system operations + const workspace: WorkspaceFileSystem = { + readText: (f) => runtimeHost.workspace.readText(f), + readJSON: (f, o) => runtimeHost.workspace.readJSON(f, o), + readYAML: (f, o) => runtimeHost.workspace.readYAML(f, o), + readXML: (f, o) => runtimeHost.workspace.readXML(f, o), + readCSV: (f, o) => runtimeHost.workspace.readCSV(f, o), + readINI: (f, o) => runtimeHost.workspace.readINI(f, o), + readData: (f, o) => runtimeHost.workspace.readData(f, o), + writeText: (f, c) => runtimeHost.workspace.writeText(f, c), + appendText: (f, c) => runtimeHost.workspace.appendText(f, c), + writeCached: async (f, options) => { + const { scope } = options || {}; + const dir = scope === "run" ? join(runDir, "files") : dotGenaiscriptPath("cache", "files"); + return await fileWriteCached(dir, f, { + ...(options || {}), + cancellationToken, + trace, + }); + }, + copyFile: (src, dest) => runtimeHost.workspace.copyFile(src, dest), + cache: (n) => runtimeHost.workspace.cache(n), + findFiles: async (pattern, options) => { + const res = await runtimeHost.workspace.findFiles(pattern, options); + return res; + }, + stat: (filename) => runtimeHost.workspace.stat(filename), + writeFiles: (file) => runtimeHost.workspace.writeFiles(file), + grep: async ( + query, + grepOptions: string | WorkspaceGrepOptions, + grepOptions2?: WorkspaceGrepOptions, + ) => { + if (typeof grepOptions === "string") { + const p = runtimeHost.path.dirname(grepOptions).replace(/(^|\/)\*\*$/, ""); + const g = runtimeHost.path.basename(grepOptions); + grepOptions = { + path: p || undefined, + glob: g || undefined, + ...(grepOptions2 || {}), + } as WorkspaceGrepOptions; + } + const { path, glob, ...rest } = grepOptions || {}; + const grepTrace = trace.startTraceDetails( + `🌐 grep ${HTMLEscape(typeof query === "string" ? query : query.source)} ${glob ? 
`--glob ${glob}` : ""} ${path || ""}`, + ); + try { + const { files, matches } = await grepSearch(query, { + path, + glob, + ...rest, + trace: grepTrace, + cancellationToken, + }); + grepTrace.files(matches, { + model, + secrets: env.secrets, + maxLength: 0, + }); + return { files, matches }; + } finally { + grepTrace.endDetails(); + } + }, + }; - // Define retrieval operations - const retrieval: Retrieval = { - webSearch: async (q, options) => { - const { provider, count, ignoreMissingProvider } = options || {} - // Conduct a web search and return the results - const webTrace = trace.startTraceDetails( - `🌐 web search ${HTMLEscape(q)}` - ) - try { - let files: WorkspaceFile[] - if (provider === "bing") - files = await bingSearch(q, { trace: webTrace, count }) - else if (provider === "tavily") - files = await tavilySearch(q, { trace: webTrace, count }) - else { - for (const f of [bingSearch, tavilySearch]) { - files = await f(q, { - ignoreMissingApiKey: true, - trace: webTrace, - count, - }) - if (files) break - } - } - if (!files) { - if (ignoreMissingProvider) { - webTrace.log(`no search provider configured`) - return undefined - } - throw new Error( - `No search provider configured. See ${DOCS_WEB_SEARCH_URL}.` - ) - } - webTrace.files(files, { - model, - secrets: env.secrets, - maxLength: 0, - }) - return files - } finally { - webTrace.endDetails() - } - }, - fuzzSearch: async (q, files_, searchOptions) => { - // Perform a fuzzy search on the provided files - const files = arrayify(files_) - searchOptions = searchOptions || {} - const fuzzTrace = trace.startTraceDetails( - `🧐 fuzz search ${HTMLEscape(q)}` - ) - try { - if (!files?.length) { - fuzzTrace.error("no files provided") - return [] - } else { - const res = await fuzzSearch(q, files, { - ...searchOptions, - trace: fuzzTrace, - }) - fuzzTrace.files(res, { - model, - secrets: env.secrets, - skipIfEmpty: true, - maxLength: 0, - }) - return res - } - } finally { - fuzzTrace.endDetails() - } - }, - index: async (indexId, indexOptions) => { - const opts = { - ...(indexOptions || {}), - embeddingsModel: - indexOptions?.embeddingsModel || options?.embeddingsModel, - } - const res = await vectorCreateIndex(indexId, { - ...opts, - trace, - cancellationToken, - }) - return res - }, - vectorSearch: async (q, files_, searchOptions) => { - // Perform a vector-based search on the provided files - const files = arrayify(files_).map(toWorkspaceFile) - searchOptions = { ...(searchOptions || {}) } - const vecTrace = trace.startTraceDetails( - `🔍 vector search ${HTMLEscape(q)}` - ) - try { - if (!files?.length) { - vecTrace.error("no files provided") - return [] - } + // Define retrieval operations + const retrieval: Retrieval = { + webSearch: async (q, options) => { + const { provider, count, ignoreMissingProvider } = options || {}; + // Conduct a web search and return the results + const webTrace = trace.startTraceDetails(`🌐 web search ${HTMLEscape(q)}`); + try { + let files: WorkspaceFile[]; + if (provider === "bing") files = await bingSearch(q, { trace: webTrace, count }); + else if (provider === "tavily") files = await tavilySearch(q, { trace: webTrace, count }); + else { + for (const f of [bingSearch, tavilySearch]) { + files = await f(q, { + ignoreMissingApiKey: true, + trace: webTrace, + count, + }); + if (files) break; + } + } + if (!files) { + if (ignoreMissingProvider) { + webTrace.log(`no search provider configured`); + return undefined; + } + throw new Error(`No search provider configured. 
See ${DOCS_WEB_SEARCH_URL}.`); + } + webTrace.files(files, { + model, + secrets: env.secrets, + maxLength: 0, + }); + return files; + } finally { + webTrace.endDetails(); + } + }, + fuzzSearch: async (q, files_, searchOptions) => { + // Perform a fuzzy search on the provided files + const files = arrayify(files_); + searchOptions = searchOptions || {}; + const fuzzTrace = trace.startTraceDetails(`🧐 fuzz search ${HTMLEscape(q)}`); + try { + if (!files?.length) { + fuzzTrace.error("no files provided"); + return []; + } else { + const res = await fuzzSearch(q, files, { + ...searchOptions, + trace: fuzzTrace, + }); + fuzzTrace.files(res, { + model, + secrets: env.secrets, + skipIfEmpty: true, + maxLength: 0, + }); + return res; + } + } finally { + fuzzTrace.endDetails(); + } + }, + index: async (indexId, indexOptions) => { + const opts = { + ...(indexOptions || {}), + embeddingsModel: indexOptions?.embeddingsModel || options?.embeddingsModel, + }; + const res = await vectorCreateIndex(indexId, { + ...opts, + trace, + cancellationToken, + }); + return res; + }, + vectorSearch: async (q, files_, searchOptions) => { + // Perform a vector-based search on the provided files + const files = arrayify(files_).map(toWorkspaceFile); + searchOptions = { ...(searchOptions || {}) }; + const vecTrace = trace.startTraceDetails(`🔍 vector search ${HTMLEscape(q)}`); + try { + if (!files?.length) { + vecTrace.error("no files provided"); + return []; + } - await resolveFileContents(files) - searchOptions.embeddingsModel = - searchOptions?.embeddingsModel ?? options?.embeddingsModel - const key = - searchOptions?.indexName || - (await hash( - { files, searchOptions }, - { length: VECTOR_INDEX_HASH_LENGTH } - )) - const res = await vectorSearch(key, q, files, { - ...searchOptions, - trace: vecTrace, - cancellationToken, - }) - return res - } finally { - vecTrace.endDetails() - } - }, - } + await resolveFileContents(files); + searchOptions.embeddingsModel = searchOptions?.embeddingsModel ?? 
options?.embeddingsModel; + const key = + searchOptions?.indexName || + (await hash({ files, searchOptions }, { length: VECTOR_INDEX_HASH_LENGTH })); + const res = await vectorSearch(key, q, files, { + ...searchOptions, + trace: vecTrace, + cancellationToken, + }); + return res; + } finally { + vecTrace.endDetails(); + } + }, + }; - // Define the host for executing commands, browsing, and other operations - const promptHost: PromptHost = Object.freeze({ - logger: (category) => debug(category), - mcpServer: async (options) => - await runtimeHost.mcp.startMcpServer(options, { - trace, - cancellationToken, - }), - publishResource: async (name, content, options) => - await runtimeHost.resources.publishResource(name, content, options), - resources: async () => await runtimeHost.resources.resources(), - fetch: (url, options) => fetch(url, { ...(options || {}), trace }), - fetchText: (url, options) => - fetchText(url, { ...(options || {}), trace }), - resolveLanguageModel: async (modelId) => { - const { configuration } = await resolveModelConnectionInfo( - { model: modelId }, - { - token: false, - trace, - } - ) - return { - provider: configuration?.provider, - model: configuration?.model, - } satisfies LanguageModelReference - }, - resolveLanguageModelProvider: async (id, options) => { - if (!id) throw new Error("provider id is required") - const [provider] = await resolveLanguageModelConfigurations(id, { - ...(options || {}), - models: !!options?.listModels, - error: false, - hide: false, - token: true, - }) - if (provider.error) { - dbg(`Error resolving provider %s: %s`, id, provider.error) - return undefined - } - return deleteUndefinedValues({ - id: provider.provider, - error: provider.error, - base: provider.base, - version: provider.version, - token: options?.token ? provider.token : undefined, - models: options?.listModels ? 
provider.models || [] : undefined, - } satisfies LanguageModelProviderInfo) - }, - cache: async (name: string) => { - const res = createCache(name, { type: "memory" }) - return res - }, - z3: () => loadZ3Client({ trace, cancellationToken }), - exec: async ( - command: string, - args?: string[] | ShellOptions, - options?: ShellOptions - ) => { - // Parse the command and arguments if necessary - if (!Array.isArray(args) && typeof args === "object") { - // exec("cmd arg arg", {...}) - if (options !== undefined) - throw new Error("Options must be the second argument") - options = args as ShellOptions - const parsed = shellParse(command) - command = parsed[0] - args = parsed.slice(1) - } else if (args === undefined) { - // exec("cmd arg arg") - const parsed = shellParse(command) - command = parsed[0] - args = parsed.slice(1) - } - // Execute the command using the runtime host - const res = await runtimeHost.exec(undefined, command, args, { - ...(options || {}), - trace, - }) - return res - }, - browse: async (url, options) => { - // Browse a URL and return the result - const res = await runtimeHost.browse(url, { - trace, - ...(options || {}), - }) - return res - }, - container: async (options) => { - // Execute operations within a container and return the result - const res = await runtimeHost.container({ - ...(options || {}), - trace, - }) - return res + // Define the host for executing commands, browsing, and other operations + const promptHost: PromptHost = Object.freeze({ + logger: (category) => debug(category), + mcpServer: async (options) => + await runtimeHost.mcp.startMcpServer(options, { + trace, + cancellationToken, + }), + publishResource: async (name, content, options) => + await runtimeHost.resources.publishResource(name, content, options), + resources: async () => await runtimeHost.resources.resources(), + fetch: (url, options) => fetch(url, { ...(options || {}), trace }), + fetchText: (url, options) => fetchText(url, { ...(options || {}), trace }), + resolveLanguageModel: async (modelId) => { + const { configuration } = await resolveModelConnectionInfo( + { model: modelId }, + { + token: false, + trace, }, - select: async (message, choices, options) => - await runtimeHost.select(message, choices, options), - input: async (message) => await runtimeHost.input(message), - confirm: async (message) => await runtimeHost.confirm(message), - promiseQueue: (concurrency) => new PLimitPromiseQueue(concurrency), - contentSafety: async (id) => - await runtimeHost.contentSafety(id || options?.contentSafety, { - trace, - }), - python: async (pyOptions) => - await runtimeHost.python({ - trace, - cancellationToken, - ...(pyOptions || {}), - }), - teamsChannel: async (url) => createMicrosoftTeamsChannelClient(url), - astGrep: async () => - Object.freeze({ - changeset: astGrepCreateChangeSet, - search: (lang, glob, matcher, sgOptions) => - astGrepFindFiles(lang, glob, matcher, { - ...(sgOptions || {}), - cancellationToken, - }), - parse: (file, sgOptions) => - astGrepParse(file, { - ...(sgOptions || {}), - cancellationToken, - }), - }), - }) + ); + return { + provider: configuration?.provider, + model: configuration?.model, + } satisfies LanguageModelReference; + }, + resolveLanguageModelProvider: async (id, options) => { + if (!id) throw new Error("provider id is required"); + const [provider] = await resolveLanguageModelConfigurations(id, { + ...(options || {}), + models: !!options?.listModels, + error: false, + hide: false, + token: true, + }); + if (provider.error) { + dbg(`Error resolving 
provider %s: %s`, id, provider.error); + return undefined; + } + return deleteUndefinedValues({ + id: provider.provider, + error: provider.error, + base: provider.base, + version: provider.version, + token: options?.token ? provider.token : undefined, + models: options?.listModels ? provider.models || [] : undefined, + } satisfies LanguageModelProviderInfo); + }, + cache: async (name: string) => { + const res = createCache(name, { type: "memory" }); + return res; + }, + z3: () => loadZ3Client({ trace, cancellationToken }), + exec: async (command: string, args?: string[] | ShellOptions, options?: ShellOptions) => { + // Parse the command and arguments if necessary + if (!Array.isArray(args) && typeof args === "object") { + // exec("cmd arg arg", {...}) + if (options !== undefined) throw new Error("Options must be the second argument"); + options = args as ShellOptions; + const parsed = shellParse(command); + command = parsed[0]; + args = parsed.slice(1); + } else if (args === undefined) { + // exec("cmd arg arg") + const parsed = shellParse(command); + command = parsed[0]; + args = parsed.slice(1); + } + // Execute the command using the runtime host + const res = await runtimeHost.exec(undefined, command, args, { + ...(options || {}), + trace, + }); + return res; + }, + browse: async (url, options) => { + // Browse a URL and return the result + const res = await runtimeHost.browse(url, { + trace, + ...(options || {}), + }); + return res; + }, + container: async (options) => { + // Execute operations within a container and return the result + const res = await runtimeHost.container({ + ...(options || {}), + trace, + }); + return res; + }, + select: async (message, choices, options) => + await runtimeHost.select(message, choices, options), + input: async (message) => await runtimeHost.input(message), + confirm: async (message) => await runtimeHost.confirm(message), + promiseQueue: (concurrency) => new PLimitPromiseQueue(concurrency), + contentSafety: async (id) => + await runtimeHost.contentSafety(id || options?.contentSafety, { + trace, + }), + python: async (pyOptions) => + await runtimeHost.python({ + trace, + cancellationToken, + ...(pyOptions || {}), + }), + teamsChannel: async (url) => createMicrosoftTeamsChannelClient(url), + astGrep: async () => + Object.freeze({ + changeset: astGrepCreateChangeSet, + search: (lang, glob, matcher, sgOptions) => + astGrepFindFiles(lang, glob, matcher, { + ...(sgOptions || {}), + cancellationToken, + }), + parse: (file, sgOptions) => + astGrepParse(file, { + ...(sgOptions || {}), + cancellationToken, + }), + }), + }); - // Freeze project options to prevent modification - const projectOptions = Object.freeze({ prj, env }) - const ctx: PromptContext & RunPromptContextNode = { - ...createChatGenerationContext(options, trace, projectOptions), - script: () => {}, - system: () => {}, - env: undefined, // set later - path, - fs: workspace, - workspace, - parsers, - retrieval, - host: promptHost, - } - env.generator = ctx - env.vars = proxifyEnvVars(env.vars) - ctx.env = Object.freeze(env as ExpansionVariables) + // Freeze project options to prevent modification + const projectOptions = Object.freeze({ prj, env }); + const ctx: PromptContext & RunPromptContextNode = { + ...createChatGenerationContext(options, trace, projectOptions), + script: () => {}, + system: () => {}, + env: undefined, // set later + path, + fs: workspace, + workspace, + parsers, + retrieval, + host: promptHost, + }; + env.generator = ctx; + env.vars = proxifyEnvVars(env.vars); + ctx.env = 
Object.freeze(env as ExpansionVariables); - return ctx + return ctx; } diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index fc8f3cb832..300249da56 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -1,220 +1,209 @@ // Importing various utility functions and constants from different modules. -import { dataToMarkdownTable, CSVTryParse } from "./csv" -import { renderFileContent, resolveFileContent } from "./file" -import { addLineNumbers, extractRange } from "./liner" -import { JSONSchemaStringifyToTypeScript } from "./schema" -import { approximateTokens, truncateTextToTokens } from "./tokens" -import { MarkdownTrace, TraceOptions } from "./trace" +import { dataToMarkdownTable, CSVTryParse } from "./csv"; +import { renderFileContent, resolveFileContent } from "./file"; +import { addLineNumbers, extractRange } from "./liner"; +import { JSONSchemaStringifyToTypeScript } from "./schema"; +import { approximateTokens, truncateTextToTokens } from "./tokens"; +import { MarkdownTrace, TraceOptions } from "./trace"; +import { arrayify, assert, ellipse, logError, logWarn, toStringList } from "./util"; +import { YAMLStringify } from "./yaml"; import { - arrayify, - assert, - ellipse, - logError, - logWarn, - toStringList, -} from "./util" -import { YAMLStringify } from "./yaml" -import { - DEFAULT_FENCE_FORMAT, - MARKDOWN_PROMPT_FENCE, - PROMPT_FENCE, - PROMPTDOM_PREVIEW_MAX_LENGTH, - PROMPTY_REGEX, - SANITIZED_PROMPT_INJECTION, - SCHEMA_DEFAULT_FORMAT, - TEMPLATE_ARG_DATA_SLICE_SAMPLE, - TEMPLATE_ARG_FILE_MAX_TOKENS, -} from "./constants" -import { - appendAssistantMessage, - appendSystemMessage, - appendUserMessage, -} from "./chat" -import { errorMessage } from "./error" -import { sliceData, tidyData } from "./tidy" -import { dedent } from "./indent" -import { ChatCompletionMessageParam } from "./chattypes" -import { resolveTokenEncoder } from "./encoders" -import { expandFileOrWorkspaceFiles } from "./fs" -import { interpolateVariables } from "./mustache" -import { diffCreatePatch } from "./diff" -import { promptyParse } from "./prompty" -import { jinjaRenderChatMessage } from "./jinja" -import { runtimeHost } from "./host" -import { hash } from "./crypto" -import { tryZodToJsonSchema } from "./zod" -import { GROQEvaluate } from "./groq" -import { trimNewlines } from "./unwrappers" -import { CancellationOptions } from "./cancellation" -import { promptParametersSchemaToJSONSchema } from "./parameters" -import { redactSecrets } from "./secretscanner" -import { escapeToolName } from "./tools" -import { measure } from "./performance" -import debug from "debug" -import { imageEncodeForLLM } from "./image" -import { providerFeatures } from "./features" -import { parseModelIdentifier } from "./models" -const dbg = debug("genaiscript:prompt:dom") -const dbgMcp = debug("genaiscript:prompt:dom:mcp") + DEFAULT_FENCE_FORMAT, + MARKDOWN_PROMPT_FENCE, + PROMPT_FENCE, + PROMPTDOM_PREVIEW_MAX_LENGTH, + PROMPTY_REGEX, + SANITIZED_PROMPT_INJECTION, + SCHEMA_DEFAULT_FORMAT, + TEMPLATE_ARG_DATA_SLICE_SAMPLE, + TEMPLATE_ARG_FILE_MAX_TOKENS, +} from "./constants"; +import { appendAssistantMessage, appendSystemMessage, appendUserMessage } from "./chat"; +import { errorMessage } from "./error"; +import { sliceData, tidyData } from "./tidy"; +import { dedent } from "./indent"; +import { ChatCompletionMessageParam } from "./chattypes"; +import { resolveTokenEncoder } from "./encoders"; +import { expandFileOrWorkspaceFiles } from "./fs"; +import { 
interpolateVariables } from "./mustache";
+import { diffCreatePatch } from "./diff";
+import { promptyParse } from "./prompty";
+import { jinjaRenderChatMessage } from "./jinja";
+import { runtimeHost } from "./host";
+import { hash } from "./crypto";
+import { tryZodToJsonSchema } from "./zod";
+import { GROQEvaluate } from "./groq";
+import { trimNewlines } from "./unwrappers";
+import { CancellationOptions } from "./cancellation";
+import { promptParametersSchemaToJSONSchema } from "./parameters";
+import { redactSecrets } from "./secretscanner";
+import { escapeToolName } from "./tools";
+import { measure } from "./performance";
+import debug from "debug";
+import { imageEncodeForLLM } from "./image";
+import { providerFeatures } from "./features";
+import { parseModelIdentifier } from "./models";
+const dbg = debug("genaiscript:prompt:dom");
+const dbgMcp = debug("genaiscript:prompt:dom:mcp");
 
 // Definition of the PromptNode interface which is an essential part of the code structure.
 export interface PromptNode extends ContextExpansionOptions {
-    // Describes the type of the node.
-    type?:
-        | "text"
-        | "image"
-        | "schema"
-        | "tool"
-        | "fileMerge"
-        | "outputProcessor"
-        | "stringTemplate"
-        | "assistant"
-        | "system"
-        | "def"
-        | "defData"
-        | "chatParticipant"
-        | "fileOutput"
-        | "importTemplate"
-        | "mcpServer"
-        | undefined
-    children?: PromptNode[] // Child nodes for hierarchical structure
-    error?: unknown // Error information if present
-    tokens?: number // Token count for the node
-
-    /**
-     * Rendered markdown preview of the node
-     */
-    preview?: string
-    name?: string
-
-    /**
-     * Node removed from the tree
-     */
-    deleted?: boolean
+  // Describes the type of the node.
+  type?:
+    | "text"
+    | "image"
+    | "schema"
+    | "tool"
+    | "fileMerge"
+    | "outputProcessor"
+    | "stringTemplate"
+    | "assistant"
+    | "system"
+    | "def"
+    | "defData"
+    | "chatParticipant"
+    | "fileOutput"
+    | "importTemplate"
+    | "mcpServer"
+    | undefined;
+  children?: PromptNode[]; // Child nodes for hierarchical structure
+  error?: unknown; // Error information if present
+  tokens?: number; // Token count for the node
+
+  /**
+   * Rendered markdown preview of the node
+   */
+  preview?: string;
+  name?: string;
+
+  /**
+   * Node removed from the tree
+   */
+  deleted?: boolean;
 }
 
 // Interface for a text node in the prompt tree.
 export interface PromptTextNode extends PromptNode {
-    type: "text"
-    value: Awaitable<string> // The text content, potentially awaiting resolution
-    resolved?: string // Resolved text content
+  type: "text";
+  value: Awaitable<string>; // The text content, potentially awaiting resolution
+  resolved?: string; // Resolved text content
 }
 
 // Interface for a definition node, which includes options.
 export interface PromptDefNode extends PromptNode, DefOptions {
-    type: "def"
-    name: string // Name of the definition
-    value: Awaitable<WorkspaceFile> // File associated with the definition
-    resolved?: WorkspaceFile // Resolved file content
+  type: "def";
+  name: string; // Name of the definition
+  value: Awaitable<WorkspaceFile>; // File associated with the definition
+  resolved?: WorkspaceFile; // Resolved file content
 }
 
 export interface PromptDefDataNode extends PromptNode, DefDataOptions {
-    type: "defData"
-    name: string // Name of the definition
-    value: Awaitable<object | object[]> // Data associated with the definition
-    resolved?: object | object[]
+  type: "defData";
+  name: string; // Name of the definition
+  value: Awaitable<object | object[]>; // Data associated with the definition
+  resolved?: object | object[];
 }
 
 export interface PromptPrediction {
-    type: "content"
-    content: string
+  type: "content";
+  content: string;
}
 
 // Interface for an assistant node.
 export interface PromptAssistantNode extends PromptNode {
-    type: "assistant"
-    value: Awaitable<string> // Assistant-related content
-    resolved?: string // Resolved assistant content
+  type: "assistant";
+  value: Awaitable<string>; // Assistant-related content
+  resolved?: string; // Resolved assistant content
 }
 
 export interface PromptSystemNode extends PromptNode {
-    type: "system"
-    value: Awaitable<string> // Assistant-related content
-    resolved?: string // Resolved assistant content
+  type: "system";
+  value: Awaitable<string>; // Assistant-related content
+  resolved?: string; // Resolved assistant content
 }
 
 // Interface for a string template node.
 export interface PromptStringTemplateNode extends PromptNode {
-    type: "stringTemplate"
-    strings: TemplateStringsArray // Template strings
-    args: any[] // Arguments for the template
-    transforms: ((s: string) => Awaitable<string>)[] // Transform functions to apply to the template
-    resolved?: string // Resolved templated content
-    role?: ChatMessageRole
+  type: "stringTemplate";
+  strings: TemplateStringsArray; // Template strings
+  args: any[]; // Arguments for the template
+  transforms: ((s: string) => Awaitable<string>)[]; // Transform functions to apply to the template
+  resolved?: string; // Resolved templated content
+  role?: ChatMessageRole;
 }
 
 // Interface for an import template node.
 export interface PromptImportTemplate extends PromptNode {
-    type: "importTemplate"
-    files: ElementOrArray<string | WorkspaceFile> // Files to import
-    args?: Record<string, ImportTemplateArgumentType> // Arguments for the template
-    options?: ImportTemplateOptions // Additional options
+  type: "importTemplate";
+  files: ElementOrArray<string | WorkspaceFile>; // Files to import
+  args?: Record<string, ImportTemplateArgumentType>; // Arguments for the template
+  options?: ImportTemplateOptions; // Additional options
 }
 
 // Interface representing a prompt image.
 export interface PromptImage {
-    url: string // URL of the image
-    filename?: string // Optional filename
-    detail?: "low" | "high" // Image detail level
-    width?: number // Width of the image
-    height?: number // Height of the image
-    type?: string // MIME type of the image
+  url: string; // URL of the image
+  filename?: string; // Optional filename
+  detail?: "low" | "high"; // Image detail level
+  width?: number; // Width of the image
+  height?: number; // Height of the image
+  type?: string; // MIME type of the image
 }
 
 // Interface for an image node.
 export interface PromptImageNode extends PromptNode {
-    type: "image"
-    value: Awaitable<PromptImage> // Image information
-    resolved?: PromptImage // Resolved image information
+  type: "image";
+  value: Awaitable<PromptImage>; // Image information
+  resolved?: PromptImage; // Resolved image information
 }
 
 // Interface for a schema node.
 export interface PromptSchemaNode extends PromptNode {
-    type: "schema"
-    name: string // Name of the schema
-    value: JSONSchema // Schema definition
-    options?: DefSchemaOptions // Additional options
+  type: "schema";
+  name: string; // Name of the schema
+  value: JSONSchema; // Schema definition
+  options?: DefSchemaOptions; // Additional options
 }
 
 // Interface for a function node.
 export interface PromptToolNode extends PromptNode {
-    type: "tool"
-    name: string // Function name
-    description: string // Description of the function
-    parameters: JSONSchema // Parameters for the function
-    impl: ChatFunctionHandler // Implementation of the function
-    options?: DefToolOptions
-    generator: ChatGenerationContext
+  type: "tool";
+  name: string; // Function name
+  description: string; // Description of the function
+  parameters: JSONSchema; // Parameters for the function
+  impl: ChatFunctionHandler; // Implementation of the function
+  options?: DefToolOptions;
+  generator: ChatGenerationContext;
 }
 
 export interface PromptMcpServerNode extends PromptNode {
-    type: "mcpServer"
-    config: McpServerConfig
+  type: "mcpServer";
+  config: McpServerConfig;
 }
 
 // Interface for a file merge node.
 export interface PromptFileMergeNode extends PromptNode {
-    type: "fileMerge"
-    fn: FileMergeHandler // Handler for the file merge
+  type: "fileMerge";
+  fn: FileMergeHandler; // Handler for the file merge
 }
 
 // Interface for an output processor node.
 export interface PromptOutputProcessorNode extends PromptNode {
-    type: "outputProcessor"
-    fn: PromptOutputProcessorHandler // Handler for the output processing
+  type: "outputProcessor";
+  fn: PromptOutputProcessorHandler; // Handler for the output processing
 }
 
 // Interface for a chat participant node.
 export interface PromptChatParticipantNode extends PromptNode {
-    type: "chatParticipant"
-    participant: ChatParticipant // Chat participant information
-    options?: ChatParticipantOptions // Additional options
+  type: "chatParticipant";
+  participant: ChatParticipant; // Chat participant information
+  options?: ChatParticipantOptions; // Additional options
 }
 
 // Interface for a file output node.
 export interface FileOutputNode extends PromptNode {
-    type: "fileOutput"
-    output: FileOutput // File output information
+  type: "fileOutput";
+  output: FileOutput; // File output information
 }
 
 /**
@@ -225,11 +214,11 @@ export interface FileOutputNode extends PromptNode {
  * @returns A text node object with the specified value and options.
  */
 export function createTextNode(
-    value: Awaitable<string>,
-    options?: ContextExpansionOptions
+  value: Awaitable<string>,
+  options?: ContextExpansionOptions,
 ): PromptTextNode {
-    assert(value !== undefined) // Ensure value is defined
-    return { type: "text", value, ...(options || {}) }
+  assert(value !== undefined); // Ensure value is defined
+  return { type: "text", value, ...(options || {}) };
 }
 
 /**
@@ -240,36 +229,33 @@ export function createTextNode(
 * If the `fenceFormat` is "xml", the name is wrapped in XML-like tags.
 * @returns The converted reference name, wrapped in XML tags if applicable.
 */
-export function toDefRefName(
-    name: string,
-    options: FenceFormatOptions
-): string {
-    return name && options?.fenceFormat === "xml" ? `<${name}>` : name
+export function toDefRefName(name: string, options: FenceFormatOptions): string {
+  return name && options?.fenceFormat === "xml" ? `<${name}>` : name;
 }
 
 // Function to create a definition node.
 export function createDef(
-    name: string,
-    file: WorkspaceFile,
-    options: DefOptions & TraceOptions
+  name: string,
+  file: WorkspaceFile,
+  options: DefOptions & TraceOptions,
 ): PromptDefNode {
-    name = name ?? ""
-    const render = async () => {
-        await resolveFileContent(file, options)
-        const res = await renderFileContent(file, options)
-        return res
-    }
-    const value = render()
-    return { type: "def", name, value, ...(options || {}) }
+  name = name ?? "";
+  const render = async () => {
+    await resolveFileContent(file, options);
+    const res = await renderFileContent(file, options);
+    return res;
+  };
+  const value = render();
+  return { type: "def", name, value, ...(options || {}) };
 }
 
 function cloneContextFields(n: PromptNode): Partial<PromptNode> {
-    const r = {} as Partial<PromptNode>
-    r.maxTokens = n.maxTokens
-    r.priority = n.priority
-    r.flex = n.flex
-    r.cacheControl = n.cacheControl
-    return r
+  const r = {} as Partial<PromptNode>;
+  r.maxTokens = n.maxTokens;
+  r.priority = n.priority;
+  r.flex = n.flex;
+  r.cacheControl = n.cacheControl;
+  return r;
 }
 
 /**
@@ -282,140 +268,130 @@ function cloneContextFields(n: PromptNode): Partial<PromptNode> {
 * @returns A prompt definition node containing the diff results.
 */
 export function createDefDiff(
-    name: string,
-    left: string | WorkspaceFile,
-    right: string | WorkspaceFile,
-    options?: DefDiffOptions & TraceOptions
+  name: string,
+  left: string | WorkspaceFile,
+  right: string | WorkspaceFile,
+  options?: DefDiffOptions & TraceOptions,
 ): PromptDefNode {
-    name = name ?? ""
-
-    if (typeof left === "string") left = { filename: "", content: left }
-    if (typeof right === "string") right = { filename: "", content: right }
-    if (left?.content === undefined)
-        left = { filename: "", content: YAMLStringify(left) }
-    if (right?.content === undefined)
-        right = { filename: "", content: YAMLStringify(right) }
-
-    const render = async () => {
-        await resolveFileContent(left, options)
-        const l = await renderFileContent(left, options)
-        await resolveFileContent(right, options)
-        const r = await renderFileContent(right, options)
-        return { filename: "", content: diffCreatePatch(l, r) }
-    }
-    const value = render()
-    return { type: "def", name, value, ...(options || {}) }
+  name = name ?? "";
+
+  if (typeof left === "string") left = { filename: "", content: left };
+  if (typeof right === "string") right = { filename: "", content: right };
+  if (left?.content === undefined) left = { filename: "", content: YAMLStringify(left) };
+  if (right?.content === undefined) right = { filename: "", content: YAMLStringify(right) };
+
+  const render = async () => {
+    await resolveFileContent(left, options);
+    const l = await renderFileContent(left, options);
+    await resolveFileContent(right, options);
+    const r = await renderFileContent(right, options);
+    return { filename: "", content: diffCreatePatch(l, r) };
+  };
+  const value = render();
+  return { type: "def", name, value, ...(options || {}) };
 }
 
 // Function to render a definition node to a string.
 function renderDefNode(def: PromptDefNode): string {
-    const { name, resolved, language, lineNumbers, schema, prediction } = def
-    const { filename, content = "" } = resolved
-    let fenceFormat = def.fenceFormat
-
-    const norm = (s: string, lang: string) => {
-        s = (s || "").replace(/\n*$/, "")
-        if (s && lineNumbers && !prediction)
-            s = addLineNumbers(s, { language: lang })
-        if (s) s += "\n"
-        return s
-    }
-
-    const dtype = language || /\.([^\.]+)$/i.exec(filename)?.[1] || ""
-    let body = content
-    if (/^(c|t)sv$/i.test(dtype)) {
-        const parsed = !/^\s*|/.test(content) && CSVTryParse(content)
-        if (parsed) {
-            body = dataToMarkdownTable(parsed)
-            fenceFormat = "none"
-        }
-    }
-    body = norm(body, dtype)
-    const diffFormat = ""
-    //body.length > 500 && !prediction
-    //    ? " preferred_output_format=CHANGELOG"
-    //    : ""
-
-    let res: string
-    if (name && fenceFormat === "xml") {
-        res = `\n<${name}${dtype ? ` lang="${dtype}"` : ""}${filename ? ` file="${filename}"` : ""}${schema ? ` schema=${schema}` : ""}${diffFormat}>\n${body}</${name}>\n`
-    } else if (fenceFormat === "none") {
-        res = `\n${name ? name + ":\n" : ""}${body}\n`
-    } else {
-        const fence =
-            language === "markdown" || language === "mdx"
-                ? MARKDOWN_PROMPT_FENCE
-                : PROMPT_FENCE
-        let dfence =
-            /\.mdx?$/i.test(filename) || content?.includes(fence)
-                ? MARKDOWN_PROMPT_FENCE
-                : fence
-        while (dfence && body.includes(dfence)) {
-            dfence += "`"
-        }
-        res =
-            "\n" +
-            (name ? name + ":\n" : "") +
-            dfence +
-            dtype +
-            (filename ? ` file="${filename}"` : "") +
-            (schema ? ` schema=${schema}` : "") +
-            diffFormat +
-            "\n" +
-            body +
-            dfence +
-            "\n"
-    }
-
-    return res
+  const { name, resolved, language, lineNumbers, schema, prediction } = def;
+  const { filename, content = "" } = resolved;
+  let fenceFormat = def.fenceFormat;
+
+  const norm = (s: string, lang: string) => {
+    s = (s || "").replace(/\n*$/, "");
+    if (s && lineNumbers && !prediction) s = addLineNumbers(s, { language: lang });
+    if (s) s += "\n";
+    return s;
+  };
+
+  const dtype = language || /\.([^\.]+)$/i.exec(filename)?.[1] || "";
+  let body = content;
+  if (/^(c|t)sv$/i.test(dtype)) {
+    const parsed = !/^\s*|/.test(content) && CSVTryParse(content);
+    if (parsed) {
+      body = dataToMarkdownTable(parsed);
+      fenceFormat = "none";
+    }
+  }
+  body = norm(body, dtype);
+  const diffFormat = "";
+  //body.length > 500 && !prediction
+  //    ? " preferred_output_format=CHANGELOG"
+  //    : ""
+
+  let res: string;
+  if (name && fenceFormat === "xml") {
+    res = `\n<${name}${dtype ? ` lang="${dtype}"` : ""}${filename ? ` file="${filename}"` : ""}${schema ? ` schema=${schema}` : ""}${diffFormat}>\n${body}</${name}>\n`;
+  } else if (fenceFormat === "none") {
+    res = `\n${name ? name + ":\n" : ""}${body}\n`;
+  } else {
+    const fence =
+      language === "markdown" || language === "mdx" ? MARKDOWN_PROMPT_FENCE : PROMPT_FENCE;
+    let dfence =
+      /\.mdx?$/i.test(filename) || content?.includes(fence) ? MARKDOWN_PROMPT_FENCE : fence;
+    while (dfence && body.includes(dfence)) {
+      dfence += "`";
+    }
+    res =
+      "\n" +
+      (name ? name + ":\n" : "") +
+      dfence +
+      dtype +
+      (filename ? ` file="${filename}"` : "") +
+      (schema ? ` schema=${schema}` : "") +
+      diffFormat +
+      "\n" +
+      body +
+      dfence +
+      "\n";
+  }
+
+  return res;
 }
 
 async function renderDefDataNode(n: PromptDefDataNode): Promise<string> {
-    const { name, headers, priority, cacheControl, query } = n
-    let data = n.resolved
-    let format = n.format
-    if (
-        !format &&
-        Array.isArray(data) &&
-        data.length &&
-        (headers?.length || haveSameKeysAndSimpleValues(data))
-    )
-        format = "csv"
-    else if (!format) format = "yaml"
-
-    if (Array.isArray(data)) data = tidyData(data as object[], n)
-    else if (
-        typeof data === "object" &&
-        (n.sliceHead || n.sliceTail || n.sliceSample)
-    ) {
-        const entries = Object.entries(data)
-        const sliced = sliceData(entries, n)
-        data = Object.fromEntries(sliced)
-    }
-    if (query) data = await GROQEvaluate(query, data)
-
-    let text: string
-    let lang: string
-    if (Array.isArray(data) && format === "csv") {
-        text = dataToMarkdownTable(data)
-    } else if (format === "json") {
-        text = JSON.stringify(data)
-        lang = "json"
-    } else {
-        text = YAMLStringify(data)
-        lang = "yaml"
-    }
-
-    const value = lang
-        ? `<${name} lang="${lang}">
+  const { name, headers, priority, cacheControl, query } = n;
+  let data = n.resolved;
+  let format = n.format;
+  if (
+    !format &&
+    Array.isArray(data) &&
+    data.length &&
+    (headers?.length || haveSameKeysAndSimpleValues(data))
+  )
+    format = "csv";
+  else if (!format) format = "yaml";
+
+  if (Array.isArray(data)) data = tidyData(data as object[], n);
+  else if (typeof data === "object" && (n.sliceHead || n.sliceTail || n.sliceSample)) {
+    const entries = Object.entries(data);
+    const sliced = sliceData(entries, n);
+    data = Object.fromEntries(sliced);
+  }
+  if (query) data = await GROQEvaluate(query, data);
+
+  let text: string;
+  let lang: string;
+  if (Array.isArray(data) && format === "csv") {
+    text = dataToMarkdownTable(data);
+  } else if (format === "json") {
+    text = JSON.stringify(data);
+    lang = "json";
+  } else {
+    text = YAMLStringify(data);
+    lang = "yaml";
+  }
+
+  const value = lang
+    ? `<${name} lang="${lang}">
 ${trimNewlines(text)}
 </${name}>
 `
-        : `${name}:
+    : `${name}:
 ${trimNewlines(text)}
-`
-    // TODO maxTokens does not work well with data
-    return value
+`;
+  // TODO maxTokens does not work well with data
+  return value;
 }
 
 /**
@@ -425,11 +401,11 @@ ${trimNewlines(text)}
 * @returns The created assistant node.
 */
 export function createAssistantNode(
-    value: Awaitable<string>,
-    options?: ContextExpansionOptions
+  value: Awaitable<string>,
+  options?: ContextExpansionOptions,
 ): PromptAssistantNode {
-    assert(value !== undefined)
-    return { type: "assistant", value, ...(options || {}) }
+  assert(value !== undefined);
+  return { type: "assistant", value, ...(options || {}) };
 }
 
 /**
@@ -440,11 +416,11 @@ export function createAssistantNode(
 * @returns A system node object containing the specified content and options.
 */
 export function createSystemNode(
-    value: Awaitable<string>,
-    options?: ContextExpansionOptions
+  value: Awaitable<string>,
+  options?: ContextExpansionOptions,
 ): PromptSystemNode {
-    assert(value !== undefined)
-    return { type: "system", value, ...(options || {}) }
+  assert(value !== undefined);
+  return { type: "system", value, ...(options || {}) };
 }
 
 /**
@@ -456,18 +432,18 @@ export function createSystemNode(
 * @returns The created string template node.
 */
 export function createStringTemplateNode(
-    strings: TemplateStringsArray,
-    args: any[],
-    options?: ContextExpansionOptions
+  strings: TemplateStringsArray,
+  args: any[],
+  options?: ContextExpansionOptions,
 ): PromptStringTemplateNode {
-    assert(strings !== undefined)
-    return {
-        type: "stringTemplate",
-        strings,
-        args,
-        transforms: [],
-        ...(options || {}),
-    }
+  assert(strings !== undefined);
+  return {
+    type: "stringTemplate",
+    strings,
+    args,
+    transforms: [],
+    ...(options || {}),
+  };
 }
 
 /**
@@ -478,45 +454,38 @@ export function createStringTemplateNode(
 * @returns The created image node.
 */
 export function createImageNode(
-    value: Awaitable<PromptImage>,
-    options?: ContextExpansionOptions
+  value: Awaitable<PromptImage>,
+  options?: ContextExpansionOptions,
 ): PromptImageNode {
-    assert(value !== undefined)
-    return { type: "image", value, ...(options || {}) }
+  assert(value !== undefined);
+  return { type: "image", value, ...(options || {}) };
 }
 
 export function createFileImageNodes(
-    name: string,
-    file: WorkspaceFile,
-    defOptions?: DefImagesOptions,
-    options?: TraceOptions & CancellationOptions
+  name: string,
+  file: WorkspaceFile,
+  defOptions?: DefImagesOptions,
+  options?: TraceOptions & CancellationOptions,
 ): PromptNode[] {
-    const { trace, cancellationToken } = options || {}
-    const filename =
-        file.filename && !/^data:\/\//.test(file.filename)
-            ? file.filename
-            : undefined
-    return [
-        name
-            ? createTextNode(
-                  `<${name}${filename ? ` filename="${filename}"` : ``}>`
-              )
-            : undefined,
-        createImageNode(
-            (async () => {
-                const encoded = await imageEncodeForLLM(file, {
-                    ...(defOptions || {}),
-                    cancellationToken,
-                    trace,
-                })
-                return {
-                    filename: file.filename,
-                    ...encoded,
-                }
-            })()
-        ),
-        name ? createTextNode(`</${name}>`) : undefined,
-    ].filter((n) => !!n)
+  const { trace, cancellationToken } = options || {};
+  const filename = file.filename && !/^data:\/\//.test(file.filename) ? file.filename : undefined;
+  return [
+    name ? createTextNode(`<${name}${filename ? ` filename="${filename}"` : ``}>`) : undefined,
+    createImageNode(
+      (async () => {
+        const encoded = await imageEncodeForLLM(file, {
+          ...(defOptions || {}),
+          cancellationToken,
+          trace,
+        });
+        return {
+          filename: file.filename,
+          ...encoded,
+        };
+      })(),
+    ),
+    name ? createTextNode(`</${name}>`) : undefined,
+  ].filter((n) => !!n);
 }
 
 /**
@@ -528,45 +497,45 @@ export function createFileImageNodes(
 * - options: Optional configuration for the schema node.
 */
 export function createSchemaNode(
-    name: string,
-    value: JSONSchema | ZodTypeLike,
-    options?: DefSchemaOptions
+  name: string,
+  value: JSONSchema | ZodTypeLike,
+  options?: DefSchemaOptions,
 ): PromptSchemaNode {
-    assert(!!name)
-    assert(value !== undefined)
-    // auto zod conversion
-    value = tryZodToJsonSchema(value as ZodTypeLike) ?? (value as JSONSchema)
-    return { type: "schema", name, value, options }
+  assert(!!name);
+  assert(value !== undefined);
+  // auto zod conversion
+  value = tryZodToJsonSchema(value as ZodTypeLike) ?? (value as JSONSchema);
+  return { type: "schema", name, value, options };
 }
 
 // Function to create a function node.
 export function createToolNode(
-    name: string,
-    description: string,
-    parameters: JSONSchema,
-    impl: ChatFunctionHandler,
-    options: DefToolOptions,
-    generator: ChatGenerationContext
+  name: string,
+  description: string,
+  parameters: JSONSchema,
+  impl: ChatFunctionHandler,
+  options: DefToolOptions,
+  generator: ChatGenerationContext,
 ): PromptToolNode {
-    assert(!!name)
-    assert(!!description)
-    assert(parameters !== undefined)
-    assert(impl !== undefined)
-    return {
-        type: "tool",
-        name,
-        description: dedent(description),
-        parameters,
-        impl,
-        options,
-        generator,
-    } satisfies PromptToolNode
+  assert(!!name);
+  assert(!!description);
+  assert(parameters !== undefined);
+  assert(impl !== undefined);
+  return {
+    type: "tool",
+    name,
+    description: dedent(description),
+    parameters,
+    impl,
+    options,
+    generator,
+  } satisfies PromptToolNode;
 }
 
 // Function to create a file merge node.
 export function createFileMerge(fn: FileMergeHandler): PromptFileMergeNode {
-    assert(fn !== undefined)
-    return { type: "fileMerge", fn }
+  assert(fn !== undefined);
+  return { type: "fileMerge", fn };
 }
 
 /**
@@ -575,11 +544,9 @@ export function createFileMerge(fn: FileMergeHandler): PromptFileMergeNode {
 * @param fn - The handler function to process prompt outputs. Must not be undefined. Throws an error if undefined.
 * @returns An output processor node containing the handler function.
 */
-export function createOutputProcessor(
-    fn: PromptOutputProcessorHandler
-): PromptOutputProcessorNode {
-    assert(fn !== undefined)
-    return { type: "outputProcessor", fn }
+export function createOutputProcessor(fn: PromptOutputProcessorHandler): PromptOutputProcessorNode {
+  assert(fn !== undefined);
+  return { type: "outputProcessor", fn };
 }
 
 /**
@@ -587,10 +554,8 @@ export function createOutputProcessor(
 * @param participant - The chat participant to represent in the node.
 * @returns A node object with the participant's details.
 */
-export function createChatParticipant(
-    participant: ChatParticipant
-): PromptChatParticipantNode {
-    return { type: "chatParticipant", participant }
+export function createChatParticipant(participant: ChatParticipant): PromptChatParticipantNode {
+  return { type: "chatParticipant", participant };
 }
 
 /**
@@ -599,22 +564,22 @@ export function createChatParticipant(
 * @returns A file output node containing the specified output.
 */
 export function createFileOutput(output: FileOutput): FileOutputNode {
-    return { type: "fileOutput", output } satisfies FileOutputNode
+  return { type: "fileOutput", output } satisfies FileOutputNode;
 }
 
 // Function to create an import template node.
 export function createImportTemplate(
-    files: ElementOrArray<string | WorkspaceFile>,
-    args?: Record<string, ImportTemplateArgumentType>,
-    options?: ImportTemplateOptions
+  files: ElementOrArray<string | WorkspaceFile>,
+  args?: Record<string, ImportTemplateArgumentType>,
+  options?: ImportTemplateOptions,
 ): PromptImportTemplate {
-    assert(!!files)
-    return {
-        type: "importTemplate",
-        files,
-        args: args || {},
-        options,
-    } satisfies PromptImportTemplate
+  assert(!!files);
+  return {
+    type: "importTemplate",
+    files,
+    args: args || {},
+    options,
+  } satisfies PromptImportTemplate;
 }
 
 /**
@@ -626,156 +591,150 @@ export function createImportTemplate(
 * @returns An MCP server node configured with the provided details.
 */
 export function createMcpServer(
-    id: string,
-    config: McpServerConfig,
-    options: DefToolOptions,
-    generator: ChatGenerationContext
+  id: string,
+  config: McpServerConfig,
+  options: DefToolOptions,
+  generator: ChatGenerationContext,
 ): PromptMcpServerNode {
-    return {
-        type: "mcpServer",
-        config: { ...config, generator, id, options },
-    } satisfies PromptMcpServerNode
+  return {
+    type: "mcpServer",
+    config: { ...config, generator, id, options },
  } satisfies PromptMcpServerNode;
 }
 
 // Function to check if data objects have the same keys and simple values.
 function haveSameKeysAndSimpleValues(data: object[]): boolean {
-    if (data.length === 0) return true
-    const headers = Object.entries(data[0])
-    return data.slice(1).every((obj) => {
-        const keys = Object.entries(obj)
-        return (
-            headers.length === keys.length &&
-            headers.every(
-                (h, i) =>
-                    keys[i][0] === h[0] &&
-                    /^(string|number|boolean|null|undefined)$/.test(
-                        typeof keys[i][1]
-                    )
-            )
-        )
-    })
+  if (data.length === 0) return true;
+  const headers = Object.entries(data[0]);
+  return data.slice(1).every((obj) => {
+    const keys = Object.entries(obj);
+    return (
+      headers.length === keys.length &&
+      headers.every(
+        (h, i) =>
+          keys[i][0] === h[0] && /^(string|number|boolean|null|undefined)$/.test(typeof keys[i][1]),
+      )
+    );
+  });
 }
 
 // Function to create a text node with data.
 export function createDefData(
-    name: string,
-    value: Awaitable<object | object[]>,
-    options?: DefDataOptions
+  name: string,
+  value: Awaitable<object | object[]>,
+  options?: DefDataOptions,
 ): PromptDefDataNode {
-    if (value === undefined) return undefined
-    return {
-        type: "defData",
-        name,
-        value,
-        ...(options || {}),
-    }
+  if (value === undefined) return undefined;
+  return {
+    type: "defData",
+    name,
+    value,
+    ...(options || {}),
+  };
}
 
 // Function to append a child node to a parent node.
-export function appendChild(
-    parent: PromptNode,
-    ...children: PromptNode[]
-): void {
-    if (!parent.children) {
-        parent.children = []
-    }
-    parent.children.push(...children)
+export function appendChild(parent: PromptNode, ...children: PromptNode[]): void {
+  if (!parent.children) {
+    parent.children = [];
+  }
+  parent.children.push(...children);
 }
 
 // Interface for visiting different types of prompt nodes.
 export interface PromptNodeVisitor {
-    node?: (node: PromptNode) => Awaitable<void> // General node visitor
-    error?: (node: PromptNode) => Awaitable<void> // Error handling visitor
-    afterNode?: (node: PromptNode) => Awaitable<void> // Post node visitor
-    text?: (node: PromptTextNode) => Awaitable<void> // Text node visitor
-    def?: (node: PromptDefNode) => Awaitable<void> // Definition node visitor
-    defData?: (node: PromptDefDataNode) => Awaitable<void> // Definition data node visitor
-    image?: (node: PromptImageNode) => Awaitable<void> // Image node visitor
-    schema?: (node: PromptSchemaNode) => Awaitable<void> // Schema node visitor
-    tool?: (node: PromptToolNode) => Awaitable<void> // Function node visitor
-    fileMerge?: (node: PromptFileMergeNode) => Awaitable<void> // File merge node visitor
-    stringTemplate?: (node: PromptStringTemplateNode) => Awaitable<void> // String template node visitor
-    outputProcessor?: (node: PromptOutputProcessorNode) => Awaitable<void> // Output processor node visitor
-    assistant?: (node: PromptAssistantNode) => Awaitable<void> // Assistant node visitor
-    system?: (node: PromptSystemNode) => Awaitable<void> // System node visitor
-    chatParticipant?: (node: PromptChatParticipantNode) => Awaitable<void> // Chat participant node visitor
-    fileOutput?: (node: FileOutputNode) => Awaitable<void> // File output node visitor
-    importTemplate?: (node: PromptImportTemplate) => Awaitable<void> // Import template node visitor
-    mcpServer?: (node: PromptMcpServerNode) => Awaitable<void> // Mcp server node visitor
+  node?: (node: PromptNode) => Awaitable<void>; // General node visitor
+  error?: (node: PromptNode) => Awaitable<void>; // Error handling visitor
+  afterNode?: (node: PromptNode) => Awaitable<void>; // Post node visitor
+  text?: (node: PromptTextNode) => Awaitable<void>; // Text node visitor
+  def?: (node: PromptDefNode) => Awaitable<void>; // Definition node visitor
+  defData?: (node: PromptDefDataNode) => Awaitable<void>; // Definition data node visitor
+  image?: (node: PromptImageNode) => Awaitable<void>; // Image node visitor
+  schema?: (node: PromptSchemaNode) => Awaitable<void>; // Schema node visitor
+  tool?: (node: PromptToolNode) => Awaitable<void>; // Function node visitor
+  fileMerge?: (node: PromptFileMergeNode) => Awaitable<void>; // File merge node visitor
+  stringTemplate?: (node: PromptStringTemplateNode) => Awaitable<void>; // String template node visitor
+  outputProcessor?: (node: PromptOutputProcessorNode) => Awaitable<void>; // Output processor node visitor
+  assistant?: (node: PromptAssistantNode) => Awaitable<void>; // Assistant node visitor
+  system?: (node: PromptSystemNode) => Awaitable<void>; // System node visitor
+  chatParticipant?: (node: PromptChatParticipantNode) => Awaitable<void>; // Chat participant node visitor
+  fileOutput?: (node: FileOutputNode) => Awaitable<void>; // File output node visitor
+  importTemplate?: (node: PromptImportTemplate) => Awaitable<void>; // Import template node visitor
+  mcpServer?: (node: PromptMcpServerNode) => Awaitable<void>; // Mcp server node visitor
 }
 
 // Function to visit nodes in the prompt tree.
 export async function visitNode(node: PromptNode, visitor: PromptNodeVisitor) {
-    await visitor.node?.(node)
-    switch (node.type) {
-        case "text":
-            await visitor.text?.(node as PromptTextNode)
-            break
-        case "def":
-            await visitor.def?.(node as PromptDefNode)
-            break
-        case "defData":
-            await visitor.defData?.(node as PromptDefDataNode)
-            break
-        case "image":
-            await visitor.image?.(node as PromptImageNode)
-            break
-        case "schema":
-            await visitor.schema?.(node as PromptSchemaNode)
-            break
-        case "tool":
-            await visitor.tool?.(node as PromptToolNode)
-            break
-        case "fileMerge":
-            await visitor.fileMerge?.(node as PromptFileMergeNode)
-            break
-        case "outputProcessor":
-            await visitor.outputProcessor?.(node as PromptOutputProcessorNode)
-            break
-        case "stringTemplate":
-            await visitor.stringTemplate?.(node as PromptStringTemplateNode)
-            break
-        case "assistant":
-            await visitor.assistant?.(node as PromptAssistantNode)
-            break
-        case "system":
-            await visitor.system?.(node as PromptSystemNode)
-            break
-        case "chatParticipant":
-            await visitor.chatParticipant?.(node as PromptChatParticipantNode)
-            break
-        case "fileOutput":
-            await visitor.fileOutput?.(node as FileOutputNode)
-            break
-        case "importTemplate":
-            await visitor.importTemplate?.(node as PromptImportTemplate)
-            break
-        case "mcpServer":
-            await visitor.mcpServer?.(node as PromptMcpServerNode)
-            break
-    }
-    if (node.error) visitor.error?.(node)
-    if (!node.error && !node.deleted && node.children) {
-        for (const child of node.children) {
-            await visitNode(child, visitor)
-        }
-        node.children = node.children?.filter((c) => !c.deleted)
-    }
-    await visitor.afterNode?.(node)
+  await visitor.node?.(node);
+  switch (node.type) {
+    case "text":
+      await visitor.text?.(node as PromptTextNode);
+      break;
+    case "def":
+      await visitor.def?.(node as PromptDefNode);
+      break;
+    case "defData":
+      await visitor.defData?.(node as PromptDefDataNode);
+      break;
+    case "image":
+      await visitor.image?.(node as PromptImageNode);
+      break;
+    case "schema":
+      await visitor.schema?.(node as PromptSchemaNode);
+      break;
+    case "tool":
+      await visitor.tool?.(node as PromptToolNode);
+      break;
+    case "fileMerge":
+      await visitor.fileMerge?.(node as PromptFileMergeNode);
+      break;
+    case "outputProcessor":
+      await visitor.outputProcessor?.(node as PromptOutputProcessorNode);
+      break;
+    case "stringTemplate":
+      await visitor.stringTemplate?.(node as PromptStringTemplateNode);
+      break;
+    case "assistant":
+      await visitor.assistant?.(node as PromptAssistantNode);
+      break;
+    case "system":
+      await visitor.system?.(node as PromptSystemNode);
+      break;
+    case "chatParticipant":
+      await visitor.chatParticipant?.(node as PromptChatParticipantNode);
+      break;
+    case "fileOutput":
+      await visitor.fileOutput?.(node as FileOutputNode);
+      break;
+    case "importTemplate":
+      await visitor.importTemplate?.(node as PromptImportTemplate);
+      break;
+    case "mcpServer":
+      await visitor.mcpServer?.(node as PromptMcpServerNode);
+      break;
+  }
+  if (node.error) visitor.error?.(node);
+  if (!node.error && !node.deleted && node.children) {
+    for (const child of node.children) {
+      await visitNode(child, visitor);
+    }
+    node.children = node.children?.filter((c) => !c.deleted);
+  }
+  await visitor.afterNode?.(node);
 }
 
 interface PromptNodeRender {
-    images: PromptImage[] // Images included in the prompt
-    errors: unknown[] // Errors encountered during rendering
-    schemas: Record<string, JSONSchema> // Schemas included in the prompt
-    tools: ToolCallback[] // tools included in the prompt
-    fileMerges: FileMergeHandler[] // File merge handlers
-    outputProcessors: PromptOutputProcessorHandler[] // Output processor handlers
-    chatParticipants: ChatParticipant[] // Chat participants
-    messages: ChatCompletionMessageParam[] // Messages for chat completion
-    fileOutputs: FileOutput[] // File outputs
-    prediction: PromptPrediction // predicted output for the prompt
-    disposables: AsyncDisposable[] // Disposables
+  images: PromptImage[]; // Images included in the prompt
+  errors: unknown[]; // Errors encountered during rendering
+  schemas: Record<string, JSONSchema>; // Schemas included in the prompt
+  tools: ToolCallback[]; // tools included in the prompt
+  fileMerges: FileMergeHandler[]; // File merge handlers
+  outputProcessors: PromptOutputProcessorHandler[]; // Output processor handlers
+  chatParticipants: ChatParticipant[]; // Chat participants
+  messages: ChatCompletionMessageParam[]; // Messages for chat completion
+  fileOutputs: FileOutput[]; // File outputs
+  prediction: PromptPrediction; // predicted output for the prompt
+  disposables: AsyncDisposable[]; // Disposables
 }
 
 /**
@@ -785,515 +744,460 @@ interface PromptNodeRender {
 * @returns The default fence format.
 */
 export function resolveFenceFormat(modelId: string): FenceFormat {
-    return DEFAULT_FENCE_FORMAT
+  return DEFAULT_FENCE_FORMAT;
 }
 
 // Function to resolve a prompt node.
 async function resolvePromptNode(
-    encoder: TokenEncoder,
-    root: PromptNode,
-    options: TraceOptions
+  encoder: TokenEncoder,
+  root: PromptNode,
+  options: TraceOptions,
 ): Promise<{ errors: number }> {
-    const { trace } = options || {}
-    let err = 0
-    const names = new Set<string>()
-    const uniqueName = (n_: string) => {
-        let i = 1
-        let n = n_
-        while (names.has(n)) {
-            n = `${n_}${i++}`
-        }
-        names.add(n)
-        return n
-    }
-
-    await visitNode(root, {
-        error: (node) => {
-            logError(node.error)
-            err++
-        },
-        text: async (n) => {
-            try {
-                const value = await n.value
-                n.resolved = n.preview = value
-                n.tokens = approximateTokens(value)
-            } catch (e) {
-                n.error = e
-            }
-        },
-        def: async (n) => {
-            try {
-                names.add(n.name)
-                const value = await n.value
-                n.resolved = value
-                n.resolved.content = extractRange(n.resolved.content, n)
-                const rendered = renderDefNode(n)
-                n.preview = rendered
-                n.tokens = approximateTokens(rendered)
-                n.children = [createTextNode(rendered, cloneContextFields(n))]
-            } catch (e) {
-                n.error = e
-            }
-        },
-        defData: async (n) => {
-            try {
-                names.add(n.name)
-                const value = await n.value
-                n.resolved = value
-                const rendered = await renderDefDataNode(n)
-                n.preview = rendered
-                n.tokens = approximateTokens(rendered)
-                n.children = [createTextNode(rendered, cloneContextFields(n))]
-            } catch (e) {
-                n.error = e
-            }
-        },
-        system: async (n) => {
-            try {
-                const value = await n.value
-                n.resolved = n.preview = value
-                n.tokens = approximateTokens(value)
-            } catch (e) {
-                n.error = e
-            }
-        },
-        assistant: async (n) => {
-            try {
-                const value = await n.value
-                n.resolved = n.preview = value
-                n.tokens = approximateTokens(value)
-            } catch (e) {
-                n.error = e
-            }
-        },
-        stringTemplate: async (n) => {
-            const { strings, args } = n
-            try {
-                const resolvedStrings = await strings
-                const resolvedArgs = []
-
-                for (const arg of args) {
-                    try {
-                        let ra: any = await arg
-                        if (typeof ra === "function") ra = ra()
-                        ra = await ra
-
-                        // Render files
-                        if (typeof ra === "object") {
-                            if (ra.filename) {
-                                n.children = [
-                                    ...(n.children ?? []),
-                                    createDef(ra.filename, ra, {
-                                        ignoreEmpty: true,
-                                        maxTokens: TEMPLATE_ARG_FILE_MAX_TOKENS,
-                                    }),
-                                ]
-                                ra = ra.filename
-                            } else if (
-                                // env.files
-                                Array.isArray(ra) &&
-                                ra.every(
-                                    (r) => typeof r === "object" && r.filename
-                                )
-                            ) {
-                                // env.files
-                                const fname = uniqueName("FILES")
-                                n.children = n.children ?? []
-                                for (const r of ra) {
-                                    n.children.push(
-                                        createDef(fname, r, {
-                                            ignoreEmpty: true,
-                                            maxTokens:
-                                                TEMPLATE_ARG_FILE_MAX_TOKENS,
-                                        })
-                                    )
-                                }
-                                ra = fname
-                            } else {
-                                const dname = uniqueName("DATA")
-                                n.children = [
-                                    ...(n.children ?? []),
-                                    createDefData(dname, ra, {
-                                        sliceSample:
-                                            TEMPLATE_ARG_DATA_SLICE_SAMPLE,
-                                    }),
-                                ]
-                                ra = dname
-                            }
-                        }
-                        resolvedArgs.push(ra ?? "")
-                    } catch (e) {
-                        n.error = e
-                        resolvedArgs.push(errorMessage(e))
-                    }
-                }
-                let value = dedent(resolvedStrings, ...resolvedArgs)
-                if (n.transforms?.length)
-                    for (const transform of n.transforms)
-                        value = await transform(value)
-                n.resolved = n.preview = value
-                n.tokens = approximateTokens(value)
-            } catch (e) {
-                n.error = e
-            }
-        },
-        importTemplate: async (n) => {
-            try {
-                const { files, args, options } = n
-                n.children = []
-                n.preview = ""
-                const fs: WorkspaceFile[] = await expandFileOrWorkspaceFiles(
-                    arrayify(files)
-                )
-                if (fs.length === 0)
-                    throw new Error(`No files found for import: ${files}`)
-
-                const resolvedArgs: Record<string, string | number | boolean> =
-                    {}
-                for (const argkv of Object.entries(args || {})) {
-                    let [argk, argv] = argkv
-                    if (typeof argv === "function") argv = argv()
-                    resolvedArgs[argk] = await argv
-                }
-                for (const f of fs) {
-                    await resolveFileContent(f, {
-                        ...(options || {}),
-                        trace,
-                    })
-                    if (PROMPTY_REGEX.test(f.filename))
-                        await resolveImportPrompty(n, f, resolvedArgs, options)
-                    else {
-                        const rendered = await interpolateVariables(
-                            f.content,
-                            resolvedArgs,
-                            n.options
-                        )
-                        n.children.push(createTextNode(rendered))
-                        n.preview += rendered + "\n"
-                    }
-                }
-                n.tokens = approximateTokens(n.preview)
-            } catch (e) {
-                n.error = e
-            }
-        },
-        image: async (n) => {
-            try {
-                const v = await n.value
-                n.resolved = v
-                n.preview = "image" // TODO
-            } catch (e) {
-                n.error = e
-            }
-        },
-    })
-    return { errors: err }
+  const { trace } = options || {};
+  let err = 0;
+  const names = new Set<string>();
+  const uniqueName = (n_: string) => {
+    let i = 1;
+    let n = n_;
+    while (names.has(n)) {
+      n = `${n_}${i++}`;
+    }
+    names.add(n);
+    return n;
+  };
+
+  await visitNode(root, {
+    error: (node) => {
+      logError(node.error);
+      err++;
+    },
+    text: async (n) => {
+      try {
+        const value = await n.value;
+        n.resolved = n.preview = value;
+        n.tokens = approximateTokens(value);
+      } catch (e) {
+        n.error = e;
+      }
+    },
+    def: async (n) => {
+      try {
+        names.add(n.name);
+        const value = await n.value;
+        n.resolved = value;
+        n.resolved.content = extractRange(n.resolved.content, n);
+        const rendered = renderDefNode(n);
+        n.preview = rendered;
+        n.tokens = approximateTokens(rendered);
+        n.children = [createTextNode(rendered, cloneContextFields(n))];
+      } catch (e) {
+        n.error = e;
+      }
+    },
+    defData: async (n) => {
+      try {
+        names.add(n.name);
+        const value = await n.value;
+        n.resolved = value;
+        const rendered = await renderDefDataNode(n);
+        n.preview = rendered;
+        n.tokens = approximateTokens(rendered);
+        n.children = [createTextNode(rendered, cloneContextFields(n))];
+      } catch (e) {
+        n.error = e;
+      }
+    },
+    system: async (n) => {
+      try {
+        const value = await n.value;
+        n.resolved = n.preview = value;
+        n.tokens = approximateTokens(value);
+      } catch (e) {
+        n.error = e;
+      }
+    },
+    assistant: async (n) => {
+      try {
+        const value = await n.value;
+        n.resolved = n.preview = value;
+        n.tokens = approximateTokens(value);
+      } catch (e) {
+        n.error = e;
+      }
+    },
+    stringTemplate: async (n) => {
+      const { strings, args } = n;
+      try {
+        const resolvedStrings = await strings;
+        const resolvedArgs = [];
+
+        for (const arg of args) {
+          try {
+            let ra: any = await arg;
+            if (typeof ra === "function") ra = ra();
+            ra = await ra;
+
+            // Render files
+            if (typeof ra === "object") {
+              if (ra.filename) {
+                n.children = [
+                  ...(n.children ?? []),
+                  createDef(ra.filename, ra, {
+                    ignoreEmpty: true,
+                    maxTokens: TEMPLATE_ARG_FILE_MAX_TOKENS,
+                  }),
+                ];
+                ra = ra.filename;
+              } else if (
+                // env.files
+                Array.isArray(ra) &&
+                ra.every((r) => typeof r === "object" && r.filename)
+              ) {
+                // env.files
+                const fname = uniqueName("FILES");
+                n.children = n.children ?? [];
+                for (const r of ra) {
+                  n.children.push(
+                    createDef(fname, r, {
+                      ignoreEmpty: true,
+                      maxTokens: TEMPLATE_ARG_FILE_MAX_TOKENS,
+                    }),
+                  );
+                }
+                ra = fname;
+              } else {
+                const dname = uniqueName("DATA");
+                n.children = [
+                  ...(n.children ?? []),
+                  createDefData(dname, ra, {
+                    sliceSample: TEMPLATE_ARG_DATA_SLICE_SAMPLE,
+                  }),
+                ];
+                ra = dname;
+              }
+            }
+            resolvedArgs.push(ra ?? "");
+          } catch (e) {
+            n.error = e;
+            resolvedArgs.push(errorMessage(e));
+          }
+        }
+        let value = dedent(resolvedStrings, ...resolvedArgs);
+        if (n.transforms?.length)
+          for (const transform of n.transforms) value = await transform(value);
+        n.resolved = n.preview = value;
+        n.tokens = approximateTokens(value);
+      } catch (e) {
+        n.error = e;
+      }
+    },
+    importTemplate: async (n) => {
+      try {
+        const { files, args, options } = n;
+        n.children = [];
+        n.preview = "";
+        const fs: WorkspaceFile[] = await expandFileOrWorkspaceFiles(arrayify(files));
+        if (fs.length === 0) throw new Error(`No files found for import: ${files}`);
+
+        const resolvedArgs: Record<string, string | number | boolean> = {};
+        for (const argkv of Object.entries(args || {})) {
+          let [argk, argv] = argkv;
+          if (typeof argv === "function") argv = argv();
+          resolvedArgs[argk] = await argv;
+        }
+        for (const f of fs) {
+          await resolveFileContent(f, {
+            ...(options || {}),
+            trace,
+          });
+          if (PROMPTY_REGEX.test(f.filename))
+            await resolveImportPrompty(n, f, resolvedArgs, options);
+          else {
+            const rendered = await interpolateVariables(f.content, resolvedArgs, n.options);
+            n.children.push(createTextNode(rendered));
+            n.preview += rendered + "\n";
+          }
+        }
+        n.tokens = approximateTokens(n.preview);
+      } catch (e) {
+        n.error = e;
+      }
+    },
+    image: async (n) => {
+      try {
+        const v = await n.value;
+        n.resolved = v;
+        n.preview = "image"; // TODO
+      } catch (e) {
+        n.error = e;
+      }
+    },
+  });
+  return { errors: err };
 }
 
 async function resolveImportPrompty(
-    n: PromptImportTemplate,
-    f: WorkspaceFile,
-    args: Record<string, string | number | boolean>,
-    options: ImportTemplateOptions
+  n: PromptImportTemplate,
+  f: WorkspaceFile,
+  args: Record<string, string | number | boolean>,
+  options: ImportTemplateOptions,
 ) {
-    const { allowExtraArguments } = options || {}
-    const { messages, meta } = promptyParse(f.filename, f.content)
-    const { parameters } = meta
-    args = args || {}
-
-    const extra = Object.keys(args).find((arg) => !parameters?.[arg])
-    if (extra) {
-        dbg(`extra argument ${extra} in ${f.filename}`)
-        if (!allowExtraArguments) {
-            const msg = `Extra input argument '${extra}'.`
-            throw new Error(msg)
-        }
-    }
-    if (parameters) {
-        const missings = Object.keys(parameters).filter(
-            (p) => args[p] === undefined
-        )
-        if (missings.length > 0)
-            throw new Error(
-                `Missing input argument for '${missings.join(", ")}' in ${f.filename}`
-            )
-    }
-
-    for (const message of messages) {
-        const txt = jinjaRenderChatMessage(message, args)
-        if (message.role === "assistant")
-            n.children.push(createAssistantNode(txt))
-        else if (message.role === "system")
-            n.children.push(createSystemNode(txt))
-        else n.children.push(createTextNode(txt))
-        n.preview += txt + "\n"
-    }
+  const { allowExtraArguments } = options || {};
+  const { messages, meta } = promptyParse(f.filename, f.content);
+  const { parameters } = meta;
+  args = args || {};
+
+  const extra = Object.keys(args).find((arg) => !parameters?.[arg]);
+  if (extra) {
+    dbg(`extra argument ${extra} in ${f.filename}`);
+    if (!allowExtraArguments) {
+      const msg = `Extra input argument '${extra}'.`;
+      throw new Error(msg);
+    }
+  }
+  if (parameters) {
+    const missings = Object.keys(parameters).filter((p) => args[p] === undefined);
+    if (missings.length > 0)
+      throw new Error(`Missing input argument for '${missings.join(", ")}' in ${f.filename}`);
+  }
+
+  for (const message of messages) {
+    const txt = jinjaRenderChatMessage(message, args);
+    if (message.role === "assistant") n.children.push(createAssistantNode(txt));
+    else if (message.role === "system") n.children.push(createSystemNode(txt));
+    else n.children.push(createTextNode(txt));
+    n.preview += txt + "\n";
+  }
 }
 
 // Function to handle truncation of prompt nodes based on token limits.
 async function truncatePromptNode(
-    encoder: TokenEncoder,
-    node: PromptNode,
-    options?: TraceOptions
+  encoder: TokenEncoder,
+  node: PromptNode,
+  options?: TraceOptions,
 ): Promise<boolean> {
-    const { trace } = options || {}
-    let truncated = false
-
-    const cap = (n: {
-        error?: unknown
-        resolved?: string
-        tokens?: number
-        maxTokens?: number
-        preview?: string
-    }) => {
-        if (
-            !n.error &&
-            n.resolved !== undefined &&
-            n.maxTokens !== undefined &&
-            n.tokens > n.maxTokens
-        ) {
-            n.resolved = n.preview = truncateTextToTokens(
-                n.resolved,
-                n.maxTokens,
-                encoder,
-                { tokens: n.tokens }
-            )
-            n.tokens = approximateTokens(n.resolved)
-            truncated = true
-            trace.log(
-                `truncated text to ${n.tokens} tokens (max ${n.maxTokens})`
-            )
-        }
-    }
+  const { trace } = options || {};
+  let truncated = false;
+
+  const cap = (n: {
+    error?: unknown;
+    resolved?: string;
+    tokens?: number;
+    maxTokens?: number;
+    preview?: string;
+  }) => {
+    if (
+      !n.error &&
+      n.resolved !== undefined &&
+      n.maxTokens !== undefined &&
+      n.tokens > n.maxTokens
+    ) {
+      n.resolved = n.preview = truncateTextToTokens(n.resolved, n.maxTokens, encoder, {
+        tokens: n.tokens,
+      });
+      n.tokens = approximateTokens(n.resolved);
+      truncated = true;
+      trace.log(`truncated text to ${n.tokens} tokens (max ${n.maxTokens})`);
+    }
+  };
 
-    const capDef = (n: PromptDefNode) => {
-        if (
-            !n.error &&
-            n.resolved !== undefined &&
-            n.maxTokens !== undefined &&
-            n.tokens > n.maxTokens
-        ) {
-            n.resolved.content = 
truncateTextToTokens(n.resolved.content, n.maxTokens, encoder, { + tokens: n.tokens, + }); + n.tokens = approximateTokens(n.resolved.content); + const rendered = renderDefNode(n); + n.preview = rendered; + n.children = [createTextNode(rendered, cloneContextFields(n))]; + truncated = true; + trace.log(`truncated def ${n.name} to ${n.tokens} tokens (max ${n.maxTokens})`); } + }; - await visitNode(node, { - text: cap, - assistant: cap, - stringTemplate: cap, - def: capDef, - }) + await visitNode(node, { + text: cap, + assistant: cap, + stringTemplate: cap, + def: capDef, + }); - return truncated + return truncated; } // Function to adjust token limits for nodes with flexibility. async function flexPromptNode( - root: PromptNode, - options?: { flexTokens: number } & TraceOptions + root: PromptNode, + options?: { flexTokens: number } & TraceOptions, ): Promise { - const PRIORITY_DEFAULT = 0 - - const { trace, flexTokens } = options || {} - - let log = "" - // Collect all nodes - const nodes: PromptNode[] = [] - await visitNode(root, { - node: (n) => { - nodes.push(n) - }, - }) - const totalTokens = nodes.reduce( - (total, node) => total + (node.tokens ?? 0), - 0 - ) - - if (totalTokens <= flexTokens) { - // No need to flex - return - } - - // Inspired from priompt, prompt-tsx, gpt-4 - // Sort by priority - nodes.sort( - (a, b) => - (a.priority ?? PRIORITY_DEFAULT) - (b.priority ?? PRIORITY_DEFAULT) - ) - const flexNodes = nodes.filter((n) => n.flex !== undefined) - const totalFlexTokens = flexNodes.reduce( - (total, node) => total + (node.tokens ?? 0), - 0 - ) - - // checking flexNodes sizes - if (totalFlexTokens <= flexTokens) { - return - } - - const totalFlex = flexNodes.reduce((total, node) => total + node.flex, 0) - const totalReserve = 0 - const totalRemaining = Math.max(0, flexTokens - totalReserve) - for (const node of flexNodes) { - const proportion = node.flex / totalFlex - const tokenBudget = Math.min( - node.maxTokens ?? Infinity, - Math.floor(totalRemaining * proportion) - ) - node.maxTokens = tokenBudget - log += `- flexed ${node.type} ${node.name || ""} to ${tokenBudget} tokens\n` - } - if (log) trace?.details(`flexing`, log) + const PRIORITY_DEFAULT = 0; + + const { trace, flexTokens } = options || {}; + + let log = ""; + // Collect all nodes + const nodes: PromptNode[] = []; + await visitNode(root, { + node: (n) => { + nodes.push(n); + }, + }); + const totalTokens = nodes.reduce((total, node) => total + (node.tokens ?? 0), 0); + + if (totalTokens <= flexTokens) { + // No need to flex + return; + } + + // Inspired from priompt, prompt-tsx, gpt-4 + // Sort by priority + nodes.sort((a, b) => (a.priority ?? PRIORITY_DEFAULT) - (b.priority ?? PRIORITY_DEFAULT)); + const flexNodes = nodes.filter((n) => n.flex !== undefined); + const totalFlexTokens = flexNodes.reduce((total, node) => total + (node.tokens ?? 0), 0); + + // checking flexNodes sizes + if (totalFlexTokens <= flexTokens) { + return; + } + + const totalFlex = flexNodes.reduce((total, node) => total + node.flex, 0); + const totalReserve = 0; + const totalRemaining = Math.max(0, flexTokens - totalReserve); + for (const node of flexNodes) { + const proportion = node.flex / totalFlex; + const tokenBudget = Math.min( + node.maxTokens ?? Infinity, + Math.floor(totalRemaining * proportion), + ); + node.maxTokens = tokenBudget; + log += `- flexed ${node.type} ${node.name || ""} to ${tokenBudget} tokens\n`; + } + if (log) trace?.details(`flexing`, log); } // Function to trace the prompt node structure for debugging. 
async function tracePromptNode( - trace: MarkdownTrace, - root: PromptNode, - options?: { label: string } + trace: MarkdownTrace, + root: PromptNode, + options?: { label: string }, ) { - if (!trace || !root.children?.length) return - - await visitNode(root, { - node: (n) => { - const error = errorMessage(n.error) - let title = toStringList( - n.type || `🌳 promptdom ${options?.label || ""}`, - n.priority ? `#${n.priority}` : undefined - ) - const value = toStringList( - n.tokens - ? `${n.tokens}${n.maxTokens ? `/${n.maxTokens}` : ""}t` - : undefined, - error - ) - if (value.length > 0) title += `: ${value}` - if (n.children?.length || n.preview) { - trace.startDetails(title, { - success: n.error ? false : undefined, - }) - if (n.preview) - trace.fence( - ellipse(n.preview, PROMPTDOM_PREVIEW_MAX_LENGTH), - "markdown" - ) - } else trace.resultItem(!n.error, title) - if (n.error) trace.error(undefined, n.error) - }, - afterNode: (n) => { - if (n.children?.length || n.preview) trace.endDetails() - }, - }) + if (!trace || !root.children?.length) return; + + await visitNode(root, { + node: (n) => { + const error = errorMessage(n.error); + let title = toStringList( + n.type || `🌳 promptdom ${options?.label || ""}`, + n.priority ? `#${n.priority}` : undefined, + ); + const value = toStringList( + n.tokens ? `${n.tokens}${n.maxTokens ? `/${n.maxTokens}` : ""}t` : undefined, + error, + ); + if (value.length > 0) title += `: ${value}`; + if (n.children?.length || n.preview) { + trace.startDetails(title, { + success: n.error ? false : undefined, + }); + if (n.preview) trace.fence(ellipse(n.preview, PROMPTDOM_PREVIEW_MAX_LENGTH), "markdown"); + } else trace.resultItem(!n.error, title); + if (n.error) trace.error(undefined, n.error); + }, + afterNode: (n) => { + if (n.children?.length || n.preview) trace.endDetails(); + }, + }); } -async function validateSafetyPromptNode( - trace: MarkdownTrace, - root: PromptNode -) { - let mod = false - let _contentSafety: ContentSafety - - const resolveContentSafety = async () => { - if (!_contentSafety) - _contentSafety = (await runtimeHost.contentSafety(undefined, { - trace, - })) || { id: undefined } - return _contentSafety.detectPromptInjection - } - - await visitNode(root, { - def: async (n) => { - if (!n.detectPromptInjection || !n.resolved?.content) return - - const detectPromptInjectionFn = await resolveContentSafety() - if ( - (!detectPromptInjectionFn && - n.detectPromptInjection === true) || - n.detectPromptInjection === "always" - ) - throw new Error("content safety service not available") - const { attackDetected } = - (await detectPromptInjectionFn?.(n.resolved)) || {} - if (attackDetected) { - mod = true - n.resolved = { - filename: n.resolved.filename, - content: SANITIZED_PROMPT_INJECTION, - } - n.preview = SANITIZED_PROMPT_INJECTION - n.children = [] - n.error = `safety: prompt injection detected` - trace.error( - `safety: prompt injection detected in ${n.resolved.filename}` - ) - } - }, - defData: async (n) => { - if (!n.detectPromptInjection || !n.preview) return - - const detectPromptInjectionFn = await resolveContentSafety() - if ( - (!detectPromptInjectionFn && - n.detectPromptInjection === true) || - n.detectPromptInjection === "always" - ) - throw new Error("content safety service not available") - const { attackDetected } = - (await detectPromptInjectionFn?.(n.preview)) || {} - if (attackDetected) { - mod = true - n.children = [] - n.preview = SANITIZED_PROMPT_INJECTION - n.error = `safety: prompt injection detected` - trace.error(`safety: 
prompt injection detected in data`) - } - }, - }) - return mod +async function validateSafetyPromptNode(trace: MarkdownTrace, root: PromptNode) { + let mod = false; + let _contentSafety: ContentSafety; + + const resolveContentSafety = async () => { + if (!_contentSafety) + _contentSafety = (await runtimeHost.contentSafety(undefined, { + trace, + })) || { id: undefined }; + return _contentSafety.detectPromptInjection; + }; + + await visitNode(root, { + def: async (n) => { + if (!n.detectPromptInjection || !n.resolved?.content) return; + + const detectPromptInjectionFn = await resolveContentSafety(); + if ( + (!detectPromptInjectionFn && n.detectPromptInjection === true) || + n.detectPromptInjection === "always" + ) + throw new Error("content safety service not available"); + const { attackDetected } = (await detectPromptInjectionFn?.(n.resolved)) || {}; + if (attackDetected) { + mod = true; + n.resolved = { + filename: n.resolved.filename, + content: SANITIZED_PROMPT_INJECTION, + }; + n.preview = SANITIZED_PROMPT_INJECTION; + n.children = []; + n.error = `safety: prompt injection detected`; + trace.error(`safety: prompt injection detected in ${n.resolved.filename}`); + } + }, + defData: async (n) => { + if (!n.detectPromptInjection || !n.preview) return; + + const detectPromptInjectionFn = await resolveContentSafety(); + if ( + (!detectPromptInjectionFn && n.detectPromptInjection === true) || + n.detectPromptInjection === "always" + ) + throw new Error("content safety service not available"); + const { attackDetected } = (await detectPromptInjectionFn?.(n.preview)) || {}; + if (attackDetected) { + mod = true; + n.children = []; + n.preview = SANITIZED_PROMPT_INJECTION; + n.error = `safety: prompt injection detected`; + trace.error(`safety: prompt injection detected in data`); + } + }, + }); + return mod; } async function deduplicatePromptNode(trace: MarkdownTrace, root: PromptNode) { - let mod = false - - const defs = new Set() - await visitNode(root, { - def: async (n) => { - const key = await hash(n) - if (defs.has(key)) { - trace.log(`duplicate definition and content: ${n.name}`) - n.deleted = true - mod = true - } else { - defs.add(key) - } - }, - defData: async (n) => { - const key = await hash(n) - if (defs.has(key)) { - trace.log(`duplicate definition and content: ${n.name}`) - n.deleted = true - mod = true - } else { - defs.add(key) - } - }, - }) - return mod + let mod = false; + + const defs = new Set(); + await visitNode(root, { + def: async (n) => { + const key = await hash(n); + if (defs.has(key)) { + trace.log(`duplicate definition and content: ${n.name}`); + n.deleted = true; + mod = true; + } else { + defs.add(key); + } + }, + defData: async (n) => { + const key = await hash(n); + if (defs.has(key)) { + trace.log(`duplicate definition and content: ${n.name}`); + n.deleted = true; + mod = true; + } else { + defs.add(key); + } + }, + }); + return mod; } /** @@ -1312,226 +1216,211 @@ async function deduplicatePromptNode(trace: MarkdownTrace, root: PromptNode) { * - A rendered prompt node with associated metadata, messages, resources, tools, errors, disposables, schemas, images, file outputs, and prediction. 
*/ export async function renderPromptNode( - modelId: string, - node: PromptNode, - options?: ModelTemplateOptions & TraceOptions & CancellationOptions + modelId: string, + node: PromptNode, + options?: ModelTemplateOptions & TraceOptions & CancellationOptions, ): Promise { - const { trace, flexTokens } = options || {} - const { encode: encoder } = await resolveTokenEncoder(modelId) - - let m = measure("prompt.dom.resolve") - await resolvePromptNode(encoder, node, options) - await tracePromptNode(trace, node) - m() - - m = measure("prompt.dom.deduplicate") - if (await deduplicatePromptNode(trace, node)) - await tracePromptNode(trace, node, { label: "deduplicate" }) - m() - - m = measure("prompt.dom.flex") - if (flexTokens) - await flexPromptNode(node, { - ...options, - flexTokens, - }) - m() - - m = measure("prompt.dom.truncate") - const truncated = await truncatePromptNode(encoder, node, options) - if (truncated) await tracePromptNode(trace, node, { label: "truncated" }) - m() - - m = measure("prompt.dom.validate") - const safety = await validateSafetyPromptNode(trace, node) - if (safety) await tracePromptNode(trace, node, { label: "safety" }) - m() - - const messages: ChatCompletionMessageParam[] = [] - const appendSystem = (content: string, options: ContextExpansionOptions) => - appendSystemMessage(messages, content, options) - const appendUser = ( - content: string | PromptImage, - options: ContextExpansionOptions - ) => appendUserMessage(messages, content, options) - const appendAssistant = ( - content: string, - options: ContextExpansionOptions - ) => appendAssistantMessage(messages, content, options) - - const images: PromptImage[] = [] - const errors: unknown[] = [] - const schemas: Record = {} - const tools: ToolCallback[] = [] - const fileMerges: FileMergeHandler[] = [] - const outputProcessors: PromptOutputProcessorHandler[] = [] - const chatParticipants: ChatParticipant[] = [] - const fileOutputs: FileOutput[] = [] - const mcpServers: McpServerConfig[] = [] - const disposables: AsyncDisposable[] = [] - let prediction: PromptPrediction - - m = measure("prompt.dom.render") - await visitNode(node, { - error: (n) => { - errors.push(n.error) - }, - text: async (n) => { - if (n.resolved !== undefined) appendUser(n.resolved, n) - else if (typeof n.value === "string") appendUser(n.value, n) - }, - def: async (n) => { - const value = n.resolved - if (value !== undefined) { - if (n.prediction) { - if (prediction) n.error = "duplicate prediction" - else - prediction = { - type: "content", - content: extractRange(value.content, n), - } - } - } - }, - assistant: async (n) => { - const value = await n.resolved - if (value != undefined) appendAssistant(value, n) - }, - system: async (n) => { - const value = await n.resolved - if (value != undefined) appendSystem(value, n) - }, - stringTemplate: async (n) => { - const value = n.resolved - const role = n.role || "user" - if (value != undefined) { - if (role === "system") appendSystem(value, n) - else if (role === "assistant") appendAssistant(value, n) - else appendUser(value, n) - } - }, - image: async (n) => { - const value = n.resolved - if (value?.url) { - images.push(value) - appendUser(value, n) - } - }, - schema: (n) => { - const { name: schemaName, value: schema, options } = n - if (schemas[schemaName]) - trace.error("duplicate schema name: " + schemaName) - schemas[schemaName] = schema - const { format = SCHEMA_DEFAULT_FORMAT } = options || {} - let schemaText: string - switch (format) { - case "json": - schemaText = 
JSON.stringify(schema, null, 2) - break - case "yaml": - schemaText = YAMLStringify(schema) - break - default: - schemaText = JSONSchemaStringifyToTypeScript(schema, { - typeName: schemaName, - }) - break - } - const text = `<${schemaName} lang="${format}-schema"> + const { trace, flexTokens } = options || {}; + const { encode: encoder } = await resolveTokenEncoder(modelId); + + let m = measure("prompt.dom.resolve"); + await resolvePromptNode(encoder, node, options); + await tracePromptNode(trace, node); + m(); + + m = measure("prompt.dom.deduplicate"); + if (await deduplicatePromptNode(trace, node)) + await tracePromptNode(trace, node, { label: "deduplicate" }); + m(); + + m = measure("prompt.dom.flex"); + if (flexTokens) + await flexPromptNode(node, { + ...options, + flexTokens, + }); + m(); + + m = measure("prompt.dom.truncate"); + const truncated = await truncatePromptNode(encoder, node, options); + if (truncated) await tracePromptNode(trace, node, { label: "truncated" }); + m(); + + m = measure("prompt.dom.validate"); + const safety = await validateSafetyPromptNode(trace, node); + if (safety) await tracePromptNode(trace, node, { label: "safety" }); + m(); + + const messages: ChatCompletionMessageParam[] = []; + const appendSystem = (content: string, options: ContextExpansionOptions) => + appendSystemMessage(messages, content, options); + const appendUser = (content: string | PromptImage, options: ContextExpansionOptions) => + appendUserMessage(messages, content, options); + const appendAssistant = (content: string, options: ContextExpansionOptions) => + appendAssistantMessage(messages, content, options); + + const images: PromptImage[] = []; + const errors: unknown[] = []; + const schemas: Record = {}; + const tools: ToolCallback[] = []; + const fileMerges: FileMergeHandler[] = []; + const outputProcessors: PromptOutputProcessorHandler[] = []; + const chatParticipants: ChatParticipant[] = []; + const fileOutputs: FileOutput[] = []; + const mcpServers: McpServerConfig[] = []; + const disposables: AsyncDisposable[] = []; + let prediction: PromptPrediction; + + m = measure("prompt.dom.render"); + await visitNode(node, { + error: (n) => { + errors.push(n.error); + }, + text: async (n) => { + if (n.resolved !== undefined) appendUser(n.resolved, n); + else if (typeof n.value === "string") appendUser(n.value, n); + }, + def: async (n) => { + const value = n.resolved; + if (value !== undefined) { + if (n.prediction) { + if (prediction) n.error = "duplicate prediction"; + else + prediction = { + type: "content", + content: extractRange(value.content, n), + }; + } + } + }, + assistant: async (n) => { + const value = await n.resolved; + if (value != undefined) appendAssistant(value, n); + }, + system: async (n) => { + const value = await n.resolved; + if (value != undefined) appendSystem(value, n); + }, + stringTemplate: async (n) => { + const value = n.resolved; + const role = n.role || "user"; + if (value != undefined) { + if (role === "system") appendSystem(value, n); + else if (role === "assistant") appendAssistant(value, n); + else appendUser(value, n); + } + }, + image: async (n) => { + const value = n.resolved; + if (value?.url) { + images.push(value); + appendUser(value, n); + } + }, + schema: (n) => { + const { name: schemaName, value: schema, options } = n; + if (schemas[schemaName]) trace.error("duplicate schema name: " + schemaName); + schemas[schemaName] = schema; + const { format = SCHEMA_DEFAULT_FORMAT } = options || {}; + let schemaText: string; + switch (format) { + case "json": 
+ schemaText = JSON.stringify(schema, null, 2); + break; + case "yaml": + schemaText = YAMLStringify(schema); + break; + default: + schemaText = JSONSchemaStringifyToTypeScript(schema, { + typeName: schemaName, + }); + break; + } + const text = `<${schemaName} lang="${format}-schema"> ${trimNewlines(schemaText)} -` - appendUser(text, n) - n.tokens = approximateTokens(text) - if (trace && format !== "json") - trace.detailsFenced( - `🧬 schema ${schemaName} as ${format}`, - schemaText, - format - ) - }, - tool: (n) => { - const { description, parameters, impl: fn, options, generator } = n - const { variant, variantDescription } = options || {} - const name = escapeToolName( - variant ? `${n.name}_${variant}` : n.name - ) - tools.push({ - spec: { - name, - description: variantDescription || description, - parameters, - }, - generator, - impl: fn, - options, - }) - trace.detailsFenced( - `🛠️ tool ${name}`, - { description, parameters }, - "yaml" - ) - }, - fileMerge: (n) => { - fileMerges.push(n.fn) - trace.itemValue(`file merge`, n.fn) - }, - outputProcessor: (n) => { - outputProcessors.push(n.fn) - trace.itemValue(`output processor`, n.fn.name) - }, - chatParticipant: (n) => { - chatParticipants.push(n.participant) - trace.itemValue( - `chat participant`, - n.participant.options?.label || n.participant.generator.name - ) - }, - fileOutput: (n) => { - fileOutputs.push(n.output) - trace.itemValue(`file output`, n.output.pattern) - }, - mcpServer: (n) => { - mcpServers.push(n.config) - trace.itemValue(`mcp server`, n.config.id) +`; + appendUser(text, n); + n.tokens = approximateTokens(text); + if (trace && format !== "json") + trace.detailsFenced(`🧬 schema ${schemaName} as ${format}`, schemaText, format); + }, + tool: (n) => { + const { description, parameters, impl: fn, options, generator } = n; + const { variant, variantDescription } = options || {}; + const name = escapeToolName(variant ? 
`${n.name}_${variant}` : n.name); + tools.push({ + spec: { + name, + description: variantDescription || description, + parameters, }, - }) - - if (mcpServers.length) { - for (const mcpServer of mcpServers) { - dbgMcp(`starting server ${mcpServer.id}`) - const res = await runtimeHost.mcp.startMcpServer(mcpServer, { - trace, - }) - disposables.push(res) - const mcpTools = await res.listToolCallbacks() - dbgMcp( - `tools %O`, - mcpTools?.map((t) => t.spec.name) - ) - tools.push(...mcpTools) - } + generator, + impl: fn, + options, + }); + trace.detailsFenced(`🛠️ tool ${name}`, { description, parameters }, "yaml"); + }, + fileMerge: (n) => { + fileMerges.push(n.fn); + trace.itemValue(`file merge`, n.fn); + }, + outputProcessor: (n) => { + outputProcessors.push(n.fn); + trace.itemValue(`output processor`, n.fn.name); + }, + chatParticipant: (n) => { + chatParticipants.push(n.participant); + trace.itemValue( + `chat participant`, + n.participant.options?.label || n.participant.generator.name, + ); + }, + fileOutput: (n) => { + fileOutputs.push(n.output); + trace.itemValue(`file output`, n.output.pattern); + }, + mcpServer: (n) => { + mcpServers.push(n.config); + trace.itemValue(`mcp server`, n.config.id); + }, + }); + + if (mcpServers.length) { + for (const mcpServer of mcpServers) { + dbgMcp(`starting server ${mcpServer.id}`); + const res = await runtimeHost.mcp.startMcpServer(mcpServer, { + trace, + }); + disposables.push(res); + const mcpTools = await res.listToolCallbacks(); + dbgMcp( + `tools %O`, + mcpTools?.map((t) => t.spec.name), + ); + tools.push(...mcpTools); } - m() - - const res = Object.freeze({ - images, - schemas, - tools, - fileMerges, - outputProcessors, - chatParticipants, - errors, - messages, - fileOutputs, - prediction, - disposables, - }) - - dbg( - `${res.messages.length} messages, tools: %o`, - res.tools.map((t) => t.spec.name) - ) - return res + } + m(); + + const res = Object.freeze({ + images, + schemas, + tools, + fileMerges, + outputProcessors, + chatParticipants, + errors, + messages, + fileOutputs, + prediction, + disposables, + }); + + dbg( + `${res.messages.length} messages, tools: %o`, + res.tools.map((t) => t.spec.name), + ); + return res; } /** @@ -1552,76 +1441,76 @@ ${trimNewlines(schemaText)} * @returns An object containing response type and schema details. 
*/ export function finalizeMessages( - model: string, - messages: ChatCompletionMessageParam[], - options: { - fileOutputs?: FileOutput[] - } & ModelOptions & - TraceOptions & - ContentSafetyOptions & - SecretDetectionOptions + model: string, + messages: ChatCompletionMessageParam[], + options: { + fileOutputs?: FileOutput[]; + } & ModelOptions & + TraceOptions & + ContentSafetyOptions & + SecretDetectionOptions, ) { - dbg(`finalize messages for ${model}`) - const m = measure("prompt.dom.finalize") - const { fileOutputs, trace, secretScanning } = options || {} - if (fileOutputs?.length > 0) { - appendSystemMessage( - messages, - ` + dbg(`finalize messages for ${model}`); + const m = measure("prompt.dom.finalize"); + const { fileOutputs, trace, secretScanning } = options || {}; + if (fileOutputs?.length > 0) { + appendSystemMessage( + messages, + ` ## File generation rules When generating files, use the following rules which are formatted as "file glob: description": ${fileOutputs.map((fo) => ` ${fo.pattern}: ${fo.description || "generated file"}`)} -` - ) - } - - const responseSchema = promptParametersSchemaToJSONSchema( - options.responseSchema - ) as JSONSchemaObject - let responseType = options.responseType - - if (responseSchema && !responseType && responseType !== "json_schema") { - const { provider } = parseModelIdentifier(model) - const features = providerFeatures(provider) - responseType = features?.responseType || "json" - dbg(`response type: %s (auto)`, responseType) - } - if (responseType) trace.itemValue(`response type`, responseType) - if (responseSchema) { - trace.detailsFenced("📜 response schema", responseSchema) - if (responseType !== "json_schema") { - const typeName = "Output" - const schemaTs = JSONSchemaStringifyToTypeScript(responseSchema, { - typeName, - }) - appendSystemMessage( - messages, - `## Output Schema +`, + ); + } + + const responseSchema = promptParametersSchemaToJSONSchema( + options.responseSchema, + ) as JSONSchemaObject; + let responseType = options.responseType; + + if (responseSchema && !responseType && responseType !== "json_schema") { + const { provider } = parseModelIdentifier(model); + const features = providerFeatures(provider); + responseType = features?.responseType || "json"; + dbg(`response type: %s (auto)`, responseType); + } + if (responseType) trace.itemValue(`response type`, responseType); + if (responseSchema) { + trace.detailsFenced("📜 response schema", responseSchema); + if (responseType !== "json_schema") { + const typeName = "Output"; + const schemaTs = JSONSchemaStringifyToTypeScript(responseSchema, { + typeName, + }); + appendSystemMessage( + messages, + `## Output Schema You are a service that translates user requests into ${responseType === "yaml" ? 
"YAML" : "JSON"} objects of type "${typeName}" according to the following TypeScript definitions: <${typeName}> ${schemaTs} -` - ) - } +`, + ); } - - if (secretScanning !== false) { - // this is a bit brutal, but we don't want to miss secrets - // hidden in fields - const secrets = redactSecrets(JSON.stringify(messages), { trace }) - if (Object.keys(secrets.found).length) { - const newMessage = JSON.parse(secrets.text) - messages.splice(0, messages.length, ...newMessage) - } + } + + if (secretScanning !== false) { + // this is a bit brutal, but we don't want to miss secrets + // hidden in fields + const secrets = redactSecrets(JSON.stringify(messages), { trace }); + if (Object.keys(secrets.found).length) { + const newMessage = JSON.parse(secrets.text); + messages.splice(0, messages.length, ...newMessage); } - m() + } + m(); - return { - responseType, - responseSchema, - } + return { + responseType, + responseSchema, + }; } diff --git a/packages/core/src/promptfoo.ts b/packages/core/src/promptfoo.ts index 7dc579421c..9da63af004 100644 --- a/packages/core/src/promptfoo.ts +++ b/packages/core/src/promptfoo.ts @@ -1,92 +1,90 @@ // Import necessary utilities and constants import { - CSV_REGEX, - HTTPS_REGEX, - JSON5_REGEX, - MJS_REGEX, - MJTS_REGEX, - MODEL_PROVIDER_AZURE_OPENAI, - MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, - MODEL_PROVIDER_GITHUB, - MODEL_PROVIDER_OPENAI, - OPENAI_API_BASE, - PROMPTFOO_REDTEAM_NUM_TESTS, - TEST_CSV_ENTRY_SEPARATOR, - XML_REGEX, - YAML_REGEX, -} from "./constants" -import { arrayify, logWarn } from "./util" -import { runtimeHost } from "./host" -import { ModelConnectionInfo, parseModelIdentifier } from "./models" -import { deleteEmptyValues, deleteUndefinedValues } from "./cleaners" -import testSchema from "../../../docs/public/schemas/tests.json" -import { validateJSONWithSchema } from "./schema" -import { MarkdownTrace, TraceOptions } from "./trace" -import { CancellationOptions } from "./cancellation" -import { uniq } from "es-toolkit" -import { dedent } from "./indent" -import { importFile } from "./importprompt" + CSV_REGEX, + HTTPS_REGEX, + JSON5_REGEX, + MJS_REGEX, + MJTS_REGEX, + MODEL_PROVIDER_AZURE_OPENAI, + MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI, + MODEL_PROVIDER_GITHUB, + MODEL_PROVIDER_OPENAI, + OPENAI_API_BASE, + PROMPTFOO_REDTEAM_NUM_TESTS, + TEST_CSV_ENTRY_SEPARATOR, + XML_REGEX, + YAML_REGEX, +} from "./constants"; +import { arrayify, logWarn } from "./util"; +import { runtimeHost } from "./host"; +import { ModelConnectionInfo, parseModelIdentifier } from "./models"; +import { deleteEmptyValues, deleteUndefinedValues } from "./cleaners"; +import testSchema from "../../../docs/public/schemas/tests.json"; +import { validateJSONWithSchema } from "./schema"; +import { MarkdownTrace, TraceOptions } from "./trace"; +import { CancellationOptions } from "./cancellation"; +import { uniq } from "es-toolkit"; +import { dedent } from "./indent"; +import { importFile } from "./importprompt"; /** * Convert GenAIScript connection info into prompt foo configuration * @param info */ function resolveTestProvider( - info: ModelConnectionInfo, - modelType: "chat" | "embedding" + info: ModelConnectionInfo, + modelType: "chat" | "embedding", ): { - id: string - config?: { apiHost: string } + id: string; + config?: { apiHost: string }; } { - if (!info) return undefined + if (!info) return undefined; - const { base } = info - const { provider, model } = parseModelIdentifier(info.model) - const apiHost = base - .replace(HTTPS_REGEX, "") - 
.replace(/\/openai\/deployments$/i, "") - switch (provider) { - case MODEL_PROVIDER_AZURE_OPENAI: - case MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI: - return { - id: "azureopenai:" + modelType + ":" + model, - config: { - apiHost, - }, - } - case MODEL_PROVIDER_GITHUB: - return { - id: provider + ":" + model, - } - case MODEL_PROVIDER_OPENAI: - if (base === OPENAI_API_BASE) return { id: info.model } - return { id: info.model, config: { apiHost } } - default: - return { - id: provider + ":" + modelType + ":" + model, - config: { - apiHost, - }, - } - } + const { base } = info; + const { provider, model } = parseModelIdentifier(info.model); + const apiHost = base.replace(HTTPS_REGEX, "").replace(/\/openai\/deployments$/i, ""); + switch (provider) { + case MODEL_PROVIDER_AZURE_OPENAI: + case MODEL_PROVIDER_AZURE_SERVERLESS_OPENAI: + return { + id: "azureopenai:" + modelType + ":" + model, + config: { + apiHost, + }, + }; + case MODEL_PROVIDER_GITHUB: + return { + id: provider + ":" + model, + }; + case MODEL_PROVIDER_OPENAI: + if (base === OPENAI_API_BASE) return { id: info.model }; + return { id: info.model, config: { apiHost } }; + default: + return { + id: provider + ":" + modelType + ":" + model, + config: { + apiHost, + }, + }; + } } function renderPurpose(script: PromptScript): string { - const { description, title, id, redteam, jsSource } = script - const { purpose } = redteam || {} - const trace = new MarkdownTrace() - if (purpose) { - trace.heading(2, "Purpose") - trace.appendContent(purpose) - } - trace.heading(2, "Prompt details") - trace.appendContent( - `The prompt is written using GenAIScript (https://microsoft.github.io/genaiscript), a JavaScript-based DSL for creating AI prompts. The generated prompt will be injected in the 'env.files' variable.` - ) - trace.itemValue(`title`, title) - trace.itemValue(`description`, description) - if (jsSource) trace.fence(jsSource, "js") - return trace.content + const { description, title, id, redteam, jsSource } = script; + const { purpose } = redteam || {}; + const trace = new MarkdownTrace(); + if (purpose) { + trace.heading(2, "Purpose"); + trace.appendContent(purpose); + } + trace.heading(2, "Prompt details"); + trace.appendContent( + `The prompt is written using GenAIScript (https://microsoft.github.io/genaiscript), a JavaScript-based DSL for creating AI prompts. The generated prompt will be injected in the 'env.files' variable.`, + ); + trace.itemValue(`title`, title); + trace.itemValue(`description`, description); + if (jsSource) trace.fence(jsSource, "js"); + return trace.content; } /** @@ -111,274 +109,257 @@ function renderPurpose(script: PromptScript): string { * @returns A configuration object for PromptFoo based on the provided script and options. 
*/ export async function generatePromptFooConfiguration( - script: PromptScript, - options: { - chatInfo: ModelConnectionInfo & ModelAliasesOptions - embeddingsInfo?: ModelConnectionInfo - provider?: string - out?: string - cli?: string - redteam?: boolean - models?: (ModelOptions & ModelAliasesOptions)[] - } & TraceOptions & - CancellationOptions + script: PromptScript, + options: { + chatInfo: ModelConnectionInfo & ModelAliasesOptions; + embeddingsInfo?: ModelConnectionInfo; + provider?: string; + out?: string; + cli?: string; + redteam?: boolean; + models?: (ModelOptions & ModelAliasesOptions)[]; + } & TraceOptions & + CancellationOptions, ) { - // Destructure options with default values - const { - provider = "provider.mjs", - chatInfo, - embeddingsInfo, - trace, - } = options || {} - const { title, id } = script - const description = dedent(script.description) - const models = options?.models || [] - const redteam: Partial = options?.redteam - ? script.redteam || {} - : undefined - const purpose = redteam ? renderPurpose(script) : undefined - const testsAndFiles = arrayify(script.tests) - const tests: PromptTest[] = [] - for (const testOrFile of testsAndFiles) { - if (Array.isArray(testOrFile)) tests.push(...testOrFile) - else if (typeof testOrFile === "object") tests.push(testOrFile) - else if (typeof testOrFile === "string") { - if (CSV_REGEX.test(testOrFile)) { - const data: any[] = await runtimeHost.workspace.readCSV( - testOrFile, - { - repair: false, - } - ) - if (!data.length) { - logWarn(`no data in ${testOrFile}`) - continue - } - const headers = Object.keys(data[0]) - if (!headers.length) { - logWarn(`no headers in ${testOrFile}`) - continue - } - for (const row of data) { - const test: PromptTest = { - files: [], - workspaceFiles: [], - vars: {}, - asserts: [], - } - for (let i = 0; i < headers.length; ++i) { - const header = headers[i] - const s = String(row[header]) - if (!s) continue - switch (header) { - case "name": - case "description": - test[header] = s?.trim() - break - case "keywords": - case "forbidden": - case "rubrics": - case "facts": - test[header] = s.split(TEST_CSV_ENTRY_SEPARATOR) - break - case "file": - ;(test.files as string[]).push(s) - break - case "fileContent": - ;(test.workspaceFiles as WorkspaceFile[]).push({ - filename: "", - content: s, - }) - break - default: - test.vars[header] = row[header] - break - } - } - tests.push(test) - } - } else if ( - JSON5_REGEX.test(testOrFile) || - YAML_REGEX.test(testOrFile) || - XML_REGEX.test(testOrFile) - ) { - const data = arrayify( - await runtimeHost.workspace.readData(testOrFile) - ) as (string | PromptTest)[] - for (const row of data) { - if (typeof row === "string") - tests.push({ - workspaceFiles: { filename: "", content: row }, - } satisfies PromptTest) - else if (typeof row === "object") tests.push(row) - } - } else if (MJTS_REGEX.test(testOrFile)) { - const res = await importFile(testOrFile, { - onImported: async (module) => { - let res = module.default - if (typeof res === "function") res = await res() - res = arrayify(res) - return res - }, - }) - tests.push(...res) + // Destructure options with default values + const { provider = "provider.mjs", chatInfo, embeddingsInfo, trace } = options || {}; + const { title, id } = script; + const description = dedent(script.description); + const models = options?.models || []; + const redteam: Partial = options?.redteam ? script.redteam || {} : undefined; + const purpose = redteam ? 
renderPurpose(script) : undefined; + const testsAndFiles = arrayify(script.tests); + const tests: PromptTest[] = []; + for (const testOrFile of testsAndFiles) { + if (Array.isArray(testOrFile)) tests.push(...testOrFile); + else if (typeof testOrFile === "object") tests.push(testOrFile); + else if (typeof testOrFile === "string") { + if (CSV_REGEX.test(testOrFile)) { + const data: any[] = await runtimeHost.workspace.readCSV(testOrFile, { + repair: false, + }); + if (!data.length) { + logWarn(`no data in ${testOrFile}`); + continue; + } + const headers = Object.keys(data[0]); + if (!headers.length) { + logWarn(`no headers in ${testOrFile}`); + continue; + } + for (const row of data) { + const test: PromptTest = { + files: [], + workspaceFiles: [], + vars: {}, + asserts: [], + }; + for (let i = 0; i < headers.length; ++i) { + const header = headers[i]; + const s = String(row[header]); + if (!s) continue; + switch (header) { + case "name": + case "description": + test[header] = s?.trim(); + break; + case "keywords": + case "forbidden": + case "rubrics": + case "facts": + test[header] = s.split(TEST_CSV_ENTRY_SEPARATOR); + break; + case "file": + (test.files as string[]).push(s); + break; + case "fileContent": + (test.workspaceFiles as WorkspaceFile[]).push({ + filename: "", + content: s, + }); + break; + default: + test.vars[header] = row[header]; + break; } + } + tests.push(test); + } + } else if ( + JSON5_REGEX.test(testOrFile) || + YAML_REGEX.test(testOrFile) || + XML_REGEX.test(testOrFile) + ) { + const data = arrayify(await runtimeHost.workspace.readData(testOrFile)) as ( + | string + | PromptTest + )[]; + for (const row of data) { + if (typeof row === "string") + tests.push({ + workspaceFiles: { filename: "", content: row }, + } satisfies PromptTest); + else if (typeof row === "object") tests.push(row); } + } else if (MJTS_REGEX.test(testOrFile)) { + const res = await importFile(testOrFile, { + onImported: async (module) => { + let res = module.default; + if (typeof res === "function") res = await res(); + res = arrayify(res); + return res; + }, + }); + tests.push(...res); + } } + } - for (const test of tests) { - const v = validateJSONWithSchema(test, testSchema as JSONSchema, { - trace, - }) - if (v.schemaError) throw new Error(v.schemaError) - } + for (const test of tests) { + const v = validateJSONWithSchema(test, testSchema as JSONSchema, { + trace, + }); + if (v.schemaError) throw new Error(v.schemaError); + } - // Ensure at least one model exists - if (!models.length) { - models.push({ - ...script, - model: chatInfo.model, - smallModel: chatInfo.smallModel, - visionModel: chatInfo.visionModel, - }) - } + // Ensure at least one model exists + if (!models.length) { + models.push({ + ...script, + model: chatInfo.model, + smallModel: chatInfo.smallModel, + visionModel: chatInfo.visionModel, + }); + } - const cli = options?.cli - const testTransforms = { - text: "output.text", - json: undefined as string, - } - const assertTransforms = { - text: undefined as string, - json: "output.text", - } + const cli = options?.cli; + const testTransforms = { + text: "output.text", + json: undefined as string, + }; + const assertTransforms = { + text: undefined as string, + json: "output.text", + }; - const resolveModel = (m: string) => runtimeHost.modelAliases[m]?.model ?? m + const resolveModel = (m: string) => runtimeHost.modelAliases[m]?.model ?? 
m; - const testProvider = deleteUndefinedValues({ - text: resolveTestProvider(chatInfo, "chat"), - embedding: resolveTestProvider(embeddingsInfo, "embedding"), - }) - const defaultTest = deleteUndefinedValues({ - transformVars: "{ ...vars, sessionId: context.uuid }", - options: deleteUndefinedValues({ - transform: testTransforms["text"], - provider: testProvider, - }), - }) + const testProvider = deleteUndefinedValues({ + text: resolveTestProvider(chatInfo, "chat"), + embedding: resolveTestProvider(embeddingsInfo, "embedding"), + }); + const defaultTest = deleteUndefinedValues({ + transformVars: "{ ...vars, sessionId: context.uuid }", + options: deleteUndefinedValues({ + transform: testTransforms["text"], + provider: testProvider, + }), + }); - // Create configuration object - const res = deleteUndefinedValues({ - // Description combining title and description - description: [title, description].filter((s) => s).join("\n"), - prompts: [id], - // Map model options to providers - providers: models - .map(({ model, smallModel, visionModel, temperature, topP }) => ({ - model: - resolveModel(model) ?? runtimeHost.modelAliases.large.model, - smallModel: - resolveModel(smallModel) ?? - runtimeHost.modelAliases.small.model, - visionModel: - resolveModel(visionModel) ?? - runtimeHost.modelAliases.vision.model, - temperature: !isNaN(temperature) - ? temperature - : runtimeHost.modelAliases.temperature, - top_p: topP, - })) - .map(({ model, smallModel, visionModel, temperature, top_p }) => ({ - id: provider, - label: [ - model, - `small=${smallModel}`, - `vision=${visionModel}`, - `temp=${temperature}`, - top_p !== undefined ? `p=${top_p}` : undefined, - ] - .filter((v) => v !== undefined) - .join(", "), - config: { - model, - smallModel, - visionModel, - temperature, - top_p, - cli, - }, + // Create configuration object + const res = deleteUndefinedValues({ + // Description combining title and description + description: [title, description].filter((s) => s).join("\n"), + prompts: [id], + // Map model options to providers + providers: models + .map(({ model, smallModel, visionModel, temperature, topP }) => ({ + model: resolveModel(model) ?? runtimeHost.modelAliases.large.model, + smallModel: resolveModel(smallModel) ?? runtimeHost.modelAliases.small.model, + visionModel: resolveModel(visionModel) ?? runtimeHost.modelAliases.vision.model, + temperature: !isNaN(temperature) ? temperature : runtimeHost.modelAliases.temperature, + top_p: topP, + })) + .map(({ model, smallModel, visionModel, temperature, top_p }) => ({ + id: provider, + label: [ + model, + `small=${smallModel}`, + `vision=${visionModel}`, + `temp=${temperature}`, + top_p !== undefined ? `p=${top_p}` : undefined, + ] + .filter((v) => v !== undefined) + .join(", "), + config: { + model, + smallModel, + visionModel, + temperature, + top_p, + cli, + }, + })), + defaultTest, + target: redteam + ? { + id: provider, + label: redteam.label || title || id, + } + : undefined, + redteam: redteam + ? 
deleteEmptyValues({ + purpose, + injectVar: "fileContent", + numTests: redteam.numTests || PROMPTFOO_REDTEAM_NUM_TESTS, + plugins: uniq(arrayify(redteam.plugins)), + strategies: uniq(arrayify(redteam.strategies)), + language: redteam.language, + }) + : undefined, + // Map tests to configuration format + tests: arrayify(tests).map( + ({ + description, + files, + workspaceFiles, + vars, + rubrics, + facts, + format = "text", + keywords = [], + forbidden = [], + asserts = [], + }) => + deleteEmptyValues({ + description, + vars: deleteEmptyValues({ + files, + workspaceFiles, + vars: Object.keys(vars || {}).length ? vars : undefined, + }), + options: { + transform: testTransforms[format], + }, + assert: [ + ...arrayify(keywords).map((kv) => ({ + type: "icontains", // Check if output contains keyword + value: kv, + transform: assertTransforms[format], })), - defaultTest, - target: redteam - ? { - id: provider, - label: redteam.label || title || id, - } - : undefined, - redteam: redteam - ? deleteEmptyValues({ - purpose, - injectVar: "fileContent", - numTests: redteam.numTests || PROMPTFOO_REDTEAM_NUM_TESTS, - plugins: uniq(arrayify(redteam.plugins)), - strategies: uniq(arrayify(redteam.strategies)), - language: redteam.language, - }) - : undefined, - // Map tests to configuration format - tests: arrayify(tests).map( - ({ - description, - files, - workspaceFiles, - vars, - rubrics, - facts, - format = "text", - keywords = [], - forbidden = [], - asserts = [], - }) => - deleteEmptyValues({ - description, - vars: deleteEmptyValues({ - files, - workspaceFiles, - vars: Object.keys(vars || {}).length ? vars : undefined, - }), - options: { - transform: testTransforms[format], - }, - assert: [ - ...arrayify(keywords).map((kv) => ({ - type: "icontains", // Check if output contains keyword - value: kv, - transform: assertTransforms[format], - })), - ...arrayify(forbidden).map((kv) => ({ - type: "not-icontains", // Check if output does not contain forbidden keyword - value: kv, - transform: assertTransforms[format], - })), - ...arrayify(rubrics).map((value) => ({ - type: "llm-rubric", // Use LLM rubric for evaluation - value, - transform: assertTransforms[format], - })), - ...arrayify(facts).map((value) => ({ - type: "factuality", // Check factuality of output - value, - transform: assertTransforms[format], - })), - ...arrayify(asserts).map((assert) => ({ - ...assert, - transform: - assert.transform || assertTransforms[format], // Default transform - })), - ].filter((a) => !!a), // Filter out any undefined assertions - }) - ), - }) + ...arrayify(forbidden).map((kv) => ({ + type: "not-icontains", // Check if output does not contain forbidden keyword + value: kv, + transform: assertTransforms[format], + })), + ...arrayify(rubrics).map((value) => ({ + type: "llm-rubric", // Use LLM rubric for evaluation + value, + transform: assertTransforms[format], + })), + ...arrayify(facts).map((value) => ({ + type: "factuality", // Check factuality of output + value, + transform: assertTransforms[format], + })), + ...arrayify(asserts).map((assert) => ({ + ...assert, + transform: assert.transform || assertTransforms[format], // Default transform + })), + ].filter((a) => !!a), // Filter out any undefined assertions + }), + ), + }); - return res // Return the generated configuration + return res; // Return the generated configuration } diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts index ff5711b2fe..9dfba360f5 100644 --- a/packages/core/src/promptrunner.ts +++ 
b/packages/core/src/promptrunner.ts
@@ -1,30 +1,30 @@
-import debug from "debug"
-const runnerDbg = debug("genaiscript:promptrunner")
+import debug from "debug";
+const runnerDbg = debug("genaiscript:promptrunner");
 // Import necessary modules and functions for handling chat sessions, templates, file management, etc.
-import { executeChatSession, tracePromptResult } from "./chat"
-import { GenerationStatus, Project } from "./server/messages"
-import { arrayify, assert, relativePath } from "./util"
-import { runtimeHost } from "./host"
-import { MarkdownTrace } from "./trace"
-import { CORE_VERSION } from "./version"
-import { expandFiles } from "./fs"
-import { dataToMarkdownTable } from "./csv"
-import { Fragment, GenerationOptions } from "./generation"
-import { traceCliArgs } from "./clihelp"
-import { GenerationResult } from "./server/messages"
-import { resolveModelConnectionInfo } from "./models"
-import { RequestError, errorMessage } from "./error"
-import { renderFencedVariables } from "./fence"
-import { parsePromptParameters } from "./vars"
-import { resolveFileContent } from "./file"
-import { expandTemplate } from "./expander"
-import { resolveLanguageModel } from "./lm"
-import { checkCancelled } from "./cancellation"
-import { lastAssistantReasoning } from "./chatrender"
-import { unthink } from "./think"
-import { deleteUndefinedValues } from "./cleaners"
-import { DEBUG_SCRIPT_CATEGORY } from "./constants"
+import { executeChatSession, tracePromptResult } from "./chat";
+import { GenerationStatus, Project } from "./server/messages";
+import { arrayify, assert, relativePath } from "./util";
+import { runtimeHost } from "./host";
+import { MarkdownTrace } from "./trace";
+import { CORE_VERSION } from "./version";
+import { expandFiles } from "./fs";
+import { dataToMarkdownTable } from "./csv";
+import { Fragment, GenerationOptions } from "./generation";
+import { traceCliArgs } from "./clihelp";
+import { GenerationResult } from "./server/messages";
+import { resolveModelConnectionInfo } from "./models";
+import { RequestError, errorMessage } from "./error";
+import { renderFencedVariables } from "./fence";
+import { parsePromptParameters } from "./vars";
+import { resolveFileContent } from "./file";
+import { expandTemplate } from "./expander";
+import { resolveLanguageModel } from "./lm";
+import { checkCancelled } from "./cancellation";
+import { lastAssistantReasoning } from "./chatrender";
+import { unthink } from "./think";
+import { deleteUndefinedValues } from "./cleaners";
+import { DEBUG_SCRIPT_CATEGORY } from "./constants";
 
 // Asynchronously resolve expansion variables needed for a template
 /**
@@ -37,85 +37,83 @@ import { DEBUG_SCRIPT_CATEGORY } from "./constants"
  * @returns An object containing resolved variables.
*/ async function resolveExpansionVars( - project: Project, - trace: MarkdownTrace, - template: PromptScript, - fragment: Fragment, - output: OutputTrace, - options: GenerationOptions + project: Project, + trace: MarkdownTrace, + template: PromptScript, + fragment: Fragment, + output: OutputTrace, + options: GenerationOptions, ): Promise { - const { vars, runDir, runId } = options - const root = runtimeHost.projectFolder() + const { vars, runDir, runId } = options; + const root = runtimeHost.projectFolder(); - assert(!!vars) - assert(!!runDir) - assert(!!runId) + assert(!!vars); + assert(!!runDir); + assert(!!runId); - const files: WorkspaceFile[] = [] - const templateFiles = arrayify(template.files) - const referenceFiles = fragment.files.slice(0) - const workspaceFiles = fragment.workspaceFiles?.slice(0) || [] - const filenames = await expandFiles( - referenceFiles.length || workspaceFiles.length - ? referenceFiles - : templateFiles, - { - applyGitIgnore: false, - accept: template.accept, - } - ) - for (let filename of filenames) { - filename = relativePath(root, filename) + const files: WorkspaceFile[] = []; + const templateFiles = arrayify(template.files); + const referenceFiles = fragment.files.slice(0); + const workspaceFiles = fragment.workspaceFiles?.slice(0) || []; + const filenames = await expandFiles( + referenceFiles.length || workspaceFiles.length ? referenceFiles : templateFiles, + { + applyGitIgnore: false, + accept: template.accept, + }, + ); + for (let filename of filenames) { + filename = relativePath(root, filename); - // Skip if file already in the list - if (files.find((lk) => lk.filename === filename)) continue - const file: WorkspaceFile = { filename } - await resolveFileContent(file) - files.push(file) - } + // Skip if file already in the list + if (files.find((lk) => lk.filename === filename)) continue; + const file: WorkspaceFile = { filename }; + await resolveFileContent(file); + files.push(file); + } - for (const wf of workspaceFiles) { - if (!files.find((f) => f.filename === wf.filename)) { - await resolveFileContent(wf) - files.push(wf) - } + for (const wf of workspaceFiles) { + if (!files.find((f) => f.filename === wf.filename)) { + await resolveFileContent(wf); + files.push(wf); } + } - // Parse and obtain attributes from prompt parameters - const attrs = parsePromptParameters(project, template, vars) - const secrets: Record = {} + // Parse and obtain attributes from prompt parameters + const attrs = parsePromptParameters(project, template, vars); + const secrets: Record = {}; - // Read secrets defined in the template - for (const secret of template.secrets || []) { - const value = await runtimeHost.readSecret(secret) - if (value) { - trace.item(`secret \`${secret}\` used`) - secrets[secret] = value - } else trace.error(`secret \`${secret}\` not found`) - } + // Read secrets defined in the template + for (const secret of template.secrets || []) { + const value = await runtimeHost.readSecret(secret); + if (value) { + trace.item(`secret \`${secret}\` used`); + secrets[secret] = value; + } else trace.error(`secret \`${secret}\` not found`); + } - // Create and return an object containing resolved variables - const meta: PromptDefinition & ModelConnectionOptions = structuredClone({ - id: template.id, - title: template.title, - description: template.description, - group: template.group, - model: template.model, - defTools: template.defTools, - }) // frozen later - const res = { - dir: ".", - files, - meta, - vars: attrs, - secrets, - output, - generator: 
undefined as ChatGenerationContext, - runDir, - runId, - dbg: debug(DEBUG_SCRIPT_CATEGORY), - } satisfies ExpansionVariables - return res + // Create and return an object containing resolved variables + const meta: PromptDefinition & ModelConnectionOptions = structuredClone({ + id: template.id, + title: template.title, + description: template.description, + group: template.group, + model: template.model, + defTools: template.defTools, + }); // frozen later + const res = { + dir: ".", + files, + meta, + vars: attrs, + secrets, + output, + generator: undefined as ChatGenerationContext, + runDir, + runId, + dbg: debug(DEBUG_SCRIPT_CATEGORY), + } satisfies ExpansionVariables; + return res; } // Main function to run a template with given options @@ -129,265 +127,219 @@ async function resolveExpansionVars( * @returns A generation result containing execution details, outputs, and potential errors, including status, messages, edits, annotations, file changes, and usage statistics. */ export async function runTemplate( - prj: Project, - template: PromptScript, - fragment: Fragment, - options: GenerationOptions + prj: Project, + template: PromptScript, + fragment: Fragment, + options: GenerationOptions, ): Promise { - assert(fragment !== undefined) - assert(options !== undefined) - assert(options.trace !== undefined) - assert(options.outputTrace !== undefined) - const { - label, - cliInfo, - trace, - outputTrace, - cancellationToken, - model, - runId, - } = options - const version = CORE_VERSION - assert(model !== undefined) + assert(fragment !== undefined); + assert(options !== undefined); + assert(options.trace !== undefined); + assert(options.outputTrace !== undefined); + const { label, cliInfo, trace, outputTrace, cancellationToken, model, runId } = options; + const version = CORE_VERSION; + assert(model !== undefined); - runtimeHost.project = prj + runtimeHost.project = prj; - try { - if (cliInfo) { - trace.heading(3, `🤖 ${template.id}`) - traceCliArgs(trace, template, options) - } + try { + if (cliInfo) { + trace.heading(3, `🤖 ${template.id}`); + traceCliArgs(trace, template, options); + } - // Resolve expansion variables for the template - const env = await resolveExpansionVars( - prj, - trace, - template, - fragment, - outputTrace, - options - ) - let { - messages, - schemas, - tools, - fileMerges, - outputProcessors, - chatParticipants, - fileOutputs, - prediction, - status, - statusText, - temperature, - reasoningEffort, - topP, - maxTokens, - fallbackTools, - seed, - responseType, - responseSchema, - logprobs, - topLogprobs, - disposables, - cache, - metadata, - } = await expandTemplate(prj, template, options, env) - const { output, generator, secrets, dbg: envDbg, ...restEnv } = env + // Resolve expansion variables for the template + const env = await resolveExpansionVars(prj, trace, template, fragment, outputTrace, options); + let { + messages, + schemas, + tools, + fileMerges, + outputProcessors, + chatParticipants, + fileOutputs, + prediction, + status, + statusText, + temperature, + reasoningEffort, + topP, + maxTokens, + fallbackTools, + seed, + responseType, + responseSchema, + logprobs, + topLogprobs, + disposables, + cache, + metadata, + } = await expandTemplate(prj, template, options, env); + const { output, generator, secrets, dbg: envDbg, ...restEnv } = env; - runnerDbg(`messages ${messages.length}`) + runnerDbg(`messages ${messages.length}`); - // Handle failed expansion scenario - if (status !== "success" || !messages.length) { - trace.renderErrors() - return { - status: 
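// The `satisfies` checks used above (ExpansionVariables, GenerationResult)
// validate a literal against an interface without widening its inferred type.
// A tiny stand-alone example with a hypothetical interface:
interface Vars {
  dir: string;
  runId: string;
}

const v = {
  dir: ".",
  runId: "run-1",
} satisfies Vars;
// v.dir keeps the narrow type "." while still being a valid Vars;
// a plain `const v: Vars = ...` annotation would widen it to string.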
status as GenerationStatus, - statusText, - messages, - env: restEnv, - label, - version, - text: unthink(outputTrace.content), - reasoning: lastAssistantReasoning(messages), - edits: [], - annotations: [], - changelogs: [], - fileEdits: {}, - fences: [], - frames: [], - schemas: {}, - usage: undefined, - runId, - } satisfies GenerationResult - } + // Handle failed expansion scenario + if (status !== "success" || !messages.length) { + trace.renderErrors(); + return { + status: status as GenerationStatus, + statusText, + messages, + env: restEnv, + label, + version, + text: unthink(outputTrace.content), + reasoning: lastAssistantReasoning(messages), + edits: [], + annotations: [], + changelogs: [], + fileEdits: {}, + fences: [], + frames: [], + schemas: {}, + usage: undefined, + runId, + } satisfies GenerationResult; + } - // Resolve model connection information - const connection = await resolveModelConnectionInfo( - { model }, - { trace, token: true } - ) - if (connection.info.error) - throw new Error(errorMessage(connection.info.error)) - if (!connection.configuration) - throw new RequestError( - 403, - `LLM configuration missing for model ${model}`, - connection.info - ) - checkCancelled(cancellationToken) - const { ok } = await runtimeHost.pullModel( - connection.configuration, - options - ) - if (!ok) { - trace.renderErrors() - return deleteUndefinedValues({ - status: "error", - statusText: "", - messages, - env: restEnv, - label, - version, - text: unthink(outputTrace.content), - reasoning: lastAssistantReasoning(messages), - edits: [], - annotations: [], - changelogs: [], - fileEdits: {}, - fences: [], - frames: [], - schemas: {}, - usage: undefined, - runId, - } satisfies GenerationResult) - } + // Resolve model connection information + const connection = await resolveModelConnectionInfo({ model }, { trace, token: true }); + if (connection.info.error) throw new Error(errorMessage(connection.info.error)); + if (!connection.configuration) + throw new RequestError(403, `LLM configuration missing for model ${model}`, connection.info); + checkCancelled(cancellationToken); + const { ok } = await runtimeHost.pullModel(connection.configuration, options); + if (!ok) { + trace.renderErrors(); + return deleteUndefinedValues({ + status: "error", + statusText: "", + messages, + env: restEnv, + label, + version, + text: unthink(outputTrace.content), + reasoning: lastAssistantReasoning(messages), + edits: [], + annotations: [], + changelogs: [], + fileEdits: {}, + fences: [], + frames: [], + schemas: {}, + usage: undefined, + runId, + } satisfies GenerationResult); + } - const { completer } = await resolveLanguageModel( - connection.configuration.provider - ) + const { completer } = await resolveLanguageModel(connection.configuration.provider); - // Execute chat session with the resolved configuration - const runStats = options.stats.createChild(connection.info.model) - const genOptions: GenerationOptions = { - ...options, - cache, - choices: template.choices, - responseType, - responseSchema, - model, - temperature, - reasoningEffort, - maxTokens, - topP, - seed, - logprobs, - topLogprobs, - fallbackTools, - metadata, - stats: runStats, - } - const chatResult = await executeChatSession( - connection.configuration, - cancellationToken, - messages, - tools, - schemas, - fileOutputs, - outputProcessors, - fileMerges, - prediction, - completer, - chatParticipants, - disposables, - genOptions - ) - tracePromptResult(trace, chatResult) + // Execute chat session with the resolved configuration + const 
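// deleteUndefinedValues, applied to the pullModel failure result above, is
// assumed to strip keys whose value is undefined so they never reach
// serialization; a plausible stand-in implementation:
function dropUndefined<T extends Record<string, unknown>>(o: T): T {
  for (const k of Object.keys(o)) if (o[k] === undefined) delete o[k];
  return o;
}

const r = dropUndefined({ status: "error", usage: undefined });
// r is { status: "error" }; the undefined usage field is gone.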
runStats = options.stats.createChild(connection.info.model); + const genOptions: GenerationOptions = { + ...options, + cache, + choices: template.choices, + responseType, + responseSchema, + model, + temperature, + reasoningEffort, + maxTokens, + topP, + seed, + logprobs, + topLogprobs, + fallbackTools, + metadata, + stats: runStats, + }; + const chatResult = await executeChatSession( + connection.configuration, + cancellationToken, + messages, + tools, + schemas, + fileOutputs, + outputProcessors, + fileMerges, + prediction, + completer, + chatParticipants, + disposables, + genOptions, + ); + tracePromptResult(trace, chatResult); - const { - json, - fences, - frames, - error, - finishReason, - fileEdits, - changelogs, - edits, - } = chatResult - let { annotations } = chatResult + const { json, fences, frames, error, finishReason, fileEdits, changelogs, edits } = chatResult; + let { annotations } = chatResult; - // Reporting and tracing output - if (fences?.length) - trace.details("📩 code regions", renderFencedVariables(fences)) - if (fileEdits && Object.keys(fileEdits).length) { - trace.startDetails("📝 file edits") - for (const [f, e] of Object.entries(fileEdits)) - trace.detailsFenced(f, e.after) - trace.endDetails() - } - if (annotations?.length) - trace.details( - "⚠️ annotations", - dataToMarkdownTable( - annotations.map((a) => ({ - ...a, - line: a.range?.[0]?.[0], - endLine: a.range?.[1]?.[0] ?? "", - code: a.code ?? "", - })), - { - headers: [ - "severity", - "filename", - "line", - "endLine", - "code", - "message", - ], - } - ) - ) + // Reporting and tracing output + if (fences?.length) trace.details("📩 code regions", renderFencedVariables(fences)); + if (fileEdits && Object.keys(fileEdits).length) { + trace.startDetails("📝 file edits"); + for (const [f, e] of Object.entries(fileEdits)) trace.detailsFenced(f, e.after); + trace.endDetails(); + } + if (annotations?.length) + trace.details( + "⚠️ annotations", + dataToMarkdownTable( + annotations.map((a) => ({ + ...a, + line: a.range?.[0]?.[0], + endLine: a.range?.[1]?.[0] ?? "", + code: a.code ?? "", + })), + { + headers: ["severity", "filename", "line", "endLine", "code", "message"], + }, + ), + ); - trace.renderErrors() - const res: GenerationResult = { - status: - finishReason === "cancel" - ? "cancelled" - : error - ? "error" - : finishReason === "stop" - ? "success" - : "error", - finishReason, - error, - messages, - env: restEnv, - edits, - annotations, - changelogs, - fileEdits, - text: unthink(outputTrace.content), - reasoning: lastAssistantReasoning(messages), - version, - fences, - frames, - schemas, - json, - choices: chatResult.choices, - logprobs: chatResult.logprobs, - perplexity: chatResult.perplexity, - uncertainty: chatResult.uncertainty, - usage: chatResult.usage, - runId, - } + trace.renderErrors(); + const res: GenerationResult = { + status: + finishReason === "cancel" + ? "cancelled" + : error + ? "error" + : finishReason === "stop" + ? 
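// The ternary being assigned to `status` around this point maps a chat finish
// reason onto a generation status. The same decision table, restated as a
// hypothetical helper for readability:
type Status = "success" | "error" | "cancelled";

function toStatus(finishReason: string, error?: unknown): Status {
  if (finishReason === "cancel") return "cancelled";
  if (error) return "error";
  return finishReason === "stop" ? "success" : "error";
}

// toStatus("stop") === "success"; toStatus("length") === "error".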
"success" + : "error", + finishReason, + error, + messages, + env: restEnv, + edits, + annotations, + changelogs, + fileEdits, + text: unthink(outputTrace.content), + reasoning: lastAssistantReasoning(messages), + version, + fences, + frames, + schemas, + json, + choices: chatResult.choices, + logprobs: chatResult.logprobs, + perplexity: chatResult.perplexity, + uncertainty: chatResult.uncertainty, + usage: chatResult.usage, + runId, + }; - // If there's an error, provide status text - if (res.status === "error" && !res.statusText && res.finishReason) { - res.statusText = `LLM finish reason: ${res.finishReason}` - } - return res - } finally { - // Cleanup any resources like running containers or browsers - runtimeHost.userState = {} - await runtimeHost.removeContainers() - await runtimeHost.removeBrowsers() + // If there's an error, provide status text + if (res.status === "error" && !res.statusText && res.finishReason) { + res.statusText = `LLM finish reason: ${res.finishReason}`; } + return res; + } finally { + // Cleanup any resources like running containers or browsers + runtimeHost.userState = {}; + await runtimeHost.removeContainers(); + await runtimeHost.removeBrowsers(); + } } diff --git a/packages/core/src/prompty.test.ts b/packages/core/src/prompty.test.ts index 9db49c1ef2..b20bb0b164 100644 --- a/packages/core/src/prompty.test.ts +++ b/packages/core/src/prompty.test.ts @@ -1,31 +1,31 @@ -import { promptyParse } from "./prompty" -import { describe, test, beforeEach } from "node:test" -import assert from "node:assert/strict" +import { promptyParse } from "./prompty"; +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; describe("promptyParse", () => { - test("correctly parses an empty markdown string", () => { - const result = promptyParse(undefined, "") - assert.deepStrictEqual(result, { - meta: {}, - frontmatter: {}, - content: "", - messages: [], - }) - }) + test("correctly parses an empty markdown string", () => { + const result = promptyParse(undefined, ""); + assert.deepStrictEqual(result, { + meta: {}, + frontmatter: {}, + content: "", + messages: [], + }); + }); - test("correctly parses a markdown string without frontmatter", () => { - const content = "This is a sample content without frontmatter." 
- const result = promptyParse(undefined, content) - assert.deepStrictEqual(result, { - meta: {}, - frontmatter: {}, - content: content, - messages: [{ role: "system", content: content }], - }) - }) + test("correctly parses a markdown string without frontmatter", () => { + const content = "This is a sample content without frontmatter."; + const result = promptyParse(undefined, content); + assert.deepStrictEqual(result, { + meta: {}, + frontmatter: {}, + content: content, + messages: [{ role: "system", content: content }], + }); + }); - test("correctly parses a markdown string with valid frontmatter", () => { - const markdownString = `--- + test("correctly parses a markdown string with valid frontmatter", () => { + const markdownString = `--- name: Test description: A test description version: 1.0.0 @@ -39,39 +39,37 @@ sample: key: value --- # Heading -Content below heading.` - const result = promptyParse(undefined, markdownString) - assert.deepStrictEqual(result.frontmatter, { - name: "Test", - description: "A test description", - version: "1.0.0", - authors: ["Author1", "Author2"], - tags: ["tag1", "tag2"], - sample: { key: "value" }, - }) - assert.strictEqual(result.content, "# Heading\nContent below heading.") - }) +Content below heading.`; + const result = promptyParse(undefined, markdownString); + assert.deepStrictEqual(result.frontmatter, { + name: "Test", + description: "A test description", + version: "1.0.0", + authors: ["Author1", "Author2"], + tags: ["tag1", "tag2"], + sample: { key: "value" }, + }); + assert.strictEqual(result.content, "# Heading\nContent below heading."); + }); - test("correctly parses a markdown string with content split into roles", () => { - const markdownContent = `user: + test("correctly parses a markdown string with content split into roles", () => { + const markdownContent = `user: User's message assistant: Assistant's reply user: -Another message from the user` - const result = promptyParse(undefined, markdownContent) - assert.deepStrictEqual(result.messages, [ - { role: "user", content: "User's message" }, - { role: "assistant", content: "Assistant's reply" }, - { role: "user", content: "Another message from the user" }, - ]) - }) +Another message from the user`; + const result = promptyParse(undefined, markdownContent); + assert.deepStrictEqual(result.messages, [ + { role: "user", content: "User's message" }, + { role: "assistant", content: "Assistant's reply" }, + { role: "user", content: "Another message from the user" }, + ]); + }); - test("correctly handles a markdown string with content but without roles", () => { - const markdownContent = `Just some content without specifying roles.` - const result = promptyParse(undefined, markdownContent) - assert.deepStrictEqual(result.messages, [ - { role: "system", content: markdownContent }, - ]) - }) -}) + test("correctly handles a markdown string with content but without roles", () => { + const markdownContent = `Just some content without specifying roles.`; + const result = promptyParse(undefined, markdownContent); + assert.deepStrictEqual(result.messages, [{ role: "system", content: markdownContent }]); + }); +}); diff --git a/packages/core/src/prompty.ts b/packages/core/src/prompty.ts index 4ee4920035..cb15557a7c 100644 --- a/packages/core/src/prompty.ts +++ b/packages/core/src/prompty.ts @@ -1,82 +1,65 @@ -import { - ChatCompletionContentPart, - ChatCompletionMessageParam, -} from "./chattypes" -import { splitMarkdown } from "./frontmatter" -import { YAMLParse } from "./yaml" -import { 
deleteUndefinedValues } from "./cleaners" -import { JSON5Stringify } from "./json5" +import { ChatCompletionContentPart, ChatCompletionMessageParam } from "./chattypes"; +import { splitMarkdown } from "./frontmatter"; +import { YAMLParse } from "./yaml"; +import { deleteUndefinedValues } from "./cleaners"; +import { JSON5Stringify } from "./json5"; function promptyFrontmatterToMeta(frontmatter: PromptyFrontmatter): PromptArgs { - const { - name, - description, - tags = [], - sample, - inputs, - outputs, - model, - files, - tests, - } = frontmatter - const { - api = "chat", - configuration, - parameters: modelParameters, - } = model ?? {} - const parameters: Record = inputs - ? Object.entries(inputs).reduce>( - (acc, [k, v]) => { - if (v.type === "list") acc[k] = { type: "array" } - else acc[k] = v - return acc - }, - {} - ) - : undefined - if (parameters && sample && typeof sample === "object") - for (const p in sample) { - const s = sample[p] - const pp = parameters[p] - if (s !== undefined && pp) pp.default = s - } + const { + name, + description, + tags = [], + sample, + inputs, + outputs, + model, + files, + tests, + } = frontmatter; + const { api = "chat", configuration, parameters: modelParameters } = model ?? {}; + const parameters: Record = inputs + ? Object.entries(inputs).reduce>((acc, [k, v]) => { + if (v.type === "list") acc[k] = { type: "array" }; + else acc[k] = v; + return acc; + }, {}) + : undefined; + if (parameters && sample && typeof sample === "object") + for (const p in sample) { + const s = sample[p]; + const pp = parameters[p]; + if (s !== undefined && pp) pp.default = s; + } - let modelName: string = undefined - if (api !== "chat") throw new Error("completion api not supported") - if (modelParameters?.n > 1) throw new Error("multi-turn not supported") - if (modelParameters?.tools?.length) throw new Error("tools not supported") + let modelName: string = undefined; + if (api !== "chat") throw new Error("completion api not supported"); + if (modelParameters?.n > 1) throw new Error("multi-turn not supported"); + if (modelParameters?.tools?.length) throw new Error("tools not supported"); - // resolve model - if ( - configuration?.type === "azure_openai" || - configuration?.type === "azure" - ) { - if (!configuration.azure_deployment) - throw new Error("azure_deployment required") - modelName = `azure:${configuration.azure_deployment}` - } else if (configuration?.type === "azure_serverless") { - modelName = `azure_serverless:${configuration.azure_endpoint}` - } else if (configuration?.type === "openai") - modelName = `openai:${configuration.type}` - const unlisted = tags.includes("unlisted") - const meta = deleteUndefinedValues({ - model: modelName, - title: name, - description, - files, - tests, - unlisted: unlisted ? true : undefined, - parameters, - responseType: outputs - ? 
"json_object" - : modelParameters?.response_format?.type, - responseSchema: outputs, - temperature: modelParameters?.temperature, - maxTokens: modelParameters?.max_tokens, - topP: modelParameters?.top_p, - seed: modelParameters?.seed, - } satisfies PromptArgs) - return meta + // resolve model + if (configuration?.type === "azure_openai" || configuration?.type === "azure") { + if (!configuration.azure_deployment) throw new Error("azure_deployment required"); + modelName = `azure:${configuration.azure_deployment}`; + } else if (configuration?.type === "azure_serverless") { + modelName = `azure_serverless:${configuration.azure_endpoint}`; + } else if (configuration?.type === "openai") modelName = `openai:${configuration.type}`; + const unlisted = tags.includes("unlisted"); + const meta = deleteUndefinedValues({ + model: modelName, + title: name, + description, + files, + tests, + unlisted: unlisted ? true : undefined, + parameters, + responseType: outputs ? "json_object" : modelParameters?.response_format?.type, + responseSchema: outputs, + temperature: modelParameters?.temperature, + maxTokens: modelParameters?.max_tokens, + topP: modelParameters?.top_p, + seed: modelParameters?.seed, + } satisfies PromptArgs); + return meta; } /** @@ -94,44 +77,42 @@ function promptyFrontmatterToMeta(frontmatter: PromptyFrontmatter): PromptArgs { * - Throws an error if improper formatting, such as whitespace before frontmatter markers, is detected. */ export function promptyParse(filename: string, text: string): PromptyDocument { - const { frontmatter = "", content = "" } = splitMarkdown(text) - if (!frontmatter && /^\s+---/.test(frontmatter)) - throw new Error( - "Prompty: Frontmatter has invalid whitespace before ---" - ) - const fm = frontmatter ? YAMLParse(frontmatter) : {} - const meta: PromptArgs = fm ? promptyFrontmatterToMeta(fm) : {} - if (filename) meta.filename = filename - const messages: ChatCompletionMessageParam[] = [] + const { frontmatter = "", content = "" } = splitMarkdown(text); + if (!frontmatter && /^\s+---/.test(frontmatter)) + throw new Error("Prompty: Frontmatter has invalid whitespace before ---"); + const fm = frontmatter ? YAMLParse(frontmatter) : {}; + const meta: PromptArgs = fm ? 
promptyFrontmatterToMeta(fm) : {}; + if (filename) meta.filename = filename; + const messages: ChatCompletionMessageParam[] = []; - // split - const rx = /^\s*(system|user|assistant)\s*:\s*$/gim - const lines = content.split(/\r?\n/g) - let role: "system" | "user" | "assistant" | undefined = "system" - let chunk: string[] = [] + // split + const rx = /^\s*(system|user|assistant)\s*:\s*$/gim; + const lines = content.split(/\r?\n/g); + let role: "system" | "user" | "assistant" | undefined = "system"; + let chunk: string[] = []; - const pushMessage = () => { - if (role && chunk.length && chunk.some((l) => !!l)) { - messages.push({ - role, - content: chunk.join("\n").trim(), - }) - } + const pushMessage = () => { + if (role && chunk.length && chunk.some((l) => !!l)) { + messages.push({ + role, + content: chunk.join("\n").trim(), + }); } + }; - for (const line of lines) { - const m = rx.exec(line) - if (m) { - // next role starts - pushMessage() - role = m[1] as "system" | "user" | "assistant" - chunk = [] - } else { - chunk.push(line) - } + for (const line of lines) { + const m = rx.exec(line); + if (m) { + // next role starts + pushMessage(); + role = m[1] as "system" | "user" | "assistant"; + chunk = []; + } else { + chunk.push(line); } - pushMessage() - return { meta, frontmatter: fm, content, messages } + } + pushMessage(); + return { meta, frontmatter: fm, content, messages }; } /** @@ -149,38 +130,37 @@ export function promptyParse(filename: string, text: string): PromptyDocument { * Returns a string containing the final generated AI script. */ export function promptyToGenAIScript(doc: PromptyDocument): string { - const { messages, meta } = doc + const { messages, meta } = doc; - const renderJinja = (content: string) => - `$\`${content.replace(/`/g, "\\`")}\`${/\{(%|\{)/.test(content) ? `.jinja(env.vars)` : ""}` - const renderPart = (c: ChatCompletionContentPart) => - c.type === "text" - ? renderJinja(c.text) - : c.type === "image_url" - ? `defImages("${c.image_url}")` - : c.type === "input_audio" - ? `defAudio("${c.input_audio}")` - : `unknown message` + const renderJinja = (content: string) => + `$\`${content.replace(/`/g, "\\`")}\`${/\{(%|\{)/.test(content) ? `.jinja(env.vars)` : ""}`; + const renderPart = (c: ChatCompletionContentPart) => + c.type === "text" + ? renderJinja(c.text) + : c.type === "image_url" + ? `defImages("${c.image_url}")` + : c.type === "input_audio" + ? 
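// Usage sketch for the role-marker splitter defined above, mirroring the
// expectations in prompty.test.ts: a line matching `user:` / `assistant:` /
// `system:` opens a new message, and everything until the next marker becomes
// that message's content.
import { promptyParse } from "./prompty";

const doc = promptyParse(undefined, ["user:", "Hello", "assistant:", "Hi there"].join("\n"));
// doc.messages is:
//   [{ role: "user", content: "Hello" }, { role: "assistant", content: "Hi there" }]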
`defAudio("${c.input_audio}")` + : `unknown message`; - let src = `` - if (Object.keys(meta).length) { - src += `script(${JSON5Stringify(meta, null, 2)})\n\n` - } - src += messages - .map((msg) => { - const { role, content } = msg - if (role === "assistant") { - return `assistant(parsers.jinja(${JSON.stringify(content as string)}, env.vars))` - } else if (role === "system") { - return `writeText(${JSON.stringify(content as string)}, { role: "system" })` - } else { - if (typeof content === "string") return renderJinja(content) - else if (Array.isArray(content)) - return content.map(renderPart).join("\n") - else return renderPart(content) - } - }) - .join("\n") + let src = ``; + if (Object.keys(meta).length) { + src += `script(${JSON5Stringify(meta, null, 2)})\n\n`; + } + src += messages + .map((msg) => { + const { role, content } = msg; + if (role === "assistant") { + return `assistant(parsers.jinja(${JSON.stringify(content as string)}, env.vars))`; + } else if (role === "system") { + return `writeText(${JSON.stringify(content as string)}, { role: "system" })`; + } else { + if (typeof content === "string") return renderJinja(content); + else if (Array.isArray(content)) return content.map(renderPart).join("\n"); + else return renderPart(content); + } + }) + .join("\n"); - return src + return src; } diff --git a/packages/core/src/proxy.ts b/packages/core/src/proxy.ts index 6fd4994366..a676222c39 100644 --- a/packages/core/src/proxy.ts +++ b/packages/core/src/proxy.ts @@ -1,6 +1,6 @@ -import { ProxyAgent } from "undici" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("proxy") +import { ProxyAgent } from "undici"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("proxy"); /** * Resolves an HTTP proxy agent based on environment variables. @@ -22,15 +22,15 @@ const dbg = genaiscriptDebug("proxy") * or null if no proxy is detected. */ export function resolveHttpProxyAgent() { - // We create a proxy based on Node.js environment variables. - const proxy = - process.env.GENAISCRIPT_HTTPS_PROXY || - process.env.GENAISCRIPT_HTTP_PROXY || - process.env.HTTPS_PROXY || - process.env.HTTP_PROXY || - process.env.https_proxy || - process.env.http_proxy - if (proxy) dbg(`proxy: %s`, proxy) - const agent = proxy ? new ProxyAgent(proxy) : null - return agent + // We create a proxy based on Node.js environment variables. + const proxy = + process.env.GENAISCRIPT_HTTPS_PROXY || + process.env.GENAISCRIPT_HTTP_PROXY || + process.env.HTTPS_PROXY || + process.env.HTTP_PROXY || + process.env.https_proxy || + process.env.http_proxy; + if (proxy) dbg(`proxy: %s`, proxy); + const agent = proxy ? 
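// A usage sketch for resolveHttpProxyAgent above: undici's fetch accepts the
// returned ProxyAgent as a dispatcher. The environment variable names come
// straight from the implementation; the wrapper below is hypothetical.
import { fetch } from "undici";
import { resolveHttpProxyAgent } from "./proxy";

async function fetchViaProxy(url: string) {
  const agent = resolveHttpProxyAgent(); // null when no proxy env var is set
  return await fetch(url, agent ? { dispatcher: agent } : {});
}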
new ProxyAgent(proxy) : null; + return agent; } diff --git a/packages/core/src/pyodide.test.ts b/packages/core/src/pyodide.test.ts index 926fb56449..e44fec894e 100644 --- a/packages/core/src/pyodide.test.ts +++ b/packages/core/src/pyodide.test.ts @@ -1,64 +1,64 @@ -import { describe, test, before } from "node:test" -import assert from "node:assert/strict" -import { createPythonRuntime } from "./pyodide" -import { TestHost } from "./testhost" +import { describe, test, before } from "node:test"; +import assert from "node:assert/strict"; +import { createPythonRuntime } from "./pyodide"; +import { TestHost } from "./testhost"; describe("PyodideRuntime", async () => { - let runtime: PythonRuntime + let runtime: PythonRuntime; - before(async () => { - TestHost.install() - runtime = await createPythonRuntime() - }) - await test("should list current files from Python", async () => { - const result = await runtime.run(` + before(async () => { + TestHost.install(); + runtime = await createPythonRuntime(); + }); + await test("should list current files from Python", async () => { + const result = await runtime.run(` import os os.listdir('/workspace') -`) - assert(Array.isArray(result)) - }) - await test("should run Python code and return result", async () => { - const result = await runtime.run("print('Hello, World!')") - assert.equal(result, undefined) // Since print returns None in Python - }) - await test("should return Python version", async () => { - const result = await runtime.run("import sys; sys.version") - assert(result) - assert(typeof result === "string") - assert(result.includes("3.")) - }) - await test("should handle Python exceptions", async () => { - try { - await runtime.run("raise ValueError('Test error')") - assert.fail("Expected an error to be thrown") - } catch (error) { - assert(error instanceof Error) - assert(error.message.includes("ValueError: Test error")) - } - }) - await test("should install and use snowballstemmer", async () => { - await runtime.import("snowballstemmer") - const result = await runtime.run(` +`); + assert(Array.isArray(result)); + }); + await test("should run Python code and return result", async () => { + const result = await runtime.run("print('Hello, World!')"); + assert.equal(result, undefined); // Since print returns None in Python + }); + await test("should return Python version", async () => { + const result = await runtime.run("import sys; sys.version"); + assert(result); + assert(typeof result === "string"); + assert(result.includes("3.")); + }); + await test("should handle Python exceptions", async () => { + try { + await runtime.run("raise ValueError('Test error')"); + assert.fail("Expected an error to be thrown"); + } catch (error) { + assert(error instanceof Error); + assert(error.message.includes("ValueError: Test error")); + } + }); + await test("should install and use snowballstemmer", async () => { + await runtime.import("snowballstemmer"); + const result = await runtime.run(` import snowballstemmer stemmer = snowballstemmer.stemmer('english') stemmer.stemWords(['running', 'jumps', 'easily']) - `) - assert(Array.isArray(result)) - }) - await test("should set and get global variables", async () => { - await runtime.run("x = 42") - const result = runtime.globals.get("x") - assert.equal(result, 42) - }) + `); + assert(Array.isArray(result)); + }); + await test("should set and get global variables", async () => { + await runtime.run("x = 42"); + const result = runtime.globals.get("x"); + assert.equal(result, 42); + }); - await test("should 
update global variables", async () => { - runtime.globals.set("y", 100) - const result = await runtime.run("y") - assert.equal(result, 100) - }) + await test("should update global variables", async () => { + runtime.globals.set("y", 100); + const result = await runtime.run("y"); + assert.equal(result, 100); + }); - await test("should handle non-existent global variables", async () => { - const result = runtime.globals.get("non_existent_var") - assert.equal(result, undefined) - }) -}) + await test("should handle non-existent global variables", async () => { + const result = runtime.globals.get("non_existent_var"); + assert.equal(result, undefined); + }); +}); diff --git a/packages/core/src/pyodide.ts b/packages/core/src/pyodide.ts index e2760080d1..7205f1ba4f 100644 --- a/packages/core/src/pyodide.ts +++ b/packages/core/src/pyodide.ts @@ -1,63 +1,63 @@ -import type { PyodideInterface } from "pyodide" -import { dotGenaiscriptPath } from "./workdir" -import { TraceOptions } from "./trace" -import { hash } from "./crypto" -import { deleteUndefinedValues } from "./cleaners" -import { dedent } from "./indent" -import { PLimitPromiseQueue } from "./concurrency" -import { stderr } from "./stdio" +import type { PyodideInterface } from "pyodide"; +import { dotGenaiscriptPath } from "./workdir"; +import { TraceOptions } from "./trace"; +import { hash } from "./crypto"; +import { deleteUndefinedValues } from "./cleaners"; +import { dedent } from "./indent"; +import { PLimitPromiseQueue } from "./concurrency"; +import { stderr } from "./stdio"; class PyProxy implements PythonProxy { - constructor( - readonly runtime: PyodideInterface, - readonly proxy: any - ) {} + constructor( + readonly runtime: PyodideInterface, + readonly proxy: any, + ) {} - get(name: string): T { - return toJs(this.proxy.get(name)) - } + get(name: string): T { + return toJs(this.proxy.get(name)); + } - set(name: string, value: T) { - const p = this.runtime.toPy(value) - this.proxy.set(name, p) - } + set(name: string, value: T) { + const p = this.runtime.toPy(value); + this.proxy.set(name, p); + } } function toJs(res: any) { - return typeof res?.toJs === "function" ? res.toJs() : res + return typeof res?.toJs === "function" ? 
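// A condensed usage sketch for the Pyodide runtime exercised by the tests
// above, using only calls whose signatures appear in this file: run, import,
// and the globals proxy.
import { createPythonRuntime } from "./pyodide";

async function demo() {
  const py = await createPythonRuntime();
  py.globals.set("x", 21);
  const doubled = await py.run("x * 2"); // 42
  await py.import("snowballstemmer"); // installs via micropip on first use
  return doubled;
}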
res.toJs() : res; } class PyodideRuntime implements PythonRuntime { - private queue: PLimitPromiseQueue = new PLimitPromiseQueue(1) - private micropip: { install: (packageName: string) => Promise } + private queue: PLimitPromiseQueue = new PLimitPromiseQueue(1); + private micropip: { install: (packageName: string) => Promise }; - constructor( - public readonly version: string, - public readonly runtime: PyodideInterface - ) {} + constructor( + public readonly version: string, + public readonly runtime: PyodideInterface, + ) {} - get globals(): PythonProxy { - return new PyProxy(this.runtime, this.runtime.globals) - } + get globals(): PythonProxy { + return new PyProxy(this.runtime, this.runtime.globals); + } - async import(pkg: string) { - await this.queue.add(async () => { - if (!this.micropip) { - await this.runtime.loadPackage("micropip") - this.micropip = this.runtime.pyimport("micropip") - } - await this.micropip.install(pkg) - }) - } + async import(pkg: string) { + await this.queue.add(async () => { + if (!this.micropip) { + await this.runtime.loadPackage("micropip"); + this.micropip = this.runtime.pyimport("micropip"); + } + await this.micropip.install(pkg); + }); + } - async run(code: string): Promise { - return await this.queue.add(async () => { - const d = dedent(code) - const res = await this.runtime.runPythonAsync(d) - const r = toJs(res) - return r - }) - } + async run(code: string): Promise { + return await this.queue.add(async () => { + const d = dedent(code); + const res = await this.runtime.runPythonAsync(d); + const r = toJs(res); + return r; + }); + } } /** @@ -73,19 +73,19 @@ class PyodideRuntime implements PythonRuntime { * of Python code and interaction with Python globals. */ export async function createPythonRuntime( - options?: PythonRuntimeOptions & TraceOptions + options?: PythonRuntimeOptions & TraceOptions, ): Promise { - const { cache } = options ?? {} - const { loadPyodide, version } = await import("pyodide") - const sha = await hash({ cache, version: true, pyodide: version }) - const pyodide = await loadPyodide( - deleteUndefinedValues({ - packageCacheDir: dotGenaiscriptPath("cache", "python", sha), - stdout: (msg: string) => stderr.write(msg), - stderr: (msg: string) => stderr.write(msg), - checkAPIVersion: true, - }) - ) - await pyodide.mountNodeFS("/workspace", process.cwd()) - return new PyodideRuntime(version, pyodide) + const { cache } = options ?? {}; + const { loadPyodide, version } = await import("pyodide"); + const sha = await hash({ cache, version: true, pyodide: version }); + const pyodide = await loadPyodide( + deleteUndefinedValues({ + packageCacheDir: dotGenaiscriptPath("cache", "python", sha), + stdout: (msg: string) => stderr.write(msg), + stderr: (msg: string) => stderr.write(msg), + checkAPIVersion: true, + }), + ); + await pyodide.mountNodeFS("/workspace", process.cwd()); + return new PyodideRuntime(version, pyodide); } diff --git a/packages/core/src/quiet.ts b/packages/core/src/quiet.ts index e96c6246ef..bdac161273 100644 --- a/packages/core/src/quiet.ts +++ b/packages/core/src/quiet.ts @@ -1,11 +1,11 @@ // Boolean indicating if debug messages should be suppressed // Controls whether debug messages are outputted -export let isQuiet = false +export let isQuiet = false; /** * Sets the quiet mode for suppressing debug messages. 
* @param v - Boolean to enable or disable quiet mode */ export function setQuiet(v: boolean) { - isQuiet = !!v + isQuiet = !!v; } diff --git a/packages/core/src/resources.test.ts b/packages/core/src/resources.test.ts index 6fc8fc1bb5..491b356219 100644 --- a/packages/core/src/resources.test.ts +++ b/packages/core/src/resources.test.ts @@ -1,100 +1,96 @@ -import { describe, test, beforeEach, afterEach } from "node:test" -import assert from "node:assert/strict" -import { tryResolveResource } from "./resources" -import { pathToFileURL } from "node:url" -import { join } from "node:path" -import { mkdtempSync, writeFileSync } from "node:fs" -import { tmpdir } from "node:os" -import { rmdir } from "node:fs/promises" -import { TestHost } from "./testhost" +import { describe, test, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { tryResolveResource } from "./resources"; +import { pathToFileURL } from "node:url"; +import { join } from "node:path"; +import { mkdtempSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { rmdir } from "node:fs/promises"; +import { TestHost } from "./testhost"; describe("resources", async () => { - let tempDir: string + let tempDir: string; - beforeEach(() => { - tempDir = mkdtempSync(join(tmpdir(), "resources-test-")) - TestHost.install() - }) + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), "resources-test-")); + TestHost.install(); + }); - afterEach(async () => { - // Cleanup is left minimal intentionally - await rmdir(tempDir, { recursive: true }) - }) + afterEach(async () => { + // Cleanup is left minimal intentionally + await rmdir(tempDir, { recursive: true }); + }); - await test("should resolve file URLs", async () => { - // Create a test file - const testFilePath = join(tempDir, "test-file.txt") - const testContent = "test content" - writeFileSync(testFilePath, testContent) + await test("should resolve file URLs", async () => { + // Create a test file + const testFilePath = join(tempDir, "test-file.txt"); + const testContent = "test content"; + writeFileSync(testFilePath, testContent); - const fileUrl = pathToFileURL(testFilePath).href - const result = await tryResolveResource(fileUrl) + const fileUrl = pathToFileURL(testFilePath).href; + const result = await tryResolveResource(fileUrl); - assert(result) - assert.equal(result.files.length, 1) - assert.equal(result.files[0].filename, testFilePath) - }) + assert(result); + assert.equal(result.files.length, 1); + assert.equal(result.files[0].filename, testFilePath); + }); - await test("should resolve https URL to raw content", async () => { - const url = - "https://raw.githubusercontent.com/microsoft/genaiscript/refs/heads/main/package.json" - const result = await tryResolveResource(url) + await test("should resolve https URL to raw content", async () => { + const url = + "https://raw.githubusercontent.com/microsoft/genaiscript/refs/heads/main/package.json"; + const result = await tryResolveResource(url); - assert(result) - assert.equal(result.files.length, 1) - assert(result.files[0].content) - assert(result.files[0].content.includes("GenAIScript")) - }) + assert(result); + assert.equal(result.files.length, 1); + assert(result.files[0].content); + assert(result.files[0].content.includes("GenAIScript")); + }); - await test("should adapt GitHub blob URLs to raw URLs", async () => { - const url = - "https://github.com/microsoft/genaiscript/blob/main/package.json" - const result = await tryResolveResource(url) + await test("should 
adapt GitHub blob URLs to raw URLs", async () => { + const url = "https://github.com/microsoft/genaiscript/blob/main/package.json"; + const result = await tryResolveResource(url); - assert(result) - assert.equal(result.files.length, 1) - assert(result.files[0].content) - assert(result.files[0].content.includes("GenAIScript")) - }) - await test("should resolve gist URLs", async () => { - // Using a public test gist - const url = - "https://github.com/pelikhan/7f3f28389b7a9712da340f08cd19cff5/" - const result = await tryResolveResource(url) + assert(result); + assert.equal(result.files.length, 1); + assert(result.files[0].content); + assert(result.files[0].content.includes("GenAIScript")); + }); + await test("should resolve gist URLs", async () => { + // Using a public test gist + const url = "https://github.com/pelikhan/7f3f28389b7a9712da340f08cd19cff5/"; + const result = await tryResolveResource(url); - assert(result) - assert(result.files.length > 0) - assert(result.files[0].content.includes("GenAIScript")) - }) - await test("should resolve gist URLs (gist.github.com)", async () => { - // Using a public test gist - const url = - "https://gist.github.com/pelikhan/7f3f28389b7a9712da340f08cd19cff5/" - const result = await tryResolveResource(url) + assert(result); + assert(result.files.length > 0); + assert(result.files[0].content.includes("GenAIScript")); + }); + await test("should resolve gist URLs (gist.github.com)", async () => { + // Using a public test gist + const url = "https://gist.github.com/pelikhan/7f3f28389b7a9712da340f08cd19cff5/"; + const result = await tryResolveResource(url); - assert(result) - assert(result.files.length > 0) - assert(result.files[0].content.includes("GenAIScript")) - }) - await test("should resolve gist URLs with files", async () => { - // Using a public test gist - const url = - "https://github.com/pelikhan/7f3f28389b7a9712da340f08cd19cff5/readme.md" - const result = await tryResolveResource(url) + assert(result); + assert(result.files.length > 0); + assert(result.files[0].content.includes("GenAIScript")); + }); + await test("should resolve gist URLs with files", async () => { + // Using a public test gist + const url = "https://github.com/pelikhan/7f3f28389b7a9712da340f08cd19cff5/readme.md"; + const result = await tryResolveResource(url); - assert(result) - assert(result.files.length === 1) - assert(result.files[0].content.includes("GenAIScript")) - }) + assert(result); + assert(result.files.length === 1); + assert(result.files[0].content.includes("GenAIScript")); + }); - await test("should resolve VSCode gistfs URLs", async () => { - const url = - "vscode://vsls-contrib.gistfs/open?gist=7f3f28389b7a9712da340f08cd19cff5&file=readme.md" - const result = await tryResolveResource(url) + await test("should resolve VSCode gistfs URLs", async () => { + const url = + "vscode://vsls-contrib.gistfs/open?gist=7f3f28389b7a9712da340f08cd19cff5&file=readme.md"; + const result = await tryResolveResource(url); - assert(result) - assert.equal(result.files.length > 0, true) - // The first file should be the one specified in the URL - assert(result.files[0].filename.includes("readme.md")) - }) -}) + assert(result); + assert.equal(result.files.length > 0, true); + // The first file should be the one specified in the URL + assert(result.files[0].filename.includes("readme.md")); + }); +}); diff --git a/packages/core/src/resources.ts b/packages/core/src/resources.ts index 97cc94ea63..51a3fe08e9 100644 --- a/packages/core/src/resources.ts +++ b/packages/core/src/resources.ts @@ 
-1,199 +1,189 @@ -import { fileURLToPath } from "node:url" -import { isBinaryMimeType } from "./binary" -import { CancellationOptions, checkCancelled } from "./cancellation" -import { genaiscriptDebug } from "./debug" -import { createFetch } from "./fetch" -import { GitHubClient } from "./githubclient" -import { TraceOptions } from "./trace" -import { uriRedact, uriScheme, uriTryParse } from "./url" -import { arrayify } from "./cleaners" -import { URL } from "node:url" -import { GitClient } from "./git" -import { expandFiles } from "./fs" -import { join } from "node:path" -import { isCancelError } from "./error" -import { GITHUB_ASSET_URL_RX } from "./constants" -const dbg = genaiscriptDebug("res") -const dbgAdaptors = dbg.extend("adaptors") -const dbgFiles = dbg.extend("files") -dbgFiles.enabled = false +import { fileURLToPath } from "node:url"; +import { isBinaryMimeType } from "./binary"; +import { CancellationOptions, checkCancelled } from "./cancellation"; +import { genaiscriptDebug } from "./debug"; +import { createFetch } from "./fetch"; +import { GitHubClient } from "./githubclient"; +import { TraceOptions } from "./trace"; +import { uriRedact, uriScheme, uriTryParse } from "./url"; +import { arrayify } from "./cleaners"; +import { URL } from "node:url"; +import { GitClient } from "./git"; +import { expandFiles } from "./fs"; +import { join } from "node:path"; +import { isCancelError } from "./error"; +import { GITHUB_ASSET_URL_RX } from "./constants"; +const dbg = genaiscriptDebug("res"); +const dbgAdaptors = dbg.extend("adaptors"); +const dbgFiles = dbg.extend("files"); +dbgFiles.enabled = false; const urlAdapters: { - id: string - matcher: (url: string) => Awaitable + id: string; + matcher: (url: string) => Awaitable; }[] = [ - { - id: "github blob", - /** - * Matches GitHub blob URLs and converts them to raw content URLs. - * Extracts user, repository, and file path from the blob URL. - * Constructs a raw URL using the extracted components. - * @param url - The GitHub blob URL. - * @returns The corresponding raw URL or undefined if no match is found. - */ - matcher: (url) => { - const m = - /^https:\/\/github\.com\/(?[^\/]+)\/(?[^\/]+)\/blob\/(?.+)#?/i.exec( - url - ) - return m - ? `https://raw.githubusercontent.com/${m.groups.owner}/${m.groups.repo}/refs/heads/${m.groups.path}` - : undefined - }, + { + id: "github blob", + /** + * Matches GitHub blob URLs and converts them to raw content URLs. + * Extracts user, repository, and file path from the blob URL. + * Constructs a raw URL using the extracted components. + * @param url - The GitHub blob URL. + * @returns The corresponding raw URL or undefined if no match is found. + */ + matcher: (url) => { + const m = + /^https:\/\/github\.com\/(?[^\/]+)\/(?[^\/]+)\/blob\/(?.+)#?/i.exec(url); + return m + ? `https://raw.githubusercontent.com/${m.groups.owner}/${m.groups.repo}/refs/heads/${m.groups.path}` + : undefined; }, - { - id: "github assets", - matcher: async (url) => { - if (GITHUB_ASSET_URL_RX.test(url)) { - const client = GitHubClient.default() - const resolved = await client.resolveAssetUrl(url) - return resolved - } - return undefined - }, + }, + { + id: "github assets", + matcher: async (url) => { + if (GITHUB_ASSET_URL_RX.test(url)) { + const client = GitHubClient.default(); + const resolved = await client.resolveAssetUrl(url); + return resolved; + } + return undefined; }, - { - id: "gist", - matcher: (url) => { - const m = - /^https:\/\/(gist\.)?github\.com\/(?[^\/]+)\/(?\w+)(\/(?.*))?$/i.exec( - url - ) - return m - ? 
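// What the "github blob" adapter above does, as a worked example; the named
// capture groups the matcher reads via m.groups are written out in full here,
// and the sample URL is hypothetical.
const blobUrl = "https://github.com/owner/repo/blob/main/docs/readme.md";
const m =
  /^https:\/\/github\.com\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/blob\/(?<path>.+)#?/i.exec(blobUrl);
const raw =
  m &&
  `https://raw.githubusercontent.com/${m.groups!.owner}/${m.groups!.repo}/refs/heads/${m.groups!.path}`;
// raw === "https://raw.githubusercontent.com/owner/repo/refs/heads/main/docs/readme.md"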
`gist://${m.groups.id}/${m.groups.filename || ""}` - : undefined - }, + }, + { + id: "gist", + matcher: (url) => { + const m = + /^https:\/\/(gist\.)?github\.com\/(?[^\/]+)\/(?\w+)(\/(?.*))?$/i.exec( + url, + ); + return m ? `gist://${m.groups.id}/${m.groups.filename || ""}` : undefined; }, -] + }, +]; async function applyUrlAdapters(url: string) { - // Use URL adapters to modify the URL if needed - for (const a of urlAdapters) { - const newUrl = await a.matcher(url) - if (newUrl) { - dbgAdaptors(`%s: %s`, a.id, uriRedact(url)) - return newUrl - } + // Use URL adapters to modify the URL if needed + for (const a of urlAdapters) { + const newUrl = await a.matcher(url); + if (newUrl) { + dbgAdaptors(`%s: %s`, a.id, uriRedact(url)); + return newUrl; } - return url + } + return url; } const uriResolvers: Record< - string, - ( - dbg: debug.Debugger, - url: URL, - options?: TraceOptions & CancellationOptions - ) => Promise> + string, + ( + dbg: debug.Debugger, + url: URL, + options?: TraceOptions & CancellationOptions, + ) => Promise> > = { - file: async (dbg, uri) => { - const filename = fileURLToPath(uri) - const file = { filename } satisfies WorkspaceFile - return file - }, - https: async (dbg, url, options) => { - // https://.../.../....git - if (/\.git($|\/)/.test(url.pathname)) - return await uriResolvers.git(dbg, url, options) - // regular fetch - const fetch = await createFetch(options) - dbg(`fetch %s`, uriRedact(url.href)) - const res = await fetch(url, { method: "GET" }) - dbg(`res: %d %s`, res.status, res.statusText) - if (!res.ok) return undefined - const contentType = res.headers.get("Content-Type") - if (isBinaryMimeType(contentType)) { - const buffer = await res.arrayBuffer() - return { - filename: url.pathname, - content: Buffer.from(buffer).toString("base64"), - encoding: "base64", - type: contentType, - size: buffer.byteLength, - } satisfies WorkspaceFile - } else { - const content = await res.text() - return { - filename: url.pathname, - content, - type: contentType, - size: Buffer.byteLength(content, "utf8"), - } satisfies WorkspaceFile - } - }, - gist: async (dbg, url) => { - // gist://id/ - // gist://id/filename - const gh = GitHubClient.default() - const id = url.hostname - const filename = url.pathname.slice(1) || "" - if (!id) { - dbg(`missing gist id or filename`) - return undefined - } + file: async (dbg, uri) => { + const filename = fileURLToPath(uri); + const file = { filename } satisfies WorkspaceFile; + return file; + }, + https: async (dbg, url, options) => { + // https://.../.../....git + if (/\.git($|\/)/.test(url.pathname)) return await uriResolvers.git(dbg, url, options); + // regular fetch + const fetch = await createFetch(options); + dbg(`fetch %s`, uriRedact(url.href)); + const res = await fetch(url, { method: "GET" }); + dbg(`res: %d %s`, res.status, res.statusText); + if (!res.ok) return undefined; + const contentType = res.headers.get("Content-Type"); + if (isBinaryMimeType(contentType)) { + const buffer = await res.arrayBuffer(); + return { + filename: url.pathname, + content: Buffer.from(buffer).toString("base64"), + encoding: "base64", + type: contentType, + size: buffer.byteLength, + } satisfies WorkspaceFile; + } else { + const content = await res.text(); + return { + filename: url.pathname, + content, + type: contentType, + size: Buffer.byteLength(content, "utf8"), + } satisfies WorkspaceFile; + } + }, + gist: async (dbg, url) => { + // gist://id/ + // gist://id/filename + const gh = GitHubClient.default(); + const id = url.hostname; + const 
filename = url.pathname.slice(1) || ""; + if (!id) { + dbg(`missing gist id or filename`); + return undefined; + } - dbg(`gist %s %s`, id, filename) - const gist = await gh.getGist(id) - if (!gist) { - dbg(`missing gist %s`, id) - return undefined - } - const files = gist.files || [] - if (filename) { - dbg(`moving file %s to top`, filename) - const i = gist.files.findIndex((f) => f.filename === filename) - if (i < 0) { - dbg(`file %s not found in gist`, filename) - return undefined - } - const file = files[i] - files.splice(i, 1) - files.unshift(file) - } - return files - }, - vscode: async (dbg, url) => { - // vscode://vsls-contrib.gistfs/open?gist=8f7db2674f7b0eaaf563eae28253c2b0&file=poem.genai.mts - if (url.host === "vsls-contrib.gistfs" && url.pathname === "/open") { - const params = new URLSearchParams(url.search) - const gist = params.get("gist") - const file = params.get("file") || "" - if (!gist) { - dbg(`missing gist id %s`, gist) - return undefined - } - return await uriResolvers.gist( - dbg, - new URL(`gist://${gist}/${file}`) - ) - } - return undefined - }, - git: async (dbg, url) => { - // (git|https)://github.com/pelikhan/amazing-demo.git(/....) - let [owner, repo, ...filename] = url.pathname - .replace(/^\//, "") - .split("/") - repo = repo.replace(/\.git$/, "") - const repository = [url.origin, owner, repo].join("/") - const branch = url.hash.replace(/^#/, "") - dbg(`git %s %s %s`, repository, branch, filename) - const client = await GitClient.default() - const clone = await client.shallowClone(repository, { - branch, - }) - const cwd = clone.cwd - const glob = filename.length ? join(...filename) : "**/*" - dbg(`cloned at %s, glob %s`, cwd, glob) - const gitFolder = join(cwd, ".git") - const files = ( - await expandFiles([join(cwd, glob)], { - applyGitIgnore: false, - }) - ).filter((f) => !f.startsWith(gitFolder)) - return files.map((filename) => ({ filename })) - }, -} + dbg(`gist %s %s`, id, filename); + const gist = await gh.getGist(id); + if (!gist) { + dbg(`missing gist %s`, id); + return undefined; + } + const files = gist.files || []; + if (filename) { + dbg(`moving file %s to top`, filename); + const i = gist.files.findIndex((f) => f.filename === filename); + if (i < 0) { + dbg(`file %s not found in gist`, filename); + return undefined; + } + const file = files[i]; + files.splice(i, 1); + files.unshift(file); + } + return files; + }, + vscode: async (dbg, url) => { + // vscode://vsls-contrib.gistfs/open?gist=8f7db2674f7b0eaaf563eae28253c2b0&file=poem.genai.mts + if (url.host === "vsls-contrib.gistfs" && url.pathname === "/open") { + const params = new URLSearchParams(url.search); + const gist = params.get("gist"); + const file = params.get("file") || ""; + if (!gist) { + dbg(`missing gist id %s`, gist); + return undefined; + } + return await uriResolvers.gist(dbg, new URL(`gist://${gist}/${file}`)); + } + return undefined; + }, + git: async (dbg, url) => { + // (git|https)://github.com/pelikhan/amazing-demo.git(/....) + let [owner, repo, ...filename] = url.pathname.replace(/^\//, "").split("/"); + repo = repo.replace(/\.git$/, ""); + const repository = [url.origin, owner, repo].join("/"); + const branch = url.hash.replace(/^#/, ""); + dbg(`git %s %s %s`, repository, branch, filename); + const client = await GitClient.default(); + const clone = await client.shallowClone(repository, { + branch, + }); + const cwd = clone.cwd; + const glob = filename.length ? 
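// The vscode resolver above translates a gistfs deep link into the internal
// gist:// scheme before delegating. The same translation in isolation, using
// the sample URL from the comment in the code:
const link = new URL(
  "vscode://vsls-contrib.gistfs/open?gist=8f7db2674f7b0eaaf563eae28253c2b0&file=poem.genai.mts",
);
const params = new URLSearchParams(link.search);
const gistUri = new URL(`gist://${params.get("gist")}/${params.get("file") || ""}`);
// gistUri.href === "gist://8f7db2674f7b0eaaf563eae28253c2b0/poem.genai.mts"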
join(...filename) : "**/*"; + dbg(`cloned at %s, glob %s`, cwd, glob); + const gitFolder = join(cwd, ".git"); + const files = ( + await expandFiles([join(cwd, glob)], { + applyGitIgnore: false, + }) + ).filter((f) => !f.startsWith(gitFolder)); + return files.map((filename) => ({ filename })); + }, +}; /** * Attempts to resolve a given URL to its associated resources or files. @@ -218,44 +208,44 @@ const uriResolvers: Record< * - Throws an error if the cancellation token is triggered during the operation. */ export async function tryResolveResource( - url: string, - options?: TraceOptions & CancellationOptions + url: string, + options?: TraceOptions & CancellationOptions, ): Promise<{ uri: URL; files: WorkspaceFile[] } | undefined> { - if (!url) return undefined - url = await applyUrlAdapters(url) - const uri = uriTryParse(url) - if (!uri) return undefined - const { cancellationToken } = options || {} - dbg(`resolving %s`, uriRedact(url)) + if (!url) return undefined; + url = await applyUrlAdapters(url); + const uri = uriTryParse(url); + if (!uri) return undefined; + const { cancellationToken } = options || {}; + dbg(`resolving %s`, uriRedact(url)); - try { - // try to resolve - const scheme = uriScheme(uri) - const resolver = uriResolvers[scheme] - if (!resolver) { - dbg(`unsupported protocol %s`, scheme) - return undefined - } - - // download - const dbgUri = dbg.extend(uri.protocol.replace(/:$/, "")) - const files = arrayify(await resolver(dbgUri, uri, options)) - checkCancelled(cancellationToken) - dbg(`resolved %d files`, files.length) - dbgFiles( - "%O", - files.map((f) => f.filename) - ) - if (!files.length) { - dbg(`failed to resolve %s`, uriRedact(uri.href)) - return undefined - } + try { + // try to resolve + const scheme = uriScheme(uri); + const resolver = uriResolvers[scheme]; + if (!resolver) { + dbg(`unsupported protocol %s`, scheme); + return undefined; + } - // success - return { uri, files } - } catch (error) { - if (isCancelError(error)) throw error - dbg(`failed to parse uri %s`, uriRedact(uri.href), error) - return undefined + // download + const dbgUri = dbg.extend(uri.protocol.replace(/:$/, "")); + const files = arrayify(await resolver(dbgUri, uri, options)); + checkCancelled(cancellationToken); + dbg(`resolved %d files`, files.length); + dbgFiles( + "%O", + files.map((f) => f.filename), + ); + if (!files.length) { + dbg(`failed to resolve %s`, uriRedact(uri.href)); + return undefined; } + + // success + return { uri, files }; + } catch (error) { + if (isCancelError(error)) throw error; + dbg(`failed to parse uri %s`, uriRedact(uri.href), error); + return undefined; + } } diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts index 40285ff6c3..9164789d02 100644 --- a/packages/core/src/runpromptcontext.ts +++ b/packages/core/src/runpromptcontext.ts @@ -1,114 +1,102 @@ // cspell: disable import { - PromptNode, - appendChild, - createAssistantNode, - createChatParticipant, - createDefData, - createDefDiff, - createDef, - createFileOutput, - createToolNode, - createImageNode, - createImportTemplate, - createSchemaNode, - createStringTemplateNode, - createTextNode, - renderPromptNode, - createOutputProcessor, - createFileMerge, - createSystemNode, - finalizeMessages, - PromptImage, - PromptPrediction, - createMcpServer, - toDefRefName, - resolveFenceFormat, - createFileImageNodes, -} from "./promptdom" -import { MarkdownTrace } from "./trace" -import { GenerationOptions } from "./generation" -import { promptParametersSchemaToJSONSchema } 
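// End-to-end usage sketch for tryResolveResource as defined earlier in
// resources.ts: adapters rewrite the URL, a scheme resolver downloads it, and
// undefined signals any failure. The sample wrapper is hypothetical.
import { tryResolveResource } from "./resources";

async function listResourceFiles(url: string): Promise<string[]> {
  const res = await tryResolveResource(url);
  if (!res) return []; // unsupported scheme, fetch failure, or no files
  return res.files.map((f) => f.filename);
}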
from "./parameters" -import { consoleLogFormat } from "./logging" -import { isGlobMatch } from "./glob" + PromptNode, + appendChild, + createAssistantNode, + createChatParticipant, + createDefData, + createDefDiff, + createDef, + createFileOutput, + createToolNode, + createImageNode, + createImportTemplate, + createSchemaNode, + createStringTemplateNode, + createTextNode, + renderPromptNode, + createOutputProcessor, + createFileMerge, + createSystemNode, + finalizeMessages, + PromptImage, + PromptPrediction, + createMcpServer, + toDefRefName, + resolveFenceFormat, + createFileImageNodes, +} from "./promptdom"; +import { MarkdownTrace } from "./trace"; +import { GenerationOptions } from "./generation"; +import { promptParametersSchemaToJSONSchema } from "./parameters"; +import { consoleLogFormat } from "./logging"; +import { isGlobMatch } from "./glob"; +import { arrayify, assert, ellipse, logError, logVerbose, logWarn } from "./util"; +import { lastAssistantReasoning, renderShellOutput } from "./chatrender"; +import { jinjaRender } from "./jinja"; +import { mustacheRender } from "./mustache"; import { - arrayify, - assert, - ellipse, - logError, - logVerbose, - logWarn, -} from "./util" -import { lastAssistantReasoning, renderShellOutput } from "./chatrender" -import { jinjaRender } from "./jinja" -import { mustacheRender } from "./mustache" + imageEncodeForLLM, + imageTileEncodeForLLM, + imageTransform, + renderImageToTerminal, +} from "./image"; +import { delay, uniq } from "es-toolkit"; import { - imageEncodeForLLM, - imageTileEncodeForLLM, - imageTransform, - renderImageToTerminal, -} from "./image" -import { delay, uniq } from "es-toolkit" + addToolDefinitionsMessage, + appendSystemMessage, + CreateImageRequest, + CreateSpeechRequest, + executeChatSession, + mergeGenerationOptions, + tracePromptResult, +} from "./chat"; +import { CancellationToken, checkCancelled } from "./cancellation"; +import { ChatCompletionMessageParam } from "./chattypes"; +import { resolveModelConnectionInfo } from "./models"; import { - addToolDefinitionsMessage, - appendSystemMessage, - CreateImageRequest, - CreateSpeechRequest, - executeChatSession, - mergeGenerationOptions, - tracePromptResult, -} from "./chat" -import { CancellationToken, checkCancelled } from "./cancellation" -import { ChatCompletionMessageParam } from "./chattypes" -import { resolveModelConnectionInfo } from "./models" -import { - CHAT_REQUEST_PER_MODEL_CONCURRENT_LIMIT, - TOKEN_MISSING_INFO, - TOKEN_NO_ANSWER, - DOCS_DEF_FILES_IS_EMPTY_URL, - TRANSCRIPTION_CACHE_NAME, - TRANSCRIPTION_MODEL_ID, - SPEECH_MODEL_ID, - IMAGE_GENERATION_MODEL_ID, - LARGE_MODEL_ID, -} from "./constants" -import { addFallbackToolSystems, resolveSystems, resolveTools } from "./systems" -import { callExpander } from "./expander" -import { - errorMessage, - isCancelError, - NotSupportedError, - serializeError, -} from "./error" -import { resolveLanguageModel } from "./lm" -import { concurrentLimit } from "./concurrency" -import { resolveScript } from "./ast" -import { dedent } from "./indent" -import { runtimeHost } from "./host" -import { writeFileEdits } from "./fileedits" -import { agentAddMemory, agentCreateCache, agentQueryMemory } from "./agent" -import { YAMLStringify } from "./yaml" -import { Project } from "./server/messages" -import { mergeEnvVarsWithSystem, parametersToVars } from "./vars" -import { FFmepgClient } from "./ffmpeg" -import { BufferToBlob } from "./bufferlike" -import { host } from "./host" -import { srtVttRender } from "./transcription" 
-import { hash } from "./crypto" -import { fileTypeFromBuffer } from "./filetype" -import { deleteUndefinedValues } from "./cleaners" -import { sliceData } from "./tidy" -import { toBase64 } from "@smithy/util-base64" -import { consoleColors } from "./consolecolor" -import { terminalSize } from "./terminal" -import { stderr, stdout } from "./stdio" -import { dotGenaiscriptPath } from "./workdir" -import { prettyBytes } from "./pretty" -import { createCache } from "./cache" -import { measure } from "./performance" -import { genaiscriptDebug } from "./debug" -import debug from "debug" -const dbg = genaiscriptDebug("prompt:context") + CHAT_REQUEST_PER_MODEL_CONCURRENT_LIMIT, + TOKEN_MISSING_INFO, + TOKEN_NO_ANSWER, + DOCS_DEF_FILES_IS_EMPTY_URL, + TRANSCRIPTION_CACHE_NAME, + TRANSCRIPTION_MODEL_ID, + SPEECH_MODEL_ID, + IMAGE_GENERATION_MODEL_ID, + LARGE_MODEL_ID, +} from "./constants"; +import { addFallbackToolSystems, resolveSystems, resolveTools } from "./systems"; +import { callExpander } from "./expander"; +import { errorMessage, isCancelError, NotSupportedError, serializeError } from "./error"; +import { resolveLanguageModel } from "./lm"; +import { concurrentLimit } from "./concurrency"; +import { resolveScript } from "./ast"; +import { dedent } from "./indent"; +import { runtimeHost } from "./host"; +import { writeFileEdits } from "./fileedits"; +import { agentAddMemory, agentCreateCache, agentQueryMemory } from "./agent"; +import { YAMLStringify } from "./yaml"; +import { Project } from "./server/messages"; +import { mergeEnvVarsWithSystem, parametersToVars } from "./vars"; +import { FFmepgClient } from "./ffmpeg"; +import { BufferToBlob } from "./bufferlike"; +import { host } from "./host"; +import { srtVttRender } from "./transcription"; +import { hash } from "./crypto"; +import { fileTypeFromBuffer } from "./filetype"; +import { deleteUndefinedValues } from "./cleaners"; +import { sliceData } from "./tidy"; +import { toBase64 } from "@smithy/util-base64"; +import { consoleColors } from "./consolecolor"; +import { terminalSize } from "./terminal"; +import { stderr, stdout } from "./stdio"; +import { dotGenaiscriptPath } from "./workdir"; +import { prettyBytes } from "./pretty"; +import { createCache } from "./cache"; +import { measure } from "./performance"; +import { genaiscriptDebug } from "./debug"; +import debug from "debug"; +const dbg = genaiscriptDebug("prompt:context"); /** * Creates a chat turn generation context object for building prompt nodes and utilities in a chat session. @@ -134,1155 +122,966 @@ const dbg = genaiscriptDebug("prompt:context") * images, and system messages suitable for multi-turn chat generations. 
*/
 export function createChatTurnGenerationContext(
-    options: GenerationOptions,
-    trace: MarkdownTrace,
-    cancellationToken: CancellationToken
+  options: GenerationOptions,
+  trace: MarkdownTrace,
+  cancellationToken: CancellationToken,
 ): ChatTurnGenerationContext & { node: PromptNode } {
-    const node: PromptNode = { children: [] }
-    const fenceFormat = options.fenceFormat || resolveFenceFormat(options.model)
-    const lineNumbers = options.lineNumbers
+  const node: PromptNode = { children: [] };
+  const fenceFormat = options.fenceFormat || resolveFenceFormat(options.model);
+  const lineNumbers = options.lineNumbers;
 
-    const console = Object.freeze({
-        log: (...args: any[]) => {
-            const line = consoleLogFormat(...args)
-            if (line) {
-                trace.log(line)
-                stdout.write(line + "\n")
-            }
+  const console = Object.freeze({
+    log: (...args: any[]) => {
+      const line = consoleLogFormat(...args);
+      if (line) {
+        trace.log(line);
+        stdout.write(line + "\n");
+      }
+    },
+    debug: (...args: any[]) => {
+      const line = consoleLogFormat(...args);
+      if (line) {
+        trace.log(line);
+        logVerbose(line);
+      }
+    },
+    warn: (...args: any[]) => {
+      const line = consoleLogFormat(...args);
+      if (line) {
+        trace.warn(line);
+        logWarn(line);
+      }
+    },
+    error: (...args: any[]) => {
+      const line = consoleLogFormat(...args);
+      if (line) {
+        trace.error(line);
+        logError(line);
+      }
+    },
+  });
+
+  const defImages = (
+    files: ElementOrArray<string | WorkspaceFile | Buffer | Blob | ReadableStream>,
+    defOptions?: DefImagesOptions,
+  ) => {
+    checkCancelled(cancellationToken);
+    if (files === undefined || files === null) {
+      if (defOptions?.ignoreEmpty) return;
+      throw new Error("no images provided");
+    }
+    if (Array.isArray(files)) {
+      if (!files.length) {
+        if (defOptions?.ignoreEmpty) return;
+        throw new Error("no images provided");
+      }
+      const sliced = sliceData(files, defOptions);
+      if (!defOptions?.tiled) sliced.forEach((file) => defImages(file, defOptions));
+      else {
+        appendChild(
+          node,
+          createImageNode(
+            (async () => {
+              if (!files.length) return undefined;
+              const encoded = await imageTileEncodeForLLM(files, {
+                ...defOptions,
+                cancellationToken,
+                trace,
+              });
+              return encoded;
+            })(),
+          ),
+        );
+      }
+    } else if (typeof files === "string" || files instanceof Blob || files instanceof Buffer) {
+      const img = files;
+      appendChild(
+        node,
+        createImageNode(
+          (async () => {
+            const encoded = await imageEncodeForLLM(img, {
+              ...defOptions,
+              cancellationToken,
+              trace,
+            });
+            return encoded;
+          })(),
+        ),
+      );
+    } else {
+      const file = files as WorkspaceFile;
+      appendChild(
+        node,
+        ...createFileImageNodes(undefined, file, defOptions, {
+          trace,
+          cancellationToken,
+        }),
+      );
+    }
+  };
+
+  const ctx: ChatTurnGenerationContext & { node: PromptNode } = {
+    node,
+    writeText: (body, options) => {
+      if (body !== undefined && body !== null) {
+        const { priority, maxTokens } = options || {};
+        const role = options?.assistant ? "assistant" : options?.role || "user";
+        appendChild(
+          node,
+          role === "assistant"
+            ? createAssistantNode(body, { priority, maxTokens })
+            : role === "system"
+              ?
createSystemNode(body, { priority, maxTokens }) + : createTextNode(body, { priority, maxTokens }), + ); + } + }, + assistant: (body, options) => + ctx.writeText(body, { + ...options, + role: "assistant", + } as WriteTextOptions), + $: (strings, ...args) => { + const current = createStringTemplateNode(strings, args); + appendChild(node, current); + const res: PromptTemplateString = Object.freeze({ + priority: (priority) => { + current.priority = priority; + return res; }, - debug: (...args: any[]) => { - const line = consoleLogFormat(...args) - if (line) { - trace.log(line) - logVerbose(line) - } + flex: (value) => { + current.flex = value; + return res; }, - warn: (...args: any[]) => { - const line = consoleLogFormat(...args) - if (line) { - trace.warn(line) - logWarn(line) - } + jinja: (data) => { + current.transforms.push((t) => jinjaRender(t, data)); + return res; }, - error: (...args: any[]) => { - const line = consoleLogFormat(...args) - if (line) { - trace.error(line) - logError(line) - } + mustache: (data) => { + current.transforms.push((t) => mustacheRender(t, data)); + return res; }, - }) - - const defImages = ( - files: ElementOrArray< - string | WorkspaceFile | Buffer | Blob | ReadableStream - >, - defOptions?: DefImagesOptions - ) => { - checkCancelled(cancellationToken) - if (files === undefined || files === null) { - if (defOptions?.ignoreEmpty) return - throw new Error("no images provided") - } - if (Array.isArray(files)) { - if (!files.length) { - if (defOptions?.ignoreEmpty) return - throw new Error("no images provided") - } - const sliced = sliceData(files, defOptions) - if (!defOptions?.tiled) - sliced.forEach((file) => defImages(file, defOptions)) - else { - appendChild( - node, - createImageNode( - (async () => { - if (!files.length) return undefined - const encoded = await imageTileEncodeForLLM(files, { - ...defOptions, - cancellationToken, - trace, - }) - return encoded - })() - ) - ) - } - } else if ( - typeof files === "string" || - files instanceof Blob || - files instanceof Buffer - ) { - const img = files - appendChild( - node, - createImageNode( - (async () => { - const encoded = await imageEncodeForLLM(img, { - ...defOptions, - cancellationToken, - trace, - }) - return encoded - })() - ) - ) - } else { - const file = files as WorkspaceFile - appendChild( - node, - ...createFileImageNodes(undefined, file, defOptions, { - trace, - cancellationToken, - }) - ) - } - } - - const ctx: ChatTurnGenerationContext & { node: PromptNode } = { - node, - writeText: (body, options) => { - if (body !== undefined && body !== null) { - const { priority, maxTokens } = options || {} - const role = options?.assistant - ? "assistant" - : options?.role || "user" - appendChild( - node, - role === "assistant" - ? createAssistantNode(body, { priority, maxTokens }) - : role === "system" - ? 
createSystemNode(body, { priority, maxTokens }) - : createTextNode(body, { priority, maxTokens }) - ) - } + maxTokens: (tokens) => { + current.maxTokens = tokens; + return res; }, - assistant: (body, options) => - ctx.writeText(body, { - ...options, - role: "assistant", - } as WriteTextOptions), - $: (strings, ...args) => { - const current = createStringTemplateNode(strings, args) - appendChild(node, current) - const res: PromptTemplateString = Object.freeze({ - priority: (priority) => { - current.priority = priority - return res - }, - flex: (value) => { - current.flex = value - return res - }, - jinja: (data) => { - current.transforms.push((t) => jinjaRender(t, data)) - return res - }, - mustache: (data) => { - current.transforms.push((t) => mustacheRender(t, data)) - return res - }, - maxTokens: (tokens) => { - current.maxTokens = tokens - return res - }, - role: (r) => { - current.role = r - return res - }, - cacheControl: (cc) => { - current.cacheControl = cc - return res - }, - } satisfies PromptTemplateString) - return res + role: (r) => { + current.role = r; + return res; }, - def: (name, body, defOptions) => { - name = name ?? "" - const doptions = { ...(defOptions || {}), trace } - doptions.lineNumbers = doptions.lineNumbers ?? lineNumbers - doptions.fenceFormat = doptions.fenceFormat ?? fenceFormat + cacheControl: (cc) => { + current.cacheControl = cc; + return res; + }, + } satisfies PromptTemplateString); + return res; + }, + def: (name, body, defOptions) => { + name = name ?? ""; + const doptions = { ...(defOptions || {}), trace }; + doptions.lineNumbers = doptions.lineNumbers ?? lineNumbers; + doptions.fenceFormat = doptions.fenceFormat ?? fenceFormat; - // shortcuts - if (body === undefined || body === null) { - if (!doptions.ignoreEmpty) - throw new Error( - `def ${name} is ${body}. See ${DOCS_DEF_FILES_IS_EMPTY_URL}` - ) - return undefined - } else if (Array.isArray(body)) { - if (body.length === 0 && !doptions.ignoreEmpty) - throw new Error( - `def ${name} is empty. See ${DOCS_DEF_FILES_IS_EMPTY_URL}` - ) - body.forEach((f) => ctx.def(name, f, defOptions)) - } else if (typeof body === "string") { - if (body.trim() === "" && !doptions.ignoreEmpty) - throw new Error( - `def ${name} is empty. See ${DOCS_DEF_FILES_IS_EMPTY_URL}` - ) - appendChild( - node, - createDef(name, { filename: "", content: body }, doptions) - ) - } else if ( - typeof body === "object" && - (body as WorkspaceFile).filename - ) { - const file = body as WorkspaceFile - const { glob } = defOptions || {} - const endsWith = arrayify(defOptions?.endsWith) - const { filename } = file - if (glob && filename) { - if (!isGlobMatch(filename, glob)) return undefined - } - if ( - endsWith.length && - !endsWith.some((ext) => filename.endsWith(ext)) - ) - return undefined + // shortcuts + if (body === undefined || body === null) { + if (!doptions.ignoreEmpty) + throw new Error(`def ${name} is ${body}. See ${DOCS_DEF_FILES_IS_EMPTY_URL}`); + return undefined; + } else if (Array.isArray(body)) { + if (body.length === 0 && !doptions.ignoreEmpty) + throw new Error(`def ${name} is empty. See ${DOCS_DEF_FILES_IS_EMPTY_URL}`); + body.forEach((f) => ctx.def(name, f, defOptions)); + } else if (typeof body === "string") { + if (body.trim() === "" && !doptions.ignoreEmpty) + throw new Error(`def ${name} is empty. 
See ${DOCS_DEF_FILES_IS_EMPTY_URL}`); + appendChild(node, createDef(name, { filename: "", content: body }, doptions)); + } else if (typeof body === "object" && (body as WorkspaceFile).filename) { + const file = body as WorkspaceFile; + const { glob } = defOptions || {}; + const endsWith = arrayify(defOptions?.endsWith); + const { filename } = file; + if (glob && filename) { + if (!isGlobMatch(filename, glob)) return undefined; + } + if (endsWith.length && !endsWith.some((ext) => filename.endsWith(ext))) return undefined; - // more robust check - if (/\.(png|jpeg|jpg|gif|webp)$/i.test(filename)) { - appendChild( - node, - ...createFileImageNodes(name, file, doptions, { - trace, - cancellationToken, - }) - ) - } else appendChild(node, createDef(name, file, doptions)) - } else if ( - typeof body === "object" && - (body as ShellOutput).exitCode !== undefined - ) { - appendChild( - node, - createDef( - name, - { - filename: "", - content: renderShellOutput(body as ShellOutput), - }, - { ...doptions, lineNumbers: false } - ) - ) - } else if (typeof body === "object" && (body as Fenced).content) { - const fenced = body as Fenced - appendChild( - node, - createDef( - name, - { filename: "", content: fenced.content }, - { language: fenced.language, ...(doptions || {}) } - ) - ) - } else if ( - typeof body === "object" && - (body as RunPromptResult).text - ) { - const res = body as RunPromptResult - const fence = - res.fences?.length === 1 ? res.fences[0] : undefined - appendChild( - node, - createDef( - name, - { filename: "", content: fence?.content ?? res.text }, - { language: fence?.language, ...(doptions || {}) } - ) - ) - } - return toDefRefName(name, doptions) - }, - defImages, - defData: (name, data, defOptions) => { - name = name ?? "" - const doptions = { ...(defOptions || {}), trace } - doptions.fenceFormat = doptions.fenceFormat ?? fenceFormat + // more robust check + if (/\.(png|jpeg|jpg|gif|webp)$/i.test(filename)) { + appendChild( + node, + ...createFileImageNodes(name, file, doptions, { + trace, + cancellationToken, + }), + ); + } else appendChild(node, createDef(name, file, doptions)); + } else if (typeof body === "object" && (body as ShellOutput).exitCode !== undefined) { + appendChild( + node, + createDef( + name, + { + filename: "", + content: renderShellOutput(body as ShellOutput), + }, + { ...doptions, lineNumbers: false }, + ), + ); + } else if (typeof body === "object" && (body as Fenced).content) { + const fenced = body as Fenced; + appendChild( + node, + createDef( + name, + { filename: "", content: fenced.content }, + { language: fenced.language, ...(doptions || {}) }, + ), + ); + } else if (typeof body === "object" && (body as RunPromptResult).text) { + const res = body as RunPromptResult; + const fence = res.fences?.length === 1 ? res.fences[0] : undefined; + appendChild( + node, + createDef( + name, + { filename: "", content: fence?.content ?? res.text }, + { language: fence?.language, ...(doptions || {}) }, + ), + ); + } + return toDefRefName(name, doptions); + }, + defImages, + defData: (name, data, defOptions) => { + name = name ?? ""; + const doptions = { ...(defOptions || {}), trace }; + doptions.fenceFormat = doptions.fenceFormat ?? fenceFormat; - appendChild(node, createDefData(name, data, doptions)) - return toDefRefName(name, doptions) - }, - defDiff: (name, left, right, defDiffOptions) => { - name = name ?? "" - const doptions = { ...(defDiffOptions || {}), trace } - doptions.fenceFormat = doptions.fenceFormat ?? 
fenceFormat + appendChild(node, createDefData(name, data, doptions)); + return toDefRefName(name, doptions); + }, + defDiff: (name, left, right, defDiffOptions) => { + name = name ?? ""; + const doptions = { ...(defDiffOptions || {}), trace }; + doptions.fenceFormat = doptions.fenceFormat ?? fenceFormat; - appendChild(node, createDefDiff(name, left, right, doptions)) - return toDefRefName(name, doptions) - }, - fence(body, options?: DefOptions) { - const doptions = { ...(options || {}), trace } - doptions.fenceFormat = doptions.fenceFormat ?? fenceFormat + appendChild(node, createDefDiff(name, left, right, doptions)); + return toDefRefName(name, doptions); + }, + fence(body, options?: DefOptions) { + const doptions = { ...(options || {}), trace }; + doptions.fenceFormat = doptions.fenceFormat ?? fenceFormat; - ctx.def("", body, doptions) - return undefined - }, - importTemplate: (template, data, options) => { - appendChild(node, createImportTemplate(template, data, options)) - return undefined - }, - console, - } + ctx.def("", body, doptions); + return undefined; + }, + importTemplate: (template, data, options) => { + appendChild(node, createImportTemplate(template, data, options)); + return undefined; + }, + console, + }; - return ctx + return ctx; } export interface RunPromptContextNode extends ChatGenerationContext { - node: PromptNode + node: PromptNode; } export function createChatGenerationContext( - options: GenerationOptions, - trace: MarkdownTrace, - projectOptions: { - prj: Project - env: ExpansionVariables - } + options: GenerationOptions, + trace: MarkdownTrace, + projectOptions: { + prj: Project; + env: ExpansionVariables; + }, ): RunPromptContextNode { - const { cancellationToken, infoCb, userState } = options || {} - const { prj, env } = projectOptions - assert(!!env.output, "output missing") - const turnCtx = createChatTurnGenerationContext( - options, - trace, - cancellationToken - ) - const node = turnCtx.node + const { cancellationToken, infoCb, userState } = options || {}; + const { prj, env } = projectOptions; + assert(!!env.output, "output missing"); + const turnCtx = createChatTurnGenerationContext(options, trace, cancellationToken); + const node = turnCtx.node; - // Default output processor for the prompt - const defOutputProcessor = (fn: PromptOutputProcessorHandler) => { - checkCancelled(cancellationToken) - if (fn) appendChild(node, createOutputProcessor(fn)) - } + // Default output processor for the prompt + const defOutputProcessor = (fn: PromptOutputProcessorHandler) => { + checkCancelled(cancellationToken); + if (fn) appendChild(node, createOutputProcessor(fn)); + }; - const defTool: ( - name: string | ToolCallback | McpServersConfig, - description: string | DefToolOptions, - parameters?: PromptParametersSchema | JSONSchemaObject, - fn?: ChatFunctionHandler, - defOptions?: DefToolOptions - ) => void = (name, description, parameters, fn, defOptions) => { - checkCancelled(cancellationToken) - if (name === undefined || name === null) - throw new Error("tool name is missing") - dbg(`tool %s`, name) - if (typeof name === "string") { - if (typeof description !== "string") - throw new Error("tool description is missing") - const parameterSchema = - promptParametersSchemaToJSONSchema(parameters) - appendChild( - node, - createToolNode( - name, - description, - parameterSchema, - fn, - defOptions, - ctx - ) - ) - } else if (typeof name === "object" && (name as ToolCallback).impl) { - const tool = name as ToolCallback - appendChild( - node, - createToolNode( - 
tool.spec.name, - tool.spec.description, - tool.spec.parameters as any, - tool.impl, - defOptions, - ctx - ) - ) - } else if (typeof name === "object") { - dbg(`mcp: %o`, Object.keys(name)) - for (const kv of Object.entries(name)) { - const [id, def] = kv - if ((def as McpServerConfig).command) { - const serverConfig = def as McpServerConfig - appendChild( - node, - createMcpServer(id, serverConfig, defOptions, ctx) - ) - } - } + const defTool: ( + name: string | ToolCallback | McpServersConfig, + description: string | DefToolOptions, + parameters?: PromptParametersSchema | JSONSchemaObject, + fn?: ChatFunctionHandler, + defOptions?: DefToolOptions, + ) => void = (name, description, parameters, fn, defOptions) => { + checkCancelled(cancellationToken); + if (name === undefined || name === null) throw new Error("tool name is missing"); + dbg(`tool %s`, name); + if (typeof name === "string") { + if (typeof description !== "string") throw new Error("tool description is missing"); + const parameterSchema = promptParametersSchemaToJSONSchema(parameters); + appendChild(node, createToolNode(name, description, parameterSchema, fn, defOptions, ctx)); + } else if (typeof name === "object" && (name as ToolCallback).impl) { + const tool = name as ToolCallback; + appendChild( + node, + createToolNode( + tool.spec.name, + tool.spec.description, + tool.spec.parameters as any, + tool.impl, + defOptions, + ctx, + ), + ); + } else if (typeof name === "object") { + dbg(`mcp: %o`, Object.keys(name)); + for (const kv of Object.entries(name)) { + const [id, def] = kv; + if ((def as McpServerConfig).command) { + const serverConfig = def as McpServerConfig; + appendChild(node, createMcpServer(id, serverConfig, defOptions, ctx)); } + } } + }; - const adbgm = debug(`agent:memory`) - const defAgent = ( - name: string, - description: string, - fn: ( - agentCtx: ChatGenerationContext, - args: ChatFunctionArgs - ) => Promise, - options?: DefAgentOptions - ): void => { - checkCancelled(cancellationToken) - const { - variant, - tools, - system, - disableMemory, - disableMemoryQuery, - ...rest - } = options || {} - const memory = disableMemory - ? undefined - : agentCreateCache({ userState }) + const adbgm = debug(`agent:memory`); + const defAgent = ( + name: string, + description: string, + fn: (agentCtx: ChatGenerationContext, args: ChatFunctionArgs) => Promise, + options?: DefAgentOptions, + ): void => { + checkCancelled(cancellationToken); + const { variant, tools, system, disableMemory, disableMemoryQuery, ...rest } = options || {}; + const memory = disableMemory ? undefined : agentCreateCache({ userState }); - name = name.replace(/^agent_/i, "") - const adbg = debug(`agent:${name}`) - adbg(`created ${variant || ""}`) - const agentName = `agent_${name}${variant ? "_" + variant : ""}` - const agentLabel = `agent ${name}${variant ? " " + variant : ""}` + name = name.replace(/^agent_/i, ""); + const adbg = debug(`agent:${name}`); + adbg(`created ${variant || ""}`); + const agentName = `agent_${name}${variant ? "_" + variant : ""}`; + const agentLabel = `agent ${name}${variant ? 
" " + variant : ""}`; - const agentSystem = uniq([ - "system.assistant", - "system.tools", - "system.explanations", - "system.safety_jailbreak", - "system.safety_harmful_content", - "system.safety_protected_material", - ...arrayify(system), - ]) - const agentTools = resolveTools( - runtimeHost.project, - agentSystem, - arrayify(tools) - ) - const agentDescription = ellipse( - `Agent that uses an LLM to ${description}.\nAvailable tools:${agentTools.map((t) => `- ${t.description}`).join("\n")}`, - 1020 - ) // DO NOT LEAK TOOL ID HERE - dbg(`description: ${agentDescription}`) + const agentSystem = uniq([ + "system.assistant", + "system.tools", + "system.explanations", + "system.safety_jailbreak", + "system.safety_harmful_content", + "system.safety_protected_material", + ...arrayify(system), + ]); + const agentTools = resolveTools(runtimeHost.project, agentSystem, arrayify(tools)); + const agentDescription = ellipse( + `Agent that uses an LLM to ${description}.\nAvailable tools:${agentTools.map((t) => `- ${t.description}`).join("\n")}`, + 1020, + ); // DO NOT LEAK TOOL ID HERE + dbg(`description: ${agentDescription}`); - defTool( - agentName, - agentDescription, - { - type: "object", - properties: { - query: { - type: "string", - description: "Query to answer by the LLM agent.", - }, - }, - required: ["query"], - }, - async (args) => { - // the LLM automatically adds extract arguments to the context - checkCancelled(cancellationToken) - const { context, ...argsRest } = args - const { query, ...argsNoQuery } = argsRest + defTool( + agentName, + agentDescription, + { + type: "object", + properties: { + query: { + type: "string", + description: "Query to answer by the LLM agent.", + }, + }, + required: ["query"], + }, + async (args) => { + // the LLM automatically adds extract arguments to the context + checkCancelled(cancellationToken); + const { context, ...argsRest } = args; + const { query, ...argsNoQuery } = argsRest; - infoCb?.({ - text: `${agentLabel}: ${query} ${parametersToVars(argsNoQuery)}`, - }) - adbg(`query: ${query}`) + infoCb?.({ + text: `${agentLabel}: ${query} ${parametersToVars(argsNoQuery)}`, + }); + adbg(`query: ${query}`); - const hasExtraArgs = Object.keys(argsNoQuery).length > 0 - if (hasExtraArgs) adbg(`extra args: %O`, argsNoQuery) + const hasExtraArgs = Object.keys(argsNoQuery).length > 0; + if (hasExtraArgs) adbg(`extra args: %O`, argsNoQuery); - let memoryAnswer: string - if (memory && query && !disableMemoryQuery) { - memoryAnswer = await agentQueryMemory( - memory, - ctx, - query + - (hasExtraArgs - ? `\n${YAMLStringify(argsNoQuery)}` - : ""), - { trace } - ) - if (memoryAnswer) adbgm(`found ${memoryAnswer}`) - } + let memoryAnswer: string; + if (memory && query && !disableMemoryQuery) { + memoryAnswer = await agentQueryMemory( + memory, + ctx, + query + (hasExtraArgs ? `\n${YAMLStringify(argsNoQuery)}` : ""), + { trace }, + ); + if (memoryAnswer) adbgm(`found ${memoryAnswer}`); + } - const res = await ctx.runPrompt( - async (_) => { - if (typeof fn === "string") - _.writeText(dedent(fn), { role: "system" }) - else await fn(_, args) - _.$`Make a plan and solve the task described in . + const res = await ctx.runPrompt( + async (_) => { + if (typeof fn === "string") _.writeText(dedent(fn), { role: "system" }); + else await fn(_, args); + _.$`Make a plan and solve the task described in . - Assume that your answer will be analyzed by an LLM, not a human. - If you are missing information, reply "${TOKEN_MISSING_INFO}: ". 
- If you cannot answer the query, return "${TOKEN_NO_ANSWER}: <reason>".
 - Be concise. Minimize output to the most relevant information to save context tokens.
-            `.role("system")
-                    if (memoryAnswer)
-                        _.$`- The <QUERY> applied to the agent memory is in <MEMORY>.`.role(
-                            "system"
-                        )
-                    _.def("QUERY", query)
-                    if (Object.keys(argsNoQuery).length)
-                        _.defData("QUERY_CONTEXT", argsNoQuery, {
-                            format: "yaml",
-                        })
+        `.role("system");
+        if (memoryAnswer)
+          _.$`- The <QUERY> applied to the agent memory is in <MEMORY>.`.role("system");
+        _.def("QUERY", query);
+        if (Object.keys(argsNoQuery).length)
+          _.defData("QUERY_CONTEXT", argsNoQuery, {
+            format: "yaml",
+          });
 
-                    if (memoryAnswer) _.def("MEMORY", memoryAnswer)
-                    if (memory)
-                        _.defOutputProcessor(async ({ text }) => {
-                            if (
-                                text &&
-                                !(
-                                    text.startsWith(TOKEN_MISSING_INFO) ||
-                                    text.startsWith(TOKEN_NO_ANSWER)
-                                )
-                            ) {
-                                adbgm(`add ${text}`)
-                                await agentAddMemory(
-                                    memory,
-                                    agentName,
-                                    query,
-                                    text,
-                                    {
-                                        trace,
-                                    }
-                                )
-                            }
-                        })
-                },
-                {
-                    model: "agent",
-                    label: agentLabel,
-                    system: agentSystem,
-                    tools: agentTools.map(({ id }) => id),
-                    ...rest,
-                }
-            )
-            if (res.error) {
-                adbg(`error: ${res.error}`)
-                throw res.error
+        if (memoryAnswer) _.def("MEMORY", memoryAnswer);
+        if (memory)
+          _.defOutputProcessor(async ({ text }) => {
+            if (
+              text &&
+              !(text.startsWith(TOKEN_MISSING_INFO) || text.startsWith(TOKEN_NO_ANSWER))
+            ) {
+              adbgm(`add ${text}`);
+              await agentAddMemory(memory, agentName, query, text, {
+                trace,
+              });
             }
-            const response = res.text
-            adbgm(`response: %O`, response)
-            return response
-        }
-    )
-}
-
-const defSchema = (
-    name: string,
-    schema: JSONSchema,
-    defOptions?: DefSchemaOptions
-) => {
-    checkCancelled(cancellationToken)
-    appendChild(node, createSchemaNode(name, schema, defOptions))
+          });
+      },
+      {
+        model: "agent",
+        label: agentLabel,
+        system: agentSystem,
+        tools: agentTools.map(({ id }) => id),
+        ...rest,
+      },
+    );
+    if (res.error) {
+      adbg(`error: ${res.error}`);
+      throw res.error;
+    }
+    const response = res.text;
+    adbgm(`response: %O`, response);
+    return response;
+  },
+    );
+  };
 
-    return name
-}
+  const defSchema = (name: string, schema: JSONSchema, defOptions?: DefSchemaOptions) => {
+    checkCancelled(cancellationToken);
+    appendChild(node, createSchemaNode(name, schema, defOptions));
 
-const defChatParticipant = (
-    generator: ChatParticipantHandler,
-    options?: ChatParticipantOptions
-) => {
-    checkCancelled(cancellationToken)
-    if (generator)
-        appendChild(node, createChatParticipant({ generator, options }))
-}
+    return name;
+  };
 
-const defFileOutput = (
-    pattern: ElementOrArray<string | WorkspaceFile>,
-    description: string,
-    options?: FileOutputOptions
-): void => {
-    checkCancelled(cancellationToken)
-    if (pattern)
-        appendChild(
-            node,
-            createFileOutput({
-                pattern: arrayify(pattern).map((p) =>
-                    typeof p === "string" ?
p : p.filename
-                ),
-                description,
-                options,
-            })
-        )
-}
+  const defChatParticipant = (
+    generator: ChatParticipantHandler,
+    options?: ChatParticipantOptions,
+  ) => {
+    checkCancelled(cancellationToken);
+    if (generator) appendChild(node, createChatParticipant({ generator, options }));
+  };
 
-    const prompt = (
-        strings: TemplateStringsArray,
-        ...args: any[]
-    ): RunPromptResultPromiseWithOptions => {
-        checkCancelled(cancellationToken)
-        const options: PromptGeneratorOptions = {}
-        const p: RunPromptResultPromiseWithOptions =
-            new Promise<RunPromptResult>(async (resolve, reject) => {
-                try {
-                    await delay(0)
-                    // data race for options
-                    const res = await ctx.runPrompt(async (_) => {
-                        _.$(strings, ...args)
-                    }, options)
-                    resolve(res)
-                } catch (e) {
-                    reject(e)
-                }
-            }) as any
-        p.options = (v) => {
-            if (v !== undefined) Object.assign(options, v)
-            return p
-        }
-        return p
-    }
+  const defFileOutput = (
+    pattern: ElementOrArray<string | WorkspaceFile>,
+    description: string,
+    options?: FileOutputOptions,
+  ): void => {
+    checkCancelled(cancellationToken);
+    if (pattern)
+      appendChild(
+        node,
+        createFileOutput({
+          pattern: arrayify(pattern).map((p) => (typeof p === "string" ? p : p.filename)),
+          description,
+          options,
+        }),
+      );
+  };
 
-    const transcribe = async (
-        audio: string | WorkspaceFile,
-        options?: TranscriptionOptions
-    ): Promise<TranscriptionResult> => {
-        checkCancelled(cancellationToken)
-        const { cache, ...rest } = options || {}
-        const transcriptionTrace = trace.startTraceDetails("🎤 transcribe")
+  const prompt = (
+    strings: TemplateStringsArray,
+    ...args: any[]
+  ): RunPromptResultPromiseWithOptions => {
+    checkCancelled(cancellationToken);
+    const options: PromptGeneratorOptions = {};
+    const p: RunPromptResultPromiseWithOptions = new Promise<RunPromptResult>(
+      async (resolve, reject) => {
         try {
-            const conn: ModelConnectionOptions = {
-                model: options?.model,
-            }
-            const { info, configuration } = await resolveModelConnectionInfo(
-                conn,
-                {
-                    trace: transcriptionTrace,
-                    defaultModel: TRANSCRIPTION_MODEL_ID,
-                    cancellationToken,
-                    token: true,
-                }
-            )
-            if (info.error) throw new Error(info.error)
-            if (!configuration) throw new Error("model configuration not found")
-            checkCancelled(cancellationToken)
-            const { ok } = await runtimeHost.pullModel(configuration, {
-                trace: transcriptionTrace,
-                cancellationToken,
-            })
-            if (!ok) throw new Error(`failed to pull model ${conn}`)
-            checkCancelled(cancellationToken)
-            const { transcriber } = await resolveLanguageModel(
-                configuration.provider
-            )
-            if (!transcriber)
-                throw new Error("audio transcriber not found for " + info.model)
-            const ffmpeg = new FFmepgClient()
-            const audioFile = await ffmpeg.extractAudio(audio, {
-                transcription: true,
-                cache,
-            })
-            const file = await BufferToBlob(
-                await host.readFile(audioFile),
-                "audio/ogg"
-            )
-            const update: () => Promise<TranscriptionResult> = async () => {
-                transcriptionTrace.itemValue(`model`, configuration.model)
-                transcriptionTrace.itemValue(
-                    `file size`,
-                    prettyBytes(file.size)
-                )
-                transcriptionTrace.itemValue(`file type`, file.type)
-                const res = await transcriber(
-                    {
-                        file,
-                        model: configuration.model,
-                        language: options?.language,
-                        translate: options?.translate,
-                    },
-                    configuration,
-                    {
-                        trace: transcriptionTrace,
-                        cancellationToken,
-                    }
-                )
-                srtVttRender(res)
-                return res
-            }
-
-            let res: TranscriptionResult
-            const _cache = createCache<
-                { file: Blob } & TranscriptionOptions,
-                TranscriptionResult
-            >(
-                cache === true
-                    ? TRANSCRIPTION_CACHE_NAME
-                    : typeof cache === "string"
-                        ?
cache
-                    : undefined,
-                { type: "fs" }
-            )
-            if (cache) {
-                const hit = await _cache.getOrUpdate(
-                    { file, ...rest },
-                    update,
-                    (res) => !res.error
-                )
-                transcriptionTrace.itemValue(
-                    `cache ${hit.cached ? "hit" : "miss"}`,
-                    hit.key
-                )
-                res = hit.value
-            } else res = await update()
-            transcriptionTrace.fence(res.text, "markdown")
-            if (res.error) transcriptionTrace.error(errorMessage(res.error))
-            if (res.segments) transcriptionTrace.fence(res.segments, "yaml")
-            return res
+          await delay(0);
+          // data race for options
+          const res = await ctx.runPrompt(async (_) => {
+            _.$(strings, ...args);
+          }, options);
+          resolve(res);
         } catch (e) {
-            logError(e)
-            transcriptionTrace.error(e)
-            return {
-                text: undefined,
-                error: serializeError(e),
-            } satisfies TranscriptionResult
-        } finally {
-            transcriptionTrace.endDetails()
+          reject(e);
         }
-    }
+      },
+    ) as any;
+    p.options = (v) => {
+      if (v !== undefined) Object.assign(options, v);
+      return p;
+    };
+    return p;
+  };
 
-    const speak = async (
-        input: string,
-        options?: SpeechOptions
-    ): Promise<SpeechResult> => {
-        checkCancelled(cancellationToken)
-        const { cache, voice, instructions, ...rest } = options || {}
-        const speechTrace = trace.startTraceDetails("🦜 speak")
-        try {
-            const conn: ModelConnectionOptions = {
-                model: options?.model || SPEECH_MODEL_ID,
-            }
-            const { info, configuration } = await resolveModelConnectionInfo(
-                conn,
-                {
-                    trace: speechTrace,
-                    defaultModel: SPEECH_MODEL_ID,
-                    cancellationToken,
-                    token: true,
-                }
-            )
-            if (info.error) throw new Error(info.error)
-            if (!configuration) throw new Error("model configuration not found")
-            checkCancelled(cancellationToken)
-            const { ok } = await runtimeHost.pullModel(configuration, {
-                trace: speechTrace,
-                cancellationToken,
-            })
-            if (!ok) throw new Error(`failed to pull model ${conn}`)
-            checkCancelled(cancellationToken)
-            const { speaker } = await resolveLanguageModel(
-                configuration.provider
-            )
-            if (!speaker)
-                throw new Error("speech converter not found for " + info.model)
-            speechTrace.itemValue(`model`, configuration.model)
-            const req = deleteUndefinedValues({
-                input,
-                model: configuration.model,
-                voice,
-                instructions: dedent(instructions),
-            }) satisfies CreateSpeechRequest
-            const res = await speaker(req, configuration, {
-                trace: speechTrace,
-                cancellationToken,
-            })
-            if (res.error) {
-                speechTrace.error(errorMessage(res.error))
-                return { error: res.error } satisfies SpeechResult
-            }
-            const h = await hash(res.audio, { length: 20 })
-            const { ext } = (await fileTypeFromBuffer(res.audio)) || {}
-            const filename = dotGenaiscriptPath("speech", h + "."
+ ext)
-            await host.writeFile(filename, res.audio)
-            return {
-                filename,
-            } satisfies SpeechResult
-        } catch (e) {
-            logError(e)
-            speechTrace.error(e)
-            return {
-                filename: undefined,
-                error: serializeError(e),
-            } satisfies SpeechResult
-        } finally {
-            speechTrace.endDetails()
-        }
-    }
+  const transcribe = async (
+    audio: string | WorkspaceFile,
+    options?: TranscriptionOptions,
+  ): Promise<TranscriptionResult> => {
+    checkCancelled(cancellationToken);
+    const { cache, ...rest } = options || {};
+    const transcriptionTrace = trace.startTraceDetails("🎤 transcribe");
+    try {
+      const conn: ModelConnectionOptions = {
+        model: options?.model,
+      };
+      const { info, configuration } = await resolveModelConnectionInfo(conn, {
+        trace: transcriptionTrace,
+        defaultModel: TRANSCRIPTION_MODEL_ID,
+        cancellationToken,
+        token: true,
+      });
+      if (info.error) throw new Error(info.error);
+      if (!configuration) throw new Error("model configuration not found");
+      checkCancelled(cancellationToken);
+      const { ok } = await runtimeHost.pullModel(configuration, {
+        trace: transcriptionTrace,
+        cancellationToken,
+      });
+      if (!ok) throw new Error(`failed to pull model ${conn}`);
+      checkCancelled(cancellationToken);
+      const { transcriber } = await resolveLanguageModel(configuration.provider);
+      if (!transcriber) throw new Error("audio transcriber not found for " + info.model);
+      const ffmpeg = new FFmepgClient();
+      const audioFile = await ffmpeg.extractAudio(audio, {
+        transcription: true,
+        cache,
+      });
+      const file = await BufferToBlob(await host.readFile(audioFile), "audio/ogg");
+      const update: () => Promise<TranscriptionResult> = async () => {
+        transcriptionTrace.itemValue(`model`, configuration.model);
+        transcriptionTrace.itemValue(`file size`, prettyBytes(file.size));
+        transcriptionTrace.itemValue(`file type`, file.type);
+        const res = await transcriber(
+          {
+            file,
+            model: configuration.model,
+            language: options?.language,
+            translate: options?.translate,
+          },
+          configuration,
+          {
+            trace: transcriptionTrace,
+            cancellationToken,
+          },
+        );
+        srtVttRender(res);
+        return res;
+      };

-    const defFileMerge = (fn: FileMergeHandler) => {
-        checkCancelled(cancellationToken)
-        appendChild(node, createFileMerge(fn))
+      let res: TranscriptionResult;
+      const _cache = createCache<{ file: Blob } & TranscriptionOptions, TranscriptionResult>(
+        cache === true ? TRANSCRIPTION_CACHE_NAME : typeof cache === "string" ? cache : undefined,
+        { type: "fs" },
+      );
+      if (cache) {
+        const hit = await _cache.getOrUpdate({ file, ...rest }, update, (res) => !res.error);
+        transcriptionTrace.itemValue(`cache ${hit.cached ?
"hit" : "miss"}`, hit.key); + res = hit.value; + } else res = await update(); + transcriptionTrace.fence(res.text, "markdown"); + if (res.error) transcriptionTrace.error(errorMessage(res.error)); + if (res.segments) transcriptionTrace.fence(res.segments, "yaml"); + return res; + } catch (e) { + logError(e); + transcriptionTrace.error(e); + return { + text: undefined, + error: serializeError(e), + } satisfies TranscriptionResult; + } finally { + transcriptionTrace.endDetails(); } + }; - const runPrompt = async ( - generator: string | PromptGenerator, - runOptions?: PromptGeneratorOptions - ): Promise => { - checkCancelled(cancellationToken) - Object.freeze(runOptions) - const { label, applyEdits, throwOnError } = runOptions || {} - const runTrace = trace.startTraceDetails(`🎁 ${label || "prompt"}`) - let messages: ChatCompletionMessageParam[] = [] - try { - infoCb?.({ text: label || "prompt" }) + const speak = async (input: string, options?: SpeechOptions): Promise => { + checkCancelled(cancellationToken); + const { cache, voice, instructions, ...rest } = options || {}; + const speechTrace = trace.startTraceDetails("🦜 speak"); + try { + const conn: ModelConnectionOptions = { + model: options?.model || SPEECH_MODEL_ID, + }; + const { info, configuration } = await resolveModelConnectionInfo(conn, { + trace: speechTrace, + defaultModel: SPEECH_MODEL_ID, + cancellationToken, + token: true, + }); + if (info.error) throw new Error(info.error); + if (!configuration) throw new Error("model configuration not found"); + checkCancelled(cancellationToken); + const { ok } = await runtimeHost.pullModel(configuration, { + trace: speechTrace, + cancellationToken, + }); + if (!ok) throw new Error(`failed to pull model ${conn}`); + checkCancelled(cancellationToken); + const { speaker } = await resolveLanguageModel(configuration.provider); + if (!speaker) throw new Error("speech converter not found for " + info.model); + speechTrace.itemValue(`model`, configuration.model); + const req = deleteUndefinedValues({ + input, + model: configuration.model, + voice, + instructions: dedent(instructions), + }) satisfies CreateSpeechRequest; + const res = await speaker(req, configuration, { + trace: speechTrace, + cancellationToken, + }); + if (res.error) { + speechTrace.error(errorMessage(res.error)); + return { error: res.error } satisfies SpeechResult; + } + const h = await hash(res.audio, { length: 20 }); + const { ext } = (await fileTypeFromBuffer(res.audio)) || {}; + const filename = dotGenaiscriptPath("speech", h + "." 
+ ext);
+      await host.writeFile(filename, res.audio);
+      return {
+        filename,
+      } satisfies SpeechResult;
+    } catch (e) {
+      logError(e);
+      speechTrace.error(e);
+      return {
+        filename: undefined,
+        error: serializeError(e),
+      } satisfies SpeechResult;
+    } finally {
+      speechTrace.endDetails();
+    }
+  };
 
-    const runPrompt = async (
-        generator: string | PromptGenerator,
-        runOptions?: PromptGeneratorOptions
-    ): Promise<RunPromptResult> => {
-        checkCancelled(cancellationToken)
-        Object.freeze(runOptions)
-        const { label, applyEdits, throwOnError } = runOptions || {}
-        const runTrace = trace.startTraceDetails(`🎁 ${label || "prompt"}`)
-        let messages: ChatCompletionMessageParam[] = []
-        try {
-            infoCb?.({ text: label || "prompt" })
+  const defFileMerge = (fn: FileMergeHandler) => {
+    checkCancelled(cancellationToken);
+    appendChild(node, createFileMerge(fn));
+  };
 
-            const genOptions = mergeGenerationOptions(options, runOptions)
-            genOptions.inner = true
-            genOptions.trace = runTrace
-            const { info, configuration } = await resolveModelConnectionInfo(
-                genOptions,
-                {
-                    trace: runTrace,
-                    defaultModel: LARGE_MODEL_ID,
-                    cancellationToken,
-                    token: true,
-                }
-            )
-            if (info.error) throw new Error(info.error)
-            if (!configuration) throw new Error("model configuration not found")
-            genOptions.model = info.model
-            genOptions.stats = genOptions.stats.createChild(
-                genOptions.model,
-                label
-            )
-            const { ok } = await runtimeHost.pullModel(configuration, {
-                trace: runTrace,
-                cancellationToken,
-            })
-            if (!ok) throw new Error(`failed to pull model ${genOptions.model}`)
+  const runPrompt = async (
+    generator: string | PromptGenerator,
+    runOptions?: PromptGeneratorOptions,
+  ): Promise<RunPromptResult> => {
+    checkCancelled(cancellationToken);
+    Object.freeze(runOptions);
+    const { label, applyEdits, throwOnError } = runOptions || {};
+    const runTrace = trace.startTraceDetails(`🎁 ${label || "prompt"}`);
+    let messages: ChatCompletionMessageParam[] = [];
+    try {
+      infoCb?.({ text: label || "prompt" });
 
-            const runCtx = createChatGenerationContext(
-                genOptions,
-                runTrace,
-                projectOptions
-            )
-            if (typeof generator === "string")
-                runCtx.node.children.push(createTextNode(generator))
-            else await generator(runCtx)
-            const node = runCtx.node
+      const genOptions = mergeGenerationOptions(options, runOptions);
+      genOptions.inner = true;
+      genOptions.trace = runTrace;
+      const { info, configuration } = await resolveModelConnectionInfo(genOptions, {
+        trace: runTrace,
+        defaultModel: LARGE_MODEL_ID,
+        cancellationToken,
+        token: true,
+      });
+      if (info.error) throw new Error(info.error);
+      if (!configuration) throw new Error("model configuration not found");
+      genOptions.model = info.model;
+      genOptions.stats = genOptions.stats.createChild(genOptions.model, label);
+      const { ok } = await runtimeHost.pullModel(configuration, {
+        trace: runTrace,
+        cancellationToken,
+      });
+      if (!ok) throw new Error(`failed to pull model ${genOptions.model}`);
 
-            checkCancelled(cancellationToken)
+      const runCtx = createChatGenerationContext(genOptions, runTrace, projectOptions);
+      if (typeof generator === "string") runCtx.node.children.push(createTextNode(generator));
+      else await generator(runCtx);
+      const node = runCtx.node;
 
-            let tools: ToolCallback[] = undefined
-            let schemas: Record<string, JSONSchema> = undefined
-            let chatParticipants: ChatParticipant[] = undefined
-            const images: PromptImage[] = []
-            const fileMerges: FileMergeHandler[] = []
-            const outputProcessors: PromptOutputProcessorHandler[] = []
-            const fileOutputs: FileOutput[] = []
-            const disposables: AsyncDisposable[] = []
-            let prediction: PromptPrediction
+      checkCancelled(cancellationToken);
 
-            // expand template
-            const {
-                errors,
-                schemas: scs,
-                tools: fns,
-                messages: msgs,
-                chatParticipants: cps,
-                fileMerges: fms,
-                outputProcessors: ops,
-                fileOutputs: fos,
-                images: imgs,
-                prediction: pred,
-                disposables: dps,
-            } = await renderPromptNode(genOptions.model, node, {
-                flexTokens: genOptions.flexTokens,
-                fenceFormat: genOptions.fenceFormat,
-                trace:
runTrace,
-                cancellationToken,
-            })
 
-            schemas = scs
-            tools = fns
-            chatParticipants = cps
-            messages.push(...msgs)
-            fileMerges.push(...fms)
-            outputProcessors.push(...ops)
-            fileOutputs.push(...fos)
-            images.push(...imgs)
-            disposables.push(...dps)
-            prediction = pred
+      let tools: ToolCallback[] = undefined;
+      let schemas: Record<string, JSONSchema> = undefined;
+      let chatParticipants: ChatParticipant[] = undefined;
+      const images: PromptImage[] = [];
+      const fileMerges: FileMergeHandler[] = [];
+      const outputProcessors: PromptOutputProcessorHandler[] = [];
+      const fileOutputs: FileOutput[] = [];
+      const disposables: AsyncDisposable[] = [];
+      let prediction: PromptPrediction;
 
-            if (errors?.length) {
-                logError(errors.map((err) => errorMessage(err)).join("\n"))
-                throw new Error("errors while running prompt")
-            }
+      // expand template
+      const {
+        errors,
+        schemas: scs,
+        tools: fns,
+        messages: msgs,
+        chatParticipants: cps,
+        fileMerges: fms,
+        outputProcessors: ops,
+        fileOutputs: fos,
+        images: imgs,
+        prediction: pred,
+        disposables: dps,
+      } = await renderPromptNode(genOptions.model, node, {
+        flexTokens: genOptions.flexTokens,
+        fenceFormat: genOptions.fenceFormat,
+        trace: runTrace,
+        cancellationToken,
+      });
 
-            const systemScripts = resolveSystems(prj, runOptions ?? {}, tools)
-            if (
-                addFallbackToolSystems(
-                    systemScripts,
-                    tools,
-                    runOptions,
-                    genOptions
-                )
-            ) {
-                assert(!Object.isFrozen(genOptions))
-                genOptions.fallbackTools = true
-                dbg(`fallback tools added ${genOptions.fallbackTools}`)
-            }
+      schemas = scs;
+      tools = fns;
+      chatParticipants = cps;
+      messages.push(...msgs);
+      fileMerges.push(...fms);
+      outputProcessors.push(...ops);
+      fileOutputs.push(...fos);
+      images.push(...imgs);
+      disposables.push(...dps);
+      prediction = pred;
 
-            if (systemScripts.length)
-                try {
-                    runTrace.startDetails("👾 systems")
-                    for (const systemId of systemScripts) {
-                        checkCancelled(cancellationToken)
-                        dbg(`system ${systemId.id}`, {
-                            fallbackTools: genOptions.fallbackTools,
-                        })
-                        const system = resolveScript(prj, systemId)
-                        if (!system)
-                            throw new Error(
-                                `system template ${systemId.id} not found`
-                            )
-                        runTrace.startDetails(`👾 ${system.id}`)
-                        if (systemId.parameters)
-                            runTrace.detailsFenced(
-                                `parameters`,
-                                YAMLStringify(systemId.parameters)
-                            )
-                        const sysr = await callExpander(
-                            prj,
-                            system,
-                            mergeEnvVarsWithSystem(env, systemId),
-                            runTrace,
-                            genOptions,
-                            false
-                        )
-                        if (sysr.images?.length)
-                            throw new NotSupportedError("images")
-                        if (sysr.schemas) Object.assign(schemas, sysr.schemas)
-                        if (sysr.functions) tools.push(...sysr.functions)
-                        if (sysr.fileMerges?.length)
-                            fileMerges.push(...sysr.fileMerges)
-                        if (sysr.outputProcessors?.length)
-                            outputProcessors.push(...sysr.outputProcessors)
-                        if (sysr.chatParticipants)
-                            chatParticipants.push(...sysr.chatParticipants)
-                        if (sysr.fileOutputs?.length)
-                            fileOutputs.push(...sysr.fileOutputs)
-                        if (sysr.disposables?.length)
-                            disposables.push(...sysr.disposables)
-                        if (sysr.logs?.length)
-                            runTrace.details("📝 console.log", sysr.logs)
-                        for (const smsg of sysr.messages) {
-                            if (
-                                smsg.role === "user" &&
-                                typeof smsg.content === "string"
-                            ) {
-                                appendSystemMessage(messages, smsg.content)
-                                runTrace.fence(smsg.content, "markdown")
-                            } else
-                                throw new NotSupportedError(
-                                    "only string user messages supported in system"
-                                )
-                        }
-                        genOptions.logprobs =
-                            genOptions.logprobs || system.logprobs
-                        runTrace.detailsFenced(
-                            "💻 script source",
-                            system.jsSource,
-                            "js"
-                        )
-                        runTrace.endDetails()
-                        if (sysr.status
!== "success") - throw new Error( - `system ${system.id} failed ${sysr.status} ${sysr.statusText}` - ) - } - } finally { - runTrace.endDetails() - } - - if (genOptions.fallbackTools) { - dbg(`fallback tools definitions added`) - addToolDefinitionsMessage(messages, tools) - } + if (errors?.length) { + logError(errors.map((err) => errorMessage(err)).join("\n")); + throw new Error("errors while running prompt"); + } - finalizeMessages(genOptions.model, messages, { - ...genOptions, - fileOutputs, - trace: runTrace, - }) - const { completer } = await resolveLanguageModel( - configuration.provider - ) - if (!completer) - throw new Error("model driver not found for " + info.model) - checkCancelled(cancellationToken) + const systemScripts = resolveSystems(prj, runOptions ?? {}, tools); + if (addFallbackToolSystems(systemScripts, tools, runOptions, genOptions)) { + assert(!Object.isFrozen(genOptions)); + genOptions.fallbackTools = true; + dbg(`fallback tools added ${genOptions.fallbackTools}`); + } - const modelConcurrency = - options.modelConcurrency?.[genOptions.model] ?? - CHAT_REQUEST_PER_MODEL_CONCURRENT_LIMIT - const modelLimit = concurrentLimit( - "model:" + genOptions.model, - modelConcurrency - ) - dbg(`run ${genOptions.model}`) - const resp = await modelLimit(() => - executeChatSession( - configuration, - cancellationToken, - messages, - tools, - schemas, - fileOutputs, - outputProcessors, - fileMerges, - prediction, - completer, - chatParticipants, - disposables, - genOptions - ) - ) - tracePromptResult(runTrace, resp) - await writeFileEdits(resp.fileEdits, { - applyEdits, - trace: runTrace, - }) - if (resp.error && throwOnError) - throw new Error(errorMessage(resp.error)) - return resp - } catch (e) { - runTrace.error(e) - if (throwOnError) throw e - return { - messages, - text: "", - reasoning: lastAssistantReasoning(messages), - finishReason: isCancelError(e) ? 
"cancel" : "fail", - error: serializeError(e), + if (systemScripts.length) + try { + runTrace.startDetails("👾 systems"); + for (const systemId of systemScripts) { + checkCancelled(cancellationToken); + dbg(`system ${systemId.id}`, { + fallbackTools: genOptions.fallbackTools, + }); + const system = resolveScript(prj, systemId); + if (!system) throw new Error(`system template ${systemId.id} not found`); + runTrace.startDetails(`👾 ${system.id}`); + if (systemId.parameters) + runTrace.detailsFenced(`parameters`, YAMLStringify(systemId.parameters)); + const sysr = await callExpander( + prj, + system, + mergeEnvVarsWithSystem(env, systemId), + runTrace, + genOptions, + false, + ); + if (sysr.images?.length) throw new NotSupportedError("images"); + if (sysr.schemas) Object.assign(schemas, sysr.schemas); + if (sysr.functions) tools.push(...sysr.functions); + if (sysr.fileMerges?.length) fileMerges.push(...sysr.fileMerges); + if (sysr.outputProcessors?.length) outputProcessors.push(...sysr.outputProcessors); + if (sysr.chatParticipants) chatParticipants.push(...sysr.chatParticipants); + if (sysr.fileOutputs?.length) fileOutputs.push(...sysr.fileOutputs); + if (sysr.disposables?.length) disposables.push(...sysr.disposables); + if (sysr.logs?.length) runTrace.details("📝 console.log", sysr.logs); + for (const smsg of sysr.messages) { + if (smsg.role === "user" && typeof smsg.content === "string") { + appendSystemMessage(messages, smsg.content); + runTrace.fence(smsg.content, "markdown"); + } else throw new NotSupportedError("only string user messages supported in system"); } + genOptions.logprobs = genOptions.logprobs || system.logprobs; + runTrace.detailsFenced("💻 script source", system.jsSource, "js"); + runTrace.endDetails(); + if (sysr.status !== "success") + throw new Error(`system ${system.id} failed ${sysr.status} ${sysr.statusText}`); + } } finally { - runTrace.endDetails() + runTrace.endDetails(); } + + if (genOptions.fallbackTools) { + dbg(`fallback tools definitions added`); + addToolDefinitionsMessage(messages, tools); + } + + finalizeMessages(genOptions.model, messages, { + ...genOptions, + fileOutputs, + trace: runTrace, + }); + const { completer } = await resolveLanguageModel(configuration.provider); + if (!completer) throw new Error("model driver not found for " + info.model); + checkCancelled(cancellationToken); + + const modelConcurrency = + options.modelConcurrency?.[genOptions.model] ?? CHAT_REQUEST_PER_MODEL_CONCURRENT_LIMIT; + const modelLimit = concurrentLimit("model:" + genOptions.model, modelConcurrency); + dbg(`run ${genOptions.model}`); + const resp = await modelLimit(() => + executeChatSession( + configuration, + cancellationToken, + messages, + tools, + schemas, + fileOutputs, + outputProcessors, + fileMerges, + prediction, + completer, + chatParticipants, + disposables, + genOptions, + ), + ); + tracePromptResult(runTrace, resp); + await writeFileEdits(resp.fileEdits, { + applyEdits, + trace: runTrace, + }); + if (resp.error && throwOnError) throw new Error(errorMessage(resp.error)); + return resp; + } catch (e) { + runTrace.error(e); + if (throwOnError) throw e; + return { + messages, + text: "", + reasoning: lastAssistantReasoning(messages), + finishReason: isCancelError(e) ? 
"cancel" : "fail", + error: serializeError(e), + }; + } finally { + runTrace.endDetails(); } + }; - const generateImage = async ( - prompt: string, - imageOptions?: ImageGenerationOptions - ): Promise<{ image: WorkspaceFile; revisedPrompt?: string }> => { - if (!prompt) throw new Error("prompt is missing") + const generateImage = async ( + prompt: string, + imageOptions?: ImageGenerationOptions, + ): Promise<{ image: WorkspaceFile; revisedPrompt?: string }> => { + if (!prompt) throw new Error("prompt is missing"); - const imgTrace = trace.startTraceDetails("🖼️ generate image") - try { - const { style, quality, size, outputFormat, mime, ...rest } = - imageOptions || {} - const conn: ModelConnectionOptions = { - model: imageOptions?.model || IMAGE_GENERATION_MODEL_ID, - } - const { info, configuration } = await resolveModelConnectionInfo( - conn, - { - trace: imgTrace, - defaultModel: IMAGE_GENERATION_MODEL_ID, - cancellationToken, - token: true, - } - ) - if (info.error) throw new Error(info.error) - if (!configuration) - throw new Error( - `model configuration not found for ${conn.model}` - ) - const stats = options.stats.createChild( - info.model, - "generate image" - ) - checkCancelled(cancellationToken) - const { ok } = await runtimeHost.pullModel(configuration, { - trace: imgTrace, - cancellationToken, - }) - if (!ok) throw new Error(`failed to pull model '${conn}'`) - checkCancelled(cancellationToken) - const { imageGenerator } = await resolveLanguageModel( - configuration.provider - ) - if (!imageGenerator) - throw new Error("image generator not found for " + info.model) - imgTrace.itemValue(`model`, configuration.model) - const req = deleteUndefinedValues({ - model: configuration.model, - prompt: dedent(prompt), - size, - quality, - style, - outputFormat, - }) satisfies CreateImageRequest - const m = measure("img.generate", `${req.model} -> image`) - const res = await imageGenerator(req, configuration, { - trace: imgTrace, - cancellationToken, - ...rest, - }) - const duration = m() - if (res.error) { - imgTrace.error(errorMessage(res.error)) - return undefined - } - dbg(`usage: %o`, res.usage) - stats.addImageGenerationUsage(res.usage, duration) + const imgTrace = trace.startTraceDetails("🖼️ generate image"); + try { + const { style, quality, size, outputFormat, mime, ...rest } = imageOptions || {}; + const conn: ModelConnectionOptions = { + model: imageOptions?.model || IMAGE_GENERATION_MODEL_ID, + }; + const { info, configuration } = await resolveModelConnectionInfo(conn, { + trace: imgTrace, + defaultModel: IMAGE_GENERATION_MODEL_ID, + cancellationToken, + token: true, + }); + if (info.error) throw new Error(info.error); + if (!configuration) throw new Error(`model configuration not found for ${conn.model}`); + const stats = options.stats.createChild(info.model, "generate image"); + checkCancelled(cancellationToken); + const { ok } = await runtimeHost.pullModel(configuration, { + trace: imgTrace, + cancellationToken, + }); + if (!ok) throw new Error(`failed to pull model '${conn}'`); + checkCancelled(cancellationToken); + const { imageGenerator } = await resolveLanguageModel(configuration.provider); + if (!imageGenerator) throw new Error("image generator not found for " + info.model); + imgTrace.itemValue(`model`, configuration.model); + const req = deleteUndefinedValues({ + model: configuration.model, + prompt: dedent(prompt), + size, + quality, + style, + outputFormat, + }) satisfies CreateImageRequest; + const m = measure("img.generate", `${req.model} -> image`); + const res = 
await imageGenerator(req, configuration, { + trace: imgTrace, + cancellationToken, + ...rest, + }); + const duration = m(); + if (res.error) { + imgTrace.error(errorMessage(res.error)); + return undefined; + } + dbg(`usage: %o`, res.usage); + stats.addImageGenerationUsage(res.usage, duration); - const h = await hash(res.image, { length: 20 }) - const buf = await imageTransform(res.image, { - ...(imageOptions || {}), - mime: - mime ?? - (outputFormat === "jpeg" || outputFormat === "webp" - ? `image/jpeg` - : outputFormat === "png" - ? `image/png` - : undefined), - cancellationToken, - trace: imgTrace, - }) - const { ext } = (await fileTypeFromBuffer(buf)) || {} - const filename = dotGenaiscriptPath("image", h + "." + ext) - await host.writeFile(filename, buf) + const h = await hash(res.image, { length: 20 }); + const buf = await imageTransform(res.image, { + ...(imageOptions || {}), + mime: + mime ?? + (outputFormat === "jpeg" || outputFormat === "webp" + ? `image/jpeg` + : outputFormat === "png" + ? `image/png` + : undefined), + cancellationToken, + trace: imgTrace, + }); + const { ext } = (await fileTypeFromBuffer(buf)) || {}; + const filename = dotGenaiscriptPath("image", h + "." + ext); + await host.writeFile(filename, buf); - if (consoleColors) { - const size = terminalSize() - stderr.write( - await renderImageToTerminal(buf, { - ...size, - label: filename, - usage: res.usage, - modelId: info.model, - }) - ) - } else logVerbose(`image: ${filename}`) + if (consoleColors) { + const size = terminalSize(); + stderr.write( + await renderImageToTerminal(buf, { + ...size, + label: filename, + usage: res.usage, + modelId: info.model, + }), + ); + } else logVerbose(`image: ${filename}`); - imgTrace.image(filename, `generated image`) - imgTrace.detailsFenced(`🔀 revised prompt`, res.revisedPrompt) - return { - image: { - filename, - encoding: "base64", - content: toBase64(res.image), - } satisfies WorkspaceFile, - revisedPrompt: res.revisedPrompt, - } - } finally { - imgTrace.endDetails() - } + imgTrace.image(filename, `generated image`); + imgTrace.detailsFenced(`🔀 revised prompt`, res.revisedPrompt); + return { + image: { + filename, + encoding: "base64", + content: toBase64(res.image), + } satisfies WorkspaceFile, + revisedPrompt: res.revisedPrompt, + }; + } finally { + imgTrace.endDetails(); } + }; - const ctx: RunPromptContextNode = Object.freeze({ - ...turnCtx, - defAgent, - defTool, - defSchema, - defChatParticipant, - defFileOutput, - defOutputProcessor, - defFileMerge, - prompt, - runPrompt, - transcribe, - speak, - generateImage, - env, - }) + const ctx: RunPromptContextNode = Object.freeze({ + ...turnCtx, + defAgent, + defTool, + defSchema, + defChatParticipant, + defFileOutput, + defOutputProcessor, + defFileMerge, + prompt, + runPrompt, + transcribe, + speak, + generateImage, + env, + }); - return ctx + return ctx; } diff --git a/packages/core/src/sanitize.ts b/packages/core/src/sanitize.ts index 62bcccc30e..4ddf66584b 100644 --- a/packages/core/src/sanitize.ts +++ b/packages/core/src/sanitize.ts @@ -1,3 +1,3 @@ -import _sanitize from "sanitize-filename" +import _sanitize from "sanitize-filename"; -export const sanitizeFilename = _sanitize +export const sanitizeFilename = _sanitize; diff --git a/packages/core/src/schema.test.ts b/packages/core/src/schema.test.ts index f92c74cf11..11893b0c88 100644 --- a/packages/core/src/schema.test.ts +++ b/packages/core/src/schema.test.ts @@ -1,461 +1,447 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" 
+import { describe, test } from "node:test"; +import assert from "node:assert/strict"; import { - JSONSchemaInfer, - JSONSchemaStringify, - JSONSchemaStringifyToTypeScript, - JSONSchemaToFunctionParameters, - toStrictJSONSchema, - tryValidateJSONWithSchema, - validateJSONWithSchema, - validateSchema, -} from "./schema" -import { MarkdownTrace } from "./trace" + JSONSchemaInfer, + JSONSchemaStringify, + JSONSchemaStringifyToTypeScript, + JSONSchemaToFunctionParameters, + toStrictJSONSchema, + tryValidateJSONWithSchema, + validateJSONWithSchema, + validateSchema, +} from "./schema"; +import { MarkdownTrace } from "./trace"; describe("schema", () => { - test("cities", () => { - const source: JSONSchema = { - type: "array", - description: - "A list of cities with population and elevation information.", - items: { - type: "object", - description: - "A city with population and elevation information.", - properties: { - name: { - type: "string", - description: "The name of the city.", - }, - population: { - type: "number", - description: "The population of the city.", - }, - url: { - type: "string", - description: "The URL of the city's Wikipedia page.", - }, - extra: { - anyOf: [ - { - type: "string", - }, - { - type: "number", - }, - ], - }, - }, - required: ["name", "population", "url"], - }, - } - - const ts = JSONSchemaStringifyToTypeScript(source, { typeName: "Foo" }) - // console.log(ts) - assert.equal( - ts, - "// A list of cities with population and elevation information.\n" + - "type Foo = Array<{\n" + - " // The name of the city.\n" + - " name: string,\n" + - " // The population of the city.\n" + - " population: number,\n" + - " // The URL of the city's Wikipedia page.\n" + - " url: string,\n" + - " extra?: string | number,\n" + - " }>" - ) - }) - test("city", () => { - const source: JSONSchema = { - type: "object", - description: "A city with population and elevation information.", - properties: { - name: { - type: "string", - description: "The name of the city.", - }, - population: { - type: "number", - description: `The population + test("cities", () => { + const source: JSONSchema = { + type: "array", + description: "A list of cities with population and elevation information.", + items: { + type: "object", + description: "A city with population and elevation information.", + properties: { + name: { + type: "string", + description: "The name of the city.", + }, + population: { + type: "number", + description: "The population of the city.", + }, + url: { + type: "string", + description: "The URL of the city's Wikipedia page.", + }, + extra: { + anyOf: [ + { + type: "string", + }, + { + type: "number", + }, + ], + }, + }, + required: ["name", "population", "url"], + }, + }; + + const ts = JSONSchemaStringifyToTypeScript(source, { typeName: "Foo" }); + // console.log(ts) + assert.equal( + ts, + "// A list of cities with population and elevation information.\n" + + "type Foo = Array<{\n" + + " // The name of the city.\n" + + " name: string,\n" + + " // The population of the city.\n" + + " population: number,\n" + + " // The URL of the city's Wikipedia page.\n" + + " url: string,\n" + + " extra?: string | number,\n" + + " }>", + ); + }); + test("city", () => { + const source: JSONSchema = { + type: "object", + description: "A city with population and elevation information.", + properties: { + name: { + type: "string", + description: "The name of the city.", + }, + population: { + type: "number", + description: `The population of the city.`, - }, - url: { - type: "string", - description: 
"The URL of the city's Wikipedia page.", - }, - }, - required: ["name", "url"], - } - - const ts = JSONSchemaStringifyToTypeScript(source) - // console.log(ts) - assert.equal( - ts, - "// A city with population and elevation information.\n" + - "type Response = {\n" + - " // The name of the city.\n" + - " name: string,\n" + - " /* The population \n" + - " of the city. */\n" + - " population?: number,\n" + - " // The URL of the city's Wikipedia page.\n" + - " url: string,\n" + - "}" - ) - }) - test("strict", () => { - const source: JSONSchema = { - type: "object", - description: "A city with population and elevation information.", - properties: { - name: { - type: "string", - description: "The name of the city.", - }, - population: { - type: "number", - description: `The population + }, + url: { + type: "string", + description: "The URL of the city's Wikipedia page.", + }, + }, + required: ["name", "url"], + }; + + const ts = JSONSchemaStringifyToTypeScript(source); + // console.log(ts) + assert.equal( + ts, + "// A city with population and elevation information.\n" + + "type Response = {\n" + + " // The name of the city.\n" + + " name: string,\n" + + " /* The population \n" + + " of the city. */\n" + + " population?: number,\n" + + " // The URL of the city's Wikipedia page.\n" + + " url: string,\n" + + "}", + ); + }); + test("strict", () => { + const source: JSONSchema = { + type: "object", + description: "A city with population and elevation information.", + properties: { + name: { + type: "string", + description: "The name of the city.", + }, + population: { + type: "number", + description: `The population of the city.`, - }, - url: { - type: "string", - description: "The URL of the city's Wikipedia page.", - }, - }, - required: ["url"], - } - - const res = toStrictJSONSchema(source) - assert.deepStrictEqual(res.required, ["url", "name", "population"]) - assert.deepStrictEqual(res.properties["url"].type, "string") - assert.deepStrictEqual(res.properties["name"].type, ["string", "null"]) - assert.strictEqual(res.additionalProperties, false) - }) - - test("validateSchema", async () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - - const result = await validateSchema(schema) - assert.strictEqual(result, true) - }) - - test("validateJSONWithSchema - valid object", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - - const object = { name: "John", age: 30 } - const result = validateJSONWithSchema(object, schema) - assert.strictEqual(result.pathValid, true) - assert.strictEqual(result.schemaError, undefined) - }) - - test("validateJSONWithSchema - invalid object", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - - const object = { age: 30 } - const result = validateJSONWithSchema(object, schema) - assert.strictEqual(result.pathValid, false) - assert.ok(result.schemaError) - }) - - test("JSONSchemaStringify", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - - const result = JSONSchemaStringify(schema) - assert.strictEqual( - result, - JSON.stringify( - { - $schema: "http://json-schema.org/draft-07/schema#", - ...schema, - }, - null, - 2 - ) - ) - }) - - 
test("toStrictJSONSchema", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - - const result = toStrictJSONSchema(schema) - assert.deepStrictEqual(result.required, ["name", "age"]) - assert.deepStrictEqual(result.properties["name"].type, "string") - assert.deepStrictEqual(result.properties["age"].type, [ - "number", - "null", - ]) - assert.strictEqual(result.additionalProperties, false) - }) - - test("infer object", async () => { - const obj = { name: "John", age: 30 } - const schema = await JSONSchemaInfer(obj) - //console.log({ obj, schema }) - assert.strictEqual(schema.type, "object") - assert.deepStrictEqual(schema.properties, { + }, + url: { + type: "string", + description: "The URL of the city's Wikipedia page.", + }, + }, + required: ["url"], + }; + + const res = toStrictJSONSchema(source); + assert.deepStrictEqual(res.required, ["url", "name", "population"]); + assert.deepStrictEqual(res.properties["url"].type, "string"); + assert.deepStrictEqual(res.properties["name"].type, ["string", "null"]); + assert.strictEqual(res.additionalProperties, false); + }); + + test("validateSchema", async () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + + const result = await validateSchema(schema); + assert.strictEqual(result, true); + }); + + test("validateJSONWithSchema - valid object", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + + const object = { name: "John", age: 30 }; + const result = validateJSONWithSchema(object, schema); + assert.strictEqual(result.pathValid, true); + assert.strictEqual(result.schemaError, undefined); + }); + + test("validateJSONWithSchema - invalid object", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + + const object = { age: 30 }; + const result = validateJSONWithSchema(object, schema); + assert.strictEqual(result.pathValid, false); + assert.ok(result.schemaError); + }); + + test("JSONSchemaStringify", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + + const result = JSONSchemaStringify(schema); + assert.strictEqual( + result, + JSON.stringify( + { + $schema: "http://json-schema.org/draft-07/schema#", + ...schema, + }, + null, + 2, + ), + ); + }); + + test("toStrictJSONSchema", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + + const result = toStrictJSONSchema(schema); + assert.deepStrictEqual(result.required, ["name", "age"]); + assert.deepStrictEqual(result.properties["name"].type, "string"); + assert.deepStrictEqual(result.properties["age"].type, ["number", "null"]); + assert.strictEqual(result.additionalProperties, false); + }); + + test("infer object", async () => { + const obj = { name: "John", age: 30 }; + const schema = await JSONSchemaInfer(obj); + //console.log({ obj, schema }) + assert.strictEqual(schema.type, "object"); + assert.deepStrictEqual(schema.properties, { + name: { type: "string" }, + age: { type: "integer" }, + }); + }); + + test("infer array", async () => { + const obj = { 
links: [""] }; + const schema = await JSONSchemaInfer(obj); + //console.log({ obj, schema }) + assert.strictEqual(schema.type, "object"); + assert.deepStrictEqual(schema.properties, { + links: { type: "array", items: { type: "string" } }, + }); + }); + test("validateJSONWithSchema - missing required field", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name", "age"], + }; + + const object = { name: "John" }; + const result = validateJSONWithSchema(object, schema); + assert.strictEqual(result.pathValid, false); + assert.ok(result.schemaError); + }); + + test("validateJSONWithSchema - additional properties", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + additionalProperties: false, + }; + + const object = { name: "John", age: 30, extra: "extra value" }; + const result = validateJSONWithSchema(object, schema); + assert.strictEqual(result.pathValid, false); + assert.ok(result.schemaError); + }); + + test("JSONSchemaStringify - nested objects", () => { + const schema: JSONSchema = { + type: "object", + properties: { + user: { + type: "object", + properties: { name: { type: "string" }, - age: { type: "integer" }, - }) - }) - - test("infer array", async () => { - const obj = { links: [""] } - const schema = await JSONSchemaInfer(obj) - //console.log({ obj, schema }) - assert.strictEqual(schema.type, "object") - assert.deepStrictEqual(schema.properties, { - links: { type: "array", items: { type: "string" } }, - }) - }) - test("validateJSONWithSchema - missing required field", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name", "age"], - } - - const object = { name: "John" } - const result = validateJSONWithSchema(object, schema) - assert.strictEqual(result.pathValid, false) - assert.ok(result.schemaError) - }) - - test("validateJSONWithSchema - additional properties", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - additionalProperties: false, - } - - const object = { name: "John", age: 30, extra: "extra value" } - const result = validateJSONWithSchema(object, schema) - assert.strictEqual(result.pathValid, false) - assert.ok(result.schemaError) - }) - - test("JSONSchemaStringify - nested objects", () => { - const schema: JSONSchema = { - type: "object", - properties: { - user: { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - }, - }, - required: ["user"], - } - - const result = JSONSchemaStringify(schema) - assert.strictEqual( - result, - JSON.stringify( - { - $schema: "http://json-schema.org/draft-07/schema#", - ...schema, - }, - null, - 2 - ) - ) - }) - - test("validateSchema - invalid schema", async () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "invalidType" as any }, - }, - required: ["name"], - } - - const result = await validateSchema(schema) - assert.strictEqual(result, false) - }) - test("tryValidateJSONWithSchema - valid object with schema", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - - const object = { name: "John", age: 
30 } - const result = tryValidateJSONWithSchema(object, { schema }) - assert.deepStrictEqual(result, object) - }) - - test("tryValidateJSONWithSchema - invalid object with schema", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - - const object = { age: 30 } - const result = tryValidateJSONWithSchema(object, { schema }) - assert.strictEqual(result, undefined) - }) - - test("tryValidateJSONWithSchema - valid object without schema", () => { - const object = { name: "John", age: 30 } - const result = tryValidateJSONWithSchema(object) - assert.deepStrictEqual(result, object) - }) - - test("tryValidateJSONWithSchema - invalid schema with throwOnSchemaError", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "invalidType" as any }, - }, - required: ["name"], - } - - const object = { name: "John" } - assert.throws(() => { - tryValidateJSONWithSchema(object, { - schema, - throwOnValidationError: true, - }) - }, /schema is invalid/) - }) - - test("tryValidateJSONWithSchema - valid object with trace", () => { - const schema: JSONSchema = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - - const object = { name: "John", age: 30 } - const trace = new MarkdownTrace() - const result = tryValidateJSONWithSchema(object, { schema, trace }) - assert.deepStrictEqual(result, object) - }) - test("JSONSchemaToFunctionParameters - primitive types", () => { - assert.strictEqual(JSONSchemaToFunctionParameters("string"), "string") - assert.strictEqual(JSONSchemaToFunctionParameters("number"), "number") - assert.strictEqual(JSONSchemaToFunctionParameters("integer"), "number") - assert.strictEqual(JSONSchemaToFunctionParameters("boolean"), "boolean") - assert.strictEqual(JSONSchemaToFunctionParameters("null"), "null") - }) - - test("JSONSchemaToFunctionParameters - anyOf types", () => { - const schema: JSONSchemaAnyOf = { - anyOf: [{ type: "string" }, { type: "number" }], - } - assert.strictEqual( - JSONSchemaToFunctionParameters(schema), - "string | number" - ) - }) - - test("JSONSchemaToFunctionParameters - array type", () => { - const schema: JSONSchemaArray = { - type: "array", - items: { type: "string" }, - } - assert.strictEqual( - JSONSchemaToFunctionParameters(schema), - "{ string }[]" - ) - }) - - test("JSONSchemaToFunctionParameters - object type", () => { - const schema: JSONSchemaObject = { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - } - assert.strictEqual( - JSONSchemaToFunctionParameters(schema), - "name: string, age?: number" - ) - }) - - test("JSONSchemaToFunctionParameters - nested object", () => { - const schema: JSONSchemaObject = { - type: "object", - properties: { - user: { - type: "object", - properties: { - name: { type: "string" }, - age: { type: "number" }, - }, - required: ["name"], - }, - }, - required: ["user"], - } - assert.strictEqual( - JSONSchemaToFunctionParameters(schema), - "user: { name: string, age?: number }" - ) - }) - - test("JSONSchemaToFunctionParameters - unsupported schema", () => { - const schema: any = { type: "unsupported" } - assert.strictEqual(JSONSchemaToFunctionParameters(schema), "?") - }) -}) + age: { type: "number" }, + }, + required: ["name"], + }, + }, + required: ["user"], + }; + + const result = JSONSchemaStringify(schema); + 
assert.strictEqual( + result, + JSON.stringify( + { + $schema: "http://json-schema.org/draft-07/schema#", + ...schema, + }, + null, + 2, + ), + ); + }); + + test("validateSchema - invalid schema", async () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "invalidType" as any }, + }, + required: ["name"], + }; + + const result = await validateSchema(schema); + assert.strictEqual(result, false); + }); + test("tryValidateJSONWithSchema - valid object with schema", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + + const object = { name: "John", age: 30 }; + const result = tryValidateJSONWithSchema(object, { schema }); + assert.deepStrictEqual(result, object); + }); + + test("tryValidateJSONWithSchema - invalid object with schema", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + + const object = { age: 30 }; + const result = tryValidateJSONWithSchema(object, { schema }); + assert.strictEqual(result, undefined); + }); + + test("tryValidateJSONWithSchema - valid object without schema", () => { + const object = { name: "John", age: 30 }; + const result = tryValidateJSONWithSchema(object); + assert.deepStrictEqual(result, object); + }); + + test("tryValidateJSONWithSchema - invalid schema with throwOnSchemaError", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "invalidType" as any }, + }, + required: ["name"], + }; + + const object = { name: "John" }; + assert.throws(() => { + tryValidateJSONWithSchema(object, { + schema, + throwOnValidationError: true, + }); + }, /schema is invalid/); + }); + + test("tryValidateJSONWithSchema - valid object with trace", () => { + const schema: JSONSchema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + + const object = { name: "John", age: 30 }; + const trace = new MarkdownTrace(); + const result = tryValidateJSONWithSchema(object, { schema, trace }); + assert.deepStrictEqual(result, object); + }); + test("JSONSchemaToFunctionParameters - primitive types", () => { + assert.strictEqual(JSONSchemaToFunctionParameters("string"), "string"); + assert.strictEqual(JSONSchemaToFunctionParameters("number"), "number"); + assert.strictEqual(JSONSchemaToFunctionParameters("integer"), "number"); + assert.strictEqual(JSONSchemaToFunctionParameters("boolean"), "boolean"); + assert.strictEqual(JSONSchemaToFunctionParameters("null"), "null"); + }); + + test("JSONSchemaToFunctionParameters - anyOf types", () => { + const schema: JSONSchemaAnyOf = { + anyOf: [{ type: "string" }, { type: "number" }], + }; + assert.strictEqual(JSONSchemaToFunctionParameters(schema), "string | number"); + }); + + test("JSONSchemaToFunctionParameters - array type", () => { + const schema: JSONSchemaArray = { + type: "array", + items: { type: "string" }, + }; + assert.strictEqual(JSONSchemaToFunctionParameters(schema), "{ string }[]"); + }); + + test("JSONSchemaToFunctionParameters - object type", () => { + const schema: JSONSchemaObject = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }; + assert.strictEqual(JSONSchemaToFunctionParameters(schema), "name: string, age?: number"); + }); + + 
test("JSONSchemaToFunctionParameters - nested object", () => { + const schema: JSONSchemaObject = { + type: "object", + properties: { + user: { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" }, + }, + required: ["name"], + }, + }, + required: ["user"], + }; + assert.strictEqual( + JSONSchemaToFunctionParameters(schema), + "user: { name: string, age?: number }", + ); + }); + + test("JSONSchemaToFunctionParameters - unsupported schema", () => { + const schema: any = { type: "unsupported" }; + assert.strictEqual(JSONSchemaToFunctionParameters(schema), "?"); + }); +}); diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index ddf1a389b8..0c0a2bf6b6 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -1,12 +1,12 @@ // Import necessary modules and functions -import { JSON5parse } from "./json5" -import { MarkdownTrace, TraceOptions } from "./trace" -import Ajv from "ajv" -import { YAMLParse } from "./yaml" -import { errorMessage } from "./error" -import { promptParametersSchemaToJSONSchema } from "./parameters" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("schema") +import { JSON5parse } from "./json5"; +import { MarkdownTrace, TraceOptions } from "./trace"; +import Ajv from "ajv"; +import { YAMLParse } from "./yaml"; +import { errorMessage } from "./error"; +import { promptParametersSchemaToJSONSchema } from "./parameters"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("schema"); /** * Checks if the given object is a valid JSON Schema. @@ -14,9 +14,9 @@ const dbg = genaiscriptDebug("schema") * @returns True if the object is a valid JSON Schema, false otherwise. */ export function isJSONSchema(obj: any) { - if (typeof obj === "object" && obj.type === "object") return true - if (typeof obj === "object" && obj.type === "array") return true - return false + if (typeof obj === "object" && obj.type === "object") return true; + if (typeof obj === "object" && obj.type === "array") return true; + return false; } /** @@ -26,56 +26,51 @@ export function isJSONSchema(obj: any) { * @returns A string representation of function parameters, compatible with the provided schema. 
*/ export function JSONSchemaToFunctionParameters( - schema: JSONSchemaType | JSONSchemaTypeName + schema: JSONSchemaType | JSONSchemaTypeName, ): string { - return renderJSONSchemaToFunctionParameters(schema, 0) + return renderJSONSchemaToFunctionParameters(schema, 0); } function renderJSONSchemaToFunctionParameters( - schema: JSONSchemaType | JSONSchemaTypeName, - depth: number + schema: JSONSchemaType | JSONSchemaTypeName, + depth: number, ): string { - depth = depth + 1 - if (!schema) return "" - else if (schema === "string") return "string" - else if (schema === "number") return "number" - else if (schema === "integer") return "number" - else if (schema === "boolean") return "boolean" - else if (schema === "null") return "null" - else if ((schema as JSONSchemaAnyOf).anyOf) { - const anyof = schema as JSONSchemaAnyOf - delete anyof.uiGroup - return (anyof.anyOf || []) - .map((x) => renderJSONSchemaToFunctionParameters(x, depth)) - .join(" | ") - } else if (Array.isArray(schema)) { - return schema - .filter((t) => t !== "null") - .map((x) => renderJSONSchemaToFunctionParameters(x, depth)) - .join(" | ") - } else { - const single = schema as JSONSchemaSimpleType - if (single.type === "array") { - return `{ ${renderJSONSchemaToFunctionParameters(single.items, depth)} }[]` - } else if (single.type === "object") { - const required = single.required || [] - return `${depth > 1 ? `{ ` : ""}${Object.entries(single.properties) - .sort( - (l, r) => - (required.includes(l[0]) ? -1 : 1) - - (required.includes(r[0]) ? -1 : 1) - ) - .map( - ([name, prop]) => - `${name}${required.includes(name) ? "" : "?"}: ${renderJSONSchemaToFunctionParameters(prop, depth)}` - ) - .join(", ")}${depth > 1 ? " }" : ""}` - } else if (single.type === "string") return "string" - else if (single.type === "boolean") return "boolean" - else if (single.type === "number" || single.type === "integer") - return "number" - } - return "?" + depth = depth + 1; + if (!schema) return ""; + else if (schema === "string") return "string"; + else if (schema === "number") return "number"; + else if (schema === "integer") return "number"; + else if (schema === "boolean") return "boolean"; + else if (schema === "null") return "null"; + else if ((schema as JSONSchemaAnyOf).anyOf) { + const anyof = schema as JSONSchemaAnyOf; + delete anyof.uiGroup; + return (anyof.anyOf || []) + .map((x) => renderJSONSchemaToFunctionParameters(x, depth)) + .join(" | "); + } else if (Array.isArray(schema)) { + return schema + .filter((t) => t !== "null") + .map((x) => renderJSONSchemaToFunctionParameters(x, depth)) + .join(" | "); + } else { + const single = schema as JSONSchemaSimpleType; + if (single.type === "array") { + return `{ ${renderJSONSchemaToFunctionParameters(single.items, depth)} }[]`; + } else if (single.type === "object") { + const required = single.required || []; + return `${depth > 1 ? `{ ` : ""}${Object.entries(single.properties) + .sort((l, r) => (required.includes(l[0]) ? -1 : 1) - (required.includes(r[0]) ? -1 : 1)) + .map( + ([name, prop]) => + `${name}${required.includes(name) ? "" : "?"}: ${renderJSONSchemaToFunctionParameters(prop, depth)}`, + ) + .join(", ")}${depth > 1 ? 
" }" : ""}`; + } else if (single.type === "string") return "string"; + else if (single.type === "boolean") return "boolean"; + else if (single.type === "number" || single.type === "integer") return "number"; + } + return "?"; } /** @@ -86,125 +81,117 @@ function renderJSONSchemaToFunctionParameters( * @returns The TypeScript type definition as a string, including JSDoc comments for schema descriptions. */ export function JSONSchemaStringifyToTypeScript( - schema: JSONSchema | JSONSchemaType, - options?: { typeName?: string; export?: boolean } + schema: JSONSchema | JSONSchemaType, + options?: { typeName?: string; export?: boolean }, ) { - const { typeName = "Response" } = options || {} - let lines: string[] = [] // Array to accumulate lines of TypeScript code - let indent = 0 // Manage indentation level + const { typeName = "Response" } = options || {}; + let lines: string[] = []; // Array to accumulate lines of TypeScript code + let indent = 0; // Manage indentation level - const described = schema as JSONSchemaDescribed - appendJsDoc(described.title, described.description) // Add JSDoc for schema description - append( - `${options?.export ? "export " : ""}type ${typeName.replace(/\s+/g, "_")} =` - ) - stringifyNode(schema) // Convert schema to TypeScript - const res = lines.join("\n") // Join lines into a single TypeScript definition - dbg(res) - return res + const described = schema as JSONSchemaDescribed; + appendJsDoc(described.title, described.description); // Add JSDoc for schema description + append(`${options?.export ? "export " : ""}type ${typeName.replace(/\s+/g, "_")} =`); + stringifyNode(schema); // Convert schema to TypeScript + const res = lines.join("\n"); // Join lines into a single TypeScript definition + dbg(res); + return res; - // Append a line to the TypeScript definition - function append(line: string) { - if (/=$/.test(lines[lines.length - 1])) - lines[lines.length - 1] = lines[lines.length - 1] + " " + line - else if (/[<}]$/.test(lines[lines.length - 1])) - lines[lines.length - 1] = lines[lines.length - 1] + line - else lines.push(" ".repeat(indent) + line) - } + // Append a line to the TypeScript definition + function append(line: string) { + if (/=$/.test(lines[lines.length - 1])) + lines[lines.length - 1] = lines[lines.length - 1] + " " + line; + else if (/[<}]$/.test(lines[lines.length - 1])) + lines[lines.length - 1] = lines[lines.length - 1] + line; + else lines.push(" ".repeat(indent) + line); + } - // Append JSDoc comments - function appendJsDoc(...parts: string[]) { - const text = parts?.filter((d) => d).join("\n") - if (!text) return - if (text.indexOf("\n") > -1) - append( - `/* ${text.split(/\n/g).join("\n" + " ".repeat(indent))} */` - ) - else append(`// ${text}`) - } + // Append JSDoc comments + function appendJsDoc(...parts: string[]) { + const text = parts?.filter((d) => d).join("\n"); + if (!text) return; + if (text.indexOf("\n") > -1) + append(`/* ${text.split(/\n/g).join("\n" + " ".repeat(indent))} */`); + else append(`// ${text}`); + } - // Convert a JSON Schema node to TypeScript - function stringifyNode(node: JSONSchemaType): string { - if (node === undefined) return "any" - else if ((node as JSONSchemaAnyOf).anyOf) { - const n = node as JSONSchemaAnyOf - return n.anyOf - .map((x) => { - const v = stringifyNode(x) - return /\s/.test(v) ? 
`(${v})` : v - }) - .filter((x) => x) - .join(" | ") - } else { - const n = node as JSONSchemaSimpleType - if (n.type === "array") { - stringifyArray(n) - return undefined - } else if (n.type === "object") { - stringifyObject(n) - return undefined - } else if (n.type === "string") return "string" - else if (n.type === "boolean") return "boolean" - else if (n.type === "number" || n.type === "integer") - return "number" - } - return "unknown" + // Convert a JSON Schema node to TypeScript + function stringifyNode(node: JSONSchemaType): string { + if (node === undefined) return "any"; + else if ((node as JSONSchemaAnyOf).anyOf) { + const n = node as JSONSchemaAnyOf; + return n.anyOf + .map((x) => { + const v = stringifyNode(x); + return /\s/.test(v) ? `(${v})` : v; + }) + .filter((x) => x) + .join(" | "); + } else { + const n = node as JSONSchemaSimpleType; + if (n.type === "array") { + stringifyArray(n); + return undefined; + } else if (n.type === "object") { + stringifyObject(n); + return undefined; + } else if (n.type === "string") return "string"; + else if (n.type === "boolean") return "boolean"; + else if (n.type === "number" || n.type === "integer") return "number"; } + return "unknown"; + } - // Extract documentation for a node - function stringifyNodeDoc(node: JSONSchemaType): string { - const n = node as JSONSchemaSimpleType - const doc = [n?.title, n?.description] - switch (n.type) { - case "number": - case "integer": { - if (n.minimum !== undefined) doc.push(`minimum: ${n.minimum}`) - if (n.exclusiveMinimum !== undefined) - doc.push(`exclusiveMinimum: ${n.exclusiveMinimum}`) - if (n.exclusiveMaximum !== undefined) - doc.push(`exclusiveMaximum : ${n.exclusiveMaximum}`) - if (n.maximum !== undefined) doc.push(`maximum: ${n.maximum}`) - break - } - case "string": { - if (n.pattern) doc.push(`pattern: ${n.pattern}`) - break - } - } - return doc.filter((d) => d).join("\n") + // Extract documentation for a node + function stringifyNodeDoc(node: JSONSchemaType): string { + const n = node as JSONSchemaSimpleType; + const doc = [n?.title, n?.description]; + switch (n.type) { + case "number": + case "integer": { + if (n.minimum !== undefined) doc.push(`minimum: ${n.minimum}`); + if (n.exclusiveMinimum !== undefined) doc.push(`exclusiveMinimum: ${n.exclusiveMinimum}`); + if (n.exclusiveMaximum !== undefined) doc.push(`exclusiveMaximum : ${n.exclusiveMaximum}`); + if (n.maximum !== undefined) doc.push(`maximum: ${n.maximum}`); + break; + } + case "string": { + if (n.pattern) doc.push(`pattern: ${n.pattern}`); + break; + } } + return doc.filter((d) => d).join("\n"); + } - // Convert a JSON Schema object to TypeScript - function stringifyObject(object: JSONSchemaObject): void { - const { required, properties, additionalProperties } = object - append(`{`) - indent++ - if (additionalProperties) append(`[key: string]: any,`) - if (properties) - Object.keys(properties).forEach((key) => { - const prop = properties[key] - const field = `${key}${required?.includes(key) ? 
"" : "?"}` - const doc = stringifyNodeDoc(prop) - appendJsDoc(doc) - append(`${field}:`) - const v = stringifyNode(prop) - if (v) - lines[lines.length - 1] = lines[lines.length - 1] + " " + v - lines[lines.length - 1] = lines[lines.length - 1] + "," - }) - indent-- - append(`}`) - } + // Convert a JSON Schema object to TypeScript + function stringifyObject(object: JSONSchemaObject): void { + const { required, properties, additionalProperties } = object; + append(`{`); + indent++; + if (additionalProperties) append(`[key: string]: any,`); + if (properties) + Object.keys(properties).forEach((key) => { + const prop = properties[key]; + const field = `${key}${required?.includes(key) ? "" : "?"}`; + const doc = stringifyNodeDoc(prop); + appendJsDoc(doc); + append(`${field}:`); + const v = stringifyNode(prop); + if (v) lines[lines.length - 1] = lines[lines.length - 1] + " " + v; + lines[lines.length - 1] = lines[lines.length - 1] + ","; + }); + indent--; + append(`}`); + } - // Convert a JSON Schema array to TypeScript - function stringifyArray(array: JSONSchemaArray): void { - indent++ - append(`Array<`) - const v = stringifyNode(array.items) - indent-- - if (v) lines[lines.length - 1] = lines[lines.length - 1] + v + ">" - else append(`>`) - } + // Convert a JSON Schema array to TypeScript + function stringifyArray(array: JSONSchemaArray): void { + indent++; + append(`Array<`); + const v = stringifyNode(array.items); + indent--; + if (v) lines[lines.length - 1] = lines[lines.length - 1] + v + ">"; + else append(`>`); + } } /** @@ -213,24 +200,24 @@ export function JSONSchemaStringifyToTypeScript( * @returns A Promise resolving with the validation result, indicating whether the schema is valid or not. */ export async function validateSchema(schema: JSONSchema) { - const ajv = new Ajv() - return await ajv.validateSchema(schema, false) + const ajv = new Ajv(); + return await ajv.validateSchema(schema, false); } export function tryValidateJSONWithSchema( - object: T, - options?: JSONSchemaValidationOptions & TraceOptions + object: T, + options?: JSONSchemaValidationOptions & TraceOptions, ) { - const { schema, throwOnValidationError, trace } = options || {} - if (object !== undefined && schema) { - const validation = validateJSONWithSchema(object, schema, { trace }) - if (validation.schemaError) { - dbg("%O", validation) - if (throwOnValidationError) throw new Error(validation.schemaError) - return undefined - } + const { schema, throwOnValidationError, trace } = options || {}; + if (object !== undefined && schema) { + const validation = validateJSONWithSchema(object, schema, { trace }); + if (validation.schemaError) { + dbg("%O", validation); + if (throwOnValidationError) throw new Error(validation.schemaError); + return undefined; } - return object + } + return object; } /** @@ -241,40 +228,40 @@ export function tryValidateJSONWithSchema( * @returns Validation result indicating success status and error details if validation fails. 
*/ export function validateJSONWithSchema( - object: any, - schema: JSONSchema, - options?: { trace: MarkdownTrace } + object: any, + schema: JSONSchema, + options?: { trace: MarkdownTrace }, ): FileEditValidation { - const { trace } = options || {} - if (!schema) - return { - pathValid: false, - schemaError: "no schema provided", - } + const { trace } = options || {}; + if (!schema) + return { + pathValid: false, + schemaError: "no schema provided", + }; - try { - const ajv = new Ajv({ - allowUnionTypes: true, - }) - const validate = ajv.compile(schema) - const valid = validate(object) - if (!valid) { - dbg(`validation failed: ${ajv.errorsText(validate.errors)}`) - trace?.warn(`schema validation failed`) - trace?.fence(validate.errors) - trace?.fence(schema, "json") - return { - schema, - pathValid: false, - schemaError: ajv.errorsText(validate.errors), - } - } - return { schema, pathValid: true } - } catch (e) { - dbg(`runtime error: ${errorMessage(e)}`) - trace?.warn("schema validation failed") - return { schema, pathValid: false, schemaError: errorMessage(e) } + try { + const ajv = new Ajv({ + allowUnionTypes: true, + }); + const validate = ajv.compile(schema); + const valid = validate(object); + if (!valid) { + dbg(`validation failed: ${ajv.errorsText(validate.errors)}`); + trace?.warn(`schema validation failed`); + trace?.fence(validate.errors); + trace?.fence(schema, "json"); + return { + schema, + pathValid: false, + schemaError: ajv.errorsText(validate.errors), + }; } + return { schema, pathValid: true }; + } catch (e) { + dbg(`runtime error: ${errorMessage(e)}`); + trace?.warn("schema validation failed"); + return { schema, pathValid: false, schemaError: errorMessage(e) }; + } } /** @@ -286,54 +273,48 @@ export function validateJSONWithSchema( * @returns Array of data frames containing validation results, parsed data, and associated schemas. 
*/ export function validateFencesWithSchema( - fences: Fenced[], - schemas: Record, - options?: { trace: MarkdownTrace } + fences: Fenced[], + schemas: Record, + options?: { trace: MarkdownTrace }, ): DataFrame[] { - const frames: DataFrame[] = [] - // Validate schemas in fences - for (const fence of fences?.filter( - ({ language, args }) => - args?.schema && (language === "json" || language === "yaml") - )) { - const { language, content: val, args } = fence - const schema = args?.schema - - // Validate well-formed JSON/YAML - let data: any - try { - if (language === "json") data = JSON5parse(val, { repair: true }) - else if (language === "yaml") data = YAMLParse(val) - } catch (e) { - fence.validation = { - pathValid: false, - schemaError: errorMessage(e), - } - } - if (!fence.validation) { - // Check if schema specified - const schemaObj = schemas[schema] - if (!schemaObj) { - fence.validation = { - pathValid: false, - schemaError: `schema ${schema} not found`, - } - } else - fence.validation = validateJSONWithSchema( - data, - schemaObj, - options - ) - } + const frames: DataFrame[] = []; + // Validate schemas in fences + for (const fence of fences?.filter( + ({ language, args }) => args?.schema && (language === "json" || language === "yaml"), + )) { + const { language, content: val, args } = fence; + const schema = args?.schema; - // Add to frames - frames.push({ - schema, - data, - validation: fence.validation, - }) + // Validate well-formed JSON/YAML + let data: any; + try { + if (language === "json") data = JSON5parse(val, { repair: true }); + else if (language === "yaml") data = YAMLParse(val); + } catch (e) { + fence.validation = { + pathValid: false, + schemaError: errorMessage(e), + }; } - return frames + if (!fence.validation) { + // Check if schema specified + const schemaObj = schemas[schema]; + if (!schemaObj) { + fence.validation = { + pathValid: false, + schemaError: `schema ${schema} not found`, + }; + } else fence.validation = validateJSONWithSchema(data, schemaObj, options); + } + + // Add to frames + frames.push({ + schema, + data, + validation: fence.validation, + }); + } + return frames; } /** @@ -342,15 +323,14 @@ export function validateFencesWithSchema( * @returns The formatted JSON string representation of the schema. */ export function JSONSchemaStringify(schema: JSONSchema) { - return JSON.stringify( - { - $schema: - schema.$schema ?? "http://json-schema.org/draft-07/schema#", - ...schema, - }, - null, - 2 - ) + return JSON.stringify( + { + $schema: schema.$schema ?? "http://json-schema.org/draft-07/schema#", + ...schema, + }, + null, + 2, + ); } /** @@ -363,64 +343,59 @@ export function JSONSchemaStringify(schema: JSONSchema) { * @returns A strict JSON Schema with enforced constraints. 
*/ export function toStrictJSONSchema( - schema: PromptParametersSchema | JSONSchema, - options?: { - noDefaults?: boolean - defaultOptional?: boolean - } + schema: PromptParametersSchema | JSONSchema, + options?: { + noDefaults?: boolean; + defaultOptional?: boolean; + }, ): any { - const { noDefaults, defaultOptional } = options || {} - const clone: JSONSchema = structuredClone( - promptParametersSchemaToJSONSchema(schema, { noDefaults }) - ) - visit(clone) + const { noDefaults, defaultOptional } = options || {}; + const clone: JSONSchema = structuredClone( + promptParametersSchemaToJSONSchema(schema, { noDefaults }), + ); + visit(clone); - //if (clone.type !== "object") - // throw new Error("top level schema must be object") + //if (clone.type !== "object") + // throw new Error("top level schema must be object") - // Recursive function to make the schema strict - function visit(node: JSONSchemaType): void { - const n = node as JSONSchemaSimpleType - delete n.uiGroup - switch (n.type) { - case "boolean": { - delete n.uiType - break - } - case "string": { - delete n.uiType - delete n.uiSuggestions - break - } - case "object": { - if (n.additionalProperties) - throw new Error("additionalProperties: true not supported") - n.additionalProperties = false - n.required = n.required || [] - for (const key in n.properties) { - // https://platform.openai.com/docs/guides/structured-outputs/all-fields-must-be-required - const child = n.properties[key] as JSONSchemaSimpleType - visit(child) - if (!defaultOptional && !n.required.includes(key)) { - n.required.push(key) - if ( - ["string", "number", "boolean", "integer"].includes( - child.type - ) - ) { - child.type = [child.type, "null"] as any - } - } - } - break - } - case "array": { - visit(n.items) - break + // Recursive function to make the schema strict + function visit(node: JSONSchemaType): void { + const n = node as JSONSchemaSimpleType; + delete n.uiGroup; + switch (n.type) { + case "boolean": { + delete n.uiType; + break; + } + case "string": { + delete n.uiType; + delete n.uiSuggestions; + break; + } + case "object": { + if (n.additionalProperties) throw new Error("additionalProperties: true not supported"); + n.additionalProperties = false; + n.required = n.required || []; + for (const key in n.properties) { + // https://platform.openai.com/docs/guides/structured-outputs/all-fields-must-be-required + const child = n.properties[key] as JSONSchemaSimpleType; + visit(child); + if (!defaultOptional && !n.required.includes(key)) { + n.required.push(key); + if (["string", "number", "boolean", "integer"].includes(child.type)) { + child.type = [child.type, "null"] as any; } + } } + break; + } + case "array": { + visit(n.items); + break; + } } - return clone + } + return clone; } /** @@ -431,6 +406,6 @@ export function toStrictJSONSchema( * @returns A Promise resolving to the inferred JSON Schema. 
*/ export async function JSONSchemaInfer(obj: any): Promise { - const res = promptParametersSchemaToJSONSchema(obj, { noDefaults: true }) - return res + const res = promptParametersSchemaToJSONSchema(obj, { noDefaults: true }); + return res; } diff --git a/packages/core/src/scriptresolver.ts b/packages/core/src/scriptresolver.ts index 87574a428b..3a694ceea1 100644 --- a/packages/core/src/scriptresolver.ts +++ b/packages/core/src/scriptresolver.ts @@ -1,13 +1,13 @@ -import { RESOURCE_HASH_LENGTH } from "./constants" -import { runtimeHost } from "./host" -import { dotGenaiscriptPath } from "./workdir" -import { join } from "node:path" -import { CancellationOptions } from "./cancellation" -import { tryResolveResource } from "./resources" -import { TraceOptions } from "./trace" -import { genaiscriptDebug } from "./debug" -import { hash } from "./crypto" -const dbg = genaiscriptDebug("scripts:resolve") +import { RESOURCE_HASH_LENGTH } from "./constants"; +import { runtimeHost } from "./host"; +import { dotGenaiscriptPath } from "./workdir"; +import { join } from "node:path"; +import { CancellationOptions } from "./cancellation"; +import { tryResolveResource } from "./resources"; +import { TraceOptions } from "./trace"; +import { genaiscriptDebug } from "./debug"; +import { hash } from "./crypto"; +const dbg = genaiscriptDebug("scripts:resolve"); /** * Attempts to resolve a script from the provided URL and manages caching. @@ -23,34 +23,29 @@ const dbg = genaiscriptDebug("scripts:resolve") * If no cached content is found, it returns the filename of the first file in the resource. */ export async function tryResolveScript( - url: string, - options?: TraceOptions & CancellationOptions + url: string, + options?: TraceOptions & CancellationOptions, ): Promise { - const resource = await tryResolveResource(url, options) - if (!resource) return undefined + const resource = await tryResolveResource(url, options); + if (!resource) return undefined; - const { uri, files } = resource - dbg(`resolved resource %s %d`, uri, files?.length) - if (!files?.length) return undefined + const { uri, files } = resource; + dbg(`resolved resource %s %d`, uri, files?.length); + if (!files?.length) return undefined; - const cache = files.some((f) => f.content) - if (!cache) return files[0].filename - else { - const sha = await hash([files], { - length: RESOURCE_HASH_LENGTH, - }) - const fn = dotGenaiscriptPath( - "resources", - uri.protocol, - uri.hostname, - sha - ) - dbg(`resolved cache: %s`, fn) - const cached = files.map((f) => ({ - ...f, - filename: join(fn, f.filename), - })) - await runtimeHost.workspace.writeFiles(cached) - return cached[0].filename - } + const cache = files.some((f) => f.content); + if (!cache) return files[0].filename; + else { + const sha = await hash([files], { + length: RESOURCE_HASH_LENGTH, + }); + const fn = dotGenaiscriptPath("resources", uri.protocol, uri.hostname, sha); + dbg(`resolved cache: %s`, fn); + const cached = files.map((f) => ({ + ...f, + filename: join(fn, f.filename), + })); + await runtimeHost.workspace.writeFiles(cached); + return cached[0].filename; + } } diff --git a/packages/core/src/scripts.ts b/packages/core/src/scripts.ts index 566113c06a..184aebe6d6 100644 --- a/packages/core/src/scripts.ts +++ b/packages/core/src/scripts.ts @@ -1,22 +1,15 @@ -import { collectFolders } from "./ast" -import { - DOCS_URL, - NEW_SCRIPT_TEMPLATE, - TYPE_DEFINITION_BASENAME, -} from "./constants" -import { - githubCopilotInstructions as ghInstructions, - promptDefinitions, -} from 
"./default_prompts" -import { tryReadText, writeText } from "./fs" -import { host } from "./host" -import { logVerbose } from "./util" -import { Project } from "./server/messages" -import { collapseNewlines } from "./cleaners" -import { gitIgnoreEnsure } from "./gitignore" -import { dotGenaiscriptPath } from "./workdir" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("scripts") +import { collectFolders } from "./ast"; +import { DOCS_URL, NEW_SCRIPT_TEMPLATE, TYPE_DEFINITION_BASENAME } from "./constants"; +import { githubCopilotInstructions as ghInstructions, promptDefinitions } from "./default_prompts"; +import { tryReadText, writeText } from "./fs"; +import { host } from "./host"; +import { logVerbose } from "./util"; +import { Project } from "./server/messages"; +import { collapseNewlines } from "./cleaners"; +import { gitIgnoreEnsure } from "./gitignore"; +import { dotGenaiscriptPath } from "./workdir"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("scripts"); /** * Creates a new script object based on the provided name and optional template. @@ -27,21 +20,18 @@ const dbg = genaiscriptDebug("scripts") * @param options.title - A custom title for the script. Defaults to the provided name. * @returns A new script object with the specified or default attributes. */ -export function createScript( - name: string, - options?: { template: PromptScript; title?: string } -) { - const { template, title } = options || {} - const t = structuredClone( - template || { - id: "", - title: title || name, - text: "New script empty template", - jsSource: NEW_SCRIPT_TEMPLATE, - } - ) - t.id = "" - return t +export function createScript(name: string, options?: { template: PromptScript; title?: string }) { + const { template, title } = options || {}; + const t = structuredClone( + template || { + id: "", + title: title || name, + text: "New script empty template", + jsSource: NEW_SCRIPT_TEMPLATE, + }, + ); + t.id = ""; + return t; } /** @@ -56,77 +46,70 @@ export function createScript( * - `project.scripts`: An array of scripts from the project, where system scripts determine tool usage. * - `project.folders`: A set of folder data collected with relevant directory and file details. 
*/ -export async function fixPromptDefinitions( - project: Project, - options?: { force?: boolean } -) { - const folders = collectFolders(project, options) - const systems = project.scripts.filter((t) => t.isSystem) - const tools = systems.map(({ defTools }) => defTools || []).flat() +export async function fixPromptDefinitions(project: Project, options?: { force?: boolean }) { + const folders = collectFolders(project, options); + const systems = project.scripts.filter((t) => t.isSystem); + const tools = systems.map(({ defTools }) => defTools || []).flat(); - logVerbose(`fixing type definitions`) - for (const folder of folders) { - const { dirname, ts, js } = folder - logVerbose(` ${dirname}`) - await gitIgnoreEnsure(dirname, [ - "genaiscript.d.ts", - "tsconfig.json", - "jsconfig.json", - ]) - for (let [defName, defContent] of Object.entries(promptDefinitions)) { - // patch genaiscript - if (defName === "genaiscript.d.ts") { - // update the system prompt identifiers - defContent = defContent - .replace( - "type SystemPromptId = OptionsOrString", - `type SystemPromptId = OptionsOrString<\n | ${systems - .sort((a, b) => a.id.localeCompare(b.id)) - .map((s) => JSON.stringify(s.id)) - .join("\n | ")}\n>` - ) - .replace( - " system?: SystemPromptId[]", - ` /** + logVerbose(`fixing type definitions`); + for (const folder of folders) { + const { dirname, ts, js } = folder; + logVerbose(` ${dirname}`); + await gitIgnoreEnsure(dirname, ["genaiscript.d.ts", "tsconfig.json", "jsconfig.json"]); + for (let [defName, defContent] of Object.entries(promptDefinitions)) { + // patch genaiscript + if (defName === "genaiscript.d.ts") { + // update the system prompt identifiers + defContent = defContent + .replace( + "type SystemPromptId = OptionsOrString", + `type SystemPromptId = OptionsOrString<\n | ${systems + .sort((a, b) => a.id.localeCompare(b.id)) + .map((s) => JSON.stringify(s.id)) + .join("\n | ")}\n>`, + ) + .replace( + " system?: SystemPromptId[]", + ` /** * System prompt identifiers ([reference](https://microsoft.github.io/genaiscript/reference/scripts/system/)) ${systems.map((s) => ` * - \`${s.id}\`: ${s.title || s.description}`).join("\n")} **/ - system?: SystemPromptId[]` - ) + system?: SystemPromptId[]`, + ); - // update the tool prompt identifiers - defContent = defContent - .replace( - "type SystemToolId = OptionsOrString", - `type SystemToolId = OptionsOrString<\n | ${tools - .sort((a, b) => a.id.localeCompare(b.id)) - .map((s) => JSON.stringify(s.id)) - .join("\n | ")}\n>` - ) - .replace( - " tools?: SystemToolId[]", - `/** + // update the tool prompt identifiers + defContent = defContent + .replace( + "type SystemToolId = OptionsOrString", + `type SystemToolId = OptionsOrString<\n | ${tools + .sort((a, b) => a.id.localeCompare(b.id)) + .map((s) => JSON.stringify(s.id)) + .join("\n | ")}\n>`, + ) + .replace( + " tools?: SystemToolId[]", + `/** * System tool identifiers ([reference](https://microsoft.github.io/genaiscript/reference/scripts/tools/)) ${tools.map((s) => `* - \`${s.id}\`: ${s.description}`).join("\n")} **/ - tools?: SystemToolId[]` - ) - } + tools?: SystemToolId[]`, + ); + } - if (defName === "tsconfig.json" && !ts) continue - if (defName === "jsconfig.json" && !js) continue + if (defName === "tsconfig.json" && !ts) continue; + if (defName === "jsconfig.json" && !js) continue; - const fn = host.path.join(dirname, defName) - const current = await tryReadText(fn) - if (current !== defContent) { - logVerbose(`updating ${fn}`) - await writeText(fn, defContent) - } - } + const fn = 
host.path.join(dirname, defName); + const current = await tryReadText(fn); + if (current !== defContent) { + logVerbose(`updating ${fn}`); + await writeText(fn, defContent); + } } + } } -let _fullDocsText: string +let _fullDocsText: string; /** * Updates custom prompts and related files with new definitions and data. * @@ -141,39 +124,36 @@ let _fullDocsText: string * Fetches and processes external documentation content if required. */ export async function fixGitHubCopilotInstructions(options?: { - githubCopilotInstructions?: boolean - docs?: boolean + githubCopilotInstructions?: boolean; + docs?: boolean; }) { - const { githubCopilotInstructions, docs } = options || {} - // write genaiscript.d.ts - const gdir = dotGenaiscriptPath() - await writeText(host.path.join(gdir, ".gitignore"), "*") - await writeText( - host.path.join(gdir, TYPE_DEFINITION_BASENAME), - promptDefinitions[TYPE_DEFINITION_BASENAME] - ) // Write the TypeScript definition file - if (githubCopilotInstructions) { - const pdir = dotGenaiscriptPath("instructions") - const pn = host.path.join(pdir, "genaiscript.instructions.md") - await writeText(pn, ghInstructions) // Write the GitHub Copilot instructions file - } - if (githubCopilotInstructions || docs) { - const ddir = dotGenaiscriptPath("instructions") - const route = "llms-full.txt" - const url = `${DOCS_URL}/${route}` - const dn = host.path.join(ddir, route) - let text = _fullDocsText - if (!text) { - const content = await fetch(url) - if (!content.ok) logVerbose(`failed to fetch ${url}`) - text = await content.text() - text = _fullDocsText = collapseNewlines( - text.replace( - /^\!\[\]\(" - ) - ) - } - await writeText(dn, text) // Write the GitHub Copilot prompt file + const { githubCopilotInstructions, docs } = options || {}; + // write genaiscript.d.ts + const gdir = dotGenaiscriptPath(); + await writeText(host.path.join(gdir, ".gitignore"), "*"); + await writeText( + host.path.join(gdir, TYPE_DEFINITION_BASENAME), + promptDefinitions[TYPE_DEFINITION_BASENAME], + ); // Write the TypeScript definition file + if (githubCopilotInstructions) { + const pdir = dotGenaiscriptPath("instructions"); + const pn = host.path.join(pdir, "genaiscript.instructions.md"); + await writeText(pn, ghInstructions); // Write the GitHub Copilot instructions file + } + if (githubCopilotInstructions || docs) { + const ddir = dotGenaiscriptPath("instructions"); + const route = "llms-full.txt"; + const url = `${DOCS_URL}/${route}`; + const dn = host.path.join(ddir, route); + let text = _fullDocsText; + if (!text) { + const content = await fetch(url); + if (!content.ok) logVerbose(`failed to fetch ${url}`); + text = await content.text(); + text = _fullDocsText = collapseNewlines( + text.replace(/^\!\[\]\("), + ); } + await writeText(dn, text); // Write the GitHub Copilot prompt file + } } diff --git a/packages/core/src/secretscanner.ts b/packages/core/src/secretscanner.ts index b30d0830b6..42be6ddd0f 100644 --- a/packages/core/src/secretscanner.ts +++ b/packages/core/src/secretscanner.ts @@ -1,10 +1,10 @@ -import { genaiscriptDebug } from "./debug" -import { runtimeHost } from "./host" -import { TraceOptions } from "./trace" -import { logWarn } from "./util" -const dbg = genaiscriptDebug("secrets") +import { genaiscriptDebug } from "./debug"; +import { runtimeHost } from "./host"; +import { TraceOptions } from "./trace"; +import { logWarn } from "./util"; +const dbg = genaiscriptDebug("secrets"); -const cachedSecretScanners: Record = {} +const cachedSecretScanners: Record = {}; /** * Redacts 
secrets from the provided text by replacing matches of configured secret patterns with ``. @@ -18,34 +18,30 @@ const cachedSecretScanners: Record = {} * - found: A record where keys are secret names and values are counts of occurrences detected. */ export function redactSecrets(text: string, options?: TraceOptions) { - const { trace } = options ?? {} - const { secretPatterns = {} } = runtimeHost.config - const found: Record = {} - const res = Object.entries(secretPatterns).reduce( - (acc, [name, pattern]) => { - if (!pattern) return acc // null, undefined, or empty string - const regex: RegExp = - cachedSecretScanners[pattern] ?? - (cachedSecretScanners[pattern] = new RegExp(pattern, "g")) - return acc.replace(regex, () => { - found[name] = (found[name] ?? 0) + 1 - return `` - }) - }, - text - ) + const { trace } = options ?? {}; + const { secretPatterns = {} } = runtimeHost.config; + const found: Record = {}; + const res = Object.entries(secretPatterns).reduce((acc, [name, pattern]) => { + if (!pattern) return acc; // null, undefined, or empty string + const regex: RegExp = + cachedSecretScanners[pattern] ?? (cachedSecretScanners[pattern] = new RegExp(pattern, "g")); + return acc.replace(regex, () => { + found[name] = (found[name] ?? 0) + 1; + return ``; + }); + }, text); - if (Object.keys(found).length > 0 && trace) { - const msg = `detected secrets: ${Object.entries(found) - .map(([k, v]) => `${k} (${v})`) - .join(", ")}` - dbg(msg) - logWarn(msg) - trace.warn(msg) - } + if (Object.keys(found).length > 0 && trace) { + const msg = `detected secrets: ${Object.entries(found) + .map(([k, v]) => `${k} (${v})`) + .join(", ")}`; + dbg(msg); + logWarn(msg); + trace.warn(msg); + } - return { - text: res, - found, - } + return { + text: res, + found, + }; } diff --git a/packages/core/src/semver.ts b/packages/core/src/semver.ts index 8bb0109acb..14687b342c 100644 --- a/packages/core/src/semver.ts +++ b/packages/core/src/semver.ts @@ -1,5 +1,5 @@ -import { satisfies, parse } from "semver" +import { satisfies, parse } from "semver"; -export const semverSatisfies = satisfies +export const semverSatisfies = satisfies; -export const semverParse = parse +export const semverParse = parse; diff --git a/packages/core/src/server/client.ts b/packages/core/src/server/client.ts index 1978c8a476..345ae389f6 100644 --- a/packages/core/src/server/client.ts +++ b/packages/core/src/server/client.ts @@ -1,202 +1,194 @@ -import type { ChatCompletionsProgressReport } from "../chattypes" -import { CLOSE, MESSAGE } from "../constants" -import { randomHex } from "../crypto" -import { errorMessage } from "../error" -import { generateId } from "../id" -import { MarkdownTrace } from "../trace" -import { logError } from "../util" +import type { ChatCompletionsProgressReport } from "../chattypes"; +import { CLOSE, MESSAGE } from "../constants"; +import { randomHex } from "../crypto"; +import { errorMessage } from "../error"; +import { generateId } from "../id"; +import { MarkdownTrace } from "../trace"; +import { logError } from "../util"; import type { - PromptScriptTestRun, - PromptScriptTestRunOptions, - PromptScriptTestRunResponse, - PromptScriptRunOptions, - PromptScriptStart, - PromptScriptResponseEvents, - ChatEvents, - ChatChunk, - ChatStart, - GenerationResult, -} from "./messages" -import { WebSocketClient } from "./wsclient" + PromptScriptTestRun, + PromptScriptTestRunOptions, + PromptScriptTestRunResponse, + PromptScriptRunOptions, + PromptScriptStart, + PromptScriptResponseEvents, + ChatEvents, + ChatChunk, + 
ChatStart, + GenerationResult, +} from "./messages"; +import { WebSocketClient } from "./wsclient"; export type LanguageModelChatRequest = ( - request: ChatStart, - onChunk: (param: Omit) => void -) => Promise + request: ChatStart, + onChunk: (param: Omit) => void, +) => Promise; export class VsCodeClient extends WebSocketClient { - chatRequest: LanguageModelChatRequest + chatRequest: LanguageModelChatRequest; - private runs: Record< - string, - { - script: string - files: string[] - options: Partial - trace: MarkdownTrace - infoCb: (partialResponse: { text: string }) => void - partialCb: (progress: ChatCompletionsProgressReport) => void - promise: Promise> - resolve: (value: Partial) => void - reject: (reason?: any) => void - signal: AbortSignal - } - > = {} - - constructor( - readonly url: string, - readonly externalUrl: string, - readonly cspUrl: string - ) { - super(url) - this.configure() + private runs: Record< + string, + { + script: string; + files: string[]; + options: Partial; + trace: MarkdownTrace; + infoCb: (partialResponse: { text: string }) => void; + partialCb: (progress: ChatCompletionsProgressReport) => void; + promise: Promise>; + resolve: (value: Partial) => void; + reject: (reason?: any) => void; + signal: AbortSignal; } + > = {}; - private installPolyfill() { - if (typeof WebSocket === "undefined") { - try { - require("websocket-polyfill") - } catch (err) { - logError("websocket polyfill failed") - logError(err) - } - } + constructor( + readonly url: string, + readonly externalUrl: string, + readonly cspUrl: string, + ) { + super(url); + this.configure(); + } + + private installPolyfill() { + if (typeof WebSocket === "undefined") { + try { + require("websocket-polyfill"); + } catch (err) { + logError("websocket polyfill failed"); + logError(err); + } } + } - private configure(): void { - this.installPolyfill() - this.addEventListener(CLOSE, (e) => { - const reason = (e as any).reason || "websocket closed" - for (const [runId, run] of Object.entries(this.runs)) { - run.reject(reason) - delete this.runs[runId] - } - }) + private configure(): void { + this.installPolyfill(); + this.addEventListener(CLOSE, (e) => { + const reason = (e as any).reason || "websocket closed"; + for (const [runId, run] of Object.entries(this.runs)) { + run.reject(reason); + delete this.runs[runId]; + } + }); - this.addEventListener(MESSAGE, async (e) => { - const event = e as MessageEvent< - PromptScriptResponseEvents | ChatEvents - > - // handle run progress - const ev = event.data as PromptScriptResponseEvents - const { runId, type } = ev - const run = this.runs[runId] + this.addEventListener(MESSAGE, async (e) => { + const event = e as MessageEvent; + // handle run progress + const ev = event.data as PromptScriptResponseEvents; + const { runId, type } = ev; + const run = this.runs[runId]; + if (run) { + switch (type) { + case "script.progress": { + if (ev.trace) run.trace.appendContent(ev.trace); + if (ev.progress && !ev.inner) run.infoCb({ text: ev.progress }); + if (ev.response || ev.tokens !== undefined) + run.partialCb({ + responseChunk: ev.responseChunk, + responseSoFar: ev.response, + reasoningSoFar: ev.reasoning, + tokensSoFar: ev.tokens, + inner: ev.inner, + }); + break; + } + case "script.end": { + const run = this.runs[runId]; + delete this.runs[runId]; if (run) { - switch (type) { - case "script.progress": { - if (ev.trace) run.trace.appendContent(ev.trace) - if (ev.progress && !ev.inner) - run.infoCb({ text: ev.progress }) - if (ev.response || ev.tokens !== undefined) - 
run.partialCb({ - responseChunk: ev.responseChunk, - responseSoFar: ev.response, - reasoningSoFar: ev.reasoning, - tokensSoFar: ev.tokens, - inner: ev.inner, - }) - break - } - case "script.end": { - const run = this.runs[runId] - delete this.runs[runId] - if (run) { - const res = structuredClone(ev.result) - if (res?.text) run.infoCb(res as { text: string }) - run.resolve(res) - } - break - } - } - } else { - const cev = event.data as ChatEvents - const { chatId, type } = cev - switch (type) { - case "chat.start": { - if (!this.chatRequest) - throw new Error( - "GitHub Copilot Chat Models not supported" - ) - await this.chatRequest(cev, (chunk) => { - this.queue({ - ...chunk, - chatId, - type: "chat.chunk", - }) - }) - // done - } - } + const res = structuredClone(ev.result); + if (res?.text) run.infoCb(res as { text: string }); + run.resolve(res); } - }) - } - - async runScript( - script: string, - files: string[], - options: Partial & { - jsSource?: string - signal: AbortSignal - trace: MarkdownTrace - infoCb: (partialResponse: { text: string }) => void - partialCb: (progress: ChatCompletionsProgressReport) => void - } - ) { - const runId = generateId() - const { signal, infoCb, partialCb, trace, ...optionsRest } = options - let resolve: (value: Partial) => void - let reject: (reason?: any) => void - const promise = new Promise>((res, rej) => { - resolve = res - reject = rej - }) - this.runs[runId] = { - script, - files, - options, - trace, - infoCb, - partialCb, - promise, - resolve, - reject, - signal, + break; + } } - signal?.addEventListener("abort", (ev) => { - this.abortScript(runId, "user aborted") - }) - const res = await this.queue({ - type: "script.start", - runId, - script, - files, - options: optionsRest, - }) - if (!res.response?.ok) { - delete this.runs[runId] // failed to start - throw new Error( - errorMessage(res.response?.error) ?? "failed to start script" - ) + } else { + const cev = event.data as ChatEvents; + const { chatId, type } = cev; + switch (type) { + case "chat.start": { + if (!this.chatRequest) throw new Error("GitHub Copilot Chat Models not supported"); + await this.chatRequest(cev, (chunk) => { + this.queue({ + ...chunk, + chatId, + type: "chat.chunk", + }); + }); + // done + } } - return { runId, request: promise } - } + } + }); + } - abortScriptRuns(reason: string) { - for (const runId of Object.keys(this.runs)) { - this.abortScript(runId, reason) - delete this.runs[runId] - } + async runScript( + script: string, + files: string[], + options: Partial & { + jsSource?: string; + signal: AbortSignal; + trace: MarkdownTrace; + infoCb: (partialResponse: { text: string }) => void; + partialCb: (progress: ChatCompletionsProgressReport) => void; + }, + ) { + const runId = generateId(); + const { signal, infoCb, partialCb, trace, ...optionsRest } = options; + let resolve: (value: Partial) => void; + let reject: (reason?: any) => void; + const promise = new Promise>((res, rej) => { + resolve = res; + reject = rej; + }); + this.runs[runId] = { + script, + files, + options, + trace, + infoCb, + partialCb, + promise, + resolve, + reject, + signal, + }; + signal?.addEventListener("abort", (ev) => { + this.abortScript(runId, "user aborted"); + }); + const res = await this.queue({ + type: "script.start", + runId, + script, + files, + options: optionsRest, + }); + if (!res.response?.ok) { + delete this.runs[runId]; // failed to start + throw new Error(errorMessage(res.response?.error) ?? 
"failed to start script"); } + return { runId, request: promise }; + } - async runTest( - script: PromptScript, - options?: PromptScriptTestRunOptions - ): Promise { - const res = await this.queue({ - type: "tests.run", - scripts: script?.id ? [script?.id] : undefined, - options, - }) - return res.response + abortScriptRuns(reason: string) { + for (const runId of Object.keys(this.runs)) { + this.abortScript(runId, reason); + delete this.runs[runId]; } + } + + async runTest( + script: PromptScript, + options?: PromptScriptTestRunOptions, + ): Promise { + const res = await this.queue({ + type: "tests.run", + scripts: script?.id ? [script?.id] : undefined, + options, + }); + return res.response; + } } diff --git a/packages/core/src/server/messages.ts b/packages/core/src/server/messages.ts index 949034d91f..fbdab47c07 100644 --- a/packages/core/src/server/messages.ts +++ b/packages/core/src/server/messages.ts @@ -1,405 +1,400 @@ -import type { - ChatCompletionAssistantMessageParam, - ChatCompletionMessageParam, -} from "../chattypes" +import type { ChatCompletionAssistantMessageParam, ChatCompletionMessageParam } from "../chattypes"; export interface ResponseStatus { - ok: boolean - error?: SerializedError - status?: number + ok: boolean; + error?: SerializedError; + status?: number; } export type OpenAIAPIType = - | "openai" - | "azure" - | "localai" - | "azure_ai_inference" - | "azure_serverless" - | "azure_serverless_models" - | "alibaba" - | "huggingface" - | "github" + | "openai" + | "azure" + | "localai" + | "azure_ai_inference" + | "azure_serverless" + | "azure_serverless_models" + | "alibaba" + | "huggingface" + | "github"; export type AzureCredentialsType = - | "default" - | "cli" - | "env" - | "powershell" - | "devcli" - | "managedidentity" - | "workloadidentity" + | "default" + | "cli" + | "env" + | "powershell" + | "devcli" + | "managedidentity" + | "workloadidentity"; export interface LanguageModelConfiguration extends LanguageModelReference { - base: string - token: string - source?: string - type?: OpenAIAPIType - version?: string - azureCredentialsType?: AzureCredentialsType + base: string; + token: string; + source?: string; + type?: OpenAIAPIType; + version?: string; + azureCredentialsType?: AzureCredentialsType; } -export type ResolvedLanguageModelConfiguration = - Partial & { - models?: LanguageModelInfo[] - error?: string - } +export type ResolvedLanguageModelConfiguration = Partial & { + models?: LanguageModelInfo[]; + error?: string; +}; /** * Represents a project containing templates and diagnostics. * Provides utility methods to manage templates and diagnose issues. 
*/ export interface Project { - systemDir?: string - scripts: PromptScript[] // Array of templates within the project - diagnostics: Diagnostic[] // Array of diagnostic records + systemDir?: string; + scripts: PromptScript[]; // Array of templates within the project + diagnostics: Diagnostic[]; // Array of diagnostic records } export interface RequestMessage { - type: string - id: string - response?: ResponseStatus + type: string; + id: string; + response?: ResponseStatus; } export interface ServerKill extends RequestMessage { - type: "server.kill" + type: "server.kill"; } export interface ServerVersion extends RequestMessage { - type: "server.version" - version?: string + type: "server.version"; + version?: string; } export interface ServerEnv extends RequestMessage { - type: "server.env" + type: "server.env"; } export interface ServerEnvResponse extends ResponseStatus { - providers: ResolvedLanguageModelConfiguration[] - remote?: { - url: string - branch?: string - } - configuration: { - name?: string - description?: string - version?: string - homepage?: string - readme?: string - author?: string - } + providers: ResolvedLanguageModelConfiguration[]; + remote?: { + url: string; + branch?: string; + }; + configuration: { + name?: string; + description?: string; + version?: string; + homepage?: string; + readme?: string; + author?: string; + }; } -export interface PromptScriptTestRunOptions - extends PromptScriptModelRunOptions { - testProvider?: string - models?: string[] - groups?: string[] +export interface PromptScriptTestRunOptions extends PromptScriptModelRunOptions { + testProvider?: string; + models?: string[]; + groups?: string[]; } export interface PromptScriptModelRunOptions { - model?: string - smallModel?: string - visionModel?: string + model?: string; + smallModel?: string; + visionModel?: string; } export interface PromptScriptTestRun extends RequestMessage { - type: "tests.run" - scripts?: string[] - options?: PromptScriptTestRunOptions + type: "tests.run"; + scripts?: string[]; + options?: PromptScriptTestRunOptions; } export interface PromptScriptTestResult extends ResponseStatus { - script: string - value?: { - evalId: string - results: { - stats?: { - successes: number - failures: number - errors: number - tokenUsage?: { - cached?: number - completion?: number - prompt?: number - total?: number - } - } - } - } + script: string; + value?: { + evalId: string; + results: { + stats?: { + successes: number; + failures: number; + errors: number; + tokenUsage?: { + cached?: number; + completion?: number; + prompt?: number; + total?: number; + }; + }; + }; + }; } export interface PromptScriptTestRunResponse extends ResponseStatus { - value?: PromptScriptTestResult[] + value?: PromptScriptTestResult[]; } export interface PromptScriptRunOptions { - excludedFiles: string[] - ignoreGitIgnore: boolean - runRetry: string - out: string - retry: string - retryDelay: string - maxDelay: string - json: boolean - yaml: boolean - outTrace: string - outOutput: string - outAnnotations: string - outChangelogs: string - pullRequest: string - pullRequestComment: string | boolean - pullRequestDescription: string | boolean - pullRequestReviews: boolean - teamsMessage: boolean - outData: string - label: string - temperature: string | number - reasoningEffort: "high" | "low" | "medium" - topP: string | number - toolChoice: ChatToolChoice - seed: string | number - maxTokens: string | number - maxToolCalls: string | number - maxDataRepairs: string | number - model: string - smallModel: string - 
visionModel: string - embeddingsModel: string - modelAlias: string[] - provider: string - csvSeparator: string - cache: boolean | string - cacheName: string - applyEdits: boolean - failOnErrors: boolean - removeOut: boolean - vars: string[] | Record - fallbackTools: boolean - jsSource: string - logprobs: boolean - topLogprobs: number - fenceFormat: FenceFormat - workspaceFiles?: WorkspaceFile[] - runTrace: boolean - outputTrace: boolean - accept: string + excludedFiles: string[]; + ignoreGitIgnore: boolean; + runRetry: string; + out: string; + retry: string; + retryDelay: string; + maxDelay: string; + json: boolean; + yaml: boolean; + outTrace: string; + outOutput: string; + outAnnotations: string; + outChangelogs: string; + pullRequest: string; + pullRequestComment: string | boolean; + pullRequestDescription: string | boolean; + pullRequestReviews: boolean; + teamsMessage: boolean; + outData: string; + label: string; + temperature: string | number; + reasoningEffort: "high" | "low" | "medium"; + topP: string | number; + toolChoice: ChatToolChoice; + seed: string | number; + maxTokens: string | number; + maxToolCalls: string | number; + maxDataRepairs: string | number; + model: string; + smallModel: string; + visionModel: string; + embeddingsModel: string; + modelAlias: string[]; + provider: string; + csvSeparator: string; + cache: boolean | string; + cacheName: string; + applyEdits: boolean; + failOnErrors: boolean; + removeOut: boolean; + vars: string[] | Record; + fallbackTools: boolean; + jsSource: string; + logprobs: boolean; + topLogprobs: number; + fenceFormat: FenceFormat; + workspaceFiles?: WorkspaceFile[]; + runTrace: boolean; + outputTrace: boolean; + accept: string; } export interface RunResultList extends RequestMessage { - type: "run.list" + type: "run.list"; } export interface RunResultListResponse extends ResponseStatus { - runs: { scriptId: string; runId: string; creationTime: string }[] + runs: { scriptId: string; runId: string; creationTime: string }[]; } export interface PromptScriptList extends RequestMessage { - type: "script.list" + type: "script.list"; } export interface PromptScriptListResponse extends ResponseStatus { - project: Project + project: Project; } export interface PromptScriptStart extends RequestMessage { - type: "script.start" - runId: string - script: string - files?: string[] - options: Partial + type: "script.start"; + runId: string; + script: string; + files?: string[]; + options: Partial; } export interface PromptScriptStartResponse extends ResponseStatus { - runId: string + runId: string; } // Type representing possible statuses of generation -export type GenerationStatus = "success" | "error" | "cancelled" | undefined +export type GenerationStatus = "success" | "error" | "cancelled" | undefined; // Interface for the result of a generation process export interface GenerationResult extends GenerationOutput { - /** - * Run identifier - */ - runId: string - /** - * The environment variables passed to the prompt - */ - env: Partial - - /** - * Expanded prompt text composed of multiple messages - */ - messages: ChatCompletionMessageParam[] - - /** - * Edits to apply, if any - */ - edits: Edits[] - - /** - * Source annotations parsed as diagnostics - */ - annotations: Diagnostic[] - - /** - * Sections of the ChangeLog - */ - changelogs: string[] - - /** - * Error message or object, if any error occurred - */ - error?: SerializedError - - /** - * Status of the generation process (success, error, or cancelled) - */ - status: GenerationStatus - - /** - * 
Additional status information or message - */ - statusText?: string - - /** - * Completion status from the language model - */ - finishReason?: string - - /** - * Optional label for the run - */ - label?: string - - /** - * Version of the GenAIScript used - */ - version: string - - /** - * Log probs of the choices - */ - choices?: Logprob[] - - /** - * Logprobs if computed - */ - logprobs?: Logprob[] - - /** - * Statistics of the generation - */ - perplexity?: number - - /** - * Structural uncertainty - */ - uncertainty?: number + /** + * Run identifier + */ + runId: string; + /** + * The environment variables passed to the prompt + */ + env: Partial; + + /** + * Expanded prompt text composed of multiple messages + */ + messages: ChatCompletionMessageParam[]; + + /** + * Edits to apply, if any + */ + edits: Edits[]; + + /** + * Source annotations parsed as diagnostics + */ + annotations: Diagnostic[]; + + /** + * Sections of the ChangeLog + */ + changelogs: string[]; + + /** + * Error message or object, if any error occurred + */ + error?: SerializedError; + + /** + * Status of the generation process (success, error, or cancelled) + */ + status: GenerationStatus; + + /** + * Additional status information or message + */ + statusText?: string; + + /** + * Completion status from the language model + */ + finishReason?: string; + + /** + * Optional label for the run + */ + label?: string; + + /** + * Version of the GenAIScript used + */ + version: string; + + /** + * Log probs of the choices + */ + choices?: Logprob[]; + + /** + * Logprobs if computed + */ + logprobs?: Logprob[]; + + /** + * Statistics of the generation + */ + perplexity?: number; + + /** + * Structural uncertainty + */ + uncertainty?: number; } export interface PromptScriptEndResponseEvent { - type: "script.end" - runId: string - exitCode: number - result?: Partial - trace?: string + type: "script.end"; + runId: string; + exitCode: number; + result?: Partial; + trace?: string; } export interface PromptScriptAbort extends RequestMessage { - type: "script.abort" - reason: string - runId: string + type: "script.abort"; + reason: string; + runId: string; } export interface PromptScriptProgressResponseEvent { - type: "script.progress" - runId: string + type: "script.progress"; + runId: string; - trace?: string - output?: string + trace?: string; + output?: string; - progress?: string + progress?: string; - tokens?: number + tokens?: number; - response?: string - responseChunk?: string - responseTokens?: Logprob[] + response?: string; + responseChunk?: string; + responseTokens?: Logprob[]; - reasoning?: string - reasoningChunk?: string - reasoningTokens?: Logprob[] + reasoning?: string; + reasoningChunk?: string; + reasoningTokens?: Logprob[]; - inner?: boolean + inner?: boolean; } export interface LanguageModelConfigurationRequest extends RequestMessage { - type: "model.configuration" - model: string - token?: boolean - response?: LanguageModelConfigurationResponse + type: "model.configuration"; + model: string; + token?: boolean; + response?: LanguageModelConfigurationResponse; } export interface LanguageModelConfigurationResponse extends ResponseStatus { - info?: LanguageModelConfiguration + info?: LanguageModelConfiguration; } export interface ServerResponse extends ResponseStatus { - version: string - node: string - platform: string - arch: string - pid: number + version: string; + node: string; + platform: string; + arch: string; + pid: number; } export interface ChatStart { - type: "chat.start" - chatId: string - messages: 
ChatCompletionAssistantMessageParam[] - model: string - modelOptions?: { - temperature?: number - } + type: "chat.start"; + chatId: string; + messages: ChatCompletionAssistantMessageParam[]; + model: string; + modelOptions?: { + temperature?: number; + }; } export interface ChatCancel { - type: "chat.cancel" - chatId: string + type: "chat.cancel"; + chatId: string; } export interface ChatChunk extends RequestMessage { - type: "chat.chunk" - chatId: string - model?: string - finishReason?: string - chunk?: string - tokens?: number - error?: SerializedError + type: "chat.chunk"; + chatId: string; + model?: string; + finishReason?: string; + chunk?: string; + tokens?: number; + error?: SerializedError; } -export type LogLevel = "debug" | "info" | "warn" | "error" +export type LogLevel = "debug" | "info" | "warn" | "error"; export interface LogMessageEvent { - type: "log" - message: string - level: LogLevel + type: "log"; + message: string; + level: LogLevel; } export type RequestMessages = - | ServerKill - | ServerEnv - | ServerVersion - | PromptScriptTestRun - | PromptScriptStart - | PromptScriptAbort - | ChatChunk - | LanguageModelConfigurationRequest - | PromptScriptList - | RunResultList + | ServerKill + | ServerEnv + | ServerVersion + | PromptScriptTestRun + | PromptScriptStart + | PromptScriptAbort + | ChatChunk + | LanguageModelConfigurationRequest + | PromptScriptList + | RunResultList; export type PromptScriptResponseEvents = - | PromptScriptProgressResponseEvent - | PromptScriptEndResponseEvent + | PromptScriptProgressResponseEvent + | PromptScriptEndResponseEvent; -export type ChatEvents = ChatStart | ChatCancel +export type ChatEvents = ChatStart | ChatCancel; diff --git a/packages/core/src/server/wsclient.ts b/packages/core/src/server/wsclient.ts index 17dc2f3305..442ae20dce 100644 --- a/packages/core/src/server/wsclient.ts +++ b/packages/core/src/server/wsclient.ts @@ -1,303 +1,279 @@ import { - CHANGE, - CLIENT_RECONNECT_DELAY, - CLOSE, - CONNECT, - ERROR, - MESSAGE, - OPEN, - QUEUE_SCRIPT_START, - RECONNECT, -} from "../constants" + CHANGE, + CLIENT_RECONNECT_DELAY, + CLOSE, + CONNECT, + ERROR, + MESSAGE, + OPEN, + QUEUE_SCRIPT_START, + RECONNECT, +} from "../constants"; import type { - ChatEvents, - LanguageModelConfiguration, - LanguageModelConfigurationRequest, - LogMessageEvent, - Project, - PromptScriptAbort, - PromptScriptList, - PromptScriptListResponse, - PromptScriptResponseEvents, - PromptScriptRunOptions, - PromptScriptStart, - RequestMessage, - ResponseStatus, - ServerEnv, - ServerEnvResponse, - ServerResponse, - ServerVersion, -} from "./messages" + ChatEvents, + LanguageModelConfiguration, + LanguageModelConfigurationRequest, + LogMessageEvent, + Project, + PromptScriptAbort, + PromptScriptList, + PromptScriptListResponse, + PromptScriptResponseEvents, + PromptScriptRunOptions, + PromptScriptStart, + RequestMessage, + ResponseStatus, + ServerEnv, + ServerEnvResponse, + ServerResponse, + ServerVersion, +} from "./messages"; interface Awaiter { - msg: Omit - promise?: Promise - resolve: (data: any) => void - reject: (error: unknown) => void + msg: Omit; + promise?: Promise; + resolve: (data: any) => void; + reject: (error: unknown) => void; } export class WebSocketClient extends EventTarget { - private awaiters: Record = {} - private _nextId = 1 - private _ws: WebSocket - private _pendingMessages: string[] = [] - private _reconnectTimeout: ReturnType | undefined - private _error: unknown | undefined - connectedOnce = false - reconnectAttempts = 0 + private 
awaiters: Record = {}; + private _nextId = 1; + private _ws: WebSocket; + private _pendingMessages: string[] = []; + private _reconnectTimeout: ReturnType | undefined; + private _error: unknown | undefined; + connectedOnce = false; + reconnectAttempts = 0; - constructor(readonly url: string) { - super() - } + constructor(readonly url: string) { + super(); + } - private dispatchChange() { - this.dispatchEvent(new Event(CHANGE)) - } + private dispatchChange() { + this.dispatchEvent(new Event(CHANGE)); + } - async init(): Promise { - if (this._ws) return Promise.resolve(undefined) - this.connect() - } + async init(): Promise { + if (this._ws) return Promise.resolve(undefined); + this.connect(); + } - get readyState(): "connecting" | "open" | "closing" | "closed" | "error" { - const states = ["connecting", "open", "closing", "closed", "error"] - if (this._error) return "error" - return (states[this._ws?.readyState] as any) || "closed" - } + get readyState(): "connecting" | "open" | "closing" | "closed" | "error" { + const states = ["connecting", "open", "closing", "closed", "error"]; + if (this._error) return "error"; + return (states[this._ws?.readyState] as any) || "closed"; + } - get error() { - return this._error - } + get error() { + return this._error; + } - private reconnect() { - this.reconnectAttempts++ - this.dispatchEvent(new Event(RECONNECT)) - this._ws = undefined - clearTimeout(this._reconnectTimeout) - this._reconnectTimeout = setTimeout(() => { - try { - this.connect() - } catch (e) { - this._error = e - this.dispatchChange() - } - }, CLIENT_RECONNECT_DELAY) - } + private reconnect() { + this.reconnectAttempts++; + this.dispatchEvent(new Event(RECONNECT)); + this._ws = undefined; + clearTimeout(this._reconnectTimeout); + this._reconnectTimeout = setTimeout(() => { + try { + this.connect(); + } catch (e) { + this._error = e; + this.dispatchChange(); + } + }, CLIENT_RECONNECT_DELAY); + } - private connect(): void { - this._error = undefined - this._ws = new WebSocket(this.url) - this._ws.addEventListener( - OPEN, - () => { - // clear counter - this.connectedOnce = true - this.reconnectAttempts = 0 - // flush cached messages - let m: string - while ( - this._ws?.readyState === WebSocket.OPEN && - (m = this._pendingMessages.pop()) - ) - this._ws.send(m) - this.dispatchEvent(new Event(OPEN)) - this.dispatchChange() - }, - false - ) - this._ws.addEventListener( - ERROR, - (ev) => { - this.reconnect() - this.dispatchChange() - }, - false - ) - this._ws.addEventListener( - CLOSE, - // CloseEvent not defined in electron - (ev: Event) => { - const reason = (ev as any).reason || "websocket closed" - this.cancel(reason) - this.dispatchEvent(new Event(CLOSE)) - this.dispatchChange() - this.reconnect() - }, - false - ) - this._ws.addEventListener( - MESSAGE, - <(event: MessageEvent) => void>(async (e) => { - const event = e as MessageEvent - const data = JSON.parse(event.data) - // handle responses - const req: { id: string } = data - const { id } = req - const awaiter = this.awaiters[id] - if (awaiter) { - delete this.awaiters[id] - await awaiter.resolve(req) - } - // not a response - this.dispatchEvent( - new MessageEvent< - | PromptScriptResponseEvents - | ChatEvents - | LogMessageEvent - >(MESSAGE, { data }) - ) - }), - false - ) - this.dispatchEvent(new Event(CONNECT)) + private connect(): void { + this._error = undefined; + this._ws = new WebSocket(this.url); + this._ws.addEventListener( + OPEN, + () => { + // clear counter + this.connectedOnce = true; + this.reconnectAttempts = 0; + 
// flush cached messages + let m: string; + while (this._ws?.readyState === WebSocket.OPEN && (m = this._pendingMessages.pop())) + this._ws.send(m); + this.dispatchEvent(new Event(OPEN)); + this.dispatchChange(); + }, + false, + ); + this._ws.addEventListener( + ERROR, + (ev) => { + this.reconnect(); + this.dispatchChange(); + }, + false, + ); + this._ws.addEventListener( + CLOSE, + // CloseEvent not defined in electron + (ev: Event) => { + const reason = (ev as any).reason || "websocket closed"; + this.cancel(reason); + this.dispatchEvent(new Event(CLOSE)); + this.dispatchChange(); + this.reconnect(); + }, + false, + ); + this._ws.addEventListener( + MESSAGE, + <(event: MessageEvent) => void>(async (e) => { + const event = e as MessageEvent; + const data = JSON.parse(event.data); + // handle responses + const req: { id: string } = data; + const { id } = req; + const awaiter = this.awaiters[id]; + if (awaiter) { + delete this.awaiters[id]; + await awaiter.resolve(req); + } + // not a response + this.dispatchEvent( + new MessageEvent(MESSAGE, { + data, + }), + ); + }), + false, + ); + this.dispatchEvent(new Event(CONNECT)); + } + + queue(msg: Omit, options?: { reuse: boolean }): Promise { + const { reuse } = options || {}; + if (reuse) { + const awaiter = Object.values(this.awaiters).find((a) => a.msg.type === msg.type); + if (awaiter?.promise) { + return awaiter.promise; + } } - queue( - msg: Omit, - options?: { reuse: boolean } - ): Promise { - const { reuse } = options || {} - if (reuse) { - const awaiter = Object.values(this.awaiters).find( - (a) => a.msg.type === msg.type - ) - if (awaiter?.promise) { - return awaiter.promise - } - } + const id = this._nextId++ + ""; + const mo: any = { ...msg, id }; + // avoid pollution + delete mo.trace; + if (mo.options) delete mo.options.trace; + const m = JSON.stringify(mo); - const id = this._nextId++ + "" - const mo: any = { ...msg, id } - // avoid pollution - delete mo.trace - if (mo.options) delete mo.options.trace - const m = JSON.stringify(mo) + this.init(); + let awaiter: Awaiter; + const p = new Promise((resolve, reject) => { + awaiter = this.awaiters[id] = { + msg, + resolve: (data) => resolve(data), + reject, + } satisfies Awaiter; + if (this._ws?.readyState === WebSocket.OPEN) { + this._ws.send(m); + } else this._pendingMessages.push(m); + }); + awaiter.promise = p; + return p; + } - this.init() - let awaiter: Awaiter - const p = new Promise((resolve, reject) => { - awaiter = this.awaiters[id] = { - msg, - resolve: (data) => resolve(data), - reject, - } satisfies Awaiter - if (this._ws?.readyState === WebSocket.OPEN) { - this._ws.send(m) - } else this._pendingMessages.push(m) - }) - awaiter.promise = p - return p - } + get pending() { + return this._pendingMessages?.length > 0; + } - get pending() { - return this._pendingMessages?.length > 0 + stop() { + this.reconnectAttempts = 0; + if (this._reconnectTimeout) { + clearTimeout(this._reconnectTimeout); + this._reconnectTimeout = undefined; } - - stop() { - this.reconnectAttempts = 0 - if (this._reconnectTimeout) { - clearTimeout(this._reconnectTimeout) - this._reconnectTimeout = undefined + if (this._ws) { + const ws = this._ws; + this._ws = undefined; + if (ws.readyState !== WebSocket.CLOSED) + try { + ws.close(); + } finally { } - if (this._ws) { - const ws = this._ws - this._ws = undefined - if (ws.readyState !== WebSocket.CLOSED) - try { - ws.close() - } finally { - } - } - this.cancel() } + this.cancel(); + } - cancel(reason?: string) { - this.reconnectAttempts = 0 - 
this._pendingMessages = [] - const cancellers = Object.values(this.awaiters) - this.awaiters = {} - cancellers.forEach((a) => a.reject(reason || "cancelled")) - } + cancel(reason?: string) { + this.reconnectAttempts = 0; + this._pendingMessages = []; + const cancellers = Object.values(this.awaiters); + this.awaiters = {}; + cancellers.forEach((a) => a.reject(reason || "cancelled")); + } - kill(): void { - if ( - typeof WebSocket !== "undefined" && - this._ws?.readyState === WebSocket.OPEN - ) - this._ws.send( - JSON.stringify({ type: "server.kill", id: this._nextId++ + "" }) - ) - this.stop() - } + kill(): void { + if (typeof WebSocket !== "undefined" && this._ws?.readyState === WebSocket.OPEN) + this._ws.send(JSON.stringify({ type: "server.kill", id: this._nextId++ + "" })); + this.stop(); + } - dispose(): any { - this.kill() - return undefined - } + dispose(): any { + this.kill(); + return undefined; + } - async getLanguageModelConfiguration( - modelId: string, - options?: { token?: boolean } - ): Promise { - const res = await this.queue( - { - type: "model.configuration", - model: modelId, - token: options?.token, - }, - { reuse: true } - ) - return res.response?.ok ? res.response.info : undefined - } + async getLanguageModelConfiguration( + modelId: string, + options?: { token?: boolean }, + ): Promise { + const res = await this.queue( + { + type: "model.configuration", + model: modelId, + token: options?.token, + }, + { reuse: true }, + ); + return res.response?.ok ? res.response.info : undefined; + } - async version(): Promise { - const res = await this.queue( - { type: "server.version" }, - { reuse: true } - ) - return res.response as ServerResponse - } + async version(): Promise { + const res = await this.queue({ type: "server.version" }, { reuse: true }); + return res.response as ServerResponse; + } - async infoEnv(): Promise { - const res = await this.queue( - { type: "server.env" }, - { reuse: true } - ) - return res.response as ServerEnvResponse - } + async infoEnv(): Promise { + const res = await this.queue({ type: "server.env" }, { reuse: true }); + return res.response as ServerEnvResponse; + } - async listScripts(): Promise { - const res = await this.queue( - { type: "script.list" }, - { reuse: true } - ) - const project = (res.response as PromptScriptListResponse)?.project - return project - } + async listScripts(): Promise { + const res = await this.queue({ type: "script.list" }, { reuse: true }); + const project = (res.response as PromptScriptListResponse)?.project; + return project; + } - async startScript( - runId: string, - script: string, - files: string[], - options: Partial - ) { - this.dispatchEvent(new Event(QUEUE_SCRIPT_START)) - return this.queue({ - type: "script.start", - runId, - script, - files, - options, - }) - } + async startScript( + runId: string, + script: string, + files: string[], + options: Partial, + ) { + this.dispatchEvent(new Event(QUEUE_SCRIPT_START)); + return this.queue({ + type: "script.start", + runId, + script, + files, + options, + }); + } - async abortScript(runId: string, reason: string): Promise { - if (!runId) return { ok: true } - const res = await this.queue({ - type: "script.abort", - runId, - reason, - }) - return res.response - } + async abortScript(runId: string, reason: string): Promise { + if (!runId) return { ok: true }; + const res = await this.queue({ + type: "script.abort", + runId, + reason, + }); + return res.response; + } } diff --git a/packages/core/src/shell.ts b/packages/core/src/shell.ts index 17e0b049b4..9312b12cbd 
100644 --- a/packages/core/src/shell.ts +++ b/packages/core/src/shell.ts @@ -1,4 +1,4 @@ -import { parse, quote } from "shell-quote" +import { parse, quote } from "shell-quote"; /** * Parses a shell command into an array of arguments. @@ -8,17 +8,13 @@ import { parse, quote } from "shell-quote" * it resolves operation types (e.g., globs or operators) and includes them in the result. */ export function shellParse(cmd: string): string[] { - const args = parse(cmd) - const res = args - .filter((e) => !(e as any).comment) - .map((e) => - typeof e === "string" - ? e - : (e as any).op === "glob" - ? (e as any).pattern - : (e as any).op - ) - return res + const args = parse(cmd); + const res = args + .filter((e) => !(e as any).comment) + .map((e) => + typeof e === "string" ? e : (e as any).op === "glob" ? (e as any).pattern : (e as any).op, + ); + return res; } /** @@ -29,7 +25,7 @@ export function shellParse(cmd: string): string[] { * @returns A single string where the input arguments are properly quoted for shell usage. */ export function shellQuote(args: string[]): string { - return quote(args) + return quote(args); } /** @@ -39,5 +35,5 @@ export function shellQuote(args: string[]): string { * @returns The string with ANSI color codes removed. */ export function shellRemoveAsciiColors(text: string) { - return text?.replace(/\x1b\[[0-9;]*m/g, "") // ascii colors + return text?.replace(/\x1b\[[0-9;]*m/g, ""); // ascii colors } diff --git a/packages/core/src/stdio.ts b/packages/core/src/stdio.ts index 2ae61e3ebf..bde5e444c3 100644 --- a/packages/core/src/stdio.ts +++ b/packages/core/src/stdio.ts @@ -1,5 +1,5 @@ -export let stdout: NodeJS.WriteStream = process.stdout -export let stderr: NodeJS.WriteStream = process.stderr +export let stdout: NodeJS.WriteStream = process.stdout; +export let stderr: NodeJS.WriteStream = process.stderr; /** * Overrides the standard output stream with the standard error stream. @@ -9,5 +9,5 @@ export let stderr: NodeJS.WriteStream = process.stderr * instead be redirected to the standard error stream. */ export function overrideStdoutWithStdErr() { - stdout = stderr + stdout = stderr; } diff --git a/packages/core/src/systems.ts b/packages/core/src/systems.ts index f897a54258..0a7e11ba7c 100644 --- a/packages/core/src/systems.ts +++ b/packages/core/src/systems.ts @@ -2,16 +2,16 @@ // This module resolves and returns a list of applicable systems based on the provided script and project. // It analyzes script options and the JavaScript source code to determine which systems to include or exclude. -import { uniq } from "es-toolkit" -import { arrayify } from "./util" -import type { GenerationOptions } from "./generation" -import { isToolsSupported } from "./tools" -import type { Project } from "./server/messages" -import { deleteUndefinedValues } from "./cleaners" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("systems") -const dbgr = dbg.extend("resolve") -dbgr.enabled = false +import { uniq } from "es-toolkit"; +import { arrayify } from "./util"; +import type { GenerationOptions } from "./generation"; +import { isToolsSupported } from "./tools"; +import type { Project } from "./server/messages"; +import { deleteUndefinedValues } from "./cleaners"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("systems"); +const dbgr = dbg.extend("resolve"); +dbgr.enabled = false; /** * Resolves and returns a list of unique systems based on the provided script and project. 
@@ -23,198 +23,178 @@ dbgr.enabled = false * @returns An array of unique system prompt instances applicable based on the analysis, including both system IDs and instances. */ export function resolveSystems( - prj: Project, - script: PromptSystemOptions & - ModelOptions & - ContentSafetyOptions & { jsSource?: string }, - resolvedTools?: ToolCallback[] + prj: Project, + script: PromptSystemOptions & ModelOptions & ContentSafetyOptions & { jsSource?: string }, + resolvedTools?: ToolCallback[], ): SystemPromptInstance[] { - const { - jsSource, - responseType, - responseSchema, - systemSafety, - mcpServers, - mcpAgentServers, - } = script - // Initialize systems array from script.system, converting to array if necessary using arrayify utility - let systems = arrayify(script.system).filter((s) => typeof s === "string") - const systemInstances = arrayify(script.system).filter( - (s) => typeof s === "object" - ) + const { jsSource, responseType, responseSchema, systemSafety, mcpServers, mcpAgentServers } = + script; + // Initialize systems array from script.system, converting to array if necessary using arrayify utility + let systems = arrayify(script.system).filter((s) => typeof s === "string"); + const systemInstances = arrayify(script.system).filter((s) => typeof s === "object"); - const excludedSystem = arrayify(script.excludedSystem) - const tools = arrayify(script.tools) - const dataMode = - responseSchema || - (responseType && responseType !== "markdown" && responseType !== "text") - const safeties = [ - "system.safety_jailbreak", - "system.safety_harmful_content", - "system.safety_protected_material", - ] + const excludedSystem = arrayify(script.excludedSystem); + const tools = arrayify(script.tools); + const dataMode = + responseSchema || (responseType && responseType !== "markdown" && responseType !== "text"); + const safeties = [ + "system.safety_jailbreak", + "system.safety_harmful_content", + "system.safety_protected_material", + ]; - // If no system is defined in the script, determine systems based on jsSource - if (script.system === undefined) { - // current date - // safety - if (systemSafety !== false) { - dbgr(`adding safeties to systems`) - systems.push(...safeties) - } - // Check for schema definition in jsSource using regex - const useSchema = /\Wdefschema\W/i.test(jsSource) - - // Default systems if no responseType is specified - if (!dataMode) { - dbgr(`adding default systems`) - systems.push("system") - systems.push("system.explanations") - if (!responseType) { - dbgr(`adding system.output_markdown`) - systems.push("system.output_markdown") - } - } - - // Add planner system if any tool starts with "agent" - if (tools.some((t) => /^agent/.test(t))) { - dbgr(`tool starts with "agent", adding system.planner`) - systems.push("system.planner") - } - // Add harmful content system if images are defined - if (/\Wdefimages\W/i.test(jsSource)) { - dbgr(`images found, adding system.safety_harmful_content`) - systems.push("system.safety_harmful_content") - } - // Determine additional systems based on content of jsSource - if (/\Wfile\W/i.test(jsSource)) { - dbgr(`file references found, adding system.files`) - systems.push("system.files") - // Add file schema system if schema is used - if (useSchema) { - dbgr(`schema is used, adding system.files_schema`) - systems.push("system.files_schema") - } - } - if (/\Wchangelog\W/i.test(jsSource)) { - dbgr(`changelog references found, adding system.changelog`) - systems.push("system.changelog") - } - // Add schema system if schema is used - if 
(useSchema) { - dbgr(`schema is used, adding system.schema`) - systems.push("system.schema") - } - // Add annotation system if annotations, warnings, or errors are found - if (/\W(annotations|warnings|errors)\W/i.test(jsSource)) { - dbgr( - `annotations, warnings, or errors found, adding system.annotations` - ) - systems.push("system.annotations") - } - // Add diagram system if diagrams or charts are found - if (/\W(diagram|chart)\W/i.test(jsSource)) { - dbgr(`diagrams or charts found, adding system.diagrams`) - systems.push("system.diagrams") - } - // Add git information system if git is found - if (/\W(git)\W/i.test(jsSource)) { - dbgr(`git references found, adding system.git_info`) - systems.push("system.git_info") - } - // Add GitHub information system if GitHub is found - if (/\W(github)\W/i.test(jsSource)) { - dbgr(`GitHub references found, adding system.github_info`) - systems.push("system.github_info") - } - // Add system.today if "today" is found in jsSource - if (/today/i.test(jsSource)) { - dbgr(`adding system.today to systems`) - systems.push("system.today") - } + // If no system is defined in the script, determine systems based on jsSource + if (script.system === undefined) { + // current date + // safety + if (systemSafety !== false) { + dbgr(`adding safeties to systems`); + systems.push(...safeties); } + // Check for schema definition in jsSource using regex + const useSchema = /\Wdefschema\W/i.test(jsSource); - // insert safety first - if (systemSafety === "default") { - dbgr(`inserting safety systems`) - systems.unshift(...safeties) + // Default systems if no responseType is specified + if (!dataMode) { + dbgr(`adding default systems`); + systems.push("system"); + systems.push("system.explanations"); + if (!responseType) { + dbgr(`adding system.output_markdown`); + systems.push("system.output_markdown"); + } } - // output format - switch (responseType) { - case "markdown": - systems.push("system.output_markdown") - break - case "text": - systems.push("system.output_plaintext") - break - case "json": - case "json_object": - case "json_schema": - systems.push("system.output_json") - break - case "yaml": - systems.push("system.output_yaml") - break + // Add planner system if any tool starts with "agent" + if (tools.some((t) => /^agent/.test(t))) { + dbgr(`tool starts with "agent", adding system.planner`); + systems.push("system.planner"); } - if (responseSchema && !responseType) { - dbgr(`adding system.output_json to match responseSchema`) - systems.push("system.output_json") + // Add harmful content system if images are defined + if (/\Wdefimages\W/i.test(jsSource)) { + dbgr(`images found, adding system.safety_harmful_content`); + systems.push("system.safety_harmful_content"); } - - // Include tools-related systems if specified in the script - if (tools.length || resolvedTools?.length) { - dbgr(`tools or resolvedTools found, adding system.tools`) - systems.push("system.tools") - // Resolve and add each tool's systems based on its definition in the project - tools.forEach((tool) => - systems.push(...resolveSystemFromTools(prj, tool)) - ) + // Determine additional systems based on content of jsSource + if (/\Wfile\W/i.test(jsSource)) { + dbgr(`file references found, adding system.files`); + systems.push("system.files"); + // Add file schema system if schema is used + if (useSchema) { + dbgr(`schema is used, adding system.files_schema`); + systems.push("system.files_schema"); + } + } + if (/\Wchangelog\W/i.test(jsSource)) { + dbgr(`changelog references found, adding 
system.changelog`); + systems.push("system.changelog"); + } + // Add schema system if schema is used + if (useSchema) { + dbgr(`schema is used, adding system.schema`); + systems.push("system.schema"); + } + // Add annotation system if annotations, warnings, or errors are found + if (/\W(annotations|warnings|errors)\W/i.test(jsSource)) { + dbgr(`annotations, warnings, or errors found, adding system.annotations`); + systems.push("system.annotations"); + } + // Add diagram system if diagrams or charts are found + if (/\W(diagram|chart)\W/i.test(jsSource)) { + dbgr(`diagrams or charts found, adding system.diagrams`); + systems.push("system.diagrams"); + } + // Add git information system if git is found + if (/\W(git)\W/i.test(jsSource)) { + dbgr(`git references found, adding system.git_info`); + systems.push("system.git_info"); } + // Add GitHub information system if GitHub is found + if (/\W(github)\W/i.test(jsSource)) { + dbgr(`GitHub references found, adding system.github_info`); + systems.push("system.github_info"); + } + // Add system.today if "today" is found in jsSource + if (/today/i.test(jsSource)) { + dbgr(`adding system.today to systems`); + systems.push("system.today"); + } + } + + // insert safety first + if (systemSafety === "default") { + dbgr(`inserting safety systems`); + systems.unshift(...safeties); + } + + // output format + switch (responseType) { + case "markdown": + systems.push("system.output_markdown"); + break; + case "text": + systems.push("system.output_plaintext"); + break; + case "json": + case "json_object": + case "json_schema": + systems.push("system.output_json"); + break; + case "yaml": + systems.push("system.output_yaml"); + break; + } + if (responseSchema && !responseType) { + dbgr(`adding system.output_json to match responseSchema`); + systems.push("system.output_json"); + } - // map mcps to system scripts - if (typeof mcpServers === "object") { - for (const [id, config] of Object.entries(mcpServers)) { - systemInstances.push({ - id: "system.mcp", - parameters: { - id, - ...config, - }, - }) - } + // Include tools-related systems if specified in the script + if (tools.length || resolvedTools?.length) { + dbgr(`tools or resolvedTools found, adding system.tools`); + systems.push("system.tools"); + // Resolve and add each tool's systems based on its definition in the project + tools.forEach((tool) => systems.push(...resolveSystemFromTools(prj, tool))); + } + + // map mcps to system scripts + if (typeof mcpServers === "object") { + for (const [id, config] of Object.entries(mcpServers)) { + systemInstances.push({ + id: "system.mcp", + parameters: { + id, + ...config, + }, + }); } + } - if (typeof mcpAgentServers === "object") { - for (const [id, config] of Object.entries(mcpAgentServers)) { - systemInstances.push({ - id: "system.agent_mcp", - parameters: { - id, - ...config, - }, - }) - } + if (typeof mcpAgentServers === "object") { + for (const [id, config] of Object.entries(mcpAgentServers)) { + systemInstances.push({ + id: "system.agent_mcp", + parameters: { + id, + ...config, + }, + }); } + } - // filter out - systems = systems - .filter((s) => !!s) - .filter((s) => !excludedSystem.includes(s)) + // filter out + systems = systems.filter((s) => !!s).filter((s) => !excludedSystem.includes(s)); - // Return a unique list of non-empty systems - // Filters out duplicates and empty entries using unique utility - systems = uniq(systems) + // Return a unique list of non-empty systems + // Filters out duplicates and empty entries using unique utility + systems = 
uniq(systems); - // now compute system instances - const res: SystemPromptInstance[] = [ - ...systems.map((id) => ({ id })), - ...systemInstances, - ] + // now compute system instances + const res: SystemPromptInstance[] = [...systems.map((id) => ({ id })), ...systemInstances]; - dbgr(`resolved %O`, res) + dbgr(`resolved %O`, res); - return res + return res; } /** @@ -229,36 +209,30 @@ export function resolveSystems( * @returns A boolean indicating if fallback tools were added. */ export function addFallbackToolSystems( - systems: SystemPromptInstance[], - tools: ToolCallback[], - options?: ModelOptions, - genOptions?: GenerationOptions + systems: SystemPromptInstance[], + tools: ToolCallback[], + options?: ModelOptions, + genOptions?: GenerationOptions, ) { - if ( - !tools?.length || - systems.find(({ id }) => id === "system.tool_calls") - ) { - dbg(`no tools or fallback tools found, skip fallback tools`) - return false - } + if (!tools?.length || systems.find(({ id }) => id === "system.tool_calls")) { + dbg(`no tools or fallback tools found, skip fallback tools`); + return false; + } - const supported = isToolsSupported(options?.model || genOptions?.model) - const fallbackTools = - supported === false || - options?.fallbackTools || - genOptions?.fallbackTools - if (fallbackTools) { - dbg( - `adding fallback tools to systems`, - deleteUndefinedValues({ - supported, - options: options?.fallbackTools, - genOptions: genOptions?.fallbackTools, - }) - ) - systems.push({ id: "system.tool_calls" }) - } - return fallbackTools + const supported = isToolsSupported(options?.model || genOptions?.model); + const fallbackTools = supported === false || options?.fallbackTools || genOptions?.fallbackTools; + if (fallbackTools) { + dbg( + `adding fallback tools to systems`, + deleteUndefinedValues({ + supported, + options: options?.fallbackTools, + genOptions: genOptions?.fallbackTools, + }), + ); + systems.push({ id: "system.tool_calls" }); + } + return fallbackTools; } /** @@ -270,12 +244,12 @@ export function addFallbackToolSystems( * @returns An array of system IDs associated with the specified tool. */ function resolveSystemFromTools(prj: Project, tool: string): string[] { - const system = prj.scripts.filter( - (t) => t.isSystem && t.defTools?.find((to) => to.id.startsWith(tool)) - ) - const res = system.map(({ id }) => id) + const system = prj.scripts.filter( + (t) => t.isSystem && t.defTools?.find((to) => to.id.startsWith(tool)), + ); + const res = system.map(({ id }) => id); - return res + return res; } /** @@ -288,21 +262,15 @@ function resolveSystemFromTools(prj: Project, tool: string): string[] { * @returns A list of tool objects, each containing an ID and description, associated with the provided systems and tools. */ export function resolveTools( - prj: Project, - systems: (string | SystemPromptInstance)[], - tools: string[] + prj: Project, + systems: (string | SystemPromptInstance)[], + tools: string[], ): { id: string; description: string }[] { - const { scripts: scripts } = prj - const toolScripts = uniq([ - ...systems.map((sys) => - scripts.find((s) => - typeof sys === "string" ? s.id === sys : false - ) - ), - ...tools.map((tid) => - scripts.find((s) => s.defTools?.find((t) => t.id.startsWith(tid))) - ), - ]).filter((s) => !!s) - const res = toolScripts.map(({ defTools }) => defTools ?? []).flat() - return res + const { scripts: scripts } = prj; + const toolScripts = uniq([ + ...systems.map((sys) => scripts.find((s) => (typeof sys === "string" ? 
s.id === sys : false))), + ...tools.map((tid) => scripts.find((s) => s.defTools?.find((t) => t.id.startsWith(tid)))), + ]).filter((s) => !!s); + const res = toolScripts.map(({ defTools }) => defTools ?? []).flat(); + return res; } diff --git a/packages/core/src/tags.test.ts b/packages/core/src/tags.test.ts index 414206fc67..38ed2476d4 100644 --- a/packages/core/src/tags.test.ts +++ b/packages/core/src/tags.test.ts @@ -1,68 +1,52 @@ -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { tagFilter } from "./tags" +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { tagFilter } from "./tags"; describe("tagFilter", () => { - test("should return true when no tags are provided", () => { - assert.equal(tagFilter([], "example"), true) - assert.equal(tagFilter(undefined as any, "example"), true) - assert.equal(tagFilter(null as any, "example"), true) - }) + test("should return true when no tags are provided", () => { + assert.equal(tagFilter([], "example"), true); + assert.equal(tagFilter(undefined as any, "example"), true); + assert.equal(tagFilter(null as any, "example"), true); + }); - test("should return true when tag starts with any tag in the list", () => { - assert.equal(tagFilter(["example"], "example"), true) - assert.equal(tagFilter(["ex"], "example"), true) - assert.equal(tagFilter(["other", "ex"], "example"), true) - }) + test("should return true when tag starts with any tag in the list", () => { + assert.equal(tagFilter(["example"], "example"), true); + assert.equal(tagFilter(["ex"], "example"), true); + assert.equal(tagFilter(["other", "ex"], "example"), true); + }); - test("should be case insensitive", () => { - assert.equal(tagFilter(["Example"], "example"), true) - assert.equal(tagFilter(["example"], "Example"), true) - }) + test("should be case insensitive", () => { + assert.equal(tagFilter(["Example"], "example"), true); + assert.equal(tagFilter(["example"], "Example"), true); + }); - test("should return false when tag does not start with any tag in the list", () => { - assert.equal(tagFilter(["other"], "example"), false) - assert.equal(tagFilter(["ampl"], "example"), false) - }) + test("should return false when tag does not start with any tag in the list", () => { + assert.equal(tagFilter(["other"], "example"), false); + assert.equal(tagFilter(["ampl"], "example"), false); + }); - test("should handle exclusions correctly", () => { - assert.equal( - tagFilter([":!ex"], "example"), - false, - "exclusion should take precedence" - ) - assert.equal( - tagFilter([":!example"], "example"), - false, - "exclusion should take precedence 2" - ) - assert.equal( - tagFilter([":!other"], "example"), - true, - "inclusion should take precedence" - ) - }) + test("should handle exclusions correctly", () => { + assert.equal(tagFilter([":!ex"], "example"), false, "exclusion should take precedence"); + assert.equal(tagFilter([":!example"], "example"), false, "exclusion should take precedence 2"); + assert.equal(tagFilter([":!other"], "example"), true, "inclusion should take precedence"); + }); - test("should handle mixed inclusions and exclusions", () => { - assert.equal( - tagFilter(["ex", ":!example"], "example"), - false, - "exclusion should take precedence" - ) - assert.equal( - tagFilter(["other", ":!ex"], "example"), - false, - "exclusion should take precedence 2" - ) - assert.equal( - tagFilter(["ex", ":!other"], "example"), - true, - "inclusion should take precedence" - ) - }) + test("should handle mixed 
inclusions and exclusions", () => {
+    assert.equal(
+      tagFilter(["ex", ":!example"], "example"),
+      false,
+      "exclusion should take precedence",
+    );
+    assert.equal(
+      tagFilter(["other", ":!ex"], "example"),
+      false,
+      "exclusion should take precedence 2",
+    );
+    assert.equal(tagFilter(["ex", ":!other"], "example"), true, "inclusion should take precedence");
+  });
 
-    test("should handle undefined or null tag", () => {
-        assert.equal(tagFilter(["example"], undefined as any), false)
-        assert.equal(tagFilter(["example"], null as any), false)
-    })
-})
+  test("should handle undefined or null tag", () => {
+    assert.equal(tagFilter(["example"], undefined as any), false);
+    assert.equal(tagFilter(["example"], null as any), false);
+  });
+});
diff --git a/packages/core/src/tags.ts b/packages/core/src/tags.ts
index c35d0ea5be..e205af78d6 100644
--- a/packages/core/src/tags.ts
+++ b/packages/core/src/tags.ts
@@ -12,27 +12,27 @@
  * @returns Whether the tag passes the filter.
  */
 export function tagFilter(tags: string[], tag: string) {
-    if (!tags?.length) return true
+  if (!tags?.length) return true;
 
-    // normalize tag
-    const ltag = tag?.toLocaleLowerCase() || ""
+  // normalize tag
+  const ltag = tag?.toLocaleLowerCase() || "";
 
-    let noMatchDefault = false
-    // apply exclusions first
-    for (const t of tags.filter((t) => t.startsWith(":!"))) {
-        const lt = t.toLocaleLowerCase()
-        if (ltag.startsWith(lt.slice(2))) return false
-        noMatchDefault = true // if any exclusion is found, set noMatchDefault to true
-    }
+  let noMatchDefault = false;
+  // apply exclusions first
+  for (const t of tags.filter((t) => t.startsWith(":!"))) {
+    const lt = t.toLocaleLowerCase();
+    if (ltag.startsWith(lt.slice(2))) return false;
+    noMatchDefault = true; // if any exclusion is found, set noMatchDefault to true
+  }
 
-    // apply inclusions
-    for (const t of tags.filter((t) => !t.startsWith(":!"))) {
-        noMatchDefault = false // if any inclusion is found, set noMatchDefault to false
-        // check if the tag starts with the inclusion tag
-        const lt = t.toLocaleLowerCase()
-        if (ltag.startsWith(lt)) return true
-    }
+  // apply inclusions
+  for (const t of tags.filter((t) => !t.startsWith(":!"))) {
+    noMatchDefault = false; // if any inclusion is found, set noMatchDefault to false
+    // check if the tag starts with the inclusion tag
+    const lt = t.toLocaleLowerCase();
+    if (ltag.startsWith(lt)) return true;
+  }
 
-    // no matches
-    return noMatchDefault
+  // no matches
+  return noMatchDefault;
 }
diff --git a/packages/core/src/teams.test.ts b/packages/core/src/teams.test.ts
index 866eea0e7d..c01763a64e 100644
--- a/packages/core/src/teams.test.ts
+++ b/packages/core/src/teams.test.ts
@@ -1,78 +1,68 @@
-import { convertMarkdownToTeamsHTML } from "./teams"
-import { describe, test } from "node:test"
-import assert from "node:assert/strict"
+import { convertMarkdownToTeamsHTML } from "./teams";
+import { describe, test } from "node:test";
+import assert from "node:assert/strict";
 
 describe("convertMarkdownToTeamsHTML", () => {
-    test("converts headers correctly", () => {
-        const markdown =
-            "# Subject\n## Heading 1\n### Heading 2\n#### Heading 3"
-        const result = convertMarkdownToTeamsHTML(markdown)
-        assert.strictEqual(result.subject, "Subject")
-        assert.strictEqual(
-            result.content,
<p>\n</p><h2>Heading 1</h2><p>\n</p><h3>Heading 2</h3><p>\n</p><h4>Heading 3</h4><p></p>
" - ) - }) + test("converts headers correctly", () => { + const markdown = "# Subject\n## Heading 1\n### Heading 2\n#### Heading 3"; + const result = convertMarkdownToTeamsHTML(markdown); + assert.strictEqual(result.subject, "Subject"); + assert.strictEqual( + result.content, + "
<p>\n</p><h2>Heading 1</h2><p>\n</p><h3>Heading 2</h3><p>\n</p><h4>Heading 3</h4><p></p>
", + ); + }); - test("converts bold, italic, code, and strike correctly", () => { - const markdown = "**bold** *italic* `code` ~~strike~~" - const result = convertMarkdownToTeamsHTML(markdown) - assert.strictEqual( - result.content, - "
<p><b>bold</b> <i>italic</i> <code>code</code> <s>strike</s></p>
" - ) - }) + test("converts bold, italic, code, and strike correctly", () => { + const markdown = "**bold** *italic* `code` ~~strike~~"; + const result = convertMarkdownToTeamsHTML(markdown); + assert.strictEqual( + result.content, + "
<p><b>bold</b> <i>italic</i> <code>code</code> <s>strike</s></p>
", + ); + }); - test("converts blockquotes correctly", () => { - const markdown = "> This is a blockquote" - const result = convertMarkdownToTeamsHTML(markdown) - assert.strictEqual( - result.content, - "
<p><blockquote>This is a blockquote</blockquote>\n</p>
" - ) - }) - test("handles empty markdown string", () => { - const markdown = "" - const result = convertMarkdownToTeamsHTML(markdown) - assert.strictEqual(result.content, "
") - assert.strictEqual(result.subject, undefined) - }) + test("converts blockquotes correctly", () => { + const markdown = "> This is a blockquote"; + const result = convertMarkdownToTeamsHTML(markdown); + assert.strictEqual( + result.content, + "
<p><blockquote>This is a blockquote</blockquote>\n</p>
", + ); + }); + test("handles empty markdown string", () => { + const markdown = ""; + const result = convertMarkdownToTeamsHTML(markdown); + assert.strictEqual(result.content, "
"); + assert.strictEqual(result.subject, undefined); + }); - test("handles markdown without subject", () => { - const markdown = "## Heading 1\nContent" - const result = convertMarkdownToTeamsHTML(markdown) - assert.strictEqual(result.subject, undefined) - assert.strictEqual( - result.content, - "
-    test("handles markdown without subject", () => {
-        const markdown = "## Heading 1\nContent"
-        const result = convertMarkdownToTeamsHTML(markdown)
-        assert.strictEqual(result.subject, undefined)
-        assert.strictEqual(
-            result.content,
-            "<p></p><h2>Heading 1</h2><p>\nContent</p>
" - ) - }) - test("converts unordered lists correctly", () => { - const markdown = "- Item 1\n- Item 2\n- Item 3" - const result = convertMarkdownToTeamsHTML(markdown) - assert.strictEqual( - result.content, - "
<p><li>- Item 1\n<li>- Item 2\n<li>- Item 3</p>
" - ) - }) + test("handles markdown without subject", () => { + const markdown = "## Heading 1\nContent"; + const result = convertMarkdownToTeamsHTML(markdown); + assert.strictEqual(result.subject, undefined); + assert.strictEqual(result.content, "
<p></p><h2>Heading 1</h2><p>\nContent</p>
"); + }); + test("converts unordered lists correctly", () => { + const markdown = "- Item 1\n- Item 2\n- Item 3"; + const result = convertMarkdownToTeamsHTML(markdown); + assert.strictEqual(result.content, "
<p><li>- Item 1\n<li>- Item 2\n<li>- Item 3</p>
"); + }); - test("converts mixed content correctly", () => { - const markdown = - "# Subject\n## Heading 1\nContent with **bold**, *italic*, `code`, and ~~strike~~.\n- List item 1\n- List item 2\n> Blockquote" - const result = convertMarkdownToTeamsHTML(markdown) - assert.strictEqual(result.subject, "Subject") - assert.strictEqual( - result.content, - "
<p>\n</p><h2>Heading 1</h2><p>\nContent with <b>bold</b>, <i>italic</i>, <code>code</code>, and <s>strike</s>.\n<li>- List item 1\n<li>- List item 2\n<blockquote>Blockquote</blockquote>\n</p>
" - ) - }) + test("converts mixed content correctly", () => { + const markdown = + "# Subject\n## Heading 1\nContent with **bold**, *italic*, `code`, and ~~strike~~.\n- List item 1\n- List item 2\n> Blockquote"; + const result = convertMarkdownToTeamsHTML(markdown); + assert.strictEqual(result.subject, "Subject"); + assert.strictEqual( + result.content, + "
<p>\n</p><h2>Heading 1</h2><p>\nContent with <b>bold</b>, <i>italic</i>, <code>code</code>, and <s>strike</s>.\n<li>- List item 1\n<li>- List item 2\n<blockquote>Blockquote</blockquote>\n</p>
", + ); + }); - test("converts multiple paragraphs correctly", () => { - const markdown = "Paragraph 1\n\nParagraph 2" - const result = convertMarkdownToTeamsHTML(markdown) - assert.strictEqual( - result.content, - "
<p>Paragraph 1\n\nParagraph 2</p>
" - ) - }) -}) + test("converts multiple paragraphs correctly", () => { + const markdown = "Paragraph 1\n\nParagraph 2"; + const result = convertMarkdownToTeamsHTML(markdown); + assert.strictEqual(result.content, "
<p>Paragraph 1\n\nParagraph 2</p>");
+  });
+});
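
Taken together, these expectations pin down the conversion contract. As a quick illustration, here is a hypothetical round trip (not part of the patch) that mirrors them; the inline tag names (`<b>`, `<i>`, `<code>`, `<s>`) are assumptions taken from the replacement rules in `teams.ts` below:

```ts
import { convertMarkdownToTeamsHTML } from "./teams";

// Headings map to <h2>..<h4> blocks, the first "# " line becomes the subject,
// and inline markdown becomes <b>/<code> spans inside a <p> wrapper.
const { subject, content } = convertMarkdownToTeamsHTML(
  "# Release notes\n## Changes\n**bold** and `code`",
);
console.log(subject); // "Release notes"
console.log(content); // "<p>\n</p><h2>Changes</h2><p>\n<b>bold</b> and <code>code</code></p>"
```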
"); + }); +}); diff --git a/packages/core/src/teams.ts b/packages/core/src/teams.ts index c7d45cd44e..138c25c439 100644 --- a/packages/core/src/teams.ts +++ b/packages/core/src/teams.ts @@ -1,17 +1,17 @@ -import { fileTypeFromBuffer } from "./filetype" -import { CancellationOptions } from "./cancellation" -import { deleteUndefinedValues } from "./cleaners" -import { createFetch } from "./fetch" -import { runtimeHost } from "./host" -import { HTMLEscape } from "./htmlescape" -import { TraceOptions } from "./trace" -import { logError, logVerbose } from "./util" -import { dedent } from "./indent" -import { TOOL_ID } from "./constants" -import { filenameOrFileToFilename } from "./unwrappers" -import { resolveFileBytes } from "./file" -import { basename } from "node:path" -import { frontmatterTryParse, splitMarkdown } from "./frontmatter" +import { fileTypeFromBuffer } from "./filetype"; +import { CancellationOptions } from "./cancellation"; +import { deleteUndefinedValues } from "./cleaners"; +import { createFetch } from "./fetch"; +import { runtimeHost } from "./host"; +import { HTMLEscape } from "./htmlescape"; +import { TraceOptions } from "./trace"; +import { logError, logVerbose } from "./util"; +import { dedent } from "./indent"; +import { TOOL_ID } from "./constants"; +import { filenameOrFileToFilename } from "./unwrappers"; +import { resolveFileBytes } from "./file"; +import { basename } from "node:path"; +import { frontmatterTryParse, splitMarkdown } from "./frontmatter"; /** * Converts a Markdown string into HTML formatted for Microsoft Teams. @@ -24,50 +24,49 @@ import { frontmatterTryParse, splitMarkdown } from "./frontmatter" * - `subject`: The extracted title if available, or undefined. */ export function convertMarkdownToTeamsHTML(markdown: string) { - // using regexes, convert headers, lists, links, bold, italic, code, and quotes - const { content, frontmatter } = splitMarkdown(markdown || "") - const fm = frontmatterTryParse(frontmatter) - let subject = fm?.value?.title as string - const html = - "
" + - (content || "") - .replace(/^# (.*$)/gim, (m, t) => { - subject = t - return "" - }) - .replace(/^#### (.*$)/gim, "

$1

") - .replace(/^### (.*$)/gim, "

$1

") - .replace(/^## (.*$)/gim, "

$1

") - .replace(/^\> (.*$)/gim, "
$1
\n") - .replace(/\*\*(.*)\*\*/gim, "$1") - .replace(/\*(.*)\*/gim, "$1") - .replace(/__(.*)__/gim, "$1") - .replace(/`(.*?)`/gim, "$1") - .replace(/~~(.*?)~~/gim, "$1") - .replace(/^- (.*$)/gim, "
- $1") + - "
" - return { content: html.trim(), subject: subject?.trim() } + // using regexes, convert headers, lists, links, bold, italic, code, and quotes + const { content, frontmatter } = splitMarkdown(markdown || ""); + const fm = frontmatterTryParse(frontmatter); + let subject = fm?.value?.title as string; + const html = + "
" + + (content || "") + .replace(/^# (.*$)/gim, (m, t) => { + subject = t; + return ""; + }) + .replace(/^#### (.*$)/gim, "

$1

") + .replace(/^### (.*$)/gim, "

$1

") + .replace(/^## (.*$)/gim, "

$1

") + .replace(/^\> (.*$)/gim, "
$1
\n") + .replace(/\*\*(.*)\*\*/gim, "$1") + .replace(/\*(.*)\*/gim, "$1") + .replace(/__(.*)__/gim, "$1") + .replace(/`(.*?)`/gim, "$1") + .replace(/~~(.*?)~~/gim, "$1") + .replace(/^- (.*$)/gim, "
- $1") + + "
"; + return { content: html.trim(), subject: subject?.trim() }; } function parseTeamsChannelUrl(url: string) { - const m = - /^https:\/\/teams.microsoft.com\/[^\/]{1,32}\/channel\/(?.+)\/.*\?groupId=(?([a-z0-9\-])+)$/.exec( - url - ) - if (!m) throw new Error("Invalid Teams channel URL") - const { teamId, channelId } = m.groups - return { teamId, channelId } + const m = + /^https:\/\/teams.microsoft.com\/[^\/]{1,32}\/channel\/(?.+)\/.*\?groupId=(?([a-z0-9\-])+)$/.exec( + url, + ); + if (!m) throw new Error("Invalid Teams channel URL"); + const { teamId, channelId } = m.groups; + return { teamId, channelId }; } export interface MicrosoftTeamsEntity { - webUrl: string - name: string + webUrl: string; + name: string; } function generatedByFooter(script: PromptScript, info: { runUrl?: string }) { - if (!script) - return `\n

function generatedByFooter(script: PromptScript, info: { runUrl?: string }) {
-    if (!script)
-        return `\n<p>AI-generated may be incorrect</p>
\n` - return `\n
<p>AI-generated by ${info?.runUrl ? `<a href="${info.runUrl}">${HTMLEscape(script.id)}</a>` : HTMLEscape(script.id)} may be incorrect</p>
\n` + if (!script) return `\n
<p>AI-generated may be incorrect</p>
\n`; + return `\n
<p>AI-generated by ${info?.runUrl ? `<a href="${info.runUrl}">${HTMLEscape(script.id)}</a>` : HTMLEscape(script.id)} may be incorrect</p>
\n`; } /** @@ -77,88 +76,83 @@ function generatedByFooter(script: PromptScript, info: { runUrl?: string }) { * @returns */ async function microsoftTeamsChannelUploadFile( - token: string, - channelUrl: string, - file: string | WorkspaceFileWithDescription, - options?: { folder?: string; disclaimer?: string } & TraceOptions & - CancellationOptions + token: string, + channelUrl: string, + file: string | WorkspaceFileWithDescription, + options?: { folder?: string; disclaimer?: string } & TraceOptions & CancellationOptions, ): Promise { - const { disclaimer } = options || {} + const { disclaimer } = options || {}; - const filename = filenameOrFileToFilename(file) - const description = typeof file === "object" ? file.description : undefined - logVerbose(`teams: uploading ${filename}...`) + const filename = filenameOrFileToFilename(file); + const description = typeof file === "object" ? file.description : undefined; + logVerbose(`teams: uploading ${filename}...`); - const { teamId, channelId } = parseTeamsChannelUrl(channelUrl) - const Authorization = `Bearer ${token}` + const { teamId, channelId } = parseTeamsChannelUrl(channelUrl); + const Authorization = `Bearer ${token}`; - const channelInfoUrl = `https://graph.microsoft.com/v1.0/teams/${teamId}/channels/${channelId}` - const fetch = await createFetch({ ...(options || {}), retries: 1 }) - const channelInfoRes = await fetch(channelInfoUrl, { - headers: { - Authorization, - }, - }) - if (!channelInfoRes.ok) { - throw new Error( - `Failed to get channel info: ${channelInfoRes.status} ${channelInfoRes.statusText}` - ) - } - const channelInfo = await channelInfoRes.json() - const root = channelInfo.displayName + const channelInfoUrl = `https://graph.microsoft.com/v1.0/teams/${teamId}/channels/${channelId}`; + const fetch = await createFetch({ ...(options || {}), retries: 1 }); + const channelInfoRes = await fetch(channelInfoUrl, { + headers: { + Authorization, + }, + }); + if (!channelInfoRes.ok) { + throw new Error( + `Failed to get channel info: ${channelInfoRes.status} ${channelInfoRes.statusText}`, + ); + } + const channelInfo = await channelInfoRes.json(); + const root = channelInfo.displayName; - // resolve channel folder name - const content = await resolveFileBytes(file, options) - if (!file) throw new Error(`${filename} not found`) - const folder = options?.folder || TOOL_ID - const itemUrl = `https://graph.microsoft.com/v1.0/groups/${teamId}/drive/root:/${root}/${folder}/${basename( - filename - )}` - const contentUrl = `${itemUrl}:/content` - const mime = await fileTypeFromBuffer(content) - const res = await fetch(contentUrl, { - method: "PUT", - headers: { - Authorization, - "Content-Type": mime?.mime || "application/octet-stream", - }, - body: content, - }) - if (!res.ok) { - logError(await res.text()) - throw new Error( - `Failed to upload file: ${res.status} ${res.statusText}` - ) - } - const j = (await res.json()) as MicrosoftTeamsEntity - logVerbose(`teams: uploaded ${filename} to ${j.webUrl}`) + // resolve channel folder name + const content = await resolveFileBytes(file, options); + if (!file) throw new Error(`${filename} not found`); + const folder = options?.folder || TOOL_ID; + const itemUrl = `https://graph.microsoft.com/v1.0/groups/${teamId}/drive/root:/${root}/${folder}/${basename( + filename, + )}`; + const contentUrl = `${itemUrl}:/content`; + const mime = await fileTypeFromBuffer(content); + const res = await fetch(contentUrl, { + method: "PUT", + headers: { + Authorization, + "Content-Type": mime?.mime || 
"application/octet-stream", + }, + body: content, + }); + if (!res.ok) { + logError(await res.text()); + throw new Error(`Failed to upload file: ${res.status} ${res.statusText}`); + } + const j = (await res.json()) as MicrosoftTeamsEntity; + logVerbose(`teams: uploaded ${filename} to ${j.webUrl}`); - if (disclaimer || description) { - const html = convertMarkdownToTeamsHTML(description) - if (disclaimer) html.content += disclaimer + if (disclaimer || description) { + const html = convertMarkdownToTeamsHTML(description); + if (disclaimer) html.content += disclaimer; - const dbody = deleteUndefinedValues({ - description: html.content, - title: html.subject, - }) - const resd = await fetch(itemUrl, { - method: "PATCH", - headers: { - Authorization, - "Content-Type": "application/json", - }, - body: JSON.stringify(dbody), - }) - if (!resd.ok) { - logVerbose(`description: ${dbody.description}`) - logVerbose(await resd.json()) - throw new Error( - `Failed to update file description: ${resd.status} ${resd.statusText}` - ) - } + const dbody = deleteUndefinedValues({ + description: html.content, + title: html.subject, + }); + const resd = await fetch(itemUrl, { + method: "PATCH", + headers: { + Authorization, + "Content-Type": "application/json", + }, + body: JSON.stringify(dbody), + }); + if (!resd.ok) { + logVerbose(`description: ${dbody.description}`); + logVerbose(await resd.json()); + throw new Error(`Failed to update file description: ${resd.status} ${resd.statusText}`); } + } - return j + return j; } /** @@ -175,139 +169,130 @@ async function microsoftTeamsChannelUploadFile( * @returns A promise resolving to the created message entity containing the message's metadata, including its web URL. */ export async function microsoftTeamsChannelPostMessage( - channelUrl: string, - message: string, - options?: { - script?: PromptScript - info?: { runUrl?: string } - files?: (string | WorkspaceFileWithDescription)[] - folder?: string - disclaimer?: boolean | string - } & TraceOptions & - CancellationOptions + channelUrl: string, + message: string, + options?: { + script?: PromptScript; + info?: { runUrl?: string }; + files?: (string | WorkspaceFileWithDescription)[]; + folder?: string; + disclaimer?: boolean | string; + } & TraceOptions & + CancellationOptions, ): Promise { - logVerbose(`teams: posting message to ${channelUrl}`) + logVerbose(`teams: posting message to ${channelUrl}`); - const { files = [] } = options || {} - const { teamId, channelId } = parseTeamsChannelUrl(channelUrl) - const authToken = await runtimeHost.microsoftGraphToken.token("default") - const token = authToken?.token?.token - if (!token) { - logError("Microsoft Graph token not available") - return undefined - } + const { files = [] } = options || {}; + const { teamId, channelId } = parseTeamsChannelUrl(channelUrl); + const authToken = await runtimeHost.microsoftGraphToken.token("default"); + const token = authToken?.token?.token; + if (!token) { + logError("Microsoft Graph token not available"); + return undefined; + } - // convert message to html - const { content, subject } = convertMarkdownToTeamsHTML(message) - const disclaimer = - typeof options.disclaimer === "string" - ? `\n
<p>${HTMLEscape(options.disclaimer)}</p>
\n` - : options.disclaimer !== false - ? generatedByFooter(options?.script, options?.info) - : undefined + // convert message to html + const { content, subject } = convertMarkdownToTeamsHTML(message); + const disclaimer = + typeof options.disclaimer === "string" + ? `\n
<p>${HTMLEscape(options.disclaimer)}</p>
\n` + : options.disclaimer !== false + ? generatedByFooter(options?.script, options?.info) + : undefined; - const body = deleteUndefinedValues({ - body: { - contentType: "html", - content, - }, - subject, - attachments: [] as any[], - }) + const body = deleteUndefinedValues({ + body: { + contentType: "html", + content, + }, + subject, + attachments: [] as any[], + }); - for (const file of files) { - const fres = await microsoftTeamsChannelUploadFile( - token, - channelUrl, - file, - { - ...options, - disclaimer, - } - ) - const guid = crypto.randomUUID() - body.body.content += "\n" + `` - body.attachments.push({ - id: guid, - contentType: "reference", - contentUrl: fres.webUrl, - name: fres.name, - thumbnailUrl: null, - }) - } + for (const file of files) { + const fres = await microsoftTeamsChannelUploadFile(token, channelUrl, file, { + ...options, + disclaimer, + }); + const guid = crypto.randomUUID(); + body.body.content += "\n" + ``; + body.attachments.push({ + id: guid, + contentType: "reference", + contentUrl: fres.webUrl, + name: fres.name, + thumbnailUrl: null, + }); + } - // finalize message - if (disclaimer) body.body.content += disclaimer + // finalize message + if (disclaimer) body.body.content += disclaimer; - const url = `https://graph.microsoft.com/v1.0/teams/${teamId}/channels/${channelId}/messages` - const fetch = await createFetch({ ...(options || {}), retries: 1 }) - const response = await fetch(url, { - method: "POST", - headers: { - Authorization: `Bearer ${token}`, - "Content-Type": "application/json", - }, - body: JSON.stringify(body), - }) + const url = `https://graph.microsoft.com/v1.0/teams/${teamId}/channels/${channelId}/messages`; + const fetch = await createFetch({ ...(options || {}), retries: 1 }); + const response = await fetch(url, { + method: "POST", + headers: { + Authorization: `Bearer ${token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }); - if (!response.ok) { - const err: any = await response.text() - logError(err) - return undefined - } + if (!response.ok) { + const err: any = await response.text(); + logError(err); + return undefined; + } - const data: any = await response.json() - const { webUrl } = data - logVerbose(`teams: message created at ${webUrl}`) - return data + const data: any = await response.json(); + const { webUrl } = data; + logVerbose(`teams: message created at ${webUrl}`); + return data; } class MicrosoftTeamsChannelClient implements MessageChannelClient { - constructor(public readonly channelUrl: string) {} + constructor(public readonly channelUrl: string) {} - get teamId() { - const { teamId } = parseTeamsChannelUrl(this.channelUrl) - return teamId - } + get teamId() { + const { teamId } = parseTeamsChannelUrl(this.channelUrl); + return teamId; + } - get channelId() { - const { channelId } = parseTeamsChannelUrl(this.channelUrl) - return channelId - } + get channelId() { + const { channelId } = parseTeamsChannelUrl(this.channelUrl); + return channelId; + } - /** - * Posts a message with attachments to the channel - * @param message - * @param options - */ - async postMessage( - message: string, - options?: { - /** - * File attachments that will be added in the channel folder - */ - files?: string[] - /** - * Sets to false to remove AI generated disclaimer - */ - disclaimer?: boolean | string - } - ): Promise { - const { files, disclaimer } = options || {} - const res = await microsoftTeamsChannelPostMessage( - this.channelUrl, - dedent(message), - { - files, - disclaimer, - } - ) - return 
res.webUrl - } + /** + * Posts a message with attachments to the channel + * @param message + * @param options + */ + async postMessage( + message: string, + options?: { + /** + * File attachments that will be added in the channel folder + */ + files?: string[]; + /** + * Sets to false to remove AI generated disclaimer + */ + disclaimer?: boolean | string; + }, + ): Promise { + const { files, disclaimer } = options || {}; + const res = await microsoftTeamsChannelPostMessage(this.channelUrl, dedent(message), { + files, + disclaimer, + }); + return res.webUrl; + } - toString() { - return this.channelUrl - } + toString() { + return this.channelUrl; + } } /** @@ -323,13 +308,8 @@ class MicrosoftTeamsChannelClient implements MessageChannelClient { * * @returns An instance of a MicrosoftTeamsChannelClient for interacting with the specified channel. */ -export function createMicrosoftTeamsChannelClient( - url: string -): MessageChannelClient { - if (!url) - url = - process.env.GENAISCRIPT_TEAMS_CHANNEL_URL || - process.env.GENAISCRIPT_TEAMS_URL - if (!parseTeamsChannelUrl(url)) throw new Error("Invalid Teams channel URL") - return new MicrosoftTeamsChannelClient(url) +export function createMicrosoftTeamsChannelClient(url: string): MessageChannelClient { + if (!url) url = process.env.GENAISCRIPT_TEAMS_CHANNEL_URL || process.env.GENAISCRIPT_TEAMS_URL; + if (!parseTeamsChannelUrl(url)) throw new Error("Invalid Teams channel URL"); + return new MicrosoftTeamsChannelClient(url); } diff --git a/packages/core/src/template.ts b/packages/core/src/template.ts index c679455921..d75fc10e6d 100644 --- a/packages/core/src/template.ts +++ b/packages/core/src/template.ts @@ -4,13 +4,13 @@ * data types and formats. */ -import { GENAI_ANY_REGEX, PROMPTY_REGEX } from "./constants" -import { host } from "./host" -import { JSON5TryParse } from "./json5" -import { humanize } from "./inflection" -import { promptyParse, promptyToGenAIScript } from "./prompty" -import { metadataValidate } from "./metadata" -import { deleteUndefinedValues } from "./cleaners" +import { GENAI_ANY_REGEX, PROMPTY_REGEX } from "./constants"; +import { host } from "./host"; +import { JSON5TryParse } from "./json5"; +import { humanize } from "./inflection"; +import { promptyParse, promptyToGenAIScript } from "./prompty"; +import { metadataValidate } from "./metadata"; +import { deleteUndefinedValues } from "./cleaners"; /** * Extracts a template ID from the given filename by removing specific extensions @@ -20,10 +20,10 @@ import { deleteUndefinedValues } from "./cleaners" * @returns The extracted template ID. */ export function templateIdFromFileName(filename: string) { - return filename - .replace(/\.(mjs|ts|js|mts|prompty)$/i, "") - .replace(/\.genai$/i, "") - .replace(/.*[\/\\]/, "") + return filename + .replace(/\.(mjs|ts|js|mts|prompty)$/i, "") + .replace(/\.genai$/i, "") + .replace(/.*[\/\\]/, ""); } /** @@ -34,38 +34,34 @@ export function templateIdFromFileName(filename: string) { * @returns An object containing extracted metadata, tool definitions, and system-specific properties. */ export function parsePromptScriptMeta( - jsSource: string + jsSource: string, ): PromptArgs & Pick { - const m = /\b(?system|script)\(\s*(?\{.*?\})\s*\)/s.exec( - jsSource - ) - const meta: PromptArgs & Pick = - JSON5TryParse(m?.groups?.meta) ?? 
{} - if (m?.groups?.kind === "system") { - meta.unlisted = true - meta.isSystem = true - meta.group = meta.group || "system" - } - meta.defTools = parsePromptScriptTools(jsSource) - meta.metadata = metadataValidate(meta.metadata) - return deleteUndefinedValues(meta) + const m = /\b(?system|script)\(\s*(?\{.*?\})\s*\)/s.exec(jsSource); + const meta: PromptArgs & Pick = JSON5TryParse(m?.groups?.meta) ?? {}; + if (m?.groups?.kind === "system") { + meta.unlisted = true; + meta.isSystem = true; + meta.group = meta.group || "system"; + } + meta.defTools = parsePromptScriptTools(jsSource); + meta.metadata = metadataValidate(meta.metadata); + return deleteUndefinedValues(meta); } function parsePromptScriptTools(jsSource: string) { - const tools: { id: string; description: string; kind: "tool" | "agent" }[] = - [] - jsSource.replace( - /def(?Tool|Agent)\s*\(\s*"(?[^"]+?)"\s*,\s*"(?[^"]+?)"/g, - (m, kind, id, description) => { - tools.push({ - id: kind === "Agent" ? "agent_" + id : id, - description, - kind: kind.toLocaleLowerCase(), - }) - return "" - } - ) - return tools + const tools: { id: string; description: string; kind: "tool" | "agent" }[] = []; + jsSource.replace( + /def(?Tool|Agent)\s*\(\s*"(?[^"]+?)"\s*,\s*"(?[^"]+?)"/g, + (m, kind, id, description) => { + tools.push({ + id: kind === "Agent" ? "agent_" + id : id, + description, + kind: kind.toLocaleLowerCase(), + }); + return ""; + }, + ); + return tools; } /** @@ -78,17 +74,15 @@ function parsePromptScriptTools(jsSource: string) { * @returns The parsed PromptScript or undefined in case of errors. */ async function parsePromptTemplateCore(filename: string, content: string) { - const r = { - id: templateIdFromFileName(filename), - title: humanize( - host.path.basename(filename).replace(GENAI_ANY_REGEX, "") - ), - jsSource: content, - } as PromptScript - r.filename = host.path.resolve(filename) - const meta = parsePromptScriptMeta(r.jsSource) - Object.assign(r, meta) - return r + const r = { + id: templateIdFromFileName(filename), + title: humanize(host.path.basename(filename).replace(GENAI_ANY_REGEX, "")), + jsSource: content, + } as PromptScript; + r.filename = host.path.resolve(filename); + const meta = parsePromptScriptMeta(r.jsSource); + Object.assign(r, meta); + return r; } /** @@ -99,14 +93,14 @@ async function parsePromptTemplateCore(filename: string, content: string) { * @returns The parsed PromptScript or undefined in case of errors. 
*/ export async function parsePromptScript(filename: string, content: string) { - let text: string = undefined - if (PROMPTY_REGEX.test(filename)) { - text = content - const doc = await promptyParse(filename, content) - content = await promptyToGenAIScript(doc) - } + let text: string = undefined; + if (PROMPTY_REGEX.test(filename)) { + text = content; + const doc = await promptyParse(filename, content); + content = await promptyToGenAIScript(doc); + } - const script = await parsePromptTemplateCore(filename, content) - if (text) script.text = text - return script + const script = await parsePromptTemplateCore(filename, content); + if (text) script.text = text; + return script; } diff --git a/packages/core/src/terminal.ts b/packages/core/src/terminal.ts index 916c3b7296..7ef0407b2a 100644 --- a/packages/core/src/terminal.ts +++ b/packages/core/src/terminal.ts @@ -1,3 +1,3 @@ -import terminalSize_ from "terminal-size" +import terminalSize_ from "terminal-size"; -export const terminalSize = terminalSize_ +export const terminalSize = terminalSize_; diff --git a/packages/core/src/testhost.ts b/packages/core/src/testhost.ts index cb83fd4d26..91f3ef9f07 100644 --- a/packages/core/src/testhost.ts +++ b/packages/core/src/testhost.ts @@ -3,235 +3,223 @@ // Tags: RuntimeHost, TestHost, LanguageModel, FileSystem, Node.js // Import necessary modules and functions from various files -import { readFile, writeFile } from "fs/promises" -import { ensureDir } from "fs-extra" +import { readFile, writeFile } from "fs/promises"; +import { ensureDir } from "fs-extra"; import { - ServerManager, - UTF8Decoder, - UTF8Encoder, - setRuntimeHost, - RuntimeHost, - ModelConfigurations, - ModelConfiguration, -} from "./host" -import { TraceOptions } from "./trace" -import { resolve } from "node:path" -import { LanguageModel } from "./chat" -import { errorMessage, NotSupportedError } from "./error" -import { - LanguageModelConfiguration, - LogLevel, - Project, - ResponseStatus, -} from "./server/messages" -import { defaultModelConfigurations } from "./llms" -import { CancellationToken } from "./cancellation" -import { createNodePath } from "./path" -import { McpClientManager } from "./mcpclient" -import { ResourceManager } from "./mcpresource" -import { execSync } from "node:child_process" -import { shellQuote } from "./shell" -import { genaiscriptDebug } from "./debug" -const dbg = genaiscriptDebug("host:test") + ServerManager, + UTF8Decoder, + UTF8Encoder, + setRuntimeHost, + RuntimeHost, + ModelConfigurations, + ModelConfiguration, +} from "./host"; +import { TraceOptions } from "./trace"; +import { resolve } from "node:path"; +import { LanguageModel } from "./chat"; +import { errorMessage, NotSupportedError } from "./error"; +import { LanguageModelConfiguration, LogLevel, Project, ResponseStatus } from "./server/messages"; +import { defaultModelConfigurations } from "./llms"; +import { CancellationToken } from "./cancellation"; +import { createNodePath } from "./path"; +import { McpClientManager } from "./mcpclient"; +import { ResourceManager } from "./mcpresource"; +import { execSync } from "node:child_process"; +import { shellQuote } from "./shell"; +import { genaiscriptDebug } from "./debug"; +const dbg = genaiscriptDebug("host:test"); // Class representing a test host for runtime, implementing the RuntimeHost interface export class TestHost implements RuntimeHost { - project: Project - // State object to store user-specific data - userState: any = {} - // Server management service - server: ServerManager - // 
Instance of the path utility - path: Path = createNodePath() - // File system for workspace - workspace: WorkspaceFileSystem - - // Default options for language models - readonly modelAliases: ModelConfigurations = defaultModelConfigurations() - readonly mcp: McpClientManager - readonly resources: ResourceManager - - // Static method to set this class as the runtime host - static install() { - setRuntimeHost(new TestHost()) - } - - constructor() { - this.resources = new ResourceManager() - } - - async pullModel( - cfg: LanguageModelConfiguration, - options?: TraceOptions & CancellationToken - ): Promise { - return { ok: true } - } - - clearModelAlias(source: "cli" | "env" | "config" | "script"): void { - ;(this.modelAliases as any)[source] = {} - } - setModelAlias( - source: "cli" | "env" | "config", - id: string, - value: string | ModelConfiguration - ): void { - if (typeof value === "string") value = { source, model: value } - this.modelAliases[id] = value - } - async readConfig() { - return {} - } - - get config() { - return {} - } - - contentSafety( - id?: "azure", - options?: TraceOptions - ): Promise { - throw new NotSupportedError("contentSafety") - } - - // Method to create a UTF-8 decoder - createUTF8Decoder(): UTF8Decoder { - return new TextDecoder("utf-8") - } - - // Method to create a UTF-8 encoder - createUTF8Encoder(): UTF8Encoder { - return new TextEncoder() - } - - // Method to get the current project folder path - projectFolder(): string { - return resolve(".") - } - - // Placeholder for path resolution method - resolvePath(...segments: string[]): string { - return this.path.resolve(...segments) - } - - // Placeholder for reading a secret value - readSecret(name: string): Promise { - throw new Error("Method not implemented.") - } - - // Placeholder for browsing a URL - browse(url: string, options?: BrowseSessionOptions): Promise { - throw new Error("Method not implemented.") - } - - // Placeholder for getting language model configuration - getLanguageModelConfiguration( - modelId: string - ): Promise { - throw new Error("Method not implemented.") - } - - // Optional client language model - clientLanguageModel?: LanguageModel - - // Placeholder for logging functionality - log(level: LogLevel, msg: string): void { - console[level](msg) - } - - // Method to read a file and return its content as a Uint8Array - async readFile(name: string): Promise { - return new Uint8Array(await readFile(resolve(name))) - } - - async statFile(name: string): Promise<{ - size: number - type: "file" | "directory" - }> { - return undefined - } - - // Method to write content to a file - async writeFile(name: string, content: Uint8Array): Promise { - await writeFile(resolve(name), content) - } - - // Placeholder for file deletion functionality - deleteFile(name: string): Promise { - throw new Error("Method not implemented.") - } - - // Placeholder for finding files with a glob pattern - async findFiles(pattern: string, options?: {}): Promise { - return [pattern] - } - - // Placeholder for creating a directory - async createDirectory(name: string): Promise { - await ensureDir(name) - } - - // Placeholder for deleting a directory - deleteDirectory(name: string): Promise { - throw new Error("Method not implemented.") - } - - // Placeholder for executing a shell command in a container - async exec( - containerId: string, - command: string, - args: string[], - options: ShellOptions - ): Promise { - if (containerId) throw new Error("Container not started") - try { - const cmd = command + " " + 
shellQuote(args) - dbg(`%s> %s`, process.cwd(), cmd) - const stdout = await execSync(cmd, { encoding: "utf-8" }) - return { - stdout, - exitCode: 0, - failed: false, - } - } catch (error) { - return { - stderr: errorMessage(error), - failed: true, - exitCode: -1, - } - } - } - // Placeholder for creating a container host - container( - options: ContainerOptions & TraceOptions - ): Promise { - throw new Error("Method not implemented.") - } - - /** - * Instantiates a python evaluation environment - */ - python(options?: PythonRuntimeOptions): Promise { - throw new Error("python") - } - - // Async method to remove containers - async removeContainers(): Promise {} - - // Async method to remove browsers - async removeBrowsers(): Promise {} - - // Placeholder for selecting an option from a list - select(message: string, options: string[]): Promise { - throw new Error("Method not implemented.") - } - - // Placeholder for input functionality - input(message: string): Promise { - throw new Error("Method not implemented.") - } - - // Placeholder for confirmation functionality - confirm(message: string): Promise { - throw new Error("Method not implemented.") - } + project: Project; + // State object to store user-specific data + userState: any = {}; + // Server management service + server: ServerManager; + // Instance of the path utility + path: Path = createNodePath(); + // File system for workspace + workspace: WorkspaceFileSystem; + + // Default options for language models + readonly modelAliases: ModelConfigurations = defaultModelConfigurations(); + readonly mcp: McpClientManager; + readonly resources: ResourceManager; + + // Static method to set this class as the runtime host + static install() { + setRuntimeHost(new TestHost()); + } + + constructor() { + this.resources = new ResourceManager(); + } + + async pullModel( + cfg: LanguageModelConfiguration, + options?: TraceOptions & CancellationToken, + ): Promise { + return { ok: true }; + } + + clearModelAlias(source: "cli" | "env" | "config" | "script"): void { + (this.modelAliases as any)[source] = {}; + } + setModelAlias( + source: "cli" | "env" | "config", + id: string, + value: string | ModelConfiguration, + ): void { + if (typeof value === "string") value = { source, model: value }; + this.modelAliases[id] = value; + } + async readConfig() { + return {}; + } + + get config() { + return {}; + } + + contentSafety(id?: "azure", options?: TraceOptions): Promise { + throw new NotSupportedError("contentSafety"); + } + + // Method to create a UTF-8 decoder + createUTF8Decoder(): UTF8Decoder { + return new TextDecoder("utf-8"); + } + + // Method to create a UTF-8 encoder + createUTF8Encoder(): UTF8Encoder { + return new TextEncoder(); + } + + // Method to get the current project folder path + projectFolder(): string { + return resolve("."); + } + + // Placeholder for path resolution method + resolvePath(...segments: string[]): string { + return this.path.resolve(...segments); + } + + // Placeholder for reading a secret value + readSecret(name: string): Promise { + throw new Error("Method not implemented."); + } + + // Placeholder for browsing a URL + browse(url: string, options?: BrowseSessionOptions): Promise { + throw new Error("Method not implemented."); + } + + // Placeholder for getting language model configuration + getLanguageModelConfiguration(modelId: string): Promise { + throw new Error("Method not implemented."); + } + + // Optional client language model + clientLanguageModel?: LanguageModel; + + // Placeholder for logging functionality 
+ log(level: LogLevel, msg: string): void { + console[level](msg); + } + + // Method to read a file and return its content as a Uint8Array + async readFile(name: string): Promise { + return new Uint8Array(await readFile(resolve(name))); + } + + async statFile(name: string): Promise<{ + size: number; + type: "file" | "directory"; + }> { + return undefined; + } + + // Method to write content to a file + async writeFile(name: string, content: Uint8Array): Promise { + await writeFile(resolve(name), content); + } + + // Placeholder for file deletion functionality + deleteFile(name: string): Promise { + throw new Error("Method not implemented."); + } + + // Placeholder for finding files with a glob pattern + async findFiles(pattern: string, options?: {}): Promise { + return [pattern]; + } + + // Placeholder for creating a directory + async createDirectory(name: string): Promise { + await ensureDir(name); + } + + // Placeholder for deleting a directory + deleteDirectory(name: string): Promise { + throw new Error("Method not implemented."); + } + + // Placeholder for executing a shell command in a container + async exec( + containerId: string, + command: string, + args: string[], + options: ShellOptions, + ): Promise { + if (containerId) throw new Error("Container not started"); + try { + const cmd = command + " " + shellQuote(args); + dbg(`%s> %s`, process.cwd(), cmd); + const stdout = await execSync(cmd, { encoding: "utf-8" }); + return { + stdout, + exitCode: 0, + failed: false, + }; + } catch (error) { + return { + stderr: errorMessage(error), + failed: true, + exitCode: -1, + }; + } + } + // Placeholder for creating a container host + container(options: ContainerOptions & TraceOptions): Promise { + throw new Error("Method not implemented."); + } + + /** + * Instantiates a python evaluation environment + */ + python(options?: PythonRuntimeOptions): Promise { + throw new Error("python"); + } + + // Async method to remove containers + async removeContainers(): Promise {} + + // Async method to remove browsers + async removeBrowsers(): Promise {} + + // Placeholder for selecting an option from a list + select(message: string, options: string[]): Promise { + throw new Error("Method not implemented."); + } + + // Placeholder for input functionality + input(message: string): Promise { + throw new Error("Method not implemented."); + } + + // Placeholder for confirmation functionality + confirm(message: string): Promise { + throw new Error("Method not implemented."); + } } diff --git a/packages/core/src/textsplitter.test.ts b/packages/core/src/textsplitter.test.ts index a6a681e11a..c9d6dda83d 100644 --- a/packages/core/src/textsplitter.test.ts +++ b/packages/core/src/textsplitter.test.ts @@ -1,151 +1,151 @@ -import { TextSplitter, TextSplitterConfig, unchunk } from "./textsplitter" -import { describe, test } from "node:test" -import assert from "node:assert/strict" -import { resolveTokenEncoder } from "./encoders" -import { glob } from "glob" -import { readFile } from "fs/promises" -import { text } from "node:stream/consumers" +import { TextSplitter, TextSplitterConfig, unchunk } from "./textsplitter"; +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { resolveTokenEncoder } from "./encoders"; +import { glob } from "glob"; +import { readFile } from "fs/promises"; +import { text } from "node:stream/consumers"; describe("TextSplitter", async () => { - const defaultConfig: Partial = { - chunkSize: 10, - chunkOverlap: 2, - tokenizer: await 
resolveTokenEncoder("gpt-4o"), - } + const defaultConfig: Partial = { + chunkSize: 10, + chunkOverlap: 2, + tokenizer: await resolveTokenEncoder("gpt-4o"), + }; - test("TextSplitter split undefined", () => { - const textSplitter = new TextSplitter(defaultConfig) - const chunks = textSplitter.split(undefined) - assert.equal(chunks.length, 0) - }) + test("TextSplitter split undefined", () => { + const textSplitter = new TextSplitter(defaultConfig); + const chunks = textSplitter.split(undefined); + assert.equal(chunks.length, 0); + }); - test("TextSplitter should split text into chunks based on default separators", () => { - const textSplitter = new TextSplitter(defaultConfig) - const text = "This is a test text to split into chunks." - const chunks = textSplitter.split(text) + test("TextSplitter should split text into chunks based on default separators", () => { + const textSplitter = new TextSplitter(defaultConfig); + const text = "This is a test text to split into chunks."; + const chunks = textSplitter.split(text); - assert(chunks.length > 0) - const rebuild = chunks.map((c) => c.text).join("") - assert.equal(rebuild, text) - chunks.forEach((chunk) => { - assert(chunk.text) - assert(chunk.tokens.length <= defaultConfig.chunkSize!) - }) - }) + assert(chunks.length > 0); + const rebuild = chunks.map((c) => c.text).join(""); + assert.equal(rebuild, text); + chunks.forEach((chunk) => { + assert(chunk.text); + assert(chunk.tokens.length <= defaultConfig.chunkSize!); + }); + }); - test("TextSplitter should split text into chunks with overlap", () => { - const config: Partial = { - ...defaultConfig, - chunkSize: 5, - chunkOverlap: 2, - } - const textSplitter = new TextSplitter(config) - const text = "This is a test text to split into chunks." - const chunks = textSplitter.split(text) + test("TextSplitter should split text into chunks with overlap", () => { + const config: Partial = { + ...defaultConfig, + chunkSize: 5, + chunkOverlap: 2, + }; + const textSplitter = new TextSplitter(config); + const text = "This is a test text to split into chunks."; + const chunks = textSplitter.split(text); - assert(chunks.length > 0) - const rebuild = chunks.map((c) => c.text).join("") - assert.equal(rebuild, text) - chunks.forEach((chunk, index) => { - assert(chunk.text) - assert(chunk.tokens.length <= config.chunkSize!) 
- if (index > 0) { - assert(chunk.startOverlap.length === config.chunkOverlap) - } - }) - }) + assert(chunks.length > 0); + const rebuild = chunks.map((c) => c.text).join(""); + assert.equal(rebuild, text); + chunks.forEach((chunk, index) => { + assert(chunk.text); + assert(chunk.tokens.length <= config.chunkSize!); + if (index > 0) { + assert(chunk.startOverlap.length === config.chunkOverlap); + } + }); + }); - test("TextSplitter should throw an error if tokenizer is not provided", () => { - assert.throws(() => new TextSplitter({} as TextSplitterConfig), { - message: "Tokenizer is required", - }) - }) + test("TextSplitter should throw an error if tokenizer is not provided", () => { + assert.throws(() => new TextSplitter({} as TextSplitterConfig), { + message: "Tokenizer is required", + }); + }); - test("TextSplitter should throw an error if chunkSize is less than 1", () => { - assert.throws( - () => - new TextSplitter({ - ...defaultConfig, - chunkSize: 0, - } as TextSplitterConfig), - { - message: "chunkSize must be >= 1", - } - ) - }) + test("TextSplitter should throw an error if chunkSize is less than 1", () => { + assert.throws( + () => + new TextSplitter({ + ...defaultConfig, + chunkSize: 0, + } as TextSplitterConfig), + { + message: "chunkSize must be >= 1", + }, + ); + }); - test("TextSplitter should throw an error if chunkOverlap is less than 0", () => { - assert.throws( - () => - new TextSplitter({ - ...defaultConfig, - chunkOverlap: -1, - } as TextSplitterConfig), - { - message: "chunkOverlap must be >= 0", - } - ) - }) + test("TextSplitter should throw an error if chunkOverlap is less than 0", () => { + assert.throws( + () => + new TextSplitter({ + ...defaultConfig, + chunkOverlap: -1, + } as TextSplitterConfig), + { + message: "chunkOverlap must be >= 0", + }, + ); + }); - test("TextSplitter should throw an error if chunkOverlap is greater than chunkSize", () => { - assert.throws( - () => - new TextSplitter({ - ...defaultConfig, - chunkOverlap: 11, - } as TextSplitterConfig), - { - message: "chunkOverlap must be <= chunkSize", - } - ) - }) + test("TextSplitter should throw an error if chunkOverlap is greater than chunkSize", () => { + assert.throws( + () => + new TextSplitter({ + ...defaultConfig, + chunkOverlap: 11, + } as TextSplitterConfig), + { + message: "chunkOverlap must be <= chunkSize", + }, + ); + }); - test("TextSplitter should use default separators if none are provided", () => { - const textSplitter = new TextSplitter({ - ...defaultConfig, - separators: [], - }) - const text = "This is a test text to split into chunks." - const chunks = textSplitter.split(text) + test("TextSplitter should use default separators if none are provided", () => { + const textSplitter = new TextSplitter({ + ...defaultConfig, + separators: [], + }); + const text = "This is a test text to split into chunks."; + const chunks = textSplitter.split(text); - assert(chunks.length > 0) - console.log(chunks) - const rebuild = unchunk(text, chunks) - assert.equal(rebuild, text) - }) + assert(chunks.length > 0); + console.log(chunks); + const rebuild = unchunk(text, chunks); + assert.equal(rebuild, text); + }); - test("TextSplitter should split text based on provided separators", () => { - const config: Partial = { - ...defaultConfig, - separators: [" "], - } - const textSplitter = new TextSplitter(config) - const text = "This is a test text to split into chunks." 
- const chunks = textSplitter.split(text) + test("TextSplitter should split text based on provided separators", () => { + const config: Partial = { + ...defaultConfig, + separators: [" "], + }; + const textSplitter = new TextSplitter(config); + const text = "This is a test text to split into chunks."; + const chunks = textSplitter.split(text); - assert(chunks.length > 0) - chunks.forEach((chunk) => { - assert(chunk.text) - }) - }) + assert(chunks.length > 0); + chunks.forEach((chunk) => { + assert(chunk.text); + }); + }); - const docs = await glob("../../docs/src/**/*.mdx?") - for (const doc of docs) { - await test(doc, async () => { - const text = await readFile(doc, { encoding: "utf-8" }) - for (let i = 0; i < 10; i++) { - const chunkSize = Math.floor(Math.random() * 20) + 10 - const textSplitter = new TextSplitter({ - ...defaultConfig, - docType: i % 2 ? "markdown" : undefined, - chunkSize: Math.floor(Math.random() * 20) + 1, - }) - const chunks = textSplitter.split(text) - console.log(`chunk: ${chunkSize} -> ${chunks.length}`) - assert(chunks.length > 0) - const rebuild = unchunk(text, chunks) - assert.equal(rebuild, text) - } - }) - } -}) + const docs = await glob("../../docs/src/**/*.mdx?"); + for (const doc of docs) { + await test(doc, async () => { + const text = await readFile(doc, { encoding: "utf-8" }); + for (let i = 0; i < 10; i++) { + const chunkSize = Math.floor(Math.random() * 20) + 10; + const textSplitter = new TextSplitter({ + ...defaultConfig, + docType: i % 2 ? "markdown" : undefined, + chunkSize: Math.floor(Math.random() * 20) + 1, + }); + const chunks = textSplitter.split(text); + console.log(`chunk: ${chunkSize} -> ${chunks.length}`); + assert(chunks.length > 0); + const rebuild = unchunk(text, chunks); + assert.equal(rebuild, text); + } + }); + } +}); diff --git a/packages/core/src/textsplitter.ts b/packages/core/src/textsplitter.ts index a3703146c9..45fd0d198b 100644 --- a/packages/core/src/textsplitter.ts +++ b/packages/core/src/textsplitter.ts @@ -2,21 +2,21 @@ // removed tokenizer dependency export interface TextSplitterConfig { - separators: string[] - keepSeparators: boolean - chunkSize: number - chunkOverlap: number - tokenizer: Tokenizer - docType?: string + separators: string[]; + keepSeparators: boolean; + chunkSize: number; + chunkOverlap: number; + tokenizer: Tokenizer; + docType?: string; } export interface TextChunk { - text: string - tokens: number[] - startPos: number - endPos: number - startOverlap: number[] - endOverlap: number[] + text: string; + tokens: number[]; + startPos: number; + endPos: number; + startOverlap: number[]; + endOverlap: number[]; } /** @@ -28,587 +28,559 @@ export interface TextChunk { * @returns The reconstructed text built by combining all text chunks and their respective positions. 
*/ export function unchunk(text: string, chunks: TextChunk[]) { - let rebuild = "" - for (let i = 0; i < chunks.length; i++) { - const chunk = chunks[i] - if (i === 0 && chunk.startPos > 0) - rebuild += text.slice(0, chunk.startPos) - rebuild += text.slice(chunk.startPos, chunk.endPos) - if (chunk.endPos < text.length) { - const nextChuk = chunks[i + 1] - rebuild += text.slice(chunk.endPos, nextChuk?.startPos) - } + let rebuild = ""; + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]; + if (i === 0 && chunk.startPos > 0) rebuild += text.slice(0, chunk.startPos); + rebuild += text.slice(chunk.startPos, chunk.endPos); + if (chunk.endPos < text.length) { + const nextChuk = chunks[i + 1]; + rebuild += text.slice(chunk.endPos, nextChuk?.startPos); } - return rebuild + } + return rebuild; } export class TextSplitter { - private readonly _config: TextSplitterConfig + private readonly _config: TextSplitterConfig; - constructor(config?: Partial) { - this._config = Object.assign( - { - keepSeparators: false, - chunkSize: 400, - chunkOverlap: 40, - } as TextSplitterConfig, - config - ) + constructor(config?: Partial) { + this._config = Object.assign( + { + keepSeparators: false, + chunkSize: 400, + chunkOverlap: 40, + } as TextSplitterConfig, + config, + ); - if (!this._config.tokenizer) throw new Error("Tokenizer is required") + if (!this._config.tokenizer) throw new Error("Tokenizer is required"); - // Use default separators if none are provided - if (!this._config.separators || this._config.separators.length === 0) { - this._config.separators = this.getSeparators(this._config.docType) - } - - // Validate the config settings - if (this._config.chunkSize < 1) { - throw new Error("chunkSize must be >= 1") - } else if (this._config.chunkOverlap < 0) { - throw new Error("chunkOverlap must be >= 0") - } else if (this._config.chunkOverlap > this._config.chunkSize) { - throw new Error("chunkOverlap must be <= chunkSize") - } + // Use default separators if none are provided + if (!this._config.separators || this._config.separators.length === 0) { + this._config.separators = this.getSeparators(this._config.docType); } - public split(text: string): TextChunk[] { - if (!text) return [] + // Validate the config settings + if (this._config.chunkSize < 1) { + throw new Error("chunkSize must be >= 1"); + } else if (this._config.chunkOverlap < 0) { + throw new Error("chunkOverlap must be >= 0"); + } else if (this._config.chunkOverlap > this._config.chunkSize) { + throw new Error("chunkOverlap must be <= chunkSize"); + } + } - // Get basic chunks - const chunks = this.recursiveSplit(text, this._config.separators, 0) + public split(text: string): TextChunk[] { + if (!text) return []; - const that = this - function getOverlapTokens(tokens?: number[]): number[] { - if (tokens != undefined) { - const len = - tokens.length > that._config.chunkOverlap - ? that._config.chunkOverlap - : tokens.length - return tokens.slice(0, len) - } else { - return [] - } - } + // Get basic chunks + const chunks = this.recursiveSplit(text, this._config.separators, 0); - // Add overlap tokens and text to the start and end of each chunk - if (this._config.chunkOverlap > 0) { - for (let i = 1; i < chunks.length; i++) { - const previousChunk = chunks[i - 1] - const chunk = chunks[i] - const nextChunk = - i < chunks.length - 1 ? 
chunks[i + 1] : undefined - chunk.startOverlap = getOverlapTokens( - previousChunk.tokens.reverse() - ).reverse() - chunk.endOverlap = getOverlapTokens(nextChunk?.tokens) - } - } + const that = this; + function getOverlapTokens(tokens?: number[]): number[] { + if (tokens != undefined) { + const len = + tokens.length > that._config.chunkOverlap ? that._config.chunkOverlap : tokens.length; + return tokens.slice(0, len); + } else { + return []; + } + } - return chunks + // Add overlap tokens and text to the start and end of each chunk + if (this._config.chunkOverlap > 0) { + for (let i = 1; i < chunks.length; i++) { + const previousChunk = chunks[i - 1]; + const chunk = chunks[i]; + const nextChunk = i < chunks.length - 1 ? chunks[i + 1] : undefined; + chunk.startOverlap = getOverlapTokens(previousChunk.tokens.reverse()).reverse(); + chunk.endOverlap = getOverlapTokens(nextChunk?.tokens); + } } - private recursiveSplit( - text: string, - separators: string[], - startPos: number - ): TextChunk[] { - const chunks: TextChunk[] = [] - if (text.length > 0) { - // Split text into parts - let parts: string[] - let separator = "" - const nextSeparators = - separators.length > 1 ? separators.slice(1) : [] - if (separators.length > 0) { - // Split by separator - separator = separators[0] - parts = - separator == " " - ? this.splitBySpaces(text) - : text.split(separator) - } else { - // Cut text in half - const half = Math.floor(text.length / 2) - parts = [text.substring(0, half), text.substring(half)] - } + return chunks; + } - // Iterate over parts - for (let i = 0; i < parts.length; i++) { - const lastChunk = i === parts.length - 1 + private recursiveSplit(text: string, separators: string[], startPos: number): TextChunk[] { + const chunks: TextChunk[] = []; + if (text.length > 0) { + // Split text into parts + let parts: string[]; + let separator = ""; + const nextSeparators = separators.length > 1 ? separators.slice(1) : []; + if (separators.length > 0) { + // Split by separator + separator = separators[0]; + parts = separator == " " ? this.splitBySpaces(text) : text.split(separator); + } else { + // Cut text in half + const half = Math.floor(text.length / 2); + parts = [text.substring(0, half), text.substring(half)]; + } - // Get chunk text and endPos - let chunk = parts[i] - const endPos = - startPos + - (chunk.length - 1) + - (lastChunk ? 0 : separator.length) - if (this._config.keepSeparators && !lastChunk) { - chunk += separator - } + // Iterate over parts + for (let i = 0; i < parts.length; i++) { + const lastChunk = i === parts.length - 1; - // Ensure chunk contains text - if ( - !this._config.keepSeparators && - !this.containsAlphanumeric(chunk) - ) { - continue - } + // Get chunk text and endPos + let chunk = parts[i]; + const endPos = startPos + (chunk.length - 1) + (lastChunk ? 
0 : separator.length); + if (this._config.keepSeparators && !lastChunk) { + chunk += separator; + } - // Optimization to avoid encoding really large chunks - if (chunk.length / 6 > this._config.chunkSize) { - // Break the text into smaller chunks - const subChunks = this.recursiveSplit( - chunk, - nextSeparators, - startPos - ) - chunks.push(...subChunks) - } else { - // Encode chunk text - const tokens = this._config.tokenizer.encode(chunk) - if (tokens.length > this._config.chunkSize) { - // Break the text into smaller chunks - const subChunks = this.recursiveSplit( - chunk, - nextSeparators, - startPos - ) - chunks.push(...subChunks) - } else { - // Append chunk to output - chunks.push({ - text: chunk, - tokens: tokens, - startPos: startPos, - endPos: endPos, - startOverlap: [], - endOverlap: [], - }) - } - } + // Ensure chunk contains text + if (!this._config.keepSeparators && !this.containsAlphanumeric(chunk)) { + continue; + } - // Update startPos - startPos = endPos + 1 - } + // Optimization to avoid encoding really large chunks + if (chunk.length / 6 > this._config.chunkSize) { + // Break the text into smaller chunks + const subChunks = this.recursiveSplit(chunk, nextSeparators, startPos); + chunks.push(...subChunks); + } else { + // Encode chunk text + const tokens = this._config.tokenizer.encode(chunk); + if (tokens.length > this._config.chunkSize) { + // Break the text into smaller chunks + const subChunks = this.recursiveSplit(chunk, nextSeparators, startPos); + chunks.push(...subChunks); + } else { + // Append chunk to output + chunks.push({ + text: chunk, + tokens: tokens, + startPos: startPos, + endPos: endPos, + startOverlap: [], + endOverlap: [], + }); + } } - return this.combineChunks(chunks) + // Update startPos + startPos = endPos + 1; + } } - private combineChunks(chunks: TextChunk[]): TextChunk[] { - const combinedChunks: TextChunk[] = [] - let currentChunk: TextChunk | undefined - let currentLength = 0 - const separator = this._config.keepSeparators ? "" : " " - for (let i = 0; i < chunks.length; i++) { - const chunk = chunks[i] - if (currentChunk) { - const length = currentChunk.tokens.length + chunk.tokens.length - if (length > this._config.chunkSize) { - combinedChunks.push(currentChunk) - currentChunk = chunk - currentLength = chunk.tokens.length - } else { - currentChunk.text += separator + chunk.text - currentChunk.endPos = chunk.endPos - currentChunk.tokens.push(...chunk.tokens) - currentLength += chunk.tokens.length - } - } else { - currentChunk = chunk - currentLength = chunk.tokens.length - } - } - if (currentChunk) { - combinedChunks.push(currentChunk) + return this.combineChunks(chunks); + } + + private combineChunks(chunks: TextChunk[]): TextChunk[] { + const combinedChunks: TextChunk[] = []; + let currentChunk: TextChunk | undefined; + let currentLength = 0; + const separator = this._config.keepSeparators ? 
"" : " "; + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]; + if (currentChunk) { + const length = currentChunk.tokens.length + chunk.tokens.length; + if (length > this._config.chunkSize) { + combinedChunks.push(currentChunk); + currentChunk = chunk; + currentLength = chunk.tokens.length; + } else { + currentChunk.text += separator + chunk.text; + currentChunk.endPos = chunk.endPos; + currentChunk.tokens.push(...chunk.tokens); + currentLength += chunk.tokens.length; } - return combinedChunks + } else { + currentChunk = chunk; + currentLength = chunk.tokens.length; + } } - - private containsAlphanumeric(text: string): boolean { - return /[a-z0-9]/i.test(text) + if (currentChunk) { + combinedChunks.push(currentChunk); } + return combinedChunks; + } - private splitBySpaces(text: string): string[] { - // Split text by tokens and return parts - const parts: string[] = [] - let tokens = this._config.tokenizer.encode(text) - do { - if (tokens.length <= this._config.chunkSize) { - parts.push(this._config.tokenizer.decode(tokens)) - break - } else { - const span = tokens.splice(0, this._config.chunkSize) - parts.push(this._config.tokenizer.decode(span)) - } - } while (true) + private containsAlphanumeric(text: string): boolean { + return /[a-z0-9]/i.test(text); + } - return parts - } + private splitBySpaces(text: string): string[] { + // Split text by tokens and return parts + const parts: string[] = []; + let tokens = this._config.tokenizer.encode(text); + do { + if (tokens.length <= this._config.chunkSize) { + parts.push(this._config.tokenizer.decode(tokens)); + break; + } else { + const span = tokens.splice(0, this._config.chunkSize); + parts.push(this._config.tokenizer.decode(span)); + } + } while (true); - private getSeparators(docType?: string): string[] { - switch (docType ?? 
"") { - case "cpp": - return [ - // Split along class definitions - "\nclass ", - // Split along function definitions - "\nvoid ", - "\nint ", - "\nfloat ", - "\ndouble ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "go": - return [ - // Split along function definitions - "\nfunc ", - "\nvar ", - "\nconst ", - "\ntype ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "java": - case "c#": - case "csharp": - case "cs": - case "ts": - case "tsx": - case "typescript": - return [ - // split along regions - "// LLM-REGION", - "/* LLM-REGION", - "/** LLM-REGION", - // Split along class definitions - "\nclass ", - // Split along method definitions - "\npublic ", - "\nprotected ", - "\nprivate ", - "\nstatic ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "js": - case "jsx": - case "javascript": - return [ - // split along regions - "// LLM-REGION", - "/* LLM-REGION", - "/** LLM-REGION", - // Split along class definitions - "\nclass ", - // Split along function definitions - "\nfunction ", - "\nconst ", - "\nlet ", - "\nvar ", - "\nclass ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\nswitch ", - "\ncase ", - "\ndefault ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "php": - return [ - // Split along function definitions - "\nfunction ", - // Split along class definitions - "\nclass ", - // Split along control flow statements - "\nif ", - "\nforeach ", - "\nwhile ", - "\ndo ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "proto": - return [ - // Split along message definitions - "\nmessage ", - // Split along service definitions - "\nservice ", - // Split along enum definitions - "\nenum ", - // Split along option definitions - "\noption ", - // Split along import statements - "\nimport ", - // Split along syntax declarations - "\nsyntax ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "python": - case "py": - return [ - // First, try to split along class definitions - "\nclass ", - "\ndef ", - "\n\tdef ", - // Now split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "rst": - return [ - // Split along section titles - "\n===\n", - "\n---\n", - "\n***\n", - // Split along directive markers - "\n.. 
", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "ruby": - return [ - // Split along method definitions - "\ndef ", - "\nclass ", - // Split along control flow statements - "\nif ", - "\nunless ", - "\nwhile ", - "\nfor ", - "\ndo ", - "\nbegin ", - "\nrescue ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "rust": - return [ - // Split along function definitions - "\nfn ", - "\nconst ", - "\nlet ", - // Split along control flow statements - "\nif ", - "\nwhile ", - "\nfor ", - "\nloop ", - "\nmatch ", - "\nconst ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "scala": - return [ - // Split along class definitions - "\nclass ", - "\nobject ", - // Split along method definitions - "\ndef ", - "\nval ", - "\nvar ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\nmatch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "swift": - return [ - // Split along function definitions - "\nfunc ", - // Split along class definitions - "\nclass ", - "\nstruct ", - "\nenum ", - // Split along control flow statements - "\nif ", - "\nfor ", - "\nwhile ", - "\ndo ", - "\nswitch ", - "\ncase ", - // Split by the normal type of lines - "\n\n", - "\n", - " ", - ] - case "md": - case "markdown": - return [ - // First, try to split along Markdown headings (starting with level 2) - "\n## ", - "\n### ", - "\n#### ", - "\n##### ", - "\n###### ", - // Note the alternative syntax for headings (below) is not handled here - // Heading level 2 - // --------------- - // End of code block - "```\n\n", - // Horizontal lines - "\n\n***\n\n", - "\n\n---\n\n", - "\n\n___\n\n", - // Note that this splitter doesn't handle horizontal lines defined - // by *three or more* of ***, ---, or ___, but this is not handled - // Github tables - "", - // "", - // "", + // "
", - // "", + // "
", + // "", - "
", - "

", - "
", - "

  • ", - "

    ", - "

    ", - "

    ", - "

    ", - "

    ", - "
    ", - "", - "", - "", - "
    ", - "", - "