11import type { Static , TSchema } from "typebox" ;
22
3- import { ITool , type Tools } from ".." ;
3+ import { ITool } from ".." ;
44
55/**
66 * Built-in tool for prompting Large Language Models (LLMs).
@@ -43,7 +43,7 @@ import { ITool, type Tools } from "..";
4343 * });
4444 *
4545 * const response = await this.ai.prompt({
46- * model: AIModel.GPT_4O_MINI ,
46+ * model: { speed: "fast", cost: "medium" } ,
4747 * system: "Classify emails into categories: work, personal, spam, or promotional.",
4848 * prompt: `Categorize this email: ${emailContent}`,
4949 * outputSchema: schema
@@ -54,7 +54,7 @@ import { ITool, type Tools } from "..";
5454 *
5555 * async generateResponse(emailContent: string) {
5656 * const response = await this.ai.prompt({
57- * model: AIModel.GPT_4O_MINI ,
57+ * model: AIModel.GPT_5_MINI ,
5858 * system: "Generate professional email responses that are helpful and concise.",
5959 * prompt: `Write a response to: ${emailContent}`
6060 * });
@@ -76,24 +76,31 @@ export abstract class AI extends ITool {
7676 *
7777 * @example
7878 * ```typescript
79- * // Simple text generation
79+ * // Simple text generation with specific model
8080 * const response = await ai.prompt({
81- * model: AIModel.GPT_4O_MINI ,
81+ * model: AIModel.GPT_5_MINI ,
8282 * prompt: "Explain quantum computing in simple terms"
8383 * });
8484 * console.log(response.text);
8585 *
86+ * // Using model preferences for automatic selection
87+ * const response = await ai.prompt({
88+ * model: { speed: "fast", cost: "low" },
89+ * prompt: "Summarize this text..."
90+ * });
91+ * console.log(response.text);
92+ *
8693 * // With system instructions
8794 * const response = await ai.prompt({
88- * model: AIModel.CLAUDE_35_SONNET ,
95+ * model: { speed: "capable", cost: "high" } ,
8996 * system: "You are a helpful physics tutor.",
9097 * prompt: "Explain quantum entanglement"
9198 * });
9299 * console.log(response.text);
93100 *
94101 * // Multi-turn conversation
95102 * const response = await ai.prompt({
96- * model: AIModel.CLAUDE_35_SONNET ,
103+ * model: AIModel.CLAUDE_SONNET_45 ,
97104 * messages: [
98105 * { role: "user", content: "What is 2+2?" },
99106 * { role: "assistant", content: "2+2 equals 4." },
@@ -134,51 +141,128 @@ export abstract class AI extends ITool {
134141 * ```
135142 */
136143 abstract prompt < TOOLS extends AIToolSet , SCHEMA extends TSchema = never > (
137- _request : AIRequest < TOOLS , SCHEMA > ,
144+ _request : AIRequest < TOOLS , SCHEMA >
138145 ) : Promise < AIResponse < TOOLS , SCHEMA > > ;
139146}
140147
148+ /**
149+ * Model preferences for selecting an AI model based on performance and cost requirements.
150+ *
151+ * Allows specifying desired speed and cost tiers instead of picking a specific model.
152+ * The AI tool will automatically select the best available model matching these preferences.
153+ *
154+ * @example
155+ * ```typescript
156+ * // Fast and cheap - uses Workers AI models like Llama 3.2 1B
157+ * const response = await ai.prompt({
158+ * model: { speed: "fast", cost: "low" },
159+ * prompt: "Summarize this in one sentence: ..."
160+ * });
161+ *
162+ * // Balanced performance - uses GPT-5 Mini or Gemini 2.5 Flash
163+ * const response = await ai.prompt({
164+ * model: { speed: "balanced", cost: "medium" },
165+ * prompt: "Analyze this data..."
166+ * });
167+ *
168+ * // Most capable - uses Claude Sonnet 4.5 or Opus 4.1
169+ * const response = await ai.prompt({
170+ * model: { speed: "capable", cost: "high" },
171+ * prompt: "Solve this complex reasoning problem..."
172+ * });
173+ *
174+ * // Override with specific model hint
175+ * const response = await ai.prompt({
176+ * model: { speed: "balanced", cost: "medium", hint: "anthropic/claude-sonnet-4-5" },
177+ * prompt: "..."
178+ * });
179+ * ```
180+ */
181+ export type ModelPreferences = {
182+ /**
183+ * Desired speed tier:
184+ * - "fast": Optimized for low latency and quick responses
185+ * - "balanced": Good balance of speed and capability
186+ * - "capable": Prioritizes maximum reasoning and problem-solving ability over response latency
187+ */
188+ speed : "fast" | "balanced" | "capable" ;
189+
190+ /**
191+ * Desired cost tier:
192+ * - "low": Minimal cost, often using Workers AI models (free/very cheap)
193+ * - "medium": Moderate pricing for good performance
194+ * - "high": Premium pricing for best-in-class models
195+ */
196+ cost : "low" | "medium" | "high" ;
197+
198+ /**
199+ * Optional hint to override automatic model selection.
200+ * Specify a specific model ID to use instead of letting the system choose.
201+ */
202+ hint ?: string ;
203+ } ;
204+
141205/**
142206 * Supported AI models available through Cloudflare AI Gateway and Workers AI.
143207 *
144208 * Models are organized by provider:
145209 * - **OpenAI**: Latest GPT models via AI Gateway
146210 * - **Anthropic**: Claude models via AI Gateway (prefix with "anthropic/")
147211 * - **Google**: Gemini models via AI Gateway (prefix with "google/")
148- * - **Workers AI**: Models running on Cloudflare's network
212+ * - **Workers AI**: Models running on Cloudflare's network (free/low cost)
149213 */
150214export enum AIModel {
151- // OpenAI models
215+ // OpenAI models - Latest GPT and reasoning models
216+ GPT_5 = "openai/gpt-5" ,
217+ GPT_5_PRO = "openai/gpt-5-pro" ,
218+ GPT_5_MINI = "openai/gpt-5-mini" ,
219+ GPT_5_NANO = "openai/gpt-5-nano" ,
152220 GPT_4O = "openai/gpt-4o" ,
153221 GPT_4O_MINI = "openai/gpt-4o-mini" ,
154- GPT_4_TURBO = "openai/gpt-4-turbo " ,
155- GPT_35_TURBO = "openai/gpt-3.5-turbo " ,
222+ O3 = "openai/o3 " ,
223+ O3_MINI = "openai/o3-mini " ,
156224
157- // Anthropic models
158- CLAUDE_SONNET_4_5 = "anthropic/claude-sonnet-4-5" ,
159- CLAUDE_35_SONNET = "anthropic/claude-3-5-sonnet" ,
160- CLAUDE_3_OPUS = "anthropic/claude-3-opus" ,
225+ // Anthropic models - Claude 4.x and 3.7 series
226+ CLAUDE_SONNET_45 = "anthropic/claude-sonnet-4-5" ,
227+ CLAUDE_HAIKU_45 = "anthropic/claude-haiku-4-5" ,
228+ CLAUDE_OPUS_41 = "anthropic/claude-opus-4-1" ,
229+ CLAUDE_37_SONNET = "anthropic/claude-3-7-sonnet-latest" ,
161230
162- // Google models
231+ // Google models - Gemini 2.x series
232+ GEMINI_25_PRO = "google/gemini-2.5-pro" ,
163233 GEMINI_25_FLASH = "google/gemini-2.5-flash" ,
234+ GEMINI_25_FLASH_LITE = "google/gemini-2.5-flash-lite" ,
235+ GEMINI_20_FLASH = "google/gemini-2.0-flash" ,
236+ GEMINI_20_FLASH_LITE = "google/gemini-2.0-flash-lite" ,
164237
165- // Cloudflare Workers AI models
238+ // Cloudflare Workers AI models - Free/low-cost models running on Cloudflare's network
239+ LLAMA_4_SCOUT_17B = "meta/llama-4-scout-17b-16e-instruct" ,
166240 LLAMA_33_70B = "meta/llama-3.3-70b-instruct-fp8-fast" ,
167- LLAMA_31_8B = "meta/llama-3.1-8b-instruct-fast" ,
168- MISTRAL_7B = "meta/mistral-7b-instruct-v0.2" ,
241+ LLAMA_31_8B = "meta/llama-3.1-8b-instruct-fp8" ,
242+ LLAMA_32_1B = "meta/llama-3.2-1b-instruct" ,
243+ DEEPSEEK_R1_32B = "deepseek-ai/deepseek-r1-distill-qwen-32b" ,
169244}
170245
171246/**
172247 * Request parameters for AI text generation, matching Vercel AI SDK's generateText() function.
173248 */
174249export interface AIRequest <
175250 TOOLS extends AIToolSet ,
176- SCHEMA extends TSchema = never ,
251+ SCHEMA extends TSchema = never
177252> {
178253 /**
179254 * The AI model to use for generation.
255+ * Can be either a specific model from the AIModel enum or preferences (speed/cost tiers).
256+ *
257+ * @example
258+ * // Using a specific model
259+ * model: AIModel.GPT_5_MINI
260+ *
261+ * @example
262+ * // Using preferences
263+ * model: { speed: "fast", cost: "low" }
180264 */
181- model : AIModel ;
265+ model : AIModel | ModelPreferences ;
182266
183267 /**
184268 * System instructions to guide the model's behavior.
@@ -240,7 +324,7 @@ export interface AIRequest<
240324 */
241325export interface AIResponse <
242326 TOOLS extends AIToolSet ,
243- SCHEMA extends TSchema = never ,
327+ SCHEMA extends TSchema = never
244328> {
245329 /**
246330 * The generated text.
@@ -656,7 +740,7 @@ export type AITool<PARAMETERS extends ToolParameters = any, RESULT = any> = {
656740 */
657741 execute ?: (
658742 args : inferParameters < PARAMETERS > ,
659- options : ToolExecutionOptions ,
743+ options : ToolExecutionOptions
660744 ) => PromiseLike < RESULT > ;
661745} & (
662746 | {
0 commit comments