Skip to content

Commit febbd2e

Browse files
committed
AI model selection based on preferences
1 parent d8454b5 commit febbd2e

File tree

2 files changed

+113
-24
lines changed

2 files changed

+113
-24
lines changed

.changeset/fiery-times-create.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@plotday/sdk": minor
3+
---
4+
5+
Changed: BREAKING: AI.prompt() now accepts ModelPreferences (speed/cost tiers) in addition to an explicit AIModel, and several AIModel enum members were renamed or removed. This supports BYOK and user preferences.

sdk/src/tools/ai.ts

Lines changed: 108 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import type { Static, TSchema } from "typebox";
22

3-
import { ITool, type Tools } from "..";
3+
import { ITool } from "..";
44

55
/**
66
* Built-in tool for prompting Large Language Models (LLMs).
@@ -43,7 +43,7 @@ import { ITool, type Tools } from "..";
4343
* });
4444
*
4545
* const response = await this.ai.prompt({
46-
* model: AIModel.GPT_4O_MINI,
46+
* model: { speed: "fast", cost: "medium" },
4747
* system: "Classify emails into categories: work, personal, spam, or promotional.",
4848
* prompt: `Categorize this email: ${emailContent}`,
4949
* outputSchema: schema
@@ -54,7 +54,7 @@ import { ITool, type Tools } from "..";
5454
*
5555
* async generateResponse(emailContent: string) {
5656
* const response = await this.ai.prompt({
57-
* model: AIModel.GPT_4O_MINI,
57+
* model: AIModel.GPT_5_MINI,
5858
* system: "Generate professional email responses that are helpful and concise.",
5959
* prompt: `Write a response to: ${emailContent}`
6060
* });
@@ -76,24 +76,31 @@ export abstract class AI extends ITool {
7676
*
7777
* @example
7878
* ```typescript
79-
* // Simple text generation
79+
* // Simple text generation with specific model
8080
* const response = await ai.prompt({
81-
* model: AIModel.GPT_4O_MINI,
81+
* model: AIModel.GPT_5_MINI,
8282
* prompt: "Explain quantum computing in simple terms"
8383
* });
8484
* console.log(response.text);
8585
*
86+
* // Using model preferences for automatic selection
87+
* const response = await ai.prompt({
88+
* model: { speed: "fast", cost: "low" },
89+
* prompt: "Summarize this text..."
90+
* });
91+
* console.log(response.text);
92+
*
8693
* // With system instructions
8794
* const response = await ai.prompt({
88-
* model: AIModel.CLAUDE_35_SONNET,
95+
* model: { speed: "capable", cost: "high" },
8996
* system: "You are a helpful physics tutor.",
9097
* prompt: "Explain quantum entanglement"
9198
* });
9299
* console.log(response.text);
93100
*
94101
* // Multi-turn conversation
95102
* const response = await ai.prompt({
96-
* model: AIModel.CLAUDE_35_SONNET,
103+
* model: AIModel.CLAUDE_SONNET_45,
97104
* messages: [
98105
* { role: "user", content: "What is 2+2?" },
99106
* { role: "assistant", content: "2+2 equals 4." },
@@ -134,51 +141,128 @@ export abstract class AI extends ITool {
134141
* ```
135142
*/
136143
abstract prompt<TOOLS extends AIToolSet, SCHEMA extends TSchema = never>(
137-
_request: AIRequest<TOOLS, SCHEMA>,
144+
_request: AIRequest<TOOLS, SCHEMA>
138145
): Promise<AIResponse<TOOLS, SCHEMA>>;
139146
}
140147

148+
/**
149+
* Model preferences for selecting an AI model based on performance and cost requirements.
150+
*
151+
* Allows specifying desired speed and cost tiers instead of picking a specific model.
152+
* The AI tool will automatically select the best available model matching these preferences.
153+
*
154+
* @example
155+
* ```typescript
156+
* // Fast and cheap - uses Workers AI models like Llama 3.2 1B
157+
* const response = await ai.prompt({
158+
* model: { speed: "fast", cost: "low" },
159+
* prompt: "Summarize this in one sentence: ..."
160+
* });
161+
*
162+
* // Balanced performance - uses GPT-5 Mini or Gemini 2.5 Flash
163+
* const response = await ai.prompt({
164+
* model: { speed: "balanced", cost: "medium" },
165+
* prompt: "Analyze this data..."
166+
* });
167+
*
168+
* // Most capable - uses Claude Sonnet 4.5 or Opus 4.1
169+
* const response = await ai.prompt({
170+
* model: { speed: "capable", cost: "high" },
171+
* prompt: "Solve this complex reasoning problem..."
172+
* });
173+
*
174+
* // Override with specific model hint
175+
* const response = await ai.prompt({
176+
* model: { speed: "balanced", cost: "medium", hint: "anthropic/claude-sonnet-4-5" },
177+
* prompt: "..."
178+
* });
179+
* ```
180+
*/
181+
export type ModelPreferences = {
182+
/**
183+
* Desired speed tier:
184+
* - "fast": Optimized for low latency and quick responses
185+
* - "balanced": Good balance of speed and capability
186+
* - "capable": Maximum reasoning and problem-solving ability
187+
*/
188+
speed: "fast" | "balanced" | "capable";
189+
190+
/**
191+
* Desired cost tier:
192+
* - "low": Minimal cost, often using Workers AI models (free/very cheap)
193+
* - "medium": Moderate pricing for good performance
194+
* - "high": Premium pricing for best-in-class models
195+
*/
196+
cost: "low" | "medium" | "high";
197+
198+
/**
199+
* Optional hint to override automatic model selection.
200+
 * Provide a model ID to use instead of letting the system choose.
201+
*/
202+
hint?: string;
203+
};
204+
141205
/**
142206
* Supported AI models available through Cloudflare AI Gateway and Workers AI.
143207
*
144208
* Models are organized by provider:
145209
* - **OpenAI**: Latest GPT models via AI Gateway
146210
* - **Anthropic**: Claude models via AI Gateway (prefix with "anthropic/")
147211
* - **Google**: Gemini models via AI Gateway (prefix with "google-ai-studio/")
148-
* - **Workers AI**: Models running on Cloudflare's network
212+
* - **Workers AI**: Models running on Cloudflare's network (free/low cost)
149213
*/
150214
export enum AIModel {
151-
// OpenAI models
215+
// OpenAI models - Latest GPT and reasoning models
216+
GPT_5 = "openai/gpt-5",
217+
GPT_5_PRO = "openai/gpt-5-pro",
218+
GPT_5_MINI = "openai/gpt-5-mini",
219+
GPT_5_NANO = "openai/gpt-5-nano",
152220
GPT_4O = "openai/gpt-4o",
153221
GPT_4O_MINI = "openai/gpt-4o-mini",
154-
GPT_4_TURBO = "openai/gpt-4-turbo",
155-
GPT_35_TURBO = "openai/gpt-3.5-turbo",
222+
O3 = "openai/o3",
223+
O3_MINI = "openai/o3-mini",
156224

157-
// Anthropic models
158-
CLAUDE_SONNET_4_5 = "anthropic/claude-sonnet-4-5",
159-
CLAUDE_35_SONNET = "anthropic/claude-3-5-sonnet",
160-
CLAUDE_3_OPUS = "anthropic/claude-3-opus",
225+
// Anthropic models - Claude 4.x and 3.7 series
226+
CLAUDE_SONNET_45 = "anthropic/claude-sonnet-4-5",
227+
CLAUDE_HAIKU_45 = "anthropic/claude-haiku-4-5",
228+
CLAUDE_OPUS_41 = "anthropic/claude-opus-4-1",
229+
CLAUDE_37_SONNET = "anthropic/claude-3-7-sonnet-latest",
161230

162-
// Google models
231+
// Google models - Gemini 2.x series
232+
GEMINI_25_PRO = "google/gemini-2.5-pro",
163233
GEMINI_25_FLASH = "google/gemini-2.5-flash",
234+
GEMINI_25_FLASH_LITE = "google/gemini-2.5-flash-lite",
235+
GEMINI_20_FLASH = "google/gemini-2.0-flash",
236+
GEMINI_20_FLASH_LITE = "google/gemini-2.0-flash-lite",
164237

165-
// Cloudflare Workers AI models
238+
// Cloudflare Workers AI models - Free/low-cost models running on Cloudflare's network
239+
LLAMA_4_SCOUT_17B = "meta/llama-4-scout-17b-16e-instruct",
166240
LLAMA_33_70B = "meta/llama-3.3-70b-instruct-fp8-fast",
167-
LLAMA_31_8B = "meta/llama-3.1-8b-instruct-fast",
168-
MISTRAL_7B = "meta/mistral-7b-instruct-v0.2",
241+
LLAMA_31_8B = "meta/llama-3.1-8b-instruct-fp8",
242+
LLAMA_32_1B = "meta/llama-3.2-1b-instruct",
243+
DEEPSEEK_R1_32B = "deepseek-ai/deepseek-r1-distill-qwen-32b",
169244
}
170245

171246
/**
172247
* Request parameters for AI text generation, matching Vercel AI SDK's generateText() function.
173248
*/
174249
export interface AIRequest<
175250
TOOLS extends AIToolSet,
176-
SCHEMA extends TSchema = never,
251+
SCHEMA extends TSchema = never
177252
> {
178253
/**
179254
* The AI model to use for generation.
255+
* Can be either a specific model from the AIModel enum or preferences (speed/cost tiers).
256+
*
257+
* @example
258+
* // Using a specific model
259+
* model: AIModel.GPT_5_MINI
260+
*
261+
* @example
262+
* // Using preferences
263+
* model: { speed: "fast", cost: "low" }
180264
*/
181-
model: AIModel;
265+
model: AIModel | ModelPreferences;
182266

183267
/**
184268
* System instructions to guide the model's behavior.
@@ -240,7 +324,7 @@ export interface AIRequest<
240324
*/
241325
export interface AIResponse<
242326
TOOLS extends AIToolSet,
243-
SCHEMA extends TSchema = never,
327+
SCHEMA extends TSchema = never
244328
> {
245329
/**
246330
* The generated text.
@@ -656,7 +740,7 @@ export type AITool<PARAMETERS extends ToolParameters = any, RESULT = any> = {
656740
*/
657741
execute?: (
658742
args: inferParameters<PARAMETERS>,
659-
options: ToolExecutionOptions,
743+
options: ToolExecutionOptions
660744
) => PromiseLike<RESULT>;
661745
} & (
662746
| {

0 commit comments

Comments
 (0)