Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions payloads/cases/advanced.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
OPENAI_RESPONSES_MODEL,
ANTHROPIC_MODEL,
BEDROCK_MODEL,
MISTRAL_MODEL,
} from "./models";

const IMAGE_BASE64 =
Expand Down Expand Up @@ -121,6 +122,9 @@ export const advancedCases: TestCaseCollection = {
maxTokens: 300,
},
},

// Mistral-7B doesn't support multimodal (images)
mistral: null,
},

complexReasoningRequest: {
Expand Down Expand Up @@ -193,6 +197,18 @@ export const advancedCases: TestCaseCollection = {
maxTokens: 20_000,
},
},

mistral: {
model: MISTRAL_MODEL,
maxTokens: 20_000,
messages: [
{
role: "user",
content:
"There is a digital clock, with minutes and hours in the form of 00:00. The clock shows all times from 00:00 to 23:59 and repeating. Imagine you had a list of all these times. Which digit(s) is the most common and which is the rarest? Can you find their percentage?",
},
],
},
},

reasoningWithOutput: {
Expand Down Expand Up @@ -244,6 +260,16 @@ export const advancedCases: TestCaseCollection = {
},
],
},

mistral: {
model: MISTRAL_MODEL,
messages: [
{
role: "user",
content: "What color is the sky?",
},
],
},
},

toolCallRequest: {
Expand Down Expand Up @@ -394,5 +420,36 @@ export const advancedCases: TestCaseCollection = {
],
},
},

mistral: {
model: MISTRAL_MODEL,
maxTokens: 20_000,
messages: [
{
role: "user",
content: "What's the weather like in San Francisco?",
},
],
tools: [
{
type: "function",
function: {
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
},
},
required: ["location"],
},
},
},
],
toolChoice: "auto",
},
},
};
1 change: 1 addition & 0 deletions payloads/cases/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ export const OPENAI_RESPONSES_MODEL = "gpt-5-nano";
// Model identifiers used by the payload test cases in this package.
export const ANTHROPIC_MODEL = "claude-sonnet-4-20250514";
export const GOOGLE_MODEL = "gemini-2.5-flash";
// Bedrock uses a region-prefixed inference-profile id rather than a bare model name.
export const BEDROCK_MODEL = "us.anthropic.claude-haiku-4-5-20251001-v1:0";
// NOTE(review): open-mistral-7b does not support image input, so multimodal
// cases set their "mistral" entry to null — presumably chosen as the small/
// cheap Mistral chat model; confirm it is still available on the API.
export const MISTRAL_MODEL = "open-mistral-7b";
64 changes: 64 additions & 0 deletions payloads/cases/simple.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
OPENAI_RESPONSES_MODEL,
ANTHROPIC_MODEL,
BEDROCK_MODEL,
MISTRAL_MODEL,
} from "./models";

// Simple test cases - basic functionality testing
Expand Down Expand Up @@ -62,6 +63,16 @@ export const simpleCases: TestCaseCollection = {
},
],
},

mistral: {
model: MISTRAL_MODEL,
messages: [
{
role: "user",
content: "What is the capital of France?",
},
],
},
},

reasoningRequest: {
Expand Down Expand Up @@ -125,6 +136,17 @@ export const simpleCases: TestCaseCollection = {
},
],
},

mistral: {
model: MISTRAL_MODEL,
messages: [
{
role: "user",
content:
"Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
},
],
},
},

reasoningRequestTruncated: {
Expand Down Expand Up @@ -196,6 +218,18 @@ export const simpleCases: TestCaseCollection = {
maxTokens: 100,
},
},

mistral: {
model: MISTRAL_MODEL,
maxTokens: 100,
messages: [
{
role: "user",
content:
"Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
},
],
},
},

toolCallRequest: {
Expand Down Expand Up @@ -343,5 +377,35 @@ export const simpleCases: TestCaseCollection = {
],
},
},

mistral: {
model: MISTRAL_MODEL,
messages: [
{
role: "user",
content: "What's the weather like in San Francisco?",
},
],
tools: [
{
type: "function",
function: {
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
},
},
required: ["location"],
},
},
},
],
toolChoice: "auto",
},
},
};
6 changes: 6 additions & 0 deletions payloads/cases/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import OpenAI from "openai";
import Anthropic from "@anthropic-ai/sdk";
import type { Content, GenerateContentConfig, Tool } from "@google/genai";
import type { ConverseCommandInput } from "@aws-sdk/client-bedrock-runtime";
import type { ChatCompletionRequest } from "@mistralai/mistralai/models/components";

// Google Gemini API request type (matching the js-genai library)
export interface GoogleGenerateContentRequest {
Expand All @@ -14,13 +15,17 @@ export interface GoogleGenerateContentRequest {
/** Bedrock Converse API request shape, re-exported for convenience. */
export type BedrockConverseRequest = ConverseCommandInput;

/** Mistral chat-completion request shape, re-exported for convenience. */
export type MistralChatCompletionRequest = ChatCompletionRequest;

/**
 * One logical test case, expressed as the provider-specific request payload
 * for each supported API.
 *
 * A `null` entry marks the case as intentionally unsupported for that
 * provider (e.g. the multimodal case sets `mistral: null` because the
 * Mistral-7B model does not accept images).
 */
export interface TestCase {
  /** OpenAI Chat Completions request, or null if unsupported. */
  "chat-completions": OpenAI.Chat.Completions.ChatCompletionCreateParams | null;
  /** OpenAI Responses API request, or null if unsupported. */
  responses: OpenAI.Responses.ResponseCreateParams | null;
  /** Anthropic Messages API request, or null if unsupported. */
  anthropic: Anthropic.Messages.MessageCreateParams | null;
  /** Google Gemini generateContent request, or null if unsupported. */
  google: GoogleGenerateContentRequest | null;
  /** AWS Bedrock Converse request, or null if unsupported. */
  bedrock: BedrockConverseRequest | null;
  /** Mistral chat-completion request, or null if unsupported. */
  mistral: MistralChatCompletionRequest | null;
}

// Collection of test cases organized by name
Expand All @@ -37,4 +42,5 @@ export const PROVIDER_TYPES = [
"anthropic",
"google",
"bedrock",
"mistral",
] as const;
1 change: 1 addition & 0 deletions payloads/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"@anthropic-ai/sdk": "^0.63.0",
"@aws-sdk/client-bedrock-runtime": "^3.700.0",
"@google/genai": "^1.34.0",
"@mistralai/mistralai": "^1.11.0",
"openai": "^5.22.0"
},
"devDependencies": {
Expand Down
2 changes: 2 additions & 0 deletions payloads/scripts/capture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { openaiResponsesExecutor } from "./providers/openai-responses";
import { anthropicExecutor } from "./providers/anthropic";
import { googleExecutor } from "./providers/google";
import { bedrockExecutor } from "./providers/bedrock";
import { mistralExecutor } from "./providers/mistral";
import { ProviderExecutor } from "./types";

// Update provider names to be more descriptive
Expand All @@ -20,6 +21,7 @@ const allProviders = [
anthropicExecutor,
googleExecutor,
bedrockExecutor,
mistralExecutor,
] as const;

interface CaptureOptions {
Expand Down
Loading