Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions crates/openfang-runtime/src/agent_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,11 @@ async fn call_with_retry(
Err(e) => {
// Use classifier for smarter error handling
let raw_error = e.to_string();
let classified = llm_errors::classify_error(&raw_error, None);
let status = match &e {
LlmError::Api { status, .. } => Some(*status),
_ => None,
};
let classified = llm_errors::classify_error(&raw_error, status);
warn!(
category = ?classified.category,
retryable = classified.is_retryable,
Expand Down Expand Up @@ -983,7 +987,11 @@ async fn stream_with_retry(
}
Err(e) => {
let raw_error = e.to_string();
let classified = llm_errors::classify_error(&raw_error, None);
let status = match &e {
LlmError::Api { status, .. } => Some(*status),
_ => None,
};
let classified = llm_errors::classify_error(&raw_error, status);
warn!(
category = ?classified.category,
retryable = classified.is_retryable,
Expand Down
74 changes: 70 additions & 4 deletions crates/openfang-runtime/src/drivers/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ impl OpenAIDriver {
}
}

/// True if this provider is Moonshot/Kimi and requires reasoning_content on assistant messages with tool_calls.
///
/// Detection is heuristic: either the configured base URL points at a Moonshot
/// host, or the requested model id mentions "kimi" (case-insensitive).
fn kimi_needs_reasoning_content(&self, model: &str) -> bool {
    let moonshot_host = self.base_url.contains("moonshot");
    let kimi_model = model.to_lowercase().contains("kimi");
    moonshot_host || kimi_model
}

/// Create a driver with additional HTTP headers (e.g. for Copilot IDE auth).
pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self {
self.extra_headers = headers;
Expand All @@ -55,6 +60,9 @@ struct OaiRequest {
tool_choice: Option<serde_json::Value>,
#[serde(skip_serializing_if = "std::ops::Not::not")]
stream: bool,
/// Moonshot Kimi K2.5: disable thinking so multi-turn with tool_calls works without preserving reasoning_content.
#[serde(skip_serializing_if = "Option::is_none")]
thinking: Option<serde_json::Value>,
}

/// Returns true if a model uses `max_completion_tokens` instead of `max_tokens`.
Expand All @@ -78,6 +86,12 @@ fn rejects_temperature(model: &str) -> bool {
|| m.starts_with("o4")
}

/// Returns true if a model only accepts temperature = 1 (e.g. Moonshot Kimi K2/K2.5).
///
/// Matching is case-insensitive on the model id prefix.
fn temperature_must_be_one(model: &str) -> bool {
    // `starts_with("kimi-k2")` already matches "kimi-k2.5" and "kimi-k2.5-0711",
    // so the former extra equality checks for those ids were dead code.
    model.to_lowercase().starts_with("kimi-k2")
}

#[derive(Debug, Serialize)]
struct OaiMessage {
role: String,
Expand All @@ -87,6 +101,9 @@ struct OaiMessage {
tool_calls: Option<Vec<OaiToolCall>>,
#[serde(skip_serializing_if = "Option::is_none")]
tool_call_id: Option<String>,
/// Moonshot Kimi: sent as empty string on assistant messages with tool_calls when using Kimi (thinking is disabled for multi-turn compatibility).
#[serde(skip_serializing_if = "Option::is_none")]
reasoning_content: Option<String>,
}

/// Content can be a plain string or an array of content parts (for images).
Expand Down Expand Up @@ -176,6 +193,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(system.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}

Expand All @@ -189,6 +207,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
}
Expand All @@ -198,6 +217,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
(Role::Assistant, MessageContent::Text(text)) => {
Expand All @@ -206,6 +226,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
(Role::User, MessageContent::Blocks(blocks)) => {
Expand All @@ -227,6 +248,7 @@ impl LlmDriver for OpenAIDriver {
)),
tool_calls: None,
tool_call_id: Some(tool_use_id.clone()),
reasoning_content: None,
});
}
ContentBlock::Text { text } => {
Expand All @@ -249,6 +271,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Parts(parts)),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
}
Expand All @@ -272,6 +295,7 @@ impl LlmDriver for OpenAIDriver {
_ => {}
}
}
let has_tool_calls = !tool_calls.is_empty();
oai_messages.push(OaiMessage {
role: "assistant".to_string(),
content: if text_parts.is_empty() {
Expand All @@ -285,6 +309,11 @@ impl LlmDriver for OpenAIDriver {
Some(tool_calls)
},
tool_call_id: None,
reasoning_content: if has_tool_calls && self.kimi_needs_reasoning_content(&request.model) {
Some(String::new())
} else {
None
},
});
}
_ => {}
Expand Down Expand Up @@ -323,10 +352,24 @@ impl LlmDriver for OpenAIDriver {
messages: oai_messages,
max_tokens: mt,
max_completion_tokens: mct,
temperature: if rejects_temperature(&request.model) { None } else { Some(request.temperature) },
temperature: if self.kimi_needs_reasoning_content(&request.model) {
// Kimi with thinking disabled uses fixed 0.6; with thinking enabled uses 1.0 (we disable thinking for multi-turn).
Some(0.6)
} else if temperature_must_be_one(&request.model) {
Some(1.0)
} else if rejects_temperature(&request.model) {
None
} else {
Some(request.temperature)
},
tools: oai_tools,
tool_choice,
stream: false,
thinking: if self.kimi_needs_reasoning_content(&request.model) {
Some(serde_json::json!({"type": "disabled"}))
} else {
None
},
};

let max_retries = 3;
Expand Down Expand Up @@ -541,6 +584,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(system.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}

Expand All @@ -553,6 +597,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
}
Expand All @@ -562,6 +607,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
(Role::Assistant, MessageContent::Text(text)) => {
Expand All @@ -570,6 +616,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
(Role::User, MessageContent::Blocks(blocks)) => {
Expand All @@ -587,6 +634,7 @@ impl LlmDriver for OpenAIDriver {
)),
tool_calls: None,
tool_call_id: Some(tool_use_id.clone()),
reasoning_content: None,
});
}
}
Expand All @@ -611,6 +659,7 @@ impl LlmDriver for OpenAIDriver {
_ => {}
}
}
let has_tool_calls = !tool_calls_out.is_empty();
oai_messages.push(OaiMessage {
role: "assistant".to_string(),
content: if text_parts.is_empty() {
Expand All @@ -624,6 +673,11 @@ impl LlmDriver for OpenAIDriver {
Some(tool_calls_out)
},
tool_call_id: None,
reasoning_content: if has_tool_calls && self.kimi_needs_reasoning_content(&request.model) {
Some(String::new())
} else {
None
},
});
}
_ => {}
Expand Down Expand Up @@ -662,10 +716,23 @@ impl LlmDriver for OpenAIDriver {
messages: oai_messages,
max_tokens: mt,
max_completion_tokens: mct,
temperature: if rejects_temperature(&request.model) { None } else { Some(request.temperature) },
temperature: if self.kimi_needs_reasoning_content(&request.model) {
Some(0.6)
} else if temperature_must_be_one(&request.model) {
Some(1.0)
} else if rejects_temperature(&request.model) {
None
} else {
Some(request.temperature)
},
tools: oai_tools,
tool_choice,
stream: true,
thinking: if self.kimi_needs_reasoning_content(&request.model) {
Some(serde_json::json!({"type": "disabled"}))
} else {
None
},
};

// Retry loop for the initial HTTP request
Expand Down Expand Up @@ -971,7 +1038,6 @@ impl LlmDriver for OpenAIDriver {
}
}

/// Parse Groq's `tool_use_failed` error and extract the tool call from `failed_generation`.
/// Extract the max_tokens limit from an API error message.
/// Looks for patterns like: `must be less than or equal to \`8192\``
fn extract_max_tokens_limit(body: &str) -> Option<u32> {
Expand All @@ -996,7 +1062,7 @@ fn extract_max_tokens_limit(body: &str) -> Option<u32> {
None
}

///
/// Parse Groq's `tool_use_failed` error and extract the tool call from `failed_generation`.
/// Some models (e.g. Llama 3.3) generate tool calls as XML: `<function=NAME ARGS></function>`
/// instead of the proper JSON format. Groq rejects these with `tool_use_failed` but includes
/// the raw generation. We parse it and construct a proper CompletionResponse.
Expand Down
12 changes: 12 additions & 0 deletions crates/openfang-runtime/src/llm_errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ pub fn classify_error(message: &str, status: Option<u16>) -> ClassifiedError {
}
return build(LlmErrorCategory::Auth);
}
404 => return build(LlmErrorCategory::ModelNotFound),
_ => {}
}
}
Expand Down Expand Up @@ -583,6 +584,17 @@ mod tests {

let e = classify_error("Unknown model: claude-99", None);
assert_eq!(e.category, LlmErrorCategory::ModelNotFound);

// 404 must be ModelNotFound even when body contains "permission denied" (e.g. Moonshot API)
let e = classify_error(
"Not found the model kimi-k2.5-0711 or Permission denied",
Some(404),
);
assert_eq!(e.category, LlmErrorCategory::ModelNotFound);

// Status 404 alone (generic message) must classify as ModelNotFound
let e = classify_error("Not found", Some(404));
assert_eq!(e.category, LlmErrorCategory::ModelNotFound);
}

#[test]
Expand Down
4 changes: 2 additions & 2 deletions crates/openfang-runtime/src/model_catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2884,7 +2884,7 @@ fn builtin_models() -> Vec<ModelCatalogEntry> {
aliases: vec!["kimi-k2".into()],
},
ModelCatalogEntry {
id: "kimi-k2.5-0711".into(),
id: "kimi-k2.5".into(),
display_name: "Kimi K2.5".into(),
provider: "moonshot".into(),
tier: ModelTier::Frontier,
Expand All @@ -2895,7 +2895,7 @@ fn builtin_models() -> Vec<ModelCatalogEntry> {
supports_tools: true,
supports_vision: true,
supports_streaming: true,
aliases: vec!["kimi-k2.5".into()],
aliases: vec!["kimi-k2.5-0711".into()],
},
// ══════════════════════════════════════════════════════════════
// Baidu Qianfan / ERNIE (3)
Expand Down
2 changes: 1 addition & 1 deletion crates/openfang-types/src/model_catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub const ZHIPU_CODING_BASE_URL: &str = "https://open.bigmodel.cn/api/coding/paa
/// Z.AI domain aliases (same API, different domain).
pub const ZAI_BASE_URL: &str = "https://api.z.ai/api/paas/v4";
pub const ZAI_CODING_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4";
pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.cn/v1";
pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.ai/v1";
pub const QIANFAN_BASE_URL: &str = "https://qianfan.baidubce.com/v2";
pub const VOLCENGINE_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/v3";
pub const VOLCENGINE_CODING_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/coding/v3";
Expand Down