diff --git a/crates/openfang-runtime/src/agent_loop.rs b/crates/openfang-runtime/src/agent_loop.rs index 11fa8ea12..536ad6fca 100644 --- a/crates/openfang-runtime/src/agent_loop.rs +++ b/crates/openfang-runtime/src/agent_loop.rs @@ -873,7 +873,11 @@ async fn call_with_retry( Err(e) => { // Use classifier for smarter error handling let raw_error = e.to_string(); - let classified = llm_errors::classify_error(&raw_error, None); + let status = match &e { + LlmError::Api { status, .. } => Some(*status), + _ => None, + }; + let classified = llm_errors::classify_error(&raw_error, status); warn!( category = ?classified.category, retryable = classified.is_retryable, @@ -983,7 +987,11 @@ async fn stream_with_retry( } Err(e) => { let raw_error = e.to_string(); - let classified = llm_errors::classify_error(&raw_error, None); + let status = match &e { + LlmError::Api { status, .. } => Some(*status), + _ => None, + }; + let classified = llm_errors::classify_error(&raw_error, status); warn!( category = ?classified.category, retryable = classified.is_retryable, diff --git a/crates/openfang-runtime/src/drivers/openai.rs b/crates/openfang-runtime/src/drivers/openai.rs index b12039273..19e0defe3 100644 --- a/crates/openfang-runtime/src/drivers/openai.rs +++ b/crates/openfang-runtime/src/drivers/openai.rs @@ -30,6 +30,11 @@ impl OpenAIDriver { } } + /// True if this provider is Moonshot/Kimi and requires reasoning_content on assistant messages with tool_calls. + fn kimi_needs_reasoning_content(&self, model: &str) -> bool { + self.base_url.contains("moonshot") || model.to_lowercase().contains("kimi") + } + /// Create a driver with additional HTTP headers (e.g. for Copilot IDE auth). pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self { self.extra_headers = headers; @@ -55,6 +60,9 @@ struct OaiRequest { tool_choice: Option, #[serde(skip_serializing_if = "std::ops::Not::not")] stream: bool, + /// Moonshot Kimi K2.5: disable thinking so multi-turn with tool_calls works without preserving reasoning_content. + #[serde(skip_serializing_if = "Option::is_none")] + thinking: Option, } /// Returns true if a model uses `max_completion_tokens` instead of `max_tokens`. @@ -78,6 +86,12 @@ fn rejects_temperature(model: &str) -> bool { || m.starts_with("o4") } +/// Returns true if a model only accepts temperature = 1 (e.g. Moonshot Kimi K2/K2.5). +fn temperature_must_be_one(model: &str) -> bool { + let m = model.to_lowercase(); + m.starts_with("kimi-k2") || m == "kimi-k2.5" || m == "kimi-k2.5-0711" +} + #[derive(Debug, Serialize)] struct OaiMessage { role: String, @@ -87,6 +101,9 @@ struct OaiMessage { tool_calls: Option>, #[serde(skip_serializing_if = "Option::is_none")] tool_call_id: Option, + /// Moonshot Kimi: sent as empty string on assistant messages with tool_calls when using Kimi (thinking is disabled for multi-turn compatibility). + #[serde(skip_serializing_if = "Option::is_none")] + reasoning_content: Option, } /// Content can be a plain string or an array of content parts (for images). @@ -176,6 +193,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(system.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } @@ -189,6 +207,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } } @@ -198,6 +217,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } (Role::Assistant, MessageContent::Text(text)) => { @@ -206,6 +226,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } (Role::User, MessageContent::Blocks(blocks)) => { @@ -227,6 +248,7 @@ impl LlmDriver for OpenAIDriver { )), tool_calls: None, tool_call_id: Some(tool_use_id.clone()), + reasoning_content: None, }); } ContentBlock::Text { text } => { @@ -249,6 +271,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Parts(parts)), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } } @@ -272,6 +295,7 @@ impl LlmDriver for OpenAIDriver { _ => {} } } + let has_tool_calls = !tool_calls.is_empty(); oai_messages.push(OaiMessage { role: "assistant".to_string(), content: if text_parts.is_empty() { @@ -285,6 +309,11 @@ impl LlmDriver for OpenAIDriver { Some(tool_calls) }, tool_call_id: None, + reasoning_content: if has_tool_calls && self.kimi_needs_reasoning_content(&request.model) { + Some(String::new()) + } else { + None + }, }); } _ => {} @@ -323,10 +352,24 @@ impl LlmDriver for OpenAIDriver { messages: oai_messages, max_tokens: mt, max_completion_tokens: mct, - temperature: if rejects_temperature(&request.model) { None } else { Some(request.temperature) }, + temperature: if self.kimi_needs_reasoning_content(&request.model) { + // Kimi with thinking disabled uses fixed 0.6; with thinking enabled uses 1.0 (we disable thinking for multi-turn). + Some(0.6) + } else if temperature_must_be_one(&request.model) { + Some(1.0) + } else if rejects_temperature(&request.model) { + None + } else { + Some(request.temperature) + }, tools: oai_tools, tool_choice, stream: false, + thinking: if self.kimi_needs_reasoning_content(&request.model) { + Some(serde_json::json!({"type": "disabled"})) + } else { + None + }, }; let max_retries = 3; @@ -541,6 +584,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(system.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } @@ -553,6 +597,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } } @@ -562,6 +607,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } (Role::Assistant, MessageContent::Text(text)) => { @@ -570,6 +616,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } (Role::User, MessageContent::Blocks(blocks)) => { @@ -587,6 +634,7 @@ impl LlmDriver for OpenAIDriver { )), tool_calls: None, tool_call_id: Some(tool_use_id.clone()), + reasoning_content: None, }); } } @@ -611,6 +659,7 @@ impl LlmDriver for OpenAIDriver { _ => {} } } + let has_tool_calls = !tool_calls_out.is_empty(); oai_messages.push(OaiMessage { role: "assistant".to_string(), content: if text_parts.is_empty() { @@ -624,6 +673,11 @@ impl LlmDriver for OpenAIDriver { Some(tool_calls_out) }, tool_call_id: None, + reasoning_content: if has_tool_calls && self.kimi_needs_reasoning_content(&request.model) { + Some(String::new()) + } else { + None + }, }); } _ => {} @@ -662,10 +716,23 @@ impl LlmDriver for OpenAIDriver { messages: oai_messages, max_tokens: mt, max_completion_tokens: mct, - temperature: if rejects_temperature(&request.model) { None } else { Some(request.temperature) }, + temperature: if self.kimi_needs_reasoning_content(&request.model) { + Some(0.6) + } else if temperature_must_be_one(&request.model) { + Some(1.0) + } else if rejects_temperature(&request.model) { + None + } else { + Some(request.temperature) + }, tools: oai_tools, tool_choice, stream: true, + thinking: if self.kimi_needs_reasoning_content(&request.model) { + Some(serde_json::json!({"type": "disabled"})) + } else { + None + }, }; // Retry loop for the initial HTTP request @@ -971,7 +1038,6 @@ impl LlmDriver for OpenAIDriver { } } -/// Parse Groq's `tool_use_failed` error and extract the tool call from `failed_generation`. /// Extract the max_tokens limit from an API error message. /// Looks for patterns like: `must be less than or equal to \`8192\`` fn extract_max_tokens_limit(body: &str) -> Option { @@ -996,7 +1062,7 @@ fn extract_max_tokens_limit(body: &str) -> Option { None } -/// +/// Parse Groq's `tool_use_failed` error and extract the tool call from `failed_generation`. /// Some models (e.g. Llama 3.3) generate tool calls as XML: `` /// instead of the proper JSON format. Groq rejects these with `tool_use_failed` but includes /// the raw generation. We parse it and construct a proper CompletionResponse. diff --git a/crates/openfang-runtime/src/llm_errors.rs b/crates/openfang-runtime/src/llm_errors.rs index 0f4278fde..63ab63689 100644 --- a/crates/openfang-runtime/src/llm_errors.rs +++ b/crates/openfang-runtime/src/llm_errors.rs @@ -221,6 +221,7 @@ pub fn classify_error(message: &str, status: Option) -> ClassifiedError { } return build(LlmErrorCategory::Auth); } + 404 => return build(LlmErrorCategory::ModelNotFound), _ => {} } } @@ -583,6 +584,17 @@ mod tests { let e = classify_error("Unknown model: claude-99", None); assert_eq!(e.category, LlmErrorCategory::ModelNotFound); + + // 404 must be ModelNotFound even when body contains "permission denied" (e.g. Moonshot API) + let e = classify_error( + "Not found the model kimi-k2.5-0711 or Permission denied", + Some(404), + ); + assert_eq!(e.category, LlmErrorCategory::ModelNotFound); + + // Status 404 alone (generic message) must classify as ModelNotFound + let e = classify_error("Not found", Some(404)); + assert_eq!(e.category, LlmErrorCategory::ModelNotFound); } #[test] diff --git a/crates/openfang-runtime/src/model_catalog.rs b/crates/openfang-runtime/src/model_catalog.rs index 1df4eb3c2..ab28062ed 100644 --- a/crates/openfang-runtime/src/model_catalog.rs +++ b/crates/openfang-runtime/src/model_catalog.rs @@ -2884,7 +2884,7 @@ fn builtin_models() -> Vec { aliases: vec!["kimi-k2".into()], }, ModelCatalogEntry { - id: "kimi-k2.5-0711".into(), + id: "kimi-k2.5".into(), display_name: "Kimi K2.5".into(), provider: "moonshot".into(), tier: ModelTier::Frontier, @@ -2895,7 +2895,7 @@ fn builtin_models() -> Vec { supports_tools: true, supports_vision: true, supports_streaming: true, - aliases: vec!["kimi-k2.5".into()], + aliases: vec!["kimi-k2.5-0711".into()], }, // ══════════════════════════════════════════════════════════════ // Baidu Qianfan / ERNIE (3) diff --git a/crates/openfang-types/src/model_catalog.rs b/crates/openfang-types/src/model_catalog.rs index 92019518d..792a658f8 100644 --- a/crates/openfang-types/src/model_catalog.rs +++ b/crates/openfang-types/src/model_catalog.rs @@ -42,7 +42,7 @@ pub const ZHIPU_CODING_BASE_URL: &str = "https://open.bigmodel.cn/api/coding/paa /// Z.AI domain aliases (same API, different domain). pub const ZAI_BASE_URL: &str = "https://api.z.ai/api/paas/v4"; pub const ZAI_CODING_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; -pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.cn/v1"; +pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.ai/v1"; pub const QIANFAN_BASE_URL: &str = "https://qianfan.baidubce.com/v2"; pub const VOLCENGINE_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/v3"; pub const VOLCENGINE_CODING_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/coding/v3";