Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions crates/openfang-runtime/src/agent_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,11 @@ async fn call_with_retry(
Err(e) => {
// Use classifier for smarter error handling
let raw_error = e.to_string();
let classified = llm_errors::classify_error(&raw_error, None);
let status = match &e {
LlmError::Api { status, .. } => Some(*status),
_ => None,
};
let classified = llm_errors::classify_error(&raw_error, status);
warn!(
category = ?classified.category,
retryable = classified.is_retryable,
Expand Down Expand Up @@ -983,7 +987,11 @@ async fn stream_with_retry(
}
Err(e) => {
let raw_error = e.to_string();
let classified = llm_errors::classify_error(&raw_error, None);
let status = match &e {
LlmError::Api { status, .. } => Some(*status),
_ => None,
};
let classified = llm_errors::classify_error(&raw_error, status);
warn!(
category = ?classified.category,
retryable = classified.is_retryable,
Expand Down
74 changes: 70 additions & 4 deletions crates/openfang-runtime/src/drivers/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ impl OpenAIDriver {
}
}

/// True if this provider is Moonshot/Kimi and requires reasoning_content on assistant messages with tool_calls.
///
/// Detection is heuristic: either the configured base URL points at a Moonshot
/// host, or the requested model id mentions "kimi" (case-insensitive).
fn kimi_needs_reasoning_content(&self, model: &str) -> bool {
    let moonshot_host = self.base_url.contains("moonshot");
    let kimi_model = model.to_lowercase().contains("kimi");
    moonshot_host || kimi_model
}

/// Create a driver with additional HTTP headers (e.g. for Copilot IDE auth).
pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self {
self.extra_headers = headers;
Expand All @@ -55,6 +60,9 @@ struct OaiRequest {
tool_choice: Option<serde_json::Value>,
#[serde(skip_serializing_if = "std::ops::Not::not")]
stream: bool,
/// Moonshot Kimi K2.5: disable thinking so multi-turn with tool_calls works without preserving reasoning_content.
#[serde(skip_serializing_if = "Option::is_none")]
thinking: Option<serde_json::Value>,
}

/// Returns true if a model uses `max_completion_tokens` instead of `max_tokens`.
Expand All @@ -78,6 +86,12 @@ fn rejects_temperature(model: &str) -> bool {
|| m.starts_with("o4")
}

/// Returns true if a model only accepts temperature = 1 (e.g. Moonshot Kimi K2/K2.5).
///
/// Matching is case-insensitive on the model id prefix.
fn temperature_must_be_one(model: &str) -> bool {
    // `starts_with("kimi-k2")` already matches "kimi-k2.5" and "kimi-k2.5-0711",
    // so the former extra equality checks for those ids were dead code.
    model.to_lowercase().starts_with("kimi-k2")
}

#[derive(Debug, Serialize)]
struct OaiMessage {
role: String,
Expand All @@ -87,6 +101,9 @@ struct OaiMessage {
tool_calls: Option<Vec<OaiToolCall>>,
#[serde(skip_serializing_if = "Option::is_none")]
tool_call_id: Option<String>,
/// Moonshot Kimi: sent as empty string on assistant messages with tool_calls when using Kimi (thinking is disabled for multi-turn compatibility).
#[serde(skip_serializing_if = "Option::is_none")]
reasoning_content: Option<String>,
}

/// Content can be a plain string or an array of content parts (for images).
Expand Down Expand Up @@ -176,6 +193,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(system.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}

Expand All @@ -189,6 +207,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
}
Expand All @@ -198,6 +217,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
(Role::Assistant, MessageContent::Text(text)) => {
Expand All @@ -206,6 +226,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
(Role::User, MessageContent::Blocks(blocks)) => {
Expand All @@ -227,6 +248,7 @@ impl LlmDriver for OpenAIDriver {
)),
tool_calls: None,
tool_call_id: Some(tool_use_id.clone()),
reasoning_content: None,
});
}
ContentBlock::Text { text } => {
Expand All @@ -249,6 +271,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Parts(parts)),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
}
Expand All @@ -272,6 +295,7 @@ impl LlmDriver for OpenAIDriver {
_ => {}
}
}
let has_tool_calls = !tool_calls.is_empty();
oai_messages.push(OaiMessage {
role: "assistant".to_string(),
content: if text_parts.is_empty() {
Expand All @@ -285,6 +309,11 @@ impl LlmDriver for OpenAIDriver {
Some(tool_calls)
},
tool_call_id: None,
reasoning_content: if has_tool_calls && self.kimi_needs_reasoning_content(&request.model) {
Some(String::new())
} else {
None
},
});
}
_ => {}
Expand Down Expand Up @@ -323,10 +352,24 @@ impl LlmDriver for OpenAIDriver {
messages: oai_messages,
max_tokens: mt,
max_completion_tokens: mct,
temperature: if rejects_temperature(&request.model) { None } else { Some(request.temperature) },
temperature: if self.kimi_needs_reasoning_content(&request.model) {
// Kimi with thinking disabled uses fixed 0.6; with thinking enabled uses 1.0 (we disable thinking for multi-turn).
Some(0.6)
} else if temperature_must_be_one(&request.model) {
Some(1.0)
} else if rejects_temperature(&request.model) {
None
} else {
Some(request.temperature)
},
tools: oai_tools,
tool_choice,
stream: false,
thinking: if self.kimi_needs_reasoning_content(&request.model) {
Some(serde_json::json!({"type": "disabled"}))
} else {
None
},
};

let max_retries = 3;
Expand Down Expand Up @@ -541,6 +584,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(system.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}

Expand All @@ -553,6 +597,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
}
Expand All @@ -562,6 +607,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
(Role::Assistant, MessageContent::Text(text)) => {
Expand All @@ -570,6 +616,7 @@ impl LlmDriver for OpenAIDriver {
content: Some(OaiMessageContent::Text(text.clone())),
tool_calls: None,
tool_call_id: None,
reasoning_content: None,
});
}
(Role::User, MessageContent::Blocks(blocks)) => {
Expand All @@ -587,6 +634,7 @@ impl LlmDriver for OpenAIDriver {
)),
tool_calls: None,
tool_call_id: Some(tool_use_id.clone()),
reasoning_content: None,
});
}
}
Expand All @@ -611,6 +659,7 @@ impl LlmDriver for OpenAIDriver {
_ => {}
}
}
let has_tool_calls = !tool_calls_out.is_empty();
oai_messages.push(OaiMessage {
role: "assistant".to_string(),
content: if text_parts.is_empty() {
Expand All @@ -624,6 +673,11 @@ impl LlmDriver for OpenAIDriver {
Some(tool_calls_out)
},
tool_call_id: None,
reasoning_content: if has_tool_calls && self.kimi_needs_reasoning_content(&request.model) {
Some(String::new())
} else {
None
},
});
}
_ => {}
Expand Down Expand Up @@ -662,10 +716,23 @@ impl LlmDriver for OpenAIDriver {
messages: oai_messages,
max_tokens: mt,
max_completion_tokens: mct,
temperature: if rejects_temperature(&request.model) { None } else { Some(request.temperature) },
temperature: if self.kimi_needs_reasoning_content(&request.model) {
Some(0.6)
} else if temperature_must_be_one(&request.model) {
Some(1.0)
} else if rejects_temperature(&request.model) {
None
} else {
Some(request.temperature)
},
tools: oai_tools,
tool_choice,
stream: true,
thinking: if self.kimi_needs_reasoning_content(&request.model) {
Some(serde_json::json!({"type": "disabled"}))
} else {
None
},
};

// Retry loop for the initial HTTP request
Expand Down Expand Up @@ -971,7 +1038,6 @@ impl LlmDriver for OpenAIDriver {
}
}

/// Parse Groq's `tool_use_failed` error and extract the tool call from `failed_generation`.
/// Extract the max_tokens limit from an API error message.
/// Looks for patterns like: `must be less than or equal to \`8192\``
fn extract_max_tokens_limit(body: &str) -> Option<u32> {
Expand All @@ -996,7 +1062,7 @@ fn extract_max_tokens_limit(body: &str) -> Option<u32> {
None
}

///
/// Parse Groq's `tool_use_failed` error and extract the tool call from `failed_generation`.
/// Some models (e.g. Llama 3.3) generate tool calls as XML: `<function=NAME ARGS></function>`
/// instead of the proper JSON format. Groq rejects these with `tool_use_failed` but includes
/// the raw generation. We parse it and construct a proper CompletionResponse.
Expand Down
12 changes: 12 additions & 0 deletions crates/openfang-runtime/src/llm_errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ pub fn classify_error(message: &str, status: Option<u16>) -> ClassifiedError {
}
return build(LlmErrorCategory::Auth);
}
404 => return build(LlmErrorCategory::ModelNotFound),
_ => {}
}
}
Expand Down Expand Up @@ -583,6 +584,17 @@ mod tests {

let e = classify_error("Unknown model: claude-99", None);
assert_eq!(e.category, LlmErrorCategory::ModelNotFound);

// 404 must be ModelNotFound even when body contains "permission denied" (e.g. Moonshot API)
let e = classify_error(
"Not found the model kimi-k2.5-0711 or Permission denied",
Some(404),
);
assert_eq!(e.category, LlmErrorCategory::ModelNotFound);

// Status 404 alone (generic message) must classify as ModelNotFound
let e = classify_error("Not found", Some(404));
assert_eq!(e.category, LlmErrorCategory::ModelNotFound);
}

#[test]
Expand Down
4 changes: 2 additions & 2 deletions crates/openfang-runtime/src/model_catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2884,7 +2884,7 @@ fn builtin_models() -> Vec<ModelCatalogEntry> {
aliases: vec!["kimi-k2".into()],
},
ModelCatalogEntry {
id: "kimi-k2.5-0711".into(),
id: "kimi-k2.5".into(),
display_name: "Kimi K2.5".into(),
provider: "moonshot".into(),
tier: ModelTier::Frontier,
Expand All @@ -2895,7 +2895,7 @@ fn builtin_models() -> Vec<ModelCatalogEntry> {
supports_tools: true,
supports_vision: true,
supports_streaming: true,
aliases: vec!["kimi-k2.5".into()],
aliases: vec!["kimi-k2.5-0711".into()],
},
// ══════════════════════════════════════════════════════════════
// Baidu Qianfan / ERNIE (3)
Expand Down
2 changes: 1 addition & 1 deletion crates/openfang-types/src/model_catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub const ZHIPU_CODING_BASE_URL: &str = "https://open.bigmodel.cn/api/coding/paa
/// Z.AI domain aliases (same API, different domain).
pub const ZAI_BASE_URL: &str = "https://api.z.ai/api/paas/v4";
pub const ZAI_CODING_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4";
pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.cn/v1";
pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.ai/v1";
pub const QIANFAN_BASE_URL: &str = "https://qianfan.baidubce.com/v2";
pub const VOLCENGINE_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/v3";
pub const VOLCENGINE_CODING_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/coding/v3";
Expand Down