From d5c6d186c6f580589235ea990b86b03820c8b52a Mon Sep 17 00:00:00 2001 From: modship Date: Sun, 8 Mar 2026 13:55:06 +0100 Subject: [PATCH] Enhance error classification and model handling for Moonshot Kimi. Updated error handling to classify 404 errors as ModelNotFound and adjusted model IDs in the catalog. Added reasoning content handling for Kimi models in OpenAI driver. --- crates/openfang-runtime/src/agent_loop.rs | 12 ++- crates/openfang-runtime/src/drivers/openai.rs | 74 ++++++++++++++++++- crates/openfang-runtime/src/llm_errors.rs | 12 +++ crates/openfang-runtime/src/model_catalog.rs | 4 +- crates/openfang-types/src/model_catalog.rs | 2 +- 5 files changed, 95 insertions(+), 9 deletions(-) diff --git a/crates/openfang-runtime/src/agent_loop.rs b/crates/openfang-runtime/src/agent_loop.rs index 11fa8ea12..536ad6fca 100644 --- a/crates/openfang-runtime/src/agent_loop.rs +++ b/crates/openfang-runtime/src/agent_loop.rs @@ -873,7 +873,11 @@ async fn call_with_retry( Err(e) => { // Use classifier for smarter error handling let raw_error = e.to_string(); - let classified = llm_errors::classify_error(&raw_error, None); + let status = match &e { + LlmError::Api { status, .. } => Some(*status), + _ => None, + }; + let classified = llm_errors::classify_error(&raw_error, status); warn!( category = ?classified.category, retryable = classified.is_retryable, @@ -983,7 +987,11 @@ async fn stream_with_retry( } Err(e) => { let raw_error = e.to_string(); - let classified = llm_errors::classify_error(&raw_error, None); + let status = match &e { + LlmError::Api { status, .. 
} => Some(*status), + _ => None, + }; + let classified = llm_errors::classify_error(&raw_error, status); warn!( category = ?classified.category, retryable = classified.is_retryable, diff --git a/crates/openfang-runtime/src/drivers/openai.rs b/crates/openfang-runtime/src/drivers/openai.rs index b12039273..19e0defe3 100644 --- a/crates/openfang-runtime/src/drivers/openai.rs +++ b/crates/openfang-runtime/src/drivers/openai.rs @@ -30,6 +30,11 @@ impl OpenAIDriver { } } + /// True if this provider is Moonshot/Kimi and requires reasoning_content on assistant messages with tool_calls. + fn kimi_needs_reasoning_content(&self, model: &str) -> bool { + self.base_url.contains("moonshot") || model.to_lowercase().contains("kimi") + } + /// Create a driver with additional HTTP headers (e.g. for Copilot IDE auth). pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self { self.extra_headers = headers; @@ -55,6 +60,9 @@ struct OaiRequest { tool_choice: Option, #[serde(skip_serializing_if = "std::ops::Not::not")] stream: bool, + /// Moonshot Kimi K2.5: disable thinking so multi-turn with tool_calls works without preserving reasoning_content. + #[serde(skip_serializing_if = "Option::is_none")] + thinking: Option, } /// Returns true if a model uses `max_completion_tokens` instead of `max_tokens`. @@ -78,6 +86,12 @@ fn rejects_temperature(model: &str) -> bool { || m.starts_with("o4") } +/// Returns true if a model only accepts temperature = 1 (e.g. Moonshot Kimi K2/K2.5). +fn temperature_must_be_one(model: &str) -> bool { + let m = model.to_lowercase(); + m.starts_with("kimi-k2") || m == "kimi-k2.5" || m == "kimi-k2.5-0711" +} + #[derive(Debug, Serialize)] struct OaiMessage { role: String, @@ -87,6 +101,9 @@ struct OaiMessage { tool_calls: Option>, #[serde(skip_serializing_if = "Option::is_none")] tool_call_id: Option, + /// Moonshot Kimi: sent as empty string on assistant messages with tool_calls when using Kimi (thinking is disabled for multi-turn compatibility). 
+ #[serde(skip_serializing_if = "Option::is_none")] + reasoning_content: Option, } /// Content can be a plain string or an array of content parts (for images). @@ -176,6 +193,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(system.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } @@ -189,6 +207,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } } @@ -198,6 +217,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } (Role::Assistant, MessageContent::Text(text)) => { @@ -206,6 +226,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } (Role::User, MessageContent::Blocks(blocks)) => { @@ -227,6 +248,7 @@ impl LlmDriver for OpenAIDriver { )), tool_calls: None, tool_call_id: Some(tool_use_id.clone()), + reasoning_content: None, }); } ContentBlock::Text { text } => { @@ -249,6 +271,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Parts(parts)), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } } @@ -272,6 +295,7 @@ impl LlmDriver for OpenAIDriver { _ => {} } } + let has_tool_calls = !tool_calls.is_empty(); oai_messages.push(OaiMessage { role: "assistant".to_string(), content: if text_parts.is_empty() { @@ -285,6 +309,11 @@ impl LlmDriver for OpenAIDriver { Some(tool_calls) }, tool_call_id: None, + reasoning_content: if has_tool_calls && self.kimi_needs_reasoning_content(&request.model) { + Some(String::new()) + } else { + None + }, }); } _ => {} @@ -323,10 +352,24 @@ impl LlmDriver for OpenAIDriver { messages: oai_messages, max_tokens: mt, max_completion_tokens: mct, - temperature: if rejects_temperature(&request.model) { None } else { 
Some(request.temperature) }, + temperature: if self.kimi_needs_reasoning_content(&request.model) { + // Kimi with thinking disabled uses fixed 0.6; with thinking enabled uses 1.0 (we disable thinking for multi-turn). + Some(0.6) + } else if temperature_must_be_one(&request.model) { + Some(1.0) + } else if rejects_temperature(&request.model) { + None + } else { + Some(request.temperature) + }, tools: oai_tools, tool_choice, stream: false, + thinking: if self.kimi_needs_reasoning_content(&request.model) { + Some(serde_json::json!({"type": "disabled"})) + } else { + None + }, }; let max_retries = 3; @@ -541,6 +584,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(system.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } @@ -553,6 +597,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } } @@ -562,6 +607,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } (Role::Assistant, MessageContent::Text(text)) => { @@ -570,6 +616,7 @@ impl LlmDriver for OpenAIDriver { content: Some(OaiMessageContent::Text(text.clone())), tool_calls: None, tool_call_id: None, + reasoning_content: None, }); } (Role::User, MessageContent::Blocks(blocks)) => { @@ -587,6 +634,7 @@ impl LlmDriver for OpenAIDriver { )), tool_calls: None, tool_call_id: Some(tool_use_id.clone()), + reasoning_content: None, }); } } @@ -611,6 +659,7 @@ impl LlmDriver for OpenAIDriver { _ => {} } } + let has_tool_calls = !tool_calls_out.is_empty(); oai_messages.push(OaiMessage { role: "assistant".to_string(), content: if text_parts.is_empty() { @@ -624,6 +673,11 @@ impl LlmDriver for OpenAIDriver { Some(tool_calls_out) }, tool_call_id: None, + reasoning_content: if has_tool_calls && self.kimi_needs_reasoning_content(&request.model) { + 
Some(String::new()) + } else { + None + }, }); } _ => {} @@ -662,10 +716,23 @@ impl LlmDriver for OpenAIDriver { messages: oai_messages, max_tokens: mt, max_completion_tokens: mct, - temperature: if rejects_temperature(&request.model) { None } else { Some(request.temperature) }, + temperature: if self.kimi_needs_reasoning_content(&request.model) { + Some(0.6) + } else if temperature_must_be_one(&request.model) { + Some(1.0) + } else if rejects_temperature(&request.model) { + None + } else { + Some(request.temperature) + }, tools: oai_tools, tool_choice, stream: true, + thinking: if self.kimi_needs_reasoning_content(&request.model) { + Some(serde_json::json!({"type": "disabled"})) + } else { + None + }, }; // Retry loop for the initial HTTP request @@ -971,7 +1038,6 @@ impl LlmDriver for OpenAIDriver { } } -/// Parse Groq's `tool_use_failed` error and extract the tool call from `failed_generation`. /// Extract the max_tokens limit from an API error message. /// Looks for patterns like: `must be less than or equal to \`8192\`` fn extract_max_tokens_limit(body: &str) -> Option { @@ -996,7 +1062,7 @@ fn extract_max_tokens_limit(body: &str) -> Option { None } -/// +/// Parse Groq's `tool_use_failed` error and extract the tool call from `failed_generation`. /// Some models (e.g. Llama 3.3) generate tool calls as XML: `` /// instead of the proper JSON format. Groq rejects these with `tool_use_failed` but includes /// the raw generation. We parse it and construct a proper CompletionResponse. 
diff --git a/crates/openfang-runtime/src/llm_errors.rs b/crates/openfang-runtime/src/llm_errors.rs index 0f4278fde..63ab63689 100644 --- a/crates/openfang-runtime/src/llm_errors.rs +++ b/crates/openfang-runtime/src/llm_errors.rs @@ -221,6 +221,7 @@ pub fn classify_error(message: &str, status: Option<u16>) -> ClassifiedError { } return build(LlmErrorCategory::Auth); } + 404 => return build(LlmErrorCategory::ModelNotFound), _ => {} } } @@ -583,6 +584,17 @@ mod tests { let e = classify_error("Unknown model: claude-99", None); assert_eq!(e.category, LlmErrorCategory::ModelNotFound); + + // 404 must be ModelNotFound even when body contains "permission denied" (e.g. Moonshot API) + let e = classify_error( + "Not found the model kimi-k2.5-0711 or Permission denied", + Some(404), + ); + assert_eq!(e.category, LlmErrorCategory::ModelNotFound); + + // Status 404 alone (generic message) must classify as ModelNotFound + let e = classify_error("Not found", Some(404)); + assert_eq!(e.category, LlmErrorCategory::ModelNotFound); } #[test] diff --git a/crates/openfang-runtime/src/model_catalog.rs b/crates/openfang-runtime/src/model_catalog.rs index 1df4eb3c2..ab28062ed 100644 --- a/crates/openfang-runtime/src/model_catalog.rs +++ b/crates/openfang-runtime/src/model_catalog.rs @@ -2884,7 +2884,7 @@ fn builtin_models() -> Vec<ModelCatalogEntry> { aliases: vec!["kimi-k2".into()], }, ModelCatalogEntry { - id: "kimi-k2.5-0711".into(), + id: "kimi-k2.5".into(), display_name: "Kimi K2.5".into(), provider: "moonshot".into(), tier: ModelTier::Frontier, @@ -2895,7 +2895,7 @@ fn builtin_models() -> Vec<ModelCatalogEntry> { supports_tools: true, supports_vision: true, supports_streaming: true, - aliases: vec!["kimi-k2.5".into()], + aliases: vec!["kimi-k2.5-0711".into()], }, // ══════════════════════════════════════════════════════════════ // Baidu Qianfan / ERNIE (3) diff --git a/crates/openfang-types/src/model_catalog.rs b/crates/openfang-types/src/model_catalog.rs index 92019518d..792a658f8 100644 ---
a/crates/openfang-types/src/model_catalog.rs +++ b/crates/openfang-types/src/model_catalog.rs @@ -42,7 +42,7 @@ pub const ZHIPU_CODING_BASE_URL: &str = "https://open.bigmodel.cn/api/coding/paa /// Z.AI domain aliases (same API, different domain). pub const ZAI_BASE_URL: &str = "https://api.z.ai/api/paas/v4"; pub const ZAI_CODING_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; -pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.cn/v1"; +pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.ai/v1"; pub const QIANFAN_BASE_URL: &str = "https://qianfan.baidubce.com/v2"; pub const VOLCENGINE_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/v3"; pub const VOLCENGINE_CODING_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/coding/v3";