From 6b3d3e015f3c80e79a5df2162da9bad7309ff36c Mon Sep 17 00:00:00 2001 From: Dylan Ross Date: Sun, 19 Oct 2025 20:23:24 -0500 Subject: [PATCH 01/11] added ModelName partial eq implementations for string types (#94) --- src/common/model_name.rs | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/common/model_name.rs b/src/common/model_name.rs index 9517d8bc..b8821d28 100644 --- a/src/common/model_name.rs +++ b/src/common/model_name.rs @@ -69,6 +69,48 @@ impl Deref for ModelName { // endregion: --- Froms +// region: --- EQ + +// PartialEq implementations for various string types +impl PartialEq for ModelName { + fn eq(&self, other: &str) -> bool { + &*self.0 == other + } +} + +impl PartialEq<&str> for ModelName { + fn eq(&self, other: &&str) -> bool { + &*self.0 == *other + } +} + +impl PartialEq for ModelName { + fn eq(&self, other: &String) -> bool { + &*self.0 == other + } +} + +// Symmetric implementations (allow "string" == model_name) +impl PartialEq for str { + fn eq(&self, other: &ModelName) -> bool { + self == &*other.0 + } +} + +impl PartialEq for &str { + fn eq(&self, other: &ModelName) -> bool { + *self == &*other.0 + } +} + +impl PartialEq for String { + fn eq(&self, other: &ModelName) -> bool { + self == &*other.0 + } +} + +// endregion: --- EQ + // TODO: replace with derive_more Display impl std::fmt::Display for ModelName { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { From d3fcf0b7613d5ff1e50e33f59371ccbf6c5e507e Mon Sep 17 00:00:00 2001 From: Bart Carroll <103963480+bartCarroll@users.noreply.github.com> Date: Mon, 20 Oct 2025 16:11:49 -0500 Subject: [PATCH 02/11] Fixed streaming tool calls for openai models (#91) --- src/adapter/adapters/openai/streamer.rs | 33 +++++++++++++------------ 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index f6a29bfe..4370cee9 100644 --- 
a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -135,13 +135,11 @@ impl futures::Stream for OpenAIStreamer { .unwrap_or_else(|_| format!("call_{index}")); let fn_name = function.x_take::("name").unwrap_or_default(); let arguments = function.x_take::("arguments").unwrap_or_default(); - // Create the tool call - let fn_arguments = serde_json::from_str(&arguments) - .unwrap_or(serde_json::Value::String(arguments.clone())); + // Don't parse yet - accumulate as string first let mut tool_call = crate::chat::ToolCall { call_id, fn_name, - fn_arguments: fn_arguments.clone(), + fn_arguments: serde_json::Value::String(arguments.clone()), }; // Capture the tool call if enabled @@ -149,19 +147,22 @@ impl futures::Stream for OpenAIStreamer { match &mut self.captured_data.tool_calls { Some(calls) => { self.captured_data.tool_calls = Some({ - // When fn_arguments can not be parsed, we need to append the arguments to the existing fn_arguments as json string - let mut captured_fn_argments = String::new(); - if calls[index as usize].fn_arguments.is_string() { - captured_fn_argments.push_str( - calls[index as usize].fn_arguments.as_str().unwrap_or(""), - ); - captured_fn_argments.push_str(&arguments); + // Accumulate arguments as strings, don't parse until complete + let accumulated = if let Some(existing) = calls[index as usize].fn_arguments.as_str() { + format!("{}{}", existing, arguments) + } else { + arguments.clone() + }; + + // Store as string (will be parsed at stream end) + calls[index as usize].fn_arguments = serde_json::Value::String(accumulated); + + // Update call_id and fn_name on first chunk + if !tool_call.fn_name.is_empty() { + calls[index as usize].call_id = tool_call.call_id.clone(); + calls[index as usize].fn_name = tool_call.fn_name.clone(); } - let fn_arguments = serde_json::from_str(&captured_fn_argments) - .unwrap_or(serde_json::Value::String( - captured_fn_argments.clone(), - )); - calls[index as usize].fn_arguments = 
fn_arguments.clone(); + tool_call = calls[index as usize].clone(); calls.to_vec() }) From 4123ee2db51a4dcb461e85cf22ac5733ffcae958 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Mon, 20 Oct 2025 16:47:47 -0700 Subject: [PATCH 03/11] - stream tool - openai - fix issue that captured_tool_calls ado not have the fn_arguments parsed --- src/adapter/adapters/openai/streamer.rs | 54 +++++++++++++++++++++---- tests/support/common_tests.rs | 31 ++++++++++++++ tests/tests_p_openai.rs | 6 +++ 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index 4370cee9..787386f1 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -2,7 +2,7 @@ use crate::adapter::AdapterKind; use crate::adapter::adapters::support::{StreamerCapturedData, StreamerOptions}; use crate::adapter::inter_stream::{InterStreamEnd, InterStreamEvent}; use crate::adapter::openai::OpenAIAdapter; -use crate::chat::ChatOptionsSet; +use crate::chat::{ChatOptionsSet, ToolCall}; use crate::{Error, ModelIden, Result}; use reqwest_eventsource::{Event, EventSource}; use serde_json::Value; @@ -58,11 +58,48 @@ impl futures::Stream for OpenAIStreamer { None }; + // -- Process the captured_tool_calls + // NOTE: here we attempt to parse the `fn_arguments` if it is string, because it means that it was accumulated + let captured_tool_calls = if let Some(tools_calls) = self.captured_data.tool_calls.take() { + let tools_calls: Vec = tools_calls + .into_iter() + .map(|tool_call| { + // extrat + let ToolCall { + call_id, + fn_name, + fn_arguments, + } = tool_call; + // parse fn_arguments if needed + let fn_arguments = match fn_arguments { + Value::String(fn_arguments_string) => { + // NOTE: Here we are resilient for now, if we cannot parse, just return the original String + match serde_json::from_str::(&fn_arguments_string) { + Ok(fn_arguments) => fn_arguments, + Err(_) => 
Value::String(fn_arguments_string), + } + } + _ => fn_arguments, + }; + + ToolCall { + call_id, + fn_name, + fn_arguments, + } + }) + .collect(); + Some(tools_calls) + } else { + None + }; + + // Return the internal stream end let inter_stream_end = InterStreamEnd { captured_usage, captured_text_content: self.captured_data.content.take(), captured_reasoning_content: self.captured_data.reasoning_content.take(), - captured_tool_calls: self.captured_data.tool_calls.take(), + captured_tool_calls, }; return Poll::Ready(Some(Ok(InterStreamEvent::End(inter_stream_end)))); @@ -148,21 +185,24 @@ impl futures::Stream for OpenAIStreamer { Some(calls) => { self.captured_data.tool_calls = Some({ // Accumulate arguments as strings, don't parse until complete - let accumulated = if let Some(existing) = calls[index as usize].fn_arguments.as_str() { + let accumulated = if let Some(existing) = + calls[index as usize].fn_arguments.as_str() + { format!("{}{}", existing, arguments) } else { arguments.clone() }; - + // Store as string (will be parsed at stream end) - calls[index as usize].fn_arguments = serde_json::Value::String(accumulated); - + calls[index as usize].fn_arguments = + serde_json::Value::String(accumulated); + // Update call_id and fn_name on first chunk if !tool_call.fn_name.is_empty() { calls[index as usize].call_id = tool_call.call_id.clone(); calls[index as usize].fn_name = tool_call.fn_name.clone(); } - + tool_call = calls[index as usize].clone(); calls.to_vec() }) diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs index 10efdc20..e338c520 100644 --- a/tests/support/common_tests.rs +++ b/tests/support/common_tests.rs @@ -655,6 +655,37 @@ pub async fn common_test_chat_stream_capture_all_ok(model: &str, checks: Option< Ok(()) } +/// Just making the tool request, and checking the tool call response +/// `complete_check` if for LLMs that are better at giving back the unit and weather. 
+pub async fn common_test_chat_stream_tool_capture_ok(model: &str) -> TestResult<()> { + // -- Setup & Fixtures + let client = Client::default(); + let chat_req = seed_chat_req_tool_simple(); + let mut chat_options = ChatOptions::default().with_capture_tool_calls(true); + + // -- Exec + let chat_res = client.exec_chat_stream(model, chat_req, Some(&chat_options)).await?; + + // Extract Stream content + let StreamExtract { + stream_end, + content, + reasoning_content, + } = extract_stream_end(chat_res.stream).await?; + + // -- Check + let mut tool_calls = stream_end.captured_tool_calls().ok_or("Should have captured tools")?; + if tool_calls.is_empty() { + return Err("Should have tool calls in chat_res".into()); + } + let tool_call = tool_calls.pop().ok_or("Should have at least one tool call")?; + assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("city")?, "Paris"); + assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("country")?, "France"); + assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("unit")?, "C"); + + Ok(()) +} + // endregion: --- Chat Stream Tests // region: --- Binaries diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs index 8c3571e4..74fc6f56 100644 --- a/tests/tests_p_openai.rs +++ b/tests/tests_p_openai.rs @@ -97,6 +97,12 @@ async fn test_chat_stream_capture_all_ok() -> TestResult<()> { common_tests::common_test_chat_stream_capture_all_ok(MODEL, Some(Check::REASONING_USAGE)).await } +#[tokio::test] +async fn test_chat_stream_tool_capture_ok() -> TestResult<()> { + // NOTE: For now the OpenAI Adapter do not capture the thinking as not available in chat completions + common_tests::common_test_chat_stream_tool_capture_ok(MODEL).await +} + // endregion: --- Chat Stream Tests // region: --- Binary Tests From 5a1df3c1ab2c47987f073cb6bd4904cf534ca81f Mon Sep 17 00:00:00 2001 From: Rui Andrada <27135+shingonoide@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:53:42 -0300 Subject: [PATCH 04/11] Refactor ZHIPU adapter to ZAI with 
namespace-based endpoint routing (#95) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename zhipu adapter to zai adapter - Implement namespace-based routing for regular vs coding endpoints - Regular models: "glm-4.6" → credit-based API - Coding models: "zai::glm-4.6" → subscription API - Add comprehensive example c07-zai.rs - Update adapter kind detection to handle "zai" namespace - Remove dual-endpoint complexity from user-facing API Co-authored-by: Rui Andrada --- examples/c00-readme.rs | 6 +- examples/c07-zai.rs | 56 +++++++++++++++ src/adapter/adapter_kind.rs | 22 +++--- src/adapter/adapters/mod.rs | 2 +- src/adapter/adapters/openai/streamer.rs | 2 +- .../adapters/{zhipu => zai}/adapter_impl.rs | 72 ++++++++++++++++--- src/adapter/adapters/zai/mod.rs | 61 ++++++++++++++++ src/adapter/adapters/zhipu/mod.rs | 12 ---- src/adapter/dispatcher.rs | 20 +++--- tests/{tests_p_zhipu.rs => tests_p_zai.rs} | 6 +- ..._reasoning.rs => tests_p_zai_reasoning.rs} | 4 +- 11 files changed, 212 insertions(+), 51 deletions(-) create mode 100644 examples/c07-zai.rs rename src/adapter/adapters/{zhipu => zai}/adapter_impl.rs (51%) create mode 100644 src/adapter/adapters/zai/mod.rs delete mode 100644 src/adapter/adapters/zhipu/mod.rs rename tests/{tests_p_zhipu.rs => tests_p_zai.rs} (95%) rename tests/{tests_p_zhipu_reasoning.rs => tests_p_zai_reasoning.rs} (96%) diff --git a/examples/c00-readme.rs b/examples/c00-readme.rs index eb2f7fe7..83fb4760 100644 --- a/examples/c00-readme.rs +++ b/examples/c00-readme.rs @@ -15,7 +15,7 @@ const MODEL_GROQ: &str = "llama-3.1-8b-instant"; const MODEL_OLLAMA: &str = "gemma:2b"; // sh: `ollama pull gemma:2b` const MODEL_XAI: &str = "grok-3-mini"; const MODEL_DEEPSEEK: &str = "deepseek-chat"; -const MODEL_ZHIPU: &str = "glm-4-plus"; +const MODEL_ZAI: &str = "glm-4-plus"; const MODEL_COHERE: &str = "command-r7b-12-2024"; // NOTE: These are the default environment keys for each AI Adapter Type. 
@@ -31,7 +31,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[ (MODEL_XAI, "XAI_API_KEY"), (MODEL_DEEPSEEK, "DEEPSEEK_API_KEY"), (MODEL_OLLAMA, ""), - (MODEL_ZHIPU, "ZHIPU_API_KEY"), + (MODEL_ZAI, "ZAI_API_KEY"), (MODEL_COHERE, "COHERE_API_KEY"), ]; @@ -41,7 +41,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[ // - starts_with "command" -> Cohere // - starts_with "gemini" -> Gemini // - model in Groq models -> Groq -// - starts_with "glm" -> Zhipu +// - starts_with "glm" -> ZAI // - For anything else -> Ollama // // This can be customized; see `examples/c03-mapper.rs` diff --git a/examples/c07-zai.rs b/examples/c07-zai.rs new file mode 100644 index 00000000..5ee13223 --- /dev/null +++ b/examples/c07-zai.rs @@ -0,0 +1,56 @@ +//! ZAI (Zhipu AI) adapter example +//! +//! Demonstrates how to use ZAI models with automatic endpoint routing: +//! - `glm-4.6` → Regular credit-based API +//! - `zai::glm-4.6` → Coding subscription API (automatically routed) + +use genai::chat::{ChatMessage, ChatRequest}; +use genai::Client; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let client = Client::builder().build(); + + // Test cases demonstrating automatic endpoint routing + let test_cases = vec![ + ("glm-4.6", "Regular ZAI model"), + ("zai::glm-4.6", "Coding subscription model"), + ]; + + for (model_name, description) in test_cases { + println!("\n=== {} ===", description); + println!("Model: {}", model_name); + + let chat_req = ChatRequest::default() + .with_system("You are a helpful assistant.") + .append_message(ChatMessage::user("Say 'hello' and nothing else.")); + + match client.exec_chat(model_name, chat_req, None).await { + Ok(response) => { + println!("✅ Success!"); + if let Some(content) = response.first_text() { + println!("Response: {}", content); + } + if response.usage.prompt_tokens.is_some() || response.usage.completion_tokens.is_some() { + println!("Usage: prompt={}, output={}", + response.usage.prompt_tokens.unwrap_or(0), + 
response.usage.completion_tokens.unwrap_or(0)); + } + } + Err(e) => { + println!("❌ Error: {}", e); + if e.to_string().contains("insufficient balance") { + println!("ℹ️ This model requires credits or subscription"); + } else if e.to_string().contains("401") { + println!("ℹ️ Set ZAI_API_KEY environment variable"); + } + } + } + } + + println!("\n=== SUMMARY ==="); + println!("✅ ZAI adapter handles namespace routing automatically"); + println!("✅ Use ZAI_API_KEY environment variable"); + + Ok(()) +} \ No newline at end of file diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index 9fa0f6ad..03fb87f5 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -8,7 +8,7 @@ use crate::adapter::groq::{self, GroqAdapter}; use crate::adapter::nebius::NebiusAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::xai::XaiAdapter; -use crate::adapter::zhipu::ZhipuAdapter; +use crate::adapter::adapters::zai::ZaiAdapter; use crate::{ModelName, Result}; use derive_more::Display; use serde::{Deserialize, Serialize}; @@ -39,8 +39,8 @@ pub enum AdapterKind { Xai, /// For DeepSeek (Mostly use OpenAI) DeepSeek, - /// For Zhipu (Mostly use OpenAI) - Zhipu, + /// For ZAI (Mostly use OpenAI) + Zai, /// Cohere today use it's own native protocol but might move to OpenAI Adapter Cohere, /// OpenAI shared behavior + some custom. (currently, localhost only, can be customize with ServerTargetResolver). 
@@ -62,7 +62,7 @@ impl AdapterKind { AdapterKind::Nebius => "Nebius", AdapterKind::Xai => "xAi", AdapterKind::DeepSeek => "DeepSeek", - AdapterKind::Zhipu => "Zhipu", + AdapterKind::Zai => "Zai", AdapterKind::Cohere => "Cohere", AdapterKind::Ollama => "Ollama", } @@ -81,7 +81,7 @@ impl AdapterKind { AdapterKind::Nebius => "nebius", AdapterKind::Xai => "xai", AdapterKind::DeepSeek => "deepseek", - AdapterKind::Zhipu => "zhipu", + AdapterKind::Zai => "zai", AdapterKind::Cohere => "cohere", AdapterKind::Ollama => "ollama", } @@ -99,7 +99,7 @@ impl AdapterKind { "nebius" => Some(AdapterKind::Nebius), "xai" => Some(AdapterKind::Xai), "deepseek" => Some(AdapterKind::DeepSeek), - "zhipu" => Some(AdapterKind::Zhipu), + "zai" => Some(AdapterKind::Zai), "cohere" => Some(AdapterKind::Cohere), "ollama" => Some(AdapterKind::Ollama), _ => None, @@ -122,7 +122,7 @@ impl AdapterKind { AdapterKind::Nebius => Some(NebiusAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Xai => Some(XaiAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::DeepSeek => Some(DeepSeekAdapter::API_KEY_DEFAULT_ENV_NAME), - AdapterKind::Zhipu => Some(ZhipuAdapter::API_KEY_DEFAULT_ENV_NAME), + AdapterKind::Zai => Some(ZaiAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Cohere => Some(CohereAdapter::API_KEY_DEFAULT_ENV_NAME), AdapterKind::Ollama => None, } @@ -149,6 +149,7 @@ impl AdapterKind { /// Other Some adapters have to have model name namespaced to be used, /// - e.g., for together.ai `together::meta-llama/Llama-3-8b-chat-hf` /// - e.g., for nebius with `nebius::Qwen/Qwen3-235B-A22B` + /// - e.g., for ZAI coding plan with `coding::glm-4.6` /// /// And all adapters can be force namspaced as well. 
/// @@ -157,6 +158,11 @@ impl AdapterKind { pub fn from_model(model: &str) -> Result { // -- First check if namespaced if let (_, Some(ns)) = ModelName::model_name_and_namespace(model) { + // Special handling: "zai" namespace should route to ZAI for coding endpoint + if ns == "zai" { + return Ok(AdapterKind::Zai); + } + if let Some(adapter) = Self::from_lower_str(ns) { return Ok(adapter); } else { @@ -194,7 +200,7 @@ impl AdapterKind { } else if model.starts_with("grok") { Ok(Self::Xai) } else if model.starts_with("glm") { - Ok(Self::Zhipu) + Ok(Self::Zai) } // For now, fallback to Ollama else { diff --git a/src/adapter/adapters/mod.rs b/src/adapter/adapters/mod.rs index a31217cd..b6495189 100644 --- a/src/adapter/adapters/mod.rs +++ b/src/adapter/adapters/mod.rs @@ -12,4 +12,4 @@ pub(super) mod openai; pub(super) mod openai_resp; pub(super) mod together; pub(super) mod xai; -pub(super) mod zhipu; +pub(super) mod zai; diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs index 787386f1..65d4d44e 100644 --- a/src/adapter/adapters/openai/streamer.rs +++ b/src/adapter/adapters/openai/streamer.rs @@ -136,7 +136,7 @@ impl futures::Stream for OpenAIStreamer { self.captured_data.usage = Some(usage) } AdapterKind::DeepSeek - | AdapterKind::Zhipu + | AdapterKind::Zai | AdapterKind::Fireworks | AdapterKind::Together => { let usage = message_data diff --git a/src/adapter/adapters/zhipu/adapter_impl.rs b/src/adapter/adapters/zai/adapter_impl.rs similarity index 51% rename from src/adapter/adapters/zhipu/adapter_impl.rs rename to src/adapter/adapters/zai/adapter_impl.rs index ae684724..7f1ce6e2 100644 --- a/src/adapter/adapters/zhipu/adapter_impl.rs +++ b/src/adapter/adapters/zai/adapter_impl.rs @@ -7,10 +7,46 @@ use crate::webc::WebResponse; use crate::{Result, ServiceTarget}; use reqwest::RequestBuilder; -pub struct ZhipuAdapter; +/// Helper structure to hold ZAI model parsing information +struct ZaiModelEndpoint { + endpoint: 
Endpoint, +} + +impl ZaiModelEndpoint { + /// Parse ModelIden to determine if it's a coding model and return endpoint + fn from_model(model: &ModelIden) -> Self { + let (_, namespace) = model.model_name.as_model_name_and_namespace(); + + // Check if namespace is "zai" to route to coding endpoint + let endpoint = match namespace { + Some("zai") => Endpoint::from_static("https://api.z.ai/api/coding/paas/v4/"), + _ => ZaiAdapter::default_endpoint(), + }; + + Self { endpoint } + } +} + +/// The ZAI API is mostly compatible with the OpenAI API. +/// +/// NOTE: This adapter will automatically route to the coding endpoint +/// when the model name starts with "zai::". +/// +/// For example, `glm-4.6` uses the regular API endpoint, +/// while `zai::glm-4.6` uses the coding plan endpoint. +/// +pub struct ZaiAdapter; pub(in crate::adapter) const MODELS: &[&str] = &[ "glm-4-plus", + "glm-4.6", + "glm-4.5", + "glm-4.5v", + "glm-4.5-x", + "glm-4.5-air", + "glm-4.5-airx", + "glm-4-32b-0414-128k", + "glm-4.5-flash", "glm-4-air-250414", "glm-4-flashx-250414", "glm-4-flash-250414", @@ -26,17 +62,16 @@ pub(in crate::adapter) const MODELS: &[&str] = &[ "glm-z1-flashx", "glm-4.1v-thinking-flash", "glm-4.1v-thinking-flashx", - "glm-4.5", ]; -impl ZhipuAdapter { - pub const API_KEY_DEFAULT_ENV_NAME: &str = "ZHIPU_API_KEY"; +impl ZaiAdapter { + pub const API_KEY_DEFAULT_ENV_NAME: &str = "ZAI_API_KEY"; } -// The Zhipu API is mostly compatible with the OpenAI API. -impl Adapter for ZhipuAdapter { +// The ZAI API is mostly compatible with the OpenAI API. 
+impl Adapter for ZaiAdapter { fn default_endpoint() -> Endpoint { - const BASE_URL: &str = "https://open.bigmodel.cn/api/paas/v4/"; + const BASE_URL: &str = "https://api.z.ai/api/paas/v4/"; Endpoint::from_static(BASE_URL) } @@ -48,16 +83,28 @@ impl Adapter for ZhipuAdapter { Ok(MODELS.iter().map(|s| s.to_string()).collect()) } - fn get_service_url(model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result { - OpenAIAdapter::util_get_service_url(model, service_type, endpoint) + fn get_service_url(_model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result { + // For ZAI, we need to handle model-specific routing at this level + // because get_service_url is called with the modified endpoint from to_web_request_data + let base_url = endpoint.base_url(); + + let url = match service_type { + ServiceType::Chat | ServiceType::ChatStream => format!("{base_url}chat/completions"), + ServiceType::Embed => format!("{base_url}embeddings"), + }; + Ok(url) } fn to_web_request_data( - target: ServiceTarget, + mut target: ServiceTarget, service_type: ServiceType, chat_req: ChatRequest, chat_options: ChatOptionsSet<'_, '_>, ) -> Result { + // Parse model name and determine appropriate endpoint + let zai_info = ZaiModelEndpoint::from_model(&target.model); + target.endpoint = zai_info.endpoint; + OpenAIAdapter::util_to_web_request_data(target, service_type, chat_req, chat_options, None) } @@ -78,10 +125,13 @@ impl Adapter for ZhipuAdapter { } fn to_embed_request_data( - service_target: crate::ServiceTarget, + mut service_target: crate::ServiceTarget, embed_req: crate::embed::EmbedRequest, options_set: crate::embed::EmbedOptionsSet<'_, '_>, ) -> Result { + let zai_info = ZaiModelEndpoint::from_model(&service_target.model); + service_target.endpoint = zai_info.endpoint; + OpenAIAdapter::to_embed_request_data(service_target, embed_req, options_set) } diff --git a/src/adapter/adapters/zai/mod.rs b/src/adapter/adapters/zai/mod.rs new file mode 100644 
index 00000000..3eaf9b80 --- /dev/null +++ b/src/adapter/adapters/zai/mod.rs @@ -0,0 +1,61 @@ +//! ZAI API Documentation +//! API Documentation: +//! Model Names: GLM series models +//! Pricing: +//! +//! ## Dual Endpoint Support +//! +//! ZAI supports two different API endpoints using the ServiceTargetResolver pattern: +//! +//! ### Regular API (Credit-based) +//! - Endpoint: `https://api.z.ai/api/paas/v4/` +//! - Models: `glm-4.6`, `glm-4.5`, etc. +//! - Usage: Standard API calls billed per token +//! +//! ### Coding Plan (Subscription-based) +//! - Endpoint: `https://api.z.ai/api/coding/paas/v4/` +//! - Models: `coding::glm-4.6`, `coding:glm-4.5`, etc. +//! - Usage: Fixed monthly subscription for coding tasks +//! +//! ## Usage with ServiceTargetResolver +//! +//! ```rust +//! use genai::resolver::{Endpoint, ServiceTargetResolver}; +//! use genai::{Client, AdapterKind, ModelIden}; +//! +//! let target_resolver = ServiceTargetResolver::from_resolver_fn( +//! |service_target| -> Result { +//! let model_name = service_target.model.model_name.to_string(); +//! +//! // Route to appropriate endpoint based on model naming +//! let endpoint_url = if model_name.starts_with("coding::") { +//! "https://api.z.ai/api/coding/paas/v4/" +//! } else { +//! "https://api.z.ai/api/paas/v4/" +//! }; +//! +//! let final_endpoint = Endpoint::from_static(endpoint_url); +//! let final_model = ModelIden::new(AdapterKind::Zai, clean_model_name); +//! +//! Ok(ServiceTarget { endpoint: final_endpoint, model: final_model }) +//! } +//! ); +//! +//! let client = Client::builder().with_service_target_resolver(target_resolver).build(); +//! +//! // Use regular API +//! let response = client.exec_chat("glm-4.6", chat_request, None).await?; +//! +//! // Use coding plan +//! let response = client.exec_chat("coding::glm-4.6", chat_request, None).await?; +//! ``` +//! +//! See `examples/c07-zai-dual-endpoints.rs` for a complete working example. 
+ +// region: --- Modules + +mod adapter_impl; + +pub use adapter_impl::*; + +// endregion: --- Modules \ No newline at end of file diff --git a/src/adapter/adapters/zhipu/mod.rs b/src/adapter/adapters/zhipu/mod.rs deleted file mode 100644 index bc7f0f9f..00000000 --- a/src/adapter/adapters/zhipu/mod.rs +++ /dev/null @@ -1,12 +0,0 @@ -//! Click the globe icon on the top-right corner of the page to switch language. -//! API Documentation: -//! Model Names: -//! Pricing: - -// region: --- Modules - -mod adapter_impl; - -pub use adapter_impl::*; - -// endregion: --- Modules diff --git a/src/adapter/dispatcher.rs b/src/adapter/dispatcher.rs index 4e6030e7..f0909dc9 100644 --- a/src/adapter/dispatcher.rs +++ b/src/adapter/dispatcher.rs @@ -10,7 +10,7 @@ use crate::adapter::ollama::OllamaAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::openai_resp::OpenAIRespAdapter; use crate::adapter::xai::XaiAdapter; -use crate::adapter::zhipu::ZhipuAdapter; +use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse}; use crate::embed::{EmbedOptionsSet, EmbedRequest, EmbedResponse}; @@ -40,7 +40,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::default_endpoint(), AdapterKind::Xai => XaiAdapter::default_endpoint(), AdapterKind::DeepSeek => DeepSeekAdapter::default_endpoint(), - AdapterKind::Zhipu => ZhipuAdapter::default_endpoint(), + AdapterKind::Zai => ZaiAdapter::default_endpoint(), AdapterKind::Cohere => CohereAdapter::default_endpoint(), AdapterKind::Ollama => OllamaAdapter::default_endpoint(), } @@ -58,7 +58,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::default_auth(), AdapterKind::Xai => XaiAdapter::default_auth(), AdapterKind::DeepSeek => DeepSeekAdapter::default_auth(), - AdapterKind::Zhipu => ZhipuAdapter::default_auth(), + AdapterKind::Zai => ZaiAdapter::default_auth(), 
AdapterKind::Cohere => CohereAdapter::default_auth(), AdapterKind::Ollama => OllamaAdapter::default_auth(), } @@ -76,7 +76,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::all_model_names(kind).await, AdapterKind::Xai => XaiAdapter::all_model_names(kind).await, AdapterKind::DeepSeek => DeepSeekAdapter::all_model_names(kind).await, - AdapterKind::Zhipu => ZhipuAdapter::all_model_names(kind).await, + AdapterKind::Zai => ZaiAdapter::all_model_names(kind).await, AdapterKind::Cohere => CohereAdapter::all_model_names(kind).await, AdapterKind::Ollama => OllamaAdapter::all_model_names(kind).await, } @@ -94,7 +94,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Xai => XaiAdapter::get_service_url(model, service_type, endpoint), AdapterKind::DeepSeek => DeepSeekAdapter::get_service_url(model, service_type, endpoint), - AdapterKind::Zhipu => ZhipuAdapter::get_service_url(model, service_type, endpoint), + AdapterKind::Zai => ZaiAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Cohere => CohereAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Ollama => OllamaAdapter::get_service_url(model, service_type, endpoint), } @@ -124,7 +124,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Xai => XaiAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_web_request_data(target, service_type, chat_req, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_web_request_data(target, service_type, chat_req, options_set), + AdapterKind::Zai => ZaiAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Cohere => CohereAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Ollama => OllamaAdapter::to_web_request_data(target, 
service_type, chat_req, options_set), } @@ -146,7 +146,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Xai => XaiAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_response(model_iden, web_response, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_chat_response(model_iden, web_response, options_set), + AdapterKind::Zai => ZaiAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Cohere => CohereAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Ollama => OllamaAdapter::to_chat_response(model_iden, web_response, options_set), } @@ -171,7 +171,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Xai => XaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), + AdapterKind::Zai => ZaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Cohere => CohereAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Ollama => OllamaAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), } @@ -197,7 +197,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Xai => XaiAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_request_data(target, embed_req, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_embed_request_data(target, embed_req, options_set), + AdapterKind::Zai => ZaiAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Cohere => 
CohereAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Ollama => OllamaAdapter::to_embed_request_data(target, embed_req, options_set), } @@ -222,7 +222,7 @@ impl AdapterDispatcher { AdapterKind::Nebius => NebiusAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Xai => XaiAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_response(model_iden, web_response, options_set), - AdapterKind::Zhipu => ZhipuAdapter::to_embed_response(model_iden, web_response, options_set), + AdapterKind::Zai => ZaiAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Cohere => CohereAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Ollama => OllamaAdapter::to_embed_response(model_iden, web_response, options_set), } diff --git a/tests/tests_p_zhipu.rs b/tests/tests_p_zai.rs similarity index 95% rename from tests/tests_p_zhipu.rs rename to tests/tests_p_zai.rs index d32f53df..70bc6952 100644 --- a/tests/tests_p_zhipu.rs +++ b/tests/tests_p_zai.rs @@ -5,7 +5,7 @@ use genai::adapter::AdapterKind; use genai::resolver::AuthData; const MODEL: &str = "glm-4-plus"; -const MODEL_NS: &str = "zhipu::glm-4-plus"; +const MODEL_NS: &str = "zai::glm-4-plus"; const MODEL_V: &str = "glm-4v-flash"; // Visual language model does not support function calling // region: --- Chat @@ -106,7 +106,7 @@ async fn test_tool_full_flow_ok() -> TestResult<()> { #[tokio::test] async fn test_resolver_auth_ok() -> TestResult<()> { - common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZHIPU_API_KEY")).await + common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZAI_API_KEY")).await } // endregion: --- Resolver Tests @@ -115,7 +115,7 @@ async fn test_resolver_auth_ok() -> TestResult<()> { #[tokio::test] async fn test_list_models() -> TestResult<()> { - common_tests::common_test_list_models(AdapterKind::Zhipu, 
"glm-4-plus").await + common_tests::common_test_list_models(AdapterKind::Zai, "glm-4-plus").await } // endregion: --- List diff --git a/tests/tests_p_zhipu_reasoning.rs b/tests/tests_p_zai_reasoning.rs similarity index 96% rename from tests/tests_p_zhipu_reasoning.rs rename to tests/tests_p_zai_reasoning.rs index 9031a409..c405e759 100644 --- a/tests/tests_p_zhipu_reasoning.rs +++ b/tests/tests_p_zai_reasoning.rs @@ -66,7 +66,7 @@ async fn test_chat_stream_capture_content_ok() -> TestResult<()> { #[tokio::test] async fn test_resolver_auth_ok() -> TestResult<()> { - common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZHIPU_API_KEY")).await + common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZAI_API_KEY")).await } // endregion: --- Resolver Tests @@ -75,7 +75,7 @@ async fn test_resolver_auth_ok() -> TestResult<()> { #[tokio::test] async fn test_list_models() -> TestResult<()> { - common_tests::common_test_list_models(AdapterKind::Zhipu, "glm-z1-flash").await + common_tests::common_test_list_models(AdapterKind::Zai, "glm-z1-flash").await } // endregion: --- List From 4aef5cafd51cabbdaf316b281cb5b29b0600f6d7 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 24 Oct 2025 16:36:51 -0700 Subject: [PATCH 05/11] . 
anthropic - update model name for haiku 4.5 --- src/adapter/adapters/anthropic/adapter_impl.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 334e514c..5b16e281 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -41,7 +41,7 @@ const ANTHROPIC_VERSION: &str = "2023-06-01"; const MODELS: &[&str] = &[ "claude-opus-4-1-20250805", "claude-sonnet-4-5-20250929", - "claude-3-5-haiku-latest", + "claude-haiku-4-5-20251001", ]; impl AnthropicAdapter { From d23451338fbea160350bb143c08dc41c9d31a336 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Fri, 24 Oct 2025 16:42:51 -0700 Subject: [PATCH 06/11] . anthropic - update claude-haiku(4.5) max tokens to 64k as per spec --- src/adapter/adapters/anthropic/adapter_impl.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs index 5b16e281..505a3f31 100644 --- a/src/adapter/adapters/anthropic/adapter_impl.rs +++ b/src/adapter/adapters/anthropic/adapter_impl.rs @@ -31,7 +31,7 @@ const REASONING_HIGH: u32 = 24000; // For max model tokens see: https://docs.anthropic.com/en/docs/about-claude/models/overview // // fall back -const MAX_TOKENS_64K: u32 = 64000; // claude-3-7-sonnet, claude-sonnet-4.x +const MAX_TOKENS_64K: u32 = 64000; // claude-3-7-sonnet, claude-sonnet-4.x, claude-haiku-4-5 // custom const MAX_TOKENS_32K: u32 = 32000; // claude-opus-4 const MAX_TOKENS_8K: u32 = 8192; // claude-3-5-sonnet, claude-3-5-haiku @@ -181,7 +181,10 @@ impl Adapter for AnthropicAdapter { // const MAX_TOKENS_4K: u32 = 4096; // claude-3-opus, claude-3-haiku let max_tokens = options_set.max_tokens().unwrap_or_else(|| { // most likely models used, so put first. 
Also a little wider with `claude-sonnet` (since name from version 4) - if model_name.contains("claude-sonnet") || model_name.contains("claude-3-7-sonnet") { + if model_name.contains("claude-sonnet") + || model_name.contains("claude-haiku") + || model_name.contains("claude-3-7-sonnet") + { MAX_TOKENS_64K } else if model_name.contains("claude-opus-4") { MAX_TOKENS_32K From db19d5dc1a9e71bd93ac3893d8d5dc855a8894a0 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 25 Oct 2025 10:22:00 -0700 Subject: [PATCH 07/11] . first pass at adding the module spec --- .gitignore | 1 + dev/spec/_spec-rules.md | 59 +++++++++++++++++++++++++++++++++++ dev/spec/spec-adapter.md | 33 ++++++++++++++++++++ dev/spec/spec-chat.md | 66 ++++++++++++++++++++++++++++++++++++++++ dev/spec/spec-client.md | 59 +++++++++++++++++++++++++++++++++++ dev/spec/spec-common.md | 36 ++++++++++++++++++++++ dev/spec/spec-webc.md | 36 ++++++++++++++++++++++ 7 files changed, 290 insertions(+) create mode 100644 dev/spec/_spec-rules.md create mode 100644 dev/spec/spec-adapter.md create mode 100644 dev/spec/spec-chat.md create mode 100644 dev/spec/spec-client.md create mode 100644 dev/spec/spec-common.md create mode 100644 dev/spec/spec-webc.md diff --git a/.gitignore b/.gitignore index 8d6ad4fd..4ade7c75 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ _* # '_' in src dir, ok. !**/src/**/_* +!**/spec/**/_* *.lock *.lockb diff --git a/dev/spec/_spec-rules.md b/dev/spec/_spec-rules.md new file mode 100644 index 00000000..a666acb3 --- /dev/null +++ b/dev/spec/_spec-rules.md @@ -0,0 +1,59 @@ +# Specification Guidelines + +This document defines the rules for creating and maintaining specification files. + +Important formatting rules + +- Use `-` for bullet points. +- For numbering bullet point style, have empty lines between numbering line. + + +## Types of Specification Files + +### `spec--index.md` + +A single file providing a high-level summary of the entire system. 
+ +### `spec-module_name.md` + +A specification file for each individual module. +- `module-path-name` represents the module’s hierarchy path, flattened with `-`. +- Each file documents the specification for a single module. + +Make sure that the `module_name` is the top most common just after `src/` + +For example `src/module_01/sub_mod/some_file.rs` the spec module name will be `dev/spec/spec-module_01.md` + +(module_name is lowercase) + +## Required Structure for Module Specification Files + +Each `spec-module-path-name.md` file must include the following sections. + + + +## module-path-name + +### Goal + +A clear description of the module’s purpose and responsibilities. + +### Public Module API + +A description of the APIs exposed by the module. +- Define what is exported and how it can be consumed by other modules. +- Include function signatures, data structures, or endpoints as needed. + +### Module Parts + +A breakdown of the module’s internal components. +- May reference sub-files or sub-modules. +- Should explain how the parts work together. + +### Key Design Considerations + +Key design considerations of this module and of its key parts. + + + + diff --git a/dev/spec/spec-adapter.md b/dev/spec/spec-adapter.md new file mode 100644 index 00000000..17e24158 --- /dev/null +++ b/dev/spec/spec-adapter.md @@ -0,0 +1,33 @@ +## adapter + +### Goal + +The `adapter` module is responsible for abstracting the communication with various Generative AI providers (e.g., OpenAI, Gemini, Anthropic, Groq, DeepSeek). It translates generic GenAI requests (like `ChatRequest` and `EmbedRequest`) into provider-specific HTTP request data and converts provider-specific web responses back into generic GenAI response structures. It acts as the translation and dispatch layer between the client logic and the underlying web communication. 
+ +### Public Module API + +The primary public API exposed by the `adapter` module is: + +- `AdapterKind`: An enum identifying the AI provider or protocol type (e.g., `OpenAI`, `Gemini`, `Anthropic`, `Cohere`). This type is used by the client and resolver layers to determine which adapter implementation should handle a specific model request. + +### Module Parts + +- `adapter_kind.rs`: Defines the `AdapterKind` enum. It includes implementation details for serialization, environment variable name resolution, and a default static mapping logic (`from_model`) to associate model names with a specific `AdapterKind`. + +- `adapter_types.rs`: Defines the `Adapter` trait, which sets the contract for all concrete adapter implementations. It also defines common types like `ServiceType` (Chat, ChatStream, Embed) and `WebRequestData` (the normalized structure holding URL, headers, and payload before web execution). + +- `dispatcher.rs`: Contains the `AdapterDispatcher` struct, which acts as the central routing mechanism. It dispatches calls from the client layer to the correct concrete adapter implementation based on the resolved `AdapterKind`. + +- `inter_stream.rs`: Defines internal types (`InterStreamEvent`, `InterStreamEnd`) used by streaming adapters to standardize the output format from diverse provider streaming protocols. This intermediary layer handles complex stream features like capturing usage, reasoning content, and tool calls before conversion to public `ChatStreamResponse` events. + +- `adapters/`: This submodule contains the concrete implementation of the `Adapter` trait for each provider (e.g., `openai`, `gemini`, `anthropic`, `zai`). These submodules handle the specific request/response translation logic for their respective protocols. + +### Key Design Considerations + +- **Stateless and Static Dispatch:** Adapters are designed to be stateless, with all methods in the `Adapter` trait being associated functions (static). 
Requests are routed efficiently using static dispatch through the `AdapterDispatcher`, minimizing runtime overhead and simplifying dependency management. + +- **Request/Response Normalization:** The adapter layer ensures that incoming requests and outgoing responses conform to generic GenAI types, hiding provider-specific implementation details from the rest of the library. + +- **Dynamic Resolution:** While `AdapterKind::from_model` provides a default mapping from model names (based on common prefixes or keywords), the system allows this to be overridden by custom `ServiceTargetResolver` configurations, enabling flexible routing (e.g., mapping a custom model name to an `OpenAI` adapter with a custom endpoint). + +- **Stream Intermediation:** The introduction of `InterStreamEvent` is crucial for handling the variance in streaming protocols across providers. It ensures that complex data transmitted at the end of a stream (like final usage statistics or aggregated tool calls) can be correctly collected and normalized, regardless of the provider's specific event format. diff --git a/dev/spec/spec-chat.md b/dev/spec/spec-chat.md new file mode 100644 index 00000000..52c1be62 --- /dev/null +++ b/dev/spec/spec-chat.md @@ -0,0 +1,66 @@ +## chat + +### Goal + +The `chat` module provides the core primitives for constructing chat requests, defining messages (including multi-part content like text, binary, and tool data), and handling synchronous and asynchronous (streaming) chat responses across all supported AI providers. It standardizes the data structures necessary for modern LLM interactions. + +### Public Module API + +The module exports the following key data structures: + +- **Request/Message Structure:** + - `ChatRequest`: The primary structure for initiating a chat completion call, containing the history (`messages`), an optional system prompt (`system`), and tool definitions (`tools`).
+ - `ChatMessage`: Represents a single interaction turn, comprising a `ChatRole`, `MessageContent`, and optional `MessageOptions`. + - `ChatRole`: Enum defining message roles (`System`, `User`, `Assistant`, `Tool`). + - `MessageContent`: A unified container for multi-part content, wrapping a list of `ContentPart`s. + - `ContentPart`: Enum defining content types: `Text`, `Binary`, `ToolCall`, `ToolResponse`. + - `Binary`, `BinarySource`: Structures defining binary payloads (e.g., images), sourced via base64 or URL. + - `MessageOptions`, `CacheControl`: Per-message configuration hints (e.g., for cache behavior). + +- **Configuration:** + - `ChatOptions`: General request configuration, including sampling parameters (`temperature`, `max_tokens`, `top_p`, `seed`), streaming capture flags, and format control. + - `ReasoningEffort`, `Verbosity`: Provider-specific hints for reasoning intensity or output verbosity. + - `ChatResponseFormat`, `JsonSpec`: Defines desired structured output formats (e.g., JSON mode). + +- **Responses:** + - `ChatResponse`: The result of a non-streaming request, including final content, usage, and model identifiers. + - `ChatStreamResponse`: The result wrapper for streaming requests, containing the `ChatStream` and model identity. + +- **Streaming:** + - `ChatStream`: A `futures::Stream` implementation yielding `ChatStreamEvent`s. + - `ChatStreamEvent`: Enum defining streaming events: `Start`, `Chunk` (content), `ReasoningChunk`, `ToolCallChunk`, and `End`. + - `StreamEnd`: Terminal event data including optional captured usage, content, and reasoning content. + +- **Tooling:** + - `Tool`: Metadata and schema defining a function the model can call. + - `ToolCall`: The model's invocation request for a specific tool. + - `ToolResponse`: The output returned from executing a tool, matched by call ID. + +- **Metadata:** + - `Usage`, `PromptTokensDetails`, `CompletionTokensDetails`: Normalized token usage statistics. 
+ +- **Utilities:** + - `printer` module: Contains `print_chat_stream` for console output utilities. + +### Module Parts + +The functionality is divided into specialized files/sub-modules: + +- `chat_message.rs`: Defines the `ChatMessage` fundamental structure and associated types (`ChatRole`, `MessageOptions`). +- `chat_options.rs`: Manages request configuration (`ChatOptions`) and provides parsing logic for provider-specific hints like `ReasoningEffort` and `Verbosity`. +- `chat_req_response_format.rs`: Handles configuration for structured output (`ChatResponseFormat`, `JsonSpec`). +- `chat_request.rs`: Defines the top-level `ChatRequest` and methods for managing the request history and properties. +- `chat_response.rs`: Defines synchronous chat response structures (`ChatResponse`). +- `chat_stream.rs`: Implements the public `ChatStream` and its events, mapping from the internal adapter stream. +- `content_part.rs`: Defines `ContentPart`, `Binary`, and `BinarySource` for handling multi-modal inputs/outputs. +- `message_content.rs`: Defines `MessageContent`, focusing on collection management and convenient accessors for content parts (e.g., joining all text). +- `tool/mod.rs` (and associated files): Defines the tooling primitives (`Tool`, `ToolCall`, `ToolResponse`). +- `usage.rs`: Defines the normalized token counting structures (`Usage`). +- `printer.rs`: Provides utility functions for rendering stream events to standard output. + +### Key Design Considerations + +- **Unified Content Model:** The use of `MessageContent` composed of `ContentPart` allows any message role (user, assistant, tool) to handle complex, multi-part data seamlessly, including text, binary payloads, and tooling actions. +- **Decoupled Streaming:** The public `ChatStream` is an abstraction layer over an internal stream (`InterStream`), ensuring a consistent external interface regardless of adapter implementation details (like internal handling of usage reporting or reasoning chunks). 
+- **Normalized Usage Metrics:** The `Usage` structure provides an OpenAI-compatible interface while allowing for provider-specific breakdowns (e.g., caching or reasoning tokens) via detailed sub-structures. +- **Hierarchical Options:** `ChatOptions` can be applied globally at the client level or specifically per request. The internal resolution logic ensures request-specific options take precedence over client defaults. diff --git a/dev/spec/spec-client.md b/dev/spec/spec-client.md new file mode 100644 index 00000000..cedd505d --- /dev/null +++ b/dev/spec/spec-client.md @@ -0,0 +1,59 @@ +## client + +### Goal + +The `client` module provides the core entry point (`Client`) for interacting with various Generative AI providers. It encapsulates configuration (`ClientConfig`, `WebConfig`), a builder pattern (`ClientBuilder`), request execution (`exec_chat`, `exec_embed`), and service resolution logic (e.g., determining endpoints and authentication). + +### Public Module API + +The `client` module exposes the following public types: + +- **`Client`**: The main interface for executing AI requests (chat, embedding, streaming, model listing). + - `Client::builder()`: Starts the configuration process. + - `Client::default()`: Creates a client with default configuration. + - Core execution methods: `exec_chat`, `exec_chat_stream`, `exec_embed`, `embed`, `embed_batch`. + - Resolution/Discovery methods: `all_model_names`, `resolve_service_target`. + +- **`ClientBuilder`**: Provides a fluent interface for constructing a `Client`. Used to set `ClientConfig`, default `ChatOptions`, `EmbedOptions`, and custom resolvers (`AuthResolver`, `ServiceTargetResolver`, `ModelMapper`). + +- **`ClientConfig`**: Holds the resolved and default configurations used by the `Client`, including resolver functions and default options. + +- **`Headers`**: A simple map wrapper (`HashMap`) for managing HTTP headers in requests. 
+ +- **`ServiceTarget`**: A struct containing the final resolved components needed to execute a request: `Endpoint`, `AuthData`, and `ModelIden`. + +- **`WebConfig`**: Configuration options specifically for building the underlying `reqwest::Client` (e.g., timeouts, proxies, default headers). + +### Module Parts + +The module is composed of several files that implement the layered client architecture: + +- `builder.rs`: Implements `ClientBuilder`, handling the creation and configuration flow. It initializes or updates the nested `ClientConfig` and optionally an internal `WebClient`. + +- `client_types.rs`: Defines the main `Client` struct and `ClientInner` (which holds `WebClient` and `ClientConfig` behind an `Arc`). + +- `config.rs`: Defines `ClientConfig` and the core `resolve_service_target` logic, which orchestrates calls to `ModelMapper`, `AuthResolver`, and `ServiceTargetResolver` before falling back to adapter defaults. + +- `client_impl.rs`: Contains the main implementation of the public API methods on `Client`, such as `exec_chat` and `exec_embed`. These methods perform service resolution and delegate to `AdapterDispatcher` for request creation and response parsing. + +- `headers.rs`: Implements the `Headers` utility for managing key-value HTTP header maps. + +- `service_target.rs`: Defines the `ServiceTarget` structure for resolved endpoints, authentication, and model identifiers. + +- `web_config.rs`: Defines `WebConfig` and its logic for applying settings to a `reqwest::ClientBuilder`. + +### Key Design Considerations + +- **Client Immutability and Sharing**: The `Client` holds its internal state (`ClientInner` with `WebClient` and `ClientConfig`) wrapped in an `Arc`. This design ensures that the client is thread-safe and cheaply cloneable, aligning with common client patterns in asynchronous Rust applications. 
+ +- **Config Layering and Resolution**: The client architecture employs a sophisticated resolution process managed by `ClientConfig::resolve_service_target`. + - It first applies a `ModelMapper` to potentially translate the input model identifier. + - It then consults the `AuthResolver` for authentication data. If the resolver is absent or returns `None`, it defaults to the adapter's standard authentication mechanism (e.g., API key headers). + - It determines the adapter's default endpoint. + - Finally, it applies the optional `ServiceTargetResolver`, allowing users to override the endpoint, auth, or model for complex scenarios (e.g., custom proxies or routing). + +- **WebClient Abstraction**: The core HTTP client logic is delegated to the `WebClient` (from the `webc` module), which handles low-level request execution and streaming setup. This separation keeps the `client` module focused on business logic and AI provider orchestration. + +- **Builder Pattern for Configuration**: `ClientBuilder` enforces configuration before client creation, simplifying object construction and ensuring necessary dependencies are set up correctly. + +- **Headers Simplification**: The `Headers` struct abstracts HTTP header management, ensuring that subsequent merges or overrides result in a single, final header value, which is typical for API key authorization overrides. diff --git a/dev/spec/spec-common.md b/dev/spec/spec-common.md new file mode 100644 index 00000000..b2d13024 --- /dev/null +++ b/dev/spec/spec-common.md @@ -0,0 +1,36 @@ +## common + +### Goal + +The `common` module provides fundamental data structures used throughout the `genai` library, primarily focusing on identifying models and adapters in a clear and efficient manner. + +### Public Module API + +The module exposes two main types: `ModelName` and `ModelIden`. + +- `ModelName`: Represents a generative AI model identifier (e.g., `"gpt-4o"`, `"claude-3-opus"`). 
- It wraps an `Arc<str>` for efficient cloning and sharing across threads. + - Implements `From<String>`, `From<&String>`, `From<&str>`, and `Deref`. + - Supports equality comparison (`PartialEq`) with various string types (`&str`, `String`). + +- `ModelIden`: Uniquely identifies a model by coupling an `AdapterKind` with a `ModelName`. + - Fields: + - `adapter_kind: AdapterKind` + - `model_name: ModelName` + - Constructor: `fn new(adapter_kind: AdapterKind, model_name: impl Into<ModelName>) -> Self` + - Utility methods for creating new identifiers based on name changes: + - `fn from_name<T: Into<ModelName>>(&self, new_name: T) -> ModelIden` + - `fn from_optional_name<T: Into<ModelName>>(&self, new_name: Option<T>) -> ModelIden` + +### Module Parts + +The `common` module consists of: + +- `model_name.rs`: Defines the `ModelName` type and related string manipulation utilities, including parsing optional namespaces (e.g., `namespace::model_name`). +- `model_iden.rs`: Defines the `ModelIden` type, which associates a `ModelName` with an `AdapterKind`. + +### Key Design Considerations + +- **Efficiency of ModelName:** `ModelName` uses `Arc<str>` to ensure that cloning the model identifier is cheap, which is crucial as model identifiers are frequently passed around in request and response structures. +- **Deref Implementation:** Implementing `Deref` for `ModelName` allows it to be used naturally as a string reference. +- **ModelIden Immutability:** `ModelIden` is designed to be immutable and fully identifiable, combining the model string identity (`ModelName`) with the service provider identity (`AdapterKind`). diff --git a/dev/spec/spec-webc.md b/dev/spec/spec-webc.md new file mode 100644 index 00000000..a4bd3232 --- /dev/null +++ b/dev/spec/spec-webc.md @@ -0,0 +1,36 @@ +## webc + +### Goal + +The `webc` module provides a low-level, internal web client layer utilizing `reqwest`.
Its primary role is to abstract standard HTTP requests (GET/POST) and manage complex streaming responses required by various AI providers, especially those that do not fully conform to the Server-Sent Events (SSE) standard (`text/event-stream`). It handles standard JSON requests/responses and custom stream parsing. + +### Public Module API + +The `webc` module is primarily an internal component, only exposing its dedicated error type publicly. + +- `pub use error::Error;` + - `Error`: An enum representing all possible errors originating from the web communication layer (e.g., failed status codes, JSON parsing errors, reqwest errors, stream clone errors). + +(All other types like `WebClient`, `WebResponse`, `WebStream`, and `Result` are exported as `pub(crate)` for internal library use.) + +### Module Parts + +The module consists of three main internal components: + +- `error.rs`: Defines the `Error` enum and the module-scoped `Result` type alias. It captures network/HTTP related failures and external errors like `reqwest::Error` and `value_ext::JsonValueExtError`. + +- `web_client.rs`: Contains the `WebClient` struct, a thin wrapper around `reqwest::Client`. It provides methods (`do_get`, `do_post`) for non-streaming standard HTTP communication, which assumes the response body is JSON and is parsed into `serde_json::Value`. It also defines `WebResponse`, which encapsulates the HTTP status and parsed JSON body. + +- `web_stream.rs`: Implements `WebStream`, a custom `futures::Stream` implementation designed for handling non-SSE streaming protocols used by some AI providers (e.g., Cohere, Gemini). It defines `StreamMode` to specify how stream chunks should be parsed (either by a fixed delimiter or specialized handling for "Pretty JSON Array" formats). + +### Key Design Considerations + +- **Internal Focus:** The module is designed strictly for internal use (`pub(crate)`) except for the public error type. 
This shields the rest of the library from direct `reqwest` dependency details. + +- **Custom Streaming:** `WebStream` exists specifically to manage streaming protocols that deviate from the standard SSE format, providing message splitting based on `StreamMode`. This ensures compatibility with providers like Cohere (delimiter-based) and Gemini (JSON array chunking). + +- **Generic JSON Response Handling:** `WebResponse` abstracts successful non-streaming responses by immediately parsing the body into `serde_json::Value`. This allows adapter modules to deserialize into their specific structures subsequently. + +- **Error Richness:** The `Error::ResponseFailedStatus` variant includes the `StatusCode`, full `body`, and `HeaderMap` to provide comprehensive debugging information upon API failure. + +- **Async Implementation:** All network operations rely on `tokio` and `reqwest`, ensuring non-blocking execution throughout the I/O layer. `WebStream` leverages `futures::Stream` traits for integration with standard Rust async infrastructure. From 28011fdcdc241ffae55c5bfc06f3e511a22af79c Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 25 Oct 2025 10:41:03 -0700 Subject: [PATCH 08/11] . cargo fmt --- examples/c07-zai.rs | 93 ++++++++++++++++----------------- src/adapter/adapter_kind.rs | 4 +- src/adapter/adapters/zai/mod.rs | 2 +- src/adapter/dispatcher.rs | 2 +- 4 files changed, 50 insertions(+), 51 deletions(-) diff --git a/examples/c07-zai.rs b/examples/c07-zai.rs index 5ee13223..e8f7b13b 100644 --- a/examples/c07-zai.rs +++ b/examples/c07-zai.rs @@ -1,56 +1,55 @@ //! ZAI (Zhipu AI) adapter example -//! +//! //! Demonstrates how to use ZAI models with automatic endpoint routing: //! - `glm-4.6` → Regular credit-based API //! 
- `zai::glm-4.6` → Coding subscription API (automatically routed) -use genai::chat::{ChatMessage, ChatRequest}; use genai::Client; +use genai::chat::{ChatMessage, ChatRequest}; #[tokio::main] async fn main() -> Result<(), Box> { - let client = Client::builder().build(); - - // Test cases demonstrating automatic endpoint routing - let test_cases = vec![ - ("glm-4.6", "Regular ZAI model"), - ("zai::glm-4.6", "Coding subscription model"), - ]; - - for (model_name, description) in test_cases { - println!("\n=== {} ===", description); - println!("Model: {}", model_name); - - let chat_req = ChatRequest::default() - .with_system("You are a helpful assistant.") - .append_message(ChatMessage::user("Say 'hello' and nothing else.")); - - match client.exec_chat(model_name, chat_req, None).await { - Ok(response) => { - println!("✅ Success!"); - if let Some(content) = response.first_text() { - println!("Response: {}", content); - } - if response.usage.prompt_tokens.is_some() || response.usage.completion_tokens.is_some() { - println!("Usage: prompt={}, output={}", - response.usage.prompt_tokens.unwrap_or(0), - response.usage.completion_tokens.unwrap_or(0)); - } - } - Err(e) => { - println!("❌ Error: {}", e); - if e.to_string().contains("insufficient balance") { - println!("ℹ️ This model requires credits or subscription"); - } else if e.to_string().contains("401") { - println!("ℹ️ Set ZAI_API_KEY environment variable"); - } - } - } - } - - println!("\n=== SUMMARY ==="); - println!("✅ ZAI adapter handles namespace routing automatically"); - println!("✅ Use ZAI_API_KEY environment variable"); - - Ok(()) -} \ No newline at end of file + let client = Client::builder().build(); + + // Test cases demonstrating automatic endpoint routing + let test_cases = vec![("glm-4.6", "Regular ZAI model"), ("zai::glm-4.6", "Coding subscription model")]; + + for (model_name, description) in test_cases { + println!("\n=== {} ===", description); + println!("Model: {}", model_name); + + let chat_req = 
ChatRequest::default() + .with_system("You are a helpful assistant.") + .append_message(ChatMessage::user("Say 'hello' and nothing else.")); + + match client.exec_chat(model_name, chat_req, None).await { + Ok(response) => { + println!("✅ Success!"); + if let Some(content) = response.first_text() { + println!("Response: {}", content); + } + if response.usage.prompt_tokens.is_some() || response.usage.completion_tokens.is_some() { + println!( + "Usage: prompt={}, output={}", + response.usage.prompt_tokens.unwrap_or(0), + response.usage.completion_tokens.unwrap_or(0) + ); + } + } + Err(e) => { + println!("❌ Error: {}", e); + if e.to_string().contains("insufficient balance") { + println!("ℹ️ This model requires credits or subscription"); + } else if e.to_string().contains("401") { + println!("ℹ️ Set ZAI_API_KEY environment variable"); + } + } + } + } + + println!("\n=== SUMMARY ==="); + println!("✅ ZAI adapter handles namespace routing automatically"); + println!("✅ Use ZAI_API_KEY environment variable"); + + Ok(()) +} diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index 03fb87f5..9e52d50a 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -1,4 +1,5 @@ use crate::adapter::adapters::together::TogetherAdapter; +use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::anthropic::AnthropicAdapter; use crate::adapter::cohere::CohereAdapter; use crate::adapter::deepseek::{self, DeepSeekAdapter}; @@ -8,7 +9,6 @@ use crate::adapter::groq::{self, GroqAdapter}; use crate::adapter::nebius::NebiusAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::xai::XaiAdapter; -use crate::adapter::adapters::zai::ZaiAdapter; use crate::{ModelName, Result}; use derive_more::Display; use serde::{Deserialize, Serialize}; @@ -162,7 +162,7 @@ impl AdapterKind { if ns == "zai" { return Ok(AdapterKind::Zai); } - + if let Some(adapter) = Self::from_lower_str(ns) { return Ok(adapter); } else { diff --git 
a/src/adapter/adapters/zai/mod.rs b/src/adapter/adapters/zai/mod.rs index 3eaf9b80..a7d774ff 100644 --- a/src/adapter/adapters/zai/mod.rs +++ b/src/adapter/adapters/zai/mod.rs @@ -58,4 +58,4 @@ mod adapter_impl; pub use adapter_impl::*; -// endregion: --- Modules \ No newline at end of file +// endregion: --- Modules diff --git a/src/adapter/dispatcher.rs b/src/adapter/dispatcher.rs index f0909dc9..f2fd064f 100644 --- a/src/adapter/dispatcher.rs +++ b/src/adapter/dispatcher.rs @@ -1,5 +1,6 @@ use super::groq::GroqAdapter; use crate::adapter::adapters::together::TogetherAdapter; +use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::anthropic::AnthropicAdapter; use crate::adapter::cohere::CohereAdapter; use crate::adapter::deepseek::DeepSeekAdapter; @@ -10,7 +11,6 @@ use crate::adapter::ollama::OllamaAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::openai_resp::OpenAIRespAdapter; use crate::adapter::xai::XaiAdapter; -use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse}; use crate::embed::{EmbedOptionsSet, EmbedRequest, EmbedResponse}; From 1181667fc6c81223ad6a20131206272b7568fa7f Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 25 Oct 2025 10:41:16 -0700 Subject: [PATCH 09/11] . 
update to v0.4.3 --- CHANGELOG.md | 7 +++++++ Cargo.toml | 2 +- README.md | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 910efe74..0ae27500 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ `.` minor | `-` Fix | `+` Addition | `^` improvement | `!` Change | `*` Refactor +## 2025-10-25 - [v0.4.3](https://github.com/jeremychone/rust-genai/compare/v0.4.2...v0.4.3) + +- `!` Refactor ZHIPU adapter to ZAI with namespace-based endpoint routing (#95) +- `-` openai - stream tool - Fix streaming too issue (#91) +- `.` added ModelName partial eq implementations for string types (#94) +- `.` anthropic - update model name for haiku 4.5 + ## 2025-10-12 - [v0.4.2](https://github.com/jeremychone/rust-genai/compare/v0.4.1...v0.4.2) - `.` test - make the common_test_chat_stop_sequences_ok more resilient diff --git a/Cargo.toml b/Cargo.toml index 4c1ff3f6..fe5de75e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.4.3-wip" +version = "0.4.3" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" diff --git a/README.md b/README.md index 2581d6a9..6cff5844 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Provides a single, ergonomic API to many generative AI providers, such as Anthro **NOTE:** Big update with **v0.4.x** - More adapters, PDF and image support, embeddings, custom headers, and transparent support for the OpenAI Responses API (gpt-5-codex) -## v0.4.0 Big Release +## v0.4.x Big Release - **What's new**: - **PDF and Images** support (thanks to [Andrew Rademacher](https://github.com/AndrewRademacher)) @@ -39,6 +39,8 @@ See: ## Big Thanks to +- [Bart Carroll](https://github.com/bartCarroll) For [#91](https://github.com/jeremychone/rust-genai/pull/91) Fixed streaming tool calls for openai models +- [Rui Andrada](https://github.com/shingonoide) For [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring ZHIPU adapter to ZAI - [Adrien](https://github.com/XciD) Extra headers in requests, seed for chat requests, and fixes (with [Julien Chaumond](https://github.com/julien-c) for extra headers) - [Andrew Rademacher](https://github.com/AndrewRademacher) for PDF support, Anthropic streamer, and insight on flattening the message content (e.g., ContentParts) - [Jesus Santander](https://github.com/jsantanders) Embedding support [PR #83](https://github.com/jeremychone/rust-genai/pull/83) From 7b30e42d4513da16cb08715001527591ff658f10 Mon Sep 17 00:00:00 2001 From: Jeremy Chone Date: Sat, 25 Oct 2025 10:42:57 -0700 Subject: [PATCH 10/11] . v0.4.4-WIP --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fe5de75e..6e04e7f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "genai" -version = "0.4.3" +version = "0.4.4-WIP" edition = "2024" license = "MIT OR Apache-2.0" description = "Multi-AI Providers Library for Rust. 
(OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)" From 0f8839adb4666a35c102e507d544c514a928800d Mon Sep 17 00:00:00 2001 From: AlexMikhalev Date: Mon, 3 Nov 2025 13:40:08 +0000 Subject: [PATCH 11/11] fix: Clean up adapter imports and remove duplicate ZAi references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixed import paths for adapters after module reorganization - Removed remaining duplicate ZAi adapter references - Added missing openrouter module export - Ensured consistent adapter import structure 🤖 Generated with [terraphim.ai](https://terraphim.ai) Co-Authored-By: Claude --- src/adapter/adapter_kind.rs | 4 ++-- src/adapter/adapters/mod.rs | 1 + src/adapter/dispatcher.rs | 16 +++------------- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs index c439446f..7c2c882e 100644 --- a/src/adapter/adapter_kind.rs +++ b/src/adapter/adapter_kind.rs @@ -1,5 +1,3 @@ -use crate::adapter::adapters::together::TogetherAdapter; -use crate::adapter::adapters::zai::{self, ZaiAdapter}; use crate::adapter::anthropic::AnthropicAdapter; use crate::adapter::cerebras::CerebrasAdapter; use crate::adapter::cohere::CohereAdapter; @@ -10,7 +8,9 @@ use crate::adapter::groq::{self, GroqAdapter}; use crate::adapter::nebius::NebiusAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::openrouter::OpenRouterAdapter; +use crate::adapter::together::TogetherAdapter; use crate::adapter::xai::XaiAdapter; +use crate::adapter::zai::{self, ZaiAdapter}; use crate::{ModelName, Result}; use derive_more::Display; use serde::{Deserialize, Serialize}; diff --git a/src/adapter/adapters/mod.rs b/src/adapter/adapters/mod.rs index 21a059c7..bc32c2d4 100644 --- a/src/adapter/adapters/mod.rs +++ b/src/adapter/adapters/mod.rs @@ -11,6 +11,7 @@ pub(super) mod nebius; pub(super) mod ollama; pub(super) mod openai; pub(super) mod openai_resp; +pub(super) mod 
openrouter; pub(super) mod together; pub(super) mod xai; pub(super) mod zai; diff --git a/src/adapter/dispatcher.rs b/src/adapter/dispatcher.rs index d08c2d0e..d9f9194a 100644 --- a/src/adapter/dispatcher.rs +++ b/src/adapter/dispatcher.rs @@ -1,19 +1,18 @@ -use super::groq::GroqAdapter; -use crate::adapter::adapters::together::TogetherAdapter; -use crate::adapter::adapters::zai::ZaiAdapter; use crate::adapter::anthropic::AnthropicAdapter; use crate::adapter::cerebras::CerebrasAdapter; use crate::adapter::cohere::CohereAdapter; use crate::adapter::deepseek::DeepSeekAdapter; use crate::adapter::fireworks::FireworksAdapter; use crate::adapter::gemini::GeminiAdapter; +use crate::adapter::groq::GroqAdapter; use crate::adapter::nebius::NebiusAdapter; use crate::adapter::ollama::OllamaAdapter; use crate::adapter::openai::OpenAIAdapter; use crate::adapter::openai_resp::OpenAIRespAdapter; use crate::adapter::openrouter::OpenRouterAdapter; - +use crate::adapter::together::TogetherAdapter; use crate::adapter::xai::XaiAdapter; +use crate::adapter::zai::ZaiAdapter; use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData}; use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse}; use crate::embed::{EmbedOptionsSet, EmbedRequest, EmbedResponse}; @@ -47,7 +46,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::default_endpoint(), AdapterKind::Ollama => OllamaAdapter::default_endpoint(), AdapterKind::Cerebras => CerebrasAdapter::default_endpoint(), - AdapterKind::ZAi => ZAiAdapter::default_endpoint(), AdapterKind::OpenRouter => Endpoint::from_static("https://openrouter.ai/api/v1/"), } } @@ -68,7 +66,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::default_auth(), AdapterKind::Ollama => OllamaAdapter::default_auth(), AdapterKind::Cerebras => CerebrasAdapter::default_auth(), - AdapterKind::ZAi => ZAiAdapter::default_auth(), AdapterKind::OpenRouter => 
AuthData::from_env(OpenRouterAdapter::API_KEY_DEFAULT_ENV_NAME), } } @@ -89,7 +86,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::all_model_names(kind).await, AdapterKind::Ollama => OllamaAdapter::all_model_names(kind).await, AdapterKind::Cerebras => CerebrasAdapter::all_model_names(kind).await, - AdapterKind::ZAi => ZAiAdapter::all_model_names(kind).await, AdapterKind::OpenRouter => OpenRouterAdapter::all_model_names(kind).await, } } @@ -110,7 +106,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Ollama => OllamaAdapter::get_service_url(model, service_type, endpoint), AdapterKind::Cerebras => CerebrasAdapter::get_service_url(model, service_type, endpoint), - AdapterKind::ZAi => ZAiAdapter::get_service_url(model, service_type, endpoint), AdapterKind::OpenRouter => OpenRouterAdapter::get_service_url(model, service_type, endpoint), } } @@ -143,7 +138,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Ollama => OllamaAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::Cerebras => CerebrasAdapter::to_web_request_data(target, service_type, chat_req, options_set), - AdapterKind::ZAi => ZAiAdapter::to_web_request_data(target, service_type, chat_req, options_set), AdapterKind::OpenRouter => { OpenRouterAdapter::to_web_request_data(target, service_type, chat_req, options_set) } @@ -170,7 +164,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Ollama => OllamaAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::Cerebras => CerebrasAdapter::to_chat_response(model_iden, web_response, options_set), - AdapterKind::ZAi => ZAiAdapter::to_chat_response(model_iden, web_response, options_set), AdapterKind::OpenRouter => 
OpenRouterAdapter::to_chat_response(model_iden, web_response, options_set), } } @@ -198,7 +191,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Ollama => OllamaAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::Cerebras => CerebrasAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), - AdapterKind::ZAi => ZAiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), AdapterKind::OpenRouter => OpenRouterAdapter::to_chat_stream(model_iden, reqwest_builder, options_set), } } @@ -227,7 +219,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Ollama => OllamaAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::Cerebras => CerebrasAdapter::to_embed_request_data(target, embed_req, options_set), - AdapterKind::ZAi => ZAiAdapter::to_embed_request_data(target, embed_req, options_set), AdapterKind::OpenRouter => OpenRouterAdapter::to_embed_request_data(target, embed_req, options_set), } } @@ -255,7 +246,6 @@ impl AdapterDispatcher { AdapterKind::Cohere => CohereAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Ollama => OllamaAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::Cerebras => CerebrasAdapter::to_embed_response(model_iden, web_response, options_set), - AdapterKind::ZAi => ZAiAdapter::to_embed_response(model_iden, web_response, options_set), AdapterKind::OpenRouter => OpenRouterAdapter::to_embed_response(model_iden, web_response, options_set), } }