diff --git a/.gitignore b/.gitignore
index 8d6ad4fd..4ade7c75 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
_*
# '_' in src dir, ok.
!**/src/**/_*
+!**/spec/**/_*
*.lock
*.lockb
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 910efe74..0ae27500 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,13 @@
`.` minor | `-` Fix | `+` Addition | `^` improvement | `!` Change | `*` Refactor
+## 2025-10-25 - [v0.4.3](https://github.com/jeremychone/rust-genai/compare/v0.4.2...v0.4.3)
+
+- `!` Refactor ZHIPU adapter to ZAI with namespace-based endpoint routing (#95)
+- `-` openai - stream tool - Fix streaming tool issue (#91)
+- `.` added ModelName partial eq implementations for string types (#94)
+- `.` anthropic - update model name for haiku 4.5
+
## 2025-10-12 - [v0.4.2](https://github.com/jeremychone/rust-genai/compare/v0.4.1...v0.4.2)
- `.` test - make the common_test_chat_stop_sequences_ok more resilient
diff --git a/Cargo.toml b/Cargo.toml
index 7049a622..167e4172 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "genai"
-version = "0.4.3-wip"
+version = "0.4.4-wip"
edition = "2024"
license = "MIT OR Apache-2.0"
description = "Multi-AI Providers Library for Rust. (OpenAI, Gemini, Anthropic, xAI, Ollama, Groq, DeepSeek, Grok)"
diff --git a/README.md b/README.md
index 7fbcca65..7e6def18 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Provides a single, ergonomic API to many generative AI providers, such as Anthro
**NOTE:** Big update with **v0.4.x** - More adapters, PDF and image support, embeddings, custom headers, and transparent support for the OpenAI Responses API (gpt-5-codex)
-## v0.4.0 Big Release
+## v0.4.x Big Release
- **What's new**:
- **PDF and Images** support (thanks to [Andrew Rademacher](https://github.com/AndrewRademacher))
@@ -39,6 +39,8 @@ See:
## Big Thanks to
+- [Bart Carroll](https://github.com/bartCarroll) for [#91](https://github.com/jeremychone/rust-genai/pull/91) fixing streaming tool calls for OpenAI models
+- [Rui Andrada](https://github.com/shingonoide) for [#95](https://github.com/jeremychone/rust-genai/pull/95) refactoring the ZHIPU adapter to ZAI
- [Adrien](https://github.com/XciD) Extra headers in requests, seed for chat requests, and fixes (with [Julien Chaumond](https://github.com/julien-c) for extra headers)
- [Andrew Rademacher](https://github.com/AndrewRademacher) for PDF support, Anthropic streamer, and insight on flattening the message content (e.g., ContentParts)
- [Jesus Santander](https://github.com/jsantanders) Embedding support [PR #83](https://github.com/jeremychone/rust-genai/pull/83)
diff --git a/dev/spec/_spec-rules.md b/dev/spec/_spec-rules.md
new file mode 100644
index 00000000..a666acb3
--- /dev/null
+++ b/dev/spec/_spec-rules.md
@@ -0,0 +1,59 @@
+# Specification Guidelines
+
+This document defines the rules for creating and maintaining specification files.
+
+Important formatting rules
+
+- Use `-` for bullet points.
+- For numbered bullet point style, leave an empty line between numbered lines.
+
+
+## Types of Specification Files
+
+### `spec--index.md`
+
+A single file providing a high-level summary of the entire system.
+
+### `spec-module_name.md`
+
+A specification file for each individual module.
+- `module-path-name` represents the module’s hierarchy path, flattened with `-`.
+- Each file documents the specification for a single module.
+
+Make sure that the `module_name` is the topmost module directory just after `src/`.
+
+For example, for `src/module_01/sub_mod/some_file.rs`, the spec file will be `dev/spec/spec-module_01.md`.
+
+(module_name is lowercase)
+
+## Required Structure for Module Specification Files
+
+Each `spec-module-path-name.md` file must include the following sections.
+
+
+
+## module-path-name
+
+### Goal
+
+A clear description of the module’s purpose and responsibilities.
+
+### Public Module API
+
+A description of the APIs exposed by the module.
+- Define what is exported and how it can be consumed by other modules.
+- Include function signatures, data structures, or endpoints as needed.
+
+### Module Parts
+
+A breakdown of the module’s internal components.
+- May reference sub-files or sub-modules.
+- Should explain how the parts work together.
+
+### Key Design Considerations
+
+Key design considerations of this module and of its key parts.
+
+
+
+
diff --git a/dev/spec/spec-adapter.md b/dev/spec/spec-adapter.md
new file mode 100644
index 00000000..17e24158
--- /dev/null
+++ b/dev/spec/spec-adapter.md
@@ -0,0 +1,33 @@
+## adapter
+
+### Goal
+
+The `adapter` module is responsible for abstracting the communication with various Generative AI providers (e.g., OpenAI, Gemini, Anthropic, Groq, DeepSeek). It translates generic GenAI requests (like `ChatRequest` and `EmbedRequest`) into provider-specific HTTP request data and converts provider-specific web responses back into generic GenAI response structures. It acts as the translation and dispatch layer between the client logic and the underlying web communication.
+
+### Public Module API
+
+The primary public API exposed by the `adapter` module is:
+
+- `AdapterKind`: An enum identifying the AI provider or protocol type (e.g., `OpenAI`, `Gemini`, `Anthropic`, `Cohere`). This type is used by the client and resolver layers to determine which adapter implementation should handle a specific model request.
+
+### Module Parts
+
+- `adapter_kind.rs`: Defines the `AdapterKind` enum. It includes implementation details for serialization, environment variable name resolution, and a default static mapping logic (`from_model`) to associate model names with a specific `AdapterKind`.
+
+- `adapter_types.rs`: Defines the `Adapter` trait, which sets the contract for all concrete adapter implementations. It also defines common types like `ServiceType` (Chat, ChatStream, Embed) and `WebRequestData` (the normalized structure holding URL, headers, and payload before web execution).
+
+- `dispatcher.rs`: Contains the `AdapterDispatcher` struct, which acts as the central routing mechanism. It dispatches calls from the client layer to the correct concrete adapter implementation based on the resolved `AdapterKind`.
+
+- `inter_stream.rs`: Defines internal types (`InterStreamEvent`, `InterStreamEnd`) used by streaming adapters to standardize the output format from diverse provider streaming protocols. This intermediary layer handles complex stream features like capturing usage, reasoning content, and tool calls before conversion to public `ChatStreamResponse` events.
+
+- `adapters/`: This submodule contains the concrete implementation of the `Adapter` trait for each provider (e.g., `openai`, `gemini`, `anthropic`, `zai`). These submodules handle the specific request/response translation logic for their respective protocols.
+
+### Key Design Considerations
+
+- **Stateless and Static Dispatch:** Adapters are designed to be stateless, with all methods in the `Adapter` trait being associated functions (static). Requests are routed efficiently using static dispatch through the `AdapterDispatcher`, minimizing runtime overhead and simplifying dependency management.
+
+- **Request/Response Normalization:** The adapter layer ensures that incoming requests and outgoing responses conform to generic GenAI types, hiding provider-specific implementation details from the rest of the library.
+
+- **Dynamic Resolution:** While `AdapterKind::from_model` provides a default mapping from model names (based on common prefixes or keywords), the system allows this to be overridden by custom `ServiceTargetResolver` configurations, enabling flexible routing (e.g., mapping a custom model name to an `OpenAI` adapter with a custom endpoint).
+
+- **Stream Intermediation:** The introduction of `InterStreamEvent` is crucial for handling the variance in streaming protocols across providers. It ensures that complex data transmitted at the end of a stream (like final usage statistics or aggregated tool calls) can be correctly collected and normalized, regardless of the provider's specific event format.
diff --git a/dev/spec/spec-chat.md b/dev/spec/spec-chat.md
new file mode 100644
index 00000000..52c1be62
--- /dev/null
+++ b/dev/spec/spec-chat.md
@@ -0,0 +1,66 @@
+## chat
+
+### Goal
+
+The `chat` module provides the core primitives for constructing chat requests, defining messages (including multi-part content like text, binary, and tool data), and handling synchronous and asynchronous (streaming) chat responses across all supported AI providers. It standardizes the data structures necessary for modern LLM interactions.
+
+### Public Module API
+
+The module exports the following key data structures:
+
+- **Request/Message Structure:**
+ - `ChatRequest`: The primary structure for initiating a chat completion call, containing the history (`messages`), an optional system prompt (`system`), and tool definitions (`tools`).
+ - `ChatMessage`: Represents a single interaction turn, comprising a `ChatRole`, `MessageContent`, and optional `MessageOptions`.
+ - `ChatRole`: Enum defining message roles (`System`, `User`, `Assistant`, `Tool`).
+ - `MessageContent`: A unified container for multi-part content, wrapping a list of `ContentPart`s.
+ - `ContentPart`: Enum defining content types: `Text`, `Binary`, `ToolCall`, `ToolResponse`.
+ - `Binary`, `BinarySource`: Structures defining binary payloads (e.g., images), sourced via base64 or URL.
+ - `MessageOptions`, `CacheControl`: Per-message configuration hints (e.g., for cache behavior).
+
+- **Configuration:**
+ - `ChatOptions`: General request configuration, including sampling parameters (`temperature`, `max_tokens`, `top_p`, `seed`), streaming capture flags, and format control.
+ - `ReasoningEffort`, `Verbosity`: Provider-specific hints for reasoning intensity or output verbosity.
+ - `ChatResponseFormat`, `JsonSpec`: Defines desired structured output formats (e.g., JSON mode).
+
+- **Responses:**
+ - `ChatResponse`: The result of a non-streaming request, including final content, usage, and model identifiers.
+ - `ChatStreamResponse`: The result wrapper for streaming requests, containing the `ChatStream` and model identity.
+
+- **Streaming:**
+ - `ChatStream`: A `futures::Stream` implementation yielding `ChatStreamEvent`s.
+ - `ChatStreamEvent`: Enum defining streaming events: `Start`, `Chunk` (content), `ReasoningChunk`, `ToolCallChunk`, and `End`.
+ - `StreamEnd`: Terminal event data including optional captured usage, content, and reasoning content.
+
+- **Tooling:**
+ - `Tool`: Metadata and schema defining a function the model can call.
+ - `ToolCall`: The model's invocation request for a specific tool.
+ - `ToolResponse`: The output returned from executing a tool, matched by call ID.
+
+- **Metadata:**
+ - `Usage`, `PromptTokensDetails`, `CompletionTokensDetails`: Normalized token usage statistics.
+
+- **Utilities:**
+ - `printer` module: Contains `print_chat_stream` for console output utilities.
+
+### Module Parts
+
+The functionality is divided into specialized files/sub-modules:
+
+- `chat_message.rs`: Defines the `ChatMessage` fundamental structure and associated types (`ChatRole`, `MessageOptions`).
+- `chat_options.rs`: Manages request configuration (`ChatOptions`) and provides parsing logic for provider-specific hints like `ReasoningEffort` and `Verbosity`.
+- `chat_req_response_format.rs`: Handles configuration for structured output (`ChatResponseFormat`, `JsonSpec`).
+- `chat_request.rs`: Defines the top-level `ChatRequest` and methods for managing the request history and properties.
+- `chat_response.rs`: Defines synchronous chat response structures (`ChatResponse`).
+- `chat_stream.rs`: Implements the public `ChatStream` and its events, mapping from the internal adapter stream.
+- `content_part.rs`: Defines `ContentPart`, `Binary`, and `BinarySource` for handling multi-modal inputs/outputs.
+- `message_content.rs`: Defines `MessageContent`, focusing on collection management and convenient accessors for content parts (e.g., joining all text).
+- `tool/mod.rs` (and associated files): Defines the tooling primitives (`Tool`, `ToolCall`, `ToolResponse`).
+- `usage.rs`: Defines the normalized token counting structures (`Usage`).
+- `printer.rs`: Provides utility functions for rendering stream events to standard output.
+
+### Key Design Considerations
+
+- **Unified Content Model:** The use of `MessageContent` composed of `ContentPart` allows any message role (user, assistant, tool) to handle complex, multi-part data seamlessly, including text, binary payloads, and tooling actions.
+- **Decoupled Streaming:** The public `ChatStream` is an abstraction layer over an internal stream (`InterStream`), ensuring a consistent external interface regardless of adapter implementation details (like internal handling of usage reporting or reasoning chunks).
+- **Normalized Usage Metrics:** The `Usage` structure provides an OpenAI-compatible interface while allowing for provider-specific breakdowns (e.g., caching or reasoning tokens) via detailed sub-structures.
+- **Hierarchical Options:** `ChatOptions` can be applied globally at the client level or specifically per request. The internal resolution logic ensures request-specific options take precedence over client defaults.
diff --git a/dev/spec/spec-client.md b/dev/spec/spec-client.md
new file mode 100644
index 00000000..cedd505d
--- /dev/null
+++ b/dev/spec/spec-client.md
@@ -0,0 +1,59 @@
+## client
+
+### Goal
+
+The `client` module provides the core entry point (`Client`) for interacting with various Generative AI providers. It encapsulates configuration (`ClientConfig`, `WebConfig`), a builder pattern (`ClientBuilder`), request execution (`exec_chat`, `exec_embed`), and service resolution logic (e.g., determining endpoints and authentication).
+
+### Public Module API
+
+The `client` module exposes the following public types:
+
+- **`Client`**: The main interface for executing AI requests (chat, embedding, streaming, model listing).
+ - `Client::builder()`: Starts the configuration process.
+ - `Client::default()`: Creates a client with default configuration.
+ - Core execution methods: `exec_chat`, `exec_chat_stream`, `exec_embed`, `embed`, `embed_batch`.
+ - Resolution/Discovery methods: `all_model_names`, `resolve_service_target`.
+
+- **`ClientBuilder`**: Provides a fluent interface for constructing a `Client`. Used to set `ClientConfig`, default `ChatOptions`, `EmbedOptions`, and custom resolvers (`AuthResolver`, `ServiceTargetResolver`, `ModelMapper`).
+
+- **`ClientConfig`**: Holds the resolved and default configurations used by the `Client`, including resolver functions and default options.
+
+- **`Headers`**: A simple map wrapper (`HashMap`) for managing HTTP headers in requests.
+
+- **`ServiceTarget`**: A struct containing the final resolved components needed to execute a request: `Endpoint`, `AuthData`, and `ModelIden`.
+
+- **`WebConfig`**: Configuration options specifically for building the underlying `reqwest::Client` (e.g., timeouts, proxies, default headers).
+
+### Module Parts
+
+The module is composed of several files that implement the layered client architecture:
+
+- `builder.rs`: Implements `ClientBuilder`, handling the creation and configuration flow. It initializes or updates the nested `ClientConfig` and optionally an internal `WebClient`.
+
+- `client_types.rs`: Defines the main `Client` struct and `ClientInner` (which holds `WebClient` and `ClientConfig` behind an `Arc`).
+
+- `config.rs`: Defines `ClientConfig` and the core `resolve_service_target` logic, which orchestrates calls to `ModelMapper`, `AuthResolver`, and `ServiceTargetResolver` before falling back to adapter defaults.
+
+- `client_impl.rs`: Contains the main implementation of the public API methods on `Client`, such as `exec_chat` and `exec_embed`. These methods perform service resolution and delegate to `AdapterDispatcher` for request creation and response parsing.
+
+- `headers.rs`: Implements the `Headers` utility for managing key-value HTTP header maps.
+
+- `service_target.rs`: Defines the `ServiceTarget` structure for resolved endpoints, authentication, and model identifiers.
+
+- `web_config.rs`: Defines `WebConfig` and its logic for applying settings to a `reqwest::ClientBuilder`.
+
+### Key Design Considerations
+
+- **Client Immutability and Sharing**: The `Client` holds its internal state (`ClientInner` with `WebClient` and `ClientConfig`) wrapped in an `Arc`. This design ensures that the client is thread-safe and cheaply cloneable, aligning with common client patterns in asynchronous Rust applications.
+
+- **Config Layering and Resolution**: The client architecture employs a sophisticated resolution process managed by `ClientConfig::resolve_service_target`.
+ - It first applies a `ModelMapper` to potentially translate the input model identifier.
+ - It then consults the `AuthResolver` for authentication data. If the resolver is absent or returns `None`, it defaults to the adapter's standard authentication mechanism (e.g., API key headers).
+ - It determines the adapter's default endpoint.
+ - Finally, it applies the optional `ServiceTargetResolver`, allowing users to override the endpoint, auth, or model for complex scenarios (e.g., custom proxies or routing).
+
+- **WebClient Abstraction**: The core HTTP client logic is delegated to the `WebClient` (from the `webc` module), which handles low-level request execution and streaming setup. This separation keeps the `client` module focused on business logic and AI provider orchestration.
+
+- **Builder Pattern for Configuration**: `ClientBuilder` enforces configuration before client creation, simplifying object construction and ensuring necessary dependencies are set up correctly.
+
+- **Headers Simplification**: The `Headers` struct abstracts HTTP header management, ensuring that subsequent merges or overrides result in a single, final header value, which is typical for API key authorization overrides.
diff --git a/dev/spec/spec-common.md b/dev/spec/spec-common.md
new file mode 100644
index 00000000..b2d13024
--- /dev/null
+++ b/dev/spec/spec-common.md
@@ -0,0 +1,36 @@
+## common
+
+### Goal
+
+The `common` module provides fundamental data structures used throughout the `genai` library, primarily focusing on identifying models and adapters in a clear and efficient manner.
+
+### Public Module API
+
+The module exposes two main types: `ModelName` and `ModelIden`.
+
+- `ModelName`: Represents a generative AI model identifier (e.g., `"gpt-4o"`, `"claude-3-opus"`).
+  - It wraps an `Arc<str>` for efficient cloning and sharing across threads.
+  - Implements `From<String>`, `From<&String>`, `From<&str>`, and `Deref`.
+ - Supports equality comparison (`PartialEq`) with various string types (`&str`, `String`).
+
+- `ModelIden`: Uniquely identifies a model by coupling an `AdapterKind` with a `ModelName`.
+ - Fields:
+ - `adapter_kind: AdapterKind`
+ - `model_name: ModelName`
+  - Constructor: `fn new(adapter_kind: AdapterKind, model_name: impl Into<ModelName>) -> Self`
+ - Utility methods for creating new identifiers based on name changes:
+    - `fn from_name<T: Into<ModelName>>(&self, new_name: T) -> ModelIden`
+    - `fn from_optional_name<T: Into<ModelName>>(&self, new_name: Option<T>) -> ModelIden`
+
+### Module Parts
+
+The `common` module consists of:
+
+- `model_name.rs`: Defines the `ModelName` type and related string manipulation utilities, including parsing optional namespaces (e.g., `namespace::model_name`).
+- `model_iden.rs`: Defines the `ModelIden` type, which associates a `ModelName` with an `AdapterKind`.
+
+### Key Design Considerations
+
+- **Efficiency of ModelName:** `ModelName` uses `Arc<str>` to ensure that cloning the model identifier is cheap, which is crucial as model identifiers are frequently passed around in request and response structures.
+- **Deref Implementation:** Implementing `Deref` for `ModelName` allows it to be used naturally as a string reference.
+- **ModelIden Immutability:** `ModelIden` is designed to be immutable and fully identifiable, combining the model string identity (`ModelName`) with the service provider identity (`AdapterKind`).
diff --git a/dev/spec/spec-webc.md b/dev/spec/spec-webc.md
new file mode 100644
index 00000000..a4bd3232
--- /dev/null
+++ b/dev/spec/spec-webc.md
@@ -0,0 +1,36 @@
+## webc
+
+### Goal
+
+The `webc` module provides a low-level, internal web client layer utilizing `reqwest`. Its primary role is to abstract standard HTTP requests (GET/POST) and manage complex streaming responses required by various AI providers, especially those that do not fully conform to the Server-Sent Events (SSE) standard (`text/event-stream`). It handles standard JSON requests/responses and custom stream parsing.
+
+### Public Module API
+
+The `webc` module is primarily an internal component, only exposing its dedicated error type publicly.
+
+- `pub use error::Error;`
+ - `Error`: An enum representing all possible errors originating from the web communication layer (e.g., failed status codes, JSON parsing errors, reqwest errors, stream clone errors).
+
+(All other types like `WebClient`, `WebResponse`, `WebStream`, and `Result` are exported as `pub(crate)` for internal library use.)
+
+### Module Parts
+
+The module consists of three main internal components:
+
+- `error.rs`: Defines the `Error` enum and the module-scoped `Result` type alias. It captures network/HTTP related failures and external errors like `reqwest::Error` and `value_ext::JsonValueExtError`.
+
+- `web_client.rs`: Contains the `WebClient` struct, a thin wrapper around `reqwest::Client`. It provides methods (`do_get`, `do_post`) for non-streaming standard HTTP communication, which assumes the response body is JSON and is parsed into `serde_json::Value`. It also defines `WebResponse`, which encapsulates the HTTP status and parsed JSON body.
+
+- `web_stream.rs`: Implements `WebStream`, a custom `futures::Stream` implementation designed for handling non-SSE streaming protocols used by some AI providers (e.g., Cohere, Gemini). It defines `StreamMode` to specify how stream chunks should be parsed (either by a fixed delimiter or specialized handling for "Pretty JSON Array" formats).
+
+### Key Design Considerations
+
+- **Internal Focus:** The module is designed strictly for internal use (`pub(crate)`) except for the public error type. This shields the rest of the library from direct `reqwest` dependency details.
+
+- **Custom Streaming:** `WebStream` exists specifically to manage streaming protocols that deviate from the standard SSE format, providing message splitting based on `StreamMode`. This ensures compatibility with providers like Cohere (delimiter-based) and Gemini (JSON array chunking).
+
+- **Generic JSON Response Handling:** `WebResponse` abstracts successful non-streaming responses by immediately parsing the body into `serde_json::Value`. This allows adapter modules to deserialize into their specific structures subsequently.
+
+- **Error Richness:** The `Error::ResponseFailedStatus` variant includes the `StatusCode`, full `body`, and `HeaderMap` to provide comprehensive debugging information upon API failure.
+
+- **Async Implementation:** All network operations rely on `tokio` and `reqwest`, ensuring non-blocking execution throughout the I/O layer. `WebStream` leverages `futures::Stream` traits for integration with standard Rust async infrastructure.
diff --git a/examples/c00-readme.rs b/examples/c00-readme.rs
index eb2f7fe7..83fb4760 100644
--- a/examples/c00-readme.rs
+++ b/examples/c00-readme.rs
@@ -15,7 +15,7 @@ const MODEL_GROQ: &str = "llama-3.1-8b-instant";
const MODEL_OLLAMA: &str = "gemma:2b"; // sh: `ollama pull gemma:2b`
const MODEL_XAI: &str = "grok-3-mini";
const MODEL_DEEPSEEK: &str = "deepseek-chat";
-const MODEL_ZHIPU: &str = "glm-4-plus";
+const MODEL_ZAI: &str = "glm-4-plus";
const MODEL_COHERE: &str = "command-r7b-12-2024";
// NOTE: These are the default environment keys for each AI Adapter Type.
@@ -31,7 +31,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[
(MODEL_XAI, "XAI_API_KEY"),
(MODEL_DEEPSEEK, "DEEPSEEK_API_KEY"),
(MODEL_OLLAMA, ""),
- (MODEL_ZHIPU, "ZHIPU_API_KEY"),
+ (MODEL_ZAI, "ZAI_API_KEY"),
(MODEL_COHERE, "COHERE_API_KEY"),
];
@@ -41,7 +41,7 @@ const MODEL_AND_KEY_ENV_NAME_LIST: &[(&str, &str)] = &[
// - starts_with "command" -> Cohere
// - starts_with "gemini" -> Gemini
// - model in Groq models -> Groq
-// - starts_with "glm" -> Zhipu
+// - starts_with "glm" -> ZAI
// - For anything else -> Ollama
//
// This can be customized; see `examples/c03-mapper.rs`
diff --git a/examples/c07-zai.rs b/examples/c07-zai.rs
new file mode 100644
index 00000000..e8f7b13b
--- /dev/null
+++ b/examples/c07-zai.rs
@@ -0,0 +1,55 @@
+//! ZAI (Zhipu AI) adapter example
+//!
+//! Demonstrates how to use ZAI models with automatic endpoint routing:
+//! - `glm-4.6` → Regular credit-based API
+//! - `zai::glm-4.6` → Coding subscription API (automatically routed)
+
+use genai::Client;
+use genai::chat::{ChatMessage, ChatRequest};
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+ let client = Client::builder().build();
+
+ // Test cases demonstrating automatic endpoint routing
+ let test_cases = vec![("glm-4.6", "Regular ZAI model"), ("zai::glm-4.6", "Coding subscription model")];
+
+ for (model_name, description) in test_cases {
+ println!("\n=== {} ===", description);
+ println!("Model: {}", model_name);
+
+ let chat_req = ChatRequest::default()
+ .with_system("You are a helpful assistant.")
+ .append_message(ChatMessage::user("Say 'hello' and nothing else."));
+
+ match client.exec_chat(model_name, chat_req, None).await {
+ Ok(response) => {
+ println!("✅ Success!");
+ if let Some(content) = response.first_text() {
+ println!("Response: {}", content);
+ }
+ if response.usage.prompt_tokens.is_some() || response.usage.completion_tokens.is_some() {
+ println!(
+ "Usage: prompt={}, output={}",
+ response.usage.prompt_tokens.unwrap_or(0),
+ response.usage.completion_tokens.unwrap_or(0)
+ );
+ }
+ }
+ Err(e) => {
+ println!("❌ Error: {}", e);
+ if e.to_string().contains("insufficient balance") {
+ println!("ℹ️ This model requires credits or subscription");
+ } else if e.to_string().contains("401") {
+ println!("ℹ️ Set ZAI_API_KEY environment variable");
+ }
+ }
+ }
+ }
+
+ println!("\n=== SUMMARY ===");
+ println!("✅ ZAI adapter handles namespace routing automatically");
+ println!("✅ Use ZAI_API_KEY environment variable");
+
+ Ok(())
+}
diff --git a/src/adapter/adapter_kind.rs b/src/adapter/adapter_kind.rs
index 2430084a..7c2c882e 100644
--- a/src/adapter/adapter_kind.rs
+++ b/src/adapter/adapter_kind.rs
@@ -1,4 +1,3 @@
-use crate::adapter::adapters::together::TogetherAdapter;
use crate::adapter::anthropic::AnthropicAdapter;
use crate::adapter::cerebras::CerebrasAdapter;
use crate::adapter::cohere::CohereAdapter;
@@ -9,9 +8,9 @@ use crate::adapter::groq::{self, GroqAdapter};
use crate::adapter::nebius::NebiusAdapter;
use crate::adapter::openai::OpenAIAdapter;
use crate::adapter::openrouter::OpenRouterAdapter;
+use crate::adapter::together::TogetherAdapter;
use crate::adapter::xai::XaiAdapter;
-use crate::adapter::zai::{self, ZAiAdapter};
-use crate::adapter::zhipu::ZhipuAdapter;
+use crate::adapter::zai::{self, ZaiAdapter};
use crate::{ModelName, Result};
use derive_more::Display;
use serde::{Deserialize, Serialize};
@@ -44,16 +43,14 @@ pub enum AdapterKind {
Xai,
/// For DeepSeek (Mostly use OpenAI)
DeepSeek,
- /// For Zhipu (Mostly use OpenAI)
- Zhipu,
+ /// For ZAI (OpenAI-compatible protocol)
+ Zai,
/// Cohere today use it's own native protocol but might move to OpenAI Adapter
Cohere,
/// OpenAI shared behavior + some custom. (currently, localhost only, can be customize with ServerTargetResolver).
Ollama,
/// Cerebras (OpenAI-compatible protocol)
Cerebras,
- /// Z.AI (Anthropic-compatible protocol)
- ZAi,
}
/// Serialization/Parse implementations
@@ -72,11 +69,10 @@ impl AdapterKind {
AdapterKind::Nebius => "Nebius",
AdapterKind::Xai => "xAi",
AdapterKind::DeepSeek => "DeepSeek",
- AdapterKind::Zhipu => "Zhipu",
+ AdapterKind::Zai => "Zai",
AdapterKind::Cohere => "Cohere",
AdapterKind::Ollama => "Ollama",
AdapterKind::Cerebras => "Cerebras",
- AdapterKind::ZAi => "ZAi",
}
}
@@ -94,11 +90,10 @@ impl AdapterKind {
AdapterKind::Nebius => "nebius",
AdapterKind::Xai => "xai",
AdapterKind::DeepSeek => "deepseek",
- AdapterKind::Zhipu => "zhipu",
+ AdapterKind::Zai => "zai",
AdapterKind::Cohere => "cohere",
AdapterKind::Ollama => "ollama",
AdapterKind::Cerebras => "cerebras",
- AdapterKind::ZAi => "zai",
}
}
@@ -115,11 +110,10 @@ impl AdapterKind {
"nebius" => Some(AdapterKind::Nebius),
"xai" => Some(AdapterKind::Xai),
"deepseek" => Some(AdapterKind::DeepSeek),
- "zhipu" => Some(AdapterKind::Zhipu),
+ "zai" => Some(AdapterKind::Zai),
"cohere" => Some(AdapterKind::Cohere),
"ollama" => Some(AdapterKind::Ollama),
"cerebras" => Some(AdapterKind::Cerebras),
- "zai" => Some(AdapterKind::ZAi),
_ => None,
}
}
@@ -141,11 +135,10 @@ impl AdapterKind {
AdapterKind::Nebius => Some(NebiusAdapter::API_KEY_DEFAULT_ENV_NAME),
AdapterKind::Xai => Some(XaiAdapter::API_KEY_DEFAULT_ENV_NAME),
AdapterKind::DeepSeek => Some(DeepSeekAdapter::API_KEY_DEFAULT_ENV_NAME),
- AdapterKind::Zhipu => Some(ZhipuAdapter::API_KEY_DEFAULT_ENV_NAME),
+ AdapterKind::Zai => Some(ZaiAdapter::API_KEY_DEFAULT_ENV_NAME),
AdapterKind::Cohere => Some(CohereAdapter::API_KEY_DEFAULT_ENV_NAME),
AdapterKind::Ollama => None,
AdapterKind::Cerebras => Some(CerebrasAdapter::API_KEY_DEFAULT_ENV_NAME),
- AdapterKind::ZAi => Some(ZAiAdapter::API_KEY_DEFAULT_ENV_NAME),
}
}
}
@@ -171,6 +164,7 @@ impl AdapterKind {
/// - e.g., for together.ai `together::meta-llama/Llama-3-8b-chat-hf`
/// - e.g., for nebius with `nebius::Qwen/Qwen3-235B-A22B`
/// - e.g., for cerebras with `cerebras::llama-3.1-8b`
+ /// - e.g., for ZAI coding plan with `coding::glm-4.6`
///
/// And all adapters can be force namspaced as well.
///
@@ -179,6 +173,11 @@ impl AdapterKind {
pub fn from_model(model: &str) -> Result {
// -- First check if namespaced (explicit :: namespace has priority)
if let (_, Some(ns)) = ModelName::model_name_and_namespace(model) {
+ // Special handling: "zai" namespace should route to ZAI for coding endpoint
+ if ns == "zai" {
+ return Ok(AdapterKind::Zai);
+ }
+
if let Some(adapter) = Self::from_lower_str(ns) {
return Ok(adapter);
} else {
@@ -218,7 +217,7 @@ impl AdapterKind {
} else if model.starts_with("claude") {
Ok(Self::Anthropic)
} else if zai::MODELS.contains(&model) {
- Ok(Self::ZAi)
+ Ok(Self::Zai)
} else if model.contains("fireworks") {
Ok(Self::Fireworks)
} else if groq::MODELS.contains(&model) {
@@ -230,7 +229,7 @@ impl AdapterKind {
} else if model.starts_with("grok") {
Ok(Self::Xai)
} else if model.starts_with("glm") {
- Ok(Self::Zhipu)
+ Ok(Self::Zai)
}
// For now, fallback to Ollama
else {
diff --git a/src/adapter/adapters/anthropic/adapter_impl.rs b/src/adapter/adapters/anthropic/adapter_impl.rs
index 334e514c..505a3f31 100644
--- a/src/adapter/adapters/anthropic/adapter_impl.rs
+++ b/src/adapter/adapters/anthropic/adapter_impl.rs
@@ -31,7 +31,7 @@ const REASONING_HIGH: u32 = 24000;
// For max model tokens see: https://docs.anthropic.com/en/docs/about-claude/models/overview
//
// fall back
-const MAX_TOKENS_64K: u32 = 64000; // claude-3-7-sonnet, claude-sonnet-4.x
+const MAX_TOKENS_64K: u32 = 64000; // claude-3-7-sonnet, claude-sonnet-4.x, claude-haiku-4-5
// custom
const MAX_TOKENS_32K: u32 = 32000; // claude-opus-4
const MAX_TOKENS_8K: u32 = 8192; // claude-3-5-sonnet, claude-3-5-haiku
@@ -41,7 +41,7 @@ const ANTHROPIC_VERSION: &str = "2023-06-01";
const MODELS: &[&str] = &[
"claude-opus-4-1-20250805",
"claude-sonnet-4-5-20250929",
- "claude-3-5-haiku-latest",
+ "claude-haiku-4-5-20251001",
];
impl AnthropicAdapter {
@@ -181,7 +181,10 @@ impl Adapter for AnthropicAdapter {
// const MAX_TOKENS_4K: u32 = 4096; // claude-3-opus, claude-3-haiku
let max_tokens = options_set.max_tokens().unwrap_or_else(|| {
// most likely models used, so put first. Also a little wider with `claude-sonnet` (since name from version 4)
- if model_name.contains("claude-sonnet") || model_name.contains("claude-3-7-sonnet") {
+ if model_name.contains("claude-sonnet")
+ || model_name.contains("claude-haiku")
+ || model_name.contains("claude-3-7-sonnet")
+ {
MAX_TOKENS_64K
} else if model_name.contains("claude-opus-4") {
MAX_TOKENS_32K
diff --git a/src/adapter/adapters/mod.rs b/src/adapter/adapters/mod.rs
index a9579f22..bc32c2d4 100644
--- a/src/adapter/adapters/mod.rs
+++ b/src/adapter/adapters/mod.rs
@@ -15,4 +15,3 @@ pub(super) mod openrouter;
pub(super) mod together;
pub(super) mod xai;
pub(super) mod zai;
-pub(super) mod zhipu;
diff --git a/src/adapter/adapters/openai/streamer.rs b/src/adapter/adapters/openai/streamer.rs
index f6a29bfe..65d4d44e 100644
--- a/src/adapter/adapters/openai/streamer.rs
+++ b/src/adapter/adapters/openai/streamer.rs
@@ -2,7 +2,7 @@ use crate::adapter::AdapterKind;
use crate::adapter::adapters::support::{StreamerCapturedData, StreamerOptions};
use crate::adapter::inter_stream::{InterStreamEnd, InterStreamEvent};
use crate::adapter::openai::OpenAIAdapter;
-use crate::chat::ChatOptionsSet;
+use crate::chat::{ChatOptionsSet, ToolCall};
use crate::{Error, ModelIden, Result};
use reqwest_eventsource::{Event, EventSource};
use serde_json::Value;
@@ -58,11 +58,48 @@ impl futures::Stream for OpenAIStreamer {
None
};
+ // -- Process the captured_tool_calls
+ // NOTE: here we attempt to parse the `fn_arguments` if it is a string, because that means it was accumulated during streaming
+ let captured_tool_calls = if let Some(tools_calls) = self.captured_data.tool_calls.take() {
+ let tools_calls: Vec = tools_calls
+ .into_iter()
+ .map(|tool_call| {
+ // extract
+ let ToolCall {
+ call_id,
+ fn_name,
+ fn_arguments,
+ } = tool_call;
+ // parse fn_arguments if needed
+ let fn_arguments = match fn_arguments {
+ Value::String(fn_arguments_string) => {
+ // NOTE: Here we are resilient for now, if we cannot parse, just return the original String
+ match serde_json::from_str::(&fn_arguments_string) {
+ Ok(fn_arguments) => fn_arguments,
+ Err(_) => Value::String(fn_arguments_string),
+ }
+ }
+ _ => fn_arguments,
+ };
+
+ ToolCall {
+ call_id,
+ fn_name,
+ fn_arguments,
+ }
+ })
+ .collect();
+ Some(tools_calls)
+ } else {
+ None
+ };
+
+ // Return the internal stream end
let inter_stream_end = InterStreamEnd {
captured_usage,
captured_text_content: self.captured_data.content.take(),
captured_reasoning_content: self.captured_data.reasoning_content.take(),
- captured_tool_calls: self.captured_data.tool_calls.take(),
+ captured_tool_calls,
};
return Poll::Ready(Some(Ok(InterStreamEvent::End(inter_stream_end))));
@@ -99,7 +136,7 @@ impl futures::Stream for OpenAIStreamer {
self.captured_data.usage = Some(usage)
}
AdapterKind::DeepSeek
- | AdapterKind::Zhipu
+ | AdapterKind::Zai
| AdapterKind::Fireworks
| AdapterKind::Together => {
let usage = message_data
@@ -135,13 +172,11 @@ impl futures::Stream for OpenAIStreamer {
.unwrap_or_else(|_| format!("call_{index}"));
let fn_name = function.x_take::("name").unwrap_or_default();
let arguments = function.x_take::("arguments").unwrap_or_default();
- // Create the tool call
- let fn_arguments = serde_json::from_str(&arguments)
- .unwrap_or(serde_json::Value::String(arguments.clone()));
+ // Don't parse yet - accumulate as string first
let mut tool_call = crate::chat::ToolCall {
call_id,
fn_name,
- fn_arguments: fn_arguments.clone(),
+ fn_arguments: serde_json::Value::String(arguments.clone()),
};
// Capture the tool call if enabled
@@ -149,19 +184,25 @@ impl futures::Stream for OpenAIStreamer {
match &mut self.captured_data.tool_calls {
Some(calls) => {
self.captured_data.tool_calls = Some({
- // When fn_arguments can not be parsed, we need to append the arguments to the existing fn_arguments as json string
- let mut captured_fn_argments = String::new();
- if calls[index as usize].fn_arguments.is_string() {
- captured_fn_argments.push_str(
- calls[index as usize].fn_arguments.as_str().unwrap_or(""),
- );
- captured_fn_argments.push_str(&arguments);
+ // Accumulate arguments as strings, don't parse until complete
+ let accumulated = if let Some(existing) =
+ calls[index as usize].fn_arguments.as_str()
+ {
+ format!("{}{}", existing, arguments)
+ } else {
+ arguments.clone()
+ };
+
+ // Store as string (will be parsed at stream end)
+ calls[index as usize].fn_arguments =
+ serde_json::Value::String(accumulated);
+
+ // Update call_id and fn_name on first chunk
+ if !tool_call.fn_name.is_empty() {
+ calls[index as usize].call_id = tool_call.call_id.clone();
+ calls[index as usize].fn_name = tool_call.fn_name.clone();
}
- let fn_arguments = serde_json::from_str(&captured_fn_argments)
- .unwrap_or(serde_json::Value::String(
- captured_fn_argments.clone(),
- ));
- calls[index as usize].fn_arguments = fn_arguments.clone();
+
tool_call = calls[index as usize].clone();
calls.to_vec()
})
diff --git a/src/adapter/adapters/zai/adapter_impl.rs b/src/adapter/adapters/zai/adapter_impl.rs
index 205bb0b8..7f1ce6e2 100644
--- a/src/adapter/adapters/zai/adapter_impl.rs
+++ b/src/adapter/adapters/zai/adapter_impl.rs
@@ -2,70 +2,110 @@ use crate::ModelIden;
use crate::adapter::openai::OpenAIAdapter;
use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData};
use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse};
-use crate::embed::{EmbedOptionsSet, EmbedRequest, EmbedResponse};
use crate::resolver::{AuthData, Endpoint};
use crate::webc::WebResponse;
use crate::{Result, ServiceTarget};
use reqwest::RequestBuilder;
-pub struct ZAiAdapter;
+/// Helper structure to hold ZAI model parsing information
+struct ZaiModelEndpoint {
+ endpoint: Endpoint,
+}
+
+impl ZaiModelEndpoint {
+ /// Parse ModelIden to determine if it's a coding model and return endpoint
+ fn from_model(model: &ModelIden) -> Self {
+ let (_, namespace) = model.model_name.as_model_name_and_namespace();
+
+ // Check if namespace is "zai" to route to coding endpoint
+ let endpoint = match namespace {
+ Some("zai") => Endpoint::from_static("https://api.z.ai/api/coding/paas/v4/"),
+ _ => ZaiAdapter::default_endpoint(),
+ };
+
+ Self { endpoint }
+ }
+}
+
+/// The ZAI API is mostly compatible with the OpenAI API.
+///
+/// NOTE: This adapter will automatically route to the coding endpoint
+/// when the model name starts with "zai::".
+///
+/// For example, `glm-4.6` uses the regular API endpoint,
+/// while `zai::glm-4.6` uses the coding plan endpoint.
+///
+pub struct ZaiAdapter;
-// Z.AI model names
-// Based on https://z.ai/model-api documentation
-// These are the models Z.AI supports
pub(in crate::adapter) const MODELS: &[&str] = &[
- "glm-4.6", "glm-4.5", "glm-4", "glm-4.1v", "glm-4.5v", "vidu", "vidu-q1",
- "vidu-2.0",
- // Note: No turbo models are supported by Z.AI
+ "glm-4-plus",
+ "glm-4.6",
+ "glm-4.5",
+ "glm-4.5v",
+ "glm-4.5-x",
+ "glm-4.5-air",
+ "glm-4.5-airx",
+ "glm-4-32b-0414-128k",
+ "glm-4.5-flash",
+ "glm-4-air-250414",
+ "glm-4-flashx-250414",
+ "glm-4-flash-250414",
+ "glm-4-air",
+ "glm-4-airx",
+ "glm-4-long",
+ "glm-4-flash",
+ "glm-4v-plus-0111",
+ "glm-4v-flash",
+ "glm-z1-air",
+ "glm-z1-airx",
+ "glm-z1-flash",
+ "glm-z1-flashx",
+ "glm-4.1v-thinking-flash",
+ "glm-4.1v-thinking-flashx",
];
-impl ZAiAdapter {
+impl ZaiAdapter {
pub const API_KEY_DEFAULT_ENV_NAME: &str = "ZAI_API_KEY";
}
-// Z.AI adapter uses OpenAI-compatible implementation (most common format)
-// Note: This may need adjustment based on actual Z.AI API documentation
-impl Adapter for ZAiAdapter {
- fn default_auth() -> AuthData {
- AuthData::from_env(Self::API_KEY_DEFAULT_ENV_NAME)
- }
-
+// The ZAI API is mostly compatible with the OpenAI API.
+impl Adapter for ZaiAdapter {
fn default_endpoint() -> Endpoint {
- const BASE_URL: &str = "https://api.z.ai/v1/";
+ const BASE_URL: &str = "https://api.z.ai/api/paas/v4/";
Endpoint::from_static(BASE_URL)
}
+ fn default_auth() -> AuthData {
+ AuthData::from_env(Self::API_KEY_DEFAULT_ENV_NAME)
+ }
+
async fn all_model_names(_kind: AdapterKind) -> Result> {
Ok(MODELS.iter().map(|s| s.to_string()).collect())
}
- fn get_service_url(model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result {
- OpenAIAdapter::util_get_service_url(model, service_type, endpoint)
+ fn get_service_url(_model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result {
+ // For ZAI, we need to handle model-specific routing at this level
+ // because get_service_url is called with the modified endpoint from to_web_request_data
+ let base_url = endpoint.base_url();
+
+ let url = match service_type {
+ ServiceType::Chat | ServiceType::ChatStream => format!("{base_url}chat/completions"),
+ ServiceType::Embed => format!("{base_url}embeddings"),
+ };
+ Ok(url)
}
fn to_web_request_data(
- target: ServiceTarget,
+ mut target: ServiceTarget,
service_type: ServiceType,
chat_req: ChatRequest,
chat_options: ChatOptionsSet<'_, '_>,
) -> Result {
- OpenAIAdapter::util_to_web_request_data(target, service_type, chat_req, chat_options, None)
- }
+ // Parse model name and determine appropriate endpoint
+ let zai_info = ZaiModelEndpoint::from_model(&target.model);
+ target.endpoint = zai_info.endpoint;
- fn to_embed_request_data(
- target: ServiceTarget,
- embed_req: EmbedRequest,
- options_set: EmbedOptionsSet<'_, '_>,
- ) -> Result {
- OpenAIAdapter::to_embed_request_data(target, embed_req, options_set)
- }
-
- fn to_embed_response(
- model_iden: ModelIden,
- web_response: WebResponse,
- options_set: EmbedOptionsSet<'_, '_>,
- ) -> Result {
- OpenAIAdapter::to_embed_response(model_iden, web_response, options_set)
+ OpenAIAdapter::util_to_web_request_data(target, service_type, chat_req, chat_options, None)
}
fn to_chat_response(
@@ -83,4 +123,23 @@ impl Adapter for ZAiAdapter {
) -> Result {
OpenAIAdapter::to_chat_stream(model_iden, reqwest_builder, options_set)
}
+
+ fn to_embed_request_data(
+ mut service_target: crate::ServiceTarget,
+ embed_req: crate::embed::EmbedRequest,
+ options_set: crate::embed::EmbedOptionsSet<'_, '_>,
+ ) -> Result {
+ let zai_info = ZaiModelEndpoint::from_model(&service_target.model);
+ service_target.endpoint = zai_info.endpoint;
+
+ OpenAIAdapter::to_embed_request_data(service_target, embed_req, options_set)
+ }
+
+ fn to_embed_response(
+ model_iden: crate::ModelIden,
+ web_response: crate::webc::WebResponse,
+ options_set: crate::embed::EmbedOptionsSet<'_, '_>,
+ ) -> Result {
+ OpenAIAdapter::to_embed_response(model_iden, web_response, options_set)
+ }
}
diff --git a/src/adapter/adapters/zai/mod.rs b/src/adapter/adapters/zai/mod.rs
index 2acb6732..a7d774ff 100644
--- a/src/adapter/adapters/zai/mod.rs
+++ b/src/adapter/adapters/zai/mod.rs
@@ -1,7 +1,56 @@
-//! API Documentation: https://z.ai/docs
-//! Model Names: https://z.ai/docs/models
-//! Pricing: https://z.ai/docs/pricing
-//! Note: Z.AI API is compatible with Anthropic's API
+//! ZAI API Documentation
+//! API Documentation:
+//! Model Names: GLM series models
+//! Pricing:
+//!
+//! ## Dual Endpoint Support
+//!
+//! ZAI supports two different API endpoints using the ServiceTargetResolver pattern:
+//!
+//! ### Regular API (Credit-based)
+//! - Endpoint: `https://api.z.ai/api/paas/v4/`
+//! - Models: `glm-4.6`, `glm-4.5`, etc.
+//! - Usage: Standard API calls billed per token
+//!
+//! ### Coding Plan (Subscription-based)
+//! - Endpoint: `https://api.z.ai/api/coding/paas/v4/`
+//! - Models: `coding::glm-4.6`, `coding::glm-4.5`, etc.
+//! - Usage: Fixed monthly subscription for coding tasks
+//!
+//! ## Usage with ServiceTargetResolver
+//!
+//! ```rust,ignore
+//! use genai::resolver::{Endpoint, ServiceTargetResolver};
+//! use genai::{Client, AdapterKind, ModelIden};
+//!
+//! let target_resolver = ServiceTargetResolver::from_resolver_fn(
+//! |service_target| -> Result {
+//! let model_name = service_target.model.model_name.to_string();
+//!
+//! // Route to appropriate endpoint based on model naming
+//! let endpoint_url = if model_name.starts_with("coding::") {
+//! "https://api.z.ai/api/coding/paas/v4/"
+//! } else {
+//! "https://api.z.ai/api/paas/v4/"
+//! };
+//!
+//! let final_endpoint = Endpoint::from_static(endpoint_url);
+//! let final_model = ModelIden::new(AdapterKind::Zai, clean_model_name);
+//!
+//! Ok(ServiceTarget { endpoint: final_endpoint, model: final_model })
+//! }
+//! );
+//!
+//! let client = Client::builder().with_service_target_resolver(target_resolver).build();
+//!
+//! // Use regular API
+//! let response = client.exec_chat("glm-4.6", chat_request, None).await?;
+//!
+//! // Use coding plan
+//! let response = client.exec_chat("coding::glm-4.6", chat_request, None).await?;
+//! ```
+//!
+//! See `examples/c07-zai-dual-endpoints.rs` for a complete working example.
// region: --- Modules
diff --git a/src/adapter/adapters/zhipu/adapter_impl.rs b/src/adapter/adapters/zhipu/adapter_impl.rs
deleted file mode 100644
index ae684724..00000000
--- a/src/adapter/adapters/zhipu/adapter_impl.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-use crate::ModelIden;
-use crate::adapter::openai::OpenAIAdapter;
-use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData};
-use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse};
-use crate::resolver::{AuthData, Endpoint};
-use crate::webc::WebResponse;
-use crate::{Result, ServiceTarget};
-use reqwest::RequestBuilder;
-
-pub struct ZhipuAdapter;
-
-pub(in crate::adapter) const MODELS: &[&str] = &[
- "glm-4-plus",
- "glm-4-air-250414",
- "glm-4-flashx-250414",
- "glm-4-flash-250414",
- "glm-4-air",
- "glm-4-airx",
- "glm-4-long",
- "glm-4-flash",
- "glm-4v-plus-0111",
- "glm-4v-flash",
- "glm-z1-air",
- "glm-z1-airx",
- "glm-z1-flash",
- "glm-z1-flashx",
- "glm-4.1v-thinking-flash",
- "glm-4.1v-thinking-flashx",
- "glm-4.5",
-];
-
-impl ZhipuAdapter {
- pub const API_KEY_DEFAULT_ENV_NAME: &str = "ZHIPU_API_KEY";
-}
-
-// The Zhipu API is mostly compatible with the OpenAI API.
-impl Adapter for ZhipuAdapter {
- fn default_endpoint() -> Endpoint {
- const BASE_URL: &str = "https://open.bigmodel.cn/api/paas/v4/";
- Endpoint::from_static(BASE_URL)
- }
-
- fn default_auth() -> AuthData {
- AuthData::from_env(Self::API_KEY_DEFAULT_ENV_NAME)
- }
-
- async fn all_model_names(_kind: AdapterKind) -> Result> {
- Ok(MODELS.iter().map(|s| s.to_string()).collect())
- }
-
- fn get_service_url(model: &ModelIden, service_type: ServiceType, endpoint: Endpoint) -> Result {
- OpenAIAdapter::util_get_service_url(model, service_type, endpoint)
- }
-
- fn to_web_request_data(
- target: ServiceTarget,
- service_type: ServiceType,
- chat_req: ChatRequest,
- chat_options: ChatOptionsSet<'_, '_>,
- ) -> Result {
- OpenAIAdapter::util_to_web_request_data(target, service_type, chat_req, chat_options, None)
- }
-
- fn to_chat_response(
- model_iden: ModelIden,
- web_response: WebResponse,
- options_set: ChatOptionsSet<'_, '_>,
- ) -> Result {
- OpenAIAdapter::to_chat_response(model_iden, web_response, options_set)
- }
-
- fn to_chat_stream(
- model_iden: ModelIden,
- reqwest_builder: RequestBuilder,
- options_set: ChatOptionsSet<'_, '_>,
- ) -> Result {
- OpenAIAdapter::to_chat_stream(model_iden, reqwest_builder, options_set)
- }
-
- fn to_embed_request_data(
- service_target: crate::ServiceTarget,
- embed_req: crate::embed::EmbedRequest,
- options_set: crate::embed::EmbedOptionsSet<'_, '_>,
- ) -> Result {
- OpenAIAdapter::to_embed_request_data(service_target, embed_req, options_set)
- }
-
- fn to_embed_response(
- model_iden: crate::ModelIden,
- web_response: crate::webc::WebResponse,
- options_set: crate::embed::EmbedOptionsSet<'_, '_>,
- ) -> Result {
- OpenAIAdapter::to_embed_response(model_iden, web_response, options_set)
- }
-}
diff --git a/src/adapter/adapters/zhipu/mod.rs b/src/adapter/adapters/zhipu/mod.rs
deleted file mode 100644
index bc7f0f9f..00000000
--- a/src/adapter/adapters/zhipu/mod.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-//! Click the globe icon on the top-right corner of the page to switch language.
-//! API Documentation:
-//! Model Names:
-//! Pricing:
-
-// region: --- Modules
-
-mod adapter_impl;
-
-pub use adapter_impl::*;
-
-// endregion: --- Modules
diff --git a/src/adapter/dispatcher.rs b/src/adapter/dispatcher.rs
index b8fd7c17..d9f9194a 100644
--- a/src/adapter/dispatcher.rs
+++ b/src/adapter/dispatcher.rs
@@ -1,20 +1,18 @@
-use super::groq::GroqAdapter;
-use crate::adapter::adapters::together::TogetherAdapter;
use crate::adapter::anthropic::AnthropicAdapter;
use crate::adapter::cerebras::CerebrasAdapter;
use crate::adapter::cohere::CohereAdapter;
use crate::adapter::deepseek::DeepSeekAdapter;
use crate::adapter::fireworks::FireworksAdapter;
use crate::adapter::gemini::GeminiAdapter;
+use crate::adapter::groq::GroqAdapter;
use crate::adapter::nebius::NebiusAdapter;
use crate::adapter::ollama::OllamaAdapter;
use crate::adapter::openai::OpenAIAdapter;
use crate::adapter::openai_resp::OpenAIRespAdapter;
use crate::adapter::openrouter::OpenRouterAdapter;
-
+use crate::adapter::together::TogetherAdapter;
use crate::adapter::xai::XaiAdapter;
-use crate::adapter::zai::ZAiAdapter;
-use crate::adapter::zhipu::ZhipuAdapter;
+use crate::adapter::zai::ZaiAdapter;
use crate::adapter::{Adapter, AdapterKind, ServiceType, WebRequestData};
use crate::chat::{ChatOptionsSet, ChatRequest, ChatResponse, ChatStreamResponse};
use crate::embed::{EmbedOptionsSet, EmbedRequest, EmbedResponse};
@@ -44,11 +42,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::default_endpoint(),
AdapterKind::Xai => XaiAdapter::default_endpoint(),
AdapterKind::DeepSeek => DeepSeekAdapter::default_endpoint(),
- AdapterKind::Zhipu => ZhipuAdapter::default_endpoint(),
+ AdapterKind::Zai => ZaiAdapter::default_endpoint(),
AdapterKind::Cohere => CohereAdapter::default_endpoint(),
AdapterKind::Ollama => OllamaAdapter::default_endpoint(),
AdapterKind::Cerebras => CerebrasAdapter::default_endpoint(),
- AdapterKind::ZAi => ZAiAdapter::default_endpoint(),
AdapterKind::OpenRouter => Endpoint::from_static("https://openrouter.ai/api/v1/"),
}
}
@@ -65,11 +62,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::default_auth(),
AdapterKind::Xai => XaiAdapter::default_auth(),
AdapterKind::DeepSeek => DeepSeekAdapter::default_auth(),
- AdapterKind::Zhipu => ZhipuAdapter::default_auth(),
+ AdapterKind::Zai => ZaiAdapter::default_auth(),
AdapterKind::Cohere => CohereAdapter::default_auth(),
AdapterKind::Ollama => OllamaAdapter::default_auth(),
AdapterKind::Cerebras => CerebrasAdapter::default_auth(),
- AdapterKind::ZAi => ZAiAdapter::default_auth(),
AdapterKind::OpenRouter => AuthData::from_env(OpenRouterAdapter::API_KEY_DEFAULT_ENV_NAME),
}
}
@@ -86,11 +82,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::all_model_names(kind).await,
AdapterKind::Xai => XaiAdapter::all_model_names(kind).await,
AdapterKind::DeepSeek => DeepSeekAdapter::all_model_names(kind).await,
- AdapterKind::Zhipu => ZhipuAdapter::all_model_names(kind).await,
+ AdapterKind::Zai => ZaiAdapter::all_model_names(kind).await,
AdapterKind::Cohere => CohereAdapter::all_model_names(kind).await,
AdapterKind::Ollama => OllamaAdapter::all_model_names(kind).await,
AdapterKind::Cerebras => CerebrasAdapter::all_model_names(kind).await,
- AdapterKind::ZAi => ZAiAdapter::all_model_names(kind).await,
AdapterKind::OpenRouter => OpenRouterAdapter::all_model_names(kind).await,
}
}
@@ -107,11 +102,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::get_service_url(model, service_type, endpoint),
AdapterKind::Xai => XaiAdapter::get_service_url(model, service_type, endpoint),
AdapterKind::DeepSeek => DeepSeekAdapter::get_service_url(model, service_type, endpoint),
- AdapterKind::Zhipu => ZhipuAdapter::get_service_url(model, service_type, endpoint),
+ AdapterKind::Zai => ZaiAdapter::get_service_url(model, service_type, endpoint),
AdapterKind::Cohere => CohereAdapter::get_service_url(model, service_type, endpoint),
AdapterKind::Ollama => OllamaAdapter::get_service_url(model, service_type, endpoint),
AdapterKind::Cerebras => CerebrasAdapter::get_service_url(model, service_type, endpoint),
- AdapterKind::ZAi => ZAiAdapter::get_service_url(model, service_type, endpoint),
AdapterKind::OpenRouter => OpenRouterAdapter::get_service_url(model, service_type, endpoint),
}
}
@@ -140,11 +134,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::to_web_request_data(target, service_type, chat_req, options_set),
AdapterKind::Xai => XaiAdapter::to_web_request_data(target, service_type, chat_req, options_set),
AdapterKind::DeepSeek => DeepSeekAdapter::to_web_request_data(target, service_type, chat_req, options_set),
- AdapterKind::Zhipu => ZhipuAdapter::to_web_request_data(target, service_type, chat_req, options_set),
+ AdapterKind::Zai => ZaiAdapter::to_web_request_data(target, service_type, chat_req, options_set),
AdapterKind::Cohere => CohereAdapter::to_web_request_data(target, service_type, chat_req, options_set),
AdapterKind::Ollama => OllamaAdapter::to_web_request_data(target, service_type, chat_req, options_set),
AdapterKind::Cerebras => CerebrasAdapter::to_web_request_data(target, service_type, chat_req, options_set),
- AdapterKind::ZAi => ZAiAdapter::to_web_request_data(target, service_type, chat_req, options_set),
AdapterKind::OpenRouter => {
OpenRouterAdapter::to_web_request_data(target, service_type, chat_req, options_set)
}
@@ -167,11 +160,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::to_chat_response(model_iden, web_response, options_set),
AdapterKind::Xai => XaiAdapter::to_chat_response(model_iden, web_response, options_set),
AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_response(model_iden, web_response, options_set),
- AdapterKind::Zhipu => ZhipuAdapter::to_chat_response(model_iden, web_response, options_set),
+ AdapterKind::Zai => ZaiAdapter::to_chat_response(model_iden, web_response, options_set),
AdapterKind::Cohere => CohereAdapter::to_chat_response(model_iden, web_response, options_set),
AdapterKind::Ollama => OllamaAdapter::to_chat_response(model_iden, web_response, options_set),
AdapterKind::Cerebras => CerebrasAdapter::to_chat_response(model_iden, web_response, options_set),
- AdapterKind::ZAi => ZAiAdapter::to_chat_response(model_iden, web_response, options_set),
AdapterKind::OpenRouter => OpenRouterAdapter::to_chat_response(model_iden, web_response, options_set),
}
}
@@ -195,11 +187,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
AdapterKind::Xai => XaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
AdapterKind::DeepSeek => DeepSeekAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
- AdapterKind::Zhipu => ZhipuAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
+ AdapterKind::Zai => ZaiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
AdapterKind::Cohere => CohereAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
AdapterKind::Ollama => OllamaAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
AdapterKind::Cerebras => CerebrasAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
- AdapterKind::ZAi => ZAiAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
AdapterKind::OpenRouter => OpenRouterAdapter::to_chat_stream(model_iden, reqwest_builder, options_set),
}
}
@@ -224,11 +215,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::to_embed_request_data(target, embed_req, options_set),
AdapterKind::Xai => XaiAdapter::to_embed_request_data(target, embed_req, options_set),
AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_request_data(target, embed_req, options_set),
- AdapterKind::Zhipu => ZhipuAdapter::to_embed_request_data(target, embed_req, options_set),
+ AdapterKind::Zai => ZaiAdapter::to_embed_request_data(target, embed_req, options_set),
AdapterKind::Cohere => CohereAdapter::to_embed_request_data(target, embed_req, options_set),
AdapterKind::Ollama => OllamaAdapter::to_embed_request_data(target, embed_req, options_set),
AdapterKind::Cerebras => CerebrasAdapter::to_embed_request_data(target, embed_req, options_set),
- AdapterKind::ZAi => ZAiAdapter::to_embed_request_data(target, embed_req, options_set),
AdapterKind::OpenRouter => OpenRouterAdapter::to_embed_request_data(target, embed_req, options_set),
}
}
@@ -252,11 +242,10 @@ impl AdapterDispatcher {
AdapterKind::Nebius => NebiusAdapter::to_embed_response(model_iden, web_response, options_set),
AdapterKind::Xai => XaiAdapter::to_embed_response(model_iden, web_response, options_set),
AdapterKind::DeepSeek => DeepSeekAdapter::to_embed_response(model_iden, web_response, options_set),
- AdapterKind::Zhipu => ZhipuAdapter::to_embed_response(model_iden, web_response, options_set),
+ AdapterKind::Zai => ZaiAdapter::to_embed_response(model_iden, web_response, options_set),
AdapterKind::Cohere => CohereAdapter::to_embed_response(model_iden, web_response, options_set),
AdapterKind::Ollama => OllamaAdapter::to_embed_response(model_iden, web_response, options_set),
AdapterKind::Cerebras => CerebrasAdapter::to_embed_response(model_iden, web_response, options_set),
- AdapterKind::ZAi => ZAiAdapter::to_embed_response(model_iden, web_response, options_set),
AdapterKind::OpenRouter => OpenRouterAdapter::to_embed_response(model_iden, web_response, options_set),
}
}
diff --git a/src/common/model_name.rs b/src/common/model_name.rs
index 9517d8bc..b8821d28 100644
--- a/src/common/model_name.rs
+++ b/src/common/model_name.rs
@@ -69,6 +69,48 @@ impl Deref for ModelName {
// endregion: --- Froms
+// region: --- EQ
+
+// PartialEq implementations for various string types
+impl PartialEq for ModelName {
+ fn eq(&self, other: &str) -> bool {
+ &*self.0 == other
+ }
+}
+
+impl PartialEq<&str> for ModelName {
+ fn eq(&self, other: &&str) -> bool {
+ &*self.0 == *other
+ }
+}
+
+impl PartialEq for ModelName {
+ fn eq(&self, other: &String) -> bool {
+ &*self.0 == other
+ }
+}
+
+// Symmetric implementations (allow "string" == model_name)
+impl PartialEq for str {
+ fn eq(&self, other: &ModelName) -> bool {
+ self == &*other.0
+ }
+}
+
+impl PartialEq for &str {
+ fn eq(&self, other: &ModelName) -> bool {
+ *self == &*other.0
+ }
+}
+
+impl PartialEq for String {
+ fn eq(&self, other: &ModelName) -> bool {
+ self == &*other.0
+ }
+}
+
+// endregion: --- EQ
+
// TODO: replace with derive_more Display
impl std::fmt::Display for ModelName {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
diff --git a/tests/support/common_tests.rs b/tests/support/common_tests.rs
index 10efdc20..e338c520 100644
--- a/tests/support/common_tests.rs
+++ b/tests/support/common_tests.rs
@@ -655,6 +655,37 @@ pub async fn common_test_chat_stream_capture_all_ok(model: &str, checks: Option<
Ok(())
}
+/// Just making the tool request, and checking the tool call response
+/// `complete_check` is for LLMs that are better at giving back the unit and weather.
+pub async fn common_test_chat_stream_tool_capture_ok(model: &str) -> TestResult<()> {
+ // -- Setup & Fixtures
+ let client = Client::default();
+ let chat_req = seed_chat_req_tool_simple();
+ let mut chat_options = ChatOptions::default().with_capture_tool_calls(true);
+
+ // -- Exec
+ let chat_res = client.exec_chat_stream(model, chat_req, Some(&chat_options)).await?;
+
+ // Extract Stream content
+ let StreamExtract {
+ stream_end,
+ content,
+ reasoning_content,
+ } = extract_stream_end(chat_res.stream).await?;
+
+ // -- Check
+ let mut tool_calls = stream_end.captured_tool_calls().ok_or("Should have captured tools")?;
+ if tool_calls.is_empty() {
+ return Err("Should have tool calls in chat_res".into());
+ }
+ let tool_call = tool_calls.pop().ok_or("Should have at least one tool call")?;
+ assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("city")?, "Paris");
+ assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("country")?, "France");
+ assert_eq!(tool_call.fn_arguments.x_get_as::<&str>("unit")?, "C");
+
+ Ok(())
+}
+
// endregion: --- Chat Stream Tests
// region: --- Binaries
diff --git a/tests/tests_p_openai.rs b/tests/tests_p_openai.rs
index 8c3571e4..74fc6f56 100644
--- a/tests/tests_p_openai.rs
+++ b/tests/tests_p_openai.rs
@@ -97,6 +97,12 @@ async fn test_chat_stream_capture_all_ok() -> TestResult<()> {
common_tests::common_test_chat_stream_capture_all_ok(MODEL, Some(Check::REASONING_USAGE)).await
}
+#[tokio::test]
+async fn test_chat_stream_tool_capture_ok() -> TestResult<()> {
+ // NOTE: For now the OpenAI Adapter does not capture the thinking, as it is not available in chat completions
+ common_tests::common_test_chat_stream_tool_capture_ok(MODEL).await
+}
+
// endregion: --- Chat Stream Tests
// region: --- Binary Tests
diff --git a/tests/tests_p_zhipu.rs b/tests/tests_p_zai.rs
similarity index 95%
rename from tests/tests_p_zhipu.rs
rename to tests/tests_p_zai.rs
index d32f53df..70bc6952 100644
--- a/tests/tests_p_zhipu.rs
+++ b/tests/tests_p_zai.rs
@@ -5,7 +5,7 @@ use genai::adapter::AdapterKind;
use genai::resolver::AuthData;
const MODEL: &str = "glm-4-plus";
-const MODEL_NS: &str = "zhipu::glm-4-plus";
+const MODEL_NS: &str = "zai::glm-4-plus";
const MODEL_V: &str = "glm-4v-flash"; // Visual language model does not support function calling
// region: --- Chat
@@ -106,7 +106,7 @@ async fn test_tool_full_flow_ok() -> TestResult<()> {
#[tokio::test]
async fn test_resolver_auth_ok() -> TestResult<()> {
- common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZHIPU_API_KEY")).await
+ common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZAI_API_KEY")).await
}
// endregion: --- Resolver Tests
@@ -115,7 +115,7 @@ async fn test_resolver_auth_ok() -> TestResult<()> {
#[tokio::test]
async fn test_list_models() -> TestResult<()> {
- common_tests::common_test_list_models(AdapterKind::Zhipu, "glm-4-plus").await
+ common_tests::common_test_list_models(AdapterKind::Zai, "glm-4-plus").await
}
// endregion: --- List
diff --git a/tests/tests_p_zhipu_reasoning.rs b/tests/tests_p_zai_reasoning.rs
similarity index 96%
rename from tests/tests_p_zhipu_reasoning.rs
rename to tests/tests_p_zai_reasoning.rs
index 9031a409..c405e759 100644
--- a/tests/tests_p_zhipu_reasoning.rs
+++ b/tests/tests_p_zai_reasoning.rs
@@ -66,7 +66,7 @@ async fn test_chat_stream_capture_content_ok() -> TestResult<()> {
#[tokio::test]
async fn test_resolver_auth_ok() -> TestResult<()> {
- common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZHIPU_API_KEY")).await
+ common_tests::common_test_resolver_auth_ok(MODEL, AuthData::from_env("ZAI_API_KEY")).await
}
// endregion: --- Resolver Tests
@@ -75,7 +75,7 @@ async fn test_resolver_auth_ok() -> TestResult<()> {
#[tokio::test]
async fn test_list_models() -> TestResult<()> {
- common_tests::common_test_list_models(AdapterKind::Zhipu, "glm-z1-flash").await
+ common_tests::common_test_list_models(AdapterKind::Zai, "glm-z1-flash").await
}
// endregion: --- List