From a3dfe703507494e6082f142276fb91a743375250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 21 Nov 2025 12:44:51 +0700 Subject: [PATCH 001/139] Remove the unused auto-refresh functionality and related imports. They are no longer needed since the underlying library issue has been resolved. --- app/services/client.py | 46 +----------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 825564b..1554bdd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -8,11 +8,8 @@ from gemini_webapi import GeminiClient, ModelOutput from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model -from gemini_webapi.exceptions import AuthError, ModelInvalid +from gemini_webapi.exceptions import ModelInvalid from gemini_webapi.types import Gem -from gemini_webapi.utils import rotate_tasks -from gemini_webapi.utils.rotate_1psidts import rotate_1psidts -from loguru import logger from ..models import Message from ..utils import g_config @@ -76,47 +73,6 @@ async def init( verbose=verbose, ) - async def start_auto_refresh(self) -> None: - """ - Refresh the __Secure-1PSIDTS cookie periodically and keep the HTTP client in sync. - """ - while True: - new_1psidts: str | None = None - try: - new_1psidts = await rotate_1psidts(self.cookies, self.proxy) - except AuthError: - if task := rotate_tasks.get(self.cookies.get("__Secure-1PSID", "")): - task.cancel() - logger.warning( - "Failed to refresh Gemini cookies (AuthError). Auto refresh task canceled." - ) - return - except Exception as exc: - logger.warning(f"Unexpected error while refreshing Gemini cookies: {exc}") - - if new_1psidts: - self.cookies["__Secure-1PSIDTS"] = new_1psidts - self._sync_httpx_cookie("__Secure-1PSIDTS", new_1psidts) - logger.debug("Gemini cookies refreshed. New __Secure-1PSIDTS applied.") - await asyncio.sleep(self.refresh_interval) - - def _sync_httpx_cookie(self, name: str, value: str) -> None: - """ - Ensure the underlying httpx client uses the refreshed cookie value. - """ - if not self.client: - return - - jar = self.client.cookies.jar - matched = False - for cookie in jar: - if cookie.name == name: - cookie.value = value - matched = True - if not matched: - # Fall back to setting the cookie with default scope if we did not find an existing entry. - self.client.cookies.set(name, value) - async def generate_content( self, prompt: str, From 3a692ab014bf6d0cb98f38d499dc2760eb92c096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 22 Nov 2025 14:54:53 +0700 Subject: [PATCH 002/139] Enhance error handling in client initialization and message sending --- app/server/chat.py | 12 ++++++++-- app/services/client.py | 52 +++++++++++------------------------------- app/services/pool.py | 26 ++++++++++++++------- 3 files changed, 41 insertions(+), 49 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66fa6ce..e8752cf 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1129,7 +1129,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s """ if len(text) <= MAX_CHARS_PER_REQUEST: # No need to split - a single request is fine. 
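         # (anything longer falls through to the chunked send path below)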
- return await session.send_message(text, files=files) + try: + return await session.send_message(text, files=files) + except Exception as e: + logger.exception(f"Error sending message to Gemini: {e}") + raise hint_len = len(CONTINUATION_HINT) chunk_size = MAX_CHARS_PER_REQUEST - hint_len @@ -1155,7 +1159,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s raise # The last chunk carries the files (if any) and we return its response. - return await session.send_message(chunks[-1], files=files) + try: + return await session.send_message(chunks[-1], files=files) + except Exception as e: + logger.exception(f"Error sending final chunk to Gemini: {e}") + raise def _iter_stream_segments(model_output: str, chunk_size: int = 64): diff --git a/app/services/client.py b/app/services/client.py index 1554bdd..26be26f 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,4 +1,3 @@ -import asyncio import html import json import re @@ -6,10 +5,7 @@ from typing import Any, cast from gemini_webapi import GeminiClient, ModelOutput -from gemini_webapi.client import ChatSession -from gemini_webapi.constants import Model -from gemini_webapi.exceptions import ModelInvalid -from gemini_webapi.types import Gem +from loguru import logger from ..models import Message from ..utils import g_config @@ -64,40 +60,18 @@ async def init( refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval)) verbose = cast(bool, _resolve(verbose, config.verbose)) - await super().init( - timeout=timeout, - auto_close=auto_close, - close_delay=close_delay, - auto_refresh=auto_refresh, - refresh_interval=refresh_interval, - verbose=verbose, - ) - - async def generate_content( - self, - prompt: str, - files: list[str | Path] | None = None, - model: Model | str = Model.UNSPECIFIED, - gem: Gem | str | None = None, - chat: ChatSession | None = None, - **kwargs, - ) -> ModelOutput: - cnt = 2 # Try 2 times before giving up - last_exception: ModelInvalid | None = None - while cnt: - cnt -= 1 - try: - return await super().generate_content(prompt, files, model, gem, chat, **kwargs) - except ModelInvalid as e: - # This is not always caused by model selection. Instead, it can be solved by retrying. - # So we catch it and retry as a workaround. 
- await asyncio.sleep(1) - last_exception = e - - # If retrying failed, re-raise ModelInvalid - if last_exception is not None: - raise last_exception - raise RuntimeError("generate_content failed without receiving a ModelInvalid error.") + try: + await super().init( + timeout=timeout, + auto_close=auto_close, + close_delay=close_delay, + auto_refresh=auto_refresh, + refresh_interval=refresh_interval, + verbose=verbose, + ) + except Exception: + logger.exception(f"Failed to initialize GeminiClient {self.id}") + raise @staticmethod async def process_message( diff --git a/app/services/pool.py b/app/services/pool.py index abf1fa0..24a21dc 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -35,14 +35,24 @@ def __init__(self) -> None: async def init(self) -> None: """Initialize all clients in the pool.""" + success_count = 0 for client in self._clients: if not client.running: - await client.init( - timeout=g_config.gemini.timeout, - auto_refresh=g_config.gemini.auto_refresh, - verbose=g_config.gemini.verbose, - refresh_interval=g_config.gemini.refresh_interval, - ) + try: + await client.init( + timeout=g_config.gemini.timeout, + auto_refresh=g_config.gemini.auto_refresh, + verbose=g_config.gemini.verbose, + refresh_interval=g_config.gemini.refresh_interval, + ) + except Exception: + logger.exception(f"Failed to initialize client {client.id}") + + if client.running: + success_count += 1 + + if success_count == 0: + raise RuntimeError("Failed to initialize any Gemini clients") async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: """Return a healthy client by id or using round-robin.""" @@ -89,8 +99,8 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: ) logger.info(f"Restarted Gemini client {client.id} after it stopped.") return True - except Exception as exc: - logger.warning(f"Failed to restart Gemini client {client.id}: {exc}") + except Exception: + logger.exception(f"Failed to restart Gemini client {client.id}") return False @property From d57e3676fed9fa03e1f51a5aed80d4b7f88e6a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 22 Nov 2025 17:49:41 +0700 Subject: [PATCH 003/139] Refactor link handling to extract file paths and simplify Google search links --- app/services/client.py | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 26be26f..f5a39dd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -24,9 +24,20 @@ ) HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") + MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])") + CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL) +FILE_PATH_PATTERN = re.compile( + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + re.IGNORECASE, +) + +GOOGLE_SEARCH_LINK_PATTERN = re.compile( + r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? 
str: text = _unescape_html(text) text = _unescape_markdown(text) - def simplify_link_target(text_content: str) -> str: - match_colon_num = re.match(r"([^:]+:\d+)", text_content) - if match_colon_num: - return match_colon_num.group(1) - return text_content + def extract_file_path_from_display_text(text_content: str) -> str | None: + match = re.match(FILE_PATH_PATTERN, text_content) + if match: + return match.group(1) + return None def replacer(match: re.Match) -> str: - outer_open_paren = match.group(1) - display_text = match.group(2) + display_text = str(match.group(1)).strip() + google_search_prefix = match.group(2) + query_part = match.group(3) - new_target_url = simplify_link_target(display_text) - new_link_segment = f"[`{display_text}`]({new_target_url})" + file_path = extract_file_path_from_display_text(display_text) - if outer_open_paren: - return f"{outer_open_paren}{new_link_segment})" + if file_path: + # If it's a file path, transform it into a self-referencing Markdown link + return f"[`{file_path}`]({file_path})" else: - return new_link_segment - - # Replace Google search links with simplified Markdown links - pattern = r"(\()?\[`([^`]+?)`\]\((https://www.google.com/search\?q=)(.*?)(? Date: Sat, 22 Nov 2025 18:29:41 +0700 Subject: [PATCH 004/139] Fix regex pattern for Google search link matching --- app/services/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/client.py b/app/services/client.py index f5a39dd..ffc559e 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -35,7 +35,7 @@ ) GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? Date: Sat, 22 Nov 2025 21:44:09 +0700 Subject: [PATCH 005/139] Fix regex patterns for Markdown escaping, code fence and Google search link matching --- app/services/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index ffc559e..0088c74 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -25,17 +25,17 @@ HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") -MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])") +MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") -CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL) +CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) FILE_PATH_PATTERN = re.compile( - r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, ) GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"(?:`\s*)?`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? 
Date: Sat, 22 Nov 2025 22:52:27 +0700 Subject: [PATCH 006/139] Increase timeout value in configuration files from 60 to 120 seconds to better handle heavy tasks --- app/server/chat.py | 2 -- app/services/client.py | 8 -------- app/utils/config.py | 2 +- config/config.yaml | 6 +++--- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index e8752cf..b4e88da 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -48,9 +48,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) - CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" - TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( r"(.*?)", re.DOTALL | re.IGNORECASE diff --git a/app/services/client.py b/app/services/client.py index 0088c74..166eb70 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -16,29 +16,21 @@ '```xml\n{"arg": "value"}\n```\n' "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" ) - CODE_BLOCK_HINT = ( "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n" "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" ) - HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") - MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") - CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) - FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, ) - GOOGLE_SEARCH_LINK_PATTERN = re.compile( r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
) - - _UNSET = object() diff --git a/app/utils/config.py b/app/utils/config.py index 48f0792..796ca75 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -56,7 +56,7 @@ class GeminiConfig(BaseModel): clients: list[GeminiClientSettings] = Field( ..., description="List of Gemini client credential pairs" ) - timeout: int = Field(default=60, ge=1, description="Init timeout") + timeout: int = Field(default=120, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" diff --git a/config/config.yaml b/config/config.yaml index b0f8fbf..89c88b7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -21,8 +21,8 @@ gemini: - id: "example-id-1" # Arbitrary client ID secure_1psid: "YOUR_SECURE_1PSID_HERE" secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" - proxy: null # Optional proxy URL (null/empty means direct connection) - timeout: 60 # Init timeout in seconds + proxy: null # Optional proxy URL (null/empty means direct connection) + timeout: 120 # Init timeout in seconds auto_refresh: true # Auto-refresh session cookies refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests @@ -34,4 +34,4 @@ storage: retention_days: 14 # Number of days to retain conversations before cleanup logging: - level: "INFO" # Log level: DEBUG, INFO, WARNING, ERROR + level: "INFO" # Log level: DEBUG, INFO, WARNING, ERROR From f00ebfcbd0424c7ab06d680f308349a04aff3be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 13:15:27 +0700 Subject: [PATCH 007/139] Fix Image generation --- .github/workflows/docker.yaml | 10 ++--- .github/workflows/track.yml | 12 +++--- app/models/models.py | 14 +++---- app/server/chat.py | 77 +++++++++++++++++++++-------------- app/services/client.py | 4 +- app/utils/helper.py | 10 ++++- 6 files changed, 75 insertions(+), 52 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 4527f3d..eef2a41 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -5,11 +5,11 @@ on: branches: - main tags: - - 'v*' + - "v*" paths-ignore: - - '**/*.md' - - '.github/workflows/ruff.yaml' - - '.github/workflows/track.yml' + - "**/*.md" + - ".github/workflows/ruff.yaml" + - ".github/workflows/track.yml" env: REGISTRY: ghcr.io @@ -57,4 +57,4 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha - cache-to: type=gha,mode=max \ No newline at end of file + cache-to: type=gha,mode=max diff --git a/.github/workflows/track.yml b/.github/workflows/track.yml index 63afbec..838dcf8 100644 --- a/.github/workflows/track.yml +++ b/.github/workflows/track.yml @@ -2,7 +2,7 @@ name: Update gemini-webapi on: schedule: - - cron: '0 0 * * *' # Runs every day at midnight + - cron: "0 0 * * *" # Runs every day at midnight workflow_dispatch: jobs: @@ -24,7 +24,7 @@ jobs: run: | # Install dependencies first to enable uv pip show uv sync - + # Get current version of gemini-webapi before upgrade OLD_VERSION=$(uv pip show gemini-webapi 2>/dev/null | grep ^Version: | awk '{print $2}') if [ -z "$OLD_VERSION" ]; then @@ -32,10 +32,10 @@ jobs: exit 1 fi echo "Current gemini-webapi version: $OLD_VERSION" - + # Update the package using uv, which handles pyproject.toml and uv.lock uv add --upgrade gemini-webapi - + # Get new version of gemini-webapi after upgrade 
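           # `uv pip show` prints a "Version: x.y.z" line; grep + awk pull out the bare version number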
NEW_VERSION=$(uv pip show gemini-webapi | grep ^Version: | awk '{print $2}') if [ -z "$NEW_VERSION" ]; then @@ -43,7 +43,7 @@ jobs: exit 1 fi echo "New gemini-webapi version: $NEW_VERSION" - + # Only proceed if gemini-webapi version has changed if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then echo "gemini-webapi has been updated from $OLD_VERSION to $NEW_VERSION" @@ -63,7 +63,7 @@ jobs: title: ":arrow_up: update gemini-webapi to ${{ steps.update.outputs.version }}" body: | Update `gemini-webapi` to version `${{ steps.update.outputs.version }}`. - + Auto-generated by GitHub Actions using `uv`. branch: update-gemini-webapi base: main diff --git a/app/models/models.py b/app/models/models.py index 3991f12..74d8cd5 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -154,11 +154,13 @@ class ConversationInStore(BaseModel): class ResponseInputContent(BaseModel): """Content item for Responses API input.""" - type: Literal["input_text", "input_image"] + type: Literal["input_text", "input_image", "input_file"] text: Optional[str] = None image_url: Optional[str] = None - image_base64: Optional[str] = None - mime_type: Optional[str] = None + detail: Optional[Literal["auto", "low", "high"]] = None + file_url: Optional[str] = None + file_data: Optional[str] = None + filename: Optional[str] = None class ResponseInputItem(BaseModel): @@ -212,12 +214,8 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text", "output_image"] + type: Literal["output_text"] text: Optional[str] = None - image_base64: Optional[str] = None - mime_type: Optional[str] = None - width: Optional[int] = None - height: Optional[int] = None class ResponseOutputMessage(BaseModel): diff --git a/app/server/chat.py b/app/server/chat.py index b4e88da..76dc632 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -381,14 +381,6 @@ def _strip_tagged_blocks(text: str) -> str: return "".join(result) -def _ensure_data_url(part: ResponseInputContent) -> str | None: - image_url = part.image_url - if not image_url and part.image_base64: - mime_type = part.mime_type or "image/png" - image_url = f"data:{mime_type};base64,{part.image_base64}" - return image_url - - def _response_items_to_messages( items: str | list[ResponseInputItem], ) -> tuple[list[Message], str | list[ResponseInputItem]]: @@ -422,14 +414,34 @@ def _response_items_to_messages( if text_value: converted.append(ContentItem(type="text", text=text_value)) elif part.type == "input_image": - image_url = _ensure_data_url(part) + image_url = part.image_url if image_url: normalized_contents.append( - ResponseInputContent(type="input_image", image_url=image_url) + ResponseInputContent( + type="input_image", + image_url=image_url, + detail=part.detail if part.detail else "auto", + ) ) converted.append( - ContentItem(type="image_url", image_url={"url": image_url}) + ContentItem( + type="image_url", + image_url={ + "url": image_url, + "detail": part.detail if part.detail else "auto", + }, + ) ) + elif part.type == "input_file": + if part.file_url or part.file_data: + normalized_contents.append(part) + file_info = {} + if part.file_data: + file_info["file_data"] = part.file_data + file_info["filename"] = part.filename + if part.file_url: + file_info["url"] = part.file_url + converted.append(ContentItem(type="file", file=file_info)) messages.append(Message(role=role, content=converted or None)) normalized_input.append( @@ -472,11 +484,26 @@ def _instructions_to_messages( if text_value: 
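                         # plain text parts pass through unchanged; image/file parts are normalized below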
converted.append(ContentItem(type="text", text=text_value)) elif part.type == "input_image": - image_url = _ensure_data_url(part) + image_url = part.image_url if image_url: converted.append( - ContentItem(type="image_url", image_url={"url": image_url}) + ContentItem( + type="image_url", + image_url={ + "url": image_url, + "detail": part.detail if part.detail else "auto", + }, + ) ) + elif part.type == "input_file": + file_info = {} + if part.file_data: + file_info["file_data"] = part.file_data + file_info["filename"] = part.filename + if part.file_url: + file_info["url"] = part.file_url + if file_info: + converted.append(ContentItem(type="file", file=file_info)) instruction_messages.append(Message(role=role, content=converted or None)) return instruction_messages @@ -799,13 +826,13 @@ async def create_response( session, client, remaining_messages = await _find_reusable_session(db, pool, model, messages) async def _build_payload( - payload_messages: list[Message], reuse_session: bool + _payload_messages: list[Message], _reuse_session: bool ) -> tuple[str, list[Path | str]]: - if reuse_session and len(payload_messages) == 1: + if _reuse_session and len(_payload_messages) == 1: return await GeminiClientWrapper.process_message( - payload_messages[0], tmp_dir, tagged=False + _payload_messages[0], tmp_dir, tagged=False ) - return await GeminiClientWrapper.process_conversation(payload_messages, tmp_dir) + return await GeminiClientWrapper.process_conversation(_payload_messages, tmp_dir) reuse_session = session is not None if reuse_session: @@ -821,7 +848,7 @@ async def _build_payload( detail="No new messages to send for the existing session.", ) payload_messages = messages_to_send - model_input, files = await _build_payload(payload_messages, reuse_session=True) + model_input, files = await _build_payload(payload_messages, _reuse_session=True) logger.debug( f"Reused session {session.metadata} - sending {len(payload_messages)} prepared messages." 
) @@ -830,7 +857,7 @@ async def _build_payload( client = await pool.acquire() session = client.start_chat(model=model) payload_messages = messages - model_input, files = await _build_payload(payload_messages, reuse_session=False) + model_input, files = await _build_payload(payload_messages, _reuse_session=False) except ValueError as e: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) except RuntimeError as e: @@ -935,7 +962,6 @@ async def _build_payload( detail = f"{detail} Assistant response: {summary}" raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) - image_contents: list[ResponseOutputContent] = [] image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: @@ -943,16 +969,6 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue - mime_type = "image/png" if isinstance(image, GeneratedImage) else "image/jpeg" - image_contents.append( - ResponseOutputContent( - type="output_image", - image_base64=image_base64, - mime_type=mime_type, - width=width, - height=height, - ) - ) image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", @@ -977,7 +993,6 @@ async def _build_payload( response_contents: list[ResponseOutputContent] = [] if assistant_text: response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) - response_contents.extend(image_contents) if not response_contents: response_contents.append(ResponseOutputContent(type="output_text", text="")) diff --git a/app/services/client.py b/app/services/client.py index 166eb70..0207114 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -113,8 +113,10 @@ async def process_message( if file_data := item.file.get("file_data", None): filename = item.file.get("filename", "") files.append(await save_file_to_tempfile(file_data, filename, tempdir)) + elif url := item.file.get("url", None): + files.append(await save_url_to_tempfile(url, tempdir)) else: - raise ValueError("File must contain 'file_data' key") + raise ValueError("File must contain 'file_data' or 'url' key") elif message.content is not None: raise ValueError("Unsupported message content type.") diff --git a/app/utils/helper.py b/app/utils/helper.py index 48fc99d..3bff469 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,4 +1,5 @@ import base64 +import mimetypes import tempfile from pathlib import Path @@ -40,9 +41,16 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None): suffix: str | None = None if url.startswith("data:image/"): # Base64 encoded image + metadata_part = url.split(",")[0] + mime_type = metadata_part.split(":")[1].split(";")[0] + base64_data = url.split(",")[1] data = base64.b64decode(base64_data) - suffix = ".png" + + # Guess extension from mime type, default to the subtype if not found + suffix = mimetypes.guess_extension(mime_type) + if not suffix: + suffix = f".{mime_type.split('/')[1]}" else: # http files async with httpx.AsyncClient() as client: From d911c33e81e83211ed53d77b300c4c203df7b53c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 15:50:45 +0700 Subject: [PATCH 008/139] Refactor tool handling to support standard and image generation tools separately --- app/models/models.py | 7 ++++--- app/server/chat.py | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 74d8cd5..52dd414 
100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -174,7 +174,8 @@ class ResponseInputItem(BaseModel): class ResponseToolChoice(BaseModel): """Tool choice enforcing a specific tool in Responses API.""" - type: Literal["image_generation"] + type: Literal["function", "image_generation"] + function: Optional[ToolChoiceFunctionDetail] = None class ResponseImageTool(BaseModel): @@ -195,8 +196,8 @@ class ResponseCreateRequest(BaseModel): top_p: Optional[float] = 1.0 max_output_tokens: Optional[int] = None stream: Optional[bool] = False - tool_choice: Optional[ResponseToolChoice] = None - tools: Optional[List[ResponseImageTool]] = None + tool_choice: Optional[Union[str, ResponseToolChoice]] = None + tools: Optional[List[Union[Tool, ResponseImageTool]]] = None store: Optional[bool] = None user: Optional[str] = None response_format: Optional[Dict[str, Any]] = None diff --git a/app/server/chat.py b/app/server/chat.py index 76dc632..8277d0c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -795,7 +795,28 @@ async def create_response( f"Structured response requested for /v1/responses (schema={structured_requirement.schema_name})." ) - image_instruction = _build_image_generation_instruction(request.tools, request.tool_choice) + # Separate standard tools from image generation tools + standard_tools: list[Tool] = [] + image_tools: list[ResponseImageTool] = [] + + if request.tools: + for t in request.tools: + if isinstance(t, Tool): + standard_tools.append(t) + elif isinstance(t, ResponseImageTool): + image_tools.append(t) + # Handle dicts if Pydantic didn't convert them fully (fallback) + elif isinstance(t, dict): + t_type = t.get("type") + if t_type == "function": + standard_tools.append(Tool.model_validate(t)) + elif t_type == "image_generation": + image_tools.append(ResponseImageTool.model_validate(t)) + + image_instruction = _build_image_generation_instruction( + image_tools, + request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, + ) if image_instruction: extra_instructions.append(image_instruction) logger.debug("Image generation support enabled for /v1/responses request.") @@ -808,10 +829,19 @@ async def create_response( f"Injected {len(preface_messages)} instruction messages before sending to Gemini." ) + # Pass standard tools to the prompt builder + # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) + model_tool_choice = None + if isinstance(request.tool_choice, str): + model_tool_choice = request.tool_choice + elif isinstance(request.tool_choice, ToolChoiceFunction): + model_tool_choice = request.tool_choice + # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. 
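+    # String choices (e.g. "auto"/"none") and explicit ToolChoiceFunction values are forwarded unchanged.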
+ messages = _prepare_messages_for_model( conversation_messages, - tools=None, - tool_choice=None, + tools=standard_tools or None, + tool_choice=model_tool_choice, extra_instructions=extra_instructions or None, ) From a8241ad78831b675d0321bbe5271c1bf10a6ce2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 17:17:27 +0700 Subject: [PATCH 009/139] Fix: use "ascii" decoding for base64-encoded image data consistency --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 8277d0c..67790ab 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1524,4 +1524,4 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non data = Path(saved_path).read_bytes() width, height = _extract_image_dimensions(data) - return base64.b64encode(data).decode("utf-8"), width, height + return base64.b64encode(data).decode("ascii"), width, height From fd2723d49b5929cb770a231aeb479f392f7a7d53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 12:08:19 +0700 Subject: [PATCH 010/139] Fix: replace `running` with `_running` for internal client status checks --- app/services/pool.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/services/pool.py b/app/services/pool.py index 24a21dc..28a3435 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -37,7 +37,7 @@ async def init(self) -> None: """Initialize all clients in the pool.""" success_count = 0 for client in self._clients: - if not client.running: + if not client._running: try: await client.init( timeout=g_config.gemini.timeout, @@ -48,7 +48,7 @@ async def init(self) -> None: except Exception: logger.exception(f"Failed to initialize client {client.id}") - if client.running: + if client._running: success_count += 1 if success_count == 0: @@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: """Make sure the client is running, attempting a restart if needed.""" - if client.running: + if client._running: return True lock = self._restart_locks.get(client.id) @@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False # Should not happen async with lock: - if client.running: + if client._running: return True try: @@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]: def status(self) -> Dict[str, bool]: """Return running status for each client.""" - return {client.id: client.running for client in self._clients} + return {client.id: client._running for client in self._clients} From 8ee6cc0335e4b63df2126a6bf69d6c9e42505485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 14:10:06 +0700 Subject: [PATCH 011/139] Refactor: replace direct `_running` access with `running()` method in client status checks --- app/services/client.py | 3 +++ app/services/pool.py | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 0207114..09c52c1 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -76,6 +76,9 @@ async def init( logger.exception(f"Failed to initialize GeminiClient {self.id}") raise + def running(self) -> bool: + return self._running + @staticmethod async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True 
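A note on the shape of this accessor: it keeps the pool off the private `_running` flag without changing what the flag means. A property would read slightly more naturally at call sites; a minimal sketch of that alternative (illustrative only — `is_running` is a hypothetical name, and this patch intentionally ships the method form so call sites stay `client.running()`):

    @property
    def is_running(self) -> bool:
        # same underlying flag as running(), exposed attribute-style
        return self._running
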
diff --git a/app/services/pool.py b/app/services/pool.py index 28a3435..a134dda 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -37,7 +37,7 @@ async def init(self) -> None: """Initialize all clients in the pool.""" success_count = 0 for client in self._clients: - if not client._running: + if not client.running(): try: await client.init( timeout=g_config.gemini.timeout, @@ -48,7 +48,7 @@ async def init(self) -> None: except Exception: logger.exception(f"Failed to initialize client {client.id}") - if client._running: + if client.running(): success_count += 1 if success_count == 0: @@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: """Make sure the client is running, attempting a restart if needed.""" - if client._running: + if client.running(): return True lock = self._restart_locks.get(client.id) @@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False # Should not happen async with lock: - if client._running: + if client.running(): return True try: @@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]: def status(self) -> Dict[str, bool]: """Return running status for each client.""" - return {client.id: client._running for client in self._clients} + return {client.id: client.running() for client in self._clients} From 453700eba682cfdd4bfc2e061a8139129654d017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 22:11:11 +0700 Subject: [PATCH 012/139] Extend models with new fields for annotations, reasoning, audio, log probabilities, and token details; adjust response handling accordingly. --- app/models/models.py | 13 ++++++++++++- app/server/chat.py | 7 ++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 52dd414..1d7368c 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -12,7 +12,9 @@ class ContentItem(BaseModel): type: Literal["text", "image_url", "file", "input_audio"] text: Optional[str] = None image_url: Optional[Dict[str, str]] = None + input_audio: Optional[Dict[str, Any]] = None file: Optional[Dict[str, str]] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) class Message(BaseModel): @@ -22,6 +24,10 @@ class Message(BaseModel): content: Union[str, List[ContentItem], None] = None name: Optional[str] = None tool_calls: Optional[List["ToolCall"]] = None + refusal: Optional[str] = None + reasoning_content: Optional[str] = None + audio: Optional[Dict[str, Any]] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) class Choice(BaseModel): @@ -30,6 +36,7 @@ class Choice(BaseModel): index: int message: Message finish_reason: str + logprobs: Optional[Dict[str, Any]] = None class FunctionCall(BaseModel): @@ -81,6 +88,8 @@ class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int + prompt_tokens_details: Optional[Dict[str, int]] = None + completion_tokens_details: Optional[Dict[str, int]] = None class ModelData(BaseModel): @@ -118,6 +127,8 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage + system_fingerprint: Optional[str] = None + service_tier: Optional[str] = None class ModelListResponse(BaseModel): @@ -217,6 +228,7 @@ class ResponseOutputContent(BaseModel): type: Literal["output_text"] text: Optional[str] = None + annotations: List[Dict[str, Any]] = 
Field(default_factory=list) class ResponseOutputMessage(BaseModel): @@ -257,7 +269,6 @@ class ResponseCreateResponse(BaseModel): created: int model: str output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] - output_text: Optional[str] = None status: Literal[ "in_progress", "completed", diff --git a/app/server/chat.py b/app/server/chat.py index 67790ab..5848a39 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1022,10 +1022,12 @@ async def _build_payload( response_contents: list[ResponseOutputContent] = [] if assistant_text: - response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) + response_contents.append( + ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) + ) if not response_contents: - response_contents.append(ResponseOutputContent(type="output_text", text="")) + response_contents.append(ResponseOutputContent(type="output_text", text="", annotations=[])) created_time = int(datetime.now(tz=timezone.utc).timestamp()) response_id = f"resp_{uuid.uuid4().hex}" @@ -1059,7 +1061,6 @@ async def _build_payload( *tool_call_items, *image_call_items, ], - output_text=assistant_text or None, status="completed", usage=usage, input=normalized_input or None, From 9260f8b5cc37192716d4127ed6ab98a087e7e3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 22:51:54 +0700 Subject: [PATCH 013/139] Extend models with new fields (annotations, error), add `normalize_output_text` validator, rename `created` to `created_at`, and update response handling accordingly. --- app/models/models.py | 16 +++++++++++++--- app/server/chat.py | 8 ++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 1d7368c..8d5102c 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -3,7 +3,7 @@ from datetime import datetime from typing import Any, Dict, List, Literal, Optional, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator class ContentItem(BaseModel): @@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage - system_fingerprint: Optional[str] = None service_tier: Optional[str] = None @@ -172,6 +171,15 @@ class ResponseInputContent(BaseModel): file_url: Optional[str] = None file_data: Optional[str] = None filename: Optional[str] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) + + @model_validator(mode="before") + @classmethod + def normalize_output_text(cls, data: Any) -> Any: + """Allow output_text (from previous turns) to be treated as input_text.""" + if isinstance(data, dict) and data.get("type") == "output_text": + data["type"] = "input_text" + return data class ResponseInputItem(BaseModel): @@ -266,7 +274,7 @@ class ResponseCreateResponse(BaseModel): id: str object: Literal["response"] = "response" - created: int + created_at: int model: str output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] status: Literal[ @@ -274,9 +282,11 @@ class ResponseCreateResponse(BaseModel): "completed", "failed", "incomplete", + "cancelled", "requires_action", ] = "completed" usage: ResponseUsage + error: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None system_fingerprint: Optional[str] = None input: Optional[Union[str, List[ResponseInputItem]]] = None diff --git a/app/server/chat.py b/app/server/chat.py index 5848a39..ef508b9 100644 
--- a/app/server/chat.py +++ b/app/server/chat.py @@ -1049,7 +1049,7 @@ async def _build_payload( response_payload = ResponseCreateResponse( id=response_id, - created=created_time, + created_at=created_time, model=request.model, output=[ ResponseOutputMessage( @@ -1334,7 +1334,7 @@ def _create_responses_streaming_response( response_dict = response_payload.model_dump(mode="json") response_id = response_payload.id - created_time = response_payload.created + created_time = response_payload.created_at model = response_payload.model logger.debug( @@ -1344,14 +1344,14 @@ def _create_responses_streaming_response( base_event = { "id": response_id, "object": "response", - "created": created_time, + "created_at": created_time, "model": model, } created_snapshot: dict[str, Any] = { "id": response_id, "object": "response", - "created": created_time, + "created_at": created_time, "model": model, "status": "in_progress", } From d6a8e6bdb786bb90dd653cd9aa3fc88469c2b505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 09:35:30 +0700 Subject: [PATCH 014/139] Extend response models to support tool choices, image output, and improved streaming of response items. Refactor image generation handling for consistency and add compatibility with output content. --- app/models/models.py | 7 ++-- app/server/chat.py | 83 ++++++++++++++++++++++++++++++++------------ 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 8d5102c..bbc2140 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage - service_tier: Optional[str] = None class ModelListResponse(BaseModel): @@ -234,8 +233,9 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text"] + type: Literal["output_text", "output_image"] text: Optional[str] = None + image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) @@ -285,10 +285,11 @@ class ResponseCreateResponse(BaseModel): "cancelled", "requires_action", ] = "completed" + tool_choice: Optional[Union[str, ResponseToolChoice]] = None + tools: Optional[List[Union[Tool, ResponseImageTool]]] = None usage: ResponseUsage error: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None - system_fingerprint: Optional[str] = None input: Optional[Union[str, List[ResponseInputItem]]] = None diff --git a/app/server/chat.py b/app/server/chat.py index ef508b9..cb498a5 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -992,6 +992,7 @@ async def _build_payload( detail = f"{detail} Assistant response: {summary}" raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) + response_contents: list[ResponseOutputContent] = [] image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: @@ -999,15 +1000,25 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue + + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", status="completed", result=image_base64, - output_format="png" if isinstance(image, GeneratedImage) else "jpeg", + output_format=img_format, size=f"{width}x{height}" if width and height else None, ) ) + # Add as output_image content for compatibility + 
response_contents.append( + ResponseOutputContent( + type="output_image", + image_url=f"data:image/{img_format};base64,{image_base64}", + annotations=[], + ) + ) tool_call_items: list[ResponseToolCall] = [] if detected_tool_calls: @@ -1020,7 +1031,6 @@ async def _build_payload( for call in detected_tool_calls ] - response_contents: list[ResponseOutputContent] = [] if assistant_text: response_contents.append( ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) @@ -1065,6 +1075,8 @@ async def _build_payload( usage=usage, input=normalized_input or None, metadata=request.metadata or None, + tools=request.tools, + tool_choice=request.tool_choice, ) try: @@ -1359,6 +1371,10 @@ def _create_responses_streaming_response( created_snapshot["metadata"] = response_dict["metadata"] if response_dict.get("input") is not None: created_snapshot["input"] = response_dict["input"] + if response_dict.get("tools") is not None: + created_snapshot["tools"] = response_dict["tools"] + if response_dict.get("tool_choice") is not None: + created_snapshot["tool_choice"] = response_dict["tool_choice"] async def generate_stream(): # Emit creation event @@ -1369,30 +1385,53 @@ async def generate_stream(): } yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - # Stream textual content, if any - if assistant_text: - for chunk in _iter_stream_segments(assistant_text): - delta_event = { - **base_event, - "type": "response.output_text.delta", - "output_index": 0, - "delta": chunk, - } - yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" - - done_event = { + # Stream output items (Message/Text, Tool Calls, Images) + for i, item in enumerate(response_payload.output): + item_json = item.model_dump(mode="json", exclude_none=True) + + added_event = { **base_event, - "type": "response.output_text.done", - "output_index": 0, + "type": "response.output_item.added", + "output_index": i, + "item": item_json, } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" - else: - done_event = { + yield f"data: {orjson.dumps(added_event).decode('utf-8')}\n\n" + + # 2. Stream content if it's a message (text) + if item.type == "message": + content_text = "" + # Aggregate text content to stream + for c in item.content: + if c.type == "output_text" and c.text: + content_text += c.text + + if content_text: + for chunk in _iter_stream_segments(content_text): + delta_event = { + **base_event, + "type": "response.output_text.delta", + "output_index": i, + "delta": chunk, + } + yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" + + # Text done + done_event = { + **base_event, + "type": "response.output_text.done", + "output_index": i, + } + yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" + + # 3. Emit output_item.done for all types + # This confirms the item is fully transferred. + item_done_event = { **base_event, - "type": "response.output_text.done", - "output_index": 0, + "type": "response.output_item.done", + "output_index": i, + "item": item_json, } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps(item_done_event).decode('utf-8')}\n\n" # Emit completed event with full payload completed_event = { From 16435a2ce12a4d37e9f3cfa758f384000aa41123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 09:50:47 +0700 Subject: [PATCH 015/139] Set default `text` value to an empty string for `ResponseOutputContent` and ensure consistent initialization in image output handling. 
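Likely motivation: strict OpenAI-compatible clients often read `content[i].text` without a null check, so a `None` text on `output_image` parts can crash them; defaulting to "" avoids that while leaving `output_text` behavior unchanged.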
--- app/models/models.py | 2 +- app/server/chat.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/models.py b/app/models/models.py index bbc2140..2c987b8 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -234,7 +234,7 @@ class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" type: Literal["output_text", "output_image"] - text: Optional[str] = None + text: Optional[str] = "" image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) diff --git a/app/server/chat.py b/app/server/chat.py index cb498a5..7745a26 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1015,6 +1015,7 @@ async def _build_payload( response_contents.append( ResponseOutputContent( type="output_image", + text="", image_url=f"data:image/{img_format};base64,{image_base64}", annotations=[], ) From fc99c2d60193f346006f5cf17af4e849d8ea2669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:03:50 +0700 Subject: [PATCH 016/139] feat: Add /images endpoint with dedicated router and improved image management Add dedicated router for /images endpoint and refactor image handling logic for better modularity. Enhance temporary image management with secure naming, token verification, and cleanup functionality. --- app/main.py | 9 +++++- app/server/chat.py | 35 ++++++++++++++++-------- app/server/images.py | 15 ++++++++++ app/server/middleware.py | 59 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 app/server/images.py diff --git a/app/main.py b/app/main.py index 95458d3..c215e2a 100644 --- a/app/main.py +++ b/app/main.py @@ -6,7 +6,12 @@ from .server.chat import router as chat_router from .server.health import router as health_router -from .server.middleware import add_cors_middleware, add_exception_handler +from .server.images import router as images_router +from .server.middleware import ( + add_cors_middleware, + add_exception_handler, + cleanup_expired_images, +) from .services import GeminiClientPool, LMDBConversationStore RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60 # 6 hours @@ -28,6 +33,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None: while not stop_event.is_set(): try: store.cleanup_expired() + cleanup_expired_images(store.retention_days) except Exception: logger.exception("LMDB retention cleanup task failed.") @@ -93,5 +99,6 @@ def create_app() -> FastAPI: app.include_router(health_router, tags=["Health"]) app.include_router(chat_router, tags=["Chat"]) + app.include_router(images_router, tags=["Images"]) return app diff --git a/app/server/chat.py b/app/server/chat.py index 7745a26..db92dbc 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -44,7 +44,7 @@ from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config from ..utils.helper import estimate_tokens -from .middleware import get_temp_dir, verify_api_key +from .middleware import get_image_store_dir, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) @@ -588,6 +588,7 @@ async def create_chat_completion( request: ChatCompletionRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), ): pool = GeminiClientPool() db = LMDBConversationStore() @@ -775,6 +776,7 @@ async def 
create_response( request: ResponseCreateRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), ): base_messages, normalized_input = _response_items_to_messages(request.input) if not base_messages: @@ -996,12 +998,16 @@ async def _build_payload( image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: - image_base64, width, height = await _image_to_base64(image, tmp_dir) + image_base64, width, height, filename = await _image_to_base64(image, tmp_dir) except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + + # Use static URL for compatibility + image_url = f"{request.base_url}images/{filename}" + image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", @@ -1013,12 +1019,10 @@ async def _build_payload( ) # Add as output_image content for compatibility response_contents.append( - ResponseOutputContent( - type="output_image", - text="", - image_url=f"data:image/{img_format};base64,{image_base64}", - annotations=[], - ) + ResponseOutputContent(type="output_text", text=image_url, annotations=[]) + ) + response_contents.append( + ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[]) ) tool_call_items: list[ResponseToolCall] = [] @@ -1553,8 +1557,8 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: return None, None -async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None]: - """Persist an image provided by gemini_webapi and return base64 plus dimensions.""" +async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: + """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): saved_path = await image.save(path=str(temp_dir), full_size=True) else: @@ -1563,6 +1567,13 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non if not saved_path: raise ValueError("Failed to save generated image") - data = Path(saved_path).read_bytes() + # Rename file to a random UUID to ensure uniqueness and unpredictability + original_path = Path(saved_path) + random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" + new_path = temp_dir / random_name + original_path.rename(new_path) + + data = new_path.read_bytes() width, height = _extract_image_dimensions(data) - return base64.b64encode(data).decode("ascii"), width, height + filename = random_name + return base64.b64encode(data).decode("ascii"), width, height, filename diff --git a/app/server/images.py b/app/server/images.py new file mode 100644 index 0000000..2867239 --- /dev/null +++ b/app/server/images.py @@ -0,0 +1,15 @@ +from fastapi import APIRouter, HTTPException +from fastapi.responses import FileResponse + +from ..server.middleware import get_image_store_dir + +router = APIRouter() + + +@router.get("/images/{filename}", tags=["Images"]) +async def get_image(filename: str): + image_store = get_image_store_dir() + file_path = image_store / filename + if not file_path.exists(): + raise HTTPException(status_code=404, detail="Image not found") + return FileResponse(file_path) diff --git a/app/server/middleware.py b/app/server/middleware.py index b12024f..60e4c8d 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -1,13 +1,72 @@ +import hashlib +import 
hmac import tempfile +import time from pathlib import Path from fastapi import Depends, FastAPI, HTTPException, Request, status from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import ORJSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from loguru import logger from ..utils import g_config +# Persistent directory for storing generated images +IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images" +IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True) + + +def get_image_store_dir() -> Path: + """Returns a persistent directory for storing images.""" + return IMAGE_STORE_DIR + + +def get_image_token(filename: str) -> str: + """Generate a HMAC-SHA256 token for a filename using the API key.""" + secret = g_config.server.api_key + if not secret: + return "" + + msg = filename.encode("utf-8") + secret_bytes = secret.encode("utf-8") + return hmac.new(secret_bytes, msg, hashlib.sha256).hexdigest() + + +def verify_image_token(filename: str, token: str | None) -> bool: + """Verify the provided token against the filename.""" + expected = get_image_token(filename) + if not expected: + return True # No auth required + if not token: + return False + return hmac.compare_digest(token, expected) + + +def cleanup_expired_images(retention_days: int) -> int: + """Delete images in IMAGE_STORE_DIR older than retention_days.""" + if retention_days <= 0: + return 0 + + now = time.time() + retention_seconds = retention_days * 24 * 60 * 60 + cutoff = now - retention_seconds + + count = 0 + for file_path in IMAGE_STORE_DIR.iterdir(): + if not file_path.is_file(): + continue + try: + if file_path.stat().st_mtime < cutoff: + file_path.unlink() + count += 1 + except Exception as e: + logger.warning(f"Failed to delete expired image {file_path}: {e}") + + if count > 0: + logger.info(f"Cleaned up {count} expired images.") + return count + def global_exception_handler(request: Request, exc: Exception): if isinstance(exc, HTTPException): From 28441765f3fa47787027620cdc4a6d9e7ddbdd94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:10:29 +0700 Subject: [PATCH 017/139] feat: Add token-based verification for image access --- app/server/chat.py | 4 ++-- app/server/images.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index db92dbc..9371137 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -44,7 +44,7 @@ from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config from ..utils.helper import estimate_tokens -from .middleware import get_image_store_dir, get_temp_dir, verify_api_key +from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) @@ -1006,7 +1006,7 @@ async def _build_payload( img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" # Use static URL for compatibility - image_url = f"{request.base_url}images/{filename}" + image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}" image_call_items.append( ResponseImageGenerationCall( diff --git a/app/server/images.py b/app/server/images.py index 2867239..fe078f7 100644 --- a/app/server/images.py +++ b/app/server/images.py @@ -1,13 +1,16 @@ -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, 
Query from fastapi.responses import FileResponse -from ..server.middleware import get_image_store_dir +from ..server.middleware import get_image_store_dir, verify_image_token router = APIRouter() @router.get("/images/{filename}", tags=["Images"]) -async def get_image(filename: str): +async def get_image(filename: str, token: str | None = Query(default=None)): + if not verify_image_token(filename, token): + raise HTTPException(status_code=403, detail="Invalid token") + image_store = get_image_store_dir() file_path = image_store / filename if not file_path.exists(): From 4509c14dfd5a38dfa6b989b3e9ac308e3bc8c982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:22:07 +0700 Subject: [PATCH 018/139] Refactor: rename image store directory to `ai_generated_images` for clarity --- app/server/middleware.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/middleware.py b/app/server/middleware.py index 60e4c8d..630e1f5 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -13,7 +13,7 @@ from ..utils import g_config # Persistent directory for storing generated images -IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images" +IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images" IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True) From 75e2f61d3a6b1d12269af2ee82344ab643f34e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:31:15 +0700 Subject: [PATCH 019/139] fix: Update create_response to use FastAPI Request object for base_url and refactor variable handling --- app/server/chat.py | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 9371137..0010f4a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -9,7 +9,7 @@ from typing import Any, Iterator import orjson -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model @@ -773,19 +773,15 @@ async def create_chat_completion( @router.post("/v1/responses") async def create_response( - request: ResponseCreateRequest, + request_data: ResponseCreateRequest, + request: Request, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): - base_messages, normalized_input = _response_items_to_messages(request.input) - if not base_messages: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="No message input provided." - ) - - structured_requirement = _build_structured_requirement(request.response_format) - if structured_requirement and request.stream: + base_messages, normalized_input = _response_items_to_messages(request_data.input) + structured_requirement = _build_structured_requirement(request_data.response_format) + if structured_requirement and request_data.stream: logger.debug( "Structured response requested with streaming enabled; streaming not supported for Responses." 
) @@ -801,7 +797,7 @@ async def create_response( standard_tools: list[Tool] = [] image_tools: list[ResponseImageTool] = [] - if request.tools: + if request_data.tools: for t in request.tools: if isinstance(t, Tool): standard_tools.append(t) @@ -817,13 +813,15 @@ async def create_response( image_instruction = _build_image_generation_instruction( image_tools, - request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, + request_data.tool_choice + if isinstance(request_data.tool_choice, ResponseToolChoice) + else None, ) if image_instruction: extra_instructions.append(image_instruction) logger.debug("Image generation support enabled for /v1/responses request.") - preface_messages = _instructions_to_messages(request.instructions) + preface_messages = _instructions_to_messages(request_data.instructions) conversation_messages = base_messages if preface_messages: conversation_messages = [*preface_messages, *base_messages] @@ -834,10 +832,10 @@ async def create_response( # Pass standard tools to the prompt builder # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) model_tool_choice = None - if isinstance(request.tool_choice, str): - model_tool_choice = request.tool_choice - elif isinstance(request.tool_choice, ToolChoiceFunction): - model_tool_choice = request.tool_choice + if isinstance(request_data.tool_choice, str): + model_tool_choice = request_data.tool_choice + elif isinstance(request_data.tool_choice, ToolChoiceFunction): + model_tool_choice = request_data.tool_choice # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. messages = _prepare_messages_for_model( @@ -851,7 +849,7 @@ async def create_response( db = LMDBConversationStore() try: - model = Model.from_name(request.model) + model = Model.from_name(request_data.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc @@ -971,7 +969,7 @@ async def _build_payload( ) expects_image = ( - request.tool_choice is not None and request.tool_choice.type == "image_generation" + request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" ) images = model_output.images or [] logger.debug( @@ -1065,7 +1063,7 @@ async def _build_payload( response_payload = ResponseCreateResponse( id=response_id, created_at=created_time, - model=request.model, + model=request_data.model, output=[ ResponseOutputMessage( id=message_id, @@ -1079,9 +1077,9 @@ async def _build_payload( status="completed", usage=usage, input=normalized_input or None, - metadata=request.metadata or None, - tools=request.tools, - tool_choice=request.tool_choice, + metadata=request_data.metadata or None, + tools=request_data.tools, + tool_choice=request_data.tool_choice, ) try: From bde6d0d146fc9088df947cfc0958dc88963e93ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:35:44 +0700 Subject: [PATCH 020/139] fix: Correct attribute access in request_data handling within `chat.py` for tools, tool_choice, and streaming settings --- app/server/chat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0010f4a..9a3f19f 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -798,7 +798,7 @@ async def create_response( image_tools: list[ResponseImageTool] = [] if request_data.tools: - for t in request.tools: + for t in request_data.tools: if 
isinstance(t, Tool): standard_tools.append(t) elif isinstance(t, ResponseImageTool): @@ -984,7 +984,7 @@ async def _build_payload( summary = f"{summary[:197]}..." logger.warning( "Image generation requested but Gemini produced no images. " - f"client_id={client_id}, forced_tool_choice={request.tool_choice is not None}, " + f"client_id={client_id}, forced_tool_choice={request_data.tool_choice is not None}, " f"instruction_applied={bool(image_instruction)}, assistant_preview='{summary}'" ) detail = "LLM returned no images for the requested image_generation tool." @@ -1100,7 +1100,7 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to save Responses conversation to LMDB: {exc}") - if request.stream: + if request_data.stream: logger.debug( f"Streaming Responses API payload (response_id={response_payload.id}, text_chunks={bool(assistant_text)})." ) From 601451a8dbf8cf689a482fd75cda399b5e815cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:45:49 +0700 Subject: [PATCH 021/139] fix: Save generated images to persistent storage --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 9a3f19f..4246c53 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -996,7 +996,7 @@ async def _build_payload( image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: - image_base64, width, height, filename = await _image_to_base64(image, tmp_dir) + image_base64, width, height, filename = await _image_to_base64(image, image_store) except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue From 893eb6d47305f60c4b13896bfc48beb89909dd88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:51:42 +0700 Subject: [PATCH 022/139] fix: Remove unused `output_image` type from `ResponseOutputContent` and update response handling for consistency --- app/models/models.py | 3 +-- app/server/chat.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 2c987b8..c27e024 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -233,9 +233,8 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text", "output_image"] + type: Literal["output_text"] text: Optional[str] = "" - image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) diff --git a/app/server/chat.py b/app/server/chat.py index 4246c53..3396df0 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1015,13 +1015,10 @@ async def _build_payload( size=f"{width}x{height}" if width and height else None, ) ) - # Add as output_image content for compatibility + # Add as output_text content for compatibility response_contents.append( ResponseOutputContent(type="output_text", text=image_url, annotations=[]) ) - response_contents.append( - ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[]) - ) tool_call_items: list[ResponseToolCall] = [] if detected_tool_calls: From 80462b586a110cad7e5b5cc259424e405ecbafc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 13:24:14 +0700 Subject: [PATCH 023/139] fix: Update image URL generation in chat response to use Markdown format for compatibility --- app/server/chat.py | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 3396df0..c2a60ab 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1004,7 +1004,9 @@ async def _build_payload( img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" # Use static URL for compatibility - image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}" + image_url = ( + f"![{filename}]({request.base_url}images/{filename}?token={get_image_token(filename)})" + ) image_call_items.append( ResponseImageGenerationCall( From 8d49a72e0b5c605e2439d6dcbf149925cb670ded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 8 Dec 2025 09:45:58 +0700 Subject: [PATCH 024/139] fix: Enhance error handling for full-size image saving and add fallback to default size --- app/server/chat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index c2a60ab..d14e9ce 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1557,7 +1557,11 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): - saved_path = await image.save(path=str(temp_dir), full_size=True) + try: + saved_path = await image.save(path=str(temp_dir), full_size=True) + except Exception as e: + logger.warning(f"Failed to download full-size image, retrying with default size: {e}") + saved_path = await image.save(path=str(temp_dir), full_size=False) else: saved_path = await image.save(path=str(temp_dir)) From d37eae0ab8c4590b3301dc8853ef22a512ab0d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 9 Dec 2025 20:46:03 +0700 Subject: [PATCH 025/139] fix: Use filename as image ID to ensure consistency in generated image handling --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index d14e9ce..fc69293 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1010,7 +1010,7 @@ async def _build_payload( image_call_items.append( ResponseImageGenerationCall( - id=f"img_{uuid.uuid4().hex}", + id=filename.split(".")[0], status="completed", result=image_base64, output_format=img_format, From b9f776dfbb9d251ee016e05a1f6001907c3f8b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 16 Dec 2025 19:50:07 +0700 Subject: [PATCH 026/139] fix: Enhance tempfile saving by adding custom headers, content-type handling, and improved extension determination --- app/utils/helper.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 3bff469..89fc31e 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -2,12 +2,17 @@ import mimetypes import tempfile from pathlib import Path +from urllib.parse import urlparse import httpx from loguru import logger VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" +} + def add_tag(role: str, content: str, unclose: bool = False) -> str: """Surround content with role tags""" @@ -36,7 +41,7 @@ async def save_file_to_tempfile( return path -async def save_url_to_tempfile(url: 
str, tempdir: Path | None = None): +async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: data: bytes | None = None suffix: str | None = None if url.startswith("data:image/"): @@ -47,17 +52,26 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None): base64_data = url.split(",")[1] data = base64.b64decode(base64_data) - # Guess extension from mime type, default to the subtype if not found suffix = mimetypes.guess_extension(mime_type) if not suffix: suffix = f".{mime_type.split('/')[1]}" else: - # http files - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content - suffix = Path(url).suffix or ".bin" + content_type = resp.headers.get("content-type") + + if content_type: + mime_type = content_type.split(";")[0].strip() + suffix = mimetypes.guess_extension(mime_type) + + if not suffix: + path_url = urlparse(url).path + suffix = Path(path_url).suffix + + if not suffix: + suffix = ".bin" with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp: tmp.write(data) From 4b5fe078250ce0496ca93b1861f9622fc5171746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 30 Dec 2025 22:39:05 +0700 Subject: [PATCH 027/139] feat: Add support for custom Gemini models and model loading strategies - Introduced `model_strategy` configuration for "append" (default + custom models) or "overwrite" (custom models only). - Enhanced `/v1/models` endpoint to return models based on the configured strategy. - Improved model loading with environment variable overrides and validation. - Refactored model handling logic for improved modularity and error handling. --- app/server/chat.py | 70 ++++++++++++++++++++++++++++++++++-------- app/utils/config.py | 75 ++++++++++++++++++++++++++++++++++++++++++++- config/config.yaml | 5 +++ 3 files changed, 136 insertions(+), 14 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index fc69293..0a4c16c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -562,24 +562,64 @@ def _replace(match: re.Match[str]) -> str: return cleaned, tool_calls -@router.get("/v1/models", response_model=ModelListResponse) -async def list_models(api_key: str = Depends(verify_api_key)): - now = int(datetime.now(tz=timezone.utc).timestamp()) +def _get_model_by_name(name: str) -> Model: + """ + Retrieve a Model instance by name, considering custom models from config + and the update strategy (append or overwrite). + """ + strategy = g_config.gemini.model_strategy + custom_models = {m.model_name: m for m in g_config.gemini.models if m.model_name} - models = [] - for model in Model: - m_name = model.model_name - if not m_name or m_name == "unspecified": - continue + if name in custom_models: + return Model.from_dict(custom_models[name].model_dump()) + + if strategy == "overwrite": + raise ValueError(f"Model '{name}' not found in custom models (strategy='overwrite').") - models.append( + return Model.from_name(name) + + +def _get_available_models() -> list[ModelData]: + """ + Return a list of available models based on configuration strategy. 
+ """ + now = int(datetime.now(tz=timezone.utc).timestamp()) + strategy = g_config.gemini.model_strategy + models_data = [] + + custom_models = [m for m in g_config.gemini.models if m.model_name] + for m in custom_models: + models_data.append( ModelData( - id=m_name, + id=m.model_name, created=now, - owned_by="gemini-web", + owned_by="custom", ) ) + if strategy == "append": + custom_ids = {m.model_name for m in custom_models} + for model in Model: + m_name = model.model_name + if not m_name or m_name == "unspecified": + continue + if m_name in custom_ids: + continue + + models_data.append( + ModelData( + id=m_name, + created=now, + owned_by="gemini-web", + ) + ) + + return models_data + + +@router.get("/v1/models", response_model=ModelListResponse) +async def list_models(api_key: str = Depends(verify_api_key)): + models = _get_available_models() return ModelListResponse(data=models) @@ -592,7 +632,11 @@ async def create_chat_completion( ): pool = GeminiClientPool() db = LMDBConversationStore() - model = Model.from_name(request.model) + + try: + model = _get_model_by_name(request.model) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc if len(request.messages) == 0: raise HTTPException( @@ -849,7 +893,7 @@ async def create_response( db = LMDBConversationStore() try: - model = Model.from_name(request_data.model) + model = _get_model_by_name(request_data.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc diff --git a/app/utils/config.py b/app/utils/config.py index 796ca75..a5c924a 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -50,12 +50,26 @@ def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]: return stripped or None +class GeminiModelConfig(BaseModel): + """Configuration for a custom Gemini model.""" + + model_name: Optional[str] = Field(default=None, description="Name of the model") + model_header: Optional[dict[str, Optional[str]]] = Field( + default=None, description="Header for the model" + ) + + class GeminiConfig(BaseModel): """Gemini API configuration""" clients: list[GeminiClientSettings] = Field( ..., description="List of Gemini client credential pairs" ) + models: list[GeminiModelConfig] = Field(default=[], description="List of custom Gemini models") + model_strategy: Literal["append", "overwrite"] = Field( + default="append", + description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", + ) timeout: int = Field(default=120, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( @@ -68,6 +82,13 @@ class GeminiConfig(BaseModel): description="Maximum characters Gemini Web can accept per request", ) + @field_validator("models") + @classmethod + def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: + """Filter out models that don't have a name set (placeholders).""" + + return [model for model in v if model.model_name] + class CORSConfig(BaseModel): """CORS configuration""" @@ -211,6 +232,53 @@ def _merge_clients_with_env( return result_clients if result_clients else base_clients +def extract_gemini_models_env() -> dict[int, dict[str, str]]: + """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict.""" + prefix = "CONFIG_GEMINI__MODELS__" + env_overrides: dict[int, dict[str, str]] = {} + 
to_delete = [] + for k, v in os.environ.items(): + if k.startswith(prefix): + parts = k.split("__") + if len(parts) < 4: + continue + index_str, field = parts[2], parts[3].lower() + if not index_str.isdigit(): + continue + idx = int(index_str) + env_overrides.setdefault(idx, {})[field] = v + to_delete.append(k) + # Remove these environment variables to avoid Pydantic parsing errors + for k in to_delete: + del os.environ[k] + return env_overrides + + +def _merge_models_with_env( + base_models: list[GeminiModelConfig] | None, + env_overrides: dict[int, dict[str, str]], +): + """Override base_models with env_overrides, return the new models list.""" + if not env_overrides: + return base_models or [] + result_models: list[GeminiModelConfig] = [] + if base_models: + result_models = [model.model_copy() for model in base_models] + + for idx in sorted(env_overrides): + overrides = env_overrides[idx] + if idx < len(result_models): + model_dict = result_models[idx].model_dump() + model_dict.update(overrides) + result_models[idx] = GeminiModelConfig(**model_dict) + elif idx == len(result_models): + new_model = GeminiModelConfig(**overrides) + result_models.append(new_model) + else: + raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).") + return result_models + + def initialize_config() -> Config: """ Initialize the configuration. @@ -221,6 +289,8 @@ def initialize_config() -> Config: try: # First, extract and remove Gemini clients related environment variables env_clients_overrides = extract_gemini_clients_env() + # Extract and remove Gemini models related environment variables + env_models_overrides = extract_gemini_models_env() # Then, initialize Config with pydantic_settings config = Config() # type: ignore @@ -228,7 +298,10 @@ def initialize_config() -> Config: # Synthesize clients config.gemini.clients = _merge_clients_with_env( config.gemini.clients, env_clients_overrides - ) # type: ignore + ) + + # Synthesize models + config.gemini.models = _merge_models_with_env(config.gemini.models, env_models_overrides) return config except ValidationError as e: diff --git a/config/config.yaml b/config/config.yaml index 89c88b7..84c4602 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -27,6 +27,11 @@ gemini: refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit + models: + - model_name: null + model_header: + x-goog-ext-xxxxxxxxx-jspb: null + model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) storage: path: "data/lmdb" # Database storage path From 5cb29e8ea7333fd3c207f60a75b5269105bae8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 30 Dec 2025 23:19:49 +0700 Subject: [PATCH 028/139] feat: Improve Gemini model environment variable parsing and nested field support - Enhanced `extract_gemini_models_env` to handle nested fields within environment variables. - Updated type hints for more flexibility in model overrides. - Improved `_merge_models_with_env` to better support field-level updates and appending new models. 
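A sketch of the nested form this parser accepts (the variable names and header key below are illustrative, not part of the change itself):

```python
import os

from app.utils.config import extract_gemini_models_env

# Hypothetical overrides for model index 0; top-level field names are
# lowercased, while sub-keys after MODEL_HEADER keep their original casing.
os.environ["CONFIG_GEMINI__MODELS__0__MODEL_NAME"] = "gemini-custom"
os.environ["CONFIG_GEMINI__MODELS__0__MODEL_HEADER__x-goog-ext-0-jspb"] = "[1]"

overrides = extract_gemini_models_env()
# -> {0: {"model_name": "gemini-custom",
#         "model_header": {"x-goog-ext-0-jspb": "[1]"}}}
# The matched variables are removed from os.environ as a side effect.
```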
--- app/utils/config.py | 31 +++++++++++++++++++++++-------- config/config.yaml | 2 +- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index a5c924a..5782c66 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,6 +1,6 @@ import os import sys -from typing import Literal, Optional +from typing import Any, Literal, Optional from loguru import logger from pydantic import BaseModel, Field, ValidationError, field_validator @@ -232,21 +232,34 @@ def _merge_clients_with_env( return result_clients if result_clients else base_clients -def extract_gemini_models_env() -> dict[int, dict[str, str]]: - """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict.""" +def extract_gemini_models_env() -> dict[int, dict[str, Any]]: + """Extract and remove all Gemini models related environment variables, supporting nested fields.""" prefix = "CONFIG_GEMINI__MODELS__" - env_overrides: dict[int, dict[str, str]] = {} + env_overrides: dict[int, dict[str, Any]] = {} to_delete = [] for k, v in os.environ.items(): if k.startswith(prefix): parts = k.split("__") if len(parts) < 4: continue - index_str, field = parts[2], parts[3].lower() + index_str = parts[2] if not index_str.isdigit(): continue idx = int(index_str) - env_overrides.setdefault(idx, {})[field] = v + + # Navigate to the correct nested dict + current = env_overrides.setdefault(idx, {}) + for i in range(3, len(parts) - 1): + field_name = parts[i].lower() + current = current.setdefault(field_name, {}) + + # Set the value (lowercase root field names, preserve sub-key casing) + last_part = parts[-1] + if len(parts) == 4: + current[last_part.lower()] = v + else: + current[last_part] = v + to_delete.append(k) # Remove these environment variables to avoid Pydantic parsing errors for k in to_delete: @@ -256,9 +269,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, str]]: def _merge_models_with_env( base_models: list[GeminiModelConfig] | None, - env_overrides: dict[int, dict[str, str]], + env_overrides: dict[int, dict[str, Any]], ): - """Override base_models with env_overrides, return the new models list.""" + """Override base_models with env_overrides using standard update (replace whole fields).""" if not env_overrides: return base_models or [] result_models: list[GeminiModelConfig] = [] @@ -268,10 +281,12 @@ def _merge_models_with_env( for idx in sorted(env_overrides): overrides = env_overrides[idx] if idx < len(result_models): + # Update existing model: overwrite fields found in env model_dict = result_models[idx].model_dump() model_dict.update(overrides) result_models[idx] = GeminiModelConfig(**model_dict) elif idx == len(result_models): + # Append new model new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: diff --git a/config/config.yaml b/config/config.yaml index 84c4602..2fbc061 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -27,11 +27,11 @@ gemini: refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. 
Non-pro users might have a lower limit + model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) models: - model_name: null model_header: x-goog-ext-xxxxxxxxx-jspb: null - model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) storage: path: "data/lmdb" # Database storage path From f25f16d00118ebeea7936cea34797270d5137b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 09:52:49 +0700 Subject: [PATCH 029/139] refactor: Consolidate utility functions and clean up unused code - Moved utility functions like `strip_code_fence`, `extract_tool_calls`, and `iter_stream_segments` to a centralized helper module. - Removed unused and redundant private methods from `chat.py`, including `_strip_code_fence`, `_strip_tagged_blocks`, and `_strip_system_hints`. - Updated imports and references across modules for consistency. - Simplified tool call and streaming logic by replacing inline implementations with shared helper functions. --- app/server/chat.py | 306 ++++------------------------------------ app/services/client.py | 16 +-- app/utils/helper.py | 312 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 342 insertions(+), 292 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0a4c16c..9485f7a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,12 +1,11 @@ import base64 import json import re -import struct import uuid from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any, Iterator +from typing import Any import orjson from fastapi import APIRouter, Depends, HTTPException, Request, status @@ -21,7 +20,6 @@ ChatCompletionRequest, ContentItem, ConversationInStore, - FunctionCall, Message, ModelData, ModelListResponse, @@ -37,26 +35,28 @@ ResponseToolChoice, ResponseUsage, Tool, - ToolCall, ToolChoiceFunction, ) from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore -from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config -from ..utils.helper import estimate_tokens +from ..utils.helper import ( + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + XML_HINT_STRIPPED, + XML_WRAP_HINT, + estimate_tokens, + extract_image_dimensions, + extract_tool_calls, + iter_stream_segments, + remove_tool_call_blocks, + strip_code_fence, + text_from_message, +) from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" -TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE) -TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE -) -JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) -CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") -XML_HINT_STRIPPED = XML_WRAP_HINT.strip() -CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip() router = APIRouter() @@ -118,14 +118,6 @@ def _build_structured_requirement( ) -def _strip_code_fence(text: str) -> str: - """Remove surrounding ```json fences if present.""" - match = JSON_FENCE_RE.match(text.strip()) - if match: - return match.group(1).strip() - return text.strip() - - def _build_tool_prompt( tools: list[Tool], tool_choice: str | ToolChoiceFunction | None, @@ -312,75 +304,6 @@ def 
_prepare_messages_for_model( return prepared -def _strip_system_hints(text: str) -> str: - """Remove system-level hint text from a given string.""" - if not text: - return text - cleaned = _strip_tagged_blocks(text) - cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") - cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "") - cleaned = CONTROL_TOKEN_RE.sub("", cleaned) - return cleaned.strip() - - -def _strip_tagged_blocks(text: str) -> str: - """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely. - - tool blocks are removed entirely (if missing end marker, drop to EOF). - - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF). - """ - if not text: - return text - - result: list[str] = [] - idx = 0 - length = len(text) - start_marker = "<|im_start|>" - end_marker = "<|im_end|>" - - while idx < length: - start = text.find(start_marker, idx) - if start == -1: - result.append(text[idx:]) - break - - # append any content before this block - result.append(text[idx:start]) - - role_start = start + len(start_marker) - newline = text.find("\n", role_start) - if newline == -1: - # malformed block; keep remainder as-is (safe behavior) - result.append(text[start:]) - break - - role = text[role_start:newline].strip().lower() - - end = text.find(end_marker, newline + 1) - if end == -1: - # missing end marker - if role == "tool": - # drop from start marker to EOF (skip remainder) - break - else: - # keep inner content from after the role newline to EOF - result.append(text[newline + 1 :]) - break - - block_end = end + len(end_marker) - - if role == "tool": - # drop whole block - idx = block_end - continue - - # keep the content without role markers - content = text[newline + 1 : end] - result.append(content) - idx = block_end - - return "".join(result) - - def _response_items_to_messages( items: str | list[ResponseInputItem], ) -> tuple[list[Message], str | list[ResponseInputItem]]: @@ -509,59 +432,6 @@ def _instructions_to_messages( return instruction_messages -def _remove_tool_call_blocks(text: str) -> str: - """Strip tool call code blocks from text.""" - if not text: - return text - cleaned = TOOL_BLOCK_RE.sub("", text) - return _strip_system_hints(cleaned) - - -def _extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: - """Extract tool call definitions and return cleaned text.""" - if not text: - return text, [] - - tool_calls: list[ToolCall] = [] - - def _replace(match: re.Match[str]) -> str: - block_content = match.group(1) - if not block_content: - return "" - - for call_match in TOOL_CALL_RE.finditer(block_content): - name = (call_match.group(1) or "").strip() - raw_args = (call_match.group(2) or "").strip() - if not name: - logger.warning( - f"Encountered tool_call block without a function name: {block_content}" - ) - continue - - arguments = raw_args - try: - parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False) - except json.JSONDecodeError: - logger.warning( - f"Failed to parse tool call arguments for '{name}'. Passing raw string." 
- ) - - tool_calls.append( - ToolCall( - id=f"call_{uuid.uuid4().hex}", - type="function", - function=FunctionCall(name=name, arguments=arguments), - ) - ) - - return "" - - cleaned = TOOL_BLOCK_RE.sub(_replace, text) - cleaned = _strip_system_hints(cleaned) - return cleaned, tool_calls - - def _get_model_by_name(name: str) -> Model: """ Retrieve a Model instance by name, considering custom models from config @@ -742,12 +612,12 @@ async def create_chat_completion( detail="Gemini output parsing failed unexpectedly.", ) from exc - visible_output, tool_calls = _extract_tool_calls(raw_output_with_think) - storage_output = _remove_tool_call_blocks(raw_output_clean).strip() + visible_output, tool_calls = extract_tool_calls(raw_output_with_think) + storage_output = remove_tool_call_blocks(raw_output_clean).strip() tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] if structured_requirement: - cleaned_visible = _strip_code_fence(visible_output or "") + cleaned_visible = strip_code_fence(visible_output or "") if not cleaned_visible: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, @@ -982,12 +852,12 @@ async def _build_payload( detail="Gemini output parsing failed unexpectedly.", ) from exc - visible_text, detected_tool_calls = _extract_tool_calls(text_with_think) - storage_output = _remove_tool_call_blocks(text_without_think).strip() + visible_text, detected_tool_calls = extract_tool_calls(text_with_think) + storage_output = remove_tool_call_blocks(text_without_think).strip() assistant_text = LMDBConversationStore.remove_think_tags(visible_text.strip()) if structured_requirement: - cleaned_visible = _strip_code_fence(assistant_text or "") + cleaned_visible = strip_code_fence(assistant_text or "") if not cleaned_visible: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, @@ -1089,7 +959,7 @@ async def _build_payload( response_id = f"resp_{uuid.uuid4().hex}" message_id = f"msg_{uuid.uuid4().hex}" - input_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + input_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_arg_text = "".join(call.function.arguments or "" for call in detected_tool_calls) completion_basis = assistant_text or "" if tool_arg_text: @@ -1152,25 +1022,6 @@ async def _build_payload( return response_payload -def _text_from_message(message: Message) -> str: - """Return text content from a message for token estimation.""" - base_text = "" - if isinstance(message.content, str): - base_text = message.content - elif isinstance(message.content, list): - base_text = "\n".join( - item.text or "" for item in message.content if getattr(item, "type", "") == "text" - ) - elif message.content is None: - base_text = "" - - if message.tool_calls: - tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) - base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text - - return base_text - - async def _find_reusable_session( db: LMDBConversationStore, pool: GeminiClientPool, @@ -1268,47 +1119,6 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s raise -def _iter_stream_segments(model_output: str, chunk_size: int = 64): - """Yield stream segments while keeping markers and words intact.""" - if not model_output: - return - - token_pattern = re.compile(r"\s+|\S+\s*") - pending = "" - - def _flush_pending() -> Iterator[str]: - nonlocal pending - if pending: - yield pending - pending = "" - - # Split on boundaries so the markers are 
never fragmented. - parts = re.split(r"()", model_output) - for part in parts: - if not part: - continue - if part in {"", ""}: - yield from _flush_pending() - yield part - continue - - for match in token_pattern.finditer(part): - token = match.group(0) - - if len(token) > chunk_size: - yield from _flush_pending() - for idx in range(0, len(token), chunk_size): - yield token[idx : idx + chunk_size] - continue - - if pending and len(pending) + len(token) > chunk_size: - yield from _flush_pending() - - pending += token - - yield from _flush_pending() - - def _create_streaming_response( model_output: str, tool_calls: list[dict], @@ -1320,7 +1130,7 @@ def _create_streaming_response( """Create streaming response with `usage` calculation included in the final chunk.""" # Calculate token usage - prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) completion_tokens = estimate_tokens(model_output + tool_args) total_tokens = prompt_tokens + completion_tokens @@ -1338,7 +1148,7 @@ async def generate_stream(): yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" # Stream output text in chunks for efficiency - for chunk in _iter_stream_segments(model_output): + for chunk in iter_stream_segments(model_output): data = { "id": completion_id, "object": "chat.completion.chunk", @@ -1452,7 +1262,7 @@ async def generate_stream(): content_text += c.text if content_text: - for chunk in _iter_stream_segments(content_text): + for chunk in iter_stream_segments(content_text): delta_event = { **base_event, "type": "response.output_text.delta", @@ -1501,7 +1311,7 @@ def _create_standard_response( ) -> dict: """Create standard response""" # Calculate token usage - prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) completion_tokens = estimate_tokens(model_output + tool_args) total_tokens = prompt_tokens + completion_tokens @@ -1534,70 +1344,6 @@ def _create_standard_response( return result -def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: - """Return image dimensions (width, height) if PNG or JPEG headers are present.""" - # PNG: dimensions stored in bytes 16..24 of the IHDR chunk - if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): - try: - width, height = struct.unpack(">II", data[16:24]) - return int(width), int(height) - except struct.error: - return None, None - - # JPEG: dimensions stored in SOF segment; iterate through markers to locate it - if len(data) >= 4 and data[0:2] == b"\xff\xd8": - idx = 2 - length = len(data) - sof_markers = { - 0xC0, - 0xC1, - 0xC2, - 0xC3, - 0xC5, - 0xC6, - 0xC7, - 0xC9, - 0xCA, - 0xCB, - 0xCD, - 0xCE, - 0xCF, - } - while idx < length: - # Find marker alignment (markers are prefixed with 0xFF bytes) - if data[idx] != 0xFF: - idx += 1 - continue - while idx < length and data[idx] == 0xFF: - idx += 1 - if idx >= length: - break - marker = data[idx] - idx += 1 - - if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7: - continue - - if idx + 1 >= length: - break - segment_length = (data[idx] << 8) + data[idx + 1] - idx += 2 - if segment_length < 2: - break - - if marker in sof_markers: - if idx + 4 < length: - # Skip precision 
byte at idx, then read height/width (big-endian) - height = (data[idx + 1] << 8) + data[idx + 2] - width = (data[idx + 3] << 8) + data[idx + 4] - return int(width), int(height) - break - - idx += segment_length - 2 - - return None, None - - async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): @@ -1619,6 +1365,6 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non original_path.rename(new_path) data = new_path.read_bytes() - width, height = _extract_image_dimensions(data) + width, height = extract_image_dimensions(data) filename = random_name return base64.b64encode(data).decode("ascii"), width, height, filename diff --git a/app/services/client.py b/app/services/client.py index 09c52c1..87c0ca7 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -9,18 +9,12 @@ from ..models import Message from ..utils import g_config -from ..utils.helper import add_tag, save_file_to_tempfile, save_url_to_tempfile - -XML_WRAP_HINT = ( - "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" - '```xml\n{"arg": "value"}\n```\n' - "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" -) -CODE_BLOCK_HINT = ( - "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " - "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n" - "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" +from ..utils.helper import ( + add_tag, + save_file_to_tempfile, + save_url_to_tempfile, ) + HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) diff --git a/app/utils/helper.py b/app/utils/helper.py index 89fc31e..2627faa 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,17 +1,41 @@ import base64 +import json import mimetypes +import re +import struct import tempfile +import uuid from pathlib import Path +from typing import Iterator from urllib.parse import urlparse import httpx from loguru import logger -VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} +from ..models import FunctionCall, Message, ToolCall HEADERS = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" } +VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} +XML_WRAP_HINT = ( + "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" + '```xml\n{"arg": "value"}\n```\n' + "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" +) +CODE_BLOCK_HINT = ( + "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " + "block and supply the correct language label (for example, ```python ... ``` or ```html ... 
```).\n" + "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" +) +TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) +TOOL_CALL_RE = re.compile( + r"(.*?)", re.DOTALL | re.IGNORECASE +) +JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) +CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") +XML_HINT_STRIPPED = XML_WRAP_HINT.strip() +CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip() def add_tag(role: str, content: str, unclose: bool = False) -> str: @@ -78,3 +102,289 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: path = Path(tmp.name) return path + + +def strip_code_fence(text: str) -> str: + """Remove surrounding ```json fences if present.""" + match = JSON_FENCE_RE.match(text.strip()) + if match: + return match.group(1).strip() + return text.strip() + + +def strip_tagged_blocks(text: str) -> str: + """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely. + - tool blocks are removed entirely (if missing end marker, drop to EOF). + - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF). + """ + if not text: + return text + + result: list[str] = [] + idx = 0 + length = len(text) + start_marker = "<|im_start|>" + end_marker = "<|im_end|>" + + while idx < length: + start = text.find(start_marker, idx) + if start == -1: + result.append(text[idx:]) + break + + # append any content before this block + result.append(text[idx:start]) + + role_start = start + len(start_marker) + newline = text.find("\n", role_start) + if newline == -1: + # malformed block; keep remainder as-is (safe behavior) + result.append(text[start:]) + break + + role = text[role_start:newline].strip().lower() + + end = text.find(end_marker, newline + 1) + if end == -1: + # missing end marker + if role == "tool": + # drop from start marker to EOF (skip remainder) + break + else: + # keep inner content from after the role newline to EOF + result.append(text[newline + 1 :]) + break + + block_end = end + len(end_marker) + + if role == "tool": + # drop whole block + idx = block_end + continue + + # keep the content without role markers + content = text[newline + 1 : end] + result.append(content) + idx = block_end + + return "".join(result) + + +def strip_system_hints(text: str) -> str: + """Remove system-level hint text from a given string.""" + if not text: + return text + cleaned = strip_tagged_blocks(text) + cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") + cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "") + cleaned = CONTROL_TOKEN_RE.sub("", cleaned) + return cleaned.strip() + + +def remove_tool_call_blocks(text: str) -> str: + """Strip tool call code blocks from text.""" + if not text: + return text + + # 1. Remove fenced blocks ONLY if they contain tool calls + def _replace_block(match: re.Match[str]) -> str: + block_content = match.group(1) + if not block_content: + return match.group(0) + + # Check if the block contains any tool call tag + if TOOL_CALL_RE.search(block_content): + return "" + + # Preserve the block if no tool call found + return match.group(0) + + cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) + + # 2. 
Remove orphaned tool calls + cleaned = TOOL_CALL_RE.sub("", cleaned) + + return strip_system_hints(cleaned) + + +def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: + """Extract tool call definitions and return cleaned text.""" + if not text: + return text, [] + + tool_calls: list[ToolCall] = [] + + def _create_tool_call(name: str, raw_args: str) -> None: + """Helper to parse args and append to tool_calls list.""" + if not name: + logger.warning("Encountered tool_call without a function name.") + return + + arguments = raw_args + try: + parsed_args = json.loads(raw_args) + arguments = json.dumps(parsed_args, ensure_ascii=False) + except json.JSONDecodeError: + logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") + + tool_calls.append( + ToolCall( + id=f"call_{uuid.uuid4().hex}", + type="function", + function=FunctionCall(name=name, arguments=arguments), + ) + ) + + def _replace_block(match: re.Match[str]) -> str: + block_content = match.group(1) + if not block_content: + return match.group(0) + + found_in_block = False + for call_match in TOOL_CALL_RE.finditer(block_content): + found_in_block = True + name = (call_match.group(1) or "").strip() + raw_args = (call_match.group(2) or "").strip() + _create_tool_call(name, raw_args) + + if found_in_block: + return "" + else: + return match.group(0) + + cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) + + def _replace_orphan(match: re.Match[str]) -> str: + name = (match.group(1) or "").strip() + raw_args = (match.group(2) or "").strip() + _create_tool_call(name, raw_args) + return "" + + cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) + + cleaned = strip_system_hints(cleaned) + return cleaned, tool_calls + + +def iter_stream_segments(model_output: str, chunk_size: int = 64) -> Iterator[str]: + """Yield stream segments while keeping markers and words intact.""" + if not model_output: + return + + token_pattern = re.compile(r"\s+|\S+\s*") + pending = "" + + def _flush_pending() -> Iterator[str]: + nonlocal pending + if pending: + yield pending + pending = "" + + # Split on boundaries so the markers are never fragmented. 
+ parts = re.split(r"()", model_output) + for part in parts: + if not part: + continue + if part in {"", ""}: + yield from _flush_pending() + yield part + continue + + for match in token_pattern.finditer(part): + token = match.group(0) + + if len(token) > chunk_size: + yield from _flush_pending() + for idx in range(0, len(token), chunk_size): + yield token[idx : idx + chunk_size] + continue + + if pending and len(pending) + len(token) > chunk_size: + yield from _flush_pending() + + pending += token + + yield from _flush_pending() + + +def text_from_message(message: Message) -> str: + """Return text content from a message for token estimation.""" + base_text = "" + if isinstance(message.content, str): + base_text = message.content + elif isinstance(message.content, list): + base_text = "\n".join( + item.text or "" for item in message.content if getattr(item, "type", "") == "text" + ) + elif message.content is None: + base_text = "" + + if message.tool_calls: + tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) + base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text + + return base_text + + +def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: + """Return image dimensions (width, height) if PNG or JPEG headers are present.""" + # PNG: dimensions stored in bytes 16..24 of the IHDR chunk + if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): + try: + width, height = struct.unpack(">II", data[16:24]) + return int(width), int(height) + except struct.error: + return None, None + + # JPEG: dimensions stored in SOF segment; iterate through markers to locate it + if len(data) >= 4 and data[0:2] == b"\xff\xd8": + idx = 2 + length = len(data) + sof_markers = { + 0xC0, + 0xC1, + 0xC2, + 0xC3, + 0xC5, + 0xC6, + 0xC7, + 0xC9, + 0xCA, + 0xCB, + 0xCD, + 0xCE, + 0xCF, + } + while idx < length: + # Find marker alignment (markers are prefixed with 0xFF bytes) + if data[idx] != 0xFF: + idx += 1 + continue + while idx < length and data[idx] == 0xFF: + idx += 1 + if idx >= length: + break + marker = data[idx] + idx += 1 + + if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7: + continue + + if idx + 1 >= length: + break + segment_length = (data[idx] << 8) + data[idx + 1] + idx += 2 + if segment_length < 2: + break + + if marker in sof_markers: + if idx + 4 < length: + # Skip precision byte at idx, then read height/width (big-endian) + height = (data[idx + 1] << 8) + data[idx + 2] + width = (data[idx + 3] << 8) + data[idx + 4] + return int(width), int(height) + break + + idx += segment_length - 2 + + return None, None From a1bc8e289ee797a761eb506dc4d01e486c919aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 10:01:17 +0700 Subject: [PATCH 030/139] fix: Handle None input in `estimate_tokens` and return 0 for empty text --- app/utils/helper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 2627faa..28be240 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -47,8 +47,10 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str: return f"<|im_start|>{role}\n{content}" + ("\n<|im_end|>" if not unclose else "") -def estimate_tokens(text: str) -> int: +def estimate_tokens(text: str | None) -> int: """Estimate the number of tokens heuristically based on character count""" + if not text: + return 0 return int(len(text) / 3) From a7e15d96bd2a4f62094bea02be7e86c8d305e59e Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 13:32:49 +0700 Subject: [PATCH 031/139] refactor: Simplify model configuration and add JSON parsing validators - Replaced unused model placeholder in `config.yaml` with an empty list. - Added JSON parsing validators for `model_header` and `models` to enhance flexibility and error handling. - Improved validation to filter out incomplete model configurations. --- app/utils/config.py | 24 +++++++++++++++++++++++- config/config.yaml | 5 +---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 5782c66..69a4fac 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,3 +1,4 @@ +import json import os import sys from typing import Any, Literal, Optional @@ -58,6 +59,17 @@ class GeminiModelConfig(BaseModel): default=None, description="Header for the model" ) + @field_validator("model_header", mode="before") + @classmethod + def _parse_json_string(cls, v: Any) -> Any: + if isinstance(v, str) and v.strip().startswith("{"): + try: + return json.loads(v) + except json.JSONDecodeError: + # Return the original value to let Pydantic handle the error or type mismatch + return v + return v + class GeminiConfig(BaseModel): """Gemini API configuration""" @@ -82,11 +94,21 @@ class GeminiConfig(BaseModel): description="Maximum characters Gemini Web can accept per request", ) + @field_validator("models", mode="before") + @classmethod + def _parse_models_json(cls, v: Any) -> Any: + if isinstance(v, str) and v.strip().startswith("["): + try: + return json.loads(v) + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse models JSON string: {e}") + return v + return v + @field_validator("models") @classmethod def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: """Filter out models that don't have a name set (placeholders).""" - return [model for model in v if model.model_name] diff --git a/config/config.yaml b/config/config.yaml index 2fbc061..f2b17fb 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,10 +28,7 @@ gemini: verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) - models: - - model_name: null - model_header: - x-goog-ext-xxxxxxxxx-jspb: null + models: [] storage: path: "data/lmdb" # Database storage path From 61c5f3b7af4ef6b78d5dc7e3d5ba9e6009b7d3cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 13:46:58 +0700 Subject: [PATCH 032/139] refactor: Simplify Gemini model environment variable parsing with JSON support - Replaced prefix-based parsing with a root key approach. - Added JSON parsing to handle list-based model configurations. - Improved handling of errors and cleanup of environment variables. 
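For example (the model name and header key are illustrative):

```python
import os

from app.utils.config import extract_gemini_models_env

# The whole models list is now supplied as a single JSON document.
os.environ["CONFIG_GEMINI__MODELS"] = (
    '[{"model_name": "gemini-custom", "model_header": {"x-goog-ext-0-jspb": "[1]"}}]'
)

overrides = extract_gemini_models_env()
# -> {0: {"model_name": "gemini-custom", "model_header": {"x-goog-ext-0-jspb": "[1]"}}}
# The variable is then deleted from os.environ so pydantic-settings does not re-parse it.
```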
--- app/utils/config.py | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 69a4fac..6cb5664 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -256,36 +256,26 @@ def _merge_clients_with_env( def extract_gemini_models_env() -> dict[int, dict[str, Any]]: """Extract and remove all Gemini models related environment variables, supporting nested fields.""" - prefix = "CONFIG_GEMINI__MODELS__" - env_overrides: dict[int, dict[str, Any]] = {} - to_delete = [] - for k, v in os.environ.items(): - if k.startswith(prefix): - parts = k.split("__") - if len(parts) < 4: - continue - index_str = parts[2] - if not index_str.isdigit(): - continue - idx = int(index_str) + import json - # Navigate to the correct nested dict - current = env_overrides.setdefault(idx, {}) - for i in range(3, len(parts) - 1): - field_name = parts[i].lower() - current = current.setdefault(field_name, {}) + root_key = "CONFIG_GEMINI__MODELS" + env_overrides: dict[int, dict[str, Any]] = {} - # Set the value (lowercase root field names, preserve sub-key casing) - last_part = parts[-1] - if len(parts) == 4: - current[last_part.lower()] = v - else: - current[last_part] = v + if root_key in os.environ: + try: + val = os.environ[root_key] + if val.strip().startswith("["): + models_list = json.loads(val) + if isinstance(models_list, list): + for idx, model_data in enumerate(models_list): + if isinstance(model_data, dict): + env_overrides[idx] = model_data + + # Remove the environment variable to avoid Pydantic parsing errors + del os.environ[root_key] + except Exception as e: + logger.warning(f"Failed to parse {root_key} as JSON: {e}") - to_delete.append(k) - # Remove these environment variables to avoid Pydantic parsing errors - for k in to_delete: - del os.environ[k] return env_overrides From efd056c270db5130c59b4e66c2543be7f5e8c6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 14:09:41 +0700 Subject: [PATCH 033/139] fix: Enhance Gemini model environment variable parsing with fallback to Python literals - Added `ast.literal_eval` as a fallback for parsing environment variables when JSON decoding fails. - Improved error handling and logging for invalid configurations. - Ensured proper cleanup of environment variables post-parsing. 
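A sketch of the fallback path (values are illustrative):

```python
import os

from app.utils.config import extract_gemini_models_env

# A Python-literal value (single quotes) is not valid JSON, so json.loads
# raises JSONDecodeError and ast.literal_eval is tried as the fallback.
os.environ["CONFIG_GEMINI__MODELS"] = (
    "[{'model_name': 'gemini-custom', 'model_header': None}]"
)

overrides = extract_gemini_models_env()
# -> {0: {"model_name": "gemini-custom", "model_header": None}}
```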
--- app/utils/config.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 6cb5664..74a5294 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,3 +1,4 @@ +import ast import json import os import sys @@ -256,25 +257,31 @@ def _merge_clients_with_env( def extract_gemini_models_env() -> dict[int, dict[str, Any]]: """Extract and remove all Gemini models related environment variables, supporting nested fields.""" - import json - root_key = "CONFIG_GEMINI__MODELS" env_overrides: dict[int, dict[str, Any]] = {} if root_key in os.environ: + val = os.environ[root_key] + models_list = None + parsed_successfully = False + try: - val = os.environ[root_key] - if val.strip().startswith("["): - models_list = json.loads(val) - if isinstance(models_list, list): - for idx, model_data in enumerate(models_list): - if isinstance(model_data, dict): - env_overrides[idx] = model_data + models_list = json.loads(val) + parsed_successfully = True + except json.JSONDecodeError: + try: + models_list = ast.literal_eval(val) + parsed_successfully = True + except (ValueError, SyntaxError) as e: + logger.warning(f"Failed to parse {root_key} as JSON or Python literal: {e}") + + if parsed_successfully and isinstance(models_list, list): + for idx, model_data in enumerate(models_list): + if isinstance(model_data, dict): + env_overrides[idx] = model_data # Remove the environment variable to avoid Pydantic parsing errors del os.environ[root_key] - except Exception as e: - logger.warning(f"Failed to parse {root_key} as JSON: {e}") return env_overrides @@ -298,7 +305,7 @@ def _merge_models_with_env( model_dict.update(overrides) result_models[idx] = GeminiModelConfig(**model_dict) elif idx == len(result_models): - # Append new model + # Append new models new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: From 476b9dd228aa99501638987d1f44fe3c5eb23067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 17:53:38 +0700 Subject: [PATCH 034/139] fix: Improve regex patterns in helper module - Adjusted `TOOL_CALL_RE` regex pattern for better accuracy. 
--- app/utils/helper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 28be240..99e6d7a 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -30,7 +30,7 @@ ) TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE + r"(.*?)", re.DOTALL | re.IGNORECASE ) JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") @@ -140,7 +140,7 @@ def strip_tagged_blocks(text: str) -> str: role_start = start + len(start_marker) newline = text.find("\n", role_start) if newline == -1: - # malformed block; keep remainder as-is (safe behavior) + # malformed block; keep the remainder as-is (safe behavior) result.append(text[start:]) break @@ -150,7 +150,7 @@ def strip_tagged_blocks(text: str) -> str: if end == -1: # missing end marker if role == "tool": - # drop from start marker to EOF (skip remainder) + # drop from the start marker to EOF (skip the remainder) break else: # keep inner content from after the role newline to EOF @@ -160,7 +160,7 @@ def strip_tagged_blocks(text: str) -> str: block_end = end + len(end_marker) if role == "tool": - # drop whole block + # drop the whole block idx = block_end continue @@ -217,7 +217,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: - """Helper to parse args and append to tool_calls list.""" + """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") return From 35c1e99993d11033ae9047e85f645ce5def7f09b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 13 Jan 2026 09:02:10 +0700 Subject: [PATCH 035/139] docs: Update README files to include custom model configuration and environment variable setup --- README.md | 47 ++++++++++++++++++++++++++++++++++++++++++++++- README.zh.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2df3a73..5d6de40 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ services: - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID} - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS} - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above - restart: on-failure:3 # Avoid retrying too many times + restart: on-failure:3 # Avoid retrying too many times ``` Then run: @@ -187,6 +187,51 @@ To use Gemini-FastAPI, you need to extract your Gemini session cookies: Each client entry can be configured with a different proxy to work around rate limits. Omit the `proxy` field or set it to `null` or an empty string to keep a direct connection. +### Custom Models + +You can define custom models in `config/config.yaml` or via environment variables. + +#### YAML Configuration + +```yaml +gemini: + model_strategy: "append" # "append" (default + custom) or "overwrite" (custom only) + models: + - model_name: "gemini-3.0-pro" + model_header: + x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]' +``` + +#### Environment Variables + +You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. 
+ +##### Bash + +```bash +export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" +export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' +``` + +##### Docker Compose + +```yaml +services: + gemini-fastapi: + environment: + - CONFIG_GEMINI__MODEL_STRATEGY=overwrite + - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] +``` + +##### Docker CLI + +```bash +docker run -d \ + -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ + -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ + ghcr.io/nativu5/gemini-fastapi +``` + ## Acknowledgments - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client diff --git a/README.zh.md b/README.zh.md index 6b7dd74..791afd8 100644 --- a/README.zh.md +++ b/README.zh.md @@ -4,7 +4,6 @@ [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) - [ [English](README.md) | 中文 ] 将 Gemini 网页端模型封装为兼容 OpenAI API 的 API Server。基于 [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) 实现。 @@ -50,6 +49,7 @@ pip install -e . ### 配置 编辑 `config/config.yaml` 并提供至少一组凭证: + ```yaml gemini: clients: @@ -118,7 +118,7 @@ services: - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID} - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS} - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above - restart: on-failure:3 # Avoid retrying too many times + restart: on-failure:3 # Avoid retrying too many times ``` 然后运行: @@ -186,6 +186,51 @@ export CONFIG_STORAGE__MAX_SIZE=268435456 # 256 MB 每个客户端条目可以配置不同的代理,从而规避速率限制。省略 `proxy` 字段或将其设置为 `null` 或空字符串以保持直连。 +### 自定义模型 + +你可以在 `config/config.yaml` 中或通过环境变量定义自定义模型。 + +#### YAML 配置 + +```yaml +gemini: + model_strategy: "append" # "append" (默认 + 自定义) 或 "overwrite" (仅限自定义) + models: + - model_name: "gemini-3.0-pro" + model_header: + x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]' +``` + +#### 环境变量 + +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。 + +##### Bash + +```bash +export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" +export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' +``` + +##### Docker Compose + +```yaml +services: + gemini-fastapi: + environment: + - CONFIG_GEMINI__MODEL_STRATEGY=overwrite + - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] +``` + +##### Docker CLI + +```bash +docker run -d \ + -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ + -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ + ghcr.io/nativu5/gemini-fastapi +``` + ## 鸣谢 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端 @@ -193,4 +238,4 @@ export CONFIG_STORAGE__MAX_SIZE=268435456 # 256 MB ## 免责声明 -本项目与 Google 或 OpenAI 
无关,仅供学习和研究使用。本项目使用了逆向工程 API,可能不符合 Google 服务条款。使用风险自负。 \ No newline at end of file +本项目与 Google 或 OpenAI 无关,仅供学习和研究使用。本项目使用了逆向工程 API,可能不符合 Google 服务条款。使用风险自负。 From 9b8162133e86a323400e7e2fb36ed651b31c795f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 13 Jan 2026 09:23:28 +0700 Subject: [PATCH 036/139] fix: Remove unused headers from HTTP client in helper module --- app/utils/helper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 99e6d7a..51a6ccf 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -14,9 +14,6 @@ from ..models import FunctionCall, Message, ToolCall -HEADERS = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" -} VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} XML_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" @@ -82,7 +79,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: if not suffix: suffix = f".{mime_type.split('/')[1]}" else: - async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client: + async with httpx.AsyncClient(follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content From 32a48dcdc98d9e96e791ae6f914e6b3f12804c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 15 Jan 2026 10:18:58 +0700 Subject: [PATCH 037/139] fix: Update README and README.zh to clarify model configuration via environment variables; enhance error logging in config validation --- README.md | 23 +---------------------- README.zh.md | 23 +---------------------- app/server/chat.py | 6 ++++-- app/utils/config.py | 27 +++++++++++++++++++++++---- 4 files changed, 29 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 5d6de40..d7a7214 100644 --- a/README.md +++ b/README.md @@ -204,34 +204,13 @@ gemini: #### Environment Variables -You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. - -##### Bash +You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file. 
```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' ``` -##### Docker Compose - -```yaml -services: - gemini-fastapi: - environment: - - CONFIG_GEMINI__MODEL_STRATEGY=overwrite - - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] -``` - -##### Docker CLI - -```bash -docker run -d \ - -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ - -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ - ghcr.io/nativu5/gemini-fastapi -``` - ## Acknowledgments - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client diff --git a/README.zh.md b/README.zh.md index 791afd8..09d80a4 100644 --- a/README.zh.md +++ b/README.zh.md @@ -203,34 +203,13 @@ gemini: #### 环境变量 -你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。 - -##### Bash +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式,无需修改配置文件。 ```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' ``` -##### Docker Compose - -```yaml -services: - gemini-fastapi: - environment: - - CONFIG_GEMINI__MODEL_STRATEGY=overwrite - - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] -``` - -##### Docker CLI - -```bash -docker run -d \ - -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ - -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ - ghcr.io/nativu5/gemini-fastapi -``` - ## 鸣谢 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端 diff --git a/app/server/chat.py b/app/server/chat.py index 9485f7a..6e517ea 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -924,7 +924,7 @@ async def _build_payload( image_call_items.append( ResponseImageGenerationCall( - id=filename.split(".")[0], + id=filename.rsplit(".", 1)[0], status="completed", result=image_base64, output_format=img_format, @@ -1350,7 +1350,9 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non try: saved_path = await image.save(path=str(temp_dir), full_size=True) except Exception as e: - logger.warning(f"Failed to download full-size image, retrying with default size: {e}") + logger.warning( + f"Failed to download full-size GeneratedImage, retrying with default size: {e}" + ) saved_path = await image.save(path=str(temp_dir), full_size=False) else: saved_path = await image.save(path=str(temp_dir)) diff --git a/app/utils/config.py b/app/utils/config.py index 74a5294..a9c5d44 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -109,8 +109,21 @@ def _parse_models_json(cls, v: Any) -> Any: @field_validator("models") @classmethod def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: - """Filter out models that don't have a name set (placeholders).""" - 
return [model for model in v if model.model_name] + """Filter out models that don't have all required fields set.""" + valid_models = [] + for model in v: + if model.model_name and model.model_header: + valid_models.append(model) + else: + missing = [] + if not model.model_name: + missing.append("model_name") + if not model.model_header: + missing.append("model_header") + logger.warning( + f"Discarding custom model due to missing {', '.join(missing)}: {model}" + ) + return valid_models class CORSConfig(BaseModel): @@ -251,7 +264,10 @@ def _merge_clients_with_env( new_client = GeminiClientSettings(**overrides) result_clients.append(new_client) else: - raise IndexError(f"Client index {idx} in env is out of range.") + raise IndexError( + f"Client index {idx} in env is out of range (current count: {len(result_clients)}). " + "Client indices must be contiguous starting from 0." + ) return result_clients if result_clients else base_clients @@ -309,7 +325,10 @@ def _merge_models_with_env( new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: - raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).") + raise IndexError( + f"Model index {idx} in env is out of range (current count: {len(result_models)}). " + "Model indices must be contiguous starting from 0." + ) return result_models From 0c00b089d5b33e394abaac6a1d36ae08cede166c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 15 Jan 2026 11:24:08 +0700 Subject: [PATCH 038/139] Update README and README.zh to clarify model configuration via JSON string or list structure for enhanced flexibility in automated environments --- README.md | 2 +- README.zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d7a7214..330e9c8 100644 --- a/README.md +++ b/README.md @@ -204,7 +204,7 @@ gemini: #### Environment Variables -You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file. +You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments (e.g. Docker) without modifying the configuration file. 
```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" diff --git a/README.zh.md b/README.zh.md index 09d80a4..2f9e1b5 100644 --- a/README.zh.md +++ b/README.zh.md @@ -203,7 +203,7 @@ gemini: #### 环境变量 -你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式,无需修改配置文件。 +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。这为通过 shell 或在自动化环境(例如 Docker)中覆盖设置提供了一种灵活的方式,而无需修改配置文件。 ```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" From b599d99f9967188bb8a277fd09951ddf32006f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 23 Jan 2026 12:14:40 +0700 Subject: [PATCH 039/139] Refactor: compress JSON content to save tokens and streamline sending multiple chunks --- app/server/chat.py | 50 +++++++++++++++++++++++++++++------------- app/services/client.py | 4 ++-- app/utils/helper.py | 2 +- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 6e517ea..1e7d786 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,5 +1,7 @@ +import asyncio import base64 import json +import random import re import uuid from dataclasses import dataclass @@ -95,7 +97,7 @@ def _build_structured_requirement( schema_name = json_schema.get("name") or "response" strict = json_schema.get("strict", True) - pretty_schema = json.dumps(schema, ensure_ascii=False, indent=2, sort_keys=True) + pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True) instruction_parts = [ "You must respond with a single valid JSON document that conforms to the schema shown below.", "Do not include explanations, comments, or any text before or after the JSON.", @@ -135,7 +137,7 @@ def _build_tool_prompt( description = function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = json.dumps(function.parameters, ensure_ascii=False, indent=2) + schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":")) lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -635,7 +637,7 @@ async def create_chat_completion( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False) + canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) visible_output = canonical_output storage_output = canonical_output @@ -875,7 +877,7 @@ async def _build_payload( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False) + canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) assistant_text = canonical_output storage_output = canonical_output logger.debug( @@ -1081,38 +1083,56 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s that Gemini can produce the actual answer. """ if len(text) <= MAX_CHARS_PER_REQUEST: - # No need to split - a single request is fine. 
try: return await session.send_message(text, files=files) except Exception as e: logger.exception(f"Error sending message to Gemini: {e}") raise + hint_len = len(CONTINUATION_HINT) - chunk_size = MAX_CHARS_PER_REQUEST - hint_len + safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len chunks: list[str] = [] pos = 0 total = len(text) + while pos < total: - end = min(pos + chunk_size, total) - chunk = text[pos:end] - pos = end + remaining = total - pos + if remaining <= MAX_CHARS_PER_REQUEST: + chunks.append(text[pos:]) + break + + end = pos + safe_chunk_size + slice_candidate = text[pos:end] + # Try to find a safe split point + split_idx = -1 + idx = slice_candidate.rfind("\n") + if idx != -1: + split_idx = idx + + if split_idx != -1: + split_at = pos + split_idx + 1 + else: + split_at = end - # If this is NOT the last chunk, add the continuation hint. - if end < total: - chunk += CONTINUATION_HINT + chunk = text[pos:split_at] + CONTINUATION_HINT chunks.append(chunk) + pos = split_at - # Fire off all but the last chunk, discarding the interim "ok" replies. - for chk in chunks[:-1]: + chunks_size = len(chunks) + for i, chk in enumerate(chunks[:-1]): try: + logger.debug(f"Sending chunk {i + 1}/{chunks_size}...") await session.send_message(chk) + delay = random.uniform(1.0, 3.0) + logger.debug(f"Sleeping for {delay:.2f}s...") + await asyncio.sleep(delay) except Exception as e: logger.exception(f"Error sending chunk to Gemini: {e}") raise - # The last chunk carries the files (if any) and we return its response. try: + logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...") return await session.send_message(chunks[-1], files=files) except Exception as e: logger.exception(f"Error sending final chunk to Gemini: {e}") diff --git a/app/services/client.py b/app/services/client.py index 87c0ca7..1f23271 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -123,7 +123,7 @@ async def process_message( args_text = call.function.arguments.strip() try: parsed_args = json.loads(args_text) - args_text = json.dumps(parsed_args, ensure_ascii=False) + args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) except (json.JSONDecodeError, TypeError): # Leave args_text as is if it is not valid JSON pass @@ -132,7 +132,7 @@ async def process_message( ) if tool_blocks: - tool_section = "```xml\n" + "\n".join(tool_blocks) + "\n```" + tool_section = "```xml\n" + "".join(tool_blocks) + "\n```" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment) diff --git a/app/utils/helper.py b/app/utils/helper.py index 51a6ccf..578b666 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -222,7 +222,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False) + arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) except json.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") From 186b8448d7f088df621b627ca7b28c5a7acaf341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 23 Jan 2026 23:08:32 +0700 Subject: [PATCH 040/139] Refactor: Modify the LMDB store to fix issues where no conversation is found in either the raw or cleaned history. 
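A condensed sketch of the hashing normalization, using plain dicts in place of the project's `Message` model (illustrative only; the diff below does the same inside `_hash_message`):

```python
import hashlib
import orjson

def hash_message(message: dict) -> str:
    # Collapse a pure-text content list to a plain string before hashing, so the
    # OpenAI-style list form and the string form of the same message match.
    content = message.get("content")
    if isinstance(content, list) and all(
        isinstance(item, dict) and item.get("type") == "text" for item in content
    ):
        message = {**message, "content": "".join(item.get("text") or "" for item in content)}
    return hashlib.sha256(orjson.dumps(message, option=orjson.OPT_SORT_KEYS)).hexdigest()

a = {"role": "user", "content": "hello"}
b = {"role": "user", "content": [{"type": "text", "text": "hello"}]}
assert hash_message(a) == hash_message(b)  # both forms now map to the same history key
```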
--- app/services/lmdb.py | 46 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 8ccb0d4..d671663 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -9,7 +9,7 @@ import orjson from loguru import logger -from ..models import ConversationInStore, Message +from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config from ..utils.singleton import Singleton @@ -18,6 +18,19 @@ def _hash_message(message: Message) -> str: """Generate a hash for a single message.""" # Convert message to dict and sort keys for consistent hashing message_dict = message.model_dump(mode="json") + content = message_dict.get("content") + if isinstance(content, list): + is_pure_text = True + text_parts = [] + for item in content: + if not isinstance(item, dict) or item.get("type") != "text": + is_pure_text = False + break + text_parts.append(item.get("text") or "") + + if is_pure_text: + message_dict["content"] = "".join(text_parts) + message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() @@ -435,12 +448,31 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: """ cleaned_messages = [] for msg in messages: - if msg.role == "assistant" and isinstance(msg.content, str): - normalized_content = LMDBConversationStore.remove_think_tags(msg.content) - # Only create a new object if content actually changed - if normalized_content != msg.content: - cleaned_msg = Message(role=msg.role, content=normalized_content, name=msg.name) - cleaned_messages.append(cleaned_msg) + if msg.role == "assistant": + if isinstance(msg.content, str): + normalized_content = LMDBConversationStore.remove_think_tags(msg.content) + if normalized_content != msg.content: + cleaned_msg = Message( + role=msg.role, content=normalized_content, name=msg.name + ) + cleaned_messages.append(cleaned_msg) + else: + cleaned_messages.append(msg) + elif isinstance(msg.content, list): + new_content = [] + changed = False + for item in msg.content: + if isinstance(item, ContentItem) and item.type == "text" and item.text: + cleaned_text = LMDBConversationStore.remove_think_tags(item.text) + if cleaned_text != item.text: + changed = True + item = item.model_copy(update={"text": cleaned_text}) + new_content.append(item) + + if changed: + cleaned_messages.append(msg.model_copy(update={"content": new_content})) + else: + cleaned_messages.append(msg) else: cleaned_messages.append(msg) else: From 6dd1fecdced932c537f579a3c5dd3db87847d475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:03:24 +0700 Subject: [PATCH 041/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. 
--- app/services/lmdb.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index d671663..93c7723 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -18,8 +18,12 @@ def _hash_message(message: Message) -> str: """Generate a hash for a single message.""" # Convert message to dict and sort keys for consistent hashing message_dict = message.model_dump(mode="json") + + # Normalize content: empty string -> None content = message_dict.get("content") - if isinstance(content, list): + if content == "": + message_dict["content"] = None + elif isinstance(content, list): is_pure_text = True text_parts = [] for item in content: @@ -29,7 +33,27 @@ def _hash_message(message: Message) -> str: text_parts.append(item.get("text") or "") if is_pure_text: - message_dict["content"] = "".join(text_parts) + text_content = "".join(text_parts) + message_dict["content"] = text_content if text_content else None + + # Normalize tool_calls: empty list -> None, and canonicalize arguments + tool_calls = message_dict.get("tool_calls") + if not tool_calls: + message_dict["tool_calls"] = None + elif isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict) and "function" in tool_call: + func = tool_call["function"] + args = func.get("arguments") + if isinstance(args, str): + try: + # Parse and re-dump to canonicalize (remove extra whitespace, sort keys) + parsed = orjson.loads(args) + func["arguments"] = orjson.dumps( + parsed, option=orjson.OPT_SORT_KEYS + ).decode("utf-8") + except Exception: + pass message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() From 20ed2456d2324501bbe4ba6392870cd612c9083c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:46:27 +0700 Subject: [PATCH 042/139] Refactor: Update all functions to use orjson for better performance --- app/main.py | 2 ++ app/server/chat.py | 17 ++++++++--------- app/services/client.py | 8 ++++---- app/utils/config.py | 14 +++++++------- app/utils/helper.py | 8 ++++---- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/app/main.py b/app/main.py index c215e2a..307eb36 100644 --- a/app/main.py +++ b/app/main.py @@ -2,6 +2,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI +from fastapi.responses import ORJSONResponse from loguru import logger from .server.chat import router as chat_router @@ -92,6 +93,7 @@ def create_app() -> FastAPI: description="OpenAI-compatible API for Gemini Web", version="1.0.0", lifespan=lifespan, + default_response_class=ORJSONResponse, ) add_cors_middleware(app) diff --git a/app/server/chat.py b/app/server/chat.py index 1e7d786..a9d9dec 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,6 +1,5 @@ import asyncio import base64 -import json import random import re import uuid @@ -97,7 +96,7 @@ def _build_structured_requirement( schema_name = json_schema.get("name") or "response" strict = json_schema.get("strict", True) - pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True) + pretty_schema = orjson.dumps(schema, option=orjson.OPT_SORT_KEYS).decode("utf-8") instruction_parts = [ "You must respond with a single valid JSON document that conforms to the schema shown below.", "Do not include explanations, comments, or any text before or after the JSON.", @@ -137,7 +136,7 @@ def _build_tool_prompt( description = 
function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":")) + schema_text = orjson.dumps(function.parameters).decode("utf-8") lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -626,8 +625,8 @@ async def create_chat_completion( detail="LLM returned an empty response while JSON schema output was requested.", ) try: - structured_payload = json.loads(cleaned_visible) - except json.JSONDecodeError as exc: + structured_payload = orjson.loads(cleaned_visible) + except orjson.JSONDecodeError as exc: logger.warning( f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " f"{cleaned_visible}" @@ -637,7 +636,7 @@ async def create_chat_completion( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") visible_output = canonical_output storage_output = canonical_output @@ -866,8 +865,8 @@ async def _build_payload( detail="LLM returned an empty response while JSON schema output was requested.", ) try: - structured_payload = json.loads(cleaned_visible) - except json.JSONDecodeError as exc: + structured_payload = orjson.loads(cleaned_visible) + except orjson.JSONDecodeError as exc: logger.warning( f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " f"{cleaned_visible}" @@ -877,7 +876,7 @@ async def _build_payload( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") assistant_text = canonical_output storage_output = canonical_output logger.debug( diff --git a/app/services/client.py b/app/services/client.py index 1f23271..55be11a 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,9 +1,9 @@ import html -import json import re from pathlib import Path from typing import Any, cast +import orjson from gemini_webapi import GeminiClient, ModelOutput from loguru import logger @@ -122,9 +122,9 @@ async def process_message( for call in message.tool_calls: args_text = call.function.arguments.strip() try: - parsed_args = json.loads(args_text) - args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) - except (json.JSONDecodeError, TypeError): + parsed_args = orjson.loads(args_text) + args_text = orjson.dumps(parsed_args).decode("utf-8") + except orjson.JSONDecodeError: # Leave args_text as is if it is not valid JSON pass tool_blocks.append( diff --git a/app/utils/config.py b/app/utils/config.py index a9c5d44..708462d 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,9 +1,9 @@ import ast -import json import os import sys from typing import Any, Literal, Optional +import orjson from loguru import logger from pydantic import BaseModel, Field, ValidationError, field_validator from pydantic_settings import ( @@ -65,8 +65,8 @@ class GeminiModelConfig(BaseModel): def _parse_json_string(cls, v: Any) -> Any: if isinstance(v, str) and v.strip().startswith("{"): try: - return json.loads(v) - except json.JSONDecodeError: + return orjson.loads(v) + except orjson.JSONDecodeError: # Return the original value to let Pydantic handle 
the error or type mismatch return v return v @@ -100,8 +100,8 @@ class GeminiConfig(BaseModel): def _parse_models_json(cls, v: Any) -> Any: if isinstance(v, str) and v.strip().startswith("["): try: - return json.loads(v) - except json.JSONDecodeError as e: + return orjson.loads(v) + except orjson.JSONDecodeError as e: logger.warning(f"Failed to parse models JSON string: {e}") return v return v @@ -282,9 +282,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, Any]]: parsed_successfully = False try: - models_list = json.loads(val) + models_list = orjson.loads(val) parsed_successfully = True - except json.JSONDecodeError: + except orjson.JSONDecodeError: try: models_list = ast.literal_eval(val) parsed_successfully = True diff --git a/app/utils/helper.py b/app/utils/helper.py index 578b666..1dc518f 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,5 +1,4 @@ import base64 -import json import mimetypes import re import struct @@ -10,6 +9,7 @@ from urllib.parse import urlparse import httpx +import orjson from loguru import logger from ..models import FunctionCall, Message, ToolCall @@ -221,9 +221,9 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: - parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) - except json.JSONDecodeError: + parsed_args = orjson.loads(raw_args) + arguments = orjson.dumps(parsed_args).decode("utf-8") + except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") tool_calls.append( From f67fe63b3b654d3a28cc5ca0363a4ad894831d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:47:26 +0700 Subject: [PATCH 043/139] Update project dependencies --- pyproject.toml | 21 ++++----- uv.lock | 118 +++++++++++++++++++++++++------------------------ 2 files changed, 71 insertions(+), 68 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32a42b4..1c30f8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,24 +5,25 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.115.12", - "gemini-webapi>=1.17.0", - "lmdb>=1.6.2", - "loguru>=0.7.0", - "pydantic-settings[yaml]>=2.9.1", - "uvicorn>=0.34.1", - "uvloop>=0.21.0; sys_platform != 'win32'", + "fastapi>=0.128.0", + "gemini-webapi>=1.17.3", + "lmdb>=1.7.5", + "loguru>=0.7.3", + "orjson>=3.11.5", + "pydantic-settings[yaml]>=2.12.0", + "uvicorn>=0.40.0", + "uvloop>=0.22.1; sys_platform != 'win32'", ] [project.optional-dependencies] dev = [ - "ruff>=0.11.7", + "ruff>=0.14.14", ] [tool.ruff] line-length = 100 lint.select = ["E", "F", "W", "I", "RUF"] -lint.ignore = ["E501"] +lint.ignore = ["E501"] [tool.ruff.format] quote-style = "double" @@ -30,5 +31,5 @@ indent-style = "space" [dependency-groups] dev = [ - "ruff>=0.11.13", + "ruff>=0.14.14", ] diff --git a/uv.lock b/uv.lock index 923e6d3..50a73be 100644 --- a/uv.lock +++ b/uv.lock @@ -22,24 +22,24 @@ wheels = [ [[package]] name = "anyio" -version = "4.12.0" +version = "4.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/16/ce/8a777047513153587e5434fd752e89334ac33e379aa3497db860eeb60377/anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", size = 228266, upload-time = 
"2025-11-28T23:37:38.911Z" } +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" }, + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, ] [[package]] name = "certifi" -version = "2025.11.12" +version = "2026.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] [[package]] @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.123.10" +version = "0.128.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -73,9 +73,9 @@ dependencies = [ { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/ff/e01087de891010089f1620c916c0c13130f3898177955c13e2b02d22ec4a/fastapi-0.123.10.tar.gz", hash = "sha256:624d384d7cda7c096449c889fc776a0571948ba14c3c929fa8e9a78cd0b0a6a8", size = 356360, upload-time = "2025-12-05T21:27:46.237Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/f0/7cb92c4a720def85240fd63fbbcf147ce19e7a731c8e1032376bb5a486ac/fastapi-0.123.10-py3-none-any.whl", hash = "sha256:0503b7b7bc71bc98f7c90c9117d21fdf6147c0d74703011b87936becc86985c1", size = 111774, upload-time = "2025-12-05T21:27:44.78Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" }, ] [[package]] @@ -87,6 +87,7 @@ dependencies = [ { name = "gemini-webapi" }, { name = "lmdb" }, { name = "loguru" }, + { name = "orjson" }, { name = "pydantic-settings", extra = ["yaml"] }, { name = "uvicorn" }, { name = "uvloop", marker = "sys_platform != 'win32'" }, @@ -104,19 +105,20 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.115.12" }, - { name = "gemini-webapi", specifier = ">=1.17.0" }, - { name = "lmdb", specifier = ">=1.6.2" }, - { name = "loguru", specifier = ">=0.7.0" }, - { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.9.1" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.11.7" }, - { name = "uvicorn", specifier = ">=0.34.1" }, - { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.21.0" }, + { name = "fastapi", specifier = ">=0.128.0" }, + { name = "gemini-webapi", specifier = ">=1.17.3" }, + { name = "lmdb", specifier = ">=1.7.5" }, + { name = "loguru", specifier = ">=0.7.3" }, + { name = "orjson", specifier = ">=3.11.5" }, + { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" }, + { name = "uvicorn", specifier = ">=0.40.0" }, + { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" }, ] provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.11.13" }] +dev = [{ name = "ruff", specifier = ">=0.14.14" }] [[package]] name = "gemini-webapi" @@ -209,25 +211,25 @@ wheels = [ [[package]] name = "orjson" -version = "3.11.4" +version = "3.11.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c6/fe/ed708782d6709cc60eb4c2d8a361a440661f74134675c72990f2c48c785f/orjson-3.11.4.tar.gz", hash = "sha256:39485f4ab4c9b30a3943cfe99e1a213c4776fb69e8abd68f66b83d5a0b0fdc6d", size = 5945188, upload-time = "2025-10-24T15:50:38.027Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/63/51/6b556192a04595b93e277a9ff71cd0cc06c21a7df98bcce5963fa0f5e36f/orjson-3.11.4-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d4371de39319d05d3f482f372720b841c841b52f5385bd99c61ed69d55d9ab50", size = 243571, upload-time = "2025-10-24T15:49:10.008Z" }, - { url = "https://files.pythonhosted.org/packages/1c/2c/2602392ddf2601d538ff11848b98621cd465d1a1ceb9db9e8043181f2f7b/orjson-3.11.4-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:e41fd3b3cac850eaae78232f37325ed7d7436e11c471246b87b2cd294ec94853", size = 128891, upload-time = "2025-10-24T15:49:11.297Z" }, - { url = "https://files.pythonhosted.org/packages/4e/47/bf85dcf95f7a3a12bf223394a4f849430acd82633848d52def09fa3f46ad/orjson-3.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600e0e9ca042878c7fdf189cf1b028fe2c1418cc9195f6cb9824eb6ed99cb938", size = 130137, upload-time = "2025-10-24T15:49:12.544Z" }, - { 
url = "https://files.pythonhosted.org/packages/b4/4d/a0cb31007f3ab6f1fd2a1b17057c7c349bc2baf8921a85c0180cc7be8011/orjson-3.11.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7bbf9b333f1568ef5da42bc96e18bf30fd7f8d54e9ae066d711056add508e415", size = 129152, upload-time = "2025-10-24T15:49:13.754Z" }, - { url = "https://files.pythonhosted.org/packages/f7/ef/2811def7ce3d8576b19e3929fff8f8f0d44bc5eb2e0fdecb2e6e6cc6c720/orjson-3.11.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4806363144bb6e7297b8e95870e78d30a649fdc4e23fc84daa80c8ebd366ce44", size = 136834, upload-time = "2025-10-24T15:49:15.307Z" }, - { url = "https://files.pythonhosted.org/packages/00/d4/9aee9e54f1809cec8ed5abd9bc31e8a9631d19460e3b8470145d25140106/orjson-3.11.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad355e8308493f527d41154e9053b86a5be892b3b359a5c6d5d95cda23601cb2", size = 137519, upload-time = "2025-10-24T15:49:16.557Z" }, - { url = "https://files.pythonhosted.org/packages/db/ea/67bfdb5465d5679e8ae8d68c11753aaf4f47e3e7264bad66dc2f2249e643/orjson-3.11.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a7517482667fb9f0ff1b2f16fe5829296ed7a655d04d68cd9711a4d8a4e708", size = 136749, upload-time = "2025-10-24T15:49:17.796Z" }, - { url = "https://files.pythonhosted.org/packages/01/7e/62517dddcfce6d53a39543cd74d0dccfcbdf53967017c58af68822100272/orjson-3.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97eb5942c7395a171cbfecc4ef6701fc3c403e762194683772df4c54cfbb2210", size = 136325, upload-time = "2025-10-24T15:49:19.347Z" }, - { url = "https://files.pythonhosted.org/packages/18/ae/40516739f99ab4c7ec3aaa5cc242d341fcb03a45d89edeeaabc5f69cb2cf/orjson-3.11.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:149d95d5e018bdd822e3f38c103b1a7c91f88d38a88aada5c4e9b3a73a244241", size = 140204, upload-time = "2025-10-24T15:49:20.545Z" }, - { url = "https://files.pythonhosted.org/packages/82/18/ff5734365623a8916e3a4037fcef1cd1782bfc14cf0992afe7940c5320bf/orjson-3.11.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:624f3951181eb46fc47dea3d221554e98784c823e7069edb5dbd0dc826ac909b", size = 406242, upload-time = "2025-10-24T15:49:21.884Z" }, - { url = "https://files.pythonhosted.org/packages/e1/43/96436041f0a0c8c8deca6a05ebeaf529bf1de04839f93ac5e7c479807aec/orjson-3.11.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:03bfa548cf35e3f8b3a96c4e8e41f753c686ff3d8e182ce275b1751deddab58c", size = 150013, upload-time = "2025-10-24T15:49:23.185Z" }, - { url = "https://files.pythonhosted.org/packages/1b/48/78302d98423ed8780479a1e682b9aecb869e8404545d999d34fa486e573e/orjson-3.11.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:525021896afef44a68148f6ed8a8bf8375553d6066c7f48537657f64823565b9", size = 139951, upload-time = "2025-10-24T15:49:24.428Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7b/ad613fdcdaa812f075ec0875143c3d37f8654457d2af17703905425981bf/orjson-3.11.4-cp312-cp312-win32.whl", hash = "sha256:b58430396687ce0f7d9eeb3dd47761ca7d8fda8e9eb92b3077a7a353a75efefa", size = 136049, upload-time = "2025-10-24T15:49:25.973Z" }, - { url = "https://files.pythonhosted.org/packages/b9/3c/9cf47c3ff5f39b8350fb21ba65d789b6a1129d4cbb3033ba36c8a9023520/orjson-3.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:c6dbf422894e1e3c80a177133c0dda260f81428f9de16d61041949f6a2e5c140", size = 131461, upload-time = "2025-10-24T15:49:27.259Z" }, - { url = 
"https://files.pythonhosted.org/packages/c6/3b/e2425f61e5825dc5b08c2a5a2b3af387eaaca22a12b9c8c01504f8614c36/orjson-3.11.4-cp312-cp312-win_arm64.whl", hash = "sha256:d38d2bc06d6415852224fcc9c0bfa834c25431e466dc319f0edd56cca81aa96e", size = 126167, upload-time = "2025-10-24T15:49:28.511Z" }, + { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = "2025-12-06T15:54:22.061Z" }, + { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" }, + { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" }, + { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" }, + { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" }, + { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = "2025-12-06T15:54:29.856Z" }, + { url = "https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" }, + { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = 
"2025-12-06T15:54:33.361Z" }, + { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" }, + { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" }, + { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" }, ] [[package]] @@ -322,28 +324,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.14.8" +version = "0.14.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/d9/f7a0c4b3a2bf2556cd5d99b05372c29980249ef71e8e32669ba77428c82c/ruff-0.14.8.tar.gz", hash = "sha256:774ed0dd87d6ce925e3b8496feb3a00ac564bea52b9feb551ecd17e0a23d1eed", size = 5765385, upload-time = "2025-12-04T15:06:17.669Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/b8/9537b52010134b1d2b72870cc3f92d5fb759394094741b09ceccae183fbe/ruff-0.14.8-py3-none-linux_armv6l.whl", hash = "sha256:ec071e9c82eca417f6111fd39f7043acb53cd3fde9b1f95bbed745962e345afb", size = 13441540, upload-time = "2025-12-04T15:06:14.896Z" }, - { url = "https://files.pythonhosted.org/packages/24/00/99031684efb025829713682012b6dd37279b1f695ed1b01725f85fd94b38/ruff-0.14.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8cdb162a7159f4ca36ce980a18c43d8f036966e7f73f866ac8f493b75e0c27e9", size = 13669384, upload-time = "2025-12-04T15:06:51.809Z" }, - { url = "https://files.pythonhosted.org/packages/72/64/3eb5949169fc19c50c04f28ece2c189d3b6edd57e5b533649dae6ca484fe/ruff-0.14.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e2fcbefe91f9fad0916850edf0854530c15bd1926b6b779de47e9ab619ea38f", size = 12806917, upload-time = "2025-12-04T15:06:08.925Z" }, - { url = "https://files.pythonhosted.org/packages/c4/08/5250babb0b1b11910f470370ec0cbc67470231f7cdc033cee57d4976f941/ruff-0.14.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d70721066a296f45786ec31916dc287b44040f553da21564de0ab4d45a869b", size = 13256112, upload-time = "2025-12-04T15:06:23.498Z" }, - { url 
= "https://files.pythonhosted.org/packages/78/4c/6c588e97a8e8c2d4b522c31a579e1df2b4d003eddfbe23d1f262b1a431ff/ruff-0.14.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c87e09b3cd9d126fc67a9ecd3b5b1d3ded2b9c7fce3f16e315346b9d05cfb52", size = 13227559, upload-time = "2025-12-04T15:06:33.432Z" }, - { url = "https://files.pythonhosted.org/packages/23/ce/5f78cea13eda8eceac71b5f6fa6e9223df9b87bb2c1891c166d1f0dce9f1/ruff-0.14.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d62cb310c4fbcb9ee4ac023fe17f984ae1e12b8a4a02e3d21489f9a2a5f730c", size = 13896379, upload-time = "2025-12-04T15:06:02.687Z" }, - { url = "https://files.pythonhosted.org/packages/cf/79/13de4517c4dadce9218a20035b21212a4c180e009507731f0d3b3f5df85a/ruff-0.14.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1af35c2d62633d4da0521178e8a2641c636d2a7153da0bac1b30cfd4ccd91344", size = 15372786, upload-time = "2025-12-04T15:06:29.828Z" }, - { url = "https://files.pythonhosted.org/packages/00/06/33df72b3bb42be8a1c3815fd4fae83fa2945fc725a25d87ba3e42d1cc108/ruff-0.14.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25add4575ffecc53d60eed3f24b1e934493631b48ebbc6ebaf9d8517924aca4b", size = 14990029, upload-time = "2025-12-04T15:06:36.812Z" }, - { url = "https://files.pythonhosted.org/packages/64/61/0f34927bd90925880394de0e081ce1afab66d7b3525336f5771dcf0cb46c/ruff-0.14.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c943d847b7f02f7db4201a0600ea7d244d8a404fbb639b439e987edcf2baf9a", size = 14407037, upload-time = "2025-12-04T15:06:39.979Z" }, - { url = "https://files.pythonhosted.org/packages/96/bc/058fe0aefc0fbf0d19614cb6d1a3e2c048f7dc77ca64957f33b12cfdc5ef/ruff-0.14.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb6e8bf7b4f627548daa1b69283dac5a296bfe9ce856703b03130732e20ddfe2", size = 14102390, upload-time = "2025-12-04T15:06:46.372Z" }, - { url = "https://files.pythonhosted.org/packages/af/a4/e4f77b02b804546f4c17e8b37a524c27012dd6ff05855d2243b49a7d3cb9/ruff-0.14.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:7aaf2974f378e6b01d1e257c6948207aec6a9b5ba53fab23d0182efb887a0e4a", size = 14230793, upload-time = "2025-12-04T15:06:20.497Z" }, - { url = "https://files.pythonhosted.org/packages/3f/52/bb8c02373f79552e8d087cedaffad76b8892033d2876c2498a2582f09dcf/ruff-0.14.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e5758ca513c43ad8a4ef13f0f081f80f08008f410790f3611a21a92421ab045b", size = 13160039, upload-time = "2025-12-04T15:06:49.06Z" }, - { url = "https://files.pythonhosted.org/packages/1f/ad/b69d6962e477842e25c0b11622548df746290cc6d76f9e0f4ed7456c2c31/ruff-0.14.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f74f7ba163b6e85a8d81a590363bf71618847e5078d90827749bfda1d88c9cdf", size = 13205158, upload-time = "2025-12-04T15:06:54.574Z" }, - { url = "https://files.pythonhosted.org/packages/06/63/54f23da1315c0b3dfc1bc03fbc34e10378918a20c0b0f086418734e57e74/ruff-0.14.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eed28f6fafcc9591994c42254f5a5c5ca40e69a30721d2ab18bb0bb3baac3ab6", size = 13469550, upload-time = "2025-12-04T15:05:59.209Z" }, - { url = "https://files.pythonhosted.org/packages/70/7d/a4d7b1961e4903bc37fffb7ddcfaa7beb250f67d97cfd1ee1d5cddb1ec90/ruff-0.14.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:21d48fa744c9d1cb8d71eb0a740c4dd02751a5de9db9a730a8ef75ca34cf138e", size = 14211332, upload-time = "2025-12-04T15:06:06.027Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/93/2a5063341fa17054e5c86582136e9895db773e3c2ffb770dde50a09f35f0/ruff-0.14.8-py3-none-win32.whl", hash = "sha256:15f04cb45c051159baebb0f0037f404f1dc2f15a927418f29730f411a79bc4e7", size = 13151890, upload-time = "2025-12-04T15:06:11.668Z" }, - { url = "https://files.pythonhosted.org/packages/02/1c/65c61a0859c0add13a3e1cbb6024b42de587456a43006ca2d4fd3d1618fe/ruff-0.14.8-py3-none-win_amd64.whl", hash = "sha256:9eeb0b24242b5bbff3011409a739929f497f3fb5fe3b5698aba5e77e8c833097", size = 14537826, upload-time = "2025-12-04T15:06:26.409Z" }, - { url = "https://files.pythonhosted.org/packages/6d/63/8b41cea3afd7f58eb64ac9251668ee0073789a3bc9ac6f816c8c6fef986d/ruff-0.14.8-py3-none-win_arm64.whl", hash = "sha256:965a582c93c63fe715fd3e3f8aa37c4b776777203d8e1d8aa3cc0c14424a4b99", size = 13634522, upload-time = "2025-12-04T15:06:43.212Z" }, + { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, + { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] [[package]] @@ -382,15 +384,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.38.0" +version = "0.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 
80605, upload-time = "2025-10-18T13:46:44.63Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, ] [[package]] From 889f2d257ba15a61339de924fb6a67a6fefe6516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 11:15:41 +0700 Subject: [PATCH 044/139] Fix IDE warnings --- app/services/lmdb.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 93c7723..dec148b 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -52,7 +52,7 @@ def _hash_message(message: Message) -> str: func["arguments"] = orjson.dumps( parsed, option=orjson.OPT_SORT_KEYS ).decode("utf-8") - except Exception: + except orjson.JSONDecodeError: pass message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) @@ -175,7 +175,7 @@ def store( value = orjson.dumps(conv.model_dump(mode="json")) try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: # Store main data txn.put(storage_key.encode("utf-8"), value, overwrite=True) @@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: Conversation or None if not found """ try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: data = txn.get(key.encode("utf-8"), default=None) if not data: return None @@ -255,7 +255,7 @@ def _find_by_message_list( key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore return self.get(mapped.decode("utf-8")) # type: ignore except Exception as e: @@ -279,7 +279,7 @@ def exists(self, key: str) -> bool: bool: True if key exists, False otherwise """ try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: return txn.get(key.encode("utf-8")) is not None except Exception as e: logger.error(f"Failed to check existence of key {key}: {e}") @@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: ConversationInStore: The deleted conversation data, or None if not found """ try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: @@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: """ keys = [] try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: cursor = txn.cursor() cursor.first() @@ -377,7 +377,7 @@ 
def cleanup_expired(self, retention_days: Optional[int] = None) -> int: expired_entries: list[tuple[str, ConversationInStore]] = [] try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: cursor = txn.cursor() for key_bytes, value_bytes in cursor: @@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: removed = 0 try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: for key_str, conv in expired_entries: key_bytes = key_str.encode("utf-8") if not txn.delete(key_bytes): From 66b62020330e690499ef386e81cee52dc0f97cce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 11:26:16 +0700 Subject: [PATCH 045/139] Incorrect IDE warnings --- app/services/lmdb.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index dec148b..c8e78a9 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -175,7 +175,7 @@ def store( value = orjson.dumps(conv.model_dump(mode="json")) try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: # Store main data txn.put(storage_key.encode("utf-8"), value, overwrite=True) @@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: Conversation or None if not found """ try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: data = txn.get(key.encode("utf-8"), default=None) if not data: return None @@ -255,7 +255,7 @@ def _find_by_message_list( key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore return self.get(mapped.decode("utf-8")) # type: ignore except Exception as e: @@ -279,7 +279,7 @@ def exists(self, key: str) -> bool: bool: True if key exists, False otherwise """ try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: return txn.get(key.encode("utf-8")) is not None except Exception as e: logger.error(f"Failed to check existence of key {key}: {e}") @@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: ConversationInStore: The deleted conversation data, or None if not found """ try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: @@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: """ keys = [] try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: cursor = txn.cursor() cursor.first() @@ -377,7 +377,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: expired_entries: list[tuple[str, ConversationInStore]] = [] try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: cursor = txn.cursor() for key_bytes, value_bytes in cursor: @@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: removed = 0 try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: for key_str, conv in expired_entries: key_bytes = key_str.encode("utf-8") if not 
txn.delete(key_bytes): From 3297f534f035f869bd7e4a867618b39bc7256f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 12:05:26 +0700 Subject: [PATCH 046/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/services/lmdb.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c8e78a9..a55d3a9 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -476,9 +476,7 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: if isinstance(msg.content, str): normalized_content = LMDBConversationStore.remove_think_tags(msg.content) if normalized_content != msg.content: - cleaned_msg = Message( - role=msg.role, content=normalized_content, name=msg.name - ) + cleaned_msg = msg.model_copy(update={"content": normalized_content}) cleaned_messages.append(cleaned_msg) else: cleaned_messages.append(msg) From 5399b260595e77d6c1f0a8d24a880c59d165a57b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 12:06:52 +0700 Subject: [PATCH 047/139] Refactor: Centralized the mapping of the 'developer' role to 'system' for better Gemini compatibility. --- app/models/models.py | 7 +++++++ app/server/chat.py | 6 +----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index c27e024..63ddb94 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -29,6 +29,13 @@ class Message(BaseModel): audio: Optional[Dict[str, Any]] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) + @model_validator(mode="after") + def normalize_role(self) -> "Message": + """Normalize 'developer' role to 'system' for Gemini compatibility.""" + if self.role == "developer": + self.role = "system" + return self + class Choice(BaseModel): """Choice model""" diff --git a/app/server/chat.py b/app/server/chat.py index a9d9dec..66a2720 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -319,8 +319,6 @@ def _response_items_to_messages( normalized_input: list[ResponseInputItem] = [] for item in items: role = item.role - if role == "developer": - role = "system" content = item.content normalized_contents: list[ResponseInputContent] = [] @@ -394,8 +392,6 @@ def _instructions_to_messages( continue role = item.role - if role == "developer": - role = "system" content = item.content if isinstance(content, str): @@ -1054,7 +1050,7 @@ async def _find_reusable_session( while search_end >= 2: search_history = messages[:search_end] - # Only try to match if the last stored message would be assistant/system. + # Only try to match if the last stored message would be assistant/system before querying LMDB. if search_history[-1].role in {"assistant", "system"}: try: if conv := db.find(model.model_name, search_history): From de01c7850fa44f4dcbd8f31c47bccaf301861a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:04:31 +0700 Subject: [PATCH 048/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. 
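
Conversation lookup hinges on these hashes being deterministic: the key
is a chained SHA-256 over the client id, the model name, and one hash
per message, so any volatile field (random tool-call ids, unsorted JSON
keys) breaks prefix matching. A minimal sketch of the chaining,
mirroring _hash_conversation in the diff below (the per-message hashes
come from the normalized core fields):

    import hashlib

    def conversation_key(client_id: str, model: str, message_hashes: list[str]) -> str:
        combined = hashlib.sha256()
        combined.update(client_id.encode("utf-8"))
        combined.update(model.encode("utf-8"))
        for message_hash in message_hashes:
            combined.update(message_hash.encode("utf-8"))
        return combined.hexdigest()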
--- app/models/models.py | 1 + app/services/lmdb.py | 95 +++++++++++++++++++++++++------------------- app/utils/helper.py | 10 +++-- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 63ddb94..4072b29 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -24,6 +24,7 @@ class Message(BaseModel): content: Union[str, List[ContentItem], None] = None name: Optional[str] = None tool_calls: Optional[List["ToolCall"]] = None + tool_call_id: Optional[str] = None refusal: Optional[str] = None reasoning_content: Optional[str] = None audio: Optional[Dict[str, Any]] = None diff --git a/app/services/lmdb.py b/app/services/lmdb.py index a55d3a9..594acf0 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -15,53 +15,69 @@ def _hash_message(message: Message) -> str: - """Generate a hash for a single message.""" - # Convert message to dict and sort keys for consistent hashing - message_dict = message.model_dump(mode="json") - - # Normalize content: empty string -> None - content = message_dict.get("content") - if content == "": - message_dict["content"] = None + """Generate a consistent hash for a single message focusing only on core identity fields.""" + # Pick only fields that define the message in a conversation history + core_data = { + "role": message.role, + "name": message.name, + "tool_call_id": message.tool_call_id, + } + + # Normalize content: strip, handle empty/None, and list-of-text items + content = message.content + if not content: + core_data["content"] = None + elif isinstance(content, str): + stripped = content.strip() + core_data["content"] = stripped if stripped else None elif isinstance(content, list): - is_pure_text = True text_parts = [] for item in content: - if not isinstance(item, dict) or item.get("type") != "text": - is_pure_text = False + if isinstance(item, ContentItem) and item.type == "text": + text_parts.append(item.text or "") + elif isinstance(item, dict) and item.get("type") == "text": + text_parts.append(item.get("text") or "") + else: + # If it contains non-text (images/files), keep the full list for hashing + text_parts = None break - text_parts.append(item.get("text") or "") - - if is_pure_text: - text_content = "".join(text_parts) - message_dict["content"] = text_content if text_content else None - - # Normalize tool_calls: empty list -> None, and canonicalize arguments - tool_calls = message_dict.get("tool_calls") - if not tool_calls: - message_dict["tool_calls"] = None - elif isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict) and "function" in tool_call: - func = tool_call["function"] - args = func.get("arguments") - if isinstance(args, str): - try: - # Parse and re-dump to canonicalize (remove extra whitespace, sort keys) - parsed = orjson.loads(args) - func["arguments"] = orjson.dumps( - parsed, option=orjson.OPT_SORT_KEYS - ).decode("utf-8") - except orjson.JSONDecodeError: - pass - - message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) + + if text_parts is not None: + text_content = "".join(text_parts).strip() + core_data["content"] = text_content if text_content else None + else: + core_data["content"] = message.model_dump(mode="json")["content"] + + # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist + if message.tool_calls: + calls_data = [] + for tc in message.tool_calls: + args = tc.function.arguments or "{}" + try: + parsed = orjson.loads(args) + canon_args = 
orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") + except orjson.JSONDecodeError: + canon_args = args + + calls_data.append( + { + "id": tc.id, # Deterministic IDs ensure this is stable + "name": tc.function.name, + "arguments": canon_args, + } + ) + # Sort calls to be order-independent + calls_data.sort(key=lambda x: (x["name"], x["arguments"])) + core_data["tool_calls"] = calls_data + else: + core_data["tool_calls"] = None + + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: - """Generate a hash for a list of messages and client id.""" - # Create a combined hash from all individual message hashes + """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() combined_hash.update(client_id.encode("utf-8")) combined_hash.update(model.encode("utf-8")) @@ -252,7 +268,6 @@ def _find_by_message_list( """Internal find implementation based on a message list.""" for c in g_config.gemini.clients: message_hash = _hash_conversation(c.id, model, messages) - key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: with self._get_transaction(write=False) as txn: diff --git a/app/utils/helper.py b/app/utils/helper.py index 1dc518f..239b7f4 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,9 +1,9 @@ import base64 +import hashlib import mimetypes import re import struct import tempfile -import uuid from pathlib import Path from typing import Iterator from urllib.parse import urlparse @@ -222,13 +222,17 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: parsed_args = orjson.loads(raw_args) - arguments = orjson.dumps(parsed_args).decode("utf-8") + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") + # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB + seed = f"{name}:{arguments}".encode("utf-8") + call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" + tool_calls.append( ToolCall( - id=f"call_{uuid.uuid4().hex}", + id=call_id, type="function", function=FunctionCall(name=name, arguments=arguments), ) From 196414755e860f1f6d9c840954eb45c53225a864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:26:58 +0700 Subject: [PATCH 049/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/server/chat.py | 10 +++++++++- app/services/lmdb.py | 7 ++----- app/utils/helper.py | 13 +++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66a2720..7c683cd 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1047,6 +1047,8 @@ async def _find_reusable_session( # Start with the full history and iteratively trim from the end. search_end = len(messages) + logger.debug(f"Searching for reusable session in history of length {search_end}...") + while search_end >= 2: search_history = messages[:search_end] @@ -1057,14 +1059,20 @@ async def _find_reusable_session( client = await pool.acquire(conv.client_id) session = client.start_chat(metadata=conv.metadata, model=model) remain = messages[search_end:] + logger.debug( + f"Match found at prefix length {search_end}. 
Client: {conv.client_id}" + ) return session, client, remain except Exception as e: - logger.warning(f"Error checking LMDB for reusable session: {e}") + logger.warning( + f"Error checking LMDB for reusable session at length {search_end}: {e}" + ) break # Trim one message and try again. search_end -= 1 + logger.debug("No reusable session found after checking all possible prefixes.") return None, None, messages diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 594acf0..5aefa4b 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -15,12 +15,10 @@ def _hash_message(message: Message) -> str: - """Generate a consistent hash for a single message focusing only on core identity fields.""" - # Pick only fields that define the message in a conversation history + """Generate a consistent hash for a single message focusing ONLY on logic/content, ignoring technical IDs.""" core_data = { "role": message.role, "name": message.name, - "tool_call_id": message.tool_call_id, } # Normalize content: strip, handle empty/None, and list-of-text items @@ -48,7 +46,7 @@ def _hash_message(message: Message) -> str: else: core_data["content"] = message.model_dump(mode="json")["content"] - # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist + # Normalize tool_calls: Focus ONLY on function name and arguments if message.tool_calls: calls_data = [] for tc in message.tool_calls: @@ -61,7 +59,6 @@ def _hash_message(message: Message) -> str: calls_data.append( { - "id": tc.id, # Deterministic IDs ensure this is stable "name": tc.function.name, "arguments": canon_args, } diff --git a/app/utils/helper.py b/app/utils/helper.py index 239b7f4..ecf4a47 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str) -> None: + def _create_tool_call(name: str, raw_args: str, index: int) -> None: """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") @@ -226,8 +226,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. 
Passing raw string.") - # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB - seed = f"{name}:{arguments}".encode("utf-8") + # Generate a deterministic ID based on name, arguments, and index to avoid collisions + seed = f"{name}:{arguments}:{index}".encode("utf-8") call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" tool_calls.append( @@ -244,11 +244,11 @@ def _replace_block(match: re.Match[str]) -> str: return match.group(0) found_in_block = False - for call_match in TOOL_CALL_RE.finditer(block_content): + for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)): found_in_block = True name = (call_match.group(1) or "").strip() raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args) + _create_tool_call(name, raw_args, i) if found_in_block: return "" @@ -258,9 +258,10 @@ def _replace_block(match: re.Match[str]) -> str: cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) def _replace_orphan(match: re.Match[str]) -> str: + # Note: orphan calls are handled with a fallback index if they appear outside blocks name = (match.group(1) or "").strip() raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args) + _create_tool_call(name, raw_args, len(tool_calls)) return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) From 8c5c7498230bc680bf50464dacf0b6f001888981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:42:09 +0700 Subject: [PATCH 050/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/server/chat.py | 4 ++-- app/services/lmdb.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 7c683cd..0d64b71 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1052,8 +1052,8 @@ async def _find_reusable_session( while search_end >= 2: search_history = messages[:search_end] - # Only try to match if the last stored message would be assistant/system before querying LMDB. - if search_history[-1].role in {"assistant", "system"}: + # Only try to match if the last stored message would be assistant/system/tool before querying LMDB. + if search_history[-1].role in {"assistant", "system", "tool"}: try: if conv := db.find(model.model_name, search_history): client = await pool.acquire(conv.client_id) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 5aefa4b..c612d9e 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -70,7 +70,11 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = None message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + msg_hash = hashlib.sha256(message_bytes).hexdigest() + logger.debug( + f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}" + ) + return msg_hash def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From ce67d664b5443726fe518aee1cc9ef550ae640fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 14:41:55 +0700 Subject: [PATCH 051/139] Refactor: Avoid reusing an existing chat session if its idle time exceeds METADATA_TTL_MINUTES. 
--- app/server/chat.py | 14 ++++++++++++-- app/services/lmdb.py | 9 ++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0d64b71..6fbb818 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -58,6 +58,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" +METADATA_TTL_MINUTES = 20 router = APIRouter() @@ -1047,7 +1048,6 @@ async def _find_reusable_session( # Start with the full history and iteratively trim from the end. search_end = len(messages) - logger.debug(f"Searching for reusable session in history of length {search_end}...") while search_end >= 2: search_history = messages[:search_end] @@ -1056,6 +1056,17 @@ async def _find_reusable_session( if search_history[-1].role in {"assistant", "system", "tool"}: try: if conv := db.find(model.model_name, search_history): + # Check if metadata is too old + now = datetime.now() + updated_at = conv.updated_at or conv.created_at or now + age_minutes = (now - updated_at).total_seconds() / 60 + + if age_minutes > METADATA_TTL_MINUTES: + logger.debug( + f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." + ) + break + client = await pool.acquire(conv.client_id) session = client.start_chat(metadata=conv.metadata, model=model) remain = messages[search_end:] @@ -1072,7 +1083,6 @@ async def _find_reusable_session( # Trim one message and try again. search_end -= 1 - logger.debug("No reusable session found after checking all possible prefixes.") return None, None, messages diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c612d9e..424b357 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -68,13 +68,8 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None - - message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - msg_hash = hashlib.sha256(message_bytes).hexdigest() - logger.debug( - f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}" - ) - return msg_hash + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From 3d32d1226b1399f4286aadd95b2c4a52228fac45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 14:58:58 +0700 Subject: [PATCH 052/139] Refactor: Update the LMDB store to resolve issues preventing conversation from being saved --- app/services/lmdb.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 424b357..2dbe7b2 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -68,15 +68,16 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None - message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() - 
combined_hash.update(client_id.encode("utf-8")) - combined_hash.update(model.encode("utf-8")) + combined_hash.update((client_id or "").encode("utf-8")) + combined_hash.update((model or "").encode("utf-8")) for message in messages: message_hash = _hash_message(message) combined_hash.update(message_hash.encode("utf-8")) From 2eb9f05142ddfa1cb665b248f3faf2e278b619c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 17:57:04 +0700 Subject: [PATCH 053/139] Refactor: Update the _prepare_messages_for_model helper to omit the system instruction when reusing a session to save tokens. --- app/server/chat.py | 66 +++++++++++++++++++++++++---------------- app/services/lmdb.py | 70 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 96 insertions(+), 40 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 6fbb818..646f4fa 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -58,7 +58,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" -METADATA_TTL_MINUTES = 20 +METADATA_TTL_MINUTES = 15 router = APIRouter() @@ -268,31 +268,35 @@ def _prepare_messages_for_model( tools: list[Tool] | None, tool_choice: str | ToolChoiceFunction | None, extra_instructions: list[str] | None = None, + inject_system_defaults: bool = True, ) -> list[Message]: """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] instructions: list[str] = [] - if tools: - tool_prompt = _build_tool_prompt(tools, tool_choice) - if tool_prompt: - instructions.append(tool_prompt) - - if extra_instructions: - instructions.extend(instr for instr in extra_instructions if instr) - logger.debug( - f"Applied {len(extra_instructions)} extra instructions for tool/structured output." - ) + if inject_system_defaults: + if tools: + tool_prompt = _build_tool_prompt(tools, tool_choice) + if tool_prompt: + instructions.append(tool_prompt) + + if extra_instructions: + instructions.extend(instr for instr in extra_instructions if instr) + logger.debug( + f"Applied {len(extra_instructions)} extra instructions for tool/structured output." + ) - if not _conversation_has_code_hint(prepared): - instructions.append(CODE_BLOCK_HINT) - logger.debug("Injected default code block hint for Gemini conversation.") + if not _conversation_has_code_hint(prepared): + instructions.append(CODE_BLOCK_HINT) + logger.debug("Injected default code block hint for Gemini conversation.") if not instructions: + # Still need to ensure XML hint for the last user message if tools are present + if tools and tool_choice != "none": + _append_xml_hint_to_last_user_message(prepared) return prepared combined_instructions = "\n\n".join(instructions) - if prepared and prepared[0].role == "system" and isinstance(prepared[0].content, str): existing = prepared[0].content or "" separator = "\n\n" if existing else "" @@ -530,8 +534,14 @@ async def create_chat_completion( ) if session: + # Optimization: When reusing a session, we don't need to resend the heavy tool definitions + # or structured output instructions as they are already in the Gemini session history. 
messages_to_send = _prepare_messages_for_model( - remaining_messages, request.tools, request.tool_choice, extra_instructions + remaining_messages, + request.tools, + request.tool_choice, + extra_instructions, + inject_system_defaults=False, ) if not messages_to_send: raise HTTPException( @@ -642,17 +652,20 @@ async def create_chat_completion( # After formatting, persist the conversation to LMDB try: - last_message = Message( + current_assistant_message = Message( role="assistant", content=storage_output or None, tool_calls=tool_calls or None, ) - cleaned_history = db.sanitize_assistant_messages(request.messages) + # Sanitize the entire history including the new message to ensure consistency + full_history = [*request.messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + conv = ConversationInStore( model=model.model_name, client_id=client.id, metadata=session.metadata, - messages=[*cleaned_history, last_message], + messages=cleaned_history, ) key = db.store(conv) logger.debug(f"Conversation saved to LMDB with key: {key}") @@ -780,9 +793,10 @@ async def _build_payload( if reuse_session: messages_to_send = _prepare_messages_for_model( remaining_messages, - tools=None, - tool_choice=None, - extra_instructions=extra_instructions or None, + tools=request_data.tools, # Keep for XML hint logic + tool_choice=request_data.tool_choice, + extra_instructions=None, # Already in session history + inject_system_defaults=False, ) if not messages_to_send: raise HTTPException( @@ -994,17 +1008,19 @@ async def _build_payload( ) try: - last_message = Message( + current_assistant_message = Message( role="assistant", content=storage_output or None, tool_calls=detected_tool_calls or None, ) - cleaned_history = db.sanitize_assistant_messages(messages) + full_history = [*messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + conv = ConversationInStore( model=model.model_name, client_id=client.id, metadata=session.metadata, - messages=[*cleaned_history, last_message], + messages=cleaned_history, ) key = db.store(conv) logger.debug(f"Conversation saved to LMDB with key: {key}") diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 2dbe7b2..f4c9938 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -11,6 +11,7 @@ from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config +from ..utils.helper import extract_tool_calls, remove_tool_call_blocks from ..utils.singleton import Singleton @@ -26,8 +27,9 @@ def _hash_message(message: Message) -> str: if not content: core_data["content"] = None elif isinstance(content, str): - stripped = content.strip() - core_data["content"] = stripped if stripped else None + # Normalize line endings and strip whitespace + normalized = content.replace("\r\n", "\n").strip() + core_data["content"] = normalized if normalized else None elif isinstance(content, list): text_parts = [] for item in content: @@ -41,7 +43,7 @@ def _hash_message(message: Message) -> str: break if text_parts is not None: - text_content = "".join(text_parts).strip() + text_content = "".join(text_parts).replace("\r\n", "\n").strip() core_data["content"] = text_content if text_content else None else: core_data["content"] = message.model_dump(mode="json")["content"] @@ -260,7 +262,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt return None def _find_by_message_list( - self, model: str, messages: List[Message] + self, + model: str, + 
messages: List[Message],
     ) -> Optional[ConversationInStore]:
         """Internal find implementation based on a message list."""
         for c in g_config.gemini.clients:
             message_hash = _hash_conversation(c.id, model, messages)
             key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}"
 
             try:
                 with self._get_transaction(write=False) as txn:
@@ -471,40 +475,76 @@ def __del__(self):
 
     @staticmethod
     def remove_think_tags(text: str) -> str:
         """
-        Remove <think>...</think> tags at the start of text and strip whitespace.
+        Remove all <think>...</think> tags and strip whitespace.
         """
-        cleaned_content = re.sub(r"^(\s*<think>.*?</think>\n?)", "", text, flags=re.DOTALL)
+        # Remove all think blocks anywhere in the text
+        cleaned_content = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
         return cleaned_content.strip()
 
     @staticmethod
     def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
         """
-        Create a new list of messages with assistant content cleaned of <think> tags.
-        This is useful for store the chat history.
+        Create a new list of messages with assistant content cleaned of <think> tags
+        and system hints/tool call blocks. This is used for both storing and
+        searching chat history to ensure consistency.
+
+        If a message has no tool_calls but contains tool call XML blocks in its
+        content, they will be extracted and moved to the tool_calls field.
         """
         cleaned_messages = []
         for msg in messages:
             if msg.role == "assistant":
                 if isinstance(msg.content, str):
-                    normalized_content = LMDBConversationStore.remove_think_tags(msg.content)
-                    if normalized_content != msg.content:
-                        cleaned_msg = msg.model_copy(update={"content": normalized_content})
+                    text = LMDBConversationStore.remove_think_tags(msg.content)
+                    tool_calls = msg.tool_calls
+                    if not tool_calls:
+                        text, tool_calls = extract_tool_calls(text)
+                    else:
+                        text = remove_tool_call_blocks(text).strip()
+
+                    normalized_content = text.strip()
+
+                    if normalized_content != msg.content or tool_calls != msg.tool_calls:
+                        cleaned_msg = msg.model_copy(
+                            update={
+                                "content": normalized_content or None,
+                                "tool_calls": tool_calls or None,
+                            }
+                        )
                         cleaned_messages.append(cleaned_msg)
                     else:
                         cleaned_messages.append(msg)
                 elif isinstance(msg.content, list):
                     new_content = []
+                    all_extracted_calls = list(msg.tool_calls or [])
                     changed = False
+
                     for item in msg.content:
                         if isinstance(item, ContentItem) and item.type == "text" and item.text:
-                            cleaned_text = LMDBConversationStore.remove_think_tags(item.text)
-                            if cleaned_text != item.text:
+                            text = LMDBConversationStore.remove_think_tags(item.text)
+
+                            if not msg.tool_calls:
+                                text, extracted = extract_tool_calls(text)
+                                if extracted:
+                                    all_extracted_calls.extend(extracted)
+                                    changed = True
+                            else:
+                                text = remove_tool_call_blocks(text).strip()
+
+                            if text != item.text:
                                 changed = True
-                                item = item.model_copy(update={"text": cleaned_text})
+                                item = item.model_copy(update={"text": text.strip() or None})
                         new_content.append(item)
                     if changed:
-                        cleaned_messages.append(msg.model_copy(update={"content": new_content}))
+                        cleaned_messages.append(
+                            msg.model_copy(
+                                update={
+                                    "content": new_content,
+                                    "tool_calls": all_extracted_calls or None,
+                                }
+                            )
+                        )
                     else:
                         cleaned_messages.append(msg)
                 else:

From ade61d6826af1f256e7141ab6c1815b047cf8744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Mon, 26 Jan 2026 11:01:41 +0700
Subject: [PATCH 054/139] Refactor: Modify the logic to convert a large prompt
 into a temporary text file attachment

- When multiple chunks are sent simultaneously, Google will immediately
  invalidate the access token and reject the request
- When a prompt contains a structured format like JSON, splitting it can
  break the format and may cause the model to misunderstand the context
- Another minor tweak as Copilot suggested
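
In short, _send_with_split no longer splits at all: an oversized prompt
is written to a message.txt attachment and sent in a single request. A
condensed sketch of the fallback implemented below (logging and error
handling omitted; the real instruction text is longer):

    import tempfile
    from pathlib import Path

    async def send_oversized(session, text: str, files: list | None = None):
        with tempfile.TemporaryDirectory() as tmpdir:
            attachment = Path(tmpdir) / "message.txt"
            attachment.write_text(text, encoding="utf-8")
            instruction = (
                "The user's input is provided in the attached file `message.txt`. "
                "Treat its content as the primary prompt for this turn."
            )
            return await session.send_message(instruction, files=[*(files or []), attachment])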
--- app/server/chat.py | 104 ++++++++++++++++--------------------------- app/services/lmdb.py | 5 ++- app/utils/helper.py | 13 +++--- 3 files changed, 49 insertions(+), 73 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 646f4fa..063d4d4 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,7 +1,6 @@ -import asyncio import base64 -import random import re +import tempfile import uuid from dataclasses import dataclass from datetime import datetime, timezone @@ -375,9 +374,7 @@ def _response_items_to_messages( ResponseInputItem(type="message", role=item.role, content=normalized_contents or []) ) - logger.debug( - f"Normalized Responses input: {len(normalized_input)} message items (developer roles mapped to system)." - ) + logger.debug(f"Normalized Responses input: {len(normalized_input)} message items.") return messages, normalized_input @@ -1077,19 +1074,18 @@ async def _find_reusable_session( updated_at = conv.updated_at or conv.created_at or now age_minutes = (now - updated_at).total_seconds() / 60 - if age_minutes > METADATA_TTL_MINUTES: + if age_minutes <= METADATA_TTL_MINUTES: + client = await pool.acquire(conv.client_id) + session = client.start_chat(metadata=conv.metadata, model=model) + remain = messages[search_end:] + logger.debug( + f"Match found at prefix length {search_end}. Client: {conv.client_id}" + ) + return session, client, remain + else: logger.debug( f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." ) - break - - client = await pool.acquire(conv.client_id) - session = client.start_chat(metadata=conv.metadata, model=model) - remain = messages[search_end:] - logger.debug( - f"Match found at prefix length {search_end}. Client: {conv.client_id}" - ) - return session, client, remain except Exception as e: logger.warning( f"Error checking LMDB for reusable session at length {search_end}: {e}" @@ -1103,13 +1099,9 @@ async def _find_reusable_session( async def _send_with_split(session: ChatSession, text: str, files: list[Path | str] | None = None): - """Send text to Gemini, automatically splitting into multiple batches if it is - longer than ``MAX_CHARS_PER_REQUEST``. - - Every intermediate batch (that is **not** the last one) is suffixed with a hint - telling Gemini that more content will come, and it should simply reply with - "ok". The final batch carries any file uploads and the real user prompt so - that Gemini can produce the actual answer. + """ + Send text to Gemini. If text is longer than ``MAX_CHARS_PER_REQUEST``, + it is converted into a temporary text file attachment to avoid splitting issues. """ if len(text) <= MAX_CHARS_PER_REQUEST: try: @@ -1118,55 +1110,37 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s logger.exception(f"Error sending message to Gemini: {e}") raise - hint_len = len(CONTINUATION_HINT) - safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len - - chunks: list[str] = [] - pos = 0 - total = len(text) - - while pos < total: - remaining = total - pos - if remaining <= MAX_CHARS_PER_REQUEST: - chunks.append(text[pos:]) - break - - end = pos + safe_chunk_size - slice_candidate = text[pos:end] - # Try to find a safe split point - split_idx = -1 - idx = slice_candidate.rfind("\n") - if idx != -1: - split_idx = idx - - if split_idx != -1: - split_at = pos + split_idx + 1 - else: - split_at = end + logger.info( + f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment." 
+ ) - chunk = text[pos:split_at] + CONTINUATION_HINT - chunks.append(chunk) - pos = split_at + # Create a temporary directory to hold the message.txt file + # This ensures the filename is exactly 'message.txt' as expected by the instruction. + with tempfile.TemporaryDirectory() as tmpdirname: + temp_file_path = Path(tmpdirname) / "message.txt" + temp_file_path.write_text(text, encoding="utf-8") - chunks_size = len(chunks) - for i, chk in enumerate(chunks[:-1]): try: - logger.debug(f"Sending chunk {i + 1}/{chunks_size}...") - await session.send_message(chk) - delay = random.uniform(1.0, 3.0) - logger.debug(f"Sleeping for {delay:.2f}s...") - await asyncio.sleep(delay) + # Prepare the files list + final_files = list(files) if files else [] + final_files.append(temp_file_path) + + instruction = ( + "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n" + "**System Instruction:**\n" + "1. Read the content of `message.txt`.\n" + "2. Treat that content as the **primary** user prompt for this turn.\n" + "3. Execute the instructions or answer the questions found *inside* that file immediately.\n" + ) + + logger.debug(f"Sending prompt as temporary file: {temp_file_path}") + + return await session.send_message(instruction, files=final_files) + except Exception as e: - logger.exception(f"Error sending chunk to Gemini: {e}") + logger.exception(f"Error sending large text as file to Gemini: {e}") raise - try: - logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...") - return await session.send_message(chunks[-1], files=files) - except Exception as e: - logger.exception(f"Error sending final chunk to Gemini: {e}") - raise - def _create_streaming_response( model_output: str, diff --git a/app/services/lmdb.py b/app/services/lmdb.py index f4c9938..c9d42cd 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -43,8 +43,9 @@ def _hash_message(message: Message) -> str: break if text_parts is not None: - text_content = "".join(text_parts).replace("\r\n", "\n").strip() - core_data["content"] = text_content if text_content else None + # Normalize each part but keep them as a list to preserve boundaries and avoid collisions + normalized_parts = [p.replace("\r\n", "\n") for p in text_parts] + core_data["content"] = normalized_parts if normalized_parts else None else: core_data["content"] = message.model_dump(mode="json")["content"] diff --git a/app/utils/helper.py b/app/utils/helper.py index ecf4a47..190b5ce 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str, index: int) -> None: + def _create_tool_call(name: str, raw_args: str) -> None: """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") @@ -226,7 +226,9 @@ def _create_tool_call(name: str, raw_args: str, index: int) -> None: except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") - # Generate a deterministic ID based on name, arguments, and index to avoid collisions + # Generate a deterministic ID based on name, arguments, and its global sequence index + # to ensure uniqueness across multiple fenced blocks while remaining stable for storage. 
+ index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" @@ -244,11 +246,11 @@ def _replace_block(match: re.Match[str]) -> str: return match.group(0) found_in_block = False - for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)): + for call_match in TOOL_CALL_RE.finditer(block_content): found_in_block = True name = (call_match.group(1) or "").strip() raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args, i) + _create_tool_call(name, raw_args) if found_in_block: return "" @@ -258,10 +260,9 @@ def _replace_block(match: re.Match[str]) -> str: cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) def _replace_orphan(match: re.Match[str]) -> str: - # Note: orphan calls are handled with a fallback index if they appear outside blocks name = (match.group(1) or "").strip() raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args, len(tool_calls)) + _create_tool_call(name, raw_args) return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) From bdd893ff9a2d2c58fcbc3eb0c01aab337177edd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 28 Jan 2026 13:37:47 +0700 Subject: [PATCH 055/139] Enable streaming responses and fully resolve the problem with reusable sessions. - Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR. --- app/main.py | 2 +- app/models/models.py | 4 +- app/server/chat.py | 1867 ++++++++++++++++++++++------------------ app/services/client.py | 11 +- app/services/lmdb.py | 152 ++-- app/services/pool.py | 4 +- app/utils/helper.py | 113 +-- 7 files changed, 1162 insertions(+), 991 deletions(-) diff --git a/app/main.py b/app/main.py index 307eb36..f4e6711 100644 --- a/app/main.py +++ b/app/main.py @@ -15,7 +15,7 @@ ) from .services import GeminiClientPool, LMDBConversationStore -RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60 # 6 hours +RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60 # Check every 6 hours async def _run_retention_cleanup(stop_event: asyncio.Event) -> None: diff --git a/app/models/models.py b/app/models/models.py index 4072b29..64ceaa9 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -7,7 +7,7 @@ class ContentItem(BaseModel): - """Content item model""" + """Individual content item (text, image, or file) within a message.""" type: Literal["text", "image_url", "file", "input_audio"] text: Optional[str] = None @@ -159,7 +159,7 @@ class ConversationInStore(BaseModel): created_at: Optional[datetime] = Field(default=None) updated_at: Optional[datetime] = Field(default=None) - # NOTE: Gemini Web API do not support changing models once a conversation is created. + # Gemini Web API does not support changing models once a conversation is created. 
model: str = Field(..., description="Model used for the conversation") client_id: str = Field(..., description="Identifier of the Gemini client") metadata: list[str | None] = Field( diff --git a/app/server/chat.py b/app/server/chat.py index 063d4d4..37d3c70 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,18 +1,18 @@ import base64 -import re -import tempfile +import io +import reprlib import uuid from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any +from typing import Any, AsyncGenerator import orjson from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse +from gemini_webapi import ModelOutput from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model -from gemini_webapi.exceptions import APIError from gemini_webapi.types.image import GeneratedImage, Image from loguru import logger @@ -42,21 +42,18 @@ from ..utils.helper import ( CODE_BLOCK_HINT, CODE_HINT_STRIPPED, + CONTROL_TOKEN_RE, XML_HINT_STRIPPED, XML_WRAP_HINT, estimate_tokens, extract_image_dimensions, extract_tool_calls, - iter_stream_segments, - remove_tool_call_blocks, strip_code_fence, text_from_message, ) from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key -# Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) -CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" METADATA_TTL_MINUTES = 15 router = APIRouter() @@ -72,6 +69,210 @@ class StructuredOutputRequirement: raw_format: dict[str, Any] +# --- Helper Functions --- + + +async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: + """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" + if isinstance(image, GeneratedImage): + try: + saved_path = await image.save(path=str(temp_dir), full_size=True) + except Exception as e: + logger.warning( + f"Failed to download full-size GeneratedImage, retrying with default size: {e}" + ) + saved_path = await image.save(path=str(temp_dir), full_size=False) + else: + saved_path = await image.save(path=str(temp_dir)) + + if not saved_path: + raise ValueError("Failed to save generated image") + + original_path = Path(saved_path) + random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" + new_path = temp_dir / random_name + original_path.rename(new_path) + + data = new_path.read_bytes() + width, height = extract_image_dimensions(data) + filename = random_name + return base64.b64encode(data).decode("ascii"), width, height, filename + + +def _calculate_usage( + messages: list[Message], + assistant_text: str | None, + tool_calls: list[Any] | None, +) -> tuple[int, int, int]: + """Calculate prompt, completion and total tokens consistently.""" + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) + tool_args_text = "" + if tool_calls: + for call in tool_calls: + if hasattr(call, "function"): + tool_args_text += call.function.arguments or "" + elif isinstance(call, dict): + tool_args_text += call.get("function", {}).get("arguments", "") + + completion_basis = assistant_text or "" + if tool_args_text: + completion_basis = ( + f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text + ) + + completion_tokens = estimate_tokens(completion_basis) + return prompt_tokens, 
completion_tokens, prompt_tokens + completion_tokens + + +def _create_responses_standard_payload( + response_id: str, + created_time: int, + model_name: str, + assistant_text: str | None, + detected_tool_calls: list[Any] | None, + image_call_items: list[ResponseImageGenerationCall], + response_contents: list[ResponseOutputContent], + usage: ResponseUsage, + request_data: ResponseCreateRequest, + normalized_input: Any, +) -> ResponseCreateResponse: + """Unified factory for building ResponseCreateResponse objects.""" + message_id = f"msg_{uuid.uuid4().hex}" + tool_call_items: list[ResponseToolCall] = [] + if detected_tool_calls: + tool_call_items = [ + ResponseToolCall( + id=call.id if hasattr(call, "id") else call["id"], + status="completed", + function=call.function if hasattr(call, "function") else call["function"], + ) + for call in detected_tool_calls + ] + + return ResponseCreateResponse( + id=response_id, + created_at=created_time, + model=model_name, + output=[ + ResponseOutputMessage( + id=message_id, + type="message", + role="assistant", + content=response_contents, + ), + *tool_call_items, + *image_call_items, + ], + status="completed", + usage=usage, + input=normalized_input or None, + metadata=request_data.metadata or None, + tools=request_data.tools, + tool_choice=request_data.tool_choice, + ) + + +def _create_chat_completion_standard_payload( + completion_id: str, + created_time: int, + model_name: str, + visible_output: str | None, + tool_calls_payload: list[dict] | None, + finish_reason: str, + usage: dict, +) -> dict: + """Unified factory for building Chat Completion response dictionaries.""" + return { + "id": completion_id, + "object": "chat.completion", + "created": created_time, + "model": model_name, + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": visible_output or None, + "tool_calls": tool_calls_payload or None, + }, + "finish_reason": finish_reason, + } + ], + "usage": usage, + } + + +def _process_llm_output( + raw_output_with_think: str, + raw_output_clean: str, + structured_requirement: StructuredOutputRequirement | None, +) -> tuple[str, str, list[Any]]: + """ + Common post-processing logic for Gemini output. + Returns: (visible_text, storage_output, tool_calls) + """ + visible_with_think, tool_calls = extract_tool_calls(raw_output_with_think) + if tool_calls: + logger.debug(f"Detected {len(tool_calls)} tool call(s) in model output.") + + visible_output = visible_with_think.strip() + + storage_output, _ = extract_tool_calls(raw_output_clean) + storage_output = storage_output.strip() + + if structured_requirement: + cleaned_for_json = LMDBConversationStore.remove_think_tags(visible_output) + json_text = strip_code_fence(cleaned_for_json or "") + if json_text: + try: + structured_payload = orjson.loads(json_text) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") + visible_output = canonical_output + storage_output = canonical_output + logger.debug( + f"Structured response fulfilled (schema={structured_requirement.schema_name})." + ) + except orjson.JSONDecodeError: + logger.warning( + f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name})." 
+ ) + + return visible_output, storage_output, tool_calls + + +def _persist_conversation( + db: LMDBConversationStore, + model_name: str, + client_id: str, + metadata: list[str | None], + messages: list[Message], + storage_output: str | None, + tool_calls: list[Any] | None, +) -> str | None: + """Unified logic to save conversation history to LMDB.""" + try: + current_assistant_message = Message( + role="assistant", + content=storage_output or None, + tool_calls=tool_calls or None, + ) + full_history = [*messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + + conv = ConversationInStore( + model=model_name, + client_id=client_id, + metadata=metadata, + messages=cleaned_history, + ) + key = db.store(conv) + logger.debug(f"Conversation saved to LMDB with key: {key[:12]}") + return key + except Exception as e: + logger.warning(f"Failed to save {len(messages) + 1} messages to LMDB: {e}") + return None + + def _build_structured_requirement( response_format: dict[str, Any] | None, ) -> StructuredOutputRequirement | None: @@ -80,17 +281,23 @@ def _build_structured_requirement( return None if response_format.get("type") != "json_schema": - logger.warning(f"Unsupported response_format type requested: {response_format}") + logger.warning( + f"Unsupported response_format type requested: {reprlib.repr(response_format)}" + ) return None json_schema = response_format.get("json_schema") if not isinstance(json_schema, dict): - logger.warning(f"Invalid json_schema payload in response_format: {response_format}") + logger.warning( + f"Invalid json_schema payload in response_format: {reprlib.repr(response_format)}" + ) return None schema = json_schema.get("schema") if not isinstance(schema, dict): - logger.warning(f"Missing `schema` object in response_format payload: {response_format}") + logger.warning( + f"Missing `schema` object in response_format payload: {reprlib.repr(response_format)}" + ) return None schema_name = json_schema.get("name") or "response" @@ -136,7 +343,9 @@ def _build_tool_prompt( description = function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = orjson.dumps(function.parameters).decode("utf-8") + schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode( + "utf-8" + ) lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -155,7 +364,6 @@ def _build_tool_prompt( lines.append( f"You are required to call the tool named `{target}`. Do not call any other tool." ) - # `auto` or None fall back to default instructions. lines.append( "When you decide to call a tool you MUST respond with nothing except a single fenced block exactly like the template below." @@ -221,7 +429,7 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: if isinstance(msg.content, str): if XML_HINT_STRIPPED not in msg.content: - msg.content = f"{msg.content}{XML_WRAP_HINT}" + msg.content = f"{msg.content}\n{XML_WRAP_HINT}" return if isinstance(msg.content, list): @@ -231,15 +439,13 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: text_value = part.text or "" if XML_HINT_STRIPPED in text_value: return - part.text = f"{text_value}{XML_WRAP_HINT}" + part.text = f"{text_value}\n{XML_WRAP_HINT}" return messages_text = XML_WRAP_HINT.strip() msg.content.append(ContentItem(type="text", text=messages_text)) return - # No user message to annotate; nothing to do. 
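
A note on the `OPT_SORT_KEYS` change in `_build_tool_prompt` above: the rendered tool prompt becomes part of the conversation text that `_find_reusable_session` later matches against, so the schema serialization should be byte-stable across requests. A minimal illustration (plain `orjson`, no project code involved):

```python
import orjson

params = {"b": 1, "a": 2}

# Default serialization preserves insertion order, so two semantically
# identical schemas can render differently from one request to the next...
assert orjson.dumps(params) == b'{"b":1,"a":2}'

# ...while OPT_SORT_KEYS always yields one canonical byte sequence,
# keeping the injected tool prompt stable for history matching.
assert orjson.dumps(params, option=orjson.OPT_SORT_KEYS) == b'{"a":2,"b":1}'
```
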
- def _conversation_has_code_hint(messages: list[Message]) -> bool: """Return True if any system message already includes the code block hint.""" @@ -290,7 +496,6 @@ def _prepare_messages_for_model( logger.debug("Injected default code block hint for Gemini conversation.") if not instructions: - # Still need to ensure XML hint for the last user message if tools are present if tools and tool_choice != "none": _append_xml_hint_to_last_user_message(prepared) return prepared @@ -323,7 +528,6 @@ def _response_items_to_messages( normalized_input: list[ResponseInputItem] = [] for item in items: role = item.role - content = item.content normalized_contents: list[ResponseInputContent] = [] if isinstance(content, str): @@ -394,7 +598,6 @@ def _instructions_to_messages( continue role = item.role - content = item.content if isinstance(content, str): instruction_messages.append(Message(role=role, content=content)) @@ -432,10 +635,7 @@ def _instructions_to_messages( def _get_model_by_name(name: str) -> Model: - """ - Retrieve a Model instance by name, considering custom models from config - and the update strategy (append or overwrite). - """ + """Retrieve a Model instance by name.""" strategy = g_config.gemini.model_strategy custom_models = {m.model_name: m for m in g_config.gemini.models if m.model_name} @@ -449,9 +649,7 @@ def _get_model_by_name(name: str) -> Model: def _get_available_models() -> list[ModelData]: - """ - Return a list of available models based on configuration strategy. - """ + """Return a list of available models based on configuration strategy.""" now = int(datetime.now(tz=timezone.utc).timestamp()) strategy = g_config.gemini.model_strategy models_data = [] @@ -486,910 +684,897 @@ def _get_available_models() -> list[ModelData]: return models_data -@router.get("/v1/models", response_model=ModelListResponse) -async def list_models(api_key: str = Depends(verify_api_key)): - models = _get_available_models() - return ModelListResponse(data=models) - - -@router.post("/v1/chat/completions") -async def create_chat_completion( - request: ChatCompletionRequest, - api_key: str = Depends(verify_api_key), - tmp_dir: Path = Depends(get_temp_dir), - image_store: Path = Depends(get_image_store_dir), -): - pool = GeminiClientPool() - db = LMDBConversationStore() - - try: - model = _get_model_by_name(request.model) - except ValueError as exc: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc - - if len(request.messages) == 0: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="At least one message is required in the conversation.", - ) +async def _find_reusable_session( + db: LMDBConversationStore, + pool: GeminiClientPool, + model: Model, + messages: list[Message], +) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[Message]]: + """Find an existing chat session matching the longest suitable history prefix.""" + if len(messages) < 2: + return None, None, messages - structured_requirement = _build_structured_requirement(request.response_format) - if structured_requirement and request.stream: - logger.debug( - "Structured response requested with streaming enabled; will stream canonical JSON once ready." - ) - if structured_requirement: - logger.debug( - f"Structured response requested for /v1/chat/completions (schema={structured_requirement.schema_name})." 
- )
 
+    search_end = len(messages)
+    while search_end >= 2:
+        search_history = messages[:search_end]
+        if search_history[-1].role in {"assistant", "system", "tool"}:
+            try:
+                if conv := db.find(model.model_name, search_history):
+                    now = datetime.now()
+                    updated_at = conv.updated_at or conv.created_at or now
+                    age_minutes = (now - updated_at).total_seconds() / 60
+                    if age_minutes <= METADATA_TTL_MINUTES:
+                        client = await pool.acquire(conv.client_id)
+                        session = client.start_chat(metadata=conv.metadata, model=model)
+                        remain = messages[search_end:]
+                        logger.debug(
+                            f"Match found at prefix length {search_end}/{len(messages)}. Client: {conv.client_id}"
+                        )
+                        return session, client, remain
+                    else:
+                        logger.debug(
+                            f"Matched conversation at length {search_end} is too old ({age_minutes:.1f}m), skipping reuse."
+                        )
+                else:
+                    logger.debug(f"No stored conversation for prefix length {search_end}.")
+            except Exception as e:
+                logger.warning(
+                    f"Error checking LMDB for reusable session at length {search_end}: {e}"
+                )
+                break
+        search_end -= 1
 
-    extra_instructions = [structured_requirement.instruction] if structured_requirement else None
+    logger.debug(f"No reusable session found for {len(messages)} messages.")
+    return None, None, messages
 
-    # Check if conversation is reusable
-    session, client, remaining_messages = await _find_reusable_session(
-        db, pool, model, request.messages
-    )
 
-    if session:
-        # Optimization: When reusing a session, we don't need to resend the heavy tool definitions
-        # or structured output instructions as they are already in the Gemini session history.
-        messages_to_send = _prepare_messages_for_model(
-            remaining_messages,
-            request.tools,
-            request.tool_choice,
-            extra_instructions,
-            inject_system_defaults=False,
-        )
-        if not messages_to_send:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail="No new messages to send for the existing session.",
-            )
-        if len(messages_to_send) == 1:
-            model_input, files = await GeminiClientWrapper.process_message(
-                messages_to_send[0], tmp_dir, tagged=False
-            )
-        else:
-            model_input, files = await GeminiClientWrapper.process_conversation(
-                messages_to_send, tmp_dir
-            )
-        logger.debug(
-            f"Reused session {session.metadata} - sending {len(messages_to_send)} prepared messages."
- ) - else: - # Start a new session and concat messages into a single string +async def _send_with_split( + session: ChatSession, + text: str, + files: list[Path | str | io.BytesIO] | None = None, + stream: bool = False, +) -> AsyncGenerator[ModelOutput, None] | ModelOutput: + """Send text to Gemini, splitting or converting to attachment if too long.""" + if len(text) <= MAX_CHARS_PER_REQUEST: try: - client = await pool.acquire() - session = client.start_chat(model=model) - messages_to_send = _prepare_messages_for_model( - request.messages, request.tools, request.tool_choice, extra_instructions - ) - model_input, files = await GeminiClientWrapper.process_conversation( - messages_to_send, tmp_dir - ) - except ValueError as e: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) - except RuntimeError as e: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) + if stream: + return session.send_message_stream(text, files=files) + return await session.send_message(text, files=files) except Exception as e: - logger.exception(f"Error in preparing conversation: {e}") + logger.exception(f"Error sending message to Gemini: {e}") raise - logger.debug("New session started.") - # Generate response + logger.info( + f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment." + ) + file_obj = io.BytesIO(text.encode("utf-8")) + file_obj.name = "message.txt" try: - assert session and client, "Session and client not available" - client_id = client.id - logger.debug( - f"Client ID: {client_id}, Input length: {len(model_input)}, files count: {len(files)}" + final_files = list(files) if files else [] + final_files.append(file_obj) + instruction = ( + "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n" + "**System Instruction:**\n" + "1. Read the content of `message.txt`.\n" + "2. Treat that content as the **primary** user prompt for this turn.\n" + "3. Execute the instructions or answer the questions found *inside* that file immediately.\n" ) - response = await _send_with_split(session, model_input, files=files) - except APIError as exc: - client_id = client.id if client else "unknown" - logger.warning(f"Gemini API returned invalid response for client {client_id}: {exc}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Gemini temporarily returned an invalid response. 
Please retry.", - ) from exc - except HTTPException: - raise + if stream: + return session.send_message_stream(instruction, files=final_files) + return await session.send_message(instruction, files=final_files) except Exception as e: - logger.exception(f"Unexpected error generating content from Gemini API: {e}") - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Gemini returned an unexpected error.", - ) from e + logger.exception(f"Error sending large text as file to Gemini: {e}") + raise - # Format the response from API - try: - raw_output_with_think = GeminiClientWrapper.extract_output(response, include_thoughts=True) - raw_output_clean = GeminiClientWrapper.extract_output(response, include_thoughts=False) - except IndexError as exc: - logger.exception("Gemini output parsing failed (IndexError).") - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Gemini returned malformed response content.", - ) from exc - except Exception as exc: - logger.exception("Gemini output parsing failed unexpectedly.") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Gemini output parsing failed unexpectedly.", - ) from exc - visible_output, tool_calls = extract_tool_calls(raw_output_with_think) - storage_output = remove_tool_call_blocks(raw_output_clean).strip() - tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] +class StreamingOutputFilter: + """ + Enhanced streaming filter that suppresses: + 1. XML tool call blocks: ```xml ... ``` + 2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|> + 3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content) + 4. Control tokens: <|im_start|>, <|im_end|> + 5. System instructions/hints: XML_WRAP_HINT, CODE_BLOCK_HINT, etc. 
+ """ - if structured_requirement: - cleaned_visible = strip_code_fence(visible_output or "") - if not cleaned_visible: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="LLM returned an empty response while JSON schema output was requested.", - ) - try: - structured_payload = orjson.loads(cleaned_visible) - except orjson.JSONDecodeError as exc: - logger.warning( - f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " - f"{cleaned_visible}" - ) - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="LLM returned invalid JSON for the requested response_format.", - ) from exc + def __init__(self): + self.buffer = "" + self.in_xml_tool = False + self.in_tagged_block = False + self.in_role_header = False + self.current_role = "" + + self.XML_START = "```xml" + self.XML_END = "```" + self.TAG_START = "<|im_start|>" + self.TAG_END = "<|im_end|>" + self.SYSTEM_HINTS = [ + XML_WRAP_HINT, + XML_HINT_STRIPPED, + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + ] - canonical_output = orjson.dumps(structured_payload).decode("utf-8") - visible_output = canonical_output - storage_output = canonical_output + def process(self, chunk: str) -> str: + self.buffer += chunk + to_yield = "" + + while self.buffer: + if self.in_xml_tool: + end_idx = self.buffer.find(self.XML_END) + if end_idx != -1: + self.buffer = self.buffer[end_idx + len(self.XML_END) :] + self.in_xml_tool = False + else: + break + elif self.in_role_header: + nl_idx = self.buffer.find("\n") + if nl_idx != -1: + role_text = self.buffer[:nl_idx].strip().lower() + self.current_role = role_text + self.buffer = self.buffer[nl_idx + 1 :] + self.in_role_header = False + self.in_tagged_block = True + else: + break + elif self.in_tagged_block: + end_idx = self.buffer.find(self.TAG_END) + if end_idx != -1: + content = self.buffer[:end_idx] + if self.current_role != "tool": + to_yield += content + self.buffer = self.buffer[end_idx + len(self.TAG_END) :] + self.in_tagged_block = False + self.current_role = "" + else: + if self.current_role == "tool": + break + else: + yield_len = len(self.buffer) - (len(self.TAG_END) - 1) + if yield_len > 0: + to_yield += self.buffer[:yield_len] + self.buffer = self.buffer[yield_len:] + break + else: + # Outside any special block. Look for starts. 
+ earliest_idx = -1 + match_type = "" + + xml_idx = self.buffer.find(self.XML_START) + if xml_idx != -1: + earliest_idx = xml_idx + match_type = "xml" + + tag_s_idx = self.buffer.find(self.TAG_START) + if tag_s_idx != -1: + if earliest_idx == -1 or tag_s_idx < earliest_idx: + earliest_idx = tag_s_idx + match_type = "tag_start" + + tag_e_idx = self.buffer.find(self.TAG_END) + if tag_e_idx != -1: + if earliest_idx == -1 or tag_e_idx < earliest_idx: + earliest_idx = tag_e_idx + match_type = "tag_end" + + if earliest_idx != -1: + # Yield text before the match + to_yield += self.buffer[:earliest_idx] + self.buffer = self.buffer[earliest_idx:] + + if match_type == "xml": + self.in_xml_tool = True + self.buffer = self.buffer[len(self.XML_START) :] + elif match_type == "tag_start": + self.in_role_header = True + self.buffer = self.buffer[len(self.TAG_START) :] + elif match_type == "tag_end": + # Orphaned end tag, just skip it + self.buffer = self.buffer[len(self.TAG_END) :] + continue + else: + # Check for prefixes + prefixes = [self.XML_START, self.TAG_START, self.TAG_END] + max_keep = 0 + for p in prefixes: + for i in range(len(p) - 1, 0, -1): + if self.buffer.endswith(p[:i]): + max_keep = max(max_keep, i) + break - if tool_calls_payload: - logger.debug(f"Detected tool calls: {tool_calls_payload}") + yield_len = len(self.buffer) - max_keep + if yield_len > 0: + to_yield += self.buffer[:yield_len] + self.buffer = self.buffer[yield_len:] + break - # After formatting, persist the conversation to LMDB - try: - current_assistant_message = Message( - role="assistant", - content=storage_output or None, - tool_calls=tool_calls or None, - ) - # Sanitize the entire history including the new message to ensure consistency - full_history = [*request.messages, current_assistant_message] - cleaned_history = db.sanitize_assistant_messages(full_history) + # Final pass: filter out system hints from the text to be yielded + for hint in self.SYSTEM_HINTS: + if hint in to_yield: + to_yield = to_yield.replace(hint, "") - conv = ConversationInStore( - model=model.model_name, - client_id=client.id, - metadata=session.metadata, - messages=cleaned_history, - ) - key = db.store(conv) - logger.debug(f"Conversation saved to LMDB with key: {key}") - except Exception as e: - # We can still return the response even if saving fails - logger.warning(f"Failed to save conversation to LMDB: {e}") + return to_yield - # Return with streaming or standard response - completion_id = f"chatcmpl-{uuid.uuid4()}" - timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - if request.stream: - return _create_streaming_response( - visible_output, - tool_calls_payload, - completion_id, - timestamp, - request.model, - request.messages, - ) - else: - return _create_standard_response( - visible_output, - tool_calls_payload, - completion_id, - timestamp, - request.model, - request.messages, - ) - - -@router.post("/v1/responses") -async def create_response( - request_data: ResponseCreateRequest, - request: Request, - api_key: str = Depends(verify_api_key), - tmp_dir: Path = Depends(get_temp_dir), - image_store: Path = Depends(get_image_store_dir), -): - base_messages, normalized_input = _response_items_to_messages(request_data.input) - structured_requirement = _build_structured_requirement(request_data.response_format) - if structured_requirement and request_data.stream: - logger.debug( - "Structured response requested with streaming enabled; streaming not supported for Responses." 
- ) - - extra_instructions: list[str] = [] - if structured_requirement: - extra_instructions.append(structured_requirement.instruction) - logger.debug( - f"Structured response requested for /v1/responses (schema={structured_requirement.schema_name})." - ) - - # Separate standard tools from image generation tools - standard_tools: list[Tool] = [] - image_tools: list[ResponseImageTool] = [] - - if request_data.tools: - for t in request_data.tools: - if isinstance(t, Tool): - standard_tools.append(t) - elif isinstance(t, ResponseImageTool): - image_tools.append(t) - # Handle dicts if Pydantic didn't convert them fully (fallback) - elif isinstance(t, dict): - t_type = t.get("type") - if t_type == "function": - standard_tools.append(Tool.model_validate(t)) - elif t_type == "image_generation": - image_tools.append(ResponseImageTool.model_validate(t)) - - image_instruction = _build_image_generation_instruction( - image_tools, - request_data.tool_choice - if isinstance(request_data.tool_choice, ResponseToolChoice) - else None, - ) - if image_instruction: - extra_instructions.append(image_instruction) - logger.debug("Image generation support enabled for /v1/responses request.") - - preface_messages = _instructions_to_messages(request_data.instructions) - conversation_messages = base_messages - if preface_messages: - conversation_messages = [*preface_messages, *base_messages] - logger.debug( - f"Injected {len(preface_messages)} instruction messages before sending to Gemini." - ) - - # Pass standard tools to the prompt builder - # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) - model_tool_choice = None - if isinstance(request_data.tool_choice, str): - model_tool_choice = request_data.tool_choice - elif isinstance(request_data.tool_choice, ToolChoiceFunction): - model_tool_choice = request_data.tool_choice - # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. 
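
For reference, a short trace of how the `StreamingOutputFilter` above behaves when control tokens arrive split across chunk boundaries. The chunk strings are made up for the example; the class and its hint constants are the ones defined in this module:

```python
f = StreamingOutputFilter()
out = ""
for chunk in ["Hello <|im_st", "art|>tool\n{...}<|im_e", "nd|> world"]:
    # A partial suffix such as "<|im_st" is held back rather than emitted,
    # so a token spanning two chunks is still recognized.
    out += f.process(chunk)
out += f.flush()

# The tool block is fully suppressed even though <|im_start|> and
# <|im_end|> were split across chunks.
assert out == "Hello  world"
```
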
- - messages = _prepare_messages_for_model( - conversation_messages, - tools=standard_tools or None, - tool_choice=model_tool_choice, - extra_instructions=extra_instructions or None, - ) - - pool = GeminiClientPool() - db = LMDBConversationStore() - - try: - model = _get_model_by_name(request_data.model) - except ValueError as exc: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc - - session, client, remaining_messages = await _find_reusable_session(db, pool, model, messages) - - async def _build_payload( - _payload_messages: list[Message], _reuse_session: bool - ) -> tuple[str, list[Path | str]]: - if _reuse_session and len(_payload_messages) == 1: - return await GeminiClientWrapper.process_message( - _payload_messages[0], tmp_dir, tagged=False - ) - return await GeminiClientWrapper.process_conversation(_payload_messages, tmp_dir) - - reuse_session = session is not None - if reuse_session: - messages_to_send = _prepare_messages_for_model( - remaining_messages, - tools=request_data.tools, # Keep for XML hint logic - tool_choice=request_data.tool_choice, - extra_instructions=None, # Already in session history - inject_system_defaults=False, - ) - if not messages_to_send: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="No new messages to send for the existing session.", - ) - payload_messages = messages_to_send - model_input, files = await _build_payload(payload_messages, _reuse_session=True) - logger.debug( - f"Reused session {session.metadata} - sending {len(payload_messages)} prepared messages." - ) - else: - try: - client = await pool.acquire() - session = client.start_chat(model=model) - payload_messages = messages - model_input, files = await _build_payload(payload_messages, _reuse_session=False) - except ValueError as e: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) - except RuntimeError as e: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) - except Exception as e: - logger.exception(f"Error in preparing conversation for responses API: {e}") - raise - logger.debug("New session started for /v1/responses request.") + def flush(self) -> str: + # If we are stuck in a tool block or role header at the end, + # it usually means malformed output. + if self.in_xml_tool or (self.in_tagged_block and self.current_role == "tool"): + return "" - try: - assert session and client, "Session and client not available" - client_id = client.id - logger.debug( - f"Client ID: {client_id}, Input length: {len(model_input)}, files count: {len(files)}" - ) - model_output = await _send_with_split(session, model_input, files=files) - except APIError as exc: - client_id = client.id if client else "unknown" - logger.warning(f"Gemini API returned invalid response for client {client_id}: {exc}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Gemini temporarily returned an invalid response. 
Please retry.", - ) from exc - except HTTPException: - raise - except Exception as e: - logger.exception(f"Unexpected error generating content from Gemini API for responses: {e}") - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Gemini returned an unexpected error.", - ) from e + final_text = self.buffer + self.buffer = "" - try: - text_with_think = GeminiClientWrapper.extract_output(model_output, include_thoughts=True) - text_without_think = GeminiClientWrapper.extract_output( - model_output, include_thoughts=False - ) - except IndexError as exc: - logger.exception("Gemini output parsing failed (IndexError).") - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Gemini returned malformed response content.", - ) from exc - except Exception as exc: - logger.exception("Gemini output parsing failed unexpectedly.") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Gemini output parsing failed unexpectedly.", - ) from exc + # Filter out any orphaned/partial control tokens or hints + final_text = CONTROL_TOKEN_RE.sub("", final_text) + for hint in self.SYSTEM_HINTS: + final_text = final_text.replace(hint, "") - visible_text, detected_tool_calls = extract_tool_calls(text_with_think) - storage_output = remove_tool_call_blocks(text_without_think).strip() - assistant_text = LMDBConversationStore.remove_think_tags(visible_text.strip()) + return final_text.strip() - if structured_requirement: - cleaned_visible = strip_code_fence(assistant_text or "") - if not cleaned_visible: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="LLM returned an empty response while JSON schema output was requested.", - ) - try: - structured_payload = orjson.loads(cleaned_visible) - except orjson.JSONDecodeError as exc: - logger.warning( - f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " - f"{cleaned_visible}" - ) - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="LLM returned invalid JSON for the requested response_format.", - ) from exc - - canonical_output = orjson.dumps(structured_payload).decode("utf-8") - assistant_text = canonical_output - storage_output = canonical_output - logger.debug( - f"Structured response fulfilled for /v1/responses (schema={structured_requirement.schema_name})." - ) - expects_image = ( - request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" - ) - images = model_output.images or [] - logger.debug( - f"Gemini returned {len(images)} image(s) for /v1/responses " - f"(expects_image={expects_image}, instruction_applied={bool(image_instruction)})." - ) - if expects_image and not images: - summary = assistant_text.strip() if assistant_text else "" - if summary: - summary = re.sub(r"\s+", " ", summary) - if len(summary) > 200: - summary = f"{summary[:197]}..." - logger.warning( - "Image generation requested but Gemini produced no images. " - f"client_id={client_id}, forced_tool_choice={request_data.tool_choice is not None}, " - f"instruction_applied={bool(image_instruction)}, assistant_preview='{summary}'" - ) - detail = "LLM returned no images for the requested image_generation tool." 
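
The inline JSON handling removed above is now centralized in `_process_llm_output`. Its canonicalization step works roughly as sketched below; the input string is illustrative, and `strip_code_fence` is assumed to unwrap a Markdown code fence the way the helper in `app/utils/helper.py` does:

```python
import orjson

raw = '```json\n{ "answer":   42 }\n```'
json_text = strip_code_fence(raw)   # assumed result: '{ "answer":   42 }'
payload = orjson.loads(json_text)   # {'answer': 42}

# Both the client-visible text and the stored history receive the same
# canonical form, so later prefix matching is not broken by whitespace.
canonical = orjson.dumps(payload).decode("utf-8")  # '{"answer":42}'
```
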
- if summary: - detail = f"{detail} Assistant response: {summary}" - raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) +# --- Response Builders & Streaming --- - response_contents: list[ResponseOutputContent] = [] - image_call_items: list[ResponseImageGenerationCall] = [] - for image in images: - try: - image_base64, width, height, filename = await _image_to_base64(image, image_store) - except Exception as exc: - logger.warning(f"Failed to download generated image: {exc}") - continue - - img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" - - # Use static URL for compatibility - image_url = ( - f"![{filename}]({request.base_url}images/{filename}?token={get_image_token(filename)})" - ) - - image_call_items.append( - ResponseImageGenerationCall( - id=filename.rsplit(".", 1)[0], - status="completed", - result=image_base64, - output_format=img_format, - size=f"{width}x{height}" if width and height else None, - ) - ) - # Add as output_text content for compatibility - response_contents.append( - ResponseOutputContent(type="output_text", text=image_url, annotations=[]) - ) - - tool_call_items: list[ResponseToolCall] = [] - if detected_tool_calls: - tool_call_items = [ - ResponseToolCall( - id=call.id, - status="completed", - function=call.function, - ) - for call in detected_tool_calls - ] - - if assistant_text: - response_contents.append( - ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) - ) - if not response_contents: - response_contents.append(ResponseOutputContent(type="output_text", text="", annotations=[])) - - created_time = int(datetime.now(tz=timezone.utc).timestamp()) - response_id = f"resp_{uuid.uuid4().hex}" - message_id = f"msg_{uuid.uuid4().hex}" - - input_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_arg_text = "".join(call.function.arguments or "" for call in detected_tool_calls) - completion_basis = assistant_text or "" - if tool_arg_text: - completion_basis = ( - f"{completion_basis}\n{tool_arg_text}" if completion_basis else tool_arg_text - ) - output_tokens = estimate_tokens(completion_basis) - usage = ResponseUsage( - input_tokens=input_tokens, - output_tokens=output_tokens, - total_tokens=input_tokens + output_tokens, - ) - - response_payload = ResponseCreateResponse( - id=response_id, - created_at=created_time, - model=request_data.model, - output=[ - ResponseOutputMessage( - id=message_id, - type="message", - role="assistant", - content=response_contents, - ), - *tool_call_items, - *image_call_items, - ], - status="completed", - usage=usage, - input=normalized_input or None, - metadata=request_data.metadata or None, - tools=request_data.tools, - tool_choice=request_data.tool_choice, - ) - - try: - current_assistant_message = Message( - role="assistant", - content=storage_output or None, - tool_calls=detected_tool_calls or None, - ) - full_history = [*messages, current_assistant_message] - cleaned_history = db.sanitize_assistant_messages(full_history) - - conv = ConversationInStore( - model=model.model_name, - client_id=client.id, - metadata=session.metadata, - messages=cleaned_history, - ) - key = db.store(conv) - logger.debug(f"Conversation saved to LMDB with key: {key}") - except Exception as exc: - logger.warning(f"Failed to save Responses conversation to LMDB: {exc}") - - if request_data.stream: - logger.debug( - f"Streaming Responses API payload (response_id={response_payload.id}, text_chunks={bool(assistant_text)})." 
- ) - return _create_responses_streaming_response(response_payload, assistant_text or "") - - return response_payload - - -async def _find_reusable_session( +def _create_real_streaming_response( + generator: AsyncGenerator[ModelOutput, None], + completion_id: str, + created_time: int, + model_name: str, + messages: list[Message], db: LMDBConversationStore, - pool: GeminiClientPool, model: Model, - messages: list[Message], -) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[Message]]: - """Find an existing chat session that matches the *longest* prefix of - ``messages`` **whose last element is an assistant/system reply**. - - Rationale - --------- - When a reply was generated by *another* server instance, the local LMDB may - only contain an older part of the conversation. However, as long as we can - line up **any** earlier assistant/system response, we can restore the - corresponding Gemini session and replay the *remaining* turns locally - (including that missing assistant reply and the subsequent user prompts). - - The algorithm therefore walks backwards through the history **one message at - a time**, each time requiring the current tail to be assistant/system before - querying LMDB. As soon as a match is found we recreate the session and - return the untouched suffix as ``remaining_messages``. - """ - - if len(messages) < 2: - return None, None, messages - - # Start with the full history and iteratively trim from the end. - search_end = len(messages) - - while search_end >= 2: - search_history = messages[:search_end] - - # Only try to match if the last stored message would be assistant/system/tool before querying LMDB. - if search_history[-1].role in {"assistant", "system", "tool"}: - try: - if conv := db.find(model.model_name, search_history): - # Check if metadata is too old - now = datetime.now() - updated_at = conv.updated_at or conv.created_at or now - age_minutes = (now - updated_at).total_seconds() / 60 - - if age_minutes <= METADATA_TTL_MINUTES: - client = await pool.acquire(conv.client_id) - session = client.start_chat(metadata=conv.metadata, model=model) - remain = messages[search_end:] - logger.debug( - f"Match found at prefix length {search_end}. Client: {conv.client_id}" - ) - return session, client, remain - else: - logger.debug( - f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." - ) - except Exception as e: - logger.warning( - f"Error checking LMDB for reusable session at length {search_end}: {e}" - ) - break - - # Trim one message and try again. - search_end -= 1 - - return None, None, messages - - -async def _send_with_split(session: ChatSession, text: str, files: list[Path | str] | None = None): + client_wrapper: GeminiClientWrapper, + session: ChatSession, + structured_requirement: StructuredOutputRequirement | None = None, +) -> StreamingResponse: """ - Send text to Gemini. If text is longer than ``MAX_CHARS_PER_REQUEST``, - it is converted into a temporary text file attachment to avoid splitting issues. + Create a real-time streaming response. + Reconciles manual delta accumulation with the model's final authoritative state. """ - if len(text) <= MAX_CHARS_PER_REQUEST: - try: - return await session.send_message(text, files=files) - except Exception as e: - logger.exception(f"Error sending message to Gemini: {e}") - raise - - logger.info( - f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment." 
- ) - - # Create a temporary directory to hold the message.txt file - # This ensures the filename is exactly 'message.txt' as expected by the instruction. - with tempfile.TemporaryDirectory() as tmpdirname: - temp_file_path = Path(tmpdirname) / "message.txt" - temp_file_path.write_text(text, encoding="utf-8") + async def generate_stream(): + full_thoughts, full_text = "", "" + has_started = False + last_chunk_was_thought = False + all_outputs: list[ModelOutput] = [] + suppressor = StreamingOutputFilter() try: - # Prepare the files list - final_files = list(files) if files else [] - final_files.append(temp_file_path) - - instruction = ( - "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n" - "**System Instruction:**\n" - "1. Read the content of `message.txt`.\n" - "2. Treat that content as the **primary** user prompt for this turn.\n" - "3. Execute the instructions or answer the questions found *inside* that file immediately.\n" - ) - - logger.debug(f"Sending prompt as temporary file: {temp_file_path}") - - return await session.send_message(instruction, files=final_files) - + async for chunk in generator: + all_outputs.append(chunk) + if not has_started: + data = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created_time, + "model": model_name, + "choices": [ + {"index": 0, "delta": {"role": "assistant"}, "finish_reason": None} + ], + } + yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" + has_started = True + + if t_delta := chunk.thoughts_delta: + if not last_chunk_was_thought and not full_thoughts: + yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': ''}, 'finish_reason': None}]}).decode('utf-8')}\n\n" + full_thoughts += t_delta + data = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created_time, + "model": model_name, + "choices": [ + {"index": 0, "delta": {"content": t_delta}, "finish_reason": None} + ], + } + yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" + last_chunk_was_thought = True + + if text_delta := chunk.text_delta: + if last_chunk_was_thought: + yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n" + last_chunk_was_thought = False + full_text += text_delta + if visible_delta := suppressor.process(text_delta): + data = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created_time, + "model": model_name, + "choices": [ + { + "index": 0, + "delta": {"content": visible_delta}, + "finish_reason": None, + } + ], + } + yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" except Exception as e: - logger.exception(f"Error sending large text as file to Gemini: {e}") - raise + logger.exception(f"Error during OpenAI streaming: {e}") + yield f"data: {orjson.dumps({'error': {'message': 'Streaming error occurred.', 'type': 'server_error', 'param': None, 'code': None}}).decode('utf-8')}\n\n" + return + if all_outputs: + final_chunk = all_outputs[-1] + if final_chunk.text: + full_text = final_chunk.text + if final_chunk.thoughts: + full_thoughts = final_chunk.thoughts -def _create_streaming_response( - model_output: str, - tool_calls: list[dict], - completion_id: str, - created_time: int, - model: str, - messages: list[Message], -) -> 
StreamingResponse: - """Create streaming response with `usage` calculation included in the final chunk.""" + if last_chunk_was_thought: + yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n" - # Calculate token usage - prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) - completion_tokens = estimate_tokens(model_output + tool_args) - total_tokens = prompt_tokens + completion_tokens - finish_reason = "tool_calls" if tool_calls else "stop" + if remaining_text := suppressor.flush(): + data = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created_time, + "model": model_name, + "choices": [ + {"index": 0, "delta": {"content": remaining_text}, "finish_reason": None} + ], + } + yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - async def generate_stream(): - # Send start event - data = { - "id": completion_id, - "object": "chat.completion.chunk", - "created": created_time, - "model": model, - "choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}], - } - yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" + raw_output_with_think = f"{full_thoughts}\n" if full_thoughts else "" + raw_output_with_think += full_text + assistant_text, storage_output, tool_calls = _process_llm_output( + raw_output_with_think, full_text, structured_requirement + ) - # Stream output text in chunks for efficiency - for chunk in iter_stream_segments(model_output): + images = [] + for out in all_outputs: + if out.images: + images.extend(out.images) + + image_markdown = "" + for image in images: + try: + image_store = get_image_store_dir() + _, _, _, filename = await _image_to_base64(image, image_store) + img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + image_markdown += f"\n\n{img_url}" + except Exception as exc: + logger.warning(f"Failed to process image in OpenAI stream: {exc}") + + if image_markdown: + assistant_text += image_markdown + storage_output += image_markdown + # Send the image markdown as a final text chunk before usage data = { "id": completion_id, "object": "chat.completion.chunk", "created": created_time, - "model": model, - "choices": [{"index": 0, "delta": {"content": chunk}, "finish_reason": None}], + "model": model_name, + "choices": [ + {"index": 0, "delta": {"content": image_markdown}, "finish_reason": None} + ], } yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - if tool_calls: - tool_calls_delta = [{**call, "index": idx} for idx, call in enumerate(tool_calls)] + tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] + if tool_calls_payload: + tool_calls_delta = [ + {**call, "index": idx} for idx, call in enumerate(tool_calls_payload) + ] data = { "id": completion_id, "object": "chat.completion.chunk", "created": created_time, - "model": model, + "model": model_name, "choices": [ - { - "index": 0, - "delta": {"tool_calls": tool_calls_delta}, - "finish_reason": None, - } + {"index": 0, "delta": {"tool_calls": tool_calls_delta}, "finish_reason": None} ], } yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - # Send end event + p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, tool_calls) + usage = {"prompt_tokens": p_tok, "completion_tokens": c_tok, 
"total_tokens": t_tok} data = { "id": completion_id, "object": "chat.completion.chunk", "created": created_time, - "model": model, - "choices": [{"index": 0, "delta": {}, "finish_reason": finish_reason}], - "usage": { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": total_tokens, - }, + "model": model_name, + "choices": [ + {"index": 0, "delta": {}, "finish_reason": "tool_calls" if tool_calls else "stop"} + ], + "usage": usage, } + _persist_conversation( + db, + model.model_name, + client_wrapper.id, + session.metadata, + messages, # This should be the prepared messages + storage_output, + tool_calls, + ) yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" yield "data: [DONE]\n\n" return StreamingResponse(generate_stream(), media_type="text/event-stream") -def _create_responses_streaming_response( - response_payload: ResponseCreateResponse, - assistant_text: str | None, +def _create_responses_real_streaming_response( + generator: AsyncGenerator[ModelOutput, None], + response_id: str, + created_time: int, + model_name: str, + messages: list[Message], + db: LMDBConversationStore, + model: Model, + client_wrapper: GeminiClientWrapper, + session: ChatSession, + request_data: ResponseCreateRequest, + image_store: Path, + base_url: str, + structured_requirement: StructuredOutputRequirement | None = None, ) -> StreamingResponse: - """Create streaming response for Responses API using event types defined by OpenAI.""" - - response_dict = response_payload.model_dump(mode="json") - response_id = response_payload.id - created_time = response_payload.created_at - model = response_payload.model - - logger.debug( - f"Preparing streaming envelope for /v1/responses (response_id={response_id}, model={model})." - ) - + """ + Create a real-time streaming response for the Responses API. + Ensures final accumulated text and thoughts are synchronized. 
+ """ base_event = { "id": response_id, "object": "response", "created_at": created_time, - "model": model, + "model": model_name, } - created_snapshot: dict[str, Any] = { - "id": response_id, - "object": "response", - "created_at": created_time, - "model": model, - "status": "in_progress", - } - if response_dict.get("metadata") is not None: - created_snapshot["metadata"] = response_dict["metadata"] - if response_dict.get("input") is not None: - created_snapshot["input"] = response_dict["input"] - if response_dict.get("tools") is not None: - created_snapshot["tools"] = response_dict["tools"] - if response_dict.get("tool_choice") is not None: - created_snapshot["tool_choice"] = response_dict["tool_choice"] - async def generate_stream(): - # Emit creation event - data = { - **base_event, - "type": "response.created", - "response": created_snapshot, - } - yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request_data.metadata, 'input': None, 'tools': request_data.tools, 'tool_choice': request_data.tool_choice}}).decode('utf-8')}\n\n" + message_id = f"msg_{uuid.uuid4().hex}" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n" - # Stream output items (Message/Text, Tool Calls, Images) - for i, item in enumerate(response_payload.output): - item_json = item.model_dump(mode="json", exclude_none=True) + full_thoughts, full_text = "", "" + last_chunk_was_thought = False + all_outputs: list[ModelOutput] = [] + suppressor = StreamingOutputFilter() - added_event = { - **base_event, - "type": "response.output_item.added", - "output_index": i, - "item": item_json, - } - yield f"data: {orjson.dumps(added_event).decode('utf-8')}\n\n" - - # 2. 
Stream content if it's a message (text) - if item.type == "message": - content_text = "" - # Aggregate text content to stream - for c in item.content: - if c.type == "output_text" and c.text: - content_text += c.text - - if content_text: - for chunk in iter_stream_segments(content_text): - delta_event = { - **base_event, - "type": "response.output_text.delta", - "output_index": i, - "delta": chunk, - } - yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" + try: + async for chunk in generator: + all_outputs.append(chunk) + if t_delta := chunk.thoughts_delta: + if not last_chunk_was_thought and not full_thoughts: + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': ''}).decode('utf-8')}\n\n" + full_thoughts += t_delta + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': t_delta}).decode('utf-8')}\n\n" + last_chunk_was_thought = True + if text_delta := chunk.text_delta: + if last_chunk_was_thought: + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '\n'}).decode('utf-8')}\n\n" + last_chunk_was_thought = False + full_text += text_delta + if visible_delta := suppressor.process(text_delta): + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': visible_delta}).decode('utf-8')}\n\n" + except Exception as e: + logger.exception(f"Error during Responses API streaming: {e}") + yield f"data: {orjson.dumps({**base_event, 'type': 'error', 'error': {'message': 'Streaming error.'}}).decode('utf-8')}\n\n" + return - # Text done - done_event = { - **base_event, - "type": "response.output_text.done", - "output_index": i, - } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" - - # 3. Emit output_item.done for all types - # This confirms the item is fully transferred. 
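
The snapshot-based envelope removed here is replaced by genuinely incremental events. A client consumes the new stream as sketched below; the host, port, API key, and model name are illustrative only:

```python
import httpx
import orjson

with httpx.stream(
    "POST",
    "http://localhost:8000/v1/responses",
    json={"model": "gemini-2.5-flash", "input": "Hello", "stream": True},
    headers={"Authorization": "Bearer sk-test"},
    timeout=None,
) as resp:
    for line in resp.iter_lines():
        # SSE frames look like "data: {...}"; the stream ends with "data: [DONE]".
        if not line.startswith("data: ") or line == "data: [DONE]":
            continue
        event = orjson.loads(line[len("data: "):])
        # Text arrives incrementally via response.output_text.delta;
        # response.completed carries the final aggregated payload.
        if event.get("type") == "response.output_text.delta":
            print(event["delta"], end="", flush=True)
```
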
- item_done_event = { - **base_event, - "type": "response.output_item.done", - "output_index": i, - "item": item_json, - } - yield f"data: {orjson.dumps(item_done_event).decode('utf-8')}\n\n" + if all_outputs: + final_chunk = all_outputs[-1] + if final_chunk.text: + full_text = final_chunk.text + if final_chunk.thoughts: + full_thoughts = final_chunk.thoughts + + if last_chunk_was_thought: + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '\n'}).decode('utf-8')}\n\n" + if remaining_text := suppressor.flush(): + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': remaining_text}).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': 0}).decode('utf-8')}\n\n" + + raw_output_with_think = f"{full_thoughts}\n" if full_thoughts else "" + raw_output_with_think += full_text + assistant_text, storage_output, detected_tool_calls = _process_llm_output( + raw_output_with_think, full_text, structured_requirement + ) - # Emit completed event with full payload - completed_event = { - **base_event, - "type": "response.completed", - "response": response_dict, - } - yield f"data: {orjson.dumps(completed_event).decode('utf-8')}\n\n" + images = [] + for out in all_outputs: + if out.images: + images.extend(out.images) + + response_contents, image_call_items = [], [] + for image in images: + try: + image_base64, width, height, filename = await _image_to_base64(image, image_store) + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + image_url = ( + f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" + ) + image_call_items.append( + ResponseImageGenerationCall( + id=filename.rsplit(".", 1)[0], + result=image_base64, + output_format=img_format, + size=f"{width}x{height}" if width and height else None, + ) + ) + response_contents.append(ResponseOutputContent(type="output_text", text=image_url)) + except Exception as exc: + logger.warning(f"Failed to process image in stream: {exc}") + + if assistant_text: + response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) + if not response_contents: + response_contents.append(ResponseOutputContent(type="output_text", text="")) + + # Aggregate images for storage + image_markdown = "" + for img_call in image_call_items: + fname = f"{img_call.id}.{img_call.output_format}" + img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})" + image_markdown += f"\n\n{img_url}" + + if image_markdown: + storage_output += image_markdown + + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': [c.model_dump(mode='json') for c in response_contents]}}).decode('utf-8')}\n\n" + + current_idx = 1 + for call in detected_tool_calls: + tc_item = ResponseToolCall(id=call.id, status="completed", function=call.function) + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n" + current_idx += 1 + for img_call in image_call_items: + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 
'output_index': current_idx, 'item': img_call.model_dump(mode='json')}).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': img_call.model_dump(mode='json')}).decode('utf-8')}\n\n" + current_idx += 1 + + p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, detected_tool_calls) + usage = ResponseUsage(input_tokens=p_tok, output_tokens=c_tok, total_tokens=t_tok) + payload = _create_responses_standard_payload( + response_id, + created_time, + model_name, + assistant_text, + detected_tool_calls, + image_call_items, + response_contents, + usage, + request_data, + None, + ) + _persist_conversation( + db, + model.model_name, + client_wrapper.id, + session.metadata, + messages, + storage_output, + detected_tool_calls, + ) + yield f"data: {orjson.dumps({**base_event, 'type': 'response.completed', 'response': payload.model_dump(mode='json')}).decode('utf-8')}\n\n" yield "data: [DONE]\n\n" return StreamingResponse(generate_stream(), media_type="text/event-stream") -def _create_standard_response( - model_output: str, - tool_calls: list[dict], - completion_id: str, - created_time: int, - model: str, - messages: list[Message], -) -> dict: - """Create standard response""" - # Calculate token usage - prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) - completion_tokens = estimate_tokens(model_output + tool_args) - total_tokens = prompt_tokens + completion_tokens - finish_reason = "tool_calls" if tool_calls else "stop" +# --- Main Router Endpoints --- - message_payload: dict = {"role": "assistant", "content": model_output or None} - if tool_calls: - message_payload["tool_calls"] = tool_calls - result = { - "id": completion_id, - "object": "chat.completion", - "created": created_time, - "model": model, - "choices": [ - { - "index": 0, - "message": message_payload, - "finish_reason": finish_reason, - } - ], - "usage": { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": total_tokens, - }, - } +@router.get("/v1/models", response_model=ModelListResponse) +async def list_models(api_key: str = Depends(verify_api_key)): + models = _get_available_models() + return ModelListResponse(data=models) + + +@router.post("/v1/chat/completions") +async def create_chat_completion( + request: ChatCompletionRequest, + api_key: str = Depends(verify_api_key), + tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), +): + pool, db = GeminiClientPool(), LMDBConversationStore() + try: + model = _get_model_by_name(request.model) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc + if not request.messages: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Messages required.") - logger.debug(f"Response created with {total_tokens} total tokens") - return result + structured_requirement = _build_structured_requirement(request.response_format) + extra_instr = [structured_requirement.instruction] if structured_requirement else None + # This ensures that server-injected system instructions are part of the history + msgs = _prepare_messages_for_model( + request.messages, request.tools, request.tool_choice, extra_instr + ) -async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: - """Persist an image provided 
by gemini_webapi and return base64 plus dimensions and filename.""" - if isinstance(image, GeneratedImage): + session, client, remain = await _find_reusable_session(db, pool, model, msgs) + + if session: + if not remain: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") + + # For reused sessions, we only need to process the remaining messages. + # We don't re-inject system defaults to avoid duplicating instructions already in history. + input_msgs = _prepare_messages_for_model( + remain, request.tools, request.tool_choice, extra_instr, False + ) + if len(input_msgs) == 1: + m_input, files = await GeminiClientWrapper.process_message( + input_msgs[0], tmp_dir, tagged=False + ) + else: + m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir) + + logger.debug( + f"Reused session {reprlib.repr(session.metadata)} - sending {len(input_msgs)} prepared messages." + ) + else: try: - saved_path = await image.save(path=str(temp_dir), full_size=True) + client = await pool.acquire() + session = client.start_chat(model=model) + # Use the already prepared 'msgs' for a fresh session + m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir) except Exception as e: - logger.warning( - f"Failed to download full-size GeneratedImage, retrying with default size: {e}" - ) - saved_path = await image.save(path=str(temp_dir), full_size=False) + logger.exception("Error in preparing conversation") + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) + + completion_id = f"chatcmpl-{uuid.uuid4()}" + created_time = int(datetime.now(tz=timezone.utc).timestamp()) + + try: + assert session and client + logger.debug( + f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}" + ) + resp_or_stream = await _send_with_split( + session, m_input, files=files, stream=request.stream + ) + except Exception as e: + logger.exception("Gemini API error") + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) + + if request.stream: + return _create_real_streaming_response( + resp_or_stream, + completion_id, + created_time, + request.model, + msgs, # Use prepared 'msgs' + db, + model, + client, + session, + structured_requirement, + ) + + try: + raw_with_t = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=True) + raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False) + except Exception as exc: + logger.exception("Gemini output parsing failed.") + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response." 
+ ) from exc + + visible_output, storage_output, tool_calls = _process_llm_output( + raw_with_t, raw_clean, structured_requirement + ) + + # Process images for OpenAI non-streaming flow + images = resp_or_stream.images or [] + image_markdown = "" + for image in images: + try: + _, _, _, filename = await _image_to_base64(image, image_store) + img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + image_markdown += f"\n\n{img_url}" + except Exception as exc: + logger.warning(f"Failed to process image in OpenAI response: {exc}") + + if image_markdown: + visible_output += image_markdown + storage_output += image_markdown + + tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] + if tool_calls_payload: + logger.debug(f"Detected tool calls: {reprlib.repr(tool_calls_payload)}") + + p_tok, c_tok, t_tok = _calculate_usage(request.messages, visible_output, tool_calls) + usage = {"prompt_tokens": p_tok, "completion_tokens": c_tok, "total_tokens": t_tok} + payload = _create_chat_completion_standard_payload( + completion_id, + created_time, + request.model, + visible_output, + tool_calls_payload, + "tool_calls" if tool_calls else "stop", + usage, + ) + _persist_conversation( + db, + model.model_name, + client.id, + session.metadata, + msgs, # Use prepared messages 'msgs' + storage_output, + tool_calls, + ) + return payload + + +@router.post("/v1/responses") +async def create_response( + request_data: ResponseCreateRequest, + request: Request, + api_key: str = Depends(verify_api_key), + tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), +): + base_messages, norm_input = _response_items_to_messages(request_data.input) + struct_req = _build_structured_requirement(request_data.response_format) + extra_instr = [struct_req.instruction] if struct_req else [] + + standard_tools, image_tools = [], [] + if request_data.tools: + for t in request_data.tools: + if isinstance(t, Tool): + standard_tools.append(t) + elif isinstance(t, ResponseImageTool): + image_tools.append(t) + elif isinstance(t, dict): + if t.get("type") == "function": + standard_tools.append(Tool.model_validate(t)) + elif t.get("type") == "image_generation": + image_tools.append(ResponseImageTool.model_validate(t)) + + img_instr = _build_image_generation_instruction( + image_tools, + request_data.tool_choice + if isinstance(request_data.tool_choice, ResponseToolChoice) + else None, + ) + if img_instr: + extra_instr.append(img_instr) + preface = _instructions_to_messages(request_data.instructions) + conv_messages = [*preface, *base_messages] if preface else base_messages + model_tool_choice = ( + request_data.tool_choice + if isinstance(request_data.tool_choice, (str, ToolChoiceFunction)) + else None + ) + + messages = _prepare_messages_for_model( + conv_messages, standard_tools or None, model_tool_choice, extra_instr or None + ) + pool, db = GeminiClientPool(), LMDBConversationStore() + try: + model = _get_model_by_name(request_data.model) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc + + session, client, remain = await _find_reusable_session(db, pool, model, messages) + if session: + msgs = _prepare_messages_for_model( + remain, request_data.tools, request_data.tool_choice, None, False + ) + if not msgs: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") + m_input, files = ( + await GeminiClientWrapper.process_message(msgs[0], tmp_dir, tagged=False) + 
if len(msgs) == 1 + else await GeminiClientWrapper.process_conversation(msgs, tmp_dir) + ) + logger.debug( + f"Reused session {reprlib.repr(session.metadata)} - sending {len(msgs)} prepared messages." + ) else: - saved_path = await image.save(path=str(temp_dir)) + try: + client = await pool.acquire() + session = client.start_chat(model=model) + m_input, files = await GeminiClientWrapper.process_conversation(messages, tmp_dir) + except Exception as e: + logger.exception("Error in preparing conversation") + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) - if not saved_path: - raise ValueError("Failed to save generated image") + response_id = f"resp_{uuid.uuid4().hex}" + created_time = int(datetime.now(tz=timezone.utc).timestamp()) - # Rename file to a random UUID to ensure uniqueness and unpredictability - original_path = Path(saved_path) - random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" - new_path = temp_dir / random_name - original_path.rename(new_path) + try: + assert session and client + logger.debug( + f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}" + ) + resp_or_stream = await _send_with_split( + session, m_input, files=files, stream=request_data.stream + ) + except Exception as e: + logger.exception("Gemini API error") + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) - data = new_path.read_bytes() - width, height = extract_image_dimensions(data) - filename = random_name - return base64.b64encode(data).decode("ascii"), width, height, filename + if request_data.stream: + return _create_responses_real_streaming_response( + resp_or_stream, + response_id, + created_time, + request_data.model, + messages, + db, + model, + client, + session, + request_data, + image_store, + str(request.base_url), + struct_req, + ) + + try: + raw_t = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=True) + raw_c = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False) + except Exception as exc: + logger.exception("Gemini parsing failed") + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response." 
+ ) from exc + + assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req) + images = resp_or_stream.images or [] + if ( + request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" + ) and not images: + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.") + + contents, img_calls = [], [] + for img in images: + try: + b64, w, h, fname = await _image_to_base64(img, image_store) + contents.append( + ResponseOutputContent( + type="output_text", + text=f"![{fname}]({request.base_url}images/{fname}?token={get_image_token(fname)})", + ) + ) + img_calls.append( + ResponseImageGenerationCall( + id=fname.rsplit(".", 1)[0], + result=b64, + output_format="png" if isinstance(img, GeneratedImage) else "jpeg", + size=f"{w}x{h}" if w and h else None, + ) + ) + except Exception as e: + logger.warning(f"Image error: {e}") + + if assistant_text: + contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) + if not contents: + contents.append(ResponseOutputContent(type="output_text", text="")) + + # Aggregate images for storage + image_markdown = "" + for img_call in img_calls: + fname = f"{img_call.id}.{img_call.output_format}" + img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})" + image_markdown += f"\n\n{img_url}" + + if image_markdown: + storage_output += image_markdown + + p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, tool_calls) + usage = ResponseUsage(input_tokens=p_tok, output_tokens=c_tok, total_tokens=t_tok) + payload = _create_responses_standard_payload( + response_id, + created_time, + request_data.model, + assistant_text, + tool_calls, + img_calls, + contents, + usage, + request_data, + norm_input, + ) + _persist_conversation( + db, model.model_name, client.id, session.metadata, messages, storage_output, tool_calls + ) + return payload diff --git a/app/services/client.py b/app/services/client.py index 55be11a..eda1691 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -78,7 +78,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a single message and return model input. + Process a single Message object into a format suitable for the Gemini API. + Extracts text fragments, handles images and files, and appends tool call blocks if present. """ files: list[Path | str] = [] text_fragments: list[str] = [] @@ -88,8 +89,7 @@ async def process_message( if message.content: text_fragments.append(message.content) elif isinstance(message.content, list): - # Mixed content - # TODO: Use Pydantic to enforce the value checking + # Mixed content (text, image_url, or file) for item in message.content: if item.type == "text": # Append multiple text fragments @@ -177,7 +177,8 @@ async def process_conversation( @staticmethod def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: """ - Extract and format the output text from the Gemini response. + Extract and format the output text from a ModelOutput. + Includes reasoning thoughts (wrapped in tags) and unescapes content. 
""" text = "" @@ -191,6 +192,7 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: # Fix some escaped characters def _unescape_html(text_content: str) -> str: + """Unescape HTML entities only in non-code sections of the text.""" parts: list[str] = [] last_index = 0 for match in CODE_FENCE_RE.finditer(text_content): @@ -205,6 +207,7 @@ def _unescape_html(text_content: str) -> str: return "".join(parts) def _unescape_markdown(text_content: str) -> str: + """Remove backslash escapes for markdown characters in non-code sections.""" parts: list[str] = [] last_index = 0 for match in CODE_FENCE_RE.finditer(text_content): diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c9d42cd..6ab2302 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -11,45 +11,98 @@ from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config -from ..utils.helper import extract_tool_calls, remove_tool_call_blocks +from ..utils.helper import ( + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + XML_HINT_STRIPPED, + XML_WRAP_HINT, + extract_tool_calls, + remove_tool_call_blocks, +) from ..utils.singleton import Singleton def _hash_message(message: Message) -> str: - """Generate a consistent hash for a single message focusing ONLY on logic/content, ignoring technical IDs.""" + """ + Generate a stable, canonical hash for a single message. + Strips system hints, thoughts, and tool call blocks to ensure + identical logical content produces the same hash regardless of format. + """ core_data = { "role": message.role, "name": message.name, + "tool_call_id": message.tool_call_id, } - # Normalize content: strip, handle empty/None, and list-of-text items content = message.content if not content: core_data["content"] = None elif isinstance(content, str): - # Normalize line endings and strip whitespace - normalized = content.replace("\r\n", "\n").strip() + normalized = content.replace("\r\n", "\n") + + normalized = LMDBConversationStore.remove_think_tags(normalized) + + for hint in [ + XML_WRAP_HINT, + XML_HINT_STRIPPED, + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + ]: + normalized = normalized.replace(hint, "") + + if message.tool_calls: + normalized = remove_tool_call_blocks(normalized) + else: + temp_text, _extracted = extract_tool_calls(normalized) + normalized = temp_text + + normalized = normalized.strip() core_data["content"] = normalized if normalized else None elif isinstance(content, list): text_parts = [] for item in content: + text_val = "" if isinstance(item, ContentItem) and item.type == "text": - text_parts.append(item.text or "") + text_val = item.text or "" elif isinstance(item, dict) and item.get("type") == "text": - text_parts.append(item.get("text") or "") + text_val = item.get("text") or "" + + if text_val: + text_val = text_val.replace("\r\n", "\n") + text_val = LMDBConversationStore.remove_think_tags(text_val) + for hint in [ + XML_WRAP_HINT, + XML_HINT_STRIPPED, + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + ]: + text_val = text_val.replace(hint, "") + text_val = remove_tool_call_blocks(text_val).strip() + if text_val: + text_parts.append(text_val) + elif isinstance(item, ContentItem) and item.type in ("image_url", "file"): + # For non-text items, include their unique markers to distinguish them + if item.type == "image_url": + text_parts.append( + f"[image_url:{item.image_url.get('url') if item.image_url else ''}]" + ) + elif item.type == "file": + text_parts.append( + f"[file:{item.file.get('url') or item.file.get('filename') if item.file 
else ''}]" + ) else: - # If it contains non-text (images/files), keep the full list for hashing - text_parts = None - break - - if text_parts is not None: - # Normalize each part but keep them as a list to preserve boundaries and avoid collisions - normalized_parts = [p.replace("\r\n", "\n") for p in text_parts] - core_data["content"] = normalized_parts if normalized_parts else None - else: - core_data["content"] = message.model_dump(mode="json")["content"] + # Fallback for other dict-based content parts + part_type = item.get("type") if isinstance(item, dict) else None + if part_type == "image_url": + url = item.get("image_url", {}).get("url") + text_parts.append(f"[image_url:{url}]") + elif part_type == "file": + url = item.get("file", {}).get("url") or item.get("file", {}).get("filename") + text_parts.append(f"[file:{url}]") + + combined_text = "\n".join(text_parts).replace("\r\n", "\n").strip() + core_data["content"] = combined_text if combined_text else None - # Normalize tool_calls: Focus ONLY on function name and arguments if message.tool_calls: calls_data = [] for tc in message.tool_calls: @@ -66,14 +119,14 @@ def _hash_message(message: Message) -> str: "arguments": canon_args, } ) - # Sort calls to be order-independent calls_data.sort(key=lambda x: (x["name"], x["arguments"])) core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + digest = hashlib.sha256(message_bytes).hexdigest() + return digest def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: @@ -123,16 +176,14 @@ def __init__( self._init_environment() def _ensure_db_path(self) -> None: - """Ensure database directory exists.""" self.db_path.parent.mkdir(parents=True, exist_ok=True) def _init_environment(self) -> None: - """Initialize LMDB environment.""" try: self._env = lmdb.open( str(self.db_path), map_size=self.max_db_size, - max_dbs=3, # main, metadata, and index databases + max_dbs=3, writemap=True, readahead=False, meminit=False, @@ -144,7 +195,6 @@ def _init_environment(self) -> None: @contextmanager def _get_transaction(self, write: bool = False): - """Get LMDB transaction context manager.""" if not self._env: raise RuntimeError("LMDB environment not initialized") @@ -178,11 +228,15 @@ def store( if not conv: raise ValueError("Messages list cannot be empty") + # Sanitize messages before computing hash and storing to ensure consistency + # with the search (find) logic, which also sanitizes its prefix. 
+        sanitized_messages = self.sanitize_assistant_messages(conv.messages)
+        conv.messages = sanitized_messages
+
         # Generate hash for the message list
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
         storage_key = custom_key or message_hash
 
-        # Prepare data for storage
         now = datetime.now()
         if conv.created_at is None:
             conv.created_at = now
@@ -192,20 +246,18 @@ def store(
 
         try:
             with self._get_transaction(write=True) as txn:
-                # Store main data
                 txn.put(storage_key.encode("utf-8"), value, overwrite=True)
 
-                # Store hash -> key mapping for reverse lookup
                 txn.put(
                     f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"),
                     storage_key.encode("utf-8"),
                 )
 
-            logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key}")
+            logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}")
             return storage_key
 
         except Exception as e:
-            logger.error(f"Failed to store conversation: {e}")
+            logger.error(f"Failed to store messages with key {storage_key[:12]}: {e}")
             raise
 
     def get(self, key: str) -> Optional[ConversationInStore]:
@@ -227,39 +279,35 @@ def get(self, key: str) -> Optional[ConversationInStore]:
             storage_data = orjson.loads(data)  # type: ignore
             conv = ConversationInStore.model_validate(storage_data)
 
-            logger.debug(f"Retrieved {len(conv.messages)} messages for key: {key}")
+            logger.debug(f"Retrieved {len(conv.messages)} messages with key: {key[:12]}")
             return conv
 
         except Exception as e:
-            logger.error(f"Failed to retrieve messages for key {key}: {e}")
+            logger.error(f"Failed to retrieve messages with key {key[:12]}: {e}")
             return None
 
     def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]:
         """
         Search conversation data by message list.
-
-        Args:
-            model: Model name of the conversations
-            messages: List of messages to search for
-
-        Returns:
-            Conversation or None if not found
         """
         if not messages:
             return None
 
         # --- Find with raw messages ---
         if conv := self._find_by_message_list(model, messages):
-            logger.debug("Found conversation with raw message history.")
+            logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.")
             return conv
 
         # --- Find with cleaned messages ---
         cleaned_messages = self.sanitize_assistant_messages(messages)
-        if conv := self._find_by_message_list(model, cleaned_messages):
-            logger.debug("Found conversation with cleaned message history.")
-            return conv
+        if cleaned_messages != messages:
+            if conv := self._find_by_message_list(model, cleaned_messages):
+                logger.debug(
+                    f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages."
+                )
+                return conv
 
-        logger.debug("No conversation found for either raw or cleaned history.")
+        logger.debug(f"No session found for '{model}' with {len(messages)} messages.")
        return None
 
     def _find_by_message_list(
@@ -330,11 +378,11 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
                 if message_hash and key != message_hash:
                     txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"))
 
-                logger.debug(f"Deleted messages with key: {key}")
+                logger.debug(f"Deleted messages with key: {key[:12]}")
                 return conv
 
         except Exception as e:
-            logger.error(f"Failed to delete key {key}: {e}")
+            logger.error(f"Failed to delete messages with key {key[:12]}: {e}")
             return None
 
     def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
@@ -478,6 +526,8 @@ def remove_think_tags(text: str) -> str:
         """
         Remove all <think>...</think> tags and strip whitespace.
""" + if not text: + return text # Remove all think blocks anywhere in the text cleaned_content = re.sub(r".*?", "", text, flags=re.DOTALL) return cleaned_content.strip() @@ -485,12 +535,8 @@ def remove_think_tags(text: str) -> str: @staticmethod def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: """ - Create a new list of messages with assistant content cleaned of tags - and system hints/tool call blocks. This is used for both storing and - searching chat history to ensure consistency. - - If a message has no tool_calls but contains tool call XML blocks in its - content, they will be extracted and moved to the tool_calls field. + Produce a canonical history where assistant messages are cleaned of + internal markers and tool call blocks are moved to metadata. """ cleaned_messages = [] for msg in messages: @@ -503,12 +549,12 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: else: text = remove_tool_call_blocks(text).strip() - normalized_content = text.strip() + normalized_content = text.strip() or None if normalized_content != msg.content or tool_calls != msg.tool_calls: cleaned_msg = msg.model_copy( update={ - "content": normalized_content or None, + "content": normalized_content, "tool_calls": tool_calls or None, } ) diff --git a/app/services/pool.py b/app/services/pool.py index a134dda..0f95203 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -31,7 +31,7 @@ def __init__(self) -> None: self._clients.append(client) self._id_map[c.id] = client self._round_robin.append(client) - self._restart_locks[c.id] = asyncio.Lock() # Pre-initialize + self._restart_locks[c.id] = asyncio.Lock() async def init(self) -> None: """Initialize all clients in the pool.""" @@ -84,7 +84,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: lock = self._restart_locks.get(client.id) if lock is None: - return False # Should not happen + return False async with lock: if client.running(): diff --git a/app/utils/helper.py b/app/utils/helper.py index 190b5ce..7606dd3 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -5,7 +5,6 @@ import struct import tempfile from pathlib import Path -from typing import Iterator from urllib.parse import urlparse import httpx @@ -68,7 +67,6 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: data: bytes | None = None suffix: str | None = None if url.startswith("data:image/"): - # Base64 encoded image metadata_part = url.split(",")[0] mime_type = metadata_part.split(":")[1].split(";")[0] @@ -131,13 +129,11 @@ def strip_tagged_blocks(text: str) -> str: result.append(text[idx:]) break - # append any content before this block result.append(text[idx:start]) role_start = start + len(start_marker) newline = text.find("\n", role_start) if newline == -1: - # malformed block; keep the remainder as-is (safe behavior) result.append(text[start:]) break @@ -145,23 +141,18 @@ def strip_tagged_blocks(text: str) -> str: end = text.find(end_marker, newline + 1) if end == -1: - # missing end marker if role == "tool": - # drop from the start marker to EOF (skip the remainder) break else: - # keep inner content from after the role newline to EOF result.append(text[newline + 1 :]) break block_end = end + len(end_marker) if role == "tool": - # drop the whole block idx = block_end continue - # keep the content without role markers content = text[newline + 1 : end] result.append(content) idx = block_end @@ -180,41 +171,19 @@ def strip_system_hints(text: str) -> str: return 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 190b5ce..7606dd3 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -5,7 +5,6 @@
 import struct
 import tempfile
 from pathlib import Path
-from typing import Iterator
 from urllib.parse import urlparse
 
 import httpx
@@ -68,7 +67,6 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
     data: bytes | None = None
     suffix: str | None = None
     if url.startswith("data:image/"):
-        # Base64 encoded image
         metadata_part = url.split(",")[0]
         mime_type = metadata_part.split(":")[1].split(";")[0]
 
@@ -131,13 +129,11 @@ def strip_tagged_blocks(text: str) -> str:
             result.append(text[idx:])
             break
 
-        # append any content before this block
         result.append(text[idx:start])
 
         role_start = start + len(start_marker)
         newline = text.find("\n", role_start)
         if newline == -1:
-            # malformed block; keep the remainder as-is (safe behavior)
             result.append(text[start:])
             break
 
@@ -145,23 +141,18 @@ def strip_tagged_blocks(text: str) -> str:
         end = text.find(end_marker, newline + 1)
 
         if end == -1:
-            # missing end marker
             if role == "tool":
-                # drop from the start marker to EOF (skip the remainder)
                 break
             else:
-                # keep inner content from after the role newline to EOF
                 result.append(text[newline + 1 :])
                 break
 
         block_end = end + len(end_marker)
 
         if role == "tool":
-            # drop the whole block
             idx = block_end
             continue
 
-        # keep the content without role markers
         content = text[newline + 1 : end]
         result.append(content)
         idx = block_end
@@ -180,41 +171,19 @@ def strip_system_hints(text: str) -> str:
     return cleaned.strip()
 
 
-def remove_tool_call_blocks(text: str) -> str:
-    """Strip tool call code blocks from text."""
-    if not text:
-        return text
-
-    # 1. Remove fenced blocks ONLY if they contain tool calls
-    def _replace_block(match: re.Match[str]) -> str:
-        block_content = match.group(1)
-        if not block_content:
-            return match.group(0)
-
-        # Check if the block contains any tool call tag
-        if TOOL_CALL_RE.search(block_content):
-            return ""
-
-        # Preserve the block if no tool call found
-        return match.group(0)
-
-    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
-
-    # 2. Remove orphaned tool calls
-    cleaned = TOOL_CALL_RE.sub("", cleaned)
-
-    return strip_system_hints(cleaned)
-
-
-def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
-    """Extract tool call definitions and return cleaned text."""
+def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]:
+    """
+    Unified engine for stripping tool call blocks and extracting tool metadata.
+    If extract=True, parses JSON arguments and assigns deterministic call IDs.
+    """
     if not text:
         return text, []
 
     tool_calls: list[ToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
-        """Helper to parse args and append to the tool_calls list."""
+        if not extract:
+            return
         if not name:
             logger.warning("Encountered tool_call without a function name.")
             return
@@ -226,8 +195,6 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         except orjson.JSONDecodeError:
             logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
 
-        # Generate a deterministic ID based on name, arguments, and its global sequence index
-        # to ensure uniqueness across multiple fenced blocks while remaining stable for storage.
         index = len(tool_calls)
         seed = f"{name}:{arguments}:{index}".encode("utf-8")
         call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
@@ -245,14 +212,14 @@ def _replace_block(match: re.Match[str]) -> str:
         if not block_content:
             return match.group(0)
 
-        found_in_block = False
-        for call_match in TOOL_CALL_RE.finditer(block_content):
-            found_in_block = True
-            name = (call_match.group(1) or "").strip()
-            raw_args = (call_match.group(2) or "").strip()
-            _create_tool_call(name, raw_args)
+        is_tool_block = bool(TOOL_CALL_RE.search(block_content))
 
-        if found_in_block:
+        if is_tool_block:
+            if extract:
+                for call_match in TOOL_CALL_RE.finditer(block_content):
+                    name = (call_match.group(1) or "").strip()
+                    raw_args = (call_match.group(2) or "").strip()
+                    _create_tool_call(name, raw_args)
             return ""
         else:
             return match.group(0)
@@ -260,56 +227,26 @@ def _replace_block(match: re.Match[str]) -> str:
     cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
 
     def _replace_orphan(match: re.Match[str]) -> str:
-        name = (match.group(1) or "").strip()
-        raw_args = (match.group(2) or "").strip()
-        _create_tool_call(name, raw_args)
+        if extract:
+            name = (match.group(1) or "").strip()
+            raw_args = (match.group(2) or "").strip()
+            _create_tool_call(name, raw_args)
         return ""
 
     cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
-
     cleaned = strip_system_hints(cleaned)
 
     return cleaned, tool_calls
 
 
-def iter_stream_segments(model_output: str, chunk_size: int = 64) -> Iterator[str]:
-    """Yield stream segments while keeping markers and words intact."""
-    if not model_output:
-        return
-
-    token_pattern = re.compile(r"\s+|\S+\s*")
-    pending = ""
-
-    def _flush_pending() -> Iterator[str]:
-        nonlocal pending
-        if pending:
-            yield pending
-            pending = ""
-
-    # Split on <think> boundaries so the markers are never fragmented.
-    parts = re.split(r"(<think>|</think>)", model_output)
-    for part in parts:
-        if not part:
-            continue
-        if part in {"<think>", "</think>"}:
-            yield from _flush_pending()
-            yield part
-            continue
-
-        for match in token_pattern.finditer(part):
-            token = match.group(0)
-
-            if len(token) > chunk_size:
-                yield from _flush_pending()
-                for idx in range(0, len(token), chunk_size):
-                    yield token[idx : idx + chunk_size]
-                continue
-
-            if pending and len(pending) + len(token) > chunk_size:
-                yield from _flush_pending()
+def remove_tool_call_blocks(text: str) -> str:
+    """Strip tool call code blocks from text."""
+    cleaned, _ = _process_tools_internal(text, extract=False)
+    return cleaned
 
-            pending += token
-    yield from _flush_pending()
 
+def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
+    """Extract tool call definitions and return cleaned text."""
+    return _process_tools_internal(text, extract=True)
 
 def text_from_message(message: Message) -> str:
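The helper.py refactor above folds both public helpers into one `_process_tools_internal` engine. A hedged usage sketch follows; the `<tool_call name="...">` wire format is an assumption here, as the authoritative pattern is whatever `TOOL_CALL_RE` actually matches:

```python
from app.utils.helper import extract_tool_calls, remove_tool_call_blocks

text = (
    "Calling the tool now.\n"
    "```xml\n"
    '<tool_call name="get_weather">{"city": "Hanoi"}</tool_call>\n'
    "```"
)

cleaned, calls = extract_tool_calls(text)  # extract=True: parses args, mints call_<sha256> IDs
stripped = remove_tool_call_blocks(text)   # extract=False: identical stripping, no parsing

assert cleaned == stripped                      # both paths remove the fenced block the same way
assert calls[0].function.name == "get_weather"  # metadata only comes back from the extract path
```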
ResponseCreateRequest, + request: ResponseCreateRequest, image_store: Path, - base_url: str, structured_requirement: StructuredOutputRequirement | None = None, ) -> StreamingResponse: """ @@ -1124,7 +1122,7 @@ def _create_responses_real_streaming_response( } async def generate_stream(): - yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request_data.metadata, 'input': None, 'tools': request_data.tools, 'tool_choice': request_data.tool_choice}}).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request.metadata, 'input': None, 'tools': request.tools, 'tool_choice': request.tool_choice}}).decode('utf-8')}\n\n" message_id = f"msg_{uuid.uuid4().hex}" yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n" @@ -1183,9 +1181,7 @@ async def generate_stream(): try: image_base64, width, height, filename = await _image_to_base64(image, image_store) img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" - image_url = ( - f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" - ) + image_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" image_call_items.append( ResponseImageGenerationCall( id=filename.rsplit(".", 1)[0], @@ -1232,12 +1228,11 @@ async def generate_stream(): response_id, created_time, model_name, - assistant_text, detected_tool_calls, image_call_items, response_contents, usage, - request_data, + request, None, ) _persist_conversation( @@ -1404,19 +1399,18 @@ async def create_chat_completion( @router.post("/v1/responses") async def create_response( - request_data: ResponseCreateRequest, - request: Request, + request: ResponseCreateRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): - base_messages, norm_input = _response_items_to_messages(request_data.input) - struct_req = _build_structured_requirement(request_data.response_format) + base_messages, norm_input = _response_items_to_messages(request.input) + struct_req = _build_structured_requirement(request.response_format) extra_instr = [struct_req.instruction] if struct_req else [] standard_tools, image_tools = [], [] - if request_data.tools: - for t in request_data.tools: + if request.tools: + for t in request.tools: if isinstance(t, Tool): standard_tools.append(t) elif isinstance(t, ResponseImageTool): @@ -1429,18 +1423,14 @@ async def create_response( img_instr = _build_image_generation_instruction( image_tools, - request_data.tool_choice - if isinstance(request_data.tool_choice, ResponseToolChoice) - else None, + request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, ) if img_instr: extra_instr.append(img_instr) - preface = _instructions_to_messages(request_data.instructions) + preface = _instructions_to_messages(request.instructions) conv_messages = [*preface, *base_messages] if preface else base_messages model_tool_choice = ( - request_data.tool_choice - if isinstance(request_data.tool_choice, (str, ToolChoiceFunction)) - else None + request.tool_choice if 
isinstance(request.tool_choice, (str, ToolChoiceFunction)) else None ) messages = _prepare_messages_for_model( @@ -1448,15 +1438,13 @@ async def create_response( ) pool, db = GeminiClientPool(), LMDBConversationStore() try: - model = _get_model_by_name(request_data.model) + model = _get_model_by_name(request.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc session, client, remain = await _find_reusable_session(db, pool, model, messages) if session: - msgs = _prepare_messages_for_model( - remain, request_data.tools, request_data.tool_choice, None, False - ) + msgs = _prepare_messages_for_model(remain, request.tools, request.tool_choice, None, False) if not msgs: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") m_input, files = ( @@ -1485,26 +1473,25 @@ async def create_response( f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}" ) resp_or_stream = await _send_with_split( - session, m_input, files=files, stream=request_data.stream + session, m_input, files=files, stream=request.stream ) except Exception as e: logger.exception("Gemini API error") raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) - if request_data.stream: + if request.stream: return _create_responses_real_streaming_response( resp_or_stream, response_id, created_time, - request_data.model, + request.model, messages, db, model, client, session, - request_data, + request, image_store, - str(request.base_url), struct_req, ) @@ -1520,7 +1507,7 @@ async def create_response( assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req) images = resp_or_stream.images or [] if ( - request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" + request.tool_choice is not None and request.tool_choice.type == "image_generation" ) and not images: raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.") @@ -1531,7 +1518,7 @@ async def create_response( contents.append( ResponseOutputContent( type="output_text", - text=f"![{fname}]({request.base_url}images/{fname}?token={get_image_token(fname)})", + text=f"![{fname}](images/{fname}?token={get_image_token(fname)})", ) ) img_calls.append( @@ -1565,13 +1552,12 @@ async def create_response( payload = _create_responses_standard_payload( response_id, created_time, - request_data.model, - assistant_text, + request.model, tool_calls, img_calls, contents, usage, - request_data, + request, norm_input, ) _persist_conversation( diff --git a/app/services/client.py b/app/services/client.py index eda1691..dd1d74f 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -207,7 +207,7 @@ def _unescape_html(text_content: str) -> str: return "".join(parts) def _unescape_markdown(text_content: str) -> str: - """Remove backslash escapes for markdown characters in non-code sections.""" + """Remove backslash escapes for Markdown characters in non-code sections.""" parts: list[str] = [] last_index = 0 for match in CODE_FENCE_RE.finditer(text_content): From c0b32c62113acdac21407c629252f35c2ed2bbf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 30 Jan 2026 17:50:02 +0700 Subject: [PATCH 057/139] Enable real-time streaming responses and completely solve the issue with reusable sessions. - Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR. 
- Introducing a new feature for real-time streaming responses. - Fully resolve the problem with reusable sessions. - Break down similar flow logic into helper functions. - All endpoints now support inline Markdown images. - Switch large prompts to use BytesIO to avoid reading and writing to disk. - Remove duplicate images when saving and responding. --- app/server/chat.py | 86 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 16 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index ae1533e..4c64390 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,4 +1,5 @@ import base64 +import hashlib import io import reprlib import uuid @@ -8,7 +9,7 @@ from typing import Any, AsyncGenerator import orjson -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from gemini_webapi import ModelOutput from gemini_webapi.client import ChatSession @@ -72,8 +73,10 @@ class StructuredOutputRequirement: # --- Helper Functions --- -async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: - """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" +async def _image_to_base64( + image: Image, temp_dir: Path +) -> tuple[str, int | None, int | None, str, str]: + """Persist an image provided by gemini_webapi and return base64 plus dimensions, filename, and hash.""" if isinstance(image, GeneratedImage): try: saved_path = await image.save(path=str(temp_dir), full_size=True) @@ -96,7 +99,8 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non data = new_path.read_bytes() width, height = extract_image_dimensions(data) filename = random_name - return base64.b64encode(data).decode("ascii"), width, height, filename + file_hash = hashlib.sha256(data).hexdigest() + return base64.b64encode(data).decode("ascii"), width, height, filename, file_hash def _calculate_usage( @@ -925,6 +929,7 @@ def _create_real_streaming_response( model: Model, client_wrapper: GeminiClientWrapper, session: ChatSession, + base_url: str, structured_requirement: StructuredOutputRequirement | None = None, ) -> StreamingResponse: """ @@ -1024,16 +1029,30 @@ async def generate_stream(): ) images = [] + seen_urls = set() for out in all_outputs: if out.images: - images.extend(out.images) + for img in out.images: + # Use the image URL as a stable identifier across chunks + if img.url not in seen_urls: + images.append(img) + seen_urls.add(img.url) image_markdown = "" + seen_hashes = set() for image in images: try: image_store = get_image_store_dir() - _, _, _, filename = await _image_to_base64(image, image_store) - img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + _, _, _, filename, file_hash = await _image_to_base64(image, image_store) + if file_hash in seen_hashes: + # Duplicate content, delete the file and skip + (image_store / filename).unlink(missing_ok=True) + continue + seen_hashes.add(file_hash) + + img_url = ( + f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" + ) image_markdown += f"\n\n{img_url}" except Exception as exc: logger.warning(f"Failed to process image in OpenAI stream: {exc}") @@ -1108,6 +1127,7 @@ def _create_responses_real_streaming_response( session: ChatSession, request: ResponseCreateRequest, image_store: Path, + base_url: str, structured_requirement: StructuredOutputRequirement 
| None = None, ) -> StreamingResponse: """ @@ -1172,16 +1192,30 @@ async def generate_stream(): ) images = [] + seen_urls = set() for out in all_outputs: if out.images: - images.extend(out.images) + for img in out.images: + if img.url not in seen_urls: + images.append(img) + seen_urls.add(img.url) response_contents, image_call_items = [], [] + seen_hashes = set() for image in images: try: - image_base64, width, height, filename = await _image_to_base64(image, image_store) + image_base64, width, height, filename, file_hash = await _image_to_base64( + image, image_store + ) + if file_hash in seen_hashes: + (image_store / filename).unlink(missing_ok=True) + continue + seen_hashes.add(file_hash) + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" - image_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + image_url = ( + f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" + ) image_call_items.append( ResponseImageGenerationCall( id=filename.rsplit(".", 1)[0], @@ -1203,7 +1237,7 @@ async def generate_stream(): image_markdown = "" for img_call in image_call_items: fname = f"{img_call.id}.{img_call.output_format}" - img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})" + img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_markdown += f"\n\n{img_url}" if image_markdown: @@ -1262,10 +1296,12 @@ async def list_models(api_key: str = Depends(verify_api_key)): @router.post("/v1/chat/completions") async def create_chat_completion( request: ChatCompletionRequest, + raw_request: Request, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): + base_url = str(raw_request.base_url) pool, db = GeminiClientPool(), LMDBConversationStore() try: model = _get_model_by_name(request.model) @@ -1339,6 +1375,7 @@ async def create_chat_completion( model, client, session, + base_url, structured_requirement, ) @@ -1358,10 +1395,18 @@ async def create_chat_completion( # Process images for OpenAI non-streaming flow images = resp_or_stream.images or [] image_markdown = "" + seen_hashes = set() for image in images: try: - _, _, _, filename = await _image_to_base64(image, image_store) - img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + _, _, _, filename, file_hash = await _image_to_base64(image, image_store) + if file_hash in seen_hashes: + (image_store / filename).unlink(missing_ok=True) + continue + seen_hashes.add(file_hash) + + img_url = ( + f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" + ) image_markdown += f"\n\n{img_url}" except Exception as exc: logger.warning(f"Failed to process image in OpenAI response: {exc}") @@ -1400,10 +1445,12 @@ async def create_chat_completion( @router.post("/v1/responses") async def create_response( request: ResponseCreateRequest, + raw_request: Request, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): + base_url = str(raw_request.base_url) base_messages, norm_input = _response_items_to_messages(request.input) struct_req = _build_structured_requirement(request.response_format) extra_instr = [struct_req.instruction] if struct_req else [] @@ -1492,6 +1539,7 @@ async def create_response( session, request, image_store, + base_url, struct_req, ) @@ -1512,13 +1560,19 @@ async def create_response( raise 
HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.") contents, img_calls = [], [] + seen_hashes = set() for img in images: try: - b64, w, h, fname = await _image_to_base64(img, image_store) + b64, w, h, fname, fhash = await _image_to_base64(img, image_store) + if fhash in seen_hashes: + (image_store / fname).unlink(missing_ok=True) + continue + seen_hashes.add(fhash) + contents.append( ResponseOutputContent( type="output_text", - text=f"![{fname}](images/{fname}?token={get_image_token(fname)})", + text=f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})", ) ) img_calls.append( @@ -1541,7 +1595,7 @@ async def create_response( image_markdown = "" for img_call in img_calls: fname = f"{img_call.id}.{img_call.output_format}" - img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})" + img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_markdown += f"\n\n{img_url}" if image_markdown: From 4d51a5fc8d19431712f2e13dd2d1a0395150e252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 2 Feb 2026 18:33:27 +0700 Subject: [PATCH 058/139] Enable real-time streaming responses and completely solve the issue with reusable sessions. - Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR. - Introducing a new feature for real-time streaming responses. - Fully resolve the problem with reusable sessions. - Break down similar flow logic into helper functions. - All endpoints now support inline Markdown images. - Switch large prompts to use BytesIO to avoid reading and writing to disk. - Remove duplicate images when saving and responding. --- app/server/chat.py | 11 +++++++++++ app/services/client.py | 22 ++++++++++++++++------ app/utils/helper.py | 6 +++--- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 4c64390..b8f611d 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -481,6 +481,17 @@ def _prepare_messages_for_model( """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] + # Resolve tool names for 'tool' messages by looking back at previous assistant tool calls + tool_id_to_name = {} + for msg in prepared: + if msg.role == "assistant" and msg.tool_calls: + for tc in msg.tool_calls: + tool_id_to_name[tc.id] = tc.function.name + + for msg in prepared: + if msg.role == "tool" and not msg.name and msg.tool_call_id: + msg.name = tool_id_to_name.get(msg.tool_call_id) + instructions: list[str] = [] if inject_system_defaults: if tools: diff --git a/app/services/client.py b/app/services/client.py index dd1d74f..803bc23 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -86,15 +86,15 @@ async def process_message( if isinstance(message.content, str): # Pure text content - if message.content: - text_fragments.append(message.content) + if message.content or message.role == "tool": + text_fragments.append(message.content or "") elif isinstance(message.content, list): # Mixed content (text, image_url, or file) for item in message.content: if item.type == "text": # Append multiple text fragments - if item.text: - text_fragments.append(item.text) + if item.text or message.role == "tool": + text_fragments.append(item.text or "") elif item.type == "image_url": if not item.image_url: @@ -114,9 +114,19 @@ async def process_message( files.append(await save_url_to_tempfile(url, 
tempdir))
                     else:
                         raise ValueError("File must contain 'file_data' or 'url' key")
+        elif message.content is None and message.role == "tool":
+            text_fragments.append("")
         elif message.content is not None:
             raise ValueError("Unsupported message content type.")
 
+        # Special handling for tool response format
+        if message.role == "tool":
+            tool_name = message.name or "unknown"
+            combined_content = "\n".join(text_fragments)
+            text_fragments = [
+                f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
+            ]
+
         if message.tool_calls:
             tool_blocks: list[str] = []
             for call in message.tool_calls:
@@ -135,10 +145,10 @@ async def process_message(
             tool_section = "```xml\n" + "".join(tool_blocks) + "\n```"
             text_fragments.append(tool_section)
 
-        model_input = "\n".join(fragment for fragment in text_fragments if fragment)
+        model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
 
         # Add role tag if needed
-        if model_input:
+        if model_input or message.role == "tool":
             if tagged:
                 model_input = add_tag(message.role, model_input)
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 7606dd3..38b6400 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -110,9 +110,9 @@ def strip_code_fence(text: str) -> str:
 
 
 def strip_tagged_blocks(text: str) -> str:
-    """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely.
-    - tool blocks are removed entirely (if missing end marker, drop to EOF).
-    - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF).
+    """Remove <|im_start|>role ... <|im_end|> sections.
+    - tool blocks are removed entirely (including content).
+    - other roles: remove markers and role, keep inner content.
     """
     if not text:
         return text

From d69aaf02f2b6b7ff331564b526178a447c3b49e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Mon, 2 Feb 2026 18:52:51 +0700
Subject: [PATCH 059/139] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
- Introducing a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
- Remove duplicate images when saving and responding.
---
 app/services/client.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/services/client.py b/app/services/client.py
index 803bc23..4146b7e 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -122,7 +122,10 @@ async def process_message(
         # Special handling for tool response format
         if message.role == "tool":
             tool_name = message.name or "unknown"
-            combined_content = "\n".join(text_fragments)
+            combined_content = "\n".join(text_fragments).strip()
+            # If the tool result is literally empty, provide a clear indicator like empty JSON
+            if not combined_content:
+                combined_content = "{}"
             text_fragments = [
                 f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
             ]
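Before the follow-up cleanup in PATCH 060 below, it helps to pin down what the tool-message branch is meant to emit. A sketch under stated assumptions: that `Message` accepts these fields directly, and that the tool result is wrapped in a name-plus-body `<tool_response>` tag as suggested by the `tool_name` variable; neither detail is guaranteed by this patch alone.

```python
import asyncio

from app.models import Message
from app.services.client import GeminiClientWrapper


async def demo() -> None:
    # A tool result with no content should still yield a well-formed block,
    # with "{}" as the explicit empty-JSON indicator introduced above.
    msg = Message(role="tool", name="get_weather", content=None, tool_call_id="call_abc")
    model_input, _files = await GeminiClientWrapper.process_message(msg, tagged=False)
    print(model_input)
    # Expected shape (wrapper tag assumed, see above):
    # ```xml
    # <tool_response name="get_weather">{}</tool_response>
    # ```


asyncio.run(demo())
```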
From 8e15a8698a4a3df53a7bc3f676c63c0a492c9a01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Mon, 2 Feb 2026 19:27:08 +0700
Subject: [PATCH 060/139] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
- Introducing a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
- Remove duplicate images when saving and responding.
---
 app/services/client.py | 43 +++++-------------------------------------
 1 file changed, 5 insertions(+), 38 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 4146b7e..a35146f 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -85,17 +85,13 @@ async def process_message(
         text_fragments: list[str] = []
 
         if isinstance(message.content, str):
-            # Pure text content
             if message.content or message.role == "tool":
-                text_fragments.append(message.content or "")
+                text_fragments.append(message.content or "{}")
         elif isinstance(message.content, list):
-            # Mixed content (text, image_url, or file)
             for item in message.content:
                 if item.type == "text":
-                    # Append multiple text fragments
                     if item.text or message.role == "tool":
-                        text_fragments.append(item.text or "")
-
+                        text_fragments.append(item.text or "{}")
                 elif item.type == "image_url":
                     if not item.image_url:
                         raise ValueError("Image URL cannot be empty")
@@ -103,7 +99,6 @@ async def process_message(
                         files.append(await save_url_to_tempfile(url, tempdir))
                     else:
                         raise ValueError("Image URL must contain 'url' key")
-
                 elif item.type == "file":
                     if not item.file:
                         raise ValueError("File cannot be empty")
@@ -115,19 +110,15 @@ async def process_message(
                     else:
                         raise ValueError("File must contain 'file_data' or 'url' key")
         elif message.content is None and message.role == "tool":
-            text_fragments.append("")
+            text_fragments.append("{}")
         elif message.content is not None:
             raise ValueError("Unsupported message content type.")
 
-        # Special handling for tool response format
         if message.role == "tool":
             tool_name = message.name or "unknown"
-            combined_content = "\n".join(text_fragments).strip()
-            # If the tool result is literally empty, provide a clear indicator like empty JSON
-            if not combined_content:
-                combined_content = "{}"
+            combined_content = "\n".join(text_fragments).strip() or "{}"
             text_fragments = [
-                f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
+                f'<tool_response name="{tool_name}">{combined_content}</tool_response>'
             ]
 
         if message.tool_calls:
@@ -138,7 +129,6 @@ async def process_message(
                 parsed_args = orjson.loads(args_text)
                 args_text = orjson.dumps(parsed_args).decode("utf-8")
             except orjson.JSONDecodeError:
-                # Leave args_text as is if it is not valid JSON
                 pass
             tool_blocks.append(
                 f'<tool_call name="{call.function.name}">{args_text}</tool_call>'
             )
@@ -150,7 +140,6 @@ async def process_message(
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
 
-        # Add role tag if needed
         if model_input or message.role == "tool":
             if tagged:
                 model_input = add_tag(message.role, model_input)
@@ -161,51 +150,30 @@ async def process_message(
     async def process_conversation(
         messages: list[Message], tempdir: Path | None = None
     ) -> tuple[str, list[Path | str]]:
-        """
-        Process the entire conversation and return a formatted string and list of
-        files. The last message is assumed to be the assistant's response.
-        """
-        # Determine once whether we need to wrap messages with role tags: only required
-        # if the history already contains assistant/system messages. When every message
-        # so far is from the user, we can skip tagging entirely.
         need_tag = any(m.role != "user" for m in messages)
-
         conversation: list[str] = []
         files: list[Path | str] = []
-
         for msg in messages:
             input_part, files_part = await GeminiClientWrapper.process_message(
                 msg, tempdir, tagged=need_tag
            )
             conversation.append(input_part)
             files.extend(files_part)
-
-        # Append an opening assistant tag only when we used tags above so that Gemini
-        # knows where to start its reply.
         if need_tag:
             conversation.append(add_tag("assistant", "", unclose=True))
-
         return "\n".join(conversation), files
 
     @staticmethod
     def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
-        """
-        Extract and format the output text from a ModelOutput.
-        Includes reasoning thoughts (wrapped in <think> tags) and unescapes content.
-        """
         text = ""
-
         if include_thoughts and response.thoughts:
             text += f"<think>{response.thoughts}</think>\n"
-
         if response.text:
             text += response.text
         else:
             text += str(response)
-
-        # Fix some escaped characters
         def _unescape_html(text_content: str) -> str:
-            """Unescape HTML entities only in non-code sections of the text."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):
@@ -220,7 +188,6 @@ def _unescape_html(text_content: str) -> str:
             return "".join(parts)
 
         def _unescape_markdown(text_content: str) -> str:
-            """Remove backslash escapes for Markdown characters in non-code sections."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):
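The `need_tag` line kept by the cleanup above encodes the whole tagging policy for `process_conversation`. Restated as a standalone rule (a sketch, not project code):

```python
def needs_role_tags(roles: list[str]) -> bool:
    # Purely-user histories can be sent untagged; any assistant/system/tool
    # turn forces <|im_start|>-style role tags so the model can tell the
    # speakers apart, mirroring `any(m.role != "user" for m in messages)`.
    return any(role != "user" for role in roles)


assert needs_role_tags(["user", "user"]) is False
assert needs_role_tags(["user", "assistant", "user"]) is True
```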
From 7716c62a8df23b6557841e5e4cdd571b025d5e4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Tue, 3 Feb 2026 09:33:18 +0700
Subject: [PATCH 061/139] build: update dependencies

---
 pyproject.toml |  4 +--
 uv.lock        | 82 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1c30f8e..dc08571 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,10 +6,10 @@ readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
     "fastapi>=0.128.0",
-    "gemini-webapi>=1.17.3",
+    "gemini-webapi>=1.18.0",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
-    "orjson>=3.11.5",
+    "orjson>=3.11.7",
     "pydantic-settings[yaml]>=2.12.0",
     "uvicorn>=0.40.0",
     "uvloop>=0.22.1; sys_platform != 'win32'",
diff --git a/uv.lock b/uv.lock
index 50a73be..34a949c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -106,10 +106,10 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "fastapi", specifier = ">=0.128.0" },
-    { name = "gemini-webapi", specifier = ">=1.17.3" },
+    { name = "gemini-webapi", specifier = ">=1.18.0" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
-    { name = "orjson", specifier = ">=3.11.5" },
+    { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" },
     { name = "uvicorn", specifier = ">=0.40.0" },
@@ -122,17 +122,17 @@ dev = [{ name = "ruff", specifier = ">=0.14.14" }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.17.3"
+version = "1.18.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "httpx" },
+    { name = "httpx", extra = ["http2"] },
     { name = "loguru" },
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/aa/74/1a31f3605250eb5cbcbfb15559c43b0d71734c8d286cfa9a7833841306e3/gemini_webapi-1.17.3.tar.gz", hash = "sha256:6201f9eaf5f562c5dc589d71c0edbba9e2eb8f780febbcf35307697bf474d577", size = 259418, upload-time = 
"2025-12-05T22:38:44.426Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c6/03/eb06536f287a8b7fb4808b00a60d9a9a3694f8a4079b77730325c639fbbe/gemini_webapi-1.18.0.tar.gz", hash = "sha256:0688a080fc3c95be55e723a66b2b69ec3ffcd58b07c50cf627d85d59d1181a86", size = 264630, upload-time = "2026-02-03T01:18:39.794Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/a3/a88ff45197dce68a81d92c8d40368e4c26f67faf3af3273357f3f71f5c3d/gemini_webapi-1.17.3-py3-none-any.whl", hash = "sha256:d83969b1fa3236f3010d856d191b35264c936ece81f1be4c1de53ec1cf0855c8", size = 56659, upload-time = "2025-12-05T22:38:42.93Z" }, + { url = "https://files.pythonhosted.org/packages/40/33/85f520f56faddd68442c7efe7086ff5593b213bd8fc3768835dbe610fd9b/gemini_webapi-1.18.0-py3-none-any.whl", hash = "sha256:2fe25b5f8185aba1ca109e1280ef3eb79e5bd8a81fba16e01fbc4a177b72362c", size = 61523, upload-time = "2026-02-03T01:18:38.322Z" }, ] [[package]] @@ -144,6 +144,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -172,6 +194,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "idna" version = "3.11" @@ -211,25 +247,25 @@ wheels = [ [[package]] name = "orjson" -version = "3.11.5" +version = "3.11.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = "2025-12-06T15:54:22.061Z" }, - { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" }, - { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" }, - { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" }, - { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" }, - { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" }, - { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = "2025-12-06T15:54:29.856Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" }, - { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" }, - { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = "2025-12-06T15:54:33.361Z" }, - { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" }, - { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" }, - { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" }, - { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" }, - { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" }, + { url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" }, + { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" }, + { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" }, + { url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" }, + { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" }, + { url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" }, + { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" }, + { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" }, + { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" }, + { url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" }, + { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" }, + { url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" }, + { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" }, ] [[package]] From 61672cc46948a501a0f2af3761eb231e40ec6831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 11:44:54 +0700 Subject: [PATCH 062/139] Refactor: Use `strip_system_hints` to standardize the content. --- app/services/client.py | 4 +++- app/services/lmdb.py | 22 +++------------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index a35146f..89ad3ba 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -127,7 +127,9 @@ async def process_message( args_text = call.function.arguments.strip() try: parsed_args = orjson.loads(args_text) - args_text = orjson.dumps(parsed_args).decode("utf-8") + args_text = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( + "utf-8" + ) except orjson.JSONDecodeError: pass tool_blocks.append( diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 6ab2302..d5424e0 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -12,12 +12,9 @@ from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config from ..utils.helper import ( - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - XML_HINT_STRIPPED, - XML_WRAP_HINT, extract_tool_calls, remove_tool_call_blocks, + strip_system_hints, ) from ..utils.singleton import Singleton @@ -41,14 +38,7 @@ def _hash_message(message: Message) -> str: normalized = content.replace("\r\n", "\n") normalized = LMDBConversationStore.remove_think_tags(normalized) - - for hint in [ - XML_WRAP_HINT, - XML_HINT_STRIPPED, - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - ]: - normalized = normalized.replace(hint, "") + normalized = strip_system_hints(normalized) if message.tool_calls: normalized = remove_tool_call_blocks(normalized) @@ -70,13 +60,7 @@ def _hash_message(message: Message) -> str: if text_val: text_val = text_val.replace("\r\n", "\n") text_val = LMDBConversationStore.remove_think_tags(text_val) - for hint in [ - XML_WRAP_HINT, - XML_HINT_STRIPPED, - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - ]: - text_val = text_val.replace(hint, "") + text_val = strip_system_hints(text_val) text_val = remove_tool_call_blocks(text_val).strip() if text_val: text_parts.append(text_val) From cc0b13f40b0ed875ba9f1274101c7c9be49e8e52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 19:04:01 +0700 Subject: [PATCH 063/139] Refactor: Only inject code block hint if NOT a structured response request --- app/server/chat.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index b8f611d..608b52f 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -477,6 +477,7 @@ def _prepare_messages_for_model( tool_choice: str | ToolChoiceFunction | None, extra_instructions: list[str] | None = None, inject_system_defaults: bool = True, + is_structured: bool = False, ) -> list[Message]: """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] @@ -505,7 +506,8 @@ def _prepare_messages_for_model( f"Applied {len(extra_instructions)} extra instructions for tool/structured output." 
) - if not _conversation_has_code_hint(prepared): + # Only inject code block hint if NOT a structured response request + if not is_structured and not _conversation_has_code_hint(prepared): instructions.append(CODE_BLOCK_HINT) logger.debug("Injected default code block hint for Gemini conversation.") @@ -1326,7 +1328,11 @@ async def create_chat_completion( # This ensures that server-injected system instructions are part of the history msgs = _prepare_messages_for_model( - request.messages, request.tools, request.tool_choice, extra_instr + request.messages, + request.tools, + request.tool_choice, + extra_instr, + is_structured=structured_requirement is not None, ) session, client, remain = await _find_reusable_session(db, pool, model, msgs) @@ -1338,7 +1344,12 @@ async def create_chat_completion( # For reused sessions, we only need to process the remaining messages. # We don't re-inject system defaults to avoid duplicating instructions already in history. input_msgs = _prepare_messages_for_model( - remain, request.tools, request.tool_choice, extra_instr, False + remain, + request.tools, + request.tool_choice, + extra_instr, + False, + is_structured=structured_requirement is not None, ) if len(input_msgs) == 1: m_input, files = await GeminiClientWrapper.process_message( @@ -1492,7 +1503,11 @@ async def create_response( ) messages = _prepare_messages_for_model( - conv_messages, standard_tools or None, model_tool_choice, extra_instr or None + conv_messages, + standard_tools or None, + model_tool_choice, + extra_instr or None, + is_structured=struct_req is not None, ) pool, db = GeminiClientPool(), LMDBConversationStore() try: @@ -1502,7 +1517,14 @@ async def create_response( session, client, remain = await _find_reusable_session(db, pool, model, messages) if session: - msgs = _prepare_messages_for_model(remain, request.tools, request.tool_choice, None, False) + msgs = _prepare_messages_for_model( + remain, + request.tools, + request.tool_choice, + None, + False, + is_structured=struct_req is not None, + ) if not msgs: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") m_input, files = ( From 6b90e5d15d942c96ccfd272f9cb9ef23e4f7ac31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 20:43:18 +0700 Subject: [PATCH 064/139] Refactor: Remove the code block hint entirely --- app/server/chat.py | 56 +++++---------------------------------------- app/utils/helper.py | 16 ------------- 2 files changed, 6 insertions(+), 66 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 608b52f..43f5e12 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -41,15 +41,12 @@ from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore from ..utils import g_config from ..utils.helper import ( - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - CONTROL_TOKEN_RE, XML_HINT_STRIPPED, XML_WRAP_HINT, estimate_tokens, extract_image_dimensions, extract_tool_calls, - strip_code_fence, + strip_system_hints, text_from_message, ) from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key @@ -225,10 +222,9 @@ def _process_llm_output( if structured_requirement: cleaned_for_json = LMDBConversationStore.remove_think_tags(visible_output) - json_text = strip_code_fence(cleaned_for_json or "") - if json_text: + if cleaned_for_json: try: - structured_payload = orjson.loads(json_text) + structured_payload = orjson.loads(cleaned_for_json) canonical_output = 
orjson.dumps(structured_payload).decode("utf-8") visible_output = canonical_output storage_output = canonical_output @@ -450,27 +446,6 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: return -def _conversation_has_code_hint(messages: list[Message]) -> bool: - """Return True if any system message already includes the code block hint.""" - for msg in messages: - if msg.role != "system" or msg.content is None: - continue - - if isinstance(msg.content, str): - if CODE_HINT_STRIPPED in msg.content: - return True - continue - - if isinstance(msg.content, list): - for part in msg.content: - if getattr(part, "type", None) != "text": - continue - if part.text and CODE_HINT_STRIPPED in part.text: - return True - - return False - - def _prepare_messages_for_model( source_messages: list[Message], tools: list[Tool] | None, @@ -506,11 +481,6 @@ def _prepare_messages_for_model( f"Applied {len(extra_instructions)} extra instructions for tool/structured output." ) - # Only inject code block hint if NOT a structured response request - if not is_structured and not _conversation_has_code_hint(prepared): - instructions.append(CODE_BLOCK_HINT) - logger.debug("Injected default code block hint for Gemini conversation.") - if not instructions: if tools and tool_choice != "none": _append_xml_hint_to_last_user_message(prepared) @@ -791,7 +761,7 @@ class StreamingOutputFilter: 2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|> 3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content) 4. Control tokens: <|im_start|>, <|im_end|> - 5. System instructions/hints: XML_WRAP_HINT, CODE_BLOCK_HINT, etc. + 5. System instructions/hints. """ def __init__(self): @@ -805,12 +775,6 @@ def __init__(self): self.XML_END = "```" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" - self.SYSTEM_HINTS = [ - XML_WRAP_HINT, - XML_HINT_STRIPPED, - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - ] def process(self, chunk: str) -> str: self.buffer += chunk @@ -906,11 +870,7 @@ def process(self, chunk: str) -> str: break # Final pass: filter out system hints from the text to be yielded - for hint in self.SYSTEM_HINTS: - if hint in to_yield: - to_yield = to_yield.replace(hint, "") - - return to_yield + return strip_system_hints(to_yield) def flush(self) -> str: # If we are stuck in a tool block or role header at the end, @@ -922,11 +882,7 @@ def flush(self) -> str: self.buffer = "" # Filter out any orphaned/partial control tokens or hints - final_text = CONTROL_TOKEN_RE.sub("", final_text) - for hint in self.SYSTEM_HINTS: - final_text = final_text.replace(hint, "") - - return final_text.strip() + return strip_system_hints(final_text) # --- Response Builders & Streaming --- diff --git a/app/utils/helper.py b/app/utils/helper.py index 38b6400..1281f9b 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,19 +19,12 @@ '```xml\n{"arg": "value"}\n```\n' "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" ) -CODE_BLOCK_HINT = ( - "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " - "block and supply the correct language label (for example, ```python ... ``` or ```html ... 
```).\n" - "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" -) TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( r"(.*?)", re.DOTALL | re.IGNORECASE ) -JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") XML_HINT_STRIPPED = XML_WRAP_HINT.strip() -CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip() def add_tag(role: str, content: str, unclose: bool = False) -> str: @@ -101,14 +94,6 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: return path -def strip_code_fence(text: str) -> str: - """Remove surrounding ```json fences if present.""" - match = JSON_FENCE_RE.match(text.strip()) - if match: - return match.group(1).strip() - return text.strip() - - def strip_tagged_blocks(text: str) -> str: """Remove <|im_start|>role ... <|im_end|> sections. - tool blocks are removed entirely (including content). @@ -166,7 +151,6 @@ def strip_system_hints(text: str) -> str: return text cleaned = strip_tagged_blocks(text) cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") - cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "") cleaned = CONTROL_TOKEN_RE.sub("", cleaned) return cleaned.strip() From 553bd94b4631832694de40fe7246063ab359fc46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 20:52:54 +0700 Subject: [PATCH 065/139] Refactor: Remove the code block hint entirely --- app/server/chat.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 43f5e12..0bb2722 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -452,7 +452,6 @@ def _prepare_messages_for_model( tool_choice: str | ToolChoiceFunction | None, extra_instructions: list[str] | None = None, inject_system_defaults: bool = True, - is_structured: bool = False, ) -> list[Message]: """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] @@ -1288,7 +1287,6 @@ async def create_chat_completion( request.tools, request.tool_choice, extra_instr, - is_structured=structured_requirement is not None, ) session, client, remain = await _find_reusable_session(db, pool, model, msgs) @@ -1305,7 +1303,6 @@ async def create_chat_completion( request.tool_choice, extra_instr, False, - is_structured=structured_requirement is not None, ) if len(input_msgs) == 1: m_input, files = await GeminiClientWrapper.process_message( @@ -1463,7 +1460,6 @@ async def create_response( standard_tools or None, model_tool_choice, extra_instr or None, - is_structured=struct_req is not None, ) pool, db = GeminiClientPool(), LMDBConversationStore() try: @@ -1479,7 +1475,6 @@ async def create_response( request.tool_choice, None, False, - is_structured=struct_req is not None, ) if not msgs: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") From fd767dad79266486f14bd5610054220881bc9a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 22:40:38 +0700 Subject: [PATCH 066/139] Refactor: fix missing whitespace in the streaming response. 
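Context: strip_system_hints() runs on every streamed chunk, so the trailing
strip() call removed legitimate whitespace at chunk boundaries and fused
adjacent words together. A minimal sketch of the failure mode (the helper
names and chunk values below are illustrative, not the real call sites):

    def filter_chunk_old(chunk: str) -> str:
        return chunk.strip()  # old behavior: eats whitespace at chunk edges

    def filter_chunk_new(chunk: str) -> str:
        return chunk  # new behavior: boundary whitespace is preserved

    chunks = ["Hello ", "world"]  # as the model might stream them
    assert "".join(filter_chunk_old(c) for c in chunks) == "Helloworld"
    assert "".join(filter_chunk_new(c) for c in chunks) == "Hello world"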
--- app/utils/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 1281f9b..a8b40aa 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -152,7 +152,7 @@ def strip_system_hints(text: str) -> str: cleaned = strip_tagged_blocks(text) cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") cleaned = CONTROL_TOKEN_RE.sub("", cleaned) - return cleaned.strip() + return cleaned def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]: From 4beb33bb5a27368f0a14c769c213aea44103c100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 07:24:13 +0700 Subject: [PATCH 067/139] Refactor: remove unnecessary code --- app/services/lmdb.py | 13 +------------ app/utils/helper.py | 2 +- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index d5424e0..8dc3722 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -14,7 +14,6 @@ from ..utils.helper import ( extract_tool_calls, remove_tool_call_blocks, - strip_system_hints, ) from ..utils.singleton import Singleton @@ -36,17 +35,8 @@ def _hash_message(message: Message) -> str: core_data["content"] = None elif isinstance(content, str): normalized = content.replace("\r\n", "\n") - normalized = LMDBConversationStore.remove_think_tags(normalized) - normalized = strip_system_hints(normalized) - - if message.tool_calls: - normalized = remove_tool_call_blocks(normalized) - else: - temp_text, _extracted = extract_tool_calls(normalized) - normalized = temp_text - - normalized = normalized.strip() + normalized = remove_tool_call_blocks(normalized).strip() core_data["content"] = normalized if normalized else None elif isinstance(content, list): text_parts = [] @@ -60,7 +50,6 @@ def _hash_message(message: Message) -> str: if text_val: text_val = text_val.replace("\r\n", "\n") text_val = LMDBConversationStore.remove_think_tags(text_val) - text_val = strip_system_hints(text_val) text_val = remove_tool_call_blocks(text_val).strip() if text_val: text_parts.append(text_val) diff --git a/app/utils/helper.py b/app/utils/helper.py index a8b40aa..b6bb5cb 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -16,7 +16,7 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} XML_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" - '```xml\n{"arg": "value"}\n```\n' + '```xml\n{"argument": "value"}\n```\n' "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" ) TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) From 6b8dd4e5b893e689521efe93261825e95f1a1a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 09:35:21 +0700 Subject: [PATCH 068/139] Refactor: Update `StreamingOutputFilter` logic to improve handling of streaming responses --- app/server/chat.py | 187 ++++++++++++++++++++++++-------------------- app/utils/helper.py | 16 +++- 2 files changed, 115 insertions(+), 88 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0bb2722..87f29a6 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -41,6 +41,8 @@ from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore from ..utils import g_config from ..utils.helper import ( + XML_HINT_LINE_END, + XML_HINT_LINE_START, XML_HINT_STRIPPED, XML_WRAP_HINT, estimate_tokens, @@ 
-755,133 +757,146 @@ async def _send_with_split( class StreamingOutputFilter: """ - Enhanced streaming filter that suppresses: - 1. XML tool call blocks: ```xml ... ``` - 2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|> - 3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content) - 4. Control tokens: <|im_start|>, <|im_end|> - 5. System instructions/hints. + Simplified State Machine filter to suppress technical markers, tool calls, and system hints. + States: NORMAL, IN_XML, IN_TAG, IN_BLOCK, IN_HINT """ def __init__(self): self.buffer = "" - self.in_xml_tool = False - self.in_tagged_block = False - self.in_role_header = False + self.state = "NORMAL" self.current_role = "" + self.block_buffer = "" self.XML_START = "```xml" self.XML_END = "```" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" + self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else "" + self.HINT_END = XML_HINT_LINE_END + + self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END] + if self.HINT_START: + self.WATCH_PREFIXES.append(self.HINT_START) def process(self, chunk: str) -> str: self.buffer += chunk - to_yield = "" + output = [] while self.buffer: - if self.in_xml_tool: + if self.state == "NORMAL": + xml_idx = self.buffer.find(self.XML_START) + tag_idx = self.buffer.find(self.TAG_START) + end_idx = self.buffer.find(self.TAG_END) + hint_idx = self.buffer.find(self.HINT_START) + + indices = [ + (i, t) + for i, t in [ + (xml_idx, "XML"), + (tag_idx, "TAG"), + (end_idx, "END"), + (hint_idx, "HINT"), + ] + if i != -1 + ] + + if not indices: + keep_len = 0 + for p in self.WATCH_PREFIXES: + for i in range(len(p) - 1, 0, -1): + if self.buffer.endswith(p[:i]): + keep_len = max(keep_len, i) + break + + yield_len = len(self.buffer) - keep_len + if yield_len > 0: + output.append(self.buffer[:yield_len]) + self.buffer = self.buffer[yield_len:] + break + + indices.sort() + idx, m_type = indices[0] + output.append(self.buffer[:idx]) + self.buffer = self.buffer[idx:] + + if m_type == "XML": + self.state = "IN_XML" + self.block_buffer = "" + self.buffer = self.buffer[len(self.XML_START) :] + elif m_type == "TAG": + self.state = "IN_TAG" + self.buffer = self.buffer[len(self.TAG_START) :] + elif m_type == "END": + self.buffer = self.buffer[len(self.TAG_END) :] + elif m_type == "HINT": + self.state = "IN_HINT" + self.buffer = self.buffer[len(self.HINT_START) :] + + elif self.state == "IN_HINT": + end_idx = self.buffer.find(self.HINT_END) + if end_idx != -1: + self.buffer = self.buffer[end_idx + len(self.HINT_END) :] + self.state = "NORMAL" + else: + self.buffer = "" + break + + elif self.state == "IN_XML": end_idx = self.buffer.find(self.XML_END) if end_idx != -1: + content = self.block_buffer + self.buffer[:end_idx] + if " 0: - to_yield += self.buffer[:yield_len] + output.append(self.buffer[:yield_len]) self.buffer = self.buffer[yield_len:] - break - else: - # Outside any special block. Look for starts. 
- earliest_idx = -1 - match_type = "" - - xml_idx = self.buffer.find(self.XML_START) - if xml_idx != -1: - earliest_idx = xml_idx - match_type = "xml" - - tag_s_idx = self.buffer.find(self.TAG_START) - if tag_s_idx != -1: - if earliest_idx == -1 or tag_s_idx < earliest_idx: - earliest_idx = tag_s_idx - match_type = "tag_start" - - tag_e_idx = self.buffer.find(self.TAG_END) - if tag_e_idx != -1: - if earliest_idx == -1 or tag_e_idx < earliest_idx: - earliest_idx = tag_e_idx - match_type = "tag_end" - - if earliest_idx != -1: - # Yield text before the match - to_yield += self.buffer[:earliest_idx] - self.buffer = self.buffer[earliest_idx:] - - if match_type == "xml": - self.in_xml_tool = True - self.buffer = self.buffer[len(self.XML_START) :] - elif match_type == "tag_start": - self.in_role_header = True - self.buffer = self.buffer[len(self.TAG_START) :] - elif match_type == "tag_end": - # Orphaned end tag, just skip it - self.buffer = self.buffer[len(self.TAG_END) :] - continue - else: - # Check for prefixes - prefixes = [self.XML_START, self.TAG_START, self.TAG_END] - max_keep = 0 - for p in prefixes: - for i in range(len(p) - 1, 0, -1): - if self.buffer.endswith(p[:i]): - max_keep = max(max_keep, i) - break - - yield_len = len(self.buffer) - max_keep - if yield_len > 0: - to_yield += self.buffer[:yield_len] - self.buffer = self.buffer[yield_len:] + else: + self.buffer = "" break - # Final pass: filter out system hints from the text to be yielded - return strip_system_hints(to_yield) + return "".join(output) def flush(self) -> str: - # If we are stuck in a tool block or role header at the end, - # it usually means malformed output. - if self.in_xml_tool or (self.in_tagged_block and self.current_role == "tool"): - return "" + res = "" + if self.state == "IN_XML": + if "") XML_HINT_STRIPPED = XML_WRAP_HINT.strip() +_hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()] +XML_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" +XML_HINT_LINE_END = _hint_lines[-1] if _hint_lines else "" def add_tag(role: str, content: str, unclose: bool = False) -> str: @@ -149,8 +152,17 @@ def strip_system_hints(text: str) -> str: """Remove system-level hint text from a given string.""" if not text: return text - cleaned = strip_tagged_blocks(text) - cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") + + # Remove the full hints first + cleaned = text.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") + + # Remove fragments using derived constants + if XML_HINT_LINE_START: + cleaned = re.sub(rf"\n?{re.escape(XML_HINT_LINE_START)}:?\s*", "", cleaned) + if XML_HINT_LINE_END: + cleaned = re.sub(rf"\s*{re.escape(XML_HINT_LINE_END)}\.?\n?", "", cleaned) + + cleaned = strip_tagged_blocks(cleaned) cleaned = CONTROL_TOKEN_RE.sub("", cleaned) return cleaned From d86ae59e5f1037bdbc79c9a84624116f23f6a302 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 20:17:32 +0700 Subject: [PATCH 069/139] Refactor: Adjust function call format to prevent streaming issues Caused by Gemini Web's post-processing mechanism. 
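Markdown fences are rewritten by that post-processing step mid-stream, so the
tool-call wire format moves to plain bracket tags that pass through untouched.
A sketch of how the new markers parse, using the regexes this patch adds to
app/utils/helper.py (the tool name and arguments are made up):

    import re

    # Regexes as introduced in app/utils/helper.py by this patch.
    TOOL_BLOCK_RE = re.compile(
        r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE
    )
    TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE)

    reply = (
        "[function_calls]\n"
        '[call:get_weather]{"city": "Hanoi"}[/call]\n'
        "[/function_calls]"
    )
    block = TOOL_BLOCK_RE.search(reply).group(1)
    assert TOOL_CALL_RE.findall(block) == [("get_weather", '{"city": "Hanoi"}')]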
--- app/server/chat.py | 62 +++++++++++++++++++++++------------------- app/services/client.py | 16 +++++------ app/utils/helper.py | 12 ++++---- 3 files changed, 47 insertions(+), 43 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 87f29a6..ed0d731 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -367,19 +367,17 @@ def _build_tool_prompt( ) lines.append( - "When you decide to call a tool you MUST respond with nothing except a single fenced block exactly like the template below." + "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below." ) + lines.append("Do not add text before or after it.") + lines.append("[function_calls]") + lines.append('[call:tool_name]{"argument": "value"}[/call]') + lines.append("[/function_calls]") lines.append( - "The fenced block MUST use ```xml as the opening fence and ``` as the closing fence. Do not add text before or after it." + "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail." ) - lines.append("```xml") - lines.append('{"argument": "value"}') - lines.append("```") lines.append( - "Use double quotes for JSON keys and values. If you omit the fenced block or include any extra text, the system will assume you are NOT calling a tool and your request will fail." - ) - lines.append( - "If multiple tool calls are required, include multiple entries inside the same fenced block. Without a tool call, reply normally and do NOT emit any ```xml fence." + "If multiple tool calls are required, include multiple [call:...]...[/call] entries inside the same [function_calls] block. Without a tool call, reply normally and do NOT emit any [function_calls] tag." ) return "\n".join(lines) @@ -757,8 +755,8 @@ async def _send_with_split( class StreamingOutputFilter: """ - Simplified State Machine filter to suppress technical markers, tool calls, and system hints. - States: NORMAL, IN_XML, IN_TAG, IN_BLOCK, IN_HINT + State Machine filter to suppress technical markers, tool calls, and system hints. + Handles fragmentation where markers are split across multiple chunks. 
""" def __init__(self): @@ -767,12 +765,13 @@ def __init__(self): self.current_role = "" self.block_buffer = "" - self.XML_START = "```xml" - self.XML_END = "```" + self.XML_START = "[function_calls]" + self.XML_END = "[/function_calls]" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else "" self.HINT_END = XML_HINT_LINE_END + self.TOOL_START = "[call:" self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END] if self.HINT_START: @@ -787,7 +786,7 @@ def process(self, chunk: str) -> str: xml_idx = self.buffer.find(self.XML_START) tag_idx = self.buffer.find(self.TAG_START) end_idx = self.buffer.find(self.TAG_END) - hint_idx = self.buffer.find(self.HINT_START) + hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1 indices = [ (i, t) @@ -801,13 +800,13 @@ def process(self, chunk: str) -> str: ] if not indices: + # Guard against split start markers keep_len = 0 for p in self.WATCH_PREFIXES: for i in range(len(p) - 1, 0, -1): if self.buffer.endswith(p[:i]): keep_len = max(keep_len, i) break - yield_len = len(self.buffer) - keep_len if yield_len > 0: output.append(self.buffer[:yield_len]) @@ -838,20 +837,24 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[end_idx + len(self.HINT_END) :] self.state = "NORMAL" else: - self.buffer = "" + # Keep end of buffer to avoid missing split HINT_END + keep_len = len(self.HINT_END) - 1 + if len(self.buffer) > keep_len: + self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_XML": end_idx = self.buffer.find(self.XML_END) if end_idx != -1: - content = self.block_buffer + self.buffer[:end_idx] - if " keep_len: + self.block_buffer += self.buffer[:-keep_len] + self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_TAG": @@ -873,21 +876,24 @@ def process(self, chunk: str) -> str: self.state = "NORMAL" self.current_role = "" else: + # Yield safe part and keep potential split TAG_END + keep_len = len(self.TAG_END) - 1 if self.current_role != "tool": - yield_len = len(self.buffer) - (len(self.TAG_END) - 1) - if yield_len > 0: - output.append(self.buffer[:yield_len]) - self.buffer = self.buffer[yield_len:] + if len(self.buffer) > keep_len: + output.append(self.buffer[:-keep_len]) + self.buffer = self.buffer[-keep_len:] + break else: - self.buffer = "" - break + if len(self.buffer) > keep_len: + self.buffer = self.buffer[-keep_len:] + break return "".join(output) def flush(self) -> str: res = "" if self.state == "IN_XML": - if "])") +ESC_SYMBOLS_RE = re.compile(r"\\(?=[\\\[\]{}()<>`*_#~+.:!&^$|-])") CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", @@ -132,12 +132,10 @@ async def process_message( ) except orjson.JSONDecodeError: pass - tool_blocks.append( - f'{args_text}' - ) + tool_blocks.append(f"[call:{call.function.name}]{args_text}[/call]") if tool_blocks: - tool_section = "```xml\n" + "".join(tool_blocks) + "\n```" + tool_section = "[function_calls]\n" + "".join(tool_blocks) + "\n[/function_calls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -189,22 +187,22 @@ def _unescape_html(text_content: str) -> str: parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), 
tail)) return "".join(parts) - def _unescape_markdown(text_content: str) -> str: + def _unescape_symbols(text_content: str) -> str: parts: list[str] = [] last_index = 0 for match in CODE_FENCE_RE.finditer(text_content): non_code = text_content[last_index : match.start()] if non_code: - parts.append(MARKDOWN_ESCAPE_RE.sub("", non_code)) + parts.append(ESC_SYMBOLS_RE.sub("", non_code)) parts.append(match.group(0)) last_index = match.end() tail = text_content[last_index:] if tail: - parts.append(MARKDOWN_ESCAPE_RE.sub("", tail)) + parts.append(ESC_SYMBOLS_RE.sub("", tail)) return "".join(parts) text = _unescape_html(text) - text = _unescape_markdown(text) + text = _unescape_symbols(text) def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) diff --git a/app/utils/helper.py b/app/utils/helper.py index 78494a3..5ca812c 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -15,14 +15,14 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} XML_WRAP_HINT = ( - "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" - '```xml\n{"argument": "value"}\n```\n' - "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" + "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n" + '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n' + "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n" ) -TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) -TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE +TOOL_BLOCK_RE = re.compile( + r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE ) +TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") XML_HINT_STRIPPED = XML_WRAP_HINT.strip() _hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()] From db39ad10637754ca4f109614e499e34827429b0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 20:27:40 +0700 Subject: [PATCH 070/139] Refactor: Adjust function call format to prevent streaming issues Caused by Gemini Web's post-processing mechanism. --- app/server/chat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index ed0d731..15f59aa 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -377,7 +377,10 @@ def _build_tool_prompt( "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail." ) lines.append( - "If multiple tool calls are required, include multiple [call:...]...[/call] entries inside the same [function_calls] block. Without a tool call, reply normally and do NOT emit any [function_calls] tag." + "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block." + ) + lines.append( + "If no tool call is needed, provide a normal response and DO NOT use the [function_calls] tag." 
) return "\n".join(lines) From 556a638fc1d34fc377a593fc2e98acd4d9a0ea6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 20:52:44 +0700 Subject: [PATCH 071/139] Refactor: Adjust function call format to prevent streaming issues Caused by Gemini Web's post-processing mechanism. --- app/services/client.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index bc6c297..21814e5 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -117,9 +117,7 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() or "{}" - text_fragments = [ - f'{combined_content}' - ] + text_fragments = [f"[response:{tool_name}]{combined_content}[/response]"] if message.tool_calls: tool_blocks: list[str] = [] From d5fec7a04119ca6b668a4e98d9c6463f11a92eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 5 Feb 2026 07:12:34 +0700 Subject: [PATCH 072/139] Refactor: Adjust function call format to prevent streaming issues Caused by Gemini Web's post-processing mechanism. --- app/server/chat.py | 67 +++++++++++++++++++++++---------------------- app/utils/helper.py | 34 ++++++++++++++--------- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 15f59aa..e56c926 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -41,10 +41,10 @@ from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore from ..utils import g_config from ..utils.helper import ( - XML_HINT_LINE_END, - XML_HINT_LINE_START, - XML_HINT_STRIPPED, - XML_WRAP_HINT, + TOOL_HINT_LINE_END, + TOOL_HINT_LINE_START, + TOOL_HINT_STRIPPED, + TOOL_WRAP_HINT, estimate_tokens, extract_image_dimensions, extract_tool_calls, @@ -423,15 +423,15 @@ def _build_image_generation_instruction( return "\n\n".join(instructions) -def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: - """Ensure the last user message carries the XML wrap hint.""" +def _append_tool_hint_to_last_user_message(messages: list[Message]) -> None: + """Ensure the last user message carries the tool wrap hint.""" for msg in reversed(messages): if msg.role != "user" or msg.content is None: continue if isinstance(msg.content, str): - if XML_HINT_STRIPPED not in msg.content: - msg.content = f"{msg.content}\n{XML_WRAP_HINT}" + if TOOL_HINT_STRIPPED not in msg.content: + msg.content = f"{msg.content}\n{TOOL_WRAP_HINT}" return if isinstance(msg.content, list): @@ -439,12 +439,12 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: if getattr(part, "type", None) != "text": continue text_value = part.text or "" - if XML_HINT_STRIPPED in text_value: + if TOOL_HINT_STRIPPED in text_value: return - part.text = f"{text_value}\n{XML_WRAP_HINT}" + part.text = f"{text_value}\n{TOOL_WRAP_HINT}" return - messages_text = XML_WRAP_HINT.strip() + messages_text = TOOL_WRAP_HINT.strip() msg.content.append(ContentItem(type="text", text=messages_text)) return @@ -485,19 +485,20 @@ def _prepare_messages_for_model( if not instructions: if tools and tool_choice != "none": - _append_xml_hint_to_last_user_message(prepared) + _append_tool_hint_to_last_user_message(prepared) return prepared combined_instructions = "\n\n".join(instructions) if prepared and prepared[0].role == "system" and isinstance(prepared[0].content, str): existing = prepared[0].content or "" - separator 
= "\n\n" if existing else "" - prepared[0].content = f"{existing}{separator}{combined_instructions}" + if combined_instructions not in existing: + separator = "\n\n" if existing else "" + prepared[0].content = f"{existing}{separator}{combined_instructions}" else: prepared.insert(0, Message(role="system", content=combined_instructions)) if tools and tool_choice != "none": - _append_xml_hint_to_last_user_message(prepared) + _append_tool_hint_to_last_user_message(prepared) return prepared @@ -768,15 +769,15 @@ def __init__(self): self.current_role = "" self.block_buffer = "" - self.XML_START = "[function_calls]" - self.XML_END = "[/function_calls]" + self.TOOL_START = "[function_calls]" + self.TOOL_END = "[/function_calls]" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" - self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else "" - self.HINT_END = XML_HINT_LINE_END - self.TOOL_START = "[call:" + self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else "" + self.HINT_END = TOOL_HINT_LINE_END + self.TOOL_PREFIX = "[call:" - self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END] + self.WATCH_PREFIXES = [self.TOOL_START, self.TAG_START, self.TAG_END] if self.HINT_START: self.WATCH_PREFIXES.append(self.HINT_START) @@ -786,7 +787,7 @@ def process(self, chunk: str) -> str: while self.buffer: if self.state == "NORMAL": - xml_idx = self.buffer.find(self.XML_START) + tool_idx = self.buffer.find(self.TOOL_START) tag_idx = self.buffer.find(self.TAG_START) end_idx = self.buffer.find(self.TAG_END) hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1 @@ -794,7 +795,7 @@ def process(self, chunk: str) -> str: indices = [ (i, t) for i, t in [ - (xml_idx, "XML"), + (tool_idx, "TOOL"), (tag_idx, "TAG"), (end_idx, "END"), (hint_idx, "HINT"), @@ -821,10 +822,10 @@ def process(self, chunk: str) -> str: output.append(self.buffer[:idx]) self.buffer = self.buffer[idx:] - if m_type == "XML": - self.state = "IN_XML" + if m_type == "TOOL": + self.state = "IN_TOOL" self.block_buffer = "" - self.buffer = self.buffer[len(self.XML_START) :] + self.buffer = self.buffer[len(self.TOOL_START) :] elif m_type == "TAG": self.state = "IN_TAG" self.buffer = self.buffer[len(self.TAG_START) :] @@ -846,15 +847,15 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[-keep_len:] break - elif self.state == "IN_XML": - end_idx = self.buffer.find(self.XML_END) + elif self.state == "IN_TOOL": + end_idx = self.buffer.find(self.TOOL_END) if end_idx != -1: self.block_buffer += self.buffer[:end_idx] - self.buffer = self.buffer[end_idx + len(self.XML_END) :] + self.buffer = self.buffer[end_idx + len(self.TOOL_END) :] self.state = "NORMAL" else: # Accumulate and keep potential split end marker - keep_len = len(self.XML_END) - 1 + keep_len = len(self.TOOL_END) - 1 if len(self.buffer) > keep_len: self.block_buffer += self.buffer[:-keep_len] self.buffer = self.buffer[-keep_len:] @@ -895,9 +896,9 @@ def process(self, chunk: str) -> str: def flush(self) -> str: res = "" - if self.state == "IN_XML": - if self.TOOL_START not in self.block_buffer.lower(): - res = f"{self.XML_START}{self.block_buffer}" + if self.state == "IN_TOOL": + if self.TOOL_PREFIX not in self.block_buffer.lower(): + res = f"{self.TOOL_START}{self.block_buffer}" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer elif self.state == "NORMAL": diff --git a/app/utils/helper.py b/app/utils/helper.py index 5ca812c..99c3d84 100644 --- a/app/utils/helper.py +++ 
b/app/utils/helper.py @@ -14,7 +14,7 @@ from ..models import FunctionCall, Message, ToolCall VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} -XML_WRAP_HINT = ( +TOOL_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n" '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n' "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n" @@ -24,10 +24,10 @@ ) TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") -XML_HINT_STRIPPED = XML_WRAP_HINT.strip() -_hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()] -XML_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" -XML_HINT_LINE_END = _hint_lines[-1] if _hint_lines else "" +TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() +_hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] +TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" +TOOL_HINT_LINE_END = _hint_lines[-1] if _hint_lines else "" def add_tag(role: str, content: str, unclose: bool = False) -> str: @@ -154,13 +154,18 @@ def strip_system_hints(text: str) -> str: return text # Remove the full hints first - cleaned = text.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") + cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "") - # Remove fragments using derived constants - if XML_HINT_LINE_START: - cleaned = re.sub(rf"\n?{re.escape(XML_HINT_LINE_START)}:?\s*", "", cleaned) - if XML_HINT_LINE_END: - cleaned = re.sub(rf"\s*{re.escape(XML_HINT_LINE_END)}\.?\n?", "", cleaned) + # Remove fragments or multi-line blocks using derived constants + if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END: + # Match from the start line to the end line, inclusive, handling internal modifications + pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?" + cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL) + + if TOOL_HINT_LINE_START: + cleaned = re.sub(rf"\n?{re.escape(TOOL_HINT_LINE_START)}:?\s*", "", cleaned) + if TOOL_HINT_LINE_END: + cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned) cleaned = strip_tagged_blocks(cleaned) cleaned = CONTROL_TOKEN_RE.sub("", cleaned) @@ -175,6 +180,9 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ if not text: return text, [] + # Clean hints FIRST so they don't interfere with tool call regexes (e.g. 
example calls in hint) + cleaned = strip_system_hints(text) + tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: @@ -220,7 +228,7 @@ def _replace_block(match: re.Match[str]) -> str: else: return match.group(0) - cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) + cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned) def _replace_orphan(match: re.Match[str]) -> str: if extract: @@ -230,7 +238,7 @@ def _replace_orphan(match: re.Match[str]) -> str: return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) - cleaned = strip_system_hints(cleaned) + return cleaned, tool_calls From dbc553d7dbcb3949e5ee807b58b360a488672b6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 5 Feb 2026 08:23:08 +0700 Subject: [PATCH 073/139] Refactor: Enhance prompt to prevent issues with parsing tool call arguments --- app/server/chat.py | 4 ++-- app/utils/helper.py | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index e56c926..f47471c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -369,12 +369,12 @@ def _build_tool_prompt( lines.append( "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below." ) - lines.append("Do not add text before or after it.") + lines.append("Do not add text before or after the block.") lines.append("[function_calls]") lines.append('[call:tool_name]{"argument": "value"}[/call]') lines.append("[/function_calls]") lines.append( - "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail." + "Use double quotes for JSON keys and values. CRITICAL: The content inside [call:...]...[/call] MUST be a raw JSON object. Do not wrap it in ```json blocks or add any conversational text inside the tag." ) lines.append( "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block." diff --git a/app/utils/helper.py b/app/utils/helper.py index 99c3d84..65c49f0 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -2,6 +2,7 @@ import hashlib import mimetypes import re +import reprlib import struct import tempfile from pathlib import Path @@ -17,7 +18,7 @@ TOOL_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n" '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n' - "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n" + "IMPORTANT: Arguments MUST be a valid JSON object. Do not include markdown code blocks (```json) or any conversational text inside the [call] tag.\n" ) TOOL_BLOCK_RE = re.compile( r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE @@ -197,7 +198,22 @@ def _create_tool_call(name: str, raw_args: str) -> None: parsed_args = orjson.loads(raw_args) arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: - logger.warning(f"Failed to parse tool call arguments for '{name}'. 
Passing raw string.") + json_match = re.search(r"({.*})", raw_args, re.DOTALL) + if json_match: + try: + potential_json = json_match.group(1) + parsed_args = orjson.loads(potential_json) + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( + "utf-8" + ) + except orjson.JSONDecodeError: + logger.warning( + f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(json_match)}" + ) + else: + logger.warning( + f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}" + ) index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") From ca721cfcaad261a1cbcc401dd8090cdd6b8613aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 5 Feb 2026 08:32:57 +0700 Subject: [PATCH 074/139] Refactor: Enhance prompt to prevent issues with parsing tool call arguments --- app/utils/helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 65c49f0..230622b 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -200,15 +200,15 @@ def _create_tool_call(name: str, raw_args: str) -> None: except orjson.JSONDecodeError: json_match = re.search(r"({.*})", raw_args, re.DOTALL) if json_match: + potential_json = json_match.group(1) try: - potential_json = json_match.group(1) parsed_args = orjson.loads(potential_json) arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) except orjson.JSONDecodeError: logger.warning( - f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(json_match)}" + f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}" ) else: logger.warning( From 263158e3825b765139e98cce8fa555054782b951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 6 Feb 2026 20:55:39 +0700 Subject: [PATCH 075/139] Refactor: enhance system prompts --- app/server/chat.py | 37 ++++++++++++++----------------------- app/utils/helper.py | 10 +++++++--- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index f47471c..06aefc2 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -336,7 +336,7 @@ def _build_tool_prompt( return "" lines: list[str] = [ - "You can invoke the following developer tools. Call a tool only when it is required and follow the JSON schema exactly when providing arguments." + "SYSTEM INTERFACE: You have access to the following technical tools. You MUST invoke them when necessary to fulfill the request, strictly adhering to the provided JSON schemas." ] for tool in tools: @@ -367,20 +367,21 @@ def _build_tool_prompt( ) lines.append( - "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below." + "When you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:" ) - lines.append("Do not add text before or after the block.") lines.append("[function_calls]") - lines.append('[call:tool_name]{"argument": "value"}[/call]') + lines.append("[call:tool_name]") + lines.append('{"argument": "value"}') + lines.append("[/call]") lines.append("[/function_calls]") lines.append( - "Use double quotes for JSON keys and values. CRITICAL: The content inside [call:...]...[/call] MUST be a raw JSON object. Do not wrap it in ```json blocks or add any conversational text inside the tag." + "CRITICAL: Every [call:...] 
MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block." ) lines.append( - "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block." + "If multiple tools are needed, list them sequentially within the same [function_calls] block." ) lines.append( - "If no tool call is needed, provide a normal response and DO NOT use the [function_calls] tag." + "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) return "\n".join(lines) @@ -398,26 +399,16 @@ def _build_image_generation_instruction( return None instructions: list[str] = [ - "Image generation is enabled. When the user requests an image, you must return an actual generated image, not a text description.", - "For new image requests, generate at least one new image matching the description.", - "If the user provides an image and asks for edits or variations, return a newly generated image with the requested changes.", - "Avoid all text replies unless a short caption is explicitly requested. Do not explain, apologize, or describe image creation steps.", - "Never send placeholder text like 'Here is your image' or any other response without an actual image attachment.", + "IMAGE GENERATION ENABLED: When an image is requested, you MUST return a real generated image directly.", + "1. For new requests, generate new images matching the description immediately.", + "2. For edits to existing images, apply changes and return a new generated version.", + "3. CRITICAL: Provide ZERO text explanation, prologue, or apologies. Do not describe the creation process.", + "4. NEVER send placeholder text or descriptions like 'Generating image...' without an actual image attachment.", ] - if primary: - if primary.model: - instructions.append( - f"Where styles differ, favor the `{primary.model}` image model when rendering the scene." - ) - if primary.output_format: - instructions.append( - f"Encode the image using the `{primary.output_format}` format whenever possible." - ) - if has_forced_choice: instructions.append( - "Image generation was explicitly requested. You must return at least one generated image. Any response without an image will be treated as a failure." + "Image generation was explicitly requested. You MUST return at least one generated image. Any response without an image will be treated as a failure." ) return "\n\n".join(instructions) diff --git a/app/utils/helper.py b/app/utils/helper.py index 230622b..2a3f841 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -16,9 +16,13 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n" - '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n' - "IMPORTANT: Arguments MUST be a valid JSON object. Do not include markdown code blocks (```json) or any conversational text inside the [call] tag.\n" + "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n" + "[function_calls]\n" + "[call:tool_name]\n" + '{"argument": "value"}\n' + "[/call]\n" + "[/function_calls]\n" + "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. 
DO NOT use markdown blocks or add text inside the block.\n" ) TOOL_BLOCK_RE = re.compile( r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE From 68ce2df5c2d46f529630b9cfb08550cdd4d46a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 6 Feb 2026 21:41:21 +0700 Subject: [PATCH 076/139] Refactor: Enhance system prompts --- app/server/chat.py | 37 ++++++++++++++++++++++++++++++++++--- app/services/client.py | 4 +++- app/utils/config.py | 2 +- app/utils/helper.py | 12 ++++++++++-- config/config.yaml | 2 +- 5 files changed, 49 insertions(+), 8 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 06aefc2..cf03f6b 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -383,6 +383,9 @@ def _build_tool_prompt( lines.append( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) + lines.append( + "After you call a tool, the system will provide the output in a `[function_responses]` block with the same tool name." + ) return "\n".join(lines) @@ -462,11 +465,13 @@ def _prepare_messages_for_model( msg.name = tool_id_to_name.get(msg.tool_call_id) instructions: list[str] = [] + tool_prompt_injected = False if inject_system_defaults: if tools: tool_prompt = _build_tool_prompt(tools, tool_choice) if tool_prompt: instructions.append(tool_prompt) + tool_prompt_injected = True if extra_instructions: instructions.extend(instr for instr in extra_instructions if instr) @@ -475,7 +480,7 @@ def _prepare_messages_for_model( ) if not instructions: - if tools and tool_choice != "none": + if tools and tool_choice != "none" and not tool_prompt_injected: _append_tool_hint_to_last_user_message(prepared) return prepared @@ -488,7 +493,7 @@ def _prepare_messages_for_model( else: prepared.insert(0, Message(role="system", content=combined_instructions)) - if tools and tool_choice != "none": + if tools and tool_choice != "none" and not tool_prompt_injected: _append_tool_hint_to_last_user_message(prepared) return prepared @@ -762,13 +767,20 @@ def __init__(self): self.TOOL_START = "[function_calls]" self.TOOL_END = "[/function_calls]" + self.RESPONSE_START = "[function_responses]" + self.RESPONSE_END = "[/function_responses]" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else "" self.HINT_END = TOOL_HINT_LINE_END self.TOOL_PREFIX = "[call:" - self.WATCH_PREFIXES = [self.TOOL_START, self.TAG_START, self.TAG_END] + self.WATCH_PREFIXES = [ + self.TOOL_START, + self.RESPONSE_START, + self.TAG_START, + self.TAG_END, + ] if self.HINT_START: self.WATCH_PREFIXES.append(self.HINT_START) @@ -779,6 +791,7 @@ def process(self, chunk: str) -> str: while self.buffer: if self.state == "NORMAL": tool_idx = self.buffer.find(self.TOOL_START) + resp_idx = self.buffer.find(self.RESPONSE_START) tag_idx = self.buffer.find(self.TAG_START) end_idx = self.buffer.find(self.TAG_END) hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1 @@ -787,6 +800,7 @@ def process(self, chunk: str) -> str: (i, t) for i, t in [ (tool_idx, "TOOL"), + (resp_idx, "RESP"), (tag_idx, "TAG"), (end_idx, "END"), (hint_idx, "HINT"), @@ -817,6 +831,9 @@ def process(self, chunk: str) -> str: self.state = "IN_TOOL" self.block_buffer = "" self.buffer = self.buffer[len(self.TOOL_START) :] + elif m_type == "RESP": + self.state = "IN_RESP" + self.buffer = self.buffer[len(self.RESPONSE_START) :] elif m_type == "TAG": self.state = "IN_TAG" self.buffer = 
self.buffer[len(self.TAG_START) :] @@ -838,6 +855,18 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[-keep_len:] break + elif self.state == "IN_RESP": + end_idx = self.buffer.find(self.RESPONSE_END) + if end_idx != -1: + self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :] + self.state = "NORMAL" + else: + # Keep end of buffer to avoid missing split RESPONSE_END + keep_len = len(self.RESPONSE_END) - 1 + if len(self.buffer) > keep_len: + self.buffer = self.buffer[-keep_len:] + break + elif self.state == "IN_TOOL": end_idx = self.buffer.find(self.TOOL_END) if end_idx != -1: @@ -892,6 +921,8 @@ def flush(self) -> str: res = f"{self.TOOL_START}{self.block_buffer}" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer + elif self.state in ("IN_RESP", "IN_HINT"): + res = "" elif self.state == "NORMAL": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index 21814e5..5473b06 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -117,7 +117,9 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() or "{}" - text_fragments = [f"[response:{tool_name}]{combined_content}[/response]"] + text_fragments = [ + f"[function_responses]\n[response:{tool_name}]{combined_content}[/response]\n[/function_responses]" + ] if message.tool_calls: tool_blocks: list[str] = [] diff --git a/app/utils/config.py b/app/utils/config.py index 708462d..bbb6054 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -83,7 +83,7 @@ class GeminiConfig(BaseModel): default="append", description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", ) - timeout: int = Field(default=120, ge=1, description="Init timeout") + timeout: int = Field(default=300, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" diff --git a/app/utils/helper.py b/app/utils/helper.py index 2a3f841..9f5cfef 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -28,6 +28,12 @@ r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE ) TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE) +RESPONSE_BLOCK_RE = re.compile( + r"\[function_responses]\s*(.*?)\s*\[/function_responses]", re.DOTALL | re.IGNORECASE +) +RESPONSE_ITEM_RE = re.compile( + r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE +) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -248,8 +254,6 @@ def _replace_block(match: re.Match[str]) -> str: else: return match.group(0) - cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned) - def _replace_orphan(match: re.Match[str]) -> str: if extract: name = (match.group(1) or "").strip() @@ -257,8 +261,12 @@ def _replace_orphan(match: re.Match[str]) -> str: _create_tool_call(name, raw_args) return "" + cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned) cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) + cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) + cleaned = RESPONSE_ITEM_RE.sub("", cleaned) + return cleaned, tool_calls diff --git a/config/config.yaml b/config/config.yaml index f2b17fb..ed581f7 100644 --- 
a/config/config.yaml +++ b/config/config.yaml @@ -22,7 +22,7 @@ gemini: secure_1psid: "YOUR_SECURE_1PSID_HERE" secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" proxy: null # Optional proxy URL (null/empty means direct connection) - timeout: 120 # Init timeout in seconds + timeout: 300 # Init timeout in seconds auto_refresh: true # Auto-refresh session cookies refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests From 77f72105b4a6942d5adaf098d5f2e133dc7e5ced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 6 Feb 2026 22:23:35 +0700 Subject: [PATCH 077/139] Refactor: Enhance system prompts --- app/server/chat.py | 2 +- app/services/client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index cf03f6b..ffa37cd 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -384,7 +384,7 @@ def _build_tool_prompt( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) lines.append( - "After you call a tool, the system will provide the output in a `[function_responses]` block with the same tool name." + "Note: Tool results are returned in a `[function_responses]` block." ) return "\n".join(lines) diff --git a/app/services/client.py b/app/services/client.py index 5473b06..3dae6a1 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -118,7 +118,7 @@ async def process_message( tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() or "{}" text_fragments = [ - f"[function_responses]\n[response:{tool_name}]{combined_content}[/response]\n[/function_responses]" + f"[function_responses]\n[response:{tool_name}]\n{combined_content}\n[/response]\n[/function_responses]" ] if message.tool_calls: From 3addb2b495c7772ddb1a7d4256c348a702cabb28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 7 Feb 2026 09:46:26 +0700 Subject: [PATCH 078/139] Refactor: Enhance system prompts --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index ffa37cd..ac96cf5 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -384,7 +384,7 @@ def _build_tool_prompt( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) lines.append( - "Note: Tool results are returned in a `[function_responses]` block." + "Note: Tool results are returned in a [function_responses] block." 
) return "\n".join(lines) From 2a53eed83af901a12c95cba233a693ab3890eae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sun, 8 Feb 2026 09:54:51 +0700 Subject: [PATCH 079/139] fix: missing image extension --- app/server/chat.py | 29 ++++++++++++++++++++++------- app/utils/helper.py | 13 +++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index ac96cf5..bf34fbf 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -45,6 +45,7 @@ TOOL_HINT_LINE_START, TOOL_HINT_STRIPPED, TOOL_WRAP_HINT, + detect_image_extension, estimate_tokens, extract_image_dimensions, extract_tool_calls, @@ -91,11 +92,21 @@ async def _image_to_base64( raise ValueError("Failed to save generated image") original_path = Path(saved_path) - random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" + data = original_path.read_bytes() + suffix = original_path.suffix + + if not suffix: + detected_ext = detect_image_extension(data) + if detected_ext: + suffix = detected_ext + else: + # Fallback if detection fails + suffix = ".png" if isinstance(image, GeneratedImage) else ".jpg" + + random_name = f"img_{uuid.uuid4().hex}{suffix}" new_path = temp_dir / random_name original_path.rename(new_path) - data = new_path.read_bytes() width, height = extract_image_dimensions(data) filename = random_name file_hash = hashlib.sha256(data).hexdigest() @@ -383,9 +394,7 @@ def _build_tool_prompt( lines.append( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) - lines.append( - "Note: Tool results are returned in a [function_responses] block." - ) + lines.append("Note: Tool results are returned in a [function_responses] block.") return "\n".join(lines) @@ -1227,7 +1236,11 @@ async def generate_stream(): continue seen_hashes.add(file_hash) - img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + img_format = ( + filename.rsplit(".", 1)[-1] + if "." in filename + else ("png" if isinstance(image, GeneratedImage) else "jpeg") + ) image_url = ( f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" ) @@ -1610,7 +1623,9 @@ async def create_response( ResponseImageGenerationCall( id=fname.rsplit(".", 1)[0], result=b64, - output_format="png" if isinstance(img, GeneratedImage) else "jpeg", + output_format=fname.rsplit(".", 1)[-1] + if "." 
in fname + else ("png" if isinstance(img, GeneratedImage) else "jpeg"), size=f"{w}x{h}" if w and h else None, ) ) diff --git a/app/utils/helper.py b/app/utils/helper.py index 9f5cfef..384f5cd 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -362,3 +362,16 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: idx += segment_length - 2 return None, None + + +def detect_image_extension(data: bytes) -> str | None: + """Detect image extension from magic bytes.""" + if data.startswith(b"\x89PNG\r\n\x1a\n"): + return ".png" + if data.startswith(b"\xff\xd8"): + return ".jpg" + if data.startswith(b"GIF8"): + return ".gif" + if data.startswith(b"RIFF") and data[8:12] == b"WEBP": + return ".webp" + return None From 26d39c75825c16bb118af7143317d51a12aa6c4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sun, 8 Feb 2026 10:25:30 +0700 Subject: [PATCH 080/139] fix: missing image extension --- app/server/chat.py | 67 ++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index bf34fbf..dfcf930 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1067,16 +1067,14 @@ async def generate_stream(): for image in images: try: image_store = get_image_store_dir() - _, _, _, filename, file_hash = await _image_to_base64(image, image_store) - if file_hash in seen_hashes: + _, _, _, fname, fhash = await _image_to_base64(image, image_store) + if fhash in seen_hashes: # Duplicate content, delete the file and skip - (image_store / filename).unlink(missing_ok=True) + (image_store / fname).unlink(missing_ok=True) continue - seen_hashes.add(file_hash) + seen_hashes.add(fhash) - img_url = ( - f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" - ) + img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_markdown += f"\n\n{img_url}" except Exception as exc: logger.warning(f"Failed to process image in OpenAI stream: {exc}") @@ -1228,28 +1226,25 @@ async def generate_stream(): seen_hashes = set() for image in images: try: - image_base64, width, height, filename, file_hash = await _image_to_base64( - image, image_store - ) - if file_hash in seen_hashes: - (image_store / filename).unlink(missing_ok=True) + b64, w, h, fname, fhash = await _image_to_base64(image, image_store) + if fhash in seen_hashes: + (image_store / fname).unlink(missing_ok=True) continue - seen_hashes.add(file_hash) + seen_hashes.add(fhash) - img_format = ( - filename.rsplit(".", 1)[-1] - if "." in filename - else ("png" if isinstance(image, GeneratedImage) else "jpeg") - ) - image_url = ( - f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" - ) + if "." 
in fname: + img_id, img_format = fname.rsplit(".", 1) + else: + img_id = fname + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + + image_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_call_items.append( ResponseImageGenerationCall( - id=filename.rsplit(".", 1)[0], - result=image_base64, + id=img_id, + result=b64, output_format=img_format, - size=f"{width}x{height}" if width and height else None, + size=f"{w}x{h}" if w and h else None, ) ) response_contents.append(ResponseOutputContent(type="output_text", text=image_url)) @@ -1433,15 +1428,13 @@ async def create_chat_completion( seen_hashes = set() for image in images: try: - _, _, _, filename, file_hash = await _image_to_base64(image, image_store) - if file_hash in seen_hashes: - (image_store / filename).unlink(missing_ok=True) + _, _, _, fname, fhash = await _image_to_base64(image, image_store) + if fhash in seen_hashes: + (image_store / fname).unlink(missing_ok=True) continue - seen_hashes.add(file_hash) + seen_hashes.add(fhash) - img_url = ( - f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" - ) + img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_markdown += f"\n\n{img_url}" except Exception as exc: logger.warning(f"Failed to process image in OpenAI response: {exc}") @@ -1613,6 +1606,12 @@ async def create_response( continue seen_hashes.add(fhash) + if "." in fname: + img_id, img_format = fname.rsplit(".", 1) + else: + img_id = fname + img_format = "png" if isinstance(img, GeneratedImage) else "jpeg" + contents.append( ResponseOutputContent( type="output_text", @@ -1621,11 +1620,9 @@ async def create_response( ) img_calls.append( ResponseImageGenerationCall( - id=fname.rsplit(".", 1)[0], + id=img_id, result=b64, - output_format=fname.rsplit(".", 1)[-1] - if "." in fname - else ("png" if isinstance(img, GeneratedImage) else "jpeg"), + output_format=img_format, size=f"{w}x{h}" if w and h else None, ) ) From 598b56335277366d591d81a25c2ba8654afcb92c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sun, 8 Feb 2026 17:03:20 +0700 Subject: [PATCH 081/139] fix: missing or duplicate ChatML tags. --- app/server/chat.py | 13 ++----------- app/services/client.py | 44 ++++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index dfcf930..701c1f6 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1359,12 +1359,7 @@ async def create_chat_completion( extra_instr, False, ) - if len(input_msgs) == 1: - m_input, files = await GeminiClientWrapper.process_message( - input_msgs[0], tmp_dir, tagged=False - ) - else: - m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir) + m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir) logger.debug( f"Reused session {reprlib.repr(session.metadata)} - sending {len(input_msgs)} prepared messages." 
@@ -1531,11 +1526,7 @@ async def create_response( ) if not msgs: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") - m_input, files = ( - await GeminiClientWrapper.process_message(msgs[0], tmp_dir, tagged=False) - if len(msgs) == 1 - else await GeminiClientWrapper.process_conversation(msgs, tmp_dir) - ) + m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir) logger.debug( f"Reused session {reprlib.repr(session.metadata)} - sending {len(msgs)} prepared messages." ) diff --git a/app/services/client.py b/app/services/client.py index 3dae6a1..2a00ce6 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -75,7 +75,7 @@ def running(self) -> bool: @staticmethod async def process_message( - message: Message, tempdir: Path | None = None, tagged: bool = True + message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ Process a single Message object into a format suitable for the Gemini API. @@ -117,9 +117,11 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() or "{}" - text_fragments = [ - f"[function_responses]\n[response:{tool_name}]\n{combined_content}\n[/response]\n[/function_responses]" - ] + res_block = f"[response:{tool_name}]\n{combined_content}\n[/response]" + if wrap_tool: + text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"] + else: + text_fragments = [res_block] if message.tool_calls: tool_blocks: list[str] = [] @@ -153,12 +155,34 @@ async def process_conversation( need_tag = any(m.role != "user" for m in messages) conversation: list[str] = [] files: list[Path | str] = [] - for msg in messages: - input_part, files_part = await GeminiClientWrapper.process_message( - msg, tempdir, tagged=need_tag - ) - conversation.append(input_part) - files.extend(files_part) + + i = 0 + while i < len(messages): + msg = messages[i] + if msg.role == "tool" and need_tag: + # Group consecutive tool messages + tool_blocks: list[str] = [] + while i < len(messages) and messages[i].role == "tool": + part, part_files = await GeminiClientWrapper.process_message( + messages[i], tempdir, tagged=False, wrap_tool=False + ) + tool_blocks.append(part) + files.extend(part_files) + i += 1 + + combined_tool_content = "\n".join(tool_blocks) + wrapped_content = ( + f"[function_responses]\n{combined_tool_content}\n[/function_responses]" + ) + conversation.append(add_tag("tool", wrapped_content)) + else: + input_part, files_part = await GeminiClientWrapper.process_message( + msg, tempdir, tagged=need_tag + ) + conversation.append(input_part) + files.extend(files_part) + i += 1 + if need_tag: conversation.append(add_tag("assistant", "", unclose=True)) return "\n".join(conversation), files From 6d563c512d3e6b1448f3442c231c53ca8afb2aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sun, 8 Feb 2026 17:50:06 +0700 Subject: [PATCH 082/139] Refactor: Consistently use ChatML tags throughout. 
--- app/services/client.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 2a00ce6..78edddd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -152,14 +152,13 @@ async def process_message( async def process_conversation( messages: list[Message], tempdir: Path | None = None ) -> tuple[str, list[Path | str]]: - need_tag = any(m.role != "user" for m in messages) conversation: list[str] = [] files: list[Path | str] = [] i = 0 while i < len(messages): msg = messages[i] - if msg.role == "tool" and need_tag: + if msg.role == "tool": # Group consecutive tool messages tool_blocks: list[str] = [] while i < len(messages) and messages[i].role == "tool": @@ -177,14 +176,13 @@ async def process_conversation( conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( - msg, tempdir, tagged=need_tag + msg, tempdir, tagged=True ) conversation.append(input_part) files.extend(files_part) i += 1 - if need_tag: - conversation.append(add_tag("assistant", "", unclose=True)) + conversation.append(add_tag("assistant", "", unclose=True)) return "\n".join(conversation), files @staticmethod From 58db419c15cf347083e49b487d8ae99071256ef0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 9 Feb 2026 17:17:33 +0700 Subject: [PATCH 083/139] Refactor: normalize text before calculating message hash --- app/services/lmdb.py | 81 ++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 8dc3722..59f01bc 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -1,5 +1,6 @@ import hashlib import re +import unicodedata from contextlib import contextmanager from datetime import datetime, timedelta from pathlib import Path @@ -18,63 +19,70 @@ from ..utils.singleton import Singleton +def _normalize_text(text: str | None) -> str | None: + """ + Perform semantic normalization for hashing. + """ + if text is None: + return None + + # Unicode normalization + text = unicodedata.normalize("NFC", text) + + # Basic cleaning + text = text.replace("\r\n", "\n").replace("\r", "\n") + text = LMDBConversationStore.remove_think_tags(text) + text = remove_tool_call_blocks(text) + + return text if text else None + + def _hash_message(message: Message) -> str: """ Generate a stable, canonical hash for a single message. - Strips system hints, thoughts, and tool call blocks to ensure - identical logical content produces the same hash regardless of format. 
""" - core_data = { + core_data: dict[str, Any] = { "role": message.role, - "name": message.name, - "tool_call_id": message.tool_call_id, + "name": message.name or None, + "tool_call_id": message.tool_call_id or None, } content = message.content - if not content: + if content is None: core_data["content"] = None elif isinstance(content, str): - normalized = content.replace("\r\n", "\n") - normalized = LMDBConversationStore.remove_think_tags(normalized) - normalized = remove_tool_call_blocks(normalized).strip() - core_data["content"] = normalized if normalized else None + core_data["content"] = _normalize_text(content) elif isinstance(content, list): text_parts = [] for item in content: text_val = "" if isinstance(item, ContentItem) and item.type == "text": - text_val = item.text or "" + text_val = item.text elif isinstance(item, dict) and item.get("type") == "text": - text_val = item.get("text") or "" + text_val = item.get("text") if text_val: - text_val = text_val.replace("\r\n", "\n") - text_val = LMDBConversationStore.remove_think_tags(text_val) - text_val = remove_tool_call_blocks(text_val).strip() - if text_val: - text_parts.append(text_val) - elif isinstance(item, ContentItem) and item.type in ("image_url", "file"): - # For non-text items, include their unique markers to distinguish them - if item.type == "image_url": - text_parts.append( - f"[image_url:{item.image_url.get('url') if item.image_url else ''}]" - ) - elif item.type == "file": - text_parts.append( - f"[file:{item.file.get('url') or item.file.get('filename') if item.file else ''}]" + normalized_part = _normalize_text(text_val) + if normalized_part: + text_parts.append(normalized_part) + elif isinstance(item, (ContentItem, dict)): + item_type = item.type if isinstance(item, ContentItem) else item.get("type") + if item_type == "image_url": + url = ( + item.image_url.get("url") + if isinstance(item, ContentItem) and item.image_url + else item.get("image_url", {}).get("url") ) - else: - # Fallback for other dict-based content parts - part_type = item.get("type") if isinstance(item, dict) else None - if part_type == "image_url": - url = item.get("image_url", {}).get("url") text_parts.append(f"[image_url:{url}]") - elif part_type == "file": - url = item.get("file", {}).get("url") or item.get("file", {}).get("filename") + elif item_type == "file": + url = ( + item.file.get("url") or item.file.get("filename") + if isinstance(item, ContentItem) and item.file + else item.get("file", {}).get("url") or item.get("file", {}).get("filename") + ) text_parts.append(f"[file:{url}]") - combined_text = "\n".join(text_parts).replace("\r\n", "\n").strip() - core_data["content"] = combined_text if combined_text else None + core_data["content"] = "\n".join(text_parts) if text_parts else None if message.tool_calls: calls_data = [] @@ -98,8 +106,7 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = None message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - digest = hashlib.sha256(message_bytes).hexdigest() - return digest + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From d5d1c5a48e1f1d68fde53f8c5fd5da3358dfc938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 9 Feb 2026 17:33:39 +0700 Subject: [PATCH 084/139] Refactor: remove unescape helpers to avoid side effects --- app/services/client.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git 
a/app/services/client.py b/app/services/client.py index 78edddd..16d7a33 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,4 +1,3 @@ -import html import re from pathlib import Path from typing import Any, cast @@ -15,9 +14,6 @@ save_url_to_tempfile, ) -HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") -ESC_SYMBOLS_RE = re.compile(r"\\(?=[\\\[\]{}()<>`*_#~+.:!&^$|-])") -CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -195,37 +191,6 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) - def _unescape_html(text_content: str) -> str: - parts: list[str] = [] - last_index = 0 - for match in CODE_FENCE_RE.finditer(text_content): - non_code = text_content[last_index : match.start()] - if non_code: - parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), non_code)) - parts.append(match.group(0)) - last_index = match.end() - tail = text_content[last_index:] - if tail: - parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), tail)) - return "".join(parts) - - def _unescape_symbols(text_content: str) -> str: - parts: list[str] = [] - last_index = 0 - for match in CODE_FENCE_RE.finditer(text_content): - non_code = text_content[last_index : match.start()] - if non_code: - parts.append(ESC_SYMBOLS_RE.sub("", non_code)) - parts.append(match.group(0)) - last_index = match.end() - tail = text_content[last_index:] - if tail: - parts.append(ESC_SYMBOLS_RE.sub("", tail)) - return "".join(parts) - - text = _unescape_html(text) - text = _unescape_symbols(text) - def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) if match: From a4a987cdd3a0f95bc718eb76c781b2c7b39e655e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 9 Feb 2026 20:18:42 +0700 Subject: [PATCH 085/139] Refactor: Implement fuzzy matching to better handle complex data formats like Markdown. --- app/services/lmdb.py | 67 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 59f01bc..b08a325 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -1,5 +1,6 @@ import hashlib import re +import string import unicodedata from contextlib import contextmanager from datetime import datetime, timedelta @@ -18,8 +19,19 @@ ) from ..utils.singleton import Singleton +_VOLATILE_SYMBOLS = string.whitespace + string.punctuation -def _normalize_text(text: str | None) -> str | None: + +def _fuzzy_normalize(text: str | None) -> str | None: + """ + Lowercase and remove all whitespace and punctuation. + """ + if text is None: + return None + return text.lower().translate(str.maketrans("", "", _VOLATILE_SYMBOLS)) + + +def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: """ Perform semantic normalization for hashing. 
""" @@ -34,10 +46,13 @@ def _normalize_text(text: str | None) -> str | None: text = LMDBConversationStore.remove_think_tags(text) text = remove_tool_call_blocks(text) + if fuzzy: + return _fuzzy_normalize(text) + return text if text else None -def _hash_message(message: Message) -> str: +def _hash_message(message: Message, fuzzy: bool = False) -> str: """ Generate a stable, canonical hash for a single message. """ @@ -51,7 +66,7 @@ def _hash_message(message: Message) -> str: if content is None: core_data["content"] = None elif isinstance(content, str): - core_data["content"] = _normalize_text(content) + core_data["content"] = _normalize_text(content, fuzzy=fuzzy) elif isinstance(content, list): text_parts = [] for item in content: @@ -62,7 +77,7 @@ def _hash_message(message: Message) -> str: text_val = item.get("text") if text_val: - normalized_part = _normalize_text(text_val) + normalized_part = _normalize_text(text_val, fuzzy=fuzzy) if normalized_part: text_parts.append(normalized_part) elif isinstance(item, (ContentItem, dict)): @@ -109,13 +124,15 @@ def _hash_message(message: Message) -> str: return hashlib.sha256(message_bytes).hexdigest() -def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: +def _hash_conversation( + client_id: str, model: str, messages: List[Message], fuzzy: bool = False +) -> str: """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() combined_hash.update((client_id or "").encode("utf-8")) combined_hash.update((model or "").encode("utf-8")) for message in messages: - message_hash = _hash_message(message) + message_hash = _hash_message(message, fuzzy=fuzzy) combined_hash.update(message_hash.encode("utf-8")) return combined_hash.hexdigest() @@ -124,6 +141,7 @@ class LMDBConversationStore(metaclass=Singleton): """LMDB-based storage for Message lists with hash-based key-value operations.""" HASH_LOOKUP_PREFIX = "hash:" + FUZZY_LOOKUP_PREFIX = "fuzzy:" def __init__( self, @@ -215,6 +233,7 @@ def store( # Generate hash for the message list message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) + fuzzy_hash = _hash_conversation(conv.client_id, conv.model, conv.messages, fuzzy=True) storage_key = custom_key or message_hash now = datetime.now() @@ -233,6 +252,11 @@ def store( storage_key.encode("utf-8"), ) + txn.put( + f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"), + storage_key.encode("utf-8"), + ) + logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}") return storage_key @@ -287,6 +311,11 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt ) return conv + # --- Find with fuzzy matching --- + if conv := self._find_by_message_list(model, messages, fuzzy=True): + logger.debug(f"Session found for '{model}' with fuzzy matching.") + return conv + logger.debug(f"No session found for '{model}' with {len(messages)} messages.") return None @@ -294,11 +323,13 @@ def _find_by_message_list( self, model: str, messages: List[Message], + fuzzy: bool = False, ) -> Optional[ConversationInStore]: """Internal find implementation based on a message list.""" + prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX for c in g_config.gemini.clients: - message_hash = _hash_conversation(c.id, model, messages) - key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" + message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy) + key = f"{prefix}{message_hash}" try: with 
self._get_transaction(write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore @@ -350,6 +381,9 @@ def delete(self, key: str) -> Optional[ConversationInStore]: storage_data = orjson.loads(data) # type: ignore conv = ConversationInStore.model_validate(storage_data) message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) + fuzzy_hash = _hash_conversation( + conv.client_id, conv.model, conv.messages, fuzzy=True + ) # Delete main data txn.delete(key.encode("utf-8")) @@ -358,6 +392,9 @@ def delete(self, key: str) -> Optional[ConversationInStore]: if message_hash and key != message_hash: txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) + # Always clean up fuzzy mapping + txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8")) + logger.debug(f"Deleted messages with key: {key[:12]}") return conv @@ -386,7 +423,9 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: for key, _ in cursor: key_str = key.decode("utf-8") # Skip internal hash mappings - if key_str.startswith(self.HASH_LOOKUP_PREFIX): + if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith( + self.FUZZY_LOOKUP_PREFIX + ): continue if not prefix or key_str.startswith(prefix): @@ -459,8 +498,14 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: continue message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) - if message_hash and key_str != message_hash: - txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) + if message_hash: + if key_str != message_hash: + txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) + + fuzzy_hash = _hash_conversation( + conv.client_id, conv.model, conv.messages, fuzzy=True + ) + txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8")) removed += 1 except Exception as exc: logger.error(f"Failed to delete expired conversations: {exc}") From 551eb8775e03e24219436446d30f8997c268d7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 9 Feb 2026 21:08:06 +0700 Subject: [PATCH 086/139] Refactor: Implement fuzzy matching to better handle complex data formats like Markdown. 
--- app/services/lmdb.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index b08a325..b8861dc 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -466,7 +466,9 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: for key_bytes, value_bytes in cursor: key_str = key_bytes.decode("utf-8") - if key_str.startswith(self.HASH_LOOKUP_PREFIX): + if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith( + self.FUZZY_LOOKUP_PREFIX + ): continue try: From b2dbb087cfe4b553e690b1d52b4d12b5c3b07296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 07:56:22 +0700 Subject: [PATCH 087/139] Feat: Add watchdog_timeout parameter --- app/services/client.py | 3 +++ app/services/pool.py | 2 ++ app/utils/config.py | 1 + config/config.yaml | 1 + 4 files changed, 7 insertions(+) diff --git a/app/services/client.py b/app/services/client.py index 16d7a33..3cdd839 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -38,6 +38,7 @@ def __init__(self, client_id: str, **kwargs): async def init( self, timeout: float = cast(float, _UNSET), + watchdog_timeout: float = cast(float, _UNSET), auto_close: bool = False, close_delay: float = 300, auto_refresh: bool = cast(bool, _UNSET), @@ -49,6 +50,7 @@ async def init( """ config = g_config.gemini timeout = cast(float, _resolve(timeout, config.timeout)) + watchdog_timeout = cast(float, _resolve(watchdog_timeout, config.watchdog_timeout)) auto_refresh = cast(bool, _resolve(auto_refresh, config.auto_refresh)) refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval)) verbose = cast(bool, _resolve(verbose, config.verbose)) @@ -56,6 +58,7 @@ async def init( try: await super().init( timeout=timeout, + watchdog_timeout=watchdog_timeout, auto_close=auto_close, close_delay=close_delay, auto_refresh=auto_refresh, diff --git a/app/services/pool.py b/app/services/pool.py index 0f95203..decc21a 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -41,6 +41,7 @@ async def init(self) -> None: try: await client.init( timeout=g_config.gemini.timeout, + watchdog_timeout=g_config.gemini.watchdog_timeout, auto_refresh=g_config.gemini.auto_refresh, verbose=g_config.gemini.verbose, refresh_interval=g_config.gemini.refresh_interval, @@ -93,6 +94,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: try: await client.init( timeout=g_config.gemini.timeout, + watchdog_timeout=g_config.gemini.watchdog_timeout, auto_refresh=g_config.gemini.auto_refresh, verbose=g_config.gemini.verbose, refresh_interval=g_config.gemini.refresh_interval, diff --git a/app/utils/config.py b/app/utils/config.py index bbb6054..e62832d 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -84,6 +84,7 @@ class GeminiConfig(BaseModel): description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", ) timeout: int = Field(default=300, ge=1, description="Init timeout") + watchdog_timeout: int = Field(default=60, ge=1, description="Watchdog timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" diff --git a/config/config.yaml b/config/config.yaml index ed581f7..2873d48 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -23,6 +23,7 @@ gemini: secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" proxy: null 
# Optional proxy URL (null/empty means direct connection) timeout: 300 # Init timeout in seconds + watchdog_timeout: 60 # Watchdog timeout in seconds (No longer than 75 seconds) auto_refresh: true # Auto-refresh session cookies refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests From 969cd4a1e37a0915915962fe8c7ae691c8defa65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 07:57:53 +0700 Subject: [PATCH 088/139] Update required dependencies --- pyproject.toml | 8 ++++---- uv.lock | 27 ++++++++++++++------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dc08571..47cd86f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,8 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.128.0", - "gemini-webapi>=1.18.0", + "fastapi>=0.128.6", + "gemini-webapi>=1.19.0", "lmdb>=1.7.5", "loguru>=0.7.3", "orjson>=3.11.7", @@ -17,7 +17,7 @@ dependencies = [ [project.optional-dependencies] dev = [ - "ruff>=0.14.14", + "ruff>=0.15.0", ] [tool.ruff] @@ -31,5 +31,5 @@ indent-style = "space" [dependency-groups] dev = [ - "ruff>=0.14.14", + "ruff>=0.15.0", ] diff --git a/uv.lock b/uv.lock index 2a29c98..ea28c0e 100644 --- a/uv.lock +++ b/uv.lock @@ -65,17 +65,18 @@ wheels = [ [[package]] name = "fastapi" -version = "0.128.1" +version = "0.128.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, + { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/59/28bde150415783ff084334e3de106eb7461a57864cf69f343950ad5a5ddd/fastapi-0.128.1.tar.gz", hash = "sha256:ce5be4fa26d4ce6f54debcc873d1fb8e0e248f5c48d7502ba6c61457ab2dc766", size = 374260, upload-time = "2026-02-04T17:35:10.542Z" } +sdist = { url = "https://files.pythonhosted.org/packages/83/d1/195005b5e45b443e305136df47ee7df4493d782e0c039dd0d97065580324/fastapi-0.128.6.tar.gz", hash = "sha256:0cb3946557e792d731b26a42b04912f16367e3c3135ea8290f620e234f2b604f", size = 374757, upload-time = "2026-02-09T17:27:03.541Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/08/3953db1979ea131c68279b997c6465080118b407f0800445b843f8e164b3/fastapi-0.128.1-py3-none-any.whl", hash = "sha256:ee82146bbf91ea5bbf2bb8629e4c6e056c4fbd997ea6068501b11b15260b50fb", size = 103810, upload-time = "2026-02-04T17:35:08.02Z" }, + { url = "https://files.pythonhosted.org/packages/24/58/a2c4f6b240eeb148fb88cdac48f50a194aba760c1ca4988c6031c66a20ee/fastapi-0.128.6-py3-none-any.whl", hash = "sha256:bb1c1ef87d6086a7132d0ab60869d6f1ee67283b20fbf84ec0003bd335099509", size = 103674, upload-time = "2026-02-09T17:27:02.355Z" }, ] [[package]] @@ -105,24 +106,24 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.128.0" }, - { name = "gemini-webapi", specifier = ">=1.18.0" }, + { name = "fastapi", specifier = ">=0.128.6" }, + { name = "gemini-webapi", specifier = ">=1.19.0" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "orjson", specifier = ">=3.11.7" }, { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.0" }, { name = "uvicorn", 
specifier = ">=0.40.0" }, { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" }, ] provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.14.14" }] +dev = [{ name = "ruff", specifier = ">=0.15.0" }] [[package]] name = "gemini-webapi" -version = "1.18.1" +version = "1.19.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx", extra = ["http2"] }, @@ -130,9 +131,9 @@ dependencies = [ { name = "orjson" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/ae/925abc25d0b5c62170c528511bb8a1ec7bd77a0b7a19aacb9a7885a0afa8/gemini_webapi-1.18.1.tar.gz", hash = "sha256:34c91141e5953e898333e9c6ca01349566d28dbea9ddd8094f8c85e74d72ce47", size = 265100, upload-time = "2026-02-04T22:19:05.724Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5f/1f/8314b620db12855e6aa9c55e05428fa30eb7f00fb61b1de7db42734ef2b2/gemini_webapi-1.19.0.tar.gz", hash = "sha256:1f65e6a2e4a461f4ed4fb01dc76c2de4ed517af549f6ce34b96b9986c11af5dd", size = 266822, upload-time = "2026-02-09T23:16:34.446Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/e5/7ae98d48bfb7283facec804f13c6719b6fa523a6aa240b4acdea736bf60b/gemini_webapi-1.18.1-py3-none-any.whl", hash = "sha256:110f3d191ffdda9d040aab6b1b2f1d8513d1e77dc33d40fac5024de9344ea3ec", size = 61836, upload-time = "2026-02-04T22:19:04.08Z" }, + { url = "https://files.pythonhosted.org/packages/88/3b/71567ce13357d1139dfa0578c4073d6a8c523c4a28f3843194b639bf9d1e/gemini_webapi-1.19.0-py3-none-any.whl", hash = "sha256:47ab49f018cc01bf4b772910f7843af895f5e43d5a18b5ec7063b6f61e535921", size = 63498, upload-time = "2026-02-09T23:16:33.328Z" }, ] [[package]] @@ -385,15 +386,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.50.0" +version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, + { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, ] [[package]] From c258d32d448f62db63ccd4bbcdb7ba29c575b1c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 08:46:53 +0700 Subject: [PATCH 089/139] Move `maketrans` to global variable --- app/services/lmdb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index b8861dc..0ba6c3a 100644 --- 
a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -20,6 +20,7 @@
 from ..utils.singleton import Singleton
 
 _VOLATILE_SYMBOLS = string.whitespace + string.punctuation
+_VOLATILE_TRANS_TABLE = str.maketrans("", "", _VOLATILE_SYMBOLS)
 
 
 def _fuzzy_normalize(text: str | None) -> str | None:
@@ -28,7 +29,7 @@ def _fuzzy_normalize(text: str | None) -> str | None:
     """
     if text is None:
         return None
-    return text.lower().translate(str.maketrans("", "", _VOLATILE_SYMBOLS))
+    return text.lower().translate(_VOLATILE_TRANS_TABLE)
 
 
 def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:

From 157028f026950afc90140dae9568869c0ec27400 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Tue, 10 Feb 2026 08:52:22 +0700
Subject: [PATCH 090/139] Move `maketrans` to global variable

---
 app/services/lmdb.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 0ba6c3a..a94e090 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -19,8 +19,7 @@
 )
 from ..utils.singleton import Singleton
 
-_VOLATILE_SYMBOLS = string.whitespace + string.punctuation
-_VOLATILE_TRANS_TABLE = str.maketrans("", "", _VOLATILE_SYMBOLS)
+_VOLATILE_TRANS_TABLE = str.maketrans("", "", string.whitespace + string.punctuation)
 
 
 def _fuzzy_normalize(text: str | None) -> str | None:

From 5f9a7ece8e6027afddf2e04da60e6ab562a72874 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Tue, 10 Feb 2026 10:30:53 +0700
Subject: [PATCH 091/139] Refactor: Add a filter to catch orphaned tool calls.

---
 app/server/chat.py | 63 ++++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 701c1f6..414349a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -776,39 +776,44 @@ def __init__(self):
         self.TOOL_START = "[function_calls]"
         self.TOOL_END = "[/function_calls]"
+        self.ORPHAN_START = "[call:"
+        self.ORPHAN_END = "[/call]"
         self.RESPONSE_START = "[function_responses]"
         self.RESPONSE_END = "[/function_responses]"
         self.TAG_START = "<|im_start|>"
         self.TAG_END = "<|im_end|>"
         self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else ""
         self.HINT_END = TOOL_HINT_LINE_END
-        self.TOOL_PREFIX = "[call:"
-        self.WATCH_PREFIXES = [
+        self.WATCH_MARKERS = [
             self.TOOL_START,
+            self.ORPHAN_START,
             self.RESPONSE_START,
             self.TAG_START,
             self.TAG_END,
         ]
         if self.HINT_START:
-            self.WATCH_PREFIXES.append(self.HINT_START)
+            self.WATCH_MARKERS.append(self.HINT_START)
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
         output = []
 
         while self.buffer:
+            buf_low = self.buffer.lower()
             if self.state == "NORMAL":
-                tool_idx = self.buffer.find(self.TOOL_START)
-                resp_idx = self.buffer.find(self.RESPONSE_START)
-                tag_idx = self.buffer.find(self.TAG_START)
-                end_idx = self.buffer.find(self.TAG_END)
-                hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1
+                tool_idx = buf_low.find(self.TOOL_START)
+                orphan_idx = buf_low.find(self.ORPHAN_START)
+                resp_idx = buf_low.find(self.RESPONSE_START)
+                tag_idx = buf_low.find(self.TAG_START)
+                end_idx = buf_low.find(self.TAG_END)
+                hint_idx = buf_low.find(self.HINT_START.lower()) if self.HINT_START else -1
 
                 indices = [
                     (i, t)
                     for i, t in [
                         (tool_idx, "TOOL"),
+                        (orphan_idx, "ORPHAN"),
                         (resp_idx, "RESP"),
                         (tag_idx, "TAG"),
                         (end_idx, "END"),
@@ -818,11 +823,12 @@ def process(self, chunk: str) -> str:
                 ]
 
                 if not indices:
-                    # Guard against split start markers
+                    # Guard against split markers 
(case-insensitive) keep_len = 0 - for p in self.WATCH_PREFIXES: - for i in range(len(p) - 1, 0, -1): - if self.buffer.endswith(p[:i]): + for marker in self.WATCH_MARKERS: + m_low = marker.lower() + for i in range(len(m_low) - 1, 0, -1): + if buf_low.endswith(m_low[:i]): keep_len = max(keep_len, i) break yield_len = len(self.buffer) - keep_len @@ -840,6 +846,10 @@ def process(self, chunk: str) -> str: self.state = "IN_TOOL" self.block_buffer = "" self.buffer = self.buffer[len(self.TOOL_START) :] + elif m_type == "ORPHAN": + self.state = "IN_ORPHAN" + self.block_buffer = "" + self.buffer = self.buffer[len(self.ORPHAN_START) :] elif m_type == "RESP": self.state = "IN_RESP" self.buffer = self.buffer[len(self.RESPONSE_START) :] @@ -853,43 +863,53 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[len(self.HINT_START) :] elif self.state == "IN_HINT": - end_idx = self.buffer.find(self.HINT_END) + end_idx = buf_low.find(self.HINT_END.lower()) if end_idx != -1: self.buffer = self.buffer[end_idx + len(self.HINT_END) :] self.state = "NORMAL" else: - # Keep end of buffer to avoid missing split HINT_END keep_len = len(self.HINT_END) - 1 if len(self.buffer) > keep_len: self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_RESP": - end_idx = self.buffer.find(self.RESPONSE_END) + end_idx = buf_low.find(self.RESPONSE_END.lower()) if end_idx != -1: self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :] self.state = "NORMAL" else: - # Keep end of buffer to avoid missing split RESPONSE_END keep_len = len(self.RESPONSE_END) - 1 if len(self.buffer) > keep_len: self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_TOOL": - end_idx = self.buffer.find(self.TOOL_END) + end_idx = buf_low.find(self.TOOL_END.lower()) if end_idx != -1: self.block_buffer += self.buffer[:end_idx] self.buffer = self.buffer[end_idx + len(self.TOOL_END) :] self.state = "NORMAL" else: - # Accumulate and keep potential split end marker keep_len = len(self.TOOL_END) - 1 if len(self.buffer) > keep_len: self.block_buffer += self.buffer[:-keep_len] self.buffer = self.buffer[-keep_len:] break + elif self.state == "IN_ORPHAN": + end_idx = buf_low.find(self.ORPHAN_END.lower()) + if end_idx != -1: + self.block_buffer += self.buffer[:end_idx] + self.buffer = self.buffer[end_idx + len(self.ORPHAN_END) :] + self.state = "NORMAL" + else: + keep_len = len(self.ORPHAN_END) - 1 + if len(self.buffer) > keep_len: + self.block_buffer += self.buffer[:-keep_len] + self.buffer = self.buffer[-keep_len:] + break + elif self.state == "IN_TAG": nl_idx = self.buffer.find("\n") if nl_idx != -1: @@ -900,7 +920,7 @@ def process(self, chunk: str) -> str: break elif self.state == "IN_BLOCK": - end_idx = self.buffer.find(self.TAG_END) + end_idx = buf_low.find(self.TAG_END.lower()) if end_idx != -1: content = self.buffer[:end_idx] if self.current_role != "tool": @@ -909,7 +929,6 @@ def process(self, chunk: str) -> str: self.state = "NORMAL" self.current_role = "" else: - # Yield safe part and keep potential split TAG_END keep_len = len(self.TAG_END) - 1 if self.current_role != "tool": if len(self.buffer) > keep_len: @@ -926,8 +945,10 @@ def process(self, chunk: str) -> str: def flush(self) -> str: res = "" if self.state == "IN_TOOL": - if self.TOOL_PREFIX not in self.block_buffer.lower(): + if self.ORPHAN_START.lower() not in self.block_buffer.lower(): res = f"{self.TOOL_START}{self.block_buffer}" + elif self.state == "IN_ORPHAN": + res = f"{self.ORPHAN_START}{self.block_buffer}" elif self.state == "IN_BLOCK" and 
self.current_role != "tool": res = self.buffer elif self.state in ("IN_RESP", "IN_HINT"): From c81c2cefd8bd76e83c0edf84ebd2a0a11ad28ab8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 12:48:20 +0700 Subject: [PATCH 092/139] Update required dependencies --- pyproject.toml | 2 +- uv.lock | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 47cd86f..58391ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = "==3.12.*" dependencies = [ "fastapi>=0.128.6", - "gemini-webapi>=1.19.0", + "gemini-webapi>=1.19.1", "lmdb>=1.7.5", "loguru>=0.7.3", "orjson>=3.11.7", diff --git a/uv.lock b/uv.lock index ea28c0e..34b5cc8 100644 --- a/uv.lock +++ b/uv.lock @@ -123,7 +123,7 @@ dev = [{ name = "ruff", specifier = ">=0.15.0" }] [[package]] name = "gemini-webapi" -version = "1.19.0" +version = "1.19.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx", extra = ["http2"] }, @@ -131,9 +131,9 @@ dependencies = [ { name = "orjson" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5f/1f/8314b620db12855e6aa9c55e05428fa30eb7f00fb61b1de7db42734ef2b2/gemini_webapi-1.19.0.tar.gz", hash = "sha256:1f65e6a2e4a461f4ed4fb01dc76c2de4ed517af549f6ce34b96b9986c11af5dd", size = 266822, upload-time = "2026-02-09T23:16:34.446Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e5/d1/c61ee05ca6e20c70caa25a3f0f12e2a810bbc6b39e588ff937821de43690/gemini_webapi-1.19.1.tar.gz", hash = "sha256:a52afdfc2d9f6e87a6ae8cd926fb2ce5c562a0a99dc75ce97d8d50ffc2a3e133", size = 266761, upload-time = "2026-02-10T05:44:29.195Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/3b/71567ce13357d1139dfa0578c4073d6a8c523c4a28f3843194b639bf9d1e/gemini_webapi-1.19.0-py3-none-any.whl", hash = "sha256:47ab49f018cc01bf4b772910f7843af895f5e43d5a18b5ec7063b6f61e535921", size = 63498, upload-time = "2026-02-09T23:16:33.328Z" }, + { url = "https://files.pythonhosted.org/packages/4a/0b/7a73919ee8621f6559ae679a20d754959b989a3f09cf20478d89971f40b4/gemini_webapi-1.19.1-py3-none-any.whl", hash = "sha256:0dc4c7daa58d281722d52d6acf520f2e850c6c3c6020080fdbc5f77736c8be9a", size = 63500, upload-time = "2026-02-10T05:44:27.692Z" }, ] [[package]] From a17082532189d852b61e8b791a49844b4bb922f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 12:53:10 +0700 Subject: [PATCH 093/139] Add dependabot --- .github/dependabot.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/dependabot.yaml diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 0000000..5ace460 --- /dev/null +++ b/.github/dependabot.yaml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" From a0136af06d4e43bd6ac4a9ba7a2b05509358e187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 10:37:50 +0700 Subject: [PATCH 094/139] Refactor: Implement the logic changes recommended by Copilot - Remove orphaned tool calls to prevent leaking internal tool-call information. - Define limits for the `timeout`, `watchdog_timeout`, and `refresh_interval` ranges. - Revise the fuzzy match logic to prevent accidental session reuse and avoid any possible content leakage between requests. 
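A minimal sketch of the collision the revised per-message verification guards against (it reuses `_fuzzy_normalize` and `_VOLATILE_TRANS_TABLE` from app/services/lmdb.py; the sample strings are hypothetical):

    import string

    # Same table the store builds: drop all whitespace and punctuation.
    _VOLATILE_TRANS_TABLE = str.maketrans("", "", string.whitespace + string.punctuation)

    def _fuzzy_normalize(text: str) -> str:
        return text.lower().translate(_VOLATILE_TRANS_TABLE)

    # Distinct contents collapse to the same fuzzy form, e.g. "1.2" vs "12",
    # so a single conversation-level fuzzy hash could hand back the wrong session.
    assert _fuzzy_normalize("1.2") == _fuzzy_normalize("12") == "12"

This is why the revised `_find_by_message_list` below also compares the message count and re-hashes each message individually before returning a stored conversation.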
--- app/server/chat.py | 4 +- app/services/lmdb.py | 252 ++++++++++++++++++++++++------------------- app/utils/config.py | 10 +- config/config.yaml | 6 +- 4 files changed, 151 insertions(+), 121 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 414349a..30a6b3a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -947,11 +947,9 @@ def flush(self) -> str: if self.state == "IN_TOOL": if self.ORPHAN_START.lower() not in self.block_buffer.lower(): res = f"{self.TOOL_START}{self.block_buffer}" - elif self.state == "IN_ORPHAN": - res = f"{self.ORPHAN_START}{self.block_buffer}" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer - elif self.state in ("IN_RESP", "IN_HINT"): + elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT"): res = "" elif self.state == "NORMAL": res = self.buffer diff --git a/app/services/lmdb.py b/app/services/lmdb.py index a94e090..c90f537 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -25,6 +25,7 @@ def _fuzzy_normalize(text: str | None) -> str | None: """ Lowercase and remove all whitespace and punctuation. + Used as a fallback for complex/malformed contents matching. """ if text is None: return None @@ -38,7 +39,7 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: if text is None: return None - # Unicode normalization + # Unicode normalization to NFC text = unicodedata.normalize("NFC", text) # Basic cleaning @@ -49,7 +50,8 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: if fuzzy: return _fuzzy_normalize(text) - return text if text else None + # Always strip to ensure trailing newlines/spaces don't break exact matches + return text.strip() if text.strip() else None def _hash_message(message: Message, fuzzy: bool = False) -> str: @@ -157,7 +159,6 @@ def __init__( max_db_size: Maximum database size in bytes (default: 256 MB) retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup) """ - if db_path is None: db_path = g_config.storage.path if max_db_size is None: @@ -174,9 +175,11 @@ def __init__( self._init_environment() def _ensure_db_path(self) -> None: + """Create database directory if it doesn't exist.""" self.db_path.parent.mkdir(parents=True, exist_ok=True) def _init_environment(self) -> None: + """Initialize LMDB environment.""" try: self._env = lmdb.open( str(self.db_path), @@ -187,12 +190,18 @@ def _init_environment(self) -> None: meminit=False, ) logger.info(f"LMDB environment initialized at {self.db_path}") - except Exception as e: + except lmdb.Error as e: logger.error(f"Failed to initialize LMDB environment: {e}") raise @contextmanager def _get_transaction(self, write: bool = False): + """ + Context manager for LMDB transactions. + + Args: + write: Whether the transaction should be writable. 
+ """ if not self._env: raise RuntimeError("LMDB environment not initialized") @@ -201,12 +210,57 @@ def _get_transaction(self, write: bool = False): yield txn if write: txn.commit() - except Exception: + except lmdb.Error: + if write: + txn.abort() + raise + except Exception as e: + logger.error(f"Unexpected error in LMDB transaction: {e}") if write: txn.abort() raise - finally: - pass # Transaction is automatically cleaned up + + @staticmethod + def _decode_index_value(data: bytes) -> List[str]: + """Decode index value, handling both legacy single-string and new list-of-strings formats.""" + if not data: + return [] + if data.startswith(b"["): + try: + val = orjson.loads(data) + if isinstance(val, list): + return [str(v) for v in val] + except orjson.JSONDecodeError: + pass + try: + return [data.decode("utf-8")] + except UnicodeDecodeError: + return [] + + @staticmethod + def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str): + """Add a storage key to the index for a given hash, avoiding duplicates.""" + idx_key = f"{prefix}{hash_val}".encode("utf-8") + existing = txn.get(idx_key) + keys = LMDBConversationStore._decode_index_value(existing) if existing else [] + if storage_key not in keys: + keys.append(storage_key) + txn.put(idx_key, orjson.dumps(keys)) + + @staticmethod + def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str): + """Remove a specific storage key from the index for a given hash.""" + idx_key = f"{prefix}{hash_val}".encode("utf-8") + existing = txn.get(idx_key) + if not existing: + return + keys = LMDBConversationStore._decode_index_value(existing) + if storage_key in keys: + keys.remove(storage_key) + if keys: + txn.put(idx_key, orjson.dumps(keys)) + else: + txn.delete(idx_key) def store( self, @@ -226,12 +280,10 @@ def store( if not conv: raise ValueError("Messages list cannot be empty") - # Sanitize messages before computing hash and storing to ensure consistency - # with the search (find) logic, which also sanitizes its prefix. 
+ # Ensure consistent sanitization before hashing and storage sanitized_messages = self.sanitize_assistant_messages(conv.messages) conv.messages = sanitized_messages - # Generate hash for the message list message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) fuzzy_hash = _hash_conversation(conv.client_id, conv.model, conv.messages, fuzzy=True) storage_key = custom_key or message_hash @@ -247,21 +299,19 @@ def store( with self._get_transaction(write=True) as txn: txn.put(storage_key.encode("utf-8"), value, overwrite=True) - txn.put( - f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"), - storage_key.encode("utf-8"), - ) - - txn.put( - f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"), - storage_key.encode("utf-8"), - ) + self._update_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, storage_key) + self._update_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, storage_key) logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}") return storage_key + except lmdb.Error as e: + logger.error(f"LMDB error while storing messages with key {storage_key[:12]}: {e}") + raise except Exception as e: - logger.error(f"Failed to store messages with key {storage_key[:12]}: {e}") + logger.error( + f"Unexpected error while storing messages with key {storage_key[:12]}: {e}" + ) raise def get(self, key: str) -> Optional[ConversationInStore]: @@ -280,29 +330,37 @@ def get(self, key: str) -> Optional[ConversationInStore]: if not data: return None - storage_data = orjson.loads(data) # type: ignore + storage_data = orjson.loads(data) conv = ConversationInStore.model_validate(storage_data) logger.debug(f"Retrieved {len(conv.messages)} messages with key: {key[:12]}") return conv - + except (lmdb.Error, orjson.JSONDecodeError) as e: + logger.error(f"Failed to retrieve/parse messages with key {key[:12]}: {e}") + return None except Exception as e: - logger.error(f"Failed to retrieve messages with key {key[:12]}: {e}") + logger.error(f"Unexpected error retrieving messages with key {key[:12]}: {e}") return None def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]: """ Search conversation data by message list. + Tries raw matching, then sanitized matching, and finally fuzzy matching. + + Args: + model: Model name + messages: List of messages to match + + Returns: + ConversationInStore or None if not found """ if not messages: return None - # --- Find with raw messages --- if conv := self._find_by_message_list(model, messages): logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.") return conv - # --- Find with cleaned messages --- cleaned_messages = self.sanitize_assistant_messages(messages) if cleaned_messages != messages: if conv := self._find_by_message_list(model, cleaned_messages): @@ -311,7 +369,6 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt ) return conv - # --- Find with fuzzy matching --- if conv := self._find_by_message_list(model, messages, fuzzy=True): logger.debug(f"Session found for '{model}' with fuzzy matching.") return conv @@ -325,18 +382,49 @@ def _find_by_message_list( messages: List[Message], fuzzy: bool = False, ) -> Optional[ConversationInStore]: - """Internal find implementation based on a message list.""" + """ + Internal find implementation based on a message list. 
+ + Args: + model: Model name + messages: Message list to hash + fuzzy: Whether to use fuzzy hashing + + Returns: + ConversationInStore or None if not found + """ prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX + target_len = len(messages) + for c in g_config.gemini.clients: message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy) key = f"{prefix}{message_hash}" try: with self._get_transaction(write=False) as txn: - if mapped := txn.get(key.encode("utf-8")): # type: ignore - return self.get(mapped.decode("utf-8")) # type: ignore - except Exception as e: + if mapped := txn.get(key.encode("utf-8")): + candidate_keys = self._decode_index_value(mapped) + # Try candidates from newest to oldest + for ck in reversed(candidate_keys): + if conv := self.get(ck): + if len(conv.messages) != target_len: + continue + + if fuzzy: + # For fuzzy matching, verify each message hash individually + # to prevent semantic collisions (e.g., "1.2" vs "12") + match_found = True + for i in range(target_len): + if _hash_message( + conv.messages[i], fuzzy=True + ) != _hash_message(messages[i], fuzzy=True): + match_found = False + break + if not match_found: + continue + return conv + except lmdb.Error as e: logger.error( - f"Failed to retrieve messages by message list for hash {message_hash} and client {c.id}: {e}" + f"LMDB error while searching for hash {message_hash} and client {c.id}: {e}" ) continue @@ -345,74 +433,42 @@ def _find_by_message_list( return None def exists(self, key: str) -> bool: - """ - Check if a key exists in the store. - - Args: - key: Storage key to check - - Returns: - bool: True if key exists, False otherwise - """ + """Check if a key exists in the store.""" try: with self._get_transaction(write=False) as txn: return txn.get(key.encode("utf-8")) is not None - except Exception as e: + except lmdb.Error as e: logger.error(f"Failed to check existence of key {key}: {e}") return False def delete(self, key: str) -> Optional[ConversationInStore]: - """ - Delete conversation model by key. - - Args: - key: Storage key to delete - - Returns: - ConversationInStore: The deleted conversation data, or None if not found - """ + """Delete conversation model by key.""" try: with self._get_transaction(write=True) as txn: - # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: return None - storage_data = orjson.loads(data) # type: ignore + storage_data = orjson.loads(data) conv = ConversationInStore.model_validate(storage_data) message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) fuzzy_hash = _hash_conversation( conv.client_id, conv.model, conv.messages, fuzzy=True ) - # Delete main data txn.delete(key.encode("utf-8")) - # Clean up hash mapping if it exists - if message_hash and key != message_hash: - txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) - - # Always clean up fuzzy mapping - txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8")) + self._remove_from_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, key) + self._remove_from_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, key) logger.debug(f"Deleted messages with key: {key[:12]}") return conv - - except Exception as e: + except (lmdb.Error, orjson.JSONDecodeError) as e: logger.error(f"Failed to delete messages with key {key[:12]}: {e}") return None def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: - """ - List all keys in the store, optionally filtered by prefix. 
- - Args: - prefix: Optional prefix to filter keys - limit: Optional limit on number of keys returned - - Returns: - List of keys - """ + """List all keys in the store, optionally filtered by prefix.""" keys = [] try: with self._get_transaction(write=False) as txn: @@ -422,7 +478,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: count = 0 for key, _ in cursor: key_str = key.decode("utf-8") - # Skip internal hash mappings + # Skip internal index mappings if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith( self.FUZZY_LOOKUP_PREFIX ): @@ -431,25 +487,14 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: if not prefix or key_str.startswith(prefix): keys.append(key_str) count += 1 - if limit and count >= limit: break - - except Exception as e: + except lmdb.Error as e: logger.error(f"Failed to list keys: {e}") - return keys def cleanup_expired(self, retention_days: Optional[int] = None) -> int: - """ - Delete conversations older than the given retention period. - - Args: - retention_days: Optional override for retention period in days. - - Returns: - Number of conversations removed. - """ + """Delete conversations older than the given retention period.""" retention_value = ( self.retention_days if retention_days is None else max(0, int(retention_days)) ) @@ -463,7 +508,6 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: try: with self._get_transaction(write=False) as txn: cursor = txn.cursor() - for key_bytes, value_bytes in cursor: key_str = key_bytes.decode("utf-8") if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith( @@ -472,9 +516,9 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: continue try: - storage_data = orjson.loads(value_bytes) # type: ignore[arg-type] + storage_data = orjson.loads(value_bytes) conv = ConversationInStore.model_validate(storage_data) - except Exception as exc: + except (orjson.JSONDecodeError, Exception) as exc: logger.warning(f"Failed to decode record for key {key_str}: {exc}") continue @@ -484,7 +528,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: if timestamp < cutoff: expired_entries.append((key_str, conv)) - except Exception as exc: + except lmdb.Error as exc: logger.error(f"Failed to scan LMDB for retention cleanup: {exc}") raise @@ -501,15 +545,13 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) if message_hash: - if key_str != message_hash: - txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) - + self._remove_from_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, key_str) fuzzy_hash = _hash_conversation( conv.client_id, conv.model, conv.messages, fuzzy=True ) - txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8")) + self._remove_from_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, key_str) removed += 1 - except Exception as exc: + except lmdb.Error as exc: logger.error(f"Failed to delete expired conversations: {exc}") raise @@ -521,19 +563,13 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: return removed def stats(self) -> Dict[str, Any]: - """ - Get database statistics. 
- - Returns: - Dict with database statistics - """ + """Get database statistics.""" if not self._env: logger.error("LMDB environment not initialized") return {} - try: return self._env.stat() - except Exception as e: + except lmdb.Error as e: logger.error(f"Failed to get database stats: {e}") return {} @@ -550,21 +586,15 @@ def __del__(self): @staticmethod def remove_think_tags(text: str) -> str: - """ - Remove all ... tags and strip whitespace. - """ + """Remove all ... tags and strip whitespace.""" if not text: return text - # Remove all think blocks anywhere in the text cleaned_content = re.sub(r".*?", "", text, flags=re.DOTALL) return cleaned_content.strip() @staticmethod def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: - """ - Produce a canonical history where assistant messages are cleaned of - internal markers and tool call blocks are moved to metadata. - """ + """Clean assistant messages of internal markers and move tool calls to metadata.""" cleaned_messages = [] for msg in messages: if msg.role == "assistant": @@ -596,7 +626,6 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: for item in msg.content: if isinstance(item, ContentItem) and item.type == "text" and item.text: text = LMDBConversationStore.remove_think_tags(item.text) - if not msg.tool_calls: text, extracted = extract_tool_calls(text) if extracted: @@ -625,5 +654,4 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: cleaned_messages.append(msg) else: cleaned_messages.append(msg) - return cleaned_messages diff --git a/app/utils/config.py b/app/utils/config.py index e62832d..3b24931 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -83,11 +83,15 @@ class GeminiConfig(BaseModel): default="append", description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", ) - timeout: int = Field(default=300, ge=1, description="Init timeout") - watchdog_timeout: int = Field(default=60, ge=1, description="Watchdog timeout") + timeout: int = Field(default=300, ge=30, description="Init timeout") + watchdog_timeout: int = Field( + default=60, ge=10, le=75, description="Watchdog timeout in seconds (Not more than 75s)" + ) auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( - default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" + default=540, + ge=60, + description="Interval in seconds to refresh Gemini cookies (Not less than 60s)", ) verbose: bool = Field(False, description="Enable verbose logging for Gemini API requests") max_chars_per_request: int = Field( diff --git a/config/config.yaml b/config/config.yaml index 2873d48..3d5e6f4 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -22,10 +22,10 @@ gemini: secure_1psid: "YOUR_SECURE_1PSID_HERE" secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" proxy: null # Optional proxy URL (null/empty means direct connection) - timeout: 300 # Init timeout in seconds - watchdog_timeout: 60 # Watchdog timeout in seconds (No longer than 75 seconds) + timeout: 300 # Init timeout in seconds (Not less than 30s) + watchdog_timeout: 60 # Watchdog timeout in seconds (Not more than 75s) auto_refresh: true # Auto-refresh session cookies - refresh_interval: 540 # Refresh interval in seconds + refresh_interval: 540 # Refresh interval in seconds (Not less than 60s) verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web 
accepts per request. Non-pro users might have a lower limit model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) From 5eb9f509d451d76739da1f34b5003b1d7628279b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 12:24:06 +0700 Subject: [PATCH 095/139] Refactor: Optimize fuzzy matching logic --- app/services/lmdb.py | 31 ++++++++++++++++--------------- app/utils/config.py | 2 +- scripts/dump_lmdb.py | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c90f537..4b57f60 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -370,7 +370,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt return conv if conv := self._find_by_message_list(model, messages, fuzzy=True): - logger.debug(f"Session found for '{model}' with fuzzy matching.") + logger.debug( + f"Session found for '{model}' with {len(messages)} fuzzy matching messages." + ) return conv logger.debug(f"No session found for '{model}' with {len(messages)} messages.") @@ -396,6 +398,8 @@ def _find_by_message_list( prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX target_len = len(messages) + target_hashes = [_hash_message(m, fuzzy=fuzzy) for m in messages] + for c in g_config.gemini.clients: message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy) key = f"{prefix}{message_hash}" @@ -403,25 +407,22 @@ def _find_by_message_list( with self._get_transaction(write=False) as txn: if mapped := txn.get(key.encode("utf-8")): candidate_keys = self._decode_index_value(mapped) - # Try candidates from newest to oldest for ck in reversed(candidate_keys): if conv := self.get(ck): if len(conv.messages) != target_len: continue - if fuzzy: - # For fuzzy matching, verify each message hash individually - # to prevent semantic collisions (e.g., "1.2" vs "12") - match_found = True - for i in range(target_len): - if _hash_message( - conv.messages[i], fuzzy=True - ) != _hash_message(messages[i], fuzzy=True): - match_found = False - break - if not match_found: - continue - return conv + match_found = True + for i in range(target_len): + if ( + _hash_message(conv.messages[i], fuzzy=fuzzy) + != target_hashes[i] + ): + match_found = False + break + + if match_found: + return conv except lmdb.Error as e: logger.error( f"LMDB error while searching for hash {message_hash} and client {c.id}: {e}" diff --git a/app/utils/config.py b/app/utils/config.py index 3b24931..4c1709f 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -83,7 +83,7 @@ class GeminiConfig(BaseModel): default="append", description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", ) - timeout: int = Field(default=300, ge=30, description="Init timeout") + timeout: int = Field(default=300, ge=30, description="Init timeout in seconds") watchdog_timeout: int = Field( default=60, ge=10, le=75, description="Watchdog timeout in seconds (Not more than 75s)" ) diff --git a/scripts/dump_lmdb.py b/scripts/dump_lmdb.py index b06b1b4..a331325 100644 --- a/scripts/dump_lmdb.py +++ b/scripts/dump_lmdb.py @@ -42,7 +42,7 @@ def dump_lmdb(path: Path, keys: Iterable[str] | None = None) -> None: records = _dump_all(txn) env.close() - print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode()) + print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode("utf-8")) def main() -> None: From 971f2c70f81ac82640cb6a9f3c800be0d7c1143a Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 12:28:11 +0700 Subject: [PATCH 096/139] Update dependencies --- pyproject.toml | 2 +- uv.lock | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 58391ff..d3a1aaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.128.6", + "fastapi>=0.128.7", "gemini-webapi>=1.19.1", "lmdb>=1.7.5", "loguru>=0.7.3", diff --git a/uv.lock b/uv.lock index 34b5cc8..c038f53 100644 --- a/uv.lock +++ b/uv.lock @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.128.6" +version = "0.128.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -74,9 +74,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/83/d1/195005b5e45b443e305136df47ee7df4493d782e0c039dd0d97065580324/fastapi-0.128.6.tar.gz", hash = "sha256:0cb3946557e792d731b26a42b04912f16367e3c3135ea8290f620e234f2b604f", size = 374757, upload-time = "2026-02-09T17:27:03.541Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a0/fc/af386750b3fd8d8828167e4c82b787a8eeca2eca5c5429c9db8bb7c70e04/fastapi-0.128.7.tar.gz", hash = "sha256:783c273416995486c155ad2c0e2b45905dedfaf20b9ef8d9f6a9124670639a24", size = 375325, upload-time = "2026-02-10T12:26:40.968Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/24/58/a2c4f6b240eeb148fb88cdac48f50a194aba760c1ca4988c6031c66a20ee/fastapi-0.128.6-py3-none-any.whl", hash = "sha256:bb1c1ef87d6086a7132d0ab60869d6f1ee67283b20fbf84ec0003bd335099509", size = 103674, upload-time = "2026-02-09T17:27:02.355Z" }, + { url = "https://files.pythonhosted.org/packages/af/1a/f983b45661c79c31be575c570d46c437a5409b67a939c1b3d8d6b3ed7a7f/fastapi-0.128.7-py3-none-any.whl", hash = "sha256:6bd9bd31cb7047465f2d3fa3ba3f33b0870b17d4eaf7cdb36d1576ab060ad662", size = 103630, upload-time = "2026-02-10T12:26:39.414Z" }, ] [[package]] @@ -106,8 +106,8 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.128.6" }, - { name = "gemini-webapi", specifier = ">=1.19.0" }, + { name = "fastapi", specifier = ">=0.128.7" }, + { name = "gemini-webapi", specifier = ">=1.19.1" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "orjson", specifier = ">=3.11.7" }, From cad23795e41a97fd7cb7e5dd371c03d1bdbec607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 20:46:42 +0700 Subject: [PATCH 097/139] Refactor: Update Markdown unescape helpers to prevent impacting clients like Roo Code --- app/services/client.py | 6 ++++++ app/utils/helper.py | 44 ++++++++++++++++-------------------------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 3cdd839..5d248c2 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -14,6 +14,10 @@ save_url_to_tempfile, ) +COMMONMARK_UNESCAPE_RE = re.compile( + r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" +) # See: https://spec.commonmark.org/current/#backslash-escapes + FILE_PATH_PATTERN = re.compile( 
r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -194,6 +198,8 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) + text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text) + def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) if match: diff --git a/app/utils/helper.py b/app/utils/helper.py index 384f5cd..67bfa55 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -191,7 +191,6 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ if not text: return text, [] - # Clean hints FIRST so they don't interfere with tool call regexes (e.g. example calls in hint) cleaned = strip_system_hints(text) tool_calls: list[ToolCall] = [] @@ -237,33 +236,24 @@ def _create_tool_call(name: str, raw_args: str) -> None: ) ) - def _replace_block(match: re.Match[str]) -> str: - block_content = match.group(1) - if not block_content: - return match.group(0) - - is_tool_block = bool(TOOL_CALL_RE.search(block_content)) - - if is_tool_block: - if extract: - for call_match in TOOL_CALL_RE.finditer(block_content): - name = (call_match.group(1) or "").strip() - raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args) - return "" - else: - return match.group(0) - - def _replace_orphan(match: re.Match[str]) -> str: - if extract: - name = (match.group(1) or "").strip() - raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args) - return "" - - cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned) - cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) + all_calls = [] + for match in TOOL_CALL_RE.finditer(cleaned): + all_calls.append( + { + "start": match.start(), + "name": (match.group(1) or "").strip(), + "args": (match.group(2) or "").strip(), + } + ) + + all_calls.sort(key=lambda x: x["start"]) + + if extract: + for call in all_calls: + _create_tool_call(call["name"], call["args"]) + cleaned = TOOL_BLOCK_RE.sub("", cleaned) + cleaned = TOOL_CALL_RE.sub("", cleaned) cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) cleaned = RESPONSE_ITEM_RE.sub("", cleaned) From 795b8d88a3cfb29e46c4e369c277e810308cc8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 21:20:11 +0700 Subject: [PATCH 098/139] Refactor: Update Markdown unescape helpers to prevent impacting clients like Roo Code --- app/services/client.py | 6 ------ app/utils/helper.py | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 5d248c2..3cdd839 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -14,10 +14,6 @@ save_url_to_tempfile, ) -COMMONMARK_UNESCAPE_RE = re.compile( - r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" -) # See: https://spec.commonmark.org/current/#backslash-escapes - FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -198,8 +194,6 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) - text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text) - def 
extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) if match: diff --git a/app/utils/helper.py b/app/utils/helper.py index 67bfa55..ce781bd 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -34,6 +34,9 @@ RESPONSE_ITEM_RE = re.compile( r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE ) +COMMONMARK_UNESCAPE_RE = re.compile( + r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" +) # See: https://spec.commonmark.org/current/#backslash-escapes CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -192,9 +195,12 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ return text, [] cleaned = strip_system_hints(text) - tool_calls: list[ToolCall] = [] + def _unescape_markdown(s: str) -> str: + """Restores characters escaped for Markdown rendering.""" + return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + def _create_tool_call(name: str, raw_args: str) -> None: if not extract: return @@ -202,20 +208,33 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning("Encountered tool_call without a function name.") return + prev_name = "" + while name != prev_name: + prev_name = name + name = _unescape_markdown(name) + + def _try_parse_json(s: str) -> dict | None: + try: + return orjson.loads(s) + except orjson.JSONDecodeError: + try: + return orjson.loads(_unescape_markdown(s)) + except orjson.JSONDecodeError: + return None + arguments = raw_args - try: - parsed_args = orjson.loads(raw_args) - arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") - except orjson.JSONDecodeError: + parsed_args = _try_parse_json(raw_args) + + if parsed_args is None: json_match = re.search(r"({.*})", raw_args, re.DOTALL) if json_match: potential_json = json_match.group(1) - try: - parsed_args = orjson.loads(potential_json) + parsed_args = _try_parse_json(potential_json) + if parsed_args is not None: arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) - except orjson.JSONDecodeError: + else: logger.warning( f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}" ) @@ -223,6 +242,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning( f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}" ) + else: + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") From e85252a3c6000af5f5094560ad95aa8a8e78c184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 07:59:28 +0700 Subject: [PATCH 099/139] Revert "Refactor: Update Markdown unescape helpers to prevent impacting clients like Roo Code" This reverts commit 795b8d88a3cfb29e46c4e369c277e810308cc8e8. 
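For context, the helper being moved back into client.py is a one-regex CommonMark unescape. A minimal illustration with a hypothetical input string (the regex itself is copied verbatim from this series):

    import re

    COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")

    # Gemini Web backslash-escapes Markdown punctuation in its output;
    # stripping the escapes restores what clients such as Roo Code expect.
    escaped = r"apply\_diff to \`src/main.rs\`"
    print(COMMONMARK_UNESCAPE_RE.sub(r"\1", escaped))  # apply_diff to `src/main.rs`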
--- app/services/client.py | 6 ++++++ app/utils/helper.py | 37 ++++++++----------------------------- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 3cdd839..5d248c2 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -14,6 +14,10 @@ save_url_to_tempfile, ) +COMMONMARK_UNESCAPE_RE = re.compile( + r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" +) # See: https://spec.commonmark.org/current/#backslash-escapes + FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -194,6 +198,8 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) + text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text) + def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) if match: diff --git a/app/utils/helper.py b/app/utils/helper.py index ce781bd..67bfa55 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -34,9 +34,6 @@ RESPONSE_ITEM_RE = re.compile( r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE ) -COMMONMARK_UNESCAPE_RE = re.compile( - r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" -) # See: https://spec.commonmark.org/current/#backslash-escapes CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -195,11 +192,8 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ return text, [] cleaned = strip_system_hints(text) - tool_calls: list[ToolCall] = [] - def _unescape_markdown(s: str) -> str: - """Restores characters escaped for Markdown rendering.""" - return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: if not extract: @@ -208,33 +202,20 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning("Encountered tool_call without a function name.") return - prev_name = "" - while name != prev_name: - prev_name = name - name = _unescape_markdown(name) - - def _try_parse_json(s: str) -> dict | None: - try: - return orjson.loads(s) - except orjson.JSONDecodeError: - try: - return orjson.loads(_unescape_markdown(s)) - except orjson.JSONDecodeError: - return None - arguments = raw_args - parsed_args = _try_parse_json(raw_args) - - if parsed_args is None: + try: + parsed_args = orjson.loads(raw_args) + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") + except orjson.JSONDecodeError: json_match = re.search(r"({.*})", raw_args, re.DOTALL) if json_match: potential_json = json_match.group(1) - parsed_args = _try_parse_json(potential_json) - if parsed_args is not None: + try: + parsed_args = orjson.loads(potential_json) arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) - else: + except orjson.JSONDecodeError: logger.warning( f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}" ) @@ -242,8 +223,6 @@ def _try_parse_json(s: str) -> dict | None: logger.warning( f"Failed to parse tool call arguments for '{name}'. 
Passing raw string: {reprlib.repr(raw_args)}" ) - else: - arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") From 4be41506c95673bf7a747f8ff2629a45c99b8309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 12:17:12 +0700 Subject: [PATCH 100/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/server/chat.py | 259 +++++++++++++++++++++++++---------------- app/services/client.py | 39 ++++--- app/services/lmdb.py | 7 +- app/utils/helper.py | 89 +++++++++----- 4 files changed, 245 insertions(+), 149 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 30a6b3a..080d015 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -382,11 +382,16 @@ def _build_tool_prompt( ) lines.append("[function_calls]") lines.append("[call:tool_name]") - lines.append('{"argument": "value"}') + lines.append("@args") + lines.append("") + lines.append("<<<ARG:argument_name>>>") + lines.append("value") + lines.append("<<<END:argument_name>>>") + lines.append("") lines.append("[/call]") lines.append("[/function_calls]") lines.append( - "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block." + "CRITICAL: Arguments MUST use <<<ARG:argument_name>>>...<<<END:argument_name>>> tags. Content inside tags can be any format." ) lines.append( "If multiple tools are needed, list them sequentially within the same [function_calls] block." @@ -394,7 +399,9 @@ lines.append( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) - lines.append("Note: Tool results are returned in a [function_responses] block.") + lines.append( + "Note: Tool results are returned in a [function_responses] block using @results and <<<RESULT>>> tags."
+ ) return "\n".join(lines) @@ -774,26 +781,44 @@ def __init__(self): self.current_role = "" self.block_buffer = "" - self.TOOL_START = "[function_calls]" - self.TOOL_END = "[/function_calls]" - self.ORPHAN_START = "[call:" - self.ORPHAN_END = "[/call]" - self.RESPONSE_START = "[function_responses]" - self.RESPONSE_END = "[/function_responses]" - self.TAG_START = "<|im_start|>" - self.TAG_END = "<|im_end|>" - self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else "" - self.HINT_END = TOOL_HINT_LINE_END - - self.WATCH_MARKERS = [ - self.TOOL_START, - self.ORPHAN_START, - self.RESPONSE_START, - self.TAG_START, - self.TAG_END, - ] - if self.HINT_START: - self.WATCH_MARKERS.append(self.HINT_START) + self.STATE_MARKERS = { + "TOOL": { + "starts": ["[function_calls]", "\\[function_calls\\]"], + "ends": ["[/function_calls]", "\\[/function_calls\\]"], + }, + "ORPHAN": { + "starts": ["[call:", "\\[call:"], + "ends": ["[/call]", "\\[/call\\]"], + }, + "RESP": { + "starts": ["[function_responses]", "\\[function_responses\\]"], + "ends": ["[/function_responses]", "\\[/function_responses\\]"], + }, + "ARG": { + "starts": ["<<>>", "\\<\\<\\\\>\\>"], + "ends": ["<<>>", "\\<\\<\\\\>\\>"], + }, + "TAG": { + "starts": ["<|im_start|>", "\\<|im_start|\\>"], + "ends": ["<|im_end|>", "\\<|im_end|\\>"], + }, + } + + hint_start = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else "" + if hint_start: + self.STATE_MARKERS["HINT"] = { + "starts": [hint_start], + "ends": [TOOL_HINT_LINE_END], + } + + self.WATCH_MARKERS = [] + for cfg in self.STATE_MARKERS.values(): + self.WATCH_MARKERS.extend(cfg["starts"]) + self.WATCH_MARKERS.extend(cfg.get("ends", [])) def process(self, chunk: str) -> str: self.buffer += chunk @@ -802,25 +827,12 @@ def process(self, chunk: str) -> str: while self.buffer: buf_low = self.buffer.lower() if self.state == "NORMAL": - tool_idx = buf_low.find(self.TOOL_START) - orphan_idx = buf_low.find(self.ORPHAN_START) - resp_idx = buf_low.find(self.RESPONSE_START) - tag_idx = buf_low.find(self.TAG_START) - end_idx = buf_low.find(self.TAG_END) - hint_idx = buf_low.find(self.HINT_START) if self.HINT_START else -1 - - indices = [ - (i, t) - for i, t in [ - (tool_idx, "TOOL"), - (orphan_idx, "ORPHAN"), - (resp_idx, "RESP"), - (tag_idx, "TAG"), - (end_idx, "END"), - (hint_idx, "HINT"), - ] - if i != -1 - ] + indices = [] + for m_type, cfg in self.STATE_MARKERS.items(): + for p in cfg["starts"]: + idx = buf_low.find(p.lower()) + if idx != -1: + indices.append((idx, m_type, len(p))) if not indices: # Guard against split markers (case-insensitive) @@ -838,76 +850,111 @@ def process(self, chunk: str) -> str: break indices.sort() - idx, m_type = indices[0] + idx, m_type, m_len = indices[0] output.append(self.buffer[:idx]) self.buffer = self.buffer[idx:] - if m_type == "TOOL": - self.state = "IN_TOOL" - self.block_buffer = "" - self.buffer = self.buffer[len(self.TOOL_START) :] - elif m_type == "ORPHAN": - self.state = "IN_ORPHAN" + self.state = f"IN_{m_type}" + if m_type in ("TOOL", "ORPHAN"): self.block_buffer = "" - self.buffer = self.buffer[len(self.ORPHAN_START) :] - elif m_type == "RESP": - self.state = "IN_RESP" - self.buffer = self.buffer[len(self.RESPONSE_START) :] - elif m_type == "TAG": - self.state = "IN_TAG" - self.buffer = self.buffer[len(self.TAG_START) :] - elif m_type == "END": - self.buffer = self.buffer[len(self.TAG_END) :] - elif m_type == "HINT": - self.state = "IN_HINT" - self.buffer = self.buffer[len(self.HINT_START) :] + + self.buffer = self.buffer[m_len:] elif 
self.state == "IN_HINT": - end_idx = buf_low.find(self.HINT_END.lower()) - if end_idx != -1: - self.buffer = self.buffer[end_idx + len(self.HINT_END) :] + cfg = self.STATE_MARKERS["HINT"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.buffer = self.buffer[found_idx + found_len :] + self.state = "NORMAL" + else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.buffer = self.buffer[-max_end_len:] + break + + elif self.state == "IN_ARG": + cfg = self.STATE_MARKERS["ARG"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + bracket_idx = self.buffer.find(">", found_idx + found_len) + if bracket_idx != -1: + end_pos = bracket_idx + 1 + while end_pos < len(self.buffer) and self.buffer[end_pos] == ">": + end_pos += 1 + + self.buffer = self.buffer[end_pos:] + self.state = "NORMAL" + else: + break + else: + break + + elif self.state == "IN_RESULT": + cfg = self.STATE_MARKERS["RESULT"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: - keep_len = len(self.HINT_END) - 1 - if len(self.buffer) > keep_len: - self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_RESP": - end_idx = buf_low.find(self.RESPONSE_END.lower()) - if end_idx != -1: - self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :] + cfg = self.STATE_MARKERS["RESP"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: - keep_len = len(self.RESPONSE_END) - 1 - if len(self.buffer) > keep_len: - self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_TOOL": - end_idx = buf_low.find(self.TOOL_END.lower()) - if end_idx != -1: - self.block_buffer += self.buffer[:end_idx] - self.buffer = self.buffer[end_idx + len(self.TOOL_END) :] + cfg = self.STATE_MARKERS["TOOL"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.block_buffer += self.buffer[:found_idx] + self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: - keep_len = len(self.TOOL_END) - 1 - if len(self.buffer) > keep_len: - self.block_buffer += self.buffer[:-keep_len] - self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_ORPHAN": - end_idx = buf_low.find(self.ORPHAN_END.lower()) - if end_idx != -1: - self.block_buffer += self.buffer[:end_idx] - self.buffer = self.buffer[end_idx + len(self.ORPHAN_END) :] + cfg = self.STATE_MARKERS["ORPHAN"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.block_buffer += self.buffer[:found_idx] + self.buffer = self.buffer[found_idx + found_len :] 
self.state = "NORMAL" else: - keep_len = len(self.ORPHAN_END) - 1 - if len(self.buffer) > keep_len: - self.block_buffer += self.buffer[:-keep_len] - self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_TAG": @@ -920,24 +967,30 @@ def process(self, chunk: str) -> str: break elif self.state == "IN_BLOCK": - end_idx = buf_low.find(self.TAG_END.lower()) - if end_idx != -1: - content = self.buffer[:end_idx] + cfg = self.STATE_MARKERS["TAG"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + content = self.buffer[:found_idx] if self.current_role != "tool": output.append(content) - self.buffer = self.buffer[end_idx + len(self.TAG_END) :] + self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" self.current_role = "" else: - keep_len = len(self.TAG_END) - 1 + max_end_len = max(len(p) for p in cfg["ends"]) if self.current_role != "tool": - if len(self.buffer) > keep_len: - output.append(self.buffer[:-keep_len]) - self.buffer = self.buffer[-keep_len:] + if len(self.buffer) > max_end_len: + output.append(self.buffer[:-max_end_len]) + self.buffer = self.buffer[-max_end_len:] break else: - if len(self.buffer) > keep_len: - self.buffer = self.buffer[-keep_len:] + if len(self.buffer) > max_end_len: + self.buffer = self.buffer[-max_end_len:] break return "".join(output) @@ -945,11 +998,13 @@ def process(self, chunk: str) -> str: def flush(self) -> str: res = "" if self.state == "IN_TOOL": - if self.ORPHAN_START.lower() not in self.block_buffer.lower(): - res = f"{self.TOOL_START}{self.block_buffer}" + orphan_starts = self.STATE_MARKERS["ORPHAN"]["starts"] + is_orphan = any(p.lower() in self.block_buffer.lower() for p in orphan_starts) + if not is_orphan: + res = f"{self.STATE_MARKERS['TOOL']['starts'][0]}{self.block_buffer}" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer - elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT"): + elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): res = "" elif self.state == "NORMAL": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index 5d248c2..c955456 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -10,14 +10,11 @@ from ..utils import g_config from ..utils.helper import ( add_tag, + normalize_llm_text, save_file_to_tempfile, save_url_to_tempfile, ) -COMMONMARK_UNESCAPE_RE = re.compile( - r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" -) # See: https://spec.commonmark.org/current/#backslash-escapes - FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -89,12 +86,12 @@ async def process_message( if isinstance(message.content, str): if message.content or message.role == "tool": - text_fragments.append(message.content or "{}") + text_fragments.append(message.content or "") elif isinstance(message.content, list): for item in message.content: if item.type == "text": if item.text or message.role == "tool": - text_fragments.append(item.text or "{}") + text_fragments.append(item.text or "") elif item.type == "image_url": if not item.image_url: raise ValueError("Image URL cannot be empty") @@ -113,14 +110,19 @@ async def process_message( else: raise ValueError("File must 
contain 'file_data' or 'url' key") elif message.content is None and message.role == "tool": - text_fragments.append("{}") + text_fragments.append("") elif message.content is not None: raise ValueError("Unsupported message content type.") if message.role == "tool": tool_name = message.name or "unknown" - combined_content = "\n".join(text_fragments).strip() or "{}" - res_block = f"[response:{tool_name}]\n{combined_content}\n[/response]" + combined_content = "\n".join(text_fragments).strip() + res_block = ( + f"[response:{tool_name}]\n" + f"@results\n\n" + f"<<<RESULT>>>\n{combined_content}\n<<<END:RESULT>>>\n\n" + f"[/response]" + ) if wrap_tool: text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"] else: @@ -130,17 +132,22 @@ tool_blocks: list[str] = [] for call in message.tool_calls: args_text = call.function.arguments.strip() + formatted_args = "\n@args\n" try: parsed_args = orjson.loads(args_text) - args_text = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( - "utf-8" - ) + if isinstance(parsed_args, dict): + for k, v in parsed_args.items(): + val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") + formatted_args += f"\n<<<ARG:{k}>>>\n{val_str}\n<<<END:{k}>>>\n" + else: + formatted_args += args_text except orjson.JSONDecodeError: - pass - tool_blocks.append(f"[call:{call.function.name}]{args_text}[/call]") + formatted_args += args_text + + tool_blocks.append(f"[call:{call.function.name}]{formatted_args}\n[/call]") if tool_blocks: - tool_section = "[function_calls]\n" + "".join(tool_blocks) + "\n[/function_calls]" + tool_section = "[function_calls]\n" + "\n".join(tool_blocks) + "\n[/function_calls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -198,7 +205,7 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) - text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text) + text = normalize_llm_text(text) def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 4b57f60..a90c684 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -1,7 +1,6 @@ import hashlib import re import string -import unicodedata from contextlib import contextmanager from datetime import datetime, timedelta from pathlib import Path @@ -15,6 +14,7 @@ from ..utils import g_config from ..utils.helper import ( extract_tool_calls, + normalize_llm_text, remove_tool_call_blocks, ) from ..utils.singleton import Singleton @@ -39,11 +39,8 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: if text is None: return None - # Unicode normalization to NFC - text = unicodedata.normalize("NFC", text) + text = normalize_llm_text(text) - # Basic cleaning - text = text.replace("\r\n", "\n").replace("\r", "\n") text = LMDBConversationStore.remove_think_tags(text) text = remove_tool_call_blocks(text) diff --git a/app/utils/helper.py b/app/utils/helper.py index 67bfa55..dfb4abd 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,10 +1,12 @@ import base64 import hashlib +import html import mimetypes import re import reprlib import struct import tempfile +import unicodedata from pathlib import Path from urllib.parse import urlparse @@ -19,22 +21,39 @@ "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n" "[function_calls]\n" "[call:tool_name]\n" - '{"argument": "value"}\n' +
"[call:tool_name]\n" - '{"argument": "value"}\n' + "@args\n" + "\n<<>>\n" + "value\n" + "<<>>\n" "[/call]\n" "[/function_calls]\n" - "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block.\n" + "CRITICAL: Arguments MUST use <<>>...<<>> tags. Content inside tags can be any format.\n" ) TOOL_BLOCK_RE = re.compile( - r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE + r"\\?\[function_calls\\?]\s*(.*?)\s*\\?\[/function_calls\\?]", re.DOTALL | re.IGNORECASE +) +TOOL_CALL_RE = re.compile( + r"\\?\[call:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/call\\?]", re.DOTALL | re.IGNORECASE ) -TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE) RESPONSE_BLOCK_RE = re.compile( - r"\[function_responses]\s*(.*?)\s*\[/function_responses]", re.DOTALL | re.IGNORECASE + r"\\?\[function_responses\\?]\s*(.*?)\s*\\?\[/function_responses\\?]", + re.DOTALL | re.IGNORECASE, ) RESPONSE_ITEM_RE = re.compile( - r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE + r"\\?\[response:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/response\\?]", re.DOTALL | re.IGNORECASE +) +TAGGED_ARG_RE = re.compile( + r"(?:\\?<){3}ARG:([^>\\]+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:\1(?:\\?>){3}", + re.DOTALL | re.IGNORECASE, +) +TAGGED_RESULT_RE = re.compile( + r"(?:\\?<){3}RESULT(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:RESULT(?:\\?>){3}", + re.DOTALL | re.IGNORECASE, ) -CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") +CONTROL_TOKEN_RE = re.compile(r"\\?<\|im_(?:start|end)\|\\?>") +COMMONMARK_UNESCAPE_RE = re.compile( + r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" +) # See: https://spec.commonmark.org/current/#backslash-escapes TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -50,6 +69,26 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str: return f"<|im_start|>{role}\n{content}" + ("\n<|im_end|>" if not unclose else "") +def normalize_llm_text(s: str) -> str: + """ + Safely normalize LLM-generated text for both display and hashing. + Includes: HTML unescaping, NFC normalization, and line ending standardization. 
+ """ + if not s: + return "" + + s = html.unescape(s) + s = unicodedata.normalize("NFC", s) + s = s.replace("\r\n", "\n").replace("\r", "\n") + + return s + + +def unescape_llm_text(s: str) -> str: + r"""Unescape characters escaped by Gemini Web's post-processing.""" + return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + + def estimate_tokens(text: str | None) -> int: """Estimate the number of tokens heuristically based on character count""" if not text: @@ -202,27 +241,23 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning("Encountered tool_call without a function name.") return - arguments = raw_args - try: - parsed_args = orjson.loads(raw_args) - arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") - except orjson.JSONDecodeError: - json_match = re.search(r"({.*})", raw_args, re.DOTALL) - if json_match: - potential_json = json_match.group(1) - try: - parsed_args = orjson.loads(potential_json) - arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( - "utf-8" - ) - except orjson.JSONDecodeError: - logger.warning( - f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}" - ) + name = unescape_llm_text(name.strip()) + raw_args = unescape_llm_text(raw_args) + + arg_matches = TAGGED_ARG_RE.findall(raw_args) + if arg_matches: + args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches} + arguments = orjson.dumps(args_dict).decode("utf-8") + logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}") + else: + cleaned_raw = raw_args.replace("@args", "").strip() + if not cleaned_raw: + logger.debug(f"Tool '{name}' called without arguments.") else: logger.warning( - f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}" + f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" ) + arguments = "{}" index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") @@ -241,7 +276,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: all_calls.append( { "start": match.start(), - "name": (match.group(1) or "").strip(), + "name": unescape_llm_text((match.group(1) or "").strip()), "args": (match.group(2) or "").strip(), } ) @@ -256,6 +291,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: cleaned = TOOL_CALL_RE.sub("", cleaned) cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) cleaned = RESPONSE_ITEM_RE.sub("", cleaned) + cleaned = TAGGED_ARG_RE.sub("", cleaned) + cleaned = TAGGED_RESULT_RE.sub("", cleaned) return cleaned, tool_calls From d86798bc360b5ba76f3fb778c3b7e86b736400f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 15:05:48 +0700 Subject: [PATCH 101/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. 
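
For reference, a call that previously carried a raw JSON object inside
[call:...] now serializes roughly like this on the wire (a sketch only;
`write_file` and `path` are illustrative names, and the exact whitespace is
whatever process_message emits):

    [ToolCalls]
    [Call:write_file]
    @args
    <<<CallParameter:path>>>
    app/example.py
    <<<EndCallParameter>>>
    [/Call]
    [/ToolCalls]

Each argument gets its own <<<CallParameter:...>>>/<<<EndCallParameter>>>
pair, so multi-line values such as diffs survive without JSON escaping.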
---
 app/server/chat.py     | 114 ++++++++++++++++++------------
 app/services/client.py |  24 ++++---
 app/services/lmdb.py   |   4 +-
 app/utils/helper.py    | 150 ++++++++++++++++------------------------
 4 files changed, 141 insertions(+), 151 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 080d015..4262d0d 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -342,7 +342,7 @@ def _build_tool_prompt(
     tools: list[Tool],
     tool_choice: str | ToolChoiceFunction | None,
 ) -> str:
-    """Generate a system prompt chunk describing available tools."""
+    """Generate a system prompt describing available tools and the PascalCase protocol."""
     if not tools:
         return ""
 
@@ -378,29 +378,27 @@
     )
 
     lines.append(
-        "When you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:"
+        "When you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:"
     )
-    lines.append("[function_calls]")
-    lines.append("[call:tool_name]")
+    lines.append("[ToolCalls]")
+    lines.append("[Call:tool_name]")
     lines.append("@args")
-    lines.append("")
-    lines.append("<<<ARG:arg_name>>>")
+    lines.append("<<<CallParameter:arg_name>>>")
     lines.append("value")
-    lines.append("<<<END:arg_name>>>")
-    lines.append("")
-    lines.append("[/call]")
-    lines.append("[/function_calls]")
+    lines.append("<<<EndCallParameter>>>")
+    lines.append("[/Call]")
+    lines.append("[/ToolCalls]")
     lines.append(
-        "CRITICAL: Arguments MUST use <<<ARG:arg_name>>>...<<<END:arg_name>>> tags. Content inside tags can be any format."
+        "CRITICAL: Every argument MUST be enclosed in <<<CallParameter:arg_name>>>...<<<EndCallParameter>>>. Output as RAW text. Content inside tags can be any format."
    )
     lines.append(
-        "If multiple tools are needed, list them sequentially within the same [function_calls] block."
+        "If multiple tools are needed, list them sequentially within the same [ToolCalls] block."
     )
     lines.append(
-        "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
+        "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag."
     )
     lines.append(
-        "Note: Tool results are returned in a [function_responses] block using @results and <<<RESULT>>> tags."
+        "Note: Tool results are returned in a [ToolResults] block using @results and <<<ToolResult>>> tags."
     )
 
     return "\n".join(lines)
@@ -771,8 +769,8 @@
 
 class StreamingOutputFilter:
     """
-    State Machine filter to suppress technical markers, tool calls, and system hints.
-    Handles fragmentation where markers are split across multiple chunks.
+    Filter to suppress technical protocol markers, tool calls, and system hints from the stream.
+    Uses a state machine to handle fragmentation where markers are split across multiple chunks.
""" def __init__(self): @@ -783,28 +781,32 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[function_calls]", "\\[function_calls\\]"], - "ends": ["[/function_calls]", "\\[/function_calls\\]"], + "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], + "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], }, "ORPHAN": { - "starts": ["[call:", "\\[call:"], - "ends": ["[/call]", "\\[/call\\]"], + "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], + "ends": ["[/Call]", "\\[/Call\\]"], }, "RESP": { - "starts": ["[function_responses]", "\\[function_responses\\]"], - "ends": ["[/function_responses]", "\\[/function_responses\\]"], + "starts": ["[ToolResults]", "\\[ToolResults\\]"], + "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], }, "ARG": { - "starts": ["<<>>", "\\<\\<\\\\>\\>"], }, "RESULT": { - "starts": ["<<>>", "\\<\\<\\\\>\\>"], - "ends": ["<<>>", "\\<\\<\\\\>\\>"], + "starts": ["<<>>", "\\<\\<\\\\>\\>"], + "ends": ["<<>>", "\\<\\<\\\\>\\>"], }, "TAG": { - "starts": ["<|im_start|>", "\\<|im_start|\\>"], - "ends": ["<|im_end|>", "\\<|im_end|\\>"], + "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], + "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], }, } @@ -815,10 +817,20 @@ def __init__(self): "ends": [TOOL_HINT_LINE_END], } + self.ORPHAN_ENDS = [ + "<|im_end|>", + "\\<|im\\_end|\\>", + "[/Call]", + "\\[/Call\\]", + "[/ToolCalls]", + "\\[/ToolCalls\\]", + ] + self.WATCH_MARKERS = [] for cfg in self.STATE_MARKERS.values(): self.WATCH_MARKERS.extend(cfg["starts"]) self.WATCH_MARKERS.extend(cfg.get("ends", [])) + self.WATCH_MARKERS.extend(self.ORPHAN_ENDS) def process(self, chunk: str) -> str: self.buffer += chunk @@ -834,8 +846,12 @@ def process(self, chunk: str) -> str: if idx != -1: indices.append((idx, m_type, len(p))) + for p in self.ORPHAN_ENDS: + idx = buf_low.find(p.lower()) + if idx != -1: + indices.append((idx, "SKIP", len(p))) + if not indices: - # Guard against split markers (case-insensitive) keep_len = 0 for marker in self.WATCH_MARKERS: m_low = marker.lower() @@ -854,6 +870,10 @@ def process(self, chunk: str) -> str: output.append(self.buffer[:idx]) self.buffer = self.buffer[idx:] + if m_type == "SKIP": + self.buffer = self.buffer[m_len:] + continue + self.state = f"IN_{m_type}" if m_type in ("TOOL", "ORPHAN"): self.block_buffer = "" @@ -886,17 +906,12 @@ def process(self, chunk: str) -> str: found_idx, found_len = idx, len(p) if found_idx != -1: - bracket_idx = self.buffer.find(">", found_idx + found_len) - if bracket_idx != -1: - end_pos = bracket_idx + 1 - while end_pos < len(self.buffer) and self.buffer[end_pos] == ">": - end_pos += 1 - - self.buffer = self.buffer[end_pos:] - self.state = "NORMAL" - else: - break + self.buffer = self.buffer[found_idx + found_len :] + self.state = "NORMAL" else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.buffer = self.buffer[-max_end_len:] break elif self.state == "IN_RESULT": @@ -911,6 +926,9 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.buffer = self.buffer[-max_end_len:] break elif self.state == "IN_RESP": @@ -940,6 +958,10 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.block_buffer += self.buffer[:-max_end_len] + self.buffer = self.buffer[-max_end_len:] break elif 
self.state == "IN_ORPHAN": @@ -955,6 +977,10 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.block_buffer += self.buffer[:-max_end_len] + self.buffer = self.buffer[-max_end_len:] break elif self.state == "IN_TAG": @@ -996,16 +1022,12 @@ def process(self, chunk: str) -> str: return "".join(output) def flush(self) -> str: + """Release remaining buffer content and perform final cleanup at stream end.""" res = "" - if self.state == "IN_TOOL": - orphan_starts = self.STATE_MARKERS["ORPHAN"]["starts"] - is_orphan = any(p.lower() in self.block_buffer.lower() for p in orphan_starts) - if not is_orphan: - res = f"{self.STATE_MARKERS['TOOL']['starts'][0]}{self.block_buffer}" + if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): + res = "" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer - elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): - res = "" elif self.state == "NORMAL": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index c955456..ba203d9 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -78,8 +78,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a single Message object into a format suitable for the Gemini API. - Extracts text fragments, handles images and files, and appends tool call blocks if present. + Process a Message into Gemini API format using the PascalCase technical protocol. + Extracts text, handles files, and appends ToolCalls/ToolResults blocks. 
""" files: list[Path | str] = [] text_fragments: list[str] = [] @@ -118,13 +118,13 @@ async def process_message( tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() res_block = ( - f"[response:{tool_name}]\n" - f"@results\n\n" - f"<<>>\n{combined_content}\n<<>>\n\n" - f"[/response]" + f"[Result:{tool_name}]\n" + f"@results\n" + f"<<>>\n{combined_content}\n<<>>\n" + f"[/Result]" ) if wrap_tool: - text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"] + text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] else: text_fragments = [res_block] @@ -132,22 +132,24 @@ async def process_message( tool_blocks: list[str] = [] for call in message.tool_calls: args_text = call.function.arguments.strip() - formatted_args = "\n@args\n" + formatted_args = "@args\n" try: parsed_args = orjson.loads(args_text) if isinstance(parsed_args, dict): for k, v in parsed_args.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_args += f"\n<<>>\n{val_str}\n<<>>\n" + formatted_args += ( + f"<<>>\n{val_str}\n<<>>\n" + ) else: formatted_args += args_text except orjson.JSONDecodeError: formatted_args += args_text - tool_blocks.append(f"[call:{call.function.name}]{formatted_args}\n[/call]") + tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]") if tool_blocks: - tool_section = "[function_calls]\n" + "\n".join(tool_blocks) + "\n[/function_calls]" + tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index a90c684..ad92bbf 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -33,9 +33,7 @@ def _fuzzy_normalize(text: str | None) -> str | None: def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: - """ - Perform semantic normalization for hashing. - """ + """Perform safe semantic normalization for hashing using helper utilities.""" if text is None: return None diff --git a/app/utils/helper.py b/app/utils/helper.py index dfb4abd..25f9c9b 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,42 +18,43 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n" - "[function_calls]\n" - "[call:tool_name]\n" + "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" + "[ToolCalls]\n" + "[Call:tool_name]\n" "@args\n" - "\n<<>>\n" + "<<>>\n" "value\n" - "<<>>\n" - "[/call]\n" - "[/function_calls]\n" - "CRITICAL: Arguments MUST use <<>>...<<>> tags. Content inside tags can be any format.\n" + "<<>>\n" + "[/Call]\n" + "[/ToolCalls]\n" + "CRITICAL: Every argument MUST be enclosed in <<>>...<<>>. Output as RAW text. 
Content inside tags can be any format.\n" ) TOOL_BLOCK_RE = re.compile( - r"\\?\[function_calls\\?]\s*(.*?)\s*\\?\[/function_calls\\?]", re.DOTALL | re.IGNORECASE + r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE ) TOOL_CALL_RE = re.compile( - r"\\?\[call:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/call\\?]", re.DOTALL | re.IGNORECASE + r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE ) RESPONSE_BLOCK_RE = re.compile( - r"\\?\[function_responses\\?]\s*(.*?)\s*\\?\[/function_responses\\?]", + r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]", re.DOTALL | re.IGNORECASE, ) RESPONSE_ITEM_RE = re.compile( - r"\\?\[response:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/response\\?]", re.DOTALL | re.IGNORECASE + r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]", + re.DOTALL | re.IGNORECASE, ) TAGGED_ARG_RE = re.compile( - r"(?:\\?<){3}ARG:([^>\\]+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:\1(?:\\?>){3}", + r"(?:\\?<){3}CallParameter\\?:((?:[^>\\]|\\.)+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndCallParameter(?:\\?>){3}", re.DOTALL | re.IGNORECASE, ) TAGGED_RESULT_RE = re.compile( - r"(?:\\?<){3}RESULT(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:RESULT(?:\\?>){3}", + r"(?:\\?<){3}ToolResult(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndToolResult(?:\\?>){3}", re.DOTALL | re.IGNORECASE, ) -CONTROL_TOKEN_RE = re.compile(r"\\?<\|im_(?:start|end)\|\\?>") -COMMONMARK_UNESCAPE_RE = re.compile( - r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" -) # See: https://spec.commonmark.org/current/#backslash-escapes +CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE) +CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE) +CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE) +COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -61,7 +62,7 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str: - """Surround content with role tags""" + """Surround content with ChatML role tags.""" if role not in VALID_TAG_ROLES: logger.warning(f"Unknown role: {role}, returning content without tags") return content @@ -85,12 +86,12 @@ def normalize_llm_text(s: str) -> str: def unescape_llm_text(s: str) -> str: - r"""Unescape characters escaped by Gemini Web's post-processing.""" + """Unescape characters escaped by Gemini Web's post-processing (e.g., \\_ to _).""" return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) def estimate_tokens(text: str | None) -> int: - """Estimate the number of tokens heuristically based on character count""" + """Estimate the number of tokens heuristically based on character count.""" if not text: return 0 return int(len(text) / 3) @@ -99,6 +100,7 @@ def estimate_tokens(text: str | None) -> int: async def save_file_to_tempfile( file_in_base64: str, file_name: str = "", tempdir: Path | None = None ) -> Path: + """Decode base64 file data and save to a temporary file.""" data = base64.b64decode(file_in_base64) suffix = Path(file_name).suffix if file_name else ".bin" @@ -110,6 +112,7 @@ async def save_file_to_tempfile( async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: + """Download content from a URL and save to a temporary file.""" data: bytes | None = None suffix: str | None = None if url.startswith("data:image/"): @@ -148,67 +151,48 @@ async 
def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: def strip_tagged_blocks(text: str) -> str: - """Remove <|im_start|>role ... <|im_end|> sections. - - tool blocks are removed entirely (including content). - - other roles: remove markers and role, keep inner content. + """ + Remove ChatML role blocks (<|im_start|>role...<|im_end|>). + Role 'tool' blocks are removed entirely; others have markers stripped but content preserved. + Handles both raw and escaped markers consistently. """ if not text: return text - result: list[str] = [] + result = [] idx = 0 - length = len(text) - start_marker = "<|im_start|>" - end_marker = "<|im_end|>" - - while idx < length: - start = text.find(start_marker, idx) - if start == -1: + while idx < len(text): + match_start = CHATML_START_RE.search(text, idx) + if not match_start: result.append(text[idx:]) break - result.append(text[idx:start]) + result.append(text[idx : match_start.start()]) + role = match_start.group(1).lower() + content_start = match_start.end() - role_start = start + len(start_marker) - newline = text.find("\n", role_start) - if newline == -1: - result.append(text[start:]) + match_end = CHATML_END_RE.search(text, content_start) + if not match_end: + if role != "tool": + result.append(text[content_start:]) break - role = text[role_start:newline].strip().lower() - - end = text.find(end_marker, newline + 1) - if end == -1: - if role == "tool": - break - else: - result.append(text[newline + 1 :]) - break - - block_end = end + len(end_marker) + if role != "tool": + result.append(text[content_start : match_end.start()]) - if role == "tool": - idx = block_end - continue - - content = text[newline + 1 : end] - result.append(content) - idx = block_end + idx = match_end.end() return "".join(result) def strip_system_hints(text: str) -> str: - """Remove system-level hint text from a given string.""" + """Remove system hints, ChatML tags, and technical protocol markers from text.""" if not text: return text - # Remove the full hints first cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "") - # Remove fragments or multi-line blocks using derived constants if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END: - # Match from the start line to the end line, inclusive, handling internal modifications pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?" cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL) @@ -218,20 +202,26 @@ def strip_system_hints(text: str) -> str: cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned) cleaned = strip_tagged_blocks(cleaned) + cleaned = CONTROL_TOKEN_RE.sub("", cleaned) + cleaned = TOOL_BLOCK_RE.sub("", cleaned) + cleaned = TOOL_CALL_RE.sub("", cleaned) + cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) + cleaned = RESPONSE_ITEM_RE.sub("", cleaned) + cleaned = TAGGED_ARG_RE.sub("", cleaned) + cleaned = TAGGED_RESULT_RE.sub("", cleaned) + return cleaned def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]: """ - Unified engine for stripping tool call blocks and extracting tool metadata. - If extract=True, parses JSON arguments and assigns deterministic call IDs. + Extract tool metadata and return text stripped of technical markers. + Arguments are parsed into JSON and assigned deterministic call IDs. 
""" if not text: return text, [] - cleaned = strip_system_hints(text) - tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: @@ -271,45 +261,27 @@ def _create_tool_call(name: str, raw_args: str) -> None: ) ) - all_calls = [] - for match in TOOL_CALL_RE.finditer(cleaned): - all_calls.append( - { - "start": match.start(), - "name": unescape_llm_text((match.group(1) or "").strip()), - "args": (match.group(2) or "").strip(), - } - ) - - all_calls.sort(key=lambda x: x["start"]) - - if extract: - for call in all_calls: - _create_tool_call(call["name"], call["args"]) + for match in TOOL_CALL_RE.finditer(text): + _create_tool_call(match.group(1), match.group(2)) - cleaned = TOOL_BLOCK_RE.sub("", cleaned) - cleaned = TOOL_CALL_RE.sub("", cleaned) - cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) - cleaned = RESPONSE_ITEM_RE.sub("", cleaned) - cleaned = TAGGED_ARG_RE.sub("", cleaned) - cleaned = TAGGED_RESULT_RE.sub("", cleaned) + cleaned = strip_system_hints(text) return cleaned, tool_calls def remove_tool_call_blocks(text: str) -> str: - """Strip tool call code blocks from text.""" + """Strip tool call blocks from text for display.""" cleaned, _ = _process_tools_internal(text, extract=False) return cleaned def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: - """Extract tool call definitions and return cleaned text.""" + """Extract tool calls and return cleaned text.""" return _process_tools_internal(text, extract=True) def text_from_message(message: Message) -> str: - """Return text content from a message for token estimation.""" + """Concatenate text and tool arguments from a message for token estimation.""" base_text = "" if isinstance(message.content, str): base_text = message.content @@ -329,7 +301,6 @@ def text_from_message(message: Message) -> str: def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: """Return image dimensions (width, height) if PNG or JPEG headers are present.""" - # PNG: dimensions stored in bytes 16..24 of the IHDR chunk if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): try: width, height = struct.unpack(">II", data[16:24]) @@ -337,7 +308,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: except struct.error: return None, None - # JPEG: dimensions stored in SOF segment; iterate through markers to locate it if len(data) >= 4 and data[0:2] == b"\xff\xd8": idx = 2 length = len(data) @@ -357,7 +327,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: 0xCF, } while idx < length: - # Find marker alignment (markers are prefixed with 0xFF bytes) if data[idx] != 0xFF: idx += 1 continue @@ -380,7 +349,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: if marker in sof_markers: if idx + 4 < length: - # Skip precision byte at idx, then read height/width (big-endian) height = (data[idx + 1] << 8) + data[idx + 2] width = (data[idx + 3] << 8) + data[idx + 4] return int(width), int(height) From 0d18e9e84525c346bf4cf5fb3b545f7884f2157f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 16:55:17 +0700 Subject: [PATCH 102/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. 
---
 app/server/chat.py     | 25 +++++++++++--------------
 app/services/client.py | 14 +++-----------
 app/utils/helper.py    | 15 ++++++---------
 3 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 4262d0d..66c6d11 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -382,14 +382,11 @@ def _build_tool_prompt(
     )
     lines.append("[ToolCalls]")
     lines.append("[Call:tool_name]")
-    lines.append("@args")
-    lines.append("<<<CallParameter:arg_name>>>")
-    lines.append("value")
-    lines.append("<<<EndCallParameter>>>")
+    lines.append("[CallParameter:arg_name]value[/CallParameter]")
     lines.append("[/Call]")
     lines.append("[/ToolCalls]")
     lines.append(
-        "CRITICAL: Every argument MUST be enclosed in <<<CallParameter:arg_name>>>...<<<EndCallParameter>>>. Output as RAW text. Content inside tags can be any format."
+        "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format."
     )
     lines.append(
         "If multiple tools are needed, list them sequentially within the same [ToolCalls] block."
@@ -398,7 +395,7 @@ def _build_tool_prompt(
         "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag."
     )
     lines.append(
-        "Note: Tool results are returned in a [ToolResults] block using @results and <<<ToolResult>>> tags."
+        "Note: Tool results are returned in a [ToolResults] block using [ToolResult] tags."
     )
 
     return "\n".join(lines)
@@ -793,16 +790,12 @@ def __init__(self):
             "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
         },
         "ARG": {
-            "starts": ["<<<CallParameter:", "\\<\\<\\<CallParameter:"],
-            "ends": ["<<<EndCallParameter>>>", "\\<\\<\\<EndCallParameter\\>\\>\\>"],
+            "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"],
+            "ends": ["[/CallParameter]", "\\[/CallParameter\\]"],
         },
         "RESULT": {
-            "starts": ["<<<ToolResult>>>", "\\<\\<\\<ToolResult\\>\\>\\>"],
-            "ends": ["<<<EndToolResult>>>", "\\<\\<\\<EndToolResult\\>\\>\\>"],
+            "starts": ["[ToolResult]", "\\[ToolResult\\]"],
+            "ends": ["[/ToolResult]", "\\[/ToolResult\\]"],
         },
         "TAG": {
             "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
@@ -824,6 +817,10 @@ def __init__(self):
             "\\[/Call\\]",
             "[/ToolCalls]",
             "\\[/ToolCalls\\]",
+            "[/CallParameter]",
+            "\\[/CallParameter\\]",
+            "[/ToolResult]",
+            "\\[/ToolResult\\]",
         ]
 
         self.WATCH_MARKERS = []
diff --git a/app/services/client.py b/app/services/client.py
index ba203d9..9f9ac0f 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -118,10 +118,7 @@ async def process_message(
         tool_name = message.name or "unknown"
         combined_content = "\n".join(text_fragments).strip()
         res_block = (
-            f"[Result:{tool_name}]\n"
-            f"@results\n"
-            f"<<<ToolResult>>>\n{combined_content}\n<<<EndToolResult>>>\n"
-            f"[/Result]"
+            f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]"
         )
         if wrap_tool:
             text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"]
@@ -138,9 +135,7 @@ async def process_message(
             try:
                 parsed_args = orjson.loads(args_text)
                 if isinstance(parsed_args, dict):
                     for k, v in parsed_args.items():
                         val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                        formatted_args += (
-                            f"<<<CallParameter:{k}>>>\n{val_str}\n<<<EndCallParameter>>>\n"
-                        )
+                        formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n"
                 else:
                     formatted_args += args_text
             except orjson.JSONDecodeError:
@@ -171,7 +166,6 @@ async def process_conversation(
     while i < len(messages):
         msg = messages[i]
         if msg.role == "tool":
-            # Group consecutive tool messages
             tool_blocks: list[str] = []
             while i < len(messages) and messages[i].role == "tool":
                 part, part_files = await GeminiClientWrapper.process_message(
@@ -182,9 +176,7 @@ async def process_conversation(
                 i += 1
 
             combined_tool_content = "\n".join(tool_blocks)
-            wrapped_content = (
f"[function_responses]\n{combined_tool_content}\n[/function_responses]" - ) + wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/utils/helper.py b/app/utils/helper.py index 25f9c9b..4172154 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -21,13 +21,10 @@ "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" "[ToolCalls]\n" "[Call:tool_name]\n" - "@args\n" - "<<>>\n" - "value\n" - "<<>>\n" + "[CallParameter:arg_name]value[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n" - "CRITICAL: Every argument MUST be enclosed in <<>>...<<>>. Output as RAW text. Content inside tags can be any format.\n" + "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format.\n" ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE @@ -44,11 +41,11 @@ re.DOTALL | re.IGNORECASE, ) TAGGED_ARG_RE = re.compile( - r"(?:\\?<){3}CallParameter\\?:((?:[^>\\]|\\.)+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndCallParameter(?:\\?>){3}", + r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]", re.DOTALL | re.IGNORECASE, ) TAGGED_RESULT_RE = re.compile( - r"(?:\\?<){3}ToolResult(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndToolResult(?:\\?>){3}", + r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]", re.DOTALL | re.IGNORECASE, ) CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE) @@ -86,7 +83,7 @@ def normalize_llm_text(s: str) -> str: def unescape_llm_text(s: str) -> str: - """Unescape characters escaped by Gemini Web's post-processing (e.g., \\_ to _).""" + """Unescape characters escaped by Gemini Web's post-processing.""" return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) @@ -240,7 +237,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = orjson.dumps(args_dict).decode("utf-8") logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}") else: - cleaned_raw = raw_args.replace("@args", "").strip() + cleaned_raw = raw_args.strip() if not cleaned_raw: logger.debug(f"Tool '{name}' called without arguments.") else: From 8fa4329c5a1483784876630e0a891e7dba781fdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 17:11:30 +0700 Subject: [PATCH 103/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/server/chat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66c6d11..c31a079 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -49,6 +49,7 @@ estimate_tokens, extract_image_dimensions, extract_tool_calls, + remove_tool_call_blocks, strip_system_hints, text_from_message, ) @@ -221,7 +222,7 @@ def _process_llm_output( structured_requirement: StructuredOutputRequirement | None, ) -> tuple[str, str, list[Any]]: """ - Common post-processing logic for Gemini output. + Post-process Gemini output to extract tool calls and prepare clean text for display and storage. 
Returns: (visible_text, storage_output, tool_calls) """ visible_with_think, tool_calls = extract_tool_calls(raw_output_with_think) @@ -230,7 +231,7 @@ def _process_llm_output( visible_output = visible_with_think.strip() - storage_output, _ = extract_tool_calls(raw_output_clean) + storage_output = remove_tool_call_blocks(raw_output_clean) storage_output = storage_output.strip() if structured_requirement: From dcd7276ee41a202eb840494c986074da3a53499d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 20:36:20 +0700 Subject: [PATCH 104/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/services/client.py | 33 +-------------------------------- app/utils/helper.py | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 36 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 9f9ac0f..6ab80cd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,4 +1,3 @@ -import re from pathlib import Path from typing import Any, cast @@ -15,13 +14,6 @@ save_url_to_tempfile, ) -FILE_PATH_PATTERN = re.compile( - r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", - re.IGNORECASE, -) -GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" -) _UNSET = object() @@ -199,27 +191,4 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) - text = normalize_llm_text(text) - - def extract_file_path_from_display_text(text_content: str) -> str | None: - match = re.match(FILE_PATH_PATTERN, text_content) - if match: - return match.group(1) - return None - - def replacer(match: re.Match) -> str: - display_text = str(match.group(1)).strip() - google_search_prefix = match.group(2) - query_part = match.group(3) - - file_path = extract_file_path_from_display_text(display_text) - - if file_path: - # If it's a file path, transform it into a self-referencing Markdown link - return f"[`{file_path}`]({file_path})" - else: - # Otherwise, reconstruct the original Google search link with the display_text - original_google_search_url = f"{google_search_prefix}{query_part}" - return f"[`{display_text}`]({original_google_search_url})" - - return re.sub(GOOGLE_SEARCH_LINK_PATTERN, replacer, text) + return normalize_llm_text(text) diff --git a/app/utils/helper.py b/app/utils/helper.py index 4172154..ec39ebc 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -52,6 +52,16 @@ CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE) CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE) COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") +FILE_PATH_PATTERN = re.compile( + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + re.IGNORECASE, +) +GOOGLE_SEARCH_LINK_PATTERN = re.compile( + r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
+) +CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}") +CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") +CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -82,9 +92,26 @@ def normalize_llm_text(s: str) -> str: return s +def _strip_google_search_links(match: re.Match) -> str: + """Extract local Markdown link from Google Search links if applicable.""" + display_text = str(match.group(1)).strip() + if FILE_PATH_PATTERN.match(display_text): + return f"[`{display_text}`]({display_text})" + return match.group(0) + + def unescape_llm_text(s: str) -> str: - """Unescape characters escaped by Gemini Web's post-processing.""" - return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + """Unescape and mend text fragments broken by Gemini Web's post-processing.""" + if not s: + return "" + + s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + + s = CONFLICT_START_RE.sub("<<<<<<<", s) + s = CONFLICT_SEP_RE.sub("=======", s) + s = CONFLICT_END_RE.sub(">>>>>>>", s) + + return GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) def estimate_tokens(text: str | None) -> int: @@ -235,11 +262,11 @@ def _create_tool_call(name: str, raw_args: str) -> None: if arg_matches: args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches} arguments = orjson.dumps(args_dict).decode("utf-8") - logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}") + logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}") else: cleaned_raw = raw_args.strip() if not cleaned_raw: - logger.debug(f"Tool '{name}' called without arguments.") + logger.debug(f"Successfully parsed 0 arguments for tool: {name}") else: logger.warning( f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" From 737aa3aa4ac51a377ab3687e73623c7943c14872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 23:24:44 +0700 Subject: [PATCH 105/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/utils/helper.py | 66 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index ec39ebc..0a83f8d 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -100,18 +100,78 @@ def _strip_google_search_links(match: re.Match) -> str: return match.group(0) +def _remove_injected_fences(s: str) -> str: + """ + Strip anonymous Markdown code fences often injected by LLMs around + responses or tool calls, while preserving named blocks and all internal content. 
+ """ + if not s: + return "" + + lines = s.splitlines() + out = [] + in_fence = False + fence_len = 0 + is_anonymous = False + + for line in lines: + stripped = line.strip() + if stripped.startswith("```"): + count = 0 + for char in stripped: + if char == "`": + count += 1 + else: + break + + lang = stripped[count:].strip() + + if not in_fence: + in_fence = True + fence_len = count + is_anonymous = not lang + if not is_anonymous: + out.append(line) + continue + + if count >= fence_len: + if is_anonymous and lang: + out.append(line) + continue + + if not is_anonymous: + out.append(line) + in_fence = False + is_anonymous = False + fence_len = 0 + continue + + out.append(line) + + return "\n".join(out) + + def unescape_llm_text(s: str) -> str: - """Unescape and mend text fragments broken by Gemini Web's post-processing.""" + """ + Standardize and repair LLM-generated text fragments. + + Sequence: + 1. Reverse CommonMark escapes. + 2. Restore git conflict markers broken by web processing. + 3. Strip injected anonymous code fences. + 4. Process and normalize Google Search links. + """ if not s: return "" s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) - s = CONFLICT_START_RE.sub("<<<<<<<", s) s = CONFLICT_SEP_RE.sub("=======", s) s = CONFLICT_END_RE.sub(">>>>>>>", s) + s = _remove_injected_fences(s) + s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) - return GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) + return s def estimate_tokens(text: str | None) -> int: From 2c808955f8f761e1ab062c4d2cd5aab705ac050c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 08:55:58 +0700 Subject: [PATCH 106/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/utils/helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/utils/helper.py b/app/utils/helper.py index 0a83f8d..2367b72 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -61,6 +61,7 @@ ) CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}") CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") +CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}") CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -167,6 +168,7 @@ def unescape_llm_text(s: str) -> str: s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = CONFLICT_START_RE.sub("<<<<<<<", s) s = CONFLICT_SEP_RE.sub("=======", s) + s = CONFLICT_SEP_DASH_RE.sub("-------", s) s = CONFLICT_END_RE.sub(">>>>>>>", s) s = _remove_injected_fences(s) s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) From 9b0e1d5365ba323a406f77c7452283ef7a9879a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 09:23:08 +0700 Subject: [PATCH 107/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/utils/helper.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 2367b72..e612252 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -59,10 +59,10 @@ GOOGLE_SEARCH_LINK_PATTERN = re.compile( r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
) -CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}") -CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") -CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}") -CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}") +CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}") +CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=\s*(?:=\s*){6,}") +CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—]\s*(?:[-—]\s*){6,}") +CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -155,21 +155,24 @@ def _remove_injected_fences(s: str) -> str: def unescape_llm_text(s: str) -> str: """ Standardize and repair LLM-generated text fragments. + These patches are specifically designed for complex clients like Roo Code to ensure + compatibility with their specialized tool protocols (e.g., apply_diff) which may be + mangled by Gemini Web's interface or browser auto-formatting. Sequence: - 1. Reverse CommonMark escapes. - 2. Restore git conflict markers broken by web processing. + 1. Restore git conflict markers and DOUBLE any leading backslash to protect it. + 2. Reverse CommonMark escapes (consuming one level of doubled backslashes). 3. Strip injected anonymous code fences. 4. Process and normalize Google Search links. """ if not s: return "" + s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s) + s = CONFLICT_SEP_RE.sub(r"\1\1=======", s) + s = CONFLICT_SEP_DASH_RE.sub(r"\1\1-------", s) + s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s) s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) - s = CONFLICT_START_RE.sub("<<<<<<<", s) - s = CONFLICT_SEP_RE.sub("=======", s) - s = CONFLICT_SEP_DASH_RE.sub("-------", s) - s = CONFLICT_END_RE.sub(">>>>>>>", s) s = _remove_injected_fences(s) s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) From 95f87f6d905cdbb1352698e03f29e0821a224d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 09:38:08 +0700 Subject: [PATCH 108/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/utils/helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index e612252..8262b85 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -60,8 +60,8 @@ r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
) CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}") -CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=\s*(?:=\s*){6,}") -CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—]\s*(?:[-—]\s*){6,}") +CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") +CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}") CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -169,8 +169,8 @@ def unescape_llm_text(s: str) -> str: return "" s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s) - s = CONFLICT_SEP_RE.sub(r"\1\1=======", s) - s = CONFLICT_SEP_DASH_RE.sub(r"\1\1-------", s) + s = CONFLICT_SEP_RE.sub("=======", s) + s = CONFLICT_SEP_DASH_RE.sub("-------", s) s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s) s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = _remove_injected_fences(s) From 4569689ad1280aa2808927345c1a2b29e170f997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 10:47:56 +0700 Subject: [PATCH 109/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/utils/helper.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 8262b85..89429c9 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -59,10 +59,10 @@ GOOGLE_SEARCH_LINK_PATTERN = re.compile( r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" ) -CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}") -CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") -CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}") -CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}") +CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}(?:\s*(SEARCH)\b)?", re.IGNORECASE) +CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=(?:\s*=){6,}") +CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—](?:\s*[-—]){6,}") +CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}(?:\s*(REPLACE)\b)?", re.IGNORECASE) TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -168,10 +168,14 @@ def unescape_llm_text(s: str) -> str: if not s: return "" - s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s) - s = CONFLICT_SEP_RE.sub("=======", s) - s = CONFLICT_SEP_DASH_RE.sub("-------", s) - s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s) + s = CONFLICT_START_RE.sub( + lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s + ) + s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s) + s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s) + s = CONFLICT_END_RE.sub( + lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s + ) s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = _remove_injected_fences(s) s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) From 5a65cb6a9ebb6a51a21016f5d977bbef76f23f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 10:56:50 +0700 Subject: [PATCH 110/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. 
- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/utils/helper.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 89429c9..ff339e1 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -154,28 +154,29 @@ def _remove_injected_fences(s: str) -> str: def unescape_llm_text(s: str) -> str: """ - Standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure - compatibility with their specialized tool protocols (e.g., apply_diff) which may be - mangled by Gemini Web's interface or browser auto-formatting. + Standardize and repair LLM-generated text fragments for specialized client protocols. + Designed to ensure compatibility with clients like Roo Code by fixing + mangled conflict markers, escaping issues, and injected Markdown formatting. Sequence: - 1. Restore git conflict markers and DOUBLE any leading backslash to protect it. - 2. Reverse CommonMark escapes (consuming one level of doubled backslashes). - 3. Strip injected anonymous code fences. + 1. Normalize git conflict markers (handles mangled spacing and keyword standardization). + 2. Reverse CommonMark escapes (removes leading backslashes from structural markers). + 3. Strip injected anonymous Markdown code fences. 4. Process and normalize Google Search links. """ if not s: return "" - s = CONFLICT_START_RE.sub( - lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s - ) - s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s) - s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s) - s = CONFLICT_END_RE.sub( - lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s - ) + if any(c in s for c in ("<", "=", ">", "-", "—")): + s = CONFLICT_START_RE.sub( + lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s + ) + s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s) + s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s) + s = CONFLICT_END_RE.sub( + lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s + ) + s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = _remove_injected_fences(s) s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) From 8a03c3347bce1e41bb0b694b42095aec62a9e6cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 12:18:06 +0700 Subject: [PATCH 111/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. 
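
A minimal before/after sketch of the intended rewrite (the path is made up):

    Before: [app/utils/helper.py](https://www.google.com/search?q=app/utils/helper.py)
    After:  app/utils/helper.py

The display text (or, for bare URLs, the unquoted query) is kept only when it
matches FILE_PATH_PATTERN; anything that does not look like a file reference
is left untouched.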
---
 app/utils/helper.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index ff339e1..fba276a 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -8,7 +8,7 @@
 import tempfile
 import unicodedata
 from pathlib import Path
-from urllib.parse import urlparse
+from urllib.parse import unquote, urlparse
 
 import httpx
 import orjson
@@ -56,8 +56,13 @@
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
 )
-GOOGLE_SEARCH_LINK_PATTERN = re.compile(
-    r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
+GOOGLE_SEARCH_PATTERN = re.compile(
+    r"(?P<md_start>`?\[`?)?"
+    r"(?P<text>[^]]+)?"
+    r"(?(md_start)`?]\()?"
+    r"https://www\.google\.com/search\?q=(?P<query>[^&\s\"'<>)]+)"
+    r"(?(md_start)\)?`?)",
+    re.IGNORECASE,
 )
@@ -93,11 +98,13 @@ def normalize_llm_text(s: str) -> str:
     return s
 
 
-def _strip_google_search_links(match: re.Match) -> str:
-    """Extract local Markdown link from Google Search links if applicable."""
-    display_text = str(match.group(1)).strip()
-    if FILE_PATH_PATTERN.match(display_text):
-        return f"[`{display_text}`]({display_text})"
+def _strip_google_search(match: re.Match) -> str:
+    """Extract raw text from Google Search links if it looks like a file path."""
+    text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
+    text_to_check = unquote(text_to_check.strip())
+
+    if FILE_PATH_PATTERN.match(text_to_check):
+        return text_to_check
     return match.group(0)
 
@@ -157,12 +164,6 @@ def unescape_llm_text(s: str) -> str:
     Standardize and repair LLM-generated text fragments for specialized client protocols.
     Designed to ensure compatibility with clients like Roo Code by fixing
     mangled conflict markers, escaping issues, and injected Markdown formatting.
-
-    Sequence:
-    1. Normalize git conflict markers (handles mangled spacing and keyword standardization).
-    2. Reverse CommonMark escapes (removes leading backslashes from structural markers).
-    3. Strip injected anonymous Markdown code fences.
-    4. Process and normalize Google Search links.
     """
     if not s:
         return ""
@@ -179,7 +180,7 @@ def unescape_llm_text(s: str) -> str:
 
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = _remove_injected_fences(s)
-    s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)
+    s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
 
     return s
 
@@ -413,7 +414,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
     0xC5,
     0xC6,
     0xC7,
-    0xC9,
     0xCA,
     0xCB,
     0xCD,
From 7ed21323a8b51798c3b02c1ce625e94a31b603cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Fri, 13 Feb 2026 12:22:21 +0700
Subject: [PATCH 112/139] Refactor: Update `unescape_llm_text` to standardize
 and repair LLM-generated text fragments.
- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
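
Context for the restored byte: 0xC9 is the JPEG SOF9 marker (arithmetic-coded
extended sequential), and without it extract_image_dimensions skips past the
frame header of such files and reports no dimensions. A standalone sketch of
the scan this marker set drives (illustrative only, not the app's code):

    import struct

    SOF_MARKERS = {0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7,
                   0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF}

    def jpeg_size(data: bytes) -> tuple[int, int] | None:
        """Return (width, height) from the first SOF segment, else None."""
        idx = 2  # skip the SOI marker (FF D8)
        while idx + 9 <= len(data):
            if data[idx] != 0xFF:
                idx += 1
                continue
            marker = data[idx + 1]
            if marker == 0xFF:  # fill byte, keep scanning
                idx += 1
                continue
            if 0xD0 <= marker <= 0xD9 or marker == 0x01:
                idx += 2  # standalone marker with no length field
                continue
            seg_len = struct.unpack(">H", data[idx + 2 : idx + 4])[0]
            if marker in SOF_MARKERS:
                # SOF payload: precision byte, then big-endian height, width.
                height, width = struct.unpack(">HH", data[idx + 5 : idx + 9])
                return width, height
            idx += 2 + seg_len
        return None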
--- app/utils/helper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/utils/helper.py b/app/utils/helper.py index fba276a..8faa8b9 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -414,6 +414,7 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: 0xC5, 0xC6, 0xC7, + 0xC9, 0xCA, 0xCB, 0xCD, From d92bc1c43a051c818fd0810be94710580994b40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 15:28:52 +0700 Subject: [PATCH 113/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/server/chat.py | 12 +--- app/utils/helper.py | 139 +++++++++----------------------------------- 2 files changed, 30 insertions(+), 121 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index c31a079..3f8e0cd 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -378,17 +378,7 @@ def _build_tool_prompt( f"You are required to call the tool named `{target}`. Do not call any other tool." ) - lines.append( - "When you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:" - ) - lines.append("[ToolCalls]") - lines.append("[Call:tool_name]") - lines.append("[CallParameter:arg_name]value[/CallParameter]") - lines.append("[/Call]") - lines.append("[/ToolCalls]") - lines.append( - "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format." - ) + lines.append(TOOL_WRAP_HINT.strip()) lines.append( "If multiple tools are needed, list them sequentially within the same [ToolCalls] block." ) diff --git a/app/utils/helper.py b/app/utils/helper.py index 8faa8b9..28260c3 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,13 +18,13 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" + "\nWhen calling tools, use this EXACT protocol:\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:arg_name]value[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n" - "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. 
Content inside tags can be any format.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
@@ -64,10 +64,6 @@
     r"(?(md_start)\)?`?)",
     re.IGNORECASE,
 )
-CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}(?:\s*(SEARCH)\b)?", re.IGNORECASE)
-CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=(?:\s*=){6,}")
-CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—](?:\s*[-—]){6,}")
-CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}(?:\s*(REPLACE)\b)?", re.IGNORECASE)
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -108,78 +104,36 @@ def _strip_google_search(match: re.Match) -> str:
     return match.group(0)
 
 
-def _remove_injected_fences(s: str) -> str:
+def _strip_param_fences(s: str) -> str:
     """
-    Strip anonymous Markdown code fences often injected by LLMs around
-    responses or tool calls, while preserving named blocks and all internal content.
+    Remove one layer of outermost Markdown code fences,
+    supporting nested blocks by detecting variable fence lengths.
     """
+    s = s.strip()
     if not s:
         return ""
-    lines = s.splitlines()
-    out = []
-    in_fence = False
-    fence_len = 0
-    is_anonymous = False
-
-    for line in lines:
-        stripped = line.strip()
-        if stripped.startswith("```"):
-            count = 0
-            for char in stripped:
-                if char == "`":
-                    count += 1
-                else:
-                    break
-
-            lang = stripped[count:].strip()
-
-            if not in_fence:
-                in_fence = True
-                fence_len = count
-                is_anonymous = not lang
-                if not is_anonymous:
-                    out.append(line)
-                continue
+    match = re.match(r"^(?P<fence>`{3,})", s)
+    if not match or not s.endswith(match.group("fence")):
+        return s
 
-            if count >= fence_len:
-                if is_anonymous and lang:
-                    out.append(line)
-                    continue
-
-                if not is_anonymous:
-                    out.append(line)
-                in_fence = False
-                is_anonymous = False
-                fence_len = 0
-                continue
-
-        out.append(line)
+    lines = s.splitlines()
+    if len(lines) >= 2:
+        return "\n".join(lines[1:-1])
 
-    return "\n".join(out)
+    n = len(match.group("fence"))
+    return s[n:-n].strip()
 
 
 def unescape_llm_text(s: str) -> str:
     """
-    Standardize and repair LLM-generated text fragments for specialized client protocols.
-    Designed to ensure compatibility with clients like Roo Code by fixing
-    mangled conflict markers, escaping issues, and injected Markdown formatting.
+    Standardize and repair LLM-generated text fragments (unescaping, link normalization)
+    to ensure compatibility with specialized clients like Roo Code.
""" if not s: return "" - if any(c in s for c in ("<", "=", ">", "-", "—")): - s = CONFLICT_START_RE.sub( - lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s - ) - s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s) - s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s) - s = CONFLICT_END_RE.sub( - lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s - ) - s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) - s = _remove_injected_fences(s) s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s) return s @@ -196,13 +150,11 @@ async def save_file_to_tempfile( file_in_base64: str, file_name: str = "", tempdir: Path | None = None ) -> Path: """Decode base64 file data and save to a temporary file.""" - data = base64.b64decode(file_in_base64) - suffix = Path(file_name).suffix if file_name else ".bin" - - with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp: - tmp.write(data) + with tempfile.NamedTemporaryFile( + delete=False, suffix=Path(file_name).suffix if file_name else ".bin", dir=tempdir + ) as tmp: + tmp.write(base64.b64decode(file_in_base64)) path = Path(tmp.name) - return path @@ -213,35 +165,22 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: if url.startswith("data:image/"): metadata_part = url.split(",")[0] mime_type = metadata_part.split(":")[1].split(";")[0] - - base64_data = url.split(",")[1] - data = base64.b64decode(base64_data) - - suffix = mimetypes.guess_extension(mime_type) - if not suffix: - suffix = f".{mime_type.split('/')[1]}" + data = base64.b64decode(url.split(",")[1]) + suffix = mimetypes.guess_extension(mime_type) or f".{mime_type.split('/')[1]}" else: async with httpx.AsyncClient(follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content content_type = resp.headers.get("content-type") - if content_type: - mime_type = content_type.split(";")[0].strip() - suffix = mimetypes.guess_extension(mime_type) - + suffix = mimetypes.guess_extension(content_type.split(";")[0].strip()) if not suffix: - path_url = urlparse(url).path - suffix = Path(path_url).suffix - - if not suffix: - suffix = ".bin" + suffix = Path(urlparse(url).path).suffix or ".bin" with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp: tmp.write(data) path = Path(tmp.name) - return path @@ -249,7 +188,6 @@ def strip_tagged_blocks(text: str) -> str: """ Remove ChatML role blocks (<|im_start|>role...<|im_end|>). Role 'tool' blocks are removed entirely; others have markers stripped but content preserved. - Handles both raw and escaped markers consistently. 
""" if not text: return text @@ -274,7 +212,6 @@ def strip_tagged_blocks(text: str) -> str: if role != "tool": result.append(text[content_start : match_end.start()]) - idx = match_end.end() return "".join(result) @@ -297,7 +234,6 @@ def strip_system_hints(text: str) -> str: cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned) cleaned = strip_tagged_blocks(cleaned) - cleaned = CONTROL_TOKEN_RE.sub("", cleaned) cleaned = TOOL_BLOCK_RE.sub("", cleaned) cleaned = TOOL_CALL_RE.sub("", cleaned) @@ -331,7 +267,10 @@ def _create_tool_call(name: str, raw_args: str) -> None: arg_matches = TAGGED_ARG_RE.findall(raw_args) if arg_matches: - args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches} + args_dict = { + arg_name.strip(): _strip_param_fences(arg_value) + for arg_name, arg_value in arg_matches + } arguments = orjson.dumps(args_dict).decode("utf-8") logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}") else: @@ -360,7 +299,6 @@ def _create_tool_call(name: str, raw_args: str) -> None: _create_tool_call(match.group(1), match.group(2)) cleaned = strip_system_hints(text) - return cleaned, tool_calls @@ -406,21 +344,7 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: if len(data) >= 4 and data[0:2] == b"\xff\xd8": idx = 2 length = len(data) - sof_markers = { - 0xC0, - 0xC1, - 0xC2, - 0xC3, - 0xC5, - 0xC6, - 0xC7, - 0xC9, - 0xCA, - 0xCB, - 0xCD, - 0xCE, - 0xCF, - } + sof_markers = {0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF} while idx < length: if data[idx] != 0xFF: idx += 1 @@ -431,26 +355,21 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: break marker = data[idx] idx += 1 - if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7: continue - if idx + 1 >= length: break segment_length = (data[idx] << 8) + data[idx + 1] idx += 2 if segment_length < 2: break - if marker in sof_markers: if idx + 4 < length: height = (data[idx + 1] << 8) + data[idx + 2] width = (data[idx + 3] << 8) + data[idx + 4] return int(width), int(height) break - idx += segment_length - 2 - return None, None From 4ecad566edbe7cb1c37ad50ecc6da034c88e209a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 16:18:28 +0700 Subject: [PATCH 114/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. 
--- app/utils/helper.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 28260c3..99af0a5 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,13 +18,15 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "\nWhen calling tools, use this EXACT protocol:\n" + "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" "[ToolCalls]\n" "[Call:tool_name]\n" - "[CallParameter:arg_name]value[/CallParameter]\n" + "[CallParameter:arg_name]\n" + "value\n" + "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n" - "CRITICAL: Wrap ALL multi-line or complex values in a markdown code block (e.g., [CallParameter:arg_name]```\nvalue\n```[/CallParameter]) to prevent rendering corruption.\n" + "CRITICAL: If 'value' is multi-line or complex, you MUST wrap it in a markdown code block within the tags (use a fence longer than any backtick sequence in the content) to prevent rendering corruption.\n" ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From 28378b4125e8900284814040d86f3139f2ddd644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 16:19:54 +0700 Subject: [PATCH 115/139] Update dependencies --- pyproject.toml | 4 ++-- uv.lock | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d3a1aaf..93dabab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.128.7", + "fastapi>=0.129.0", "gemini-webapi>=1.19.1", "lmdb>=1.7.5", "loguru>=0.7.3", @@ -31,5 +31,5 @@ indent-style = "space" [dependency-groups] dev = [ - "ruff>=0.15.0", + "ruff>=0.15.1", ] diff --git a/uv.lock b/uv.lock index c038f53..249e84b 100644 --- a/uv.lock +++ b/uv.lock @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.128.7" +version = "0.129.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -74,9 +74,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a0/fc/af386750b3fd8d8828167e4c82b787a8eeca2eca5c5429c9db8bb7c70e04/fastapi-0.128.7.tar.gz", hash = "sha256:783c273416995486c155ad2c0e2b45905dedfaf20b9ef8d9f6a9124670639a24", size = 375325, upload-time = "2026-02-10T12:26:40.968Z" } +sdist = { url = "https://files.pythonhosted.org/packages/48/47/75f6bea02e797abff1bca968d5997793898032d9923c1935ae2efdece642/fastapi-0.129.0.tar.gz", hash = "sha256:61315cebd2e65df5f97ec298c888f9de30430dd0612d59d6480beafbc10655af", size = 375450, upload-time = "2026-02-12T13:54:52.541Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/1a/f983b45661c79c31be575c570d46c437a5409b67a939c1b3d8d6b3ed7a7f/fastapi-0.128.7-py3-none-any.whl", hash = "sha256:6bd9bd31cb7047465f2d3fa3ba3f33b0870b17d4eaf7cdb36d1576ab060ad662", size = 103630, upload-time = "2026-02-10T12:26:39.414Z" }, + { url = "https://files.pythonhosted.org/packages/9e/dd/d0ee25348ac58245ee9f90b6f3cbb666bf01f69be7e0911f9851bddbda16/fastapi-0.129.0-py3-none-any.whl", hash = "sha256:b4946880e48f462692b31c083be0432275cbfb6e2274566b1be91479cc1a84ec", size = 102950, upload-time = "2026-02-12T13:54:54.528Z" }, ] [[package]] @@ -106,7 +106,7 @@ 
dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.128.7" }, + { name = "fastapi", specifier = ">=0.129.0" }, { name = "gemini-webapi", specifier = ">=1.19.1" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, @@ -119,7 +119,7 @@ requires-dist = [ provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.15.0" }] +dev = [{ name = "ruff", specifier = ">=0.15.1" }] [[package]] name = "gemini-webapi" @@ -361,27 +361,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.0" +version = "0.15.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c8/39/5cee96809fbca590abea6b46c6d1c586b49663d1d2830a751cc8fc42c666/ruff-0.15.0.tar.gz", hash = "sha256:6bdea47cdbea30d40f8f8d7d69c0854ba7c15420ec75a26f463290949d7f7e9a", size = 4524893, upload-time = "2026-02-03T17:53:35.357Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/dc/4e6ac71b511b141cf626357a3946679abeba4cf67bc7cc5a17920f31e10d/ruff-0.15.1.tar.gz", hash = "sha256:c590fe13fb57c97141ae975c03a1aedb3d3156030cabd740d6ff0b0d601e203f", size = 4540855, upload-time = "2026-02-12T23:09:09.998Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/88/3fd1b0aa4b6330d6aaa63a285bc96c9f71970351579152d231ed90914586/ruff-0.15.0-py3-none-linux_armv6l.whl", hash = "sha256:aac4ebaa612a82b23d45964586f24ae9bc23ca101919f5590bdb368d74ad5455", size = 10354332, upload-time = "2026-02-03T17:52:54.892Z" }, - { url = "https://files.pythonhosted.org/packages/72/f6/62e173fbb7eb75cc29fe2576a1e20f0a46f671a2587b5f604bfb0eaf5f6f/ruff-0.15.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:dcd4be7cc75cfbbca24a98d04d0b9b36a270d0833241f776b788d59f4142b14d", size = 10767189, upload-time = "2026-02-03T17:53:19.778Z" }, - { url = "https://files.pythonhosted.org/packages/99/e4/968ae17b676d1d2ff101d56dc69cf333e3a4c985e1ec23803df84fc7bf9e/ruff-0.15.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d747e3319b2bce179c7c1eaad3d884dc0a199b5f4d5187620530adf9105268ce", size = 10075384, upload-time = "2026-02-03T17:53:29.241Z" }, - { url = "https://files.pythonhosted.org/packages/a2/bf/9843c6044ab9e20af879c751487e61333ca79a2c8c3058b15722386b8cae/ruff-0.15.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:650bd9c56ae03102c51a5e4b554d74d825ff3abe4db22b90fd32d816c2e90621", size = 10481363, upload-time = "2026-02-03T17:52:43.332Z" }, - { url = "https://files.pythonhosted.org/packages/55/d9/4ada5ccf4cd1f532db1c8d44b6f664f2208d3d93acbeec18f82315e15193/ruff-0.15.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6664b7eac559e3048223a2da77769c2f92b43a6dfd4720cef42654299a599c9", size = 10187736, upload-time = "2026-02-03T17:53:00.522Z" }, - { url = "https://files.pythonhosted.org/packages/86/e2/f25eaecd446af7bb132af0a1d5b135a62971a41f5366ff41d06d25e77a91/ruff-0.15.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f811f97b0f092b35320d1556f3353bf238763420ade5d9e62ebd2b73f2ff179", size = 10968415, upload-time = "2026-02-03T17:53:15.705Z" }, - { url = "https://files.pythonhosted.org/packages/e7/dc/f06a8558d06333bf79b497d29a50c3a673d9251214e0d7ec78f90b30aa79/ruff-0.15.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:761ec0a66680fab6454236635a39abaf14198818c8cdf691e036f4bc0f406b2d", size = 11809643, upload-time = "2026-02-03T17:53:23.031Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/45/0ece8db2c474ad7df13af3a6d50f76e22a09d078af63078f005057ca59eb/ruff-0.15.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:940f11c2604d317e797b289f4f9f3fa5555ffe4fb574b55ed006c3d9b6f0eb78", size = 11234787, upload-time = "2026-02-03T17:52:46.432Z" }, - { url = "https://files.pythonhosted.org/packages/8a/d9/0e3a81467a120fd265658d127db648e4d3acfe3e4f6f5d4ea79fac47e587/ruff-0.15.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbca3d40558789126da91d7ef9a7c87772ee107033db7191edefa34e2c7f1b4", size = 11112797, upload-time = "2026-02-03T17:52:49.274Z" }, - { url = "https://files.pythonhosted.org/packages/b2/cb/8c0b3b0c692683f8ff31351dfb6241047fa873a4481a76df4335a8bff716/ruff-0.15.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9a121a96db1d75fa3eb39c4539e607f628920dd72ff1f7c5ee4f1b768ac62d6e", size = 11033133, upload-time = "2026-02-03T17:53:33.105Z" }, - { url = "https://files.pythonhosted.org/packages/f8/5e/23b87370cf0f9081a8c89a753e69a4e8778805b8802ccfe175cc410e50b9/ruff-0.15.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5298d518e493061f2eabd4abd067c7e4fb89e2f63291c94332e35631c07c3662", size = 10442646, upload-time = "2026-02-03T17:53:06.278Z" }, - { url = "https://files.pythonhosted.org/packages/e1/9a/3c94de5ce642830167e6d00b5c75aacd73e6347b4c7fc6828699b150a5ee/ruff-0.15.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:afb6e603d6375ff0d6b0cee563fa21ab570fd15e65c852cb24922cef25050cf1", size = 10195750, upload-time = "2026-02-03T17:53:26.084Z" }, - { url = "https://files.pythonhosted.org/packages/30/15/e396325080d600b436acc970848d69df9c13977942fb62bb8722d729bee8/ruff-0.15.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:77e515f6b15f828b94dc17d2b4ace334c9ddb7d9468c54b2f9ed2b9c1593ef16", size = 10676120, upload-time = "2026-02-03T17:53:09.363Z" }, - { url = "https://files.pythonhosted.org/packages/8d/c9/229a23d52a2983de1ad0fb0ee37d36e0257e6f28bfd6b498ee2c76361874/ruff-0.15.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6f6e80850a01eb13b3e42ee0ebdf6e4497151b48c35051aab51c101266d187a3", size = 11201636, upload-time = "2026-02-03T17:52:57.281Z" }, - { url = "https://files.pythonhosted.org/packages/6f/b0/69adf22f4e24f3677208adb715c578266842e6e6a3cc77483f48dd999ede/ruff-0.15.0-py3-none-win32.whl", hash = "sha256:238a717ef803e501b6d51e0bdd0d2c6e8513fe9eec14002445134d3907cd46c3", size = 10465945, upload-time = "2026-02-03T17:53:12.591Z" }, - { url = "https://files.pythonhosted.org/packages/51/ad/f813b6e2c97e9b4598be25e94a9147b9af7e60523b0cb5d94d307c15229d/ruff-0.15.0-py3-none-win_amd64.whl", hash = "sha256:dd5e4d3301dc01de614da3cdffc33d4b1b96fb89e45721f1598e5532ccf78b18", size = 11564657, upload-time = "2026-02-03T17:52:51.893Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b0/2d823f6e77ebe560f4e397d078487e8d52c1516b331e3521bc75db4272ca/ruff-0.15.0-py3-none-win_arm64.whl", hash = "sha256:c480d632cc0ca3f0727acac8b7d053542d9e114a462a145d0b00e7cd658c515a", size = 10865753, upload-time = "2026-02-03T17:53:03.014Z" }, + { url = "https://files.pythonhosted.org/packages/23/bf/e6e4324238c17f9d9120a9d60aa99a7daaa21204c07fcd84e2ef03bb5fd1/ruff-0.15.1-py3-none-linux_armv6l.whl", hash = "sha256:b101ed7cf4615bda6ffe65bdb59f964e9f4a0d3f85cbf0e54f0ab76d7b90228a", size = 10367819, upload-time = "2026-02-12T23:09:03.598Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ea/c8f89d32e7912269d38c58f3649e453ac32c528f93bb7f4219258be2e7ed/ruff-0.15.1-py3-none-macosx_10_12_x86_64.whl", 
hash = "sha256:939c995e9277e63ea632cc8d3fae17aa758526f49a9a850d2e7e758bfef46602", size = 10798618, upload-time = "2026-02-12T23:09:22.928Z" }, + { url = "https://files.pythonhosted.org/packages/5e/0f/1d0d88bc862624247d82c20c10d4c0f6bb2f346559d8af281674cf327f15/ruff-0.15.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1d83466455fdefe60b8d9c8df81d3c1bbb2115cede53549d3b522ce2bc703899", size = 10148518, upload-time = "2026-02-12T23:08:58.339Z" }, + { url = "https://files.pythonhosted.org/packages/f5/c8/291c49cefaa4a9248e986256df2ade7add79388fe179e0691be06fae6f37/ruff-0.15.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9457e3c3291024866222b96108ab2d8265b477e5b1534c7ddb1810904858d16", size = 10518811, upload-time = "2026-02-12T23:09:31.865Z" }, + { url = "https://files.pythonhosted.org/packages/c3/1a/f5707440e5ae43ffa5365cac8bbb91e9665f4a883f560893829cf16a606b/ruff-0.15.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:92c92b003e9d4f7fbd33b1867bb15a1b785b1735069108dfc23821ba045b29bc", size = 10196169, upload-time = "2026-02-12T23:09:17.306Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ff/26ddc8c4da04c8fd3ee65a89c9fb99eaa5c30394269d424461467be2271f/ruff-0.15.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fe5c41ab43e3a06778844c586251eb5a510f67125427625f9eb2b9526535779", size = 10990491, upload-time = "2026-02-12T23:09:25.503Z" }, + { url = "https://files.pythonhosted.org/packages/fc/00/50920cb385b89413f7cdb4bb9bc8fc59c1b0f30028d8bccc294189a54955/ruff-0.15.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66a6dd6df4d80dc382c6484f8ce1bcceb55c32e9f27a8b94c32f6c7331bf14fb", size = 11843280, upload-time = "2026-02-12T23:09:19.88Z" }, + { url = "https://files.pythonhosted.org/packages/5d/6d/2f5cad8380caf5632a15460c323ae326f1e1a2b5b90a6ee7519017a017ca/ruff-0.15.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a4a42cbb8af0bda9bcd7606b064d7c0bc311a88d141d02f78920be6acb5aa83", size = 11274336, upload-time = "2026-02-12T23:09:14.907Z" }, + { url = "https://files.pythonhosted.org/packages/a3/1d/5f56cae1d6c40b8a318513599b35ea4b075d7dc1cd1d04449578c29d1d75/ruff-0.15.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab064052c31dddada35079901592dfba2e05f5b1e43af3954aafcbc1096a5b2", size = 11137288, upload-time = "2026-02-12T23:09:07.475Z" }, + { url = "https://files.pythonhosted.org/packages/cd/20/6f8d7d8f768c93b0382b33b9306b3b999918816da46537d5a61635514635/ruff-0.15.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:5631c940fe9fe91f817a4c2ea4e81f47bee3ca4aa646134a24374f3c19ad9454", size = 11070681, upload-time = "2026-02-12T23:08:55.43Z" }, + { url = "https://files.pythonhosted.org/packages/9a/67/d640ac76069f64cdea59dba02af2e00b1fa30e2103c7f8d049c0cff4cafd/ruff-0.15.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:68138a4ba184b4691ccdc39f7795c66b3c68160c586519e7e8444cf5a53e1b4c", size = 10486401, upload-time = "2026-02-12T23:09:27.927Z" }, + { url = "https://files.pythonhosted.org/packages/65/3d/e1429f64a3ff89297497916b88c32a5cc88eeca7e9c787072d0e7f1d3e1e/ruff-0.15.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:518f9af03bfc33c03bdb4cb63fabc935341bb7f54af500f92ac309ecfbba6330", size = 10197452, upload-time = "2026-02-12T23:09:12.147Z" }, + { url = "https://files.pythonhosted.org/packages/78/83/e2c3bade17dad63bf1e1c2ffaf11490603b760be149e1419b07049b36ef2/ruff-0.15.1-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:da79f4d6a826caaea95de0237a67e33b81e6ec2e25fc7e1993a4015dffca7c61", size = 10693900, upload-time = "2026-02-12T23:09:34.418Z" }, + { url = "https://files.pythonhosted.org/packages/a1/27/fdc0e11a813e6338e0706e8b39bb7a1d61ea5b36873b351acee7e524a72a/ruff-0.15.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3dd86dccb83cd7d4dcfac303ffc277e6048600dfc22e38158afa208e8bf94a1f", size = 11227302, upload-time = "2026-02-12T23:09:36.536Z" }, + { url = "https://files.pythonhosted.org/packages/f6/58/ac864a75067dcbd3b95be5ab4eb2b601d7fbc3d3d736a27e391a4f92a5c1/ruff-0.15.1-py3-none-win32.whl", hash = "sha256:660975d9cb49b5d5278b12b03bb9951d554543a90b74ed5d366b20e2c57c2098", size = 10462555, upload-time = "2026-02-12T23:09:29.899Z" }, + { url = "https://files.pythonhosted.org/packages/e0/5e/d4ccc8a27ecdb78116feac4935dfc39d1304536f4296168f91ed3ec00cd2/ruff-0.15.1-py3-none-win_amd64.whl", hash = "sha256:c820fef9dd5d4172a6570e5721704a96c6679b80cf7be41659ed439653f62336", size = 11599956, upload-time = "2026-02-12T23:09:01.157Z" }, + { url = "https://files.pythonhosted.org/packages/2a/07/5bda6a85b220c64c65686bc85bd0bbb23b29c62b3a9f9433fa55f17cda93/ruff-0.15.1-py3-none-win_arm64.whl", hash = "sha256:5ff7d5f0f88567850f45081fac8f4ec212be8d0b963e385c3f7d0d2eb4899416", size = 10874604, upload-time = "2026-02-12T23:09:05.515Z" }, ] [[package]] From 3fcd01ead021e9536f1cbdaf4c4f0c2b2a0047e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 17:28:44 +0700 Subject: [PATCH 116/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/server/chat.py | 9 --------- app/utils/helper.py | 10 +++++++--- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 3f8e0cd..114eedf 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -379,15 +379,6 @@ def _build_tool_prompt( ) lines.append(TOOL_WRAP_HINT.strip()) - lines.append( - "If multiple tools are needed, list them sequentially within the same [ToolCalls] block." - ) - lines.append( - "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag." - ) - lines.append( - "Note: Tool results are returned in a [ToolResults] block using [ToolResult] tags." - ) return "\n".join(lines) diff --git a/app/utils/helper.py b/app/utils/helper.py index 99af0a5..68066a3 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,15 +18,19 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" + "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:arg_name]\n" "value\n" "[/CallParameter]\n" "[/Call]\n" - "[/ToolCalls]\n" - "CRITICAL: If 'value' is multi-line or complex, you MUST wrap it in a markdown code block within the tags (use a fence longer than any backtick sequence in the content) to prevent rendering corruption.\n" + "[/ToolCalls]\n\n" + "CRITICAL: If 'value' contains ANY newlines or special characters, you MUST wrap it in a markdown code block (triple backticks or longer) within the tags. " + "Failure to wrap multi-line content will result in a protocol rejection. 
Use a fence longer than any backtick sequence in the content.\n\n" + "Multiple tool calls MUST be listed sequentially within the same [ToolCalls] block.\n" + "If no tool is needed, respond naturally and NEVER use any [ToolCalls] or [Call] tags.\n" + "Note: Tool results are returned in [ToolResults] blocks." ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From ef24704b3d302f76233999002717a44d4a88c444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 17:42:59 +0700 Subject: [PATCH 117/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/utils/helper.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 68066a3..a2b4c23 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,7 +18,7 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n\n" + "When calling tools, respond ONLY with a single [ToolCalls] block. NO other text allowed. EXACT syntax:\n\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:arg_name]\n" @@ -26,11 +26,9 @@ "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" - "CRITICAL: If 'value' contains ANY newlines or special characters, you MUST wrap it in a markdown code block (triple backticks or longer) within the tags. " - "Failure to wrap multi-line content will result in a protocol rejection. Use a fence longer than any backtick sequence in the content.\n\n" - "Multiple tool calls MUST be listed sequentially within the same [ToolCalls] block.\n" - "If no tool is needed, respond naturally and NEVER use any [ToolCalls] or [Call] tags.\n" - "Note: Tool results are returned in [ToolResults] blocks." + "CRITICAL: If 'value' has ANY newline, you MUST wrap it in a code block (```). " + "The tags MUST contain ONLY the code block, no other text. Use a fence longer than any backticks in content.\n" + "Multiple calls: list [Call] blocks inside [ToolCalls]. No tools: respond naturally, NO tags." ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From 4edf4cdedc6c093a426da1554948fed074705edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 18:08:52 +0700 Subject: [PATCH 118/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/utils/helper.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index a2b4c23..0efa460 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,17 +18,21 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "When calling tools, respond ONLY with a single [ToolCalls] block. NO other text allowed. EXACT syntax:\n\n" + "SYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" + "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" + "2. 
For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n" + "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n" + "EXACT SYNTAX TEMPLATE:\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:arg_name]\n" + "```\n" "value\n" + "```\n" "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" - "CRITICAL: If 'value' has ANY newline, you MUST wrap it in a code block (```). " - "The tags MUST contain ONLY the code block, no other text. Use a fence longer than any backticks in content.\n" - "Multiple calls: list [Call] blocks inside [ToolCalls]. No tools: respond naturally, NO tags." + "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags." ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From dcadabb8341a4e94cc9a44a6c9e9350e70365a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 18:23:20 +0700 Subject: [PATCH 119/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/server/chat.py | 2 +- app/utils/helper.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 114eedf..881f7c9 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -378,7 +378,7 @@ def _build_tool_prompt( f"You are required to call the tool named `{target}`. Do not call any other tool." ) - lines.append(TOOL_WRAP_HINT.strip()) + lines.append(TOOL_WRAP_HINT) return "\n".join(lines) diff --git a/app/utils/helper.py b/app/utils/helper.py index 0efa460..1e70c3b 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,7 +18,7 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "SYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" + "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n" "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n" @@ -32,7 +32,7 @@ "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" - "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags." + "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From 83904142bb97676f09d93725ee5d4ad19f5d7cd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 19:56:04 +0700 Subject: [PATCH 120/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. 
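Under the hint this patch converges on, a compliant model reply and a rough server-side parse look like the sketch below. The regexes here paraphrase TOOL_BLOCK_RE, TOOL_CALL_RE, and TAGGED_ARG_RE, which additionally tolerate escaped brackets; the tool name and path are placeholders:

    import re

    reply = (
        "[ToolCalls]\n"
        "[Call:read_file]\n"
        "[CallParameter:path]\n"
        "```\n"
        "app/utils/helper.py\n"
        "```\n"
        "[/CallParameter]\n"
        "[/Call]\n"
        "[/ToolCalls]"
    )

    block = re.search(r"\[ToolCalls]\s*(.*?)\s*\[/ToolCalls]", reply, re.DOTALL | re.IGNORECASE)
    call = re.search(r"\[Call:([^\]]+)]\s*(.*?)\s*\[/Call]", block.group(1), re.DOTALL | re.IGNORECASE)
    params = re.findall(
        r"\[CallParameter:([^\]]+)]\s*(.*?)\s*\[/CallParameter]",
        call.group(2),
        re.DOTALL | re.IGNORECASE,
    )
    # call.group(1) == "read_file"; params == [("path", "```\napp/utils/helper.py\n```")],
    # and the fenced value is unwrapped before the arguments are JSON-encoded.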
--- app/utils/helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 1e70c3b..ae96f05 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -20,8 +20,8 @@ TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" - "2. For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n" - "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n" + "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" + "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" "[ToolCalls]\n" "[Call:tool_name]\n" @@ -32,6 +32,7 @@ "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" + "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" ) TOOL_BLOCK_RE = re.compile( From ce43d63ce2b2b6e042fa122d96d405c683407ef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 22:26:00 +0700 Subject: [PATCH 121/139] Refactor: Remove all escape logic handlers. - Change tool call tags to snake_case. --- app/server/chat.py | 61 +++++++++++++++---------------- app/services/client.py | 32 ++++++++--------- app/services/lmdb.py | 22 ++++++------ app/utils/helper.py | 81 ++++++++++++++++++++---------------------- app/utils/logging.py | 2 +- 5 files changed, 93 insertions(+), 105 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 881f7c9..98277d6 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -121,18 +121,18 @@ def _calculate_usage( ) -> tuple[int, int, int]: """Calculate prompt, completion and total tokens consistently.""" prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_args_text = "" + tool_params_text = "" if tool_calls: for call in tool_calls: if hasattr(call, "function"): - tool_args_text += call.function.arguments or "" + tool_params_text += call.function.arguments or "" elif isinstance(call, dict): - tool_args_text += call.get("function", {}).get("arguments", "") + tool_params_text += call.get("function", {}).get("arguments", "") completion_basis = assistant_text or "" - if tool_args_text: + if tool_params_text: completion_basis = ( - f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text + f"{completion_basis}\n{tool_params_text}" if completion_basis else tool_params_text ) completion_tokens = estimate_tokens(completion_basis) @@ -343,7 +343,7 @@ def _build_tool_prompt( tools: list[Tool], tool_choice: str | ToolChoiceFunction | None, ) -> str: - """Generate a system prompt describing available tools and the PascalCase protocol.""" + """Generate a system prompt describing available tools and the snake_case protocol.""" if not tools: return "" @@ -359,10 +359,10 @@ def _build_tool_prompt( schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) - lines.append("Arguments JSON schema:") + lines.append("Parameters JSON schema:") lines.append(schema_text) else: - 
lines.append("Arguments JSON schema: {}") + lines.append("Parameters JSON schema: {}") if tool_choice == "none": lines.append( @@ -760,28 +760,28 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], - "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], + "starts": ["[tool_calls]"], + "ends": ["[/tool_calls]"], }, "ORPHAN": { - "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], - "ends": ["[/Call]", "\\[/Call\\]"], + "starts": ["[call:"], + "ends": ["[/call]"], }, "RESP": { - "starts": ["[ToolResults]", "\\[ToolResults\\]"], - "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], + "starts": ["[tool_results]"], + "ends": ["[/tool_results]"], }, - "ARG": { - "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], - "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], + "PARAM": { + "starts": ["[call_parameter:"], + "ends": ["[/call_parameter]"], }, "RESULT": { - "starts": ["[ToolResult]", "\\[ToolResult\\]"], - "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], + "starts": ["[tool_result]"], + "ends": ["[/tool_result]"], }, "TAG": { - "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], - "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], + "starts": ["<|im_start|>"], + "ends": ["<|im_end|>"], }, } @@ -794,15 +794,10 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "\\<|im\\_end|\\>", - "[/Call]", - "\\[/Call\\]", - "[/ToolCalls]", - "\\[/ToolCalls\\]", - "[/CallParameter]", - "\\[/CallParameter\\]", - "[/ToolResult]", - "\\[/ToolResult\\]", + "[/call]", + "[/tool_calls]", + "[/call_parameter]", + "[/tool_result]", ] self.WATCH_MARKERS = [] @@ -876,8 +871,8 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[-max_end_len:] break - elif self.state == "IN_ARG": - cfg = self.STATE_MARKERS["ARG"] + elif self.state == "IN_PARAM": + cfg = self.STATE_MARKERS["PARAM"] found_idx, found_len = -1, 0 for p in cfg["ends"]: idx = buf_low.find(p.lower()) @@ -1003,7 +998,7 @@ def process(self, chunk: str) -> str: def flush(self) -> str: """Release remaining buffer content and perform final cleanup at stream end.""" res = "" - if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): + if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_PARAM", "IN_RESULT"): res = "" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index 6ab80cd..1c826e5 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -70,8 +70,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a Message into Gemini API format using the PascalCase technical protocol. - Extracts text, handles files, and appends ToolCalls/ToolResults blocks. + Process a Message into Gemini API format using the snake_case technical protocol. + Extracts text, handles files, and appends tool_calls/tool_results blocks. 
""" files: list[Path | str] = [] text_fragments: list[str] = [] @@ -109,34 +109,32 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() - res_block = ( - f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]" - ) + res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]" if wrap_tool: - text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] + text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"] else: text_fragments = [res_block] if message.tool_calls: tool_blocks: list[str] = [] for call in message.tool_calls: - args_text = call.function.arguments.strip() - formatted_args = "@args\n" + params_text = call.function.arguments.strip() + formatted_params = "" try: - parsed_args = orjson.loads(args_text) - if isinstance(parsed_args, dict): - for k, v in parsed_args.items(): + parsed_params = orjson.loads(params_text) + if isinstance(parsed_params, dict): + for k, v in parsed_params.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n" + formatted_params += f"[call_parameter:{k}]{val_str}[/call_parameter]\n" else: - formatted_args += args_text + formatted_params += params_text except orjson.JSONDecodeError: - formatted_args += args_text + formatted_params += params_text - tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]") + tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") if tool_blocks: - tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" + tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -168,7 +166,7 @@ async def process_conversation( i += 1 combined_tool_content = "\n".join(tool_blocks) - wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" + wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/services/lmdb.py b/app/services/lmdb.py index ad92bbf..2f59662 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -99,17 +99,17 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str: if message.tool_calls: calls_data = [] for tc in message.tool_calls: - args = tc.function.arguments or "{}" + params = tc.function.arguments or "{}" try: - parsed = orjson.loads(args) - canon_args = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") + parsed = orjson.loads(params) + canon_params = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: - canon_args = args + canon_params = params calls_data.append( { "name": tc.function.name, - "arguments": canon_args, + "arguments": canon_params, } ) calls_data.sort(key=lambda x: (x["name"], x["arguments"])) @@ -149,7 +149,7 @@ def __init__( """ Initialize LMDB store. - Args: + Params: db_path: Path to LMDB database directory max_db_size: Maximum database size in bytes (default: 256 MB) retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup) @@ -194,7 +194,7 @@ def _get_transaction(self, write: bool = False): """ Context manager for LMDB transactions. 
- Args: + Params: write: Whether the transaction should be writable. """ if not self._env: @@ -265,7 +265,7 @@ def store( """ Store a conversation model in LMDB. - Args: + Params: conv: Conversation model to store custom_key: Optional custom key, if not provided, hash will be used @@ -313,7 +313,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: """ Retrieve conversation data by key. - Args: + Params: key: Storage key (hash or custom key) Returns: @@ -342,7 +342,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt Search conversation data by message list. Tries raw matching, then sanitized matching, and finally fuzzy matching. - Args: + Params: model: Model name messages: List of messages to match @@ -382,7 +382,7 @@ def _find_by_message_list( """ Internal find implementation based on a message list. - Args: + Params: model: Model name messages: Message list to hash fuzzy: Whether to use fuzzy hashing diff --git a/app/utils/helper.py b/app/utils/helper.py index ae96f05..752aed9 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,48 +19,45 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" - "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" + "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" - "[ToolCalls]\n" - "[Call:tool_name]\n" - "[CallParameter:arg_name]\n" + "[tool_calls]\n" + "[call:tool_name]\n" + "[call_parameter:parameter_name]\n" "```\n" "value\n" "```\n" - "[/CallParameter]\n" - "[/Call]\n" - "[/ToolCalls]\n\n" + "[/call_parameter]\n" + "[/call]\n" + "[/tool_calls]\n\n" "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" -) -TOOL_BLOCK_RE = re.compile( - r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE + "Multiple tools: List them sequentially inside one [tool_calls] block. 
No tool: respond naturally, NEVER use protocol tags.\n" ) +TOOL_BLOCK_RE = re.compile(r"\[tool_calls]\s*(.*?)\s*\[/tool_calls]", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( - r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE + r"\[call:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE ) RESPONSE_BLOCK_RE = re.compile( - r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]", + r"\[tool_results]\s*(.*?)\s*\[/tool_results]", re.DOTALL | re.IGNORECASE, ) RESPONSE_ITEM_RE = re.compile( - r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]", + r"\[result:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/result]", re.DOTALL | re.IGNORECASE, ) -TAGGED_ARG_RE = re.compile( - r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]", +TAGGED_PARAM_RE = re.compile( + r"\[call_parameter:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call_parameter]", re.DOTALL | re.IGNORECASE, ) TAGGED_RESULT_RE = re.compile( - r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]", + r"\[tool_result]\s*(.*?)\s*\[/tool_result]", re.DOTALL | re.IGNORECASE, ) -CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE) -CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE) -CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE) -COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") +CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE) +CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE) +CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE) FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -134,15 +131,14 @@ def _strip_param_fences(s: str) -> str: return s[n:-n].strip() -def unescape_llm_text(s: str) -> str: +def _repair_param_value(s: str) -> str: """ - Standardize and repair LLM-generated text fragments (unescaping, link normalization) + Standardize and repair LLM-generated parameter values to ensure compatibility with specialized clients like Roo Code. """ if not s: return "" - s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s) return s @@ -248,7 +244,7 @@ def strip_system_hints(text: str) -> str: cleaned = TOOL_CALL_RE.sub("", cleaned) cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) cleaned = RESPONSE_ITEM_RE.sub("", cleaned) - cleaned = TAGGED_ARG_RE.sub("", cleaned) + cleaned = TAGGED_PARAM_RE.sub("", cleaned) cleaned = TAGGED_RESULT_RE.sub("", cleaned) return cleaned @@ -257,38 +253,37 @@ def strip_system_hints(text: str) -> str: def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]: """ Extract tool metadata and return text stripped of technical markers. - Arguments are parsed into JSON and assigned deterministic call IDs. + Parameters are parsed into JSON and assigned deterministic call IDs. 
""" if not text: return text, [] tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str) -> None: + def _create_tool_call(name: str, raw_params: str) -> None: if not extract: return + + name = name.strip() if not name: logger.warning("Encountered tool_call without a function name.") return - name = unescape_llm_text(name.strip()) - raw_args = unescape_llm_text(raw_args) - - arg_matches = TAGGED_ARG_RE.findall(raw_args) - if arg_matches: - args_dict = { - arg_name.strip(): _strip_param_fences(arg_value) - for arg_name, arg_value in arg_matches + param_matches = TAGGED_PARAM_RE.findall(raw_params) + if param_matches: + params_dict = { + param_name.strip(): _repair_param_value(_strip_param_fences(param_value)) + for param_name, param_value in param_matches } - arguments = orjson.dumps(args_dict).decode("utf-8") - logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}") + arguments = orjson.dumps(params_dict).decode("utf-8") + logger.debug(f"Successfully parsed {len(params_dict)} parameters for tool: {name}") else: - cleaned_raw = raw_args.strip() + cleaned_raw = raw_params.strip() if not cleaned_raw: - logger.debug(f"Successfully parsed 0 arguments for tool: {name}") + logger.debug(f"Successfully parsed 0 parameters for tool: {name}") else: logger.warning( - f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" + f"Malformed parameters for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" ) arguments = "{}" @@ -323,7 +318,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: def text_from_message(message: Message) -> str: - """Concatenate text and tool arguments from a message for token estimation.""" + """Concatenate text and tool parameters from a message for token estimation.""" base_text = "" if isinstance(message.content, str): base_text = message.content @@ -335,8 +330,8 @@ def text_from_message(message: Message) -> str: base_text = "" if message.tool_calls: - tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) - base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text + tool_param_text = "".join(call.function.arguments or "" for call in message.tool_calls) + base_text = f"{base_text}\n{tool_param_text}" if base_text else tool_param_text return base_text diff --git a/app/utils/logging.py b/app/utils/logging.py index 87fcc7f..da417f1 100644 --- a/app/utils/logging.py +++ b/app/utils/logging.py @@ -15,7 +15,7 @@ def setup_logging( """ Setup loguru logging configuration to unify all project logging output - Args: + Params: level: Log level diagnose: Whether to enable diagnostic information backtrace: Whether to enable backtrace information From 8792948642f510ca294323c12e90232cb1926e06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 22:35:39 +0700 Subject: [PATCH 122/139] Refactor: Remove all escape logic handlers. - Change tool call tags to snake_case. 
--- app/services/client.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 1c826e5..90474ad 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -125,11 +125,14 @@ async def process_message( if isinstance(parsed_params, dict): for k, v in parsed_params.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_params += f"[call_parameter:{k}]{val_str}[/call_parameter]\n" + # Wrap value in triple backticks to match the required protocol hint + formatted_params += ( + f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" + ) else: - formatted_params += params_text + formatted_params += f"```\n{params_text}\n```\n" except orjson.JSONDecodeError: - formatted_params += params_text + formatted_params += f"```\n{params_text}\n```\n" tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") From 5482e0cd51fa8051a92dafb061fa4677e108b10f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 22:56:47 +0700 Subject: [PATCH 123/139] Refactor: Remove all escape logic handlers. - Change tool call tags to snake_case. --- app/services/client.py | 1 - app/utils/helper.py | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 90474ad..9a2742b 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -125,7 +125,6 @@ async def process_message( if isinstance(parsed_params, dict): for k, v in parsed_params.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - # Wrap value in triple backticks to match the required protocol hint formatted_params += ( f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" ) diff --git a/app/utils/helper.py b/app/utils/helper.py index 752aed9..3576667 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -58,6 +58,7 @@ CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE) CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE) CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE) +COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -131,14 +132,15 @@ def _strip_param_fences(s: str) -> str: return s[n:-n].strip() -def _repair_param_value(s: str) -> str: +def repair_param_value(s: str) -> str: """ - Standardize and repair LLM-generated parameter values + Standardize and repair LLM-generated values (unescaping, link normalization) to ensure compatibility with specialized clients like Roo Code. 
""" if not s: return "" + s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s) return s @@ -264,7 +266,9 @@ def _create_tool_call(name: str, raw_params: str) -> None: if not extract: return - name = name.strip() + name = repair_param_value(name.strip()) + raw_params = repair_param_value(raw_params) + if not name: logger.warning("Encountered tool_call without a function name.") return @@ -272,7 +276,7 @@ def _create_tool_call(name: str, raw_params: str) -> None: param_matches = TAGGED_PARAM_RE.findall(raw_params) if param_matches: params_dict = { - param_name.strip(): _repair_param_value(_strip_param_fences(param_value)) + param_name.strip(): _strip_param_fences(param_value) for param_name, param_value in param_matches } arguments = orjson.dumps(params_dict).decode("utf-8") From b324aef75d5a868d9ea409dffa2adc173f69c63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:10:53 +0700 Subject: [PATCH 124/139] Revert "Refactor: Remove all escape logic handlers." This reverts commit ce43d63c --- app/server/chat.py | 61 ++++++++++++++++--------------- app/services/client.py | 34 +++++++++--------- app/services/lmdb.py | 22 ++++++------ app/utils/helper.py | 81 +++++++++++++++++++++--------------------- app/utils/logging.py | 2 +- 5 files changed, 103 insertions(+), 97 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 98277d6..881f7c9 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -121,18 +121,18 @@ def _calculate_usage( ) -> tuple[int, int, int]: """Calculate prompt, completion and total tokens consistently.""" prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_params_text = "" + tool_args_text = "" if tool_calls: for call in tool_calls: if hasattr(call, "function"): - tool_params_text += call.function.arguments or "" + tool_args_text += call.function.arguments or "" elif isinstance(call, dict): - tool_params_text += call.get("function", {}).get("arguments", "") + tool_args_text += call.get("function", {}).get("arguments", "") completion_basis = assistant_text or "" - if tool_params_text: + if tool_args_text: completion_basis = ( - f"{completion_basis}\n{tool_params_text}" if completion_basis else tool_params_text + f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text ) completion_tokens = estimate_tokens(completion_basis) @@ -343,7 +343,7 @@ def _build_tool_prompt( tools: list[Tool], tool_choice: str | ToolChoiceFunction | None, ) -> str: - """Generate a system prompt describing available tools and the snake_case protocol.""" + """Generate a system prompt describing available tools and the PascalCase protocol.""" if not tools: return "" @@ -359,10 +359,10 @@ def _build_tool_prompt( schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) - lines.append("Parameters JSON schema:") + lines.append("Arguments JSON schema:") lines.append(schema_text) else: - lines.append("Parameters JSON schema: {}") + lines.append("Arguments JSON schema: {}") if tool_choice == "none": lines.append( @@ -760,28 +760,28 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[tool_calls]"], - "ends": ["[/tool_calls]"], + "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], + "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], }, "ORPHAN": { - "starts": ["[call:"], - "ends": ["[/call]"], + "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], + "ends": ["[/Call]", "\\[/Call\\]"], }, "RESP": { - "starts": 
["[tool_results]"], - "ends": ["[/tool_results]"], + "starts": ["[ToolResults]", "\\[ToolResults\\]"], + "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], }, - "PARAM": { - "starts": ["[call_parameter:"], - "ends": ["[/call_parameter]"], + "ARG": { + "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], + "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], }, "RESULT": { - "starts": ["[tool_result]"], - "ends": ["[/tool_result]"], + "starts": ["[ToolResult]", "\\[ToolResult\\]"], + "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], }, "TAG": { - "starts": ["<|im_start|>"], - "ends": ["<|im_end|>"], + "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], + "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], }, } @@ -794,10 +794,15 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "[/call]", - "[/tool_calls]", - "[/call_parameter]", - "[/tool_result]", + "\\<|im\\_end|\\>", + "[/Call]", + "\\[/Call\\]", + "[/ToolCalls]", + "\\[/ToolCalls\\]", + "[/CallParameter]", + "\\[/CallParameter\\]", + "[/ToolResult]", + "\\[/ToolResult\\]", ] self.WATCH_MARKERS = [] @@ -871,8 +876,8 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[-max_end_len:] break - elif self.state == "IN_PARAM": - cfg = self.STATE_MARKERS["PARAM"] + elif self.state == "IN_ARG": + cfg = self.STATE_MARKERS["ARG"] found_idx, found_len = -1, 0 for p in cfg["ends"]: idx = buf_low.find(p.lower()) @@ -998,7 +1003,7 @@ def process(self, chunk: str) -> str: def flush(self) -> str: """Release remaining buffer content and perform final cleanup at stream end.""" res = "" - if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_PARAM", "IN_RESULT"): + if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): res = "" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index 9a2742b..6ab80cd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -70,8 +70,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a Message into Gemini API format using the snake_case technical protocol. - Extracts text, handles files, and appends tool_calls/tool_results blocks. + Process a Message into Gemini API format using the PascalCase technical protocol. + Extracts text, handles files, and appends ToolCalls/ToolResults blocks. 
""" files: list[Path | str] = [] text_fragments: list[str] = [] @@ -109,34 +109,34 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() - res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]" + res_block = ( + f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]" + ) if wrap_tool: - text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"] + text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] else: text_fragments = [res_block] if message.tool_calls: tool_blocks: list[str] = [] for call in message.tool_calls: - params_text = call.function.arguments.strip() - formatted_params = "" + args_text = call.function.arguments.strip() + formatted_args = "@args\n" try: - parsed_params = orjson.loads(params_text) - if isinstance(parsed_params, dict): - for k, v in parsed_params.items(): + parsed_args = orjson.loads(args_text) + if isinstance(parsed_args, dict): + for k, v in parsed_args.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_params += ( - f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" - ) + formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n" else: - formatted_params += f"```\n{params_text}\n```\n" + formatted_args += args_text except orjson.JSONDecodeError: - formatted_params += f"```\n{params_text}\n```\n" + formatted_args += args_text - tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") + tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]") if tool_blocks: - tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]" + tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -168,7 +168,7 @@ async def process_conversation( i += 1 combined_tool_content = "\n".join(tool_blocks) - wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]" + wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 2f59662..ad92bbf 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -99,17 +99,17 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str: if message.tool_calls: calls_data = [] for tc in message.tool_calls: - params = tc.function.arguments or "{}" + args = tc.function.arguments or "{}" try: - parsed = orjson.loads(params) - canon_params = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") + parsed = orjson.loads(args) + canon_args = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: - canon_params = params + canon_args = args calls_data.append( { "name": tc.function.name, - "arguments": canon_params, + "arguments": canon_args, } ) calls_data.sort(key=lambda x: (x["name"], x["arguments"])) @@ -149,7 +149,7 @@ def __init__( """ Initialize LMDB store. 
- Params: + Args: db_path: Path to LMDB database directory max_db_size: Maximum database size in bytes (default: 256 MB) retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup) @@ -194,7 +194,7 @@ def _get_transaction(self, write: bool = False): """ Context manager for LMDB transactions. - Params: + Args: write: Whether the transaction should be writable. """ if not self._env: @@ -265,7 +265,7 @@ def store( """ Store a conversation model in LMDB. - Params: + Args: conv: Conversation model to store custom_key: Optional custom key, if not provided, hash will be used @@ -313,7 +313,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: """ Retrieve conversation data by key. - Params: + Args: key: Storage key (hash or custom key) Returns: @@ -342,7 +342,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt Search conversation data by message list. Tries raw matching, then sanitized matching, and finally fuzzy matching. - Params: + Args: model: Model name messages: List of messages to match @@ -382,7 +382,7 @@ def _find_by_message_list( """ Internal find implementation based on a message list. - Params: + Args: model: Model name messages: Message list to hash fuzzy: Whether to use fuzzy hashing diff --git a/app/utils/helper.py b/app/utils/helper.py index 3576667..ae96f05 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,45 +19,47 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" - "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n" + "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" - "[tool_calls]\n" - "[call:tool_name]\n" - "[call_parameter:parameter_name]\n" + "[ToolCalls]\n" + "[Call:tool_name]\n" + "[CallParameter:arg_name]\n" "```\n" "value\n" "```\n" - "[/call_parameter]\n" - "[/call]\n" - "[/tool_calls]\n\n" + "[/CallParameter]\n" + "[/Call]\n" + "[/ToolCalls]\n\n" "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [tool_calls] block. No tool: respond naturally, NEVER use protocol tags.\n" + "Multiple tools: List them sequentially inside one [ToolCalls] block. 
No tool: respond naturally, NEVER use protocol tags.\n" +) +TOOL_BLOCK_RE = re.compile( + r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE ) -TOOL_BLOCK_RE = re.compile(r"\[tool_calls]\s*(.*?)\s*\[/tool_calls]", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( - r"\[call:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE + r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE ) RESPONSE_BLOCK_RE = re.compile( - r"\[tool_results]\s*(.*?)\s*\[/tool_results]", + r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]", re.DOTALL | re.IGNORECASE, ) RESPONSE_ITEM_RE = re.compile( - r"\[result:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/result]", + r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]", re.DOTALL | re.IGNORECASE, ) -TAGGED_PARAM_RE = re.compile( - r"\[call_parameter:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call_parameter]", +TAGGED_ARG_RE = re.compile( + r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]", re.DOTALL | re.IGNORECASE, ) TAGGED_RESULT_RE = re.compile( - r"\[tool_result]\s*(.*?)\s*\[/tool_result]", + r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]", re.DOTALL | re.IGNORECASE, ) -CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE) -CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE) -CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE) +CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE) +CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE) +CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE) COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", @@ -132,9 +134,9 @@ def _strip_param_fences(s: str) -> str: return s[n:-n].strip() -def repair_param_value(s: str) -> str: +def unescape_llm_text(s: str) -> str: """ - Standardize and repair LLM-generated values (unescaping, link normalization) + Standardize and repair LLM-generated text fragments (unescaping, link normalization) to ensure compatibility with specialized clients like Roo Code. """ if not s: @@ -246,7 +248,7 @@ def strip_system_hints(text: str) -> str: cleaned = TOOL_CALL_RE.sub("", cleaned) cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) cleaned = RESPONSE_ITEM_RE.sub("", cleaned) - cleaned = TAGGED_PARAM_RE.sub("", cleaned) + cleaned = TAGGED_ARG_RE.sub("", cleaned) cleaned = TAGGED_RESULT_RE.sub("", cleaned) return cleaned @@ -255,39 +257,38 @@ def strip_system_hints(text: str) -> str: def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]: """ Extract tool metadata and return text stripped of technical markers. - Parameters are parsed into JSON and assigned deterministic call IDs. + Arguments are parsed into JSON and assigned deterministic call IDs. 
""" if not text: return text, [] tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_params: str) -> None: + def _create_tool_call(name: str, raw_args: str) -> None: if not extract: return - - name = repair_param_value(name.strip()) - raw_params = repair_param_value(raw_params) - if not name: logger.warning("Encountered tool_call without a function name.") return - param_matches = TAGGED_PARAM_RE.findall(raw_params) - if param_matches: - params_dict = { - param_name.strip(): _strip_param_fences(param_value) - for param_name, param_value in param_matches + name = unescape_llm_text(name.strip()) + raw_args = unescape_llm_text(raw_args) + + arg_matches = TAGGED_ARG_RE.findall(raw_args) + if arg_matches: + args_dict = { + arg_name.strip(): _strip_param_fences(arg_value) + for arg_name, arg_value in arg_matches } - arguments = orjson.dumps(params_dict).decode("utf-8") - logger.debug(f"Successfully parsed {len(params_dict)} parameters for tool: {name}") + arguments = orjson.dumps(args_dict).decode("utf-8") + logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}") else: - cleaned_raw = raw_params.strip() + cleaned_raw = raw_args.strip() if not cleaned_raw: - logger.debug(f"Successfully parsed 0 parameters for tool: {name}") + logger.debug(f"Successfully parsed 0 arguments for tool: {name}") else: logger.warning( - f"Malformed parameters for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" + f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" ) arguments = "{}" @@ -322,7 +323,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: def text_from_message(message: Message) -> str: - """Concatenate text and tool parameters from a message for token estimation.""" + """Concatenate text and tool arguments from a message for token estimation.""" base_text = "" if isinstance(message.content, str): base_text = message.content @@ -334,8 +335,8 @@ def text_from_message(message: Message) -> str: base_text = "" if message.tool_calls: - tool_param_text = "".join(call.function.arguments or "" for call in message.tool_calls) - base_text = f"{base_text}\n{tool_param_text}" if base_text else tool_param_text + tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) + base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text return base_text diff --git a/app/utils/logging.py b/app/utils/logging.py index da417f1..87fcc7f 100644 --- a/app/utils/logging.py +++ b/app/utils/logging.py @@ -15,7 +15,7 @@ def setup_logging( """ Setup loguru logging configuration to unify all project logging output - Params: + Args: level: Log level diagnose: Whether to enable diagnostic information backtrace: Whether to enable backtrace information From 8ef108d4827c29c617fbb968f2c09d7fe8d83fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:11:36 +0700 Subject: [PATCH 125/139] Refactor: Rewrite the function call format to match the client's complex argument structure --- app/services/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/client.py b/app/services/client.py index 6ab80cd..0b2aea5 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -121,7 +121,7 @@ async def process_message( tool_blocks: list[str] = [] for call in message.tool_calls: args_text = call.function.arguments.strip() - formatted_args = "@args\n" + formatted_args = "" try: parsed_args = 
orjson.loads(args_text) if isinstance(parsed_args, dict): From 30043e585650c11876312b6dadc2a52eaaacdf60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:14:45 +0700 Subject: [PATCH 126/139] Refactor: Rewrite the function call format to match the client's complex argument structure --- app/utils/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index ae96f05..0b91993 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -25,7 +25,7 @@ "EXACT SYNTAX TEMPLATE:\n" "[ToolCalls]\n" "[Call:tool_name]\n" - "[CallParameter:arg_name]\n" + "[CallParameter:parameter_name]\n" "```\n" "value\n" "```\n" From bc888d1fbb8a076074486293531441553c956028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:22:26 +0700 Subject: [PATCH 127/139] Refactor: Rewrite the function call format to match the client's complex argument structure --- app/services/client.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 0b2aea5..70dfce9 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -120,20 +120,25 @@ async def process_message( if message.tool_calls: tool_blocks: list[str] = [] for call in message.tool_calls: - args_text = call.function.arguments.strip() - formatted_args = "" - try: - parsed_args = orjson.loads(args_text) - if isinstance(parsed_args, dict): - for k, v in parsed_args.items(): - val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n" - else: - formatted_args += args_text - except orjson.JSONDecodeError: - formatted_args += args_text - - tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]") + params_text = call.function.arguments.strip() + formatted_params = "" + if params_text: + try: + parsed_params = orjson.loads(params_text) + if isinstance(parsed_params, dict): + for k, v in parsed_params.items(): + val_str = ( + v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") + ) + formatted_params += ( + f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n" + ) + else: + formatted_params += f"```\n{params_text}\n```\n" + except orjson.JSONDecodeError: + formatted_params += f"```\n{params_text}\n```\n" + + tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]") if tool_blocks: tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" From d5349a096f9564d99b7939c6a32fae1193db0b8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:29:23 +0700 Subject: [PATCH 128/139] Refactor: Rewrite the function call format to match the client's complex argument structure --- app/utils/helper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 0b91993..82bb562 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -134,7 +134,7 @@ def _strip_param_fences(s: str) -> str: return s[n:-n].strip() -def unescape_llm_text(s: str) -> str: +def _repair_param_value(s: str) -> str: """ Standardize and repair LLM-generated text fragments (unescaping, link normalization) to ensure compatibility with specialized clients like Roo Code. 
@@ -271,8 +271,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning("Encountered tool_call without a function name.") return - name = unescape_llm_text(name.strip()) - raw_args = unescape_llm_text(raw_args) + name = _repair_param_value(name.strip()) + raw_args = _repair_param_value(raw_args) arg_matches = TAGGED_ARG_RE.findall(raw_args) if arg_matches: From 7e217e917778211ad002511d1323739ed2b0e293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 14 Feb 2026 00:28:49 +0700 Subject: [PATCH 129/139] Reattempt changing tool call tags to `snake_case`. --- app/server/chat.py | 50 ++++++++++++++++++++++++------------------ app/services/client.py | 20 +++++++---------- app/utils/helper.py | 40 +++++++++++++++++++-------------- 3 files changed, 60 insertions(+), 50 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 881f7c9..1211c99 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -760,28 +760,32 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], - "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], + "starts": ["[tool_calls]", "\\[tool\\_calls\\]"], + "ends": ["[/tool_calls]", "\\[\\/tool\\_calls\\]"], }, "ORPHAN": { - "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], - "ends": ["[/Call]", "\\[/Call\\]"], + "starts": ["[call:", "\\[call\\:"], + "ends": ["[/call]", "\\[\\/call\\]"], }, "RESP": { - "starts": ["[ToolResults]", "\\[ToolResults\\]"], - "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], + "starts": ["[tool_results]", "\\[tool\\_results\\]"], + "ends": ["[/tool_results]", "\\[\\/tool\\_results\\]"], }, "ARG": { - "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], - "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], + "starts": ["[call_parameter:", "\\[call\\_parameter\\:"], + "ends": ["[/call_parameter]", "\\[\\/call\\_parameter\\]"], }, "RESULT": { - "starts": ["[ToolResult]", "\\[ToolResult\\]"], - "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], + "starts": ["[tool_result]", "\\[tool\\_result\\]"], + "ends": ["[/tool_result]", "\\[\\/tool\\_result\\]"], + }, + "ITEM": { + "starts": ["[result:", "\\[result\\:"], + "ends": ["[/result]", "\\[\\/result\\]"], }, "TAG": { - "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], - "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], + "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"], + "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"], }, } @@ -794,15 +798,19 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "\\<|im\\_end|\\>", - "[/Call]", - "\\[/Call\\]", - "[/ToolCalls]", - "\\[/ToolCalls\\]", - "[/CallParameter]", - "\\[/CallParameter\\]", - "[/ToolResult]", - "\\[/ToolResult\\]", + "\\<\\|im\\_end\\|\\>", + "[/call]", + "\\[\\/call\\]", + "[/tool_calls]", + "\\[\\/tool\\_calls\\]", + "[/call_parameter]", + "\\[\\/call\\_parameter\\]", + "[/tool_result]", + "\\[\\/tool\\_result\\]", + "[/tool_results]", + "\\[\\/tool\\_results\\]", + "[/result]", + "\\[\\/result\\]", ] self.WATCH_MARKERS = [] diff --git a/app/services/client.py b/app/services/client.py index 70dfce9..05e7415 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -70,8 +70,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a Message into Gemini API format using the PascalCase technical protocol. - Extracts text, handles files, and appends ToolCalls/ToolResults blocks. 
+ Process a Message into Gemini API format using the snake_case technical protocol. + Extracts text, handles files, and appends tool_calls/tool_results blocks. """ files: list[Path | str] = [] text_fragments: list[str] = [] @@ -109,11 +109,9 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() - res_block = ( - f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]" - ) + res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]" if wrap_tool: - text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] + text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"] else: text_fragments = [res_block] @@ -130,18 +128,16 @@ async def process_message( val_str = ( v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") ) - formatted_params += ( - f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n" - ) + formatted_params += f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" else: formatted_params += f"```\n{params_text}\n```\n" except orjson.JSONDecodeError: formatted_params += f"```\n{params_text}\n```\n" - tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]") + tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") if tool_blocks: - tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" + tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -173,7 +169,7 @@ async def process_conversation( i += 1 combined_tool_content = "\n".join(tool_blocks) - wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" + wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/utils/helper.py b/app/utils/helper.py index 82bb562..a1292d3 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,47 +19,53 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" - "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" + "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" - "[ToolCalls]\n" - "[Call:tool_name]\n" - "[CallParameter:parameter_name]\n" + "[tool_calls]\n" + "[call:tool_name]\n" + "[call_parameter:parameter_name]\n" "```\n" "value\n" "```\n" - "[/CallParameter]\n" - "[/Call]\n" - "[/ToolCalls]\n\n" + "[/call_parameter]\n" + "[/call]\n" + "[/tool_calls]\n\n" "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" + "Multiple tools: List them sequentially inside one [tool_calls] block. 
No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[tool_calls]|\\\[tool\\_calls\\])\s*(.*?)\s*(?:\[/tool_calls]|\\\[\\/tool\\_calls\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[call:|\\\[call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/call]|\\\[\\/call\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
+    r"(?:\[tool_results]|\\\[tool\\_results\\])\s*(.*?)\s*(?:\[/tool_results]|\\\[\\/tool\\_results\\])",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
+    r"(?:\[result:|\\\[result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/result]|\\\[\\/result\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
+    r"(?:\[call_parameter:|\\\[call\\_parameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<value>.*?)\s*(?:\[/call_parameter]|\\\[\\/call\\_parameter\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
+    r"(?:\[tool_result]|\\\[tool\\_result\\])\s*(.*?)\s*(?:\[/tool_result]|\\\[\\/tool\\_result\\])",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(
+    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
+)
+CHATML_START_RE = re.compile(
+    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
+)
+CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",

From fe30a5d47001c4956f9e64a3eda4cf4c7c4fc9db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 00:51:42 +0700
Subject: [PATCH 130/139] Revert "Reattempt changing tool call tags to `snake_case`."

This reverts commit 7e217e917778211ad002511d1323739ed2b0e293.
--- app/server/chat.py | 50 ++++++++++++++++++------------------------ app/services/client.py | 20 ++++++++++------- app/utils/helper.py | 40 ++++++++++++++------------------- 3 files changed, 50 insertions(+), 60 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 1211c99..881f7c9 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -760,32 +760,28 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[tool_calls]", "\\[tool\\_calls\\]"], - "ends": ["[/tool_calls]", "\\[\\/tool\\_calls\\]"], + "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], + "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], }, "ORPHAN": { - "starts": ["[call:", "\\[call\\:"], - "ends": ["[/call]", "\\[\\/call\\]"], + "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], + "ends": ["[/Call]", "\\[/Call\\]"], }, "RESP": { - "starts": ["[tool_results]", "\\[tool\\_results\\]"], - "ends": ["[/tool_results]", "\\[\\/tool\\_results\\]"], + "starts": ["[ToolResults]", "\\[ToolResults\\]"], + "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], }, "ARG": { - "starts": ["[call_parameter:", "\\[call\\_parameter\\:"], - "ends": ["[/call_parameter]", "\\[\\/call\\_parameter\\]"], + "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], + "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], }, "RESULT": { - "starts": ["[tool_result]", "\\[tool\\_result\\]"], - "ends": ["[/tool_result]", "\\[\\/tool\\_result\\]"], - }, - "ITEM": { - "starts": ["[result:", "\\[result\\:"], - "ends": ["[/result]", "\\[\\/result\\]"], + "starts": ["[ToolResult]", "\\[ToolResult\\]"], + "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], }, "TAG": { - "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"], - "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"], + "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], + "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], }, } @@ -798,19 +794,15 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "\\<\\|im\\_end\\|\\>", - "[/call]", - "\\[\\/call\\]", - "[/tool_calls]", - "\\[\\/tool\\_calls\\]", - "[/call_parameter]", - "\\[\\/call\\_parameter\\]", - "[/tool_result]", - "\\[\\/tool\\_result\\]", - "[/tool_results]", - "\\[\\/tool\\_results\\]", - "[/result]", - "\\[\\/result\\]", + "\\<|im\\_end|\\>", + "[/Call]", + "\\[/Call\\]", + "[/ToolCalls]", + "\\[/ToolCalls\\]", + "[/CallParameter]", + "\\[/CallParameter\\]", + "[/ToolResult]", + "\\[/ToolResult\\]", ] self.WATCH_MARKERS = [] diff --git a/app/services/client.py b/app/services/client.py index 05e7415..70dfce9 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -70,8 +70,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a Message into Gemini API format using the snake_case technical protocol. - Extracts text, handles files, and appends tool_calls/tool_results blocks. + Process a Message into Gemini API format using the PascalCase technical protocol. + Extracts text, handles files, and appends ToolCalls/ToolResults blocks. 
""" files: list[Path | str] = [] text_fragments: list[str] = [] @@ -109,9 +109,11 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() - res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]" + res_block = ( + f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]" + ) if wrap_tool: - text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"] + text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] else: text_fragments = [res_block] @@ -128,16 +130,18 @@ async def process_message( val_str = ( v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") ) - formatted_params += f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" + formatted_params += ( + f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n" + ) else: formatted_params += f"```\n{params_text}\n```\n" except orjson.JSONDecodeError: formatted_params += f"```\n{params_text}\n```\n" - tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") + tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]") if tool_blocks: - tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]" + tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -169,7 +173,7 @@ async def process_conversation( i += 1 combined_tool_content = "\n".join(tool_blocks) - wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]" + wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/utils/helper.py b/app/utils/helper.py index a1292d3..82bb562 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,53 +19,47 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" - "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n" + "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" - "[tool_calls]\n" - "[call:tool_name]\n" - "[call_parameter:parameter_name]\n" + "[ToolCalls]\n" + "[Call:tool_name]\n" + "[CallParameter:parameter_name]\n" "```\n" "value\n" "```\n" - "[/call_parameter]\n" - "[/call]\n" - "[/tool_calls]\n\n" + "[/CallParameter]\n" + "[/Call]\n" + "[/ToolCalls]\n\n" "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [tool_calls] block. No tool: respond naturally, NEVER use protocol tags.\n" + "Multiple tools: List them sequentially inside one [ToolCalls] block. 
No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"(?:\[tool_calls]|\\\[tool\\_calls\\])\s*(.*?)\s*(?:\[/tool_calls]|\\\[\\/tool\\_calls\\])",
-    re.DOTALL | re.IGNORECASE,
+    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
 )
 TOOL_CALL_RE = re.compile(
-    r"(?:\[call:|\\\[call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/call]|\\\[\\/call\\])",
-    re.DOTALL | re.IGNORECASE,
+    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"(?:\[tool_results]|\\\[tool\\_results\\])\s*(.*?)\s*(?:\[/tool_results]|\\\[\\/tool\\_results\\])",
+    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"(?:\[result:|\\\[result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/result]|\\\[\\/result\\])",
+    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"(?:\[call_parameter:|\\\[call\\_parameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<value>.*?)\s*(?:\[/call_parameter]|\\\[\\/call\\_parameter\\])",
+    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"(?:\[tool_result]|\\\[tool\\_result\\])\s*(.*?)\s*(?:\[/tool_result]|\\\[\\/tool\\_result\\])",
+    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(
-    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
-)
-CHATML_START_RE = re.compile(
-    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
-)
-CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
+CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
+CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",

From 93e9ccdad4a55e1ed99cb2037f45aeba10e32226 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 01:18:56 +0700
Subject: [PATCH 131/139] Refactor: Handle all escape tags

---
 app/server/chat.py   |  38 +++++++------
 app/services/lmdb.py | 105 ++++++++++++++++++++++----------------------
 app/utils/helper.py  |  35 ++++++++++-----
 3 files changed, 101 insertions(+), 77 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 881f7c9..934091b 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -270,7 +270,7 @@ def _persist_conversation(
             tool_calls=tool_calls or None,
         )
         full_history = [*messages, current_assistant_message]
-        cleaned_history = db.sanitize_assistant_messages(full_history)
+        cleaned_history = db.sanitize_messages(full_history)
 
         conv = ConversationInStore(
             model=model_name,
@@ -761,27 +761,31 @@ def __init__(self):
         self.STATE_MARKERS = {
             "TOOL": {
                 "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
-                "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"],
+                "ends": ["[/ToolCalls]", "\\[\\/ToolCalls\\]"],
             },
             "ORPHAN": {
"starts": ["[Call:", "\\[Call\\:"], + "ends": ["[/Call]", "\\[\\/Call\\]"], }, "RESP": { "starts": ["[ToolResults]", "\\[ToolResults\\]"], - "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], + "ends": ["[/ToolResults]", "\\[\\/ToolResults\\]"], }, "ARG": { - "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], - "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], + "starts": ["[CallParameter:", "\\[CallParameter\\:"], + "ends": ["[/CallParameter]", "\\[\\/CallParameter\\]"], }, "RESULT": { "starts": ["[ToolResult]", "\\[ToolResult\\]"], - "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], + "ends": ["[/ToolResult]", "\\[\\/ToolResult\\]"], + }, + "ITEM": { + "starts": ["[Result:", "\\[Result\\:"], + "ends": ["[/Result]", "\\[\\/Result\\]"], }, "TAG": { - "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], - "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], + "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"], + "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"], }, } @@ -794,15 +798,19 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "\\<|im\\_end|\\>", + "\\<\\|im\\_end\\|\\>", "[/Call]", - "\\[/Call\\]", + "\\[\\/Call\\]", "[/ToolCalls]", - "\\[/ToolCalls\\]", + "\\[\\/ToolCalls\\]", "[/CallParameter]", - "\\[/CallParameter\\]", + "\\[\\/CallParameter\\]", "[/ToolResult]", - "\\[/ToolResult\\]", + "\\[\\/ToolResult\\]", + "[/ToolResults]", + "\\[\\/ToolResults\\]", + "[/Result]", + "\\[\\/Result\\]", ] self.WATCH_MARKERS = [] diff --git a/app/services/lmdb.py b/app/services/lmdb.py index ad92bbf..dd4197a 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -16,6 +16,8 @@ extract_tool_calls, normalize_llm_text, remove_tool_call_blocks, + strip_system_hints, + unescape_text, ) from ..utils.singleton import Singleton @@ -38,6 +40,7 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: return None text = normalize_llm_text(text) + text = unescape_text(text) text = LMDBConversationStore.remove_think_tags(text) text = remove_tool_call_blocks(text) @@ -589,63 +592,61 @@ def remove_think_tags(text: str) -> str: return cleaned_content.strip() @staticmethod - def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: - """Clean assistant messages of internal markers and move tool calls to metadata.""" + def sanitize_messages(messages: list[Message]) -> list[Message]: + """Clean all messages of internal markers, hints and normalize tool calls.""" cleaned_messages = [] for msg in messages: - if msg.role == "assistant": - if isinstance(msg.content, str): - text = LMDBConversationStore.remove_think_tags(msg.content) - tool_calls = msg.tool_calls - if not tool_calls: - text, tool_calls = extract_tool_calls(text) - else: - text = remove_tool_call_blocks(text).strip() - - normalized_content = text.strip() or None - - if normalized_content != msg.content or tool_calls != msg.tool_calls: - cleaned_msg = msg.model_copy( + if isinstance(msg.content, str): + text = LMDBConversationStore.remove_think_tags(msg.content) + tool_calls = msg.tool_calls + + if msg.role == "assistant" and not tool_calls: + text, tool_calls = extract_tool_calls(text) + else: + text = strip_system_hints(text) + + normalized_content = text.strip() or None + + if normalized_content != msg.content or tool_calls != msg.tool_calls: + cleaned_msg = msg.model_copy( + update={ + "content": normalized_content, + "tool_calls": tool_calls or None, + } + ) + cleaned_messages.append(cleaned_msg) + else: + cleaned_messages.append(msg) + elif isinstance(msg.content, list): + 
new_content = [] + all_extracted_calls = list(msg.tool_calls or []) + changed = False + + for item in msg.content: + if isinstance(item, ContentItem) and item.type == "text" and item.text: + text = LMDBConversationStore.remove_think_tags(item.text) + if msg.role == "assistant" and not msg.tool_calls: + text, extracted = extract_tool_calls(text) + if extracted: + all_extracted_calls.extend(extracted) + changed = True + else: + text = strip_system_hints(text) + + if text != item.text: + changed = True + item = item.model_copy(update={"text": text.strip() or None}) + new_content.append(item) + + if changed: + cleaned_messages.append( + msg.model_copy( update={ - "content": normalized_content, - "tool_calls": tool_calls or None, + "content": new_content, + "tool_calls": all_extracted_calls or None, } ) - cleaned_messages.append(cleaned_msg) - else: - cleaned_messages.append(msg) - elif isinstance(msg.content, list): - new_content = [] - all_extracted_calls = list(msg.tool_calls or []) - changed = False - - for item in msg.content: - if isinstance(item, ContentItem) and item.type == "text" and item.text: - text = LMDBConversationStore.remove_think_tags(item.text) - if not msg.tool_calls: - text, extracted = extract_tool_calls(text) - if extracted: - all_extracted_calls.extend(extracted) - changed = True - else: - text = remove_tool_call_blocks(text).strip() - - if text != item.text: - changed = True - item = item.model_copy(update={"text": text.strip() or None}) - new_content.append(item) - - if changed: - cleaned_messages.append( - msg.model_copy( - update={ - "content": new_content, - "tool_calls": all_extracted_calls or None, - } - ) - ) - else: - cleaned_messages.append(msg) + ) else: cleaned_messages.append(msg) else: diff --git a/app/utils/helper.py b/app/utils/helper.py index 82bb562..9c75b45 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -36,30 +36,36 @@ "Multiple tools: List them sequentially inside one [ToolCalls] block. 
No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[ToolCalls]|\\\[ToolCalls\\])\s*(.*?)\s*(?:\[/ToolCalls]|\\\[\\/ToolCalls\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[Call:|\\\[Call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/Call]|\\\[\\/Call\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
+    r"(?:\[ToolResults]|\\\[ToolResults\\])\s*(.*?)\s*(?:\[/ToolResults]|\\\[\\/ToolResults\\])",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
+    r"(?:\[Result:|\\\[Result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/Result]|\\\[\\/Result\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
+    r"(?:\[CallParameter:|\\\[CallParameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<value>.*?)\s*(?:\[/CallParameter]|\\\[\\/CallParameter\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
+    r"(?:\[ToolResult]|\\\[ToolResult\\])\s*(.*?)\s*(?:\[/ToolResult]|\\\[\\/ToolResult\\])",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(
+    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
+)
+CHATML_START_RE = re.compile(
+    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
+)
+CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
@@ -103,6 +109,13 @@ def normalize_llm_text(s: str) -> str:
     return s
 
 
+def unescape_text(s: str) -> str:
+    """Remove CommonMark backslash escapes."""
+    if not s:
+        return ""
+    return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
+
+
 def _strip_google_search(match: re.Match) -> str:
     """Extract raw text from Google Search links if it looks like a file path."""
     text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
@@ -231,7 +244,9 @@ def strip_system_hints(text: str) -> str:
     if not text:
         return text
 
-    cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
+    t_unescaped = unescape_text(text)
+
+    cleaned = t_unescaped.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
 
     if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END:
         pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?"
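A minimal sketch of the escape-tolerant matching this patch restores, assuming the group names `name` and `value` reconstructed in TAGGED_ARG_RE above; the parameter name and sample strings below are illustrative only, not taken from the repository:

    import re

    # Same shape as TAGGED_ARG_RE in app/utils/helper.py after this patch.
    # The group names `name` and `value` are assumed reconstructions.
    TAGGED_ARG_RE = re.compile(
        r"(?:\[CallParameter:|\\\[CallParameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])"
        r"\s*(?P<value>.*?)\s*(?:\[/CallParameter]|\\\[\\/CallParameter\\])",
        re.DOTALL | re.IGNORECASE,
    )

    raw = "[CallParameter:path]\nsrc/main.py\n[/CallParameter]"
    # CommonMark-escaped variant of the same tag pair.
    escaped = "\\[CallParameter\\:path\\]\nsrc/main.py\n\\[\\/CallParameter\\]"

    for sample in (raw, escaped):
        m = TAGGED_ARG_RE.search(sample)
        assert m is not None
        assert m.group("name") == "path" and m.group("value") == "src/main.py"

Each pattern alternates between the literal tag and its backslash-escaped form, so extraction tolerates responses where Gemini has escaped the protocol tags, without requiring the whole text to be unescaped first.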
From 30f61257a951b19fa0492fce6efa3421a2b990cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 01:39:11 +0700
Subject: [PATCH 132/139] Refactor: Handle all escape tags

---
 app/services/lmdb.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index dd4197a..abf8859 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -279,7 +279,7 @@ def store(
             raise ValueError("Messages list cannot be empty")
 
         # Ensure consistent sanitization before hashing and storage
-        sanitized_messages = self.sanitize_assistant_messages(conv.messages)
+        sanitized_messages = self.sanitize_messages(conv.messages)
         conv.messages = sanitized_messages
 
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
@@ -359,7 +359,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
             logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.")
             return conv
 
-        cleaned_messages = self.sanitize_assistant_messages(messages)
+        cleaned_messages = self.sanitize_messages(messages)
         if cleaned_messages != messages:
             if conv := self._find_by_message_list(model, cleaned_messages):
                 logger.debug(

From a35525234ef206422570b168934d5e7c53a7a848 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 09:12:24 +0700
Subject: [PATCH 133/139] Refactor: Remove `_strip_google_search` as it's no longer needed

---
 app/utils/helper.py | 44 ++++----------------------------------------
 1 file changed, 4 insertions(+), 40 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 9c75b45..f6a3e77 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -8,7 +8,7 @@
 import tempfile
 import unicodedata
 from pathlib import Path
-from urllib.parse import unquote, urlparse
+from urllib.parse import urlparse
 
 import httpx
 import orjson
@@ -67,18 +67,6 @@
 )
 CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
-FILE_PATH_PATTERN = re.compile(
-    r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
-    re.IGNORECASE,
-)
-GOOGLE_SEARCH_PATTERN = re.compile(
-    r"(?P<md_start>`?\[`?)?"
-    r"(?P<text>[^]]+)?"
-    r"(?(md_start)`?]\()?"
-    r"https://www\.google\.com/search\?q=(?P<query>[^&\s\"'<>)]+)"
-    r"(?(md_start)\)?`?)",
-    re.IGNORECASE,
-)
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -110,22 +98,12 @@ def normalize_llm_text(s: str) -> str:
 
 
 def unescape_text(s: str) -> str:
-    """Remove CommonMark backslash escapes."""
+    """Remove CommonMark backslash escapes from LLM-generated text."""
     if not s:
         return ""
     return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
 
 
-def _strip_google_search(match: re.Match) -> str:
-    """Extract raw text from Google Search links if it looks like a file path."""
-    text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
-    text_to_check = unquote(text_to_check.strip())
-
-    if FILE_PATH_PATTERN.match(text_to_check):
-        return text_to_check
-    return match.group(0)
-
-
 def _strip_param_fences(s: str) -> str:
     """
     Remove one layer of outermost Markdown code fences,
@@ -147,20 +125,6 @@ def _strip_param_fences(s: str) -> str:
     return s[n:-n].strip()
 
 
-def _repair_param_value(s: str) -> str:
-    """
-    Standardize and repair LLM-generated text fragments (unescaping, link normalization)
-    to ensure compatibility with specialized clients like Roo Code.
-    """
-    if not s:
-        return ""
-
-    s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
-    s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
-
-    return s
-
-
 def estimate_tokens(text: str | None) -> int:
     """Estimate the number of tokens heuristically based on character count."""
     if not text:
@@ -286,8 +250,8 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         logger.warning("Encountered tool_call without a function name.")
         return
 
-    name = _repair_param_value(name.strip())
-    raw_args = _repair_param_value(raw_args)
+    name = unescape_text(name.strip())
+    raw_args = unescape_text(raw_args)
 
     arg_matches = TAGGED_ARG_RE.findall(raw_args)
     if arg_matches:

From 45af127004adcec8a48c9f9432f0c0b54a581724 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 12:30:29 +0700
Subject: [PATCH 134/139] Update `TOOL_WRAP_HINT` to ensure Gemini strictly follows the instructions.

---
 app/utils/helper.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index f6a3e77..64df4f7 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,11 +18,12 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
-    "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
-    "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n"
-    "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n"
-    "EXACT SYNTAX TEMPLATE:\n"
+    "\n\n### SYSTEM: TOOL CALLING PROTOCOL (MANDATORY) ###\n"
+    "If tool execution is required, you MUST adhere to this EXACT protocol. No exceptions.\n\n"
+    "1. OUTPUT RESTRICTION: Your response MUST contain ONLY the [ToolCalls] block. Conversational filler, preambles, or concluding remarks are STRICTLY PROHIBITED.\n"
+    "2. WRAPPING LOGIC: Every parameter value MUST be enclosed in a markdown code block. Use 3 backticks (```) by default. 
If the value contains backticks, the outer fence MUST be longer than any sequence inside (e.g., ````).\n" + "3. TAG SYMMETRY: All tags MUST be balanced and closed in the exact reverse order of opening. Incomplete or unclosed blocks are strictly prohibited.\n\n" + "REQUIRED SYNTAX:\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:parameter_name]\n" @@ -32,8 +33,7 @@ "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" - "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" + "CRITICAL: Do NOT mix natural language with protocol tags. Either respond naturally OR provide the protocol block alone. There is no middle ground.\n" ) TOOL_BLOCK_RE = re.compile( r"(?:\[ToolCalls]|\\\[ToolCalls\\])\s*(.*?)\s*(?:\[/ToolCalls]|\\\[\\/ToolCalls\\])", From f144e1440f6812ae330438b316ad1e08eb052240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 14 Feb 2026 12:32:26 +0700 Subject: [PATCH 135/139] Update required dependencies --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 93dabab..0cae786 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = "==3.12.*" dependencies = [ "fastapi>=0.129.0", - "gemini-webapi>=1.19.1", + "gemini-webapi>=1.19.2", "lmdb>=1.7.5", "loguru>=0.7.3", "orjson>=3.11.7", diff --git a/uv.lock b/uv.lock index 249e84b..5b687e4 100644 --- a/uv.lock +++ b/uv.lock @@ -107,7 +107,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "fastapi", specifier = ">=0.129.0" }, - { name = "gemini-webapi", specifier = ">=1.19.1" }, + { name = "gemini-webapi", specifier = ">=1.19.2" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "orjson", specifier = ">=3.11.7" }, @@ -123,7 +123,7 @@ dev = [{ name = "ruff", specifier = ">=0.15.1" }] [[package]] name = "gemini-webapi" -version = "1.19.1" +version = "1.19.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx", extra = ["http2"] }, @@ -131,9 +131,9 @@ dependencies = [ { name = "orjson" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e5/d1/c61ee05ca6e20c70caa25a3f0f12e2a810bbc6b39e588ff937821de43690/gemini_webapi-1.19.1.tar.gz", hash = "sha256:a52afdfc2d9f6e87a6ae8cd926fb2ce5c562a0a99dc75ce97d8d50ffc2a3e133", size = 266761, upload-time = "2026-02-10T05:44:29.195Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d4/d3/b4ff659bfb0fff378b16f934429d53b7f78451ac184406ab2f9ddda9357e/gemini_webapi-1.19.2.tar.gz", hash = "sha256:f6e96e28f3f1e78be6176fbb8b2eca25ad509aec6cfacf99c415559f27691b71", size = 266805, upload-time = "2026-02-14T05:26:04.103Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/0b/7a73919ee8621f6559ae679a20d754959b989a3f09cf20478d89971f40b4/gemini_webapi-1.19.1-py3-none-any.whl", hash = "sha256:0dc4c7daa58d281722d52d6acf520f2e850c6c3c6020080fdbc5f77736c8be9a", size = 63500, upload-time = "2026-02-10T05:44:27.692Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e7/a676f721980e3daa43e05abe94884a84648efdc6203889e7a0f5c8ca2e98/gemini_webapi-1.19.2-py3-none-any.whl", hash = "sha256:fdc088ca35361301f40ea807a58c4bec18886b17a54164a1a8f3d639eadc6a66", size = 63524, upload-time = "2026-02-14T05:26:02.173Z" }, ] [[package]] From 9d014b093512b72d022b57133c3f31224c8f271e Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 18 Feb 2026 14:29:41 +0700 Subject: [PATCH 136/139] Ignore github directory --- .github/workflows/docker.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 7231e08..995b60c 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -8,8 +8,7 @@ on: - "v*" paths-ignore: - "**/*.md" - - ".github/workflows/ruff.yaml" - - ".github/workflows/track.yml" + - ".github/*" env: REGISTRY: ghcr.io From 3fab502ab495868f20c725e3657b11829895204c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 18 Feb 2026 16:41:55 +0700 Subject: [PATCH 137/139] Upgrade to fully support Python 3.13 --- .github/workflows/docker.yaml | 6 + .github/workflows/ruff.yaml | 4 +- Dockerfile | 18 ++- README.md | 4 +- README.zh.md | 4 +- app/main.py | 2 +- app/models/__init__.py | 58 +++++++++- app/models/models.py | 150 ++++++++++++------------- app/server/chat.py | 54 ++++----- app/server/health.py | 4 +- app/server/images.py | 2 +- app/server/middleware.py | 2 +- app/services/client.py | 11 +- app/services/lmdb.py | 55 ++++----- app/services/pool.py | 18 +-- app/utils/config.py | 12 +- app/utils/helper.py | 7 +- app/utils/singleton.py | 6 +- pyproject.toml | 50 +++++++-- scripts/dump_lmdb.py | 16 ++- uv.lock | 203 ++++++++++++++++++++++------------ 21 files changed, 427 insertions(+), 259 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 995b60c..775e9e4 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -9,6 +9,8 @@ on: paths-ignore: - "**/*.md" - ".github/*" + - "LICENSE" + - ".gitignore" env: REGISTRY: ghcr.io @@ -25,6 +27,9 @@ jobs: - name: Checkout repository uses: actions/checkout@v6 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -45,6 +50,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{major}} + type=sha,format=short type=raw,value=latest,enable={{is_default_branch}} - name: Build and push Docker image diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index 6b9e536..d451cdc 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -19,12 +19,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v6 with: - python-version: "3.12" + python-version: "3.13" - name: Install Ruff run: | python -m pip install --upgrade pip - pip install "ruff>=0.11.7" + pip install "ruff>=0.15.1" - name: Run Ruff run: ruff check . diff --git a/Dockerfile b/Dockerfile index 938bc2f..ef7f41e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,26 @@ -FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim +FROM ghcr.io/astral-sh/uv:python3.13-trixie-slim LABEL org.opencontainers.image.description="Web-based Gemini models wrapped into an OpenAI-compatible API." WORKDIR /app -# Install dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + tini \ + && rm -rf /var/lib/apt/lists/* + +ENV UV_COMPILE_BYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + COPY pyproject.toml uv.lock ./ -RUN uv sync --no-cache --no-dev +RUN uv sync --no-cache --frozen --no-install-project --no-dev COPY app/ app/ COPY config/ config/ COPY run.py . 
-# Command to run the application +EXPOSE 8000 + +ENTRYPOINT ["/usr/bin/tini", "--"] + CMD ["uv", "run", "--no-dev", "run.py"] diff --git a/README.md b/README.md index 330e9c8..91f687c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Gemini-FastAPI -[![Python 3.12](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) +[![Python 3.13](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/) [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) @@ -24,7 +24,7 @@ Web-based Gemini models wrapped into an OpenAI-compatible API. Powered by [Hanao ### Prerequisites -- Python 3.12 +- Python 3.13 - Google account with Gemini access on web - `secure_1psid` and `secure_1psidts` cookies from Gemini web interface diff --git a/README.zh.md b/README.zh.md index 2f9e1b5..d23bec1 100644 --- a/README.zh.md +++ b/README.zh.md @@ -1,6 +1,6 @@ # Gemini-FastAPI -[![Python 3.12](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) +[![Python 3.13](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/) [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) @@ -24,7 +24,7 @@ ### 前置条件 -- Python 3.12 +- Python 3.13 - 拥有网页版 Gemini 访问权限的 Google 账号 - 从 Gemini 网页获取的 `secure_1psid` 和 `secure_1psidts` Cookie diff --git a/app/main.py b/app/main.py index f4e6711..0634ce2 100644 --- a/app/main.py +++ b/app/main.py @@ -43,7 +43,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None: stop_event.wait(), timeout=RETENTION_CLEANUP_INTERVAL_SECONDS, ) - except asyncio.TimeoutError: + except TimeoutError: continue logger.info("LMDB retention cleanup task stopped.") diff --git a/app/models/__init__.py b/app/models/__init__.py index c6a3640..a72efdc 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -1 +1,57 @@ -from .models import * # noqa: F403 +from .models import ( + ChatCompletionRequest, + ChatCompletionResponse, + Choice, + ContentItem, + ConversationInStore, + FunctionCall, + HealthCheckResponse, + Message, + ModelData, + ModelListResponse, + ResponseCreateRequest, + ResponseCreateResponse, + ResponseImageGenerationCall, + ResponseImageTool, + ResponseInputContent, + ResponseInputItem, + ResponseOutputContent, + ResponseOutputMessage, + ResponseToolCall, + ResponseToolChoice, + Tool, + ToolCall, + ToolChoiceFunction, + ToolChoiceFunctionDetail, + ToolFunctionDefinition, + Usage, +) + +__all__ = [ + "ChatCompletionRequest", + "ChatCompletionResponse", + "Choice", + "ContentItem", + "ConversationInStore", + "FunctionCall", + "HealthCheckResponse", + "Message", + "ModelData", + "ModelListResponse", + "ResponseCreateRequest", + "ResponseCreateResponse", + "ResponseImageGenerationCall", + "ResponseImageTool", + "ResponseInputContent", + "ResponseInputItem", + "ResponseOutputContent", + "ResponseOutputMessage", + "ResponseToolCall", + "ResponseToolChoice", + "Tool", + "ToolCall", + "ToolChoiceFunction", + "ToolChoiceFunctionDetail", + "ToolFunctionDefinition", + "Usage", +] diff --git a/app/models/models.py b/app/models/models.py index 64ceaa9..ca206b7 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -1,7 +1,7 @@ from __future__ import annotations from datetime import datetime -from typing import Any, Dict, List, 
Literal, Optional, Union +from typing import Any, Literal from pydantic import BaseModel, Field, model_validator @@ -10,28 +10,28 @@ class ContentItem(BaseModel): """Individual content item (text, image, or file) within a message.""" type: Literal["text", "image_url", "file", "input_audio"] - text: Optional[str] = None - image_url: Optional[Dict[str, str]] = None - input_audio: Optional[Dict[str, Any]] = None - file: Optional[Dict[str, str]] = None - annotations: List[Dict[str, Any]] = Field(default_factory=list) + text: str | None = None + image_url: dict[str, str] | None = None + input_audio: dict[str, Any] | None = None + file: dict[str, str] | None = None + annotations: list[dict[str, Any]] = Field(default_factory=list) class Message(BaseModel): """Message model""" role: str - content: Union[str, List[ContentItem], None] = None - name: Optional[str] = None - tool_calls: Optional[List["ToolCall"]] = None - tool_call_id: Optional[str] = None - refusal: Optional[str] = None - reasoning_content: Optional[str] = None - audio: Optional[Dict[str, Any]] = None - annotations: List[Dict[str, Any]] = Field(default_factory=list) + content: str | list[ContentItem] | None = None + name: str | None = None + tool_calls: list[ToolCall] | None = None + tool_call_id: str | None = None + refusal: str | None = None + reasoning_content: str | None = None + audio: dict[str, Any] | None = None + annotations: list[dict[str, Any]] = Field(default_factory=list) @model_validator(mode="after") - def normalize_role(self) -> "Message": + def normalize_role(self) -> Message: """Normalize 'developer' role to 'system' for Gemini compatibility.""" if self.role == "developer": self.role = "system" @@ -44,7 +44,7 @@ class Choice(BaseModel): index: int message: Message finish_reason: str - logprobs: Optional[Dict[str, Any]] = None + logprobs: dict[str, Any] | None = None class FunctionCall(BaseModel): @@ -66,8 +66,8 @@ class ToolFunctionDefinition(BaseModel): """Function definition for tool.""" name: str - description: Optional[str] = None - parameters: Optional[Dict[str, Any]] = None + description: str | None = None + parameters: dict[str, Any] | None = None class Tool(BaseModel): @@ -96,8 +96,8 @@ class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int - prompt_tokens_details: Optional[Dict[str, int]] = None - completion_tokens_details: Optional[Dict[str, int]] = None + prompt_tokens_details: dict[str, int] | None = None + completion_tokens_details: dict[str, int] | None = None class ModelData(BaseModel): @@ -113,17 +113,17 @@ class ChatCompletionRequest(BaseModel): """Chat completion request model""" model: str - messages: List[Message] - stream: Optional[bool] = False - user: Optional[str] = None - temperature: Optional[float] = 0.7 - top_p: Optional[float] = 1.0 - max_tokens: Optional[int] = None - tools: Optional[List["Tool"]] = None - tool_choice: Optional[ - Union[Literal["none"], Literal["auto"], Literal["required"], "ToolChoiceFunction"] - ] = None - response_format: Optional[Dict[str, Any]] = None + messages: list[Message] + stream: bool | None = False + user: str | None = None + temperature: float | None = 0.7 + top_p: float | None = 1.0 + max_tokens: int | None = None + tools: list[Tool] | None = None + tool_choice: ( + Literal["none"] | Literal["auto"] | Literal["required"] | ToolChoiceFunction | None + ) = None + response_format: dict[str, Any] | None = None class ChatCompletionResponse(BaseModel): @@ -133,7 +133,7 @@ class ChatCompletionResponse(BaseModel): object: str = 
"chat.completion" created: int model: str - choices: List[Choice] + choices: list[Choice] usage: Usage @@ -141,23 +141,23 @@ class ModelListResponse(BaseModel): """Model list model""" object: str = "list" - data: List[ModelData] + data: list[ModelData] class HealthCheckResponse(BaseModel): """Health check response model""" ok: bool - storage: Optional[Dict[str, str | int]] = None - clients: Optional[Dict[str, bool]] = None - error: Optional[str] = None + storage: dict[str, str | int] | None = None + clients: dict[str, bool] | None = None + error: str | None = None class ConversationInStore(BaseModel): """Conversation model for storing in the database.""" - created_at: Optional[datetime] = Field(default=None) - updated_at: Optional[datetime] = Field(default=None) + created_at: datetime | None = Field(default=None) + updated_at: datetime | None = Field(default=None) # Gemini Web API does not support changing models once a conversation is created. model: str = Field(..., description="Model used for the conversation") @@ -172,13 +172,13 @@ class ResponseInputContent(BaseModel): """Content item for Responses API input.""" type: Literal["input_text", "input_image", "input_file"] - text: Optional[str] = None - image_url: Optional[str] = None - detail: Optional[Literal["auto", "low", "high"]] = None - file_url: Optional[str] = None - file_data: Optional[str] = None - filename: Optional[str] = None - annotations: List[Dict[str, Any]] = Field(default_factory=list) + text: str | None = None + image_url: str | None = None + detail: Literal["auto", "low", "high"] | None = None + file_url: str | None = None + file_data: str | None = None + filename: str | None = None + annotations: list[dict[str, Any]] = Field(default_factory=list) @model_validator(mode="before") @classmethod @@ -192,42 +192,42 @@ def normalize_output_text(cls, data: Any) -> Any: class ResponseInputItem(BaseModel): """Single input item for Responses API.""" - type: Optional[Literal["message"]] = "message" + type: Literal["message"] | None = "message" role: Literal["user", "assistant", "system", "developer"] - content: Union[str, List[ResponseInputContent]] + content: str | list[ResponseInputContent] class ResponseToolChoice(BaseModel): """Tool choice enforcing a specific tool in Responses API.""" type: Literal["function", "image_generation"] - function: Optional[ToolChoiceFunctionDetail] = None + function: ToolChoiceFunctionDetail | None = None class ResponseImageTool(BaseModel): """Image generation tool specification for Responses API.""" type: Literal["image_generation"] - model: Optional[str] = None - output_format: Optional[str] = None + model: str | None = None + output_format: str | None = None class ResponseCreateRequest(BaseModel): """Responses API request payload.""" model: str - input: Union[str, List[ResponseInputItem]] - instructions: Optional[Union[str, List[ResponseInputItem]]] = None - temperature: Optional[float] = 0.7 - top_p: Optional[float] = 1.0 - max_output_tokens: Optional[int] = None - stream: Optional[bool] = False - tool_choice: Optional[Union[str, ResponseToolChoice]] = None - tools: Optional[List[Union[Tool, ResponseImageTool]]] = None - store: Optional[bool] = None - user: Optional[str] = None - response_format: Optional[Dict[str, Any]] = None - metadata: Optional[Dict[str, Any]] = None + input: str | list[ResponseInputItem] + instructions: str | list[ResponseInputItem] | None = None + temperature: float | None = 0.7 + top_p: float | None = 1.0 + max_output_tokens: int | None = None + stream: bool | None = 
False + tool_choice: str | ResponseToolChoice | None = None + tools: list[Tool | ResponseImageTool] | None = None + store: bool | None = None + user: str | None = None + response_format: dict[str, Any] | None = None + metadata: dict[str, Any] | None = None class ResponseUsage(BaseModel): @@ -242,8 +242,8 @@ class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" type: Literal["output_text"] - text: Optional[str] = "" - annotations: List[Dict[str, Any]] = Field(default_factory=list) + text: str | None = "" + annotations: list[dict[str, Any]] = Field(default_factory=list) class ResponseOutputMessage(BaseModel): @@ -252,7 +252,7 @@ class ResponseOutputMessage(BaseModel): id: str type: Literal["message"] role: Literal["assistant"] - content: List[ResponseOutputContent] + content: list[ResponseOutputContent] class ResponseImageGenerationCall(BaseModel): @@ -261,10 +261,10 @@ class ResponseImageGenerationCall(BaseModel): id: str type: Literal["image_generation_call"] = "image_generation_call" status: Literal["completed", "in_progress", "generating", "failed"] = "completed" - result: Optional[str] = None - output_format: Optional[str] = None - size: Optional[str] = None - revised_prompt: Optional[str] = None + result: str | None = None + output_format: str | None = None + size: str | None = None + revised_prompt: str | None = None class ResponseToolCall(BaseModel): @@ -283,7 +283,7 @@ class ResponseCreateResponse(BaseModel): object: Literal["response"] = "response" created_at: int model: str - output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] + output: list[ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall] status: Literal[ "in_progress", "completed", @@ -292,12 +292,12 @@ class ResponseCreateResponse(BaseModel): "cancelled", "requires_action", ] = "completed" - tool_choice: Optional[Union[str, ResponseToolChoice]] = None - tools: Optional[List[Union[Tool, ResponseImageTool]]] = None + tool_choice: str | ResponseToolChoice | None = None + tools: list[Tool | ResponseImageTool] | None = None usage: ResponseUsage - error: Optional[Dict[str, Any]] = None - metadata: Optional[Dict[str, Any]] = None - input: Optional[Union[str, List[ResponseInputItem]]] = None + error: dict[str, Any] | None = None + metadata: dict[str, Any] | None = None + input: str | list[ResponseInputItem] | None = None # Rebuild models with forward references diff --git a/app/server/chat.py b/app/server/chat.py index 934091b..3849af5 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -3,10 +3,11 @@ import io import reprlib import uuid +from collections.abc import AsyncGenerator from dataclasses import dataclass -from datetime import datetime, timezone +from datetime import UTC, datetime from pathlib import Path -from typing import Any, AsyncGenerator +from typing import Any import orjson from fastapi import APIRouter, Depends, HTTPException, Request, status @@ -17,7 +18,7 @@ from gemini_webapi.types.image import GeneratedImage, Image from loguru import logger -from ..models import ( +from app.models import ( ChatCompletionRequest, ContentItem, ConversationInStore, @@ -38,9 +39,15 @@ Tool, ToolChoiceFunction, ) -from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore -from ..utils import g_config -from ..utils.helper import ( +from app.server.middleware import ( + get_image_store_dir, + get_image_token, + get_temp_dir, + verify_api_key, +) +from app.services import GeminiClientPool, GeminiClientWrapper, 
LMDBConversationStore +from app.utils import g_config +from app.utils.helper import ( TOOL_HINT_LINE_END, TOOL_HINT_LINE_START, TOOL_HINT_STRIPPED, @@ -53,7 +60,6 @@ strip_system_hints, text_from_message, ) -from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) METADATA_TTL_MINUTES = 15 @@ -98,11 +104,7 @@ async def _image_to_base64( if not suffix: detected_ext = detect_image_extension(data) - if detected_ext: - suffix = detected_ext - else: - # Fallback if detection fails - suffix = ".png" if isinstance(image, GeneratedImage) else ".jpg" + suffix = detected_ext or (".png" if isinstance(image, GeneratedImage) else ".jpg") random_name = f"img_{uuid.uuid4().hex}{suffix}" new_path = temp_dir / random_name @@ -628,7 +630,7 @@ def _get_model_by_name(name: str) -> Model: def _get_available_models() -> list[ModelData]: """Return a list of available models based on configuration strategy.""" - now = int(datetime.now(tz=timezone.utc).timestamp()) + now = int(datetime.now(tz=UTC).timestamp()) strategy = g_config.gemini.model_strategy models_data = [] @@ -712,7 +714,7 @@ async def _send_with_split( text: str, files: list[Path | str | io.BytesIO] | None = None, stream: bool = False, -) -> AsyncGenerator[ModelOutput, None] | ModelOutput: +) -> AsyncGenerator[ModelOutput] | ModelOutput: """Send text to Gemini, splitting or converting to attachment if too long.""" if len(text) <= MAX_CHARS_PER_REQUEST: try: @@ -1013,9 +1015,7 @@ def flush(self) -> str: res = "" if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): res = "" - elif self.state == "IN_BLOCK" and self.current_role != "tool": - res = self.buffer - elif self.state == "NORMAL": + elif (self.state == "IN_BLOCK" and self.current_role != "tool") or self.state == "NORMAL": res = self.buffer self.buffer = "" @@ -1027,7 +1027,7 @@ def flush(self) -> str: def _create_real_streaming_response( - generator: AsyncGenerator[ModelOutput, None], + generator: AsyncGenerator[ModelOutput], completion_id: str, created_time: int, model_name: str, @@ -1221,7 +1221,7 @@ async def generate_stream(): def _create_responses_real_streaming_response( - generator: AsyncGenerator[ModelOutput, None], + generator: AsyncGenerator[ModelOutput], response_id: str, created_time: int, model_name: str, @@ -1455,10 +1455,12 @@ async def create_chat_completion( m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir) except Exception as e: logger.exception("Error in preparing conversation") - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e) + ) from e completion_id = f"chatcmpl-{uuid.uuid4()}" - created_time = int(datetime.now(tz=timezone.utc).timestamp()) + created_time = int(datetime.now(tz=UTC).timestamp()) try: assert session and client @@ -1470,7 +1472,7 @@ async def create_chat_completion( ) except Exception as e: logger.exception("Gemini API error") - raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e if request.stream: return _create_real_streaming_response( @@ -1620,10 +1622,12 @@ async def create_response( m_input, files = await GeminiClientWrapper.process_conversation(messages, tmp_dir) except Exception as e: logger.exception("Error in preparing conversation") - raise 
HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e) + ) from e response_id = f"resp_{uuid.uuid4().hex}" - created_time = int(datetime.now(tz=timezone.utc).timestamp()) + created_time = int(datetime.now(tz=UTC).timestamp()) try: assert session and client @@ -1635,7 +1639,7 @@ async def create_response( ) except Exception as e: logger.exception("Gemini API error") - raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e if request.stream: return _create_responses_real_streaming_response( diff --git a/app/server/health.py b/app/server/health.py index f521db1..444c938 100644 --- a/app/server/health.py +++ b/app/server/health.py @@ -1,8 +1,8 @@ from fastapi import APIRouter from loguru import logger -from ..models import HealthCheckResponse -from ..services import GeminiClientPool, LMDBConversationStore +from app.models import HealthCheckResponse +from app.services import GeminiClientPool, LMDBConversationStore router = APIRouter() diff --git a/app/server/images.py b/app/server/images.py index fe078f7..e1c161c 100644 --- a/app/server/images.py +++ b/app/server/images.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, HTTPException, Query from fastapi.responses import FileResponse -from ..server.middleware import get_image_store_dir, verify_image_token +from app.server.middleware import get_image_store_dir, verify_image_token router = APIRouter() diff --git a/app/server/middleware.py b/app/server/middleware.py index 630e1f5..4bc358d 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -10,7 +10,7 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from loguru import logger -from ..utils import g_config +from app.utils import g_config # Persistent directory for storing generated images IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images" diff --git a/app/services/client.py b/app/services/client.py index 70dfce9..49d9e87 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -5,9 +5,9 @@ from gemini_webapi import GeminiClient, ModelOutput from loguru import logger -from ..models import Message -from ..utils import g_config -from ..utils.helper import ( +from app.models import Message +from app.utils import g_config +from app.utils.helper import ( add_tag, normalize_llm_text, save_file_to_tempfile, @@ -146,9 +146,8 @@ async def process_message( model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) - if model_input or message.role == "tool": - if tagged: - model_input = add_tag(message.role, model_input) + if (model_input or message.role == "tool") and tagged: + model_input = add_tag(message.role, model_input) return model_input, files diff --git a/app/services/lmdb.py b/app/services/lmdb.py index abf8859..87a1449 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -4,22 +4,22 @@ from contextlib import contextmanager from datetime import datetime, timedelta from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any import lmdb import orjson from loguru import logger -from ..models import ContentItem, ConversationInStore, Message -from ..utils import g_config -from ..utils.helper import ( +from app.models import ContentItem, ConversationInStore, Message +from app.utils import g_config +from app.utils.helper import ( extract_tool_calls, 
normalize_llm_text, remove_tool_call_blocks, strip_system_hints, unescape_text, ) -from ..utils.singleton import Singleton +from app.utils.singleton import Singleton _VOLATILE_TRANS_TABLE = str.maketrans("", "", string.whitespace + string.punctuation) @@ -125,7 +125,7 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str: def _hash_conversation( - client_id: str, model: str, messages: List[Message], fuzzy: bool = False + client_id: str, model: str, messages: list[Message], fuzzy: bool = False ) -> str: """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() @@ -145,9 +145,9 @@ class LMDBConversationStore(metaclass=Singleton): def __init__( self, - db_path: Optional[str] = None, - max_db_size: Optional[int] = None, - retention_days: Optional[int] = None, + db_path: str | None = None, + max_db_size: int | None = None, + retention_days: int | None = None, ): """ Initialize LMDB store. @@ -219,7 +219,7 @@ def _get_transaction(self, write: bool = False): raise @staticmethod - def _decode_index_value(data: bytes) -> List[str]: + def _decode_index_value(data: bytes) -> list[str]: """Decode index value, handling both legacy single-string and new list-of-strings formats.""" if not data: return [] @@ -238,7 +238,7 @@ def _decode_index_value(data: bytes) -> List[str]: @staticmethod def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str): """Add a storage key to the index for a given hash, avoiding duplicates.""" - idx_key = f"{prefix}{hash_val}".encode("utf-8") + idx_key = f"{prefix}{hash_val}".encode() existing = txn.get(idx_key) keys = LMDBConversationStore._decode_index_value(existing) if existing else [] if storage_key not in keys: @@ -248,7 +248,7 @@ def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key @staticmethod def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str): """Remove a specific storage key from the index for a given hash.""" - idx_key = f"{prefix}{hash_val}".encode("utf-8") + idx_key = f"{prefix}{hash_val}".encode() existing = txn.get(idx_key) if not existing: return @@ -263,7 +263,7 @@ def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storag def store( self, conv: ConversationInStore, - custom_key: Optional[str] = None, + custom_key: str | None = None, ) -> str: """ Store a conversation model in LMDB. @@ -312,7 +312,7 @@ def store( ) raise - def get(self, key: str) -> Optional[ConversationInStore]: + def get(self, key: str) -> ConversationInStore | None: """ Retrieve conversation data by key. @@ -340,7 +340,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: logger.error(f"Unexpected error retrieving messages with key {key[:12]}: {e}") return None - def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]: + def find(self, model: str, messages: list[Message]) -> ConversationInStore | None: """ Search conversation data by message list. Tries raw matching, then sanitized matching, and finally fuzzy matching. @@ -360,12 +360,13 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt return conv cleaned_messages = self.sanitize_messages(messages) - if cleaned_messages != messages: - if conv := self._find_by_message_list(model, cleaned_messages): - logger.debug( - f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages." 
- ) - return conv + if cleaned_messages != messages and ( + conv := self._find_by_message_list(model, cleaned_messages) + ): + logger.debug( + f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages." + ) + return conv if conv := self._find_by_message_list(model, messages, fuzzy=True): logger.debug( @@ -379,9 +380,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt def _find_by_message_list( self, model: str, - messages: List[Message], + messages: list[Message], fuzzy: bool = False, - ) -> Optional[ConversationInStore]: + ) -> ConversationInStore | None: """ Internal find implementation based on a message list. @@ -440,7 +441,7 @@ def exists(self, key: str) -> bool: logger.error(f"Failed to check existence of key {key}: {e}") return False - def delete(self, key: str) -> Optional[ConversationInStore]: + def delete(self, key: str) -> ConversationInStore | None: """Delete conversation model by key.""" try: with self._get_transaction(write=True) as txn: @@ -466,7 +467,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: logger.error(f"Failed to delete messages with key {key[:12]}: {e}") return None - def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: + def keys(self, prefix: str = "", limit: int | None = None) -> list[str]: """List all keys in the store, optionally filtered by prefix.""" keys = [] try: @@ -492,7 +493,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: logger.error(f"Failed to list keys: {e}") return keys - def cleanup_expired(self, retention_days: Optional[int] = None) -> int: + def cleanup_expired(self, retention_days: int | None = None) -> int: """Delete conversations older than the given retention period.""" retention_value = ( self.retention_days if retention_days is None else max(0, int(retention_days)) @@ -561,7 +562,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: return removed - def stats(self) -> Dict[str, Any]: + def stats(self) -> dict[str, Any]: """Get database statistics.""" if not self._env: logger.error("LMDB environment not initialized") diff --git a/app/services/pool.py b/app/services/pool.py index decc21a..3b4197c 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -1,11 +1,11 @@ import asyncio from collections import deque -from typing import Dict, List, Optional from loguru import logger -from ..utils import g_config -from ..utils.singleton import Singleton +from app.utils import g_config +from app.utils.singleton import Singleton + from .client import GeminiClientWrapper @@ -13,10 +13,10 @@ class GeminiClientPool(metaclass=Singleton): """Pool of GeminiClient instances identified by unique ids.""" def __init__(self) -> None: - self._clients: List[GeminiClientWrapper] = [] - self._id_map: Dict[str, GeminiClientWrapper] = {} + self._clients: list[GeminiClientWrapper] = [] + self._id_map: dict[str, GeminiClientWrapper] = {} self._round_robin: deque[GeminiClientWrapper] = deque() - self._restart_locks: Dict[str, asyncio.Lock] = {} + self._restart_locks: dict[str, asyncio.Lock] = {} if len(g_config.gemini.clients) == 0: raise ValueError("No Gemini clients configured") @@ -55,7 +55,7 @@ async def init(self) -> None: if success_count == 0: raise RuntimeError("Failed to initialize any Gemini clients") - async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: + async def acquire(self, client_id: str | None = None) -> GeminiClientWrapper: """Return a healthy client by id or using 
round-robin.""" if not self._round_robin: raise RuntimeError("No Gemini clients configured") @@ -106,10 +106,10 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False @property - def clients(self) -> List[GeminiClientWrapper]: + def clients(self) -> list[GeminiClientWrapper]: """Return managed clients.""" return self._clients - def status(self) -> Dict[str, bool]: + def status(self) -> dict[str, bool]: """Return running status for each client.""" return {client.id: client.running() for client in self._clients} diff --git a/app/utils/config.py b/app/utils/config.py index 4c1709f..21d2891 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,7 +1,7 @@ import ast import os import sys -from typing import Any, Literal, Optional +from typing import Any, Literal import orjson from loguru import logger @@ -28,7 +28,7 @@ class ServerConfig(BaseModel): host: str = Field(default="0.0.0.0", description="Server host address") port: int = Field(default=8000, ge=1, le=65535, description="Server port number") - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="API key for authentication, if set, will enable API key validation", ) @@ -41,11 +41,11 @@ class GeminiClientSettings(BaseModel): id: str = Field(..., description="Unique identifier for the client") secure_1psid: str = Field(..., description="Gemini Secure 1PSID") secure_1psidts: str = Field(..., description="Gemini Secure 1PSIDTS") - proxy: Optional[str] = Field(default=None, description="Proxy URL for this Gemini client") + proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client") @field_validator("proxy", mode="before") @classmethod - def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]: + def _blank_proxy_to_none(cls, value: str | None) -> str | None: if value is None: return None stripped = value.strip() @@ -55,8 +55,8 @@ def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]: class GeminiModelConfig(BaseModel): """Configuration for a custom Gemini model.""" - model_name: Optional[str] = Field(default=None, description="Name of the model") - model_header: Optional[dict[str, Optional[str]]] = Field( + model_name: str | None = Field(default=None, description="Name of the model") + model_header: dict[str, str | None] | None = Field( default=None, description="Header for the model" ) diff --git a/app/utils/helper.py b/app/utils/helper.py index 64df4f7..002d401 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -14,7 +14,7 @@ import orjson from loguru import logger -from ..models import FunctionCall, Message, ToolCall +from app.models import FunctionCall, Message, ToolCall VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( @@ -67,6 +67,7 @@ ) CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE) COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") +PARAM_FENCE_RE = re.compile(r"^(?P`{3,})") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -113,7 +114,7 @@ def _strip_param_fences(s: str) -> str: if not s: return "" - match = re.match(r"^(?P`{3,})", s) + match = PARAM_FENCE_RE.match(s) if not match or not s.endswith(match.group("fence")): return s @@ -272,7 +273,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = "{}" index = len(tool_calls) - seed = 
f"{name}:{arguments}:{index}".encode("utf-8") + seed = f"{name}:{arguments}:{index}".encode() call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" tool_calls.append( diff --git a/app/utils/singleton.py b/app/utils/singleton.py index 489e87e..2a258af 100644 --- a/app/utils/singleton.py +++ b/app/utils/singleton.py @@ -1,10 +1,10 @@ -from typing import ClassVar, Dict +from typing import ClassVar class Singleton(type): - _instances: ClassVar[Dict[type, object]] = {} + _instances: ClassVar[dict[type, object]] = {} def __call__(cls, *args, **kwargs): if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) + cls._instances[cls] = super().__call__(*args, **kwargs) return cls._instances[cls] diff --git a/pyproject.toml b/pyproject.toml index 0cae786..a1ae29d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,33 +3,61 @@ name = "gemini-fastapi" version = "1.0.0" description = "FastAPI Server built on Gemini Web API" readme = "README.md" -requires-python = "==3.12.*" +requires-python = "==3.13.*" dependencies = [ "fastapi>=0.129.0", "gemini-webapi>=1.19.2", + "httptools>=0.7.1", "lmdb>=1.7.5", "loguru>=0.7.3", "orjson>=3.11.7", - "pydantic-settings[yaml]>=2.12.0", - "uvicorn>=0.40.0", + "pydantic-settings[yaml]>=2.13.0", + "uvicorn>=0.41.0", "uvloop>=0.22.1; sys_platform != 'win32'", ] +[project.urls] +Repository = "https://github.com/Nativu5/Gemini-FastAPI" + [project.optional-dependencies] dev = [ - "ruff>=0.15.0", + "pytest>=9.0.2", + "ruff>=0.15.1", +] + +[dependency-groups] +dev = [ + "gemini-fastapi[dev]", ] [tool.ruff] line-length = 100 -lint.select = ["E", "F", "W", "I", "RUF"] -lint.ignore = ["E501"] +target-version = "py313" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "F", # pyflakes + "W", # pycodestyle warnings + "I", # isort + "UP", # pyupgrade + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "SIM", # flake8-simplify + "RUF", # ruff-specific rules + "TID", # flake8-tidy-imports +] +ignore = [ + "E501", # line too long +] + +[tool.ruff.lint.flake8-bugbear] +extend-immutable-calls = [ + "fastapi.Depends", + "fastapi.Query", + "fastapi.security.HTTPBearer", +] [tool.ruff.format] quote-style = "double" indent-style = "space" - -[dependency-groups] -dev = [ - "ruff>=0.15.1", -] diff --git a/scripts/dump_lmdb.py b/scripts/dump_lmdb.py index a331325..889af4f 100644 --- a/scripts/dump_lmdb.py +++ b/scripts/dump_lmdb.py @@ -1,6 +1,7 @@ import argparse +from collections.abc import Iterable from pathlib import Path -from typing import Any, Iterable, List +from typing import Any import lmdb import orjson @@ -14,17 +15,17 @@ def _decode_value(value: bytes) -> Any: return value.decode("utf-8", errors="replace") -def _dump_all(txn: lmdb.Transaction) -> List[dict[str, Any]]: +def _dump_all(txn: lmdb.Transaction) -> list[dict[str, Any]]: """Return all records from the database.""" - result: List[dict[str, Any]] = [] + result: list[dict[str, Any]] = [] for key, value in txn.cursor(): result.append({"key": key.decode("utf-8"), "value": _decode_value(value)}) return result -def _dump_selected(txn: lmdb.Transaction, keys: Iterable[str]) -> List[dict[str, Any]]: +def _dump_selected(txn: lmdb.Transaction, keys: Iterable[str]) -> list[dict[str, Any]]: """Return records for the provided keys.""" - result: List[dict[str, Any]] = [] + result: list[dict[str, Any]] = [] for key in keys: raw = txn.get(key.encode("utf-8")) if raw is not None: @@ -36,10 +37,7 @@ def dump_lmdb(path: Path, keys: Iterable[str] | None = None) -> None: 
"""Print selected or all key-value pairs from the LMDB database.""" env = lmdb.open(str(path), readonly=True, lock=False) with env.begin() as txn: - if keys: - records = _dump_selected(txn, keys) - else: - records = _dump_all(txn) + records = _dump_selected(txn, keys) if keys else _dump_all(txn) env.close() print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode("utf-8")) diff --git a/uv.lock b/uv.lock index 5b687e4..4c819e7 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,6 @@ version = 1 revision = 3 -requires-python = "==3.12.*" +requires-python = "==3.13.*" [[package]] name = "annotated-doc" @@ -26,7 +26,6 @@ version = "4.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, - { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } wheels = [ @@ -86,6 +85,7 @@ source = { virtual = "." } dependencies = [ { name = "fastapi" }, { name = "gemini-webapi" }, + { name = "httptools" }, { name = "lmdb" }, { name = "loguru" }, { name = "orjson" }, @@ -96,30 +96,33 @@ dependencies = [ [package.optional-dependencies] dev = [ + { name = "pytest" }, { name = "ruff" }, ] [package.dev-dependencies] dev = [ - { name = "ruff" }, + { name = "gemini-fastapi", extra = ["dev"] }, ] [package.metadata] requires-dist = [ { name = "fastapi", specifier = ">=0.129.0" }, { name = "gemini-webapi", specifier = ">=1.19.2" }, + { name = "httptools", specifier = ">=0.7.1" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "orjson", specifier = ">=3.11.7" }, - { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.0" }, - { name = "uvicorn", specifier = ">=0.40.0" }, + { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.1" }, + { name = "uvicorn", specifier = ">=0.41.0" }, { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" }, ] provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.15.1" }] +dev = [{ name = "gemini-fastapi", extras = ["dev"] }] [[package]] name = "gemini-webapi" @@ -180,6 +183,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] +[[package]] +name = "httptools" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/09/8f/c77b1fcbfd262d422f12da02feb0d218fa228d52485b77b953832105bb90/httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3", size = 
202889, upload-time = "2025-10-10T03:54:47.089Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1a/22887f53602feaa066354867bc49a68fc295c2293433177ee90870a7d517/httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca", size = 108180, upload-time = "2025-10-10T03:54:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/32/6a/6aaa91937f0010d288d3d124ca2946d48d60c3a5ee7ca62afe870e3ea011/httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c", size = 478596, upload-time = "2025-10-10T03:54:48.919Z" }, + { url = "https://files.pythonhosted.org/packages/6d/70/023d7ce117993107be88d2cbca566a7c1323ccbaf0af7eabf2064fe356f6/httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66", size = 473268, upload-time = "2025-10-10T03:54:49.993Z" }, + { url = "https://files.pythonhosted.org/packages/32/4d/9dd616c38da088e3f436e9a616e1d0cc66544b8cdac405cc4e81c8679fc7/httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346", size = 455517, upload-time = "2025-10-10T03:54:51.066Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3a/a6c595c310b7df958e739aae88724e24f9246a514d909547778d776799be/httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650", size = 458337, upload-time = "2025-10-10T03:54:52.196Z" }, + { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" }, +] + [[package]] name = "httpx" version = "0.28.1" @@ -218,18 +236,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "lmdb" version = "1.7.5" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/c7/a3/3756f2c6adba4a1413dba55e6c81a20b38a868656517308533e33cb59e1c/lmdb-1.7.5.tar.gz", hash = "sha256:f0604751762cb097059d5412444c4057b95f386c7ed958363cf63f453e5108da", size = 883490, upload-time = "2025-10-15T03:39:44.038Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/34/b4/8b862c4d7fd6f68cb33e2a919169fda8924121dc5ff61e3cc105304a6dd4/lmdb-1.7.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b48c2359eea876d7b634b49f84019ecc8c1626da97c795fc7b39a793676815df", size = 100910, upload-time = "2025-10-15T03:39:00.727Z" }, - { url = "https://files.pythonhosted.org/packages/27/64/8ab5da48180d5f13a293ea00a9f8758b1bee080e76ea0ab0d6be0d51b55f/lmdb-1.7.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f84793baeb430ba984eb6c1b4e08c0a508b1c03e79ce79fcda0f29ecc06a95a", size = 99376, upload-time = "2025-10-15T03:39:01.791Z" }, - { url = "https://files.pythonhosted.org/packages/43/e0/51bc942fe5ed3fce69c631b54f52d97785de3d94487376139be6de1e199a/lmdb-1.7.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:68cc21314a33faac1b749645a976b7655e7fa7cc104a72365d2429d2db7f6342", size = 298556, upload-time = "2025-10-15T03:39:02.787Z" }, - { url = "https://files.pythonhosted.org/packages/66/c5/19ea75c88b91d12da5c6f4bbe2aca633047b6b270fd613d557583d32cc5c/lmdb-1.7.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f2d9b7e102fcfe5e0cfb3acdebd403eb55ccbe5f7202d8f49d60bdafb1546d1e", size = 299449, upload-time = "2025-10-15T03:39:03.903Z" }, - { url = "https://files.pythonhosted.org/packages/1b/74/365194203dbff47d3a1621366d6a1133cdcce261f4ac0e1d0496f01e6ace/lmdb-1.7.5-cp312-cp312-win_amd64.whl", hash = "sha256:69de89cc79e03e191fc6f95797f1bef91b45c415d1ea9d38872b00b2d989a50f", size = 99328, upload-time = "2025-10-15T03:39:04.949Z" }, - { url = "https://files.pythonhosted.org/packages/3f/3a/a441afebff5bd761f7f58d194fed7ac265279964957479a5c8a51c42f9ad/lmdb-1.7.5-cp312-cp312-win_arm64.whl", hash = "sha256:0c880ee4b309e900f2d58a710701f5e6316a351878588c6a95a9c0bcb640680b", size = 94191, upload-time = "2025-10-15T03:39:05.975Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/03275084218eacdbdf7e185d693e1db4cb79c35d18fac47fa0d388522a0d/lmdb-1.7.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:66ae02fa6179e46bb69fe446b7e956afe8706ae17ec1d4cd9f7056e161019156", size = 101508, upload-time = "2025-10-15T03:39:07.228Z" }, + { url = "https://files.pythonhosted.org/packages/20/b9/bc33ae2e4940359ba2fc412e6a755a2f126bc5062b4aaf35edd3a791f9a5/lmdb-1.7.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bf65c573311ac8330c7908257f76b28ae3576020123400a81a6b650990dc028c", size = 100105, upload-time = "2025-10-15T03:39:08.491Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f6/22f84b776a64d3992f052ecb637c35f1764a39df4f2190ecc5a3a1295bd7/lmdb-1.7.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97bcb3fc12841a8828db918e494fe0fd016a73d2680ad830d75719bb3bf4e76a", size = 301500, upload-time = "2025-10-15T03:39:09.463Z" }, + { url = "https://files.pythonhosted.org/packages/2a/4d/8e6be8d7d5a30d47fa0ce4b55e3a8050ad689556e6e979d206b4ac67b733/lmdb-1.7.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:865f374f6206ab4aacb92ffb1dc612ee1a31a421db7c89733abe06b81ac87cb0", size = 302285, upload-time = "2025-10-15T03:39:10.856Z" }, + { url = "https://files.pythonhosted.org/packages/5e/dc/7e04fb31a8f88951db81ac677e3ccb3e09248eda40e6ad52f74fd9370c32/lmdb-1.7.5-cp313-cp313-win_amd64.whl", hash = "sha256:82a04d5ca2a6a799c8db7f209354c48aebb49ff338530f5813721fc4c68e4450", size = 99447, upload-time = "2025-10-15T03:39:12.151Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/50/e3f97efab17b3fad4afde99b3c957ecac4ffbefada6874a57ad0c695660a/lmdb-1.7.5-cp313-cp313-win_arm64.whl", hash = "sha256:0ad85a15acbfe8a42fdef92ee5e869610286d38507e976755f211be0fc905ca7", size = 94145, upload-time = "2025-10-15T03:39:13.461Z" }, { url = "https://files.pythonhosted.org/packages/bd/2c/982cb5afed533d0cb8038232b40c19b5b85a2d887dec74dfd39e8351ef4b/lmdb-1.7.5-py3-none-any.whl", hash = "sha256:fc344bb8bc0786c87c4ccb19b31f09a38c08bd159ada6f037d669426fea06f03", size = 148539, upload-time = "2025-10-15T03:39:42.982Z" }, ] @@ -252,21 +279,39 @@ version = "3.11.7" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" }, - { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" }, - { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" }, - { url = "https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" }, - { url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" }, - { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" }, - { url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" }, - { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" }, - { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" }, - { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" }, - { url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" }, - { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" }, - { url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" }, - { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" }, - { url = "https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" }, + { url = "https://files.pythonhosted.org/packages/89/25/6e0e52cac5aab51d7b6dcd257e855e1dec1c2060f6b28566c509b4665f62/orjson-3.11.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1d98b30cc1313d52d4af17d9c3d307b08389752ec5f2e5febdfada70b0f8c733", size = 228390, upload-time = "2026-02-02T15:38:06.8Z" }, + { url = "https://files.pythonhosted.org/packages/a5/29/a77f48d2fc8a05bbc529e5ff481fb43d914f9e383ea2469d4f3d51df3d00/orjson-3.11.7-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:d897e81f8d0cbd2abb82226d1860ad2e1ab3ff16d7b08c96ca00df9d45409ef4", size = 125189, upload-time = "2026-02-02T15:38:08.181Z" }, + { url = "https://files.pythonhosted.org/packages/89/25/0a16e0729a0e6a1504f9d1a13cdd365f030068aab64cec6958396b9969d7/orjson-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814be4b49b228cfc0b3c565acf642dd7d13538f966e3ccde61f4f55be3e20785", size = 128106, upload-time = "2026-02-02T15:38:09.41Z" }, + { url = "https://files.pythonhosted.org/packages/66/da/a2e505469d60666a05ab373f1a6322eb671cb2ba3a0ccfc7d4bc97196787/orjson-3.11.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d06e5c5fed5caedd2e540d62e5b1c25e8c82431b9e577c33537e5fa4aa909539", size 
= 123363, upload-time = "2026-02-02T15:38:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/23/bf/ed73f88396ea35c71b38961734ea4a4746f7ca0768bf28fd551d37e48dd0/orjson-3.11.7-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31c80ce534ac4ea3739c5ee751270646cbc46e45aea7576a38ffec040b4029a1", size = 129007, upload-time = "2026-02-02T15:38:12.138Z" }, + { url = "https://files.pythonhosted.org/packages/73/3c/b05d80716f0225fc9008fbf8ab22841dcc268a626aa550561743714ce3bf/orjson-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f50979824bde13d32b4320eedd513431c921102796d86be3eee0b58e58a3ecd1", size = 141667, upload-time = "2026-02-02T15:38:13.398Z" }, + { url = "https://files.pythonhosted.org/packages/61/e8/0be9b0addd9bf86abfc938e97441dcd0375d494594b1c8ad10fe57479617/orjson-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e54f3808e2b6b945078c41aa8d9b5834b28c50843846e97807e5adb75fa9705", size = 130832, upload-time = "2026-02-02T15:38:14.698Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ec/c68e3b9021a31d9ec15a94931db1410136af862955854ed5dd7e7e4f5bff/orjson-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12b80df61aab7b98b490fe9e4879925ba666fccdfcd175252ce4d9035865ace", size = 133373, upload-time = "2026-02-02T15:38:16.109Z" }, + { url = "https://files.pythonhosted.org/packages/d2/45/f3466739aaafa570cc8e77c6dbb853c48bf56e3b43738020e2661e08b0ac/orjson-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:996b65230271f1a97026fd0e6a753f51fbc0c335d2ad0c6201f711b0da32693b", size = 138307, upload-time = "2026-02-02T15:38:17.453Z" }, + { url = "https://files.pythonhosted.org/packages/e1/84/9f7f02288da1ffb31405c1be07657afd1eecbcb4b64ee2817b6fe0f785fa/orjson-3.11.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ab49d4b2a6a1d415ddb9f37a21e02e0d5dbfe10b7870b21bf779fc21e9156157", size = 408695, upload-time = "2026-02-02T15:38:18.831Z" }, + { url = "https://files.pythonhosted.org/packages/18/07/9dd2f0c0104f1a0295ffbe912bc8d63307a539b900dd9e2c48ef7810d971/orjson-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:390a1dce0c055ddf8adb6aa94a73b45a4a7d7177b5c584b8d1c1947f2ba60fb3", size = 144099, upload-time = "2026-02-02T15:38:20.28Z" }, + { url = "https://files.pythonhosted.org/packages/a5/66/857a8e4a3292e1f7b1b202883bcdeb43a91566cf59a93f97c53b44bd6801/orjson-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1eb80451a9c351a71dfaf5b7ccc13ad065405217726b59fdbeadbcc544f9d223", size = 134806, upload-time = "2026-02-02T15:38:22.186Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5b/6ebcf3defc1aab3a338ca777214966851e92efb1f30dc7fc8285216e6d1b/orjson-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7477aa6a6ec6139c5cb1cc7b214643592169a5494d200397c7fc95d740d5fcf3", size = 127914, upload-time = "2026-02-02T15:38:23.511Z" }, + { url = "https://files.pythonhosted.org/packages/00/04/c6f72daca5092e3117840a1b1e88dfc809cc1470cf0734890d0366b684a1/orjson-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:b9f95dcdea9d4f805daa9ddf02617a89e484c6985fa03055459f90e87d7a0757", size = 124986, upload-time = "2026-02-02T15:38:24.836Z" }, + { url = "https://files.pythonhosted.org/packages/03/ba/077a0f6f1085d6b806937246860fafbd5b17f3919c70ee3f3d8d9c713f38/orjson-3.11.7-cp313-cp313-win_arm64.whl", hash = "sha256:800988273a014a0541483dc81021247d7eacb0c845a9d1a34a422bc718f41539", size = 126045, upload-time = "2026-02-02T15:38:26.216Z" }, +] + +[[package]] +name 
= "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] [[package]] @@ -293,38 +338,34 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, - { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, - { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, - { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, - { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, - { url = 
"https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, - { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, - { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, - { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, - { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, - { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, - { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, - { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, - { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, - { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, - { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = 
"2025-11-04T13:42:52.215Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, - { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, ] [[package]] name = "pydantic-settings" -version = "2.12.0" +version = "2.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } +sdist = { url = "https://files.pythonhosted.org/packages/96/a1/ae859ffac5a3338a66b74c5e29e244fd3a3cc483c89feaf9f56c39898d75/pydantic_settings-2.13.0.tar.gz", hash = "sha256:95d875514610e8595672800a5c40b073e99e4aae467fa7c8f9c263061ea2e1fe", size = 222450, upload-time = "2026-02-15T12:11:23.476Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1a/dd1b9d7e627486cf8e7523d09b70010e05a4bc41414f4ae6ce184cf0afb6/pydantic_settings-2.13.0-py3-none-any.whl", hash = "sha256:d67b576fff39cd086b595441bf9c75d4193ca9c0ed643b90360694d0f1240246", size = 58429, upload-time = "2026-02-15T12:11:22.133Z" }, ] [package.optional-dependencies] @@ -332,6 +373,31 @@ yaml = [ { name = "pyyaml" }, ] +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.1" @@ -347,16 +413,16 @@ version = "6.0.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, - { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, - { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, - { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, - { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time 
= "2025-09-25T21:32:16.431Z" }, - { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, - { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, - { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, - { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, - { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, ] [[package]] @@ -390,7 +456,6 @@ version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ @@ -420,15 +485,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.40.0" +version = "0.41.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } +sdist = { url = "https://files.pythonhosted.org/packages/32/ce/eeb58ae4ac36fe09e3842eb02e0eb676bf2c53ae062b98f1b2531673efdd/uvicorn-0.41.0.tar.gz", hash = "sha256:09d11cf7008da33113824ee5a1c6422d89fbc2ff476540d69a34c87fab8b571a", size = 82633, upload-time = "2026-02-16T23:07:24.1Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, + { url = "https://files.pythonhosted.org/packages/83/e4/d04a086285c20886c0daad0e026f250869201013d18f81d9ff5eada73a88/uvicorn-0.41.0-py3-none-any.whl", hash = "sha256:29e35b1d2c36a04b9e180d4007ede3bcb32a85fbdfd6c6aeb3f26839de088187", size = 68783, upload-time = "2026-02-16T23:07:22.357Z" }, ] [[package]] @@ -437,12 +502,12 @@ version = "0.22.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" }, - { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" }, - { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" }, - { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" }, - { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" }, - { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" }, + { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, ] [[package]] From 35864f65d722b40ce8bbef15b5a5e17944adf96d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 18 Feb 2026 16:54:37 +0700 Subject: [PATCH 138/139] Upgrade to fully support Python 3.13 --- .github/workflows/docker.yaml | 2 +- app/models/__init__.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 775e9e4..11caa57 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -50,7 +50,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{major}} - type=sha,format=short + type=raw,value={{date 'YYYYMMDD'}} type=raw,value=latest,enable={{is_default_branch}} - name: Build and push Docker image diff --git a/app/models/__init__.py b/app/models/__init__.py index a72efdc..3896de1 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -19,6 +19,7 @@ ResponseOutputMessage, ResponseToolCall, ResponseToolChoice, + ResponseUsage, Tool, ToolCall, ToolChoiceFunction, @@ -48,6 +49,7 @@ "ResponseOutputMessage", "ResponseToolCall", "ResponseToolChoice", + "ResponseUsage", "Tool", "ToolCall", "ToolChoiceFunction", From a48c38d16fab0a0c27a716ee25bf12842c8bd9a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 18 Feb 2026 17:19:51 +0700 Subject: [PATCH 139/139] Upgrade to fully support Python 3.13 --- .github/workflows/docker.yaml | 2 +- app/models/models.py | 130 +++++++++++++++++----------------- 2 files changed, 67 insertions(+), 65 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 11caa57..1c5a2ee 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -50,7 +50,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{major}} - type=raw,value={{date 'YYYYMMDD'}} + type=raw,value={{date 'YYYYMMDD'}}-{{sha}} type=raw,value=latest,enable={{is_default_branch}} - name: Build and push Docker image diff --git a/app/models/models.py b/app/models/models.py index ca206b7..3b3e627 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -10,10 +10,10 @@ class ContentItem(BaseModel): """Individual content item (text, image, or file) within a message.""" type: Literal["text", "image_url", "file", "input_audio"] - text: str | None = None - image_url: dict[str, str] | None = None - input_audio: dict[str, Any] | None = None - file: dict[str, str] | None = None + text: str | None = Field(default=None) + image_url: dict[str, Any] | None = Field(default=None) + input_audio: dict[str, Any] | None = Field(default=None) + file: dict[str, Any] | None = Field(default=None) annotations: list[dict[str, Any]] = Field(default_factory=list) @@ -21,13 +21,13 @@ class Message(BaseModel): """Message model""" role: str - content: str | list[ContentItem] | None = None - name: str | None = None - tool_calls: list[ToolCall] | None = None - tool_call_id: str | None = None - refusal: str | None = None - reasoning_content: str | None = None 
- audio: dict[str, Any] | None = None + content: str | list[ContentItem] | None = Field(default=None) + name: str | None = Field(default=None) + tool_calls: list[ToolCall] | None = Field(default=None) + tool_call_id: str | None = Field(default=None) + refusal: str | None = Field(default=None) + reasoning_content: str | None = Field(default=None) + audio: dict[str, Any] | None = Field(default=None) annotations: list[dict[str, Any]] = Field(default_factory=list) @model_validator(mode="after") @@ -44,7 +44,7 @@ class Choice(BaseModel): index: int message: Message finish_reason: str - logprobs: dict[str, Any] | None = None + logprobs: dict[str, Any] | None = Field(default=None) class FunctionCall(BaseModel): @@ -66,8 +66,8 @@ class ToolFunctionDefinition(BaseModel): """Function definition for tool.""" name: str - description: str | None = None - parameters: dict[str, Any] | None = None + description: str | None = Field(default=None) + parameters: dict[str, Any] | None = Field(default=None) class Tool(BaseModel): @@ -96,8 +96,8 @@ class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int - prompt_tokens_details: dict[str, int] | None = None - completion_tokens_details: dict[str, int] | None = None + prompt_tokens_details: dict[str, int] | None = Field(default=None) + completion_tokens_details: dict[str, int] | None = Field(default=None) class ModelData(BaseModel): @@ -114,16 +114,16 @@ class ChatCompletionRequest(BaseModel): model: str messages: list[Message] - stream: bool | None = False - user: str | None = None - temperature: float | None = 0.7 - top_p: float | None = 1.0 - max_tokens: int | None = None - tools: list[Tool] | None = None + stream: bool | None = Field(default=False) + user: str | None = Field(default=None) + temperature: float | None = Field(default=0.7) + top_p: float | None = Field(default=1.0) + max_tokens: int | None = Field(default=None) + tools: list[Tool] | None = Field(default=None) tool_choice: ( Literal["none"] | Literal["auto"] | Literal["required"] | ToolChoiceFunction | None - ) = None - response_format: dict[str, Any] | None = None + ) = Field(default=None) + response_format: dict[str, Any] | None = Field(default=None) class ChatCompletionResponse(BaseModel): @@ -148,9 +148,9 @@ class HealthCheckResponse(BaseModel): """Health check response model""" ok: bool - storage: dict[str, str | int] | None = None - clients: dict[str, bool] | None = None - error: str | None = None + storage: dict[str, Any] | None = Field(default=None) + clients: dict[str, bool] | None = Field(default=None) + error: str | None = Field(default=None) class ConversationInStore(BaseModel): @@ -172,12 +172,12 @@ class ResponseInputContent(BaseModel): """Content item for Responses API input.""" type: Literal["input_text", "input_image", "input_file"] - text: str | None = None - image_url: str | None = None - detail: Literal["auto", "low", "high"] | None = None - file_url: str | None = None - file_data: str | None = None - filename: str | None = None + text: str | None = Field(default=None) + image_url: str | None = Field(default=None) + detail: Literal["auto", "low", "high"] | None = Field(default=None) + file_url: str | None = Field(default=None) + file_data: str | None = Field(default=None) + filename: str | None = Field(default=None) annotations: list[dict[str, Any]] = Field(default_factory=list) @model_validator(mode="before") @@ -192,7 +192,7 @@ def normalize_output_text(cls, data: Any) -> Any: class ResponseInputItem(BaseModel): """Single input item for Responses 
API.""" - type: Literal["message"] | None = "message" + type: Literal["message"] | None = Field(default="message") role: Literal["user", "assistant", "system", "developer"] content: str | list[ResponseInputContent] @@ -201,15 +201,15 @@ class ResponseToolChoice(BaseModel): """Tool choice enforcing a specific tool in Responses API.""" type: Literal["function", "image_generation"] - function: ToolChoiceFunctionDetail | None = None + function: ToolChoiceFunctionDetail | None = Field(default=None) class ResponseImageTool(BaseModel): """Image generation tool specification for Responses API.""" type: Literal["image_generation"] - model: str | None = None - output_format: str | None = None + model: str | None = Field(default=None) + output_format: str | None = Field(default=None) class ResponseCreateRequest(BaseModel): @@ -217,17 +217,17 @@ class ResponseCreateRequest(BaseModel): model: str input: str | list[ResponseInputItem] - instructions: str | list[ResponseInputItem] | None = None - temperature: float | None = 0.7 - top_p: float | None = 1.0 - max_output_tokens: int | None = None - stream: bool | None = False - tool_choice: str | ResponseToolChoice | None = None - tools: list[Tool | ResponseImageTool] | None = None - store: bool | None = None - user: str | None = None - response_format: dict[str, Any] | None = None - metadata: dict[str, Any] | None = None + instructions: str | list[ResponseInputItem] | None = Field(default=None) + temperature: float | None = Field(default=0.7) + top_p: float | None = Field(default=1.0) + max_output_tokens: int | None = Field(default=None) + stream: bool | None = Field(default=False) + tool_choice: str | ResponseToolChoice | None = Field(default=None) + tools: list[Tool | ResponseImageTool] | None = Field(default=None) + store: bool | None = Field(default=None) + user: str | None = Field(default=None) + response_format: dict[str, Any] | None = Field(default=None) + metadata: dict[str, Any] | None = Field(default=None) class ResponseUsage(BaseModel): @@ -242,7 +242,7 @@ class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" type: Literal["output_text"] - text: str | None = "" + text: str | None = Field(default="") annotations: list[dict[str, Any]] = Field(default_factory=list) @@ -259,20 +259,22 @@ class ResponseImageGenerationCall(BaseModel): """Image generation call record emitted in Responses API.""" id: str - type: Literal["image_generation_call"] = "image_generation_call" - status: Literal["completed", "in_progress", "generating", "failed"] = "completed" - result: str | None = None - output_format: str | None = None - size: str | None = None - revised_prompt: str | None = None + type: Literal["image_generation_call"] = Field(default="image_generation_call") + status: Literal["completed", "in_progress", "generating", "failed"] = Field(default="completed") + result: str | None = Field(default=None) + output_format: str | None = Field(default=None) + size: str | None = Field(default=None) + revised_prompt: str | None = Field(default=None) class ResponseToolCall(BaseModel): """Tool call record emitted in Responses API.""" id: str - type: Literal["tool_call"] = "tool_call" - status: Literal["in_progress", "completed", "failed", "requires_action"] = "completed" + type: Literal["tool_call"] = Field(default="tool_call") + status: Literal["in_progress", "completed", "failed", "requires_action"] = Field( + default="completed" + ) function: FunctionCall @@ -280,7 +282,7 @@ class ResponseCreateResponse(BaseModel): """Responses API 
response payload.""" id: str - object: Literal["response"] = "response" + object: Literal["response"] = Field(default="response") created_at: int model: str output: list[ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall] @@ -291,13 +293,13 @@ class ResponseCreateResponse(BaseModel): "incomplete", "cancelled", "requires_action", - ] = "completed" - tool_choice: str | ResponseToolChoice | None = None - tools: list[Tool | ResponseImageTool] | None = None + ] = Field(default="completed") + tool_choice: str | ResponseToolChoice | None = Field(default=None) + tools: list[Tool | ResponseImageTool] | None = Field(default=None) usage: ResponseUsage - error: dict[str, Any] | None = None - metadata: dict[str, Any] | None = None - input: str | list[ResponseInputItem] | None = None + error: dict[str, Any] | None = Field(default=None) + metadata: dict[str, Any] | None = Field(default=None) + input: str | list[ResponseInputItem] | None = Field(default=None) # Rebuild models with forward references
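
A note on the convention adopted in patch 139/139: in Pydantic v2, a declaration like "field: T | None = Field(default=None)" is equivalent at runtime to the plain "field: T | None = None" it replaces; the explicit Field(...) form keeps optional fields visually consistent with fields that carry default_factory or other metadata. Below is a minimal sketch of the pattern, assuming Pydantic v2 — the model and field names are illustrative only, not taken from app/models/models.py:

    from typing import Any

    from pydantic import BaseModel, Field


    class Example(BaseModel):
        # Field(default=None) behaves the same as a plain `= None` default.
        name: str | None = Field(default=None)
        # default_factory builds a fresh list for every instance.
        tags: list[str] = Field(default_factory=list)
        extra: dict[str, Any] | None = Field(default=None)


    print(Example())  # name=None tags=[] extra=None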