From a3dfe703507494e6082f142276fb91a743375250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 21 Nov 2025 12:44:51 +0700 Subject: [PATCH 001/139] Remove the unused auto-refresh functionality and related imports. They are no longer needed since the underlying library issue has been resolved. --- app/services/client.py | 46 +----------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 825564b..1554bdd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -8,11 +8,8 @@ from gemini_webapi import GeminiClient, ModelOutput from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model -from gemini_webapi.exceptions import AuthError, ModelInvalid +from gemini_webapi.exceptions import ModelInvalid from gemini_webapi.types import Gem -from gemini_webapi.utils import rotate_tasks -from gemini_webapi.utils.rotate_1psidts import rotate_1psidts -from loguru import logger from ..models import Message from ..utils import g_config @@ -76,47 +73,6 @@ async def init( verbose=verbose, ) - async def start_auto_refresh(self) -> None: - """ - Refresh the __Secure-1PSIDTS cookie periodically and keep the HTTP client in sync. - """ - while True: - new_1psidts: str | None = None - try: - new_1psidts = await rotate_1psidts(self.cookies, self.proxy) - except AuthError: - if task := rotate_tasks.get(self.cookies.get("__Secure-1PSID", "")): - task.cancel() - logger.warning( - "Failed to refresh Gemini cookies (AuthError). Auto refresh task canceled." - ) - return - except Exception as exc: - logger.warning(f"Unexpected error while refreshing Gemini cookies: {exc}") - - if new_1psidts: - self.cookies["__Secure-1PSIDTS"] = new_1psidts - self._sync_httpx_cookie("__Secure-1PSIDTS", new_1psidts) - logger.debug("Gemini cookies refreshed. New __Secure-1PSIDTS applied.") - await asyncio.sleep(self.refresh_interval) - - def _sync_httpx_cookie(self, name: str, value: str) -> None: - """ - Ensure the underlying httpx client uses the refreshed cookie value. - """ - if not self.client: - return - - jar = self.client.cookies.jar - matched = False - for cookie in jar: - if cookie.name == name: - cookie.value = value - matched = True - if not matched: - # Fall back to setting the cookie with default scope if we did not find an existing entry. - self.client.cookies.set(name, value) - async def generate_content( self, prompt: str, From 3a692ab014bf6d0cb98f38d499dc2760eb92c096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 22 Nov 2025 14:54:53 +0700 Subject: [PATCH 002/139] Enhance error handling in client initialization and message sending --- app/server/chat.py | 12 ++++++++-- app/services/client.py | 52 +++++++++++------------------------------- app/services/pool.py | 26 ++++++++++++++------- 3 files changed, 41 insertions(+), 49 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66fa6ce..e8752cf 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1129,7 +1129,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s """ if len(text) <= MAX_CHARS_PER_REQUEST: # No need to split - a single request is fine. 
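         # (anything longer falls through to the chunked send path below)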
- return await session.send_message(text, files=files) + try: + return await session.send_message(text, files=files) + except Exception as e: + logger.exception(f"Error sending message to Gemini: {e}") + raise hint_len = len(CONTINUATION_HINT) chunk_size = MAX_CHARS_PER_REQUEST - hint_len @@ -1155,7 +1159,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s raise # The last chunk carries the files (if any) and we return its response. - return await session.send_message(chunks[-1], files=files) + try: + return await session.send_message(chunks[-1], files=files) + except Exception as e: + logger.exception(f"Error sending final chunk to Gemini: {e}") + raise def _iter_stream_segments(model_output: str, chunk_size: int = 64): diff --git a/app/services/client.py b/app/services/client.py index 1554bdd..26be26f 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,4 +1,3 @@ -import asyncio import html import json import re @@ -6,10 +5,7 @@ from typing import Any, cast from gemini_webapi import GeminiClient, ModelOutput -from gemini_webapi.client import ChatSession -from gemini_webapi.constants import Model -from gemini_webapi.exceptions import ModelInvalid -from gemini_webapi.types import Gem +from loguru import logger from ..models import Message from ..utils import g_config @@ -64,40 +60,18 @@ async def init( refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval)) verbose = cast(bool, _resolve(verbose, config.verbose)) - await super().init( - timeout=timeout, - auto_close=auto_close, - close_delay=close_delay, - auto_refresh=auto_refresh, - refresh_interval=refresh_interval, - verbose=verbose, - ) - - async def generate_content( - self, - prompt: str, - files: list[str | Path] | None = None, - model: Model | str = Model.UNSPECIFIED, - gem: Gem | str | None = None, - chat: ChatSession | None = None, - **kwargs, - ) -> ModelOutput: - cnt = 2 # Try 2 times before giving up - last_exception: ModelInvalid | None = None - while cnt: - cnt -= 1 - try: - return await super().generate_content(prompt, files, model, gem, chat, **kwargs) - except ModelInvalid as e: - # This is not always caused by model selection. Instead, it can be solved by retrying. - # So we catch it and retry as a workaround. 
- await asyncio.sleep(1) - last_exception = e - - # If retrying failed, re-raise ModelInvalid - if last_exception is not None: - raise last_exception - raise RuntimeError("generate_content failed without receiving a ModelInvalid error.") + try: + await super().init( + timeout=timeout, + auto_close=auto_close, + close_delay=close_delay, + auto_refresh=auto_refresh, + refresh_interval=refresh_interval, + verbose=verbose, + ) + except Exception: + logger.exception(f"Failed to initialize GeminiClient {self.id}") + raise @staticmethod async def process_message( diff --git a/app/services/pool.py b/app/services/pool.py index abf1fa0..24a21dc 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -35,14 +35,24 @@ def __init__(self) -> None: async def init(self) -> None: """Initialize all clients in the pool.""" + success_count = 0 for client in self._clients: if not client.running: - await client.init( - timeout=g_config.gemini.timeout, - auto_refresh=g_config.gemini.auto_refresh, - verbose=g_config.gemini.verbose, - refresh_interval=g_config.gemini.refresh_interval, - ) + try: + await client.init( + timeout=g_config.gemini.timeout, + auto_refresh=g_config.gemini.auto_refresh, + verbose=g_config.gemini.verbose, + refresh_interval=g_config.gemini.refresh_interval, + ) + except Exception: + logger.exception(f"Failed to initialize client {client.id}") + + if client.running: + success_count += 1 + + if success_count == 0: + raise RuntimeError("Failed to initialize any Gemini clients") async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: """Return a healthy client by id or using round-robin.""" @@ -89,8 +99,8 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: ) logger.info(f"Restarted Gemini client {client.id} after it stopped.") return True - except Exception as exc: - logger.warning(f"Failed to restart Gemini client {client.id}: {exc}") + except Exception: + logger.exception(f"Failed to restart Gemini client {client.id}") return False @property From d57e3676fed9fa03e1f51a5aed80d4b7f88e6a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 22 Nov 2025 17:49:41 +0700 Subject: [PATCH 003/139] Refactor link handling to extract file paths and simplify Google search links --- app/services/client.py | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 26be26f..f5a39dd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -24,9 +24,20 @@ ) HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") + MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])") + CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL) +FILE_PATH_PATTERN = re.compile( + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + re.IGNORECASE, +) + +GOOGLE_SEARCH_LINK_PATTERN = re.compile( + r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? 
str: text = _unescape_html(text) text = _unescape_markdown(text) - def simplify_link_target(text_content: str) -> str: - match_colon_num = re.match(r"([^:]+:\d+)", text_content) - if match_colon_num: - return match_colon_num.group(1) - return text_content + def extract_file_path_from_display_text(text_content: str) -> str | None: + match = re.match(FILE_PATH_PATTERN, text_content) + if match: + return match.group(1) + return None def replacer(match: re.Match) -> str: - outer_open_paren = match.group(1) - display_text = match.group(2) + display_text = str(match.group(1)).strip() + google_search_prefix = match.group(2) + query_part = match.group(3) - new_target_url = simplify_link_target(display_text) - new_link_segment = f"[`{display_text}`]({new_target_url})" + file_path = extract_file_path_from_display_text(display_text) - if outer_open_paren: - return f"{outer_open_paren}{new_link_segment})" + if file_path: + # If it's a file path, transform it into a self-referencing Markdown link + return f"[`{file_path}`]({file_path})" else: - return new_link_segment - - # Replace Google search links with simplified Markdown links - pattern = r"(\()?\[`([^`]+?)`\]\((https://www.google.com/search\?q=)(.*?)(? Date: Sat, 22 Nov 2025 18:29:41 +0700 Subject: [PATCH 004/139] Fix regex pattern for Google search link matching --- app/services/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/client.py b/app/services/client.py index f5a39dd..ffc559e 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -35,7 +35,7 @@ ) GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? Date: Sat, 22 Nov 2025 21:44:09 +0700 Subject: [PATCH 005/139] Fix regex patterns for Markdown escaping, code fence and Google search link matching --- app/services/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index ffc559e..0088c74 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -25,17 +25,17 @@ HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") -MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])") +MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") -CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL) +CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) FILE_PATH_PATTERN = re.compile( - r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, ) GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"(?:`\s*)?`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(? 
Date: Sat, 22 Nov 2025 22:52:27 +0700 Subject: [PATCH 006/139] Increase timeout value in configuration files from 60 to 120 seconds to better handle heavy tasks --- app/server/chat.py | 2 -- app/services/client.py | 8 -------- app/utils/config.py | 2 +- config/config.yaml | 6 +++--- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index e8752cf..b4e88da 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -48,9 +48,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) - CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" - TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( r"(.*?)", re.DOTALL | re.IGNORECASE diff --git a/app/services/client.py b/app/services/client.py index 0088c74..166eb70 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -16,29 +16,21 @@ '```xml\n{"arg": "value"}\n```\n' "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" ) - CODE_BLOCK_HINT = ( "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n" "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" ) - HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") - MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") - CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) - FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, ) - GOOGLE_SEARCH_LINK_PATTERN = re.compile( r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
) - - _UNSET = object() diff --git a/app/utils/config.py b/app/utils/config.py index 48f0792..796ca75 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -56,7 +56,7 @@ class GeminiConfig(BaseModel): clients: list[GeminiClientSettings] = Field( ..., description="List of Gemini client credential pairs" ) - timeout: int = Field(default=60, ge=1, description="Init timeout") + timeout: int = Field(default=120, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" diff --git a/config/config.yaml b/config/config.yaml index b0f8fbf..89c88b7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -21,8 +21,8 @@ gemini: - id: "example-id-1" # Arbitrary client ID secure_1psid: "YOUR_SECURE_1PSID_HERE" secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" - proxy: null # Optional proxy URL (null/empty means direct connection) - timeout: 60 # Init timeout in seconds + proxy: null # Optional proxy URL (null/empty means direct connection) + timeout: 120 # Init timeout in seconds auto_refresh: true # Auto-refresh session cookies refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests @@ -34,4 +34,4 @@ storage: retention_days: 14 # Number of days to retain conversations before cleanup logging: - level: "INFO" # Log level: DEBUG, INFO, WARNING, ERROR + level: "INFO" # Log level: DEBUG, INFO, WARNING, ERROR From f00ebfcbd0424c7ab06d680f308349a04aff3be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 13:15:27 +0700 Subject: [PATCH 007/139] Fix Image generation --- .github/workflows/docker.yaml | 10 ++--- .github/workflows/track.yml | 12 +++--- app/models/models.py | 14 +++---- app/server/chat.py | 77 +++++++++++++++++++++-------------- app/services/client.py | 4 +- app/utils/helper.py | 10 ++++- 6 files changed, 75 insertions(+), 52 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 4527f3d..eef2a41 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -5,11 +5,11 @@ on: branches: - main tags: - - 'v*' + - "v*" paths-ignore: - - '**/*.md' - - '.github/workflows/ruff.yaml' - - '.github/workflows/track.yml' + - "**/*.md" + - ".github/workflows/ruff.yaml" + - ".github/workflows/track.yml" env: REGISTRY: ghcr.io @@ -57,4 +57,4 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha - cache-to: type=gha,mode=max \ No newline at end of file + cache-to: type=gha,mode=max diff --git a/.github/workflows/track.yml b/.github/workflows/track.yml index 63afbec..838dcf8 100644 --- a/.github/workflows/track.yml +++ b/.github/workflows/track.yml @@ -2,7 +2,7 @@ name: Update gemini-webapi on: schedule: - - cron: '0 0 * * *' # Runs every day at midnight + - cron: "0 0 * * *" # Runs every day at midnight workflow_dispatch: jobs: @@ -24,7 +24,7 @@ jobs: run: | # Install dependencies first to enable uv pip show uv sync - + # Get current version of gemini-webapi before upgrade OLD_VERSION=$(uv pip show gemini-webapi 2>/dev/null | grep ^Version: | awk '{print $2}') if [ -z "$OLD_VERSION" ]; then @@ -32,10 +32,10 @@ jobs: exit 1 fi echo "Current gemini-webapi version: $OLD_VERSION" - + # Update the package using uv, which handles pyproject.toml and uv.lock uv add --upgrade gemini-webapi - + # Get new version of gemini-webapi after upgrade 
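           # `uv pip show` prints a "Version: x.y.z" line; grep + awk pull out the bare version number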
NEW_VERSION=$(uv pip show gemini-webapi | grep ^Version: | awk '{print $2}') if [ -z "$NEW_VERSION" ]; then @@ -43,7 +43,7 @@ jobs: exit 1 fi echo "New gemini-webapi version: $NEW_VERSION" - + # Only proceed if gemini-webapi version has changed if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then echo "gemini-webapi has been updated from $OLD_VERSION to $NEW_VERSION" @@ -63,7 +63,7 @@ jobs: title: ":arrow_up: update gemini-webapi to ${{ steps.update.outputs.version }}" body: | Update `gemini-webapi` to version `${{ steps.update.outputs.version }}`. - + Auto-generated by GitHub Actions using `uv`. branch: update-gemini-webapi base: main diff --git a/app/models/models.py b/app/models/models.py index 3991f12..74d8cd5 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -154,11 +154,13 @@ class ConversationInStore(BaseModel): class ResponseInputContent(BaseModel): """Content item for Responses API input.""" - type: Literal["input_text", "input_image"] + type: Literal["input_text", "input_image", "input_file"] text: Optional[str] = None image_url: Optional[str] = None - image_base64: Optional[str] = None - mime_type: Optional[str] = None + detail: Optional[Literal["auto", "low", "high"]] = None + file_url: Optional[str] = None + file_data: Optional[str] = None + filename: Optional[str] = None class ResponseInputItem(BaseModel): @@ -212,12 +214,8 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text", "output_image"] + type: Literal["output_text"] text: Optional[str] = None - image_base64: Optional[str] = None - mime_type: Optional[str] = None - width: Optional[int] = None - height: Optional[int] = None class ResponseOutputMessage(BaseModel): diff --git a/app/server/chat.py b/app/server/chat.py index b4e88da..76dc632 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -381,14 +381,6 @@ def _strip_tagged_blocks(text: str) -> str: return "".join(result) -def _ensure_data_url(part: ResponseInputContent) -> str | None: - image_url = part.image_url - if not image_url and part.image_base64: - mime_type = part.mime_type or "image/png" - image_url = f"data:{mime_type};base64,{part.image_base64}" - return image_url - - def _response_items_to_messages( items: str | list[ResponseInputItem], ) -> tuple[list[Message], str | list[ResponseInputItem]]: @@ -422,14 +414,34 @@ def _response_items_to_messages( if text_value: converted.append(ContentItem(type="text", text=text_value)) elif part.type == "input_image": - image_url = _ensure_data_url(part) + image_url = part.image_url if image_url: normalized_contents.append( - ResponseInputContent(type="input_image", image_url=image_url) + ResponseInputContent( + type="input_image", + image_url=image_url, + detail=part.detail if part.detail else "auto", + ) ) converted.append( - ContentItem(type="image_url", image_url={"url": image_url}) + ContentItem( + type="image_url", + image_url={ + "url": image_url, + "detail": part.detail if part.detail else "auto", + }, + ) ) + elif part.type == "input_file": + if part.file_url or part.file_data: + normalized_contents.append(part) + file_info = {} + if part.file_data: + file_info["file_data"] = part.file_data + file_info["filename"] = part.filename + if part.file_url: + file_info["url"] = part.file_url + converted.append(ContentItem(type="file", file=file_info)) messages.append(Message(role=role, content=converted or None)) normalized_input.append( @@ -472,11 +484,26 @@ def _instructions_to_messages( if text_value: 
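                         # plain text parts pass through unchanged; image/file parts are normalized below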
converted.append(ContentItem(type="text", text=text_value)) elif part.type == "input_image": - image_url = _ensure_data_url(part) + image_url = part.image_url if image_url: converted.append( - ContentItem(type="image_url", image_url={"url": image_url}) + ContentItem( + type="image_url", + image_url={ + "url": image_url, + "detail": part.detail if part.detail else "auto", + }, + ) ) + elif part.type == "input_file": + file_info = {} + if part.file_data: + file_info["file_data"] = part.file_data + file_info["filename"] = part.filename + if part.file_url: + file_info["url"] = part.file_url + if file_info: + converted.append(ContentItem(type="file", file=file_info)) instruction_messages.append(Message(role=role, content=converted or None)) return instruction_messages @@ -799,13 +826,13 @@ async def create_response( session, client, remaining_messages = await _find_reusable_session(db, pool, model, messages) async def _build_payload( - payload_messages: list[Message], reuse_session: bool + _payload_messages: list[Message], _reuse_session: bool ) -> tuple[str, list[Path | str]]: - if reuse_session and len(payload_messages) == 1: + if _reuse_session and len(_payload_messages) == 1: return await GeminiClientWrapper.process_message( - payload_messages[0], tmp_dir, tagged=False + _payload_messages[0], tmp_dir, tagged=False ) - return await GeminiClientWrapper.process_conversation(payload_messages, tmp_dir) + return await GeminiClientWrapper.process_conversation(_payload_messages, tmp_dir) reuse_session = session is not None if reuse_session: @@ -821,7 +848,7 @@ async def _build_payload( detail="No new messages to send for the existing session.", ) payload_messages = messages_to_send - model_input, files = await _build_payload(payload_messages, reuse_session=True) + model_input, files = await _build_payload(payload_messages, _reuse_session=True) logger.debug( f"Reused session {session.metadata} - sending {len(payload_messages)} prepared messages." 
) @@ -830,7 +857,7 @@ async def _build_payload( client = await pool.acquire() session = client.start_chat(model=model) payload_messages = messages - model_input, files = await _build_payload(payload_messages, reuse_session=False) + model_input, files = await _build_payload(payload_messages, _reuse_session=False) except ValueError as e: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) except RuntimeError as e: @@ -935,7 +962,6 @@ async def _build_payload( detail = f"{detail} Assistant response: {summary}" raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) - image_contents: list[ResponseOutputContent] = [] image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: @@ -943,16 +969,6 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue - mime_type = "image/png" if isinstance(image, GeneratedImage) else "image/jpeg" - image_contents.append( - ResponseOutputContent( - type="output_image", - image_base64=image_base64, - mime_type=mime_type, - width=width, - height=height, - ) - ) image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", @@ -977,7 +993,6 @@ async def _build_payload( response_contents: list[ResponseOutputContent] = [] if assistant_text: response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) - response_contents.extend(image_contents) if not response_contents: response_contents.append(ResponseOutputContent(type="output_text", text="")) diff --git a/app/services/client.py b/app/services/client.py index 166eb70..0207114 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -113,8 +113,10 @@ async def process_message( if file_data := item.file.get("file_data", None): filename = item.file.get("filename", "") files.append(await save_file_to_tempfile(file_data, filename, tempdir)) + elif url := item.file.get("url", None): + files.append(await save_url_to_tempfile(url, tempdir)) else: - raise ValueError("File must contain 'file_data' key") + raise ValueError("File must contain 'file_data' or 'url' key") elif message.content is not None: raise ValueError("Unsupported message content type.") diff --git a/app/utils/helper.py b/app/utils/helper.py index 48fc99d..3bff469 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,4 +1,5 @@ import base64 +import mimetypes import tempfile from pathlib import Path @@ -40,9 +41,16 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None): suffix: str | None = None if url.startswith("data:image/"): # Base64 encoded image + metadata_part = url.split(",")[0] + mime_type = metadata_part.split(":")[1].split(";")[0] + base64_data = url.split(",")[1] data = base64.b64decode(base64_data) - suffix = ".png" + + # Guess extension from mime type, default to the subtype if not found + suffix = mimetypes.guess_extension(mime_type) + if not suffix: + suffix = f".{mime_type.split('/')[1]}" else: # http files async with httpx.AsyncClient() as client: From d911c33e81e83211ed53d77b300c4c203df7b53c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 15:50:45 +0700 Subject: [PATCH 008/139] Refactor tool handling to support standard and image generation tools separately --- app/models/models.py | 7 ++++--- app/server/chat.py | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 74d8cd5..52dd414 
100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -174,7 +174,8 @@ class ResponseInputItem(BaseModel): class ResponseToolChoice(BaseModel): """Tool choice enforcing a specific tool in Responses API.""" - type: Literal["image_generation"] + type: Literal["function", "image_generation"] + function: Optional[ToolChoiceFunctionDetail] = None class ResponseImageTool(BaseModel): @@ -195,8 +196,8 @@ class ResponseCreateRequest(BaseModel): top_p: Optional[float] = 1.0 max_output_tokens: Optional[int] = None stream: Optional[bool] = False - tool_choice: Optional[ResponseToolChoice] = None - tools: Optional[List[ResponseImageTool]] = None + tool_choice: Optional[Union[str, ResponseToolChoice]] = None + tools: Optional[List[Union[Tool, ResponseImageTool]]] = None store: Optional[bool] = None user: Optional[str] = None response_format: Optional[Dict[str, Any]] = None diff --git a/app/server/chat.py b/app/server/chat.py index 76dc632..8277d0c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -795,7 +795,28 @@ async def create_response( f"Structured response requested for /v1/responses (schema={structured_requirement.schema_name})." ) - image_instruction = _build_image_generation_instruction(request.tools, request.tool_choice) + # Separate standard tools from image generation tools + standard_tools: list[Tool] = [] + image_tools: list[ResponseImageTool] = [] + + if request.tools: + for t in request.tools: + if isinstance(t, Tool): + standard_tools.append(t) + elif isinstance(t, ResponseImageTool): + image_tools.append(t) + # Handle dicts if Pydantic didn't convert them fully (fallback) + elif isinstance(t, dict): + t_type = t.get("type") + if t_type == "function": + standard_tools.append(Tool.model_validate(t)) + elif t_type == "image_generation": + image_tools.append(ResponseImageTool.model_validate(t)) + + image_instruction = _build_image_generation_instruction( + image_tools, + request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, + ) if image_instruction: extra_instructions.append(image_instruction) logger.debug("Image generation support enabled for /v1/responses request.") @@ -808,10 +829,19 @@ async def create_response( f"Injected {len(preface_messages)} instruction messages before sending to Gemini." ) + # Pass standard tools to the prompt builder + # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) + model_tool_choice = None + if isinstance(request.tool_choice, str): + model_tool_choice = request.tool_choice + elif isinstance(request.tool_choice, ToolChoiceFunction): + model_tool_choice = request.tool_choice + # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. 
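+    # String choices (e.g. "auto"/"none") and explicit ToolChoiceFunction values are forwarded unchanged.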
+ messages = _prepare_messages_for_model( conversation_messages, - tools=None, - tool_choice=None, + tools=standard_tools or None, + tool_choice=model_tool_choice, extra_instructions=extra_instructions or None, ) From a8241ad78831b675d0321bbe5271c1bf10a6ce2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 2 Dec 2025 17:17:27 +0700 Subject: [PATCH 009/139] Fix: use "ascii" decoding for base64-encoded image data consistency --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 8277d0c..67790ab 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1524,4 +1524,4 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non data = Path(saved_path).read_bytes() width, height = _extract_image_dimensions(data) - return base64.b64encode(data).decode("utf-8"), width, height + return base64.b64encode(data).decode("ascii"), width, height From fd2723d49b5929cb770a231aeb479f392f7a7d53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 12:08:19 +0700 Subject: [PATCH 010/139] Fix: replace `running` with `_running` for internal client status checks --- app/services/pool.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/services/pool.py b/app/services/pool.py index 24a21dc..28a3435 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -37,7 +37,7 @@ async def init(self) -> None: """Initialize all clients in the pool.""" success_count = 0 for client in self._clients: - if not client.running: + if not client._running: try: await client.init( timeout=g_config.gemini.timeout, @@ -48,7 +48,7 @@ async def init(self) -> None: except Exception: logger.exception(f"Failed to initialize client {client.id}") - if client.running: + if client._running: success_count += 1 if success_count == 0: @@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: """Make sure the client is running, attempting a restart if needed.""" - if client.running: + if client._running: return True lock = self._restart_locks.get(client.id) @@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False # Should not happen async with lock: - if client.running: + if client._running: return True try: @@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]: def status(self) -> Dict[str, bool]: """Return running status for each client.""" - return {client.id: client.running for client in self._clients} + return {client.id: client._running for client in self._clients} From 8ee6cc0335e4b63df2126a6bf69d6c9e42505485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 14:10:06 +0700 Subject: [PATCH 011/139] Refactor: replace direct `_running` access with `running()` method in client status checks --- app/services/client.py | 3 +++ app/services/pool.py | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 0207114..09c52c1 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -76,6 +76,9 @@ async def init( logger.exception(f"Failed to initialize GeminiClient {self.id}") raise + def running(self) -> bool: + return self._running + @staticmethod async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True 
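A note on the shape of this accessor: it keeps the pool off the private `_running` flag without changing what the flag means. A property would read slightly more naturally at call sites; a minimal sketch of that alternative (illustrative only — `is_running` is a hypothetical name, and this patch intentionally ships the method form so call sites stay `client.running()`):

    @property
    def is_running(self) -> bool:
        # same underlying flag as running(), exposed attribute-style
        return self._running
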
diff --git a/app/services/pool.py b/app/services/pool.py index 28a3435..a134dda 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -37,7 +37,7 @@ async def init(self) -> None: """Initialize all clients in the pool.""" success_count = 0 for client in self._clients: - if not client._running: + if not client.running(): try: await client.init( timeout=g_config.gemini.timeout, @@ -48,7 +48,7 @@ async def init(self) -> None: except Exception: logger.exception(f"Failed to initialize client {client.id}") - if client._running: + if client.running(): success_count += 1 if success_count == 0: @@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: """Make sure the client is running, attempting a restart if needed.""" - if client._running: + if client.running(): return True lock = self._restart_locks.get(client.id) @@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False # Should not happen async with lock: - if client._running: + if client.running(): return True try: @@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]: def status(self) -> Dict[str, bool]: """Return running status for each client.""" - return {client.id: client._running for client in self._clients} + return {client.id: client.running() for client in self._clients} From 453700eba682cfdd4bfc2e061a8139129654d017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 22:11:11 +0700 Subject: [PATCH 012/139] Extend models with new fields for annotations, reasoning, audio, log probabilities, and token details; adjust response handling accordingly. --- app/models/models.py | 13 ++++++++++++- app/server/chat.py | 7 ++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 52dd414..1d7368c 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -12,7 +12,9 @@ class ContentItem(BaseModel): type: Literal["text", "image_url", "file", "input_audio"] text: Optional[str] = None image_url: Optional[Dict[str, str]] = None + input_audio: Optional[Dict[str, Any]] = None file: Optional[Dict[str, str]] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) class Message(BaseModel): @@ -22,6 +24,10 @@ class Message(BaseModel): content: Union[str, List[ContentItem], None] = None name: Optional[str] = None tool_calls: Optional[List["ToolCall"]] = None + refusal: Optional[str] = None + reasoning_content: Optional[str] = None + audio: Optional[Dict[str, Any]] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) class Choice(BaseModel): @@ -30,6 +36,7 @@ class Choice(BaseModel): index: int message: Message finish_reason: str + logprobs: Optional[Dict[str, Any]] = None class FunctionCall(BaseModel): @@ -81,6 +88,8 @@ class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int + prompt_tokens_details: Optional[Dict[str, int]] = None + completion_tokens_details: Optional[Dict[str, int]] = None class ModelData(BaseModel): @@ -118,6 +127,8 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage + system_fingerprint: Optional[str] = None + service_tier: Optional[str] = None class ModelListResponse(BaseModel): @@ -217,6 +228,7 @@ class ResponseOutputContent(BaseModel): type: Literal["output_text"] text: Optional[str] = None + annotations: List[Dict[str, Any]] = 
Field(default_factory=list) class ResponseOutputMessage(BaseModel): @@ -257,7 +269,6 @@ class ResponseCreateResponse(BaseModel): created: int model: str output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] - output_text: Optional[str] = None status: Literal[ "in_progress", "completed", diff --git a/app/server/chat.py b/app/server/chat.py index 67790ab..5848a39 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1022,10 +1022,12 @@ async def _build_payload( response_contents: list[ResponseOutputContent] = [] if assistant_text: - response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) + response_contents.append( + ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) + ) if not response_contents: - response_contents.append(ResponseOutputContent(type="output_text", text="")) + response_contents.append(ResponseOutputContent(type="output_text", text="", annotations=[])) created_time = int(datetime.now(tz=timezone.utc).timestamp()) response_id = f"resp_{uuid.uuid4().hex}" @@ -1059,7 +1061,6 @@ async def _build_payload( *tool_call_items, *image_call_items, ], - output_text=assistant_text or None, status="completed", usage=usage, input=normalized_input or None, From 9260f8b5cc37192716d4127ed6ab98a087e7e3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 3 Dec 2025 22:51:54 +0700 Subject: [PATCH 013/139] Extend models with new fields (annotations, error), add `normalize_output_text` validator, rename `created` to `created_at`, and update response handling accordingly. --- app/models/models.py | 16 +++++++++++++--- app/server/chat.py | 8 ++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 1d7368c..8d5102c 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -3,7 +3,7 @@ from datetime import datetime from typing import Any, Dict, List, Literal, Optional, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator class ContentItem(BaseModel): @@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage - system_fingerprint: Optional[str] = None service_tier: Optional[str] = None @@ -172,6 +171,15 @@ class ResponseInputContent(BaseModel): file_url: Optional[str] = None file_data: Optional[str] = None filename: Optional[str] = None + annotations: List[Dict[str, Any]] = Field(default_factory=list) + + @model_validator(mode="before") + @classmethod + def normalize_output_text(cls, data: Any) -> Any: + """Allow output_text (from previous turns) to be treated as input_text.""" + if isinstance(data, dict) and data.get("type") == "output_text": + data["type"] = "input_text" + return data class ResponseInputItem(BaseModel): @@ -266,7 +274,7 @@ class ResponseCreateResponse(BaseModel): id: str object: Literal["response"] = "response" - created: int + created_at: int model: str output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] status: Literal[ @@ -274,9 +282,11 @@ class ResponseCreateResponse(BaseModel): "completed", "failed", "incomplete", + "cancelled", "requires_action", ] = "completed" usage: ResponseUsage + error: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None system_fingerprint: Optional[str] = None input: Optional[Union[str, List[ResponseInputItem]]] = None diff --git a/app/server/chat.py b/app/server/chat.py index 5848a39..ef508b9 100644 
--- a/app/server/chat.py +++ b/app/server/chat.py @@ -1049,7 +1049,7 @@ async def _build_payload( response_payload = ResponseCreateResponse( id=response_id, - created=created_time, + created_at=created_time, model=request.model, output=[ ResponseOutputMessage( @@ -1334,7 +1334,7 @@ def _create_responses_streaming_response( response_dict = response_payload.model_dump(mode="json") response_id = response_payload.id - created_time = response_payload.created + created_time = response_payload.created_at model = response_payload.model logger.debug( @@ -1344,14 +1344,14 @@ def _create_responses_streaming_response( base_event = { "id": response_id, "object": "response", - "created": created_time, + "created_at": created_time, "model": model, } created_snapshot: dict[str, Any] = { "id": response_id, "object": "response", - "created": created_time, + "created_at": created_time, "model": model, "status": "in_progress", } From d6a8e6bdb786bb90dd653cd9aa3fc88469c2b505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 09:35:30 +0700 Subject: [PATCH 014/139] Extend response models to support tool choices, image output, and improved streaming of response items. Refactor image generation handling for consistency and add compatibility with output content. --- app/models/models.py | 7 ++-- app/server/chat.py | 83 ++++++++++++++++++++++++++++++++------------ 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 8d5102c..bbc2140 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel): model: str choices: List[Choice] usage: Usage - service_tier: Optional[str] = None class ModelListResponse(BaseModel): @@ -234,8 +233,9 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text"] + type: Literal["output_text", "output_image"] text: Optional[str] = None + image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) @@ -285,10 +285,11 @@ class ResponseCreateResponse(BaseModel): "cancelled", "requires_action", ] = "completed" + tool_choice: Optional[Union[str, ResponseToolChoice]] = None + tools: Optional[List[Union[Tool, ResponseImageTool]]] = None usage: ResponseUsage error: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None - system_fingerprint: Optional[str] = None input: Optional[Union[str, List[ResponseInputItem]]] = None diff --git a/app/server/chat.py b/app/server/chat.py index ef508b9..cb498a5 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -992,6 +992,7 @@ async def _build_payload( detail = f"{detail} Assistant response: {summary}" raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) + response_contents: list[ResponseOutputContent] = [] image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: @@ -999,15 +1000,25 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue + + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", status="completed", result=image_base64, - output_format="png" if isinstance(image, GeneratedImage) else "jpeg", + output_format=img_format, size=f"{width}x{height}" if width and height else None, ) ) + # Add as output_image content for compatibility + 
response_contents.append( + ResponseOutputContent( + type="output_image", + image_url=f"data:image/{img_format};base64,{image_base64}", + annotations=[], + ) + ) tool_call_items: list[ResponseToolCall] = [] if detected_tool_calls: @@ -1020,7 +1031,6 @@ async def _build_payload( for call in detected_tool_calls ] - response_contents: list[ResponseOutputContent] = [] if assistant_text: response_contents.append( ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) @@ -1065,6 +1075,8 @@ async def _build_payload( usage=usage, input=normalized_input or None, metadata=request.metadata or None, + tools=request.tools, + tool_choice=request.tool_choice, ) try: @@ -1359,6 +1371,10 @@ def _create_responses_streaming_response( created_snapshot["metadata"] = response_dict["metadata"] if response_dict.get("input") is not None: created_snapshot["input"] = response_dict["input"] + if response_dict.get("tools") is not None: + created_snapshot["tools"] = response_dict["tools"] + if response_dict.get("tool_choice") is not None: + created_snapshot["tool_choice"] = response_dict["tool_choice"] async def generate_stream(): # Emit creation event @@ -1369,30 +1385,53 @@ async def generate_stream(): } yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - # Stream textual content, if any - if assistant_text: - for chunk in _iter_stream_segments(assistant_text): - delta_event = { - **base_event, - "type": "response.output_text.delta", - "output_index": 0, - "delta": chunk, - } - yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" - - done_event = { + # Stream output items (Message/Text, Tool Calls, Images) + for i, item in enumerate(response_payload.output): + item_json = item.model_dump(mode="json", exclude_none=True) + + added_event = { **base_event, - "type": "response.output_text.done", - "output_index": 0, + "type": "response.output_item.added", + "output_index": i, + "item": item_json, } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" - else: - done_event = { + yield f"data: {orjson.dumps(added_event).decode('utf-8')}\n\n" + + # 2. Stream content if it's a message (text) + if item.type == "message": + content_text = "" + # Aggregate text content to stream + for c in item.content: + if c.type == "output_text" and c.text: + content_text += c.text + + if content_text: + for chunk in _iter_stream_segments(content_text): + delta_event = { + **base_event, + "type": "response.output_text.delta", + "output_index": i, + "delta": chunk, + } + yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" + + # Text done + done_event = { + **base_event, + "type": "response.output_text.done", + "output_index": i, + } + yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" + + # 3. Emit output_item.done for all types + # This confirms the item is fully transferred. + item_done_event = { **base_event, - "type": "response.output_text.done", - "output_index": 0, + "type": "response.output_item.done", + "output_index": i, + "item": item_json, } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps(item_done_event).decode('utf-8')}\n\n" # Emit completed event with full payload completed_event = { From 16435a2ce12a4d37e9f3cfa758f384000aa41123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 09:50:47 +0700 Subject: [PATCH 015/139] Set default `text` value to an empty string for `ResponseOutputContent` and ensure consistent initialization in image output handling. 
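Likely motivation: strict OpenAI-compatible clients often read `content[i].text` without a null check, so a `None` text on `output_image` parts can crash them; defaulting to "" avoids that while leaving `output_text` behavior unchanged.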
--- app/models/models.py | 2 +- app/server/chat.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/models.py b/app/models/models.py index bbc2140..2c987b8 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -234,7 +234,7 @@ class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" type: Literal["output_text", "output_image"] - text: Optional[str] = None + text: Optional[str] = "" image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) diff --git a/app/server/chat.py b/app/server/chat.py index cb498a5..7745a26 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1015,6 +1015,7 @@ async def _build_payload( response_contents.append( ResponseOutputContent( type="output_image", + text="", image_url=f"data:image/{img_format};base64,{image_base64}", annotations=[], ) From fc99c2d60193f346006f5cf17af4e849d8ea2669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:03:50 +0700 Subject: [PATCH 016/139] feat: Add /images endpoint with dedicated router and improved image management Add dedicated router for /images endpoint and refactor image handling logic for better modularity. Enhance temporary image management with secure naming, token verification, and cleanup functionality. --- app/main.py | 9 +++++- app/server/chat.py | 35 ++++++++++++++++-------- app/server/images.py | 15 ++++++++++ app/server/middleware.py | 59 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 app/server/images.py diff --git a/app/main.py b/app/main.py index 95458d3..c215e2a 100644 --- a/app/main.py +++ b/app/main.py @@ -6,7 +6,12 @@ from .server.chat import router as chat_router from .server.health import router as health_router -from .server.middleware import add_cors_middleware, add_exception_handler +from .server.images import router as images_router +from .server.middleware import ( + add_cors_middleware, + add_exception_handler, + cleanup_expired_images, +) from .services import GeminiClientPool, LMDBConversationStore RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60 # 6 hours @@ -28,6 +33,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None: while not stop_event.is_set(): try: store.cleanup_expired() + cleanup_expired_images(store.retention_days) except Exception: logger.exception("LMDB retention cleanup task failed.") @@ -93,5 +99,6 @@ def create_app() -> FastAPI: app.include_router(health_router, tags=["Health"]) app.include_router(chat_router, tags=["Chat"]) + app.include_router(images_router, tags=["Images"]) return app diff --git a/app/server/chat.py b/app/server/chat.py index 7745a26..db92dbc 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -44,7 +44,7 @@ from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config from ..utils.helper import estimate_tokens -from .middleware import get_temp_dir, verify_api_key +from .middleware import get_image_store_dir, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) @@ -588,6 +588,7 @@ async def create_chat_completion( request: ChatCompletionRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), ): pool = GeminiClientPool() db = LMDBConversationStore() @@ -775,6 +776,7 @@ async def 
create_response( request: ResponseCreateRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), ): base_messages, normalized_input = _response_items_to_messages(request.input) if not base_messages: @@ -996,12 +998,16 @@ async def _build_payload( image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: - image_base64, width, height = await _image_to_base64(image, tmp_dir) + image_base64, width, height, filename = await _image_to_base64(image, tmp_dir) except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + + # Use static URL for compatibility + image_url = f"{request.base_url}images/{filename}" + image_call_items.append( ResponseImageGenerationCall( id=f"img_{uuid.uuid4().hex}", @@ -1013,12 +1019,10 @@ async def _build_payload( ) # Add as output_image content for compatibility response_contents.append( - ResponseOutputContent( - type="output_image", - text="", - image_url=f"data:image/{img_format};base64,{image_base64}", - annotations=[], - ) + ResponseOutputContent(type="output_text", text=image_url, annotations=[]) + ) + response_contents.append( + ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[]) ) tool_call_items: list[ResponseToolCall] = [] @@ -1553,8 +1557,8 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: return None, None -async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None]: - """Persist an image provided by gemini_webapi and return base64 plus dimensions.""" +async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: + """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): saved_path = await image.save(path=str(temp_dir), full_size=True) else: @@ -1563,6 +1567,13 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non if not saved_path: raise ValueError("Failed to save generated image") - data = Path(saved_path).read_bytes() + # Rename file to a random UUID to ensure uniqueness and unpredictability + original_path = Path(saved_path) + random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" + new_path = temp_dir / random_name + original_path.rename(new_path) + + data = new_path.read_bytes() width, height = _extract_image_dimensions(data) - return base64.b64encode(data).decode("ascii"), width, height + filename = random_name + return base64.b64encode(data).decode("ascii"), width, height, filename diff --git a/app/server/images.py b/app/server/images.py new file mode 100644 index 0000000..2867239 --- /dev/null +++ b/app/server/images.py @@ -0,0 +1,15 @@ +from fastapi import APIRouter, HTTPException +from fastapi.responses import FileResponse + +from ..server.middleware import get_image_store_dir + +router = APIRouter() + + +@router.get("/images/{filename}", tags=["Images"]) +async def get_image(filename: str): + image_store = get_image_store_dir() + file_path = image_store / filename + if not file_path.exists(): + raise HTTPException(status_code=404, detail="Image not found") + return FileResponse(file_path) diff --git a/app/server/middleware.py b/app/server/middleware.py index b12024f..60e4c8d 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -1,13 +1,72 @@ +import hashlib +import 
hmac import tempfile +import time from pathlib import Path from fastapi import Depends, FastAPI, HTTPException, Request, status from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import ORJSONResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from loguru import logger from ..utils import g_config +# Persistent directory for storing generated images +IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images" +IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True) + + +def get_image_store_dir() -> Path: + """Returns a persistent directory for storing images.""" + return IMAGE_STORE_DIR + + +def get_image_token(filename: str) -> str: + """Generate a HMAC-SHA256 token for a filename using the API key.""" + secret = g_config.server.api_key + if not secret: + return "" + + msg = filename.encode("utf-8") + secret_bytes = secret.encode("utf-8") + return hmac.new(secret_bytes, msg, hashlib.sha256).hexdigest() + + +def verify_image_token(filename: str, token: str | None) -> bool: + """Verify the provided token against the filename.""" + expected = get_image_token(filename) + if not expected: + return True # No auth required + if not token: + return False + return hmac.compare_digest(token, expected) + + +def cleanup_expired_images(retention_days: int) -> int: + """Delete images in IMAGE_STORE_DIR older than retention_days.""" + if retention_days <= 0: + return 0 + + now = time.time() + retention_seconds = retention_days * 24 * 60 * 60 + cutoff = now - retention_seconds + + count = 0 + for file_path in IMAGE_STORE_DIR.iterdir(): + if not file_path.is_file(): + continue + try: + if file_path.stat().st_mtime < cutoff: + file_path.unlink() + count += 1 + except Exception as e: + logger.warning(f"Failed to delete expired image {file_path}: {e}") + + if count > 0: + logger.info(f"Cleaned up {count} expired images.") + return count + def global_exception_handler(request: Request, exc: Exception): if isinstance(exc, HTTPException): From 28441765f3fa47787027620cdc4a6d9e7ddbdd94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:10:29 +0700 Subject: [PATCH 017/139] feat: Add token-based verification for image access --- app/server/chat.py | 4 ++-- app/server/images.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index db92dbc..9371137 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -44,7 +44,7 @@ from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config from ..utils.helper import estimate_tokens -from .middleware import get_image_store_dir, get_temp_dir, verify_api_key +from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) @@ -1006,7 +1006,7 @@ async def _build_payload( img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" # Use static URL for compatibility - image_url = f"{request.base_url}images/{filename}" + image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}" image_call_items.append( ResponseImageGenerationCall( diff --git a/app/server/images.py b/app/server/images.py index 2867239..fe078f7 100644 --- a/app/server/images.py +++ b/app/server/images.py @@ -1,13 +1,16 @@ -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, 
Query from fastapi.responses import FileResponse -from ..server.middleware import get_image_store_dir +from ..server.middleware import get_image_store_dir, verify_image_token router = APIRouter() @router.get("/images/{filename}", tags=["Images"]) -async def get_image(filename: str): +async def get_image(filename: str, token: str | None = Query(default=None)): + if not verify_image_token(filename, token): + raise HTTPException(status_code=403, detail="Invalid token") + image_store = get_image_store_dir() file_path = image_store / filename if not file_path.exists(): From 4509c14dfd5a38dfa6b989b3e9ac308e3bc8c982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:22:07 +0700 Subject: [PATCH 018/139] Refactor: rename image store directory to `ai_generated_images` for clarity --- app/server/middleware.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/middleware.py b/app/server/middleware.py index 60e4c8d..630e1f5 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -13,7 +13,7 @@ from ..utils import g_config # Persistent directory for storing generated images -IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images" +IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images" IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True) From 75e2f61d3a6b1d12269af2ee82344ab643f34e83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:31:15 +0700 Subject: [PATCH 019/139] fix: Update create_response to use FastAPI Request object for base_url and refactor variable handling --- app/server/chat.py | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 9371137..0010f4a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -9,7 +9,7 @@ from typing import Any, Iterator import orjson -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model @@ -773,19 +773,15 @@ async def create_chat_completion( @router.post("/v1/responses") async def create_response( - request: ResponseCreateRequest, + request_data: ResponseCreateRequest, + request: Request, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): - base_messages, normalized_input = _response_items_to_messages(request.input) - if not base_messages: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="No message input provided." - ) - - structured_requirement = _build_structured_requirement(request.response_format) - if structured_requirement and request.stream: + base_messages, normalized_input = _response_items_to_messages(request_data.input) + structured_requirement = _build_structured_requirement(request_data.response_format) + if structured_requirement and request_data.stream: logger.debug( "Structured response requested with streaming enabled; streaming not supported for Responses." 
) @@ -801,7 +797,7 @@ async def create_response( standard_tools: list[Tool] = [] image_tools: list[ResponseImageTool] = [] - if request.tools: + if request_data.tools: for t in request.tools: if isinstance(t, Tool): standard_tools.append(t) @@ -817,13 +813,15 @@ async def create_response( image_instruction = _build_image_generation_instruction( image_tools, - request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, + request_data.tool_choice + if isinstance(request_data.tool_choice, ResponseToolChoice) + else None, ) if image_instruction: extra_instructions.append(image_instruction) logger.debug("Image generation support enabled for /v1/responses request.") - preface_messages = _instructions_to_messages(request.instructions) + preface_messages = _instructions_to_messages(request_data.instructions) conversation_messages = base_messages if preface_messages: conversation_messages = [*preface_messages, *base_messages] @@ -834,10 +832,10 @@ async def create_response( # Pass standard tools to the prompt builder # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) model_tool_choice = None - if isinstance(request.tool_choice, str): - model_tool_choice = request.tool_choice - elif isinstance(request.tool_choice, ToolChoiceFunction): - model_tool_choice = request.tool_choice + if isinstance(request_data.tool_choice, str): + model_tool_choice = request_data.tool_choice + elif isinstance(request_data.tool_choice, ToolChoiceFunction): + model_tool_choice = request_data.tool_choice # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. messages = _prepare_messages_for_model( @@ -851,7 +849,7 @@ async def create_response( db = LMDBConversationStore() try: - model = Model.from_name(request.model) + model = Model.from_name(request_data.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc @@ -971,7 +969,7 @@ async def _build_payload( ) expects_image = ( - request.tool_choice is not None and request.tool_choice.type == "image_generation" + request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" ) images = model_output.images or [] logger.debug( @@ -1065,7 +1063,7 @@ async def _build_payload( response_payload = ResponseCreateResponse( id=response_id, created_at=created_time, - model=request.model, + model=request_data.model, output=[ ResponseOutputMessage( id=message_id, @@ -1079,9 +1077,9 @@ async def _build_payload( status="completed", usage=usage, input=normalized_input or None, - metadata=request.metadata or None, - tools=request.tools, - tool_choice=request.tool_choice, + metadata=request_data.metadata or None, + tools=request_data.tools, + tool_choice=request_data.tool_choice, ) try: From bde6d0d146fc9088df947cfc0958dc88963e93ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:35:44 +0700 Subject: [PATCH 020/139] fix: Correct attribute access in request_data handling within `chat.py` for tools, tool_choice, and streaming settings --- app/server/chat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0010f4a..9a3f19f 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -798,7 +798,7 @@ async def create_response( image_tools: list[ResponseImageTool] = [] if request_data.tools: - for t in request.tools: + for t in request_data.tools: if 
isinstance(t, Tool): standard_tools.append(t) elif isinstance(t, ResponseImageTool): @@ -984,7 +984,7 @@ async def _build_payload( summary = f"{summary[:197]}..." logger.warning( "Image generation requested but Gemini produced no images. " - f"client_id={client_id}, forced_tool_choice={request.tool_choice is not None}, " + f"client_id={client_id}, forced_tool_choice={request_data.tool_choice is not None}, " f"instruction_applied={bool(image_instruction)}, assistant_preview='{summary}'" ) detail = "LLM returned no images for the requested image_generation tool." @@ -1100,7 +1100,7 @@ async def _build_payload( except Exception as exc: logger.warning(f"Failed to save Responses conversation to LMDB: {exc}") - if request.stream: + if request_data.stream: logger.debug( f"Streaming Responses API payload (response_id={response_payload.id}, text_chunks={bool(assistant_text)})." ) From 601451a8dbf8cf689a482fd75cda399b5e815cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:45:49 +0700 Subject: [PATCH 021/139] fix: Save generated images to persistent storage --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 9a3f19f..4246c53 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -996,7 +996,7 @@ async def _build_payload( image_call_items: list[ResponseImageGenerationCall] = [] for image in images: try: - image_base64, width, height, filename = await _image_to_base64(image, tmp_dir) + image_base64, width, height, filename = await _image_to_base64(image, image_store) except Exception as exc: logger.warning(f"Failed to download generated image: {exc}") continue From 893eb6d47305f60c4b13896bfc48beb89909dd88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 11:51:42 +0700 Subject: [PATCH 022/139] fix: Remove unused `output_image` type from `ResponseOutputContent` and update response handling for consistency --- app/models/models.py | 3 +-- app/server/chat.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 2c987b8..c27e024 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -233,9 +233,8 @@ class ResponseUsage(BaseModel): class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" - type: Literal["output_text", "output_image"] + type: Literal["output_text"] text: Optional[str] = "" - image_url: Optional[str] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) diff --git a/app/server/chat.py b/app/server/chat.py index 4246c53..3396df0 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1015,13 +1015,10 @@ async def _build_payload( size=f"{width}x{height}" if width and height else None, ) ) - # Add as output_image content for compatibility + # Add as output_text content for compatibility response_contents.append( ResponseOutputContent(type="output_text", text=image_url, annotations=[]) ) - response_contents.append( - ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[]) - ) tool_call_items: list[ResponseToolCall] = [] if detected_tool_calls: From 80462b586a110cad7e5b5cc259424e405ecbafc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 4 Dec 2025 13:24:14 +0700 Subject: [PATCH 023/139] fix: Update image URL generation in chat response to use Markdown format for compatibility --- app/server/chat.py | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index 3396df0..c2a60ab 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1004,7 +1004,9 @@ async def _build_payload( img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" # Use static URL for compatibility - image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}" + image_url = ( + f"![{filename}]({request.base_url}images/{filename}?token={get_image_token(filename)})" + ) image_call_items.append( ResponseImageGenerationCall( From 8d49a72e0b5c605e2439d6dcbf149925cb670ded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 8 Dec 2025 09:45:58 +0700 Subject: [PATCH 024/139] fix: Enhance error handling for full-size image saving and add fallback to default size --- app/server/chat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index c2a60ab..d14e9ce 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1557,7 +1557,11 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): - saved_path = await image.save(path=str(temp_dir), full_size=True) + try: + saved_path = await image.save(path=str(temp_dir), full_size=True) + except Exception as e: + logger.warning(f"Failed to download full-size image, retrying with default size: {e}") + saved_path = await image.save(path=str(temp_dir), full_size=False) else: saved_path = await image.save(path=str(temp_dir)) From d37eae0ab8c4590b3301dc8853ef22a512ab0d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 9 Dec 2025 20:46:03 +0700 Subject: [PATCH 025/139] fix: Use filename as image ID to ensure consistency in generated image handling --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index d14e9ce..fc69293 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1010,7 +1010,7 @@ async def _build_payload( image_call_items.append( ResponseImageGenerationCall( - id=f"img_{uuid.uuid4().hex}", + id=filename.split(".")[0], status="completed", result=image_base64, output_format=img_format, From b9f776dfbb9d251ee016e05a1f6001907c3f8b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 16 Dec 2025 19:50:07 +0700 Subject: [PATCH 026/139] fix: Enhance tempfile saving by adding custom headers, content-type handling, and improved extension determination --- app/utils/helper.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 3bff469..89fc31e 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -2,12 +2,17 @@ import mimetypes import tempfile from pathlib import Path +from urllib.parse import urlparse import httpx from loguru import logger VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" +} + def add_tag(role: str, content: str, unclose: bool = False) -> str: """Surround content with role tags""" @@ -36,7 +41,7 @@ async def save_file_to_tempfile( return path -async def save_url_to_tempfile(url: 
str, tempdir: Path | None = None): +async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: data: bytes | None = None suffix: str | None = None if url.startswith("data:image/"): @@ -47,17 +52,26 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None): base64_data = url.split(",")[1] data = base64.b64decode(base64_data) - # Guess extension from mime type, default to the subtype if not found suffix = mimetypes.guess_extension(mime_type) if not suffix: suffix = f".{mime_type.split('/')[1]}" else: - # http files - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content - suffix = Path(url).suffix or ".bin" + content_type = resp.headers.get("content-type") + + if content_type: + mime_type = content_type.split(";")[0].strip() + suffix = mimetypes.guess_extension(mime_type) + + if not suffix: + path_url = urlparse(url).path + suffix = Path(path_url).suffix + + if not suffix: + suffix = ".bin" with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp: tmp.write(data) From 4b5fe078250ce0496ca93b1861f9622fc5171746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 30 Dec 2025 22:39:05 +0700 Subject: [PATCH 027/139] feat: Add support for custom Gemini models and model loading strategies - Introduced `model_strategy` configuration for "append" (default + custom models) or "overwrite" (custom models only). - Enhanced `/v1/models` endpoint to return models based on the configured strategy. - Improved model loading with environment variable overrides and validation. - Refactored model handling logic for improved modularity and error handling. --- app/server/chat.py | 70 ++++++++++++++++++++++++++++++++++-------- app/utils/config.py | 75 ++++++++++++++++++++++++++++++++++++++++++++- config/config.yaml | 5 +++ 3 files changed, 136 insertions(+), 14 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index fc69293..0a4c16c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -562,24 +562,64 @@ def _replace(match: re.Match[str]) -> str: return cleaned, tool_calls -@router.get("/v1/models", response_model=ModelListResponse) -async def list_models(api_key: str = Depends(verify_api_key)): - now = int(datetime.now(tz=timezone.utc).timestamp()) +def _get_model_by_name(name: str) -> Model: + """ + Retrieve a Model instance by name, considering custom models from config + and the update strategy (append or overwrite). + """ + strategy = g_config.gemini.model_strategy + custom_models = {m.model_name: m for m in g_config.gemini.models if m.model_name} - models = [] - for model in Model: - m_name = model.model_name - if not m_name or m_name == "unspecified": - continue + if name in custom_models: + return Model.from_dict(custom_models[name].model_dump()) + + if strategy == "overwrite": + raise ValueError(f"Model '{name}' not found in custom models (strategy='overwrite').") - models.append( + return Model.from_name(name) + + +def _get_available_models() -> list[ModelData]: + """ + Return a list of available models based on configuration strategy. 
+ """ + now = int(datetime.now(tz=timezone.utc).timestamp()) + strategy = g_config.gemini.model_strategy + models_data = [] + + custom_models = [m for m in g_config.gemini.models if m.model_name] + for m in custom_models: + models_data.append( ModelData( - id=m_name, + id=m.model_name, created=now, - owned_by="gemini-web", + owned_by="custom", ) ) + if strategy == "append": + custom_ids = {m.model_name for m in custom_models} + for model in Model: + m_name = model.model_name + if not m_name or m_name == "unspecified": + continue + if m_name in custom_ids: + continue + + models_data.append( + ModelData( + id=m_name, + created=now, + owned_by="gemini-web", + ) + ) + + return models_data + + +@router.get("/v1/models", response_model=ModelListResponse) +async def list_models(api_key: str = Depends(verify_api_key)): + models = _get_available_models() return ModelListResponse(data=models) @@ -592,7 +632,11 @@ async def create_chat_completion( ): pool = GeminiClientPool() db = LMDBConversationStore() - model = Model.from_name(request.model) + + try: + model = _get_model_by_name(request.model) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc if len(request.messages) == 0: raise HTTPException( @@ -849,7 +893,7 @@ async def create_response( db = LMDBConversationStore() try: - model = Model.from_name(request_data.model) + model = _get_model_by_name(request_data.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc diff --git a/app/utils/config.py b/app/utils/config.py index 796ca75..a5c924a 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -50,12 +50,26 @@ def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]: return stripped or None +class GeminiModelConfig(BaseModel): + """Configuration for a custom Gemini model.""" + + model_name: Optional[str] = Field(default=None, description="Name of the model") + model_header: Optional[dict[str, Optional[str]]] = Field( + default=None, description="Header for the model" + ) + + class GeminiConfig(BaseModel): """Gemini API configuration""" clients: list[GeminiClientSettings] = Field( ..., description="List of Gemini client credential pairs" ) + models: list[GeminiModelConfig] = Field(default=[], description="List of custom Gemini models") + model_strategy: Literal["append", "overwrite"] = Field( + default="append", + description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", + ) timeout: int = Field(default=120, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( @@ -68,6 +82,13 @@ class GeminiConfig(BaseModel): description="Maximum characters Gemini Web can accept per request", ) + @field_validator("models") + @classmethod + def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: + """Filter out models that don't have a name set (placeholders).""" + + return [model for model in v if model.model_name] + class CORSConfig(BaseModel): """CORS configuration""" @@ -211,6 +232,53 @@ def _merge_clients_with_env( return result_clients if result_clients else base_clients +def extract_gemini_models_env() -> dict[int, dict[str, str]]: + """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict.""" + prefix = "CONFIG_GEMINI__MODELS__" + env_overrides: dict[int, dict[str, str]] = {} + 
to_delete = [] + for k, v in os.environ.items(): + if k.startswith(prefix): + parts = k.split("__") + if len(parts) < 4: + continue + index_str, field = parts[2], parts[3].lower() + if not index_str.isdigit(): + continue + idx = int(index_str) + env_overrides.setdefault(idx, {})[field] = v + to_delete.append(k) + # Remove these environment variables to avoid Pydantic parsing errors + for k in to_delete: + del os.environ[k] + return env_overrides + + +def _merge_models_with_env( + base_models: list[GeminiModelConfig] | None, + env_overrides: dict[int, dict[str, str]], +): + """Override base_models with env_overrides, return the new models list.""" + if not env_overrides: + return base_models or [] + result_models: list[GeminiModelConfig] = [] + if base_models: + result_models = [model.model_copy() for model in base_models] + + for idx in sorted(env_overrides): + overrides = env_overrides[idx] + if idx < len(result_models): + model_dict = result_models[idx].model_dump() + model_dict.update(overrides) + result_models[idx] = GeminiModelConfig(**model_dict) + elif idx == len(result_models): + new_model = GeminiModelConfig(**overrides) + result_models.append(new_model) + else: + raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).") + return result_models + + def initialize_config() -> Config: """ Initialize the configuration. @@ -221,6 +289,8 @@ def initialize_config() -> Config: try: # First, extract and remove Gemini clients related environment variables env_clients_overrides = extract_gemini_clients_env() + # Extract and remove Gemini models related environment variables + env_models_overrides = extract_gemini_models_env() # Then, initialize Config with pydantic_settings config = Config() # type: ignore @@ -228,7 +298,10 @@ def initialize_config() -> Config: # Synthesize clients config.gemini.clients = _merge_clients_with_env( config.gemini.clients, env_clients_overrides - ) # type: ignore + ) + + # Synthesize models + config.gemini.models = _merge_models_with_env(config.gemini.models, env_models_overrides) return config except ValidationError as e: diff --git a/config/config.yaml b/config/config.yaml index 89c88b7..84c4602 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -27,6 +27,11 @@ gemini: refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit + models: + - model_name: null + model_header: + x-goog-ext-xxxxxxxxx-jspb: null + model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) storage: path: "data/lmdb" # Database storage path From 5cb29e8ea7333fd3c207f60a75b5269105bae8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 30 Dec 2025 23:19:49 +0700 Subject: [PATCH 028/139] feat: Improve Gemini model environment variable parsing and nested field support - Enhanced `extract_gemini_models_env` to handle nested fields within environment variables. - Updated type hints for more flexibility in model overrides. - Improved `_merge_models_with_env` to better support field-level updates and appending new models. 
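A sketch of the nested form this parser accepts (the variable names and header key below are illustrative, not part of the change itself):

```python
import os

from app.utils.config import extract_gemini_models_env

# Hypothetical overrides for model index 0; top-level field names are
# lowercased, while sub-keys after MODEL_HEADER keep their original casing.
os.environ["CONFIG_GEMINI__MODELS__0__MODEL_NAME"] = "gemini-custom"
os.environ["CONFIG_GEMINI__MODELS__0__MODEL_HEADER__x-goog-ext-0-jspb"] = "[1]"

overrides = extract_gemini_models_env()
# -> {0: {"model_name": "gemini-custom",
#         "model_header": {"x-goog-ext-0-jspb": "[1]"}}}
# The matched variables are removed from os.environ as a side effect.
```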
--- app/utils/config.py | 31 +++++++++++++++++++++++-------- config/config.yaml | 2 +- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index a5c924a..5782c66 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,6 +1,6 @@ import os import sys -from typing import Literal, Optional +from typing import Any, Literal, Optional from loguru import logger from pydantic import BaseModel, Field, ValidationError, field_validator @@ -232,21 +232,34 @@ def _merge_clients_with_env( return result_clients if result_clients else base_clients -def extract_gemini_models_env() -> dict[int, dict[str, str]]: - """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict.""" +def extract_gemini_models_env() -> dict[int, dict[str, Any]]: + """Extract and remove all Gemini models related environment variables, supporting nested fields.""" prefix = "CONFIG_GEMINI__MODELS__" - env_overrides: dict[int, dict[str, str]] = {} + env_overrides: dict[int, dict[str, Any]] = {} to_delete = [] for k, v in os.environ.items(): if k.startswith(prefix): parts = k.split("__") if len(parts) < 4: continue - index_str, field = parts[2], parts[3].lower() + index_str = parts[2] if not index_str.isdigit(): continue idx = int(index_str) - env_overrides.setdefault(idx, {})[field] = v + + # Navigate to the correct nested dict + current = env_overrides.setdefault(idx, {}) + for i in range(3, len(parts) - 1): + field_name = parts[i].lower() + current = current.setdefault(field_name, {}) + + # Set the value (lowercase root field names, preserve sub-key casing) + last_part = parts[-1] + if len(parts) == 4: + current[last_part.lower()] = v + else: + current[last_part] = v + to_delete.append(k) # Remove these environment variables to avoid Pydantic parsing errors for k in to_delete: @@ -256,9 +269,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, str]]: def _merge_models_with_env( base_models: list[GeminiModelConfig] | None, - env_overrides: dict[int, dict[str, str]], + env_overrides: dict[int, dict[str, Any]], ): - """Override base_models with env_overrides, return the new models list.""" + """Override base_models with env_overrides using standard update (replace whole fields).""" if not env_overrides: return base_models or [] result_models: list[GeminiModelConfig] = [] @@ -268,10 +281,12 @@ def _merge_models_with_env( for idx in sorted(env_overrides): overrides = env_overrides[idx] if idx < len(result_models): + # Update existing model: overwrite fields found in env model_dict = result_models[idx].model_dump() model_dict.update(overrides) result_models[idx] = GeminiModelConfig(**model_dict) elif idx == len(result_models): + # Append new model new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: diff --git a/config/config.yaml b/config/config.yaml index 84c4602..2fbc061 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -27,11 +27,11 @@ gemini: refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. 
Non-pro users might have a lower limit + model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) models: - model_name: null model_header: x-goog-ext-xxxxxxxxx-jspb: null - model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) storage: path: "data/lmdb" # Database storage path From f25f16d00118ebeea7936cea34797270d5137b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 09:52:49 +0700 Subject: [PATCH 029/139] refactor: Consolidate utility functions and clean up unused code - Moved utility functions like `strip_code_fence`, `extract_tool_calls`, and `iter_stream_segments` to a centralized helper module. - Removed unused and redundant private methods from `chat.py`, including `_strip_code_fence`, `_strip_tagged_blocks`, and `_strip_system_hints`. - Updated imports and references across modules for consistency. - Simplified tool call and streaming logic by replacing inline implementations with shared helper functions. --- app/server/chat.py | 306 ++++------------------------------------ app/services/client.py | 16 +-- app/utils/helper.py | 312 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 342 insertions(+), 292 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0a4c16c..9485f7a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,12 +1,11 @@ import base64 import json import re -import struct import uuid from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any, Iterator +from typing import Any import orjson from fastapi import APIRouter, Depends, HTTPException, Request, status @@ -21,7 +20,6 @@ ChatCompletionRequest, ContentItem, ConversationInStore, - FunctionCall, Message, ModelData, ModelListResponse, @@ -37,26 +35,28 @@ ResponseToolChoice, ResponseUsage, Tool, - ToolCall, ToolChoiceFunction, ) from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore -from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT from ..utils import g_config -from ..utils.helper import estimate_tokens +from ..utils.helper import ( + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + XML_HINT_STRIPPED, + XML_WRAP_HINT, + estimate_tokens, + extract_image_dimensions, + extract_tool_calls, + iter_stream_segments, + remove_tool_call_blocks, + strip_code_fence, + text_from_message, +) from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" -TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE) -TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE -) -JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) -CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") -XML_HINT_STRIPPED = XML_WRAP_HINT.strip() -CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip() router = APIRouter() @@ -118,14 +118,6 @@ def _build_structured_requirement( ) -def _strip_code_fence(text: str) -> str: - """Remove surrounding ```json fences if present.""" - match = JSON_FENCE_RE.match(text.strip()) - if match: - return match.group(1).strip() - return text.strip() - - def _build_tool_prompt( tools: list[Tool], tool_choice: str | ToolChoiceFunction | None, @@ -312,75 +304,6 @@ def 
_prepare_messages_for_model( return prepared -def _strip_system_hints(text: str) -> str: - """Remove system-level hint text from a given string.""" - if not text: - return text - cleaned = _strip_tagged_blocks(text) - cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") - cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "") - cleaned = CONTROL_TOKEN_RE.sub("", cleaned) - return cleaned.strip() - - -def _strip_tagged_blocks(text: str) -> str: - """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely. - - tool blocks are removed entirely (if missing end marker, drop to EOF). - - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF). - """ - if not text: - return text - - result: list[str] = [] - idx = 0 - length = len(text) - start_marker = "<|im_start|>" - end_marker = "<|im_end|>" - - while idx < length: - start = text.find(start_marker, idx) - if start == -1: - result.append(text[idx:]) - break - - # append any content before this block - result.append(text[idx:start]) - - role_start = start + len(start_marker) - newline = text.find("\n", role_start) - if newline == -1: - # malformed block; keep remainder as-is (safe behavior) - result.append(text[start:]) - break - - role = text[role_start:newline].strip().lower() - - end = text.find(end_marker, newline + 1) - if end == -1: - # missing end marker - if role == "tool": - # drop from start marker to EOF (skip remainder) - break - else: - # keep inner content from after the role newline to EOF - result.append(text[newline + 1 :]) - break - - block_end = end + len(end_marker) - - if role == "tool": - # drop whole block - idx = block_end - continue - - # keep the content without role markers - content = text[newline + 1 : end] - result.append(content) - idx = block_end - - return "".join(result) - - def _response_items_to_messages( items: str | list[ResponseInputItem], ) -> tuple[list[Message], str | list[ResponseInputItem]]: @@ -509,59 +432,6 @@ def _instructions_to_messages( return instruction_messages -def _remove_tool_call_blocks(text: str) -> str: - """Strip tool call code blocks from text.""" - if not text: - return text - cleaned = TOOL_BLOCK_RE.sub("", text) - return _strip_system_hints(cleaned) - - -def _extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: - """Extract tool call definitions and return cleaned text.""" - if not text: - return text, [] - - tool_calls: list[ToolCall] = [] - - def _replace(match: re.Match[str]) -> str: - block_content = match.group(1) - if not block_content: - return "" - - for call_match in TOOL_CALL_RE.finditer(block_content): - name = (call_match.group(1) or "").strip() - raw_args = (call_match.group(2) or "").strip() - if not name: - logger.warning( - f"Encountered tool_call block without a function name: {block_content}" - ) - continue - - arguments = raw_args - try: - parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False) - except json.JSONDecodeError: - logger.warning( - f"Failed to parse tool call arguments for '{name}'. Passing raw string." 
- ) - - tool_calls.append( - ToolCall( - id=f"call_{uuid.uuid4().hex}", - type="function", - function=FunctionCall(name=name, arguments=arguments), - ) - ) - - return "" - - cleaned = TOOL_BLOCK_RE.sub(_replace, text) - cleaned = _strip_system_hints(cleaned) - return cleaned, tool_calls - - def _get_model_by_name(name: str) -> Model: """ Retrieve a Model instance by name, considering custom models from config @@ -742,12 +612,12 @@ async def create_chat_completion( detail="Gemini output parsing failed unexpectedly.", ) from exc - visible_output, tool_calls = _extract_tool_calls(raw_output_with_think) - storage_output = _remove_tool_call_blocks(raw_output_clean).strip() + visible_output, tool_calls = extract_tool_calls(raw_output_with_think) + storage_output = remove_tool_call_blocks(raw_output_clean).strip() tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] if structured_requirement: - cleaned_visible = _strip_code_fence(visible_output or "") + cleaned_visible = strip_code_fence(visible_output or "") if not cleaned_visible: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, @@ -982,12 +852,12 @@ async def _build_payload( detail="Gemini output parsing failed unexpectedly.", ) from exc - visible_text, detected_tool_calls = _extract_tool_calls(text_with_think) - storage_output = _remove_tool_call_blocks(text_without_think).strip() + visible_text, detected_tool_calls = extract_tool_calls(text_with_think) + storage_output = remove_tool_call_blocks(text_without_think).strip() assistant_text = LMDBConversationStore.remove_think_tags(visible_text.strip()) if structured_requirement: - cleaned_visible = _strip_code_fence(assistant_text or "") + cleaned_visible = strip_code_fence(assistant_text or "") if not cleaned_visible: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, @@ -1089,7 +959,7 @@ async def _build_payload( response_id = f"resp_{uuid.uuid4().hex}" message_id = f"msg_{uuid.uuid4().hex}" - input_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + input_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_arg_text = "".join(call.function.arguments or "" for call in detected_tool_calls) completion_basis = assistant_text or "" if tool_arg_text: @@ -1152,25 +1022,6 @@ async def _build_payload( return response_payload -def _text_from_message(message: Message) -> str: - """Return text content from a message for token estimation.""" - base_text = "" - if isinstance(message.content, str): - base_text = message.content - elif isinstance(message.content, list): - base_text = "\n".join( - item.text or "" for item in message.content if getattr(item, "type", "") == "text" - ) - elif message.content is None: - base_text = "" - - if message.tool_calls: - tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) - base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text - - return base_text - - async def _find_reusable_session( db: LMDBConversationStore, pool: GeminiClientPool, @@ -1268,47 +1119,6 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s raise -def _iter_stream_segments(model_output: str, chunk_size: int = 64): - """Yield stream segments while keeping markers and words intact.""" - if not model_output: - return - - token_pattern = re.compile(r"\s+|\S+\s*") - pending = "" - - def _flush_pending() -> Iterator[str]: - nonlocal pending - if pending: - yield pending - pending = "" - - # Split on boundaries so the markers are 
never fragmented. - parts = re.split(r"()", model_output) - for part in parts: - if not part: - continue - if part in {"", ""}: - yield from _flush_pending() - yield part - continue - - for match in token_pattern.finditer(part): - token = match.group(0) - - if len(token) > chunk_size: - yield from _flush_pending() - for idx in range(0, len(token), chunk_size): - yield token[idx : idx + chunk_size] - continue - - if pending and len(pending) + len(token) > chunk_size: - yield from _flush_pending() - - pending += token - - yield from _flush_pending() - - def _create_streaming_response( model_output: str, tool_calls: list[dict], @@ -1320,7 +1130,7 @@ def _create_streaming_response( """Create streaming response with `usage` calculation included in the final chunk.""" # Calculate token usage - prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) completion_tokens = estimate_tokens(model_output + tool_args) total_tokens = prompt_tokens + completion_tokens @@ -1338,7 +1148,7 @@ async def generate_stream(): yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" # Stream output text in chunks for efficiency - for chunk in _iter_stream_segments(model_output): + for chunk in iter_stream_segments(model_output): data = { "id": completion_id, "object": "chat.completion.chunk", @@ -1452,7 +1262,7 @@ async def generate_stream(): content_text += c.text if content_text: - for chunk in _iter_stream_segments(content_text): + for chunk in iter_stream_segments(content_text): delta_event = { **base_event, "type": "response.output_text.delta", @@ -1501,7 +1311,7 @@ def _create_standard_response( ) -> dict: """Create standard response""" # Calculate token usage - prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages) + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) completion_tokens = estimate_tokens(model_output + tool_args) total_tokens = prompt_tokens + completion_tokens @@ -1534,70 +1344,6 @@ def _create_standard_response( return result -def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: - """Return image dimensions (width, height) if PNG or JPEG headers are present.""" - # PNG: dimensions stored in bytes 16..24 of the IHDR chunk - if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): - try: - width, height = struct.unpack(">II", data[16:24]) - return int(width), int(height) - except struct.error: - return None, None - - # JPEG: dimensions stored in SOF segment; iterate through markers to locate it - if len(data) >= 4 and data[0:2] == b"\xff\xd8": - idx = 2 - length = len(data) - sof_markers = { - 0xC0, - 0xC1, - 0xC2, - 0xC3, - 0xC5, - 0xC6, - 0xC7, - 0xC9, - 0xCA, - 0xCB, - 0xCD, - 0xCE, - 0xCF, - } - while idx < length: - # Find marker alignment (markers are prefixed with 0xFF bytes) - if data[idx] != 0xFF: - idx += 1 - continue - while idx < length and data[idx] == 0xFF: - idx += 1 - if idx >= length: - break - marker = data[idx] - idx += 1 - - if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7: - continue - - if idx + 1 >= length: - break - segment_length = (data[idx] << 8) + data[idx + 1] - idx += 2 - if segment_length < 2: - break - - if marker in sof_markers: - if idx + 4 < length: - # Skip precision 
byte at idx, then read height/width (big-endian) - height = (data[idx + 1] << 8) + data[idx + 2] - width = (data[idx + 3] << 8) + data[idx + 4] - return int(width), int(height) - break - - idx += segment_length - 2 - - return None, None - - async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" if isinstance(image, GeneratedImage): @@ -1619,6 +1365,6 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non original_path.rename(new_path) data = new_path.read_bytes() - width, height = _extract_image_dimensions(data) + width, height = extract_image_dimensions(data) filename = random_name return base64.b64encode(data).decode("ascii"), width, height, filename diff --git a/app/services/client.py b/app/services/client.py index 09c52c1..87c0ca7 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -9,18 +9,12 @@ from ..models import Message from ..utils import g_config -from ..utils.helper import add_tag, save_file_to_tempfile, save_url_to_tempfile - -XML_WRAP_HINT = ( - "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" - '```xml\n{"arg": "value"}\n```\n' - "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" -) -CODE_BLOCK_HINT = ( - "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " - "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n" - "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" +from ..utils.helper import ( + add_tag, + save_file_to_tempfile, + save_url_to_tempfile, ) + HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])") CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) diff --git a/app/utils/helper.py b/app/utils/helper.py index 89fc31e..2627faa 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,17 +1,41 @@ import base64 +import json import mimetypes +import re +import struct import tempfile +import uuid from pathlib import Path +from typing import Iterator from urllib.parse import urlparse import httpx from loguru import logger -VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} +from ..models import FunctionCall, Message, ToolCall HEADERS = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" } +VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} +XML_WRAP_HINT = ( + "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" + '```xml\n{"arg": "value"}\n```\n' + "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" +) +CODE_BLOCK_HINT = ( + "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " + "block and supply the correct language label (for example, ```python ... ``` or ```html ... 
```).\n" + "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" +) +TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) +TOOL_CALL_RE = re.compile( + r"(.*?)", re.DOTALL | re.IGNORECASE +) +JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) +CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") +XML_HINT_STRIPPED = XML_WRAP_HINT.strip() +CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip() def add_tag(role: str, content: str, unclose: bool = False) -> str: @@ -78,3 +102,289 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: path = Path(tmp.name) return path + + +def strip_code_fence(text: str) -> str: + """Remove surrounding ```json fences if present.""" + match = JSON_FENCE_RE.match(text.strip()) + if match: + return match.group(1).strip() + return text.strip() + + +def strip_tagged_blocks(text: str) -> str: + """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely. + - tool blocks are removed entirely (if missing end marker, drop to EOF). + - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF). + """ + if not text: + return text + + result: list[str] = [] + idx = 0 + length = len(text) + start_marker = "<|im_start|>" + end_marker = "<|im_end|>" + + while idx < length: + start = text.find(start_marker, idx) + if start == -1: + result.append(text[idx:]) + break + + # append any content before this block + result.append(text[idx:start]) + + role_start = start + len(start_marker) + newline = text.find("\n", role_start) + if newline == -1: + # malformed block; keep remainder as-is (safe behavior) + result.append(text[start:]) + break + + role = text[role_start:newline].strip().lower() + + end = text.find(end_marker, newline + 1) + if end == -1: + # missing end marker + if role == "tool": + # drop from start marker to EOF (skip remainder) + break + else: + # keep inner content from after the role newline to EOF + result.append(text[newline + 1 :]) + break + + block_end = end + len(end_marker) + + if role == "tool": + # drop whole block + idx = block_end + continue + + # keep the content without role markers + content = text[newline + 1 : end] + result.append(content) + idx = block_end + + return "".join(result) + + +def strip_system_hints(text: str) -> str: + """Remove system-level hint text from a given string.""" + if not text: + return text + cleaned = strip_tagged_blocks(text) + cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") + cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "") + cleaned = CONTROL_TOKEN_RE.sub("", cleaned) + return cleaned.strip() + + +def remove_tool_call_blocks(text: str) -> str: + """Strip tool call code blocks from text.""" + if not text: + return text + + # 1. Remove fenced blocks ONLY if they contain tool calls + def _replace_block(match: re.Match[str]) -> str: + block_content = match.group(1) + if not block_content: + return match.group(0) + + # Check if the block contains any tool call tag + if TOOL_CALL_RE.search(block_content): + return "" + + # Preserve the block if no tool call found + return match.group(0) + + cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) + + # 2. 
Remove orphaned tool calls + cleaned = TOOL_CALL_RE.sub("", cleaned) + + return strip_system_hints(cleaned) + + +def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: + """Extract tool call definitions and return cleaned text.""" + if not text: + return text, [] + + tool_calls: list[ToolCall] = [] + + def _create_tool_call(name: str, raw_args: str) -> None: + """Helper to parse args and append to tool_calls list.""" + if not name: + logger.warning("Encountered tool_call without a function name.") + return + + arguments = raw_args + try: + parsed_args = json.loads(raw_args) + arguments = json.dumps(parsed_args, ensure_ascii=False) + except json.JSONDecodeError: + logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") + + tool_calls.append( + ToolCall( + id=f"call_{uuid.uuid4().hex}", + type="function", + function=FunctionCall(name=name, arguments=arguments), + ) + ) + + def _replace_block(match: re.Match[str]) -> str: + block_content = match.group(1) + if not block_content: + return match.group(0) + + found_in_block = False + for call_match in TOOL_CALL_RE.finditer(block_content): + found_in_block = True + name = (call_match.group(1) or "").strip() + raw_args = (call_match.group(2) or "").strip() + _create_tool_call(name, raw_args) + + if found_in_block: + return "" + else: + return match.group(0) + + cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) + + def _replace_orphan(match: re.Match[str]) -> str: + name = (match.group(1) or "").strip() + raw_args = (match.group(2) or "").strip() + _create_tool_call(name, raw_args) + return "" + + cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) + + cleaned = strip_system_hints(cleaned) + return cleaned, tool_calls + + +def iter_stream_segments(model_output: str, chunk_size: int = 64) -> Iterator[str]: + """Yield stream segments while keeping markers and words intact.""" + if not model_output: + return + + token_pattern = re.compile(r"\s+|\S+\s*") + pending = "" + + def _flush_pending() -> Iterator[str]: + nonlocal pending + if pending: + yield pending + pending = "" + + # Split on boundaries so the markers are never fragmented. 
+ parts = re.split(r"()", model_output) + for part in parts: + if not part: + continue + if part in {"", ""}: + yield from _flush_pending() + yield part + continue + + for match in token_pattern.finditer(part): + token = match.group(0) + + if len(token) > chunk_size: + yield from _flush_pending() + for idx in range(0, len(token), chunk_size): + yield token[idx : idx + chunk_size] + continue + + if pending and len(pending) + len(token) > chunk_size: + yield from _flush_pending() + + pending += token + + yield from _flush_pending() + + +def text_from_message(message: Message) -> str: + """Return text content from a message for token estimation.""" + base_text = "" + if isinstance(message.content, str): + base_text = message.content + elif isinstance(message.content, list): + base_text = "\n".join( + item.text or "" for item in message.content if getattr(item, "type", "") == "text" + ) + elif message.content is None: + base_text = "" + + if message.tool_calls: + tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) + base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text + + return base_text + + +def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: + """Return image dimensions (width, height) if PNG or JPEG headers are present.""" + # PNG: dimensions stored in bytes 16..24 of the IHDR chunk + if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): + try: + width, height = struct.unpack(">II", data[16:24]) + return int(width), int(height) + except struct.error: + return None, None + + # JPEG: dimensions stored in SOF segment; iterate through markers to locate it + if len(data) >= 4 and data[0:2] == b"\xff\xd8": + idx = 2 + length = len(data) + sof_markers = { + 0xC0, + 0xC1, + 0xC2, + 0xC3, + 0xC5, + 0xC6, + 0xC7, + 0xC9, + 0xCA, + 0xCB, + 0xCD, + 0xCE, + 0xCF, + } + while idx < length: + # Find marker alignment (markers are prefixed with 0xFF bytes) + if data[idx] != 0xFF: + idx += 1 + continue + while idx < length and data[idx] == 0xFF: + idx += 1 + if idx >= length: + break + marker = data[idx] + idx += 1 + + if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7: + continue + + if idx + 1 >= length: + break + segment_length = (data[idx] << 8) + data[idx + 1] + idx += 2 + if segment_length < 2: + break + + if marker in sof_markers: + if idx + 4 < length: + # Skip precision byte at idx, then read height/width (big-endian) + height = (data[idx + 1] << 8) + data[idx + 2] + width = (data[idx + 3] << 8) + data[idx + 4] + return int(width), int(height) + break + + idx += segment_length - 2 + + return None, None From a1bc8e289ee797a761eb506dc4d01e486c919aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 10:01:17 +0700 Subject: [PATCH 030/139] fix: Handle None input in `estimate_tokens` and return 0 for empty text --- app/utils/helper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 2627faa..28be240 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -47,8 +47,10 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str: return f"<|im_start|>{role}\n{content}" + ("\n<|im_end|>" if not unclose else "") -def estimate_tokens(text: str) -> int: +def estimate_tokens(text: str | None) -> int: """Estimate the number of tokens heuristically based on character count""" + if not text: + return 0 return int(len(text) / 3) From a7e15d96bd2a4f62094bea02be7e86c8d305e59e Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 13:32:49 +0700 Subject: [PATCH 031/139] refactor: Simplify model configuration and add JSON parsing validators - Replaced unused model placeholder in `config.yaml` with an empty list. - Added JSON parsing validators for `model_header` and `models` to enhance flexibility and error handling. - Improved validation to filter out incomplete model configurations. --- app/utils/config.py | 24 +++++++++++++++++++++++- config/config.yaml | 5 +---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 5782c66..69a4fac 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,3 +1,4 @@ +import json import os import sys from typing import Any, Literal, Optional @@ -58,6 +59,17 @@ class GeminiModelConfig(BaseModel): default=None, description="Header for the model" ) + @field_validator("model_header", mode="before") + @classmethod + def _parse_json_string(cls, v: Any) -> Any: + if isinstance(v, str) and v.strip().startswith("{"): + try: + return json.loads(v) + except json.JSONDecodeError: + # Return the original value to let Pydantic handle the error or type mismatch + return v + return v + class GeminiConfig(BaseModel): """Gemini API configuration""" @@ -82,11 +94,21 @@ class GeminiConfig(BaseModel): description="Maximum characters Gemini Web can accept per request", ) + @field_validator("models", mode="before") + @classmethod + def _parse_models_json(cls, v: Any) -> Any: + if isinstance(v, str) and v.strip().startswith("["): + try: + return json.loads(v) + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse models JSON string: {e}") + return v + return v + @field_validator("models") @classmethod def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: """Filter out models that don't have a name set (placeholders).""" - return [model for model in v if model.model_name] diff --git a/config/config.yaml b/config/config.yaml index 2fbc061..f2b17fb 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -28,10 +28,7 @@ gemini: verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) - models: - - model_name: null - model_header: - x-goog-ext-xxxxxxxxx-jspb: null + models: [] storage: path: "data/lmdb" # Database storage path From 61c5f3b7af4ef6b78d5dc7e3d5ba9e6009b7d3cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 13:46:58 +0700 Subject: [PATCH 032/139] refactor: Simplify Gemini model environment variable parsing with JSON support - Replaced prefix-based parsing with a root key approach. - Added JSON parsing to handle list-based model configurations. - Improved handling of errors and cleanup of environment variables. 
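For example (the model name and header key are illustrative):

```python
import os

from app.utils.config import extract_gemini_models_env

# The whole models list is now supplied as a single JSON document.
os.environ["CONFIG_GEMINI__MODELS"] = (
    '[{"model_name": "gemini-custom", "model_header": {"x-goog-ext-0-jspb": "[1]"}}]'
)

overrides = extract_gemini_models_env()
# -> {0: {"model_name": "gemini-custom", "model_header": {"x-goog-ext-0-jspb": "[1]"}}}
# The variable is then deleted from os.environ so pydantic-settings does not re-parse it.
```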
--- app/utils/config.py | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 69a4fac..6cb5664 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -256,36 +256,26 @@ def _merge_clients_with_env( def extract_gemini_models_env() -> dict[int, dict[str, Any]]: """Extract and remove all Gemini models related environment variables, supporting nested fields.""" - prefix = "CONFIG_GEMINI__MODELS__" - env_overrides: dict[int, dict[str, Any]] = {} - to_delete = [] - for k, v in os.environ.items(): - if k.startswith(prefix): - parts = k.split("__") - if len(parts) < 4: - continue - index_str = parts[2] - if not index_str.isdigit(): - continue - idx = int(index_str) + import json - # Navigate to the correct nested dict - current = env_overrides.setdefault(idx, {}) - for i in range(3, len(parts) - 1): - field_name = parts[i].lower() - current = current.setdefault(field_name, {}) + root_key = "CONFIG_GEMINI__MODELS" + env_overrides: dict[int, dict[str, Any]] = {} - # Set the value (lowercase root field names, preserve sub-key casing) - last_part = parts[-1] - if len(parts) == 4: - current[last_part.lower()] = v - else: - current[last_part] = v + if root_key in os.environ: + try: + val = os.environ[root_key] + if val.strip().startswith("["): + models_list = json.loads(val) + if isinstance(models_list, list): + for idx, model_data in enumerate(models_list): + if isinstance(model_data, dict): + env_overrides[idx] = model_data + + # Remove the environment variable to avoid Pydantic parsing errors + del os.environ[root_key] + except Exception as e: + logger.warning(f"Failed to parse {root_key} as JSON: {e}") - to_delete.append(k) - # Remove these environment variables to avoid Pydantic parsing errors - for k in to_delete: - del os.environ[k] return env_overrides From efd056c270db5130c59b4e66c2543be7f5e8c6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 14:09:41 +0700 Subject: [PATCH 033/139] fix: Enhance Gemini model environment variable parsing with fallback to Python literals - Added `ast.literal_eval` as a fallback for parsing environment variables when JSON decoding fails. - Improved error handling and logging for invalid configurations. - Ensured proper cleanup of environment variables post-parsing. 
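A sketch of the fallback path (values are illustrative):

```python
import os

from app.utils.config import extract_gemini_models_env

# A Python-literal value (single quotes) is not valid JSON, so json.loads
# raises JSONDecodeError and ast.literal_eval is tried as the fallback.
os.environ["CONFIG_GEMINI__MODELS"] = (
    "[{'model_name': 'gemini-custom', 'model_header': None}]"
)

overrides = extract_gemini_models_env()
# -> {0: {"model_name": "gemini-custom", "model_header": None}}
```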
--- app/utils/config.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/app/utils/config.py b/app/utils/config.py index 6cb5664..74a5294 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,3 +1,4 @@ +import ast import json import os import sys @@ -256,25 +257,31 @@ def _merge_clients_with_env( def extract_gemini_models_env() -> dict[int, dict[str, Any]]: """Extract and remove all Gemini models related environment variables, supporting nested fields.""" - import json - root_key = "CONFIG_GEMINI__MODELS" env_overrides: dict[int, dict[str, Any]] = {} if root_key in os.environ: + val = os.environ[root_key] + models_list = None + parsed_successfully = False + try: - val = os.environ[root_key] - if val.strip().startswith("["): - models_list = json.loads(val) - if isinstance(models_list, list): - for idx, model_data in enumerate(models_list): - if isinstance(model_data, dict): - env_overrides[idx] = model_data + models_list = json.loads(val) + parsed_successfully = True + except json.JSONDecodeError: + try: + models_list = ast.literal_eval(val) + parsed_successfully = True + except (ValueError, SyntaxError) as e: + logger.warning(f"Failed to parse {root_key} as JSON or Python literal: {e}") + + if parsed_successfully and isinstance(models_list, list): + for idx, model_data in enumerate(models_list): + if isinstance(model_data, dict): + env_overrides[idx] = model_data # Remove the environment variable to avoid Pydantic parsing errors del os.environ[root_key] - except Exception as e: - logger.warning(f"Failed to parse {root_key} as JSON: {e}") return env_overrides @@ -298,7 +305,7 @@ def _merge_models_with_env( model_dict.update(overrides) result_models[idx] = GeminiModelConfig(**model_dict) elif idx == len(result_models): - # Append new model + # Append new models new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: From 476b9dd228aa99501638987d1f44fe3c5eb23067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 31 Dec 2025 17:53:38 +0700 Subject: [PATCH 034/139] fix: Improve regex patterns in helper module - Adjusted `TOOL_CALL_RE` regex pattern for better accuracy. 
--- app/utils/helper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 28be240..99e6d7a 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -30,7 +30,7 @@ ) TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE + r"(.*?)", re.DOTALL | re.IGNORECASE ) JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") @@ -140,7 +140,7 @@ def strip_tagged_blocks(text: str) -> str: role_start = start + len(start_marker) newline = text.find("\n", role_start) if newline == -1: - # malformed block; keep remainder as-is (safe behavior) + # malformed block; keep the remainder as-is (safe behavior) result.append(text[start:]) break @@ -150,7 +150,7 @@ def strip_tagged_blocks(text: str) -> str: if end == -1: # missing end marker if role == "tool": - # drop from start marker to EOF (skip remainder) + # drop from the start marker to EOF (skip the remainder) break else: # keep inner content from after the role newline to EOF @@ -160,7 +160,7 @@ def strip_tagged_blocks(text: str) -> str: block_end = end + len(end_marker) if role == "tool": - # drop whole block + # drop the whole block idx = block_end continue @@ -217,7 +217,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: - """Helper to parse args and append to tool_calls list.""" + """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") return From 35c1e99993d11033ae9047e85f645ce5def7f09b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 13 Jan 2026 09:02:10 +0700 Subject: [PATCH 035/139] docs: Update README files to include custom model configuration and environment variable setup --- README.md | 47 ++++++++++++++++++++++++++++++++++++++++++++++- README.zh.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2df3a73..5d6de40 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ services: - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID} - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS} - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above - restart: on-failure:3 # Avoid retrying too many times + restart: on-failure:3 # Avoid retrying too many times ``` Then run: @@ -187,6 +187,51 @@ To use Gemini-FastAPI, you need to extract your Gemini session cookies: Each client entry can be configured with a different proxy to work around rate limits. Omit the `proxy` field or set it to `null` or an empty string to keep a direct connection. +### Custom Models + +You can define custom models in `config/config.yaml` or via environment variables. + +#### YAML Configuration + +```yaml +gemini: + model_strategy: "append" # "append" (default + custom) or "overwrite" (custom only) + models: + - model_name: "gemini-3.0-pro" + model_header: + x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]' +``` + +#### Environment Variables + +You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. 
+ +##### Bash + +```bash +export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" +export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' +``` + +##### Docker Compose + +```yaml +services: + gemini-fastapi: + environment: + - CONFIG_GEMINI__MODEL_STRATEGY=overwrite + - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] +``` + +##### Docker CLI + +```bash +docker run -d \ + -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ + -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ + ghcr.io/nativu5/gemini-fastapi +``` + ## Acknowledgments - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client diff --git a/README.zh.md b/README.zh.md index 6b7dd74..791afd8 100644 --- a/README.zh.md +++ b/README.zh.md @@ -4,7 +4,6 @@ [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) - [ [English](README.md) | 中文 ] 将 Gemini 网页端模型封装为兼容 OpenAI API 的 API Server。基于 [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) 实现。 @@ -50,6 +49,7 @@ pip install -e . ### 配置 编辑 `config/config.yaml` 并提供至少一组凭证: + ```yaml gemini: clients: @@ -118,7 +118,7 @@ services: - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID} - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS} - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above - restart: on-failure:3 # Avoid retrying too many times + restart: on-failure:3 # Avoid retrying too many times ``` 然后运行: @@ -186,6 +186,51 @@ export CONFIG_STORAGE__MAX_SIZE=268435456 # 256 MB 每个客户端条目可以配置不同的代理,从而规避速率限制。省略 `proxy` 字段或将其设置为 `null` 或空字符串以保持直连。 +### 自定义模型 + +你可以在 `config/config.yaml` 中或通过环境变量定义自定义模型。 + +#### YAML 配置 + +```yaml +gemini: + model_strategy: "append" # "append" (默认 + 自定义) 或 "overwrite" (仅限自定义) + models: + - model_name: "gemini-3.0-pro" + model_header: + x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]' +``` + +#### 环境变量 + +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。 + +##### Bash + +```bash +export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" +export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' +``` + +##### Docker Compose + +```yaml +services: + gemini-fastapi: + environment: + - CONFIG_GEMINI__MODEL_STRATEGY=overwrite + - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] +``` + +##### Docker CLI + +```bash +docker run -d \ + -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ + -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ + ghcr.io/nativu5/gemini-fastapi +``` + ## 鸣谢 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端 @@ -193,4 +238,4 @@ export CONFIG_STORAGE__MAX_SIZE=268435456 # 256 MB ## 免责声明 -本项目与 Google 或 OpenAI 
无关,仅供学习和研究使用。本项目使用了逆向工程 API,可能不符合 Google 服务条款。使用风险自负。 \ No newline at end of file +本项目与 Google 或 OpenAI 无关,仅供学习和研究使用。本项目使用了逆向工程 API,可能不符合 Google 服务条款。使用风险自负。 From 9b8162133e86a323400e7e2fb36ed651b31c795f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 13 Jan 2026 09:23:28 +0700 Subject: [PATCH 036/139] fix: Remove unused headers from HTTP client in helper module --- app/utils/helper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 99e6d7a..51a6ccf 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -14,9 +14,6 @@ from ..models import FunctionCall, Message, ToolCall -HEADERS = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36" -} VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} XML_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" @@ -82,7 +79,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: if not suffix: suffix = f".{mime_type.split('/')[1]}" else: - async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client: + async with httpx.AsyncClient(follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content From 32a48dcdc98d9e96e791ae6f914e6b3f12804c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 15 Jan 2026 10:18:58 +0700 Subject: [PATCH 037/139] fix: Update README and README.zh to clarify model configuration via environment variables; enhance error logging in config validation --- README.md | 23 +---------------------- README.zh.md | 23 +---------------------- app/server/chat.py | 6 ++++-- app/utils/config.py | 27 +++++++++++++++++++++++---- 4 files changed, 29 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 5d6de40..d7a7214 100644 --- a/README.md +++ b/README.md @@ -204,34 +204,13 @@ gemini: #### Environment Variables -You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. - -##### Bash +You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file. 
```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' ``` -##### Docker Compose - -```yaml -services: - gemini-fastapi: - environment: - - CONFIG_GEMINI__MODEL_STRATEGY=overwrite - - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] -``` - -##### Docker CLI - -```bash -docker run -d \ - -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ - -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ - ghcr.io/nativu5/gemini-fastapi -``` - ## Acknowledgments - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client diff --git a/README.zh.md b/README.zh.md index 791afd8..09d80a4 100644 --- a/README.zh.md +++ b/README.zh.md @@ -203,34 +203,13 @@ gemini: #### 环境变量 -你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。 - -##### Bash +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式,无需修改配置文件。 ```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' ``` -##### Docker Compose - -```yaml -services: - gemini-fastapi: - environment: - - CONFIG_GEMINI__MODEL_STRATEGY=overwrite - - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}] -``` - -##### Docker CLI - -```bash -docker run -d \ - -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \ - -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \ - ghcr.io/nativu5/gemini-fastapi -``` - ## 鸣谢 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端 diff --git a/app/server/chat.py b/app/server/chat.py index 9485f7a..6e517ea 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -924,7 +924,7 @@ async def _build_payload( image_call_items.append( ResponseImageGenerationCall( - id=filename.split(".")[0], + id=filename.rsplit(".", 1)[0], status="completed", result=image_base64, output_format=img_format, @@ -1350,7 +1350,9 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non try: saved_path = await image.save(path=str(temp_dir), full_size=True) except Exception as e: - logger.warning(f"Failed to download full-size image, retrying with default size: {e}") + logger.warning( + f"Failed to download full-size GeneratedImage, retrying with default size: {e}" + ) saved_path = await image.save(path=str(temp_dir), full_size=False) else: saved_path = await image.save(path=str(temp_dir)) diff --git a/app/utils/config.py b/app/utils/config.py index 74a5294..a9c5d44 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -109,8 +109,21 @@ def _parse_models_json(cls, v: Any) -> Any: @field_validator("models") @classmethod def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]: - """Filter out models that don't have a name set (placeholders).""" - 
return [model for model in v if model.model_name] + """Filter out models that don't have all required fields set.""" + valid_models = [] + for model in v: + if model.model_name and model.model_header: + valid_models.append(model) + else: + missing = [] + if not model.model_name: + missing.append("model_name") + if not model.model_header: + missing.append("model_header") + logger.warning( + f"Discarding custom model due to missing {', '.join(missing)}: {model}" + ) + return valid_models class CORSConfig(BaseModel): @@ -251,7 +264,10 @@ def _merge_clients_with_env( new_client = GeminiClientSettings(**overrides) result_clients.append(new_client) else: - raise IndexError(f"Client index {idx} in env is out of range.") + raise IndexError( + f"Client index {idx} in env is out of range (current count: {len(result_clients)}). " + "Client indices must be contiguous starting from 0." + ) return result_clients if result_clients else base_clients @@ -309,7 +325,10 @@ def _merge_models_with_env( new_model = GeminiModelConfig(**overrides) result_models.append(new_model) else: - raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).") + raise IndexError( + f"Model index {idx} in env is out of range (current count: {len(result_models)}). " + "Model indices must be contiguous starting from 0." + ) return result_models From 0c00b089d5b33e394abaac6a1d36ae08cede166c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 15 Jan 2026 11:24:08 +0700 Subject: [PATCH 038/139] Update README and README.zh to clarify model configuration via JSON string or list structure for enhanced flexibility in automated environments --- README.md | 2 +- README.zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d7a7214..330e9c8 100644 --- a/README.md +++ b/README.md @@ -204,7 +204,7 @@ gemini: #### Environment Variables -You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file. +You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments (e.g. Docker) without modifying the configuration file. 
```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" diff --git a/README.zh.md b/README.zh.md index 09d80a4..2f9e1b5 100644 --- a/README.zh.md +++ b/README.zh.md @@ -203,7 +203,7 @@ gemini: #### 环境变量 -你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式,无需修改配置文件。 +你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。这为通过 shell 或在自动化环境(例如 Docker)中覆盖设置提供了一种灵活的方式,而无需修改配置文件。 ```bash export CONFIG_GEMINI__MODEL_STRATEGY="overwrite" From b599d99f9967188bb8a277fd09951ddf32006f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 23 Jan 2026 12:14:40 +0700 Subject: [PATCH 039/139] Refactor: compress JSON content to save tokens and streamline sending multiple chunks --- app/server/chat.py | 50 +++++++++++++++++++++++++++++------------- app/services/client.py | 4 ++-- app/utils/helper.py | 2 +- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 6e517ea..1e7d786 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,5 +1,7 @@ +import asyncio import base64 import json +import random import re import uuid from dataclasses import dataclass @@ -95,7 +97,7 @@ def _build_structured_requirement( schema_name = json_schema.get("name") or "response" strict = json_schema.get("strict", True) - pretty_schema = json.dumps(schema, ensure_ascii=False, indent=2, sort_keys=True) + pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True) instruction_parts = [ "You must respond with a single valid JSON document that conforms to the schema shown below.", "Do not include explanations, comments, or any text before or after the JSON.", @@ -135,7 +137,7 @@ def _build_tool_prompt( description = function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = json.dumps(function.parameters, ensure_ascii=False, indent=2) + schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":")) lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -635,7 +637,7 @@ async def create_chat_completion( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False) + canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) visible_output = canonical_output storage_output = canonical_output @@ -875,7 +877,7 @@ async def _build_payload( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False) + canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) assistant_text = canonical_output storage_output = canonical_output logger.debug( @@ -1081,38 +1083,56 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s that Gemini can produce the actual answer. """ if len(text) <= MAX_CHARS_PER_REQUEST: - # No need to split - a single request is fine. 
try: return await session.send_message(text, files=files) except Exception as e: logger.exception(f"Error sending message to Gemini: {e}") raise + hint_len = len(CONTINUATION_HINT) - chunk_size = MAX_CHARS_PER_REQUEST - hint_len + safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len chunks: list[str] = [] pos = 0 total = len(text) + while pos < total: - end = min(pos + chunk_size, total) - chunk = text[pos:end] - pos = end + remaining = total - pos + if remaining <= MAX_CHARS_PER_REQUEST: + chunks.append(text[pos:]) + break + + end = pos + safe_chunk_size + slice_candidate = text[pos:end] + # Try to find a safe split point + split_idx = -1 + idx = slice_candidate.rfind("\n") + if idx != -1: + split_idx = idx + + if split_idx != -1: + split_at = pos + split_idx + 1 + else: + split_at = end - # If this is NOT the last chunk, add the continuation hint. - if end < total: - chunk += CONTINUATION_HINT + chunk = text[pos:split_at] + CONTINUATION_HINT chunks.append(chunk) + pos = split_at - # Fire off all but the last chunk, discarding the interim "ok" replies. - for chk in chunks[:-1]: + chunks_size = len(chunks) + for i, chk in enumerate(chunks[:-1]): try: + logger.debug(f"Sending chunk {i + 1}/{chunks_size}...") await session.send_message(chk) + delay = random.uniform(1.0, 3.0) + logger.debug(f"Sleeping for {delay:.2f}s...") + await asyncio.sleep(delay) except Exception as e: logger.exception(f"Error sending chunk to Gemini: {e}") raise - # The last chunk carries the files (if any) and we return its response. try: + logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...") return await session.send_message(chunks[-1], files=files) except Exception as e: logger.exception(f"Error sending final chunk to Gemini: {e}") diff --git a/app/services/client.py b/app/services/client.py index 87c0ca7..1f23271 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -123,7 +123,7 @@ async def process_message( args_text = call.function.arguments.strip() try: parsed_args = json.loads(args_text) - args_text = json.dumps(parsed_args, ensure_ascii=False) + args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) except (json.JSONDecodeError, TypeError): # Leave args_text as is if it is not valid JSON pass @@ -132,7 +132,7 @@ async def process_message( ) if tool_blocks: - tool_section = "```xml\n" + "\n".join(tool_blocks) + "\n```" + tool_section = "```xml\n" + "".join(tool_blocks) + "\n```" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment) diff --git a/app/utils/helper.py b/app/utils/helper.py index 51a6ccf..578b666 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -222,7 +222,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False) + arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) except json.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") From 186b8448d7f088df621b627ca7b28c5a7acaf341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 23 Jan 2026 23:08:32 +0700 Subject: [PATCH 040/139] Refactor: Modify the LMDB store to fix issues where no conversation is found in either the raw or cleaned history. 
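A condensed sketch of the hashing normalization, using plain dicts in place of the project's `Message` model (illustrative only; the diff below does the same inside `_hash_message`):

```python
import hashlib
import orjson

def hash_message(message: dict) -> str:
    # Collapse a pure-text content list to a plain string before hashing, so the
    # OpenAI-style list form and the string form of the same message match.
    content = message.get("content")
    if isinstance(content, list) and all(
        isinstance(item, dict) and item.get("type") == "text" for item in content
    ):
        message = {**message, "content": "".join(item.get("text") or "" for item in content)}
    return hashlib.sha256(orjson.dumps(message, option=orjson.OPT_SORT_KEYS)).hexdigest()

a = {"role": "user", "content": "hello"}
b = {"role": "user", "content": [{"type": "text", "text": "hello"}]}
assert hash_message(a) == hash_message(b)  # both forms now map to the same history key
```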
--- app/services/lmdb.py | 46 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 8ccb0d4..d671663 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -9,7 +9,7 @@ import orjson from loguru import logger -from ..models import ConversationInStore, Message +from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config from ..utils.singleton import Singleton @@ -18,6 +18,19 @@ def _hash_message(message: Message) -> str: """Generate a hash for a single message.""" # Convert message to dict and sort keys for consistent hashing message_dict = message.model_dump(mode="json") + content = message_dict.get("content") + if isinstance(content, list): + is_pure_text = True + text_parts = [] + for item in content: + if not isinstance(item, dict) or item.get("type") != "text": + is_pure_text = False + break + text_parts.append(item.get("text") or "") + + if is_pure_text: + message_dict["content"] = "".join(text_parts) + message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() @@ -435,12 +448,31 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: """ cleaned_messages = [] for msg in messages: - if msg.role == "assistant" and isinstance(msg.content, str): - normalized_content = LMDBConversationStore.remove_think_tags(msg.content) - # Only create a new object if content actually changed - if normalized_content != msg.content: - cleaned_msg = Message(role=msg.role, content=normalized_content, name=msg.name) - cleaned_messages.append(cleaned_msg) + if msg.role == "assistant": + if isinstance(msg.content, str): + normalized_content = LMDBConversationStore.remove_think_tags(msg.content) + if normalized_content != msg.content: + cleaned_msg = Message( + role=msg.role, content=normalized_content, name=msg.name + ) + cleaned_messages.append(cleaned_msg) + else: + cleaned_messages.append(msg) + elif isinstance(msg.content, list): + new_content = [] + changed = False + for item in msg.content: + if isinstance(item, ContentItem) and item.type == "text" and item.text: + cleaned_text = LMDBConversationStore.remove_think_tags(item.text) + if cleaned_text != item.text: + changed = True + item = item.model_copy(update={"text": cleaned_text}) + new_content.append(item) + + if changed: + cleaned_messages.append(msg.model_copy(update={"content": new_content})) + else: + cleaned_messages.append(msg) else: cleaned_messages.append(msg) else: From 6dd1fecdced932c537f579a3c5dd3db87847d475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:03:24 +0700 Subject: [PATCH 041/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. 
--- app/services/lmdb.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index d671663..93c7723 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -18,8 +18,12 @@ def _hash_message(message: Message) -> str: """Generate a hash for a single message.""" # Convert message to dict and sort keys for consistent hashing message_dict = message.model_dump(mode="json") + + # Normalize content: empty string -> None content = message_dict.get("content") - if isinstance(content, list): + if content == "": + message_dict["content"] = None + elif isinstance(content, list): is_pure_text = True text_parts = [] for item in content: @@ -29,7 +33,27 @@ def _hash_message(message: Message) -> str: text_parts.append(item.get("text") or "") if is_pure_text: - message_dict["content"] = "".join(text_parts) + text_content = "".join(text_parts) + message_dict["content"] = text_content if text_content else None + + # Normalize tool_calls: empty list -> None, and canonicalize arguments + tool_calls = message_dict.get("tool_calls") + if not tool_calls: + message_dict["tool_calls"] = None + elif isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict) and "function" in tool_call: + func = tool_call["function"] + args = func.get("arguments") + if isinstance(args, str): + try: + # Parse and re-dump to canonicalize (remove extra whitespace, sort keys) + parsed = orjson.loads(args) + func["arguments"] = orjson.dumps( + parsed, option=orjson.OPT_SORT_KEYS + ).decode("utf-8") + except Exception: + pass message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() From 20ed2456d2324501bbe4ba6392870cd612c9083c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:46:27 +0700 Subject: [PATCH 042/139] Refactor: Update all functions to use orjson for better performance --- app/main.py | 2 ++ app/server/chat.py | 17 ++++++++--------- app/services/client.py | 8 ++++---- app/utils/config.py | 14 +++++++------- app/utils/helper.py | 8 ++++---- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/app/main.py b/app/main.py index c215e2a..307eb36 100644 --- a/app/main.py +++ b/app/main.py @@ -2,6 +2,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI +from fastapi.responses import ORJSONResponse from loguru import logger from .server.chat import router as chat_router @@ -92,6 +93,7 @@ def create_app() -> FastAPI: description="OpenAI-compatible API for Gemini Web", version="1.0.0", lifespan=lifespan, + default_response_class=ORJSONResponse, ) add_cors_middleware(app) diff --git a/app/server/chat.py b/app/server/chat.py index 1e7d786..a9d9dec 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,6 +1,5 @@ import asyncio import base64 -import json import random import re import uuid @@ -97,7 +96,7 @@ def _build_structured_requirement( schema_name = json_schema.get("name") or "response" strict = json_schema.get("strict", True) - pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True) + pretty_schema = orjson.dumps(schema, option=orjson.OPT_SORT_KEYS).decode("utf-8") instruction_parts = [ "You must respond with a single valid JSON document that conforms to the schema shown below.", "Do not include explanations, comments, or any text before or after the JSON.", @@ -137,7 +136,7 @@ def _build_tool_prompt( description = 
function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":")) + schema_text = orjson.dumps(function.parameters).decode("utf-8") lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -626,8 +625,8 @@ async def create_chat_completion( detail="LLM returned an empty response while JSON schema output was requested.", ) try: - structured_payload = json.loads(cleaned_visible) - except json.JSONDecodeError as exc: + structured_payload = orjson.loads(cleaned_visible) + except orjson.JSONDecodeError as exc: logger.warning( f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " f"{cleaned_visible}" @@ -637,7 +636,7 @@ async def create_chat_completion( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") visible_output = canonical_output storage_output = canonical_output @@ -866,8 +865,8 @@ async def _build_payload( detail="LLM returned an empty response while JSON schema output was requested.", ) try: - structured_payload = json.loads(cleaned_visible) - except json.JSONDecodeError as exc: + structured_payload = orjson.loads(cleaned_visible) + except orjson.JSONDecodeError as exc: logger.warning( f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " f"{cleaned_visible}" @@ -877,7 +876,7 @@ async def _build_payload( detail="LLM returned invalid JSON for the requested response_format.", ) from exc - canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":")) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") assistant_text = canonical_output storage_output = canonical_output logger.debug( diff --git a/app/services/client.py b/app/services/client.py index 1f23271..55be11a 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,9 +1,9 @@ import html -import json import re from pathlib import Path from typing import Any, cast +import orjson from gemini_webapi import GeminiClient, ModelOutput from loguru import logger @@ -122,9 +122,9 @@ async def process_message( for call in message.tool_calls: args_text = call.function.arguments.strip() try: - parsed_args = json.loads(args_text) - args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) - except (json.JSONDecodeError, TypeError): + parsed_args = orjson.loads(args_text) + args_text = orjson.dumps(parsed_args).decode("utf-8") + except orjson.JSONDecodeError: # Leave args_text as is if it is not valid JSON pass tool_blocks.append( diff --git a/app/utils/config.py b/app/utils/config.py index a9c5d44..708462d 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,9 +1,9 @@ import ast -import json import os import sys from typing import Any, Literal, Optional +import orjson from loguru import logger from pydantic import BaseModel, Field, ValidationError, field_validator from pydantic_settings import ( @@ -65,8 +65,8 @@ class GeminiModelConfig(BaseModel): def _parse_json_string(cls, v: Any) -> Any: if isinstance(v, str) and v.strip().startswith("{"): try: - return json.loads(v) - except json.JSONDecodeError: + return orjson.loads(v) + except orjson.JSONDecodeError: # Return the original value to let Pydantic handle 
the error or type mismatch return v return v @@ -100,8 +100,8 @@ class GeminiConfig(BaseModel): def _parse_models_json(cls, v: Any) -> Any: if isinstance(v, str) and v.strip().startswith("["): try: - return json.loads(v) - except json.JSONDecodeError as e: + return orjson.loads(v) + except orjson.JSONDecodeError as e: logger.warning(f"Failed to parse models JSON string: {e}") return v return v @@ -282,9 +282,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, Any]]: parsed_successfully = False try: - models_list = json.loads(val) + models_list = orjson.loads(val) parsed_successfully = True - except json.JSONDecodeError: + except orjson.JSONDecodeError: try: models_list = ast.literal_eval(val) parsed_successfully = True diff --git a/app/utils/helper.py b/app/utils/helper.py index 578b666..1dc518f 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,5 +1,4 @@ import base64 -import json import mimetypes import re import struct @@ -10,6 +9,7 @@ from urllib.parse import urlparse import httpx +import orjson from loguru import logger from ..models import FunctionCall, Message, ToolCall @@ -221,9 +221,9 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: - parsed_args = json.loads(raw_args) - arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":")) - except json.JSONDecodeError: + parsed_args = orjson.loads(raw_args) + arguments = orjson.dumps(parsed_args).decode("utf-8") + except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") tool_calls.append( From f67fe63b3b654d3a28cc5ca0363a4ad894831d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 10:47:26 +0700 Subject: [PATCH 043/139] Update project dependencies --- pyproject.toml | 21 ++++----- uv.lock | 118 +++++++++++++++++++++++++------------------------ 2 files changed, 71 insertions(+), 68 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32a42b4..1c30f8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,24 +5,25 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.115.12", - "gemini-webapi>=1.17.0", - "lmdb>=1.6.2", - "loguru>=0.7.0", - "pydantic-settings[yaml]>=2.9.1", - "uvicorn>=0.34.1", - "uvloop>=0.21.0; sys_platform != 'win32'", + "fastapi>=0.128.0", + "gemini-webapi>=1.17.3", + "lmdb>=1.7.5", + "loguru>=0.7.3", + "orjson>=3.11.5", + "pydantic-settings[yaml]>=2.12.0", + "uvicorn>=0.40.0", + "uvloop>=0.22.1; sys_platform != 'win32'", ] [project.optional-dependencies] dev = [ - "ruff>=0.11.7", + "ruff>=0.14.14", ] [tool.ruff] line-length = 100 lint.select = ["E", "F", "W", "I", "RUF"] -lint.ignore = ["E501"] +lint.ignore = ["E501"] [tool.ruff.format] quote-style = "double" @@ -30,5 +31,5 @@ indent-style = "space" [dependency-groups] dev = [ - "ruff>=0.11.13", + "ruff>=0.14.14", ] diff --git a/uv.lock b/uv.lock index 923e6d3..50a73be 100644 --- a/uv.lock +++ b/uv.lock @@ -22,24 +22,24 @@ wheels = [ [[package]] name = "anyio" -version = "4.12.0" +version = "4.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/16/ce/8a777047513153587e5434fd752e89334ac33e379aa3497db860eeb60377/anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", size = 228266, upload-time = 
"2025-11-28T23:37:38.911Z" } +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" }, + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, ] [[package]] name = "certifi" -version = "2025.11.12" +version = "2026.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] [[package]] @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.123.10" +version = "0.128.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -73,9 +73,9 @@ dependencies = [ { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/ff/e01087de891010089f1620c916c0c13130f3898177955c13e2b02d22ec4a/fastapi-0.123.10.tar.gz", hash = "sha256:624d384d7cda7c096449c889fc776a0571948ba14c3c929fa8e9a78cd0b0a6a8", size = 356360, upload-time = "2025-12-05T21:27:46.237Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/f0/7cb92c4a720def85240fd63fbbcf147ce19e7a731c8e1032376bb5a486ac/fastapi-0.123.10-py3-none-any.whl", hash = "sha256:0503b7b7bc71bc98f7c90c9117d21fdf6147c0d74703011b87936becc86985c1", size = 111774, upload-time = "2025-12-05T21:27:44.78Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" }, ] [[package]] @@ -87,6 +87,7 @@ dependencies = [ { name = "gemini-webapi" }, { name = "lmdb" }, { name = "loguru" }, + { name = "orjson" }, { name = "pydantic-settings", extra = ["yaml"] }, { name = "uvicorn" }, { name = "uvloop", marker = "sys_platform != 'win32'" }, @@ -104,19 +105,20 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.115.12" }, - { name = "gemini-webapi", specifier = ">=1.17.0" }, - { name = "lmdb", specifier = ">=1.6.2" }, - { name = "loguru", specifier = ">=0.7.0" }, - { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.9.1" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.11.7" }, - { name = "uvicorn", specifier = ">=0.34.1" }, - { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.21.0" }, + { name = "fastapi", specifier = ">=0.128.0" }, + { name = "gemini-webapi", specifier = ">=1.17.3" }, + { name = "lmdb", specifier = ">=1.7.5" }, + { name = "loguru", specifier = ">=0.7.3" }, + { name = "orjson", specifier = ">=3.11.5" }, + { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" }, + { name = "uvicorn", specifier = ">=0.40.0" }, + { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" }, ] provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.11.13" }] +dev = [{ name = "ruff", specifier = ">=0.14.14" }] [[package]] name = "gemini-webapi" @@ -209,25 +211,25 @@ wheels = [ [[package]] name = "orjson" -version = "3.11.4" +version = "3.11.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c6/fe/ed708782d6709cc60eb4c2d8a361a440661f74134675c72990f2c48c785f/orjson-3.11.4.tar.gz", hash = "sha256:39485f4ab4c9b30a3943cfe99e1a213c4776fb69e8abd68f66b83d5a0b0fdc6d", size = 5945188, upload-time = "2025-10-24T15:50:38.027Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/63/51/6b556192a04595b93e277a9ff71cd0cc06c21a7df98bcce5963fa0f5e36f/orjson-3.11.4-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d4371de39319d05d3f482f372720b841c841b52f5385bd99c61ed69d55d9ab50", size = 243571, upload-time = "2025-10-24T15:49:10.008Z" }, - { url = "https://files.pythonhosted.org/packages/1c/2c/2602392ddf2601d538ff11848b98621cd465d1a1ceb9db9e8043181f2f7b/orjson-3.11.4-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:e41fd3b3cac850eaae78232f37325ed7d7436e11c471246b87b2cd294ec94853", size = 128891, upload-time = "2025-10-24T15:49:11.297Z" }, - { url = "https://files.pythonhosted.org/packages/4e/47/bf85dcf95f7a3a12bf223394a4f849430acd82633848d52def09fa3f46ad/orjson-3.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600e0e9ca042878c7fdf189cf1b028fe2c1418cc9195f6cb9824eb6ed99cb938", size = 130137, upload-time = "2025-10-24T15:49:12.544Z" }, - { 
url = "https://files.pythonhosted.org/packages/b4/4d/a0cb31007f3ab6f1fd2a1b17057c7c349bc2baf8921a85c0180cc7be8011/orjson-3.11.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7bbf9b333f1568ef5da42bc96e18bf30fd7f8d54e9ae066d711056add508e415", size = 129152, upload-time = "2025-10-24T15:49:13.754Z" }, - { url = "https://files.pythonhosted.org/packages/f7/ef/2811def7ce3d8576b19e3929fff8f8f0d44bc5eb2e0fdecb2e6e6cc6c720/orjson-3.11.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4806363144bb6e7297b8e95870e78d30a649fdc4e23fc84daa80c8ebd366ce44", size = 136834, upload-time = "2025-10-24T15:49:15.307Z" }, - { url = "https://files.pythonhosted.org/packages/00/d4/9aee9e54f1809cec8ed5abd9bc31e8a9631d19460e3b8470145d25140106/orjson-3.11.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad355e8308493f527d41154e9053b86a5be892b3b359a5c6d5d95cda23601cb2", size = 137519, upload-time = "2025-10-24T15:49:16.557Z" }, - { url = "https://files.pythonhosted.org/packages/db/ea/67bfdb5465d5679e8ae8d68c11753aaf4f47e3e7264bad66dc2f2249e643/orjson-3.11.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a7517482667fb9f0ff1b2f16fe5829296ed7a655d04d68cd9711a4d8a4e708", size = 136749, upload-time = "2025-10-24T15:49:17.796Z" }, - { url = "https://files.pythonhosted.org/packages/01/7e/62517dddcfce6d53a39543cd74d0dccfcbdf53967017c58af68822100272/orjson-3.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97eb5942c7395a171cbfecc4ef6701fc3c403e762194683772df4c54cfbb2210", size = 136325, upload-time = "2025-10-24T15:49:19.347Z" }, - { url = "https://files.pythonhosted.org/packages/18/ae/40516739f99ab4c7ec3aaa5cc242d341fcb03a45d89edeeaabc5f69cb2cf/orjson-3.11.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:149d95d5e018bdd822e3f38c103b1a7c91f88d38a88aada5c4e9b3a73a244241", size = 140204, upload-time = "2025-10-24T15:49:20.545Z" }, - { url = "https://files.pythonhosted.org/packages/82/18/ff5734365623a8916e3a4037fcef1cd1782bfc14cf0992afe7940c5320bf/orjson-3.11.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:624f3951181eb46fc47dea3d221554e98784c823e7069edb5dbd0dc826ac909b", size = 406242, upload-time = "2025-10-24T15:49:21.884Z" }, - { url = "https://files.pythonhosted.org/packages/e1/43/96436041f0a0c8c8deca6a05ebeaf529bf1de04839f93ac5e7c479807aec/orjson-3.11.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:03bfa548cf35e3f8b3a96c4e8e41f753c686ff3d8e182ce275b1751deddab58c", size = 150013, upload-time = "2025-10-24T15:49:23.185Z" }, - { url = "https://files.pythonhosted.org/packages/1b/48/78302d98423ed8780479a1e682b9aecb869e8404545d999d34fa486e573e/orjson-3.11.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:525021896afef44a68148f6ed8a8bf8375553d6066c7f48537657f64823565b9", size = 139951, upload-time = "2025-10-24T15:49:24.428Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7b/ad613fdcdaa812f075ec0875143c3d37f8654457d2af17703905425981bf/orjson-3.11.4-cp312-cp312-win32.whl", hash = "sha256:b58430396687ce0f7d9eeb3dd47761ca7d8fda8e9eb92b3077a7a353a75efefa", size = 136049, upload-time = "2025-10-24T15:49:25.973Z" }, - { url = "https://files.pythonhosted.org/packages/b9/3c/9cf47c3ff5f39b8350fb21ba65d789b6a1129d4cbb3033ba36c8a9023520/orjson-3.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:c6dbf422894e1e3c80a177133c0dda260f81428f9de16d61041949f6a2e5c140", size = 131461, upload-time = "2025-10-24T15:49:27.259Z" }, - { url = 
"https://files.pythonhosted.org/packages/c6/3b/e2425f61e5825dc5b08c2a5a2b3af387eaaca22a12b9c8c01504f8614c36/orjson-3.11.4-cp312-cp312-win_arm64.whl", hash = "sha256:d38d2bc06d6415852224fcc9c0bfa834c25431e466dc319f0edd56cca81aa96e", size = 126167, upload-time = "2025-10-24T15:49:28.511Z" }, + { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = "2025-12-06T15:54:22.061Z" }, + { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" }, + { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" }, + { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" }, + { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" }, + { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = "2025-12-06T15:54:29.856Z" }, + { url = "https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" }, + { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = 
"2025-12-06T15:54:33.361Z" }, + { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" }, + { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" }, + { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" }, ] [[package]] @@ -322,28 +324,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.14.8" +version = "0.14.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/d9/f7a0c4b3a2bf2556cd5d99b05372c29980249ef71e8e32669ba77428c82c/ruff-0.14.8.tar.gz", hash = "sha256:774ed0dd87d6ce925e3b8496feb3a00ac564bea52b9feb551ecd17e0a23d1eed", size = 5765385, upload-time = "2025-12-04T15:06:17.669Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/b8/9537b52010134b1d2b72870cc3f92d5fb759394094741b09ceccae183fbe/ruff-0.14.8-py3-none-linux_armv6l.whl", hash = "sha256:ec071e9c82eca417f6111fd39f7043acb53cd3fde9b1f95bbed745962e345afb", size = 13441540, upload-time = "2025-12-04T15:06:14.896Z" }, - { url = "https://files.pythonhosted.org/packages/24/00/99031684efb025829713682012b6dd37279b1f695ed1b01725f85fd94b38/ruff-0.14.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8cdb162a7159f4ca36ce980a18c43d8f036966e7f73f866ac8f493b75e0c27e9", size = 13669384, upload-time = "2025-12-04T15:06:51.809Z" }, - { url = "https://files.pythonhosted.org/packages/72/64/3eb5949169fc19c50c04f28ece2c189d3b6edd57e5b533649dae6ca484fe/ruff-0.14.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e2fcbefe91f9fad0916850edf0854530c15bd1926b6b779de47e9ab619ea38f", size = 12806917, upload-time = "2025-12-04T15:06:08.925Z" }, - { url = "https://files.pythonhosted.org/packages/c4/08/5250babb0b1b11910f470370ec0cbc67470231f7cdc033cee57d4976f941/ruff-0.14.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d70721066a296f45786ec31916dc287b44040f553da21564de0ab4d45a869b", size = 13256112, upload-time = "2025-12-04T15:06:23.498Z" }, - { url 
= "https://files.pythonhosted.org/packages/78/4c/6c588e97a8e8c2d4b522c31a579e1df2b4d003eddfbe23d1f262b1a431ff/ruff-0.14.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c87e09b3cd9d126fc67a9ecd3b5b1d3ded2b9c7fce3f16e315346b9d05cfb52", size = 13227559, upload-time = "2025-12-04T15:06:33.432Z" }, - { url = "https://files.pythonhosted.org/packages/23/ce/5f78cea13eda8eceac71b5f6fa6e9223df9b87bb2c1891c166d1f0dce9f1/ruff-0.14.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d62cb310c4fbcb9ee4ac023fe17f984ae1e12b8a4a02e3d21489f9a2a5f730c", size = 13896379, upload-time = "2025-12-04T15:06:02.687Z" }, - { url = "https://files.pythonhosted.org/packages/cf/79/13de4517c4dadce9218a20035b21212a4c180e009507731f0d3b3f5df85a/ruff-0.14.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1af35c2d62633d4da0521178e8a2641c636d2a7153da0bac1b30cfd4ccd91344", size = 15372786, upload-time = "2025-12-04T15:06:29.828Z" }, - { url = "https://files.pythonhosted.org/packages/00/06/33df72b3bb42be8a1c3815fd4fae83fa2945fc725a25d87ba3e42d1cc108/ruff-0.14.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25add4575ffecc53d60eed3f24b1e934493631b48ebbc6ebaf9d8517924aca4b", size = 14990029, upload-time = "2025-12-04T15:06:36.812Z" }, - { url = "https://files.pythonhosted.org/packages/64/61/0f34927bd90925880394de0e081ce1afab66d7b3525336f5771dcf0cb46c/ruff-0.14.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c943d847b7f02f7db4201a0600ea7d244d8a404fbb639b439e987edcf2baf9a", size = 14407037, upload-time = "2025-12-04T15:06:39.979Z" }, - { url = "https://files.pythonhosted.org/packages/96/bc/058fe0aefc0fbf0d19614cb6d1a3e2c048f7dc77ca64957f33b12cfdc5ef/ruff-0.14.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb6e8bf7b4f627548daa1b69283dac5a296bfe9ce856703b03130732e20ddfe2", size = 14102390, upload-time = "2025-12-04T15:06:46.372Z" }, - { url = "https://files.pythonhosted.org/packages/af/a4/e4f77b02b804546f4c17e8b37a524c27012dd6ff05855d2243b49a7d3cb9/ruff-0.14.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:7aaf2974f378e6b01d1e257c6948207aec6a9b5ba53fab23d0182efb887a0e4a", size = 14230793, upload-time = "2025-12-04T15:06:20.497Z" }, - { url = "https://files.pythonhosted.org/packages/3f/52/bb8c02373f79552e8d087cedaffad76b8892033d2876c2498a2582f09dcf/ruff-0.14.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e5758ca513c43ad8a4ef13f0f081f80f08008f410790f3611a21a92421ab045b", size = 13160039, upload-time = "2025-12-04T15:06:49.06Z" }, - { url = "https://files.pythonhosted.org/packages/1f/ad/b69d6962e477842e25c0b11622548df746290cc6d76f9e0f4ed7456c2c31/ruff-0.14.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f74f7ba163b6e85a8d81a590363bf71618847e5078d90827749bfda1d88c9cdf", size = 13205158, upload-time = "2025-12-04T15:06:54.574Z" }, - { url = "https://files.pythonhosted.org/packages/06/63/54f23da1315c0b3dfc1bc03fbc34e10378918a20c0b0f086418734e57e74/ruff-0.14.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eed28f6fafcc9591994c42254f5a5c5ca40e69a30721d2ab18bb0bb3baac3ab6", size = 13469550, upload-time = "2025-12-04T15:05:59.209Z" }, - { url = "https://files.pythonhosted.org/packages/70/7d/a4d7b1961e4903bc37fffb7ddcfaa7beb250f67d97cfd1ee1d5cddb1ec90/ruff-0.14.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:21d48fa744c9d1cb8d71eb0a740c4dd02751a5de9db9a730a8ef75ca34cf138e", size = 14211332, upload-time = "2025-12-04T15:06:06.027Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/93/2a5063341fa17054e5c86582136e9895db773e3c2ffb770dde50a09f35f0/ruff-0.14.8-py3-none-win32.whl", hash = "sha256:15f04cb45c051159baebb0f0037f404f1dc2f15a927418f29730f411a79bc4e7", size = 13151890, upload-time = "2025-12-04T15:06:11.668Z" }, - { url = "https://files.pythonhosted.org/packages/02/1c/65c61a0859c0add13a3e1cbb6024b42de587456a43006ca2d4fd3d1618fe/ruff-0.14.8-py3-none-win_amd64.whl", hash = "sha256:9eeb0b24242b5bbff3011409a739929f497f3fb5fe3b5698aba5e77e8c833097", size = 14537826, upload-time = "2025-12-04T15:06:26.409Z" }, - { url = "https://files.pythonhosted.org/packages/6d/63/8b41cea3afd7f58eb64ac9251668ee0073789a3bc9ac6f816c8c6fef986d/ruff-0.14.8-py3-none-win_arm64.whl", hash = "sha256:965a582c93c63fe715fd3e3f8aa37c4b776777203d8e1d8aa3cc0c14424a4b99", size = 13634522, upload-time = "2025-12-04T15:06:43.212Z" }, + { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, + { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] [[package]] @@ -382,15 +384,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.38.0" +version = "0.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 
80605, upload-time = "2025-10-18T13:46:44.63Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, ] [[package]] From 889f2d257ba15a61339de924fb6a67a6fefe6516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 11:15:41 +0700 Subject: [PATCH 044/139] Fix IDE warnings --- app/services/lmdb.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 93c7723..dec148b 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -52,7 +52,7 @@ def _hash_message(message: Message) -> str: func["arguments"] = orjson.dumps( parsed, option=orjson.OPT_SORT_KEYS ).decode("utf-8") - except Exception: + except orjson.JSONDecodeError: pass message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) @@ -175,7 +175,7 @@ def store( value = orjson.dumps(conv.model_dump(mode="json")) try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: # Store main data txn.put(storage_key.encode("utf-8"), value, overwrite=True) @@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: Conversation or None if not found """ try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: data = txn.get(key.encode("utf-8"), default=None) if not data: return None @@ -255,7 +255,7 @@ def _find_by_message_list( key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore return self.get(mapped.decode("utf-8")) # type: ignore except Exception as e: @@ -279,7 +279,7 @@ def exists(self, key: str) -> bool: bool: True if key exists, False otherwise """ try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: return txn.get(key.encode("utf-8")) is not None except Exception as e: logger.error(f"Failed to check existence of key {key}: {e}") @@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: ConversationInStore: The deleted conversation data, or None if not found """ try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: @@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: """ keys = [] try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: cursor = txn.cursor() cursor.first() @@ -377,7 +377,7 @@ 
def cleanup_expired(self, retention_days: Optional[int] = None) -> int: expired_entries: list[tuple[str, ConversationInStore]] = [] try: - with self._get_transaction(write=False) as txn: + with self._get_transaction(self, write=False) as txn: cursor = txn.cursor() for key_bytes, value_bytes in cursor: @@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: removed = 0 try: - with self._get_transaction(write=True) as txn: + with self._get_transaction(self, write=True) as txn: for key_str, conv in expired_entries: key_bytes = key_str.encode("utf-8") if not txn.delete(key_bytes): From 66b62020330e690499ef386e81cee52dc0f97cce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 11:26:16 +0700 Subject: [PATCH 045/139] Incorrect IDE warnings --- app/services/lmdb.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index dec148b..c8e78a9 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -175,7 +175,7 @@ def store( value = orjson.dumps(conv.model_dump(mode="json")) try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: # Store main data txn.put(storage_key.encode("utf-8"), value, overwrite=True) @@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: Conversation or None if not found """ try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: data = txn.get(key.encode("utf-8"), default=None) if not data: return None @@ -255,7 +255,7 @@ def _find_by_message_list( key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore return self.get(mapped.decode("utf-8")) # type: ignore except Exception as e: @@ -279,7 +279,7 @@ def exists(self, key: str) -> bool: bool: True if key exists, False otherwise """ try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: return txn.get(key.encode("utf-8")) is not None except Exception as e: logger.error(f"Failed to check existence of key {key}: {e}") @@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: ConversationInStore: The deleted conversation data, or None if not found """ try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: @@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: """ keys = [] try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: cursor = txn.cursor() cursor.first() @@ -377,7 +377,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: expired_entries: list[tuple[str, ConversationInStore]] = [] try: - with self._get_transaction(self, write=False) as txn: + with self._get_transaction(write=False) as txn: cursor = txn.cursor() for key_bytes, value_bytes in cursor: @@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: removed = 0 try: - with self._get_transaction(self, write=True) as txn: + with self._get_transaction(write=True) as txn: for key_str, conv in expired_entries: key_bytes = key_str.encode("utf-8") if not 
txn.delete(key_bytes): From 3297f534f035f869bd7e4a867618b39bc7256f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 12:05:26 +0700 Subject: [PATCH 046/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/services/lmdb.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c8e78a9..a55d3a9 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -476,9 +476,7 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: if isinstance(msg.content, str): normalized_content = LMDBConversationStore.remove_think_tags(msg.content) if normalized_content != msg.content: - cleaned_msg = Message( - role=msg.role, content=normalized_content, name=msg.name - ) + cleaned_msg = msg.model_copy(update={"content": normalized_content}) cleaned_messages.append(cleaned_msg) else: cleaned_messages.append(msg) From 5399b260595e77d6c1f0a8d24a880c59d165a57b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 12:06:52 +0700 Subject: [PATCH 047/139] Refactor: Centralized the mapping of the 'developer' role to 'system' for better Gemini compatibility. --- app/models/models.py | 7 +++++++ app/server/chat.py | 6 +----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index c27e024..63ddb94 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -29,6 +29,13 @@ class Message(BaseModel): audio: Optional[Dict[str, Any]] = None annotations: List[Dict[str, Any]] = Field(default_factory=list) + @model_validator(mode="after") + def normalize_role(self) -> "Message": + """Normalize 'developer' role to 'system' for Gemini compatibility.""" + if self.role == "developer": + self.role = "system" + return self + class Choice(BaseModel): """Choice model""" diff --git a/app/server/chat.py b/app/server/chat.py index a9d9dec..66a2720 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -319,8 +319,6 @@ def _response_items_to_messages( normalized_input: list[ResponseInputItem] = [] for item in items: role = item.role - if role == "developer": - role = "system" content = item.content normalized_contents: list[ResponseInputContent] = [] @@ -394,8 +392,6 @@ def _instructions_to_messages( continue role = item.role - if role == "developer": - role = "system" content = item.content if isinstance(content, str): @@ -1054,7 +1050,7 @@ async def _find_reusable_session( while search_end >= 2: search_history = messages[:search_end] - # Only try to match if the last stored message would be assistant/system. + # Only try to match if the last stored message would be assistant/system before querying LMDB. if search_history[-1].role in {"assistant", "system"}: try: if conv := db.find(model.model_name, search_history): From de01c7850fa44f4dcbd8f31c47bccaf301861a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:04:31 +0700 Subject: [PATCH 048/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. 
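
Conversation lookup hinges on these hashes being deterministic: the key
is a chained SHA-256 over the client id, the model name, and one hash
per message, so any volatile field (random tool-call ids, unsorted JSON
keys) breaks prefix matching. A minimal sketch of the chaining,
mirroring _hash_conversation in the diff below (the per-message hashes
come from the normalized core fields):

    import hashlib

    def conversation_key(client_id: str, model: str, message_hashes: list[str]) -> str:
        combined = hashlib.sha256()
        combined.update(client_id.encode("utf-8"))
        combined.update(model.encode("utf-8"))
        for message_hash in message_hashes:
            combined.update(message_hash.encode("utf-8"))
        return combined.hexdigest()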
--- app/models/models.py | 1 + app/services/lmdb.py | 95 +++++++++++++++++++++++++------------------- app/utils/helper.py | 10 +++-- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 63ddb94..4072b29 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -24,6 +24,7 @@ class Message(BaseModel): content: Union[str, List[ContentItem], None] = None name: Optional[str] = None tool_calls: Optional[List["ToolCall"]] = None + tool_call_id: Optional[str] = None refusal: Optional[str] = None reasoning_content: Optional[str] = None audio: Optional[Dict[str, Any]] = None diff --git a/app/services/lmdb.py b/app/services/lmdb.py index a55d3a9..594acf0 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -15,53 +15,69 @@ def _hash_message(message: Message) -> str: - """Generate a hash for a single message.""" - # Convert message to dict and sort keys for consistent hashing - message_dict = message.model_dump(mode="json") - - # Normalize content: empty string -> None - content = message_dict.get("content") - if content == "": - message_dict["content"] = None + """Generate a consistent hash for a single message focusing only on core identity fields.""" + # Pick only fields that define the message in a conversation history + core_data = { + "role": message.role, + "name": message.name, + "tool_call_id": message.tool_call_id, + } + + # Normalize content: strip, handle empty/None, and list-of-text items + content = message.content + if not content: + core_data["content"] = None + elif isinstance(content, str): + stripped = content.strip() + core_data["content"] = stripped if stripped else None elif isinstance(content, list): - is_pure_text = True text_parts = [] for item in content: - if not isinstance(item, dict) or item.get("type") != "text": - is_pure_text = False + if isinstance(item, ContentItem) and item.type == "text": + text_parts.append(item.text or "") + elif isinstance(item, dict) and item.get("type") == "text": + text_parts.append(item.get("text") or "") + else: + # If it contains non-text (images/files), keep the full list for hashing + text_parts = None break - text_parts.append(item.get("text") or "") - - if is_pure_text: - text_content = "".join(text_parts) - message_dict["content"] = text_content if text_content else None - - # Normalize tool_calls: empty list -> None, and canonicalize arguments - tool_calls = message_dict.get("tool_calls") - if not tool_calls: - message_dict["tool_calls"] = None - elif isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict) and "function" in tool_call: - func = tool_call["function"] - args = func.get("arguments") - if isinstance(args, str): - try: - # Parse and re-dump to canonicalize (remove extra whitespace, sort keys) - parsed = orjson.loads(args) - func["arguments"] = orjson.dumps( - parsed, option=orjson.OPT_SORT_KEYS - ).decode("utf-8") - except orjson.JSONDecodeError: - pass - - message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS) + + if text_parts is not None: + text_content = "".join(text_parts).strip() + core_data["content"] = text_content if text_content else None + else: + core_data["content"] = message.model_dump(mode="json")["content"] + + # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist + if message.tool_calls: + calls_data = [] + for tc in message.tool_calls: + args = tc.function.arguments or "{}" + try: + parsed = orjson.loads(args) + canon_args = 
orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") + except orjson.JSONDecodeError: + canon_args = args + + calls_data.append( + { + "id": tc.id, # Deterministic IDs ensure this is stable + "name": tc.function.name, + "arguments": canon_args, + } + ) + # Sort calls to be order-independent + calls_data.sort(key=lambda x: (x["name"], x["arguments"])) + core_data["tool_calls"] = calls_data + else: + core_data["tool_calls"] = None + + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: - """Generate a hash for a list of messages and client id.""" - # Create a combined hash from all individual message hashes + """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() combined_hash.update(client_id.encode("utf-8")) combined_hash.update(model.encode("utf-8")) @@ -252,7 +268,6 @@ def _find_by_message_list( """Internal find implementation based on a message list.""" for c in g_config.gemini.clients: message_hash = _hash_conversation(c.id, model, messages) - key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" try: with self._get_transaction(write=False) as txn: diff --git a/app/utils/helper.py b/app/utils/helper.py index 1dc518f..239b7f4 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,9 +1,9 @@ import base64 +import hashlib import mimetypes import re import struct import tempfile -import uuid from pathlib import Path from typing import Iterator from urllib.parse import urlparse @@ -222,13 +222,17 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = raw_args try: parsed_args = orjson.loads(raw_args) - arguments = orjson.dumps(parsed_args).decode("utf-8") + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") + # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB + seed = f"{name}:{arguments}".encode("utf-8") + call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" + tool_calls.append( ToolCall( - id=f"call_{uuid.uuid4().hex}", + id=call_id, type="function", function=FunctionCall(name=name, arguments=arguments), ) From 196414755e860f1f6d9c840954eb45c53225a864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:26:58 +0700 Subject: [PATCH 049/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/server/chat.py | 10 +++++++++- app/services/lmdb.py | 7 ++----- app/utils/helper.py | 13 +++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66a2720..7c683cd 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1047,6 +1047,8 @@ async def _find_reusable_session( # Start with the full history and iteratively trim from the end. search_end = len(messages) + logger.debug(f"Searching for reusable session in history of length {search_end}...") + while search_end >= 2: search_history = messages[:search_end] @@ -1057,14 +1059,20 @@ async def _find_reusable_session( client = await pool.acquire(conv.client_id) session = client.start_chat(metadata=conv.metadata, model=model) remain = messages[search_end:] + logger.debug( + f"Match found at prefix length {search_end}. 
Client: {conv.client_id}" + ) return session, client, remain except Exception as e: - logger.warning(f"Error checking LMDB for reusable session: {e}") + logger.warning( + f"Error checking LMDB for reusable session at length {search_end}: {e}" + ) break # Trim one message and try again. search_end -= 1 + logger.debug("No reusable session found after checking all possible prefixes.") return None, None, messages diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 594acf0..5aefa4b 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -15,12 +15,10 @@ def _hash_message(message: Message) -> str: - """Generate a consistent hash for a single message focusing only on core identity fields.""" - # Pick only fields that define the message in a conversation history + """Generate a consistent hash for a single message focusing ONLY on logic/content, ignoring technical IDs.""" core_data = { "role": message.role, "name": message.name, - "tool_call_id": message.tool_call_id, } # Normalize content: strip, handle empty/None, and list-of-text items @@ -48,7 +46,7 @@ def _hash_message(message: Message) -> str: else: core_data["content"] = message.model_dump(mode="json")["content"] - # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist + # Normalize tool_calls: Focus ONLY on function name and arguments if message.tool_calls: calls_data = [] for tc in message.tool_calls: @@ -61,7 +59,6 @@ def _hash_message(message: Message) -> str: calls_data.append( { - "id": tc.id, # Deterministic IDs ensure this is stable "name": tc.function.name, "arguments": canon_args, } diff --git a/app/utils/helper.py b/app/utils/helper.py index 239b7f4..ecf4a47 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str) -> None: + def _create_tool_call(name: str, raw_args: str, index: int) -> None: """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") @@ -226,8 +226,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. 
Passing raw string.") - # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB - seed = f"{name}:{arguments}".encode("utf-8") + # Generate a deterministic ID based on name, arguments, and index to avoid collisions + seed = f"{name}:{arguments}:{index}".encode("utf-8") call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" tool_calls.append( @@ -244,11 +244,11 @@ def _replace_block(match: re.Match[str]) -> str: return match.group(0) found_in_block = False - for call_match in TOOL_CALL_RE.finditer(block_content): + for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)): found_in_block = True name = (call_match.group(1) or "").strip() raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args) + _create_tool_call(name, raw_args, i) if found_in_block: return "" @@ -258,9 +258,10 @@ def _replace_block(match: re.Match[str]) -> str: cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) def _replace_orphan(match: re.Match[str]) -> str: + # Note: orphan calls are handled with a fallback index if they appear outside blocks name = (match.group(1) or "").strip() raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args) + _create_tool_call(name, raw_args, len(tool_calls)) return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) From 8c5c7498230bc680bf50464dacf0b6f001888981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 13:42:09 +0700 Subject: [PATCH 050/139] Refactor: Modify the LMDB store to fix issues where no conversation is found. --- app/server/chat.py | 4 ++-- app/services/lmdb.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 7c683cd..0d64b71 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1052,8 +1052,8 @@ async def _find_reusable_session( while search_end >= 2: search_history = messages[:search_end] - # Only try to match if the last stored message would be assistant/system before querying LMDB. - if search_history[-1].role in {"assistant", "system"}: + # Only try to match if the last stored message would be assistant/system/tool before querying LMDB. + if search_history[-1].role in {"assistant", "system", "tool"}: try: if conv := db.find(model.model_name, search_history): client = await pool.acquire(conv.client_id) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 5aefa4b..c612d9e 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -70,7 +70,11 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = None message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + msg_hash = hashlib.sha256(message_bytes).hexdigest() + logger.debug( + f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}" + ) + return msg_hash def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From ce67d664b5443726fe518aee1cc9ef550ae640fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 14:41:55 +0700 Subject: [PATCH 051/139] Refactor: Avoid reusing an existing chat session if its idle time exceeds METADATA_TTL_MINUTES. 
--- app/server/chat.py | 14 ++++++++++++-- app/services/lmdb.py | 9 ++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0d64b71..6fbb818 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -58,6 +58,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" +METADATA_TTL_MINUTES = 20 router = APIRouter() @@ -1047,7 +1048,6 @@ async def _find_reusable_session( # Start with the full history and iteratively trim from the end. search_end = len(messages) - logger.debug(f"Searching for reusable session in history of length {search_end}...") while search_end >= 2: search_history = messages[:search_end] @@ -1056,6 +1056,17 @@ async def _find_reusable_session( if search_history[-1].role in {"assistant", "system", "tool"}: try: if conv := db.find(model.model_name, search_history): + # Check if metadata is too old + now = datetime.now() + updated_at = conv.updated_at or conv.created_at or now + age_minutes = (now - updated_at).total_seconds() / 60 + + if age_minutes > METADATA_TTL_MINUTES: + logger.debug( + f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." + ) + break + client = await pool.acquire(conv.client_id) session = client.start_chat(metadata=conv.metadata, model=model) remain = messages[search_end:] @@ -1072,7 +1083,6 @@ async def _find_reusable_session( # Trim one message and try again. search_end -= 1 - logger.debug("No reusable session found after checking all possible prefixes.") return None, None, messages diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c612d9e..424b357 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -68,13 +68,8 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None - - message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - msg_hash = hashlib.sha256(message_bytes).hexdigest() - logger.debug( - f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}" - ) - return msg_hash + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From 3d32d1226b1399f4286aadd95b2c4a52228fac45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 14:58:58 +0700 Subject: [PATCH 052/139] Refactor: Update the LMDB store to resolve issues preventing conversation from being saved --- app/services/lmdb.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 424b357..2dbe7b2 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -68,15 +68,16 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None - message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + + message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() - 
combined_hash.update(client_id.encode("utf-8")) - combined_hash.update(model.encode("utf-8")) + combined_hash.update((client_id or "").encode("utf-8")) + combined_hash.update((model or "").encode("utf-8")) for message in messages: message_hash = _hash_message(message) combined_hash.update(message_hash.encode("utf-8")) From 2eb9f05142ddfa1cb665b248f3faf2e278b619c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 24 Jan 2026 17:57:04 +0700 Subject: [PATCH 053/139] Refactor: Update the _prepare_messages_for_model helper to omit the system instruction when reusing a session to save tokens. --- app/server/chat.py | 66 +++++++++++++++++++++++++---------------- app/services/lmdb.py | 70 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 96 insertions(+), 40 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 6fbb818..646f4fa 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -58,7 +58,7 @@ # Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" -METADATA_TTL_MINUTES = 20 +METADATA_TTL_MINUTES = 15 router = APIRouter() @@ -268,31 +268,35 @@ def _prepare_messages_for_model( tools: list[Tool] | None, tool_choice: str | ToolChoiceFunction | None, extra_instructions: list[str] | None = None, + inject_system_defaults: bool = True, ) -> list[Message]: """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] instructions: list[str] = [] - if tools: - tool_prompt = _build_tool_prompt(tools, tool_choice) - if tool_prompt: - instructions.append(tool_prompt) - - if extra_instructions: - instructions.extend(instr for instr in extra_instructions if instr) - logger.debug( - f"Applied {len(extra_instructions)} extra instructions for tool/structured output." - ) + if inject_system_defaults: + if tools: + tool_prompt = _build_tool_prompt(tools, tool_choice) + if tool_prompt: + instructions.append(tool_prompt) + + if extra_instructions: + instructions.extend(instr for instr in extra_instructions if instr) + logger.debug( + f"Applied {len(extra_instructions)} extra instructions for tool/structured output." + ) - if not _conversation_has_code_hint(prepared): - instructions.append(CODE_BLOCK_HINT) - logger.debug("Injected default code block hint for Gemini conversation.") + if not _conversation_has_code_hint(prepared): + instructions.append(CODE_BLOCK_HINT) + logger.debug("Injected default code block hint for Gemini conversation.") if not instructions: + # Still need to ensure XML hint for the last user message if tools are present + if tools and tool_choice != "none": + _append_xml_hint_to_last_user_message(prepared) return prepared combined_instructions = "\n\n".join(instructions) - if prepared and prepared[0].role == "system" and isinstance(prepared[0].content, str): existing = prepared[0].content or "" separator = "\n\n" if existing else "" @@ -530,8 +534,14 @@ async def create_chat_completion( ) if session: + # Optimization: When reusing a session, we don't need to resend the heavy tool definitions + # or structured output instructions as they are already in the Gemini session history. 
messages_to_send = _prepare_messages_for_model( - remaining_messages, request.tools, request.tool_choice, extra_instructions + remaining_messages, + request.tools, + request.tool_choice, + extra_instructions, + inject_system_defaults=False, ) if not messages_to_send: raise HTTPException( @@ -642,17 +652,20 @@ async def create_chat_completion( # After formatting, persist the conversation to LMDB try: - last_message = Message( + current_assistant_message = Message( role="assistant", content=storage_output or None, tool_calls=tool_calls or None, ) - cleaned_history = db.sanitize_assistant_messages(request.messages) + # Sanitize the entire history including the new message to ensure consistency + full_history = [*request.messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + conv = ConversationInStore( model=model.model_name, client_id=client.id, metadata=session.metadata, - messages=[*cleaned_history, last_message], + messages=cleaned_history, ) key = db.store(conv) logger.debug(f"Conversation saved to LMDB with key: {key}") @@ -780,9 +793,10 @@ async def _build_payload( if reuse_session: messages_to_send = _prepare_messages_for_model( remaining_messages, - tools=None, - tool_choice=None, - extra_instructions=extra_instructions or None, + tools=request_data.tools, # Keep for XML hint logic + tool_choice=request_data.tool_choice, + extra_instructions=None, # Already in session history + inject_system_defaults=False, ) if not messages_to_send: raise HTTPException( @@ -994,17 +1008,19 @@ async def _build_payload( ) try: - last_message = Message( + current_assistant_message = Message( role="assistant", content=storage_output or None, tool_calls=detected_tool_calls or None, ) - cleaned_history = db.sanitize_assistant_messages(messages) + full_history = [*messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + conv = ConversationInStore( model=model.model_name, client_id=client.id, metadata=session.metadata, - messages=[*cleaned_history, last_message], + messages=cleaned_history, ) key = db.store(conv) logger.debug(f"Conversation saved to LMDB with key: {key}") diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 2dbe7b2..f4c9938 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -11,6 +11,7 @@ from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config +from ..utils.helper import extract_tool_calls, remove_tool_call_blocks from ..utils.singleton import Singleton @@ -26,8 +27,9 @@ def _hash_message(message: Message) -> str: if not content: core_data["content"] = None elif isinstance(content, str): - stripped = content.strip() - core_data["content"] = stripped if stripped else None + # Normalize line endings and strip whitespace + normalized = content.replace("\r\n", "\n").strip() + core_data["content"] = normalized if normalized else None elif isinstance(content, list): text_parts = [] for item in content: @@ -41,7 +43,7 @@ def _hash_message(message: Message) -> str: break if text_parts is not None: - text_content = "".join(text_parts).strip() + text_content = "".join(text_parts).replace("\r\n", "\n").strip() core_data["content"] = text_content if text_content else None else: core_data["content"] = message.model_dump(mode="json")["content"] @@ -260,7 +262,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt return None def _find_by_message_list( - self, model: str, messages: List[Message] + self, + model: str, + 
messages: List[Message],
     ) -> Optional[ConversationInStore]:
         """Internal find implementation based on a message list."""
         for c in g_config.gemini.clients:
             message_hash = _hash_conversation(c.id, model, messages)
             key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}"
 
             try:
                 with self._get_transaction(write=False) as txn:
@@ -471,40 +475,76 @@ def __del__(self):
 
     @staticmethod
     def remove_think_tags(text: str) -> str:
         """
-        Remove <think>...</think> tags at the start of text and strip whitespace.
+        Remove all <think>...</think> tags and strip whitespace.
         """
-        cleaned_content = re.sub(r"^(\s*<think>.*?</think>\n?)", "", text, flags=re.DOTALL)
+        # Remove all think blocks anywhere in the text
+        cleaned_content = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
         return cleaned_content.strip()
 
     @staticmethod
     def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
         """
-        Create a new list of messages with assistant content cleaned of <think> tags.
-        This is useful for store the chat history.
+        Create a new list of messages with assistant content cleaned of <think> tags
+        and system hints/tool call blocks. This is used for both storing and
+        searching chat history to ensure consistency.
+
+        If a message has no tool_calls but contains tool call XML blocks in its
+        content, they will be extracted and moved to the tool_calls field.
         """
         cleaned_messages = []
         for msg in messages:
             if msg.role == "assistant":
                 if isinstance(msg.content, str):
-                    normalized_content = LMDBConversationStore.remove_think_tags(msg.content)
-                    if normalized_content != msg.content:
-                        cleaned_msg = msg.model_copy(update={"content": normalized_content})
+                    text = LMDBConversationStore.remove_think_tags(msg.content)
+                    tool_calls = msg.tool_calls
+                    if not tool_calls:
+                        text, tool_calls = extract_tool_calls(text)
+                    else:
+                        text = remove_tool_call_blocks(text).strip()
+
+                    normalized_content = text.strip()
+
+                    if normalized_content != msg.content or tool_calls != msg.tool_calls:
+                        cleaned_msg = msg.model_copy(
+                            update={
+                                "content": normalized_content or None,
+                                "tool_calls": tool_calls or None,
+                            }
+                        )
                         cleaned_messages.append(cleaned_msg)
                     else:
                         cleaned_messages.append(msg)
                 elif isinstance(msg.content, list):
                     new_content = []
+                    all_extracted_calls = list(msg.tool_calls or [])
                     changed = False
+
                     for item in msg.content:
                         if isinstance(item, ContentItem) and item.type == "text" and item.text:
-                            cleaned_text = LMDBConversationStore.remove_think_tags(item.text)
-                            if cleaned_text != item.text:
+                            text = LMDBConversationStore.remove_think_tags(item.text)
+
+                            if not msg.tool_calls:
+                                text, extracted = extract_tool_calls(text)
+                                if extracted:
+                                    all_extracted_calls.extend(extracted)
+                                    changed = True
+                            else:
+                                text = remove_tool_call_blocks(text).strip()
+
+                            if text != item.text:
                                 changed = True
-                                item = item.model_copy(update={"text": cleaned_text})
+                                item = item.model_copy(update={"text": text.strip() or None})
                         new_content.append(item)
                     if changed:
-                        cleaned_messages.append(msg.model_copy(update={"content": new_content}))
+                        cleaned_messages.append(
+                            msg.model_copy(
+                                update={
+                                    "content": new_content,
+                                    "tool_calls": all_extracted_calls or None,
+                                }
+                            )
+                        )
                     else:
                         cleaned_messages.append(msg)
                 else:

From ade61d6826af1f256e7141ab6c1815b047cf8744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Mon, 26 Jan 2026 11:01:41 +0700
Subject: [PATCH 054/139] Refactor: Modify the logic to convert a large prompt
 into a temporary text file attachment

- When multiple chunks are sent simultaneously, Google will immediately
  invalidate the access token and reject the request
- When a prompt contains a structured format like JSON, splitting it can
  break the format and may cause the model to misunderstand the context
- Another minor tweak as Copilot suggested
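
In short, _send_with_split no longer splits at all: an oversized prompt
is written to a message.txt attachment and sent in a single request. A
condensed sketch of the fallback implemented below (logging and error
handling omitted; the real instruction text is longer):

    import tempfile
    from pathlib import Path

    async def send_oversized(session, text: str, files: list | None = None):
        with tempfile.TemporaryDirectory() as tmpdir:
            attachment = Path(tmpdir) / "message.txt"
            attachment.write_text(text, encoding="utf-8")
            instruction = (
                "The user's input is provided in the attached file `message.txt`. "
                "Treat its content as the primary prompt for this turn."
            )
            return await session.send_message(instruction, files=[*(files or []), attachment])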
--- app/server/chat.py | 104 ++++++++++++++++--------------------------- app/services/lmdb.py | 5 ++- app/utils/helper.py | 13 +++--- 3 files changed, 49 insertions(+), 73 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 646f4fa..063d4d4 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,7 +1,6 @@ -import asyncio import base64 -import random import re +import tempfile import uuid from dataclasses import dataclass from datetime import datetime, timezone @@ -375,9 +374,7 @@ def _response_items_to_messages( ResponseInputItem(type="message", role=item.role, content=normalized_contents or []) ) - logger.debug( - f"Normalized Responses input: {len(normalized_input)} message items (developer roles mapped to system)." - ) + logger.debug(f"Normalized Responses input: {len(normalized_input)} message items.") return messages, normalized_input @@ -1077,19 +1074,18 @@ async def _find_reusable_session( updated_at = conv.updated_at or conv.created_at or now age_minutes = (now - updated_at).total_seconds() / 60 - if age_minutes > METADATA_TTL_MINUTES: + if age_minutes <= METADATA_TTL_MINUTES: + client = await pool.acquire(conv.client_id) + session = client.start_chat(metadata=conv.metadata, model=model) + remain = messages[search_end:] + logger.debug( + f"Match found at prefix length {search_end}. Client: {conv.client_id}" + ) + return session, client, remain + else: logger.debug( f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." ) - break - - client = await pool.acquire(conv.client_id) - session = client.start_chat(metadata=conv.metadata, model=model) - remain = messages[search_end:] - logger.debug( - f"Match found at prefix length {search_end}. Client: {conv.client_id}" - ) - return session, client, remain except Exception as e: logger.warning( f"Error checking LMDB for reusable session at length {search_end}: {e}" @@ -1103,13 +1099,9 @@ async def _find_reusable_session( async def _send_with_split(session: ChatSession, text: str, files: list[Path | str] | None = None): - """Send text to Gemini, automatically splitting into multiple batches if it is - longer than ``MAX_CHARS_PER_REQUEST``. - - Every intermediate batch (that is **not** the last one) is suffixed with a hint - telling Gemini that more content will come, and it should simply reply with - "ok". The final batch carries any file uploads and the real user prompt so - that Gemini can produce the actual answer. + """ + Send text to Gemini. If text is longer than ``MAX_CHARS_PER_REQUEST``, + it is converted into a temporary text file attachment to avoid splitting issues. """ if len(text) <= MAX_CHARS_PER_REQUEST: try: @@ -1118,55 +1110,37 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s logger.exception(f"Error sending message to Gemini: {e}") raise - hint_len = len(CONTINUATION_HINT) - safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len - - chunks: list[str] = [] - pos = 0 - total = len(text) - - while pos < total: - remaining = total - pos - if remaining <= MAX_CHARS_PER_REQUEST: - chunks.append(text[pos:]) - break - - end = pos + safe_chunk_size - slice_candidate = text[pos:end] - # Try to find a safe split point - split_idx = -1 - idx = slice_candidate.rfind("\n") - if idx != -1: - split_idx = idx - - if split_idx != -1: - split_at = pos + split_idx + 1 - else: - split_at = end + logger.info( + f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment." 
+ ) - chunk = text[pos:split_at] + CONTINUATION_HINT - chunks.append(chunk) - pos = split_at + # Create a temporary directory to hold the message.txt file + # This ensures the filename is exactly 'message.txt' as expected by the instruction. + with tempfile.TemporaryDirectory() as tmpdirname: + temp_file_path = Path(tmpdirname) / "message.txt" + temp_file_path.write_text(text, encoding="utf-8") - chunks_size = len(chunks) - for i, chk in enumerate(chunks[:-1]): try: - logger.debug(f"Sending chunk {i + 1}/{chunks_size}...") - await session.send_message(chk) - delay = random.uniform(1.0, 3.0) - logger.debug(f"Sleeping for {delay:.2f}s...") - await asyncio.sleep(delay) + # Prepare the files list + final_files = list(files) if files else [] + final_files.append(temp_file_path) + + instruction = ( + "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n" + "**System Instruction:**\n" + "1. Read the content of `message.txt`.\n" + "2. Treat that content as the **primary** user prompt for this turn.\n" + "3. Execute the instructions or answer the questions found *inside* that file immediately.\n" + ) + + logger.debug(f"Sending prompt as temporary file: {temp_file_path}") + + return await session.send_message(instruction, files=final_files) + except Exception as e: - logger.exception(f"Error sending chunk to Gemini: {e}") + logger.exception(f"Error sending large text as file to Gemini: {e}") raise - try: - logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...") - return await session.send_message(chunks[-1], files=files) - except Exception as e: - logger.exception(f"Error sending final chunk to Gemini: {e}") - raise - def _create_streaming_response( model_output: str, diff --git a/app/services/lmdb.py b/app/services/lmdb.py index f4c9938..c9d42cd 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -43,8 +43,9 @@ def _hash_message(message: Message) -> str: break if text_parts is not None: - text_content = "".join(text_parts).replace("\r\n", "\n").strip() - core_data["content"] = text_content if text_content else None + # Normalize each part but keep them as a list to preserve boundaries and avoid collisions + normalized_parts = [p.replace("\r\n", "\n") for p in text_parts] + core_data["content"] = normalized_parts if normalized_parts else None else: core_data["content"] = message.model_dump(mode="json")["content"] diff --git a/app/utils/helper.py b/app/utils/helper.py index ecf4a47..190b5ce 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str, index: int) -> None: + def _create_tool_call(name: str, raw_args: str) -> None: """Helper to parse args and append to the tool_calls list.""" if not name: logger.warning("Encountered tool_call without a function name.") @@ -226,7 +226,9 @@ def _create_tool_call(name: str, raw_args: str, index: int) -> None: except orjson.JSONDecodeError: logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.") - # Generate a deterministic ID based on name, arguments, and index to avoid collisions + # Generate a deterministic ID based on name, arguments, and its global sequence index + # to ensure uniqueness across multiple fenced blocks while remaining stable for storage. 
+ index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" @@ -244,11 +246,11 @@ def _replace_block(match: re.Match[str]) -> str: return match.group(0) found_in_block = False - for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)): + for call_match in TOOL_CALL_RE.finditer(block_content): found_in_block = True name = (call_match.group(1) or "").strip() raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args, i) + _create_tool_call(name, raw_args) if found_in_block: return "" @@ -258,10 +260,9 @@ def _replace_block(match: re.Match[str]) -> str: cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) def _replace_orphan(match: re.Match[str]) -> str: - # Note: orphan calls are handled with a fallback index if they appear outside blocks name = (match.group(1) or "").strip() raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args, len(tool_calls)) + _create_tool_call(name, raw_args) return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) From bdd893ff9a2d2c58fcbc3eb0c01aab337177edd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 28 Jan 2026 13:37:47 +0700 Subject: [PATCH 055/139] Enable streaming responses and fully resolve the problem with reusable sessions. - Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR. --- app/main.py | 2 +- app/models/models.py | 4 +- app/server/chat.py | 1867 ++++++++++++++++++++++------------------ app/services/client.py | 11 +- app/services/lmdb.py | 152 ++-- app/services/pool.py | 4 +- app/utils/helper.py | 113 +-- 7 files changed, 1162 insertions(+), 991 deletions(-) diff --git a/app/main.py b/app/main.py index 307eb36..f4e6711 100644 --- a/app/main.py +++ b/app/main.py @@ -15,7 +15,7 @@ ) from .services import GeminiClientPool, LMDBConversationStore -RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60 # 6 hours +RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60 # Check every 6 hours async def _run_retention_cleanup(stop_event: asyncio.Event) -> None: diff --git a/app/models/models.py b/app/models/models.py index 4072b29..64ceaa9 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -7,7 +7,7 @@ class ContentItem(BaseModel): - """Content item model""" + """Individual content item (text, image, or file) within a message.""" type: Literal["text", "image_url", "file", "input_audio"] text: Optional[str] = None @@ -159,7 +159,7 @@ class ConversationInStore(BaseModel): created_at: Optional[datetime] = Field(default=None) updated_at: Optional[datetime] = Field(default=None) - # NOTE: Gemini Web API do not support changing models once a conversation is created. + # Gemini Web API does not support changing models once a conversation is created. 
model: str = Field(..., description="Model used for the conversation") client_id: str = Field(..., description="Identifier of the Gemini client") metadata: list[str | None] = Field( diff --git a/app/server/chat.py b/app/server/chat.py index 063d4d4..37d3c70 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,18 +1,18 @@ import base64 -import re -import tempfile +import io +import reprlib import uuid from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any +from typing import Any, AsyncGenerator import orjson from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse +from gemini_webapi import ModelOutput from gemini_webapi.client import ChatSession from gemini_webapi.constants import Model -from gemini_webapi.exceptions import APIError from gemini_webapi.types.image import GeneratedImage, Image from loguru import logger @@ -42,21 +42,18 @@ from ..utils.helper import ( CODE_BLOCK_HINT, CODE_HINT_STRIPPED, + CONTROL_TOKEN_RE, XML_HINT_STRIPPED, XML_WRAP_HINT, estimate_tokens, extract_image_dimensions, extract_tool_calls, - iter_stream_segments, - remove_tool_call_blocks, strip_code_fence, text_from_message, ) from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key -# Maximum characters Gemini Web can accept in a single request (configurable) MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) -CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')" METADATA_TTL_MINUTES = 15 router = APIRouter() @@ -72,6 +69,210 @@ class StructuredOutputRequirement: raw_format: dict[str, Any] +# --- Helper Functions --- + + +async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: + """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" + if isinstance(image, GeneratedImage): + try: + saved_path = await image.save(path=str(temp_dir), full_size=True) + except Exception as e: + logger.warning( + f"Failed to download full-size GeneratedImage, retrying with default size: {e}" + ) + saved_path = await image.save(path=str(temp_dir), full_size=False) + else: + saved_path = await image.save(path=str(temp_dir)) + + if not saved_path: + raise ValueError("Failed to save generated image") + + original_path = Path(saved_path) + random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" + new_path = temp_dir / random_name + original_path.rename(new_path) + + data = new_path.read_bytes() + width, height = extract_image_dimensions(data) + filename = random_name + return base64.b64encode(data).decode("ascii"), width, height, filename + + +def _calculate_usage( + messages: list[Message], + assistant_text: str | None, + tool_calls: list[Any] | None, +) -> tuple[int, int, int]: + """Calculate prompt, completion and total tokens consistently.""" + prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) + tool_args_text = "" + if tool_calls: + for call in tool_calls: + if hasattr(call, "function"): + tool_args_text += call.function.arguments or "" + elif isinstance(call, dict): + tool_args_text += call.get("function", {}).get("arguments", "") + + completion_basis = assistant_text or "" + if tool_args_text: + completion_basis = ( + f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text + ) + + completion_tokens = estimate_tokens(completion_basis) + return prompt_tokens, 
completion_tokens, prompt_tokens + completion_tokens + + +def _create_responses_standard_payload( + response_id: str, + created_time: int, + model_name: str, + assistant_text: str | None, + detected_tool_calls: list[Any] | None, + image_call_items: list[ResponseImageGenerationCall], + response_contents: list[ResponseOutputContent], + usage: ResponseUsage, + request_data: ResponseCreateRequest, + normalized_input: Any, +) -> ResponseCreateResponse: + """Unified factory for building ResponseCreateResponse objects.""" + message_id = f"msg_{uuid.uuid4().hex}" + tool_call_items: list[ResponseToolCall] = [] + if detected_tool_calls: + tool_call_items = [ + ResponseToolCall( + id=call.id if hasattr(call, "id") else call["id"], + status="completed", + function=call.function if hasattr(call, "function") else call["function"], + ) + for call in detected_tool_calls + ] + + return ResponseCreateResponse( + id=response_id, + created_at=created_time, + model=model_name, + output=[ + ResponseOutputMessage( + id=message_id, + type="message", + role="assistant", + content=response_contents, + ), + *tool_call_items, + *image_call_items, + ], + status="completed", + usage=usage, + input=normalized_input or None, + metadata=request_data.metadata or None, + tools=request_data.tools, + tool_choice=request_data.tool_choice, + ) + + +def _create_chat_completion_standard_payload( + completion_id: str, + created_time: int, + model_name: str, + visible_output: str | None, + tool_calls_payload: list[dict] | None, + finish_reason: str, + usage: dict, +) -> dict: + """Unified factory for building Chat Completion response dictionaries.""" + return { + "id": completion_id, + "object": "chat.completion", + "created": created_time, + "model": model_name, + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": visible_output or None, + "tool_calls": tool_calls_payload or None, + }, + "finish_reason": finish_reason, + } + ], + "usage": usage, + } + + +def _process_llm_output( + raw_output_with_think: str, + raw_output_clean: str, + structured_requirement: StructuredOutputRequirement | None, +) -> tuple[str, str, list[Any]]: + """ + Common post-processing logic for Gemini output. + Returns: (visible_text, storage_output, tool_calls) + """ + visible_with_think, tool_calls = extract_tool_calls(raw_output_with_think) + if tool_calls: + logger.debug(f"Detected {len(tool_calls)} tool call(s) in model output.") + + visible_output = visible_with_think.strip() + + storage_output, _ = extract_tool_calls(raw_output_clean) + storage_output = storage_output.strip() + + if structured_requirement: + cleaned_for_json = LMDBConversationStore.remove_think_tags(visible_output) + json_text = strip_code_fence(cleaned_for_json or "") + if json_text: + try: + structured_payload = orjson.loads(json_text) + canonical_output = orjson.dumps(structured_payload).decode("utf-8") + visible_output = canonical_output + storage_output = canonical_output + logger.debug( + f"Structured response fulfilled (schema={structured_requirement.schema_name})." + ) + except orjson.JSONDecodeError: + logger.warning( + f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name})." 
+ ) + + return visible_output, storage_output, tool_calls + + +def _persist_conversation( + db: LMDBConversationStore, + model_name: str, + client_id: str, + metadata: list[str | None], + messages: list[Message], + storage_output: str | None, + tool_calls: list[Any] | None, +) -> str | None: + """Unified logic to save conversation history to LMDB.""" + try: + current_assistant_message = Message( + role="assistant", + content=storage_output or None, + tool_calls=tool_calls or None, + ) + full_history = [*messages, current_assistant_message] + cleaned_history = db.sanitize_assistant_messages(full_history) + + conv = ConversationInStore( + model=model_name, + client_id=client_id, + metadata=metadata, + messages=cleaned_history, + ) + key = db.store(conv) + logger.debug(f"Conversation saved to LMDB with key: {key[:12]}") + return key + except Exception as e: + logger.warning(f"Failed to save {len(messages) + 1} messages to LMDB: {e}") + return None + + def _build_structured_requirement( response_format: dict[str, Any] | None, ) -> StructuredOutputRequirement | None: @@ -80,17 +281,23 @@ def _build_structured_requirement( return None if response_format.get("type") != "json_schema": - logger.warning(f"Unsupported response_format type requested: {response_format}") + logger.warning( + f"Unsupported response_format type requested: {reprlib.repr(response_format)}" + ) return None json_schema = response_format.get("json_schema") if not isinstance(json_schema, dict): - logger.warning(f"Invalid json_schema payload in response_format: {response_format}") + logger.warning( + f"Invalid json_schema payload in response_format: {reprlib.repr(response_format)}" + ) return None schema = json_schema.get("schema") if not isinstance(schema, dict): - logger.warning(f"Missing `schema` object in response_format payload: {response_format}") + logger.warning( + f"Missing `schema` object in response_format payload: {reprlib.repr(response_format)}" + ) return None schema_name = json_schema.get("name") or "response" @@ -136,7 +343,9 @@ def _build_tool_prompt( description = function.description or "No description provided." lines.append(f"Tool `{function.name}`: {description}") if function.parameters: - schema_text = orjson.dumps(function.parameters).decode("utf-8") + schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode( + "utf-8" + ) lines.append("Arguments JSON schema:") lines.append(schema_text) else: @@ -155,7 +364,6 @@ def _build_tool_prompt( lines.append( f"You are required to call the tool named `{target}`. Do not call any other tool." ) - # `auto` or None fall back to default instructions. lines.append( "When you decide to call a tool you MUST respond with nothing except a single fenced block exactly like the template below." @@ -221,7 +429,7 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: if isinstance(msg.content, str): if XML_HINT_STRIPPED not in msg.content: - msg.content = f"{msg.content}{XML_WRAP_HINT}" + msg.content = f"{msg.content}\n{XML_WRAP_HINT}" return if isinstance(msg.content, list): @@ -231,15 +439,13 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: text_value = part.text or "" if XML_HINT_STRIPPED in text_value: return - part.text = f"{text_value}{XML_WRAP_HINT}" + part.text = f"{text_value}\n{XML_WRAP_HINT}" return messages_text = XML_WRAP_HINT.strip() msg.content.append(ContentItem(type="text", text=messages_text)) return - # No user message to annotate; nothing to do. 
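
A note on the `OPT_SORT_KEYS` change in `_build_tool_prompt` above: the rendered tool prompt becomes part of the conversation text that `_find_reusable_session` later matches against, so the schema serialization should be byte-stable across requests. A minimal illustration (plain `orjson`, no project code involved):

```python
import orjson

params = {"b": 1, "a": 2}

# Default serialization preserves insertion order, so two semantically
# identical schemas can render differently from one request to the next...
assert orjson.dumps(params) == b'{"b":1,"a":2}'

# ...while OPT_SORT_KEYS always yields one canonical byte sequence,
# keeping the injected tool prompt stable for history matching.
assert orjson.dumps(params, option=orjson.OPT_SORT_KEYS) == b'{"a":2,"b":1}'
```
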
- def _conversation_has_code_hint(messages: list[Message]) -> bool: """Return True if any system message already includes the code block hint.""" @@ -290,7 +496,6 @@ def _prepare_messages_for_model( logger.debug("Injected default code block hint for Gemini conversation.") if not instructions: - # Still need to ensure XML hint for the last user message if tools are present if tools and tool_choice != "none": _append_xml_hint_to_last_user_message(prepared) return prepared @@ -323,7 +528,6 @@ def _response_items_to_messages( normalized_input: list[ResponseInputItem] = [] for item in items: role = item.role - content = item.content normalized_contents: list[ResponseInputContent] = [] if isinstance(content, str): @@ -394,7 +598,6 @@ def _instructions_to_messages( continue role = item.role - content = item.content if isinstance(content, str): instruction_messages.append(Message(role=role, content=content)) @@ -432,10 +635,7 @@ def _instructions_to_messages( def _get_model_by_name(name: str) -> Model: - """ - Retrieve a Model instance by name, considering custom models from config - and the update strategy (append or overwrite). - """ + """Retrieve a Model instance by name.""" strategy = g_config.gemini.model_strategy custom_models = {m.model_name: m for m in g_config.gemini.models if m.model_name} @@ -449,9 +649,7 @@ def _get_model_by_name(name: str) -> Model: def _get_available_models() -> list[ModelData]: - """ - Return a list of available models based on configuration strategy. - """ + """Return a list of available models based on configuration strategy.""" now = int(datetime.now(tz=timezone.utc).timestamp()) strategy = g_config.gemini.model_strategy models_data = [] @@ -486,910 +684,897 @@ def _get_available_models() -> list[ModelData]: return models_data -@router.get("/v1/models", response_model=ModelListResponse) -async def list_models(api_key: str = Depends(verify_api_key)): - models = _get_available_models() - return ModelListResponse(data=models) - - -@router.post("/v1/chat/completions") -async def create_chat_completion( - request: ChatCompletionRequest, - api_key: str = Depends(verify_api_key), - tmp_dir: Path = Depends(get_temp_dir), - image_store: Path = Depends(get_image_store_dir), -): - pool = GeminiClientPool() - db = LMDBConversationStore() - - try: - model = _get_model_by_name(request.model) - except ValueError as exc: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc - - if len(request.messages) == 0: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="At least one message is required in the conversation.", - ) +async def _find_reusable_session( + db: LMDBConversationStore, + pool: GeminiClientPool, + model: Model, + messages: list[Message], +) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[Message]]: + """Find an existing chat session matching the longest suitable history prefix.""" + if len(messages) < 2: + return None, None, messages - structured_requirement = _build_structured_requirement(request.response_format) - if structured_requirement and request.stream: - logger.debug( - "Structured response requested with streaming enabled; will stream canonical JSON once ready." - ) - if structured_requirement: - logger.debug( - f"Structured response requested for /v1/chat/completions (schema={structured_requirement.schema_name})." 
- )
 
+    search_end = len(messages)
+    while search_end >= 2:
+        search_history = messages[:search_end]
+        if search_history[-1].role in {"assistant", "system", "tool"}:
+            try:
+                if conv := db.find(model.model_name, search_history):
+                    now = datetime.now()
+                    updated_at = conv.updated_at or conv.created_at or now
+                    age_minutes = (now - updated_at).total_seconds() / 60
+                    if age_minutes <= METADATA_TTL_MINUTES:
+                        client = await pool.acquire(conv.client_id)
+                        session = client.start_chat(metadata=conv.metadata, model=model)
+                        remain = messages[search_end:]
+                        logger.debug(
+                            f"Match found at prefix length {search_end}/{len(messages)}. Client: {conv.client_id}"
+                        )
+                        return session, client, remain
+                    else:
+                        logger.debug(
+                            f"Matched conversation at length {search_end} is too old ({age_minutes:.1f}m), skipping reuse."
+                        )
+                else:
+                    logger.debug(f"No stored conversation for prefix length {search_end}.")
+            except Exception as e:
+                logger.warning(
+                    f"Error checking LMDB for reusable session at length {search_end}: {e}"
+                )
+                break
+        search_end -= 1
 
-    extra_instructions = [structured_requirement.instruction] if structured_requirement else None
+    logger.debug(f"No reusable session found for {len(messages)} messages.")
+    return None, None, messages
 
-    # Check if conversation is reusable
-    session, client, remaining_messages = await _find_reusable_session(
-        db, pool, model, request.messages
-    )
 
-    if session:
-        # Optimization: When reusing a session, we don't need to resend the heavy tool definitions
-        # or structured output instructions as they are already in the Gemini session history.
-        messages_to_send = _prepare_messages_for_model(
-            remaining_messages,
-            request.tools,
-            request.tool_choice,
-            extra_instructions,
-            inject_system_defaults=False,
-        )
-        if not messages_to_send:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail="No new messages to send for the existing session.",
-            )
-        if len(messages_to_send) == 1:
-            model_input, files = await GeminiClientWrapper.process_message(
-                messages_to_send[0], tmp_dir, tagged=False
-            )
-        else:
-            model_input, files = await GeminiClientWrapper.process_conversation(
-                messages_to_send, tmp_dir
-            )
-        logger.debug(
-            f"Reused session {session.metadata} - sending {len(messages_to_send)} prepared messages."
- ) - else: - # Start a new session and concat messages into a single string +async def _send_with_split( + session: ChatSession, + text: str, + files: list[Path | str | io.BytesIO] | None = None, + stream: bool = False, +) -> AsyncGenerator[ModelOutput, None] | ModelOutput: + """Send text to Gemini, splitting or converting to attachment if too long.""" + if len(text) <= MAX_CHARS_PER_REQUEST: try: - client = await pool.acquire() - session = client.start_chat(model=model) - messages_to_send = _prepare_messages_for_model( - request.messages, request.tools, request.tool_choice, extra_instructions - ) - model_input, files = await GeminiClientWrapper.process_conversation( - messages_to_send, tmp_dir - ) - except ValueError as e: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) - except RuntimeError as e: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) + if stream: + return session.send_message_stream(text, files=files) + return await session.send_message(text, files=files) except Exception as e: - logger.exception(f"Error in preparing conversation: {e}") + logger.exception(f"Error sending message to Gemini: {e}") raise - logger.debug("New session started.") - # Generate response + logger.info( + f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment." + ) + file_obj = io.BytesIO(text.encode("utf-8")) + file_obj.name = "message.txt" try: - assert session and client, "Session and client not available" - client_id = client.id - logger.debug( - f"Client ID: {client_id}, Input length: {len(model_input)}, files count: {len(files)}" + final_files = list(files) if files else [] + final_files.append(file_obj) + instruction = ( + "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n" + "**System Instruction:**\n" + "1. Read the content of `message.txt`.\n" + "2. Treat that content as the **primary** user prompt for this turn.\n" + "3. Execute the instructions or answer the questions found *inside* that file immediately.\n" ) - response = await _send_with_split(session, model_input, files=files) - except APIError as exc: - client_id = client.id if client else "unknown" - logger.warning(f"Gemini API returned invalid response for client {client_id}: {exc}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Gemini temporarily returned an invalid response. 
Please retry.", - ) from exc - except HTTPException: - raise + if stream: + return session.send_message_stream(instruction, files=final_files) + return await session.send_message(instruction, files=final_files) except Exception as e: - logger.exception(f"Unexpected error generating content from Gemini API: {e}") - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Gemini returned an unexpected error.", - ) from e + logger.exception(f"Error sending large text as file to Gemini: {e}") + raise - # Format the response from API - try: - raw_output_with_think = GeminiClientWrapper.extract_output(response, include_thoughts=True) - raw_output_clean = GeminiClientWrapper.extract_output(response, include_thoughts=False) - except IndexError as exc: - logger.exception("Gemini output parsing failed (IndexError).") - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Gemini returned malformed response content.", - ) from exc - except Exception as exc: - logger.exception("Gemini output parsing failed unexpectedly.") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Gemini output parsing failed unexpectedly.", - ) from exc - visible_output, tool_calls = extract_tool_calls(raw_output_with_think) - storage_output = remove_tool_call_blocks(raw_output_clean).strip() - tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] +class StreamingOutputFilter: + """ + Enhanced streaming filter that suppresses: + 1. XML tool call blocks: ```xml ... ``` + 2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|> + 3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content) + 4. Control tokens: <|im_start|>, <|im_end|> + 5. System instructions/hints: XML_WRAP_HINT, CODE_BLOCK_HINT, etc. 
+ """ - if structured_requirement: - cleaned_visible = strip_code_fence(visible_output or "") - if not cleaned_visible: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="LLM returned an empty response while JSON schema output was requested.", - ) - try: - structured_payload = orjson.loads(cleaned_visible) - except orjson.JSONDecodeError as exc: - logger.warning( - f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " - f"{cleaned_visible}" - ) - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="LLM returned invalid JSON for the requested response_format.", - ) from exc + def __init__(self): + self.buffer = "" + self.in_xml_tool = False + self.in_tagged_block = False + self.in_role_header = False + self.current_role = "" + + self.XML_START = "```xml" + self.XML_END = "```" + self.TAG_START = "<|im_start|>" + self.TAG_END = "<|im_end|>" + self.SYSTEM_HINTS = [ + XML_WRAP_HINT, + XML_HINT_STRIPPED, + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + ] - canonical_output = orjson.dumps(structured_payload).decode("utf-8") - visible_output = canonical_output - storage_output = canonical_output + def process(self, chunk: str) -> str: + self.buffer += chunk + to_yield = "" + + while self.buffer: + if self.in_xml_tool: + end_idx = self.buffer.find(self.XML_END) + if end_idx != -1: + self.buffer = self.buffer[end_idx + len(self.XML_END) :] + self.in_xml_tool = False + else: + break + elif self.in_role_header: + nl_idx = self.buffer.find("\n") + if nl_idx != -1: + role_text = self.buffer[:nl_idx].strip().lower() + self.current_role = role_text + self.buffer = self.buffer[nl_idx + 1 :] + self.in_role_header = False + self.in_tagged_block = True + else: + break + elif self.in_tagged_block: + end_idx = self.buffer.find(self.TAG_END) + if end_idx != -1: + content = self.buffer[:end_idx] + if self.current_role != "tool": + to_yield += content + self.buffer = self.buffer[end_idx + len(self.TAG_END) :] + self.in_tagged_block = False + self.current_role = "" + else: + if self.current_role == "tool": + break + else: + yield_len = len(self.buffer) - (len(self.TAG_END) - 1) + if yield_len > 0: + to_yield += self.buffer[:yield_len] + self.buffer = self.buffer[yield_len:] + break + else: + # Outside any special block. Look for starts. 
+ earliest_idx = -1 + match_type = "" + + xml_idx = self.buffer.find(self.XML_START) + if xml_idx != -1: + earliest_idx = xml_idx + match_type = "xml" + + tag_s_idx = self.buffer.find(self.TAG_START) + if tag_s_idx != -1: + if earliest_idx == -1 or tag_s_idx < earliest_idx: + earliest_idx = tag_s_idx + match_type = "tag_start" + + tag_e_idx = self.buffer.find(self.TAG_END) + if tag_e_idx != -1: + if earliest_idx == -1 or tag_e_idx < earliest_idx: + earliest_idx = tag_e_idx + match_type = "tag_end" + + if earliest_idx != -1: + # Yield text before the match + to_yield += self.buffer[:earliest_idx] + self.buffer = self.buffer[earliest_idx:] + + if match_type == "xml": + self.in_xml_tool = True + self.buffer = self.buffer[len(self.XML_START) :] + elif match_type == "tag_start": + self.in_role_header = True + self.buffer = self.buffer[len(self.TAG_START) :] + elif match_type == "tag_end": + # Orphaned end tag, just skip it + self.buffer = self.buffer[len(self.TAG_END) :] + continue + else: + # Check for prefixes + prefixes = [self.XML_START, self.TAG_START, self.TAG_END] + max_keep = 0 + for p in prefixes: + for i in range(len(p) - 1, 0, -1): + if self.buffer.endswith(p[:i]): + max_keep = max(max_keep, i) + break - if tool_calls_payload: - logger.debug(f"Detected tool calls: {tool_calls_payload}") + yield_len = len(self.buffer) - max_keep + if yield_len > 0: + to_yield += self.buffer[:yield_len] + self.buffer = self.buffer[yield_len:] + break - # After formatting, persist the conversation to LMDB - try: - current_assistant_message = Message( - role="assistant", - content=storage_output or None, - tool_calls=tool_calls or None, - ) - # Sanitize the entire history including the new message to ensure consistency - full_history = [*request.messages, current_assistant_message] - cleaned_history = db.sanitize_assistant_messages(full_history) + # Final pass: filter out system hints from the text to be yielded + for hint in self.SYSTEM_HINTS: + if hint in to_yield: + to_yield = to_yield.replace(hint, "") - conv = ConversationInStore( - model=model.model_name, - client_id=client.id, - metadata=session.metadata, - messages=cleaned_history, - ) - key = db.store(conv) - logger.debug(f"Conversation saved to LMDB with key: {key}") - except Exception as e: - # We can still return the response even if saving fails - logger.warning(f"Failed to save conversation to LMDB: {e}") + return to_yield - # Return with streaming or standard response - completion_id = f"chatcmpl-{uuid.uuid4()}" - timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - if request.stream: - return _create_streaming_response( - visible_output, - tool_calls_payload, - completion_id, - timestamp, - request.model, - request.messages, - ) - else: - return _create_standard_response( - visible_output, - tool_calls_payload, - completion_id, - timestamp, - request.model, - request.messages, - ) - - -@router.post("/v1/responses") -async def create_response( - request_data: ResponseCreateRequest, - request: Request, - api_key: str = Depends(verify_api_key), - tmp_dir: Path = Depends(get_temp_dir), - image_store: Path = Depends(get_image_store_dir), -): - base_messages, normalized_input = _response_items_to_messages(request_data.input) - structured_requirement = _build_structured_requirement(request_data.response_format) - if structured_requirement and request_data.stream: - logger.debug( - "Structured response requested with streaming enabled; streaming not supported for Responses." 
- ) - - extra_instructions: list[str] = [] - if structured_requirement: - extra_instructions.append(structured_requirement.instruction) - logger.debug( - f"Structured response requested for /v1/responses (schema={structured_requirement.schema_name})." - ) - - # Separate standard tools from image generation tools - standard_tools: list[Tool] = [] - image_tools: list[ResponseImageTool] = [] - - if request_data.tools: - for t in request_data.tools: - if isinstance(t, Tool): - standard_tools.append(t) - elif isinstance(t, ResponseImageTool): - image_tools.append(t) - # Handle dicts if Pydantic didn't convert them fully (fallback) - elif isinstance(t, dict): - t_type = t.get("type") - if t_type == "function": - standard_tools.append(Tool.model_validate(t)) - elif t_type == "image_generation": - image_tools.append(ResponseImageTool.model_validate(t)) - - image_instruction = _build_image_generation_instruction( - image_tools, - request_data.tool_choice - if isinstance(request_data.tool_choice, ResponseToolChoice) - else None, - ) - if image_instruction: - extra_instructions.append(image_instruction) - logger.debug("Image generation support enabled for /v1/responses request.") - - preface_messages = _instructions_to_messages(request_data.instructions) - conversation_messages = base_messages - if preface_messages: - conversation_messages = [*preface_messages, *base_messages] - logger.debug( - f"Injected {len(preface_messages)} instruction messages before sending to Gemini." - ) - - # Pass standard tools to the prompt builder - # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction) - model_tool_choice = None - if isinstance(request_data.tool_choice, str): - model_tool_choice = request_data.tool_choice - elif isinstance(request_data.tool_choice, ToolChoiceFunction): - model_tool_choice = request_data.tool_choice - # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice. 
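
For reference, a short trace of how the `StreamingOutputFilter` above behaves when control tokens arrive split across chunk boundaries. The chunk strings are made up for the example; the class and its hint constants are the ones defined in this module:

```python
f = StreamingOutputFilter()
out = ""
for chunk in ["Hello <|im_st", "art|>tool\n{...}<|im_e", "nd|> world"]:
    # A partial suffix such as "<|im_st" is held back rather than emitted,
    # so a token spanning two chunks is still recognized.
    out += f.process(chunk)
out += f.flush()

# The tool block is fully suppressed even though <|im_start|> and
# <|im_end|> were split across chunks.
assert out == "Hello  world"
```
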
- - messages = _prepare_messages_for_model( - conversation_messages, - tools=standard_tools or None, - tool_choice=model_tool_choice, - extra_instructions=extra_instructions or None, - ) - - pool = GeminiClientPool() - db = LMDBConversationStore() - - try: - model = _get_model_by_name(request_data.model) - except ValueError as exc: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc - - session, client, remaining_messages = await _find_reusable_session(db, pool, model, messages) - - async def _build_payload( - _payload_messages: list[Message], _reuse_session: bool - ) -> tuple[str, list[Path | str]]: - if _reuse_session and len(_payload_messages) == 1: - return await GeminiClientWrapper.process_message( - _payload_messages[0], tmp_dir, tagged=False - ) - return await GeminiClientWrapper.process_conversation(_payload_messages, tmp_dir) - - reuse_session = session is not None - if reuse_session: - messages_to_send = _prepare_messages_for_model( - remaining_messages, - tools=request_data.tools, # Keep for XML hint logic - tool_choice=request_data.tool_choice, - extra_instructions=None, # Already in session history - inject_system_defaults=False, - ) - if not messages_to_send: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="No new messages to send for the existing session.", - ) - payload_messages = messages_to_send - model_input, files = await _build_payload(payload_messages, _reuse_session=True) - logger.debug( - f"Reused session {session.metadata} - sending {len(payload_messages)} prepared messages." - ) - else: - try: - client = await pool.acquire() - session = client.start_chat(model=model) - payload_messages = messages - model_input, files = await _build_payload(payload_messages, _reuse_session=False) - except ValueError as e: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) - except RuntimeError as e: - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) - except Exception as e: - logger.exception(f"Error in preparing conversation for responses API: {e}") - raise - logger.debug("New session started for /v1/responses request.") + def flush(self) -> str: + # If we are stuck in a tool block or role header at the end, + # it usually means malformed output. + if self.in_xml_tool or (self.in_tagged_block and self.current_role == "tool"): + return "" - try: - assert session and client, "Session and client not available" - client_id = client.id - logger.debug( - f"Client ID: {client_id}, Input length: {len(model_input)}, files count: {len(files)}" - ) - model_output = await _send_with_split(session, model_input, files=files) - except APIError as exc: - client_id = client.id if client else "unknown" - logger.warning(f"Gemini API returned invalid response for client {client_id}: {exc}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Gemini temporarily returned an invalid response. 
Please retry.", - ) from exc - except HTTPException: - raise - except Exception as e: - logger.exception(f"Unexpected error generating content from Gemini API for responses: {e}") - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Gemini returned an unexpected error.", - ) from e + final_text = self.buffer + self.buffer = "" - try: - text_with_think = GeminiClientWrapper.extract_output(model_output, include_thoughts=True) - text_without_think = GeminiClientWrapper.extract_output( - model_output, include_thoughts=False - ) - except IndexError as exc: - logger.exception("Gemini output parsing failed (IndexError).") - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Gemini returned malformed response content.", - ) from exc - except Exception as exc: - logger.exception("Gemini output parsing failed unexpectedly.") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Gemini output parsing failed unexpectedly.", - ) from exc + # Filter out any orphaned/partial control tokens or hints + final_text = CONTROL_TOKEN_RE.sub("", final_text) + for hint in self.SYSTEM_HINTS: + final_text = final_text.replace(hint, "") - visible_text, detected_tool_calls = extract_tool_calls(text_with_think) - storage_output = remove_tool_call_blocks(text_without_think).strip() - assistant_text = LMDBConversationStore.remove_think_tags(visible_text.strip()) + return final_text.strip() - if structured_requirement: - cleaned_visible = strip_code_fence(assistant_text or "") - if not cleaned_visible: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="LLM returned an empty response while JSON schema output was requested.", - ) - try: - structured_payload = orjson.loads(cleaned_visible) - except orjson.JSONDecodeError as exc: - logger.warning( - f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): " - f"{cleaned_visible}" - ) - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="LLM returned invalid JSON for the requested response_format.", - ) from exc - - canonical_output = orjson.dumps(structured_payload).decode("utf-8") - assistant_text = canonical_output - storage_output = canonical_output - logger.debug( - f"Structured response fulfilled for /v1/responses (schema={structured_requirement.schema_name})." - ) - expects_image = ( - request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" - ) - images = model_output.images or [] - logger.debug( - f"Gemini returned {len(images)} image(s) for /v1/responses " - f"(expects_image={expects_image}, instruction_applied={bool(image_instruction)})." - ) - if expects_image and not images: - summary = assistant_text.strip() if assistant_text else "" - if summary: - summary = re.sub(r"\s+", " ", summary) - if len(summary) > 200: - summary = f"{summary[:197]}..." - logger.warning( - "Image generation requested but Gemini produced no images. " - f"client_id={client_id}, forced_tool_choice={request_data.tool_choice is not None}, " - f"instruction_applied={bool(image_instruction)}, assistant_preview='{summary}'" - ) - detail = "LLM returned no images for the requested image_generation tool." 
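
The inline JSON handling removed above is now centralized in `_process_llm_output`. Its canonicalization step works roughly as sketched below; the input string is illustrative, and `strip_code_fence` is assumed to unwrap a Markdown code fence the way the helper in `app/utils/helper.py` does:

```python
import orjson

raw = '```json\n{ "answer":   42 }\n```'
json_text = strip_code_fence(raw)   # assumed result: '{ "answer":   42 }'
payload = orjson.loads(json_text)   # {'answer': 42}

# Both the client-visible text and the stored history receive the same
# canonical form, so later prefix matching is not broken by whitespace.
canonical = orjson.dumps(payload).decode("utf-8")  # '{"answer":42}'
```
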
- if summary: - detail = f"{detail} Assistant response: {summary}" - raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) +# --- Response Builders & Streaming --- - response_contents: list[ResponseOutputContent] = [] - image_call_items: list[ResponseImageGenerationCall] = [] - for image in images: - try: - image_base64, width, height, filename = await _image_to_base64(image, image_store) - except Exception as exc: - logger.warning(f"Failed to download generated image: {exc}") - continue - - img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" - - # Use static URL for compatibility - image_url = ( - f"![{filename}]({request.base_url}images/{filename}?token={get_image_token(filename)})" - ) - - image_call_items.append( - ResponseImageGenerationCall( - id=filename.rsplit(".", 1)[0], - status="completed", - result=image_base64, - output_format=img_format, - size=f"{width}x{height}" if width and height else None, - ) - ) - # Add as output_text content for compatibility - response_contents.append( - ResponseOutputContent(type="output_text", text=image_url, annotations=[]) - ) - - tool_call_items: list[ResponseToolCall] = [] - if detected_tool_calls: - tool_call_items = [ - ResponseToolCall( - id=call.id, - status="completed", - function=call.function, - ) - for call in detected_tool_calls - ] - - if assistant_text: - response_contents.append( - ResponseOutputContent(type="output_text", text=assistant_text, annotations=[]) - ) - if not response_contents: - response_contents.append(ResponseOutputContent(type="output_text", text="", annotations=[])) - - created_time = int(datetime.now(tz=timezone.utc).timestamp()) - response_id = f"resp_{uuid.uuid4().hex}" - message_id = f"msg_{uuid.uuid4().hex}" - - input_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_arg_text = "".join(call.function.arguments or "" for call in detected_tool_calls) - completion_basis = assistant_text or "" - if tool_arg_text: - completion_basis = ( - f"{completion_basis}\n{tool_arg_text}" if completion_basis else tool_arg_text - ) - output_tokens = estimate_tokens(completion_basis) - usage = ResponseUsage( - input_tokens=input_tokens, - output_tokens=output_tokens, - total_tokens=input_tokens + output_tokens, - ) - - response_payload = ResponseCreateResponse( - id=response_id, - created_at=created_time, - model=request_data.model, - output=[ - ResponseOutputMessage( - id=message_id, - type="message", - role="assistant", - content=response_contents, - ), - *tool_call_items, - *image_call_items, - ], - status="completed", - usage=usage, - input=normalized_input or None, - metadata=request_data.metadata or None, - tools=request_data.tools, - tool_choice=request_data.tool_choice, - ) - - try: - current_assistant_message = Message( - role="assistant", - content=storage_output or None, - tool_calls=detected_tool_calls or None, - ) - full_history = [*messages, current_assistant_message] - cleaned_history = db.sanitize_assistant_messages(full_history) - - conv = ConversationInStore( - model=model.model_name, - client_id=client.id, - metadata=session.metadata, - messages=cleaned_history, - ) - key = db.store(conv) - logger.debug(f"Conversation saved to LMDB with key: {key}") - except Exception as exc: - logger.warning(f"Failed to save Responses conversation to LMDB: {exc}") - - if request_data.stream: - logger.debug( - f"Streaming Responses API payload (response_id={response_payload.id}, text_chunks={bool(assistant_text)})." 
- ) - return _create_responses_streaming_response(response_payload, assistant_text or "") - - return response_payload - - -async def _find_reusable_session( +def _create_real_streaming_response( + generator: AsyncGenerator[ModelOutput, None], + completion_id: str, + created_time: int, + model_name: str, + messages: list[Message], db: LMDBConversationStore, - pool: GeminiClientPool, model: Model, - messages: list[Message], -) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[Message]]: - """Find an existing chat session that matches the *longest* prefix of - ``messages`` **whose last element is an assistant/system reply**. - - Rationale - --------- - When a reply was generated by *another* server instance, the local LMDB may - only contain an older part of the conversation. However, as long as we can - line up **any** earlier assistant/system response, we can restore the - corresponding Gemini session and replay the *remaining* turns locally - (including that missing assistant reply and the subsequent user prompts). - - The algorithm therefore walks backwards through the history **one message at - a time**, each time requiring the current tail to be assistant/system before - querying LMDB. As soon as a match is found we recreate the session and - return the untouched suffix as ``remaining_messages``. - """ - - if len(messages) < 2: - return None, None, messages - - # Start with the full history and iteratively trim from the end. - search_end = len(messages) - - while search_end >= 2: - search_history = messages[:search_end] - - # Only try to match if the last stored message would be assistant/system/tool before querying LMDB. - if search_history[-1].role in {"assistant", "system", "tool"}: - try: - if conv := db.find(model.model_name, search_history): - # Check if metadata is too old - now = datetime.now() - updated_at = conv.updated_at or conv.created_at or now - age_minutes = (now - updated_at).total_seconds() / 60 - - if age_minutes <= METADATA_TTL_MINUTES: - client = await pool.acquire(conv.client_id) - session = client.start_chat(metadata=conv.metadata, model=model) - remain = messages[search_end:] - logger.debug( - f"Match found at prefix length {search_end}. Client: {conv.client_id}" - ) - return session, client, remain - else: - logger.debug( - f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse." - ) - except Exception as e: - logger.warning( - f"Error checking LMDB for reusable session at length {search_end}: {e}" - ) - break - - # Trim one message and try again. - search_end -= 1 - - return None, None, messages - - -async def _send_with_split(session: ChatSession, text: str, files: list[Path | str] | None = None): + client_wrapper: GeminiClientWrapper, + session: ChatSession, + structured_requirement: StructuredOutputRequirement | None = None, +) -> StreamingResponse: """ - Send text to Gemini. If text is longer than ``MAX_CHARS_PER_REQUEST``, - it is converted into a temporary text file attachment to avoid splitting issues. + Create a real-time streaming response. + Reconciles manual delta accumulation with the model's final authoritative state. """ - if len(text) <= MAX_CHARS_PER_REQUEST: - try: - return await session.send_message(text, files=files) - except Exception as e: - logger.exception(f"Error sending message to Gemini: {e}") - raise - - logger.info( - f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment." 
- ) - - # Create a temporary directory to hold the message.txt file - # This ensures the filename is exactly 'message.txt' as expected by the instruction. - with tempfile.TemporaryDirectory() as tmpdirname: - temp_file_path = Path(tmpdirname) / "message.txt" - temp_file_path.write_text(text, encoding="utf-8") + async def generate_stream(): + full_thoughts, full_text = "", "" + has_started = False + last_chunk_was_thought = False + all_outputs: list[ModelOutput] = [] + suppressor = StreamingOutputFilter() try: - # Prepare the files list - final_files = list(files) if files else [] - final_files.append(temp_file_path) - - instruction = ( - "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n" - "**System Instruction:**\n" - "1. Read the content of `message.txt`.\n" - "2. Treat that content as the **primary** user prompt for this turn.\n" - "3. Execute the instructions or answer the questions found *inside* that file immediately.\n" - ) - - logger.debug(f"Sending prompt as temporary file: {temp_file_path}") - - return await session.send_message(instruction, files=final_files) - + async for chunk in generator: + all_outputs.append(chunk) + if not has_started: + data = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created_time, + "model": model_name, + "choices": [ + {"index": 0, "delta": {"role": "assistant"}, "finish_reason": None} + ], + } + yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" + has_started = True + + if t_delta := chunk.thoughts_delta: + if not last_chunk_was_thought and not full_thoughts: + yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': ''}, 'finish_reason': None}]}).decode('utf-8')}\n\n" + full_thoughts += t_delta + data = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created_time, + "model": model_name, + "choices": [ + {"index": 0, "delta": {"content": t_delta}, "finish_reason": None} + ], + } + yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" + last_chunk_was_thought = True + + if text_delta := chunk.text_delta: + if last_chunk_was_thought: + yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n" + last_chunk_was_thought = False + full_text += text_delta + if visible_delta := suppressor.process(text_delta): + data = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created_time, + "model": model_name, + "choices": [ + { + "index": 0, + "delta": {"content": visible_delta}, + "finish_reason": None, + } + ], + } + yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" except Exception as e: - logger.exception(f"Error sending large text as file to Gemini: {e}") - raise + logger.exception(f"Error during OpenAI streaming: {e}") + yield f"data: {orjson.dumps({'error': {'message': 'Streaming error occurred.', 'type': 'server_error', 'param': None, 'code': None}}).decode('utf-8')}\n\n" + return + if all_outputs: + final_chunk = all_outputs[-1] + if final_chunk.text: + full_text = final_chunk.text + if final_chunk.thoughts: + full_thoughts = final_chunk.thoughts -def _create_streaming_response( - model_output: str, - tool_calls: list[dict], - completion_id: str, - created_time: int, - model: str, - messages: list[Message], -) -> 
StreamingResponse: - """Create streaming response with `usage` calculation included in the final chunk.""" + if last_chunk_was_thought: + yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n" - # Calculate token usage - prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) - completion_tokens = estimate_tokens(model_output + tool_args) - total_tokens = prompt_tokens + completion_tokens - finish_reason = "tool_calls" if tool_calls else "stop" + if remaining_text := suppressor.flush(): + data = { + "id": completion_id, + "object": "chat.completion.chunk", + "created": created_time, + "model": model_name, + "choices": [ + {"index": 0, "delta": {"content": remaining_text}, "finish_reason": None} + ], + } + yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - async def generate_stream(): - # Send start event - data = { - "id": completion_id, - "object": "chat.completion.chunk", - "created": created_time, - "model": model, - "choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}], - } - yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" + raw_output_with_think = f"{full_thoughts}\n" if full_thoughts else "" + raw_output_with_think += full_text + assistant_text, storage_output, tool_calls = _process_llm_output( + raw_output_with_think, full_text, structured_requirement + ) - # Stream output text in chunks for efficiency - for chunk in iter_stream_segments(model_output): + images = [] + for out in all_outputs: + if out.images: + images.extend(out.images) + + image_markdown = "" + for image in images: + try: + image_store = get_image_store_dir() + _, _, _, filename = await _image_to_base64(image, image_store) + img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + image_markdown += f"\n\n{img_url}" + except Exception as exc: + logger.warning(f"Failed to process image in OpenAI stream: {exc}") + + if image_markdown: + assistant_text += image_markdown + storage_output += image_markdown + # Send the image markdown as a final text chunk before usage data = { "id": completion_id, "object": "chat.completion.chunk", "created": created_time, - "model": model, - "choices": [{"index": 0, "delta": {"content": chunk}, "finish_reason": None}], + "model": model_name, + "choices": [ + {"index": 0, "delta": {"content": image_markdown}, "finish_reason": None} + ], } yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - if tool_calls: - tool_calls_delta = [{**call, "index": idx} for idx, call in enumerate(tool_calls)] + tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] + if tool_calls_payload: + tool_calls_delta = [ + {**call, "index": idx} for idx, call in enumerate(tool_calls_payload) + ] data = { "id": completion_id, "object": "chat.completion.chunk", "created": created_time, - "model": model, + "model": model_name, "choices": [ - { - "index": 0, - "delta": {"tool_calls": tool_calls_delta}, - "finish_reason": None, - } + {"index": 0, "delta": {"tool_calls": tool_calls_delta}, "finish_reason": None} ], } yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" - # Send end event + p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, tool_calls) + usage = {"prompt_tokens": p_tok, "completion_tokens": c_tok, 
"total_tokens": t_tok} data = { "id": completion_id, "object": "chat.completion.chunk", "created": created_time, - "model": model, - "choices": [{"index": 0, "delta": {}, "finish_reason": finish_reason}], - "usage": { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": total_tokens, - }, + "model": model_name, + "choices": [ + {"index": 0, "delta": {}, "finish_reason": "tool_calls" if tool_calls else "stop"} + ], + "usage": usage, } + _persist_conversation( + db, + model.model_name, + client_wrapper.id, + session.metadata, + messages, # This should be the prepared messages + storage_output, + tool_calls, + ) yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" yield "data: [DONE]\n\n" return StreamingResponse(generate_stream(), media_type="text/event-stream") -def _create_responses_streaming_response( - response_payload: ResponseCreateResponse, - assistant_text: str | None, +def _create_responses_real_streaming_response( + generator: AsyncGenerator[ModelOutput, None], + response_id: str, + created_time: int, + model_name: str, + messages: list[Message], + db: LMDBConversationStore, + model: Model, + client_wrapper: GeminiClientWrapper, + session: ChatSession, + request_data: ResponseCreateRequest, + image_store: Path, + base_url: str, + structured_requirement: StructuredOutputRequirement | None = None, ) -> StreamingResponse: - """Create streaming response for Responses API using event types defined by OpenAI.""" - - response_dict = response_payload.model_dump(mode="json") - response_id = response_payload.id - created_time = response_payload.created_at - model = response_payload.model - - logger.debug( - f"Preparing streaming envelope for /v1/responses (response_id={response_id}, model={model})." - ) - + """ + Create a real-time streaming response for the Responses API. + Ensures final accumulated text and thoughts are synchronized. 
+ """ base_event = { "id": response_id, "object": "response", "created_at": created_time, - "model": model, + "model": model_name, } - created_snapshot: dict[str, Any] = { - "id": response_id, - "object": "response", - "created_at": created_time, - "model": model, - "status": "in_progress", - } - if response_dict.get("metadata") is not None: - created_snapshot["metadata"] = response_dict["metadata"] - if response_dict.get("input") is not None: - created_snapshot["input"] = response_dict["input"] - if response_dict.get("tools") is not None: - created_snapshot["tools"] = response_dict["tools"] - if response_dict.get("tool_choice") is not None: - created_snapshot["tool_choice"] = response_dict["tool_choice"] - async def generate_stream(): - # Emit creation event - data = { - **base_event, - "type": "response.created", - "response": created_snapshot, - } - yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request_data.metadata, 'input': None, 'tools': request_data.tools, 'tool_choice': request_data.tool_choice}}).decode('utf-8')}\n\n" + message_id = f"msg_{uuid.uuid4().hex}" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n" - # Stream output items (Message/Text, Tool Calls, Images) - for i, item in enumerate(response_payload.output): - item_json = item.model_dump(mode="json", exclude_none=True) + full_thoughts, full_text = "", "" + last_chunk_was_thought = False + all_outputs: list[ModelOutput] = [] + suppressor = StreamingOutputFilter() - added_event = { - **base_event, - "type": "response.output_item.added", - "output_index": i, - "item": item_json, - } - yield f"data: {orjson.dumps(added_event).decode('utf-8')}\n\n" - - # 2. 
Stream content if it's a message (text) - if item.type == "message": - content_text = "" - # Aggregate text content to stream - for c in item.content: - if c.type == "output_text" and c.text: - content_text += c.text - - if content_text: - for chunk in iter_stream_segments(content_text): - delta_event = { - **base_event, - "type": "response.output_text.delta", - "output_index": i, - "delta": chunk, - } - yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n" + try: + async for chunk in generator: + all_outputs.append(chunk) + if t_delta := chunk.thoughts_delta: + if not last_chunk_was_thought and not full_thoughts: + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': ''}).decode('utf-8')}\n\n" + full_thoughts += t_delta + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': t_delta}).decode('utf-8')}\n\n" + last_chunk_was_thought = True + if text_delta := chunk.text_delta: + if last_chunk_was_thought: + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '\n'}).decode('utf-8')}\n\n" + last_chunk_was_thought = False + full_text += text_delta + if visible_delta := suppressor.process(text_delta): + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': visible_delta}).decode('utf-8')}\n\n" + except Exception as e: + logger.exception(f"Error during Responses API streaming: {e}") + yield f"data: {orjson.dumps({**base_event, 'type': 'error', 'error': {'message': 'Streaming error.'}}).decode('utf-8')}\n\n" + return - # Text done - done_event = { - **base_event, - "type": "response.output_text.done", - "output_index": i, - } - yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n" - - # 3. Emit output_item.done for all types - # This confirms the item is fully transferred. 
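
The snapshot-based envelope removed here is replaced by genuinely incremental events. A client consumes the new stream as sketched below; the host, port, API key, and model name are illustrative only:

```python
import httpx
import orjson

with httpx.stream(
    "POST",
    "http://localhost:8000/v1/responses",
    json={"model": "gemini-2.5-flash", "input": "Hello", "stream": True},
    headers={"Authorization": "Bearer sk-test"},
    timeout=None,
) as resp:
    for line in resp.iter_lines():
        # SSE frames look like "data: {...}"; the stream ends with "data: [DONE]".
        if not line.startswith("data: ") or line == "data: [DONE]":
            continue
        event = orjson.loads(line[len("data: "):])
        # Text arrives incrementally via response.output_text.delta;
        # response.completed carries the final aggregated payload.
        if event.get("type") == "response.output_text.delta":
            print(event["delta"], end="", flush=True)
```
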
- item_done_event = { - **base_event, - "type": "response.output_item.done", - "output_index": i, - "item": item_json, - } - yield f"data: {orjson.dumps(item_done_event).decode('utf-8')}\n\n" + if all_outputs: + final_chunk = all_outputs[-1] + if final_chunk.text: + full_text = final_chunk.text + if final_chunk.thoughts: + full_thoughts = final_chunk.thoughts + + if last_chunk_was_thought: + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '\n'}).decode('utf-8')}\n\n" + if remaining_text := suppressor.flush(): + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': remaining_text}).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': 0}).decode('utf-8')}\n\n" + + raw_output_with_think = f"{full_thoughts}\n" if full_thoughts else "" + raw_output_with_think += full_text + assistant_text, storage_output, detected_tool_calls = _process_llm_output( + raw_output_with_think, full_text, structured_requirement + ) - # Emit completed event with full payload - completed_event = { - **base_event, - "type": "response.completed", - "response": response_dict, - } - yield f"data: {orjson.dumps(completed_event).decode('utf-8')}\n\n" + images = [] + for out in all_outputs: + if out.images: + images.extend(out.images) + + response_contents, image_call_items = [], [] + for image in images: + try: + image_base64, width, height, filename = await _image_to_base64(image, image_store) + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + image_url = ( + f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" + ) + image_call_items.append( + ResponseImageGenerationCall( + id=filename.rsplit(".", 1)[0], + result=image_base64, + output_format=img_format, + size=f"{width}x{height}" if width and height else None, + ) + ) + response_contents.append(ResponseOutputContent(type="output_text", text=image_url)) + except Exception as exc: + logger.warning(f"Failed to process image in stream: {exc}") + + if assistant_text: + response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) + if not response_contents: + response_contents.append(ResponseOutputContent(type="output_text", text="")) + + # Aggregate images for storage + image_markdown = "" + for img_call in image_call_items: + fname = f"{img_call.id}.{img_call.output_format}" + img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})" + image_markdown += f"\n\n{img_url}" + + if image_markdown: + storage_output += image_markdown + + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': [c.model_dump(mode='json') for c in response_contents]}}).decode('utf-8')}\n\n" + + current_idx = 1 + for call in detected_tool_calls: + tc_item = ResponseToolCall(id=call.id, status="completed", function=call.function) + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n" + current_idx += 1 + for img_call in image_call_items: + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 
'output_index': current_idx, 'item': img_call.model_dump(mode='json')}).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': img_call.model_dump(mode='json')}).decode('utf-8')}\n\n" + current_idx += 1 + + p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, detected_tool_calls) + usage = ResponseUsage(input_tokens=p_tok, output_tokens=c_tok, total_tokens=t_tok) + payload = _create_responses_standard_payload( + response_id, + created_time, + model_name, + assistant_text, + detected_tool_calls, + image_call_items, + response_contents, + usage, + request_data, + None, + ) + _persist_conversation( + db, + model.model_name, + client_wrapper.id, + session.metadata, + messages, + storage_output, + detected_tool_calls, + ) + yield f"data: {orjson.dumps({**base_event, 'type': 'response.completed', 'response': payload.model_dump(mode='json')}).decode('utf-8')}\n\n" yield "data: [DONE]\n\n" return StreamingResponse(generate_stream(), media_type="text/event-stream") -def _create_standard_response( - model_output: str, - tool_calls: list[dict], - completion_id: str, - created_time: int, - model: str, - messages: list[Message], -) -> dict: - """Create standard response""" - # Calculate token usage - prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or []) - completion_tokens = estimate_tokens(model_output + tool_args) - total_tokens = prompt_tokens + completion_tokens - finish_reason = "tool_calls" if tool_calls else "stop" +# --- Main Router Endpoints --- - message_payload: dict = {"role": "assistant", "content": model_output or None} - if tool_calls: - message_payload["tool_calls"] = tool_calls - result = { - "id": completion_id, - "object": "chat.completion", - "created": created_time, - "model": model, - "choices": [ - { - "index": 0, - "message": message_payload, - "finish_reason": finish_reason, - } - ], - "usage": { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": total_tokens, - }, - } +@router.get("/v1/models", response_model=ModelListResponse) +async def list_models(api_key: str = Depends(verify_api_key)): + models = _get_available_models() + return ModelListResponse(data=models) + + +@router.post("/v1/chat/completions") +async def create_chat_completion( + request: ChatCompletionRequest, + api_key: str = Depends(verify_api_key), + tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), +): + pool, db = GeminiClientPool(), LMDBConversationStore() + try: + model = _get_model_by_name(request.model) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc + if not request.messages: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Messages required.") - logger.debug(f"Response created with {total_tokens} total tokens") - return result + structured_requirement = _build_structured_requirement(request.response_format) + extra_instr = [structured_requirement.instruction] if structured_requirement else None + # This ensures that server-injected system instructions are part of the history + msgs = _prepare_messages_for_model( + request.messages, request.tools, request.tool_choice, extra_instr + ) -async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: - """Persist an image provided 
by gemini_webapi and return base64 plus dimensions and filename.""" - if isinstance(image, GeneratedImage): + session, client, remain = await _find_reusable_session(db, pool, model, msgs) + + if session: + if not remain: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") + + # For reused sessions, we only need to process the remaining messages. + # We don't re-inject system defaults to avoid duplicating instructions already in history. + input_msgs = _prepare_messages_for_model( + remain, request.tools, request.tool_choice, extra_instr, False + ) + if len(input_msgs) == 1: + m_input, files = await GeminiClientWrapper.process_message( + input_msgs[0], tmp_dir, tagged=False + ) + else: + m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir) + + logger.debug( + f"Reused session {reprlib.repr(session.metadata)} - sending {len(input_msgs)} prepared messages." + ) + else: try: - saved_path = await image.save(path=str(temp_dir), full_size=True) + client = await pool.acquire() + session = client.start_chat(model=model) + # Use the already prepared 'msgs' for a fresh session + m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir) except Exception as e: - logger.warning( - f"Failed to download full-size GeneratedImage, retrying with default size: {e}" - ) - saved_path = await image.save(path=str(temp_dir), full_size=False) + logger.exception("Error in preparing conversation") + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) + + completion_id = f"chatcmpl-{uuid.uuid4()}" + created_time = int(datetime.now(tz=timezone.utc).timestamp()) + + try: + assert session and client + logger.debug( + f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}" + ) + resp_or_stream = await _send_with_split( + session, m_input, files=files, stream=request.stream + ) + except Exception as e: + logger.exception("Gemini API error") + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) + + if request.stream: + return _create_real_streaming_response( + resp_or_stream, + completion_id, + created_time, + request.model, + msgs, # Use prepared 'msgs' + db, + model, + client, + session, + structured_requirement, + ) + + try: + raw_with_t = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=True) + raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False) + except Exception as exc: + logger.exception("Gemini output parsing failed.") + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response." 
+ ) from exc + + visible_output, storage_output, tool_calls = _process_llm_output( + raw_with_t, raw_clean, structured_requirement + ) + + # Process images for OpenAI non-streaming flow + images = resp_or_stream.images or [] + image_markdown = "" + for image in images: + try: + _, _, _, filename = await _image_to_base64(image, image_store) + img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + image_markdown += f"\n\n{img_url}" + except Exception as exc: + logger.warning(f"Failed to process image in OpenAI response: {exc}") + + if image_markdown: + visible_output += image_markdown + storage_output += image_markdown + + tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls] + if tool_calls_payload: + logger.debug(f"Detected tool calls: {reprlib.repr(tool_calls_payload)}") + + p_tok, c_tok, t_tok = _calculate_usage(request.messages, visible_output, tool_calls) + usage = {"prompt_tokens": p_tok, "completion_tokens": c_tok, "total_tokens": t_tok} + payload = _create_chat_completion_standard_payload( + completion_id, + created_time, + request.model, + visible_output, + tool_calls_payload, + "tool_calls" if tool_calls else "stop", + usage, + ) + _persist_conversation( + db, + model.model_name, + client.id, + session.metadata, + msgs, # Use prepared messages 'msgs' + storage_output, + tool_calls, + ) + return payload + + +@router.post("/v1/responses") +async def create_response( + request_data: ResponseCreateRequest, + request: Request, + api_key: str = Depends(verify_api_key), + tmp_dir: Path = Depends(get_temp_dir), + image_store: Path = Depends(get_image_store_dir), +): + base_messages, norm_input = _response_items_to_messages(request_data.input) + struct_req = _build_structured_requirement(request_data.response_format) + extra_instr = [struct_req.instruction] if struct_req else [] + + standard_tools, image_tools = [], [] + if request_data.tools: + for t in request_data.tools: + if isinstance(t, Tool): + standard_tools.append(t) + elif isinstance(t, ResponseImageTool): + image_tools.append(t) + elif isinstance(t, dict): + if t.get("type") == "function": + standard_tools.append(Tool.model_validate(t)) + elif t.get("type") == "image_generation": + image_tools.append(ResponseImageTool.model_validate(t)) + + img_instr = _build_image_generation_instruction( + image_tools, + request_data.tool_choice + if isinstance(request_data.tool_choice, ResponseToolChoice) + else None, + ) + if img_instr: + extra_instr.append(img_instr) + preface = _instructions_to_messages(request_data.instructions) + conv_messages = [*preface, *base_messages] if preface else base_messages + model_tool_choice = ( + request_data.tool_choice + if isinstance(request_data.tool_choice, (str, ToolChoiceFunction)) + else None + ) + + messages = _prepare_messages_for_model( + conv_messages, standard_tools or None, model_tool_choice, extra_instr or None + ) + pool, db = GeminiClientPool(), LMDBConversationStore() + try: + model = _get_model_by_name(request_data.model) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc + + session, client, remain = await _find_reusable_session(db, pool, model, messages) + if session: + msgs = _prepare_messages_for_model( + remain, request_data.tools, request_data.tool_choice, None, False + ) + if not msgs: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") + m_input, files = ( + await GeminiClientWrapper.process_message(msgs[0], tmp_dir, tagged=False) + 
if len(msgs) == 1 + else await GeminiClientWrapper.process_conversation(msgs, tmp_dir) + ) + logger.debug( + f"Reused session {reprlib.repr(session.metadata)} - sending {len(msgs)} prepared messages." + ) else: - saved_path = await image.save(path=str(temp_dir)) + try: + client = await pool.acquire() + session = client.start_chat(model=model) + m_input, files = await GeminiClientWrapper.process_conversation(messages, tmp_dir) + except Exception as e: + logger.exception("Error in preparing conversation") + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) - if not saved_path: - raise ValueError("Failed to save generated image") + response_id = f"resp_{uuid.uuid4().hex}" + created_time = int(datetime.now(tz=timezone.utc).timestamp()) - # Rename file to a random UUID to ensure uniqueness and unpredictability - original_path = Path(saved_path) - random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" - new_path = temp_dir / random_name - original_path.rename(new_path) + try: + assert session and client + logger.debug( + f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}" + ) + resp_or_stream = await _send_with_split( + session, m_input, files=files, stream=request_data.stream + ) + except Exception as e: + logger.exception("Gemini API error") + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) - data = new_path.read_bytes() - width, height = extract_image_dimensions(data) - filename = random_name - return base64.b64encode(data).decode("ascii"), width, height, filename + if request_data.stream: + return _create_responses_real_streaming_response( + resp_or_stream, + response_id, + created_time, + request_data.model, + messages, + db, + model, + client, + session, + request_data, + image_store, + str(request.base_url), + struct_req, + ) + + try: + raw_t = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=True) + raw_c = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False) + except Exception as exc: + logger.exception("Gemini parsing failed") + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response." 
+ ) from exc + + assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req) + images = resp_or_stream.images or [] + if ( + request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" + ) and not images: + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.") + + contents, img_calls = [], [] + for img in images: + try: + b64, w, h, fname = await _image_to_base64(img, image_store) + contents.append( + ResponseOutputContent( + type="output_text", + text=f"![{fname}]({request.base_url}images/{fname}?token={get_image_token(fname)})", + ) + ) + img_calls.append( + ResponseImageGenerationCall( + id=fname.rsplit(".", 1)[0], + result=b64, + output_format="png" if isinstance(img, GeneratedImage) else "jpeg", + size=f"{w}x{h}" if w and h else None, + ) + ) + except Exception as e: + logger.warning(f"Image error: {e}") + + if assistant_text: + contents.append(ResponseOutputContent(type="output_text", text=assistant_text)) + if not contents: + contents.append(ResponseOutputContent(type="output_text", text="")) + + # Aggregate images for storage + image_markdown = "" + for img_call in img_calls: + fname = f"{img_call.id}.{img_call.output_format}" + img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})" + image_markdown += f"\n\n{img_url}" + + if image_markdown: + storage_output += image_markdown + + p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, tool_calls) + usage = ResponseUsage(input_tokens=p_tok, output_tokens=c_tok, total_tokens=t_tok) + payload = _create_responses_standard_payload( + response_id, + created_time, + request_data.model, + assistant_text, + tool_calls, + img_calls, + contents, + usage, + request_data, + norm_input, + ) + _persist_conversation( + db, model.model_name, client.id, session.metadata, messages, storage_output, tool_calls + ) + return payload diff --git a/app/services/client.py b/app/services/client.py index 55be11a..eda1691 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -78,7 +78,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a single message and return model input. + Process a single Message object into a format suitable for the Gemini API. + Extracts text fragments, handles images and files, and appends tool call blocks if present. """ files: list[Path | str] = [] text_fragments: list[str] = [] @@ -88,8 +89,7 @@ async def process_message( if message.content: text_fragments.append(message.content) elif isinstance(message.content, list): - # Mixed content - # TODO: Use Pydantic to enforce the value checking + # Mixed content (text, image_url, or file) for item in message.content: if item.type == "text": # Append multiple text fragments @@ -177,7 +177,8 @@ async def process_conversation( @staticmethod def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: """ - Extract and format the output text from the Gemini response. + Extract and format the output text from a ModelOutput. + Includes reasoning thoughts (wrapped in tags) and unescapes content. 
""" text = "" @@ -191,6 +192,7 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: # Fix some escaped characters def _unescape_html(text_content: str) -> str: + """Unescape HTML entities only in non-code sections of the text.""" parts: list[str] = [] last_index = 0 for match in CODE_FENCE_RE.finditer(text_content): @@ -205,6 +207,7 @@ def _unescape_html(text_content: str) -> str: return "".join(parts) def _unescape_markdown(text_content: str) -> str: + """Remove backslash escapes for markdown characters in non-code sections.""" parts: list[str] = [] last_index = 0 for match in CODE_FENCE_RE.finditer(text_content): diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c9d42cd..6ab2302 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -11,45 +11,98 @@ from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config -from ..utils.helper import extract_tool_calls, remove_tool_call_blocks +from ..utils.helper import ( + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + XML_HINT_STRIPPED, + XML_WRAP_HINT, + extract_tool_calls, + remove_tool_call_blocks, +) from ..utils.singleton import Singleton def _hash_message(message: Message) -> str: - """Generate a consistent hash for a single message focusing ONLY on logic/content, ignoring technical IDs.""" + """ + Generate a stable, canonical hash for a single message. + Strips system hints, thoughts, and tool call blocks to ensure + identical logical content produces the same hash regardless of format. + """ core_data = { "role": message.role, "name": message.name, + "tool_call_id": message.tool_call_id, } - # Normalize content: strip, handle empty/None, and list-of-text items content = message.content if not content: core_data["content"] = None elif isinstance(content, str): - # Normalize line endings and strip whitespace - normalized = content.replace("\r\n", "\n").strip() + normalized = content.replace("\r\n", "\n") + + normalized = LMDBConversationStore.remove_think_tags(normalized) + + for hint in [ + XML_WRAP_HINT, + XML_HINT_STRIPPED, + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + ]: + normalized = normalized.replace(hint, "") + + if message.tool_calls: + normalized = remove_tool_call_blocks(normalized) + else: + temp_text, _extracted = extract_tool_calls(normalized) + normalized = temp_text + + normalized = normalized.strip() core_data["content"] = normalized if normalized else None elif isinstance(content, list): text_parts = [] for item in content: + text_val = "" if isinstance(item, ContentItem) and item.type == "text": - text_parts.append(item.text or "") + text_val = item.text or "" elif isinstance(item, dict) and item.get("type") == "text": - text_parts.append(item.get("text") or "") + text_val = item.get("text") or "" + + if text_val: + text_val = text_val.replace("\r\n", "\n") + text_val = LMDBConversationStore.remove_think_tags(text_val) + for hint in [ + XML_WRAP_HINT, + XML_HINT_STRIPPED, + CODE_BLOCK_HINT, + CODE_HINT_STRIPPED, + ]: + text_val = text_val.replace(hint, "") + text_val = remove_tool_call_blocks(text_val).strip() + if text_val: + text_parts.append(text_val) + elif isinstance(item, ContentItem) and item.type in ("image_url", "file"): + # For non-text items, include their unique markers to distinguish them + if item.type == "image_url": + text_parts.append( + f"[image_url:{item.image_url.get('url') if item.image_url else ''}]" + ) + elif item.type == "file": + text_parts.append( + f"[file:{item.file.get('url') or item.file.get('filename') if item.file 
else ''}]" + ) else: - # If it contains non-text (images/files), keep the full list for hashing - text_parts = None - break - - if text_parts is not None: - # Normalize each part but keep them as a list to preserve boundaries and avoid collisions - normalized_parts = [p.replace("\r\n", "\n") for p in text_parts] - core_data["content"] = normalized_parts if normalized_parts else None - else: - core_data["content"] = message.model_dump(mode="json")["content"] + # Fallback for other dict-based content parts + part_type = item.get("type") if isinstance(item, dict) else None + if part_type == "image_url": + url = item.get("image_url", {}).get("url") + text_parts.append(f"[image_url:{url}]") + elif part_type == "file": + url = item.get("file", {}).get("url") or item.get("file", {}).get("filename") + text_parts.append(f"[file:{url}]") + + combined_text = "\n".join(text_parts).replace("\r\n", "\n").strip() + core_data["content"] = combined_text if combined_text else None - # Normalize tool_calls: Focus ONLY on function name and arguments if message.tool_calls: calls_data = [] for tc in message.tool_calls: @@ -66,14 +119,14 @@ def _hash_message(message: Message) -> str: "arguments": canon_args, } ) - # Sort calls to be order-independent calls_data.sort(key=lambda x: (x["name"], x["arguments"])) core_data["tool_calls"] = calls_data else: core_data["tool_calls"] = None message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - return hashlib.sha256(message_bytes).hexdigest() + digest = hashlib.sha256(message_bytes).hexdigest() + return digest def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: @@ -123,16 +176,14 @@ def __init__( self._init_environment() def _ensure_db_path(self) -> None: - """Ensure database directory exists.""" self.db_path.parent.mkdir(parents=True, exist_ok=True) def _init_environment(self) -> None: - """Initialize LMDB environment.""" try: self._env = lmdb.open( str(self.db_path), map_size=self.max_db_size, - max_dbs=3, # main, metadata, and index databases + max_dbs=3, writemap=True, readahead=False, meminit=False, @@ -144,7 +195,6 @@ def _init_environment(self) -> None: @contextmanager def _get_transaction(self, write: bool = False): - """Get LMDB transaction context manager.""" if not self._env: raise RuntimeError("LMDB environment not initialized") @@ -178,11 +228,15 @@ def store( if not conv: raise ValueError("Messages list cannot be empty") + # Sanitize messages before computing hash and storing to ensure consistency + # with the search (find) logic, which also sanitizes its prefix. 
+        sanitized_messages = self.sanitize_assistant_messages(conv.messages)
+        conv.messages = sanitized_messages
+
         # Generate hash for the message list
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
         storage_key = custom_key or message_hash
 
-        # Prepare data for storage
         now = datetime.now()
         if conv.created_at is None:
             conv.created_at = now
@@ -192,20 +246,18 @@ def store(
 
         try:
             with self._get_transaction(write=True) as txn:
-                # Store main data
                 txn.put(storage_key.encode("utf-8"), value, overwrite=True)
 
-                # Store hash -> key mapping for reverse lookup
                 txn.put(
                     f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"),
                     storage_key.encode("utf-8"),
                 )
 
-            logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key}")
+            logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}")
             return storage_key
 
         except Exception as e:
-            logger.error(f"Failed to store conversation: {e}")
+            logger.error(f"Failed to store messages with key {storage_key[:12]}: {e}")
             raise
 
     def get(self, key: str) -> Optional[ConversationInStore]:
@@ -227,39 +279,35 @@ def get(self, key: str) -> Optional[ConversationInStore]:
             storage_data = orjson.loads(data)  # type: ignore
             conv = ConversationInStore.model_validate(storage_data)
 
-            logger.debug(f"Retrieved {len(conv.messages)} messages for key: {key}")
+            logger.debug(f"Retrieved {len(conv.messages)} messages with key: {key[:12]}")
             return conv
 
         except Exception as e:
-            logger.error(f"Failed to retrieve messages for key {key}: {e}")
+            logger.error(f"Failed to retrieve messages with key {key[:12]}: {e}")
             return None
 
     def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]:
         """
         Search conversation data by message list.
-
-        Args:
-            model: Model name of the conversations
-            messages: List of messages to search for
-
-        Returns:
-            Conversation or None if not found
         """
         if not messages:
             return None
 
         # --- Find with raw messages ---
         if conv := self._find_by_message_list(model, messages):
-            logger.debug("Found conversation with raw message history.")
+            logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.")
             return conv
 
         # --- Find with cleaned messages ---
         cleaned_messages = self.sanitize_assistant_messages(messages)
-        if conv := self._find_by_message_list(model, cleaned_messages):
-            logger.debug("Found conversation with cleaned message history.")
-            return conv
+        if cleaned_messages != messages:
+            if conv := self._find_by_message_list(model, cleaned_messages):
+                logger.debug(
+                    f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages."
+                )
+                return conv
 
-        logger.debug("No conversation found for either raw or cleaned history.")
+        logger.debug(f"No session found for '{model}' with {len(messages)} messages.")
        return None
 
     def _find_by_message_list(
@@ -330,11 +378,11 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
                 if message_hash and key != message_hash:
                     txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"))
 
-                logger.debug(f"Deleted messages with key: {key}")
+                logger.debug(f"Deleted messages with key: {key[:12]}")
                 return conv
 
         except Exception as e:
-            logger.error(f"Failed to delete key {key}: {e}")
+            logger.error(f"Failed to delete messages with key {key[:12]}: {e}")
             return None
 
     def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
@@ -478,6 +526,8 @@ def remove_think_tags(text: str) -> str:
         """
         Remove all <think>...</think> tags and strip whitespace.
""" + if not text: + return text # Remove all think blocks anywhere in the text cleaned_content = re.sub(r".*?", "", text, flags=re.DOTALL) return cleaned_content.strip() @@ -485,12 +535,8 @@ def remove_think_tags(text: str) -> str: @staticmethod def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: """ - Create a new list of messages with assistant content cleaned of tags - and system hints/tool call blocks. This is used for both storing and - searching chat history to ensure consistency. - - If a message has no tool_calls but contains tool call XML blocks in its - content, they will be extracted and moved to the tool_calls field. + Produce a canonical history where assistant messages are cleaned of + internal markers and tool call blocks are moved to metadata. """ cleaned_messages = [] for msg in messages: @@ -503,12 +549,12 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: else: text = remove_tool_call_blocks(text).strip() - normalized_content = text.strip() + normalized_content = text.strip() or None if normalized_content != msg.content or tool_calls != msg.tool_calls: cleaned_msg = msg.model_copy( update={ - "content": normalized_content or None, + "content": normalized_content, "tool_calls": tool_calls or None, } ) diff --git a/app/services/pool.py b/app/services/pool.py index a134dda..0f95203 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -31,7 +31,7 @@ def __init__(self) -> None: self._clients.append(client) self._id_map[c.id] = client self._round_robin.append(client) - self._restart_locks[c.id] = asyncio.Lock() # Pre-initialize + self._restart_locks[c.id] = asyncio.Lock() async def init(self) -> None: """Initialize all clients in the pool.""" @@ -84,7 +84,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: lock = self._restart_locks.get(client.id) if lock is None: - return False # Should not happen + return False async with lock: if client.running(): diff --git a/app/utils/helper.py b/app/utils/helper.py index 190b5ce..7606dd3 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -5,7 +5,6 @@ import struct import tempfile from pathlib import Path -from typing import Iterator from urllib.parse import urlparse import httpx @@ -68,7 +67,6 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: data: bytes | None = None suffix: str | None = None if url.startswith("data:image/"): - # Base64 encoded image metadata_part = url.split(",")[0] mime_type = metadata_part.split(":")[1].split(";")[0] @@ -131,13 +129,11 @@ def strip_tagged_blocks(text: str) -> str: result.append(text[idx:]) break - # append any content before this block result.append(text[idx:start]) role_start = start + len(start_marker) newline = text.find("\n", role_start) if newline == -1: - # malformed block; keep the remainder as-is (safe behavior) result.append(text[start:]) break @@ -145,23 +141,18 @@ def strip_tagged_blocks(text: str) -> str: end = text.find(end_marker, newline + 1) if end == -1: - # missing end marker if role == "tool": - # drop from the start marker to EOF (skip the remainder) break else: - # keep inner content from after the role newline to EOF result.append(text[newline + 1 :]) break block_end = end + len(end_marker) if role == "tool": - # drop the whole block idx = block_end continue - # keep the content without role markers content = text[newline + 1 : end] result.append(content) idx = block_end @@ -180,41 +171,19 @@ def strip_system_hints(text: str) -> str: return 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 190b5ce..7606dd3 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -5,7 +5,6 @@
 import struct
 import tempfile
 from pathlib import Path
-from typing import Iterator
 from urllib.parse import urlparse
 
 import httpx
@@ -68,7 +67,6 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
     data: bytes | None = None
     suffix: str | None = None
     if url.startswith("data:image/"):
-        # Base64 encoded image
         metadata_part = url.split(",")[0]
         mime_type = metadata_part.split(":")[1].split(";")[0]
 
@@ -131,13 +129,11 @@ def strip_tagged_blocks(text: str) -> str:
             result.append(text[idx:])
             break
 
-        # append any content before this block
         result.append(text[idx:start])
 
         role_start = start + len(start_marker)
         newline = text.find("\n", role_start)
         if newline == -1:
-            # malformed block; keep the remainder as-is (safe behavior)
             result.append(text[start:])
             break
 
@@ -145,23 +141,18 @@ def strip_tagged_blocks(text: str) -> str:
         end = text.find(end_marker, newline + 1)
 
         if end == -1:
-            # missing end marker
             if role == "tool":
-                # drop from the start marker to EOF (skip the remainder)
                 break
             else:
-                # keep inner content from after the role newline to EOF
                 result.append(text[newline + 1 :])
                 break
 
         block_end = end + len(end_marker)
 
         if role == "tool":
-            # drop the whole block
             idx = block_end
             continue
 
-        # keep the content without role markers
         content = text[newline + 1 : end]
         result.append(content)
         idx = block_end
@@ -180,41 +171,19 @@ def strip_system_hints(text: str) -> str:
     return cleaned.strip()
 
 
-def remove_tool_call_blocks(text: str) -> str:
-    """Strip tool call code blocks from text."""
-    if not text:
-        return text
-
-    # 1. Remove fenced blocks ONLY if they contain tool calls
-    def _replace_block(match: re.Match[str]) -> str:
-        block_content = match.group(1)
-        if not block_content:
-            return match.group(0)
-
-        # Check if the block contains any tool call tag
-        if TOOL_CALL_RE.search(block_content):
-            return ""
-
-        # Preserve the block if no tool call found
-        return match.group(0)
-
-    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
-
-    # 2. Remove orphaned tool calls
-    cleaned = TOOL_CALL_RE.sub("", cleaned)
-
-    return strip_system_hints(cleaned)
-
-
-def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
-    """Extract tool call definitions and return cleaned text."""
+def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]:
+    """
+    Unified engine for stripping tool call blocks and extracting tool metadata.
+    If extract=True, parses JSON arguments and assigns deterministic call IDs.
+    """
     if not text:
         return text, []
 
     tool_calls: list[ToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
-        """Helper to parse args and append to the tool_calls list."""
+        if not extract:
+            return
         if not name:
             logger.warning("Encountered tool_call without a function name.")
             return
@@ -226,8 +195,6 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         except orjson.JSONDecodeError:
             logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
 
-        # Generate a deterministic ID based on name, arguments, and its global sequence index
-        # to ensure uniqueness across multiple fenced blocks while remaining stable for storage.
         index = len(tool_calls)
         seed = f"{name}:{arguments}:{index}".encode("utf-8")
         call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
@@ -245,14 +212,14 @@ def _replace_block(match: re.Match[str]) -> str:
         if not block_content:
             return match.group(0)
 
-        found_in_block = False
-        for call_match in TOOL_CALL_RE.finditer(block_content):
-            found_in_block = True
-            name = (call_match.group(1) or "").strip()
-            raw_args = (call_match.group(2) or "").strip()
-            _create_tool_call(name, raw_args)
+        is_tool_block = bool(TOOL_CALL_RE.search(block_content))
 
-        if found_in_block:
+        if is_tool_block:
+            if extract:
+                for call_match in TOOL_CALL_RE.finditer(block_content):
+                    name = (call_match.group(1) or "").strip()
+                    raw_args = (call_match.group(2) or "").strip()
+                    _create_tool_call(name, raw_args)
             return ""
         else:
             return match.group(0)
@@ -260,56 +227,26 @@ def _replace_block(match: re.Match[str]) -> str:
     cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
 
     def _replace_orphan(match: re.Match[str]) -> str:
-        name = (match.group(1) or "").strip()
-        raw_args = (match.group(2) or "").strip()
-        _create_tool_call(name, raw_args)
+        if extract:
+            name = (match.group(1) or "").strip()
+            raw_args = (match.group(2) or "").strip()
+            _create_tool_call(name, raw_args)
         return ""
 
     cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
-
     cleaned = strip_system_hints(cleaned)
 
     return cleaned, tool_calls
 
 
-def iter_stream_segments(model_output: str, chunk_size: int = 64) -> Iterator[str]:
-    """Yield stream segments while keeping markers and words intact."""
-    if not model_output:
-        return
-
-    token_pattern = re.compile(r"\s+|\S+\s*")
-    pending = ""
-
-    def _flush_pending() -> Iterator[str]:
-        nonlocal pending
-        if pending:
-            yield pending
-            pending = ""
-
-    # Split on <think> boundaries so the markers are never fragmented.
-    parts = re.split(r"(<think>|</think>)", model_output)
-    for part in parts:
-        if not part:
-            continue
-        if part in {"<think>", "</think>"}:
-            yield from _flush_pending()
-            yield part
-            continue
-
-        for match in token_pattern.finditer(part):
-            token = match.group(0)
-
-            if len(token) > chunk_size:
-                yield from _flush_pending()
-                for idx in range(0, len(token), chunk_size):
-                    yield token[idx : idx + chunk_size]
-                continue
-
-            if pending and len(pending) + len(token) > chunk_size:
-                yield from _flush_pending()
+def remove_tool_call_blocks(text: str) -> str:
+    """Strip tool call code blocks from text."""
+    cleaned, _ = _process_tools_internal(text, extract=False)
+    return cleaned
 
-            pending += token
-    yield from _flush_pending()
 
+def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
+    """Extract tool call definitions and return cleaned text."""
+    return _process_tools_internal(text, extract=True)
 
 def text_from_message(message: Message) -> str:
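The helper.py refactor above folds both public helpers into one `_process_tools_internal` engine. A hedged usage sketch follows; the `<tool_call name="...">` wire format is an assumption here, as the authoritative pattern is whatever `TOOL_CALL_RE` actually matches:

```python
from app.utils.helper import extract_tool_calls, remove_tool_call_blocks

text = (
    "Calling the tool now.\n"
    "```xml\n"
    '<tool_call name="get_weather">{"city": "Hanoi"}</tool_call>\n'
    "```"
)

cleaned, calls = extract_tool_calls(text)  # extract=True: parses args, mints call_<sha256> IDs
stripped = remove_tool_call_blocks(text)   # extract=False: identical stripping, no parsing

assert cleaned == stripped                      # both paths remove the fenced block the same way
assert calls[0].function.name == "get_weather"  # metadata only comes back from the extract path
```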
ResponseCreateRequest, + request: ResponseCreateRequest, image_store: Path, - base_url: str, structured_requirement: StructuredOutputRequirement | None = None, ) -> StreamingResponse: """ @@ -1124,7 +1122,7 @@ def _create_responses_real_streaming_response( } async def generate_stream(): - yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request_data.metadata, 'input': None, 'tools': request_data.tools, 'tool_choice': request_data.tool_choice}}).decode('utf-8')}\n\n" + yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request.metadata, 'input': None, 'tools': request.tools, 'tool_choice': request.tool_choice}}).decode('utf-8')}\n\n" message_id = f"msg_{uuid.uuid4().hex}" yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n" @@ -1183,9 +1181,7 @@ async def generate_stream(): try: image_base64, width, height, filename = await _image_to_base64(image, image_store) img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" - image_url = ( - f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" - ) + image_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" image_call_items.append( ResponseImageGenerationCall( id=filename.rsplit(".", 1)[0], @@ -1232,12 +1228,11 @@ async def generate_stream(): response_id, created_time, model_name, - assistant_text, detected_tool_calls, image_call_items, response_contents, usage, - request_data, + request, None, ) _persist_conversation( @@ -1404,19 +1399,18 @@ async def create_chat_completion( @router.post("/v1/responses") async def create_response( - request_data: ResponseCreateRequest, - request: Request, + request: ResponseCreateRequest, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): - base_messages, norm_input = _response_items_to_messages(request_data.input) - struct_req = _build_structured_requirement(request_data.response_format) + base_messages, norm_input = _response_items_to_messages(request.input) + struct_req = _build_structured_requirement(request.response_format) extra_instr = [struct_req.instruction] if struct_req else [] standard_tools, image_tools = [], [] - if request_data.tools: - for t in request_data.tools: + if request.tools: + for t in request.tools: if isinstance(t, Tool): standard_tools.append(t) elif isinstance(t, ResponseImageTool): @@ -1429,18 +1423,14 @@ async def create_response( img_instr = _build_image_generation_instruction( image_tools, - request_data.tool_choice - if isinstance(request_data.tool_choice, ResponseToolChoice) - else None, + request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None, ) if img_instr: extra_instr.append(img_instr) - preface = _instructions_to_messages(request_data.instructions) + preface = _instructions_to_messages(request.instructions) conv_messages = [*preface, *base_messages] if preface else base_messages model_tool_choice = ( - request_data.tool_choice - if isinstance(request_data.tool_choice, (str, ToolChoiceFunction)) - else None + request.tool_choice if 
isinstance(request.tool_choice, (str, ToolChoiceFunction)) else None ) messages = _prepare_messages_for_model( @@ -1448,15 +1438,13 @@ async def create_response( ) pool, db = GeminiClientPool(), LMDBConversationStore() try: - model = _get_model_by_name(request_data.model) + model = _get_model_by_name(request.model) except ValueError as exc: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc session, client, remain = await _find_reusable_session(db, pool, model, messages) if session: - msgs = _prepare_messages_for_model( - remain, request_data.tools, request_data.tool_choice, None, False - ) + msgs = _prepare_messages_for_model(remain, request.tools, request.tool_choice, None, False) if not msgs: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") m_input, files = ( @@ -1485,26 +1473,25 @@ async def create_response( f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}" ) resp_or_stream = await _send_with_split( - session, m_input, files=files, stream=request_data.stream + session, m_input, files=files, stream=request.stream ) except Exception as e: logger.exception("Gemini API error") raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) - if request_data.stream: + if request.stream: return _create_responses_real_streaming_response( resp_or_stream, response_id, created_time, - request_data.model, + request.model, messages, db, model, client, session, - request_data, + request, image_store, - str(request.base_url), struct_req, ) @@ -1520,7 +1507,7 @@ async def create_response( assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req) images = resp_or_stream.images or [] if ( - request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation" + request.tool_choice is not None and request.tool_choice.type == "image_generation" ) and not images: raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.") @@ -1531,7 +1518,7 @@ async def create_response( contents.append( ResponseOutputContent( type="output_text", - text=f"![{fname}]({request.base_url}images/{fname}?token={get_image_token(fname)})", + text=f"![{fname}](images/{fname}?token={get_image_token(fname)})", ) ) img_calls.append( @@ -1565,13 +1552,12 @@ async def create_response( payload = _create_responses_standard_payload( response_id, created_time, - request_data.model, - assistant_text, + request.model, tool_calls, img_calls, contents, usage, - request_data, + request, norm_input, ) _persist_conversation( diff --git a/app/services/client.py b/app/services/client.py index eda1691..dd1d74f 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -207,7 +207,7 @@ def _unescape_html(text_content: str) -> str: return "".join(parts) def _unescape_markdown(text_content: str) -> str: - """Remove backslash escapes for markdown characters in non-code sections.""" + """Remove backslash escapes for Markdown characters in non-code sections.""" parts: list[str] = [] last_index = 0 for match in CODE_FENCE_RE.finditer(text_content): From c0b32c62113acdac21407c629252f35c2ed2bbf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 30 Jan 2026 17:50:02 +0700 Subject: [PATCH 057/139] Enable real-time streaming responses and completely solve the issue with reusable sessions. - Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR. 
- Introducing a new feature for real-time streaming responses. - Fully resolve the problem with reusable sessions. - Break down similar flow logic into helper functions. - All endpoints now support inline Markdown images. - Switch large prompts to use BytesIO to avoid reading and writing to disk. - Remove duplicate images when saving and responding. --- app/server/chat.py | 86 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 16 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index ae1533e..4c64390 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1,4 +1,5 @@ import base64 +import hashlib import io import reprlib import uuid @@ -8,7 +9,7 @@ from typing import Any, AsyncGenerator import orjson -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from gemini_webapi import ModelOutput from gemini_webapi.client import ChatSession @@ -72,8 +73,10 @@ class StructuredOutputRequirement: # --- Helper Functions --- -async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]: - """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename.""" +async def _image_to_base64( + image: Image, temp_dir: Path +) -> tuple[str, int | None, int | None, str, str]: + """Persist an image provided by gemini_webapi and return base64 plus dimensions, filename, and hash.""" if isinstance(image, GeneratedImage): try: saved_path = await image.save(path=str(temp_dir), full_size=True) @@ -96,7 +99,8 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non data = new_path.read_bytes() width, height = extract_image_dimensions(data) filename = random_name - return base64.b64encode(data).decode("ascii"), width, height, filename + file_hash = hashlib.sha256(data).hexdigest() + return base64.b64encode(data).decode("ascii"), width, height, filename, file_hash def _calculate_usage( @@ -925,6 +929,7 @@ def _create_real_streaming_response( model: Model, client_wrapper: GeminiClientWrapper, session: ChatSession, + base_url: str, structured_requirement: StructuredOutputRequirement | None = None, ) -> StreamingResponse: """ @@ -1024,16 +1029,30 @@ async def generate_stream(): ) images = [] + seen_urls = set() for out in all_outputs: if out.images: - images.extend(out.images) + for img in out.images: + # Use the image URL as a stable identifier across chunks + if img.url not in seen_urls: + images.append(img) + seen_urls.add(img.url) image_markdown = "" + seen_hashes = set() for image in images: try: image_store = get_image_store_dir() - _, _, _, filename = await _image_to_base64(image, image_store) - img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + _, _, _, filename, file_hash = await _image_to_base64(image, image_store) + if file_hash in seen_hashes: + # Duplicate content, delete the file and skip + (image_store / filename).unlink(missing_ok=True) + continue + seen_hashes.add(file_hash) + + img_url = ( + f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" + ) image_markdown += f"\n\n{img_url}" except Exception as exc: logger.warning(f"Failed to process image in OpenAI stream: {exc}") @@ -1108,6 +1127,7 @@ def _create_responses_real_streaming_response( session: ChatSession, request: ResponseCreateRequest, image_store: Path, + base_url: str, structured_requirement: StructuredOutputRequirement 
| None = None, ) -> StreamingResponse: """ @@ -1172,16 +1192,30 @@ async def generate_stream(): ) images = [] + seen_urls = set() for out in all_outputs: if out.images: - images.extend(out.images) + for img in out.images: + if img.url not in seen_urls: + images.append(img) + seen_urls.add(img.url) response_contents, image_call_items = [], [] + seen_hashes = set() for image in images: try: - image_base64, width, height, filename = await _image_to_base64(image, image_store) + image_base64, width, height, filename, file_hash = await _image_to_base64( + image, image_store + ) + if file_hash in seen_hashes: + (image_store / filename).unlink(missing_ok=True) + continue + seen_hashes.add(file_hash) + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" - image_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + image_url = ( + f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" + ) image_call_items.append( ResponseImageGenerationCall( id=filename.rsplit(".", 1)[0], @@ -1203,7 +1237,7 @@ async def generate_stream(): image_markdown = "" for img_call in image_call_items: fname = f"{img_call.id}.{img_call.output_format}" - img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})" + img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_markdown += f"\n\n{img_url}" if image_markdown: @@ -1262,10 +1296,12 @@ async def list_models(api_key: str = Depends(verify_api_key)): @router.post("/v1/chat/completions") async def create_chat_completion( request: ChatCompletionRequest, + raw_request: Request, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): + base_url = str(raw_request.base_url) pool, db = GeminiClientPool(), LMDBConversationStore() try: model = _get_model_by_name(request.model) @@ -1339,6 +1375,7 @@ async def create_chat_completion( model, client, session, + base_url, structured_requirement, ) @@ -1358,10 +1395,18 @@ async def create_chat_completion( # Process images for OpenAI non-streaming flow images = resp_or_stream.images or [] image_markdown = "" + seen_hashes = set() for image in images: try: - _, _, _, filename = await _image_to_base64(image, image_store) - img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})" + _, _, _, filename, file_hash = await _image_to_base64(image, image_store) + if file_hash in seen_hashes: + (image_store / filename).unlink(missing_ok=True) + continue + seen_hashes.add(file_hash) + + img_url = ( + f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" + ) image_markdown += f"\n\n{img_url}" except Exception as exc: logger.warning(f"Failed to process image in OpenAI response: {exc}") @@ -1400,10 +1445,12 @@ async def create_chat_completion( @router.post("/v1/responses") async def create_response( request: ResponseCreateRequest, + raw_request: Request, api_key: str = Depends(verify_api_key), tmp_dir: Path = Depends(get_temp_dir), image_store: Path = Depends(get_image_store_dir), ): + base_url = str(raw_request.base_url) base_messages, norm_input = _response_items_to_messages(request.input) struct_req = _build_structured_requirement(request.response_format) extra_instr = [struct_req.instruction] if struct_req else [] @@ -1492,6 +1539,7 @@ async def create_response( session, request, image_store, + base_url, struct_req, ) @@ -1512,13 +1560,19 @@ async def create_response( raise 
HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.") contents, img_calls = [], [] + seen_hashes = set() for img in images: try: - b64, w, h, fname = await _image_to_base64(img, image_store) + b64, w, h, fname, fhash = await _image_to_base64(img, image_store) + if fhash in seen_hashes: + (image_store / fname).unlink(missing_ok=True) + continue + seen_hashes.add(fhash) + contents.append( ResponseOutputContent( type="output_text", - text=f"![{fname}](images/{fname}?token={get_image_token(fname)})", + text=f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})", ) ) img_calls.append( @@ -1541,7 +1595,7 @@ async def create_response( image_markdown = "" for img_call in img_calls: fname = f"{img_call.id}.{img_call.output_format}" - img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})" + img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_markdown += f"\n\n{img_url}" if image_markdown: From 4d51a5fc8d19431712f2e13dd2d1a0395150e252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 2 Feb 2026 18:33:27 +0700 Subject: [PATCH 058/139] Enable real-time streaming responses and completely solve the issue with reusable sessions. - Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR. - Introducing a new feature for real-time streaming responses. - Fully resolve the problem with reusable sessions. - Break down similar flow logic into helper functions. - All endpoints now support inline Markdown images. - Switch large prompts to use BytesIO to avoid reading and writing to disk. - Remove duplicate images when saving and responding. --- app/server/chat.py | 11 +++++++++++ app/services/client.py | 22 ++++++++++++++++------ app/utils/helper.py | 6 +++--- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 4c64390..b8f611d 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -481,6 +481,17 @@ def _prepare_messages_for_model( """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] + # Resolve tool names for 'tool' messages by looking back at previous assistant tool calls + tool_id_to_name = {} + for msg in prepared: + if msg.role == "assistant" and msg.tool_calls: + for tc in msg.tool_calls: + tool_id_to_name[tc.id] = tc.function.name + + for msg in prepared: + if msg.role == "tool" and not msg.name and msg.tool_call_id: + msg.name = tool_id_to_name.get(msg.tool_call_id) + instructions: list[str] = [] if inject_system_defaults: if tools: diff --git a/app/services/client.py b/app/services/client.py index dd1d74f..803bc23 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -86,15 +86,15 @@ async def process_message( if isinstance(message.content, str): # Pure text content - if message.content: - text_fragments.append(message.content) + if message.content or message.role == "tool": + text_fragments.append(message.content or "") elif isinstance(message.content, list): # Mixed content (text, image_url, or file) for item in message.content: if item.type == "text": # Append multiple text fragments - if item.text: - text_fragments.append(item.text) + if item.text or message.role == "tool": + text_fragments.append(item.text or "") elif item.type == "image_url": if not item.image_url: @@ -114,9 +114,19 @@ async def process_message( files.append(await save_url_to_tempfile(url, 
tempdir))
                     else:
                         raise ValueError("File must contain 'file_data' or 'url' key")
+        elif message.content is None and message.role == "tool":
+            text_fragments.append("")
         elif message.content is not None:
             raise ValueError("Unsupported message content type.")
 
+        # Special handling for tool response format
+        if message.role == "tool":
+            tool_name = message.name or "unknown"
+            combined_content = "\n".join(text_fragments)
+            text_fragments = [
+                f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
+            ]
+
         if message.tool_calls:
             tool_blocks: list[str] = []
             for call in message.tool_calls:
@@ -135,10 +145,10 @@ async def process_message(
             tool_section = "```xml\n" + "".join(tool_blocks) + "\n```"
             text_fragments.append(tool_section)
 
-        model_input = "\n".join(fragment for fragment in text_fragments if fragment)
+        model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
 
         # Add role tag if needed
-        if model_input:
+        if model_input or message.role == "tool":
             if tagged:
                 model_input = add_tag(message.role, model_input)
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 7606dd3..38b6400 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -110,9 +110,9 @@ def strip_code_fence(text: str) -> str:
 
 
 def strip_tagged_blocks(text: str) -> str:
-    """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely.
-    - tool blocks are removed entirely (if missing end marker, drop to EOF).
-    - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF).
+    """Remove <|im_start|>role ... <|im_end|> sections.
+    - tool blocks are removed entirely (including content).
+    - other roles: remove markers and role, keep inner content.
     """
     if not text:
         return text

From d69aaf02f2b6b7ff331564b526178a447c3b49e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Mon, 2 Feb 2026 18:52:51 +0700
Subject: [PATCH 059/139] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
- Introducing a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
- Remove duplicate images when saving and responding.
---
 app/services/client.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/services/client.py b/app/services/client.py
index 803bc23..4146b7e 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -122,7 +122,10 @@ async def process_message(
         # Special handling for tool response format
         if message.role == "tool":
             tool_name = message.name or "unknown"
-            combined_content = "\n".join(text_fragments)
+            combined_content = "\n".join(text_fragments).strip()
+            # If the tool result is literally empty, provide a clear indicator like empty JSON
+            if not combined_content:
+                combined_content = "{}"
             text_fragments = [
                 f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
             ]
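Before the follow-up cleanup in PATCH 060 below, it helps to pin down what the tool-message branch is meant to emit. A sketch under stated assumptions: that `Message` accepts these fields directly, and that the tool result is wrapped in a name-plus-body `<tool_response>` tag as suggested by the `tool_name` variable; neither detail is guaranteed by this patch alone.

```python
import asyncio

from app.models import Message
from app.services.client import GeminiClientWrapper


async def demo() -> None:
    # A tool result with no content should still yield a well-formed block,
    # with "{}" as the explicit empty-JSON indicator introduced above.
    msg = Message(role="tool", name="get_weather", content=None, tool_call_id="call_abc")
    model_input, _files = await GeminiClientWrapper.process_message(msg, tagged=False)
    print(model_input)
    # Expected shape (wrapper tag assumed, see above):
    # ```xml
    # <tool_response name="get_weather">{}</tool_response>
    # ```


asyncio.run(demo())
```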
From 8e15a8698a4a3df53a7bc3f676c63c0a492c9a01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Mon, 2 Feb 2026 19:27:08 +0700
Subject: [PATCH 060/139] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
- Introducing a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
- Remove duplicate images when saving and responding.
---
 app/services/client.py | 43 +++++-------------------------------------
 1 file changed, 5 insertions(+), 38 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 4146b7e..a35146f 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -85,17 +85,13 @@ async def process_message(
         text_fragments: list[str] = []
 
         if isinstance(message.content, str):
-            # Pure text content
             if message.content or message.role == "tool":
-                text_fragments.append(message.content or "")
+                text_fragments.append(message.content or "{}")
         elif isinstance(message.content, list):
-            # Mixed content (text, image_url, or file)
             for item in message.content:
                 if item.type == "text":
-                    # Append multiple text fragments
                     if item.text or message.role == "tool":
-                        text_fragments.append(item.text or "")
-
+                        text_fragments.append(item.text or "{}")
                 elif item.type == "image_url":
                     if not item.image_url:
                         raise ValueError("Image URL cannot be empty")
@@ -103,7 +99,6 @@ async def process_message(
                         files.append(await save_url_to_tempfile(url, tempdir))
                     else:
                         raise ValueError("Image URL must contain 'url' key")
-
                 elif item.type == "file":
                     if not item.file:
                         raise ValueError("File cannot be empty")
@@ -115,19 +110,15 @@ async def process_message(
                     else:
                         raise ValueError("File must contain 'file_data' or 'url' key")
         elif message.content is None and message.role == "tool":
-            text_fragments.append("")
+            text_fragments.append("{}")
         elif message.content is not None:
             raise ValueError("Unsupported message content type.")
 
-        # Special handling for tool response format
         if message.role == "tool":
             tool_name = message.name or "unknown"
-            combined_content = "\n".join(text_fragments).strip()
-            # If the tool result is literally empty, provide a clear indicator like empty JSON
-            if not combined_content:
-                combined_content = "{}"
+            combined_content = "\n".join(text_fragments).strip() or "{}"
             text_fragments = [
-                f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
+                f'<tool_response name="{tool_name}">{combined_content}</tool_response>'
             ]
 
         if message.tool_calls:
@@ -138,7 +129,6 @@ async def process_message(
                 parsed_args = orjson.loads(args_text)
                 args_text = orjson.dumps(parsed_args).decode("utf-8")
             except orjson.JSONDecodeError:
-                # Leave args_text as is if it is not valid JSON
                 pass
             tool_blocks.append(
                 f'<tool_call name="{call.function.name}">{args_text}</tool_call>'
             )
@@ -150,7 +140,6 @@ async def process_message(
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
 
-        # Add role tag if needed
         if model_input or message.role == "tool":
             if tagged:
                 model_input = add_tag(message.role, model_input)
@@ -161,51 +150,30 @@ async def process_message(
     async def process_conversation(
         messages: list[Message], tempdir: Path | None = None
     ) -> tuple[str, list[Path | str]]:
-        """
-        Process the entire conversation and return a formatted string and list of
-        files. The last message is assumed to be the assistant's response.
-        """
-        # Determine once whether we need to wrap messages with role tags: only required
-        # if the history already contains assistant/system messages. When every message
-        # so far is from the user, we can skip tagging entirely.
         need_tag = any(m.role != "user" for m in messages)
-
         conversation: list[str] = []
         files: list[Path | str] = []
-
         for msg in messages:
             input_part, files_part = await GeminiClientWrapper.process_message(
                 msg, tempdir, tagged=need_tag
            )
             conversation.append(input_part)
             files.extend(files_part)
-
-        # Append an opening assistant tag only when we used tags above so that Gemini
-        # knows where to start its reply.
         if need_tag:
             conversation.append(add_tag("assistant", "", unclose=True))
-
         return "\n".join(conversation), files
 
     @staticmethod
     def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
-        """
-        Extract and format the output text from a ModelOutput.
-        Includes reasoning thoughts (wrapped in <think> tags) and unescapes content.
-        """
         text = ""
-
         if include_thoughts and response.thoughts:
             text += f"<think>{response.thoughts}</think>\n"
-
         if response.text:
             text += response.text
         else:
             text += str(response)
-
-        # Fix some escaped characters
         def _unescape_html(text_content: str) -> str:
-            """Unescape HTML entities only in non-code sections of the text."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):
@@ -220,7 +188,6 @@ def _unescape_html(text_content: str) -> str:
             return "".join(parts)
 
         def _unescape_markdown(text_content: str) -> str:
-            """Remove backslash escapes for Markdown characters in non-code sections."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):
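The `need_tag` line kept by the cleanup above encodes the whole tagging policy for `process_conversation`. Restated as a standalone rule (a sketch, not project code):

```python
def needs_role_tags(roles: list[str]) -> bool:
    # Purely-user histories can be sent untagged; any assistant/system/tool
    # turn forces <|im_start|>-style role tags so the model can tell the
    # speakers apart, mirroring `any(m.role != "user" for m in messages)`.
    return any(role != "user" for role in roles)


assert needs_role_tags(["user", "user"]) is False
assert needs_role_tags(["user", "assistant", "user"]) is True
```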
From 7716c62a8df23b6557841e5e4cdd571b025d5e4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Tue, 3 Feb 2026 09:33:18 +0700
Subject: [PATCH 061/139] build: update dependencies

---
 pyproject.toml |  4 +--
 uv.lock        | 82 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1c30f8e..dc08571 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,10 +6,10 @@ readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
     "fastapi>=0.128.0",
-    "gemini-webapi>=1.17.3",
+    "gemini-webapi>=1.18.0",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
-    "orjson>=3.11.5",
+    "orjson>=3.11.7",
     "pydantic-settings[yaml]>=2.12.0",
     "uvicorn>=0.40.0",
     "uvloop>=0.22.1; sys_platform != 'win32'",
diff --git a/uv.lock b/uv.lock
index 50a73be..34a949c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -106,10 +106,10 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "fastapi", specifier = ">=0.128.0" },
-    { name = "gemini-webapi", specifier = ">=1.17.3" },
+    { name = "gemini-webapi", specifier = ">=1.18.0" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
-    { name = "orjson", specifier = ">=3.11.5" },
+    { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" },
     { name = "uvicorn", specifier = ">=0.40.0" },
@@ -122,17 +122,17 @@ dev = [{ name = "ruff", specifier = ">=0.14.14" }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.17.3"
+version = "1.18.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "httpx" },
+    { name = "httpx", extra = ["http2"] },
     { name = "loguru" },
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/aa/74/1a31f3605250eb5cbcbfb15559c43b0d71734c8d286cfa9a7833841306e3/gemini_webapi-1.17.3.tar.gz", hash = "sha256:6201f9eaf5f562c5dc589d71c0edbba9e2eb8f780febbcf35307697bf474d577", size = 259418, upload-time = 
"2025-12-05T22:38:44.426Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c6/03/eb06536f287a8b7fb4808b00a60d9a9a3694f8a4079b77730325c639fbbe/gemini_webapi-1.18.0.tar.gz", hash = "sha256:0688a080fc3c95be55e723a66b2b69ec3ffcd58b07c50cf627d85d59d1181a86", size = 264630, upload-time = "2026-02-03T01:18:39.794Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/a3/a88ff45197dce68a81d92c8d40368e4c26f67faf3af3273357f3f71f5c3d/gemini_webapi-1.17.3-py3-none-any.whl", hash = "sha256:d83969b1fa3236f3010d856d191b35264c936ece81f1be4c1de53ec1cf0855c8", size = 56659, upload-time = "2025-12-05T22:38:42.93Z" }, + { url = "https://files.pythonhosted.org/packages/40/33/85f520f56faddd68442c7efe7086ff5593b213bd8fc3768835dbe610fd9b/gemini_webapi-1.18.0-py3-none-any.whl", hash = "sha256:2fe25b5f8185aba1ca109e1280ef3eb79e5bd8a81fba16e01fbc4a177b72362c", size = 61523, upload-time = "2026-02-03T01:18:38.322Z" }, ] [[package]] @@ -144,6 +144,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -172,6 +194,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "idna" version = "3.11" @@ -211,25 +247,25 @@ wheels = [ [[package]] name = "orjson" -version = "3.11.5" +version = "3.11.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = "2025-12-06T15:54:22.061Z" }, - { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" }, - { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" }, - { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" }, - { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" }, - { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" }, - { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = "2025-12-06T15:54:29.856Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" }, - { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" }, - { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = "2025-12-06T15:54:33.361Z" }, - { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" }, - { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" }, - { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" }, - { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" }, - { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" }, + { url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" }, + { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" }, + { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" }, + { url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" }, + { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" }, + { url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" }, + { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" }, + { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" }, + { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" }, + { url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" }, + { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" }, + { url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" }, + { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" }, ] [[package]] From 61672cc46948a501a0f2af3761eb231e40ec6831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 11:44:54 +0700 Subject: [PATCH 062/139] Refactor: Use `strip_system_hints` to standardize the content. --- app/services/client.py | 4 +++- app/services/lmdb.py | 22 +++------------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index a35146f..89ad3ba 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -127,7 +127,9 @@ async def process_message( args_text = call.function.arguments.strip() try: parsed_args = orjson.loads(args_text) - args_text = orjson.dumps(parsed_args).decode("utf-8") + args_text = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( + "utf-8" + ) except orjson.JSONDecodeError: pass tool_blocks.append( diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 6ab2302..d5424e0 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -12,12 +12,9 @@ from ..models import ContentItem, ConversationInStore, Message from ..utils import g_config from ..utils.helper import ( - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - XML_HINT_STRIPPED, - XML_WRAP_HINT, extract_tool_calls, remove_tool_call_blocks, + strip_system_hints, ) from ..utils.singleton import Singleton @@ -41,14 +38,7 @@ def _hash_message(message: Message) -> str: normalized = content.replace("\r\n", "\n") normalized = LMDBConversationStore.remove_think_tags(normalized) - - for hint in [ - XML_WRAP_HINT, - XML_HINT_STRIPPED, - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - ]: - normalized = normalized.replace(hint, "") + normalized = strip_system_hints(normalized) if message.tool_calls: normalized = remove_tool_call_blocks(normalized) @@ -70,13 +60,7 @@ def _hash_message(message: Message) -> str: if text_val: text_val = text_val.replace("\r\n", "\n") text_val = LMDBConversationStore.remove_think_tags(text_val) - for hint in [ - XML_WRAP_HINT, - XML_HINT_STRIPPED, - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - ]: - text_val = text_val.replace(hint, "") + text_val = strip_system_hints(text_val) text_val = remove_tool_call_blocks(text_val).strip() if text_val: text_parts.append(text_val) From cc0b13f40b0ed875ba9f1274101c7c9be49e8e52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 19:04:01 +0700 Subject: [PATCH 063/139] Refactor: Only inject code block hint if NOT a structured response request --- app/server/chat.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index b8f611d..608b52f 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -477,6 +477,7 @@ def _prepare_messages_for_model( tool_choice: str | ToolChoiceFunction | None, extra_instructions: list[str] | None = None, inject_system_defaults: bool = True, + is_structured: bool = False, ) -> list[Message]: """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] @@ -505,7 +506,8 @@ def _prepare_messages_for_model( f"Applied {len(extra_instructions)} extra instructions for tool/structured output." 
) - if not _conversation_has_code_hint(prepared): + # Only inject code block hint if NOT a structured response request + if not is_structured and not _conversation_has_code_hint(prepared): instructions.append(CODE_BLOCK_HINT) logger.debug("Injected default code block hint for Gemini conversation.") @@ -1326,7 +1328,11 @@ async def create_chat_completion( # This ensures that server-injected system instructions are part of the history msgs = _prepare_messages_for_model( - request.messages, request.tools, request.tool_choice, extra_instr + request.messages, + request.tools, + request.tool_choice, + extra_instr, + is_structured=structured_requirement is not None, ) session, client, remain = await _find_reusable_session(db, pool, model, msgs) @@ -1338,7 +1344,12 @@ async def create_chat_completion( # For reused sessions, we only need to process the remaining messages. # We don't re-inject system defaults to avoid duplicating instructions already in history. input_msgs = _prepare_messages_for_model( - remain, request.tools, request.tool_choice, extra_instr, False + remain, + request.tools, + request.tool_choice, + extra_instr, + False, + is_structured=structured_requirement is not None, ) if len(input_msgs) == 1: m_input, files = await GeminiClientWrapper.process_message( @@ -1492,7 +1503,11 @@ async def create_response( ) messages = _prepare_messages_for_model( - conv_messages, standard_tools or None, model_tool_choice, extra_instr or None + conv_messages, + standard_tools or None, + model_tool_choice, + extra_instr or None, + is_structured=struct_req is not None, ) pool, db = GeminiClientPool(), LMDBConversationStore() try: @@ -1502,7 +1517,14 @@ async def create_response( session, client, remain = await _find_reusable_session(db, pool, model, messages) if session: - msgs = _prepare_messages_for_model(remain, request.tools, request.tool_choice, None, False) + msgs = _prepare_messages_for_model( + remain, + request.tools, + request.tool_choice, + None, + False, + is_structured=struct_req is not None, + ) if not msgs: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") m_input, files = ( From 6b90e5d15d942c96ccfd272f9cb9ef23e4f7ac31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 20:43:18 +0700 Subject: [PATCH 064/139] Refactor: Remove the code block hint entirely --- app/server/chat.py | 56 +++++---------------------------------------- app/utils/helper.py | 16 ------------- 2 files changed, 6 insertions(+), 66 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 608b52f..43f5e12 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -41,15 +41,12 @@ from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore from ..utils import g_config from ..utils.helper import ( - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - CONTROL_TOKEN_RE, XML_HINT_STRIPPED, XML_WRAP_HINT, estimate_tokens, extract_image_dimensions, extract_tool_calls, - strip_code_fence, + strip_system_hints, text_from_message, ) from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key @@ -225,10 +222,9 @@ def _process_llm_output( if structured_requirement: cleaned_for_json = LMDBConversationStore.remove_think_tags(visible_output) - json_text = strip_code_fence(cleaned_for_json or "") - if json_text: + if cleaned_for_json: try: - structured_payload = orjson.loads(json_text) + structured_payload = orjson.loads(cleaned_for_json) canonical_output = 
orjson.dumps(structured_payload).decode("utf-8") visible_output = canonical_output storage_output = canonical_output @@ -450,27 +446,6 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: return -def _conversation_has_code_hint(messages: list[Message]) -> bool: - """Return True if any system message already includes the code block hint.""" - for msg in messages: - if msg.role != "system" or msg.content is None: - continue - - if isinstance(msg.content, str): - if CODE_HINT_STRIPPED in msg.content: - return True - continue - - if isinstance(msg.content, list): - for part in msg.content: - if getattr(part, "type", None) != "text": - continue - if part.text and CODE_HINT_STRIPPED in part.text: - return True - - return False - - def _prepare_messages_for_model( source_messages: list[Message], tools: list[Tool] | None, @@ -506,11 +481,6 @@ def _prepare_messages_for_model( f"Applied {len(extra_instructions)} extra instructions for tool/structured output." ) - # Only inject code block hint if NOT a structured response request - if not is_structured and not _conversation_has_code_hint(prepared): - instructions.append(CODE_BLOCK_HINT) - logger.debug("Injected default code block hint for Gemini conversation.") - if not instructions: if tools and tool_choice != "none": _append_xml_hint_to_last_user_message(prepared) @@ -791,7 +761,7 @@ class StreamingOutputFilter: 2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|> 3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content) 4. Control tokens: <|im_start|>, <|im_end|> - 5. System instructions/hints: XML_WRAP_HINT, CODE_BLOCK_HINT, etc. + 5. System instructions/hints. """ def __init__(self): @@ -805,12 +775,6 @@ def __init__(self): self.XML_END = "```" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" - self.SYSTEM_HINTS = [ - XML_WRAP_HINT, - XML_HINT_STRIPPED, - CODE_BLOCK_HINT, - CODE_HINT_STRIPPED, - ] def process(self, chunk: str) -> str: self.buffer += chunk @@ -906,11 +870,7 @@ def process(self, chunk: str) -> str: break # Final pass: filter out system hints from the text to be yielded - for hint in self.SYSTEM_HINTS: - if hint in to_yield: - to_yield = to_yield.replace(hint, "") - - return to_yield + return strip_system_hints(to_yield) def flush(self) -> str: # If we are stuck in a tool block or role header at the end, @@ -922,11 +882,7 @@ def flush(self) -> str: self.buffer = "" # Filter out any orphaned/partial control tokens or hints - final_text = CONTROL_TOKEN_RE.sub("", final_text) - for hint in self.SYSTEM_HINTS: - final_text = final_text.replace(hint, "") - - return final_text.strip() + return strip_system_hints(final_text) # --- Response Builders & Streaming --- diff --git a/app/utils/helper.py b/app/utils/helper.py index 38b6400..1281f9b 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,19 +19,12 @@ '```xml\n{"arg": "value"}\n```\n' "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" ) -CODE_BLOCK_HINT = ( - "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced " - "block and supply the correct language label (for example, ```python ... ``` or ```html ... 
```).\n" - "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n" -) TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( r"(.*?)", re.DOTALL | re.IGNORECASE ) -JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") XML_HINT_STRIPPED = XML_WRAP_HINT.strip() -CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip() def add_tag(role: str, content: str, unclose: bool = False) -> str: @@ -101,14 +94,6 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: return path -def strip_code_fence(text: str) -> str: - """Remove surrounding ```json fences if present.""" - match = JSON_FENCE_RE.match(text.strip()) - if match: - return match.group(1).strip() - return text.strip() - - def strip_tagged_blocks(text: str) -> str: """Remove <|im_start|>role ... <|im_end|> sections. - tool blocks are removed entirely (including content). @@ -166,7 +151,6 @@ def strip_system_hints(text: str) -> str: return text cleaned = strip_tagged_blocks(text) cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") - cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "") cleaned = CONTROL_TOKEN_RE.sub("", cleaned) return cleaned.strip() From 553bd94b4631832694de40fe7246063ab359fc46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 20:52:54 +0700 Subject: [PATCH 065/139] Refactor: Remove the code block hint entirely --- app/server/chat.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 43f5e12..0bb2722 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -452,7 +452,6 @@ def _prepare_messages_for_model( tool_choice: str | ToolChoiceFunction | None, extra_instructions: list[str] | None = None, inject_system_defaults: bool = True, - is_structured: bool = False, ) -> list[Message]: """Return a copy of messages enriched with tool instructions when needed.""" prepared = [msg.model_copy(deep=True) for msg in source_messages] @@ -1288,7 +1287,6 @@ async def create_chat_completion( request.tools, request.tool_choice, extra_instr, - is_structured=structured_requirement is not None, ) session, client, remain = await _find_reusable_session(db, pool, model, msgs) @@ -1305,7 +1303,6 @@ async def create_chat_completion( request.tool_choice, extra_instr, False, - is_structured=structured_requirement is not None, ) if len(input_msgs) == 1: m_input, files = await GeminiClientWrapper.process_message( @@ -1463,7 +1460,6 @@ async def create_response( standard_tools or None, model_tool_choice, extra_instr or None, - is_structured=struct_req is not None, ) pool, db = GeminiClientPool(), LMDBConversationStore() try: @@ -1479,7 +1475,6 @@ async def create_response( request.tool_choice, None, False, - is_structured=struct_req is not None, ) if not msgs: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") From fd767dad79266486f14bd5610054220881bc9a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 3 Feb 2026 22:40:38 +0700 Subject: [PATCH 066/139] Refactor: fix missing whitespace in the streaming response. 
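Context: strip_system_hints() runs on every streamed chunk, so the trailing
strip() call removed legitimate whitespace at chunk boundaries and fused
adjacent words together. A minimal sketch of the failure mode (the helper
names and chunk values below are illustrative, not the real call sites):

    def filter_chunk_old(chunk: str) -> str:
        return chunk.strip()  # old behavior: eats whitespace at chunk edges

    def filter_chunk_new(chunk: str) -> str:
        return chunk  # new behavior: boundary whitespace is preserved

    chunks = ["Hello ", "world"]  # as the model might stream them
    assert "".join(filter_chunk_old(c) for c in chunks) == "Helloworld"
    assert "".join(filter_chunk_new(c) for c in chunks) == "Hello world"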
--- app/utils/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 1281f9b..a8b40aa 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -152,7 +152,7 @@ def strip_system_hints(text: str) -> str: cleaned = strip_tagged_blocks(text) cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") cleaned = CONTROL_TOKEN_RE.sub("", cleaned) - return cleaned.strip() + return cleaned def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]: From 4beb33bb5a27368f0a14c769c213aea44103c100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 07:24:13 +0700 Subject: [PATCH 067/139] Refactor: remove unnecessary code --- app/services/lmdb.py | 13 +------------ app/utils/helper.py | 2 +- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index d5424e0..8dc3722 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -14,7 +14,6 @@ from ..utils.helper import ( extract_tool_calls, remove_tool_call_blocks, - strip_system_hints, ) from ..utils.singleton import Singleton @@ -36,17 +35,8 @@ def _hash_message(message: Message) -> str: core_data["content"] = None elif isinstance(content, str): normalized = content.replace("\r\n", "\n") - normalized = LMDBConversationStore.remove_think_tags(normalized) - normalized = strip_system_hints(normalized) - - if message.tool_calls: - normalized = remove_tool_call_blocks(normalized) - else: - temp_text, _extracted = extract_tool_calls(normalized) - normalized = temp_text - - normalized = normalized.strip() + normalized = remove_tool_call_blocks(normalized).strip() core_data["content"] = normalized if normalized else None elif isinstance(content, list): text_parts = [] @@ -60,7 +50,6 @@ def _hash_message(message: Message) -> str: if text_val: text_val = text_val.replace("\r\n", "\n") text_val = LMDBConversationStore.remove_think_tags(text_val) - text_val = strip_system_hints(text_val) text_val = remove_tool_call_blocks(text_val).strip() if text_val: text_parts.append(text_val) diff --git a/app/utils/helper.py b/app/utils/helper.py index a8b40aa..b6bb5cb 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -16,7 +16,7 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} XML_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" - '```xml\n{"arg": "value"}\n```\n' + '```xml\n{"argument": "value"}\n```\n' "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" ) TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) From 6b8dd4e5b893e689521efe93261825e95f1a1a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 09:35:21 +0700 Subject: [PATCH 068/139] Refactor: Update `StreamingOutputFilter` logic to improve handling of streaming responses --- app/server/chat.py | 187 ++++++++++++++++++++++++-------------------- app/utils/helper.py | 16 +++- 2 files changed, 115 insertions(+), 88 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 0bb2722..87f29a6 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -41,6 +41,8 @@ from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore from ..utils import g_config from ..utils.helper import ( + XML_HINT_LINE_END, + XML_HINT_LINE_START, XML_HINT_STRIPPED, XML_WRAP_HINT, estimate_tokens, @@ 
-755,133 +757,146 @@ async def _send_with_split( class StreamingOutputFilter: """ - Enhanced streaming filter that suppresses: - 1. XML tool call blocks: ```xml ... ``` - 2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|> - 3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content) - 4. Control tokens: <|im_start|>, <|im_end|> - 5. System instructions/hints. + Simplified State Machine filter to suppress technical markers, tool calls, and system hints. + States: NORMAL, IN_XML, IN_TAG, IN_BLOCK, IN_HINT """ def __init__(self): self.buffer = "" - self.in_xml_tool = False - self.in_tagged_block = False - self.in_role_header = False + self.state = "NORMAL" self.current_role = "" + self.block_buffer = "" self.XML_START = "```xml" self.XML_END = "```" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" + self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else "" + self.HINT_END = XML_HINT_LINE_END + + self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END] + if self.HINT_START: + self.WATCH_PREFIXES.append(self.HINT_START) def process(self, chunk: str) -> str: self.buffer += chunk - to_yield = "" + output = [] while self.buffer: - if self.in_xml_tool: + if self.state == "NORMAL": + xml_idx = self.buffer.find(self.XML_START) + tag_idx = self.buffer.find(self.TAG_START) + end_idx = self.buffer.find(self.TAG_END) + hint_idx = self.buffer.find(self.HINT_START) + + indices = [ + (i, t) + for i, t in [ + (xml_idx, "XML"), + (tag_idx, "TAG"), + (end_idx, "END"), + (hint_idx, "HINT"), + ] + if i != -1 + ] + + if not indices: + keep_len = 0 + for p in self.WATCH_PREFIXES: + for i in range(len(p) - 1, 0, -1): + if self.buffer.endswith(p[:i]): + keep_len = max(keep_len, i) + break + + yield_len = len(self.buffer) - keep_len + if yield_len > 0: + output.append(self.buffer[:yield_len]) + self.buffer = self.buffer[yield_len:] + break + + indices.sort() + idx, m_type = indices[0] + output.append(self.buffer[:idx]) + self.buffer = self.buffer[idx:] + + if m_type == "XML": + self.state = "IN_XML" + self.block_buffer = "" + self.buffer = self.buffer[len(self.XML_START) :] + elif m_type == "TAG": + self.state = "IN_TAG" + self.buffer = self.buffer[len(self.TAG_START) :] + elif m_type == "END": + self.buffer = self.buffer[len(self.TAG_END) :] + elif m_type == "HINT": + self.state = "IN_HINT" + self.buffer = self.buffer[len(self.HINT_START) :] + + elif self.state == "IN_HINT": + end_idx = self.buffer.find(self.HINT_END) + if end_idx != -1: + self.buffer = self.buffer[end_idx + len(self.HINT_END) :] + self.state = "NORMAL" + else: + self.buffer = "" + break + + elif self.state == "IN_XML": end_idx = self.buffer.find(self.XML_END) if end_idx != -1: + content = self.block_buffer + self.buffer[:end_idx] + if " 0: - to_yield += self.buffer[:yield_len] + output.append(self.buffer[:yield_len]) self.buffer = self.buffer[yield_len:] - break - else: - # Outside any special block. Look for starts. 
- earliest_idx = -1 - match_type = "" - - xml_idx = self.buffer.find(self.XML_START) - if xml_idx != -1: - earliest_idx = xml_idx - match_type = "xml" - - tag_s_idx = self.buffer.find(self.TAG_START) - if tag_s_idx != -1: - if earliest_idx == -1 or tag_s_idx < earliest_idx: - earliest_idx = tag_s_idx - match_type = "tag_start" - - tag_e_idx = self.buffer.find(self.TAG_END) - if tag_e_idx != -1: - if earliest_idx == -1 or tag_e_idx < earliest_idx: - earliest_idx = tag_e_idx - match_type = "tag_end" - - if earliest_idx != -1: - # Yield text before the match - to_yield += self.buffer[:earliest_idx] - self.buffer = self.buffer[earliest_idx:] - - if match_type == "xml": - self.in_xml_tool = True - self.buffer = self.buffer[len(self.XML_START) :] - elif match_type == "tag_start": - self.in_role_header = True - self.buffer = self.buffer[len(self.TAG_START) :] - elif match_type == "tag_end": - # Orphaned end tag, just skip it - self.buffer = self.buffer[len(self.TAG_END) :] - continue - else: - # Check for prefixes - prefixes = [self.XML_START, self.TAG_START, self.TAG_END] - max_keep = 0 - for p in prefixes: - for i in range(len(p) - 1, 0, -1): - if self.buffer.endswith(p[:i]): - max_keep = max(max_keep, i) - break - - yield_len = len(self.buffer) - max_keep - if yield_len > 0: - to_yield += self.buffer[:yield_len] - self.buffer = self.buffer[yield_len:] + else: + self.buffer = "" break - # Final pass: filter out system hints from the text to be yielded - return strip_system_hints(to_yield) + return "".join(output) def flush(self) -> str: - # If we are stuck in a tool block or role header at the end, - # it usually means malformed output. - if self.in_xml_tool or (self.in_tagged_block and self.current_role == "tool"): - return "" + res = "" + if self.state == "IN_XML": + if "") XML_HINT_STRIPPED = XML_WRAP_HINT.strip() +_hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()] +XML_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" +XML_HINT_LINE_END = _hint_lines[-1] if _hint_lines else "" def add_tag(role: str, content: str, unclose: bool = False) -> str: @@ -149,8 +152,17 @@ def strip_system_hints(text: str) -> str: """Remove system-level hint text from a given string.""" if not text: return text - cleaned = strip_tagged_blocks(text) - cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") + + # Remove the full hints first + cleaned = text.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") + + # Remove fragments using derived constants + if XML_HINT_LINE_START: + cleaned = re.sub(rf"\n?{re.escape(XML_HINT_LINE_START)}:?\s*", "", cleaned) + if XML_HINT_LINE_END: + cleaned = re.sub(rf"\s*{re.escape(XML_HINT_LINE_END)}\.?\n?", "", cleaned) + + cleaned = strip_tagged_blocks(cleaned) cleaned = CONTROL_TOKEN_RE.sub("", cleaned) return cleaned From d86ae59e5f1037bdbc79c9a84624116f23f6a302 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 20:17:32 +0700 Subject: [PATCH 069/139] Refactor: Adjust function call format to prevent streaming issues Caused by Gemini Web's post-processing mechanism. 
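Markdown fences are rewritten by that post-processing step mid-stream, so the
tool-call wire format moves to plain bracket tags that pass through untouched.
A sketch of how the new markers parse, using the regexes this patch adds to
app/utils/helper.py (the tool name and arguments are made up):

    import re

    # Regexes as introduced in app/utils/helper.py by this patch.
    TOOL_BLOCK_RE = re.compile(
        r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE
    )
    TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE)

    reply = (
        "[function_calls]\n"
        '[call:get_weather]{"city": "Hanoi"}[/call]\n'
        "[/function_calls]"
    )
    block = TOOL_BLOCK_RE.search(reply).group(1)
    assert TOOL_CALL_RE.findall(block) == [("get_weather", '{"city": "Hanoi"}')]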
--- app/server/chat.py | 62 +++++++++++++++++++++++------------------- app/services/client.py | 16 +++++------ app/utils/helper.py | 12 ++++---- 3 files changed, 47 insertions(+), 43 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 87f29a6..ed0d731 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -367,19 +367,17 @@ def _build_tool_prompt( ) lines.append( - "When you decide to call a tool you MUST respond with nothing except a single fenced block exactly like the template below." + "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below." ) + lines.append("Do not add text before or after it.") + lines.append("[function_calls]") + lines.append('[call:tool_name]{"argument": "value"}[/call]') + lines.append("[/function_calls]") lines.append( - "The fenced block MUST use ```xml as the opening fence and ``` as the closing fence. Do not add text before or after it." + "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail." ) - lines.append("```xml") - lines.append('{"argument": "value"}') - lines.append("```") lines.append( - "Use double quotes for JSON keys and values. If you omit the fenced block or include any extra text, the system will assume you are NOT calling a tool and your request will fail." - ) - lines.append( - "If multiple tool calls are required, include multiple entries inside the same fenced block. Without a tool call, reply normally and do NOT emit any ```xml fence." + "If multiple tool calls are required, include multiple [call:...]...[/call] entries inside the same [function_calls] block. Without a tool call, reply normally and do NOT emit any [function_calls] tag." ) return "\n".join(lines) @@ -757,8 +755,8 @@ async def _send_with_split( class StreamingOutputFilter: """ - Simplified State Machine filter to suppress technical markers, tool calls, and system hints. - States: NORMAL, IN_XML, IN_TAG, IN_BLOCK, IN_HINT + State Machine filter to suppress technical markers, tool calls, and system hints. + Handles fragmentation where markers are split across multiple chunks. 
""" def __init__(self): @@ -767,12 +765,13 @@ def __init__(self): self.current_role = "" self.block_buffer = "" - self.XML_START = "```xml" - self.XML_END = "```" + self.XML_START = "[function_calls]" + self.XML_END = "[/function_calls]" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else "" self.HINT_END = XML_HINT_LINE_END + self.TOOL_START = "[call:" self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END] if self.HINT_START: @@ -787,7 +786,7 @@ def process(self, chunk: str) -> str: xml_idx = self.buffer.find(self.XML_START) tag_idx = self.buffer.find(self.TAG_START) end_idx = self.buffer.find(self.TAG_END) - hint_idx = self.buffer.find(self.HINT_START) + hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1 indices = [ (i, t) @@ -801,13 +800,13 @@ def process(self, chunk: str) -> str: ] if not indices: + # Guard against split start markers keep_len = 0 for p in self.WATCH_PREFIXES: for i in range(len(p) - 1, 0, -1): if self.buffer.endswith(p[:i]): keep_len = max(keep_len, i) break - yield_len = len(self.buffer) - keep_len if yield_len > 0: output.append(self.buffer[:yield_len]) @@ -838,20 +837,24 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[end_idx + len(self.HINT_END) :] self.state = "NORMAL" else: - self.buffer = "" + # Keep end of buffer to avoid missing split HINT_END + keep_len = len(self.HINT_END) - 1 + if len(self.buffer) > keep_len: + self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_XML": end_idx = self.buffer.find(self.XML_END) if end_idx != -1: - content = self.block_buffer + self.buffer[:end_idx] - if " keep_len: + self.block_buffer += self.buffer[:-keep_len] + self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_TAG": @@ -873,21 +876,24 @@ def process(self, chunk: str) -> str: self.state = "NORMAL" self.current_role = "" else: + # Yield safe part and keep potential split TAG_END + keep_len = len(self.TAG_END) - 1 if self.current_role != "tool": - yield_len = len(self.buffer) - (len(self.TAG_END) - 1) - if yield_len > 0: - output.append(self.buffer[:yield_len]) - self.buffer = self.buffer[yield_len:] + if len(self.buffer) > keep_len: + output.append(self.buffer[:-keep_len]) + self.buffer = self.buffer[-keep_len:] + break else: - self.buffer = "" - break + if len(self.buffer) > keep_len: + self.buffer = self.buffer[-keep_len:] + break return "".join(output) def flush(self) -> str: res = "" if self.state == "IN_XML": - if "])") +ESC_SYMBOLS_RE = re.compile(r"\\(?=[\\\[\]{}()<>`*_#~+.:!&^$|-])") CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", @@ -132,12 +132,10 @@ async def process_message( ) except orjson.JSONDecodeError: pass - tool_blocks.append( - f'{args_text}' - ) + tool_blocks.append(f"[call:{call.function.name}]{args_text}[/call]") if tool_blocks: - tool_section = "```xml\n" + "".join(tool_blocks) + "\n```" + tool_section = "[function_calls]\n" + "".join(tool_blocks) + "\n[/function_calls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -189,22 +187,22 @@ def _unescape_html(text_content: str) -> str: parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), 
tail)) return "".join(parts) - def _unescape_markdown(text_content: str) -> str: + def _unescape_symbols(text_content: str) -> str: parts: list[str] = [] last_index = 0 for match in CODE_FENCE_RE.finditer(text_content): non_code = text_content[last_index : match.start()] if non_code: - parts.append(MARKDOWN_ESCAPE_RE.sub("", non_code)) + parts.append(ESC_SYMBOLS_RE.sub("", non_code)) parts.append(match.group(0)) last_index = match.end() tail = text_content[last_index:] if tail: - parts.append(MARKDOWN_ESCAPE_RE.sub("", tail)) + parts.append(ESC_SYMBOLS_RE.sub("", tail)) return "".join(parts) text = _unescape_html(text) - text = _unescape_markdown(text) + text = _unescape_symbols(text) def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) diff --git a/app/utils/helper.py b/app/utils/helper.py index 78494a3..5ca812c 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -15,14 +15,14 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} XML_WRAP_HINT = ( - "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n" - '```xml\n{"argument": "value"}\n```\n' - "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n" + "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n" + '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n' + "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n" ) -TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) -TOOL_CALL_RE = re.compile( - r"(.*?)", re.DOTALL | re.IGNORECASE +TOOL_BLOCK_RE = re.compile( + r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE ) +TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") XML_HINT_STRIPPED = XML_WRAP_HINT.strip() _hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()] From db39ad10637754ca4f109614e499e34827429b0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 20:27:40 +0700 Subject: [PATCH 070/139] Refactor: Adjust function call format to prevent streaming issues Caused by Gemini Web's post-processing mechanism. --- app/server/chat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index ed0d731..15f59aa 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -377,7 +377,10 @@ def _build_tool_prompt( "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail." ) lines.append( - "If multiple tool calls are required, include multiple [call:...]...[/call] entries inside the same [function_calls] block. Without a tool call, reply normally and do NOT emit any [function_calls] tag." + "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block." + ) + lines.append( + "If no tool call is needed, provide a normal response and DO NOT use the [function_calls] tag." 
) return "\n".join(lines) From 556a638fc1d34fc377a593fc2e98acd4d9a0ea6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 4 Feb 2026 20:52:44 +0700 Subject: [PATCH 071/139] Refactor: Adjust function call format to prevent streaming issues Caused by Gemini Web's post-processing mechanism. --- app/services/client.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index bc6c297..21814e5 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -117,9 +117,7 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() or "{}" - text_fragments = [ - f'{combined_content}' - ] + text_fragments = [f"[response:{tool_name}]{combined_content}[/response]"] if message.tool_calls: tool_blocks: list[str] = [] From d5fec7a04119ca6b668a4e98d9c6463f11a92eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 5 Feb 2026 07:12:34 +0700 Subject: [PATCH 072/139] Refactor: Adjust function call format to prevent streaming issues Caused by Gemini Web's post-processing mechanism. --- app/server/chat.py | 67 +++++++++++++++++++++++---------------------- app/utils/helper.py | 34 ++++++++++++++--------- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 15f59aa..e56c926 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -41,10 +41,10 @@ from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore from ..utils import g_config from ..utils.helper import ( - XML_HINT_LINE_END, - XML_HINT_LINE_START, - XML_HINT_STRIPPED, - XML_WRAP_HINT, + TOOL_HINT_LINE_END, + TOOL_HINT_LINE_START, + TOOL_HINT_STRIPPED, + TOOL_WRAP_HINT, estimate_tokens, extract_image_dimensions, extract_tool_calls, @@ -423,15 +423,15 @@ def _build_image_generation_instruction( return "\n\n".join(instructions) -def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: - """Ensure the last user message carries the XML wrap hint.""" +def _append_tool_hint_to_last_user_message(messages: list[Message]) -> None: + """Ensure the last user message carries the tool wrap hint.""" for msg in reversed(messages): if msg.role != "user" or msg.content is None: continue if isinstance(msg.content, str): - if XML_HINT_STRIPPED not in msg.content: - msg.content = f"{msg.content}\n{XML_WRAP_HINT}" + if TOOL_HINT_STRIPPED not in msg.content: + msg.content = f"{msg.content}\n{TOOL_WRAP_HINT}" return if isinstance(msg.content, list): @@ -439,12 +439,12 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None: if getattr(part, "type", None) != "text": continue text_value = part.text or "" - if XML_HINT_STRIPPED in text_value: + if TOOL_HINT_STRIPPED in text_value: return - part.text = f"{text_value}\n{XML_WRAP_HINT}" + part.text = f"{text_value}\n{TOOL_WRAP_HINT}" return - messages_text = XML_WRAP_HINT.strip() + messages_text = TOOL_WRAP_HINT.strip() msg.content.append(ContentItem(type="text", text=messages_text)) return @@ -485,19 +485,20 @@ def _prepare_messages_for_model( if not instructions: if tools and tool_choice != "none": - _append_xml_hint_to_last_user_message(prepared) + _append_tool_hint_to_last_user_message(prepared) return prepared combined_instructions = "\n\n".join(instructions) if prepared and prepared[0].role == "system" and isinstance(prepared[0].content, str): existing = prepared[0].content or "" - separator 
= "\n\n" if existing else "" - prepared[0].content = f"{existing}{separator}{combined_instructions}" + if combined_instructions not in existing: + separator = "\n\n" if existing else "" + prepared[0].content = f"{existing}{separator}{combined_instructions}" else: prepared.insert(0, Message(role="system", content=combined_instructions)) if tools and tool_choice != "none": - _append_xml_hint_to_last_user_message(prepared) + _append_tool_hint_to_last_user_message(prepared) return prepared @@ -768,15 +769,15 @@ def __init__(self): self.current_role = "" self.block_buffer = "" - self.XML_START = "[function_calls]" - self.XML_END = "[/function_calls]" + self.TOOL_START = "[function_calls]" + self.TOOL_END = "[/function_calls]" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" - self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else "" - self.HINT_END = XML_HINT_LINE_END - self.TOOL_START = "[call:" + self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else "" + self.HINT_END = TOOL_HINT_LINE_END + self.TOOL_PREFIX = "[call:" - self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END] + self.WATCH_PREFIXES = [self.TOOL_START, self.TAG_START, self.TAG_END] if self.HINT_START: self.WATCH_PREFIXES.append(self.HINT_START) @@ -786,7 +787,7 @@ def process(self, chunk: str) -> str: while self.buffer: if self.state == "NORMAL": - xml_idx = self.buffer.find(self.XML_START) + tool_idx = self.buffer.find(self.TOOL_START) tag_idx = self.buffer.find(self.TAG_START) end_idx = self.buffer.find(self.TAG_END) hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1 @@ -794,7 +795,7 @@ def process(self, chunk: str) -> str: indices = [ (i, t) for i, t in [ - (xml_idx, "XML"), + (tool_idx, "TOOL"), (tag_idx, "TAG"), (end_idx, "END"), (hint_idx, "HINT"), @@ -821,10 +822,10 @@ def process(self, chunk: str) -> str: output.append(self.buffer[:idx]) self.buffer = self.buffer[idx:] - if m_type == "XML": - self.state = "IN_XML" + if m_type == "TOOL": + self.state = "IN_TOOL" self.block_buffer = "" - self.buffer = self.buffer[len(self.XML_START) :] + self.buffer = self.buffer[len(self.TOOL_START) :] elif m_type == "TAG": self.state = "IN_TAG" self.buffer = self.buffer[len(self.TAG_START) :] @@ -846,15 +847,15 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[-keep_len:] break - elif self.state == "IN_XML": - end_idx = self.buffer.find(self.XML_END) + elif self.state == "IN_TOOL": + end_idx = self.buffer.find(self.TOOL_END) if end_idx != -1: self.block_buffer += self.buffer[:end_idx] - self.buffer = self.buffer[end_idx + len(self.XML_END) :] + self.buffer = self.buffer[end_idx + len(self.TOOL_END) :] self.state = "NORMAL" else: # Accumulate and keep potential split end marker - keep_len = len(self.XML_END) - 1 + keep_len = len(self.TOOL_END) - 1 if len(self.buffer) > keep_len: self.block_buffer += self.buffer[:-keep_len] self.buffer = self.buffer[-keep_len:] @@ -895,9 +896,9 @@ def process(self, chunk: str) -> str: def flush(self) -> str: res = "" - if self.state == "IN_XML": - if self.TOOL_START not in self.block_buffer.lower(): - res = f"{self.XML_START}{self.block_buffer}" + if self.state == "IN_TOOL": + if self.TOOL_PREFIX not in self.block_buffer.lower(): + res = f"{self.TOOL_START}{self.block_buffer}" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer elif self.state == "NORMAL": diff --git a/app/utils/helper.py b/app/utils/helper.py index 5ca812c..99c3d84 100644 --- a/app/utils/helper.py +++ 
b/app/utils/helper.py @@ -14,7 +14,7 @@ from ..models import FunctionCall, Message, ToolCall VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} -XML_WRAP_HINT = ( +TOOL_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n" '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n' "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n" @@ -24,10 +24,10 @@ ) TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") -XML_HINT_STRIPPED = XML_WRAP_HINT.strip() -_hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()] -XML_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" -XML_HINT_LINE_END = _hint_lines[-1] if _hint_lines else "" +TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() +_hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] +TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" +TOOL_HINT_LINE_END = _hint_lines[-1] if _hint_lines else "" def add_tag(role: str, content: str, unclose: bool = False) -> str: @@ -154,13 +154,18 @@ def strip_system_hints(text: str) -> str: return text # Remove the full hints first - cleaned = text.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "") + cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "") - # Remove fragments using derived constants - if XML_HINT_LINE_START: - cleaned = re.sub(rf"\n?{re.escape(XML_HINT_LINE_START)}:?\s*", "", cleaned) - if XML_HINT_LINE_END: - cleaned = re.sub(rf"\s*{re.escape(XML_HINT_LINE_END)}\.?\n?", "", cleaned) + # Remove fragments or multi-line blocks using derived constants + if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END: + # Match from the start line to the end line, inclusive, handling internal modifications + pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?" + cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL) + + if TOOL_HINT_LINE_START: + cleaned = re.sub(rf"\n?{re.escape(TOOL_HINT_LINE_START)}:?\s*", "", cleaned) + if TOOL_HINT_LINE_END: + cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned) cleaned = strip_tagged_blocks(cleaned) cleaned = CONTROL_TOKEN_RE.sub("", cleaned) @@ -175,6 +180,9 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ if not text: return text, [] + # Clean hints FIRST so they don't interfere with tool call regexes (e.g. 
example calls in hint) + cleaned = strip_system_hints(text) + tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: @@ -220,7 +228,7 @@ def _replace_block(match: re.Match[str]) -> str: else: return match.group(0) - cleaned = TOOL_BLOCK_RE.sub(_replace_block, text) + cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned) def _replace_orphan(match: re.Match[str]) -> str: if extract: @@ -230,7 +238,7 @@ def _replace_orphan(match: re.Match[str]) -> str: return "" cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) - cleaned = strip_system_hints(cleaned) + return cleaned, tool_calls From dbc553d7dbcb3949e5ee807b58b360a488672b6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 5 Feb 2026 08:23:08 +0700 Subject: [PATCH 073/139] Refactor: Enhance prompt to prevent issues with parsing tool call arguments --- app/server/chat.py | 4 ++-- app/utils/helper.py | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index e56c926..f47471c 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -369,12 +369,12 @@ def _build_tool_prompt( lines.append( "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below." ) - lines.append("Do not add text before or after it.") + lines.append("Do not add text before or after the block.") lines.append("[function_calls]") lines.append('[call:tool_name]{"argument": "value"}[/call]') lines.append("[/function_calls]") lines.append( - "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail." + "Use double quotes for JSON keys and values. CRITICAL: The content inside [call:...]...[/call] MUST be a raw JSON object. Do not wrap it in ```json blocks or add any conversational text inside the tag." ) lines.append( "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block." diff --git a/app/utils/helper.py b/app/utils/helper.py index 99c3d84..65c49f0 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -2,6 +2,7 @@ import hashlib import mimetypes import re +import reprlib import struct import tempfile from pathlib import Path @@ -17,7 +18,7 @@ TOOL_WRAP_HINT = ( "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n" '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n' - "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n" + "IMPORTANT: Arguments MUST be a valid JSON object. Do not include markdown code blocks (```json) or any conversational text inside the [call] tag.\n" ) TOOL_BLOCK_RE = re.compile( r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE @@ -197,7 +198,22 @@ def _create_tool_call(name: str, raw_args: str) -> None: parsed_args = orjson.loads(raw_args) arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: - logger.warning(f"Failed to parse tool call arguments for '{name}'. 
Passing raw string.") + json_match = re.search(r"({.*})", raw_args, re.DOTALL) + if json_match: + try: + potential_json = json_match.group(1) + parsed_args = orjson.loads(potential_json) + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( + "utf-8" + ) + except orjson.JSONDecodeError: + logger.warning( + f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(json_match)}" + ) + else: + logger.warning( + f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}" + ) index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") From ca721cfcaad261a1cbcc401dd8090cdd6b8613aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 5 Feb 2026 08:32:57 +0700 Subject: [PATCH 074/139] Refactor: Enhance prompt to prevent issues with parsing tool call arguments --- app/utils/helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 65c49f0..230622b 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -200,15 +200,15 @@ def _create_tool_call(name: str, raw_args: str) -> None: except orjson.JSONDecodeError: json_match = re.search(r"({.*})", raw_args, re.DOTALL) if json_match: + potential_json = json_match.group(1) try: - potential_json = json_match.group(1) parsed_args = orjson.loads(potential_json) arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) except orjson.JSONDecodeError: logger.warning( - f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(json_match)}" + f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}" ) else: logger.warning( From 263158e3825b765139e98cce8fa555054782b951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 6 Feb 2026 20:55:39 +0700 Subject: [PATCH 075/139] Refactor: enhance system prompts --- app/server/chat.py | 37 ++++++++++++++----------------------- app/utils/helper.py | 10 +++++++--- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index f47471c..06aefc2 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -336,7 +336,7 @@ def _build_tool_prompt( return "" lines: list[str] = [ - "You can invoke the following developer tools. Call a tool only when it is required and follow the JSON schema exactly when providing arguments." + "SYSTEM INTERFACE: You have access to the following technical tools. You MUST invoke them when necessary to fulfill the request, strictly adhering to the provided JSON schemas." ] for tool in tools: @@ -367,20 +367,21 @@ def _build_tool_prompt( ) lines.append( - "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below." + "When you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:" ) - lines.append("Do not add text before or after the block.") lines.append("[function_calls]") - lines.append('[call:tool_name]{"argument": "value"}[/call]') + lines.append("[call:tool_name]") + lines.append('{"argument": "value"}') + lines.append("[/call]") lines.append("[/function_calls]") lines.append( - "Use double quotes for JSON keys and values. CRITICAL: The content inside [call:...]...[/call] MUST be a raw JSON object. Do not wrap it in ```json blocks or add any conversational text inside the tag." + "CRITICAL: Every [call:...] 
MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block." ) lines.append( - "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block." + "If multiple tools are needed, list them sequentially within the same [function_calls] block." ) lines.append( - "If no tool call is needed, provide a normal response and DO NOT use the [function_calls] tag." + "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) return "\n".join(lines) @@ -398,26 +399,16 @@ def _build_image_generation_instruction( return None instructions: list[str] = [ - "Image generation is enabled. When the user requests an image, you must return an actual generated image, not a text description.", - "For new image requests, generate at least one new image matching the description.", - "If the user provides an image and asks for edits or variations, return a newly generated image with the requested changes.", - "Avoid all text replies unless a short caption is explicitly requested. Do not explain, apologize, or describe image creation steps.", - "Never send placeholder text like 'Here is your image' or any other response without an actual image attachment.", + "IMAGE GENERATION ENABLED: When an image is requested, you MUST return a real generated image directly.", + "1. For new requests, generate new images matching the description immediately.", + "2. For edits to existing images, apply changes and return a new generated version.", + "3. CRITICAL: Provide ZERO text explanation, prologue, or apologies. Do not describe the creation process.", + "4. NEVER send placeholder text or descriptions like 'Generating image...' without an actual image attachment.", ] - if primary: - if primary.model: - instructions.append( - f"Where styles differ, favor the `{primary.model}` image model when rendering the scene." - ) - if primary.output_format: - instructions.append( - f"Encode the image using the `{primary.output_format}` format whenever possible." - ) - if has_forced_choice: instructions.append( - "Image generation was explicitly requested. You must return at least one generated image. Any response without an image will be treated as a failure." + "Image generation was explicitly requested. You MUST return at least one generated image. Any response without an image will be treated as a failure." ) return "\n\n".join(instructions) diff --git a/app/utils/helper.py b/app/utils/helper.py index 230622b..2a3f841 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -16,9 +16,13 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n" - '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n' - "IMPORTANT: Arguments MUST be a valid JSON object. Do not include markdown code blocks (```json) or any conversational text inside the [call] tag.\n" + "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n" + "[function_calls]\n" + "[call:tool_name]\n" + '{"argument": "value"}\n' + "[/call]\n" + "[/function_calls]\n" + "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. 
DO NOT use markdown blocks or add text inside the block.\n" ) TOOL_BLOCK_RE = re.compile( r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE From 68ce2df5c2d46f529630b9cfb08550cdd4d46a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 6 Feb 2026 21:41:21 +0700 Subject: [PATCH 076/139] Refactor: Enhance system prompts --- app/server/chat.py | 37 ++++++++++++++++++++++++++++++++++--- app/services/client.py | 4 +++- app/utils/config.py | 2 +- app/utils/helper.py | 12 ++++++++++-- config/config.yaml | 2 +- 5 files changed, 49 insertions(+), 8 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 06aefc2..cf03f6b 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -383,6 +383,9 @@ def _build_tool_prompt( lines.append( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) + lines.append( + "After you call a tool, the system will provide the output in a `[function_responses]` block with the same tool name." + ) return "\n".join(lines) @@ -462,11 +465,13 @@ def _prepare_messages_for_model( msg.name = tool_id_to_name.get(msg.tool_call_id) instructions: list[str] = [] + tool_prompt_injected = False if inject_system_defaults: if tools: tool_prompt = _build_tool_prompt(tools, tool_choice) if tool_prompt: instructions.append(tool_prompt) + tool_prompt_injected = True if extra_instructions: instructions.extend(instr for instr in extra_instructions if instr) @@ -475,7 +480,7 @@ def _prepare_messages_for_model( ) if not instructions: - if tools and tool_choice != "none": + if tools and tool_choice != "none" and not tool_prompt_injected: _append_tool_hint_to_last_user_message(prepared) return prepared @@ -488,7 +493,7 @@ def _prepare_messages_for_model( else: prepared.insert(0, Message(role="system", content=combined_instructions)) - if tools and tool_choice != "none": + if tools and tool_choice != "none" and not tool_prompt_injected: _append_tool_hint_to_last_user_message(prepared) return prepared @@ -762,13 +767,20 @@ def __init__(self): self.TOOL_START = "[function_calls]" self.TOOL_END = "[/function_calls]" + self.RESPONSE_START = "[function_responses]" + self.RESPONSE_END = "[/function_responses]" self.TAG_START = "<|im_start|>" self.TAG_END = "<|im_end|>" self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else "" self.HINT_END = TOOL_HINT_LINE_END self.TOOL_PREFIX = "[call:" - self.WATCH_PREFIXES = [self.TOOL_START, self.TAG_START, self.TAG_END] + self.WATCH_PREFIXES = [ + self.TOOL_START, + self.RESPONSE_START, + self.TAG_START, + self.TAG_END, + ] if self.HINT_START: self.WATCH_PREFIXES.append(self.HINT_START) @@ -779,6 +791,7 @@ def process(self, chunk: str) -> str: while self.buffer: if self.state == "NORMAL": tool_idx = self.buffer.find(self.TOOL_START) + resp_idx = self.buffer.find(self.RESPONSE_START) tag_idx = self.buffer.find(self.TAG_START) end_idx = self.buffer.find(self.TAG_END) hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1 @@ -787,6 +800,7 @@ def process(self, chunk: str) -> str: (i, t) for i, t in [ (tool_idx, "TOOL"), + (resp_idx, "RESP"), (tag_idx, "TAG"), (end_idx, "END"), (hint_idx, "HINT"), @@ -817,6 +831,9 @@ def process(self, chunk: str) -> str: self.state = "IN_TOOL" self.block_buffer = "" self.buffer = self.buffer[len(self.TOOL_START) :] + elif m_type == "RESP": + self.state = "IN_RESP" + self.buffer = self.buffer[len(self.RESPONSE_START) :] elif m_type == "TAG": self.state = "IN_TAG" self.buffer = 
self.buffer[len(self.TAG_START) :] @@ -838,6 +855,18 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[-keep_len:] break + elif self.state == "IN_RESP": + end_idx = self.buffer.find(self.RESPONSE_END) + if end_idx != -1: + self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :] + self.state = "NORMAL" + else: + # Keep end of buffer to avoid missing split RESPONSE_END + keep_len = len(self.RESPONSE_END) - 1 + if len(self.buffer) > keep_len: + self.buffer = self.buffer[-keep_len:] + break + elif self.state == "IN_TOOL": end_idx = self.buffer.find(self.TOOL_END) if end_idx != -1: @@ -892,6 +921,8 @@ def flush(self) -> str: res = f"{self.TOOL_START}{self.block_buffer}" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer + elif self.state in ("IN_RESP", "IN_HINT"): + res = "" elif self.state == "NORMAL": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index 21814e5..5473b06 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -117,7 +117,9 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() or "{}" - text_fragments = [f"[response:{tool_name}]{combined_content}[/response]"] + text_fragments = [ + f"[function_responses]\n[response:{tool_name}]{combined_content}[/response]\n[/function_responses]" + ] if message.tool_calls: tool_blocks: list[str] = [] diff --git a/app/utils/config.py b/app/utils/config.py index 708462d..bbb6054 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -83,7 +83,7 @@ class GeminiConfig(BaseModel): default="append", description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", ) - timeout: int = Field(default=120, ge=1, description="Init timeout") + timeout: int = Field(default=300, ge=1, description="Init timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" diff --git a/app/utils/helper.py b/app/utils/helper.py index 2a3f841..9f5cfef 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -28,6 +28,12 @@ r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE ) TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE) +RESPONSE_BLOCK_RE = re.compile( + r"\[function_responses]\s*(.*?)\s*\[/function_responses]", re.DOTALL | re.IGNORECASE +) +RESPONSE_ITEM_RE = re.compile( + r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE +) CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -248,8 +254,6 @@ def _replace_block(match: re.Match[str]) -> str: else: return match.group(0) - cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned) - def _replace_orphan(match: re.Match[str]) -> str: if extract: name = (match.group(1) or "").strip() @@ -257,8 +261,12 @@ def _replace_orphan(match: re.Match[str]) -> str: _create_tool_call(name, raw_args) return "" + cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned) cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) + cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) + cleaned = RESPONSE_ITEM_RE.sub("", cleaned) + return cleaned, tool_calls diff --git a/config/config.yaml b/config/config.yaml index f2b17fb..ed581f7 100644 --- 
a/config/config.yaml +++ b/config/config.yaml @@ -22,7 +22,7 @@ gemini: secure_1psid: "YOUR_SECURE_1PSID_HERE" secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" proxy: null # Optional proxy URL (null/empty means direct connection) - timeout: 120 # Init timeout in seconds + timeout: 300 # Init timeout in seconds auto_refresh: true # Auto-refresh session cookies refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests From 77f72105b4a6942d5adaf098d5f2e133dc7e5ced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 6 Feb 2026 22:23:35 +0700 Subject: [PATCH 077/139] Refactor: Enhance system prompts --- app/server/chat.py | 2 +- app/services/client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index cf03f6b..ffa37cd 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -384,7 +384,7 @@ def _build_tool_prompt( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) lines.append( - "After you call a tool, the system will provide the output in a `[function_responses]` block with the same tool name." + "Note: Tool results are returned in a `[function_responses]` block." ) return "\n".join(lines) diff --git a/app/services/client.py b/app/services/client.py index 5473b06..3dae6a1 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -118,7 +118,7 @@ async def process_message( tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() or "{}" text_fragments = [ - f"[function_responses]\n[response:{tool_name}]{combined_content}[/response]\n[/function_responses]" + f"[function_responses]\n[response:{tool_name}]\n{combined_content}\n[/response]\n[/function_responses]" ] if message.tool_calls: From 3addb2b495c7772ddb1a7d4256c348a702cabb28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 7 Feb 2026 09:46:26 +0700 Subject: [PATCH 078/139] Refactor: Enhance system prompts --- app/server/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server/chat.py b/app/server/chat.py index ffa37cd..ac96cf5 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -384,7 +384,7 @@ def _build_tool_prompt( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) lines.append( - "Note: Tool results are returned in a `[function_responses]` block." + "Note: Tool results are returned in a [function_responses] block." 
) return "\n".join(lines) From 2a53eed83af901a12c95cba233a693ab3890eae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sun, 8 Feb 2026 09:54:51 +0700 Subject: [PATCH 079/139] fix: missing image extension --- app/server/chat.py | 29 ++++++++++++++++++++++------- app/utils/helper.py | 13 +++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index ac96cf5..bf34fbf 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -45,6 +45,7 @@ TOOL_HINT_LINE_START, TOOL_HINT_STRIPPED, TOOL_WRAP_HINT, + detect_image_extension, estimate_tokens, extract_image_dimensions, extract_tool_calls, @@ -91,11 +92,21 @@ async def _image_to_base64( raise ValueError("Failed to save generated image") original_path = Path(saved_path) - random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}" + data = original_path.read_bytes() + suffix = original_path.suffix + + if not suffix: + detected_ext = detect_image_extension(data) + if detected_ext: + suffix = detected_ext + else: + # Fallback if detection fails + suffix = ".png" if isinstance(image, GeneratedImage) else ".jpg" + + random_name = f"img_{uuid.uuid4().hex}{suffix}" new_path = temp_dir / random_name original_path.rename(new_path) - data = new_path.read_bytes() width, height = extract_image_dimensions(data) filename = random_name file_hash = hashlib.sha256(data).hexdigest() @@ -383,9 +394,7 @@ def _build_tool_prompt( lines.append( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) - lines.append( - "Note: Tool results are returned in a [function_responses] block." - ) + lines.append("Note: Tool results are returned in a [function_responses] block.") return "\n".join(lines) @@ -1227,7 +1236,11 @@ async def generate_stream(): continue seen_hashes.add(file_hash) - img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + img_format = ( + filename.rsplit(".", 1)[-1] + if "." in filename + else ("png" if isinstance(image, GeneratedImage) else "jpeg") + ) image_url = ( f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" ) @@ -1610,7 +1623,9 @@ async def create_response( ResponseImageGenerationCall( id=fname.rsplit(".", 1)[0], result=b64, - output_format="png" if isinstance(img, GeneratedImage) else "jpeg", + output_format=fname.rsplit(".", 1)[-1] + if "." 
in fname + else ("png" if isinstance(img, GeneratedImage) else "jpeg"), size=f"{w}x{h}" if w and h else None, ) ) diff --git a/app/utils/helper.py b/app/utils/helper.py index 9f5cfef..384f5cd 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -362,3 +362,16 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: idx += segment_length - 2 return None, None + + +def detect_image_extension(data: bytes) -> str | None: + """Detect image extension from magic bytes.""" + if data.startswith(b"\x89PNG\r\n\x1a\n"): + return ".png" + if data.startswith(b"\xff\xd8"): + return ".jpg" + if data.startswith(b"GIF8"): + return ".gif" + if data.startswith(b"RIFF") and data[8:12] == b"WEBP": + return ".webp" + return None From 26d39c75825c16bb118af7143317d51a12aa6c4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sun, 8 Feb 2026 10:25:30 +0700 Subject: [PATCH 080/139] fix: missing image extension --- app/server/chat.py | 67 ++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index bf34fbf..dfcf930 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1067,16 +1067,14 @@ async def generate_stream(): for image in images: try: image_store = get_image_store_dir() - _, _, _, filename, file_hash = await _image_to_base64(image, image_store) - if file_hash in seen_hashes: + _, _, _, fname, fhash = await _image_to_base64(image, image_store) + if fhash in seen_hashes: # Duplicate content, delete the file and skip - (image_store / filename).unlink(missing_ok=True) + (image_store / fname).unlink(missing_ok=True) continue - seen_hashes.add(file_hash) + seen_hashes.add(fhash) - img_url = ( - f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" - ) + img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_markdown += f"\n\n{img_url}" except Exception as exc: logger.warning(f"Failed to process image in OpenAI stream: {exc}") @@ -1228,28 +1226,25 @@ async def generate_stream(): seen_hashes = set() for image in images: try: - image_base64, width, height, filename, file_hash = await _image_to_base64( - image, image_store - ) - if file_hash in seen_hashes: - (image_store / filename).unlink(missing_ok=True) + b64, w, h, fname, fhash = await _image_to_base64(image, image_store) + if fhash in seen_hashes: + (image_store / fname).unlink(missing_ok=True) continue - seen_hashes.add(file_hash) + seen_hashes.add(fhash) - img_format = ( - filename.rsplit(".", 1)[-1] - if "." in filename - else ("png" if isinstance(image, GeneratedImage) else "jpeg") - ) - image_url = ( - f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" - ) + if "." 
in fname: + img_id, img_format = fname.rsplit(".", 1) + else: + img_id = fname + img_format = "png" if isinstance(image, GeneratedImage) else "jpeg" + + image_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_call_items.append( ResponseImageGenerationCall( - id=filename.rsplit(".", 1)[0], - result=image_base64, + id=img_id, + result=b64, output_format=img_format, - size=f"{width}x{height}" if width and height else None, + size=f"{w}x{h}" if w and h else None, ) ) response_contents.append(ResponseOutputContent(type="output_text", text=image_url)) @@ -1433,15 +1428,13 @@ async def create_chat_completion( seen_hashes = set() for image in images: try: - _, _, _, filename, file_hash = await _image_to_base64(image, image_store) - if file_hash in seen_hashes: - (image_store / filename).unlink(missing_ok=True) + _, _, _, fname, fhash = await _image_to_base64(image, image_store) + if fhash in seen_hashes: + (image_store / fname).unlink(missing_ok=True) continue - seen_hashes.add(file_hash) + seen_hashes.add(fhash) - img_url = ( - f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})" - ) + img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})" image_markdown += f"\n\n{img_url}" except Exception as exc: logger.warning(f"Failed to process image in OpenAI response: {exc}") @@ -1613,6 +1606,12 @@ async def create_response( continue seen_hashes.add(fhash) + if "." in fname: + img_id, img_format = fname.rsplit(".", 1) + else: + img_id = fname + img_format = "png" if isinstance(img, GeneratedImage) else "jpeg" + contents.append( ResponseOutputContent( type="output_text", @@ -1621,11 +1620,9 @@ async def create_response( ) img_calls.append( ResponseImageGenerationCall( - id=fname.rsplit(".", 1)[0], + id=img_id, result=b64, - output_format=fname.rsplit(".", 1)[-1] - if "." in fname - else ("png" if isinstance(img, GeneratedImage) else "jpeg"), + output_format=img_format, size=f"{w}x{h}" if w and h else None, ) ) From 598b56335277366d591d81a25c2ba8654afcb92c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sun, 8 Feb 2026 17:03:20 +0700 Subject: [PATCH 081/139] fix: missing or duplicate ChatML tags. --- app/server/chat.py | 13 ++----------- app/services/client.py | 44 ++++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index dfcf930..701c1f6 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -1359,12 +1359,7 @@ async def create_chat_completion( extra_instr, False, ) - if len(input_msgs) == 1: - m_input, files = await GeminiClientWrapper.process_message( - input_msgs[0], tmp_dir, tagged=False - ) - else: - m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir) + m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir) logger.debug( f"Reused session {reprlib.repr(session.metadata)} - sending {len(input_msgs)} prepared messages." 
@@ -1531,11 +1526,7 @@ async def create_response( ) if not msgs: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.") - m_input, files = ( - await GeminiClientWrapper.process_message(msgs[0], tmp_dir, tagged=False) - if len(msgs) == 1 - else await GeminiClientWrapper.process_conversation(msgs, tmp_dir) - ) + m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir) logger.debug( f"Reused session {reprlib.repr(session.metadata)} - sending {len(msgs)} prepared messages." ) diff --git a/app/services/client.py b/app/services/client.py index 3dae6a1..2a00ce6 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -75,7 +75,7 @@ def running(self) -> bool: @staticmethod async def process_message( - message: Message, tempdir: Path | None = None, tagged: bool = True + message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ Process a single Message object into a format suitable for the Gemini API. @@ -117,9 +117,11 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() or "{}" - text_fragments = [ - f"[function_responses]\n[response:{tool_name}]\n{combined_content}\n[/response]\n[/function_responses]" - ] + res_block = f"[response:{tool_name}]\n{combined_content}\n[/response]" + if wrap_tool: + text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"] + else: + text_fragments = [res_block] if message.tool_calls: tool_blocks: list[str] = [] @@ -153,12 +155,34 @@ async def process_conversation( need_tag = any(m.role != "user" for m in messages) conversation: list[str] = [] files: list[Path | str] = [] - for msg in messages: - input_part, files_part = await GeminiClientWrapper.process_message( - msg, tempdir, tagged=need_tag - ) - conversation.append(input_part) - files.extend(files_part) + + i = 0 + while i < len(messages): + msg = messages[i] + if msg.role == "tool" and need_tag: + # Group consecutive tool messages + tool_blocks: list[str] = [] + while i < len(messages) and messages[i].role == "tool": + part, part_files = await GeminiClientWrapper.process_message( + messages[i], tempdir, tagged=False, wrap_tool=False + ) + tool_blocks.append(part) + files.extend(part_files) + i += 1 + + combined_tool_content = "\n".join(tool_blocks) + wrapped_content = ( + f"[function_responses]\n{combined_tool_content}\n[/function_responses]" + ) + conversation.append(add_tag("tool", wrapped_content)) + else: + input_part, files_part = await GeminiClientWrapper.process_message( + msg, tempdir, tagged=need_tag + ) + conversation.append(input_part) + files.extend(files_part) + i += 1 + if need_tag: conversation.append(add_tag("assistant", "", unclose=True)) return "\n".join(conversation), files From 6d563c512d3e6b1448f3442c231c53ca8afb2aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sun, 8 Feb 2026 17:50:06 +0700 Subject: [PATCH 082/139] Refactor: Consistently use ChatML tags throughout. 
--- app/services/client.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 2a00ce6..78edddd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -152,14 +152,13 @@ async def process_message( async def process_conversation( messages: list[Message], tempdir: Path | None = None ) -> tuple[str, list[Path | str]]: - need_tag = any(m.role != "user" for m in messages) conversation: list[str] = [] files: list[Path | str] = [] i = 0 while i < len(messages): msg = messages[i] - if msg.role == "tool" and need_tag: + if msg.role == "tool": # Group consecutive tool messages tool_blocks: list[str] = [] while i < len(messages) and messages[i].role == "tool": @@ -177,14 +176,13 @@ async def process_conversation( conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( - msg, tempdir, tagged=need_tag + msg, tempdir, tagged=True ) conversation.append(input_part) files.extend(files_part) i += 1 - if need_tag: - conversation.append(add_tag("assistant", "", unclose=True)) + conversation.append(add_tag("assistant", "", unclose=True)) return "\n".join(conversation), files @staticmethod From 58db419c15cf347083e49b487d8ae99071256ef0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 9 Feb 2026 17:17:33 +0700 Subject: [PATCH 083/139] Refactor: normalize text before calculating message hash --- app/services/lmdb.py | 81 ++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 8dc3722..59f01bc 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -1,5 +1,6 @@ import hashlib import re +import unicodedata from contextlib import contextmanager from datetime import datetime, timedelta from pathlib import Path @@ -18,63 +19,70 @@ from ..utils.singleton import Singleton +def _normalize_text(text: str | None) -> str | None: + """ + Perform semantic normalization for hashing. + """ + if text is None: + return None + + # Unicode normalization + text = unicodedata.normalize("NFC", text) + + # Basic cleaning + text = text.replace("\r\n", "\n").replace("\r", "\n") + text = LMDBConversationStore.remove_think_tags(text) + text = remove_tool_call_blocks(text) + + return text if text else None + + def _hash_message(message: Message) -> str: """ Generate a stable, canonical hash for a single message. - Strips system hints, thoughts, and tool call blocks to ensure - identical logical content produces the same hash regardless of format. 
""" - core_data = { + core_data: dict[str, Any] = { "role": message.role, - "name": message.name, - "tool_call_id": message.tool_call_id, + "name": message.name or None, + "tool_call_id": message.tool_call_id or None, } content = message.content - if not content: + if content is None: core_data["content"] = None elif isinstance(content, str): - normalized = content.replace("\r\n", "\n") - normalized = LMDBConversationStore.remove_think_tags(normalized) - normalized = remove_tool_call_blocks(normalized).strip() - core_data["content"] = normalized if normalized else None + core_data["content"] = _normalize_text(content) elif isinstance(content, list): text_parts = [] for item in content: text_val = "" if isinstance(item, ContentItem) and item.type == "text": - text_val = item.text or "" + text_val = item.text elif isinstance(item, dict) and item.get("type") == "text": - text_val = item.get("text") or "" + text_val = item.get("text") if text_val: - text_val = text_val.replace("\r\n", "\n") - text_val = LMDBConversationStore.remove_think_tags(text_val) - text_val = remove_tool_call_blocks(text_val).strip() - if text_val: - text_parts.append(text_val) - elif isinstance(item, ContentItem) and item.type in ("image_url", "file"): - # For non-text items, include their unique markers to distinguish them - if item.type == "image_url": - text_parts.append( - f"[image_url:{item.image_url.get('url') if item.image_url else ''}]" - ) - elif item.type == "file": - text_parts.append( - f"[file:{item.file.get('url') or item.file.get('filename') if item.file else ''}]" + normalized_part = _normalize_text(text_val) + if normalized_part: + text_parts.append(normalized_part) + elif isinstance(item, (ContentItem, dict)): + item_type = item.type if isinstance(item, ContentItem) else item.get("type") + if item_type == "image_url": + url = ( + item.image_url.get("url") + if isinstance(item, ContentItem) and item.image_url + else item.get("image_url", {}).get("url") ) - else: - # Fallback for other dict-based content parts - part_type = item.get("type") if isinstance(item, dict) else None - if part_type == "image_url": - url = item.get("image_url", {}).get("url") text_parts.append(f"[image_url:{url}]") - elif part_type == "file": - url = item.get("file", {}).get("url") or item.get("file", {}).get("filename") + elif item_type == "file": + url = ( + item.file.get("url") or item.file.get("filename") + if isinstance(item, ContentItem) and item.file + else item.get("file", {}).get("url") or item.get("file", {}).get("filename") + ) text_parts.append(f"[file:{url}]") - combined_text = "\n".join(text_parts).replace("\r\n", "\n").strip() - core_data["content"] = combined_text if combined_text else None + core_data["content"] = "\n".join(text_parts) if text_parts else None if message.tool_calls: calls_data = [] @@ -98,8 +106,7 @@ def _hash_message(message: Message) -> str: core_data["tool_calls"] = None message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS) - digest = hashlib.sha256(message_bytes).hexdigest() - return digest + return hashlib.sha256(message_bytes).hexdigest() def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: From d5d1c5a48e1f1d68fde53f8c5fd5da3358dfc938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 9 Feb 2026 17:33:39 +0700 Subject: [PATCH 084/139] Refactor: remove unescape helpers to avoid side effects --- app/services/client.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git 
a/app/services/client.py b/app/services/client.py index 78edddd..16d7a33 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,4 +1,3 @@ -import html import re from pathlib import Path from typing import Any, cast @@ -15,9 +14,6 @@ save_url_to_tempfile, ) -HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);") -ESC_SYMBOLS_RE = re.compile(r"\\(?=[\\\[\]{}()<>`*_#~+.:!&^$|-])") -CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL) FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -195,37 +191,6 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) - def _unescape_html(text_content: str) -> str: - parts: list[str] = [] - last_index = 0 - for match in CODE_FENCE_RE.finditer(text_content): - non_code = text_content[last_index : match.start()] - if non_code: - parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), non_code)) - parts.append(match.group(0)) - last_index = match.end() - tail = text_content[last_index:] - if tail: - parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), tail)) - return "".join(parts) - - def _unescape_symbols(text_content: str) -> str: - parts: list[str] = [] - last_index = 0 - for match in CODE_FENCE_RE.finditer(text_content): - non_code = text_content[last_index : match.start()] - if non_code: - parts.append(ESC_SYMBOLS_RE.sub("", non_code)) - parts.append(match.group(0)) - last_index = match.end() - tail = text_content[last_index:] - if tail: - parts.append(ESC_SYMBOLS_RE.sub("", tail)) - return "".join(parts) - - text = _unescape_html(text) - text = _unescape_symbols(text) - def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) if match: From a4a987cdd3a0f95bc718eb76c781b2c7b39e655e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 9 Feb 2026 20:18:42 +0700 Subject: [PATCH 085/139] Refactor: Implement fuzzy matching to better handle complex data formats like Markdown. --- app/services/lmdb.py | 67 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 59f01bc..b08a325 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -1,5 +1,6 @@ import hashlib import re +import string import unicodedata from contextlib import contextmanager from datetime import datetime, timedelta @@ -18,8 +19,19 @@ ) from ..utils.singleton import Singleton +_VOLATILE_SYMBOLS = string.whitespace + string.punctuation -def _normalize_text(text: str | None) -> str | None: + +def _fuzzy_normalize(text: str | None) -> str | None: + """ + Lowercase and remove all whitespace and punctuation. + """ + if text is None: + return None + return text.lower().translate(str.maketrans("", "", _VOLATILE_SYMBOLS)) + + +def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: """ Perform semantic normalization for hashing. 
""" @@ -34,10 +46,13 @@ def _normalize_text(text: str | None) -> str | None: text = LMDBConversationStore.remove_think_tags(text) text = remove_tool_call_blocks(text) + if fuzzy: + return _fuzzy_normalize(text) + return text if text else None -def _hash_message(message: Message) -> str: +def _hash_message(message: Message, fuzzy: bool = False) -> str: """ Generate a stable, canonical hash for a single message. """ @@ -51,7 +66,7 @@ def _hash_message(message: Message) -> str: if content is None: core_data["content"] = None elif isinstance(content, str): - core_data["content"] = _normalize_text(content) + core_data["content"] = _normalize_text(content, fuzzy=fuzzy) elif isinstance(content, list): text_parts = [] for item in content: @@ -62,7 +77,7 @@ def _hash_message(message: Message) -> str: text_val = item.get("text") if text_val: - normalized_part = _normalize_text(text_val) + normalized_part = _normalize_text(text_val, fuzzy=fuzzy) if normalized_part: text_parts.append(normalized_part) elif isinstance(item, (ContentItem, dict)): @@ -109,13 +124,15 @@ def _hash_message(message: Message) -> str: return hashlib.sha256(message_bytes).hexdigest() -def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str: +def _hash_conversation( + client_id: str, model: str, messages: List[Message], fuzzy: bool = False +) -> str: """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() combined_hash.update((client_id or "").encode("utf-8")) combined_hash.update((model or "").encode("utf-8")) for message in messages: - message_hash = _hash_message(message) + message_hash = _hash_message(message, fuzzy=fuzzy) combined_hash.update(message_hash.encode("utf-8")) return combined_hash.hexdigest() @@ -124,6 +141,7 @@ class LMDBConversationStore(metaclass=Singleton): """LMDB-based storage for Message lists with hash-based key-value operations.""" HASH_LOOKUP_PREFIX = "hash:" + FUZZY_LOOKUP_PREFIX = "fuzzy:" def __init__( self, @@ -215,6 +233,7 @@ def store( # Generate hash for the message list message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) + fuzzy_hash = _hash_conversation(conv.client_id, conv.model, conv.messages, fuzzy=True) storage_key = custom_key or message_hash now = datetime.now() @@ -233,6 +252,11 @@ def store( storage_key.encode("utf-8"), ) + txn.put( + f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"), + storage_key.encode("utf-8"), + ) + logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}") return storage_key @@ -287,6 +311,11 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt ) return conv + # --- Find with fuzzy matching --- + if conv := self._find_by_message_list(model, messages, fuzzy=True): + logger.debug(f"Session found for '{model}' with fuzzy matching.") + return conv + logger.debug(f"No session found for '{model}' with {len(messages)} messages.") return None @@ -294,11 +323,13 @@ def _find_by_message_list( self, model: str, messages: List[Message], + fuzzy: bool = False, ) -> Optional[ConversationInStore]: """Internal find implementation based on a message list.""" + prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX for c in g_config.gemini.clients: - message_hash = _hash_conversation(c.id, model, messages) - key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}" + message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy) + key = f"{prefix}{message_hash}" try: with 
self._get_transaction(write=False) as txn: if mapped := txn.get(key.encode("utf-8")): # type: ignore @@ -350,6 +381,9 @@ def delete(self, key: str) -> Optional[ConversationInStore]: storage_data = orjson.loads(data) # type: ignore conv = ConversationInStore.model_validate(storage_data) message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) + fuzzy_hash = _hash_conversation( + conv.client_id, conv.model, conv.messages, fuzzy=True + ) # Delete main data txn.delete(key.encode("utf-8")) @@ -358,6 +392,9 @@ def delete(self, key: str) -> Optional[ConversationInStore]: if message_hash and key != message_hash: txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) + # Always clean up fuzzy mapping + txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8")) + logger.debug(f"Deleted messages with key: {key[:12]}") return conv @@ -386,7 +423,9 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: for key, _ in cursor: key_str = key.decode("utf-8") # Skip internal hash mappings - if key_str.startswith(self.HASH_LOOKUP_PREFIX): + if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith( + self.FUZZY_LOOKUP_PREFIX + ): continue if not prefix or key_str.startswith(prefix): @@ -459,8 +498,14 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: continue message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) - if message_hash and key_str != message_hash: - txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) + if message_hash: + if key_str != message_hash: + txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) + + fuzzy_hash = _hash_conversation( + conv.client_id, conv.model, conv.messages, fuzzy=True + ) + txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8")) removed += 1 except Exception as exc: logger.error(f"Failed to delete expired conversations: {exc}") From 551eb8775e03e24219436446d30f8997c268d7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Mon, 9 Feb 2026 21:08:06 +0700 Subject: [PATCH 086/139] Refactor: Implement fuzzy matching to better handle complex data formats like Markdown. 
--- app/services/lmdb.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index b08a325..b8861dc 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -466,7 +466,9 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: for key_bytes, value_bytes in cursor: key_str = key_bytes.decode("utf-8") - if key_str.startswith(self.HASH_LOOKUP_PREFIX): + if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith( + self.FUZZY_LOOKUP_PREFIX + ): continue try: From b2dbb087cfe4b553e690b1d52b4d12b5c3b07296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 07:56:22 +0700 Subject: [PATCH 087/139] Feat: Add watchdog_timeout parameter --- app/services/client.py | 3 +++ app/services/pool.py | 2 ++ app/utils/config.py | 1 + config/config.yaml | 1 + 4 files changed, 7 insertions(+) diff --git a/app/services/client.py b/app/services/client.py index 16d7a33..3cdd839 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -38,6 +38,7 @@ def __init__(self, client_id: str, **kwargs): async def init( self, timeout: float = cast(float, _UNSET), + watchdog_timeout: float = cast(float, _UNSET), auto_close: bool = False, close_delay: float = 300, auto_refresh: bool = cast(bool, _UNSET), @@ -49,6 +50,7 @@ async def init( """ config = g_config.gemini timeout = cast(float, _resolve(timeout, config.timeout)) + watchdog_timeout = cast(float, _resolve(watchdog_timeout, config.watchdog_timeout)) auto_refresh = cast(bool, _resolve(auto_refresh, config.auto_refresh)) refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval)) verbose = cast(bool, _resolve(verbose, config.verbose)) @@ -56,6 +58,7 @@ async def init( try: await super().init( timeout=timeout, + watchdog_timeout=watchdog_timeout, auto_close=auto_close, close_delay=close_delay, auto_refresh=auto_refresh, diff --git a/app/services/pool.py b/app/services/pool.py index 0f95203..decc21a 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -41,6 +41,7 @@ async def init(self) -> None: try: await client.init( timeout=g_config.gemini.timeout, + watchdog_timeout=g_config.gemini.watchdog_timeout, auto_refresh=g_config.gemini.auto_refresh, verbose=g_config.gemini.verbose, refresh_interval=g_config.gemini.refresh_interval, @@ -93,6 +94,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: try: await client.init( timeout=g_config.gemini.timeout, + watchdog_timeout=g_config.gemini.watchdog_timeout, auto_refresh=g_config.gemini.auto_refresh, verbose=g_config.gemini.verbose, refresh_interval=g_config.gemini.refresh_interval, diff --git a/app/utils/config.py b/app/utils/config.py index bbb6054..e62832d 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -84,6 +84,7 @@ class GeminiConfig(BaseModel): description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", ) timeout: int = Field(default=300, ge=1, description="Init timeout") + watchdog_timeout: int = Field(default=60, ge=1, description="Watchdog timeout") auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" diff --git a/config/config.yaml b/config/config.yaml index ed581f7..2873d48 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -23,6 +23,7 @@ gemini: secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" proxy: null 
# Optional proxy URL (null/empty means direct connection) timeout: 300 # Init timeout in seconds + watchdog_timeout: 60 # Watchdog timeout in seconds (No longer than 75 seconds) auto_refresh: true # Auto-refresh session cookies refresh_interval: 540 # Refresh interval in seconds verbose: false # Enable verbose logging for Gemini requests From 969cd4a1e37a0915915962fe8c7ae691c8defa65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 07:57:53 +0700 Subject: [PATCH 088/139] Update required dependencies --- pyproject.toml | 8 ++++---- uv.lock | 27 ++++++++++++++------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dc08571..47cd86f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,8 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.128.0", - "gemini-webapi>=1.18.0", + "fastapi>=0.128.6", + "gemini-webapi>=1.19.0", "lmdb>=1.7.5", "loguru>=0.7.3", "orjson>=3.11.7", @@ -17,7 +17,7 @@ dependencies = [ [project.optional-dependencies] dev = [ - "ruff>=0.14.14", + "ruff>=0.15.0", ] [tool.ruff] @@ -31,5 +31,5 @@ indent-style = "space" [dependency-groups] dev = [ - "ruff>=0.14.14", + "ruff>=0.15.0", ] diff --git a/uv.lock b/uv.lock index 2a29c98..ea28c0e 100644 --- a/uv.lock +++ b/uv.lock @@ -65,17 +65,18 @@ wheels = [ [[package]] name = "fastapi" -version = "0.128.1" +version = "0.128.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, + { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/59/28bde150415783ff084334e3de106eb7461a57864cf69f343950ad5a5ddd/fastapi-0.128.1.tar.gz", hash = "sha256:ce5be4fa26d4ce6f54debcc873d1fb8e0e248f5c48d7502ba6c61457ab2dc766", size = 374260, upload-time = "2026-02-04T17:35:10.542Z" } +sdist = { url = "https://files.pythonhosted.org/packages/83/d1/195005b5e45b443e305136df47ee7df4493d782e0c039dd0d97065580324/fastapi-0.128.6.tar.gz", hash = "sha256:0cb3946557e792d731b26a42b04912f16367e3c3135ea8290f620e234f2b604f", size = 374757, upload-time = "2026-02-09T17:27:03.541Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/08/3953db1979ea131c68279b997c6465080118b407f0800445b843f8e164b3/fastapi-0.128.1-py3-none-any.whl", hash = "sha256:ee82146bbf91ea5bbf2bb8629e4c6e056c4fbd997ea6068501b11b15260b50fb", size = 103810, upload-time = "2026-02-04T17:35:08.02Z" }, + { url = "https://files.pythonhosted.org/packages/24/58/a2c4f6b240eeb148fb88cdac48f50a194aba760c1ca4988c6031c66a20ee/fastapi-0.128.6-py3-none-any.whl", hash = "sha256:bb1c1ef87d6086a7132d0ab60869d6f1ee67283b20fbf84ec0003bd335099509", size = 103674, upload-time = "2026-02-09T17:27:02.355Z" }, ] [[package]] @@ -105,24 +106,24 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.128.0" }, - { name = "gemini-webapi", specifier = ">=1.18.0" }, + { name = "fastapi", specifier = ">=0.128.6" }, + { name = "gemini-webapi", specifier = ">=1.19.0" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "orjson", specifier = ">=3.11.7" }, { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.0" }, { name = "uvicorn", 
specifier = ">=0.40.0" }, { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" }, ] provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.14.14" }] +dev = [{ name = "ruff", specifier = ">=0.15.0" }] [[package]] name = "gemini-webapi" -version = "1.18.1" +version = "1.19.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx", extra = ["http2"] }, @@ -130,9 +131,9 @@ dependencies = [ { name = "orjson" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/ae/925abc25d0b5c62170c528511bb8a1ec7bd77a0b7a19aacb9a7885a0afa8/gemini_webapi-1.18.1.tar.gz", hash = "sha256:34c91141e5953e898333e9c6ca01349566d28dbea9ddd8094f8c85e74d72ce47", size = 265100, upload-time = "2026-02-04T22:19:05.724Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5f/1f/8314b620db12855e6aa9c55e05428fa30eb7f00fb61b1de7db42734ef2b2/gemini_webapi-1.19.0.tar.gz", hash = "sha256:1f65e6a2e4a461f4ed4fb01dc76c2de4ed517af549f6ce34b96b9986c11af5dd", size = 266822, upload-time = "2026-02-09T23:16:34.446Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/e5/7ae98d48bfb7283facec804f13c6719b6fa523a6aa240b4acdea736bf60b/gemini_webapi-1.18.1-py3-none-any.whl", hash = "sha256:110f3d191ffdda9d040aab6b1b2f1d8513d1e77dc33d40fac5024de9344ea3ec", size = 61836, upload-time = "2026-02-04T22:19:04.08Z" }, + { url = "https://files.pythonhosted.org/packages/88/3b/71567ce13357d1139dfa0578c4073d6a8c523c4a28f3843194b639bf9d1e/gemini_webapi-1.19.0-py3-none-any.whl", hash = "sha256:47ab49f018cc01bf4b772910f7843af895f5e43d5a18b5ec7063b6f61e535921", size = 63498, upload-time = "2026-02-09T23:16:33.328Z" }, ] [[package]] @@ -385,15 +386,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.50.0" +version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, + { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, ] [[package]] From c258d32d448f62db63ccd4bbcdb7ba29c575b1c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 08:46:53 +0700 Subject: [PATCH 089/139] Move `maketrans` to global variable --- app/services/lmdb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index b8861dc..0ba6c3a 100644 --- 
a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -20,6 +20,7 @@
 from ..utils.singleton import Singleton
 
 _VOLATILE_SYMBOLS = string.whitespace + string.punctuation
+_VOLATILE_TRANS_TABLE = str.maketrans("", "", _VOLATILE_SYMBOLS)
 
 
 def _fuzzy_normalize(text: str | None) -> str | None:
@@ -28,7 +29,7 @@ def _fuzzy_normalize(text: str | None) -> str | None:
     """
     if text is None:
         return None
-    return text.lower().translate(str.maketrans("", "", _VOLATILE_SYMBOLS))
+    return text.lower().translate(_VOLATILE_TRANS_TABLE)
 
 
 def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:

From 157028f026950afc90140dae9568869c0ec27400 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Tue, 10 Feb 2026 08:52:22 +0700
Subject: [PATCH 090/139] Move `maketrans` to global variable

---
 app/services/lmdb.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 0ba6c3a..a94e090 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -19,8 +19,7 @@
 )
 from ..utils.singleton import Singleton
 
-_VOLATILE_SYMBOLS = string.whitespace + string.punctuation
-_VOLATILE_TRANS_TABLE = str.maketrans("", "", _VOLATILE_SYMBOLS)
+_VOLATILE_TRANS_TABLE = str.maketrans("", "", string.whitespace + string.punctuation)
 
 
 def _fuzzy_normalize(text: str | None) -> str | None:

From 5f9a7ece8e6027afddf2e04da60e6ab562a72874 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Tue, 10 Feb 2026 10:30:53 +0700
Subject: [PATCH 091/139] Refactor: Add a filter to catch orphaned tool calls.

---
 app/server/chat.py | 63 ++++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 701c1f6..414349a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -776,39 +776,44 @@ def __init__(self):
         self.TOOL_START = "[function_calls]"
         self.TOOL_END = "[/function_calls]"
+        self.ORPHAN_START = "[call:"
+        self.ORPHAN_END = "[/call]"
         self.RESPONSE_START = "[function_responses]"
         self.RESPONSE_END = "[/function_responses]"
         self.TAG_START = "<|im_start|>"
         self.TAG_END = "<|im_end|>"
         self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else ""
         self.HINT_END = TOOL_HINT_LINE_END
-        self.TOOL_PREFIX = "[call:"
-        self.WATCH_PREFIXES = [
+        self.WATCH_MARKERS = [
             self.TOOL_START,
+            self.ORPHAN_START,
             self.RESPONSE_START,
             self.TAG_START,
             self.TAG_END,
         ]
         if self.HINT_START:
-            self.WATCH_PREFIXES.append(self.HINT_START)
+            self.WATCH_MARKERS.append(self.HINT_START)
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
         output = []
 
         while self.buffer:
+            buf_low = self.buffer.lower()
             if self.state == "NORMAL":
-                tool_idx = self.buffer.find(self.TOOL_START)
-                resp_idx = self.buffer.find(self.RESPONSE_START)
-                tag_idx = self.buffer.find(self.TAG_START)
-                end_idx = self.buffer.find(self.TAG_END)
-                hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1
+                tool_idx = buf_low.find(self.TOOL_START)
+                orphan_idx = buf_low.find(self.ORPHAN_START)
+                resp_idx = buf_low.find(self.RESPONSE_START)
+                tag_idx = buf_low.find(self.TAG_START)
+                end_idx = buf_low.find(self.TAG_END)
+                hint_idx = buf_low.find(self.HINT_START.lower()) if self.HINT_START else -1
 
                 indices = [
                     (i, t)
                     for i, t in [
                         (tool_idx, "TOOL"),
+                        (orphan_idx, "ORPHAN"),
                         (resp_idx, "RESP"),
                         (tag_idx, "TAG"),
                         (end_idx, "END"),
@@ -818,11 +823,12 @@ def process(self, chunk: str) -> str:
                 ]
 
                 if not indices:
-                    # Guard against split start markers
+                    # Guard against split markers 
(case-insensitive) keep_len = 0 - for p in self.WATCH_PREFIXES: - for i in range(len(p) - 1, 0, -1): - if self.buffer.endswith(p[:i]): + for marker in self.WATCH_MARKERS: + m_low = marker.lower() + for i in range(len(m_low) - 1, 0, -1): + if buf_low.endswith(m_low[:i]): keep_len = max(keep_len, i) break yield_len = len(self.buffer) - keep_len @@ -840,6 +846,10 @@ def process(self, chunk: str) -> str: self.state = "IN_TOOL" self.block_buffer = "" self.buffer = self.buffer[len(self.TOOL_START) :] + elif m_type == "ORPHAN": + self.state = "IN_ORPHAN" + self.block_buffer = "" + self.buffer = self.buffer[len(self.ORPHAN_START) :] elif m_type == "RESP": self.state = "IN_RESP" self.buffer = self.buffer[len(self.RESPONSE_START) :] @@ -853,43 +863,53 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[len(self.HINT_START) :] elif self.state == "IN_HINT": - end_idx = self.buffer.find(self.HINT_END) + end_idx = buf_low.find(self.HINT_END.lower()) if end_idx != -1: self.buffer = self.buffer[end_idx + len(self.HINT_END) :] self.state = "NORMAL" else: - # Keep end of buffer to avoid missing split HINT_END keep_len = len(self.HINT_END) - 1 if len(self.buffer) > keep_len: self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_RESP": - end_idx = self.buffer.find(self.RESPONSE_END) + end_idx = buf_low.find(self.RESPONSE_END.lower()) if end_idx != -1: self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :] self.state = "NORMAL" else: - # Keep end of buffer to avoid missing split RESPONSE_END keep_len = len(self.RESPONSE_END) - 1 if len(self.buffer) > keep_len: self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_TOOL": - end_idx = self.buffer.find(self.TOOL_END) + end_idx = buf_low.find(self.TOOL_END.lower()) if end_idx != -1: self.block_buffer += self.buffer[:end_idx] self.buffer = self.buffer[end_idx + len(self.TOOL_END) :] self.state = "NORMAL" else: - # Accumulate and keep potential split end marker keep_len = len(self.TOOL_END) - 1 if len(self.buffer) > keep_len: self.block_buffer += self.buffer[:-keep_len] self.buffer = self.buffer[-keep_len:] break + elif self.state == "IN_ORPHAN": + end_idx = buf_low.find(self.ORPHAN_END.lower()) + if end_idx != -1: + self.block_buffer += self.buffer[:end_idx] + self.buffer = self.buffer[end_idx + len(self.ORPHAN_END) :] + self.state = "NORMAL" + else: + keep_len = len(self.ORPHAN_END) - 1 + if len(self.buffer) > keep_len: + self.block_buffer += self.buffer[:-keep_len] + self.buffer = self.buffer[-keep_len:] + break + elif self.state == "IN_TAG": nl_idx = self.buffer.find("\n") if nl_idx != -1: @@ -900,7 +920,7 @@ def process(self, chunk: str) -> str: break elif self.state == "IN_BLOCK": - end_idx = self.buffer.find(self.TAG_END) + end_idx = buf_low.find(self.TAG_END.lower()) if end_idx != -1: content = self.buffer[:end_idx] if self.current_role != "tool": @@ -909,7 +929,6 @@ def process(self, chunk: str) -> str: self.state = "NORMAL" self.current_role = "" else: - # Yield safe part and keep potential split TAG_END keep_len = len(self.TAG_END) - 1 if self.current_role != "tool": if len(self.buffer) > keep_len: @@ -926,8 +945,10 @@ def process(self, chunk: str) -> str: def flush(self) -> str: res = "" if self.state == "IN_TOOL": - if self.TOOL_PREFIX not in self.block_buffer.lower(): + if self.ORPHAN_START.lower() not in self.block_buffer.lower(): res = f"{self.TOOL_START}{self.block_buffer}" + elif self.state == "IN_ORPHAN": + res = f"{self.ORPHAN_START}{self.block_buffer}" elif self.state == "IN_BLOCK" and 
self.current_role != "tool": res = self.buffer elif self.state in ("IN_RESP", "IN_HINT"): From c81c2cefd8bd76e83c0edf84ebd2a0a11ad28ab8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 12:48:20 +0700 Subject: [PATCH 092/139] Update required dependencies --- pyproject.toml | 2 +- uv.lock | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 47cd86f..58391ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = "==3.12.*" dependencies = [ "fastapi>=0.128.6", - "gemini-webapi>=1.19.0", + "gemini-webapi>=1.19.1", "lmdb>=1.7.5", "loguru>=0.7.3", "orjson>=3.11.7", diff --git a/uv.lock b/uv.lock index ea28c0e..34b5cc8 100644 --- a/uv.lock +++ b/uv.lock @@ -123,7 +123,7 @@ dev = [{ name = "ruff", specifier = ">=0.15.0" }] [[package]] name = "gemini-webapi" -version = "1.19.0" +version = "1.19.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx", extra = ["http2"] }, @@ -131,9 +131,9 @@ dependencies = [ { name = "orjson" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5f/1f/8314b620db12855e6aa9c55e05428fa30eb7f00fb61b1de7db42734ef2b2/gemini_webapi-1.19.0.tar.gz", hash = "sha256:1f65e6a2e4a461f4ed4fb01dc76c2de4ed517af549f6ce34b96b9986c11af5dd", size = 266822, upload-time = "2026-02-09T23:16:34.446Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e5/d1/c61ee05ca6e20c70caa25a3f0f12e2a810bbc6b39e588ff937821de43690/gemini_webapi-1.19.1.tar.gz", hash = "sha256:a52afdfc2d9f6e87a6ae8cd926fb2ce5c562a0a99dc75ce97d8d50ffc2a3e133", size = 266761, upload-time = "2026-02-10T05:44:29.195Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/3b/71567ce13357d1139dfa0578c4073d6a8c523c4a28f3843194b639bf9d1e/gemini_webapi-1.19.0-py3-none-any.whl", hash = "sha256:47ab49f018cc01bf4b772910f7843af895f5e43d5a18b5ec7063b6f61e535921", size = 63498, upload-time = "2026-02-09T23:16:33.328Z" }, + { url = "https://files.pythonhosted.org/packages/4a/0b/7a73919ee8621f6559ae679a20d754959b989a3f09cf20478d89971f40b4/gemini_webapi-1.19.1-py3-none-any.whl", hash = "sha256:0dc4c7daa58d281722d52d6acf520f2e850c6c3c6020080fdbc5f77736c8be9a", size = 63500, upload-time = "2026-02-10T05:44:27.692Z" }, ] [[package]] From a17082532189d852b61e8b791a49844b4bb922f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Tue, 10 Feb 2026 12:53:10 +0700 Subject: [PATCH 093/139] Add dependabot --- .github/dependabot.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/dependabot.yaml diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 0000000..5ace460 --- /dev/null +++ b/.github/dependabot.yaml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" From a0136af06d4e43bd6ac4a9ba7a2b05509358e187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 10:37:50 +0700 Subject: [PATCH 094/139] Refactor: Implement the logic changes recommended by Copilot - Remove orphaned tool calls to prevent leaking internal tool-call information. - Define limits for the `timeout`, `watchdog_timeout`, and `refresh_interval` ranges. - Revise the fuzzy match logic to prevent accidental session reuse and avoid any possible content leakage between requests. 
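A minimal sketch of the collision the revised per-message verification guards against (it reuses `_fuzzy_normalize` and `_VOLATILE_TRANS_TABLE` from app/services/lmdb.py; the sample strings are hypothetical):

    import string

    # Same table the store builds: drop all whitespace and punctuation.
    _VOLATILE_TRANS_TABLE = str.maketrans("", "", string.whitespace + string.punctuation)

    def _fuzzy_normalize(text: str) -> str:
        return text.lower().translate(_VOLATILE_TRANS_TABLE)

    # Distinct contents collapse to the same fuzzy form, e.g. "1.2" vs "12",
    # so a single conversation-level fuzzy hash could hand back the wrong session.
    assert _fuzzy_normalize("1.2") == _fuzzy_normalize("12") == "12"

This is why the revised `_find_by_message_list` below also compares the message count and re-hashes each message individually before returning a stored conversation.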
--- app/server/chat.py | 4 +- app/services/lmdb.py | 252 ++++++++++++++++++++++++------------------- app/utils/config.py | 10 +- config/config.yaml | 6 +- 4 files changed, 151 insertions(+), 121 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 414349a..30a6b3a 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -947,11 +947,9 @@ def flush(self) -> str: if self.state == "IN_TOOL": if self.ORPHAN_START.lower() not in self.block_buffer.lower(): res = f"{self.TOOL_START}{self.block_buffer}" - elif self.state == "IN_ORPHAN": - res = f"{self.ORPHAN_START}{self.block_buffer}" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer - elif self.state in ("IN_RESP", "IN_HINT"): + elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT"): res = "" elif self.state == "NORMAL": res = self.buffer diff --git a/app/services/lmdb.py b/app/services/lmdb.py index a94e090..c90f537 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -25,6 +25,7 @@ def _fuzzy_normalize(text: str | None) -> str | None: """ Lowercase and remove all whitespace and punctuation. + Used as a fallback for complex/malformed contents matching. """ if text is None: return None @@ -38,7 +39,7 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: if text is None: return None - # Unicode normalization + # Unicode normalization to NFC text = unicodedata.normalize("NFC", text) # Basic cleaning @@ -49,7 +50,8 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: if fuzzy: return _fuzzy_normalize(text) - return text if text else None + # Always strip to ensure trailing newlines/spaces don't break exact matches + return text.strip() if text.strip() else None def _hash_message(message: Message, fuzzy: bool = False) -> str: @@ -157,7 +159,6 @@ def __init__( max_db_size: Maximum database size in bytes (default: 256 MB) retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup) """ - if db_path is None: db_path = g_config.storage.path if max_db_size is None: @@ -174,9 +175,11 @@ def __init__( self._init_environment() def _ensure_db_path(self) -> None: + """Create database directory if it doesn't exist.""" self.db_path.parent.mkdir(parents=True, exist_ok=True) def _init_environment(self) -> None: + """Initialize LMDB environment.""" try: self._env = lmdb.open( str(self.db_path), @@ -187,12 +190,18 @@ def _init_environment(self) -> None: meminit=False, ) logger.info(f"LMDB environment initialized at {self.db_path}") - except Exception as e: + except lmdb.Error as e: logger.error(f"Failed to initialize LMDB environment: {e}") raise @contextmanager def _get_transaction(self, write: bool = False): + """ + Context manager for LMDB transactions. + + Args: + write: Whether the transaction should be writable. 
+ """ if not self._env: raise RuntimeError("LMDB environment not initialized") @@ -201,12 +210,57 @@ def _get_transaction(self, write: bool = False): yield txn if write: txn.commit() - except Exception: + except lmdb.Error: + if write: + txn.abort() + raise + except Exception as e: + logger.error(f"Unexpected error in LMDB transaction: {e}") if write: txn.abort() raise - finally: - pass # Transaction is automatically cleaned up + + @staticmethod + def _decode_index_value(data: bytes) -> List[str]: + """Decode index value, handling both legacy single-string and new list-of-strings formats.""" + if not data: + return [] + if data.startswith(b"["): + try: + val = orjson.loads(data) + if isinstance(val, list): + return [str(v) for v in val] + except orjson.JSONDecodeError: + pass + try: + return [data.decode("utf-8")] + except UnicodeDecodeError: + return [] + + @staticmethod + def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str): + """Add a storage key to the index for a given hash, avoiding duplicates.""" + idx_key = f"{prefix}{hash_val}".encode("utf-8") + existing = txn.get(idx_key) + keys = LMDBConversationStore._decode_index_value(existing) if existing else [] + if storage_key not in keys: + keys.append(storage_key) + txn.put(idx_key, orjson.dumps(keys)) + + @staticmethod + def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str): + """Remove a specific storage key from the index for a given hash.""" + idx_key = f"{prefix}{hash_val}".encode("utf-8") + existing = txn.get(idx_key) + if not existing: + return + keys = LMDBConversationStore._decode_index_value(existing) + if storage_key in keys: + keys.remove(storage_key) + if keys: + txn.put(idx_key, orjson.dumps(keys)) + else: + txn.delete(idx_key) def store( self, @@ -226,12 +280,10 @@ def store( if not conv: raise ValueError("Messages list cannot be empty") - # Sanitize messages before computing hash and storing to ensure consistency - # with the search (find) logic, which also sanitizes its prefix. 
+ # Ensure consistent sanitization before hashing and storage sanitized_messages = self.sanitize_assistant_messages(conv.messages) conv.messages = sanitized_messages - # Generate hash for the message list message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) fuzzy_hash = _hash_conversation(conv.client_id, conv.model, conv.messages, fuzzy=True) storage_key = custom_key or message_hash @@ -247,21 +299,19 @@ def store( with self._get_transaction(write=True) as txn: txn.put(storage_key.encode("utf-8"), value, overwrite=True) - txn.put( - f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"), - storage_key.encode("utf-8"), - ) - - txn.put( - f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"), - storage_key.encode("utf-8"), - ) + self._update_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, storage_key) + self._update_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, storage_key) logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}") return storage_key + except lmdb.Error as e: + logger.error(f"LMDB error while storing messages with key {storage_key[:12]}: {e}") + raise except Exception as e: - logger.error(f"Failed to store messages with key {storage_key[:12]}: {e}") + logger.error( + f"Unexpected error while storing messages with key {storage_key[:12]}: {e}" + ) raise def get(self, key: str) -> Optional[ConversationInStore]: @@ -280,29 +330,37 @@ def get(self, key: str) -> Optional[ConversationInStore]: if not data: return None - storage_data = orjson.loads(data) # type: ignore + storage_data = orjson.loads(data) conv = ConversationInStore.model_validate(storage_data) logger.debug(f"Retrieved {len(conv.messages)} messages with key: {key[:12]}") return conv - + except (lmdb.Error, orjson.JSONDecodeError) as e: + logger.error(f"Failed to retrieve/parse messages with key {key[:12]}: {e}") + return None except Exception as e: - logger.error(f"Failed to retrieve messages with key {key[:12]}: {e}") + logger.error(f"Unexpected error retrieving messages with key {key[:12]}: {e}") return None def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]: """ Search conversation data by message list. + Tries raw matching, then sanitized matching, and finally fuzzy matching. + + Args: + model: Model name + messages: List of messages to match + + Returns: + ConversationInStore or None if not found """ if not messages: return None - # --- Find with raw messages --- if conv := self._find_by_message_list(model, messages): logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.") return conv - # --- Find with cleaned messages --- cleaned_messages = self.sanitize_assistant_messages(messages) if cleaned_messages != messages: if conv := self._find_by_message_list(model, cleaned_messages): @@ -311,7 +369,6 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt ) return conv - # --- Find with fuzzy matching --- if conv := self._find_by_message_list(model, messages, fuzzy=True): logger.debug(f"Session found for '{model}' with fuzzy matching.") return conv @@ -325,18 +382,49 @@ def _find_by_message_list( messages: List[Message], fuzzy: bool = False, ) -> Optional[ConversationInStore]: - """Internal find implementation based on a message list.""" + """ + Internal find implementation based on a message list. 
+ + Args: + model: Model name + messages: Message list to hash + fuzzy: Whether to use fuzzy hashing + + Returns: + ConversationInStore or None if not found + """ prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX + target_len = len(messages) + for c in g_config.gemini.clients: message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy) key = f"{prefix}{message_hash}" try: with self._get_transaction(write=False) as txn: - if mapped := txn.get(key.encode("utf-8")): # type: ignore - return self.get(mapped.decode("utf-8")) # type: ignore - except Exception as e: + if mapped := txn.get(key.encode("utf-8")): + candidate_keys = self._decode_index_value(mapped) + # Try candidates from newest to oldest + for ck in reversed(candidate_keys): + if conv := self.get(ck): + if len(conv.messages) != target_len: + continue + + if fuzzy: + # For fuzzy matching, verify each message hash individually + # to prevent semantic collisions (e.g., "1.2" vs "12") + match_found = True + for i in range(target_len): + if _hash_message( + conv.messages[i], fuzzy=True + ) != _hash_message(messages[i], fuzzy=True): + match_found = False + break + if not match_found: + continue + return conv + except lmdb.Error as e: logger.error( - f"Failed to retrieve messages by message list for hash {message_hash} and client {c.id}: {e}" + f"LMDB error while searching for hash {message_hash} and client {c.id}: {e}" ) continue @@ -345,74 +433,42 @@ def _find_by_message_list( return None def exists(self, key: str) -> bool: - """ - Check if a key exists in the store. - - Args: - key: Storage key to check - - Returns: - bool: True if key exists, False otherwise - """ + """Check if a key exists in the store.""" try: with self._get_transaction(write=False) as txn: return txn.get(key.encode("utf-8")) is not None - except Exception as e: + except lmdb.Error as e: logger.error(f"Failed to check existence of key {key}: {e}") return False def delete(self, key: str) -> Optional[ConversationInStore]: - """ - Delete conversation model by key. - - Args: - key: Storage key to delete - - Returns: - ConversationInStore: The deleted conversation data, or None if not found - """ + """Delete conversation model by key.""" try: with self._get_transaction(write=True) as txn: - # Get data first to clean up hash mapping data = txn.get(key.encode("utf-8")) if not data: return None - storage_data = orjson.loads(data) # type: ignore + storage_data = orjson.loads(data) conv = ConversationInStore.model_validate(storage_data) message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) fuzzy_hash = _hash_conversation( conv.client_id, conv.model, conv.messages, fuzzy=True ) - # Delete main data txn.delete(key.encode("utf-8")) - # Clean up hash mapping if it exists - if message_hash and key != message_hash: - txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) - - # Always clean up fuzzy mapping - txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8")) + self._remove_from_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, key) + self._remove_from_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, key) logger.debug(f"Deleted messages with key: {key[:12]}") return conv - - except Exception as e: + except (lmdb.Error, orjson.JSONDecodeError) as e: logger.error(f"Failed to delete messages with key {key[:12]}: {e}") return None def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: - """ - List all keys in the store, optionally filtered by prefix. 
- - Args: - prefix: Optional prefix to filter keys - limit: Optional limit on number of keys returned - - Returns: - List of keys - """ + """List all keys in the store, optionally filtered by prefix.""" keys = [] try: with self._get_transaction(write=False) as txn: @@ -422,7 +478,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: count = 0 for key, _ in cursor: key_str = key.decode("utf-8") - # Skip internal hash mappings + # Skip internal index mappings if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith( self.FUZZY_LOOKUP_PREFIX ): @@ -431,25 +487,14 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: if not prefix or key_str.startswith(prefix): keys.append(key_str) count += 1 - if limit and count >= limit: break - - except Exception as e: + except lmdb.Error as e: logger.error(f"Failed to list keys: {e}") - return keys def cleanup_expired(self, retention_days: Optional[int] = None) -> int: - """ - Delete conversations older than the given retention period. - - Args: - retention_days: Optional override for retention period in days. - - Returns: - Number of conversations removed. - """ + """Delete conversations older than the given retention period.""" retention_value = ( self.retention_days if retention_days is None else max(0, int(retention_days)) ) @@ -463,7 +508,6 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: try: with self._get_transaction(write=False) as txn: cursor = txn.cursor() - for key_bytes, value_bytes in cursor: key_str = key_bytes.decode("utf-8") if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith( @@ -472,9 +516,9 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: continue try: - storage_data = orjson.loads(value_bytes) # type: ignore[arg-type] + storage_data = orjson.loads(value_bytes) conv = ConversationInStore.model_validate(storage_data) - except Exception as exc: + except (orjson.JSONDecodeError, Exception) as exc: logger.warning(f"Failed to decode record for key {key_str}: {exc}") continue @@ -484,7 +528,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: if timestamp < cutoff: expired_entries.append((key_str, conv)) - except Exception as exc: + except lmdb.Error as exc: logger.error(f"Failed to scan LMDB for retention cleanup: {exc}") raise @@ -501,15 +545,13 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages) if message_hash: - if key_str != message_hash: - txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8")) - + self._remove_from_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, key_str) fuzzy_hash = _hash_conversation( conv.client_id, conv.model, conv.messages, fuzzy=True ) - txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8")) + self._remove_from_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, key_str) removed += 1 - except Exception as exc: + except lmdb.Error as exc: logger.error(f"Failed to delete expired conversations: {exc}") raise @@ -521,19 +563,13 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: return removed def stats(self) -> Dict[str, Any]: - """ - Get database statistics. 
- - Returns: - Dict with database statistics - """ + """Get database statistics.""" if not self._env: logger.error("LMDB environment not initialized") return {} - try: return self._env.stat() - except Exception as e: + except lmdb.Error as e: logger.error(f"Failed to get database stats: {e}") return {} @@ -550,21 +586,15 @@ def __del__(self): @staticmethod def remove_think_tags(text: str) -> str: - """ - Remove all ... tags and strip whitespace. - """ + """Remove all ... tags and strip whitespace.""" if not text: return text - # Remove all think blocks anywhere in the text cleaned_content = re.sub(r".*?", "", text, flags=re.DOTALL) return cleaned_content.strip() @staticmethod def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: - """ - Produce a canonical history where assistant messages are cleaned of - internal markers and tool call blocks are moved to metadata. - """ + """Clean assistant messages of internal markers and move tool calls to metadata.""" cleaned_messages = [] for msg in messages: if msg.role == "assistant": @@ -596,7 +626,6 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: for item in msg.content: if isinstance(item, ContentItem) and item.type == "text" and item.text: text = LMDBConversationStore.remove_think_tags(item.text) - if not msg.tool_calls: text, extracted = extract_tool_calls(text) if extracted: @@ -625,5 +654,4 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: cleaned_messages.append(msg) else: cleaned_messages.append(msg) - return cleaned_messages diff --git a/app/utils/config.py b/app/utils/config.py index e62832d..3b24931 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -83,11 +83,15 @@ class GeminiConfig(BaseModel): default="append", description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", ) - timeout: int = Field(default=300, ge=1, description="Init timeout") - watchdog_timeout: int = Field(default=60, ge=1, description="Watchdog timeout") + timeout: int = Field(default=300, ge=30, description="Init timeout") + watchdog_timeout: int = Field( + default=60, ge=10, le=75, description="Watchdog timeout in seconds (Not more than 75s)" + ) auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies") refresh_interval: int = Field( - default=540, ge=1, description="Interval in seconds to refresh Gemini cookies" + default=540, + ge=60, + description="Interval in seconds to refresh Gemini cookies (Not less than 60s)", ) verbose: bool = Field(False, description="Enable verbose logging for Gemini API requests") max_chars_per_request: int = Field( diff --git a/config/config.yaml b/config/config.yaml index 2873d48..3d5e6f4 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -22,10 +22,10 @@ gemini: secure_1psid: "YOUR_SECURE_1PSID_HERE" secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" proxy: null # Optional proxy URL (null/empty means direct connection) - timeout: 300 # Init timeout in seconds - watchdog_timeout: 60 # Watchdog timeout in seconds (No longer than 75 seconds) + timeout: 300 # Init timeout in seconds (Not less than 30s) + watchdog_timeout: 60 # Watchdog timeout in seconds (Not more than 75s) auto_refresh: true # Auto-refresh session cookies - refresh_interval: 540 # Refresh interval in seconds + refresh_interval: 540 # Refresh interval in seconds (Not less than 60s) verbose: false # Enable verbose logging for Gemini requests max_chars_per_request: 1000000 # Maximum characters Gemini Web 
accepts per request. Non-pro users might have a lower limit model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only) From 5eb9f509d451d76739da1f34b5003b1d7628279b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 12:24:06 +0700 Subject: [PATCH 095/139] Refactor: Optimize fuzzy matching logic --- app/services/lmdb.py | 31 ++++++++++++++++--------------- app/utils/config.py | 2 +- scripts/dump_lmdb.py | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index c90f537..4b57f60 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -370,7 +370,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt return conv if conv := self._find_by_message_list(model, messages, fuzzy=True): - logger.debug(f"Session found for '{model}' with fuzzy matching.") + logger.debug( + f"Session found for '{model}' with {len(messages)} fuzzy matching messages." + ) return conv logger.debug(f"No session found for '{model}' with {len(messages)} messages.") @@ -396,6 +398,8 @@ def _find_by_message_list( prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX target_len = len(messages) + target_hashes = [_hash_message(m, fuzzy=fuzzy) for m in messages] + for c in g_config.gemini.clients: message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy) key = f"{prefix}{message_hash}" @@ -403,25 +407,22 @@ def _find_by_message_list( with self._get_transaction(write=False) as txn: if mapped := txn.get(key.encode("utf-8")): candidate_keys = self._decode_index_value(mapped) - # Try candidates from newest to oldest for ck in reversed(candidate_keys): if conv := self.get(ck): if len(conv.messages) != target_len: continue - if fuzzy: - # For fuzzy matching, verify each message hash individually - # to prevent semantic collisions (e.g., "1.2" vs "12") - match_found = True - for i in range(target_len): - if _hash_message( - conv.messages[i], fuzzy=True - ) != _hash_message(messages[i], fuzzy=True): - match_found = False - break - if not match_found: - continue - return conv + match_found = True + for i in range(target_len): + if ( + _hash_message(conv.messages[i], fuzzy=fuzzy) + != target_hashes[i] + ): + match_found = False + break + + if match_found: + return conv except lmdb.Error as e: logger.error( f"LMDB error while searching for hash {message_hash} and client {c.id}: {e}" diff --git a/app/utils/config.py b/app/utils/config.py index 3b24931..4c1709f 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -83,7 +83,7 @@ class GeminiConfig(BaseModel): default="append", description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom", ) - timeout: int = Field(default=300, ge=30, description="Init timeout") + timeout: int = Field(default=300, ge=30, description="Init timeout in seconds") watchdog_timeout: int = Field( default=60, ge=10, le=75, description="Watchdog timeout in seconds (Not more than 75s)" ) diff --git a/scripts/dump_lmdb.py b/scripts/dump_lmdb.py index b06b1b4..a331325 100644 --- a/scripts/dump_lmdb.py +++ b/scripts/dump_lmdb.py @@ -42,7 +42,7 @@ def dump_lmdb(path: Path, keys: Iterable[str] | None = None) -> None: records = _dump_all(txn) env.close() - print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode()) + print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode("utf-8")) def main() -> None: From 971f2c70f81ac82640cb6a9f3c800be0d7c1143a Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 12:28:11 +0700 Subject: [PATCH 096/139] Update dependencies --- pyproject.toml | 2 +- uv.lock | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 58391ff..d3a1aaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.128.6", + "fastapi>=0.128.7", "gemini-webapi>=1.19.1", "lmdb>=1.7.5", "loguru>=0.7.3", diff --git a/uv.lock b/uv.lock index 34b5cc8..c038f53 100644 --- a/uv.lock +++ b/uv.lock @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.128.6" +version = "0.128.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -74,9 +74,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/83/d1/195005b5e45b443e305136df47ee7df4493d782e0c039dd0d97065580324/fastapi-0.128.6.tar.gz", hash = "sha256:0cb3946557e792d731b26a42b04912f16367e3c3135ea8290f620e234f2b604f", size = 374757, upload-time = "2026-02-09T17:27:03.541Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a0/fc/af386750b3fd8d8828167e4c82b787a8eeca2eca5c5429c9db8bb7c70e04/fastapi-0.128.7.tar.gz", hash = "sha256:783c273416995486c155ad2c0e2b45905dedfaf20b9ef8d9f6a9124670639a24", size = 375325, upload-time = "2026-02-10T12:26:40.968Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/24/58/a2c4f6b240eeb148fb88cdac48f50a194aba760c1ca4988c6031c66a20ee/fastapi-0.128.6-py3-none-any.whl", hash = "sha256:bb1c1ef87d6086a7132d0ab60869d6f1ee67283b20fbf84ec0003bd335099509", size = 103674, upload-time = "2026-02-09T17:27:02.355Z" }, + { url = "https://files.pythonhosted.org/packages/af/1a/f983b45661c79c31be575c570d46c437a5409b67a939c1b3d8d6b3ed7a7f/fastapi-0.128.7-py3-none-any.whl", hash = "sha256:6bd9bd31cb7047465f2d3fa3ba3f33b0870b17d4eaf7cdb36d1576ab060ad662", size = 103630, upload-time = "2026-02-10T12:26:39.414Z" }, ] [[package]] @@ -106,8 +106,8 @@ dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.128.6" }, - { name = "gemini-webapi", specifier = ">=1.19.0" }, + { name = "fastapi", specifier = ">=0.128.7" }, + { name = "gemini-webapi", specifier = ">=1.19.1" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "orjson", specifier = ">=3.11.7" }, From cad23795e41a97fd7cb7e5dd371c03d1bdbec607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 20:46:42 +0700 Subject: [PATCH 097/139] Refactor: Update Markdown unescape helpers to prevent impacting clients like Roo Code --- app/services/client.py | 6 ++++++ app/utils/helper.py | 44 ++++++++++++++++-------------------------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 3cdd839..5d248c2 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -14,6 +14,10 @@ save_url_to_tempfile, ) +COMMONMARK_UNESCAPE_RE = re.compile( + r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" +) # See: https://spec.commonmark.org/current/#backslash-escapes + FILE_PATH_PATTERN = re.compile( 
r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -194,6 +198,8 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) + text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text) + def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) if match: diff --git a/app/utils/helper.py b/app/utils/helper.py index 384f5cd..67bfa55 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -191,7 +191,6 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ if not text: return text, [] - # Clean hints FIRST so they don't interfere with tool call regexes (e.g. example calls in hint) cleaned = strip_system_hints(text) tool_calls: list[ToolCall] = [] @@ -237,33 +236,24 @@ def _create_tool_call(name: str, raw_args: str) -> None: ) ) - def _replace_block(match: re.Match[str]) -> str: - block_content = match.group(1) - if not block_content: - return match.group(0) - - is_tool_block = bool(TOOL_CALL_RE.search(block_content)) - - if is_tool_block: - if extract: - for call_match in TOOL_CALL_RE.finditer(block_content): - name = (call_match.group(1) or "").strip() - raw_args = (call_match.group(2) or "").strip() - _create_tool_call(name, raw_args) - return "" - else: - return match.group(0) - - def _replace_orphan(match: re.Match[str]) -> str: - if extract: - name = (match.group(1) or "").strip() - raw_args = (match.group(2) or "").strip() - _create_tool_call(name, raw_args) - return "" - - cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned) - cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned) + all_calls = [] + for match in TOOL_CALL_RE.finditer(cleaned): + all_calls.append( + { + "start": match.start(), + "name": (match.group(1) or "").strip(), + "args": (match.group(2) or "").strip(), + } + ) + + all_calls.sort(key=lambda x: x["start"]) + + if extract: + for call in all_calls: + _create_tool_call(call["name"], call["args"]) + cleaned = TOOL_BLOCK_RE.sub("", cleaned) + cleaned = TOOL_CALL_RE.sub("", cleaned) cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) cleaned = RESPONSE_ITEM_RE.sub("", cleaned) From 795b8d88a3cfb29e46c4e369c277e810308cc8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 11 Feb 2026 21:20:11 +0700 Subject: [PATCH 098/139] Refactor: Update Markdown unescape helpers to prevent impacting clients like Roo Code --- app/services/client.py | 6 ------ app/utils/helper.py | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 5d248c2..3cdd839 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -14,10 +14,6 @@ save_url_to_tempfile, ) -COMMONMARK_UNESCAPE_RE = re.compile( - r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" -) # See: https://spec.commonmark.org/current/#backslash-escapes - FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -198,8 +194,6 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) - text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text) - def 
extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) if match: diff --git a/app/utils/helper.py b/app/utils/helper.py index 67bfa55..ce781bd 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -34,6 +34,9 @@ RESPONSE_ITEM_RE = re.compile( r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE ) +COMMONMARK_UNESCAPE_RE = re.compile( + r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" +) # See: https://spec.commonmark.org/current/#backslash-escapes CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -192,9 +195,12 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ return text, [] cleaned = strip_system_hints(text) - tool_calls: list[ToolCall] = [] + def _unescape_markdown(s: str) -> str: + """Restores characters escaped for Markdown rendering.""" + return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + def _create_tool_call(name: str, raw_args: str) -> None: if not extract: return @@ -202,20 +208,33 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning("Encountered tool_call without a function name.") return + prev_name = "" + while name != prev_name: + prev_name = name + name = _unescape_markdown(name) + + def _try_parse_json(s: str) -> dict | None: + try: + return orjson.loads(s) + except orjson.JSONDecodeError: + try: + return orjson.loads(_unescape_markdown(s)) + except orjson.JSONDecodeError: + return None + arguments = raw_args - try: - parsed_args = orjson.loads(raw_args) - arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") - except orjson.JSONDecodeError: + parsed_args = _try_parse_json(raw_args) + + if parsed_args is None: json_match = re.search(r"({.*})", raw_args, re.DOTALL) if json_match: potential_json = json_match.group(1) - try: - parsed_args = orjson.loads(potential_json) + parsed_args = _try_parse_json(potential_json) + if parsed_args is not None: arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) - except orjson.JSONDecodeError: + else: logger.warning( f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}" ) @@ -223,6 +242,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning( f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}" ) + else: + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") From e85252a3c6000af5f5094560ad95aa8a8e78c184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 07:59:28 +0700 Subject: [PATCH 099/139] Revert "Refactor: Update Markdown unescape helpers to prevent impacting clients like Roo Code" This reverts commit 795b8d88a3cfb29e46c4e369c277e810308cc8e8. 
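For context, the helper being moved back into client.py is a one-regex CommonMark unescape. A minimal illustration with a hypothetical input string (the regex itself is copied verbatim from this series):

    import re

    COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")

    # Gemini Web backslash-escapes Markdown punctuation in its output;
    # stripping the escapes restores what clients such as Roo Code expect.
    escaped = r"apply\_diff to \`src/main.rs\`"
    print(COMMONMARK_UNESCAPE_RE.sub(r"\1", escaped))  # apply_diff to `src/main.rs`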
--- app/services/client.py | 6 ++++++ app/utils/helper.py | 37 ++++++++----------------------------- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 3cdd839..5d248c2 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -14,6 +14,10 @@ save_url_to_tempfile, ) +COMMONMARK_UNESCAPE_RE = re.compile( + r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" +) # See: https://spec.commonmark.org/current/#backslash-escapes + FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -194,6 +198,8 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) + text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text) + def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) if match: diff --git a/app/utils/helper.py b/app/utils/helper.py index ce781bd..67bfa55 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -34,9 +34,6 @@ RESPONSE_ITEM_RE = re.compile( r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE ) -COMMONMARK_UNESCAPE_RE = re.compile( - r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" -) # See: https://spec.commonmark.org/current/#backslash-escapes CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -195,11 +192,8 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ return text, [] cleaned = strip_system_hints(text) - tool_calls: list[ToolCall] = [] - def _unescape_markdown(s: str) -> str: - """Restores characters escaped for Markdown rendering.""" - return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: if not extract: @@ -208,33 +202,20 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning("Encountered tool_call without a function name.") return - prev_name = "" - while name != prev_name: - prev_name = name - name = _unescape_markdown(name) - - def _try_parse_json(s: str) -> dict | None: - try: - return orjson.loads(s) - except orjson.JSONDecodeError: - try: - return orjson.loads(_unescape_markdown(s)) - except orjson.JSONDecodeError: - return None - arguments = raw_args - parsed_args = _try_parse_json(raw_args) - - if parsed_args is None: + try: + parsed_args = orjson.loads(raw_args) + arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") + except orjson.JSONDecodeError: json_match = re.search(r"({.*})", raw_args, re.DOTALL) if json_match: potential_json = json_match.group(1) - parsed_args = _try_parse_json(potential_json) - if parsed_args is not None: + try: + parsed_args = orjson.loads(potential_json) arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) - else: + except orjson.JSONDecodeError: logger.warning( f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}" ) @@ -242,8 +223,6 @@ def _try_parse_json(s: str) -> dict | None: logger.warning( f"Failed to parse tool call arguments for '{name}'. 
Passing raw string: {reprlib.repr(raw_args)}" ) - else: - arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") From 4be41506c95673bf7a747f8ff2629a45c99b8309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 12:17:12 +0700 Subject: [PATCH 100/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/server/chat.py | 259 +++++++++++++++++++++++++---------------- app/services/client.py | 39 ++++--- app/services/lmdb.py | 7 +- app/utils/helper.py | 89 +++++++++----- 4 files changed, 245 insertions(+), 149 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 30a6b3a..080d015 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -382,11 +382,16 @@ def _build_tool_prompt( ) lines.append("[function_calls]") lines.append("[call:tool_name]") - lines.append('{"argument": "value"}') + lines.append("@args") + lines.append("") + lines.append("<<<ARG:argument_name>>>") + lines.append("value") + lines.append("<<<END:argument_name>>>") + lines.append("") lines.append("[/call]") lines.append("[/function_calls]") lines.append( - "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block." + "CRITICAL: Arguments MUST use <<<ARG:argument_name>>>...<<<END:argument_name>>> tags. Content inside tags can be any format." ) lines.append( "If multiple tools are needed, list them sequentially within the same [function_calls] block." @@ -394,7 +399,9 @@ lines.append( "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag." ) - lines.append("Note: Tool results are returned in a [function_responses] block.") + lines.append( + "Note: Tool results are returned in a [function_responses] block using @results and <<<RESULT>>> tags."
+ ) return "\n".join(lines) @@ -774,26 +781,44 @@ def __init__(self): self.current_role = "" self.block_buffer = "" - self.TOOL_START = "[function_calls]" - self.TOOL_END = "[/function_calls]" - self.ORPHAN_START = "[call:" - self.ORPHAN_END = "[/call]" - self.RESPONSE_START = "[function_responses]" - self.RESPONSE_END = "[/function_responses]" - self.TAG_START = "<|im_start|>" - self.TAG_END = "<|im_end|>" - self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else "" - self.HINT_END = TOOL_HINT_LINE_END - - self.WATCH_MARKERS = [ - self.TOOL_START, - self.ORPHAN_START, - self.RESPONSE_START, - self.TAG_START, - self.TAG_END, - ] - if self.HINT_START: - self.WATCH_MARKERS.append(self.HINT_START) + self.STATE_MARKERS = { + "TOOL": { + "starts": ["[function_calls]", "\\[function_calls\\]"], + "ends": ["[/function_calls]", "\\[/function_calls\\]"], + }, + "ORPHAN": { + "starts": ["[call:", "\\[call:"], + "ends": ["[/call]", "\\[/call\\]"], + }, + "RESP": { + "starts": ["[function_responses]", "\\[function_responses\\]"], + "ends": ["[/function_responses]", "\\[/function_responses\\]"], + }, + "ARG": { + "starts": ["<<>>", "\\<\\<\\\\>\\>"], + "ends": ["<<>>", "\\<\\<\\\\>\\>"], + }, + "TAG": { + "starts": ["<|im_start|>", "\\<|im_start|\\>"], + "ends": ["<|im_end|>", "\\<|im_end|\\>"], + }, + } + + hint_start = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else "" + if hint_start: + self.STATE_MARKERS["HINT"] = { + "starts": [hint_start], + "ends": [TOOL_HINT_LINE_END], + } + + self.WATCH_MARKERS = [] + for cfg in self.STATE_MARKERS.values(): + self.WATCH_MARKERS.extend(cfg["starts"]) + self.WATCH_MARKERS.extend(cfg.get("ends", [])) def process(self, chunk: str) -> str: self.buffer += chunk @@ -802,25 +827,12 @@ def process(self, chunk: str) -> str: while self.buffer: buf_low = self.buffer.lower() if self.state == "NORMAL": - tool_idx = buf_low.find(self.TOOL_START) - orphan_idx = buf_low.find(self.ORPHAN_START) - resp_idx = buf_low.find(self.RESPONSE_START) - tag_idx = buf_low.find(self.TAG_START) - end_idx = buf_low.find(self.TAG_END) - hint_idx = buf_low.find(self.HINT_START) if self.HINT_START else -1 - - indices = [ - (i, t) - for i, t in [ - (tool_idx, "TOOL"), - (orphan_idx, "ORPHAN"), - (resp_idx, "RESP"), - (tag_idx, "TAG"), - (end_idx, "END"), - (hint_idx, "HINT"), - ] - if i != -1 - ] + indices = [] + for m_type, cfg in self.STATE_MARKERS.items(): + for p in cfg["starts"]: + idx = buf_low.find(p.lower()) + if idx != -1: + indices.append((idx, m_type, len(p))) if not indices: # Guard against split markers (case-insensitive) @@ -838,76 +850,111 @@ def process(self, chunk: str) -> str: break indices.sort() - idx, m_type = indices[0] + idx, m_type, m_len = indices[0] output.append(self.buffer[:idx]) self.buffer = self.buffer[idx:] - if m_type == "TOOL": - self.state = "IN_TOOL" - self.block_buffer = "" - self.buffer = self.buffer[len(self.TOOL_START) :] - elif m_type == "ORPHAN": - self.state = "IN_ORPHAN" + self.state = f"IN_{m_type}" + if m_type in ("TOOL", "ORPHAN"): self.block_buffer = "" - self.buffer = self.buffer[len(self.ORPHAN_START) :] - elif m_type == "RESP": - self.state = "IN_RESP" - self.buffer = self.buffer[len(self.RESPONSE_START) :] - elif m_type == "TAG": - self.state = "IN_TAG" - self.buffer = self.buffer[len(self.TAG_START) :] - elif m_type == "END": - self.buffer = self.buffer[len(self.TAG_END) :] - elif m_type == "HINT": - self.state = "IN_HINT" - self.buffer = self.buffer[len(self.HINT_START) :] + + self.buffer = self.buffer[m_len:] elif 
self.state == "IN_HINT": - end_idx = buf_low.find(self.HINT_END.lower()) - if end_idx != -1: - self.buffer = self.buffer[end_idx + len(self.HINT_END) :] + cfg = self.STATE_MARKERS["HINT"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.buffer = self.buffer[found_idx + found_len :] + self.state = "NORMAL" + else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.buffer = self.buffer[-max_end_len:] + break + + elif self.state == "IN_ARG": + cfg = self.STATE_MARKERS["ARG"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + bracket_idx = self.buffer.find(">", found_idx + found_len) + if bracket_idx != -1: + end_pos = bracket_idx + 1 + while end_pos < len(self.buffer) and self.buffer[end_pos] == ">": + end_pos += 1 + + self.buffer = self.buffer[end_pos:] + self.state = "NORMAL" + else: + break + else: + break + + elif self.state == "IN_RESULT": + cfg = self.STATE_MARKERS["RESULT"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: - keep_len = len(self.HINT_END) - 1 - if len(self.buffer) > keep_len: - self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_RESP": - end_idx = buf_low.find(self.RESPONSE_END.lower()) - if end_idx != -1: - self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :] + cfg = self.STATE_MARKERS["RESP"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: - keep_len = len(self.RESPONSE_END) - 1 - if len(self.buffer) > keep_len: - self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_TOOL": - end_idx = buf_low.find(self.TOOL_END.lower()) - if end_idx != -1: - self.block_buffer += self.buffer[:end_idx] - self.buffer = self.buffer[end_idx + len(self.TOOL_END) :] + cfg = self.STATE_MARKERS["TOOL"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.block_buffer += self.buffer[:found_idx] + self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: - keep_len = len(self.TOOL_END) - 1 - if len(self.buffer) > keep_len: - self.block_buffer += self.buffer[:-keep_len] - self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_ORPHAN": - end_idx = buf_low.find(self.ORPHAN_END.lower()) - if end_idx != -1: - self.block_buffer += self.buffer[:end_idx] - self.buffer = self.buffer[end_idx + len(self.ORPHAN_END) :] + cfg = self.STATE_MARKERS["ORPHAN"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + self.block_buffer += self.buffer[:found_idx] + self.buffer = self.buffer[found_idx + found_len :] 
self.state = "NORMAL" else: - keep_len = len(self.ORPHAN_END) - 1 - if len(self.buffer) > keep_len: - self.block_buffer += self.buffer[:-keep_len] - self.buffer = self.buffer[-keep_len:] break elif self.state == "IN_TAG": @@ -920,24 +967,30 @@ def process(self, chunk: str) -> str: break elif self.state == "IN_BLOCK": - end_idx = buf_low.find(self.TAG_END.lower()) - if end_idx != -1: - content = self.buffer[:end_idx] + cfg = self.STATE_MARKERS["TAG"] + found_idx, found_len = -1, 0 + for p in cfg["ends"]: + idx = buf_low.find(p.lower()) + if idx != -1 and (found_idx == -1 or idx < found_idx): + found_idx, found_len = idx, len(p) + + if found_idx != -1: + content = self.buffer[:found_idx] if self.current_role != "tool": output.append(content) - self.buffer = self.buffer[end_idx + len(self.TAG_END) :] + self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" self.current_role = "" else: - keep_len = len(self.TAG_END) - 1 + max_end_len = max(len(p) for p in cfg["ends"]) if self.current_role != "tool": - if len(self.buffer) > keep_len: - output.append(self.buffer[:-keep_len]) - self.buffer = self.buffer[-keep_len:] + if len(self.buffer) > max_end_len: + output.append(self.buffer[:-max_end_len]) + self.buffer = self.buffer[-max_end_len:] break else: - if len(self.buffer) > keep_len: - self.buffer = self.buffer[-keep_len:] + if len(self.buffer) > max_end_len: + self.buffer = self.buffer[-max_end_len:] break return "".join(output) @@ -945,11 +998,13 @@ def process(self, chunk: str) -> str: def flush(self) -> str: res = "" if self.state == "IN_TOOL": - if self.ORPHAN_START.lower() not in self.block_buffer.lower(): - res = f"{self.TOOL_START}{self.block_buffer}" + orphan_starts = self.STATE_MARKERS["ORPHAN"]["starts"] + is_orphan = any(p.lower() in self.block_buffer.lower() for p in orphan_starts) + if not is_orphan: + res = f"{self.STATE_MARKERS['TOOL']['starts'][0]}{self.block_buffer}" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer - elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT"): + elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): res = "" elif self.state == "NORMAL": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index 5d248c2..c955456 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -10,14 +10,11 @@ from ..utils import g_config from ..utils.helper import ( add_tag, + normalize_llm_text, save_file_to_tempfile, save_url_to_tempfile, ) -COMMONMARK_UNESCAPE_RE = re.compile( - r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" -) # See: https://spec.commonmark.org/current/#backslash-escapes - FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -89,12 +86,12 @@ async def process_message( if isinstance(message.content, str): if message.content or message.role == "tool": - text_fragments.append(message.content or "{}") + text_fragments.append(message.content or "") elif isinstance(message.content, list): for item in message.content: if item.type == "text": if item.text or message.role == "tool": - text_fragments.append(item.text or "{}") + text_fragments.append(item.text or "") elif item.type == "image_url": if not item.image_url: raise ValueError("Image URL cannot be empty") @@ -113,14 +110,19 @@ async def process_message( else: raise ValueError("File must 
contain 'file_data' or 'url' key") elif message.content is None and message.role == "tool": - text_fragments.append("{}") + text_fragments.append("") elif message.content is not None: raise ValueError("Unsupported message content type.") if message.role == "tool": tool_name = message.name or "unknown" - combined_content = "\n".join(text_fragments).strip() or "{}" - res_block = f"[response:{tool_name}]\n{combined_content}\n[/response]" + combined_content = "\n".join(text_fragments).strip() + res_block = ( + f"[response:{tool_name}]\n" + f"@results\n\n" + f"<<<RESULT>>>\n{combined_content}\n<<<END:RESULT>>>\n\n" + f"[/response]" + ) if wrap_tool: text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"] else: @@ -130,17 +132,22 @@ tool_blocks: list[str] = [] for call in message.tool_calls: args_text = call.function.arguments.strip() + formatted_args = "\n@args\n" try: parsed_args = orjson.loads(args_text) - args_text = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( - "utf-8" - ) + if isinstance(parsed_args, dict): + for k, v in parsed_args.items(): + val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") + formatted_args += f"\n<<<ARG:{k}>>>\n{val_str}\n<<<END:{k}>>>\n" + else: + formatted_args += args_text except orjson.JSONDecodeError: - pass - tool_blocks.append(f"[call:{call.function.name}]{args_text}[/call]") + formatted_args += args_text + + tool_blocks.append(f"[call:{call.function.name}]{formatted_args}\n[/call]") if tool_blocks: - tool_section = "[function_calls]\n" + "".join(tool_blocks) + "\n[/function_calls]" + tool_section = "[function_calls]\n" + "\n".join(tool_blocks) + "\n[/function_calls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -198,7 +205,7 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) - text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text) + text = normalize_llm_text(text) def extract_file_path_from_display_text(text_content: str) -> str | None: match = re.match(FILE_PATH_PATTERN, text_content) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 4b57f60..a90c684 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -1,7 +1,6 @@ import hashlib import re import string -import unicodedata from contextlib import contextmanager from datetime import datetime, timedelta from pathlib import Path @@ -15,6 +14,7 @@ from ..utils import g_config from ..utils.helper import ( extract_tool_calls, + normalize_llm_text, remove_tool_call_blocks, ) from ..utils.singleton import Singleton @@ -39,11 +39,8 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: if text is None: return None - # Unicode normalization to NFC - text = unicodedata.normalize("NFC", text) + text = normalize_llm_text(text) - # Basic cleaning - text = text.replace("\r\n", "\n").replace("\r", "\n") text = LMDBConversationStore.remove_think_tags(text) text = remove_tool_call_blocks(text) diff --git a/app/utils/helper.py b/app/utils/helper.py index 67bfa55..dfb4abd 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -1,10 +1,12 @@ import base64 import hashlib +import html import mimetypes import re import reprlib import struct import tempfile +import unicodedata from pathlib import Path from urllib.parse import urlparse @@ -19,22 +21,39 @@ "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n" "[function_calls]\n" "[call:tool_name]\n" - '{"argument": "value"}\n' +
"[call:tool_name]\n" - '{"argument": "value"}\n' + "@args\n" + "\n<<>>\n" + "value\n" + "<<>>\n" "[/call]\n" "[/function_calls]\n" - "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block.\n" + "CRITICAL: Arguments MUST use <<>>...<<>> tags. Content inside tags can be any format.\n" ) TOOL_BLOCK_RE = re.compile( - r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE + r"\\?\[function_calls\\?]\s*(.*?)\s*\\?\[/function_calls\\?]", re.DOTALL | re.IGNORECASE +) +TOOL_CALL_RE = re.compile( + r"\\?\[call:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/call\\?]", re.DOTALL | re.IGNORECASE ) -TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE) RESPONSE_BLOCK_RE = re.compile( - r"\[function_responses]\s*(.*?)\s*\[/function_responses]", re.DOTALL | re.IGNORECASE + r"\\?\[function_responses\\?]\s*(.*?)\s*\\?\[/function_responses\\?]", + re.DOTALL | re.IGNORECASE, ) RESPONSE_ITEM_RE = re.compile( - r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE + r"\\?\[response:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/response\\?]", re.DOTALL | re.IGNORECASE +) +TAGGED_ARG_RE = re.compile( + r"(?:\\?<){3}ARG:([^>\\]+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:\1(?:\\?>){3}", + re.DOTALL | re.IGNORECASE, +) +TAGGED_RESULT_RE = re.compile( + r"(?:\\?<){3}RESULT(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:RESULT(?:\\?>){3}", + re.DOTALL | re.IGNORECASE, ) -CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>") +CONTROL_TOKEN_RE = re.compile(r"\\?<\|im_(?:start|end)\|\\?>") +COMMONMARK_UNESCAPE_RE = re.compile( + r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" +) # See: https://spec.commonmark.org/current/#backslash-escapes TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -50,6 +69,26 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str: return f"<|im_start|>{role}\n{content}" + ("\n<|im_end|>" if not unclose else "") +def normalize_llm_text(s: str) -> str: + """ + Safely normalize LLM-generated text for both display and hashing. + Includes: HTML unescaping, NFC normalization, and line ending standardization. 
+ """ + if not s: + return "" + + s = html.unescape(s) + s = unicodedata.normalize("NFC", s) + s = s.replace("\r\n", "\n").replace("\r", "\n") + + return s + + +def unescape_llm_text(s: str) -> str: + r"""Unescape characters escaped by Gemini Web's post-processing.""" + return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + + def estimate_tokens(text: str | None) -> int: """Estimate the number of tokens heuristically based on character count""" if not text: @@ -202,27 +241,23 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning("Encountered tool_call without a function name.") return - arguments = raw_args - try: - parsed_args = orjson.loads(raw_args) - arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8") - except orjson.JSONDecodeError: - json_match = re.search(r"({.*})", raw_args, re.DOTALL) - if json_match: - potential_json = json_match.group(1) - try: - parsed_args = orjson.loads(potential_json) - arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode( - "utf-8" - ) - except orjson.JSONDecodeError: - logger.warning( - f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}" - ) + name = unescape_llm_text(name.strip()) + raw_args = unescape_llm_text(raw_args) + + arg_matches = TAGGED_ARG_RE.findall(raw_args) + if arg_matches: + args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches} + arguments = orjson.dumps(args_dict).decode("utf-8") + logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}") + else: + cleaned_raw = raw_args.replace("@args", "").strip() + if not cleaned_raw: + logger.debug(f"Tool '{name}' called without arguments.") else: logger.warning( - f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}" + f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" ) + arguments = "{}" index = len(tool_calls) seed = f"{name}:{arguments}:{index}".encode("utf-8") @@ -241,7 +276,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: all_calls.append( { "start": match.start(), - "name": (match.group(1) or "").strip(), + "name": unescape_llm_text((match.group(1) or "").strip()), "args": (match.group(2) or "").strip(), } ) @@ -256,6 +291,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: cleaned = TOOL_CALL_RE.sub("", cleaned) cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) cleaned = RESPONSE_ITEM_RE.sub("", cleaned) + cleaned = TAGGED_ARG_RE.sub("", cleaned) + cleaned = TAGGED_RESULT_RE.sub("", cleaned) return cleaned, tool_calls From d86798bc360b5ba76f3fb778c3b7e86b736400f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 15:05:48 +0700 Subject: [PATCH 101/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. 
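
For reference, a call that previously carried a raw JSON object inside
[call:...] now serializes roughly like this on the wire (a sketch only;
`write_file` and `path` are illustrative names, and the exact whitespace is
whatever process_message emits):

    [ToolCalls]
    [Call:write_file]
    @args
    <<<CallParameter:path>>>
    app/example.py
    <<<EndCallParameter>>>
    [/Call]
    [/ToolCalls]

Each argument gets its own <<<CallParameter:...>>>/<<<EndCallParameter>>>
pair, so multi-line values such as diffs survive without JSON escaping.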
---
 app/server/chat.py     | 114 ++++++++++++++++++------------
 app/services/client.py |  24 ++++---
 app/services/lmdb.py   |   4 +-
 app/utils/helper.py    | 150 ++++++++++++++++------------------------
 4 files changed, 141 insertions(+), 151 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 080d015..4262d0d 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -342,7 +342,7 @@ def _build_tool_prompt(
     tools: list[Tool],
     tool_choice: str | ToolChoiceFunction | None,
 ) -> str:
-    """Generate a system prompt chunk describing available tools."""
+    """Generate a system prompt describing available tools and the PascalCase protocol."""
     if not tools:
         return ""
 
@@ -378,29 +378,27 @@
     )
 
     lines.append(
-        "When you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:"
+        "When you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:"
     )
-    lines.append("[function_calls]")
-    lines.append("[call:tool_name]")
+    lines.append("[ToolCalls]")
+    lines.append("[Call:tool_name]")
     lines.append("@args")
-    lines.append("")
-    lines.append("<<<ARG:arg_name>>>")
+    lines.append("<<<CallParameter:arg_name>>>")
     lines.append("value")
-    lines.append("<<<END:arg_name>>>")
-    lines.append("")
-    lines.append("[/call]")
-    lines.append("[/function_calls]")
+    lines.append("<<<EndCallParameter>>>")
+    lines.append("[/Call]")
+    lines.append("[/ToolCalls]")
     lines.append(
-        "CRITICAL: Arguments MUST use <<<ARG:arg_name>>>...<<<END:arg_name>>> tags. Content inside tags can be any format."
+        "CRITICAL: Every argument MUST be enclosed in <<<CallParameter:arg_name>>>...<<<EndCallParameter>>>. Output as RAW text. Content inside tags can be any format."
    )
     lines.append(
-        "If multiple tools are needed, list them sequentially within the same [function_calls] block."
+        "If multiple tools are needed, list them sequentially within the same [ToolCalls] block."
     )
     lines.append(
-        "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
+        "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag."
     )
     lines.append(
-        "Note: Tool results are returned in a [function_responses] block using @results and <<<RESULT>>> tags."
+        "Note: Tool results are returned in a [ToolResults] block using @results and <<<ToolResult>>> tags."
     )
 
     return "\n".join(lines)
@@ -771,8 +769,8 @@
 
 class StreamingOutputFilter:
     """
-    State Machine filter to suppress technical markers, tool calls, and system hints.
-    Handles fragmentation where markers are split across multiple chunks.
+    Filter to suppress technical protocol markers, tool calls, and system hints from the stream.
+    Uses a state machine to handle fragmentation where markers are split across multiple chunks.
""" def __init__(self): @@ -783,28 +781,32 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[function_calls]", "\\[function_calls\\]"], - "ends": ["[/function_calls]", "\\[/function_calls\\]"], + "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], + "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], }, "ORPHAN": { - "starts": ["[call:", "\\[call:"], - "ends": ["[/call]", "\\[/call\\]"], + "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], + "ends": ["[/Call]", "\\[/Call\\]"], }, "RESP": { - "starts": ["[function_responses]", "\\[function_responses\\]"], - "ends": ["[/function_responses]", "\\[/function_responses\\]"], + "starts": ["[ToolResults]", "\\[ToolResults\\]"], + "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], }, "ARG": { - "starts": ["<<>>", "\\<\\<\\\\>\\>"], }, "RESULT": { - "starts": ["<<>>", "\\<\\<\\\\>\\>"], - "ends": ["<<>>", "\\<\\<\\\\>\\>"], + "starts": ["<<>>", "\\<\\<\\\\>\\>"], + "ends": ["<<>>", "\\<\\<\\\\>\\>"], }, "TAG": { - "starts": ["<|im_start|>", "\\<|im_start|\\>"], - "ends": ["<|im_end|>", "\\<|im_end|\\>"], + "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], + "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], }, } @@ -815,10 +817,20 @@ def __init__(self): "ends": [TOOL_HINT_LINE_END], } + self.ORPHAN_ENDS = [ + "<|im_end|>", + "\\<|im\\_end|\\>", + "[/Call]", + "\\[/Call\\]", + "[/ToolCalls]", + "\\[/ToolCalls\\]", + ] + self.WATCH_MARKERS = [] for cfg in self.STATE_MARKERS.values(): self.WATCH_MARKERS.extend(cfg["starts"]) self.WATCH_MARKERS.extend(cfg.get("ends", [])) + self.WATCH_MARKERS.extend(self.ORPHAN_ENDS) def process(self, chunk: str) -> str: self.buffer += chunk @@ -834,8 +846,12 @@ def process(self, chunk: str) -> str: if idx != -1: indices.append((idx, m_type, len(p))) + for p in self.ORPHAN_ENDS: + idx = buf_low.find(p.lower()) + if idx != -1: + indices.append((idx, "SKIP", len(p))) + if not indices: - # Guard against split markers (case-insensitive) keep_len = 0 for marker in self.WATCH_MARKERS: m_low = marker.lower() @@ -854,6 +870,10 @@ def process(self, chunk: str) -> str: output.append(self.buffer[:idx]) self.buffer = self.buffer[idx:] + if m_type == "SKIP": + self.buffer = self.buffer[m_len:] + continue + self.state = f"IN_{m_type}" if m_type in ("TOOL", "ORPHAN"): self.block_buffer = "" @@ -886,17 +906,12 @@ def process(self, chunk: str) -> str: found_idx, found_len = idx, len(p) if found_idx != -1: - bracket_idx = self.buffer.find(">", found_idx + found_len) - if bracket_idx != -1: - end_pos = bracket_idx + 1 - while end_pos < len(self.buffer) and self.buffer[end_pos] == ">": - end_pos += 1 - - self.buffer = self.buffer[end_pos:] - self.state = "NORMAL" - else: - break + self.buffer = self.buffer[found_idx + found_len :] + self.state = "NORMAL" else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.buffer = self.buffer[-max_end_len:] break elif self.state == "IN_RESULT": @@ -911,6 +926,9 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.buffer = self.buffer[-max_end_len:] break elif self.state == "IN_RESP": @@ -940,6 +958,10 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.block_buffer += self.buffer[:-max_end_len] + self.buffer = self.buffer[-max_end_len:] break elif 
self.state == "IN_ORPHAN": @@ -955,6 +977,10 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[found_idx + found_len :] self.state = "NORMAL" else: + max_end_len = max(len(p) for p in cfg["ends"]) + if len(self.buffer) > max_end_len: + self.block_buffer += self.buffer[:-max_end_len] + self.buffer = self.buffer[-max_end_len:] break elif self.state == "IN_TAG": @@ -996,16 +1022,12 @@ def process(self, chunk: str) -> str: return "".join(output) def flush(self) -> str: + """Release remaining buffer content and perform final cleanup at stream end.""" res = "" - if self.state == "IN_TOOL": - orphan_starts = self.STATE_MARKERS["ORPHAN"]["starts"] - is_orphan = any(p.lower() in self.block_buffer.lower() for p in orphan_starts) - if not is_orphan: - res = f"{self.STATE_MARKERS['TOOL']['starts'][0]}{self.block_buffer}" + if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): + res = "" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer - elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): - res = "" elif self.state == "NORMAL": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index c955456..ba203d9 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -78,8 +78,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a single Message object into a format suitable for the Gemini API. - Extracts text fragments, handles images and files, and appends tool call blocks if present. + Process a Message into Gemini API format using the PascalCase technical protocol. + Extracts text, handles files, and appends ToolCalls/ToolResults blocks. 
""" files: list[Path | str] = [] text_fragments: list[str] = [] @@ -118,13 +118,13 @@ async def process_message( tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() res_block = ( - f"[response:{tool_name}]\n" - f"@results\n\n" - f"<<>>\n{combined_content}\n<<>>\n\n" - f"[/response]" + f"[Result:{tool_name}]\n" + f"@results\n" + f"<<>>\n{combined_content}\n<<>>\n" + f"[/Result]" ) if wrap_tool: - text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"] + text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] else: text_fragments = [res_block] @@ -132,22 +132,24 @@ async def process_message( tool_blocks: list[str] = [] for call in message.tool_calls: args_text = call.function.arguments.strip() - formatted_args = "\n@args\n" + formatted_args = "@args\n" try: parsed_args = orjson.loads(args_text) if isinstance(parsed_args, dict): for k, v in parsed_args.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_args += f"\n<<>>\n{val_str}\n<<>>\n" + formatted_args += ( + f"<<>>\n{val_str}\n<<>>\n" + ) else: formatted_args += args_text except orjson.JSONDecodeError: formatted_args += args_text - tool_blocks.append(f"[call:{call.function.name}]{formatted_args}\n[/call]") + tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]") if tool_blocks: - tool_section = "[function_calls]\n" + "\n".join(tool_blocks) + "\n[/function_calls]" + tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) diff --git a/app/services/lmdb.py b/app/services/lmdb.py index a90c684..ad92bbf 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -33,9 +33,7 @@ def _fuzzy_normalize(text: str | None) -> str | None: def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: - """ - Perform semantic normalization for hashing. - """ + """Perform safe semantic normalization for hashing using helper utilities.""" if text is None: return None diff --git a/app/utils/helper.py b/app/utils/helper.py index dfb4abd..25f9c9b 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,42 +18,43 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n" - "[function_calls]\n" - "[call:tool_name]\n" + "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" + "[ToolCalls]\n" + "[Call:tool_name]\n" "@args\n" - "\n<<>>\n" + "<<>>\n" "value\n" - "<<>>\n" - "[/call]\n" - "[/function_calls]\n" - "CRITICAL: Arguments MUST use <<>>...<<>> tags. Content inside tags can be any format.\n" + "<<>>\n" + "[/Call]\n" + "[/ToolCalls]\n" + "CRITICAL: Every argument MUST be enclosed in <<>>...<<>>. Output as RAW text. 
Content inside tags can be any format.\n" ) TOOL_BLOCK_RE = re.compile( - r"\\?\[function_calls\\?]\s*(.*?)\s*\\?\[/function_calls\\?]", re.DOTALL | re.IGNORECASE + r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE ) TOOL_CALL_RE = re.compile( - r"\\?\[call:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/call\\?]", re.DOTALL | re.IGNORECASE + r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE ) RESPONSE_BLOCK_RE = re.compile( - r"\\?\[function_responses\\?]\s*(.*?)\s*\\?\[/function_responses\\?]", + r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]", re.DOTALL | re.IGNORECASE, ) RESPONSE_ITEM_RE = re.compile( - r"\\?\[response:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/response\\?]", re.DOTALL | re.IGNORECASE + r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]", + re.DOTALL | re.IGNORECASE, ) TAGGED_ARG_RE = re.compile( - r"(?:\\?<){3}ARG:([^>\\]+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:\1(?:\\?>){3}", + r"(?:\\?<){3}CallParameter\\?:((?:[^>\\]|\\.)+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndCallParameter(?:\\?>){3}", re.DOTALL | re.IGNORECASE, ) TAGGED_RESULT_RE = re.compile( - r"(?:\\?<){3}RESULT(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:RESULT(?:\\?>){3}", + r"(?:\\?<){3}ToolResult(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndToolResult(?:\\?>){3}", re.DOTALL | re.IGNORECASE, ) -CONTROL_TOKEN_RE = re.compile(r"\\?<\|im_(?:start|end)\|\\?>") -COMMONMARK_UNESCAPE_RE = re.compile( - r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])" -) # See: https://spec.commonmark.org/current/#backslash-escapes +CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE) +CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE) +CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE) +COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -61,7 +62,7 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str: - """Surround content with role tags""" + """Surround content with ChatML role tags.""" if role not in VALID_TAG_ROLES: logger.warning(f"Unknown role: {role}, returning content without tags") return content @@ -85,12 +86,12 @@ def normalize_llm_text(s: str) -> str: def unescape_llm_text(s: str) -> str: - r"""Unescape characters escaped by Gemini Web's post-processing.""" + """Unescape characters escaped by Gemini Web's post-processing (e.g., \\_ to _).""" return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) def estimate_tokens(text: str | None) -> int: - """Estimate the number of tokens heuristically based on character count""" + """Estimate the number of tokens heuristically based on character count.""" if not text: return 0 return int(len(text) / 3) @@ -99,6 +100,7 @@ def estimate_tokens(text: str | None) -> int: async def save_file_to_tempfile( file_in_base64: str, file_name: str = "", tempdir: Path | None = None ) -> Path: + """Decode base64 file data and save to a temporary file.""" data = base64.b64decode(file_in_base64) suffix = Path(file_name).suffix if file_name else ".bin" @@ -110,6 +112,7 @@ async def save_file_to_tempfile( async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: + """Download content from a URL and save to a temporary file.""" data: bytes | None = None suffix: str | None = None if url.startswith("data:image/"): @@ -148,67 +151,48 @@ async 
def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: def strip_tagged_blocks(text: str) -> str: - """Remove <|im_start|>role ... <|im_end|> sections. - - tool blocks are removed entirely (including content). - - other roles: remove markers and role, keep inner content. + """ + Remove ChatML role blocks (<|im_start|>role...<|im_end|>). + Role 'tool' blocks are removed entirely; others have markers stripped but content preserved. + Handles both raw and escaped markers consistently. """ if not text: return text - result: list[str] = [] + result = [] idx = 0 - length = len(text) - start_marker = "<|im_start|>" - end_marker = "<|im_end|>" - - while idx < length: - start = text.find(start_marker, idx) - if start == -1: + while idx < len(text): + match_start = CHATML_START_RE.search(text, idx) + if not match_start: result.append(text[idx:]) break - result.append(text[idx:start]) + result.append(text[idx : match_start.start()]) + role = match_start.group(1).lower() + content_start = match_start.end() - role_start = start + len(start_marker) - newline = text.find("\n", role_start) - if newline == -1: - result.append(text[start:]) + match_end = CHATML_END_RE.search(text, content_start) + if not match_end: + if role != "tool": + result.append(text[content_start:]) break - role = text[role_start:newline].strip().lower() - - end = text.find(end_marker, newline + 1) - if end == -1: - if role == "tool": - break - else: - result.append(text[newline + 1 :]) - break - - block_end = end + len(end_marker) + if role != "tool": + result.append(text[content_start : match_end.start()]) - if role == "tool": - idx = block_end - continue - - content = text[newline + 1 : end] - result.append(content) - idx = block_end + idx = match_end.end() return "".join(result) def strip_system_hints(text: str) -> str: - """Remove system-level hint text from a given string.""" + """Remove system hints, ChatML tags, and technical protocol markers from text.""" if not text: return text - # Remove the full hints first cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "") - # Remove fragments or multi-line blocks using derived constants if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END: - # Match from the start line to the end line, inclusive, handling internal modifications pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?" cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL) @@ -218,20 +202,26 @@ def strip_system_hints(text: str) -> str: cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned) cleaned = strip_tagged_blocks(cleaned) + cleaned = CONTROL_TOKEN_RE.sub("", cleaned) + cleaned = TOOL_BLOCK_RE.sub("", cleaned) + cleaned = TOOL_CALL_RE.sub("", cleaned) + cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) + cleaned = RESPONSE_ITEM_RE.sub("", cleaned) + cleaned = TAGGED_ARG_RE.sub("", cleaned) + cleaned = TAGGED_RESULT_RE.sub("", cleaned) + return cleaned def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]: """ - Unified engine for stripping tool call blocks and extracting tool metadata. - If extract=True, parses JSON arguments and assigns deterministic call IDs. + Extract tool metadata and return text stripped of technical markers. + Arguments are parsed into JSON and assigned deterministic call IDs. 
""" if not text: return text, [] - cleaned = strip_system_hints(text) - tool_calls: list[ToolCall] = [] def _create_tool_call(name: str, raw_args: str) -> None: @@ -271,45 +261,27 @@ def _create_tool_call(name: str, raw_args: str) -> None: ) ) - all_calls = [] - for match in TOOL_CALL_RE.finditer(cleaned): - all_calls.append( - { - "start": match.start(), - "name": unescape_llm_text((match.group(1) or "").strip()), - "args": (match.group(2) or "").strip(), - } - ) - - all_calls.sort(key=lambda x: x["start"]) - - if extract: - for call in all_calls: - _create_tool_call(call["name"], call["args"]) + for match in TOOL_CALL_RE.finditer(text): + _create_tool_call(match.group(1), match.group(2)) - cleaned = TOOL_BLOCK_RE.sub("", cleaned) - cleaned = TOOL_CALL_RE.sub("", cleaned) - cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) - cleaned = RESPONSE_ITEM_RE.sub("", cleaned) - cleaned = TAGGED_ARG_RE.sub("", cleaned) - cleaned = TAGGED_RESULT_RE.sub("", cleaned) + cleaned = strip_system_hints(text) return cleaned, tool_calls def remove_tool_call_blocks(text: str) -> str: - """Strip tool call code blocks from text.""" + """Strip tool call blocks from text for display.""" cleaned, _ = _process_tools_internal(text, extract=False) return cleaned def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: - """Extract tool call definitions and return cleaned text.""" + """Extract tool calls and return cleaned text.""" return _process_tools_internal(text, extract=True) def text_from_message(message: Message) -> str: - """Return text content from a message for token estimation.""" + """Concatenate text and tool arguments from a message for token estimation.""" base_text = "" if isinstance(message.content, str): base_text = message.content @@ -329,7 +301,6 @@ def text_from_message(message: Message) -> str: def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: """Return image dimensions (width, height) if PNG or JPEG headers are present.""" - # PNG: dimensions stored in bytes 16..24 of the IHDR chunk if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"): try: width, height = struct.unpack(">II", data[16:24]) @@ -337,7 +308,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: except struct.error: return None, None - # JPEG: dimensions stored in SOF segment; iterate through markers to locate it if len(data) >= 4 and data[0:2] == b"\xff\xd8": idx = 2 length = len(data) @@ -357,7 +327,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: 0xCF, } while idx < length: - # Find marker alignment (markers are prefixed with 0xFF bytes) if data[idx] != 0xFF: idx += 1 continue @@ -380,7 +349,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: if marker in sof_markers: if idx + 4 < length: - # Skip precision byte at idx, then read height/width (big-endian) height = (data[idx + 1] << 8) + data[idx + 2] width = (data[idx + 3] << 8) + data[idx + 4] return int(width), int(height) From 0d18e9e84525c346bf4cf5fb3b545f7884f2157f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 16:55:17 +0700 Subject: [PATCH 102/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. 
---
 app/server/chat.py     | 25 +++++++++++--------------
 app/services/client.py | 14 +++-----------
 app/utils/helper.py    | 15 ++++++---------
 3 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 4262d0d..66c6d11 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -382,14 +382,11 @@ def _build_tool_prompt(
     )
     lines.append("[ToolCalls]")
     lines.append("[Call:tool_name]")
-    lines.append("@args")
-    lines.append("<<<CallParameter:arg_name>>>")
-    lines.append("value")
-    lines.append("<<<EndCallParameter>>>")
+    lines.append("[CallParameter:arg_name]value[/CallParameter]")
     lines.append("[/Call]")
     lines.append("[/ToolCalls]")
     lines.append(
-        "CRITICAL: Every argument MUST be enclosed in <<<CallParameter:arg_name>>>...<<<EndCallParameter>>>. Output as RAW text. Content inside tags can be any format."
+        "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format."
     )
     lines.append(
         "If multiple tools are needed, list them sequentially within the same [ToolCalls] block."
@@ -398,7 +395,7 @@ def _build_tool_prompt(
         "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag."
     )
     lines.append(
-        "Note: Tool results are returned in a [ToolResults] block using @results and <<<ToolResult>>> tags."
+        "Note: Tool results are returned in a [ToolResults] block using [ToolResult] tags."
     )
 
     return "\n".join(lines)
@@ -793,16 +790,12 @@ def __init__(self):
             "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
         },
         "ARG": {
-            "starts": ["<<<CallParameter:", "\\<\\<\\<CallParameter:"],
-            "ends": ["<<<EndCallParameter>>>", "\\<\\<\\<EndCallParameter\\>\\>\\>"],
+            "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"],
+            "ends": ["[/CallParameter]", "\\[/CallParameter\\]"],
         },
         "RESULT": {
-            "starts": ["<<<ToolResult>>>", "\\<\\<\\<ToolResult\\>\\>\\>"],
-            "ends": ["<<<EndToolResult>>>", "\\<\\<\\<EndToolResult\\>\\>\\>"],
+            "starts": ["[ToolResult]", "\\[ToolResult\\]"],
+            "ends": ["[/ToolResult]", "\\[/ToolResult\\]"],
         },
         "TAG": {
             "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
@@ -824,6 +817,10 @@ def __init__(self):
             "\\[/Call\\]",
             "[/ToolCalls]",
             "\\[/ToolCalls\\]",
+            "[/CallParameter]",
+            "\\[/CallParameter\\]",
+            "[/ToolResult]",
+            "\\[/ToolResult\\]",
         ]
 
         self.WATCH_MARKERS = []
diff --git a/app/services/client.py b/app/services/client.py
index ba203d9..9f9ac0f 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -118,10 +118,7 @@ async def process_message(
         tool_name = message.name or "unknown"
         combined_content = "\n".join(text_fragments).strip()
         res_block = (
-            f"[Result:{tool_name}]\n"
-            f"@results\n"
-            f"<<<ToolResult>>>\n{combined_content}\n<<<EndToolResult>>>\n"
-            f"[/Result]"
+            f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]"
         )
         if wrap_tool:
             text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"]
@@ -138,9 +135,7 @@ async def process_message(
             try:
                 parsed_args = orjson.loads(args_text)
                 if isinstance(parsed_args, dict):
                     for k, v in parsed_args.items():
                         val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                        formatted_args += (
-                            f"<<<CallParameter:{k}>>>\n{val_str}\n<<<EndCallParameter>>>\n"
-                        )
+                        formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n"
                 else:
                     formatted_args += args_text
             except orjson.JSONDecodeError:
@@ -171,7 +166,6 @@ async def process_conversation(
     while i < len(messages):
         msg = messages[i]
         if msg.role == "tool":
-            # Group consecutive tool messages
             tool_blocks: list[str] = []
             while i < len(messages) and messages[i].role == "tool":
                 part, part_files = await GeminiClientWrapper.process_message(
@@ -182,9 +176,7 @@ async def process_conversation(
                 i += 1
 
             combined_tool_content = "\n".join(tool_blocks)
-            wrapped_content = (
f"[function_responses]\n{combined_tool_content}\n[/function_responses]" - ) + wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/utils/helper.py b/app/utils/helper.py index 25f9c9b..4172154 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -21,13 +21,10 @@ "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" "[ToolCalls]\n" "[Call:tool_name]\n" - "@args\n" - "<<>>\n" - "value\n" - "<<>>\n" + "[CallParameter:arg_name]value[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n" - "CRITICAL: Every argument MUST be enclosed in <<>>...<<>>. Output as RAW text. Content inside tags can be any format.\n" + "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format.\n" ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE @@ -44,11 +41,11 @@ re.DOTALL | re.IGNORECASE, ) TAGGED_ARG_RE = re.compile( - r"(?:\\?<){3}CallParameter\\?:((?:[^>\\]|\\.)+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndCallParameter(?:\\?>){3}", + r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]", re.DOTALL | re.IGNORECASE, ) TAGGED_RESULT_RE = re.compile( - r"(?:\\?<){3}ToolResult(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndToolResult(?:\\?>){3}", + r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]", re.DOTALL | re.IGNORECASE, ) CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE) @@ -86,7 +83,7 @@ def normalize_llm_text(s: str) -> str: def unescape_llm_text(s: str) -> str: - """Unescape characters escaped by Gemini Web's post-processing (e.g., \\_ to _).""" + """Unescape characters escaped by Gemini Web's post-processing.""" return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) @@ -240,7 +237,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = orjson.dumps(args_dict).decode("utf-8") logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}") else: - cleaned_raw = raw_args.replace("@args", "").strip() + cleaned_raw = raw_args.strip() if not cleaned_raw: logger.debug(f"Tool '{name}' called without arguments.") else: From 8fa4329c5a1483784876630e0a891e7dba781fdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 17:11:30 +0700 Subject: [PATCH 103/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/server/chat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 66c6d11..c31a079 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -49,6 +49,7 @@ estimate_tokens, extract_image_dimensions, extract_tool_calls, + remove_tool_call_blocks, strip_system_hints, text_from_message, ) @@ -221,7 +222,7 @@ def _process_llm_output( structured_requirement: StructuredOutputRequirement | None, ) -> tuple[str, str, list[Any]]: """ - Common post-processing logic for Gemini output. + Post-process Gemini output to extract tool calls and prepare clean text for display and storage. 
Returns: (visible_text, storage_output, tool_calls) """ visible_with_think, tool_calls = extract_tool_calls(raw_output_with_think) @@ -230,7 +231,7 @@ def _process_llm_output( visible_output = visible_with_think.strip() - storage_output, _ = extract_tool_calls(raw_output_clean) + storage_output = remove_tool_call_blocks(raw_output_clean) storage_output = storage_output.strip() if structured_requirement: From dcd7276ee41a202eb840494c986074da3a53499d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 20:36:20 +0700 Subject: [PATCH 104/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/services/client.py | 33 +-------------------------------- app/utils/helper.py | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 36 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 9f9ac0f..6ab80cd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -1,4 +1,3 @@ -import re from pathlib import Path from typing import Any, cast @@ -15,13 +14,6 @@ save_url_to_tempfile, ) -FILE_PATH_PATTERN = re.compile( - r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", - re.IGNORECASE, -) -GOOGLE_SEARCH_LINK_PATTERN = re.compile( - r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" -) _UNSET = object() @@ -199,27 +191,4 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str: else: text += str(response) - text = normalize_llm_text(text) - - def extract_file_path_from_display_text(text_content: str) -> str | None: - match = re.match(FILE_PATH_PATTERN, text_content) - if match: - return match.group(1) - return None - - def replacer(match: re.Match) -> str: - display_text = str(match.group(1)).strip() - google_search_prefix = match.group(2) - query_part = match.group(3) - - file_path = extract_file_path_from_display_text(display_text) - - if file_path: - # If it's a file path, transform it into a self-referencing Markdown link - return f"[`{file_path}`]({file_path})" - else: - # Otherwise, reconstruct the original Google search link with the display_text - original_google_search_url = f"{google_search_prefix}{query_part}" - return f"[`{display_text}`]({original_google_search_url})" - - return re.sub(GOOGLE_SEARCH_LINK_PATTERN, replacer, text) + return normalize_llm_text(text) diff --git a/app/utils/helper.py b/app/utils/helper.py index 4172154..ec39ebc 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -52,6 +52,16 @@ CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE) CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE) COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") +FILE_PATH_PATTERN = re.compile( + r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", + re.IGNORECASE, +) +GOOGLE_SEARCH_LINK_PATTERN = re.compile( + r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
+) +CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}") +CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") +CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -82,9 +92,26 @@ def normalize_llm_text(s: str) -> str: return s +def _strip_google_search_links(match: re.Match) -> str: + """Extract local Markdown link from Google Search links if applicable.""" + display_text = str(match.group(1)).strip() + if FILE_PATH_PATTERN.match(display_text): + return f"[`{display_text}`]({display_text})" + return match.group(0) + + def unescape_llm_text(s: str) -> str: - """Unescape characters escaped by Gemini Web's post-processing.""" - return COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + """Unescape and mend text fragments broken by Gemini Web's post-processing.""" + if not s: + return "" + + s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) + + s = CONFLICT_START_RE.sub("<<<<<<<", s) + s = CONFLICT_SEP_RE.sub("=======", s) + s = CONFLICT_END_RE.sub(">>>>>>>", s) + + return GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) def estimate_tokens(text: str | None) -> int: @@ -235,11 +262,11 @@ def _create_tool_call(name: str, raw_args: str) -> None: if arg_matches: args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches} arguments = orjson.dumps(args_dict).decode("utf-8") - logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}") + logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}") else: cleaned_raw = raw_args.strip() if not cleaned_raw: - logger.debug(f"Tool '{name}' called without arguments.") + logger.debug(f"Successfully parsed 0 arguments for tool: {name}") else: logger.warning( f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" From 737aa3aa4ac51a377ab3687e73623c7943c14872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Thu, 12 Feb 2026 23:24:44 +0700 Subject: [PATCH 105/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/utils/helper.py | 66 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index ec39ebc..0a83f8d 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -100,18 +100,78 @@ def _strip_google_search_links(match: re.Match) -> str: return match.group(0) +def _remove_injected_fences(s: str) -> str: + """ + Strip anonymous Markdown code fences often injected by LLMs around + responses or tool calls, while preserving named blocks and all internal content. 
+ """ + if not s: + return "" + + lines = s.splitlines() + out = [] + in_fence = False + fence_len = 0 + is_anonymous = False + + for line in lines: + stripped = line.strip() + if stripped.startswith("```"): + count = 0 + for char in stripped: + if char == "`": + count += 1 + else: + break + + lang = stripped[count:].strip() + + if not in_fence: + in_fence = True + fence_len = count + is_anonymous = not lang + if not is_anonymous: + out.append(line) + continue + + if count >= fence_len: + if is_anonymous and lang: + out.append(line) + continue + + if not is_anonymous: + out.append(line) + in_fence = False + is_anonymous = False + fence_len = 0 + continue + + out.append(line) + + return "\n".join(out) + + def unescape_llm_text(s: str) -> str: - """Unescape and mend text fragments broken by Gemini Web's post-processing.""" + """ + Standardize and repair LLM-generated text fragments. + + Sequence: + 1. Reverse CommonMark escapes. + 2. Restore git conflict markers broken by web processing. + 3. Strip injected anonymous code fences. + 4. Process and normalize Google Search links. + """ if not s: return "" s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) - s = CONFLICT_START_RE.sub("<<<<<<<", s) s = CONFLICT_SEP_RE.sub("=======", s) s = CONFLICT_END_RE.sub(">>>>>>>", s) + s = _remove_injected_fences(s) + s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) - return GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) + return s def estimate_tokens(text: str | None) -> int: From 2c808955f8f761e1ab062c4d2cd5aab705ac050c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 08:55:58 +0700 Subject: [PATCH 106/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/utils/helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/utils/helper.py b/app/utils/helper.py index 0a83f8d..2367b72 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -61,6 +61,7 @@ ) CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}") CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") +CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}") CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -167,6 +168,7 @@ def unescape_llm_text(s: str) -> str: s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = CONFLICT_START_RE.sub("<<<<<<<", s) s = CONFLICT_SEP_RE.sub("=======", s) + s = CONFLICT_SEP_DASH_RE.sub("-------", s) s = CONFLICT_END_RE.sub(">>>>>>>", s) s = _remove_injected_fences(s) s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) From 9b0e1d5365ba323a406f77c7452283ef7a9879a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 09:23:08 +0700 Subject: [PATCH 107/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/utils/helper.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 2367b72..e612252 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -59,10 +59,10 @@ GOOGLE_SEARCH_LINK_PATTERN = re.compile( r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
) -CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}") -CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") -CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}") -CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}") +CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}") +CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=\s*(?:=\s*){6,}") +CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—]\s*(?:[-—]\s*){6,}") +CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -155,21 +155,24 @@ def _remove_injected_fences(s: str) -> str: def unescape_llm_text(s: str) -> str: """ Standardize and repair LLM-generated text fragments. + These patches are specifically designed for complex clients like Roo Code to ensure + compatibility with their specialized tool protocols (e.g., apply_diff) which may be + mangled by Gemini Web's interface or browser auto-formatting. Sequence: - 1. Reverse CommonMark escapes. - 2. Restore git conflict markers broken by web processing. + 1. Restore git conflict markers and DOUBLE any leading backslash to protect it. + 2. Reverse CommonMark escapes (consuming one level of doubled backslashes). 3. Strip injected anonymous code fences. 4. Process and normalize Google Search links. """ if not s: return "" + s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s) + s = CONFLICT_SEP_RE.sub(r"\1\1=======", s) + s = CONFLICT_SEP_DASH_RE.sub(r"\1\1-------", s) + s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s) s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) - s = CONFLICT_START_RE.sub("<<<<<<<", s) - s = CONFLICT_SEP_RE.sub("=======", s) - s = CONFLICT_SEP_DASH_RE.sub("-------", s) - s = CONFLICT_END_RE.sub(">>>>>>>", s) s = _remove_injected_fences(s) s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) From 95f87f6d905cdbb1352698e03f29e0821a224d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 09:38:08 +0700 Subject: [PATCH 108/139] Refactor: Rewrite the function call format to match the client's complex argument structure, such as in Roo Code. --- app/utils/helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index e612252..8262b85 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -60,8 +60,8 @@ r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" 
) CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}") -CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=\s*(?:=\s*){6,}") -CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—]\s*(?:[-—]\s*){6,}") +CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") +CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}") CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] @@ -169,8 +169,8 @@ def unescape_llm_text(s: str) -> str: return "" s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s) - s = CONFLICT_SEP_RE.sub(r"\1\1=======", s) - s = CONFLICT_SEP_DASH_RE.sub(r"\1\1-------", s) + s = CONFLICT_SEP_RE.sub("=======", s) + s = CONFLICT_SEP_DASH_RE.sub("-------", s) s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s) s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = _remove_injected_fences(s) From 4569689ad1280aa2808927345c1a2b29e170f997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 10:47:56 +0700 Subject: [PATCH 109/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/utils/helper.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 8262b85..89429c9 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -59,10 +59,10 @@ GOOGLE_SEARCH_LINK_PATTERN = re.compile( r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?" ) -CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}") -CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}") -CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}") -CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}") +CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}(?:\s*(SEARCH)\b)?", re.IGNORECASE) +CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=(?:\s*=){6,}") +CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—](?:\s*[-—]){6,}") +CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}(?:\s*(REPLACE)\b)?", re.IGNORECASE) TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -168,10 +168,14 @@ def unescape_llm_text(s: str) -> str: if not s: return "" - s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s) - s = CONFLICT_SEP_RE.sub("=======", s) - s = CONFLICT_SEP_DASH_RE.sub("-------", s) - s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s) + s = CONFLICT_START_RE.sub( + lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s + ) + s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s) + s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s) + s = CONFLICT_END_RE.sub( + lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s + ) s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = _remove_injected_fences(s) s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) From 5a65cb6a9ebb6a51a21016f5d977bbef76f23f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 10:56:50 +0700 Subject: [PATCH 110/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. 
- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/utils/helper.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 89429c9..ff339e1 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -154,28 +154,29 @@ def _remove_injected_fences(s: str) -> str: def unescape_llm_text(s: str) -> str: """ - Standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure - compatibility with their specialized tool protocols (e.g., apply_diff) which may be - mangled by Gemini Web's interface or browser auto-formatting. + Standardize and repair LLM-generated text fragments for specialized client protocols. + Designed to ensure compatibility with clients like Roo Code by fixing + mangled conflict markers, escaping issues, and injected Markdown formatting. Sequence: - 1. Restore git conflict markers and DOUBLE any leading backslash to protect it. - 2. Reverse CommonMark escapes (consuming one level of doubled backslashes). - 3. Strip injected anonymous code fences. + 1. Normalize git conflict markers (handles mangled spacing and keyword standardization). + 2. Reverse CommonMark escapes (removes leading backslashes from structural markers). + 3. Strip injected anonymous Markdown code fences. 4. Process and normalize Google Search links. """ if not s: return "" - s = CONFLICT_START_RE.sub( - lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s - ) - s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s) - s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s) - s = CONFLICT_END_RE.sub( - lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s - ) + if any(c in s for c in ("<", "=", ">", "-", "—")): + s = CONFLICT_START_RE.sub( + lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s + ) + s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s) + s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s) + s = CONFLICT_END_RE.sub( + lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s + ) + s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = _remove_injected_fences(s) s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s) From 8a03c3347bce1e41bb0b694b42095aec62a9e6cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 12:18:06 +0700 Subject: [PATCH 111/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. 
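
A minimal before/after sketch of the intended rewrite (the path is made up):

    Before: [app/utils/helper.py](https://www.google.com/search?q=app/utils/helper.py)
    After:  app/utils/helper.py

The display text (or, for bare URLs, the unquoted query) is kept only when it
matches FILE_PATH_PATTERN; anything that does not look like a file reference
is left untouched.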
---
 app/utils/helper.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index ff339e1..fba276a 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -8,7 +8,7 @@
 import tempfile
 import unicodedata
 from pathlib import Path
-from urllib.parse import urlparse
+from urllib.parse import unquote, urlparse
 
 import httpx
 import orjson
@@ -56,8 +56,13 @@
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
 )
-GOOGLE_SEARCH_LINK_PATTERN = re.compile(
-    r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
+GOOGLE_SEARCH_PATTERN = re.compile(
+    r"(?P<md_start>`?\[`?)?"
+    r"(?P<text>[^]]+)?"
+    r"(?(md_start)`?]\()?"
+    r"https://www\.google\.com/search\?q=(?P<query>[^&\s\"'<>)]+)"
+    r"(?(md_start)\)?`?)",
+    re.IGNORECASE,
 )
@@ -93,11 +98,13 @@ def normalize_llm_text(s: str) -> str:
     return s
 
 
-def _strip_google_search_links(match: re.Match) -> str:
-    """Extract local Markdown link from Google Search links if applicable."""
-    display_text = str(match.group(1)).strip()
-    if FILE_PATH_PATTERN.match(display_text):
-        return f"[`{display_text}`]({display_text})"
+def _strip_google_search(match: re.Match) -> str:
+    """Extract raw text from Google Search links if it looks like a file path."""
+    text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
+    text_to_check = unquote(text_to_check.strip())
+
+    if FILE_PATH_PATTERN.match(text_to_check):
+        return text_to_check
     return match.group(0)
 
@@ -157,12 +164,6 @@ def unescape_llm_text(s: str) -> str:
     Standardize and repair LLM-generated text fragments for specialized client protocols.
     Designed to ensure compatibility with clients like Roo Code by fixing
     mangled conflict markers, escaping issues, and injected Markdown formatting.
-
-    Sequence:
-    1. Normalize git conflict markers (handles mangled spacing and keyword standardization).
-    2. Reverse CommonMark escapes (removes leading backslashes from structural markers).
-    3. Strip injected anonymous Markdown code fences.
-    4. Process and normalize Google Search links.
     """
     if not s:
         return ""
@@ -179,7 +180,7 @@ def unescape_llm_text(s: str) -> str:
 
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = _remove_injected_fences(s)
-    s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)
+    s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
 
     return s
 
@@ -413,7 +414,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
     0xC5,
     0xC6,
     0xC7,
-    0xC9,
     0xCA,
     0xCB,
     0xCD,
From 7ed21323a8b51798c3b02c1ce625e94a31b603cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Fri, 13 Feb 2026 12:22:21 +0700
Subject: [PATCH 112/139] Refactor: Update `unescape_llm_text` to standardize
 and repair LLM-generated text fragments.
- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
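
Context for the restored byte: 0xC9 is the JPEG SOF9 marker (arithmetic-coded
extended sequential), and without it extract_image_dimensions skips past the
frame header of such files and reports no dimensions. A standalone sketch of
the scan this marker set drives (illustrative only, not the app's code):

    import struct

    SOF_MARKERS = {0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7,
                   0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF}

    def jpeg_size(data: bytes) -> tuple[int, int] | None:
        """Return (width, height) from the first SOF segment, else None."""
        idx = 2  # skip the SOI marker (FF D8)
        while idx + 9 <= len(data):
            if data[idx] != 0xFF:
                idx += 1
                continue
            marker = data[idx + 1]
            if marker == 0xFF:  # fill byte, keep scanning
                idx += 1
                continue
            if 0xD0 <= marker <= 0xD9 or marker == 0x01:
                idx += 2  # standalone marker with no length field
                continue
            seg_len = struct.unpack(">H", data[idx + 2 : idx + 4])[0]
            if marker in SOF_MARKERS:
                # SOF payload: precision byte, then big-endian height, width.
                height, width = struct.unpack(">HH", data[idx + 5 : idx + 9])
                return width, height
            idx += 2 + seg_len
        return None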
--- app/utils/helper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/utils/helper.py b/app/utils/helper.py index fba276a..8faa8b9 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -414,6 +414,7 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: 0xC5, 0xC6, 0xC7, + 0xC9, 0xCA, 0xCB, 0xCD, From d92bc1c43a051c818fd0810be94710580994b40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 15:28:52 +0700 Subject: [PATCH 113/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/server/chat.py | 12 +--- app/utils/helper.py | 139 +++++++++----------------------------------- 2 files changed, 30 insertions(+), 121 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index c31a079..3f8e0cd 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -378,17 +378,7 @@ def _build_tool_prompt( f"You are required to call the tool named `{target}`. Do not call any other tool." ) - lines.append( - "When you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:" - ) - lines.append("[ToolCalls]") - lines.append("[Call:tool_name]") - lines.append("[CallParameter:arg_name]value[/CallParameter]") - lines.append("[/Call]") - lines.append("[/ToolCalls]") - lines.append( - "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format." - ) + lines.append(TOOL_WRAP_HINT.strip()) lines.append( "If multiple tools are needed, list them sequentially within the same [ToolCalls] block." ) diff --git a/app/utils/helper.py b/app/utils/helper.py index 8faa8b9..28260c3 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,13 +18,13 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" + "\nWhen calling tools, use this EXACT protocol:\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:arg_name]value[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n" - "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. 
Content inside tags can be any format.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
@@ -64,10 +64,6 @@
     r"(?(md_start)\)?`?)",
     re.IGNORECASE,
 )
-CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}(?:\s*(SEARCH)\b)?", re.IGNORECASE)
-CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=(?:\s*=){6,}")
-CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—](?:\s*[-—]){6,}")
-CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}(?:\s*(REPLACE)\b)?", re.IGNORECASE)
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -108,78 +104,36 @@ def _strip_google_search(match: re.Match) -> str:
     return match.group(0)
 
 
-def _remove_injected_fences(s: str) -> str:
+def _strip_param_fences(s: str) -> str:
     """
-    Strip anonymous Markdown code fences often injected by LLMs around
-    responses or tool calls, while preserving named blocks and all internal content.
+    Remove one layer of outermost Markdown code fences,
+    supporting nested blocks by detecting variable fence lengths.
     """
+    s = s.strip()
     if not s:
         return ""
-    lines = s.splitlines()
-    out = []
-    in_fence = False
-    fence_len = 0
-    is_anonymous = False
-
-    for line in lines:
-        stripped = line.strip()
-        if stripped.startswith("```"):
-            count = 0
-            for char in stripped:
-                if char == "`":
-                    count += 1
-                else:
-                    break
-
-            lang = stripped[count:].strip()
-
-            if not in_fence:
-                in_fence = True
-                fence_len = count
-                is_anonymous = not lang
-                if not is_anonymous:
-                    out.append(line)
-                continue
+    match = re.match(r"^(?P<fence>`{3,})", s)
+    if not match or not s.endswith(match.group("fence")):
+        return s
 
-            if count >= fence_len:
-                if is_anonymous and lang:
-                    out.append(line)
-                    continue
-
-                if not is_anonymous:
-                    out.append(line)
-                in_fence = False
-                is_anonymous = False
-                fence_len = 0
-                continue
-
-        out.append(line)
+    lines = s.splitlines()
+    if len(lines) >= 2:
+        return "\n".join(lines[1:-1])
 
-    return "\n".join(out)
+    n = len(match.group("fence"))
+    return s[n:-n].strip()
 
 
 def unescape_llm_text(s: str) -> str:
     """
-    Standardize and repair LLM-generated text fragments for specialized client protocols.
-    Designed to ensure compatibility with clients like Roo Code by fixing
-    mangled conflict markers, escaping issues, and injected Markdown formatting.
+    Standardize and repair LLM-generated text fragments (unescaping, link normalization)
+    to ensure compatibility with specialized clients like Roo Code.
""" if not s: return "" - if any(c in s for c in ("<", "=", ">", "-", "—")): - s = CONFLICT_START_RE.sub( - lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s - ) - s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s) - s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s) - s = CONFLICT_END_RE.sub( - lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s - ) - s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) - s = _remove_injected_fences(s) s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s) return s @@ -196,13 +150,11 @@ async def save_file_to_tempfile( file_in_base64: str, file_name: str = "", tempdir: Path | None = None ) -> Path: """Decode base64 file data and save to a temporary file.""" - data = base64.b64decode(file_in_base64) - suffix = Path(file_name).suffix if file_name else ".bin" - - with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp: - tmp.write(data) + with tempfile.NamedTemporaryFile( + delete=False, suffix=Path(file_name).suffix if file_name else ".bin", dir=tempdir + ) as tmp: + tmp.write(base64.b64decode(file_in_base64)) path = Path(tmp.name) - return path @@ -213,35 +165,22 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path: if url.startswith("data:image/"): metadata_part = url.split(",")[0] mime_type = metadata_part.split(":")[1].split(";")[0] - - base64_data = url.split(",")[1] - data = base64.b64decode(base64_data) - - suffix = mimetypes.guess_extension(mime_type) - if not suffix: - suffix = f".{mime_type.split('/')[1]}" + data = base64.b64decode(url.split(",")[1]) + suffix = mimetypes.guess_extension(mime_type) or f".{mime_type.split('/')[1]}" else: async with httpx.AsyncClient(follow_redirects=True) as client: resp = await client.get(url) resp.raise_for_status() data = resp.content content_type = resp.headers.get("content-type") - if content_type: - mime_type = content_type.split(";")[0].strip() - suffix = mimetypes.guess_extension(mime_type) - + suffix = mimetypes.guess_extension(content_type.split(";")[0].strip()) if not suffix: - path_url = urlparse(url).path - suffix = Path(path_url).suffix - - if not suffix: - suffix = ".bin" + suffix = Path(urlparse(url).path).suffix or ".bin" with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp: tmp.write(data) path = Path(tmp.name) - return path @@ -249,7 +188,6 @@ def strip_tagged_blocks(text: str) -> str: """ Remove ChatML role blocks (<|im_start|>role...<|im_end|>). Role 'tool' blocks are removed entirely; others have markers stripped but content preserved. - Handles both raw and escaped markers consistently. 
""" if not text: return text @@ -274,7 +212,6 @@ def strip_tagged_blocks(text: str) -> str: if role != "tool": result.append(text[content_start : match_end.start()]) - idx = match_end.end() return "".join(result) @@ -297,7 +234,6 @@ def strip_system_hints(text: str) -> str: cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned) cleaned = strip_tagged_blocks(cleaned) - cleaned = CONTROL_TOKEN_RE.sub("", cleaned) cleaned = TOOL_BLOCK_RE.sub("", cleaned) cleaned = TOOL_CALL_RE.sub("", cleaned) @@ -331,7 +267,10 @@ def _create_tool_call(name: str, raw_args: str) -> None: arg_matches = TAGGED_ARG_RE.findall(raw_args) if arg_matches: - args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches} + args_dict = { + arg_name.strip(): _strip_param_fences(arg_value) + for arg_name, arg_value in arg_matches + } arguments = orjson.dumps(args_dict).decode("utf-8") logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}") else: @@ -360,7 +299,6 @@ def _create_tool_call(name: str, raw_args: str) -> None: _create_tool_call(match.group(1), match.group(2)) cleaned = strip_system_hints(text) - return cleaned, tool_calls @@ -406,21 +344,7 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: if len(data) >= 4 and data[0:2] == b"\xff\xd8": idx = 2 length = len(data) - sof_markers = { - 0xC0, - 0xC1, - 0xC2, - 0xC3, - 0xC5, - 0xC6, - 0xC7, - 0xC9, - 0xCA, - 0xCB, - 0xCD, - 0xCE, - 0xCF, - } + sof_markers = {0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF} while idx < length: if data[idx] != 0xFF: idx += 1 @@ -431,26 +355,21 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]: break marker = data[idx] idx += 1 - if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7: continue - if idx + 1 >= length: break segment_length = (data[idx] << 8) + data[idx + 1] idx += 2 if segment_length < 2: break - if marker in sof_markers: if idx + 4 < length: height = (data[idx + 1] << 8) + data[idx + 2] width = (data[idx + 3] << 8) + data[idx + 4] return int(width), int(height) break - idx += segment_length - 2 - return None, None From 4ecad566edbe7cb1c37ad50ecc6da034c88e209a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 16:18:28 +0700 Subject: [PATCH 114/139] Refactor: Update `unescape_llm_text` to standardize and repair LLM-generated text fragments. - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. 
--- app/utils/helper.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 28260c3..99af0a5 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,13 +18,15 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "\nWhen calling tools, use this EXACT protocol:\n" + "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" "[ToolCalls]\n" "[Call:tool_name]\n" - "[CallParameter:arg_name]value[/CallParameter]\n" + "[CallParameter:arg_name]\n" + "value\n" + "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n" - "CRITICAL: Wrap ALL multi-line or complex values in a markdown code block (e.g., [CallParameter:arg_name]```\nvalue\n```[/CallParameter]) to prevent rendering corruption.\n" + "CRITICAL: If 'value' is multi-line or complex, you MUST wrap it in a markdown code block within the tags (use a fence longer than any backtick sequence in the content) to prevent rendering corruption.\n" ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From 28378b4125e8900284814040d86f3139f2ddd644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 16:19:54 +0700 Subject: [PATCH 115/139] Update dependencies --- pyproject.toml | 4 ++-- uv.lock | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d3a1aaf..93dabab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "FastAPI Server built on Gemini Web API" readme = "README.md" requires-python = "==3.12.*" dependencies = [ - "fastapi>=0.128.7", + "fastapi>=0.129.0", "gemini-webapi>=1.19.1", "lmdb>=1.7.5", "loguru>=0.7.3", @@ -31,5 +31,5 @@ indent-style = "space" [dependency-groups] dev = [ - "ruff>=0.15.0", + "ruff>=0.15.1", ] diff --git a/uv.lock b/uv.lock index c038f53..249e84b 100644 --- a/uv.lock +++ b/uv.lock @@ -65,7 +65,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.128.7" +version = "0.129.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -74,9 +74,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a0/fc/af386750b3fd8d8828167e4c82b787a8eeca2eca5c5429c9db8bb7c70e04/fastapi-0.128.7.tar.gz", hash = "sha256:783c273416995486c155ad2c0e2b45905dedfaf20b9ef8d9f6a9124670639a24", size = 375325, upload-time = "2026-02-10T12:26:40.968Z" } +sdist = { url = "https://files.pythonhosted.org/packages/48/47/75f6bea02e797abff1bca968d5997793898032d9923c1935ae2efdece642/fastapi-0.129.0.tar.gz", hash = "sha256:61315cebd2e65df5f97ec298c888f9de30430dd0612d59d6480beafbc10655af", size = 375450, upload-time = "2026-02-12T13:54:52.541Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/1a/f983b45661c79c31be575c570d46c437a5409b67a939c1b3d8d6b3ed7a7f/fastapi-0.128.7-py3-none-any.whl", hash = "sha256:6bd9bd31cb7047465f2d3fa3ba3f33b0870b17d4eaf7cdb36d1576ab060ad662", size = 103630, upload-time = "2026-02-10T12:26:39.414Z" }, + { url = "https://files.pythonhosted.org/packages/9e/dd/d0ee25348ac58245ee9f90b6f3cbb666bf01f69be7e0911f9851bddbda16/fastapi-0.129.0-py3-none-any.whl", hash = "sha256:b4946880e48f462692b31c083be0432275cbfb6e2274566b1be91479cc1a84ec", size = 102950, upload-time = "2026-02-12T13:54:54.528Z" }, ] [[package]] @@ -106,7 +106,7 @@ 
dev = [ [package.metadata] requires-dist = [ - { name = "fastapi", specifier = ">=0.128.7" }, + { name = "fastapi", specifier = ">=0.129.0" }, { name = "gemini-webapi", specifier = ">=1.19.1" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, @@ -119,7 +119,7 @@ requires-dist = [ provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.15.0" }] +dev = [{ name = "ruff", specifier = ">=0.15.1" }] [[package]] name = "gemini-webapi" @@ -361,27 +361,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.0" +version = "0.15.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c8/39/5cee96809fbca590abea6b46c6d1c586b49663d1d2830a751cc8fc42c666/ruff-0.15.0.tar.gz", hash = "sha256:6bdea47cdbea30d40f8f8d7d69c0854ba7c15420ec75a26f463290949d7f7e9a", size = 4524893, upload-time = "2026-02-03T17:53:35.357Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/dc/4e6ac71b511b141cf626357a3946679abeba4cf67bc7cc5a17920f31e10d/ruff-0.15.1.tar.gz", hash = "sha256:c590fe13fb57c97141ae975c03a1aedb3d3156030cabd740d6ff0b0d601e203f", size = 4540855, upload-time = "2026-02-12T23:09:09.998Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/88/3fd1b0aa4b6330d6aaa63a285bc96c9f71970351579152d231ed90914586/ruff-0.15.0-py3-none-linux_armv6l.whl", hash = "sha256:aac4ebaa612a82b23d45964586f24ae9bc23ca101919f5590bdb368d74ad5455", size = 10354332, upload-time = "2026-02-03T17:52:54.892Z" }, - { url = "https://files.pythonhosted.org/packages/72/f6/62e173fbb7eb75cc29fe2576a1e20f0a46f671a2587b5f604bfb0eaf5f6f/ruff-0.15.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:dcd4be7cc75cfbbca24a98d04d0b9b36a270d0833241f776b788d59f4142b14d", size = 10767189, upload-time = "2026-02-03T17:53:19.778Z" }, - { url = "https://files.pythonhosted.org/packages/99/e4/968ae17b676d1d2ff101d56dc69cf333e3a4c985e1ec23803df84fc7bf9e/ruff-0.15.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d747e3319b2bce179c7c1eaad3d884dc0a199b5f4d5187620530adf9105268ce", size = 10075384, upload-time = "2026-02-03T17:53:29.241Z" }, - { url = "https://files.pythonhosted.org/packages/a2/bf/9843c6044ab9e20af879c751487e61333ca79a2c8c3058b15722386b8cae/ruff-0.15.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:650bd9c56ae03102c51a5e4b554d74d825ff3abe4db22b90fd32d816c2e90621", size = 10481363, upload-time = "2026-02-03T17:52:43.332Z" }, - { url = "https://files.pythonhosted.org/packages/55/d9/4ada5ccf4cd1f532db1c8d44b6f664f2208d3d93acbeec18f82315e15193/ruff-0.15.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6664b7eac559e3048223a2da77769c2f92b43a6dfd4720cef42654299a599c9", size = 10187736, upload-time = "2026-02-03T17:53:00.522Z" }, - { url = "https://files.pythonhosted.org/packages/86/e2/f25eaecd446af7bb132af0a1d5b135a62971a41f5366ff41d06d25e77a91/ruff-0.15.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f811f97b0f092b35320d1556f3353bf238763420ade5d9e62ebd2b73f2ff179", size = 10968415, upload-time = "2026-02-03T17:53:15.705Z" }, - { url = "https://files.pythonhosted.org/packages/e7/dc/f06a8558d06333bf79b497d29a50c3a673d9251214e0d7ec78f90b30aa79/ruff-0.15.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:761ec0a66680fab6454236635a39abaf14198818c8cdf691e036f4bc0f406b2d", size = 11809643, upload-time = "2026-02-03T17:53:23.031Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/45/0ece8db2c474ad7df13af3a6d50f76e22a09d078af63078f005057ca59eb/ruff-0.15.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:940f11c2604d317e797b289f4f9f3fa5555ffe4fb574b55ed006c3d9b6f0eb78", size = 11234787, upload-time = "2026-02-03T17:52:46.432Z" }, - { url = "https://files.pythonhosted.org/packages/8a/d9/0e3a81467a120fd265658d127db648e4d3acfe3e4f6f5d4ea79fac47e587/ruff-0.15.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbca3d40558789126da91d7ef9a7c87772ee107033db7191edefa34e2c7f1b4", size = 11112797, upload-time = "2026-02-03T17:52:49.274Z" }, - { url = "https://files.pythonhosted.org/packages/b2/cb/8c0b3b0c692683f8ff31351dfb6241047fa873a4481a76df4335a8bff716/ruff-0.15.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9a121a96db1d75fa3eb39c4539e607f628920dd72ff1f7c5ee4f1b768ac62d6e", size = 11033133, upload-time = "2026-02-03T17:53:33.105Z" }, - { url = "https://files.pythonhosted.org/packages/f8/5e/23b87370cf0f9081a8c89a753e69a4e8778805b8802ccfe175cc410e50b9/ruff-0.15.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5298d518e493061f2eabd4abd067c7e4fb89e2f63291c94332e35631c07c3662", size = 10442646, upload-time = "2026-02-03T17:53:06.278Z" }, - { url = "https://files.pythonhosted.org/packages/e1/9a/3c94de5ce642830167e6d00b5c75aacd73e6347b4c7fc6828699b150a5ee/ruff-0.15.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:afb6e603d6375ff0d6b0cee563fa21ab570fd15e65c852cb24922cef25050cf1", size = 10195750, upload-time = "2026-02-03T17:53:26.084Z" }, - { url = "https://files.pythonhosted.org/packages/30/15/e396325080d600b436acc970848d69df9c13977942fb62bb8722d729bee8/ruff-0.15.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:77e515f6b15f828b94dc17d2b4ace334c9ddb7d9468c54b2f9ed2b9c1593ef16", size = 10676120, upload-time = "2026-02-03T17:53:09.363Z" }, - { url = "https://files.pythonhosted.org/packages/8d/c9/229a23d52a2983de1ad0fb0ee37d36e0257e6f28bfd6b498ee2c76361874/ruff-0.15.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6f6e80850a01eb13b3e42ee0ebdf6e4497151b48c35051aab51c101266d187a3", size = 11201636, upload-time = "2026-02-03T17:52:57.281Z" }, - { url = "https://files.pythonhosted.org/packages/6f/b0/69adf22f4e24f3677208adb715c578266842e6e6a3cc77483f48dd999ede/ruff-0.15.0-py3-none-win32.whl", hash = "sha256:238a717ef803e501b6d51e0bdd0d2c6e8513fe9eec14002445134d3907cd46c3", size = 10465945, upload-time = "2026-02-03T17:53:12.591Z" }, - { url = "https://files.pythonhosted.org/packages/51/ad/f813b6e2c97e9b4598be25e94a9147b9af7e60523b0cb5d94d307c15229d/ruff-0.15.0-py3-none-win_amd64.whl", hash = "sha256:dd5e4d3301dc01de614da3cdffc33d4b1b96fb89e45721f1598e5532ccf78b18", size = 11564657, upload-time = "2026-02-03T17:52:51.893Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b0/2d823f6e77ebe560f4e397d078487e8d52c1516b331e3521bc75db4272ca/ruff-0.15.0-py3-none-win_arm64.whl", hash = "sha256:c480d632cc0ca3f0727acac8b7d053542d9e114a462a145d0b00e7cd658c515a", size = 10865753, upload-time = "2026-02-03T17:53:03.014Z" }, + { url = "https://files.pythonhosted.org/packages/23/bf/e6e4324238c17f9d9120a9d60aa99a7daaa21204c07fcd84e2ef03bb5fd1/ruff-0.15.1-py3-none-linux_armv6l.whl", hash = "sha256:b101ed7cf4615bda6ffe65bdb59f964e9f4a0d3f85cbf0e54f0ab76d7b90228a", size = 10367819, upload-time = "2026-02-12T23:09:03.598Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ea/c8f89d32e7912269d38c58f3649e453ac32c528f93bb7f4219258be2e7ed/ruff-0.15.1-py3-none-macosx_10_12_x86_64.whl", 
hash = "sha256:939c995e9277e63ea632cc8d3fae17aa758526f49a9a850d2e7e758bfef46602", size = 10798618, upload-time = "2026-02-12T23:09:22.928Z" }, + { url = "https://files.pythonhosted.org/packages/5e/0f/1d0d88bc862624247d82c20c10d4c0f6bb2f346559d8af281674cf327f15/ruff-0.15.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1d83466455fdefe60b8d9c8df81d3c1bbb2115cede53549d3b522ce2bc703899", size = 10148518, upload-time = "2026-02-12T23:08:58.339Z" }, + { url = "https://files.pythonhosted.org/packages/f5/c8/291c49cefaa4a9248e986256df2ade7add79388fe179e0691be06fae6f37/ruff-0.15.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9457e3c3291024866222b96108ab2d8265b477e5b1534c7ddb1810904858d16", size = 10518811, upload-time = "2026-02-12T23:09:31.865Z" }, + { url = "https://files.pythonhosted.org/packages/c3/1a/f5707440e5ae43ffa5365cac8bbb91e9665f4a883f560893829cf16a606b/ruff-0.15.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:92c92b003e9d4f7fbd33b1867bb15a1b785b1735069108dfc23821ba045b29bc", size = 10196169, upload-time = "2026-02-12T23:09:17.306Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ff/26ddc8c4da04c8fd3ee65a89c9fb99eaa5c30394269d424461467be2271f/ruff-0.15.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fe5c41ab43e3a06778844c586251eb5a510f67125427625f9eb2b9526535779", size = 10990491, upload-time = "2026-02-12T23:09:25.503Z" }, + { url = "https://files.pythonhosted.org/packages/fc/00/50920cb385b89413f7cdb4bb9bc8fc59c1b0f30028d8bccc294189a54955/ruff-0.15.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66a6dd6df4d80dc382c6484f8ce1bcceb55c32e9f27a8b94c32f6c7331bf14fb", size = 11843280, upload-time = "2026-02-12T23:09:19.88Z" }, + { url = "https://files.pythonhosted.org/packages/5d/6d/2f5cad8380caf5632a15460c323ae326f1e1a2b5b90a6ee7519017a017ca/ruff-0.15.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a4a42cbb8af0bda9bcd7606b064d7c0bc311a88d141d02f78920be6acb5aa83", size = 11274336, upload-time = "2026-02-12T23:09:14.907Z" }, + { url = "https://files.pythonhosted.org/packages/a3/1d/5f56cae1d6c40b8a318513599b35ea4b075d7dc1cd1d04449578c29d1d75/ruff-0.15.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab064052c31dddada35079901592dfba2e05f5b1e43af3954aafcbc1096a5b2", size = 11137288, upload-time = "2026-02-12T23:09:07.475Z" }, + { url = "https://files.pythonhosted.org/packages/cd/20/6f8d7d8f768c93b0382b33b9306b3b999918816da46537d5a61635514635/ruff-0.15.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:5631c940fe9fe91f817a4c2ea4e81f47bee3ca4aa646134a24374f3c19ad9454", size = 11070681, upload-time = "2026-02-12T23:08:55.43Z" }, + { url = "https://files.pythonhosted.org/packages/9a/67/d640ac76069f64cdea59dba02af2e00b1fa30e2103c7f8d049c0cff4cafd/ruff-0.15.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:68138a4ba184b4691ccdc39f7795c66b3c68160c586519e7e8444cf5a53e1b4c", size = 10486401, upload-time = "2026-02-12T23:09:27.927Z" }, + { url = "https://files.pythonhosted.org/packages/65/3d/e1429f64a3ff89297497916b88c32a5cc88eeca7e9c787072d0e7f1d3e1e/ruff-0.15.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:518f9af03bfc33c03bdb4cb63fabc935341bb7f54af500f92ac309ecfbba6330", size = 10197452, upload-time = "2026-02-12T23:09:12.147Z" }, + { url = "https://files.pythonhosted.org/packages/78/83/e2c3bade17dad63bf1e1c2ffaf11490603b760be149e1419b07049b36ef2/ruff-0.15.1-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:da79f4d6a826caaea95de0237a67e33b81e6ec2e25fc7e1993a4015dffca7c61", size = 10693900, upload-time = "2026-02-12T23:09:34.418Z" }, + { url = "https://files.pythonhosted.org/packages/a1/27/fdc0e11a813e6338e0706e8b39bb7a1d61ea5b36873b351acee7e524a72a/ruff-0.15.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3dd86dccb83cd7d4dcfac303ffc277e6048600dfc22e38158afa208e8bf94a1f", size = 11227302, upload-time = "2026-02-12T23:09:36.536Z" }, + { url = "https://files.pythonhosted.org/packages/f6/58/ac864a75067dcbd3b95be5ab4eb2b601d7fbc3d3d736a27e391a4f92a5c1/ruff-0.15.1-py3-none-win32.whl", hash = "sha256:660975d9cb49b5d5278b12b03bb9951d554543a90b74ed5d366b20e2c57c2098", size = 10462555, upload-time = "2026-02-12T23:09:29.899Z" }, + { url = "https://files.pythonhosted.org/packages/e0/5e/d4ccc8a27ecdb78116feac4935dfc39d1304536f4296168f91ed3ec00cd2/ruff-0.15.1-py3-none-win_amd64.whl", hash = "sha256:c820fef9dd5d4172a6570e5721704a96c6679b80cf7be41659ed439653f62336", size = 11599956, upload-time = "2026-02-12T23:09:01.157Z" }, + { url = "https://files.pythonhosted.org/packages/2a/07/5bda6a85b220c64c65686bc85bd0bbb23b29c62b3a9f9433fa55f17cda93/ruff-0.15.1-py3-none-win_arm64.whl", hash = "sha256:5ff7d5f0f88567850f45081fac8f4ec212be8d0b963e385c3f7d0d2eb4899416", size = 10874604, upload-time = "2026-02-12T23:09:05.515Z" }, ] [[package]] From 3fcd01ead021e9536f1cbdaf4c4f0c2b2a0047e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 17:28:44 +0700 Subject: [PATCH 116/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/server/chat.py | 9 --------- app/utils/helper.py | 10 +++++++--- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 3f8e0cd..114eedf 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -379,15 +379,6 @@ def _build_tool_prompt( ) lines.append(TOOL_WRAP_HINT.strip()) - lines.append( - "If multiple tools are needed, list them sequentially within the same [ToolCalls] block." - ) - lines.append( - "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag." - ) - lines.append( - "Note: Tool results are returned in a [ToolResults] block using [ToolResult] tags." - ) return "\n".join(lines) diff --git a/app/utils/helper.py b/app/utils/helper.py index 99af0a5..68066a3 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,15 +18,19 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n" + "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:arg_name]\n" "value\n" "[/CallParameter]\n" "[/Call]\n" - "[/ToolCalls]\n" - "CRITICAL: If 'value' is multi-line or complex, you MUST wrap it in a markdown code block within the tags (use a fence longer than any backtick sequence in the content) to prevent rendering corruption.\n" + "[/ToolCalls]\n\n" + "CRITICAL: If 'value' contains ANY newlines or special characters, you MUST wrap it in a markdown code block (triple backticks or longer) within the tags. " + "Failure to wrap multi-line content will result in a protocol rejection. 
Use a fence longer than any backtick sequence in the content.\n\n" + "Multiple tool calls MUST be listed sequentially within the same [ToolCalls] block.\n" + "If no tool is needed, respond naturally and NEVER use any [ToolCalls] or [Call] tags.\n" + "Note: Tool results are returned in [ToolResults] blocks." ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From ef24704b3d302f76233999002717a44d4a88c444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 17:42:59 +0700 Subject: [PATCH 117/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/utils/helper.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 68066a3..a2b4c23 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,7 +18,7 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n\n" + "When calling tools, respond ONLY with a single [ToolCalls] block. NO other text allowed. EXACT syntax:\n\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:arg_name]\n" @@ -26,11 +26,9 @@ "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" - "CRITICAL: If 'value' contains ANY newlines or special characters, you MUST wrap it in a markdown code block (triple backticks or longer) within the tags. " - "Failure to wrap multi-line content will result in a protocol rejection. Use a fence longer than any backtick sequence in the content.\n\n" - "Multiple tool calls MUST be listed sequentially within the same [ToolCalls] block.\n" - "If no tool is needed, respond naturally and NEVER use any [ToolCalls] or [Call] tags.\n" - "Note: Tool results are returned in [ToolResults] blocks." + "CRITICAL: If 'value' has ANY newline, you MUST wrap it in a code block (```). " + "The tags MUST contain ONLY the code block, no other text. Use a fence longer than any backticks in content.\n" + "Multiple calls: list [Call] blocks inside [ToolCalls]. No tools: respond naturally, NO tags." ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From 4edf4cdedc6c093a426da1554948fed074705edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 18:08:52 +0700 Subject: [PATCH 118/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/utils/helper.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index a2b4c23..0efa460 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,17 +18,21 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "When calling tools, respond ONLY with a single [ToolCalls] block. NO other text allowed. EXACT syntax:\n\n" + "SYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" + "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" + "2. 
For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n" + "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n" + "EXACT SYNTAX TEMPLATE:\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:arg_name]\n" + "```\n" "value\n" + "```\n" "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" - "CRITICAL: If 'value' has ANY newline, you MUST wrap it in a code block (```). " - "The tags MUST contain ONLY the code block, no other text. Use a fence longer than any backticks in content.\n" - "Multiple calls: list [Call] blocks inside [ToolCalls]. No tools: respond naturally, NO tags." + "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags." ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From dcadabb8341a4e94cc9a44a6c9e9350e70365a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 18:23:20 +0700 Subject: [PATCH 119/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. --- app/server/chat.py | 2 +- app/utils/helper.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 114eedf..881f7c9 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -378,7 +378,7 @@ def _build_tool_prompt( f"You are required to call the tool named `{target}`. Do not call any other tool." ) - lines.append(TOOL_WRAP_HINT.strip()) + lines.append(TOOL_WRAP_HINT) return "\n".join(lines) diff --git a/app/utils/helper.py b/app/utils/helper.py index 0efa460..1e70c3b 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -18,7 +18,7 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( - "SYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" + "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n" "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n" @@ -32,7 +32,7 @@ "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" - "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags." + "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" ) TOOL_BLOCK_RE = re.compile( r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE From 83904142bb97676f09d93725ee5d4ad19f5d7cd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 19:56:04 +0700 Subject: [PATCH 120/139] Refactor: Rewrite the function call format to match the client's complex argument structure - These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols. 
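Under the hint this patch converges on, a compliant model reply and a rough server-side parse look like the sketch below. The regexes here paraphrase TOOL_BLOCK_RE, TOOL_CALL_RE, and TAGGED_ARG_RE, which additionally tolerate escaped brackets; the tool name and path are placeholders:

    import re

    reply = (
        "[ToolCalls]\n"
        "[Call:read_file]\n"
        "[CallParameter:path]\n"
        "```\n"
        "app/utils/helper.py\n"
        "```\n"
        "[/CallParameter]\n"
        "[/Call]\n"
        "[/ToolCalls]"
    )

    block = re.search(r"\[ToolCalls]\s*(.*?)\s*\[/ToolCalls]", reply, re.DOTALL | re.IGNORECASE)
    call = re.search(r"\[Call:([^\]]+)]\s*(.*?)\s*\[/Call]", block.group(1), re.DOTALL | re.IGNORECASE)
    params = re.findall(
        r"\[CallParameter:([^\]]+)]\s*(.*?)\s*\[/CallParameter]",
        call.group(2),
        re.DOTALL | re.IGNORECASE,
    )
    # call.group(1) == "read_file"; params == [("path", "```\napp/utils/helper.py\n```")],
    # and the fenced value is unwrapped before the arguments are JSON-encoded.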
--- app/utils/helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 1e70c3b..ae96f05 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -20,8 +20,8 @@ TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" - "2. For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n" - "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n" + "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" + "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" "[ToolCalls]\n" "[Call:tool_name]\n" @@ -32,6 +32,7 @@ "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" + "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" ) TOOL_BLOCK_RE = re.compile( From ce43d63ce2b2b6e042fa122d96d405c683407ef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 22:26:00 +0700 Subject: [PATCH 121/139] Refactor: Remove all escape logic handlers. - Change tool call tags to snake_case. --- app/server/chat.py | 61 +++++++++++++++---------------- app/services/client.py | 32 ++++++++--------- app/services/lmdb.py | 22 ++++++------ app/utils/helper.py | 81 ++++++++++++++++++++---------------------- app/utils/logging.py | 2 +- 5 files changed, 93 insertions(+), 105 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 881f7c9..98277d6 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -121,18 +121,18 @@ def _calculate_usage( ) -> tuple[int, int, int]: """Calculate prompt, completion and total tokens consistently.""" prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_args_text = "" + tool_params_text = "" if tool_calls: for call in tool_calls: if hasattr(call, "function"): - tool_args_text += call.function.arguments or "" + tool_params_text += call.function.arguments or "" elif isinstance(call, dict): - tool_args_text += call.get("function", {}).get("arguments", "") + tool_params_text += call.get("function", {}).get("arguments", "") completion_basis = assistant_text or "" - if tool_args_text: + if tool_params_text: completion_basis = ( - f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text + f"{completion_basis}\n{tool_params_text}" if completion_basis else tool_params_text ) completion_tokens = estimate_tokens(completion_basis) @@ -343,7 +343,7 @@ def _build_tool_prompt( tools: list[Tool], tool_choice: str | ToolChoiceFunction | None, ) -> str: - """Generate a system prompt describing available tools and the PascalCase protocol.""" + """Generate a system prompt describing available tools and the snake_case protocol.""" if not tools: return "" @@ -359,10 +359,10 @@ def _build_tool_prompt( schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) - lines.append("Arguments JSON schema:") + lines.append("Parameters JSON schema:") lines.append(schema_text) else: - 
lines.append("Arguments JSON schema: {}") + lines.append("Parameters JSON schema: {}") if tool_choice == "none": lines.append( @@ -760,28 +760,28 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], - "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], + "starts": ["[tool_calls]"], + "ends": ["[/tool_calls]"], }, "ORPHAN": { - "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], - "ends": ["[/Call]", "\\[/Call\\]"], + "starts": ["[call:"], + "ends": ["[/call]"], }, "RESP": { - "starts": ["[ToolResults]", "\\[ToolResults\\]"], - "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], + "starts": ["[tool_results]"], + "ends": ["[/tool_results]"], }, - "ARG": { - "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], - "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], + "PARAM": { + "starts": ["[call_parameter:"], + "ends": ["[/call_parameter]"], }, "RESULT": { - "starts": ["[ToolResult]", "\\[ToolResult\\]"], - "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], + "starts": ["[tool_result]"], + "ends": ["[/tool_result]"], }, "TAG": { - "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], - "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], + "starts": ["<|im_start|>"], + "ends": ["<|im_end|>"], }, } @@ -794,15 +794,10 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "\\<|im\\_end|\\>", - "[/Call]", - "\\[/Call\\]", - "[/ToolCalls]", - "\\[/ToolCalls\\]", - "[/CallParameter]", - "\\[/CallParameter\\]", - "[/ToolResult]", - "\\[/ToolResult\\]", + "[/call]", + "[/tool_calls]", + "[/call_parameter]", + "[/tool_result]", ] self.WATCH_MARKERS = [] @@ -876,8 +871,8 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[-max_end_len:] break - elif self.state == "IN_ARG": - cfg = self.STATE_MARKERS["ARG"] + elif self.state == "IN_PARAM": + cfg = self.STATE_MARKERS["PARAM"] found_idx, found_len = -1, 0 for p in cfg["ends"]: idx = buf_low.find(p.lower()) @@ -1003,7 +998,7 @@ def process(self, chunk: str) -> str: def flush(self) -> str: """Release remaining buffer content and perform final cleanup at stream end.""" res = "" - if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): + if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_PARAM", "IN_RESULT"): res = "" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index 6ab80cd..1c826e5 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -70,8 +70,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a Message into Gemini API format using the PascalCase technical protocol. - Extracts text, handles files, and appends ToolCalls/ToolResults blocks. + Process a Message into Gemini API format using the snake_case technical protocol. + Extracts text, handles files, and appends tool_calls/tool_results blocks. 
""" files: list[Path | str] = [] text_fragments: list[str] = [] @@ -109,34 +109,32 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() - res_block = ( - f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]" - ) + res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]" if wrap_tool: - text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] + text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"] else: text_fragments = [res_block] if message.tool_calls: tool_blocks: list[str] = [] for call in message.tool_calls: - args_text = call.function.arguments.strip() - formatted_args = "@args\n" + params_text = call.function.arguments.strip() + formatted_params = "" try: - parsed_args = orjson.loads(args_text) - if isinstance(parsed_args, dict): - for k, v in parsed_args.items(): + parsed_params = orjson.loads(params_text) + if isinstance(parsed_params, dict): + for k, v in parsed_params.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n" + formatted_params += f"[call_parameter:{k}]{val_str}[/call_parameter]\n" else: - formatted_args += args_text + formatted_params += params_text except orjson.JSONDecodeError: - formatted_args += args_text + formatted_params += params_text - tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]") + tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") if tool_blocks: - tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" + tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -168,7 +166,7 @@ async def process_conversation( i += 1 combined_tool_content = "\n".join(tool_blocks) - wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" + wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/services/lmdb.py b/app/services/lmdb.py index ad92bbf..2f59662 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -99,17 +99,17 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str: if message.tool_calls: calls_data = [] for tc in message.tool_calls: - args = tc.function.arguments or "{}" + params = tc.function.arguments or "{}" try: - parsed = orjson.loads(args) - canon_args = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") + parsed = orjson.loads(params) + canon_params = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: - canon_args = args + canon_params = params calls_data.append( { "name": tc.function.name, - "arguments": canon_args, + "arguments": canon_params, } ) calls_data.sort(key=lambda x: (x["name"], x["arguments"])) @@ -149,7 +149,7 @@ def __init__( """ Initialize LMDB store. - Args: + Params: db_path: Path to LMDB database directory max_db_size: Maximum database size in bytes (default: 256 MB) retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup) @@ -194,7 +194,7 @@ def _get_transaction(self, write: bool = False): """ Context manager for LMDB transactions. 
- Args: + Params: write: Whether the transaction should be writable. """ if not self._env: @@ -265,7 +265,7 @@ def store( """ Store a conversation model in LMDB. - Args: + Params: conv: Conversation model to store custom_key: Optional custom key, if not provided, hash will be used @@ -313,7 +313,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: """ Retrieve conversation data by key. - Args: + Params: key: Storage key (hash or custom key) Returns: @@ -342,7 +342,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt Search conversation data by message list. Tries raw matching, then sanitized matching, and finally fuzzy matching. - Args: + Params: model: Model name messages: List of messages to match @@ -382,7 +382,7 @@ def _find_by_message_list( """ Internal find implementation based on a message list. - Args: + Params: model: Model name messages: Message list to hash fuzzy: Whether to use fuzzy hashing diff --git a/app/utils/helper.py b/app/utils/helper.py index ae96f05..752aed9 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,48 +19,45 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" - "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" + "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" - "[ToolCalls]\n" - "[Call:tool_name]\n" - "[CallParameter:arg_name]\n" + "[tool_calls]\n" + "[call:tool_name]\n" + "[call_parameter:parameter_name]\n" "```\n" "value\n" "```\n" - "[/CallParameter]\n" - "[/Call]\n" - "[/ToolCalls]\n\n" + "[/call_parameter]\n" + "[/call]\n" + "[/tool_calls]\n\n" "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" -) -TOOL_BLOCK_RE = re.compile( - r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE + "Multiple tools: List them sequentially inside one [tool_calls] block. 
No tool: respond naturally, NEVER use protocol tags.\n" ) +TOOL_BLOCK_RE = re.compile(r"\[tool_calls]\s*(.*?)\s*\[/tool_calls]", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( - r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE + r"\[call:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE ) RESPONSE_BLOCK_RE = re.compile( - r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]", + r"\[tool_results]\s*(.*?)\s*\[/tool_results]", re.DOTALL | re.IGNORECASE, ) RESPONSE_ITEM_RE = re.compile( - r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]", + r"\[result:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/result]", re.DOTALL | re.IGNORECASE, ) -TAGGED_ARG_RE = re.compile( - r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]", +TAGGED_PARAM_RE = re.compile( + r"\[call_parameter:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call_parameter]", re.DOTALL | re.IGNORECASE, ) TAGGED_RESULT_RE = re.compile( - r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]", + r"\[tool_result]\s*(.*?)\s*\[/tool_result]", re.DOTALL | re.IGNORECASE, ) -CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE) -CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE) -CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE) -COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") +CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE) +CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE) +CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE) FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -134,15 +131,14 @@ def _strip_param_fences(s: str) -> str: return s[n:-n].strip() -def unescape_llm_text(s: str) -> str: +def _repair_param_value(s: str) -> str: """ - Standardize and repair LLM-generated text fragments (unescaping, link normalization) + Standardize and repair LLM-generated parameter values to ensure compatibility with specialized clients like Roo Code. """ if not s: return "" - s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s) return s @@ -248,7 +244,7 @@ def strip_system_hints(text: str) -> str: cleaned = TOOL_CALL_RE.sub("", cleaned) cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) cleaned = RESPONSE_ITEM_RE.sub("", cleaned) - cleaned = TAGGED_ARG_RE.sub("", cleaned) + cleaned = TAGGED_PARAM_RE.sub("", cleaned) cleaned = TAGGED_RESULT_RE.sub("", cleaned) return cleaned @@ -257,38 +253,37 @@ def strip_system_hints(text: str) -> str: def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]: """ Extract tool metadata and return text stripped of technical markers. - Arguments are parsed into JSON and assigned deterministic call IDs. + Parameters are parsed into JSON and assigned deterministic call IDs. 
""" if not text: return text, [] tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_args: str) -> None: + def _create_tool_call(name: str, raw_params: str) -> None: if not extract: return + + name = name.strip() if not name: logger.warning("Encountered tool_call without a function name.") return - name = unescape_llm_text(name.strip()) - raw_args = unescape_llm_text(raw_args) - - arg_matches = TAGGED_ARG_RE.findall(raw_args) - if arg_matches: - args_dict = { - arg_name.strip(): _strip_param_fences(arg_value) - for arg_name, arg_value in arg_matches + param_matches = TAGGED_PARAM_RE.findall(raw_params) + if param_matches: + params_dict = { + param_name.strip(): _repair_param_value(_strip_param_fences(param_value)) + for param_name, param_value in param_matches } - arguments = orjson.dumps(args_dict).decode("utf-8") - logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}") + arguments = orjson.dumps(params_dict).decode("utf-8") + logger.debug(f"Successfully parsed {len(params_dict)} parameters for tool: {name}") else: - cleaned_raw = raw_args.strip() + cleaned_raw = raw_params.strip() if not cleaned_raw: - logger.debug(f"Successfully parsed 0 arguments for tool: {name}") + logger.debug(f"Successfully parsed 0 parameters for tool: {name}") else: logger.warning( - f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" + f"Malformed parameters for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" ) arguments = "{}" @@ -323,7 +318,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: def text_from_message(message: Message) -> str: - """Concatenate text and tool arguments from a message for token estimation.""" + """Concatenate text and tool parameters from a message for token estimation.""" base_text = "" if isinstance(message.content, str): base_text = message.content @@ -335,8 +330,8 @@ def text_from_message(message: Message) -> str: base_text = "" if message.tool_calls: - tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) - base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text + tool_param_text = "".join(call.function.arguments or "" for call in message.tool_calls) + base_text = f"{base_text}\n{tool_param_text}" if base_text else tool_param_text return base_text diff --git a/app/utils/logging.py b/app/utils/logging.py index 87fcc7f..da417f1 100644 --- a/app/utils/logging.py +++ b/app/utils/logging.py @@ -15,7 +15,7 @@ def setup_logging( """ Setup loguru logging configuration to unify all project logging output - Args: + Params: level: Log level diagnose: Whether to enable diagnostic information backtrace: Whether to enable backtrace information From 8792948642f510ca294323c12e90232cb1926e06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 22:35:39 +0700 Subject: [PATCH 122/139] Refactor: Remove all escape logic handlers. - Change tool call tags to snake_case. 
--- app/services/client.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 1c826e5..90474ad 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -125,11 +125,14 @@ async def process_message( if isinstance(parsed_params, dict): for k, v in parsed_params.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_params += f"[call_parameter:{k}]{val_str}[/call_parameter]\n" + # Wrap value in triple backticks to match the required protocol hint + formatted_params += ( + f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" + ) else: - formatted_params += params_text + formatted_params += f"```\n{params_text}\n```\n" except orjson.JSONDecodeError: - formatted_params += params_text + formatted_params += f"```\n{params_text}\n```\n" tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") From 5482e0cd51fa8051a92dafb061fa4677e108b10f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 22:56:47 +0700 Subject: [PATCH 123/139] Refactor: Remove all escape logic handlers. - Change tool call tags to snake_case. --- app/services/client.py | 1 - app/utils/helper.py | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 90474ad..9a2742b 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -125,7 +125,6 @@ async def process_message( if isinstance(parsed_params, dict): for k, v in parsed_params.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - # Wrap value in triple backticks to match the required protocol hint formatted_params += ( f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" ) diff --git a/app/utils/helper.py b/app/utils/helper.py index 752aed9..3576667 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -58,6 +58,7 @@ CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE) CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE) CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE) +COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", re.IGNORECASE, @@ -131,14 +132,15 @@ def _strip_param_fences(s: str) -> str: return s[n:-n].strip() -def _repair_param_value(s: str) -> str: +def repair_param_value(s: str) -> str: """ - Standardize and repair LLM-generated parameter values + Standardize and repair LLM-generated values (unescaping, link normalization) to ensure compatibility with specialized clients like Roo Code. 
""" if not s: return "" + s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s) s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s) return s @@ -264,7 +266,9 @@ def _create_tool_call(name: str, raw_params: str) -> None: if not extract: return - name = name.strip() + name = repair_param_value(name.strip()) + raw_params = repair_param_value(raw_params) + if not name: logger.warning("Encountered tool_call without a function name.") return @@ -272,7 +276,7 @@ def _create_tool_call(name: str, raw_params: str) -> None: param_matches = TAGGED_PARAM_RE.findall(raw_params) if param_matches: params_dict = { - param_name.strip(): _repair_param_value(_strip_param_fences(param_value)) + param_name.strip(): _strip_param_fences(param_value) for param_name, param_value in param_matches } arguments = orjson.dumps(params_dict).decode("utf-8") From b324aef75d5a868d9ea409dffa2adc173f69c63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:10:53 +0700 Subject: [PATCH 124/139] Revert "Refactor: Remove all escape logic handlers." This reverts commit ce43d63c --- app/server/chat.py | 61 ++++++++++++++++--------------- app/services/client.py | 34 +++++++++--------- app/services/lmdb.py | 22 ++++++------ app/utils/helper.py | 81 +++++++++++++++++++++--------------------- app/utils/logging.py | 2 +- 5 files changed, 103 insertions(+), 97 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 98277d6..881f7c9 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -121,18 +121,18 @@ def _calculate_usage( ) -> tuple[int, int, int]: """Calculate prompt, completion and total tokens consistently.""" prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages) - tool_params_text = "" + tool_args_text = "" if tool_calls: for call in tool_calls: if hasattr(call, "function"): - tool_params_text += call.function.arguments or "" + tool_args_text += call.function.arguments or "" elif isinstance(call, dict): - tool_params_text += call.get("function", {}).get("arguments", "") + tool_args_text += call.get("function", {}).get("arguments", "") completion_basis = assistant_text or "" - if tool_params_text: + if tool_args_text: completion_basis = ( - f"{completion_basis}\n{tool_params_text}" if completion_basis else tool_params_text + f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text ) completion_tokens = estimate_tokens(completion_basis) @@ -343,7 +343,7 @@ def _build_tool_prompt( tools: list[Tool], tool_choice: str | ToolChoiceFunction | None, ) -> str: - """Generate a system prompt describing available tools and the snake_case protocol.""" + """Generate a system prompt describing available tools and the PascalCase protocol.""" if not tools: return "" @@ -359,10 +359,10 @@ def _build_tool_prompt( schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode( "utf-8" ) - lines.append("Parameters JSON schema:") + lines.append("Arguments JSON schema:") lines.append(schema_text) else: - lines.append("Parameters JSON schema: {}") + lines.append("Arguments JSON schema: {}") if tool_choice == "none": lines.append( @@ -760,28 +760,28 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[tool_calls]"], - "ends": ["[/tool_calls]"], + "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], + "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], }, "ORPHAN": { - "starts": ["[call:"], - "ends": ["[/call]"], + "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], + "ends": ["[/Call]", "\\[/Call\\]"], }, "RESP": { - "starts": 
["[tool_results]"], - "ends": ["[/tool_results]"], + "starts": ["[ToolResults]", "\\[ToolResults\\]"], + "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], }, - "PARAM": { - "starts": ["[call_parameter:"], - "ends": ["[/call_parameter]"], + "ARG": { + "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], + "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], }, "RESULT": { - "starts": ["[tool_result]"], - "ends": ["[/tool_result]"], + "starts": ["[ToolResult]", "\\[ToolResult\\]"], + "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], }, "TAG": { - "starts": ["<|im_start|>"], - "ends": ["<|im_end|>"], + "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], + "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], }, } @@ -794,10 +794,15 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "[/call]", - "[/tool_calls]", - "[/call_parameter]", - "[/tool_result]", + "\\<|im\\_end|\\>", + "[/Call]", + "\\[/Call\\]", + "[/ToolCalls]", + "\\[/ToolCalls\\]", + "[/CallParameter]", + "\\[/CallParameter\\]", + "[/ToolResult]", + "\\[/ToolResult\\]", ] self.WATCH_MARKERS = [] @@ -871,8 +876,8 @@ def process(self, chunk: str) -> str: self.buffer = self.buffer[-max_end_len:] break - elif self.state == "IN_PARAM": - cfg = self.STATE_MARKERS["PARAM"] + elif self.state == "IN_ARG": + cfg = self.STATE_MARKERS["ARG"] found_idx, found_len = -1, 0 for p in cfg["ends"]: idx = buf_low.find(p.lower()) @@ -998,7 +1003,7 @@ def process(self, chunk: str) -> str: def flush(self) -> str: """Release remaining buffer content and perform final cleanup at stream end.""" res = "" - if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_PARAM", "IN_RESULT"): + if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): res = "" elif self.state == "IN_BLOCK" and self.current_role != "tool": res = self.buffer diff --git a/app/services/client.py b/app/services/client.py index 9a2742b..6ab80cd 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -70,8 +70,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a Message into Gemini API format using the snake_case technical protocol. - Extracts text, handles files, and appends tool_calls/tool_results blocks. + Process a Message into Gemini API format using the PascalCase technical protocol. + Extracts text, handles files, and appends ToolCalls/ToolResults blocks. 
""" files: list[Path | str] = [] text_fragments: list[str] = [] @@ -109,34 +109,34 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() - res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]" + res_block = ( + f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]" + ) if wrap_tool: - text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"] + text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] else: text_fragments = [res_block] if message.tool_calls: tool_blocks: list[str] = [] for call in message.tool_calls: - params_text = call.function.arguments.strip() - formatted_params = "" + args_text = call.function.arguments.strip() + formatted_args = "@args\n" try: - parsed_params = orjson.loads(params_text) - if isinstance(parsed_params, dict): - for k, v in parsed_params.items(): + parsed_args = orjson.loads(args_text) + if isinstance(parsed_args, dict): + for k, v in parsed_args.items(): val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_params += ( - f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" - ) + formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n" else: - formatted_params += f"```\n{params_text}\n```\n" + formatted_args += args_text except orjson.JSONDecodeError: - formatted_params += f"```\n{params_text}\n```\n" + formatted_args += args_text - tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") + tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]") if tool_blocks: - tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]" + tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -168,7 +168,7 @@ async def process_conversation( i += 1 combined_tool_content = "\n".join(tool_blocks) - wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]" + wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/services/lmdb.py b/app/services/lmdb.py index 2f59662..ad92bbf 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -99,17 +99,17 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str: if message.tool_calls: calls_data = [] for tc in message.tool_calls: - params = tc.function.arguments or "{}" + args = tc.function.arguments or "{}" try: - parsed = orjson.loads(params) - canon_params = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") + parsed = orjson.loads(args) + canon_args = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8") except orjson.JSONDecodeError: - canon_params = params + canon_args = args calls_data.append( { "name": tc.function.name, - "arguments": canon_params, + "arguments": canon_args, } ) calls_data.sort(key=lambda x: (x["name"], x["arguments"])) @@ -149,7 +149,7 @@ def __init__( """ Initialize LMDB store. 
- Params: + Args: db_path: Path to LMDB database directory max_db_size: Maximum database size in bytes (default: 256 MB) retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup) @@ -194,7 +194,7 @@ def _get_transaction(self, write: bool = False): """ Context manager for LMDB transactions. - Params: + Args: write: Whether the transaction should be writable. """ if not self._env: @@ -265,7 +265,7 @@ def store( """ Store a conversation model in LMDB. - Params: + Args: conv: Conversation model to store custom_key: Optional custom key, if not provided, hash will be used @@ -313,7 +313,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: """ Retrieve conversation data by key. - Params: + Args: key: Storage key (hash or custom key) Returns: @@ -342,7 +342,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt Search conversation data by message list. Tries raw matching, then sanitized matching, and finally fuzzy matching. - Params: + Args: model: Model name messages: List of messages to match @@ -382,7 +382,7 @@ def _find_by_message_list( """ Internal find implementation based on a message list. - Params: + Args: model: Model name messages: Message list to hash fuzzy: Whether to use fuzzy hashing diff --git a/app/utils/helper.py b/app/utils/helper.py index 3576667..ae96f05 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,45 +19,47 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" - "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n" + "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" - "[tool_calls]\n" - "[call:tool_name]\n" - "[call_parameter:parameter_name]\n" + "[ToolCalls]\n" + "[Call:tool_name]\n" + "[CallParameter:arg_name]\n" "```\n" "value\n" "```\n" - "[/call_parameter]\n" - "[/call]\n" - "[/tool_calls]\n\n" + "[/CallParameter]\n" + "[/Call]\n" + "[/ToolCalls]\n\n" "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [tool_calls] block. No tool: respond naturally, NEVER use protocol tags.\n" + "Multiple tools: List them sequentially inside one [ToolCalls] block. 
No tool: respond naturally, NEVER use protocol tags.\n" +) +TOOL_BLOCK_RE = re.compile( + r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE ) -TOOL_BLOCK_RE = re.compile(r"\[tool_calls]\s*(.*?)\s*\[/tool_calls]", re.DOTALL | re.IGNORECASE) TOOL_CALL_RE = re.compile( - r"\[call:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE + r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE ) RESPONSE_BLOCK_RE = re.compile( - r"\[tool_results]\s*(.*?)\s*\[/tool_results]", + r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]", re.DOTALL | re.IGNORECASE, ) RESPONSE_ITEM_RE = re.compile( - r"\[result:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/result]", + r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]", re.DOTALL | re.IGNORECASE, ) -TAGGED_PARAM_RE = re.compile( - r"\[call_parameter:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call_parameter]", +TAGGED_ARG_RE = re.compile( + r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]", re.DOTALL | re.IGNORECASE, ) TAGGED_RESULT_RE = re.compile( - r"\[tool_result]\s*(.*?)\s*\[/tool_result]", + r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]", re.DOTALL | re.IGNORECASE, ) -CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE) -CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE) -CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE) +CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE) +CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE) +CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE) COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") FILE_PATH_PATTERN = re.compile( r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$", @@ -132,9 +134,9 @@ def _strip_param_fences(s: str) -> str: return s[n:-n].strip() -def repair_param_value(s: str) -> str: +def unescape_llm_text(s: str) -> str: """ - Standardize and repair LLM-generated values (unescaping, link normalization) + Standardize and repair LLM-generated text fragments (unescaping, link normalization) to ensure compatibility with specialized clients like Roo Code. """ if not s: @@ -246,7 +248,7 @@ def strip_system_hints(text: str) -> str: cleaned = TOOL_CALL_RE.sub("", cleaned) cleaned = RESPONSE_BLOCK_RE.sub("", cleaned) cleaned = RESPONSE_ITEM_RE.sub("", cleaned) - cleaned = TAGGED_PARAM_RE.sub("", cleaned) + cleaned = TAGGED_ARG_RE.sub("", cleaned) cleaned = TAGGED_RESULT_RE.sub("", cleaned) return cleaned @@ -255,39 +257,38 @@ def strip_system_hints(text: str) -> str: def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]: """ Extract tool metadata and return text stripped of technical markers. - Parameters are parsed into JSON and assigned deterministic call IDs. + Arguments are parsed into JSON and assigned deterministic call IDs. 
""" if not text: return text, [] tool_calls: list[ToolCall] = [] - def _create_tool_call(name: str, raw_params: str) -> None: + def _create_tool_call(name: str, raw_args: str) -> None: if not extract: return - - name = repair_param_value(name.strip()) - raw_params = repair_param_value(raw_params) - if not name: logger.warning("Encountered tool_call without a function name.") return - param_matches = TAGGED_PARAM_RE.findall(raw_params) - if param_matches: - params_dict = { - param_name.strip(): _strip_param_fences(param_value) - for param_name, param_value in param_matches + name = unescape_llm_text(name.strip()) + raw_args = unescape_llm_text(raw_args) + + arg_matches = TAGGED_ARG_RE.findall(raw_args) + if arg_matches: + args_dict = { + arg_name.strip(): _strip_param_fences(arg_value) + for arg_name, arg_value in arg_matches } - arguments = orjson.dumps(params_dict).decode("utf-8") - logger.debug(f"Successfully parsed {len(params_dict)} parameters for tool: {name}") + arguments = orjson.dumps(args_dict).decode("utf-8") + logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}") else: - cleaned_raw = raw_params.strip() + cleaned_raw = raw_args.strip() if not cleaned_raw: - logger.debug(f"Successfully parsed 0 parameters for tool: {name}") + logger.debug(f"Successfully parsed 0 arguments for tool: {name}") else: logger.warning( - f"Malformed parameters for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" + f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}" ) arguments = "{}" @@ -322,7 +323,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]: def text_from_message(message: Message) -> str: - """Concatenate text and tool parameters from a message for token estimation.""" + """Concatenate text and tool arguments from a message for token estimation.""" base_text = "" if isinstance(message.content, str): base_text = message.content @@ -334,8 +335,8 @@ def text_from_message(message: Message) -> str: base_text = "" if message.tool_calls: - tool_param_text = "".join(call.function.arguments or "" for call in message.tool_calls) - base_text = f"{base_text}\n{tool_param_text}" if base_text else tool_param_text + tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls) + base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text return base_text diff --git a/app/utils/logging.py b/app/utils/logging.py index da417f1..87fcc7f 100644 --- a/app/utils/logging.py +++ b/app/utils/logging.py @@ -15,7 +15,7 @@ def setup_logging( """ Setup loguru logging configuration to unify all project logging output - Params: + Args: level: Log level diagnose: Whether to enable diagnostic information backtrace: Whether to enable backtrace information From 8ef108d4827c29c617fbb968f2c09d7fe8d83fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:11:36 +0700 Subject: [PATCH 125/139] Refactor: Rewrite the function call format to match the client's complex argument structure --- app/services/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/client.py b/app/services/client.py index 6ab80cd..0b2aea5 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -121,7 +121,7 @@ async def process_message( tool_blocks: list[str] = [] for call in message.tool_calls: args_text = call.function.arguments.strip() - formatted_args = "@args\n" + formatted_args = "" try: parsed_args = 
orjson.loads(args_text) if isinstance(parsed_args, dict): From 30043e585650c11876312b6dadc2a52eaaacdf60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:14:45 +0700 Subject: [PATCH 126/139] Refactor: Rewrite the function call format to match the client's complex argument structure --- app/utils/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index ae96f05..0b91993 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -25,7 +25,7 @@ "EXACT SYNTAX TEMPLATE:\n" "[ToolCalls]\n" "[Call:tool_name]\n" - "[CallParameter:arg_name]\n" + "[CallParameter:parameter_name]\n" "```\n" "value\n" "```\n" From bc888d1fbb8a076074486293531441553c956028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:22:26 +0700 Subject: [PATCH 127/139] Refactor: Rewrite the function call format to match the client's complex argument structure --- app/services/client.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/app/services/client.py b/app/services/client.py index 0b2aea5..70dfce9 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -120,20 +120,25 @@ async def process_message( if message.tool_calls: tool_blocks: list[str] = [] for call in message.tool_calls: - args_text = call.function.arguments.strip() - formatted_args = "" - try: - parsed_args = orjson.loads(args_text) - if isinstance(parsed_args, dict): - for k, v in parsed_args.items(): - val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") - formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n" - else: - formatted_args += args_text - except orjson.JSONDecodeError: - formatted_args += args_text - - tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]") + params_text = call.function.arguments.strip() + formatted_params = "" + if params_text: + try: + parsed_params = orjson.loads(params_text) + if isinstance(parsed_params, dict): + for k, v in parsed_params.items(): + val_str = ( + v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") + ) + formatted_params += ( + f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n" + ) + else: + formatted_params += f"```\n{params_text}\n```\n" + except orjson.JSONDecodeError: + formatted_params += f"```\n{params_text}\n```\n" + + tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]") if tool_blocks: tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" From d5349a096f9564d99b7939c6a32fae1193db0b8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Fri, 13 Feb 2026 23:29:23 +0700 Subject: [PATCH 128/139] Refactor: Rewrite the function call format to match the client's complex argument structure --- app/utils/helper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/utils/helper.py b/app/utils/helper.py index 0b91993..82bb562 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -134,7 +134,7 @@ def _strip_param_fences(s: str) -> str: return s[n:-n].strip() -def unescape_llm_text(s: str) -> str: +def _repair_param_value(s: str) -> str: """ Standardize and repair LLM-generated text fragments (unescaping, link normalization) to ensure compatibility with specialized clients like Roo Code. 
@@ -271,8 +271,8 @@ def _create_tool_call(name: str, raw_args: str) -> None: logger.warning("Encountered tool_call without a function name.") return - name = unescape_llm_text(name.strip()) - raw_args = unescape_llm_text(raw_args) + name = _repair_param_value(name.strip()) + raw_args = _repair_param_value(raw_args) arg_matches = TAGGED_ARG_RE.findall(raw_args) if arg_matches: From 7e217e917778211ad002511d1323739ed2b0e293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 14 Feb 2026 00:28:49 +0700 Subject: [PATCH 129/139] Reattempt changing tool call tags to `snake_case`. --- app/server/chat.py | 50 ++++++++++++++++++++++++------------------ app/services/client.py | 20 +++++++---------- app/utils/helper.py | 40 +++++++++++++++++++-------------- 3 files changed, 60 insertions(+), 50 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 881f7c9..1211c99 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -760,28 +760,32 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], - "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], + "starts": ["[tool_calls]", "\\[tool\\_calls\\]"], + "ends": ["[/tool_calls]", "\\[\\/tool\\_calls\\]"], }, "ORPHAN": { - "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], - "ends": ["[/Call]", "\\[/Call\\]"], + "starts": ["[call:", "\\[call\\:"], + "ends": ["[/call]", "\\[\\/call\\]"], }, "RESP": { - "starts": ["[ToolResults]", "\\[ToolResults\\]"], - "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], + "starts": ["[tool_results]", "\\[tool\\_results\\]"], + "ends": ["[/tool_results]", "\\[\\/tool\\_results\\]"], }, "ARG": { - "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], - "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], + "starts": ["[call_parameter:", "\\[call\\_parameter\\:"], + "ends": ["[/call_parameter]", "\\[\\/call\\_parameter\\]"], }, "RESULT": { - "starts": ["[ToolResult]", "\\[ToolResult\\]"], - "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], + "starts": ["[tool_result]", "\\[tool\\_result\\]"], + "ends": ["[/tool_result]", "\\[\\/tool\\_result\\]"], + }, + "ITEM": { + "starts": ["[result:", "\\[result\\:"], + "ends": ["[/result]", "\\[\\/result\\]"], }, "TAG": { - "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], - "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], + "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"], + "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"], }, } @@ -794,15 +798,19 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "\\<|im\\_end|\\>", - "[/Call]", - "\\[/Call\\]", - "[/ToolCalls]", - "\\[/ToolCalls\\]", - "[/CallParameter]", - "\\[/CallParameter\\]", - "[/ToolResult]", - "\\[/ToolResult\\]", + "\\<\\|im\\_end\\|\\>", + "[/call]", + "\\[\\/call\\]", + "[/tool_calls]", + "\\[\\/tool\\_calls\\]", + "[/call_parameter]", + "\\[\\/call\\_parameter\\]", + "[/tool_result]", + "\\[\\/tool\\_result\\]", + "[/tool_results]", + "\\[\\/tool\\_results\\]", + "[/result]", + "\\[\\/result\\]", ] self.WATCH_MARKERS = [] diff --git a/app/services/client.py b/app/services/client.py index 70dfce9..05e7415 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -70,8 +70,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a Message into Gemini API format using the PascalCase technical protocol. - Extracts text, handles files, and appends ToolCalls/ToolResults blocks. 
+ Process a Message into Gemini API format using the snake_case technical protocol. + Extracts text, handles files, and appends tool_calls/tool_results blocks. """ files: list[Path | str] = [] text_fragments: list[str] = [] @@ -109,11 +109,9 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() - res_block = ( - f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]" - ) + res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]" if wrap_tool: - text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] + text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"] else: text_fragments = [res_block] @@ -130,18 +128,16 @@ async def process_message( val_str = ( v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") ) - formatted_params += ( - f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n" - ) + formatted_params += f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" else: formatted_params += f"```\n{params_text}\n```\n" except orjson.JSONDecodeError: formatted_params += f"```\n{params_text}\n```\n" - tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]") + tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") if tool_blocks: - tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" + tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -173,7 +169,7 @@ async def process_conversation( i += 1 combined_tool_content = "\n".join(tool_blocks) - wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" + wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/utils/helper.py b/app/utils/helper.py index 82bb562..a1292d3 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,47 +19,53 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" - "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" + "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" - "[ToolCalls]\n" - "[Call:tool_name]\n" - "[CallParameter:parameter_name]\n" + "[tool_calls]\n" + "[call:tool_name]\n" + "[call_parameter:parameter_name]\n" "```\n" "value\n" "```\n" - "[/CallParameter]\n" - "[/Call]\n" - "[/ToolCalls]\n\n" + "[/call_parameter]\n" + "[/call]\n" + "[/tool_calls]\n\n" "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" + "Multiple tools: List them sequentially inside one [tool_calls] block. 
No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[tool_calls]|\\\[tool\\_calls\\])\s*(.*?)\s*(?:\[/tool_calls]|\\\[\\/tool\\_calls\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[call:|\\\[call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/call]|\\\[\\/call\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
+    r"(?:\[tool_results]|\\\[tool\\_results\\])\s*(.*?)\s*(?:\[/tool_results]|\\\[\\/tool\\_results\\])",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
+    r"(?:\[result:|\\\[result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/result]|\\\[\\/result\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
+    r"(?:\[call_parameter:|\\\[call\\_parameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<value>.*?)\s*(?:\[/call_parameter]|\\\[\\/call\\_parameter\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
+    r"(?:\[tool_result]|\\\[tool\\_result\\])\s*(.*?)\s*(?:\[/tool_result]|\\\[\\/tool\\_result\\])",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(
+    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
+)
+CHATML_START_RE = re.compile(
+    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
+)
+CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",

From fe30a5d47001c4956f9e64a3eda4cf4c7c4fc9db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 00:51:42 +0700
Subject: [PATCH 130/139] Revert "Reattempt changing tool call tags to `snake_case`."

This reverts commit 7e217e917778211ad002511d1323739ed2b0e293.
--- app/server/chat.py | 50 ++++++++++++++++++------------------------ app/services/client.py | 20 ++++++++++------- app/utils/helper.py | 40 ++++++++++++++------------------- 3 files changed, 50 insertions(+), 60 deletions(-) diff --git a/app/server/chat.py b/app/server/chat.py index 1211c99..881f7c9 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -760,32 +760,28 @@ def __init__(self): self.STATE_MARKERS = { "TOOL": { - "starts": ["[tool_calls]", "\\[tool\\_calls\\]"], - "ends": ["[/tool_calls]", "\\[\\/tool\\_calls\\]"], + "starts": ["[ToolCalls]", "\\[ToolCalls\\]"], + "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"], }, "ORPHAN": { - "starts": ["[call:", "\\[call\\:"], - "ends": ["[/call]", "\\[\\/call\\]"], + "starts": ["[Call:", "\\[Call:", "\\[Call\\:"], + "ends": ["[/Call]", "\\[/Call\\]"], }, "RESP": { - "starts": ["[tool_results]", "\\[tool\\_results\\]"], - "ends": ["[/tool_results]", "\\[\\/tool\\_results\\]"], + "starts": ["[ToolResults]", "\\[ToolResults\\]"], + "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], }, "ARG": { - "starts": ["[call_parameter:", "\\[call\\_parameter\\:"], - "ends": ["[/call_parameter]", "\\[\\/call\\_parameter\\]"], + "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], + "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], }, "RESULT": { - "starts": ["[tool_result]", "\\[tool\\_result\\]"], - "ends": ["[/tool_result]", "\\[\\/tool\\_result\\]"], - }, - "ITEM": { - "starts": ["[result:", "\\[result\\:"], - "ends": ["[/result]", "\\[\\/result\\]"], + "starts": ["[ToolResult]", "\\[ToolResult\\]"], + "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], }, "TAG": { - "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"], - "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"], + "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], + "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], }, } @@ -798,19 +794,15 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "\\<\\|im\\_end\\|\\>", - "[/call]", - "\\[\\/call\\]", - "[/tool_calls]", - "\\[\\/tool\\_calls\\]", - "[/call_parameter]", - "\\[\\/call\\_parameter\\]", - "[/tool_result]", - "\\[\\/tool\\_result\\]", - "[/tool_results]", - "\\[\\/tool\\_results\\]", - "[/result]", - "\\[\\/result\\]", + "\\<|im\\_end|\\>", + "[/Call]", + "\\[/Call\\]", + "[/ToolCalls]", + "\\[/ToolCalls\\]", + "[/CallParameter]", + "\\[/CallParameter\\]", + "[/ToolResult]", + "\\[/ToolResult\\]", ] self.WATCH_MARKERS = [] diff --git a/app/services/client.py b/app/services/client.py index 05e7415..70dfce9 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -70,8 +70,8 @@ async def process_message( message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True ) -> tuple[str, list[Path | str]]: """ - Process a Message into Gemini API format using the snake_case technical protocol. - Extracts text, handles files, and appends tool_calls/tool_results blocks. + Process a Message into Gemini API format using the PascalCase technical protocol. + Extracts text, handles files, and appends ToolCalls/ToolResults blocks. 
""" files: list[Path | str] = [] text_fragments: list[str] = [] @@ -109,9 +109,11 @@ async def process_message( if message.role == "tool": tool_name = message.name or "unknown" combined_content = "\n".join(text_fragments).strip() - res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]" + res_block = ( + f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]" + ) if wrap_tool: - text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"] + text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"] else: text_fragments = [res_block] @@ -128,16 +130,18 @@ async def process_message( val_str = ( v if isinstance(v, str) else orjson.dumps(v).decode("utf-8") ) - formatted_params += f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n" + formatted_params += ( + f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n" + ) else: formatted_params += f"```\n{params_text}\n```\n" except orjson.JSONDecodeError: formatted_params += f"```\n{params_text}\n```\n" - tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]") + tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]") if tool_blocks: - tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]" + tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]" text_fragments.append(tool_section) model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) @@ -169,7 +173,7 @@ async def process_conversation( i += 1 combined_tool_content = "\n".join(tool_blocks) - wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]" + wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]" conversation.append(add_tag("tool", wrapped_content)) else: input_part, files_part = await GeminiClientWrapper.process_message( diff --git a/app/utils/helper.py b/app/utils/helper.py index a1292d3..82bb562 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -19,53 +19,47 @@ VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n" - "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n" + "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n" "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n" "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n" "EXACT SYNTAX TEMPLATE:\n" - "[tool_calls]\n" - "[call:tool_name]\n" - "[call_parameter:parameter_name]\n" + "[ToolCalls]\n" + "[Call:tool_name]\n" + "[CallParameter:parameter_name]\n" "```\n" "value\n" "```\n" - "[/call_parameter]\n" - "[/call]\n" - "[/tool_calls]\n\n" + "[/CallParameter]\n" + "[/Call]\n" + "[/ToolCalls]\n\n" "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [tool_calls] block. No tool: respond naturally, NEVER use protocol tags.\n" + "Multiple tools: List them sequentially inside one [ToolCalls] block. 
No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"(?:\[tool_calls]|\\\[tool\\_calls\\])\s*(.*?)\s*(?:\[/tool_calls]|\\\[\\/tool\\_calls\\])",
-    re.DOTALL | re.IGNORECASE,
+    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
 )
 TOOL_CALL_RE = re.compile(
-    r"(?:\[call:|\\\[call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/call]|\\\[\\/call\\])",
-    re.DOTALL | re.IGNORECASE,
+    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"(?:\[tool_results]|\\\[tool\\_results\\])\s*(.*?)\s*(?:\[/tool_results]|\\\[\\/tool\\_results\\])",
+    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"(?:\[result:|\\\[result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/result]|\\\[\\/result\\])",
+    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"(?:\[call_parameter:|\\\[call\\_parameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<value>.*?)\s*(?:\[/call_parameter]|\\\[\\/call\\_parameter\\])",
+    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"(?:\[tool_result]|\\\[tool\\_result\\])\s*(.*?)\s*(?:\[/tool_result]|\\\[\\/tool\\_result\\])",
+    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(
-    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
-)
-CHATML_START_RE = re.compile(
-    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
-)
-CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
+CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
+CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",

From 93e9ccdad4a55e1ed99cb2037f45aeba10e32226 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 01:18:56 +0700
Subject: [PATCH 131/139] Refactor: Handle all escape tags

---
 app/server/chat.py   |  38 +++++++------
 app/services/lmdb.py | 105 ++++++++++++++++++++++----------------------
 app/utils/helper.py  |  35 ++++++++++-----
 3 files changed, 101 insertions(+), 77 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 881f7c9..934091b 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -270,7 +270,7 @@ def _persist_conversation(
             tool_calls=tool_calls or None,
         )
         full_history = [*messages, current_assistant_message]
-        cleaned_history = db.sanitize_assistant_messages(full_history)
+        cleaned_history = db.sanitize_messages(full_history)
 
         conv = ConversationInStore(
             model=model_name,
@@ -761,27 +761,31 @@ def __init__(self):
         self.STATE_MARKERS = {
             "TOOL": {
                 "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
-                "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"],
+                "ends": ["[/ToolCalls]", "\\[\\/ToolCalls\\]"],
             },
             "ORPHAN": {
"starts": ["[Call:", "\\[Call\\:"], + "ends": ["[/Call]", "\\[\\/Call\\]"], }, "RESP": { "starts": ["[ToolResults]", "\\[ToolResults\\]"], - "ends": ["[/ToolResults]", "\\[/ToolResults\\]"], + "ends": ["[/ToolResults]", "\\[\\/ToolResults\\]"], }, "ARG": { - "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"], - "ends": ["[/CallParameter]", "\\[/CallParameter\\]"], + "starts": ["[CallParameter:", "\\[CallParameter\\:"], + "ends": ["[/CallParameter]", "\\[\\/CallParameter\\]"], }, "RESULT": { "starts": ["[ToolResult]", "\\[ToolResult\\]"], - "ends": ["[/ToolResult]", "\\[/ToolResult\\]"], + "ends": ["[/ToolResult]", "\\[\\/ToolResult\\]"], + }, + "ITEM": { + "starts": ["[Result:", "\\[Result\\:"], + "ends": ["[/Result]", "\\[\\/Result\\]"], }, "TAG": { - "starts": ["<|im_start|>", "\\<|im\\_start|\\>"], - "ends": ["<|im_end|>", "\\<|im\\_end|\\>"], + "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"], + "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"], }, } @@ -794,15 +798,19 @@ def __init__(self): self.ORPHAN_ENDS = [ "<|im_end|>", - "\\<|im\\_end|\\>", + "\\<\\|im\\_end\\|\\>", "[/Call]", - "\\[/Call\\]", + "\\[\\/Call\\]", "[/ToolCalls]", - "\\[/ToolCalls\\]", + "\\[\\/ToolCalls\\]", "[/CallParameter]", - "\\[/CallParameter\\]", + "\\[\\/CallParameter\\]", "[/ToolResult]", - "\\[/ToolResult\\]", + "\\[\\/ToolResult\\]", + "[/ToolResults]", + "\\[\\/ToolResults\\]", + "[/Result]", + "\\[\\/Result\\]", ] self.WATCH_MARKERS = [] diff --git a/app/services/lmdb.py b/app/services/lmdb.py index ad92bbf..dd4197a 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -16,6 +16,8 @@ extract_tool_calls, normalize_llm_text, remove_tool_call_blocks, + strip_system_hints, + unescape_text, ) from ..utils.singleton import Singleton @@ -38,6 +40,7 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None: return None text = normalize_llm_text(text) + text = unescape_text(text) text = LMDBConversationStore.remove_think_tags(text) text = remove_tool_call_blocks(text) @@ -589,63 +592,61 @@ def remove_think_tags(text: str) -> str: return cleaned_content.strip() @staticmethod - def sanitize_assistant_messages(messages: list[Message]) -> list[Message]: - """Clean assistant messages of internal markers and move tool calls to metadata.""" + def sanitize_messages(messages: list[Message]) -> list[Message]: + """Clean all messages of internal markers, hints and normalize tool calls.""" cleaned_messages = [] for msg in messages: - if msg.role == "assistant": - if isinstance(msg.content, str): - text = LMDBConversationStore.remove_think_tags(msg.content) - tool_calls = msg.tool_calls - if not tool_calls: - text, tool_calls = extract_tool_calls(text) - else: - text = remove_tool_call_blocks(text).strip() - - normalized_content = text.strip() or None - - if normalized_content != msg.content or tool_calls != msg.tool_calls: - cleaned_msg = msg.model_copy( + if isinstance(msg.content, str): + text = LMDBConversationStore.remove_think_tags(msg.content) + tool_calls = msg.tool_calls + + if msg.role == "assistant" and not tool_calls: + text, tool_calls = extract_tool_calls(text) + else: + text = strip_system_hints(text) + + normalized_content = text.strip() or None + + if normalized_content != msg.content or tool_calls != msg.tool_calls: + cleaned_msg = msg.model_copy( + update={ + "content": normalized_content, + "tool_calls": tool_calls or None, + } + ) + cleaned_messages.append(cleaned_msg) + else: + cleaned_messages.append(msg) + elif isinstance(msg.content, list): + 
new_content = [] + all_extracted_calls = list(msg.tool_calls or []) + changed = False + + for item in msg.content: + if isinstance(item, ContentItem) and item.type == "text" and item.text: + text = LMDBConversationStore.remove_think_tags(item.text) + if msg.role == "assistant" and not msg.tool_calls: + text, extracted = extract_tool_calls(text) + if extracted: + all_extracted_calls.extend(extracted) + changed = True + else: + text = strip_system_hints(text) + + if text != item.text: + changed = True + item = item.model_copy(update={"text": text.strip() or None}) + new_content.append(item) + + if changed: + cleaned_messages.append( + msg.model_copy( update={ - "content": normalized_content, - "tool_calls": tool_calls or None, + "content": new_content, + "tool_calls": all_extracted_calls or None, } ) - cleaned_messages.append(cleaned_msg) - else: - cleaned_messages.append(msg) - elif isinstance(msg.content, list): - new_content = [] - all_extracted_calls = list(msg.tool_calls or []) - changed = False - - for item in msg.content: - if isinstance(item, ContentItem) and item.type == "text" and item.text: - text = LMDBConversationStore.remove_think_tags(item.text) - if not msg.tool_calls: - text, extracted = extract_tool_calls(text) - if extracted: - all_extracted_calls.extend(extracted) - changed = True - else: - text = remove_tool_call_blocks(text).strip() - - if text != item.text: - changed = True - item = item.model_copy(update={"text": text.strip() or None}) - new_content.append(item) - - if changed: - cleaned_messages.append( - msg.model_copy( - update={ - "content": new_content, - "tool_calls": all_extracted_calls or None, - } - ) - ) - else: - cleaned_messages.append(msg) + ) else: cleaned_messages.append(msg) else: diff --git a/app/utils/helper.py b/app/utils/helper.py index 82bb562..9c75b45 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -36,30 +36,36 @@ "Multiple tools: List them sequentially inside one [ToolCalls] block. 
No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[ToolCalls]|\\\[ToolCalls\\])\s*(.*?)\s*(?:\[/ToolCalls]|\\\[\\/ToolCalls\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[Call:|\\\[Call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/Call]|\\\[\\/Call\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
+    r"(?:\[ToolResults]|\\\[ToolResults\\])\s*(.*?)\s*(?:\[/ToolResults]|\\\[\\/ToolResults\\])",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
+    r"(?:\[Result:|\\\[Result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/Result]|\\\[\\/Result\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
+    r"(?:\[CallParameter:|\\\[CallParameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<value>.*?)\s*(?:\[/CallParameter]|\\\[\\/CallParameter\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
+    r"(?:\[ToolResult]|\\\[ToolResult\\])\s*(.*?)\s*(?:\[/ToolResult]|\\\[\\/ToolResult\\])",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(
+    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
+)
+CHATML_START_RE = re.compile(
+    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
+)
+CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
@@ -103,6 +109,13 @@ def normalize_llm_text(s: str) -> str:
     return s
 
 
+def unescape_text(s: str) -> str:
+    """Remove CommonMark backslash escapes."""
+    if not s:
+        return ""
+    return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
+
+
 def _strip_google_search(match: re.Match) -> str:
     """Extract raw text from Google Search links if it looks like a file path."""
     text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
@@ -231,7 +244,9 @@ def strip_system_hints(text: str) -> str:
     if not text:
         return text
 
-    cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
+    t_unescaped = unescape_text(text)
+
+    cleaned = t_unescaped.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
 
     if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END:
         pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?"
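A minimal sketch of the escape-tolerant matching this patch restores, assuming the group names `name` and `value` reconstructed in TAGGED_ARG_RE above; the parameter name and sample strings below are illustrative only, not taken from the repository:

    import re

    # Same shape as TAGGED_ARG_RE in app/utils/helper.py after this patch.
    # The group names `name` and `value` are assumed reconstructions.
    TAGGED_ARG_RE = re.compile(
        r"(?:\[CallParameter:|\\\[CallParameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])"
        r"\s*(?P<value>.*?)\s*(?:\[/CallParameter]|\\\[\\/CallParameter\\])",
        re.DOTALL | re.IGNORECASE,
    )

    raw = "[CallParameter:path]\nsrc/main.py\n[/CallParameter]"
    # CommonMark-escaped variant of the same tag pair.
    escaped = "\\[CallParameter\\:path\\]\nsrc/main.py\n\\[\\/CallParameter\\]"

    for sample in (raw, escaped):
        m = TAGGED_ARG_RE.search(sample)
        assert m is not None
        assert m.group("name") == "path" and m.group("value") == "src/main.py"

Each pattern alternates between the literal tag and its backslash-escaped form, so extraction tolerates responses where Gemini has escaped the protocol tags, without requiring the whole text to be unescaped first.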
From 30f61257a951b19fa0492fce6efa3421a2b990cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 01:39:11 +0700
Subject: [PATCH 132/139] Refactor: Handle all escape tags

---
 app/services/lmdb.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index dd4197a..abf8859 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -279,7 +279,7 @@ def store(
             raise ValueError("Messages list cannot be empty")
 
         # Ensure consistent sanitization before hashing and storage
-        sanitized_messages = self.sanitize_assistant_messages(conv.messages)
+        sanitized_messages = self.sanitize_messages(conv.messages)
         conv.messages = sanitized_messages
 
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
@@ -359,7 +359,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
             logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.")
             return conv
 
-        cleaned_messages = self.sanitize_assistant_messages(messages)
+        cleaned_messages = self.sanitize_messages(messages)
         if cleaned_messages != messages:
             if conv := self._find_by_message_list(model, cleaned_messages):
                 logger.debug(

From a35525234ef206422570b168934d5e7c53a7a848 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 09:12:24 +0700
Subject: [PATCH 133/139] Refactor: Remove `_strip_google_search` as it's no longer needed

---
 app/utils/helper.py | 44 ++++----------------------------------------
 1 file changed, 4 insertions(+), 40 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 9c75b45..f6a3e77 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -8,7 +8,7 @@
 import tempfile
 import unicodedata
 from pathlib import Path
-from urllib.parse import unquote, urlparse
+from urllib.parse import urlparse
 
 import httpx
 import orjson
@@ -67,18 +67,6 @@
 )
 CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
-FILE_PATH_PATTERN = re.compile(
-    r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
-    re.IGNORECASE,
-)
-GOOGLE_SEARCH_PATTERN = re.compile(
-    r"(?P<md_start>`?\[`?)?"
-    r"(?P<text>[^]]+)?"
-    r"(?(md_start)`?]\()?"
-    r"https://www\.google\.com/search\?q=(?P<query>[^&\s\"'<>)]+)"
-    r"(?(md_start)\)?`?)",
-    re.IGNORECASE,
-)
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -110,22 +98,12 @@ def normalize_llm_text(s: str) -> str:
 
 
 def unescape_text(s: str) -> str:
-    """Remove CommonMark backslash escapes."""
+    """Remove CommonMark backslash escapes from LLM-generated text."""
     if not s:
         return ""
     return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
 
 
-def _strip_google_search(match: re.Match) -> str:
-    """Extract raw text from Google Search links if it looks like a file path."""
-    text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
-    text_to_check = unquote(text_to_check.strip())
-
-    if FILE_PATH_PATTERN.match(text_to_check):
-        return text_to_check
-    return match.group(0)
-
-
 def _strip_param_fences(s: str) -> str:
     """
     Remove one layer of outermost Markdown code fences,
@@ -147,20 +125,6 @@ def _strip_param_fences(s: str) -> str:
     return s[n:-n].strip()
 
 
-def _repair_param_value(s: str) -> str:
-    """
-    Standardize and repair LLM-generated text fragments (unescaping, link normalization)
-    to ensure compatibility with specialized clients like Roo Code.
-    """
-    if not s:
-        return ""
-
-    s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
-    s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
-
-    return s
-
-
 def estimate_tokens(text: str | None) -> int:
     """Estimate the number of tokens heuristically based on character count."""
     if not text:
@@ -286,8 +250,8 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         logger.warning("Encountered tool_call without a function name.")
         return
 
-    name = _repair_param_value(name.strip())
-    raw_args = _repair_param_value(raw_args)
+    name = unescape_text(name.strip())
+    raw_args = unescape_text(raw_args)
 
     arg_matches = TAGGED_ARG_RE.findall(raw_args)
     if arg_matches:

From 45af127004adcec8a48c9f9432f0c0b54a581724 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?=
Date: Sat, 14 Feb 2026 12:30:29 +0700
Subject: [PATCH 134/139] Update `TOOL_WRAP_HINT` to ensure Gemini strictly follows the instructions.

---
 app/utils/helper.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index f6a3e77..64df4f7 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,11 +18,12 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
-    "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
-    "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n"
-    "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n"
-    "EXACT SYNTAX TEMPLATE:\n"
+    "\n\n### SYSTEM: TOOL CALLING PROTOCOL (MANDATORY) ###\n"
+    "If tool execution is required, you MUST adhere to this EXACT protocol. No exceptions.\n\n"
+    "1. OUTPUT RESTRICTION: Your response MUST contain ONLY the [ToolCalls] block. Conversational filler, preambles, or concluding remarks are STRICTLY PROHIBITED.\n"
+    "2. WRAPPING LOGIC: Every parameter value MUST be enclosed in a markdown code block. Use 3 backticks (```) by default. 
If the value contains backticks, the outer fence MUST be longer than any sequence inside (e.g., ````).\n" + "3. TAG SYMMETRY: All tags MUST be balanced and closed in the exact reverse order of opening. Incomplete or unclosed blocks are strictly prohibited.\n\n" + "REQUIRED SYNTAX:\n" "[ToolCalls]\n" "[Call:tool_name]\n" "[CallParameter:parameter_name]\n" @@ -32,8 +33,7 @@ "[/CallParameter]\n" "[/Call]\n" "[/ToolCalls]\n\n" - "CRITICAL: Every tag MUST be opened and closed accurately.\n\n" - "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n" + "CRITICAL: Do NOT mix natural language with protocol tags. Either respond naturally OR provide the protocol block alone. There is no middle ground.\n" ) TOOL_BLOCK_RE = re.compile( r"(?:\[ToolCalls]|\\\[ToolCalls\\])\s*(.*?)\s*(?:\[/ToolCalls]|\\\[\\/ToolCalls\\])", From f144e1440f6812ae330438b316ad1e08eb052240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Sat, 14 Feb 2026 12:32:26 +0700 Subject: [PATCH 135/139] Update required dependencies --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 93dabab..0cae786 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = "==3.12.*" dependencies = [ "fastapi>=0.129.0", - "gemini-webapi>=1.19.1", + "gemini-webapi>=1.19.2", "lmdb>=1.7.5", "loguru>=0.7.3", "orjson>=3.11.7", diff --git a/uv.lock b/uv.lock index 249e84b..5b687e4 100644 --- a/uv.lock +++ b/uv.lock @@ -107,7 +107,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "fastapi", specifier = ">=0.129.0" }, - { name = "gemini-webapi", specifier = ">=1.19.1" }, + { name = "gemini-webapi", specifier = ">=1.19.2" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "orjson", specifier = ">=3.11.7" }, @@ -123,7 +123,7 @@ dev = [{ name = "ruff", specifier = ">=0.15.1" }] [[package]] name = "gemini-webapi" -version = "1.19.1" +version = "1.19.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx", extra = ["http2"] }, @@ -131,9 +131,9 @@ dependencies = [ { name = "orjson" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e5/d1/c61ee05ca6e20c70caa25a3f0f12e2a810bbc6b39e588ff937821de43690/gemini_webapi-1.19.1.tar.gz", hash = "sha256:a52afdfc2d9f6e87a6ae8cd926fb2ce5c562a0a99dc75ce97d8d50ffc2a3e133", size = 266761, upload-time = "2026-02-10T05:44:29.195Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d4/d3/b4ff659bfb0fff378b16f934429d53b7f78451ac184406ab2f9ddda9357e/gemini_webapi-1.19.2.tar.gz", hash = "sha256:f6e96e28f3f1e78be6176fbb8b2eca25ad509aec6cfacf99c415559f27691b71", size = 266805, upload-time = "2026-02-14T05:26:04.103Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/0b/7a73919ee8621f6559ae679a20d754959b989a3f09cf20478d89971f40b4/gemini_webapi-1.19.1-py3-none-any.whl", hash = "sha256:0dc4c7daa58d281722d52d6acf520f2e850c6c3c6020080fdbc5f77736c8be9a", size = 63500, upload-time = "2026-02-10T05:44:27.692Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e7/a676f721980e3daa43e05abe94884a84648efdc6203889e7a0f5c8ca2e98/gemini_webapi-1.19.2-py3-none-any.whl", hash = "sha256:fdc088ca35361301f40ea807a58c4bec18886b17a54164a1a8f3d639eadc6a66", size = 63524, upload-time = "2026-02-14T05:26:02.173Z" }, ] [[package]] From 9d014b093512b72d022b57133c3f31224c8f271e Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 18 Feb 2026 14:29:41 +0700 Subject: [PATCH 136/139] Ignore github directory --- .github/workflows/docker.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 7231e08..995b60c 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -8,8 +8,7 @@ on: - "v*" paths-ignore: - "**/*.md" - - ".github/workflows/ruff.yaml" - - ".github/workflows/track.yml" + - ".github/*" env: REGISTRY: ghcr.io From 3fab502ab495868f20c725e3657b11829895204c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 18 Feb 2026 16:41:55 +0700 Subject: [PATCH 137/139] Upgrade to fully support Python 3.13 --- .github/workflows/docker.yaml | 6 + .github/workflows/ruff.yaml | 4 +- Dockerfile | 18 ++- README.md | 4 +- README.zh.md | 4 +- app/main.py | 2 +- app/models/__init__.py | 58 +++++++++- app/models/models.py | 150 ++++++++++++------------- app/server/chat.py | 54 ++++----- app/server/health.py | 4 +- app/server/images.py | 2 +- app/server/middleware.py | 2 +- app/services/client.py | 11 +- app/services/lmdb.py | 55 ++++----- app/services/pool.py | 18 +-- app/utils/config.py | 12 +- app/utils/helper.py | 7 +- app/utils/singleton.py | 6 +- pyproject.toml | 50 +++++++-- scripts/dump_lmdb.py | 16 ++- uv.lock | 203 ++++++++++++++++++++++------------ 21 files changed, 427 insertions(+), 259 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 995b60c..775e9e4 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -9,6 +9,8 @@ on: paths-ignore: - "**/*.md" - ".github/*" + - "LICENSE" + - ".gitignore" env: REGISTRY: ghcr.io @@ -25,6 +27,9 @@ jobs: - name: Checkout repository uses: actions/checkout@v6 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -45,6 +50,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{major}} + type=sha,format=short type=raw,value=latest,enable={{is_default_branch}} - name: Build and push Docker image diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index 6b9e536..d451cdc 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -19,12 +19,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v6 with: - python-version: "3.12" + python-version: "3.13" - name: Install Ruff run: | python -m pip install --upgrade pip - pip install "ruff>=0.11.7" + pip install "ruff>=0.15.1" - name: Run Ruff run: ruff check . diff --git a/Dockerfile b/Dockerfile index 938bc2f..ef7f41e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,26 @@ -FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim +FROM ghcr.io/astral-sh/uv:python3.13-trixie-slim LABEL org.opencontainers.image.description="Web-based Gemini models wrapped into an OpenAI-compatible API." WORKDIR /app -# Install dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + tini \ + && rm -rf /var/lib/apt/lists/* + +ENV UV_COMPILE_BYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + COPY pyproject.toml uv.lock ./ -RUN uv sync --no-cache --no-dev +RUN uv sync --no-cache --frozen --no-install-project --no-dev COPY app/ app/ COPY config/ config/ COPY run.py . 
-# Command to run the application +EXPOSE 8000 + +ENTRYPOINT ["/usr/bin/tini", "--"] + CMD ["uv", "run", "--no-dev", "run.py"] diff --git a/README.md b/README.md index 330e9c8..91f687c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Gemini-FastAPI -[![Python 3.12](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) +[![Python 3.13](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/) [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) @@ -24,7 +24,7 @@ Web-based Gemini models wrapped into an OpenAI-compatible API. Powered by [Hanao ### Prerequisites -- Python 3.12 +- Python 3.13 - Google account with Gemini access on web - `secure_1psid` and `secure_1psidts` cookies from Gemini web interface diff --git a/README.zh.md b/README.zh.md index 2f9e1b5..d23bec1 100644 --- a/README.zh.md +++ b/README.zh.md @@ -1,6 +1,6 @@ # Gemini-FastAPI -[![Python 3.12](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) +[![Python 3.13](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/) [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) @@ -24,7 +24,7 @@ ### 前置条件 -- Python 3.12 +- Python 3.13 - 拥有网页版 Gemini 访问权限的 Google 账号 - 从 Gemini 网页获取的 `secure_1psid` 和 `secure_1psidts` Cookie diff --git a/app/main.py b/app/main.py index f4e6711..0634ce2 100644 --- a/app/main.py +++ b/app/main.py @@ -43,7 +43,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None: stop_event.wait(), timeout=RETENTION_CLEANUP_INTERVAL_SECONDS, ) - except asyncio.TimeoutError: + except TimeoutError: continue logger.info("LMDB retention cleanup task stopped.") diff --git a/app/models/__init__.py b/app/models/__init__.py index c6a3640..a72efdc 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -1 +1,57 @@ -from .models import * # noqa: F403 +from .models import ( + ChatCompletionRequest, + ChatCompletionResponse, + Choice, + ContentItem, + ConversationInStore, + FunctionCall, + HealthCheckResponse, + Message, + ModelData, + ModelListResponse, + ResponseCreateRequest, + ResponseCreateResponse, + ResponseImageGenerationCall, + ResponseImageTool, + ResponseInputContent, + ResponseInputItem, + ResponseOutputContent, + ResponseOutputMessage, + ResponseToolCall, + ResponseToolChoice, + Tool, + ToolCall, + ToolChoiceFunction, + ToolChoiceFunctionDetail, + ToolFunctionDefinition, + Usage, +) + +__all__ = [ + "ChatCompletionRequest", + "ChatCompletionResponse", + "Choice", + "ContentItem", + "ConversationInStore", + "FunctionCall", + "HealthCheckResponse", + "Message", + "ModelData", + "ModelListResponse", + "ResponseCreateRequest", + "ResponseCreateResponse", + "ResponseImageGenerationCall", + "ResponseImageTool", + "ResponseInputContent", + "ResponseInputItem", + "ResponseOutputContent", + "ResponseOutputMessage", + "ResponseToolCall", + "ResponseToolChoice", + "Tool", + "ToolCall", + "ToolChoiceFunction", + "ToolChoiceFunctionDetail", + "ToolFunctionDefinition", + "Usage", +] diff --git a/app/models/models.py b/app/models/models.py index 64ceaa9..ca206b7 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -1,7 +1,7 @@ from __future__ import annotations from datetime import datetime -from typing import Any, Dict, List, 
Literal, Optional, Union +from typing import Any, Literal from pydantic import BaseModel, Field, model_validator @@ -10,28 +10,28 @@ class ContentItem(BaseModel): """Individual content item (text, image, or file) within a message.""" type: Literal["text", "image_url", "file", "input_audio"] - text: Optional[str] = None - image_url: Optional[Dict[str, str]] = None - input_audio: Optional[Dict[str, Any]] = None - file: Optional[Dict[str, str]] = None - annotations: List[Dict[str, Any]] = Field(default_factory=list) + text: str | None = None + image_url: dict[str, str] | None = None + input_audio: dict[str, Any] | None = None + file: dict[str, str] | None = None + annotations: list[dict[str, Any]] = Field(default_factory=list) class Message(BaseModel): """Message model""" role: str - content: Union[str, List[ContentItem], None] = None - name: Optional[str] = None - tool_calls: Optional[List["ToolCall"]] = None - tool_call_id: Optional[str] = None - refusal: Optional[str] = None - reasoning_content: Optional[str] = None - audio: Optional[Dict[str, Any]] = None - annotations: List[Dict[str, Any]] = Field(default_factory=list) + content: str | list[ContentItem] | None = None + name: str | None = None + tool_calls: list[ToolCall] | None = None + tool_call_id: str | None = None + refusal: str | None = None + reasoning_content: str | None = None + audio: dict[str, Any] | None = None + annotations: list[dict[str, Any]] = Field(default_factory=list) @model_validator(mode="after") - def normalize_role(self) -> "Message": + def normalize_role(self) -> Message: """Normalize 'developer' role to 'system' for Gemini compatibility.""" if self.role == "developer": self.role = "system" @@ -44,7 +44,7 @@ class Choice(BaseModel): index: int message: Message finish_reason: str - logprobs: Optional[Dict[str, Any]] = None + logprobs: dict[str, Any] | None = None class FunctionCall(BaseModel): @@ -66,8 +66,8 @@ class ToolFunctionDefinition(BaseModel): """Function definition for tool.""" name: str - description: Optional[str] = None - parameters: Optional[Dict[str, Any]] = None + description: str | None = None + parameters: dict[str, Any] | None = None class Tool(BaseModel): @@ -96,8 +96,8 @@ class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int - prompt_tokens_details: Optional[Dict[str, int]] = None - completion_tokens_details: Optional[Dict[str, int]] = None + prompt_tokens_details: dict[str, int] | None = None + completion_tokens_details: dict[str, int] | None = None class ModelData(BaseModel): @@ -113,17 +113,17 @@ class ChatCompletionRequest(BaseModel): """Chat completion request model""" model: str - messages: List[Message] - stream: Optional[bool] = False - user: Optional[str] = None - temperature: Optional[float] = 0.7 - top_p: Optional[float] = 1.0 - max_tokens: Optional[int] = None - tools: Optional[List["Tool"]] = None - tool_choice: Optional[ - Union[Literal["none"], Literal["auto"], Literal["required"], "ToolChoiceFunction"] - ] = None - response_format: Optional[Dict[str, Any]] = None + messages: list[Message] + stream: bool | None = False + user: str | None = None + temperature: float | None = 0.7 + top_p: float | None = 1.0 + max_tokens: int | None = None + tools: list[Tool] | None = None + tool_choice: ( + Literal["none"] | Literal["auto"] | Literal["required"] | ToolChoiceFunction | None + ) = None + response_format: dict[str, Any] | None = None class ChatCompletionResponse(BaseModel): @@ -133,7 +133,7 @@ class ChatCompletionResponse(BaseModel): object: str = 
"chat.completion" created: int model: str - choices: List[Choice] + choices: list[Choice] usage: Usage @@ -141,23 +141,23 @@ class ModelListResponse(BaseModel): """Model list model""" object: str = "list" - data: List[ModelData] + data: list[ModelData] class HealthCheckResponse(BaseModel): """Health check response model""" ok: bool - storage: Optional[Dict[str, str | int]] = None - clients: Optional[Dict[str, bool]] = None - error: Optional[str] = None + storage: dict[str, str | int] | None = None + clients: dict[str, bool] | None = None + error: str | None = None class ConversationInStore(BaseModel): """Conversation model for storing in the database.""" - created_at: Optional[datetime] = Field(default=None) - updated_at: Optional[datetime] = Field(default=None) + created_at: datetime | None = Field(default=None) + updated_at: datetime | None = Field(default=None) # Gemini Web API does not support changing models once a conversation is created. model: str = Field(..., description="Model used for the conversation") @@ -172,13 +172,13 @@ class ResponseInputContent(BaseModel): """Content item for Responses API input.""" type: Literal["input_text", "input_image", "input_file"] - text: Optional[str] = None - image_url: Optional[str] = None - detail: Optional[Literal["auto", "low", "high"]] = None - file_url: Optional[str] = None - file_data: Optional[str] = None - filename: Optional[str] = None - annotations: List[Dict[str, Any]] = Field(default_factory=list) + text: str | None = None + image_url: str | None = None + detail: Literal["auto", "low", "high"] | None = None + file_url: str | None = None + file_data: str | None = None + filename: str | None = None + annotations: list[dict[str, Any]] = Field(default_factory=list) @model_validator(mode="before") @classmethod @@ -192,42 +192,42 @@ def normalize_output_text(cls, data: Any) -> Any: class ResponseInputItem(BaseModel): """Single input item for Responses API.""" - type: Optional[Literal["message"]] = "message" + type: Literal["message"] | None = "message" role: Literal["user", "assistant", "system", "developer"] - content: Union[str, List[ResponseInputContent]] + content: str | list[ResponseInputContent] class ResponseToolChoice(BaseModel): """Tool choice enforcing a specific tool in Responses API.""" type: Literal["function", "image_generation"] - function: Optional[ToolChoiceFunctionDetail] = None + function: ToolChoiceFunctionDetail | None = None class ResponseImageTool(BaseModel): """Image generation tool specification for Responses API.""" type: Literal["image_generation"] - model: Optional[str] = None - output_format: Optional[str] = None + model: str | None = None + output_format: str | None = None class ResponseCreateRequest(BaseModel): """Responses API request payload.""" model: str - input: Union[str, List[ResponseInputItem]] - instructions: Optional[Union[str, List[ResponseInputItem]]] = None - temperature: Optional[float] = 0.7 - top_p: Optional[float] = 1.0 - max_output_tokens: Optional[int] = None - stream: Optional[bool] = False - tool_choice: Optional[Union[str, ResponseToolChoice]] = None - tools: Optional[List[Union[Tool, ResponseImageTool]]] = None - store: Optional[bool] = None - user: Optional[str] = None - response_format: Optional[Dict[str, Any]] = None - metadata: Optional[Dict[str, Any]] = None + input: str | list[ResponseInputItem] + instructions: str | list[ResponseInputItem] | None = None + temperature: float | None = 0.7 + top_p: float | None = 1.0 + max_output_tokens: int | None = None + stream: bool | None = 
False + tool_choice: str | ResponseToolChoice | None = None + tools: list[Tool | ResponseImageTool] | None = None + store: bool | None = None + user: str | None = None + response_format: dict[str, Any] | None = None + metadata: dict[str, Any] | None = None class ResponseUsage(BaseModel): @@ -242,8 +242,8 @@ class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" type: Literal["output_text"] - text: Optional[str] = "" - annotations: List[Dict[str, Any]] = Field(default_factory=list) + text: str | None = "" + annotations: list[dict[str, Any]] = Field(default_factory=list) class ResponseOutputMessage(BaseModel): @@ -252,7 +252,7 @@ class ResponseOutputMessage(BaseModel): id: str type: Literal["message"] role: Literal["assistant"] - content: List[ResponseOutputContent] + content: list[ResponseOutputContent] class ResponseImageGenerationCall(BaseModel): @@ -261,10 +261,10 @@ class ResponseImageGenerationCall(BaseModel): id: str type: Literal["image_generation_call"] = "image_generation_call" status: Literal["completed", "in_progress", "generating", "failed"] = "completed" - result: Optional[str] = None - output_format: Optional[str] = None - size: Optional[str] = None - revised_prompt: Optional[str] = None + result: str | None = None + output_format: str | None = None + size: str | None = None + revised_prompt: str | None = None class ResponseToolCall(BaseModel): @@ -283,7 +283,7 @@ class ResponseCreateResponse(BaseModel): object: Literal["response"] = "response" created_at: int model: str - output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]] + output: list[ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall] status: Literal[ "in_progress", "completed", @@ -292,12 +292,12 @@ class ResponseCreateResponse(BaseModel): "cancelled", "requires_action", ] = "completed" - tool_choice: Optional[Union[str, ResponseToolChoice]] = None - tools: Optional[List[Union[Tool, ResponseImageTool]]] = None + tool_choice: str | ResponseToolChoice | None = None + tools: list[Tool | ResponseImageTool] | None = None usage: ResponseUsage - error: Optional[Dict[str, Any]] = None - metadata: Optional[Dict[str, Any]] = None - input: Optional[Union[str, List[ResponseInputItem]]] = None + error: dict[str, Any] | None = None + metadata: dict[str, Any] | None = None + input: str | list[ResponseInputItem] | None = None # Rebuild models with forward references diff --git a/app/server/chat.py b/app/server/chat.py index 934091b..3849af5 100644 --- a/app/server/chat.py +++ b/app/server/chat.py @@ -3,10 +3,11 @@ import io import reprlib import uuid +from collections.abc import AsyncGenerator from dataclasses import dataclass -from datetime import datetime, timezone +from datetime import UTC, datetime from pathlib import Path -from typing import Any, AsyncGenerator +from typing import Any import orjson from fastapi import APIRouter, Depends, HTTPException, Request, status @@ -17,7 +18,7 @@ from gemini_webapi.types.image import GeneratedImage, Image from loguru import logger -from ..models import ( +from app.models import ( ChatCompletionRequest, ContentItem, ConversationInStore, @@ -38,9 +39,15 @@ Tool, ToolChoiceFunction, ) -from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore -from ..utils import g_config -from ..utils.helper import ( +from app.server.middleware import ( + get_image_store_dir, + get_image_token, + get_temp_dir, + verify_api_key, +) +from app.services import GeminiClientPool, GeminiClientWrapper, 
LMDBConversationStore +from app.utils import g_config +from app.utils.helper import ( TOOL_HINT_LINE_END, TOOL_HINT_LINE_START, TOOL_HINT_STRIPPED, @@ -53,7 +60,6 @@ strip_system_hints, text_from_message, ) -from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9) METADATA_TTL_MINUTES = 15 @@ -98,11 +104,7 @@ async def _image_to_base64( if not suffix: detected_ext = detect_image_extension(data) - if detected_ext: - suffix = detected_ext - else: - # Fallback if detection fails - suffix = ".png" if isinstance(image, GeneratedImage) else ".jpg" + suffix = detected_ext or (".png" if isinstance(image, GeneratedImage) else ".jpg") random_name = f"img_{uuid.uuid4().hex}{suffix}" new_path = temp_dir / random_name @@ -628,7 +630,7 @@ def _get_model_by_name(name: str) -> Model: def _get_available_models() -> list[ModelData]: """Return a list of available models based on configuration strategy.""" - now = int(datetime.now(tz=timezone.utc).timestamp()) + now = int(datetime.now(tz=UTC).timestamp()) strategy = g_config.gemini.model_strategy models_data = [] @@ -712,7 +714,7 @@ async def _send_with_split( text: str, files: list[Path | str | io.BytesIO] | None = None, stream: bool = False, -) -> AsyncGenerator[ModelOutput, None] | ModelOutput: +) -> AsyncGenerator[ModelOutput] | ModelOutput: """Send text to Gemini, splitting or converting to attachment if too long.""" if len(text) <= MAX_CHARS_PER_REQUEST: try: @@ -1013,9 +1015,7 @@ def flush(self) -> str: res = "" if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"): res = "" - elif self.state == "IN_BLOCK" and self.current_role != "tool": - res = self.buffer - elif self.state == "NORMAL": + elif (self.state == "IN_BLOCK" and self.current_role != "tool") or self.state == "NORMAL": res = self.buffer self.buffer = "" @@ -1027,7 +1027,7 @@ def flush(self) -> str: def _create_real_streaming_response( - generator: AsyncGenerator[ModelOutput, None], + generator: AsyncGenerator[ModelOutput], completion_id: str, created_time: int, model_name: str, @@ -1221,7 +1221,7 @@ async def generate_stream(): def _create_responses_real_streaming_response( - generator: AsyncGenerator[ModelOutput, None], + generator: AsyncGenerator[ModelOutput], response_id: str, created_time: int, model_name: str, @@ -1455,10 +1455,12 @@ async def create_chat_completion( m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir) except Exception as e: logger.exception("Error in preparing conversation") - raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e) + ) from e completion_id = f"chatcmpl-{uuid.uuid4()}" - created_time = int(datetime.now(tz=timezone.utc).timestamp()) + created_time = int(datetime.now(tz=UTC).timestamp()) try: assert session and client @@ -1470,7 +1472,7 @@ async def create_chat_completion( ) except Exception as e: logger.exception("Gemini API error") - raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e if request.stream: return _create_real_streaming_response( @@ -1620,10 +1622,12 @@ async def create_response( m_input, files = await GeminiClientWrapper.process_conversation(messages, tmp_dir) except Exception as e: logger.exception("Error in preparing conversation") - raise 
HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e) + ) from e response_id = f"resp_{uuid.uuid4().hex}" - created_time = int(datetime.now(tz=timezone.utc).timestamp()) + created_time = int(datetime.now(tz=UTC).timestamp()) try: assert session and client @@ -1635,7 +1639,7 @@ async def create_response( ) except Exception as e: logger.exception("Gemini API error") - raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) + raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e if request.stream: return _create_responses_real_streaming_response( diff --git a/app/server/health.py b/app/server/health.py index f521db1..444c938 100644 --- a/app/server/health.py +++ b/app/server/health.py @@ -1,8 +1,8 @@ from fastapi import APIRouter from loguru import logger -from ..models import HealthCheckResponse -from ..services import GeminiClientPool, LMDBConversationStore +from app.models import HealthCheckResponse +from app.services import GeminiClientPool, LMDBConversationStore router = APIRouter() diff --git a/app/server/images.py b/app/server/images.py index fe078f7..e1c161c 100644 --- a/app/server/images.py +++ b/app/server/images.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, HTTPException, Query from fastapi.responses import FileResponse -from ..server.middleware import get_image_store_dir, verify_image_token +from app.server.middleware import get_image_store_dir, verify_image_token router = APIRouter() diff --git a/app/server/middleware.py b/app/server/middleware.py index 630e1f5..4bc358d 100644 --- a/app/server/middleware.py +++ b/app/server/middleware.py @@ -10,7 +10,7 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from loguru import logger -from ..utils import g_config +from app.utils import g_config # Persistent directory for storing generated images IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images" diff --git a/app/services/client.py b/app/services/client.py index 70dfce9..49d9e87 100644 --- a/app/services/client.py +++ b/app/services/client.py @@ -5,9 +5,9 @@ from gemini_webapi import GeminiClient, ModelOutput from loguru import logger -from ..models import Message -from ..utils import g_config -from ..utils.helper import ( +from app.models import Message +from app.utils import g_config +from app.utils.helper import ( add_tag, normalize_llm_text, save_file_to_tempfile, @@ -146,9 +146,8 @@ async def process_message( model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None) - if model_input or message.role == "tool": - if tagged: - model_input = add_tag(message.role, model_input) + if (model_input or message.role == "tool") and tagged: + model_input = add_tag(message.role, model_input) return model_input, files diff --git a/app/services/lmdb.py b/app/services/lmdb.py index abf8859..87a1449 100644 --- a/app/services/lmdb.py +++ b/app/services/lmdb.py @@ -4,22 +4,22 @@ from contextlib import contextmanager from datetime import datetime, timedelta from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any import lmdb import orjson from loguru import logger -from ..models import ContentItem, ConversationInStore, Message -from ..utils import g_config -from ..utils.helper import ( +from app.models import ContentItem, ConversationInStore, Message +from app.utils import g_config +from app.utils.helper import ( extract_tool_calls, 
normalize_llm_text, remove_tool_call_blocks, strip_system_hints, unescape_text, ) -from ..utils.singleton import Singleton +from app.utils.singleton import Singleton _VOLATILE_TRANS_TABLE = str.maketrans("", "", string.whitespace + string.punctuation) @@ -125,7 +125,7 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str: def _hash_conversation( - client_id: str, model: str, messages: List[Message], fuzzy: bool = False + client_id: str, model: str, messages: list[Message], fuzzy: bool = False ) -> str: """Generate a hash for a list of messages and model name, tied to a specific client_id.""" combined_hash = hashlib.sha256() @@ -145,9 +145,9 @@ class LMDBConversationStore(metaclass=Singleton): def __init__( self, - db_path: Optional[str] = None, - max_db_size: Optional[int] = None, - retention_days: Optional[int] = None, + db_path: str | None = None, + max_db_size: int | None = None, + retention_days: int | None = None, ): """ Initialize LMDB store. @@ -219,7 +219,7 @@ def _get_transaction(self, write: bool = False): raise @staticmethod - def _decode_index_value(data: bytes) -> List[str]: + def _decode_index_value(data: bytes) -> list[str]: """Decode index value, handling both legacy single-string and new list-of-strings formats.""" if not data: return [] @@ -238,7 +238,7 @@ def _decode_index_value(data: bytes) -> List[str]: @staticmethod def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str): """Add a storage key to the index for a given hash, avoiding duplicates.""" - idx_key = f"{prefix}{hash_val}".encode("utf-8") + idx_key = f"{prefix}{hash_val}".encode() existing = txn.get(idx_key) keys = LMDBConversationStore._decode_index_value(existing) if existing else [] if storage_key not in keys: @@ -248,7 +248,7 @@ def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key @staticmethod def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str): """Remove a specific storage key from the index for a given hash.""" - idx_key = f"{prefix}{hash_val}".encode("utf-8") + idx_key = f"{prefix}{hash_val}".encode() existing = txn.get(idx_key) if not existing: return @@ -263,7 +263,7 @@ def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storag def store( self, conv: ConversationInStore, - custom_key: Optional[str] = None, + custom_key: str | None = None, ) -> str: """ Store a conversation model in LMDB. @@ -312,7 +312,7 @@ def store( ) raise - def get(self, key: str) -> Optional[ConversationInStore]: + def get(self, key: str) -> ConversationInStore | None: """ Retrieve conversation data by key. @@ -340,7 +340,7 @@ def get(self, key: str) -> Optional[ConversationInStore]: logger.error(f"Unexpected error retrieving messages with key {key[:12]}: {e}") return None - def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]: + def find(self, model: str, messages: list[Message]) -> ConversationInStore | None: """ Search conversation data by message list. Tries raw matching, then sanitized matching, and finally fuzzy matching. @@ -360,12 +360,13 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt return conv cleaned_messages = self.sanitize_messages(messages) - if cleaned_messages != messages: - if conv := self._find_by_message_list(model, cleaned_messages): - logger.debug( - f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages." 
- ) - return conv + if cleaned_messages != messages and ( + conv := self._find_by_message_list(model, cleaned_messages) + ): + logger.debug( + f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages." + ) + return conv if conv := self._find_by_message_list(model, messages, fuzzy=True): logger.debug( @@ -379,9 +380,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt def _find_by_message_list( self, model: str, - messages: List[Message], + messages: list[Message], fuzzy: bool = False, - ) -> Optional[ConversationInStore]: + ) -> ConversationInStore | None: """ Internal find implementation based on a message list. @@ -440,7 +441,7 @@ def exists(self, key: str) -> bool: logger.error(f"Failed to check existence of key {key}: {e}") return False - def delete(self, key: str) -> Optional[ConversationInStore]: + def delete(self, key: str) -> ConversationInStore | None: """Delete conversation model by key.""" try: with self._get_transaction(write=True) as txn: @@ -466,7 +467,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]: logger.error(f"Failed to delete messages with key {key[:12]}: {e}") return None - def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: + def keys(self, prefix: str = "", limit: int | None = None) -> list[str]: """List all keys in the store, optionally filtered by prefix.""" keys = [] try: @@ -492,7 +493,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]: logger.error(f"Failed to list keys: {e}") return keys - def cleanup_expired(self, retention_days: Optional[int] = None) -> int: + def cleanup_expired(self, retention_days: int | None = None) -> int: """Delete conversations older than the given retention period.""" retention_value = ( self.retention_days if retention_days is None else max(0, int(retention_days)) @@ -561,7 +562,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int: return removed - def stats(self) -> Dict[str, Any]: + def stats(self) -> dict[str, Any]: """Get database statistics.""" if not self._env: logger.error("LMDB environment not initialized") diff --git a/app/services/pool.py b/app/services/pool.py index decc21a..3b4197c 100644 --- a/app/services/pool.py +++ b/app/services/pool.py @@ -1,11 +1,11 @@ import asyncio from collections import deque -from typing import Dict, List, Optional from loguru import logger -from ..utils import g_config -from ..utils.singleton import Singleton +from app.utils import g_config +from app.utils.singleton import Singleton + from .client import GeminiClientWrapper @@ -13,10 +13,10 @@ class GeminiClientPool(metaclass=Singleton): """Pool of GeminiClient instances identified by unique ids.""" def __init__(self) -> None: - self._clients: List[GeminiClientWrapper] = [] - self._id_map: Dict[str, GeminiClientWrapper] = {} + self._clients: list[GeminiClientWrapper] = [] + self._id_map: dict[str, GeminiClientWrapper] = {} self._round_robin: deque[GeminiClientWrapper] = deque() - self._restart_locks: Dict[str, asyncio.Lock] = {} + self._restart_locks: dict[str, asyncio.Lock] = {} if len(g_config.gemini.clients) == 0: raise ValueError("No Gemini clients configured") @@ -55,7 +55,7 @@ async def init(self) -> None: if success_count == 0: raise RuntimeError("Failed to initialize any Gemini clients") - async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper: + async def acquire(self, client_id: str | None = None) -> GeminiClientWrapper: """Return a healthy client by id or using 
round-robin.""" if not self._round_robin: raise RuntimeError("No Gemini clients configured") @@ -106,10 +106,10 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool: return False @property - def clients(self) -> List[GeminiClientWrapper]: + def clients(self) -> list[GeminiClientWrapper]: """Return managed clients.""" return self._clients - def status(self) -> Dict[str, bool]: + def status(self) -> dict[str, bool]: """Return running status for each client.""" return {client.id: client.running() for client in self._clients} diff --git a/app/utils/config.py b/app/utils/config.py index 4c1709f..21d2891 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -1,7 +1,7 @@ import ast import os import sys -from typing import Any, Literal, Optional +from typing import Any, Literal import orjson from loguru import logger @@ -28,7 +28,7 @@ class ServerConfig(BaseModel): host: str = Field(default="0.0.0.0", description="Server host address") port: int = Field(default=8000, ge=1, le=65535, description="Server port number") - api_key: Optional[str] = Field( + api_key: str | None = Field( default=None, description="API key for authentication, if set, will enable API key validation", ) @@ -41,11 +41,11 @@ class GeminiClientSettings(BaseModel): id: str = Field(..., description="Unique identifier for the client") secure_1psid: str = Field(..., description="Gemini Secure 1PSID") secure_1psidts: str = Field(..., description="Gemini Secure 1PSIDTS") - proxy: Optional[str] = Field(default=None, description="Proxy URL for this Gemini client") + proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client") @field_validator("proxy", mode="before") @classmethod - def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]: + def _blank_proxy_to_none(cls, value: str | None) -> str | None: if value is None: return None stripped = value.strip() @@ -55,8 +55,8 @@ def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]: class GeminiModelConfig(BaseModel): """Configuration for a custom Gemini model.""" - model_name: Optional[str] = Field(default=None, description="Name of the model") - model_header: Optional[dict[str, Optional[str]]] = Field( + model_name: str | None = Field(default=None, description="Name of the model") + model_header: dict[str, str | None] | None = Field( default=None, description="Header for the model" ) diff --git a/app/utils/helper.py b/app/utils/helper.py index 64df4f7..002d401 100644 --- a/app/utils/helper.py +++ b/app/utils/helper.py @@ -14,7 +14,7 @@ import orjson from loguru import logger -from ..models import FunctionCall, Message, ToolCall +from app.models import FunctionCall, Message, ToolCall VALID_TAG_ROLES = {"user", "assistant", "system", "tool"} TOOL_WRAP_HINT = ( @@ -67,6 +67,7 @@ ) CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE) COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])") +PARAM_FENCE_RE = re.compile(r"^(?P`{3,})") TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip() _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()] TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else "" @@ -113,7 +114,7 @@ def _strip_param_fences(s: str) -> str: if not s: return "" - match = re.match(r"^(?P`{3,})", s) + match = PARAM_FENCE_RE.match(s) if not match or not s.endswith(match.group("fence")): return s @@ -272,7 +273,7 @@ def _create_tool_call(name: str, raw_args: str) -> None: arguments = "{}" index = len(tool_calls) - seed = 
f"{name}:{arguments}:{index}".encode("utf-8") + seed = f"{name}:{arguments}:{index}".encode() call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}" tool_calls.append( diff --git a/app/utils/singleton.py b/app/utils/singleton.py index 489e87e..2a258af 100644 --- a/app/utils/singleton.py +++ b/app/utils/singleton.py @@ -1,10 +1,10 @@ -from typing import ClassVar, Dict +from typing import ClassVar class Singleton(type): - _instances: ClassVar[Dict[type, object]] = {} + _instances: ClassVar[dict[type, object]] = {} def __call__(cls, *args, **kwargs): if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) + cls._instances[cls] = super().__call__(*args, **kwargs) return cls._instances[cls] diff --git a/pyproject.toml b/pyproject.toml index 0cae786..a1ae29d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,33 +3,61 @@ name = "gemini-fastapi" version = "1.0.0" description = "FastAPI Server built on Gemini Web API" readme = "README.md" -requires-python = "==3.12.*" +requires-python = "==3.13.*" dependencies = [ "fastapi>=0.129.0", "gemini-webapi>=1.19.2", + "httptools>=0.7.1", "lmdb>=1.7.5", "loguru>=0.7.3", "orjson>=3.11.7", - "pydantic-settings[yaml]>=2.12.0", - "uvicorn>=0.40.0", + "pydantic-settings[yaml]>=2.13.0", + "uvicorn>=0.41.0", "uvloop>=0.22.1; sys_platform != 'win32'", ] +[project.urls] +Repository = "https://github.com/Nativu5/Gemini-FastAPI" + [project.optional-dependencies] dev = [ - "ruff>=0.15.0", + "pytest>=9.0.2", + "ruff>=0.15.1", +] + +[dependency-groups] +dev = [ + "gemini-fastapi[dev]", ] [tool.ruff] line-length = 100 -lint.select = ["E", "F", "W", "I", "RUF"] -lint.ignore = ["E501"] +target-version = "py313" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "F", # pyflakes + "W", # pycodestyle warnings + "I", # isort + "UP", # pyupgrade + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "SIM", # flake8-simplify + "RUF", # ruff-specific rules + "TID", # flake8-tidy-imports +] +ignore = [ + "E501", # line too long +] + +[tool.ruff.lint.flake8-bugbear] +extend-immutable-calls = [ + "fastapi.Depends", + "fastapi.Query", + "fastapi.security.HTTPBearer", +] [tool.ruff.format] quote-style = "double" indent-style = "space" - -[dependency-groups] -dev = [ - "ruff>=0.15.1", -] diff --git a/scripts/dump_lmdb.py b/scripts/dump_lmdb.py index a331325..889af4f 100644 --- a/scripts/dump_lmdb.py +++ b/scripts/dump_lmdb.py @@ -1,6 +1,7 @@ import argparse +from collections.abc import Iterable from pathlib import Path -from typing import Any, Iterable, List +from typing import Any import lmdb import orjson @@ -14,17 +15,17 @@ def _decode_value(value: bytes) -> Any: return value.decode("utf-8", errors="replace") -def _dump_all(txn: lmdb.Transaction) -> List[dict[str, Any]]: +def _dump_all(txn: lmdb.Transaction) -> list[dict[str, Any]]: """Return all records from the database.""" - result: List[dict[str, Any]] = [] + result: list[dict[str, Any]] = [] for key, value in txn.cursor(): result.append({"key": key.decode("utf-8"), "value": _decode_value(value)}) return result -def _dump_selected(txn: lmdb.Transaction, keys: Iterable[str]) -> List[dict[str, Any]]: +def _dump_selected(txn: lmdb.Transaction, keys: Iterable[str]) -> list[dict[str, Any]]: """Return records for the provided keys.""" - result: List[dict[str, Any]] = [] + result: list[dict[str, Any]] = [] for key in keys: raw = txn.get(key.encode("utf-8")) if raw is not None: @@ -36,10 +37,7 @@ def dump_lmdb(path: Path, keys: Iterable[str] | None = None) -> None: 
"""Print selected or all key-value pairs from the LMDB database.""" env = lmdb.open(str(path), readonly=True, lock=False) with env.begin() as txn: - if keys: - records = _dump_selected(txn, keys) - else: - records = _dump_all(txn) + records = _dump_selected(txn, keys) if keys else _dump_all(txn) env.close() print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode("utf-8")) diff --git a/uv.lock b/uv.lock index 5b687e4..4c819e7 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,6 @@ version = 1 revision = 3 -requires-python = "==3.12.*" +requires-python = "==3.13.*" [[package]] name = "annotated-doc" @@ -26,7 +26,6 @@ version = "4.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, - { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } wheels = [ @@ -86,6 +85,7 @@ source = { virtual = "." } dependencies = [ { name = "fastapi" }, { name = "gemini-webapi" }, + { name = "httptools" }, { name = "lmdb" }, { name = "loguru" }, { name = "orjson" }, @@ -96,30 +96,33 @@ dependencies = [ [package.optional-dependencies] dev = [ + { name = "pytest" }, { name = "ruff" }, ] [package.dev-dependencies] dev = [ - { name = "ruff" }, + { name = "gemini-fastapi", extra = ["dev"] }, ] [package.metadata] requires-dist = [ { name = "fastapi", specifier = ">=0.129.0" }, { name = "gemini-webapi", specifier = ">=1.19.2" }, + { name = "httptools", specifier = ">=0.7.1" }, { name = "lmdb", specifier = ">=1.7.5" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "orjson", specifier = ">=3.11.7" }, - { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.0" }, - { name = "uvicorn", specifier = ">=0.40.0" }, + { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.1" }, + { name = "uvicorn", specifier = ">=0.41.0" }, { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" }, ] provides-extras = ["dev"] [package.metadata.requires-dev] -dev = [{ name = "ruff", specifier = ">=0.15.1" }] +dev = [{ name = "gemini-fastapi", extras = ["dev"] }] [[package]] name = "gemini-webapi" @@ -180,6 +183,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] +[[package]] +name = "httptools" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/09/8f/c77b1fcbfd262d422f12da02feb0d218fa228d52485b77b953832105bb90/httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3", size = 
202889, upload-time = "2025-10-10T03:54:47.089Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1a/22887f53602feaa066354867bc49a68fc295c2293433177ee90870a7d517/httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca", size = 108180, upload-time = "2025-10-10T03:54:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/32/6a/6aaa91937f0010d288d3d124ca2946d48d60c3a5ee7ca62afe870e3ea011/httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c", size = 478596, upload-time = "2025-10-10T03:54:48.919Z" }, + { url = "https://files.pythonhosted.org/packages/6d/70/023d7ce117993107be88d2cbca566a7c1323ccbaf0af7eabf2064fe356f6/httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66", size = 473268, upload-time = "2025-10-10T03:54:49.993Z" }, + { url = "https://files.pythonhosted.org/packages/32/4d/9dd616c38da088e3f436e9a616e1d0cc66544b8cdac405cc4e81c8679fc7/httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346", size = 455517, upload-time = "2025-10-10T03:54:51.066Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3a/a6c595c310b7df958e739aae88724e24f9246a514d909547778d776799be/httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650", size = 458337, upload-time = "2025-10-10T03:54:52.196Z" }, + { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" }, +] + [[package]] name = "httpx" version = "0.28.1" @@ -218,18 +236,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "lmdb" version = "1.7.5" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/c7/a3/3756f2c6adba4a1413dba55e6c81a20b38a868656517308533e33cb59e1c/lmdb-1.7.5.tar.gz", hash = "sha256:f0604751762cb097059d5412444c4057b95f386c7ed958363cf63f453e5108da", size = 883490, upload-time = "2025-10-15T03:39:44.038Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/34/b4/8b862c4d7fd6f68cb33e2a919169fda8924121dc5ff61e3cc105304a6dd4/lmdb-1.7.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b48c2359eea876d7b634b49f84019ecc8c1626da97c795fc7b39a793676815df", size = 100910, upload-time = "2025-10-15T03:39:00.727Z" }, - { url = "https://files.pythonhosted.org/packages/27/64/8ab5da48180d5f13a293ea00a9f8758b1bee080e76ea0ab0d6be0d51b55f/lmdb-1.7.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f84793baeb430ba984eb6c1b4e08c0a508b1c03e79ce79fcda0f29ecc06a95a", size = 99376, upload-time = "2025-10-15T03:39:01.791Z" }, - { url = "https://files.pythonhosted.org/packages/43/e0/51bc942fe5ed3fce69c631b54f52d97785de3d94487376139be6de1e199a/lmdb-1.7.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:68cc21314a33faac1b749645a976b7655e7fa7cc104a72365d2429d2db7f6342", size = 298556, upload-time = "2025-10-15T03:39:02.787Z" }, - { url = "https://files.pythonhosted.org/packages/66/c5/19ea75c88b91d12da5c6f4bbe2aca633047b6b270fd613d557583d32cc5c/lmdb-1.7.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f2d9b7e102fcfe5e0cfb3acdebd403eb55ccbe5f7202d8f49d60bdafb1546d1e", size = 299449, upload-time = "2025-10-15T03:39:03.903Z" }, - { url = "https://files.pythonhosted.org/packages/1b/74/365194203dbff47d3a1621366d6a1133cdcce261f4ac0e1d0496f01e6ace/lmdb-1.7.5-cp312-cp312-win_amd64.whl", hash = "sha256:69de89cc79e03e191fc6f95797f1bef91b45c415d1ea9d38872b00b2d989a50f", size = 99328, upload-time = "2025-10-15T03:39:04.949Z" }, - { url = "https://files.pythonhosted.org/packages/3f/3a/a441afebff5bd761f7f58d194fed7ac265279964957479a5c8a51c42f9ad/lmdb-1.7.5-cp312-cp312-win_arm64.whl", hash = "sha256:0c880ee4b309e900f2d58a710701f5e6316a351878588c6a95a9c0bcb640680b", size = 94191, upload-time = "2025-10-15T03:39:05.975Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/03275084218eacdbdf7e185d693e1db4cb79c35d18fac47fa0d388522a0d/lmdb-1.7.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:66ae02fa6179e46bb69fe446b7e956afe8706ae17ec1d4cd9f7056e161019156", size = 101508, upload-time = "2025-10-15T03:39:07.228Z" }, + { url = "https://files.pythonhosted.org/packages/20/b9/bc33ae2e4940359ba2fc412e6a755a2f126bc5062b4aaf35edd3a791f9a5/lmdb-1.7.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bf65c573311ac8330c7908257f76b28ae3576020123400a81a6b650990dc028c", size = 100105, upload-time = "2025-10-15T03:39:08.491Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f6/22f84b776a64d3992f052ecb637c35f1764a39df4f2190ecc5a3a1295bd7/lmdb-1.7.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97bcb3fc12841a8828db918e494fe0fd016a73d2680ad830d75719bb3bf4e76a", size = 301500, upload-time = "2025-10-15T03:39:09.463Z" }, + { url = "https://files.pythonhosted.org/packages/2a/4d/8e6be8d7d5a30d47fa0ce4b55e3a8050ad689556e6e979d206b4ac67b733/lmdb-1.7.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:865f374f6206ab4aacb92ffb1dc612ee1a31a421db7c89733abe06b81ac87cb0", size = 302285, upload-time = "2025-10-15T03:39:10.856Z" }, + { url = "https://files.pythonhosted.org/packages/5e/dc/7e04fb31a8f88951db81ac677e3ccb3e09248eda40e6ad52f74fd9370c32/lmdb-1.7.5-cp313-cp313-win_amd64.whl", hash = "sha256:82a04d5ca2a6a799c8db7f209354c48aebb49ff338530f5813721fc4c68e4450", size = 99447, upload-time = "2025-10-15T03:39:12.151Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/50/e3f97efab17b3fad4afde99b3c957ecac4ffbefada6874a57ad0c695660a/lmdb-1.7.5-cp313-cp313-win_arm64.whl", hash = "sha256:0ad85a15acbfe8a42fdef92ee5e869610286d38507e976755f211be0fc905ca7", size = 94145, upload-time = "2025-10-15T03:39:13.461Z" }, { url = "https://files.pythonhosted.org/packages/bd/2c/982cb5afed533d0cb8038232b40c19b5b85a2d887dec74dfd39e8351ef4b/lmdb-1.7.5-py3-none-any.whl", hash = "sha256:fc344bb8bc0786c87c4ccb19b31f09a38c08bd159ada6f037d669426fea06f03", size = 148539, upload-time = "2025-10-15T03:39:42.982Z" }, ] @@ -252,21 +279,39 @@ version = "3.11.7" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" }, - { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" }, - { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" }, - { url = "https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" }, - { url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" }, - { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" }, - { url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" }, - { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" }, - { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" }, - { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" }, - { url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" }, - { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" }, - { url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" }, - { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" }, - { url = "https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" }, + { url = "https://files.pythonhosted.org/packages/89/25/6e0e52cac5aab51d7b6dcd257e855e1dec1c2060f6b28566c509b4665f62/orjson-3.11.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1d98b30cc1313d52d4af17d9c3d307b08389752ec5f2e5febdfada70b0f8c733", size = 228390, upload-time = "2026-02-02T15:38:06.8Z" }, + { url = "https://files.pythonhosted.org/packages/a5/29/a77f48d2fc8a05bbc529e5ff481fb43d914f9e383ea2469d4f3d51df3d00/orjson-3.11.7-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:d897e81f8d0cbd2abb82226d1860ad2e1ab3ff16d7b08c96ca00df9d45409ef4", size = 125189, upload-time = "2026-02-02T15:38:08.181Z" }, + { url = "https://files.pythonhosted.org/packages/89/25/0a16e0729a0e6a1504f9d1a13cdd365f030068aab64cec6958396b9969d7/orjson-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814be4b49b228cfc0b3c565acf642dd7d13538f966e3ccde61f4f55be3e20785", size = 128106, upload-time = "2026-02-02T15:38:09.41Z" }, + { url = "https://files.pythonhosted.org/packages/66/da/a2e505469d60666a05ab373f1a6322eb671cb2ba3a0ccfc7d4bc97196787/orjson-3.11.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d06e5c5fed5caedd2e540d62e5b1c25e8c82431b9e577c33537e5fa4aa909539", size 
= 123363, upload-time = "2026-02-02T15:38:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/23/bf/ed73f88396ea35c71b38961734ea4a4746f7ca0768bf28fd551d37e48dd0/orjson-3.11.7-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31c80ce534ac4ea3739c5ee751270646cbc46e45aea7576a38ffec040b4029a1", size = 129007, upload-time = "2026-02-02T15:38:12.138Z" }, + { url = "https://files.pythonhosted.org/packages/73/3c/b05d80716f0225fc9008fbf8ab22841dcc268a626aa550561743714ce3bf/orjson-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f50979824bde13d32b4320eedd513431c921102796d86be3eee0b58e58a3ecd1", size = 141667, upload-time = "2026-02-02T15:38:13.398Z" }, + { url = "https://files.pythonhosted.org/packages/61/e8/0be9b0addd9bf86abfc938e97441dcd0375d494594b1c8ad10fe57479617/orjson-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e54f3808e2b6b945078c41aa8d9b5834b28c50843846e97807e5adb75fa9705", size = 130832, upload-time = "2026-02-02T15:38:14.698Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ec/c68e3b9021a31d9ec15a94931db1410136af862955854ed5dd7e7e4f5bff/orjson-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12b80df61aab7b98b490fe9e4879925ba666fccdfcd175252ce4d9035865ace", size = 133373, upload-time = "2026-02-02T15:38:16.109Z" }, + { url = "https://files.pythonhosted.org/packages/d2/45/f3466739aaafa570cc8e77c6dbb853c48bf56e3b43738020e2661e08b0ac/orjson-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:996b65230271f1a97026fd0e6a753f51fbc0c335d2ad0c6201f711b0da32693b", size = 138307, upload-time = "2026-02-02T15:38:17.453Z" }, + { url = "https://files.pythonhosted.org/packages/e1/84/9f7f02288da1ffb31405c1be07657afd1eecbcb4b64ee2817b6fe0f785fa/orjson-3.11.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ab49d4b2a6a1d415ddb9f37a21e02e0d5dbfe10b7870b21bf779fc21e9156157", size = 408695, upload-time = "2026-02-02T15:38:18.831Z" }, + { url = "https://files.pythonhosted.org/packages/18/07/9dd2f0c0104f1a0295ffbe912bc8d63307a539b900dd9e2c48ef7810d971/orjson-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:390a1dce0c055ddf8adb6aa94a73b45a4a7d7177b5c584b8d1c1947f2ba60fb3", size = 144099, upload-time = "2026-02-02T15:38:20.28Z" }, + { url = "https://files.pythonhosted.org/packages/a5/66/857a8e4a3292e1f7b1b202883bcdeb43a91566cf59a93f97c53b44bd6801/orjson-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1eb80451a9c351a71dfaf5b7ccc13ad065405217726b59fdbeadbcc544f9d223", size = 134806, upload-time = "2026-02-02T15:38:22.186Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5b/6ebcf3defc1aab3a338ca777214966851e92efb1f30dc7fc8285216e6d1b/orjson-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7477aa6a6ec6139c5cb1cc7b214643592169a5494d200397c7fc95d740d5fcf3", size = 127914, upload-time = "2026-02-02T15:38:23.511Z" }, + { url = "https://files.pythonhosted.org/packages/00/04/c6f72daca5092e3117840a1b1e88dfc809cc1470cf0734890d0366b684a1/orjson-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:b9f95dcdea9d4f805daa9ddf02617a89e484c6985fa03055459f90e87d7a0757", size = 124986, upload-time = "2026-02-02T15:38:24.836Z" }, + { url = "https://files.pythonhosted.org/packages/03/ba/077a0f6f1085d6b806937246860fafbd5b17f3919c70ee3f3d8d9c713f38/orjson-3.11.7-cp313-cp313-win_arm64.whl", hash = "sha256:800988273a014a0541483dc81021247d7eacb0c845a9d1a34a422bc718f41539", size = 126045, upload-time = "2026-02-02T15:38:26.216Z" }, +] + +[[package]] +name 
= "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] [[package]] @@ -293,38 +338,34 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, - { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, - { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, - { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, - { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, - { url = 
"https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, - { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, - { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, - { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, - { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, - { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, - { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, - { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, - { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, - { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, - { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = 
"2025-11-04T13:42:52.215Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, - { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, ] [[package]] name = "pydantic-settings" -version = "2.12.0" +version = "2.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } +sdist = { url = "https://files.pythonhosted.org/packages/96/a1/ae859ffac5a3338a66b74c5e29e244fd3a3cc483c89feaf9f56c39898d75/pydantic_settings-2.13.0.tar.gz", hash = "sha256:95d875514610e8595672800a5c40b073e99e4aae467fa7c8f9c263061ea2e1fe", size = 222450, upload-time = "2026-02-15T12:11:23.476Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1a/dd1b9d7e627486cf8e7523d09b70010e05a4bc41414f4ae6ce184cf0afb6/pydantic_settings-2.13.0-py3-none-any.whl", hash = "sha256:d67b576fff39cd086b595441bf9c75d4193ca9c0ed643b90360694d0f1240246", size = 58429, upload-time = "2026-02-15T12:11:22.133Z" }, ] [package.optional-dependencies] @@ -332,6 +373,31 @@ yaml = [ { name = "pyyaml" }, ] +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.1" @@ -347,16 +413,16 @@ version = "6.0.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, - { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, - { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, - { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, - { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time 
= "2025-09-25T21:32:16.431Z" }, - { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, - { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, - { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, - { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, - { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, ] [[package]] @@ -390,7 +456,6 @@ version = "0.52.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } wheels = [ @@ -420,15 +485,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.40.0" +version = "0.41.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } +sdist = { url = "https://files.pythonhosted.org/packages/32/ce/eeb58ae4ac36fe09e3842eb02e0eb676bf2c53ae062b98f1b2531673efdd/uvicorn-0.41.0.tar.gz", hash = "sha256:09d11cf7008da33113824ee5a1c6422d89fbc2ff476540d69a34c87fab8b571a", size = 82633, upload-time = "2026-02-16T23:07:24.1Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, + { url = "https://files.pythonhosted.org/packages/83/e4/d04a086285c20886c0daad0e026f250869201013d18f81d9ff5eada73a88/uvicorn-0.41.0-py3-none-any.whl", hash = "sha256:29e35b1d2c36a04b9e180d4007ede3bcb32a85fbdfd6c6aeb3f26839de088187", size = 68783, upload-time = "2026-02-16T23:07:22.357Z" }, ] [[package]] @@ -437,12 +502,12 @@ version = "0.22.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" }, - { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" }, - { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" }, - { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" }, - { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" }, - { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" }, + { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, ] [[package]] From 35864f65d722b40ce8bbef15b5a5e17944adf96d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 18 Feb 2026 16:54:37 +0700 Subject: [PATCH 138/139] Upgrade to fully support Python 3.13 --- .github/workflows/docker.yaml | 2 +- app/models/__init__.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 775e9e4..11caa57 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -50,7 +50,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{major}} - type=sha,format=short + type=raw,value={{date 'YYYYMMDD'}} type=raw,value=latest,enable={{is_default_branch}} - name: Build and push Docker image diff --git a/app/models/__init__.py b/app/models/__init__.py index a72efdc..3896de1 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -19,6 +19,7 @@ ResponseOutputMessage, ResponseToolCall, ResponseToolChoice, + ResponseUsage, Tool, ToolCall, ToolChoiceFunction, @@ -48,6 +49,7 @@ "ResponseOutputMessage", "ResponseToolCall", "ResponseToolChoice", + "ResponseUsage", "Tool", "ToolCall", "ToolChoiceFunction", From a48c38d16fab0a0c27a716ee25bf12842c8bd9a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= Date: Wed, 18 Feb 2026 17:19:51 +0700 Subject: [PATCH 139/139] Upgrade to fully support Python 3.13 --- .github/workflows/docker.yaml | 2 +- app/models/models.py | 130 +++++++++++++++++----------------- 2 files changed, 67 insertions(+), 65 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 11caa57..1c5a2ee 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -50,7 +50,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{major}} - type=raw,value={{date 'YYYYMMDD'}} + type=raw,value={{date 'YYYYMMDD'}}-{{sha}} type=raw,value=latest,enable={{is_default_branch}} - name: Build and push Docker image diff --git a/app/models/models.py b/app/models/models.py index ca206b7..3b3e627 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -10,10 +10,10 @@ class ContentItem(BaseModel): """Individual content item (text, image, or file) within a message.""" type: Literal["text", "image_url", "file", "input_audio"] - text: str | None = None - image_url: dict[str, str] | None = None - input_audio: dict[str, Any] | None = None - file: dict[str, str] | None = None + text: str | None = Field(default=None) + image_url: dict[str, Any] | None = Field(default=None) + input_audio: dict[str, Any] | None = Field(default=None) + file: dict[str, Any] | None = Field(default=None) annotations: list[dict[str, Any]] = Field(default_factory=list) @@ -21,13 +21,13 @@ class Message(BaseModel): """Message model""" role: str - content: str | list[ContentItem] | None = None - name: str | None = None - tool_calls: list[ToolCall] | None = None - tool_call_id: str | None = None - refusal: str | None = None - reasoning_content: str | None = None 
- audio: dict[str, Any] | None = None + content: str | list[ContentItem] | None = Field(default=None) + name: str | None = Field(default=None) + tool_calls: list[ToolCall] | None = Field(default=None) + tool_call_id: str | None = Field(default=None) + refusal: str | None = Field(default=None) + reasoning_content: str | None = Field(default=None) + audio: dict[str, Any] | None = Field(default=None) annotations: list[dict[str, Any]] = Field(default_factory=list) @model_validator(mode="after") @@ -44,7 +44,7 @@ class Choice(BaseModel): index: int message: Message finish_reason: str - logprobs: dict[str, Any] | None = None + logprobs: dict[str, Any] | None = Field(default=None) class FunctionCall(BaseModel): @@ -66,8 +66,8 @@ class ToolFunctionDefinition(BaseModel): """Function definition for tool.""" name: str - description: str | None = None - parameters: dict[str, Any] | None = None + description: str | None = Field(default=None) + parameters: dict[str, Any] | None = Field(default=None) class Tool(BaseModel): @@ -96,8 +96,8 @@ class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int - prompt_tokens_details: dict[str, int] | None = None - completion_tokens_details: dict[str, int] | None = None + prompt_tokens_details: dict[str, int] | None = Field(default=None) + completion_tokens_details: dict[str, int] | None = Field(default=None) class ModelData(BaseModel): @@ -114,16 +114,16 @@ class ChatCompletionRequest(BaseModel): model: str messages: list[Message] - stream: bool | None = False - user: str | None = None - temperature: float | None = 0.7 - top_p: float | None = 1.0 - max_tokens: int | None = None - tools: list[Tool] | None = None + stream: bool | None = Field(default=False) + user: str | None = Field(default=None) + temperature: float | None = Field(default=0.7) + top_p: float | None = Field(default=1.0) + max_tokens: int | None = Field(default=None) + tools: list[Tool] | None = Field(default=None) tool_choice: ( Literal["none"] | Literal["auto"] | Literal["required"] | ToolChoiceFunction | None - ) = None - response_format: dict[str, Any] | None = None + ) = Field(default=None) + response_format: dict[str, Any] | None = Field(default=None) class ChatCompletionResponse(BaseModel): @@ -148,9 +148,9 @@ class HealthCheckResponse(BaseModel): """Health check response model""" ok: bool - storage: dict[str, str | int] | None = None - clients: dict[str, bool] | None = None - error: str | None = None + storage: dict[str, Any] | None = Field(default=None) + clients: dict[str, bool] | None = Field(default=None) + error: str | None = Field(default=None) class ConversationInStore(BaseModel): @@ -172,12 +172,12 @@ class ResponseInputContent(BaseModel): """Content item for Responses API input.""" type: Literal["input_text", "input_image", "input_file"] - text: str | None = None - image_url: str | None = None - detail: Literal["auto", "low", "high"] | None = None - file_url: str | None = None - file_data: str | None = None - filename: str | None = None + text: str | None = Field(default=None) + image_url: str | None = Field(default=None) + detail: Literal["auto", "low", "high"] | None = Field(default=None) + file_url: str | None = Field(default=None) + file_data: str | None = Field(default=None) + filename: str | None = Field(default=None) annotations: list[dict[str, Any]] = Field(default_factory=list) @model_validator(mode="before") @@ -192,7 +192,7 @@ def normalize_output_text(cls, data: Any) -> Any: class ResponseInputItem(BaseModel): """Single input item for Responses 
API.""" - type: Literal["message"] | None = "message" + type: Literal["message"] | None = Field(default="message") role: Literal["user", "assistant", "system", "developer"] content: str | list[ResponseInputContent] @@ -201,15 +201,15 @@ class ResponseToolChoice(BaseModel): """Tool choice enforcing a specific tool in Responses API.""" type: Literal["function", "image_generation"] - function: ToolChoiceFunctionDetail | None = None + function: ToolChoiceFunctionDetail | None = Field(default=None) class ResponseImageTool(BaseModel): """Image generation tool specification for Responses API.""" type: Literal["image_generation"] - model: str | None = None - output_format: str | None = None + model: str | None = Field(default=None) + output_format: str | None = Field(default=None) class ResponseCreateRequest(BaseModel): @@ -217,17 +217,17 @@ class ResponseCreateRequest(BaseModel): model: str input: str | list[ResponseInputItem] - instructions: str | list[ResponseInputItem] | None = None - temperature: float | None = 0.7 - top_p: float | None = 1.0 - max_output_tokens: int | None = None - stream: bool | None = False - tool_choice: str | ResponseToolChoice | None = None - tools: list[Tool | ResponseImageTool] | None = None - store: bool | None = None - user: str | None = None - response_format: dict[str, Any] | None = None - metadata: dict[str, Any] | None = None + instructions: str | list[ResponseInputItem] | None = Field(default=None) + temperature: float | None = Field(default=0.7) + top_p: float | None = Field(default=1.0) + max_output_tokens: int | None = Field(default=None) + stream: bool | None = Field(default=False) + tool_choice: str | ResponseToolChoice | None = Field(default=None) + tools: list[Tool | ResponseImageTool] | None = Field(default=None) + store: bool | None = Field(default=None) + user: str | None = Field(default=None) + response_format: dict[str, Any] | None = Field(default=None) + metadata: dict[str, Any] | None = Field(default=None) class ResponseUsage(BaseModel): @@ -242,7 +242,7 @@ class ResponseOutputContent(BaseModel): """Content item for Responses API output.""" type: Literal["output_text"] - text: str | None = "" + text: str | None = Field(default="") annotations: list[dict[str, Any]] = Field(default_factory=list) @@ -259,20 +259,22 @@ class ResponseImageGenerationCall(BaseModel): """Image generation call record emitted in Responses API.""" id: str - type: Literal["image_generation_call"] = "image_generation_call" - status: Literal["completed", "in_progress", "generating", "failed"] = "completed" - result: str | None = None - output_format: str | None = None - size: str | None = None - revised_prompt: str | None = None + type: Literal["image_generation_call"] = Field(default="image_generation_call") + status: Literal["completed", "in_progress", "generating", "failed"] = Field(default="completed") + result: str | None = Field(default=None) + output_format: str | None = Field(default=None) + size: str | None = Field(default=None) + revised_prompt: str | None = Field(default=None) class ResponseToolCall(BaseModel): """Tool call record emitted in Responses API.""" id: str - type: Literal["tool_call"] = "tool_call" - status: Literal["in_progress", "completed", "failed", "requires_action"] = "completed" + type: Literal["tool_call"] = Field(default="tool_call") + status: Literal["in_progress", "completed", "failed", "requires_action"] = Field( + default="completed" + ) function: FunctionCall @@ -280,7 +282,7 @@ class ResponseCreateResponse(BaseModel): """Responses API 
response payload.""" id: str - object: Literal["response"] = "response" + object: Literal["response"] = Field(default="response") created_at: int model: str output: list[ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall] @@ -291,13 +293,13 @@ class ResponseCreateResponse(BaseModel): "incomplete", "cancelled", "requires_action", - ] = "completed" - tool_choice: str | ResponseToolChoice | None = None - tools: list[Tool | ResponseImageTool] | None = None + ] = Field(default="completed") + tool_choice: str | ResponseToolChoice | None = Field(default=None) + tools: list[Tool | ResponseImageTool] | None = Field(default=None) usage: ResponseUsage - error: dict[str, Any] | None = None - metadata: dict[str, Any] | None = None - input: str | list[ResponseInputItem] | None = None + error: dict[str, Any] | None = Field(default=None) + metadata: dict[str, Any] | None = Field(default=None) + input: str | list[ResponseInputItem] | None = Field(default=None) # Rebuild models with forward references
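
A note on the convention adopted in patch 139/139: in Pydantic v2, a declaration like "field: T | None = Field(default=None)" is equivalent at runtime to the plain "field: T | None = None" it replaces; the explicit Field(...) form keeps optional fields visually consistent with fields that carry default_factory or other metadata. Below is a minimal sketch of the pattern, assuming Pydantic v2 — the model and field names are illustrative only, not taken from app/models/models.py:

    from typing import Any

    from pydantic import BaseModel, Field


    class Example(BaseModel):
        # Field(default=None) behaves the same as a plain `= None` default.
        name: str | None = Field(default=None)
        # default_factory builds a fresh list for every instance.
        tags: list[str] = Field(default_factory=list)
        extra: dict[str, Any] | None = Field(default=None)


    print(Example())  # name=None tags=[] extra=None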