From e396b36bd1ce26a5354616473599be6902d0912b Mon Sep 17 00:00:00 2001 From: blublinsky Date: Thu, 12 Feb 2026 12:14:08 +0000 Subject: [PATCH] Adjusting MCP header processing to llama stack 0.4.x and adding MCP e2e tests --- dev-tools/mcp-mock-server/README.md | 13 +- dev-tools/mcp-mock-server/server.py | 183 ++-- docker-compose-library.yaml | 7 +- docker-compose.yaml | 7 +- src/app/endpoints/streaming_query.py | 16 + src/app/main.py | 32 +- src/constants.py | 2 + src/utils/responses.py | 196 +++- .../library-mode/lightspeed-stack-mcp.yaml | 50 + .../library-mode/lightspeed-stack.yaml | 29 +- .../server-mode/lightspeed-stack-mcp.yaml | 51 ++ .../server-mode/lightspeed-stack.yaml | 29 +- tests/e2e/features/environment.py | 16 + tests/e2e/features/mcp_tools.feature | 184 ++++ tests/e2e/features/steps/mcp.py | 858 ++++++++++++++++++ tests/e2e/test_list.txt | 1 + .../app/endpoints/test_streaming_query.py | 5 + tests/unit/utils/test_responses.py | 12 +- 18 files changed, 1602 insertions(+), 89 deletions(-) create mode 100644 tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml create mode 100644 tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml create mode 100644 tests/e2e/features/mcp_tools.feature create mode 100644 tests/e2e/features/steps/mcp.py diff --git a/dev-tools/mcp-mock-server/README.md b/dev-tools/mcp-mock-server/README.md index 4d112a037..69566474e 100644 --- a/dev-tools/mcp-mock-server/README.md +++ b/dev-tools/mcp-mock-server/README.md @@ -19,9 +19,10 @@ This mock server helps developers: - ✅ **HTTP & HTTPS** - Runs both protocols simultaneously for comprehensive testing - ✅ **Header Capture** - Captures and displays all request headers - ✅ **Debug Endpoints** - Inspect captured headers and request history -- ✅ **MCP Protocol** - Implements basic MCP endpoints for testing +- ✅ **MCP Protocol** - Implements MCP endpoints (initialize, tools/list, tools/call) - ✅ **Request Logging** - Tracks recent requests with timestamps - ✅ **Self-Signed Certs** - Auto-generates certificates for HTTPS testing +- ✅ **Tool Execution** - Returns mock results for tool/call testing ## Quick Start @@ -46,8 +47,11 @@ HTTPS: https://localhost:3001 Debug endpoints: • /debug/headers - View captured headers • /debug/requests - View request log -MCP endpoint: - • POST /mcp/v1/list_tools +MCP endpoints: + • POST with JSON-RPC (any path) + - method: "initialize" + - method: "tools/list" + - method: "tools/call" ====================================================================== Note: HTTPS uses a self-signed certificate (for testing only) ``` @@ -270,8 +274,9 @@ python dev-tools/mcp-mock-server/server.py 8080 This is a **development/testing tool only**: - ❌ Not for production use - ❌ No authentication/security -- ❌ Limited MCP protocol implementation +- ❌ Limited MCP protocol implementation (initialize, tools/list, tools/call only) - ❌ Single-threaded (one request at a time) +- ❌ Mock responses only (not real tool execution) For production, use real MCP servers. 
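Before the `server.py` changes below, a hand-run sketch of the mock server's JSON-RPC surface described in the README above. This is illustrative only and not part of the patch; it assumes the compose port mapping introduced later in this diff (host 9000 → container 3000) and the file-based token `test-secret-token-123` written by the compose entrypoint — adjust the base URL if you run `server.py` directly on port 3000.

```python
# Minimal sketch: exercise the mock MCP server by hand (not part of the patch).
import requests

BASE = "http://localhost:9000"  # docker-compose maps host 9000 -> container 3000

# Any path accepts JSON-RPC; "tools/list" returns a single mock tool whose name
# depends on the Authorization header (mock_tool_file for the file-based token).
listing = requests.post(
    BASE,
    json={"jsonrpc": "2.0", "id": 1, "method": "tools/list"},
    headers={"Authorization": "Bearer test-secret-token-123"},
    timeout=5,
).json()
print([t["name"] for t in listing["result"]["tools"]])  # ['mock_tool_file']

# "tools/call" returns a canned result; an Authorization header containing
# "error-mode" makes the server set isError=true instead.
call = requests.post(
    BASE,
    json={
        "jsonrpc": "2.0",
        "id": 2,
        "method": "tools/call",
        "params": {"name": "mock_tool_file", "arguments": {"message": "hi"}},
    },
    headers={"Authorization": "Bearer test-secret-token-123"},
    timeout=5,
).json()
print(call["result"]["isError"], call["result"]["content"][0]["text"])

# Debug endpoints used by the e2e steps: inspect and clear the request log.
print(requests.get(f"{BASE}/debug/requests", timeout=5).json())
requests.get(f"{BASE}/debug/clear", timeout=5)
```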
diff --git a/dev-tools/mcp-mock-server/server.py b/dev-tools/mcp-mock-server/server.py index b7e17fffb..7f14f556b 100644 --- a/dev-tools/mcp-mock-server/server.py +++ b/dev-tools/mcp-mock-server/server.py @@ -60,7 +60,11 @@ def _capture_headers(self) -> None: if len(request_log) > 10: request_log.pop(0) - def do_POST(self) -> None: # pylint: disable=invalid-name + def do_POST( + self, + ) -> ( + None + ): # pylint: disable=invalid-name,too-many-locals,too-many-branches,too-many-statements """Handle POST requests (MCP protocol endpoints).""" self._capture_headers() @@ -73,14 +77,24 @@ def do_POST(self) -> None: # pylint: disable=invalid-name request_id = request_data.get("id", 1) method = request_data.get("method", "unknown") except (json.JSONDecodeError, UnicodeDecodeError): + request_data = {} request_id = 1 method = "unknown" + # Log the RPC method in the request log + if request_log: + request_log[-1]["rpc_method"] = method + # Determine tool name based on authorization header to avoid collisions auth_header = self.headers.get("Authorization", "") + # Initialize tool info defaults + tool_name = "mock_tool_no_auth" + tool_desc = "Mock tool with no authorization" + error_mode = False + # Match based on token content - match auth_header: + match True: case _ if "test-secret-token" in auth_header: tool_name = "mock_tool_file" tool_desc = "Mock tool with file-based auth" @@ -90,58 +104,116 @@ def do_POST(self) -> None: # pylint: disable=invalid-name case _ if "my-client-token" in auth_header: tool_name = "mock_tool_client" tool_desc = "Mock tool with client-provided token" + case _ if "error-mode" in auth_header: + tool_name = "mock_tool_error" + tool_desc = "Mock tool configured to return errors" + error_mode = True case _: - # No auth header or unrecognized token - tool_name = "mock_tool_no_auth" - tool_desc = "Mock tool with no authorization" - - # Handle MCP protocol methods - if method == "initialize": - # Return MCP initialize response - response = { - "jsonrpc": "2.0", - "id": request_id, - "result": { - "protocolVersion": "2024-11-05", - "capabilities": { - "tools": {}, - }, - "serverInfo": { - "name": "mock-mcp-server", - "version": "1.0.0", + # Default case already set above + pass + + # Log the tool name in the request log + if request_log: + request_log[-1]["tool_name"] = tool_name + + # Handle MCP protocol methods using match statement + response: dict = {} + match method: + case "initialize": + # Return MCP initialize response + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {}, + }, + "serverInfo": { + "name": "mock-mcp-server", + "version": "1.0.0", + }, }, - }, - } - elif method == "tools/list": - # Return list of tools with unique name - response = { - "jsonrpc": "2.0", - "id": request_id, - "result": { - "tools": [ - { - "name": tool_name, - "description": tool_desc, - "inputSchema": { - "type": "object", - "properties": { - "message": { - "type": "string", - "description": "Test message", - } + } + + case "tools/list": + # Return list of tools with unique name + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "tools": [ + { + "name": tool_name, + "description": tool_desc, + "inputSchema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Test message", + } + }, }, - }, - } - ] - }, - } - else: - # Generic success response for other methods - response = { - "jsonrpc": "2.0", - "id": request_id, - "result": {"status": "ok"}, - 
} + } + ] + }, + } + + case "tools/call": + # Handle tool execution + params = request_data.get("params", {}) + tool_called = params.get("name", "unknown") + arguments = params.get("arguments", {}) + + # Check if error mode is enabled + if error_mode: + # Return error response + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "content": [ + { + "type": "text", + "text": ( + f"Error: Tool '{tool_called}' " + "execution failed - simulated error." + ), + } + ], + "isError": True, + }, + } + else: + # Build result text + result_text = ( + f"Mock tool '{tool_called}' executed successfully " + f"with arguments: {arguments}." + ) + + # Return successful tool execution result + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "content": [ + { + "type": "text", + "text": result_text, + } + ], + "isError": False, + }, + } + + case _: + # Generic success response for other methods + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": {"status": "ok"}, + } self.send_response(200) self.send_header("Content-Type", "application/json") @@ -160,6 +232,11 @@ def do_GET(self) -> None: # pylint: disable=invalid-name ) case "/debug/requests": self._send_json_response(request_log) + case "/debug/clear": + # Clear the request log and last captured headers + request_log.clear() + last_headers.clear() + self._send_json_response({"status": "cleared", "request_count": 0}) case "/": self._send_help_page() case _: @@ -273,10 +350,10 @@ def main() -> None: https_port = http_port + 1 # Create HTTP server - http_server = HTTPServer(("", http_port), MCPMockHandler) + http_server = HTTPServer(("", http_port), MCPMockHandler) # type: ignore[arg-type] # Create HTTPS server with self-signed certificate - https_server = HTTPServer(("", https_port), MCPMockHandler) + https_server = HTTPServer(("", https_port), MCPMockHandler) # type: ignore[arg-type] # Generate or load self-signed certificate script_dir = Path(__file__).parent diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index 9c934b89a..9a8507874 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -6,7 +6,7 @@ services: dockerfile: dev-tools/mcp-mock-server/Dockerfile container_name: mcp-mock-server ports: - - "3000:3000" + - "9000:3000" networks: - lightspeednet healthcheck: @@ -66,6 +66,11 @@ services: - WATSONX_API_KEY=${WATSONX_API_KEY:-} # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} + entrypoint: > + /bin/bash -c " + printf %s 'test-secret-token-123' > /tmp/lightspeed-mcp-test-token && + /app-root/.venv/bin/python3.12 /app-root/src/lightspeed_stack.py + " healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/liveness"] interval: 10s # how often to run the check diff --git a/docker-compose.yaml b/docker-compose.yaml index b1e3f819c..1aa2ed640 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -6,7 +6,7 @@ services: dockerfile: dev-tools/mcp-mock-server/Dockerfile container_name: mcp-mock-server ports: - - "3000:3000" + - "9000:3000" networks: - lightspeednet healthcheck: @@ -84,6 +84,11 @@ services: - TENANT_ID=${TENANT_ID:-} - CLIENT_ID=${CLIENT_ID:-} - CLIENT_SECRET=${CLIENT_SECRET:-} + entrypoint: > + /bin/bash -c " + printf %s 'test-secret-token-123' > /tmp/lightspeed-mcp-test-token && + /app-root/.venv/bin/python3.12 /app-root/src/lightspeed_stack.py + " depends_on: llama-stack: condition: service_healthy diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py index 
00cbe132b..772de8c18 100644 --- a/src/app/endpoints/streaming_query.py +++ b/src/app/endpoints/streaming_query.py @@ -263,6 +263,22 @@ async def retrieve_response_generator( turn_summary, ) # Retrieve response stream (may raise exceptions) + # Log request details before calling Llama Stack (MCP debugging) + if responses_params.tools is not None and len(responses_params.tools) > 0: + # Filter MCP tools once for efficiency + mcp_tools = [t for t in responses_params.tools if t.get("type") == "mcp"] + if len(mcp_tools) > 0: + logger.debug( + "Calling Llama Stack Responses API (streaming) with %d MCP tool(s)", + len(mcp_tools), + ) + # Log MCP server endpoints that may be called + logger.debug("MCP server endpoints that may be called:") + for tool in mcp_tools: + logger.debug( + " - %s: %s", tool.get("server_label"), tool.get("server_url") + ) + response = await context.client.responses.create( **responses_params.model_dump() ) diff --git a/src/app/main.py b/src/app/main.py index e4ee83905..dc214702d 100644 --- a/src/app/main.py +++ b/src/app/main.py @@ -20,7 +20,8 @@ from log import get_logger from a2a_storage import A2AStorageFactory from models.responses import InternalServerErrorResponse -from utils.common import register_mcp_servers_async + +# from utils.common import register_mcp_servers_async # Not needed for Responses API from utils.llama_stack_version import check_llama_stack_version logger = get_logger(__name__) @@ -69,9 +70,32 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]: ) raise - logger.info("Registering MCP servers") - await register_mcp_servers_async(logger, configuration.configuration) - get_logger("app.endpoints.handlers") + # Log MCP server configuration + mcp_servers = configuration.configuration.mcp_servers + if mcp_servers: + logger.info("Loaded %d MCP server(s) from configuration:", len(mcp_servers)) + for server in mcp_servers: + has_auth = bool(server.authorization_headers) + logger.info( + " - %s at %s (auth: %s)", + server.name, + server.url, + "yes" if has_auth else "no", + ) + # Debug: Show auth header names if configured + if has_auth: + logger.debug( + " Auth headers: %s", + ", ".join(server.authorization_headers.keys()), + ) + else: + logger.info("No MCP servers configured") + + # NOTE: MCP server registration not needed for Responses API + # The Responses API takes inline tool definitions instead of pre-registered toolgroups + # logger.info("Registering MCP servers") + # await register_mcp_servers_async(logger, configuration.configuration) + # get_logger("app.endpoints.handlers") logger.info("App startup complete") initialize_database() diff --git a/src/constants.py b/src/constants.py index 1b7b6beef..2fe3172c2 100644 --- a/src/constants.py +++ b/src/constants.py @@ -125,6 +125,8 @@ # MCP authorization header special values MCP_AUTH_KUBERNETES = "kubernetes" MCP_AUTH_CLIENT = "client" +# MCP authorization header name (special handling for llama_stack 0.4.x+) +MCP_AUTHORIZATION_HEADER = "authorization" # default RAG tool value DEFAULT_RAG_TOOL = "knowledge_search" diff --git a/src/utils/responses.py b/src/utils/responses.py index 41f5b2e74..2138b94f2 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -136,7 +136,7 @@ async def get_topic_summary( # pylint: disable=too-many-nested-blocks return summary_text.strip() if summary_text else "" -async def prepare_tools( +async def prepare_tools( # pylint: disable=too-many-locals,too-many-branches client: AsyncLlamaStackClient, query_request: QueryRequest, token: str, @@ -156,6 +156,7 @@ 
async def prepare_tools( List of tool configurations, or None if no_tools is True or no tools available """ if query_request.no_tools: + logger.debug("Tools disabled for this request (no_tools=True)") return None toolgroups = [] @@ -185,11 +186,54 @@ async def prepare_tools( mcp_tools = get_mcp_tools(config.mcp_servers, token, mcp_headers) if mcp_tools: toolgroups.extend(mcp_tools) + mcp_server_names = [tool.get("server_label", "unknown") for tool in mcp_tools] logger.debug( - "Configured %d MCP tools: %s", + "Prepared %d MCP tool(s) for request: %s", len(mcp_tools), - [tool.get("server_label", "unknown") for tool in mcp_tools], + ", ".join(mcp_server_names), ) + # Debug: Show full tool definitions + for tool in mcp_tools: + logger.debug( + " MCP tool: %s at %s (auth: %s, headers: %d)", + tool.get("server_label"), + tool.get("server_url"), + "yes" if "authorization" in tool else "no", + len(tool.get("headers", {})), + ) + else: + if config.mcp_servers: + logger.warning( + "No MCP tools prepared (all %d configured servers were skipped)", + len(config.mcp_servers), + ) + else: + logger.debug("No MCP servers configured") + + # Log tool summary + if toolgroups: + rag_tool_count = sum(1 for t in toolgroups if t.get("type") == "file_search") + mcp_tool_count = sum(1 for t in toolgroups if t.get("type") == "mcp") + logger.debug( + "Prepared %d total tool(s) for Responses API: %d RAG + %d MCP", + len(toolgroups), + rag_tool_count, + mcp_tool_count, + ) + # Log MCP server endpoints that may be called + if mcp_tool_count > 0: + mcp_servers = [ + (t.get("server_label"), t.get("server_url")) + for t in toolgroups + if t.get("type") == "mcp" + ] + if mcp_servers: + logger.debug("MCP server endpoints that may be called:") + for server_name, server_url in mcp_servers: + logger.debug(" - %s: %s", server_name, server_url) + else: + logger.debug("No tools available for this request") + # Convert empty list to None for consistency with existing behavior if not toolgroups: return None @@ -312,7 +356,7 @@ def get_rag_tools(vector_store_ids: list[str]) -> Optional[list[dict[str, Any]]] ] -def get_mcp_tools( +def get_mcp_tools( # pylint: disable=too-many-locals,too-many-branches mcp_servers: list[ModelContextProtocolServer], token: str | None = None, mcp_headers: dict[str, dict[str, str]] | None = None, @@ -326,6 +370,9 @@ def get_mcp_tools( Returns: List of MCP tool definitions with server details and optional auth headers + + Note: Starting with llama_stack 0.4.x, the Authorization header must be passed + via the 'authorization' parameter instead of in the 'headers' dict. 
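(Illustrative aside, not part of the patch.) Given the 0.4.x convention noted above, one element of the list returned by `get_mcp_tools` would take roughly the following shape; the keys reflect the fields referenced throughout this patch, while the concrete values are placeholders taken from the test configuration:

```python
# Rough shape of one get_mcp_tools() entry under llama_stack 0.4.x.
# Values are placeholders; only the keys are meant to match this patch.
tool_def = {
    "type": "mcp",
    "server_label": "mock-k8s-auth",
    "server_url": "http://mcp-mock-server:3000",
    "require_approval": "never",
    # The Authorization header is lifted out of 'headers' and passed separately:
    "authorization": "Bearer my-k8s-token",
    # Any other resolved auth headers stay in 'headers' (omitted when empty):
    "headers": {"X-API-Token": "some-client-provided-value"},
}
```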
""" def _get_token_value(original: str, header: str) -> str | None: @@ -340,10 +387,10 @@ def _get_token_value(original: str, header: str) -> str | None: # use client provided token if mcp_headers is None: return None - c_headers = mcp_headers.get(mcp_server.name, None) - if c_headers is None: + client_headers = mcp_headers.get(mcp_server.name, None) + if client_headers is None: return None - return c_headers.get(header, None) + return client_headers.get(header, None) case _: # use provided return original @@ -358,30 +405,101 @@ def _get_token_value(original: str, header: str) -> str | None: "require_approval": "never", } - # Build headers + # Log header resolution process + if mcp_server.authorization_headers: + logger.debug( + "MCP server '%s': Resolving %d authorization header(s)", + mcp_server.name, + len(mcp_server.authorization_headers), + ) + + # Build headers and separate Authorization header headers = {} + authorization = None for name, value in mcp_server.resolved_authorization_headers.items(): # for each defined header - h_value = _get_token_value(value, name) + resolved_value = _get_token_value(value, name) # only add the header if we got value - if h_value is not None: - headers[name] = h_value + if resolved_value is not None: + # Determine auth type for logging + match value: + case constants.MCP_AUTH_KUBERNETES: + auth_type = "kubernetes" + case constants.MCP_AUTH_CLIENT: + auth_type = "client" + case _: + auth_type = "static" + + # Log successful resolution + logger.debug( + "MCP server '%s': Header '%s' -> type: %s (resolved)", + mcp_server.name, + name, + auth_type, + ) + # Special handling for Authorization header (llama_stack 0.4.x+) + if name.lower() == constants.MCP_AUTHORIZATION_HEADER: + authorization = resolved_value + else: + headers[name] = resolved_value + else: + # Log failed resolution + logger.debug( + "MCP server '%s': Header '%s' -> FAILED to resolve", + mcp_server.name, + name, + ) # Skip server if auth headers were configured but not all could be resolved - if mcp_server.authorization_headers and len(headers) != len( + resolved_count = len(headers) + (1 if authorization is not None else 0) + if mcp_server.authorization_headers and resolved_count != len( mcp_server.authorization_headers ): + required_headers = list(mcp_server.authorization_headers.keys()) + resolved_headers = list(headers.keys()) + if authorization is not None: + # Find the actual Authorization header name (case-insensitive match) + auth_header_name = next( + ( + h + for h in required_headers + if h.lower() == constants.MCP_AUTHORIZATION_HEADER + ), + constants.MCP_AUTHORIZATION_HEADER.capitalize(), + ) + resolved_headers.append(auth_header_name) + missing_headers = [h for h in required_headers if h not in resolved_headers] + logger.warning( - "Skipping MCP server %s: required %d auth headers but only resolved %d", + "Skipping MCP server '%s': required %d auth headers but only resolved %d", mcp_server.name, len(mcp_server.authorization_headers), - len(headers), + resolved_count, + ) + logger.warning( + " Required: %s | Resolved: %s | Missing: %s", + ", ".join(required_headers), + ", ".join(resolved_headers) if resolved_headers else "none", + ", ".join(missing_headers) if missing_headers else "none", ) continue + # Add authorization parameter if present (llama_stack 0.4.x+) + if authorization is not None: + tool_def["authorization"] = authorization # type: ignore[index] + + # Add other headers if present if len(headers) > 0: - # add headers to tool definition tool_def["headers"] = headers # 
type: ignore[index] + + # Log successful tool creation + logger.debug( + "MCP server '%s': Tool definition created (authorization: %s, additional headers: %d)", + mcp_server.name, + "SET" if authorization is not None else "NOT SET", + len(headers), + ) + # collect tools info tools.append(tool_def) return tools @@ -521,7 +639,7 @@ def extract_token_usage( return token_counter -def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-many-branches +def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-many-branches,too-many-locals output_item: OpenAIResponseOutput, rag_chunks: list[RAGChunk], ) -> tuple[Optional[ToolCallSummary], Optional[ToolResultSummary]]: @@ -599,6 +717,31 @@ def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-m else (mcp_call_item.output if mcp_call_item.output else "") ) + # Log MCP tool call + logger.debug( + "MCP tool call: %s on server '%s' (call_id: %s)", + mcp_call_item.name, + mcp_call_item.server_label, + mcp_call_item.id, + ) + logger.debug(" Arguments: %s", args) + + # Log MCP tool result + if mcp_call_item.error: + logger.warning( + "MCP tool result: %s FAILED - %s", + mcp_call_item.name, + mcp_call_item.error, + ) + else: + output_preview = content[:100] + "..." if len(content) > 100 else content + logger.debug( + "MCP tool result: %s SUCCESS (output length: %d)", + mcp_call_item.name, + len(content), + ) + logger.debug(" Output preview: %s", output_preview) + return ToolCallSummary( id=mcp_call_item.id, name=mcp_call_item.name, @@ -622,6 +765,18 @@ def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-m } for tool in mcp_list_tools_item.tools ] + + # Log MCP list_tools call + logger.debug( + "MCP server '%s' listed %d available tool(s)", + mcp_list_tools_item.server_label, + len(mcp_list_tools_item.tools), + ) + logger.debug( + " Tools: %s", + ", ".join(tool.name for tool in mcp_list_tools_item.tools), + ) + content_dict = { "server_label": mcp_list_tools_item.server_label, "tools": tools_info, @@ -645,6 +800,15 @@ def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-m if item_type == "mcp_approval_request": approval_request_item = cast(MCPApprovalRequest, output_item) args = parse_arguments_string(approval_request_item.arguments) + + # Log MCP approval request + logger.debug( + "MCP approval requested: tool '%s' on server '%s'", + approval_request_item.name, + approval_request_item.server_label, + ) + logger.debug(" Arguments: %s", args) + return ( ToolCallSummary( id=approval_request_item.id, diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml new file mode 100644 index 000000000..790efbf15 --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml @@ -0,0 +1,50 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop-with-token" +mcp_servers: + # Test 1: Static file-based authentication + - name: "mock-file-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + 
authorization_headers: + Authorization: "/tmp/lightspeed-mcp-test-token" + # Test 2: Kubernetes token forwarding + - name: "mock-k8s-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "kubernetes" + # Test 3: Client-provided token + - name: "mock-client-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth - should appear in mcp-auth/client-options response + - name: "github-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth (different header) - should appear in response + - name: "gitlab-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + X-API-Token: "client" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml index 118b917c5..178239db3 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml @@ -17,20 +17,43 @@ user_data_collection: transcripts_storage: "/tmp/data/transcripts" authentication: module: "noop" +# Conversation cache for storing Q&A history +conversation_cache: + type: "sqlite" + sqlite: + db_path: "/tmp/data/conversation-cache.db" mcp_servers: - # Mock server with client-provided auth - should appear in mcp-auth/client-options response + # Test 1: Static file-based authentication + - name: "mock-file-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "/tmp/lightspeed-mcp-test-token" + # Test 2: Kubernetes token forwarding + - name: "mock-k8s-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "kubernetes" + # Test 3: Client-provided token + - name: "mock-client-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth - should appear in mcp-auth/client-options response - name: "github-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" authorization_headers: Authorization: "client" - # Mock server with client-provided auth (different header) - should appear in response + # Legacy: Mock server with client-provided auth (different header) - should appear in response - name: "gitlab-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" authorization_headers: X-API-Token: "client" - # Mock server with no auth - should NOT appear in response + # Legacy: Mock server with no auth - should NOT appear in response - name: "public-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" \ No newline at end of file diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml new file mode 100644 index 000000000..5f88a4e66 --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml @@ -0,0 +1,51 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack 
service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop-with-token" +mcp_servers: + # Test 1: Static file-based authentication + - name: "mock-file-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "/tmp/lightspeed-mcp-test-token" + # Test 2: Kubernetes token forwarding + - name: "mock-k8s-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "kubernetes" + # Test 3: Client-provided token + - name: "mock-client-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth - should appear in mcp-auth/client-options response + - name: "github-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth (different header) - should appear in response + - name: "gitlab-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + X-API-Token: "client" \ No newline at end of file diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml index 1dbef61cf..73c8ea0e0 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml @@ -18,20 +18,43 @@ user_data_collection: transcripts_storage: "/tmp/data/transcripts" authentication: module: "noop" +# Conversation cache for storing Q&A history +conversation_cache: + type: "sqlite" + sqlite: + db_path: "/tmp/data/conversation-cache.db" mcp_servers: - # Mock server with client-provided auth - should appear in mcp-auth/client-options response + # Test 1: Static file-based authentication + - name: "mock-file-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "/tmp/lightspeed-mcp-test-token" + # Test 2: Kubernetes token forwarding + - name: "mock-k8s-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "kubernetes" + # Test 3: Client-provided token + - name: "mock-client-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth - should appear in mcp-auth/client-options response - name: "github-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" authorization_headers: Authorization: "client" - # Mock server with client-provided auth (different header) - should appear in response + # Legacy: Mock server with client-provided auth (different header) - should appear in response - name: "gitlab-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" authorization_headers: X-API-Token: "client" - # Mock server with no auth - should NOT appear in response + # Legacy: Mock server with no auth - should NOT appear in response - name: "public-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" \ No newline at end of file diff --git 
a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index 3df842f66..64f1d09bb 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -260,6 +260,17 @@ def before_feature(context: Context, feature: Feature) -> None: switch_config(context.feature_config) restart_container("lightspeed-stack") + if "MCP" in feature.tags: + # For MCP tests, we need noop-with-token auth to support k8s token forwarding + # Use mode-specific configs (server vs library) + mode_dir = "library-mode" if context.is_library_mode else "server-mode" + context.feature_config = ( + f"tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp.yaml" + ) + context.default_config_backup = create_config_backup("lightspeed-stack.yaml") + switch_config(context.feature_config) + restart_container("lightspeed-stack") + if "Feedback" in feature.tags: context.hostname = os.getenv("E2E_LSC_HOSTNAME", "localhost") context.port = os.getenv("E2E_LSC_PORT", "8080") @@ -287,6 +298,11 @@ def after_feature(context: Context, feature: Feature) -> None: restart_container("lightspeed-stack") remove_config_backup(context.default_config_backup) + if "MCP" in feature.tags: + switch_config(context.default_config_backup) + restart_container("lightspeed-stack") + remove_config_backup(context.default_config_backup) + if "Feedback" in feature.tags: for conversation_id in context.feedback_conversations: url = f"http://{context.hostname}:{context.port}/v1/conversations/{conversation_id}" diff --git a/tests/e2e/features/mcp_tools.feature b/tests/e2e/features/mcp_tools.feature new file mode 100644 index 000000000..ebae7f4c3 --- /dev/null +++ b/tests/e2e/features/mcp_tools.feature @@ -0,0 +1,184 @@ +@MCP +Feature: MCP Server Integration + + Background: + Given The service is started locally + And REST API service prefix is /v1 + + # ============================================================================ + # Basic Operations - Discovery and Configuration + # ============================================================================ + + Scenario: MCP client auth options endpoint returns configured servers + Given The system is in default state + And I set the Authorization header to Bearer test-token + When I access REST API endpoint "mcp-auth/client-options" using HTTP GET method + Then The status code of the response is 200 + And The body of the response has proper client auth options structure + And The response contains server "mock-client-auth" with client auth header "Authorization" + + # ============================================================================ + # Authentication Methods + # ============================================================================ + + Scenario: MCP mock server receives file-based static token + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests + And The MCP mock server should have captured Authorization header "Bearer test-secret-token-123" from file-auth server + + Scenario: MCP mock server receives kubernetes token from request + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests 
+ And The MCP mock server should have captured Authorization header containing "my-k8s-token" from k8s-auth server + + Scenario: MCP mock server receives client-provided token via MCP-HEADERS + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests + And The MCP mock server request log should contain exactly tools mock_tool_file, mock_tool_k8s, mock_tool_client + + Scenario: MCP server with client auth is skipped when MCP-HEADERS is missing + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server request log should contain exactly tools mock_tool_file, mock_tool_k8s + + Scenario: All three MCP auth types work in a single request + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received at least 6 requests + And The MCP mock server request log should contain tool "mock_tool_file" + And The MCP mock server request log should contain tool "mock_tool_k8s" + And The MCP mock server request log should contain tool "mock_tool_client" + + # ============================================================================ + # Tool Execution + # ============================================================================ + + Scenario: LLM successfully discovers and lists MCP tools + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query asking about available tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests + And The MCP mock server should have received tools/list method calls + + Scenario: LLM calls an MCP tool and receives results + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query that explicitly requests tool usage + And I wait for MCP server to process tool calls + Then The MCP mock server should have received tools/call method + And The response should contain MCP tool execution results + And The response should indicate successful tool execution + + Scenario: MCP tool execution appears in query response + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + When I send a query that triggers MCP tool usage + Then The status code of the response is 200 + And The response should contain tool call information + And The tool execution results should be included in the response + + Scenario: Failed MCP tool execution is handled gracefully + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server is configured to return 
errors + When I send a query that uses MCP tools + Then The status code of the response is 200 + And The response should indicate tool execution failed + And The MCP mock server should confirm error mode is active + + # Note: This scenario can be slow in CI (up to 120s) due to multiple LLM+tool roundtrips + # Completes in ~8s locally but timing is highly variable in containerized CI environments + Scenario: Multiple MCP tools can be called in sequence + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a query that requires multiple tool calls + And I wait for MCP server to process tool calls + Then The MCP mock server should have received multiple tools/call methods + And All tool calls should have succeeded + And The response should contain results from all tool calls + + Scenario: Streaming query discovers and uses MCP tools + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a streaming query that uses MCP tools + And I wait for MCP server to process tool calls + Then The MCP mock server should have received requests + And The MCP mock server should have received tools/call method + And The streaming response should be successful + + # Note: This scenario can be slow in CI (up to 120s) due to multiple LLM+tool roundtrips + # Completes in ~8s locally but timing is highly variable in containerized CI environments + Scenario: Streaming query with multiple MCP tools + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a streaming query requiring multiple tools + And I wait for MCP server to process tool calls + Then The MCP mock server should have received multiple tools/call methods + And The streaming response should contain tool execution results + + Scenario: Failed MCP tool execution in streaming query is handled gracefully + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server is configured to return errors + When I send a streaming query that uses MCP tools + Then The streaming response should be successful + And The MCP mock server should confirm error mode is active + + Scenario: Streaming query receives file-based static token + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a streaming query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests + And The MCP mock server should have captured Authorization header "Bearer test-secret-token-123" from file-auth server + And The streaming response should be successful + + Scenario: Streaming query receives client-provided token via MCP-HEADERS + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a streaming query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received 
requests + And The MCP mock server request log should contain exactly tools mock_tool_file, mock_tool_k8s, mock_tool_client + And The streaming response should be successful + + Scenario: Streaming query skips MCP server with client auth when MCP-HEADERS is missing + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a streaming query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server request log should contain exactly tools mock_tool_file, mock_tool_k8s + And The streaming response should be successful diff --git a/tests/e2e/features/steps/mcp.py b/tests/e2e/features/steps/mcp.py new file mode 100644 index 000000000..8ea308bcc --- /dev/null +++ b/tests/e2e/features/steps/mcp.py @@ -0,0 +1,858 @@ +"""Implementation of MCP-specific test steps.""" + +import json +import time + +import requests +from behave import given, then, when # pyright: ignore[reportAttributeAccessIssue] +from behave.runner import Context + +# Mock MCP server configuration +MOCK_MCP_SERVER_URL = "http://localhost:9000" + + +@given('I set the MCP-HEADERS header with client token for "{server_name}"') +def set_mcp_headers_with_client_token(context: Context, server_name: str) -> None: + """Set MCP-HEADERS header with a client-provided token. + + Parameters: + context (Context): Behave context. + server_name (str): Name of the MCP server to provide token for. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + # Set MCP-HEADERS with client token + mcp_headers = {server_name: {"Authorization": "Bearer my-client-token"}} + context.auth_headers["MCP-HEADERS"] = json.dumps(mcp_headers) + print(f"🔑 Set MCP-HEADERS for server '{server_name}' with client token") + + +@given("The MCP mock server request log is cleared") +def clear_mcp_mock_server_log(context: Context) -> None: + """Clear the MCP mock server request log using the debug/clear endpoint. + + Parameters: + context (Context): Behave context. + """ + try: + response = requests.get(f"{MOCK_MCP_SERVER_URL}/debug/clear", timeout=2) + if response.status_code == 200: + result = response.json() + print( + f"🧹 MCP mock server log cleared (status: {result.get('status', 'unknown')})" + ) + else: + print(f"⚠️ Warning: Clear endpoint returned status {response.status_code}") + except requests.RequestException as e: + print(f"⚠️ Warning: Could not clear MCP mock server log: {e}") + + +@when("I send a query that uses MCP tools") +def send_query_with_mcp_tools(context: Context) -> None: + """Send a query request that will trigger MCP tool discovery. + + Parameters: + context (Context): Behave context. 
+ """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "What tools are available?", + "model": model, + "provider": provider, + } + + # Use longer timeout (60s) if testing error handling + # llama-stack 0.4.2 can be slow to handle tool errors + timeout = ( + 60 + if hasattr(context, "expect_tool_errors") and context.expect_tool_errors + else 30 + ) + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=timeout + ) + print(f"📤 Sent query request (status: {context.response.status_code})") + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@when("I wait for MCP server to receive requests") +def wait_for_mcp_requests(context: Context) -> None: + """Wait a brief moment for MCP server to receive and log requests. + + Parameters: + context (Context): Behave context. + """ + # Wait for requests to be processed + time.sleep(2) + print("⏱️ Waited for MCP server to process requests") + + +@then("The MCP mock server should have received requests") +def check_mcp_server_received_requests(context: Context) -> None: + """Verify the MCP mock server received at least one request. + + Parameters: + context (Context): Behave context. + """ + # Delegate to the parameterized version with count=1 + check_mcp_server_request_count(context, 1) + + +@then("The MCP mock server should have received at least {count:d} requests") +def check_mcp_server_request_count(context: Context, count: int) -> None: + """Verify the MCP mock server received at least N requests. + + Parameters: + context (Context): Behave context. + count (int): Minimum expected request count. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert ( + response.status_code == 200 + ), f"Failed to get debug requests: {response.status_code}" + + requests_log = response.json() + actual_count = len(requests_log) + assert ( + actual_count >= count + ), f"Expected at least {count} requests, got {actual_count}" + + print(f"✅ MCP mock server received {actual_count} request(s) (>= {count})") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then( + 'The MCP mock server should have captured Authorization header "{expected_value}" from file-auth server' +) +def check_file_auth_header(context: Context, expected_value: str) -> None: + """Verify the MCP mock server captured the expected file-based auth header. + + Parameters: + context (Context): Behave context. + expected_value (str): Expected Authorization header value. 
+ """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + # Find requests with the expected auth header + matching_requests = [ + req + for req in requests_log + if req.get("headers", {}).get("Authorization") == expected_value + ] + + assert ( + len(matching_requests) > 0 + ), f"No requests found with Authorization: {expected_value}" + print( + f"✅ Found {len(matching_requests)} request(s) with file-based auth token" + ) + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then( + 'The MCP mock server should have captured Authorization header containing "{token_fragment}" from k8s-auth server' +) +def check_k8s_auth_header(context: Context, token_fragment: str) -> None: + """Verify the MCP mock server captured k8s token in Authorization header. + + Parameters: + context (Context): Behave context. + token_fragment (str): Expected token fragment in Authorization header. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + # Find requests with k8s token + matching_requests = [ + req + for req in requests_log + if token_fragment in req.get("headers", {}).get("Authorization", "") + ] + + assert ( + len(matching_requests) > 0 + ), f"No requests found with k8s token containing: {token_fragment}" + print(f"✅ Found {len(matching_requests)} request(s) with k8s auth token") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then( + 'The MCP mock server should have captured Authorization header containing "{token_fragment}" from client-auth server' +) +def check_client_auth_header(context: Context, token_fragment: str) -> None: + """Verify the MCP mock server captured client token in Authorization header. + + Parameters: + context (Context): Behave context. + token_fragment (str): Expected token fragment in Authorization header. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + # Find requests with client token + matching_requests = [ + req + for req in requests_log + if token_fragment in req.get("headers", {}).get("Authorization", "") + ] + + assert ( + len(matching_requests) > 0 + ), f"No requests found with client token containing: {token_fragment}" + print( + f"✅ Found {len(matching_requests)} request(s) with client-provided token" + ) + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then('The MCP mock server request log should contain tool "{tool_name}"') +def check_mcp_tool_in_log(context: Context, tool_name: str) -> None: + """Verify the MCP mock server received requests for a specific tool. + + Queries the mock server's debug endpoint to check the request log. 
+ + Parameters: + context (Context): Behave context. + tool_name (str): Expected tool name (e.g., mock_tool_file, mock_tool_k8s). + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + + # Check if any request in the log contains the expected tool name + found = False + for req in requests_log: + if req.get("tool_name") == tool_name: + found = True + break + + assert found, f"Tool '{tool_name}' not found in mock server request log" + print(f"✅ Tool '{tool_name}' found in MCP server request log") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then('The MCP mock server request log should not contain tool "{tool_name}"') +def check_mcp_tool_not_in_log(context: Context, tool_name: str) -> None: + """Verify the MCP mock server did NOT receive requests for a specific tool. + + Queries the mock server's debug endpoint to check the request log. + This is useful for verifying that servers were skipped due to auth issues. + + Parameters: + context (Context): Behave context. + tool_name (str): Tool name that should NOT be present. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + + # Check if any request in the log contains the tool name + for req in requests_log: + if req.get("tool_name") == tool_name: + raise AssertionError( + f"Tool '{tool_name}' unexpectedly found in mock server request log " + f"(server should have been skipped)" + ) + + print(f"✅ Tool '{tool_name}' correctly absent from MCP server request log") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then("The MCP mock server request log should contain exactly tools {tool_list}") +def check_mcp_exact_tools_in_log(context: Context, tool_list: str) -> None: + """Verify MCP server called at least one expected tool and no unexpected tools. + + This validates: + 1. At least ONE tool from the expected list was called (flexible for LLM non-determinism) + 2. NO tools outside the expected list were called (enforces security/auth boundaries) + + This approach balances LLM flexibility with security enforcement - the LLM can choose + which tools to use, but cannot access tools outside the allowed set. + + Parameters: + context (Context): Behave context. + tool_list (str): Comma-separated list of allowed tool names. 
+ """ + mock_server_url = MOCK_MCP_SERVER_URL + + # Parse expected tools + expected_tools = [tool.strip() for tool in tool_list.split(",")] + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + + # Extract unique tool names from log + found_tools = set() + for req in requests_log: + tool_name = req.get("tool_name") + if tool_name: + found_tools.add(tool_name) + + # Check 1: At least ONE expected tool was called + # (Allows for LLM non-determinism in tool selection) + called_expected_tools = found_tools & set(expected_tools) + if not called_expected_tools: + raise AssertionError( + f"None of the expected tools were called. " + f"Expected at least one of: {', '.join(expected_tools)}. " + f"Found tools: {', '.join(sorted(found_tools))}" + ) + + # Check 2: NO unexpected tools were called + # (Enforces security - prevents access to unauthorized tools) + unexpected_tools = [tool for tool in found_tools if tool not in expected_tools] + if unexpected_tools: + raise AssertionError( + f"Unexpected tools found in log: {', '.join(unexpected_tools)}. " + f"Only expected: {', '.join(expected_tools)}" + ) + + print( + f"✅ Tool usage validated: called {', '.join(sorted(called_expected_tools))} " + f"(allowed: {', '.join(expected_tools)})" + ) + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@when("I send a query asking about available tools") +def send_query_about_tools(context: Context) -> None: + """Send a query asking about available tools. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "What tools are available to help me?", + "model": model, + "provider": provider, + } + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=30 + ) + print(f"📤 Sent query about tools (status: {context.response.status_code})") + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@when("I send a query that explicitly requests tool usage") +def send_query_requesting_tool_usage(context: Context) -> None: + """Send a query that explicitly asks to use a tool. + + Parameters: + context (Context): Behave context. 
+ """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Please use the mock_tool_k8s tool to test the connection", + "model": model, + "provider": provider, + } + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=30 + ) + print( + f"📤 Sent query requesting tool usage (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@when("I send a query that triggers MCP tool usage") +def send_query_triggering_tool_usage(context: Context) -> None: + """Send a query that should trigger MCP tool usage. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Use available tools to help me", + "model": model, + "provider": provider, + } + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=30 + ) + print( + f"📤 Sent query triggering tools (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@when("I wait for MCP server to process tool calls") +def wait_for_tool_calls(context: Context) -> None: + """Wait for MCP server to process tool call requests. + + Parameters: + context (Context): Behave context. + """ + time.sleep(3) + print("⏱️ Waited for MCP server to process tool calls") + + +@when("I send a query that requires multiple tool calls") +def send_query_requiring_multiple_tools(context: Context) -> None: + """Send a query that should trigger multiple tool calls. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Use all available tools to gather information", + "model": model, + "provider": provider, + } + + try: + # Multiple tool calls can take longer - use 120s timeout + # Note: This test is timing-sensitive in CI. Locally completes in ~8s, + # but CI can take 90+ seconds due to container overhead and resource limits + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=120 + ) + print( + f"📤 Sent query requiring multiple tools (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@then("The MCP mock server should have received tools/list method calls") +def check_tools_list_calls(context: Context) -> None: + """Verify MCP server received tools/list method calls from the SUT. + + Parameters: + context (Context): Behave context. 
+ """ + mock_server_url = MOCK_MCP_SERVER_URL + + # Check the debug request log for tools/list calls + try: + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert ( + response.status_code == 200 + ), f"Failed to get debug requests: {response.status_code}" + requests_log = response.json() + tools_list_calls = [ + req for req in requests_log if req.get("rpc_method") == "tools/list" + ] + assert len(tools_list_calls) > 0, "No tools/list calls found in request log" + print(f"✅ MCP server received {len(tools_list_calls)} tools/list call(s)") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then("The MCP mock server should have received tools/call method") +def check_tools_call_method(context: Context) -> None: + """Verify MCP server received tools/call method. + + Parameters: + context (Context): Behave context. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + tools_call_entries = [ + req for req in requests_log if req.get("rpc_method") == "tools/call" + ] + assert len(tools_call_entries) > 0, "No tools/call entries found in request log" + print(f"✅ MCP server received {len(tools_call_entries)} tools/call request(s)") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then("The response should contain MCP tool execution results") +@then("The response should indicate successful tool execution") +@then("The response should contain tool call information") +@then("The tool execution results should be included in the response") +def check_response_has_tool_execution(context: Context) -> None: + """Verify response contains evidence of MCP tool execution. + + This consolidated step checks that the response contains tool-related content, + which could be tool calls, tool results, or references to tool execution in + the response text. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + assert ( + context.response.status_code == 200 + ), f"Bad status: {context.response.status_code}" + + response_data = context.response.json() + assert "response" in response_data, "Response missing 'response' field" + + # Check for evidence of tool execution in the response + response_text = str(response_data.get("response", "")).lower() + + # Look for tool-related indicators in the response + # (tool execution, mock tool, or specific tool results) + has_tool_content = any( + indicator in response_text + for indicator in ["tool", "mock", "executed", "success"] + ) + + assert has_tool_content, ( + "Response does not contain evidence of tool execution. " + f"Response text: {response_data.get('response', '')[:200]}" + ) + + print("✅ Response contains evidence of tool execution") + + +@given("The MCP mock server is configured to return errors") +def configure_mock_server_errors(context: Context) -> None: + """Configure mock server to return errors via MCP-HEADERS. + + Sends the special "Bearer error-mode" token via MCP-HEADERS so all + configured MCP servers (mock-file-auth, mock-k8s-auth, mock-client-auth) + receive it and return errors. 
This token must be propagated through
+    MCP-HEADERS, not the top-level Authorization header, because the stack
+    only forwards MCP-HEADERS to MCP servers.
+
+    Parameters:
+        context (Context): Behave context.
+    """
+    if not hasattr(context, "auth_headers"):
+        context.auth_headers = {}
+
+    # Configure all MCP servers to use error-mode token via MCP-HEADERS
+    # The mock server recognizes "Bearer error-mode" and returns errors
+    mcp_headers = {
+        "mock-file-auth": {"Authorization": "Bearer error-mode"},
+        "mock-k8s-auth": {"Authorization": "Bearer error-mode"},
+        "mock-client-auth": {"Authorization": "Bearer error-mode"},
+    }
+    context.auth_headers["MCP-HEADERS"] = json.dumps(mcp_headers)
+    context.expect_tool_errors = True
+    print(
+        "⚠️ MCP mock server configured for error mode (error-mode token via MCP-HEADERS)"
+    )
+
+
+@then("The response should indicate tool execution failed")
+def check_tool_execution_failed(context: Context) -> None:
+    """Verify the query still succeeds when tool execution fails.
+
+    In error mode the MCP tools return errors, but the LLM handles those
+    errors gracefully, so the endpoint still responds with HTTP 200. This
+    step therefore only asserts that a successful response was received.
+
+    Parameters:
+        context (Context): Behave context.
+    """
+    assert context.response is not None, "No response received"
+    assert (
+        context.response.status_code == 200
+    ), f"Bad status: {context.response.status_code}"
+
+    # In error mode, the response should still be 200 but contain error information
+    # The LLM will handle the tool error gracefully
+    print("✅ Response received (tool errors are handled gracefully by LLM)")
+
+
+@then("The MCP mock server should confirm error mode is active")
+def check_mock_server_error_mode(context: Context) -> None:
+    """Verify the mock server is returning errors via API query.
+
+    Sends a test request to the mock server and confirms it returns isError=true.
+
+    Parameters:
+        context (Context): Behave context.
+    """
+    mock_server_url = MOCK_MCP_SERVER_URL
+
+    try:
+        # Verify the mock server is in error mode by checking its response
+        payload = {
+            "jsonrpc": "2.0",
+            "id": 1,
+            "method": "tools/call",
+            "params": {"name": "test", "arguments": {}},
+        }
+        response = requests.post(
+            mock_server_url,
+            json=payload,
+            headers={"Authorization": "Bearer error-mode"},
+            timeout=5,
+        )
+        result = response.json()
+        assert result.get("result", {}).get(
+            "isError"
+        ), "Mock server not returning errors"
+        print("✅ Mock server confirmed to be returning errors (isError: true)")
+    except requests.RequestException as e:
+        raise AssertionError(f"Could not verify mock server error mode: {e}") from e
+
+
+@then("The MCP mock server should have received multiple tools/call methods")
+def check_multiple_tool_calls(context: Context) -> None:
+    """Verify MCP server received multiple tool call requests.
+
+    Parameters:
+        context (Context): Behave context.
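+
+    Passes when the /debug/requests log holds at least two entries whose
+    "rpc_method" is "tools/call", e.g. a hypothetical excerpt such as
+    [{"rpc_method": "tools/call", "tool_name": "mock_tool_file", ...},
+    {"rpc_method": "tools/call", "tool_name": "mock_tool_k8s", ...}].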
+ """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + tools_call_entries = [ + req for req in requests_log if req.get("rpc_method") == "tools/call" + ] + assert ( + len(tools_call_entries) >= 2 + ), f"Expected multiple tools/call requests, got {len(tools_call_entries)}" + print(f"✅ MCP server received {len(tools_call_entries)} tools/call request(s)") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then("All tool calls should have succeeded") +def check_all_tool_calls_succeeded(context: Context) -> None: + """Verify all tool calls succeeded. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + assert context.response.status_code == 200, "Request failed" + print("✅ All tool calls completed successfully") + + +@then("The response should contain results from all tool calls") +def check_response_has_all_results(context: Context) -> None: + """Verify response contains results from all tool calls. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + response_data = context.response.json() + + assert "response" in response_data, "Response missing 'response' field" + print("✅ Response contains results from all tool calls") + + +@when("I send a streaming query that uses MCP tools") +def send_streaming_query_with_mcp_tools(context: Context) -> None: + """Send a streaming query that should use MCP tools. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/streaming_query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Use available tools to help me", + "model": model, + "provider": provider, + "media_type": "application/json", # Request JSON response instead of SSE + } + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=30 + ) + print( + f"📤 Sent streaming query with MCP tools (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Streaming query request failed: {e}") + context.response = None + + +@when("I send a streaming query requiring multiple tools") +def send_streaming_query_requiring_multiple_tools(context: Context) -> None: + """Send a streaming query requiring multiple tool calls. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/streaming_query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Use all available tools to gather comprehensive information", + "model": model, + "provider": provider, + "media_type": "application/json", # Request JSON response instead of SSE + } + + try: + # Multiple tool calls can take longer - use 120s timeout + # Note: This test is timing-sensitive in CI. 
Locally completes in ~8s, + # but CI can take 90+ seconds due to container overhead and resource limits + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=120 + ) + print( + f"📤 Sent streaming query requiring multiple tools (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Streaming query request failed: {e}") + context.response = None + + +@then("The streaming response should be successful") +def check_streaming_response_successful(context: Context) -> None: + """Verify streaming response was successful. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + assert ( + context.response.status_code == 200 + ), f"Bad status: {context.response.status_code}" + print("✅ Streaming response completed successfully") + + +@then("The streaming response should contain tool execution results") +def check_streaming_response_has_tool_results(context: Context) -> None: + """Verify streaming response contains tool execution results. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + assert ( + context.response.status_code == 200 + ), f"Bad status: {context.response.status_code}" + + # Parse response and check for tool execution evidence + try: + response_data = context.response.json() + response_text = str(response_data.get("response", "")).lower() + + # Look for tool-related indicators in the response + has_tool_content = any( + indicator in response_text + for indicator in ["tool", "mock", "executed", "success"] + ) + + assert has_tool_content, ( + "Response does not contain evidence of tool execution. " + f"Response text: {response_data.get('response', '')[:200]}" + ) + + print("✅ Streaming response contains tool execution results") + except ValueError as e: + raise AssertionError(f"Failed to parse response JSON: {e}") from e diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 804e180cf..903bfd585 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -12,3 +12,4 @@ features/info.feature features/query.feature features/streaming_query.feature features/rest_api.feature +features/mcp_tools.feature diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py index e57423971..3e8fb1cd6 100644 --- a/tests/unit/app/endpoints/test_streaming_query.py +++ b/tests/unit/app/endpoints/test_streaming_query.py @@ -672,6 +672,7 @@ async def test_retrieve_response_generator_success( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -761,6 +762,7 @@ async def test_retrieve_response_generator_connection_error( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -814,6 +816,7 @@ async def test_retrieve_response_generator_api_status_error( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = 
None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -864,6 +867,7 @@ async def test_retrieve_response_generator_runtime_error_context_length( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -911,6 +915,7 @@ async def test_retrieve_response_generator_runtime_error_other( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", diff --git a/tests/unit/utils/test_responses.py b/tests/unit/utils/test_responses.py index a47ae2fe1..95d4413fe 100644 --- a/tests/unit/utils/test_responses.py +++ b/tests/unit/utils/test_responses.py @@ -22,6 +22,7 @@ from pytest_mock import MockerFixture from configuration import AppConfig +from constants import MCP_AUTHORIZATION_HEADER from models.config import ModelContextProtocolServer from models.requests import QueryRequest from utils.responses import ( @@ -366,7 +367,8 @@ def test_get_mcp_tools_with_kubernetes_auth(self) -> None: ] tools_k8s = get_mcp_tools(servers_k8s, token="user-k8s-token") assert len(tools_k8s) == 1 - assert tools_k8s[0]["headers"] == {"Authorization": "Bearer user-k8s-token"} + assert tools_k8s[0][MCP_AUTHORIZATION_HEADER] == "Bearer user-k8s-token" + assert "headers" not in tools_k8s[0] # No other headers def test_get_mcp_tools_with_mcp_headers(self) -> None: """Test get_mcp_tools with client-provided headers.""" @@ -386,8 +388,8 @@ def test_get_mcp_tools_with_mcp_headers(self) -> None: } tools = get_mcp_tools(servers, token=None, mcp_headers=mcp_headers) assert len(tools) == 1 + assert tools[0][MCP_AUTHORIZATION_HEADER] == "client-provided-token" assert tools[0]["headers"] == { - "Authorization": "client-provided-token", "X-Custom": "custom-value", } @@ -444,7 +446,8 @@ def test_get_mcp_tools_with_static_headers(self, tmp_path: Path) -> None: tools = get_mcp_tools(servers, token=None) assert len(tools) == 1 - assert tools[0]["headers"] == {"Authorization": "static-secret-token"} + assert tools[0][MCP_AUTHORIZATION_HEADER] == "static-secret-token" + assert "headers" not in tools[0] # No other headers def test_get_mcp_tools_with_mixed_headers(self, tmp_path: Path) -> None: """Test get_mcp_tools with mixed header types.""" @@ -471,8 +474,8 @@ def test_get_mcp_tools_with_mixed_headers(self, tmp_path: Path) -> None: tools = get_mcp_tools(servers, token="k8s-token", mcp_headers=mcp_headers) assert len(tools) == 1 + assert tools[0][MCP_AUTHORIZATION_HEADER] == "Bearer k8s-token" assert tools[0]["headers"] == { - "Authorization": "Bearer k8s-token", "X-API-Key": "secret-api-key", "X-Custom": "client-custom-value", } @@ -1318,6 +1321,7 @@ def test_build_tool_call_summary_mcp_approval_request( mock_item.id = "approval_123" mock_item.name = "approve_action" mock_item.arguments = '{"action": "delete"}' + mock_item.server_label = "test-server" rag_chunks: list[RAGChunk] = [] mocker.patch(