From e396b36bd1ce26a5354616473599be6902d0912b Mon Sep 17 00:00:00 2001 From: blublinsky Date: Thu, 12 Feb 2026 12:14:08 +0000 Subject: [PATCH] Adjusting MCP header processing to llama stack 0.4.x and adding MCP e2e tests --- dev-tools/mcp-mock-server/README.md | 13 +- dev-tools/mcp-mock-server/server.py | 183 ++-- docker-compose-library.yaml | 7 +- docker-compose.yaml | 7 +- src/app/endpoints/streaming_query.py | 16 + src/app/main.py | 32 +- src/constants.py | 2 + src/utils/responses.py | 196 +++- .../library-mode/lightspeed-stack-mcp.yaml | 50 + .../library-mode/lightspeed-stack.yaml | 29 +- .../server-mode/lightspeed-stack-mcp.yaml | 51 ++ .../server-mode/lightspeed-stack.yaml | 29 +- tests/e2e/features/environment.py | 16 + tests/e2e/features/mcp_tools.feature | 184 ++++ tests/e2e/features/steps/mcp.py | 858 ++++++++++++++++++ tests/e2e/test_list.txt | 1 + .../app/endpoints/test_streaming_query.py | 5 + tests/unit/utils/test_responses.py | 12 +- 18 files changed, 1602 insertions(+), 89 deletions(-) create mode 100644 tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml create mode 100644 tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml create mode 100644 tests/e2e/features/mcp_tools.feature create mode 100644 tests/e2e/features/steps/mcp.py diff --git a/dev-tools/mcp-mock-server/README.md b/dev-tools/mcp-mock-server/README.md index 4d112a037..69566474e 100644 --- a/dev-tools/mcp-mock-server/README.md +++ b/dev-tools/mcp-mock-server/README.md @@ -19,9 +19,10 @@ This mock server helps developers: - ✅ **HTTP & HTTPS** - Runs both protocols simultaneously for comprehensive testing - ✅ **Header Capture** - Captures and displays all request headers - ✅ **Debug Endpoints** - Inspect captured headers and request history -- ✅ **MCP Protocol** - Implements basic MCP endpoints for testing +- ✅ **MCP Protocol** - Implements MCP endpoints (initialize, tools/list, tools/call) - ✅ **Request Logging** - Tracks recent requests with timestamps - ✅ **Self-Signed Certs** - Auto-generates certificates for HTTPS testing +- ✅ **Tool Execution** - Returns mock results for tool/call testing ## Quick Start @@ -46,8 +47,11 @@ HTTPS: https://localhost:3001 Debug endpoints: • /debug/headers - View captured headers • /debug/requests - View request log -MCP endpoint: - • POST /mcp/v1/list_tools +MCP endpoints: + • POST with JSON-RPC (any path) + - method: "initialize" + - method: "tools/list" + - method: "tools/call" ====================================================================== Note: HTTPS uses a self-signed certificate (for testing only) ``` @@ -270,8 +274,9 @@ python dev-tools/mcp-mock-server/server.py 8080 This is a **development/testing tool only**: - ❌ Not for production use - ❌ No authentication/security -- ❌ Limited MCP protocol implementation +- ❌ Limited MCP protocol implementation (initialize, tools/list, tools/call only) - ❌ Single-threaded (one request at a time) +- ❌ Mock responses only (not real tool execution) For production, use real MCP servers. 
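Before the `server.py` changes below, a hand-run sketch of the mock server's JSON-RPC surface described in the README above. This is illustrative only and not part of the patch; it assumes the compose port mapping introduced later in this diff (host 9000 → container 3000) and the file-based token `test-secret-token-123` written by the compose entrypoint — adjust the base URL if you run `server.py` directly on port 3000.

```python
# Minimal sketch: exercise the mock MCP server by hand (not part of the patch).
import requests

BASE = "http://localhost:9000"  # docker-compose maps host 9000 -> container 3000

# Any path accepts JSON-RPC; "tools/list" returns a single mock tool whose name
# depends on the Authorization header (mock_tool_file for the file-based token).
listing = requests.post(
    BASE,
    json={"jsonrpc": "2.0", "id": 1, "method": "tools/list"},
    headers={"Authorization": "Bearer test-secret-token-123"},
    timeout=5,
).json()
print([t["name"] for t in listing["result"]["tools"]])  # ['mock_tool_file']

# "tools/call" returns a canned result; an Authorization header containing
# "error-mode" makes the server set isError=true instead.
call = requests.post(
    BASE,
    json={
        "jsonrpc": "2.0",
        "id": 2,
        "method": "tools/call",
        "params": {"name": "mock_tool_file", "arguments": {"message": "hi"}},
    },
    headers={"Authorization": "Bearer test-secret-token-123"},
    timeout=5,
).json()
print(call["result"]["isError"], call["result"]["content"][0]["text"])

# Debug endpoints used by the e2e steps: inspect and clear the request log.
print(requests.get(f"{BASE}/debug/requests", timeout=5).json())
requests.get(f"{BASE}/debug/clear", timeout=5)
```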
diff --git a/dev-tools/mcp-mock-server/server.py b/dev-tools/mcp-mock-server/server.py index b7e17fffb..7f14f556b 100644 --- a/dev-tools/mcp-mock-server/server.py +++ b/dev-tools/mcp-mock-server/server.py @@ -60,7 +60,11 @@ def _capture_headers(self) -> None: if len(request_log) > 10: request_log.pop(0) - def do_POST(self) -> None: # pylint: disable=invalid-name + def do_POST( + self, + ) -> ( + None + ): # pylint: disable=invalid-name,too-many-locals,too-many-branches,too-many-statements """Handle POST requests (MCP protocol endpoints).""" self._capture_headers() @@ -73,14 +77,24 @@ def do_POST(self) -> None: # pylint: disable=invalid-name request_id = request_data.get("id", 1) method = request_data.get("method", "unknown") except (json.JSONDecodeError, UnicodeDecodeError): + request_data = {} request_id = 1 method = "unknown" + # Log the RPC method in the request log + if request_log: + request_log[-1]["rpc_method"] = method + # Determine tool name based on authorization header to avoid collisions auth_header = self.headers.get("Authorization", "") + # Initialize tool info defaults + tool_name = "mock_tool_no_auth" + tool_desc = "Mock tool with no authorization" + error_mode = False + # Match based on token content - match auth_header: + match True: case _ if "test-secret-token" in auth_header: tool_name = "mock_tool_file" tool_desc = "Mock tool with file-based auth" @@ -90,58 +104,116 @@ def do_POST(self) -> None: # pylint: disable=invalid-name case _ if "my-client-token" in auth_header: tool_name = "mock_tool_client" tool_desc = "Mock tool with client-provided token" + case _ if "error-mode" in auth_header: + tool_name = "mock_tool_error" + tool_desc = "Mock tool configured to return errors" + error_mode = True case _: - # No auth header or unrecognized token - tool_name = "mock_tool_no_auth" - tool_desc = "Mock tool with no authorization" - - # Handle MCP protocol methods - if method == "initialize": - # Return MCP initialize response - response = { - "jsonrpc": "2.0", - "id": request_id, - "result": { - "protocolVersion": "2024-11-05", - "capabilities": { - "tools": {}, - }, - "serverInfo": { - "name": "mock-mcp-server", - "version": "1.0.0", + # Default case already set above + pass + + # Log the tool name in the request log + if request_log: + request_log[-1]["tool_name"] = tool_name + + # Handle MCP protocol methods using match statement + response: dict = {} + match method: + case "initialize": + # Return MCP initialize response + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {}, + }, + "serverInfo": { + "name": "mock-mcp-server", + "version": "1.0.0", + }, }, - }, - } - elif method == "tools/list": - # Return list of tools with unique name - response = { - "jsonrpc": "2.0", - "id": request_id, - "result": { - "tools": [ - { - "name": tool_name, - "description": tool_desc, - "inputSchema": { - "type": "object", - "properties": { - "message": { - "type": "string", - "description": "Test message", - } + } + + case "tools/list": + # Return list of tools with unique name + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "tools": [ + { + "name": tool_name, + "description": tool_desc, + "inputSchema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "Test message", + } + }, }, - }, - } - ] - }, - } - else: - # Generic success response for other methods - response = { - "jsonrpc": "2.0", - "id": request_id, - "result": {"status": "ok"}, - 
} + } + ] + }, + } + + case "tools/call": + # Handle tool execution + params = request_data.get("params", {}) + tool_called = params.get("name", "unknown") + arguments = params.get("arguments", {}) + + # Check if error mode is enabled + if error_mode: + # Return error response + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "content": [ + { + "type": "text", + "text": ( + f"Error: Tool '{tool_called}' " + "execution failed - simulated error." + ), + } + ], + "isError": True, + }, + } + else: + # Build result text + result_text = ( + f"Mock tool '{tool_called}' executed successfully " + f"with arguments: {arguments}." + ) + + # Return successful tool execution result + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": { + "content": [ + { + "type": "text", + "text": result_text, + } + ], + "isError": False, + }, + } + + case _: + # Generic success response for other methods + response = { + "jsonrpc": "2.0", + "id": request_id, + "result": {"status": "ok"}, + } self.send_response(200) self.send_header("Content-Type", "application/json") @@ -160,6 +232,11 @@ def do_GET(self) -> None: # pylint: disable=invalid-name ) case "/debug/requests": self._send_json_response(request_log) + case "/debug/clear": + # Clear the request log and last captured headers + request_log.clear() + last_headers.clear() + self._send_json_response({"status": "cleared", "request_count": 0}) case "/": self._send_help_page() case _: @@ -273,10 +350,10 @@ def main() -> None: https_port = http_port + 1 # Create HTTP server - http_server = HTTPServer(("", http_port), MCPMockHandler) + http_server = HTTPServer(("", http_port), MCPMockHandler) # type: ignore[arg-type] # Create HTTPS server with self-signed certificate - https_server = HTTPServer(("", https_port), MCPMockHandler) + https_server = HTTPServer(("", https_port), MCPMockHandler) # type: ignore[arg-type] # Generate or load self-signed certificate script_dir = Path(__file__).parent diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index 9c934b89a..9a8507874 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -6,7 +6,7 @@ services: dockerfile: dev-tools/mcp-mock-server/Dockerfile container_name: mcp-mock-server ports: - - "3000:3000" + - "9000:3000" networks: - lightspeednet healthcheck: @@ -66,6 +66,11 @@ services: - WATSONX_API_KEY=${WATSONX_API_KEY:-} # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} + entrypoint: > + /bin/bash -c " + printf %s 'test-secret-token-123' > /tmp/lightspeed-mcp-test-token && + /app-root/.venv/bin/python3.12 /app-root/src/lightspeed_stack.py + " healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/liveness"] interval: 10s # how often to run the check diff --git a/docker-compose.yaml b/docker-compose.yaml index b1e3f819c..1aa2ed640 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -6,7 +6,7 @@ services: dockerfile: dev-tools/mcp-mock-server/Dockerfile container_name: mcp-mock-server ports: - - "3000:3000" + - "9000:3000" networks: - lightspeednet healthcheck: @@ -84,6 +84,11 @@ services: - TENANT_ID=${TENANT_ID:-} - CLIENT_ID=${CLIENT_ID:-} - CLIENT_SECRET=${CLIENT_SECRET:-} + entrypoint: > + /bin/bash -c " + printf %s 'test-secret-token-123' > /tmp/lightspeed-mcp-test-token && + /app-root/.venv/bin/python3.12 /app-root/src/lightspeed_stack.py + " depends_on: llama-stack: condition: service_healthy diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py index 
00cbe132b..772de8c18 100644 --- a/src/app/endpoints/streaming_query.py +++ b/src/app/endpoints/streaming_query.py @@ -263,6 +263,22 @@ async def retrieve_response_generator( turn_summary, ) # Retrieve response stream (may raise exceptions) + # Log request details before calling Llama Stack (MCP debugging) + if responses_params.tools is not None and len(responses_params.tools) > 0: + # Filter MCP tools once for efficiency + mcp_tools = [t for t in responses_params.tools if t.get("type") == "mcp"] + if len(mcp_tools) > 0: + logger.debug( + "Calling Llama Stack Responses API (streaming) with %d MCP tool(s)", + len(mcp_tools), + ) + # Log MCP server endpoints that may be called + logger.debug("MCP server endpoints that may be called:") + for tool in mcp_tools: + logger.debug( + " - %s: %s", tool.get("server_label"), tool.get("server_url") + ) + response = await context.client.responses.create( **responses_params.model_dump() ) diff --git a/src/app/main.py b/src/app/main.py index e4ee83905..dc214702d 100644 --- a/src/app/main.py +++ b/src/app/main.py @@ -20,7 +20,8 @@ from log import get_logger from a2a_storage import A2AStorageFactory from models.responses import InternalServerErrorResponse -from utils.common import register_mcp_servers_async + +# from utils.common import register_mcp_servers_async # Not needed for Responses API from utils.llama_stack_version import check_llama_stack_version logger = get_logger(__name__) @@ -69,9 +70,32 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]: ) raise - logger.info("Registering MCP servers") - await register_mcp_servers_async(logger, configuration.configuration) - get_logger("app.endpoints.handlers") + # Log MCP server configuration + mcp_servers = configuration.configuration.mcp_servers + if mcp_servers: + logger.info("Loaded %d MCP server(s) from configuration:", len(mcp_servers)) + for server in mcp_servers: + has_auth = bool(server.authorization_headers) + logger.info( + " - %s at %s (auth: %s)", + server.name, + server.url, + "yes" if has_auth else "no", + ) + # Debug: Show auth header names if configured + if has_auth: + logger.debug( + " Auth headers: %s", + ", ".join(server.authorization_headers.keys()), + ) + else: + logger.info("No MCP servers configured") + + # NOTE: MCP server registration not needed for Responses API + # The Responses API takes inline tool definitions instead of pre-registered toolgroups + # logger.info("Registering MCP servers") + # await register_mcp_servers_async(logger, configuration.configuration) + # get_logger("app.endpoints.handlers") logger.info("App startup complete") initialize_database() diff --git a/src/constants.py b/src/constants.py index 1b7b6beef..2fe3172c2 100644 --- a/src/constants.py +++ b/src/constants.py @@ -125,6 +125,8 @@ # MCP authorization header special values MCP_AUTH_KUBERNETES = "kubernetes" MCP_AUTH_CLIENT = "client" +# MCP authorization header name (special handling for llama_stack 0.4.x+) +MCP_AUTHORIZATION_HEADER = "authorization" # default RAG tool value DEFAULT_RAG_TOOL = "knowledge_search" diff --git a/src/utils/responses.py b/src/utils/responses.py index 41f5b2e74..2138b94f2 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -136,7 +136,7 @@ async def get_topic_summary( # pylint: disable=too-many-nested-blocks return summary_text.strip() if summary_text else "" -async def prepare_tools( +async def prepare_tools( # pylint: disable=too-many-locals,too-many-branches client: AsyncLlamaStackClient, query_request: QueryRequest, token: str, @@ -156,6 +156,7 @@ 
async def prepare_tools( List of tool configurations, or None if no_tools is True or no tools available """ if query_request.no_tools: + logger.debug("Tools disabled for this request (no_tools=True)") return None toolgroups = [] @@ -185,11 +186,54 @@ async def prepare_tools( mcp_tools = get_mcp_tools(config.mcp_servers, token, mcp_headers) if mcp_tools: toolgroups.extend(mcp_tools) + mcp_server_names = [tool.get("server_label", "unknown") for tool in mcp_tools] logger.debug( - "Configured %d MCP tools: %s", + "Prepared %d MCP tool(s) for request: %s", len(mcp_tools), - [tool.get("server_label", "unknown") for tool in mcp_tools], + ", ".join(mcp_server_names), ) + # Debug: Show full tool definitions + for tool in mcp_tools: + logger.debug( + " MCP tool: %s at %s (auth: %s, headers: %d)", + tool.get("server_label"), + tool.get("server_url"), + "yes" if "authorization" in tool else "no", + len(tool.get("headers", {})), + ) + else: + if config.mcp_servers: + logger.warning( + "No MCP tools prepared (all %d configured servers were skipped)", + len(config.mcp_servers), + ) + else: + logger.debug("No MCP servers configured") + + # Log tool summary + if toolgroups: + rag_tool_count = sum(1 for t in toolgroups if t.get("type") == "file_search") + mcp_tool_count = sum(1 for t in toolgroups if t.get("type") == "mcp") + logger.debug( + "Prepared %d total tool(s) for Responses API: %d RAG + %d MCP", + len(toolgroups), + rag_tool_count, + mcp_tool_count, + ) + # Log MCP server endpoints that may be called + if mcp_tool_count > 0: + mcp_servers = [ + (t.get("server_label"), t.get("server_url")) + for t in toolgroups + if t.get("type") == "mcp" + ] + if mcp_servers: + logger.debug("MCP server endpoints that may be called:") + for server_name, server_url in mcp_servers: + logger.debug(" - %s: %s", server_name, server_url) + else: + logger.debug("No tools available for this request") + # Convert empty list to None for consistency with existing behavior if not toolgroups: return None @@ -312,7 +356,7 @@ def get_rag_tools(vector_store_ids: list[str]) -> Optional[list[dict[str, Any]]] ] -def get_mcp_tools( +def get_mcp_tools( # pylint: disable=too-many-locals,too-many-branches mcp_servers: list[ModelContextProtocolServer], token: str | None = None, mcp_headers: dict[str, dict[str, str]] | None = None, @@ -326,6 +370,9 @@ def get_mcp_tools( Returns: List of MCP tool definitions with server details and optional auth headers + + Note: Starting with llama_stack 0.4.x, the Authorization header must be passed + via the 'authorization' parameter instead of in the 'headers' dict. 
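(Illustrative aside, not part of the patch.) Given the 0.4.x convention noted above, one element of the list returned by `get_mcp_tools` would take roughly the following shape; the keys reflect the fields referenced throughout this patch, while the concrete values are placeholders taken from the test configuration:

```python
# Rough shape of one get_mcp_tools() entry under llama_stack 0.4.x.
# Values are placeholders; only the keys are meant to match this patch.
tool_def = {
    "type": "mcp",
    "server_label": "mock-k8s-auth",
    "server_url": "http://mcp-mock-server:3000",
    "require_approval": "never",
    # The Authorization header is lifted out of 'headers' and passed separately:
    "authorization": "Bearer my-k8s-token",
    # Any other resolved auth headers stay in 'headers' (omitted when empty):
    "headers": {"X-API-Token": "some-client-provided-value"},
}
```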
""" def _get_token_value(original: str, header: str) -> str | None: @@ -340,10 +387,10 @@ def _get_token_value(original: str, header: str) -> str | None: # use client provided token if mcp_headers is None: return None - c_headers = mcp_headers.get(mcp_server.name, None) - if c_headers is None: + client_headers = mcp_headers.get(mcp_server.name, None) + if client_headers is None: return None - return c_headers.get(header, None) + return client_headers.get(header, None) case _: # use provided return original @@ -358,30 +405,101 @@ def _get_token_value(original: str, header: str) -> str | None: "require_approval": "never", } - # Build headers + # Log header resolution process + if mcp_server.authorization_headers: + logger.debug( + "MCP server '%s': Resolving %d authorization header(s)", + mcp_server.name, + len(mcp_server.authorization_headers), + ) + + # Build headers and separate Authorization header headers = {} + authorization = None for name, value in mcp_server.resolved_authorization_headers.items(): # for each defined header - h_value = _get_token_value(value, name) + resolved_value = _get_token_value(value, name) # only add the header if we got value - if h_value is not None: - headers[name] = h_value + if resolved_value is not None: + # Determine auth type for logging + match value: + case constants.MCP_AUTH_KUBERNETES: + auth_type = "kubernetes" + case constants.MCP_AUTH_CLIENT: + auth_type = "client" + case _: + auth_type = "static" + + # Log successful resolution + logger.debug( + "MCP server '%s': Header '%s' -> type: %s (resolved)", + mcp_server.name, + name, + auth_type, + ) + # Special handling for Authorization header (llama_stack 0.4.x+) + if name.lower() == constants.MCP_AUTHORIZATION_HEADER: + authorization = resolved_value + else: + headers[name] = resolved_value + else: + # Log failed resolution + logger.debug( + "MCP server '%s': Header '%s' -> FAILED to resolve", + mcp_server.name, + name, + ) # Skip server if auth headers were configured but not all could be resolved - if mcp_server.authorization_headers and len(headers) != len( + resolved_count = len(headers) + (1 if authorization is not None else 0) + if mcp_server.authorization_headers and resolved_count != len( mcp_server.authorization_headers ): + required_headers = list(mcp_server.authorization_headers.keys()) + resolved_headers = list(headers.keys()) + if authorization is not None: + # Find the actual Authorization header name (case-insensitive match) + auth_header_name = next( + ( + h + for h in required_headers + if h.lower() == constants.MCP_AUTHORIZATION_HEADER + ), + constants.MCP_AUTHORIZATION_HEADER.capitalize(), + ) + resolved_headers.append(auth_header_name) + missing_headers = [h for h in required_headers if h not in resolved_headers] + logger.warning( - "Skipping MCP server %s: required %d auth headers but only resolved %d", + "Skipping MCP server '%s': required %d auth headers but only resolved %d", mcp_server.name, len(mcp_server.authorization_headers), - len(headers), + resolved_count, + ) + logger.warning( + " Required: %s | Resolved: %s | Missing: %s", + ", ".join(required_headers), + ", ".join(resolved_headers) if resolved_headers else "none", + ", ".join(missing_headers) if missing_headers else "none", ) continue + # Add authorization parameter if present (llama_stack 0.4.x+) + if authorization is not None: + tool_def["authorization"] = authorization # type: ignore[index] + + # Add other headers if present if len(headers) > 0: - # add headers to tool definition tool_def["headers"] = headers # 
type: ignore[index] + + # Log successful tool creation + logger.debug( + "MCP server '%s': Tool definition created (authorization: %s, additional headers: %d)", + mcp_server.name, + "SET" if authorization is not None else "NOT SET", + len(headers), + ) + # collect tools info tools.append(tool_def) return tools @@ -521,7 +639,7 @@ def extract_token_usage( return token_counter -def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-many-branches +def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-many-branches,too-many-locals output_item: OpenAIResponseOutput, rag_chunks: list[RAGChunk], ) -> tuple[Optional[ToolCallSummary], Optional[ToolResultSummary]]: @@ -599,6 +717,31 @@ def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-m else (mcp_call_item.output if mcp_call_item.output else "") ) + # Log MCP tool call + logger.debug( + "MCP tool call: %s on server '%s' (call_id: %s)", + mcp_call_item.name, + mcp_call_item.server_label, + mcp_call_item.id, + ) + logger.debug(" Arguments: %s", args) + + # Log MCP tool result + if mcp_call_item.error: + logger.warning( + "MCP tool result: %s FAILED - %s", + mcp_call_item.name, + mcp_call_item.error, + ) + else: + output_preview = content[:100] + "..." if len(content) > 100 else content + logger.debug( + "MCP tool result: %s SUCCESS (output length: %d)", + mcp_call_item.name, + len(content), + ) + logger.debug(" Output preview: %s", output_preview) + return ToolCallSummary( id=mcp_call_item.id, name=mcp_call_item.name, @@ -622,6 +765,18 @@ def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-m } for tool in mcp_list_tools_item.tools ] + + # Log MCP list_tools call + logger.debug( + "MCP server '%s' listed %d available tool(s)", + mcp_list_tools_item.server_label, + len(mcp_list_tools_item.tools), + ) + logger.debug( + " Tools: %s", + ", ".join(tool.name for tool in mcp_list_tools_item.tools), + ) + content_dict = { "server_label": mcp_list_tools_item.server_label, "tools": tools_info, @@ -645,6 +800,15 @@ def build_tool_call_summary( # pylint: disable=too-many-return-statements,too-m if item_type == "mcp_approval_request": approval_request_item = cast(MCPApprovalRequest, output_item) args = parse_arguments_string(approval_request_item.arguments) + + # Log MCP approval request + logger.debug( + "MCP approval requested: tool '%s' on server '%s'", + approval_request_item.name, + approval_request_item.server_label, + ) + logger.debug(" Arguments: %s", args) + return ( ToolCallSummary( id=approval_request_item.id, diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml new file mode 100644 index 000000000..790efbf15 --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml @@ -0,0 +1,50 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop-with-token" +mcp_servers: + # Test 1: Static file-based authentication + - name: "mock-file-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + 
authorization_headers: + Authorization: "/tmp/lightspeed-mcp-test-token" + # Test 2: Kubernetes token forwarding + - name: "mock-k8s-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "kubernetes" + # Test 3: Client-provided token + - name: "mock-client-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth - should appear in mcp-auth/client-options response + - name: "github-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth (different header) - should appear in response + - name: "gitlab-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + X-API-Token: "client" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml index 118b917c5..178239db3 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml @@ -17,20 +17,43 @@ user_data_collection: transcripts_storage: "/tmp/data/transcripts" authentication: module: "noop" +# Conversation cache for storing Q&A history +conversation_cache: + type: "sqlite" + sqlite: + db_path: "/tmp/data/conversation-cache.db" mcp_servers: - # Mock server with client-provided auth - should appear in mcp-auth/client-options response + # Test 1: Static file-based authentication + - name: "mock-file-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "/tmp/lightspeed-mcp-test-token" + # Test 2: Kubernetes token forwarding + - name: "mock-k8s-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "kubernetes" + # Test 3: Client-provided token + - name: "mock-client-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth - should appear in mcp-auth/client-options response - name: "github-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" authorization_headers: Authorization: "client" - # Mock server with client-provided auth (different header) - should appear in response + # Legacy: Mock server with client-provided auth (different header) - should appear in response - name: "gitlab-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" authorization_headers: X-API-Token: "client" - # Mock server with no auth - should NOT appear in response + # Legacy: Mock server with no auth - should NOT appear in response - name: "public-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" \ No newline at end of file diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml new file mode 100644 index 000000000..5f88a4e66 --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml @@ -0,0 +1,51 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack 
service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop-with-token" +mcp_servers: + # Test 1: Static file-based authentication + - name: "mock-file-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "/tmp/lightspeed-mcp-test-token" + # Test 2: Kubernetes token forwarding + - name: "mock-k8s-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "kubernetes" + # Test 3: Client-provided token + - name: "mock-client-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth - should appear in mcp-auth/client-options response + - name: "github-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth (different header) - should appear in response + - name: "gitlab-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + X-API-Token: "client" \ No newline at end of file diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml index 1dbef61cf..73c8ea0e0 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml @@ -18,20 +18,43 @@ user_data_collection: transcripts_storage: "/tmp/data/transcripts" authentication: module: "noop" +# Conversation cache for storing Q&A history +conversation_cache: + type: "sqlite" + sqlite: + db_path: "/tmp/data/conversation-cache.db" mcp_servers: - # Mock server with client-provided auth - should appear in mcp-auth/client-options response + # Test 1: Static file-based authentication + - name: "mock-file-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "/tmp/lightspeed-mcp-test-token" + # Test 2: Kubernetes token forwarding + - name: "mock-k8s-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "kubernetes" + # Test 3: Client-provided token + - name: "mock-client-auth" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Legacy: Mock server with client-provided auth - should appear in mcp-auth/client-options response - name: "github-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" authorization_headers: Authorization: "client" - # Mock server with client-provided auth (different header) - should appear in response + # Legacy: Mock server with client-provided auth (different header) - should appear in response - name: "gitlab-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" authorization_headers: X-API-Token: "client" - # Mock server with no auth - should NOT appear in response + # Legacy: Mock server with no auth - should NOT appear in response - name: "public-api" provider_id: "model-context-protocol" url: "http://mcp-mock-server:3000" \ No newline at end of file diff --git 
a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index 3df842f66..64f1d09bb 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -260,6 +260,17 @@ def before_feature(context: Context, feature: Feature) -> None: switch_config(context.feature_config) restart_container("lightspeed-stack") + if "MCP" in feature.tags: + # For MCP tests, we need noop-with-token auth to support k8s token forwarding + # Use mode-specific configs (server vs library) + mode_dir = "library-mode" if context.is_library_mode else "server-mode" + context.feature_config = ( + f"tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp.yaml" + ) + context.default_config_backup = create_config_backup("lightspeed-stack.yaml") + switch_config(context.feature_config) + restart_container("lightspeed-stack") + if "Feedback" in feature.tags: context.hostname = os.getenv("E2E_LSC_HOSTNAME", "localhost") context.port = os.getenv("E2E_LSC_PORT", "8080") @@ -287,6 +298,11 @@ def after_feature(context: Context, feature: Feature) -> None: restart_container("lightspeed-stack") remove_config_backup(context.default_config_backup) + if "MCP" in feature.tags: + switch_config(context.default_config_backup) + restart_container("lightspeed-stack") + remove_config_backup(context.default_config_backup) + if "Feedback" in feature.tags: for conversation_id in context.feedback_conversations: url = f"http://{context.hostname}:{context.port}/v1/conversations/{conversation_id}" diff --git a/tests/e2e/features/mcp_tools.feature b/tests/e2e/features/mcp_tools.feature new file mode 100644 index 000000000..ebae7f4c3 --- /dev/null +++ b/tests/e2e/features/mcp_tools.feature @@ -0,0 +1,184 @@ +@MCP +Feature: MCP Server Integration + + Background: + Given The service is started locally + And REST API service prefix is /v1 + + # ============================================================================ + # Basic Operations - Discovery and Configuration + # ============================================================================ + + Scenario: MCP client auth options endpoint returns configured servers + Given The system is in default state + And I set the Authorization header to Bearer test-token + When I access REST API endpoint "mcp-auth/client-options" using HTTP GET method + Then The status code of the response is 200 + And The body of the response has proper client auth options structure + And The response contains server "mock-client-auth" with client auth header "Authorization" + + # ============================================================================ + # Authentication Methods + # ============================================================================ + + Scenario: MCP mock server receives file-based static token + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests + And The MCP mock server should have captured Authorization header "Bearer test-secret-token-123" from file-auth server + + Scenario: MCP mock server receives kubernetes token from request + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests 
+ And The MCP mock server should have captured Authorization header containing "my-k8s-token" from k8s-auth server + + Scenario: MCP mock server receives client-provided token via MCP-HEADERS + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests + And The MCP mock server request log should contain exactly tools mock_tool_file, mock_tool_k8s, mock_tool_client + + Scenario: MCP server with client auth is skipped when MCP-HEADERS is missing + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server request log should contain exactly tools mock_tool_file, mock_tool_k8s + + Scenario: All three MCP auth types work in a single request + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received at least 6 requests + And The MCP mock server request log should contain tool "mock_tool_file" + And The MCP mock server request log should contain tool "mock_tool_k8s" + And The MCP mock server request log should contain tool "mock_tool_client" + + # ============================================================================ + # Tool Execution + # ============================================================================ + + Scenario: LLM successfully discovers and lists MCP tools + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query asking about available tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests + And The MCP mock server should have received tools/list method calls + + Scenario: LLM calls an MCP tool and receives results + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a query that explicitly requests tool usage + And I wait for MCP server to process tool calls + Then The MCP mock server should have received tools/call method + And The response should contain MCP tool execution results + And The response should indicate successful tool execution + + Scenario: MCP tool execution appears in query response + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + When I send a query that triggers MCP tool usage + Then The status code of the response is 200 + And The response should contain tool call information + And The tool execution results should be included in the response + + Scenario: Failed MCP tool execution is handled gracefully + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server is configured to return 
errors + When I send a query that uses MCP tools + Then The status code of the response is 200 + And The response should indicate tool execution failed + And The MCP mock server should confirm error mode is active + + # Note: This scenario can be slow in CI (up to 120s) due to multiple LLM+tool roundtrips + # Completes in ~8s locally but timing is highly variable in containerized CI environments + Scenario: Multiple MCP tools can be called in sequence + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a query that requires multiple tool calls + And I wait for MCP server to process tool calls + Then The MCP mock server should have received multiple tools/call methods + And All tool calls should have succeeded + And The response should contain results from all tool calls + + Scenario: Streaming query discovers and uses MCP tools + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a streaming query that uses MCP tools + And I wait for MCP server to process tool calls + Then The MCP mock server should have received requests + And The MCP mock server should have received tools/call method + And The streaming response should be successful + + # Note: This scenario can be slow in CI (up to 120s) due to multiple LLM+tool roundtrips + # Completes in ~8s locally but timing is highly variable in containerized CI environments + Scenario: Streaming query with multiple MCP tools + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a streaming query requiring multiple tools + And I wait for MCP server to process tool calls + Then The MCP mock server should have received multiple tools/call methods + And The streaming response should contain tool execution results + + Scenario: Failed MCP tool execution in streaming query is handled gracefully + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server is configured to return errors + When I send a streaming query that uses MCP tools + Then The streaming response should be successful + And The MCP mock server should confirm error mode is active + + Scenario: Streaming query receives file-based static token + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a streaming query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received requests + And The MCP mock server should have captured Authorization header "Bearer test-secret-token-123" from file-auth server + And The streaming response should be successful + + Scenario: Streaming query receives client-provided token via MCP-HEADERS + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And I set the MCP-HEADERS header with client token for "mock-client-auth" + And The MCP mock server request log is cleared + When I send a streaming query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server should have received 
requests + And The MCP mock server request log should contain exactly tools mock_tool_file, mock_tool_k8s, mock_tool_client + And The streaming response should be successful + + Scenario: Streaming query skips MCP server with client auth when MCP-HEADERS is missing + Given The system is in default state + And I set the Authorization header to Bearer my-k8s-token + And The MCP mock server request log is cleared + When I send a streaming query that uses MCP tools + And I wait for MCP server to receive requests + Then The MCP mock server request log should contain exactly tools mock_tool_file, mock_tool_k8s + And The streaming response should be successful diff --git a/tests/e2e/features/steps/mcp.py b/tests/e2e/features/steps/mcp.py new file mode 100644 index 000000000..8ea308bcc --- /dev/null +++ b/tests/e2e/features/steps/mcp.py @@ -0,0 +1,858 @@ +"""Implementation of MCP-specific test steps.""" + +import json +import time + +import requests +from behave import given, then, when # pyright: ignore[reportAttributeAccessIssue] +from behave.runner import Context + +# Mock MCP server configuration +MOCK_MCP_SERVER_URL = "http://localhost:9000" + + +@given('I set the MCP-HEADERS header with client token for "{server_name}"') +def set_mcp_headers_with_client_token(context: Context, server_name: str) -> None: + """Set MCP-HEADERS header with a client-provided token. + + Parameters: + context (Context): Behave context. + server_name (str): Name of the MCP server to provide token for. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + # Set MCP-HEADERS with client token + mcp_headers = {server_name: {"Authorization": "Bearer my-client-token"}} + context.auth_headers["MCP-HEADERS"] = json.dumps(mcp_headers) + print(f"🔑 Set MCP-HEADERS for server '{server_name}' with client token") + + +@given("The MCP mock server request log is cleared") +def clear_mcp_mock_server_log(context: Context) -> None: + """Clear the MCP mock server request log using the debug/clear endpoint. + + Parameters: + context (Context): Behave context. + """ + try: + response = requests.get(f"{MOCK_MCP_SERVER_URL}/debug/clear", timeout=2) + if response.status_code == 200: + result = response.json() + print( + f"🧹 MCP mock server log cleared (status: {result.get('status', 'unknown')})" + ) + else: + print(f"⚠️ Warning: Clear endpoint returned status {response.status_code}") + except requests.RequestException as e: + print(f"⚠️ Warning: Could not clear MCP mock server log: {e}") + + +@when("I send a query that uses MCP tools") +def send_query_with_mcp_tools(context: Context) -> None: + """Send a query request that will trigger MCP tool discovery. + + Parameters: + context (Context): Behave context. 
+ """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "What tools are available?", + "model": model, + "provider": provider, + } + + # Use longer timeout (60s) if testing error handling + # llama-stack 0.4.2 can be slow to handle tool errors + timeout = ( + 60 + if hasattr(context, "expect_tool_errors") and context.expect_tool_errors + else 30 + ) + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=timeout + ) + print(f"📤 Sent query request (status: {context.response.status_code})") + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@when("I wait for MCP server to receive requests") +def wait_for_mcp_requests(context: Context) -> None: + """Wait a brief moment for MCP server to receive and log requests. + + Parameters: + context (Context): Behave context. + """ + # Wait for requests to be processed + time.sleep(2) + print("⏱️ Waited for MCP server to process requests") + + +@then("The MCP mock server should have received requests") +def check_mcp_server_received_requests(context: Context) -> None: + """Verify the MCP mock server received at least one request. + + Parameters: + context (Context): Behave context. + """ + # Delegate to the parameterized version with count=1 + check_mcp_server_request_count(context, 1) + + +@then("The MCP mock server should have received at least {count:d} requests") +def check_mcp_server_request_count(context: Context, count: int) -> None: + """Verify the MCP mock server received at least N requests. + + Parameters: + context (Context): Behave context. + count (int): Minimum expected request count. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert ( + response.status_code == 200 + ), f"Failed to get debug requests: {response.status_code}" + + requests_log = response.json() + actual_count = len(requests_log) + assert ( + actual_count >= count + ), f"Expected at least {count} requests, got {actual_count}" + + print(f"✅ MCP mock server received {actual_count} request(s) (>= {count})") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then( + 'The MCP mock server should have captured Authorization header "{expected_value}" from file-auth server' +) +def check_file_auth_header(context: Context, expected_value: str) -> None: + """Verify the MCP mock server captured the expected file-based auth header. + + Parameters: + context (Context): Behave context. + expected_value (str): Expected Authorization header value. 
+ """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + # Find requests with the expected auth header + matching_requests = [ + req + for req in requests_log + if req.get("headers", {}).get("Authorization") == expected_value + ] + + assert ( + len(matching_requests) > 0 + ), f"No requests found with Authorization: {expected_value}" + print( + f"✅ Found {len(matching_requests)} request(s) with file-based auth token" + ) + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then( + 'The MCP mock server should have captured Authorization header containing "{token_fragment}" from k8s-auth server' +) +def check_k8s_auth_header(context: Context, token_fragment: str) -> None: + """Verify the MCP mock server captured k8s token in Authorization header. + + Parameters: + context (Context): Behave context. + token_fragment (str): Expected token fragment in Authorization header. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + # Find requests with k8s token + matching_requests = [ + req + for req in requests_log + if token_fragment in req.get("headers", {}).get("Authorization", "") + ] + + assert ( + len(matching_requests) > 0 + ), f"No requests found with k8s token containing: {token_fragment}" + print(f"✅ Found {len(matching_requests)} request(s) with k8s auth token") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then( + 'The MCP mock server should have captured Authorization header containing "{token_fragment}" from client-auth server' +) +def check_client_auth_header(context: Context, token_fragment: str) -> None: + """Verify the MCP mock server captured client token in Authorization header. + + Parameters: + context (Context): Behave context. + token_fragment (str): Expected token fragment in Authorization header. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + # Find requests with client token + matching_requests = [ + req + for req in requests_log + if token_fragment in req.get("headers", {}).get("Authorization", "") + ] + + assert ( + len(matching_requests) > 0 + ), f"No requests found with client token containing: {token_fragment}" + print( + f"✅ Found {len(matching_requests)} request(s) with client-provided token" + ) + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then('The MCP mock server request log should contain tool "{tool_name}"') +def check_mcp_tool_in_log(context: Context, tool_name: str) -> None: + """Verify the MCP mock server received requests for a specific tool. + + Queries the mock server's debug endpoint to check the request log. 
+ + Parameters: + context (Context): Behave context. + tool_name (str): Expected tool name (e.g., mock_tool_file, mock_tool_k8s). + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + + # Check if any request in the log contains the expected tool name + found = False + for req in requests_log: + if req.get("tool_name") == tool_name: + found = True + break + + assert found, f"Tool '{tool_name}' not found in mock server request log" + print(f"✅ Tool '{tool_name}' found in MCP server request log") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then('The MCP mock server request log should not contain tool "{tool_name}"') +def check_mcp_tool_not_in_log(context: Context, tool_name: str) -> None: + """Verify the MCP mock server did NOT receive requests for a specific tool. + + Queries the mock server's debug endpoint to check the request log. + This is useful for verifying that servers were skipped due to auth issues. + + Parameters: + context (Context): Behave context. + tool_name (str): Tool name that should NOT be present. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + + # Check if any request in the log contains the tool name + for req in requests_log: + if req.get("tool_name") == tool_name: + raise AssertionError( + f"Tool '{tool_name}' unexpectedly found in mock server request log " + f"(server should have been skipped)" + ) + + print(f"✅ Tool '{tool_name}' correctly absent from MCP server request log") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then("The MCP mock server request log should contain exactly tools {tool_list}") +def check_mcp_exact_tools_in_log(context: Context, tool_list: str) -> None: + """Verify MCP server called at least one expected tool and no unexpected tools. + + This validates: + 1. At least ONE tool from the expected list was called (flexible for LLM non-determinism) + 2. NO tools outside the expected list were called (enforces security/auth boundaries) + + This approach balances LLM flexibility with security enforcement - the LLM can choose + which tools to use, but cannot access tools outside the allowed set. + + Parameters: + context (Context): Behave context. + tool_list (str): Comma-separated list of allowed tool names. 
+ """ + mock_server_url = MOCK_MCP_SERVER_URL + + # Parse expected tools + expected_tools = [tool.strip() for tool in tool_list.split(",")] + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + + # Extract unique tool names from log + found_tools = set() + for req in requests_log: + tool_name = req.get("tool_name") + if tool_name: + found_tools.add(tool_name) + + # Check 1: At least ONE expected tool was called + # (Allows for LLM non-determinism in tool selection) + called_expected_tools = found_tools & set(expected_tools) + if not called_expected_tools: + raise AssertionError( + f"None of the expected tools were called. " + f"Expected at least one of: {', '.join(expected_tools)}. " + f"Found tools: {', '.join(sorted(found_tools))}" + ) + + # Check 2: NO unexpected tools were called + # (Enforces security - prevents access to unauthorized tools) + unexpected_tools = [tool for tool in found_tools if tool not in expected_tools] + if unexpected_tools: + raise AssertionError( + f"Unexpected tools found in log: {', '.join(unexpected_tools)}. " + f"Only expected: {', '.join(expected_tools)}" + ) + + print( + f"✅ Tool usage validated: called {', '.join(sorted(called_expected_tools))} " + f"(allowed: {', '.join(expected_tools)})" + ) + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@when("I send a query asking about available tools") +def send_query_about_tools(context: Context) -> None: + """Send a query asking about available tools. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "What tools are available to help me?", + "model": model, + "provider": provider, + } + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=30 + ) + print(f"📤 Sent query about tools (status: {context.response.status_code})") + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@when("I send a query that explicitly requests tool usage") +def send_query_requesting_tool_usage(context: Context) -> None: + """Send a query that explicitly asks to use a tool. + + Parameters: + context (Context): Behave context. 
+ """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Please use the mock_tool_k8s tool to test the connection", + "model": model, + "provider": provider, + } + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=30 + ) + print( + f"📤 Sent query requesting tool usage (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@when("I send a query that triggers MCP tool usage") +def send_query_triggering_tool_usage(context: Context) -> None: + """Send a query that should trigger MCP tool usage. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Use available tools to help me", + "model": model, + "provider": provider, + } + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=30 + ) + print( + f"📤 Sent query triggering tools (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@when("I wait for MCP server to process tool calls") +def wait_for_tool_calls(context: Context) -> None: + """Wait for MCP server to process tool call requests. + + Parameters: + context (Context): Behave context. + """ + time.sleep(3) + print("⏱️ Waited for MCP server to process tool calls") + + +@when("I send a query that requires multiple tool calls") +def send_query_requiring_multiple_tools(context: Context) -> None: + """Send a query that should trigger multiple tool calls. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Use all available tools to gather information", + "model": model, + "provider": provider, + } + + try: + # Multiple tool calls can take longer - use 120s timeout + # Note: This test is timing-sensitive in CI. Locally completes in ~8s, + # but CI can take 90+ seconds due to container overhead and resource limits + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=120 + ) + print( + f"📤 Sent query requiring multiple tools (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Query request failed: {e}") + context.response = None + + +@then("The MCP mock server should have received tools/list method calls") +def check_tools_list_calls(context: Context) -> None: + """Verify MCP server received tools/list method calls from the SUT. + + Parameters: + context (Context): Behave context. 
+ """ + mock_server_url = MOCK_MCP_SERVER_URL + + # Check the debug request log for tools/list calls + try: + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert ( + response.status_code == 200 + ), f"Failed to get debug requests: {response.status_code}" + requests_log = response.json() + tools_list_calls = [ + req for req in requests_log if req.get("rpc_method") == "tools/list" + ] + assert len(tools_list_calls) > 0, "No tools/list calls found in request log" + print(f"✅ MCP server received {len(tools_list_calls)} tools/list call(s)") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then("The MCP mock server should have received tools/call method") +def check_tools_call_method(context: Context) -> None: + """Verify MCP server received tools/call method. + + Parameters: + context (Context): Behave context. + """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + tools_call_entries = [ + req for req in requests_log if req.get("rpc_method") == "tools/call" + ] + assert len(tools_call_entries) > 0, "No tools/call entries found in request log" + print(f"✅ MCP server received {len(tools_call_entries)} tools/call request(s)") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then("The response should contain MCP tool execution results") +@then("The response should indicate successful tool execution") +@then("The response should contain tool call information") +@then("The tool execution results should be included in the response") +def check_response_has_tool_execution(context: Context) -> None: + """Verify response contains evidence of MCP tool execution. + + This consolidated step checks that the response contains tool-related content, + which could be tool calls, tool results, or references to tool execution in + the response text. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + assert ( + context.response.status_code == 200 + ), f"Bad status: {context.response.status_code}" + + response_data = context.response.json() + assert "response" in response_data, "Response missing 'response' field" + + # Check for evidence of tool execution in the response + response_text = str(response_data.get("response", "")).lower() + + # Look for tool-related indicators in the response + # (tool execution, mock tool, or specific tool results) + has_tool_content = any( + indicator in response_text + for indicator in ["tool", "mock", "executed", "success"] + ) + + assert has_tool_content, ( + "Response does not contain evidence of tool execution. " + f"Response text: {response_data.get('response', '')[:200]}" + ) + + print("✅ Response contains evidence of tool execution") + + +@given("The MCP mock server is configured to return errors") +def configure_mock_server_errors(context: Context) -> None: + """Configure mock server to return errors via MCP-HEADERS. + + Sends the special "Bearer error-mode" token via MCP-HEADERS so all + configured MCP servers (mock-file-auth, mock-k8s-auth, mock-client-auth) + receive it and return errors. 
This token must be propagated through
+    MCP-HEADERS, not the top-level Authorization header, because the stack
+    only forwards MCP-HEADERS to MCP servers.
+
+    Parameters:
+        context (Context): Behave context.
+    """
+    if not hasattr(context, "auth_headers"):
+        context.auth_headers = {}
+
+    # Configure all MCP servers to use error-mode token via MCP-HEADERS
+    # The mock server recognizes "Bearer error-mode" and returns errors
+    mcp_headers = {
+        "mock-file-auth": {"Authorization": "Bearer error-mode"},
+        "mock-k8s-auth": {"Authorization": "Bearer error-mode"},
+        "mock-client-auth": {"Authorization": "Bearer error-mode"},
+    }
+    context.auth_headers["MCP-HEADERS"] = json.dumps(mcp_headers)
+    context.expect_tool_errors = True
+    print(
+        "⚠️ MCP mock server configured for error mode (error-mode token via MCP-HEADERS)"
+    )
+
+
+@then("The response should indicate tool execution failed")
+def check_tool_execution_failed(context: Context) -> None:
+    """Verify the query still succeeds when tool execution fails.
+
+    In error mode the MCP tools return errors, but the LLM handles those
+    errors gracefully, so the endpoint still responds with HTTP 200. This
+    step therefore only asserts that a successful response was received.
+
+    Parameters:
+        context (Context): Behave context.
+    """
+    assert context.response is not None, "No response received"
+    assert (
+        context.response.status_code == 200
+    ), f"Bad status: {context.response.status_code}"
+
+    # In error mode, the response should still be 200 but contain error information
+    # The LLM will handle the tool error gracefully
+    print("✅ Response received (tool errors are handled gracefully by LLM)")
+
+
+@then("The MCP mock server should confirm error mode is active")
+def check_mock_server_error_mode(context: Context) -> None:
+    """Verify the mock server is returning errors via API query.
+
+    Sends a test request to the mock server and confirms it returns isError=true.
+
+    Parameters:
+        context (Context): Behave context.
+    """
+    mock_server_url = MOCK_MCP_SERVER_URL
+
+    try:
+        # Verify the mock server is in error mode by checking its response
+        payload = {
+            "jsonrpc": "2.0",
+            "id": 1,
+            "method": "tools/call",
+            "params": {"name": "test", "arguments": {}},
+        }
+        response = requests.post(
+            mock_server_url,
+            json=payload,
+            headers={"Authorization": "Bearer error-mode"},
+            timeout=5,
+        )
+        result = response.json()
+        assert result.get("result", {}).get(
+            "isError"
+        ), "Mock server not returning errors"
+        print("✅ Mock server confirmed to be returning errors (isError: true)")
+    except requests.RequestException as e:
+        raise AssertionError(f"Could not verify mock server error mode: {e}") from e
+
+
+@then("The MCP mock server should have received multiple tools/call methods")
+def check_multiple_tool_calls(context: Context) -> None:
+    """Verify MCP server received multiple tool call requests.
+
+    Parameters:
+        context (Context): Behave context.
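+
+    Passes when the /debug/requests log holds at least two entries whose
+    "rpc_method" is "tools/call", e.g. a hypothetical excerpt such as
+    [{"rpc_method": "tools/call", "tool_name": "mock_tool_file", ...},
+    {"rpc_method": "tools/call", "tool_name": "mock_tool_k8s", ...}].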
+ """ + mock_server_url = MOCK_MCP_SERVER_URL + + try: + # Mock server debug endpoint can be slow with many requests - use 15s timeout + response = requests.get(f"{mock_server_url}/debug/requests", timeout=15) + assert response.status_code == 200, "Failed to get debug requests" + + requests_log = response.json() + tools_call_entries = [ + req for req in requests_log if req.get("rpc_method") == "tools/call" + ] + assert ( + len(tools_call_entries) >= 2 + ), f"Expected multiple tools/call requests, got {len(tools_call_entries)}" + print(f"✅ MCP server received {len(tools_call_entries)} tools/call request(s)") + except requests.RequestException as e: + raise AssertionError(f"Could not connect to MCP mock server: {e}") from e + + +@then("All tool calls should have succeeded") +def check_all_tool_calls_succeeded(context: Context) -> None: + """Verify all tool calls succeeded. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + assert context.response.status_code == 200, "Request failed" + print("✅ All tool calls completed successfully") + + +@then("The response should contain results from all tool calls") +def check_response_has_all_results(context: Context) -> None: + """Verify response contains results from all tool calls. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + response_data = context.response.json() + + assert "response" in response_data, "Response missing 'response' field" + print("✅ Response contains results from all tool calls") + + +@when("I send a streaming query that uses MCP tools") +def send_streaming_query_with_mcp_tools(context: Context) -> None: + """Send a streaming query that should use MCP tools. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/streaming_query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Use available tools to help me", + "model": model, + "provider": provider, + "media_type": "application/json", # Request JSON response instead of SSE + } + + try: + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=30 + ) + print( + f"📤 Sent streaming query with MCP tools (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Streaming query request failed: {e}") + context.response = None + + +@when("I send a streaming query requiring multiple tools") +def send_streaming_query_requiring_multiple_tools(context: Context) -> None: + """Send a streaming query requiring multiple tool calls. + + Parameters: + context (Context): Behave context. + """ + if not hasattr(context, "auth_headers"): + context.auth_headers = {} + + base_url = f"http://{context.hostname}:{context.port}" + url = f"{base_url}/v1/streaming_query" + + model = getattr(context, "default_model", "gpt-4o-mini") + provider = getattr(context, "default_provider", "openai") + + payload = { + "query": "Use all available tools to gather comprehensive information", + "model": model, + "provider": provider, + "media_type": "application/json", # Request JSON response instead of SSE + } + + try: + # Multiple tool calls can take longer - use 120s timeout + # Note: This test is timing-sensitive in CI. 
Locally completes in ~8s, + # but CI can take 90+ seconds due to container overhead and resource limits + context.response = requests.post( + url, json=payload, headers=context.auth_headers, timeout=120 + ) + print( + f"📤 Sent streaming query requiring multiple tools (status: {context.response.status_code})" + ) + except requests.RequestException as e: + print(f"❌ Streaming query request failed: {e}") + context.response = None + + +@then("The streaming response should be successful") +def check_streaming_response_successful(context: Context) -> None: + """Verify streaming response was successful. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + assert ( + context.response.status_code == 200 + ), f"Bad status: {context.response.status_code}" + print("✅ Streaming response completed successfully") + + +@then("The streaming response should contain tool execution results") +def check_streaming_response_has_tool_results(context: Context) -> None: + """Verify streaming response contains tool execution results. + + Parameters: + context (Context): Behave context. + """ + assert context.response is not None, "No response received" + assert ( + context.response.status_code == 200 + ), f"Bad status: {context.response.status_code}" + + # Parse response and check for tool execution evidence + try: + response_data = context.response.json() + response_text = str(response_data.get("response", "")).lower() + + # Look for tool-related indicators in the response + has_tool_content = any( + indicator in response_text + for indicator in ["tool", "mock", "executed", "success"] + ) + + assert has_tool_content, ( + "Response does not contain evidence of tool execution. " + f"Response text: {response_data.get('response', '')[:200]}" + ) + + print("✅ Streaming response contains tool execution results") + except ValueError as e: + raise AssertionError(f"Failed to parse response JSON: {e}") from e diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 804e180cf..903bfd585 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -12,3 +12,4 @@ features/info.feature features/query.feature features/streaming_query.feature features/rest_api.feature +features/mcp_tools.feature diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py index e57423971..3e8fb1cd6 100644 --- a/tests/unit/app/endpoints/test_streaming_query.py +++ b/tests/unit/app/endpoints/test_streaming_query.py @@ -672,6 +672,7 @@ async def test_retrieve_response_generator_success( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -761,6 +762,7 @@ async def test_retrieve_response_generator_connection_error( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -814,6 +816,7 @@ async def test_retrieve_response_generator_api_status_error( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = 
None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -864,6 +867,7 @@ async def test_retrieve_response_generator_runtime_error_context_length( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -911,6 +915,7 @@ async def test_retrieve_response_generator_runtime_error_other( mock_responses_params.model = "provider1/model1" mock_responses_params.input = "test query" mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None # No tools for this test mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", diff --git a/tests/unit/utils/test_responses.py b/tests/unit/utils/test_responses.py index a47ae2fe1..95d4413fe 100644 --- a/tests/unit/utils/test_responses.py +++ b/tests/unit/utils/test_responses.py @@ -22,6 +22,7 @@ from pytest_mock import MockerFixture from configuration import AppConfig +from constants import MCP_AUTHORIZATION_HEADER from models.config import ModelContextProtocolServer from models.requests import QueryRequest from utils.responses import ( @@ -366,7 +367,8 @@ def test_get_mcp_tools_with_kubernetes_auth(self) -> None: ] tools_k8s = get_mcp_tools(servers_k8s, token="user-k8s-token") assert len(tools_k8s) == 1 - assert tools_k8s[0]["headers"] == {"Authorization": "Bearer user-k8s-token"} + assert tools_k8s[0][MCP_AUTHORIZATION_HEADER] == "Bearer user-k8s-token" + assert "headers" not in tools_k8s[0] # No other headers def test_get_mcp_tools_with_mcp_headers(self) -> None: """Test get_mcp_tools with client-provided headers.""" @@ -386,8 +388,8 @@ def test_get_mcp_tools_with_mcp_headers(self) -> None: } tools = get_mcp_tools(servers, token=None, mcp_headers=mcp_headers) assert len(tools) == 1 + assert tools[0][MCP_AUTHORIZATION_HEADER] == "client-provided-token" assert tools[0]["headers"] == { - "Authorization": "client-provided-token", "X-Custom": "custom-value", } @@ -444,7 +446,8 @@ def test_get_mcp_tools_with_static_headers(self, tmp_path: Path) -> None: tools = get_mcp_tools(servers, token=None) assert len(tools) == 1 - assert tools[0]["headers"] == {"Authorization": "static-secret-token"} + assert tools[0][MCP_AUTHORIZATION_HEADER] == "static-secret-token" + assert "headers" not in tools[0] # No other headers def test_get_mcp_tools_with_mixed_headers(self, tmp_path: Path) -> None: """Test get_mcp_tools with mixed header types.""" @@ -471,8 +474,8 @@ def test_get_mcp_tools_with_mixed_headers(self, tmp_path: Path) -> None: tools = get_mcp_tools(servers, token="k8s-token", mcp_headers=mcp_headers) assert len(tools) == 1 + assert tools[0][MCP_AUTHORIZATION_HEADER] == "Bearer k8s-token" assert tools[0]["headers"] == { - "Authorization": "Bearer k8s-token", "X-API-Key": "secret-api-key", "X-Custom": "client-custom-value", } @@ -1318,6 +1321,7 @@ def test_build_tool_call_summary_mcp_approval_request( mock_item.id = "approval_123" mock_item.name = "approve_action" mock_item.arguments = '{"action": "delete"}' + mock_item.server_label = "test-server" rag_chunks: list[RAGChunk] = [] mocker.patch(