diff --git a/server/mcp_server_hqd/.env_example b/server/mcp_server_hqd/.env_example new file mode 100644 index 00000000..006514d8 --- /dev/null +++ b/server/mcp_server_hqd/.env_example @@ -0,0 +1,8 @@ +# HQD MCP Remote endpoint +HQD_MCP_ENDPOINT=https://sd6k08f59gqcea6qe13vg.apigateway-cn-beijing.volceapi.com/mcp + +# Auth token for remote HQD MCP endpoint +HQD_AUTH_TOKEN= + +# Server settings +PORT=8000 diff --git a/server/mcp_server_hqd/.gitignore b/server/mcp_server_hqd/.gitignore new file mode 100644 index 00000000..2c0ba064 --- /dev/null +++ b/server/mcp_server_hqd/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info +.venv +.env diff --git a/server/mcp_server_hqd/.python-version b/server/mcp_server_hqd/.python-version new file mode 100644 index 00000000..c8cfe395 --- /dev/null +++ b/server/mcp_server_hqd/.python-version @@ -0,0 +1 @@ +3.10 diff --git a/server/mcp_server_hqd/README.md b/server/mcp_server_hqd/README.md new file mode 100644 index 00000000..0e5ba9ab --- /dev/null +++ b/server/mcp_server_hqd/README.md @@ -0,0 +1,116 @@ +# HQD Multi-Source Search MCP Server + +## Version Information + +v0.1 + +## Product Description + +### Short Description + +Query enterprise data from HQD (High-Quality Dataset) multi-source search service, covering basic info, risk, operations, IP, and litigation data. + +### Long Description + +HQD Multi-Source Search MCP Server is a thin proxy that connects to the remote HQD MCP service deployed on Volcengine. It provides unified access to 5 enterprise data sources through a two-phase interaction pattern: metadata discovery (`describe_datasource`) followed by data retrieval (`query_datasource`). All queries are forwarded to the remote endpoint — no local data processing is performed. 
+ +## Category + +Data Intelligence + +## Tags + +Enterprise Data, Multi-Source Search, HQD + +## Tools + +This MCP Server product provides the following Tools (capabilities): + +### Tool 1: describe_datasource + +Get metadata for data sources including dimensions, metrics, and filters. Agents should call this first to understand available data structures before querying. + +### Tool 2: query_datasource + +Query data from a specific datasource with filtering, aggregation, and pagination. Supports filter operators: `eq`, `like`, `in`, `not_in`, `between`, `range`, `keyword`. + +## Compatible Platforms + +- Python + +## Authentication Method + +Bearer Token + +### Obtaining Auth Token + +Contact the HQD service administrator to obtain an authentication token. + +### Environment Variable Configuration + +| Variable Name | Value | +| ---------- | ---------- | +| `HQD_AUTH_TOKEN` | Auth token for the remote HQD MCP endpoint | +| `HQD_MCP_ENDPOINT` | Remote HQD MCP endpoint (optional, has default) | + +## Python MCP Server + +### Dependencies + +The machine running the MCP server must have the following dependencies installed: + +- [Python](https://www.python.org/downloads/) 3.10 or higher. +- [`uv`](https://docs.astral.sh/uv/) & [`uvx`](https://docs.astral.sh/uv/guides/tools/). + +### Deployment and Configuration + +```json +{ + "mcpServers": { + "mcp-server-hqd": { + "command": "uvx", + "args": [ + "--from", + "git+https://github.com/volcengine/mcp-server#subdirectory=server/mcp_server_hqd", + "mcp-server-hqd" + ], + "env": { + "HQD_AUTH_TOKEN": "Your HQD Auth Token" + } + } + } +} +``` + +> Note: Please replace `Your HQD Auth Token` above with the authentication token provided by the HQD service administrator. + +## Using Clients + +The following clients are supported for interacting with MCP Server. 
For specific configurations, please refer to the client documentation: + +- Cursor +- [Trae](https://www.trae.com.cn/) +- Claude Desktop +- Ark + +Supports [Cline](https://cline.bot/) plugin + +## Available Datasources + +| ID | Name | +|----|------| +| `enterprise_basic_wide` | Enterprise Basic Information | +| `enterprise_risk_wide` | Enterprise Risk Information | +| `enterprise_operation_wide` | Enterprise Operations Information | +| `enterprise_ip_wide` | Enterprise Intellectual Property | +| `enterprise_litigation` | Enterprise Litigation Information | + +## Conversation Initiation Example + +- List all available data sources. +- Query the basic information of the enterprise named "ByteDance". +- Search for enterprises with registered capital over 10 million in Beijing. + +## License + +[MIT](../../LICENSE) diff --git a/server/mcp_server_hqd/README_zh.md b/server/mcp_server_hqd/README_zh.md new file mode 100644 index 00000000..c3ce41fa --- /dev/null +++ b/server/mcp_server_hqd/README_zh.md @@ -0,0 +1,116 @@ +# 高质量数据集多源搜索 MCP Server + +## 版本信息 + +v0.1 + +## 产品描述 + +### 短描述 + +查询 HQD(高质量数据集)多源搜索服务中的企业数据,涵盖基本信息、风险、经营、知识产权和诉讼数据。 + +### 长描述 + +HQD 多源搜索 MCP Server 是一个轻量代理,连接部署在火山引擎上的远端 HQD MCP 服务。通过两阶段交互模式提供对 5 个企业数据源的统一访问:先通过 `describe_datasource` 发现元数据,再通过 `query_datasource` 检索数据。所有查询均转发到远端服务,本地不进行数据处理。 + +## 分类 + +数据智能 + +## 标签 + +企业数据、多源搜索、HQD + +## Tools + +本 MCP Server 产品提供以下 Tools (工具/能力): + +### Tool 1: describe_datasource + +获取数据源的可查询元数据信息,包括维度(dimensions)、指标(metrics)和过滤条件(filters)。Agent 应先调用此工具了解数据结构,再进行查询。 + +### Tool 2: query_datasource + +查询指定数据源的实际数据,支持过滤、聚合和分页。支持的过滤操作符:`eq`(精确)、`like`(模糊)、`in`(批量)、`not_in`(排除)、`between`(范围)、`range`(数值范围)、`keyword`(全文搜索)。 + +## 可适配平台 + +- Python + +## 鉴权方式 + +Bearer Token + +### 获取 Auth Token + +请联系 HQD 服务管理员获取认证令牌。 + +### 环境变量配置 + +| 变量名 | 值 | +| ---------- | ---------- | +| `HQD_AUTH_TOKEN` | 远端 HQD MCP 服务的认证令牌 | +| `HQD_MCP_ENDPOINT` | 远端 HQD MCP 端点地址(可选,有默认值) | + +## Python 版 MCP server + +### 依赖项 + +运行 MCP server 
的设备需要安装以下依赖项。 + +- [Python](https://www.python.org/downloads/) 3.10 或更高版本。 +- [`uv`](https://docs.astral.sh/uv/) & [`uvx`](https://docs.astral.sh/uv/guides/tools/)。 + +### 部署与配置 + +```json +{ + "mcpServers": { + "mcp-server-hqd": { + "command": "uvx", + "args": [ + "--from", + "git+https://github.com/volcengine/mcp-server#subdirectory=server/mcp_server_hqd", + "mcp-server-hqd" + ], + "env": { + "HQD_AUTH_TOKEN": "Your HQD Auth Token" + } + } + } +} +``` + +> 注:请将上方 `Your HQD Auth Token` 替换为 HQD 服务管理员提供的认证令牌。 + +## 使用客户端 + +支持通过以下客户端与 MCP Server 交互,具体配置可查阅该客户端文档。 + +- Cursor +- [Trae](https://www.trae.com.cn/) +- Claude Desktop +- 方舟 + +支持 [Cline](https://cline.bot/) 插件。 + +## 可用数据源 + +| ID | 名称 | +|----|------| +| `enterprise_basic_wide` | 企业基本信息宽表 | +| `enterprise_risk_wide` | 企业风险信息宽表 | +| `enterprise_operation_wide` | 企业经营信息宽表 | +| `enterprise_ip_wide` | 企业知识产权宽表 | +| `enterprise_litigation` | 企业诉讼信息 | + +## 对话发起示例 + +- 列出所有可用的数据源。 +- 查询名为"字节跳动"的企业基本信息。 +- 搜索北京市注册资本超过1000万的企业。 + +## 许可 + +[MIT](../../LICENSE) diff --git a/server/mcp_server_hqd/pyproject.toml b/server/mcp_server_hqd/pyproject.toml new file mode 100644 index 00000000..7e1767dd --- /dev/null +++ b/server/mcp_server_hqd/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "mcp-server-hqd" +version = "0.1.0" +description = "MCP server for HQD Multi-Source Enterprise Data Search" +readme = "README.md" +requires-python = ">=3.10" +license = {text = "MIT"} +dependencies = [ + "mcp[cli]>=1.5.0", + "requests>=2.31.0", + "python-dotenv>=1.0.1", +] + +[project.scripts] +mcp-server-hqd = "mcp_server_hqd.server:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/server/mcp_server_hqd/src/mcp_server_hqd/__init__.py b/server/mcp_server_hqd/src/mcp_server_hqd/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/server/mcp_server_hqd/src/mcp_server_hqd/config.py b/server/mcp_server_hqd/src/mcp_server_hqd/config.py new file mode 100644 index 
class HqdRemoteClient:
    """Stateful JSON-RPC client for the remote HQD MCP server.

    The client speaks the MCP streamable-http transport: every request is an
    HTTP POST carrying one JSON-RPC message, and the reply is either a plain
    JSON body or a Server-Sent-Events (SSE) stream containing JSON-RPC
    messages. The session id handed out by the server (``Mcp-Session-Id``
    header) is remembered and echoed back on subsequent requests.

    Args:
        endpoint: URL of the remote MCP endpoint.
        auth_token: Optional bearer token; when non-empty it is sent as
            ``Authorization: Bearer <token>`` on every request.
        timeout: Per-request timeout in seconds passed to ``requests``.
    """

    def __init__(self, endpoint: str, auth_token: str = "", timeout: float = 30):
        self._endpoint = endpoint
        self._auth_token = auth_token
        self._timeout = timeout
        self._session_id: Optional[str] = None
        # Tracked separately from the session id: a server that never issues
        # a session id must not trigger a new handshake on every tool call.
        self._initialized = False
        self._req_id = 0
        self._http = requests.Session()
        self._http.headers.update({
            "Content-Type": "application/json",
            "Accept": "application/json, text/event-stream",
        })
        if self._auth_token:
            self._http.headers["Authorization"] = f"Bearer {self._auth_token}"

    def _next_id(self) -> int:
        """Return the next JSON-RPC request id (1, 2, 3, ...)."""
        self._req_id += 1
        return self._req_id

    @staticmethod
    def _decode_event(data_lines):
        """Decode the ``data`` lines of one SSE event into JSON messages.

        The lines are first joined with newlines and parsed as one document.
        If that fails (e.g. several complete messages were sent on
        consecutive ``data`` lines without a blank separator), each line is
        parsed on its own.
        """
        try:
            return [json.loads("\n".join(data_lines))]
        except json.JSONDecodeError:
            return [json.loads(chunk) for chunk in data_lines if chunk.strip()]

    def _parse_sse_response(
        self, text: str, expected_id: Optional[int] = None
    ) -> Dict[str, Any]:
        """Extract a JSON-RPC message from an SSE stream or plain JSON body.

        Args:
            text: Decoded HTTP response body.
            expected_id: When given, the message whose ``id`` equals this
                value is preferred, so notifications or unrelated messages
                that precede the reply in the stream are skipped.

        Returns:
            The matching message, else the first message in the stream, else
            the whole body parsed as JSON when it has no ``data`` lines.

        Raises:
            ValueError: If the body is empty or is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        if not text or not text.strip():
            raise ValueError("Remote HQD MCP server returned an empty response body")

        messages = []
        pending = []
        # The extra "" makes sure the last event is flushed even when the
        # stream does not end with a blank line.
        for line in text.splitlines() + [""]:
            if line.startswith("data:"):
                value = line[5:]
                # Only a single leading space after the colon is a separator.
                pending.append(value[1:] if value.startswith(" ") else value)
            elif not line.strip() and pending:
                messages.extend(self._decode_event(pending))
                pending = []
            # Other SSE fields (event:, id:, retry:, comments) are ignored.

        if not messages:
            # Not an event stream: treat the body as a single JSON document.
            return json.loads(text)

        if expected_id is not None:
            for message in messages:
                if isinstance(message, dict) and message.get("id") == expected_id:
                    return message
        return messages[0]

    def _post(self, payload: Dict[str, Any]) -> "requests.Response":
        """POST one JSON-RPC message and remember the session id, if any.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        headers = {}
        if self._session_id:
            headers["Mcp-Session-Id"] = self._session_id

        resp = self._http.post(
            self._endpoint, json=payload, headers=headers, timeout=self._timeout
        )
        resp.raise_for_status()

        # Header lookup on a requests response is case-insensitive.
        sid = resp.headers.get("mcp-session-id")
        if sid:
            self._session_id = sid
        return resp

    def _send(
        self, method: str, params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Send a JSON-RPC request and return the decoded response message.

        Args:
            method: JSON-RPC method name.
            params: Method parameters; ``None`` is sent as ``{}``.

        Raises:
            requests.RequestException: On transport or HTTP-status failure.
            ValueError: If the response body cannot be parsed.
            RuntimeError: If the server replies with a JSON-RPC ``error``.
        """
        request_id = self._next_id()
        payload = {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": method,
            "params": params or {},
        }
        resp = self._post(payload)

        # Decode explicitly as UTF-8 instead of using resp.text: for a
        # text/* content type without a charset (typical for
        # text/event-stream) requests assumes ISO-8859-1, which garbles
        # non-ASCII payloads.
        body = resp.content.decode("utf-8", errors="replace")
        message = self._parse_sse_response(body, expected_id=request_id)

        error = message.get("error") if isinstance(message, dict) else None
        if error:
            # Surface the failure instead of letting callers see an empty result.
            if isinstance(error, dict):
                detail = f"{error.get('code')}: {error.get('message')}"
            else:
                detail = str(error)
            raise RuntimeError(f"Remote HQD MCP error {detail}")
        return message

    def _notify(self, method: str, params: Optional[Dict[str, Any]] = None) -> None:
        """Send a JSON-RPC notification (no ``id``; the reply body is ignored)."""
        payload = {"jsonrpc": "2.0", "method": method}
        if params:
            payload["params"] = params
        self._post(payload)

    def initialize(self) -> None:
        """Perform the MCP handshake with the remote server.

        Sends ``initialize`` and then the ``notifications/initialized``
        notification. A failure of the notification is logged but does not
        abort the session.
        """
        result = self._send("initialize", {
            "protocolVersion": "2024-11-05",
            "capabilities": {},
            "clientInfo": {"name": "mcp-server-hqd-proxy", "version": "0.1.0"},
        })
        try:
            self._notify("notifications/initialized")
        except requests.RequestException as exc:
            logger.warning("Could not send notifications/initialized: %s", exc)
        self._initialized = True

        server_info = (result.get("result") or {}).get("serverInfo", {})
        logger.info(
            "Connected to remote HQD MCP: %s v%s, session=%s",
            server_info.get("name"),
            server_info.get("version"),
            self._session_id,
        )

    def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> str:
        """Call a tool on the remote MCP server and return its text result.

        The handshake is performed lazily on the first call.

        Args:
            tool_name: Name of the remote tool.
            arguments: Tool arguments, forwarded unchanged.

        Returns:
            All ``text`` content items joined with newlines; when the result
            has no text content, the whole result serialized as JSON.

        Raises:
            requests.RequestException, ValueError, RuntimeError: Propagated
                from the handshake or the ``tools/call`` request.
        """
        if not self._initialized:
            self.initialize()

        result = self._send("tools/call", {
            "name": tool_name,
            "arguments": arguments,
        })

        rpc_result = result.get("result") or {}
        texts = [
            item.get("text", "")
            for item in (rpc_result.get("content") or [])
            if isinstance(item, dict) and item.get("type") == "text"
        ]
        if rpc_result.get("isError"):
            logger.warning("Remote tool %s reported an error result", tool_name)
        if texts:
            return "\n".join(texts)

        # No text content: hand back the raw result so nothing is lost.
        return json.dumps(rpc_result, ensure_ascii=False, indent=2)
def main():
    """Parse the command line and run the HQD MCP proxy server.

    The only option is ``--transport`` / ``-t`` (``sse`` or ``stdio``,
    default ``stdio``). Any exception raised while running the server is
    logged and then re-raised.
    """
    cli = argparse.ArgumentParser(
        description="Run the HQD Multi-Source Search MCP Server"
    )
    cli.add_argument(
        "--transport",
        "-t",
        default="stdio",
        choices=["sse", "stdio"],
        help="Transport protocol to use (sse or stdio)",
    )
    transport = cli.parse_args().transport

    startup_message = (
        f"Starting HQD MCP Server with {transport} transport, "
        f"proxying to {config.endpoint}"
    )
    logger.info(startup_message)

    try:
        mcp.run(transport=transport)
    except Exception as exc:
        # Record the failure, then let it propagate to the caller.
        logger.error(f"Error starting HQD MCP Server: {exc}")
        raise


if __name__ == "__main__":
    main()