diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..f77a9d5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.6 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format diff --git a/CHANGELOG.md b/CHANGELOG.md index 65b0811..80c0a4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,28 @@ All notable changes to the OpenIntent SDK will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.15.0] - 2026-03-02 + +### Added + +- **RFC-0010 Retry Policy MCP Tools** — 4 new MCP tools: `set_retry_policy` (admin), `get_retry_policy` (read), `record_failure` (write), `get_failures` (read). MCP tool surface expanded from 66 to 70 tools; RBAC counts: reader=25, operator=43, admin=70. +- **`build_retry_failure_tools()`** — New helper in the Python MCP bridge (`openintent.mcp`) that constructs retry policy and failure-tracking tool definitions for MCP integration, enabling agents to manage retry policies and record/query failures through MCP. + +### Changed + +- **Native FastAPI SSE** — Replaced `sse-starlette` third-party dependency with FastAPI's built-in `EventSourceResponse` and `ServerSentEvent` (available since FastAPI 0.135.0). All four SSE subscription endpoints (`/api/v1/subscribe/intents/{id}`, `/api/v1/subscribe/portfolios/{id}`, `/api/v1/subscribe/agents/{id}`, `/api/v1/subscribe/channels/{id}`) now use native async generator pattern with `ServerSentEvent` objects instead of yielding raw dicts through `sse-starlette`'s `EventSourceResponse`. Keep-alive pings now use SSE comment format (`ServerSentEvent(comment="ping")`) per the SSE spec recommendation. + +### Removed + +- **`sse-starlette` dependency** — No longer required. The `server` optional dependency group now requires `fastapi>=0.135.0` (previously `>=0.104.0`) which includes native SSE support. + +### Updated + +- All version references updated to 0.15.0 across Python SDK, MCP server package, and changelog. +- FastAPI minimum version bumped from `>=0.104.0` to `>=0.135.0` in server extras. + +--- + ## [0.14.1] - 2026-02-27 ### Fixed diff --git a/docs/changelog.md b/docs/changelog.md index f28770c..4553788 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -5,6 +5,33 @@ All notable changes to the OpenIntent SDK will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.15.0] - 2026-03-02 + +### Added + +- **RFC-0010 Retry Policy MCP Tools** — 4 new MCP tools for retry policy management and failure tracking: + - `set_retry_policy` — Set or update retry policy on an intent (admin tier). + - `get_retry_policy` — Retrieve the current retry policy for an intent (reader tier). + - `record_failure` — Record a failure event against an intent for retry tracking (operator tier). + - `get_failures` — List recorded failures for an intent (reader tier). +- **`build_retry_failure_tools()`** — New helper in the Python MCP bridge (`openintent.mcp`) that builds the 4 retry/failure tool definitions for use with `MCPToolProvider` and `MCPToolExporter`. +- **MCP Tool Surface Expansion** — MCP tool surface expanded from 66 to 70 tools; RBAC counts: reader=25, operator=43, admin=70. + +### Changed + +- **Native FastAPI SSE** — Replaced `sse-starlette` third-party dependency with FastAPI's built-in `EventSourceResponse` and `ServerSentEvent` (available since FastAPI 0.135.0). All four SSE subscription endpoints (`/api/v1/subscribe/intents/{id}`, `/api/v1/subscribe/portfolios/{id}`, `/api/v1/subscribe/agents/{id}`, `/api/v1/subscribe/channels/{id}`) now use native async generator pattern with `ServerSentEvent` objects instead of yielding raw dicts through `sse-starlette`'s `EventSourceResponse`. Keep-alive pings now use SSE comment format (`ServerSentEvent(comment="ping")`) per the SSE spec recommendation. + +### Removed + +- **`sse-starlette` dependency** — No longer required. The `server` optional dependency group now requires `fastapi>=0.135.0` (previously `>=0.104.0`) which includes native SSE support. + +### Updated + +- All version references updated to 0.15.0 across Python SDK, MCP server package, and changelog. +- FastAPI minimum version bumped from `>=0.104.0` to `>=0.135.0` in server extras. + +--- + ## [0.14.1] - 2026-02-27 ### Fixed diff --git a/docs/guide/cost-retry.md b/docs/guide/cost-retry.md index fbd6745..3a894ce 100644 --- a/docs/guide/cost-retry.md +++ b/docs/guide/cost-retry.md @@ -188,8 +188,46 @@ workflow: !!! warning "Idempotency" When using retry policies, ensure your agent handlers are idempotent — safe to call multiple times with the same input without side effects. +## MCP Tool Access for Retry Policies + +The [MCP server](mcp.md) exposes 4 dedicated tools for retry policy management via the Model Context Protocol, allowing MCP clients like Claude Desktop and Cursor to manage retry policies and query failure history directly. + +| MCP Tool | Tier | Description | +|----------|------|-------------| +| `openintent_set_retry_policy` | `admin` | Configure retry policy for an intent (max attempts, backoff strategy, delays) | +| `openintent_get_retry_policy` | `read` | Retrieve the current retry policy for an intent | +| `openintent_record_failure` | `write` | Record a failure attempt against an intent for retry scheduling | +| `openintent_get_failures` | `read` | Query failure history for an intent | + +These tools are available through the `@openintentai/mcp-server` package and follow the same RBAC model as all other MCP tools. The `build_retry_failure_tools()` function in the Python MCP bridge registers these tools automatically. + +```python +from openintent import Agent, MCPTool, on_assignment + +@Agent("retry-manager", model="gpt-4o", tools=[ + MCPTool( + server="npx", + args=["-y", "@openintentai/mcp-server"], + role="admin", + allowed_tools=[ + "openintent_set_retry_policy", + "openintent_get_retry_policy", + "openintent_record_failure", + "openintent_get_failures", + ], + ), +]) +class RetryManager: + @on_assignment + async def work(self, intent): + return await self.think(intent.description) +``` + +With these tools, the total MCP tool surface is 70 tools across all categories, with RBAC counts of reader=25, operator=43, admin=70. + ## Next Steps +- [MCP Integration](mcp.md) — Full MCP server and bridge documentation - [LLM Adapters](adapters.md) — Automatic cost tracking for LLM calls - [Governance & Arbitration](governance.md) — Budget enforcement and escalation - [YAML Workflows](workflows.md) — Declarative retry and cost configuration diff --git a/docs/guide/mcp.md b/docs/guide/mcp.md index b1178e4..0a97d3b 100644 --- a/docs/guide/mcp.md +++ b/docs/guide/mcp.md @@ -120,7 +120,7 @@ class Researcher: ``` !!! info "RBAC role determines tool visibility" - The `role` field on `MCPTool` maps directly to the RBAC system. When connecting to `@openintentai/mcp-server`, the server filters its tool listing based on the role: `reader` sees 4 read-only tools, `operator` sees 10 read+write tools, `admin` sees all 16 tools. The `allowed_tools` field further restricts within the role's permissions — both gates must pass. + The `role` field on `MCPTool` maps directly to the RBAC system. When connecting to `@openintentai/mcp-server`, the server filters its tool listing based on the role: `reader` sees 25 read-only tools, `operator` sees 43 read+write tools, `admin` sees all 70 tools. The `allowed_tools` field further restricts within the role's permissions — both gates must pass. !!! warning "Least-privilege by design" `MCPTool.role` defaults to `"reader"` — the most restrictive level. Each agent should declare exactly the minimum role it needs. In multi-agent topologies, each agent's MCP server runs as an isolated child process with its own explicit role, so one agent's privilege never leaks to another. Do **not** set `OPENINTENT_MCP_ROLE` as a global environment variable in multi-agent setups — declare the role per-agent on each `MCPTool` instead. @@ -256,6 +256,10 @@ With the MCP server connected, Claude can interact with OpenIntent directly. Her | `openintent_assign_agent` | `admin` | Assign an agent to an intent | | `openintent_unassign_agent` | `admin` | Remove an agent assignment | | `openintent_create_channel` | `admin` | Create a messaging channel | +| `openintent_set_retry_policy` | `admin` | Configure retry policy for an intent | +| `openintent_get_retry_policy` | `read` | Retrieve the retry policy for an intent | +| `openintent_record_failure` | `write` | Record a failure attempt for retry scheduling | +| `openintent_get_failures` | `read` | Query failure history for an intent | ### Available Resources @@ -287,9 +291,9 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol | Role | Tiers Granted | Tool Count | Use Case | |------|---------------|------------|----------| -| **`reader`** | `read` | 4 | Dashboards, monitoring, auditing. The MCP client can observe but never modify protocol state. | -| **`operator`** | `read` + `write` | 10 | Worker agents that create intents, update state, and communicate. Cannot change lifecycle status, manage leases, or restructure agent assignments. | -| **`admin`** | `read` + `write` + `admin` | 16 | Trusted orchestrators with full control. Required for lifecycle management, lease coordination, and structural operations. | +| **`reader`** | `read` | 25 | Dashboards, monitoring, auditing. The MCP client can observe but never modify protocol state. | +| **`operator`** | `read` + `write` | 43 | Worker agents that create intents, update state, and communicate. Cannot change lifecycle status, manage leases, or restructure agent assignments. | +| **`admin`** | `read` + `write` + `admin` | 70 | Trusted orchestrators with full control. Required for lifecycle management, lease coordination, and structural operations. | #### Tool Classification @@ -301,6 +305,8 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol | `openintent_list_intents` | Query intents with status filters | | `openintent_get_events` | Read the immutable event log | | `openintent_get_messages` | Read channel message history | +| `openintent_get_retry_policy` | Retrieve the retry policy for an intent | +| `openintent_get_failures` | Query failure history for an intent | **Write tier** — bounded mutations with protocol safety: @@ -312,6 +318,7 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol | `openintent_send_message` | Send a channel message | Scoped to channel membership | | `openintent_ask` | Request/reply on a channel | Scoped to channel membership | | `openintent_broadcast` | Broadcast to channel | Scoped to channel membership | +| `openintent_record_failure` | Record a failure attempt | Append-only failure record for retry scheduling | **Admin tier** — structural and lifecycle operations: @@ -323,6 +330,7 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol | `openintent_assign_agent` | Add agent to intent | Changes who can work on an intent. | | `openintent_unassign_agent` | Remove agent from intent | Removing an active agent disrupts in-progress work. | | `openintent_create_channel` | Create messaging channel | Establishes communication topology between agents. | +| `openintent_set_retry_policy` | Configure retry policy for an intent | Controls retry behavior including max attempts, backoff strategy, and delay parameters. Misconfiguration can cause excessive retries. | #### Default Role @@ -398,7 +406,7 @@ If the role is not set or is set to an unrecognized value, the server falls back The server logs its active role and tool count at startup: ``` -[openintent-mcp] Server started – role="operator", tools=10/16, connected to http://localhost:8000 +[openintent-mcp] Server started – role="operator", tools=43/70, connected to http://localhost:8000 ``` If the role is `admin`, a warning is emitted: diff --git a/docs/overrides/home.html b/docs/overrides/home.html index b44b097..d06ac3f 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -24,7 +24,7 @@
-
v0.14.0 — Federation Protocol, Security & Python SDK Implementation
+
v0.15.0 — Native FastAPI SSE & RFC-0010 Retry MCP Tools

Stop Duct-Taping Your Agents Together

OpenIntent is a durable, auditable protocol for multi-agent coordination. Structured intents replace fragile chat chains. Versioned state replaces guesswork. Ship agent systems that actually work in production. @@ -50,7 +50,7 @@

Stop Duct-Taping Your Agents Together

Tests
-
v0.14.0
+
v0.15.0
Latest
diff --git a/mcp-server/README.md b/mcp-server/README.md index b26951e..92b5762 100644 --- a/mcp-server/README.md +++ b/mcp-server/README.md @@ -70,9 +70,9 @@ The server enforces role-based access control with three tiers: | Role | Tools | Description | |---|---|---| -| `reader` | 21 | Read-only observation: list/get intents, events, agents, costs | -| `operator` | 38 | Bounded mutations: create intents, post events, manage leases | -| `admin` | 62 | Full lifecycle: governance, identity, vaults, triggers, plans | +| `reader` | 25 | Read-only observation: list/get intents, events, agents, costs, retry policies, failures | +| `operator` | 43 | Bounded mutations: create intents, post events, manage leases, record failures | +| `admin` | 70 | Full lifecycle: governance, identity, vaults, triggers, plans, retry policies | Tools not permitted by the assigned role are hidden from the MCP tool listing entirely. @@ -93,6 +93,7 @@ The role gate works alongside the `allowed_tools` allowlist — both must pass f - **Verifiable Logs** (RFC-0019) — Hash chains, Merkle proofs - **Tracing** (RFC-0020) — Distributed trace propagation - **Messaging** (RFC-0021) — Agent-to-agent channels +- **Retry Policies** (RFC-0010) — Set/get retry policies, record and query failures ## Resources diff --git a/mcp-server/package.json b/mcp-server/package.json index d9dfd39..ae4cca8 100644 --- a/mcp-server/package.json +++ b/mcp-server/package.json @@ -1,6 +1,6 @@ { "name": "@openintentai/mcp-server", - "version": "0.14.1", + "version": "0.15.0", "description": "MCP server exposing the OpenIntent Coordination Protocol as MCP tools and resources", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/mcp-server/src/client.ts b/mcp-server/src/client.ts index 0b0df06..d4f95ab 100644 --- a/mcp-server/src/client.ts +++ b/mcp-server/src/client.ts @@ -926,4 +926,51 @@ export class OpenIntentClient { trace_id: params.trace_id, }); } + + // ── Retry Policy & Failures (RFC-0010) ────────────────────────────── + + async setRetryPolicy(params: { + intent_id: string; + policy: Record; + }): Promise { + return this.request("PUT", `/api/v1/intents/${params.intent_id}/retry-policy`, params.policy); + } + + async getRetryPolicy(params: { + intent_id: string; + }): Promise { + return this.request("GET", `/api/v1/intents/${params.intent_id}/retry-policy`); + } + + async recordFailure(params: { + intent_id: string; + agent_id: string; + attempt_number: number; + error_code?: string; + error_message?: string; + retry_scheduled_at?: string; + metadata?: Record; + }): Promise { + return this.request("POST", `/api/v1/intents/${params.intent_id}/failures`, { + agent_id: params.agent_id, + attempt_number: params.attempt_number, + error_code: params.error_code, + error_message: params.error_message, + retry_scheduled_at: params.retry_scheduled_at, + metadata: params.metadata ?? {}, + }); + } + + async getFailures(params: { + intent_id: string; + limit?: number; + }): Promise { + const query = new URLSearchParams(); + if (params.limit !== undefined) query.set("limit", String(params.limit)); + const qs = query.toString(); + return this.request( + "GET", + `/api/v1/intents/${params.intent_id}/failures${qs ? `?${qs}` : ""}`, + ); + } } diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts index 15c1fcd..b35a16f 100644 --- a/mcp-server/src/index.ts +++ b/mcp-server/src/index.ts @@ -30,7 +30,7 @@ async function main() { const server = new Server( { name: "openintent-mcp", - version: "0.14.1", + version: "0.15.0", }, { capabilities: { diff --git a/mcp-server/src/security.ts b/mcp-server/src/security.ts index 9f9712b..d58517c 100644 --- a/mcp-server/src/security.ts +++ b/mcp-server/src/security.ts @@ -121,6 +121,12 @@ export const TOOL_TIERS: Record = { openintent_start_trace: "write", openintent_link_spans: "write", + // Retry Policy & Failures (RFC-0010) + openintent_set_retry_policy: "admin", + openintent_get_retry_policy: "read", + openintent_record_failure: "write", + openintent_get_failures: "read", + // Federation (RFC-0022) openintent_federation_status: "read", openintent_list_federated_agents: "read", diff --git a/mcp-server/src/tools.ts b/mcp-server/src/tools.ts index 7a4a5b7..116d278 100644 --- a/mcp-server/src/tools.ts +++ b/mcp-server/src/tools.ts @@ -1311,6 +1311,80 @@ export const TOOL_DEFINITIONS: ToolDefinition[] = [ tier: "write" as ToolTier, }, + // ── Retry Policy & Failures (RFC-0010) ────────────────────────────── + { + name: "openintent_set_retry_policy", + description: + "Set or update the retry policy for an intent (RFC-0010). Controls automatic " + + "retry behaviour including strategy, max attempts, backoff delays, and fallback agent.", + inputSchema: { + type: "object", + properties: { + intent_id: { type: "string", description: "The intent to configure retry policy for" }, + policy: { + type: "object", + description: "Retry policy configuration: { strategy: 'none'|'fixed'|'exponential'|'linear', max_retries: 3, base_delay_ms: 1000, max_delay_ms: 60000, fallback_agent_id: 'agent-backup'|null, failure_threshold: 3 }", + additionalProperties: true, + }, + }, + required: ["intent_id", "policy"], + }, + tier: "admin" as ToolTier, + }, + { + name: "openintent_get_retry_policy", + description: + "Retrieve the current retry policy configured for an intent. Returns " + + "the policy including max retries, backoff strategy, and retryable errors.", + inputSchema: { + type: "object", + properties: { + intent_id: { type: "string", description: "The intent to query" }, + }, + required: ["intent_id"], + }, + tier: "read" as ToolTier, + }, + { + name: "openintent_record_failure", + description: + "Record a failure event against an intent (RFC-0010). The server evaluates the " + + "failure against the retry policy and may automatically schedule a retry.", + inputSchema: { + type: "object", + properties: { + intent_id: { type: "string", description: "The intent that experienced a failure" }, + agent_id: { type: "string", description: "ID of the agent reporting the failure" }, + attempt_number: { type: "number", description: "Which retry attempt this failure represents (1-based)" }, + error_code: { type: "string", description: "Error classification: RATE_LIMIT, TIMEOUT, NETWORK_ERROR, INVALID_OUTPUT, BUDGET_EXCEEDED, PERMISSION_DENIED" }, + error_message: { type: "string", description: "Human-readable error message" }, + retry_scheduled_at: { type: "string", description: "ISO 8601 timestamp for when the next retry is scheduled (optional)" }, + metadata: { + type: "object", + description: "Additional failure context (e.g. { http_status: 429 })", + additionalProperties: true, + }, + }, + required: ["intent_id", "agent_id", "attempt_number"], + }, + tier: "write" as ToolTier, + }, + { + name: "openintent_get_failures", + description: + "Retrieve the failure history for an intent, ordered by most recent first. " + + "Useful for diagnosing recurring issues and reviewing retry attempts.", + inputSchema: { + type: "object", + properties: { + intent_id: { type: "string", description: "The intent to query" }, + limit: { type: "number", description: "Maximum number of failures to return (default 50)" }, + }, + required: ["intent_id"], + }, + tier: "read" as ToolTier, + }, + // ── Federation (RFC-0022) ─────────────────────────────────────────── { name: "openintent_federation_status", @@ -1919,6 +1993,39 @@ export async function handleToolCall( }); break; + // ── Retry Policy & Failures (RFC-0010) ────────────────────────── + case "openintent_set_retry_policy": + result = await client.setRetryPolicy({ + intent_id: args.intent_id as string, + policy: args.policy as Record, + }); + break; + + case "openintent_get_retry_policy": + result = await client.getRetryPolicy({ + intent_id: args.intent_id as string, + }); + break; + + case "openintent_record_failure": + result = await client.recordFailure({ + intent_id: args.intent_id as string, + agent_id: args.agent_id as string, + attempt_number: args.attempt_number as number, + error_code: args.error_code as string | undefined, + error_message: args.error_message as string | undefined, + retry_scheduled_at: args.retry_scheduled_at as string | undefined, + metadata: args.metadata as Record | undefined, + }); + break; + + case "openintent_get_failures": + result = await client.getFailures({ + intent_id: args.intent_id as string, + limit: args.limit as number | undefined, + }); + break; + case "openintent_federation_status": result = await client.getFederationStatus(); break; diff --git a/mkdocs.yml b/mkdocs.yml index a1f272c..e3ff6dd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -204,7 +204,7 @@ extra: link: https://pypi.org/project/openintent/ version: provider: mike - announcement: "v0.14.0 is here — Federation protocol (RFC-0022), federation security (RFC-0023), and complete RFC 0001–0023 coverage. Read the changelog →" + announcement: "v0.15.0 is here — Native FastAPI SSE replaces sse-starlette, plus 4 new RFC-0010 retry MCP tools (70 total). Read the changelog →" meta: - name: description content: "OpenIntent Python SDK — structured multi-agent coordination protocol with decorator-first agents, 23 RFCs, 7 LLM adapters, federation, MCP integration, and built-in FastAPI server." diff --git a/openintent/__init__.py b/openintent/__init__.py index a03cb2b..b262df2 100644 --- a/openintent/__init__.py +++ b/openintent/__init__.py @@ -233,7 +233,7 @@ def get_server() -> tuple[Any, Any, Any]: ) -__version__ = "0.14.1" +__version__ = "0.15.0" __all__ = [ "OpenIntentClient", "AsyncOpenIntentClient", diff --git a/openintent/mcp.py b/openintent/mcp.py index 6415e52..d32e188 100644 --- a/openintent/mcp.py +++ b/openintent/mcp.py @@ -264,6 +264,246 @@ async def handle_call( return {"result": result} +def build_retry_failure_tools( + client: Any, +) -> list[dict[str, Any]]: + """Build MCP tool definitions for RFC-0010 Retry Policy & Failure tools. + + Each tool delegates to the corresponding method on an + :class:`~openintent.client.OpenIntentClient` (sync) or + :class:`~openintent.client.AsyncOpenIntentClient` (async). + + Args: + client: An ``OpenIntentClient`` or ``AsyncOpenIntentClient`` instance. + + Returns: + A list of tool definition dicts ready for :class:`MCPToolExporter`. + """ + import asyncio + + def _call(method: Any, kwargs: dict[str, Any]) -> Any: + if asyncio.iscoroutinefunction(method): + return asyncio.get_event_loop().run_until_complete(method(**kwargs)) + return method(**kwargs) + + def _to_dict(obj: Any) -> Any: + if hasattr(obj, "to_dict"): + return obj.to_dict() + if isinstance(obj, list): + return [_to_dict(item) for item in obj] + return obj + + def _set_retry_policy( + intent_id: str, + policy: Optional[dict[str, Any]] = None, + **kwargs: Any, + ) -> dict[str, Any]: + from .models import RetryStrategy + + policy = policy or kwargs + strategy_val = policy.get("strategy", "exponential") + try: + strategy = RetryStrategy(strategy_val) + except ValueError: + strategy = RetryStrategy.EXPONENTIAL + result = _call( + client.set_retry_policy, + { + "intent_id": intent_id, + "strategy": strategy, + "max_retries": policy.get("max_retries", 3), + "base_delay_ms": policy.get("base_delay_ms", 1000), + "max_delay_ms": policy.get("max_delay_ms", 60000), + "fallback_agent_id": policy.get("fallback_agent_id"), + "failure_threshold": policy.get("failure_threshold", 3), + }, + ) + converted = _to_dict(result) + return converted if isinstance(converted, dict) else {"result": converted} + + def _get_retry_policy(intent_id: str, **_: Any) -> dict[str, Any]: + result = _call(client.get_retry_policy, {"intent_id": intent_id}) + if result is None: + return {"retry_policy": None} + converted = _to_dict(result) + return converted if isinstance(converted, dict) else {"result": converted} + + def _record_failure( + intent_id: str, + agent_id: str = "", + attempt_number: int = 1, + error_code: Optional[str] = None, + error_message: Optional[str] = None, + retry_scheduled_at: Optional[str] = None, + metadata: Optional[dict[str, Any]] = None, + **_: Any, + ) -> dict[str, Any]: + from datetime import datetime as _dt + + scheduled: Optional[Any] = None + if retry_scheduled_at: + try: + scheduled = _dt.fromisoformat(retry_scheduled_at) + except (ValueError, TypeError): + scheduled = None + kwargs: dict[str, Any] = { + "intent_id": intent_id, + "attempt_number": attempt_number, + "error_code": error_code, + "error_message": error_message, + "retry_scheduled_at": scheduled, + "metadata": metadata, + } + result = _call(client.record_failure, kwargs) + converted = _to_dict(result) + return converted if isinstance(converted, dict) else {"result": converted} + + def _get_failures( + intent_id: str, + limit: int = 50, + **_: Any, + ) -> dict[str, Any]: + result = _call(client.get_failures, {"intent_id": intent_id}) + failures = _to_dict(result) + if isinstance(failures, list): + failures = failures[:limit] + return {"failures": failures} + return failures if isinstance(failures, dict) else {"result": failures} + + return [ + { + "name": "openintent_set_retry_policy", + "description": ( + "Set or update the retry policy for an intent (RFC-0010). Controls " + "automatic retry behaviour including strategy, max attempts, backoff " + "delays, and fallback agent." + ), + "inputSchema": { + "type": "object", + "properties": { + "intent_id": { + "type": "string", + "description": "The intent to configure retry policy for", + }, + "policy": { + "type": "object", + "description": ( + "Retry policy configuration: { strategy: " + "'none'|'fixed'|'exponential'|'linear', " + "max_retries: 3, base_delay_ms: 1000, " + "max_delay_ms: 60000, " + "fallback_agent_id: 'agent-backup'|null, " + "failure_threshold: 3 }" + ), + "additionalProperties": True, + }, + }, + "required": ["intent_id", "policy"], + }, + "handler": _set_retry_policy, + }, + { + "name": "openintent_get_retry_policy", + "description": ( + "Retrieve the current retry policy configured for an " + "intent. Returns the policy including max retries, " + "backoff strategy, and retryable errors." + ), + "inputSchema": { + "type": "object", + "properties": { + "intent_id": { + "type": "string", + "description": "The intent to get retry policy for", + }, + }, + "required": ["intent_id"], + }, + "handler": _get_retry_policy, + }, + { + "name": "openintent_record_failure", + "description": ( + "Record a failure event against an intent (RFC-0010). The server " + "evaluates the failure against the retry policy and may " + "automatically schedule a retry." + ), + "inputSchema": { + "type": "object", + "properties": { + "intent_id": { + "type": "string", + "description": "The intent that experienced a failure", + }, + "agent_id": { + "type": "string", + "description": "ID of the agent reporting the failure", + }, + "attempt_number": { + "type": "number", + "description": ( + "Which retry attempt this failure represents (1-based)" + ), + }, + "error_code": { + "type": "string", + "description": ( + "Error classification: RATE_LIMIT, TIMEOUT, " + "NETWORK_ERROR, INVALID_OUTPUT, BUDGET_EXCEEDED, " + "PERMISSION_DENIED" + ), + }, + "error_message": { + "type": "string", + "description": "Human-readable error description", + }, + "retry_scheduled_at": { + "type": "string", + "description": ( + "ISO 8601 timestamp for when the next retry is " + "scheduled (optional)" + ), + }, + "metadata": { + "type": "object", + "description": ( + "Additional failure context (e.g. { http_status: 429 })" + ), + "additionalProperties": True, + }, + }, + "required": ["intent_id", "agent_id", "attempt_number"], + }, + "handler": _record_failure, + }, + { + "name": "openintent_get_failures", + "description": ( + "Retrieve the failure history for an intent, ordered " + "by most recent first. Useful for diagnosing recurring " + "issues and reviewing retry attempts." + ), + "inputSchema": { + "type": "object", + "properties": { + "intent_id": { + "type": "string", + "description": "The intent to get failure history for", + }, + "limit": { + "type": "number", + "description": ( + "Maximum number of failures to return (default 50)" + ), + }, + }, + "required": ["intent_id"], + }, + "handler": _get_failures, + }, + ] + + class MCPBridge: """High-level bridge managing multiple MCP server connections.""" diff --git a/openintent/server/app.py b/openintent/server/app.py index 5ec48c1..6a45b2f 100644 --- a/openintent/server/app.py +++ b/openintent/server/app.py @@ -11,9 +11,9 @@ from fastapi import Depends, FastAPI, Header, HTTPException, Query, Request, Response from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse +from fastapi.responses import EventSourceResponse, JSONResponse +from fastapi.sse import ServerSentEvent from pydantic import BaseModel, ConfigDict, Field -from sse_starlette.sse import EventSourceResponse from .config import ServerConfig from .database import ( # noqa: F401 @@ -2752,7 +2752,10 @@ async def remove_intent_from_portfolio( finally: session.close() - @app.get("/api/v1/subscribe/intents/{intent_id}") + @app.get( + "/api/v1/subscribe/intents/{intent_id}", + response_class=EventSourceResponse, + ) async def subscribe_intent( intent_id: str, request: Request, @@ -2762,27 +2765,26 @@ async def subscribe_intent( """SSE subscription for intent events.""" queue: asyncio.Queue = asyncio.Queue(maxsize=100) _event_queues["intents"].append(queue) + try: + while True: + if await request.is_disconnected(): + break + try: + event = await asyncio.wait_for(queue.get(), timeout=30.0) + if event.get("intent_id") == intent_id: + yield ServerSentEvent( + data=event.get("data", {}), + event=event.get("type", "message"), + ) + except asyncio.TimeoutError: + yield ServerSentEvent(comment="ping") + finally: + _event_queues["intents"].remove(queue) - async def event_generator(): - try: - while True: - if await request.is_disconnected(): - break - try: - event = await asyncio.wait_for(queue.get(), timeout=30.0) - if event.get("intent_id") == intent_id: - yield { - "event": event.get("type", "message"), - "data": str(event.get("data", {})), - } - except asyncio.TimeoutError: - yield {"event": "ping", "data": ""} - finally: - _event_queues["intents"].remove(queue) - - return EventSourceResponse(event_generator()) - - @app.get("/api/v1/subscribe/portfolios/{portfolio_id}") + @app.get( + "/api/v1/subscribe/portfolios/{portfolio_id}", + response_class=EventSourceResponse, + ) async def subscribe_portfolio( portfolio_id: str, request: Request, @@ -2792,27 +2794,26 @@ async def subscribe_portfolio( """SSE subscription for portfolio events.""" queue: asyncio.Queue = asyncio.Queue(maxsize=100) _event_queues["portfolios"].append(queue) + try: + while True: + if await request.is_disconnected(): + break + try: + event = await asyncio.wait_for(queue.get(), timeout=30.0) + if event.get("portfolio_id") == portfolio_id: + yield ServerSentEvent( + data=event.get("data", {}), + event=event.get("type", "message"), + ) + except asyncio.TimeoutError: + yield ServerSentEvent(comment="ping") + finally: + _event_queues["portfolios"].remove(queue) - async def event_generator(): - try: - while True: - if await request.is_disconnected(): - break - try: - event = await asyncio.wait_for(queue.get(), timeout=30.0) - if event.get("portfolio_id") == portfolio_id: - yield { - "event": event.get("type", "message"), - "data": str(event.get("data", {})), - } - except asyncio.TimeoutError: - yield {"event": "ping", "data": ""} - finally: - _event_queues["portfolios"].remove(queue) - - return EventSourceResponse(event_generator()) - - @app.get("/api/v1/subscribe/agents/{agent_id}") + @app.get( + "/api/v1/subscribe/agents/{agent_id}", + response_class=EventSourceResponse, + ) async def subscribe_agent( agent_id: str, request: Request, @@ -2822,25 +2823,21 @@ async def subscribe_agent( """SSE subscription for agent events.""" queue: asyncio.Queue = asyncio.Queue(maxsize=100) _event_queues["agents"].append(queue) - - async def event_generator(): - try: - while True: - if await request.is_disconnected(): - break - try: - event = await asyncio.wait_for(queue.get(), timeout=30.0) - if event.get("agent_id") == agent_id: - yield { - "event": event.get("type", "message"), - "data": str(event.get("data", {})), - } - except asyncio.TimeoutError: - yield {"event": "ping", "data": ""} - finally: - _event_queues["agents"].remove(queue) - - return EventSourceResponse(event_generator()) + try: + while True: + if await request.is_disconnected(): + break + try: + event = await asyncio.wait_for(queue.get(), timeout=30.0) + if event.get("agent_id") == agent_id: + yield ServerSentEvent( + data=event.get("data", {}), + event=event.get("type", "message"), + ) + except asyncio.TimeoutError: + yield ServerSentEvent(comment="ping") + finally: + _event_queues["agents"].remove(queue) @app.post("/api/v1/intents/{intent_id}/arbitrate") async def request_arbitration( @@ -4621,7 +4618,10 @@ async def delete_channel( channel_messages.pop(channel_id, None) return {"status": "closed", "channel_id": channel_id} - @app.get("/api/v1/subscribe/channels/{channel_id}") + @app.get( + "/api/v1/subscribe/channels/{channel_id}", + response_class=EventSourceResponse, + ) async def subscribe_channel( channel_id: str, request: Request, @@ -4632,25 +4632,21 @@ async def subscribe_channel( raise HTTPException(status_code=404, detail="Channel not found") queue: asyncio.Queue = asyncio.Queue(maxsize=100) _event_queues["channels"].append(queue) - - async def event_generator(): - try: - while True: - if await request.is_disconnected(): - break - try: - event = await asyncio.wait_for(queue.get(), timeout=30.0) - if event.get("channel_id") == channel_id: - yield { - "event": event.get("type", "message"), - "data": str(event.get("data", {})), - } - except asyncio.TimeoutError: - yield {"event": "ping", "data": ""} - finally: - _event_queues["channels"].remove(queue) - - return EventSourceResponse(event_generator()) + try: + while True: + if await request.is_disconnected(): + break + try: + event = await asyncio.wait_for(queue.get(), timeout=30.0) + if event.get("channel_id") == channel_id: + yield ServerSentEvent( + data=event.get("data", {}), + event=event.get("type", "message"), + ) + except asyncio.TimeoutError: + yield ServerSentEvent(comment="ping") + finally: + _event_queues["channels"].remove(queue) @app.post( "/api/v1/channels/{channel_id}/messages", diff --git a/pyproject.toml b/pyproject.toml index 95472e3..855d3ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "openintent" -version = "0.14.1" +version = "0.15.0" description = "Python SDK and Server for the OpenIntent Coordination Protocol" readme = "README.md" license = {text = "MIT"} @@ -43,10 +43,9 @@ dependencies = [ [project.optional-dependencies] server = [ - "fastapi>=0.104.0", + "fastapi>=0.135.0", "uvicorn[standard]>=0.24.0", "sqlalchemy>=2.0.0", - "sse-starlette>=1.6.0", "pydantic>=2.0.0", ] dev = [