diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..f77a9d5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,7 @@
+repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.8.6
+ hooks:
+ - id: ruff
+ args: [--fix]
+ - id: ruff-format
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65b0811..80c0a4f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,28 @@ All notable changes to the OpenIntent SDK will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.15.0] - 2026-03-02
+
+### Added
+
+- **RFC-0010 Retry Policy MCP Tools** — 4 new MCP tools: `set_retry_policy` (admin), `get_retry_policy` (read), `record_failure` (write), `get_failures` (read). MCP tool surface expanded from 66 to 70 tools; RBAC counts: reader=25, operator=43, admin=70.
+- **`build_retry_failure_tools()`** — New helper in the Python MCP bridge (`openintent.mcp`) that constructs retry policy and failure-tracking tool definitions for MCP integration, enabling agents to manage retry policies and record/query failures through MCP.
+
+### Changed
+
+- **Native FastAPI SSE** — Replaced `sse-starlette` third-party dependency with FastAPI's built-in `EventSourceResponse` and `ServerSentEvent` (available since FastAPI 0.135.0). All four SSE subscription endpoints (`/api/v1/subscribe/intents/{id}`, `/api/v1/subscribe/portfolios/{id}`, `/api/v1/subscribe/agents/{id}`, `/api/v1/subscribe/channels/{id}`) now use native async generator pattern with `ServerSentEvent` objects instead of yielding raw dicts through `sse-starlette`'s `EventSourceResponse`. Keep-alive pings now use SSE comment format (`ServerSentEvent(comment="ping")`) per the SSE spec recommendation.
+
+### Removed
+
+- **`sse-starlette` dependency** — No longer required. The `server` optional dependency group now requires `fastapi>=0.135.0` (previously `>=0.104.0`) which includes native SSE support.
+
+### Updated
+
+- All version references updated to 0.15.0 across Python SDK, MCP server package, and changelog.
+- FastAPI minimum version bumped from `>=0.104.0` to `>=0.135.0` in server extras.
+
+---
+
## [0.14.1] - 2026-02-27
### Fixed
diff --git a/docs/changelog.md b/docs/changelog.md
index f28770c..4553788 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -5,6 +5,33 @@ All notable changes to the OpenIntent SDK will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.15.0] - 2026-03-02
+
+### Added
+
+- **RFC-0010 Retry Policy MCP Tools** — 4 new MCP tools for retry policy management and failure tracking:
+ - `set_retry_policy` — Set or update retry policy on an intent (admin tier).
+ - `get_retry_policy` — Retrieve the current retry policy for an intent (reader tier).
+ - `record_failure` — Record a failure event against an intent for retry tracking (operator tier).
+ - `get_failures` — List recorded failures for an intent (reader tier).
+- **`build_retry_failure_tools()`** — New helper in the Python MCP bridge (`openintent.mcp`) that builds the 4 retry/failure tool definitions for use with `MCPToolProvider` and `MCPToolExporter`.
+- **MCP Tool Surface Expansion** — MCP tool surface expanded from 66 to 70 tools; RBAC counts: reader=25, operator=43, admin=70.
+
+### Changed
+
+- **Native FastAPI SSE** — Replaced `sse-starlette` third-party dependency with FastAPI's built-in `EventSourceResponse` and `ServerSentEvent` (available since FastAPI 0.135.0). All four SSE subscription endpoints (`/api/v1/subscribe/intents/{id}`, `/api/v1/subscribe/portfolios/{id}`, `/api/v1/subscribe/agents/{id}`, `/api/v1/subscribe/channels/{id}`) now use native async generator pattern with `ServerSentEvent` objects instead of yielding raw dicts through `sse-starlette`'s `EventSourceResponse`. Keep-alive pings now use SSE comment format (`ServerSentEvent(comment="ping")`) per the SSE spec recommendation.
+
+### Removed
+
+- **`sse-starlette` dependency** — No longer required. The `server` optional dependency group now requires `fastapi>=0.135.0` (previously `>=0.104.0`) which includes native SSE support.
+
+### Updated
+
+- All version references updated to 0.15.0 across Python SDK, MCP server package, and changelog.
+- FastAPI minimum version bumped from `>=0.104.0` to `>=0.135.0` in server extras.
+
+---
+
## [0.14.1] - 2026-02-27
### Fixed
diff --git a/docs/guide/cost-retry.md b/docs/guide/cost-retry.md
index fbd6745..3a894ce 100644
--- a/docs/guide/cost-retry.md
+++ b/docs/guide/cost-retry.md
@@ -188,8 +188,46 @@ workflow:
!!! warning "Idempotency"
When using retry policies, ensure your agent handlers are idempotent — safe to call multiple times with the same input without side effects.
+## MCP Tool Access for Retry Policies
+
+The [MCP server](mcp.md) exposes 4 dedicated tools for retry policy management via the Model Context Protocol, allowing MCP clients like Claude Desktop and Cursor to manage retry policies and query failure history directly.
+
+| MCP Tool | Tier | Description |
+|----------|------|-------------|
+| `openintent_set_retry_policy` | `admin` | Configure retry policy for an intent (max attempts, backoff strategy, delays) |
+| `openintent_get_retry_policy` | `read` | Retrieve the current retry policy for an intent |
+| `openintent_record_failure` | `write` | Record a failure attempt against an intent for retry scheduling |
+| `openintent_get_failures` | `read` | Query failure history for an intent |
+
+These tools are available through the `@openintentai/mcp-server` package and follow the same RBAC model as all other MCP tools. The `build_retry_failure_tools()` function in the Python MCP bridge registers these tools automatically.
+
+```python
+from openintent import Agent, MCPTool, on_assignment
+
+@Agent("retry-manager", model="gpt-4o", tools=[
+ MCPTool(
+ server="npx",
+ args=["-y", "@openintentai/mcp-server"],
+ role="admin",
+ allowed_tools=[
+ "openintent_set_retry_policy",
+ "openintent_get_retry_policy",
+ "openintent_record_failure",
+ "openintent_get_failures",
+ ],
+ ),
+])
+class RetryManager:
+ @on_assignment
+ async def work(self, intent):
+ return await self.think(intent.description)
+```
+
+With these tools, the total MCP tool surface is 70 tools across all categories, with RBAC counts of reader=25, operator=43, admin=70.
+
## Next Steps
+- [MCP Integration](mcp.md) — Full MCP server and bridge documentation
- [LLM Adapters](adapters.md) — Automatic cost tracking for LLM calls
- [Governance & Arbitration](governance.md) — Budget enforcement and escalation
- [YAML Workflows](workflows.md) — Declarative retry and cost configuration
diff --git a/docs/guide/mcp.md b/docs/guide/mcp.md
index b1178e4..0a97d3b 100644
--- a/docs/guide/mcp.md
+++ b/docs/guide/mcp.md
@@ -120,7 +120,7 @@ class Researcher:
```
!!! info "RBAC role determines tool visibility"
- The `role` field on `MCPTool` maps directly to the RBAC system. When connecting to `@openintentai/mcp-server`, the server filters its tool listing based on the role: `reader` sees 4 read-only tools, `operator` sees 10 read+write tools, `admin` sees all 16 tools. The `allowed_tools` field further restricts within the role's permissions — both gates must pass.
+ The `role` field on `MCPTool` maps directly to the RBAC system. When connecting to `@openintentai/mcp-server`, the server filters its tool listing based on the role: `reader` sees 25 read-only tools, `operator` sees 43 read+write tools, `admin` sees all 70 tools. The `allowed_tools` field further restricts within the role's permissions — both gates must pass.
!!! warning "Least-privilege by design"
`MCPTool.role` defaults to `"reader"` — the most restrictive level. Each agent should declare exactly the minimum role it needs. In multi-agent topologies, each agent's MCP server runs as an isolated child process with its own explicit role, so one agent's privilege never leaks to another. Do **not** set `OPENINTENT_MCP_ROLE` as a global environment variable in multi-agent setups — declare the role per-agent on each `MCPTool` instead.
@@ -256,6 +256,10 @@ With the MCP server connected, Claude can interact with OpenIntent directly. Her
| `openintent_assign_agent` | `admin` | Assign an agent to an intent |
| `openintent_unassign_agent` | `admin` | Remove an agent assignment |
| `openintent_create_channel` | `admin` | Create a messaging channel |
+| `openintent_set_retry_policy` | `admin` | Configure retry policy for an intent |
+| `openintent_get_retry_policy` | `read` | Retrieve the retry policy for an intent |
+| `openintent_record_failure` | `write` | Record a failure attempt for retry scheduling |
+| `openintent_get_failures` | `read` | Query failure history for an intent |
### Available Resources
@@ -287,9 +291,9 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol
| Role | Tiers Granted | Tool Count | Use Case |
|------|---------------|------------|----------|
-| **`reader`** | `read` | 4 | Dashboards, monitoring, auditing. The MCP client can observe but never modify protocol state. |
-| **`operator`** | `read` + `write` | 10 | Worker agents that create intents, update state, and communicate. Cannot change lifecycle status, manage leases, or restructure agent assignments. |
-| **`admin`** | `read` + `write` + `admin` | 16 | Trusted orchestrators with full control. Required for lifecycle management, lease coordination, and structural operations. |
+| **`reader`** | `read` | 25 | Dashboards, monitoring, auditing. The MCP client can observe but never modify protocol state. |
+| **`operator`** | `read` + `write` | 43 | Worker agents that create intents, update state, and communicate. Cannot change lifecycle status, manage leases, or restructure agent assignments. |
+| **`admin`** | `read` + `write` + `admin` | 70 | Trusted orchestrators with full control. Required for lifecycle management, lease coordination, and structural operations. |
#### Tool Classification
@@ -301,6 +305,8 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol
| `openintent_list_intents` | Query intents with status filters |
| `openintent_get_events` | Read the immutable event log |
| `openintent_get_messages` | Read channel message history |
+| `openintent_get_retry_policy` | Retrieve the retry policy for an intent |
+| `openintent_get_failures` | Query failure history for an intent |
**Write tier** — bounded mutations with protocol safety:
@@ -312,6 +318,7 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol
| `openintent_send_message` | Send a channel message | Scoped to channel membership |
| `openintent_ask` | Request/reply on a channel | Scoped to channel membership |
| `openintent_broadcast` | Broadcast to channel | Scoped to channel membership |
+| `openintent_record_failure` | Record a failure attempt | Append-only failure record for retry scheduling |
**Admin tier** — structural and lifecycle operations:
@@ -323,6 +330,7 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol
| `openintent_assign_agent` | Add agent to intent | Changes who can work on an intent. |
| `openintent_unassign_agent` | Remove agent from intent | Removing an active agent disrupts in-progress work. |
| `openintent_create_channel` | Create messaging channel | Establishes communication topology between agents. |
+| `openintent_set_retry_policy` | Configure retry policy for an intent | Controls retry behavior including max attempts, backoff strategy, and delay parameters. Misconfiguration can cause excessive retries. |
#### Default Role
@@ -398,7 +406,7 @@ If the role is not set or is set to an unrecognized value, the server falls back
The server logs its active role and tool count at startup:
```
-[openintent-mcp] Server started – role="operator", tools=10/16, connected to http://localhost:8000
+[openintent-mcp] Server started – role="operator", tools=43/70, connected to http://localhost:8000
```
If the role is `admin`, a warning is emitted:
diff --git a/docs/overrides/home.html b/docs/overrides/home.html
index b44b097..d06ac3f 100644
--- a/docs/overrides/home.html
+++ b/docs/overrides/home.html
@@ -24,7 +24,7 @@
-
v0.14.0 — Federation Protocol, Security & Python SDK Implementation
+
v0.15.0 — Native FastAPI SSE & RFC-0010 Retry MCP Tools
Stop Duct-Taping Your Agents Together
OpenIntent is a durable, auditable protocol for multi-agent coordination. Structured intents replace fragile chat chains. Versioned state replaces guesswork. Ship agent systems that actually work in production.
@@ -50,7 +50,7 @@
Stop Duct-Taping Your Agents Together
Tests
-
v0.14.0
+
v0.15.0
Latest
diff --git a/mcp-server/README.md b/mcp-server/README.md
index b26951e..92b5762 100644
--- a/mcp-server/README.md
+++ b/mcp-server/README.md
@@ -70,9 +70,9 @@ The server enforces role-based access control with three tiers:
| Role | Tools | Description |
|---|---|---|
-| `reader` | 21 | Read-only observation: list/get intents, events, agents, costs |
-| `operator` | 38 | Bounded mutations: create intents, post events, manage leases |
-| `admin` | 62 | Full lifecycle: governance, identity, vaults, triggers, plans |
+| `reader` | 25 | Read-only observation: list/get intents, events, agents, costs, retry policies, failures |
+| `operator` | 43 | Bounded mutations: create intents, post events, manage leases, record failures |
+| `admin` | 70 | Full lifecycle: governance, identity, vaults, triggers, plans, retry policies |
Tools not permitted by the assigned role are hidden from the MCP tool listing entirely.
@@ -93,6 +93,7 @@ The role gate works alongside the `allowed_tools` allowlist — both must pass f
- **Verifiable Logs** (RFC-0019) — Hash chains, Merkle proofs
- **Tracing** (RFC-0020) — Distributed trace propagation
- **Messaging** (RFC-0021) — Agent-to-agent channels
+- **Retry Policies** (RFC-0010) — Set/get retry policies, record and query failures
## Resources
diff --git a/mcp-server/package.json b/mcp-server/package.json
index d9dfd39..ae4cca8 100644
--- a/mcp-server/package.json
+++ b/mcp-server/package.json
@@ -1,6 +1,6 @@
{
"name": "@openintentai/mcp-server",
- "version": "0.14.1",
+ "version": "0.15.0",
"description": "MCP server exposing the OpenIntent Coordination Protocol as MCP tools and resources",
"main": "dist/index.js",
"types": "dist/index.d.ts",
diff --git a/mcp-server/src/client.ts b/mcp-server/src/client.ts
index 0b0df06..d4f95ab 100644
--- a/mcp-server/src/client.ts
+++ b/mcp-server/src/client.ts
@@ -926,4 +926,51 @@ export class OpenIntentClient {
trace_id: params.trace_id,
});
}
+
+ // ── Retry Policy & Failures (RFC-0010) ──────────────────────────────
+
+ async setRetryPolicy(params: {
+ intent_id: string;
+ policy: Record;
+ }): Promise {
+ return this.request("PUT", `/api/v1/intents/${params.intent_id}/retry-policy`, params.policy);
+ }
+
+ async getRetryPolicy(params: {
+ intent_id: string;
+ }): Promise {
+ return this.request("GET", `/api/v1/intents/${params.intent_id}/retry-policy`);
+ }
+
+ async recordFailure(params: {
+ intent_id: string;
+ agent_id: string;
+ attempt_number: number;
+ error_code?: string;
+ error_message?: string;
+ retry_scheduled_at?: string;
+ metadata?: Record;
+ }): Promise {
+ return this.request("POST", `/api/v1/intents/${params.intent_id}/failures`, {
+ agent_id: params.agent_id,
+ attempt_number: params.attempt_number,
+ error_code: params.error_code,
+ error_message: params.error_message,
+ retry_scheduled_at: params.retry_scheduled_at,
+ metadata: params.metadata ?? {},
+ });
+ }
+
+ async getFailures(params: {
+ intent_id: string;
+ limit?: number;
+ }): Promise {
+ const query = new URLSearchParams();
+ if (params.limit !== undefined) query.set("limit", String(params.limit));
+ const qs = query.toString();
+ return this.request(
+ "GET",
+ `/api/v1/intents/${params.intent_id}/failures${qs ? `?${qs}` : ""}`,
+ );
+ }
}
diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts
index 15c1fcd..b35a16f 100644
--- a/mcp-server/src/index.ts
+++ b/mcp-server/src/index.ts
@@ -30,7 +30,7 @@ async function main() {
const server = new Server(
{
name: "openintent-mcp",
- version: "0.14.1",
+ version: "0.15.0",
},
{
capabilities: {
diff --git a/mcp-server/src/security.ts b/mcp-server/src/security.ts
index 9f9712b..d58517c 100644
--- a/mcp-server/src/security.ts
+++ b/mcp-server/src/security.ts
@@ -121,6 +121,12 @@ export const TOOL_TIERS: Record = {
openintent_start_trace: "write",
openintent_link_spans: "write",
+ // Retry Policy & Failures (RFC-0010)
+ openintent_set_retry_policy: "admin",
+ openintent_get_retry_policy: "read",
+ openintent_record_failure: "write",
+ openintent_get_failures: "read",
+
// Federation (RFC-0022)
openintent_federation_status: "read",
openintent_list_federated_agents: "read",
diff --git a/mcp-server/src/tools.ts b/mcp-server/src/tools.ts
index 7a4a5b7..116d278 100644
--- a/mcp-server/src/tools.ts
+++ b/mcp-server/src/tools.ts
@@ -1311,6 +1311,80 @@ export const TOOL_DEFINITIONS: ToolDefinition[] = [
tier: "write" as ToolTier,
},
+ // ── Retry Policy & Failures (RFC-0010) ──────────────────────────────
+ {
+ name: "openintent_set_retry_policy",
+ description:
+ "Set or update the retry policy for an intent (RFC-0010). Controls automatic " +
+ "retry behaviour including strategy, max attempts, backoff delays, and fallback agent.",
+ inputSchema: {
+ type: "object",
+ properties: {
+ intent_id: { type: "string", description: "The intent to configure retry policy for" },
+ policy: {
+ type: "object",
+ description: "Retry policy configuration: { strategy: 'none'|'fixed'|'exponential'|'linear', max_retries: 3, base_delay_ms: 1000, max_delay_ms: 60000, fallback_agent_id: 'agent-backup'|null, failure_threshold: 3 }",
+ additionalProperties: true,
+ },
+ },
+ required: ["intent_id", "policy"],
+ },
+ tier: "admin" as ToolTier,
+ },
+ {
+ name: "openintent_get_retry_policy",
+ description:
+ "Retrieve the current retry policy configured for an intent. Returns " +
+ "the policy including max retries, backoff strategy, and retryable errors.",
+ inputSchema: {
+ type: "object",
+ properties: {
+ intent_id: { type: "string", description: "The intent to query" },
+ },
+ required: ["intent_id"],
+ },
+ tier: "read" as ToolTier,
+ },
+ {
+ name: "openintent_record_failure",
+ description:
+ "Record a failure event against an intent (RFC-0010). The server evaluates the " +
+ "failure against the retry policy and may automatically schedule a retry.",
+ inputSchema: {
+ type: "object",
+ properties: {
+ intent_id: { type: "string", description: "The intent that experienced a failure" },
+ agent_id: { type: "string", description: "ID of the agent reporting the failure" },
+ attempt_number: { type: "number", description: "Which retry attempt this failure represents (1-based)" },
+ error_code: { type: "string", description: "Error classification: RATE_LIMIT, TIMEOUT, NETWORK_ERROR, INVALID_OUTPUT, BUDGET_EXCEEDED, PERMISSION_DENIED" },
+ error_message: { type: "string", description: "Human-readable error message" },
+ retry_scheduled_at: { type: "string", description: "ISO 8601 timestamp for when the next retry is scheduled (optional)" },
+ metadata: {
+ type: "object",
+ description: "Additional failure context (e.g. { http_status: 429 })",
+ additionalProperties: true,
+ },
+ },
+ required: ["intent_id", "agent_id", "attempt_number"],
+ },
+ tier: "write" as ToolTier,
+ },
+ {
+ name: "openintent_get_failures",
+ description:
+ "Retrieve the failure history for an intent, ordered by most recent first. " +
+ "Useful for diagnosing recurring issues and reviewing retry attempts.",
+ inputSchema: {
+ type: "object",
+ properties: {
+ intent_id: { type: "string", description: "The intent to query" },
+ limit: { type: "number", description: "Maximum number of failures to return (default 50)" },
+ },
+ required: ["intent_id"],
+ },
+ tier: "read" as ToolTier,
+ },
+
// ── Federation (RFC-0022) ───────────────────────────────────────────
{
name: "openintent_federation_status",
@@ -1919,6 +1993,39 @@ export async function handleToolCall(
});
break;
+ // ── Retry Policy & Failures (RFC-0010) ──────────────────────────
+ case "openintent_set_retry_policy":
+ result = await client.setRetryPolicy({
+ intent_id: args.intent_id as string,
+ policy: args.policy as Record,
+ });
+ break;
+
+ case "openintent_get_retry_policy":
+ result = await client.getRetryPolicy({
+ intent_id: args.intent_id as string,
+ });
+ break;
+
+ case "openintent_record_failure":
+ result = await client.recordFailure({
+ intent_id: args.intent_id as string,
+ agent_id: args.agent_id as string,
+ attempt_number: args.attempt_number as number,
+ error_code: args.error_code as string | undefined,
+ error_message: args.error_message as string | undefined,
+ retry_scheduled_at: args.retry_scheduled_at as string | undefined,
+ metadata: args.metadata as Record | undefined,
+ });
+ break;
+
+ case "openintent_get_failures":
+ result = await client.getFailures({
+ intent_id: args.intent_id as string,
+ limit: args.limit as number | undefined,
+ });
+ break;
+
case "openintent_federation_status":
result = await client.getFederationStatus();
break;
diff --git a/mkdocs.yml b/mkdocs.yml
index a1f272c..e3ff6dd 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -204,7 +204,7 @@ extra:
link: https://pypi.org/project/openintent/
version:
provider: mike
- announcement: "v0.14.0 is here — Federation protocol (RFC-0022), federation security (RFC-0023), and complete RFC 0001–0023 coverage. Read the changelog →"
+ announcement: "v0.15.0 is here — Native FastAPI SSE replaces sse-starlette, plus 4 new RFC-0010 retry MCP tools (70 total). Read the changelog →"
meta:
- name: description
content: "OpenIntent Python SDK — structured multi-agent coordination protocol with decorator-first agents, 23 RFCs, 7 LLM adapters, federation, MCP integration, and built-in FastAPI server."
diff --git a/openintent/__init__.py b/openintent/__init__.py
index a03cb2b..b262df2 100644
--- a/openintent/__init__.py
+++ b/openintent/__init__.py
@@ -233,7 +233,7 @@ def get_server() -> tuple[Any, Any, Any]:
)
-__version__ = "0.14.1"
+__version__ = "0.15.0"
__all__ = [
"OpenIntentClient",
"AsyncOpenIntentClient",
diff --git a/openintent/mcp.py b/openintent/mcp.py
index 6415e52..d32e188 100644
--- a/openintent/mcp.py
+++ b/openintent/mcp.py
@@ -264,6 +264,246 @@ async def handle_call(
return {"result": result}
+def build_retry_failure_tools(
+ client: Any,
+) -> list[dict[str, Any]]:
+ """Build MCP tool definitions for RFC-0010 Retry Policy & Failure tools.
+
+ Each tool delegates to the corresponding method on an
+ :class:`~openintent.client.OpenIntentClient` (sync) or
+ :class:`~openintent.client.AsyncOpenIntentClient` (async).
+
+ Args:
+ client: An ``OpenIntentClient`` or ``AsyncOpenIntentClient`` instance.
+
+ Returns:
+ A list of tool definition dicts ready for :class:`MCPToolExporter`.
+ """
+ import asyncio
+
+ def _call(method: Any, kwargs: dict[str, Any]) -> Any:
+ if asyncio.iscoroutinefunction(method):
+ return asyncio.get_event_loop().run_until_complete(method(**kwargs))
+ return method(**kwargs)
+
+ def _to_dict(obj: Any) -> Any:
+ if hasattr(obj, "to_dict"):
+ return obj.to_dict()
+ if isinstance(obj, list):
+ return [_to_dict(item) for item in obj]
+ return obj
+
+ def _set_retry_policy(
+ intent_id: str,
+ policy: Optional[dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> dict[str, Any]:
+ from .models import RetryStrategy
+
+ policy = policy or kwargs
+ strategy_val = policy.get("strategy", "exponential")
+ try:
+ strategy = RetryStrategy(strategy_val)
+ except ValueError:
+ strategy = RetryStrategy.EXPONENTIAL
+ result = _call(
+ client.set_retry_policy,
+ {
+ "intent_id": intent_id,
+ "strategy": strategy,
+ "max_retries": policy.get("max_retries", 3),
+ "base_delay_ms": policy.get("base_delay_ms", 1000),
+ "max_delay_ms": policy.get("max_delay_ms", 60000),
+ "fallback_agent_id": policy.get("fallback_agent_id"),
+ "failure_threshold": policy.get("failure_threshold", 3),
+ },
+ )
+ converted = _to_dict(result)
+ return converted if isinstance(converted, dict) else {"result": converted}
+
+ def _get_retry_policy(intent_id: str, **_: Any) -> dict[str, Any]:
+ result = _call(client.get_retry_policy, {"intent_id": intent_id})
+ if result is None:
+ return {"retry_policy": None}
+ converted = _to_dict(result)
+ return converted if isinstance(converted, dict) else {"result": converted}
+
+ def _record_failure(
+ intent_id: str,
+ agent_id: str = "",
+ attempt_number: int = 1,
+ error_code: Optional[str] = None,
+ error_message: Optional[str] = None,
+ retry_scheduled_at: Optional[str] = None,
+ metadata: Optional[dict[str, Any]] = None,
+ **_: Any,
+ ) -> dict[str, Any]:
+ from datetime import datetime as _dt
+
+ scheduled: Optional[Any] = None
+ if retry_scheduled_at:
+ try:
+ scheduled = _dt.fromisoformat(retry_scheduled_at)
+ except (ValueError, TypeError):
+ scheduled = None
+ kwargs: dict[str, Any] = {
+ "intent_id": intent_id,
+ "attempt_number": attempt_number,
+ "error_code": error_code,
+ "error_message": error_message,
+ "retry_scheduled_at": scheduled,
+ "metadata": metadata,
+ }
+ result = _call(client.record_failure, kwargs)
+ converted = _to_dict(result)
+ return converted if isinstance(converted, dict) else {"result": converted}
+
+ def _get_failures(
+ intent_id: str,
+ limit: int = 50,
+ **_: Any,
+ ) -> dict[str, Any]:
+ result = _call(client.get_failures, {"intent_id": intent_id})
+ failures = _to_dict(result)
+ if isinstance(failures, list):
+ failures = failures[:limit]
+ return {"failures": failures}
+ return failures if isinstance(failures, dict) else {"result": failures}
+
+ return [
+ {
+ "name": "openintent_set_retry_policy",
+ "description": (
+ "Set or update the retry policy for an intent (RFC-0010). Controls "
+ "automatic retry behaviour including strategy, max attempts, backoff "
+ "delays, and fallback agent."
+ ),
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "intent_id": {
+ "type": "string",
+ "description": "The intent to configure retry policy for",
+ },
+ "policy": {
+ "type": "object",
+ "description": (
+ "Retry policy configuration: { strategy: "
+ "'none'|'fixed'|'exponential'|'linear', "
+ "max_retries: 3, base_delay_ms: 1000, "
+ "max_delay_ms: 60000, "
+ "fallback_agent_id: 'agent-backup'|null, "
+ "failure_threshold: 3 }"
+ ),
+ "additionalProperties": True,
+ },
+ },
+ "required": ["intent_id", "policy"],
+ },
+ "handler": _set_retry_policy,
+ },
+ {
+ "name": "openintent_get_retry_policy",
+ "description": (
+ "Retrieve the current retry policy configured for an "
+ "intent. Returns the policy including max retries, "
+ "backoff strategy, and retryable errors."
+ ),
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "intent_id": {
+ "type": "string",
+ "description": "The intent to get retry policy for",
+ },
+ },
+ "required": ["intent_id"],
+ },
+ "handler": _get_retry_policy,
+ },
+ {
+ "name": "openintent_record_failure",
+ "description": (
+ "Record a failure event against an intent (RFC-0010). The server "
+ "evaluates the failure against the retry policy and may "
+ "automatically schedule a retry."
+ ),
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "intent_id": {
+ "type": "string",
+ "description": "The intent that experienced a failure",
+ },
+ "agent_id": {
+ "type": "string",
+ "description": "ID of the agent reporting the failure",
+ },
+ "attempt_number": {
+ "type": "number",
+ "description": (
+ "Which retry attempt this failure represents (1-based)"
+ ),
+ },
+ "error_code": {
+ "type": "string",
+ "description": (
+ "Error classification: RATE_LIMIT, TIMEOUT, "
+ "NETWORK_ERROR, INVALID_OUTPUT, BUDGET_EXCEEDED, "
+ "PERMISSION_DENIED"
+ ),
+ },
+ "error_message": {
+ "type": "string",
+ "description": "Human-readable error description",
+ },
+ "retry_scheduled_at": {
+ "type": "string",
+ "description": (
+ "ISO 8601 timestamp for when the next retry is "
+ "scheduled (optional)"
+ ),
+ },
+ "metadata": {
+ "type": "object",
+ "description": (
+ "Additional failure context (e.g. { http_status: 429 })"
+ ),
+ "additionalProperties": True,
+ },
+ },
+ "required": ["intent_id", "agent_id", "attempt_number"],
+ },
+ "handler": _record_failure,
+ },
+ {
+ "name": "openintent_get_failures",
+ "description": (
+ "Retrieve the failure history for an intent, ordered "
+ "by most recent first. Useful for diagnosing recurring "
+ "issues and reviewing retry attempts."
+ ),
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "intent_id": {
+ "type": "string",
+ "description": "The intent to get failure history for",
+ },
+ "limit": {
+ "type": "number",
+ "description": (
+ "Maximum number of failures to return (default 50)"
+ ),
+ },
+ },
+ "required": ["intent_id"],
+ },
+ "handler": _get_failures,
+ },
+ ]
+
+
class MCPBridge:
"""High-level bridge managing multiple MCP server connections."""
diff --git a/openintent/server/app.py b/openintent/server/app.py
index 5ec48c1..6a45b2f 100644
--- a/openintent/server/app.py
+++ b/openintent/server/app.py
@@ -11,9 +11,9 @@
from fastapi import Depends, FastAPI, Header, HTTPException, Query, Request, Response
from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
+from fastapi.responses import EventSourceResponse, JSONResponse
+from fastapi.sse import ServerSentEvent
from pydantic import BaseModel, ConfigDict, Field
-from sse_starlette.sse import EventSourceResponse
from .config import ServerConfig
from .database import ( # noqa: F401
@@ -2752,7 +2752,10 @@ async def remove_intent_from_portfolio(
finally:
session.close()
- @app.get("/api/v1/subscribe/intents/{intent_id}")
+ @app.get(
+ "/api/v1/subscribe/intents/{intent_id}",
+ response_class=EventSourceResponse,
+ )
async def subscribe_intent(
intent_id: str,
request: Request,
@@ -2762,27 +2765,26 @@ async def subscribe_intent(
"""SSE subscription for intent events."""
queue: asyncio.Queue = asyncio.Queue(maxsize=100)
_event_queues["intents"].append(queue)
+ try:
+ while True:
+ if await request.is_disconnected():
+ break
+ try:
+ event = await asyncio.wait_for(queue.get(), timeout=30.0)
+ if event.get("intent_id") == intent_id:
+ yield ServerSentEvent(
+ data=event.get("data", {}),
+ event=event.get("type", "message"),
+ )
+ except asyncio.TimeoutError:
+ yield ServerSentEvent(comment="ping")
+ finally:
+ _event_queues["intents"].remove(queue)
- async def event_generator():
- try:
- while True:
- if await request.is_disconnected():
- break
- try:
- event = await asyncio.wait_for(queue.get(), timeout=30.0)
- if event.get("intent_id") == intent_id:
- yield {
- "event": event.get("type", "message"),
- "data": str(event.get("data", {})),
- }
- except asyncio.TimeoutError:
- yield {"event": "ping", "data": ""}
- finally:
- _event_queues["intents"].remove(queue)
-
- return EventSourceResponse(event_generator())
-
- @app.get("/api/v1/subscribe/portfolios/{portfolio_id}")
+ @app.get(
+ "/api/v1/subscribe/portfolios/{portfolio_id}",
+ response_class=EventSourceResponse,
+ )
async def subscribe_portfolio(
portfolio_id: str,
request: Request,
@@ -2792,27 +2794,26 @@ async def subscribe_portfolio(
"""SSE subscription for portfolio events."""
queue: asyncio.Queue = asyncio.Queue(maxsize=100)
_event_queues["portfolios"].append(queue)
+ try:
+ while True:
+ if await request.is_disconnected():
+ break
+ try:
+ event = await asyncio.wait_for(queue.get(), timeout=30.0)
+ if event.get("portfolio_id") == portfolio_id:
+ yield ServerSentEvent(
+ data=event.get("data", {}),
+ event=event.get("type", "message"),
+ )
+ except asyncio.TimeoutError:
+ yield ServerSentEvent(comment="ping")
+ finally:
+ _event_queues["portfolios"].remove(queue)
- async def event_generator():
- try:
- while True:
- if await request.is_disconnected():
- break
- try:
- event = await asyncio.wait_for(queue.get(), timeout=30.0)
- if event.get("portfolio_id") == portfolio_id:
- yield {
- "event": event.get("type", "message"),
- "data": str(event.get("data", {})),
- }
- except asyncio.TimeoutError:
- yield {"event": "ping", "data": ""}
- finally:
- _event_queues["portfolios"].remove(queue)
-
- return EventSourceResponse(event_generator())
-
- @app.get("/api/v1/subscribe/agents/{agent_id}")
+ @app.get(
+ "/api/v1/subscribe/agents/{agent_id}",
+ response_class=EventSourceResponse,
+ )
async def subscribe_agent(
agent_id: str,
request: Request,
@@ -2822,25 +2823,21 @@ async def subscribe_agent(
"""SSE subscription for agent events."""
queue: asyncio.Queue = asyncio.Queue(maxsize=100)
_event_queues["agents"].append(queue)
-
- async def event_generator():
- try:
- while True:
- if await request.is_disconnected():
- break
- try:
- event = await asyncio.wait_for(queue.get(), timeout=30.0)
- if event.get("agent_id") == agent_id:
- yield {
- "event": event.get("type", "message"),
- "data": str(event.get("data", {})),
- }
- except asyncio.TimeoutError:
- yield {"event": "ping", "data": ""}
- finally:
- _event_queues["agents"].remove(queue)
-
- return EventSourceResponse(event_generator())
+ try:
+ while True:
+ if await request.is_disconnected():
+ break
+ try:
+ event = await asyncio.wait_for(queue.get(), timeout=30.0)
+ if event.get("agent_id") == agent_id:
+ yield ServerSentEvent(
+ data=event.get("data", {}),
+ event=event.get("type", "message"),
+ )
+ except asyncio.TimeoutError:
+ yield ServerSentEvent(comment="ping")
+ finally:
+ _event_queues["agents"].remove(queue)
@app.post("/api/v1/intents/{intent_id}/arbitrate")
async def request_arbitration(
@@ -4621,7 +4618,10 @@ async def delete_channel(
channel_messages.pop(channel_id, None)
return {"status": "closed", "channel_id": channel_id}
- @app.get("/api/v1/subscribe/channels/{channel_id}")
+ @app.get(
+ "/api/v1/subscribe/channels/{channel_id}",
+ response_class=EventSourceResponse,
+ )
async def subscribe_channel(
channel_id: str,
request: Request,
@@ -4632,25 +4632,21 @@ async def subscribe_channel(
raise HTTPException(status_code=404, detail="Channel not found")
queue: asyncio.Queue = asyncio.Queue(maxsize=100)
_event_queues["channels"].append(queue)
-
- async def event_generator():
- try:
- while True:
- if await request.is_disconnected():
- break
- try:
- event = await asyncio.wait_for(queue.get(), timeout=30.0)
- if event.get("channel_id") == channel_id:
- yield {
- "event": event.get("type", "message"),
- "data": str(event.get("data", {})),
- }
- except asyncio.TimeoutError:
- yield {"event": "ping", "data": ""}
- finally:
- _event_queues["channels"].remove(queue)
-
- return EventSourceResponse(event_generator())
+ try:
+ while True:
+ if await request.is_disconnected():
+ break
+ try:
+ event = await asyncio.wait_for(queue.get(), timeout=30.0)
+ if event.get("channel_id") == channel_id:
+ yield ServerSentEvent(
+ data=event.get("data", {}),
+ event=event.get("type", "message"),
+ )
+ except asyncio.TimeoutError:
+ yield ServerSentEvent(comment="ping")
+ finally:
+ _event_queues["channels"].remove(queue)
@app.post(
"/api/v1/channels/{channel_id}/messages",
diff --git a/pyproject.toml b/pyproject.toml
index 95472e3..855d3ce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "openintent"
-version = "0.14.1"
+version = "0.15.0"
description = "Python SDK and Server for the OpenIntent Coordination Protocol"
readme = "README.md"
license = {text = "MIT"}
@@ -43,10 +43,9 @@ dependencies = [
[project.optional-dependencies]
server = [
- "fastapi>=0.104.0",
+ "fastapi>=0.135.0",
"uvicorn[standard]>=0.24.0",
"sqlalchemy>=2.0.0",
- "sse-starlette>=1.6.0",
"pydantic>=2.0.0",
]
dev = [