diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..f77a9d5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,7 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.6
+    hooks:
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65b0811..80c0a4f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,28 @@ All notable changes to the OpenIntent SDK will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.15.0] - 2026-03-02
+
+### Added
+
+- **RFC-0010 Retry Policy MCP Tools** — 4 new MCP tools: `set_retry_policy` (admin), `get_retry_policy` (read), `record_failure` (write), `get_failures` (read). MCP tool surface expanded from 66 to 70 tools; RBAC counts: reader=25, operator=43, admin=70.
+- **`build_retry_failure_tools()`** — New helper in the Python MCP bridge (`openintent.mcp`) that constructs retry policy and failure-tracking tool definitions for MCP integration, enabling agents to manage retry policies and record/query failures through MCP.
+
+### Changed
+
+- **Native FastAPI SSE** — Replaced `sse-starlette` third-party dependency with FastAPI's built-in `EventSourceResponse` and `ServerSentEvent` (available since FastAPI 0.135.0). All four SSE subscription endpoints (`/api/v1/subscribe/intents/{id}`, `/api/v1/subscribe/portfolios/{id}`, `/api/v1/subscribe/agents/{id}`, `/api/v1/subscribe/channels/{id}`) now use native async generator pattern with `ServerSentEvent` objects instead of yielding raw dicts through `sse-starlette`'s `EventSourceResponse`. Keep-alive pings now use SSE comment format (`ServerSentEvent(comment="ping")`) per the SSE spec recommendation.
+
+### Removed
+
+- **`sse-starlette` dependency** — No longer required. The `server` optional dependency group now requires `fastapi>=0.135.0` (previously `>=0.104.0`) which includes native SSE support.
+
+### Updated
+
+- All version references updated to 0.15.0 across Python SDK, MCP server package, and changelog.
+- FastAPI minimum version bumped from `>=0.104.0` to `>=0.135.0` in server extras.
+
+---
+
 ## [0.14.1] - 2026-02-27
 
 ### Fixed
diff --git a/docs/changelog.md b/docs/changelog.md
index f28770c..4553788 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -5,6 +5,33 @@ All notable changes to the OpenIntent SDK will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.15.0] - 2026-03-02
+
+### Added
+
+- **RFC-0010 Retry Policy MCP Tools** — 4 new MCP tools for retry policy management and failure tracking:
+  - `set_retry_policy` — Set or update retry policy on an intent (admin tier).
+  - `get_retry_policy` — Retrieve the current retry policy for an intent (reader tier).
+  - `record_failure` — Record a failure event against an intent for retry tracking (operator tier).
+  - `get_failures` — List recorded failures for an intent (reader tier).
+- **`build_retry_failure_tools()`** — New helper in the Python MCP bridge (`openintent.mcp`) that builds the 4 retry/failure tool definitions for use with `MCPToolProvider` and `MCPToolExporter`.
+- **MCP Tool Surface Expansion** — MCP tool surface expanded from 66 to 70 tools; RBAC counts: reader=25, operator=43, admin=70.
+
+### Changed
+
+- **Native FastAPI SSE** — Replaced `sse-starlette` third-party dependency with FastAPI's built-in `EventSourceResponse` and `ServerSentEvent` (available since FastAPI 0.135.0). All four SSE subscription endpoints (`/api/v1/subscribe/intents/{id}`, `/api/v1/subscribe/portfolios/{id}`, `/api/v1/subscribe/agents/{id}`, `/api/v1/subscribe/channels/{id}`) now use native async generator pattern with `ServerSentEvent` objects instead of yielding raw dicts through `sse-starlette`'s `EventSourceResponse`. Keep-alive pings now use SSE comment format (`ServerSentEvent(comment="ping")`) per the SSE spec recommendation.
+
+### Removed
+
+- **`sse-starlette` dependency** — No longer required. The `server` optional dependency group now requires `fastapi>=0.135.0` (previously `>=0.104.0`) which includes native SSE support.
+
+### Updated
+
+- All version references updated to 0.15.0 across Python SDK, MCP server package, and changelog.
+- FastAPI minimum version bumped from `>=0.104.0` to `>=0.135.0` in server extras.
+
+---
+
 ## [0.14.1] - 2026-02-27
 
 ### Fixed
diff --git a/docs/guide/cost-retry.md b/docs/guide/cost-retry.md
index fbd6745..3a894ce 100644
--- a/docs/guide/cost-retry.md
+++ b/docs/guide/cost-retry.md
@@ -188,8 +188,46 @@ workflow:
 !!! warning "Idempotency"
     When using retry policies, ensure your agent handlers are idempotent — safe to call multiple times with the same input without side effects.
 
+## MCP Tool Access for Retry Policies
+
+The [MCP server](mcp.md) exposes 4 dedicated tools for retry policy management via the Model Context Protocol, allowing MCP clients like Claude Desktop and Cursor to manage retry policies and query failure history directly.
+
+| MCP Tool | Tier | Description |
+|----------|------|-------------|
+| `openintent_set_retry_policy` | `admin` | Configure retry policy for an intent (max attempts, backoff strategy, delays) |
+| `openintent_get_retry_policy` | `read` | Retrieve the current retry policy for an intent |
+| `openintent_record_failure` | `write` | Record a failure attempt against an intent for retry scheduling |
+| `openintent_get_failures` | `read` | Query failure history for an intent |
+
+These tools are available through the `@openintentai/mcp-server` package and follow the same RBAC model as all other MCP tools. The `build_retry_failure_tools()` function in the Python MCP bridge registers these tools automatically.
+
+```python
+from openintent import Agent, MCPTool, on_assignment
+
+@Agent("retry-manager", model="gpt-4o", tools=[
+    MCPTool(
+        server="npx",
+        args=["-y", "@openintentai/mcp-server"],
+        role="admin",
+        allowed_tools=[
+            "openintent_set_retry_policy",
+            "openintent_get_retry_policy",
+            "openintent_record_failure",
+            "openintent_get_failures",
+        ],
+    ),
+])
+class RetryManager:
+    @on_assignment
+    async def work(self, intent):
+        return await self.think(intent.description)
+```
+
+With these tools, the total MCP tool surface is 70 tools across all categories, with RBAC counts of reader=25, operator=43, admin=70.
+
 ## Next Steps
 
+- [MCP Integration](mcp.md) — Full MCP server and bridge documentation
 - [LLM Adapters](adapters.md) — Automatic cost tracking for LLM calls
 - [Governance & Arbitration](governance.md) — Budget enforcement and escalation
 - [YAML Workflows](workflows.md) — Declarative retry and cost configuration
diff --git a/docs/guide/mcp.md b/docs/guide/mcp.md
index b1178e4..0a97d3b 100644
--- a/docs/guide/mcp.md
+++ b/docs/guide/mcp.md
@@ -120,7 +120,7 @@ class Researcher:
 ```
 
 !!! info "RBAC role determines tool visibility"
-    The `role` field on `MCPTool` maps directly to the RBAC system. When connecting to `@openintentai/mcp-server`, the server filters its tool listing based on the role: `reader` sees 4 read-only tools, `operator` sees 10 read+write tools, `admin` sees all 16 tools. The `allowed_tools` field further restricts within the role's permissions — both gates must pass.
+    The `role` field on `MCPTool` maps directly to the RBAC system. When connecting to `@openintentai/mcp-server`, the server filters its tool listing based on the role: `reader` sees 25 read-only tools, `operator` sees 43 read+write tools, `admin` sees all 70 tools. The `allowed_tools` field further restricts within the role's permissions — both gates must pass.
 
 !!! warning "Least-privilege by design"
     `MCPTool.role` defaults to `"reader"` — the most restrictive level. Each agent should declare exactly the minimum role it needs. In multi-agent topologies, each agent's MCP server runs as an isolated child process with its own explicit role, so one agent's privilege never leaks to another. Do **not** set `OPENINTENT_MCP_ROLE` as a global environment variable in multi-agent setups — declare the role per-agent on each `MCPTool` instead.
@@ -256,6 +256,10 @@ With the MCP server connected, Claude can interact with OpenIntent directly. Her
 | `openintent_assign_agent` | `admin` | Assign an agent to an intent |
 | `openintent_unassign_agent` | `admin` | Remove an agent assignment |
 | `openintent_create_channel` | `admin` | Create a messaging channel |
+| `openintent_set_retry_policy` | `admin` | Configure retry policy for an intent |
+| `openintent_get_retry_policy` | `read` | Retrieve the retry policy for an intent |
+| `openintent_record_failure` | `write` | Record a failure attempt for retry scheduling |
+| `openintent_get_failures` | `read` | Query failure history for an intent |
 
 ### Available Resources
 
@@ -287,9 +291,9 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol
 
 | Role | Tiers Granted | Tool Count | Use Case |
 |------|---------------|------------|----------|
-| **`reader`** | `read` | 4 | Dashboards, monitoring, auditing. The MCP client can observe but never modify protocol state. |
-| **`operator`** | `read` + `write` | 10 | Worker agents that create intents, update state, and communicate. Cannot change lifecycle status, manage leases, or restructure agent assignments. |
-| **`admin`** | `read` + `write` + `admin` | 16 | Trusted orchestrators with full control. Required for lifecycle management, lease coordination, and structural operations. |
+| **`reader`** | `read` | 25 | Dashboards, monitoring, auditing. The MCP client can observe but never modify protocol state. |
+| **`operator`** | `read` + `write` | 43 | Worker agents that create intents, update state, and communicate. Cannot change lifecycle status, manage leases, or restructure agent assignments. |
+| **`admin`** | `read` + `write` + `admin` | 70 | Trusted orchestrators with full control. Required for lifecycle management, lease coordination, and structural operations. |
 
 #### Tool Classification
 
@@ -301,6 +305,8 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol
 | `openintent_list_intents` | Query intents with status filters |
 | `openintent_get_events` | Read the immutable event log |
 | `openintent_get_messages` | Read channel message history |
+| `openintent_get_retry_policy` | Retrieve the retry policy for an intent |
+| `openintent_get_failures` | Query failure history for an intent |
 
 **Write tier** — bounded mutations with protocol safety:
 
@@ -312,6 +318,7 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol
 | `openintent_send_message` | Send a channel message | Scoped to channel membership |
 | `openintent_ask` | Request/reply on a channel | Scoped to channel membership |
 | `openintent_broadcast` | Broadcast to channel | Scoped to channel membership |
+| `openintent_record_failure` | Record a failure attempt | Append-only failure record for retry scheduling |
 
 **Admin tier** — structural and lifecycle operations:
 
@@ -323,6 +330,7 @@ Every tool belongs to a **permission tier** — `read`, `write`, or `admin`. Rol
 | `openintent_assign_agent` | Add agent to intent | Changes who can work on an intent. |
 | `openintent_unassign_agent` | Remove agent from intent | Removing an active agent disrupts in-progress work. |
 | `openintent_create_channel` | Create messaging channel | Establishes communication topology between agents. |
+| `openintent_set_retry_policy` | Configure retry policy for an intent | Controls retry behavior including max attempts, backoff strategy, and delay parameters. Misconfiguration can cause excessive retries. |
 
 #### Default Role
 
@@ -398,7 +406,7 @@ If the role is not set or is set to an unrecognized value, the server falls back
 The server logs its active role and tool count at startup:
 
 ```
-[openintent-mcp] Server started – role="operator", tools=10/16, connected to http://localhost:8000
+[openintent-mcp] Server started – role="operator", tools=43/70, connected to http://localhost:8000
 ```
 
 If the role is `admin`, a warning is emitted:
diff --git a/docs/overrides/home.html b/docs/overrides/home.html
index b44b097..d06ac3f 100644
--- a/docs/overrides/home.html
+++ b/docs/overrides/home.html
@@ -24,7 +24,7 @@
 
   <!-- Hero -->
   <div class="oi-hero">
-    <div class="oi-hero__badge">v0.14.0 &mdash; Federation Protocol, Security &amp; Python SDK Implementation</div>
+    <div class="oi-hero__badge">v0.15.0 &mdash; Native FastAPI SSE &amp; RFC-0010 Retry MCP Tools</div>
     <h1 class="oi-hero__title">Stop Duct-Taping Your Agents Together</h1>
     <p class="oi-hero__subtitle">
       OpenIntent is a durable, auditable protocol for multi-agent coordination. Structured intents replace fragile chat chains. Versioned state replaces guesswork. Ship agent systems that actually work in production.
@@ -50,7 +50,7 @@ <h1 class="oi-hero__title">Stop Duct-Taping Your Agents Together</h1>
       <div class="oi-stat__label">Tests</div>
     </div>
     <div class="oi-stat">
-      <div class="oi-stat__number">v0.14.0</div>
+      <div class="oi-stat__number">v0.15.0</div>
       <div class="oi-stat__label">Latest</div>
     </div>
   </div>
diff --git a/mcp-server/README.md b/mcp-server/README.md
index b26951e..92b5762 100644
--- a/mcp-server/README.md
+++ b/mcp-server/README.md
@@ -70,9 +70,9 @@ The server enforces role-based access control with three tiers:
 
 | Role | Tools | Description |
 |---|---|---|
-| `reader` | 21 | Read-only observation: list/get intents, events, agents, costs |
-| `operator` | 38 | Bounded mutations: create intents, post events, manage leases |
-| `admin` | 62 | Full lifecycle: governance, identity, vaults, triggers, plans |
+| `reader` | 25 | Read-only observation: list/get intents, events, agents, costs, retry policies, failures |
+| `operator` | 43 | Bounded mutations: create intents, post events, manage leases, record failures |
+| `admin` | 70 | Full lifecycle: governance, identity, vaults, triggers, plans, retry policies |
 
 Tools not permitted by the assigned role are hidden from the MCP tool listing entirely.
 
@@ -93,6 +93,7 @@ The role gate works alongside the `allowed_tools` allowlist — both must pass f
 - **Verifiable Logs** (RFC-0019) — Hash chains, Merkle proofs
 - **Tracing** (RFC-0020) — Distributed trace propagation
 - **Messaging** (RFC-0021) — Agent-to-agent channels
+- **Retry Policies** (RFC-0010) — Set/get retry policies, record and query failures
 
 ## Resources
 
diff --git a/mcp-server/package.json b/mcp-server/package.json
index d9dfd39..ae4cca8 100644
--- a/mcp-server/package.json
+++ b/mcp-server/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@openintentai/mcp-server",
-  "version": "0.14.1",
+  "version": "0.15.0",
   "description": "MCP server exposing the OpenIntent Coordination Protocol as MCP tools and resources",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
diff --git a/mcp-server/src/client.ts b/mcp-server/src/client.ts
index 0b0df06..d4f95ab 100644
--- a/mcp-server/src/client.ts
+++ b/mcp-server/src/client.ts
@@ -926,4 +926,51 @@ export class OpenIntentClient {
       trace_id: params.trace_id,
     });
   }
+
+  // ── Retry Policy & Failures (RFC-0010) ──────────────────────────────
+
+  async setRetryPolicy(params: {
+    intent_id: string;
+    policy: Record<string, unknown>;
+  }): Promise<unknown> {
+    return this.request("PUT", `/api/v1/intents/${params.intent_id}/retry-policy`, params.policy);
+  }
+
+  async getRetryPolicy(params: {
+    intent_id: string;
+  }): Promise<unknown> {
+    return this.request("GET", `/api/v1/intents/${params.intent_id}/retry-policy`);
+  }
+
+  async recordFailure(params: {
+    intent_id: string;
+    agent_id: string;
+    attempt_number: number;
+    error_code?: string;
+    error_message?: string;
+    retry_scheduled_at?: string;
+    metadata?: Record<string, unknown>;
+  }): Promise<unknown> {
+    return this.request("POST", `/api/v1/intents/${params.intent_id}/failures`, {
+      agent_id: params.agent_id,
+      attempt_number: params.attempt_number,
+      error_code: params.error_code,
+      error_message: params.error_message,
+      retry_scheduled_at: params.retry_scheduled_at,
+      metadata: params.metadata ?? {},
+    });
+  }
+
+  async getFailures(params: {
+    intent_id: string;
+    limit?: number;
+  }): Promise<unknown> {
+    const query = new URLSearchParams();
+    if (params.limit !== undefined) query.set("limit", String(params.limit));
+    const qs = query.toString();
+    return this.request(
+      "GET",
+      `/api/v1/intents/${params.intent_id}/failures${qs ? `?${qs}` : ""}`,
+    );
+  }
 }
diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts
index 15c1fcd..b35a16f 100644
--- a/mcp-server/src/index.ts
+++ b/mcp-server/src/index.ts
@@ -30,7 +30,7 @@ async function main() {
   const server = new Server(
     {
       name: "openintent-mcp",
-      version: "0.14.1",
+      version: "0.15.0",
     },
     {
       capabilities: {
diff --git a/mcp-server/src/security.ts b/mcp-server/src/security.ts
index 9f9712b..d58517c 100644
--- a/mcp-server/src/security.ts
+++ b/mcp-server/src/security.ts
@@ -121,6 +121,12 @@ export const TOOL_TIERS: Record<string, ToolTier> = {
   openintent_start_trace:  "write",
   openintent_link_spans:   "write",
 
+  // Retry Policy & Failures (RFC-0010)
+  openintent_set_retry_policy:  "admin",
+  openintent_get_retry_policy:  "read",
+  openintent_record_failure:    "write",
+  openintent_get_failures:      "read",
+
   // Federation (RFC-0022)
   openintent_federation_status:    "read",
   openintent_list_federated_agents: "read",
diff --git a/mcp-server/src/tools.ts b/mcp-server/src/tools.ts
index 7a4a5b7..116d278 100644
--- a/mcp-server/src/tools.ts
+++ b/mcp-server/src/tools.ts
@@ -1311,6 +1311,80 @@ export const TOOL_DEFINITIONS: ToolDefinition[] = [
     tier: "write" as ToolTier,
   },
 
+  // ── Retry Policy & Failures (RFC-0010) ──────────────────────────────
+  {
+    name: "openintent_set_retry_policy",
+    description:
+      "Set or update the retry policy for an intent (RFC-0010). Controls automatic " +
+      "retry behaviour including strategy, max attempts, backoff delays, and fallback agent.",
+    inputSchema: {
+      type: "object",
+      properties: {
+        intent_id: { type: "string", description: "The intent to configure retry policy for" },
+        policy: {
+          type: "object",
+          description: "Retry policy configuration: { strategy: 'none'|'fixed'|'exponential'|'linear', max_retries: 3, base_delay_ms: 1000, max_delay_ms: 60000, fallback_agent_id: 'agent-backup'|null, failure_threshold: 3 }",
+          additionalProperties: true,
+        },
+      },
+      required: ["intent_id", "policy"],
+    },
+    tier: "admin" as ToolTier,
+  },
+  {
+    name: "openintent_get_retry_policy",
+    description:
+      "Retrieve the current retry policy configured for an intent. Returns " +
+      "the policy including max retries, backoff strategy, and retryable errors.",
+    inputSchema: {
+      type: "object",
+      properties: {
+        intent_id: { type: "string", description: "The intent to query" },
+      },
+      required: ["intent_id"],
+    },
+    tier: "read" as ToolTier,
+  },
+  {
+    name: "openintent_record_failure",
+    description:
+      "Record a failure event against an intent (RFC-0010). The server evaluates the " +
+      "failure against the retry policy and may automatically schedule a retry.",
+    inputSchema: {
+      type: "object",
+      properties: {
+        intent_id: { type: "string", description: "The intent that experienced a failure" },
+        agent_id: { type: "string", description: "ID of the agent reporting the failure" },
+        attempt_number: { type: "number", description: "Which retry attempt this failure represents (1-based)" },
+        error_code: { type: "string", description: "Error classification: RATE_LIMIT, TIMEOUT, NETWORK_ERROR, INVALID_OUTPUT, BUDGET_EXCEEDED, PERMISSION_DENIED" },
+        error_message: { type: "string", description: "Human-readable error message" },
+        retry_scheduled_at: { type: "string", description: "ISO 8601 timestamp for when the next retry is scheduled (optional)" },
+        metadata: {
+          type: "object",
+          description: "Additional failure context (e.g. { http_status: 429 })",
+          additionalProperties: true,
+        },
+      },
+      required: ["intent_id", "agent_id", "attempt_number"],
+    },
+    tier: "write" as ToolTier,
+  },
+  {
+    name: "openintent_get_failures",
+    description:
+      "Retrieve the failure history for an intent, ordered by most recent first. " +
+      "Useful for diagnosing recurring issues and reviewing retry attempts.",
+    inputSchema: {
+      type: "object",
+      properties: {
+        intent_id: { type: "string", description: "The intent to query" },
+        limit: { type: "number", description: "Maximum number of failures to return (default 50)" },
+      },
+      required: ["intent_id"],
+    },
+    tier: "read" as ToolTier,
+  },
+
   // ── Federation (RFC-0022) ───────────────────────────────────────────
   {
     name: "openintent_federation_status",
@@ -1919,6 +1993,39 @@ export async function handleToolCall(
         });
         break;
 
+      // ── Retry Policy & Failures (RFC-0010) ──────────────────────────
+      case "openintent_set_retry_policy":
+        result = await client.setRetryPolicy({
+          intent_id: args.intent_id as string,
+          policy: args.policy as Record<string, unknown>,
+        });
+        break;
+
+      case "openintent_get_retry_policy":
+        result = await client.getRetryPolicy({
+          intent_id: args.intent_id as string,
+        });
+        break;
+
+      case "openintent_record_failure":
+        result = await client.recordFailure({
+          intent_id: args.intent_id as string,
+          agent_id: args.agent_id as string,
+          attempt_number: args.attempt_number as number,
+          error_code: args.error_code as string | undefined,
+          error_message: args.error_message as string | undefined,
+          retry_scheduled_at: args.retry_scheduled_at as string | undefined,
+          metadata: args.metadata as Record<string, unknown> | undefined,
+        });
+        break;
+
+      case "openintent_get_failures":
+        result = await client.getFailures({
+          intent_id: args.intent_id as string,
+          limit: args.limit as number | undefined,
+        });
+        break;
+
       case "openintent_federation_status":
         result = await client.getFederationStatus();
         break;
diff --git a/mkdocs.yml b/mkdocs.yml
index a1f272c..e3ff6dd 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -204,7 +204,7 @@ extra:
       link: https://pypi.org/project/openintent/
   version:
     provider: mike
-  announcement: "<strong>v0.14.0</strong> is here &mdash; Federation protocol (RFC-0022), federation security (RFC-0023), and complete RFC 0001&ndash;0023 coverage. <a href='changelog/'>Read the changelog &rarr;</a>"
+  announcement: "<strong>v0.15.0</strong> is here &mdash; Native FastAPI SSE replaces sse-starlette, plus 4 new RFC-0010 retry MCP tools (70 total). <a href='changelog/'>Read the changelog &rarr;</a>"
   meta:
     - name: description
       content: "OpenIntent Python SDK — structured multi-agent coordination protocol with decorator-first agents, 23 RFCs, 7 LLM adapters, federation, MCP integration, and built-in FastAPI server."
diff --git a/openintent/__init__.py b/openintent/__init__.py
index a03cb2b..b262df2 100644
--- a/openintent/__init__.py
+++ b/openintent/__init__.py
@@ -233,7 +233,7 @@ def get_server() -> tuple[Any, Any, Any]:
         )
 
 
-__version__ = "0.14.1"
+__version__ = "0.15.0"
 __all__ = [
     "OpenIntentClient",
     "AsyncOpenIntentClient",
diff --git a/openintent/mcp.py b/openintent/mcp.py
index 6415e52..d32e188 100644
--- a/openintent/mcp.py
+++ b/openintent/mcp.py
@@ -264,6 +264,246 @@ async def handle_call(
         return {"result": result}
 
 
+def build_retry_failure_tools(
+    client: Any,
+) -> list[dict[str, Any]]:
+    """Build MCP tool definitions for RFC-0010 Retry Policy & Failure tools.
+
+    Each tool delegates to the corresponding method on an
+    :class:`~openintent.client.OpenIntentClient` (sync) or
+    :class:`~openintent.client.AsyncOpenIntentClient` (async).
+
+    Args:
+        client: An ``OpenIntentClient`` or ``AsyncOpenIntentClient`` instance.
+
+    Returns:
+        A list of tool definition dicts ready for :class:`MCPToolExporter`.
+    """
+    import asyncio
+
+    def _call(method: Any, kwargs: dict[str, Any]) -> Any:
+        if asyncio.iscoroutinefunction(method):
+            return asyncio.get_event_loop().run_until_complete(method(**kwargs))
+        return method(**kwargs)
+
+    def _to_dict(obj: Any) -> Any:
+        if hasattr(obj, "to_dict"):
+            return obj.to_dict()
+        if isinstance(obj, list):
+            return [_to_dict(item) for item in obj]
+        return obj
+
+    def _set_retry_policy(
+        intent_id: str,
+        policy: Optional[dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        from .models import RetryStrategy
+
+        policy = policy or kwargs
+        strategy_val = policy.get("strategy", "exponential")
+        try:
+            strategy = RetryStrategy(strategy_val)
+        except ValueError:
+            strategy = RetryStrategy.EXPONENTIAL
+        result = _call(
+            client.set_retry_policy,
+            {
+                "intent_id": intent_id,
+                "strategy": strategy,
+                "max_retries": policy.get("max_retries", 3),
+                "base_delay_ms": policy.get("base_delay_ms", 1000),
+                "max_delay_ms": policy.get("max_delay_ms", 60000),
+                "fallback_agent_id": policy.get("fallback_agent_id"),
+                "failure_threshold": policy.get("failure_threshold", 3),
+            },
+        )
+        converted = _to_dict(result)
+        return converted if isinstance(converted, dict) else {"result": converted}
+
+    def _get_retry_policy(intent_id: str, **_: Any) -> dict[str, Any]:
+        result = _call(client.get_retry_policy, {"intent_id": intent_id})
+        if result is None:
+            return {"retry_policy": None}
+        converted = _to_dict(result)
+        return converted if isinstance(converted, dict) else {"result": converted}
+
+    def _record_failure(
+        intent_id: str,
+        agent_id: str = "",
+        attempt_number: int = 1,
+        error_code: Optional[str] = None,
+        error_message: Optional[str] = None,
+        retry_scheduled_at: Optional[str] = None,
+        metadata: Optional[dict[str, Any]] = None,
+        **_: Any,
+    ) -> dict[str, Any]:
+        from datetime import datetime as _dt
+
+        scheduled: Optional[Any] = None
+        if retry_scheduled_at:
+            try:
+                scheduled = _dt.fromisoformat(retry_scheduled_at)
+            except (ValueError, TypeError):
+                scheduled = None
+        kwargs: dict[str, Any] = {
+            "intent_id": intent_id,
+            "attempt_number": attempt_number,
+            "error_code": error_code,
+            "error_message": error_message,
+            "retry_scheduled_at": scheduled,
+            "metadata": metadata,
+        }
+        result = _call(client.record_failure, kwargs)
+        converted = _to_dict(result)
+        return converted if isinstance(converted, dict) else {"result": converted}
+
+    def _get_failures(
+        intent_id: str,
+        limit: int = 50,
+        **_: Any,
+    ) -> dict[str, Any]:
+        result = _call(client.get_failures, {"intent_id": intent_id})
+        failures = _to_dict(result)
+        if isinstance(failures, list):
+            failures = failures[:limit]
+            return {"failures": failures}
+        return failures if isinstance(failures, dict) else {"result": failures}
+
+    return [
+        {
+            "name": "openintent_set_retry_policy",
+            "description": (
+                "Set or update the retry policy for an intent (RFC-0010). Controls "
+                "automatic retry behaviour including strategy, max attempts, backoff "
+                "delays, and fallback agent."
+            ),
+            "inputSchema": {
+                "type": "object",
+                "properties": {
+                    "intent_id": {
+                        "type": "string",
+                        "description": "The intent to configure retry policy for",
+                    },
+                    "policy": {
+                        "type": "object",
+                        "description": (
+                            "Retry policy configuration: { strategy: "
+                            "'none'|'fixed'|'exponential'|'linear', "
+                            "max_retries: 3, base_delay_ms: 1000, "
+                            "max_delay_ms: 60000, "
+                            "fallback_agent_id: 'agent-backup'|null, "
+                            "failure_threshold: 3 }"
+                        ),
+                        "additionalProperties": True,
+                    },
+                },
+                "required": ["intent_id", "policy"],
+            },
+            "handler": _set_retry_policy,
+        },
+        {
+            "name": "openintent_get_retry_policy",
+            "description": (
+                "Retrieve the current retry policy configured for an "
+                "intent. Returns the policy including max retries, "
+                "backoff strategy, and retryable errors."
+            ),
+            "inputSchema": {
+                "type": "object",
+                "properties": {
+                    "intent_id": {
+                        "type": "string",
+                        "description": "The intent to get retry policy for",
+                    },
+                },
+                "required": ["intent_id"],
+            },
+            "handler": _get_retry_policy,
+        },
+        {
+            "name": "openintent_record_failure",
+            "description": (
+                "Record a failure event against an intent (RFC-0010). The server "
+                "evaluates the failure against the retry policy and may "
+                "automatically schedule a retry."
+            ),
+            "inputSchema": {
+                "type": "object",
+                "properties": {
+                    "intent_id": {
+                        "type": "string",
+                        "description": "The intent that experienced a failure",
+                    },
+                    "agent_id": {
+                        "type": "string",
+                        "description": "ID of the agent reporting the failure",
+                    },
+                    "attempt_number": {
+                        "type": "number",
+                        "description": (
+                            "Which retry attempt this failure represents (1-based)"
+                        ),
+                    },
+                    "error_code": {
+                        "type": "string",
+                        "description": (
+                            "Error classification: RATE_LIMIT, TIMEOUT, "
+                            "NETWORK_ERROR, INVALID_OUTPUT, BUDGET_EXCEEDED, "
+                            "PERMISSION_DENIED"
+                        ),
+                    },
+                    "error_message": {
+                        "type": "string",
+                        "description": "Human-readable error description",
+                    },
+                    "retry_scheduled_at": {
+                        "type": "string",
+                        "description": (
+                            "ISO 8601 timestamp for when the next retry is "
+                            "scheduled (optional)"
+                        ),
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "description": (
+                            "Additional failure context (e.g. { http_status: 429 })"
+                        ),
+                        "additionalProperties": True,
+                    },
+                },
+                "required": ["intent_id", "agent_id", "attempt_number"],
+            },
+            "handler": _record_failure,
+        },
+        {
+            "name": "openintent_get_failures",
+            "description": (
+                "Retrieve the failure history for an intent, ordered "
+                "by most recent first. Useful for diagnosing recurring "
+                "issues and reviewing retry attempts."
+            ),
+            "inputSchema": {
+                "type": "object",
+                "properties": {
+                    "intent_id": {
+                        "type": "string",
+                        "description": "The intent to get failure history for",
+                    },
+                    "limit": {
+                        "type": "number",
+                        "description": (
+                            "Maximum number of failures to return (default 50)"
+                        ),
+                    },
+                },
+                "required": ["intent_id"],
+            },
+            "handler": _get_failures,
+        },
+    ]
+
+
 class MCPBridge:
     """High-level bridge managing multiple MCP server connections."""
 
diff --git a/openintent/server/app.py b/openintent/server/app.py
index 5ec48c1..6a45b2f 100644
--- a/openintent/server/app.py
+++ b/openintent/server/app.py
@@ -11,9 +11,9 @@
 
 from fastapi import Depends, FastAPI, Header, HTTPException, Query, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
+from fastapi.responses import EventSourceResponse, JSONResponse
+from fastapi.sse import ServerSentEvent
 from pydantic import BaseModel, ConfigDict, Field
-from sse_starlette.sse import EventSourceResponse
 
 from .config import ServerConfig
 from .database import (  # noqa: F401
@@ -2752,7 +2752,10 @@ async def remove_intent_from_portfolio(
         finally:
             session.close()
 
-    @app.get("/api/v1/subscribe/intents/{intent_id}")
+    @app.get(
+        "/api/v1/subscribe/intents/{intent_id}",
+        response_class=EventSourceResponse,
+    )
     async def subscribe_intent(
         intent_id: str,
         request: Request,
@@ -2762,27 +2765,26 @@ async def subscribe_intent(
         """SSE subscription for intent events."""
         queue: asyncio.Queue = asyncio.Queue(maxsize=100)
         _event_queues["intents"].append(queue)
+        try:
+            while True:
+                if await request.is_disconnected():
+                    break
+                try:
+                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
+                    if event.get("intent_id") == intent_id:
+                        yield ServerSentEvent(
+                            data=event.get("data", {}),
+                            event=event.get("type", "message"),
+                        )
+                except asyncio.TimeoutError:
+                    yield ServerSentEvent(comment="ping")
+        finally:
+            _event_queues["intents"].remove(queue)
 
-        async def event_generator():
-            try:
-                while True:
-                    if await request.is_disconnected():
-                        break
-                    try:
-                        event = await asyncio.wait_for(queue.get(), timeout=30.0)
-                        if event.get("intent_id") == intent_id:
-                            yield {
-                                "event": event.get("type", "message"),
-                                "data": str(event.get("data", {})),
-                            }
-                    except asyncio.TimeoutError:
-                        yield {"event": "ping", "data": ""}
-            finally:
-                _event_queues["intents"].remove(queue)
-
-        return EventSourceResponse(event_generator())
-
-    @app.get("/api/v1/subscribe/portfolios/{portfolio_id}")
+    @app.get(
+        "/api/v1/subscribe/portfolios/{portfolio_id}",
+        response_class=EventSourceResponse,
+    )
     async def subscribe_portfolio(
         portfolio_id: str,
         request: Request,
@@ -2792,27 +2794,26 @@ async def subscribe_portfolio(
         """SSE subscription for portfolio events."""
         queue: asyncio.Queue = asyncio.Queue(maxsize=100)
         _event_queues["portfolios"].append(queue)
+        try:
+            while True:
+                if await request.is_disconnected():
+                    break
+                try:
+                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
+                    if event.get("portfolio_id") == portfolio_id:
+                        yield ServerSentEvent(
+                            data=event.get("data", {}),
+                            event=event.get("type", "message"),
+                        )
+                except asyncio.TimeoutError:
+                    yield ServerSentEvent(comment="ping")
+        finally:
+            _event_queues["portfolios"].remove(queue)
 
-        async def event_generator():
-            try:
-                while True:
-                    if await request.is_disconnected():
-                        break
-                    try:
-                        event = await asyncio.wait_for(queue.get(), timeout=30.0)
-                        if event.get("portfolio_id") == portfolio_id:
-                            yield {
-                                "event": event.get("type", "message"),
-                                "data": str(event.get("data", {})),
-                            }
-                    except asyncio.TimeoutError:
-                        yield {"event": "ping", "data": ""}
-            finally:
-                _event_queues["portfolios"].remove(queue)
-
-        return EventSourceResponse(event_generator())
-
-    @app.get("/api/v1/subscribe/agents/{agent_id}")
+    @app.get(
+        "/api/v1/subscribe/agents/{agent_id}",
+        response_class=EventSourceResponse,
+    )
     async def subscribe_agent(
         agent_id: str,
         request: Request,
@@ -2822,25 +2823,21 @@ async def subscribe_agent(
         """SSE subscription for agent events."""
         queue: asyncio.Queue = asyncio.Queue(maxsize=100)
         _event_queues["agents"].append(queue)
-
-        async def event_generator():
-            try:
-                while True:
-                    if await request.is_disconnected():
-                        break
-                    try:
-                        event = await asyncio.wait_for(queue.get(), timeout=30.0)
-                        if event.get("agent_id") == agent_id:
-                            yield {
-                                "event": event.get("type", "message"),
-                                "data": str(event.get("data", {})),
-                            }
-                    except asyncio.TimeoutError:
-                        yield {"event": "ping", "data": ""}
-            finally:
-                _event_queues["agents"].remove(queue)
-
-        return EventSourceResponse(event_generator())
+        try:
+            while True:
+                if await request.is_disconnected():
+                    break
+                try:
+                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
+                    if event.get("agent_id") == agent_id:
+                        yield ServerSentEvent(
+                            data=event.get("data", {}),
+                            event=event.get("type", "message"),
+                        )
+                except asyncio.TimeoutError:
+                    yield ServerSentEvent(comment="ping")
+        finally:
+            _event_queues["agents"].remove(queue)
 
     @app.post("/api/v1/intents/{intent_id}/arbitrate")
     async def request_arbitration(
@@ -4621,7 +4618,10 @@ async def delete_channel(
         channel_messages.pop(channel_id, None)
         return {"status": "closed", "channel_id": channel_id}
 
-    @app.get("/api/v1/subscribe/channels/{channel_id}")
+    @app.get(
+        "/api/v1/subscribe/channels/{channel_id}",
+        response_class=EventSourceResponse,
+    )
     async def subscribe_channel(
         channel_id: str,
         request: Request,
@@ -4632,25 +4632,21 @@ async def subscribe_channel(
             raise HTTPException(status_code=404, detail="Channel not found")
         queue: asyncio.Queue = asyncio.Queue(maxsize=100)
         _event_queues["channels"].append(queue)
-
-        async def event_generator():
-            try:
-                while True:
-                    if await request.is_disconnected():
-                        break
-                    try:
-                        event = await asyncio.wait_for(queue.get(), timeout=30.0)
-                        if event.get("channel_id") == channel_id:
-                            yield {
-                                "event": event.get("type", "message"),
-                                "data": str(event.get("data", {})),
-                            }
-                    except asyncio.TimeoutError:
-                        yield {"event": "ping", "data": ""}
-            finally:
-                _event_queues["channels"].remove(queue)
-
-        return EventSourceResponse(event_generator())
+        try:
+            while True:
+                if await request.is_disconnected():
+                    break
+                try:
+                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
+                    if event.get("channel_id") == channel_id:
+                        yield ServerSentEvent(
+                            data=event.get("data", {}),
+                            event=event.get("type", "message"),
+                        )
+                except asyncio.TimeoutError:
+                    yield ServerSentEvent(comment="ping")
+        finally:
+            _event_queues["channels"].remove(queue)
 
     @app.post(
         "/api/v1/channels/{channel_id}/messages",
diff --git a/pyproject.toml b/pyproject.toml
index 95472e3..855d3ce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "openintent"
-version = "0.14.1"
+version = "0.15.0"
 description = "Python SDK and Server for the OpenIntent Coordination Protocol"
 readme = "README.md"
 license = {text = "MIT"}
@@ -43,10 +43,9 @@ dependencies = [
 
 [project.optional-dependencies]
 server = [
-    "fastapi>=0.104.0",
+    "fastapi>=0.135.0",
     "uvicorn[standard]>=0.24.0",
     "sqlalchemy>=2.0.0",
-    "sse-starlette>=1.6.0",
     "pydantic>=2.0.0",
 ]
 dev = [