From f6b2bd44afeaa4b6694fb0813d0597a343b6579c Mon Sep 17 00:00:00 2001
From: Simon Kjellberg <simon.kjellberg@gmail.com>
Date: Tue, 10 Feb 2026 20:29:37 +0100
Subject: [PATCH 1/2] fix(discord): prevent rate limit retry storm with shared
 endpoint gate

Concurrent requests to the same Discord endpoint independently retried
on 429 responses, each consuming the rate limit budget and preventing
others from succeeding. This created a self-sustaining retry loop
lasting ~30 seconds per cycle.

Add a shared per-endpoint rate limit gate so all concurrent callers
wait on the same timestamp, and cap retries at 5 to prevent prolonged
retry storms.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 app/lib/discord/api.test.ts | 129 +++++++++++++++++++++++++++++++++++-
 app/lib/discord/api.ts      |  46 ++++++++++++-
 2 files changed, 169 insertions(+), 6 deletions(-)

diff --git a/app/lib/discord/api.test.ts b/app/lib/discord/api.test.ts
index ff7b39ce..9e5b5d7b 100644
--- a/app/lib/discord/api.test.ts
+++ b/app/lib/discord/api.test.ts
@@ -5,7 +5,12 @@ import { log } from "@/lib/log";
 import type { Username } from "@/lib/session";
 import { server } from "@/mocks/node";
 
-import { getChannelMessages, getMessageChain, postChannelMessage } from "./api";
+import {
+  _resetRateLimitState,
+  getChannelMessages,
+  getMessageChain,
+  postChannelMessage,
+} from "./api";
 
 vi.mock(import("server-only"), () => ({}));
 
@@ -708,6 +713,7 @@ describe("rate limiting", () => {
   afterEach(() => {
     vi.useRealTimers();
     vi.restoreAllMocks();
+    _resetRateLimitState();
   });
 
   it("should retry after 429 and succeed", async () => {
@@ -759,6 +765,7 @@ describe("rate limiting", () => {
         elapsedMs: 0,
         retryAfterMs: 35000,
         global: true,
+        retries: 1,
       },
       "Discord rate limit exceeded max wait time",
     );
@@ -786,7 +793,12 @@ describe("rate limiting", () => {
 
     expect(attempts).toBe(2);
     expect(logWarnSpy).toHaveBeenCalledWith(
-      { endpoint: expect.any(String), retryAfterMs: 1000, global: false },
+      {
+        endpoint: expect.any(String),
+        retryAfterMs: 1000,
+        global: false,
+        retries: 1,
+      },
       "Discord rate limited, retrying",
     );
   });
@@ -823,7 +835,12 @@ describe("rate limiting", () => {
 
     expect(attempts).toBe(2);
     expect(logWarnSpy).toHaveBeenCalledWith(
-      { endpoint: expect.any(String), retryAfterMs: 15000, global: false },
+      {
+        endpoint: expect.any(String),
+        retryAfterMs: 15000,
+        global: false,
+        retries: 1,
+      },
       "Discord rate limited, retrying",
     );
     expect(logErrorSpy).toHaveBeenCalledWith(
@@ -832,8 +849,114 @@ describe("rate limiting", () => {
         elapsedMs: 15000,
         retryAfterMs: 16000,
         global: false,
+        retries: 2,
       },
       "Discord rate limit exceeded max wait time",
     );
   });
+
+  it("should throw when max retries exceeded", async () => {
+    const logWarnSpy = vi.spyOn(log, "warn").mockImplementation(() => {});
+    const logErrorSpy = vi.spyOn(log, "error").mockImplementation(() => {});
+    let attempts = 0;
+
+    server.use(
+      http.get(`${DISCORD_BASE_URL}/channels/:channelId/messages`, () => {
+        attempts++;
+        return HttpResponse.json(
+          { message: "Rate limited", retry_after: 1, global: false },
+          { status: 429 },
+        );
+      }),
+    );
+
+    const promise = getChannelMessages();
+    const assertion = expect(promise).rejects.toThrow(
+      "Discord rate limit exceeded",
+    );
+
+    await vi.advanceTimersByTimeAsync(10000);
+    await assertion;
+
+    // 1 initial + 5 retries = 6 total attempts
+    expect(attempts).toBe(6);
+    expect(logWarnSpy).toHaveBeenCalledTimes(5);
+    expect(logErrorSpy).toHaveBeenCalledWith(
+      expect.objectContaining({ retries: 6 }),
+      "Discord rate limit exceeded max wait time",
+    );
+  });
+
+  it("should share rate limit state across concurrent requests", async () => {
+    vi.spyOn(log, "warn").mockImplementation(() => {});
+    let fetchCount = 0;
+
+    server.use(
+      http.get(`${DISCORD_BASE_URL}/channels/:channelId/messages`, () => {
+        fetchCount++;
+        if (fetchCount === 1) {
+          return HttpResponse.json(
+            { message: "Rate limited", retry_after: 2, global: false },
+            { status: 429 },
+          );
+        }
+        return HttpResponse.json([]);
+      }),
+    );
+
+    // First request gets 429 and sets the shared gate
+    const promise1 = getChannelMessages();
+    await vi.advanceTimersByTimeAsync(0);
+
+    // Second request arrives while rate limit gate is active
+    const promise2 = getChannelMessages();
+
+    // Advance past the 2-second rate limit window
+    await vi.advanceTimersByTimeAsync(2000);
+
+    await Promise.all([promise1, promise2]);
+
+    // Without shared state: 4 fetches (both hit 429 independently, both retry)
+    // With shared state: 3 fetches (first gets 429, second waits on gate, both succeed after delay)
+    expect(fetchCount).toBe(3);
+  });
+
+  it("should include retry count in log messages", async () => {
+    const logWarnSpy = vi.spyOn(log, "warn").mockImplementation(() => {});
+    let attempts = 0;
+
+    server.use(
+      http.get(`${DISCORD_BASE_URL}/channels/:channelId/messages`, () => {
+        attempts++;
+        if (attempts <= 3) {
+          return HttpResponse.json(
+            { message: "Rate limited", retry_after: 1, global: false },
+            { status: 429 },
+          );
+        }
+        return HttpResponse.json([]);
+      }),
+    );
+
+    const promise = getChannelMessages();
+    await vi.advanceTimersByTimeAsync(3000);
+    await promise;
+
+    expect(logWarnSpy).toHaveBeenCalledTimes(3);
+    expect(logWarnSpy).toHaveBeenNthCalledWith(
+      1,
+      expect.objectContaining({ retries: 1 }),
+      "Discord rate limited, retrying",
+    );
+    expect(logWarnSpy).toHaveBeenNthCalledWith(
+      2,
+      expect.objectContaining({ retries: 2 }),
+      "Discord rate limited, retrying",
+    );
+    expect(logWarnSpy).toHaveBeenNthCalledWith(
+      3,
+      expect.objectContaining({ retries: 3 }),
+      "Discord rate limited, retrying",
+    );
+  });
 });
diff --git a/app/lib/discord/api.ts b/app/lib/discord/api.ts
index d4f82b8f..e6a29f15 100644
--- a/app/lib/discord/api.ts
+++ b/app/lib/discord/api.ts
@@ -15,6 +15,7 @@ import { type DiscordMessage, DiscordMessageSchema } from "./schemas";
 
 const BASE_URL = "https://discord.com/api/v10";
 const RATE_LIMIT_TIMEOUT_MS = 30_000;
+const MAX_RETRIES = 5;
 
 const RateLimitResponseSchema = z.object({
   message: z.string(),
@@ -23,6 +24,14 @@ const RateLimitResponseSchema = z.object({
   code: z.number().optional(),
 });
 
+// Per-endpoint rate limit tracking: endpoint → timestamp until which requests should wait
+const rateLimitUntil = new Map<string, number>();
+
+/** @internal Exported for test cleanup only */
+export function _resetRateLimitState(): void {
+  rateLimitUntil.clear();
+}
+
 async function call<T extends z.ZodType>(
   method: string,
   endpoint: string,
@@ -38,8 +47,26 @@ async function call<T extends z.ZodType>(
   }
 
   const startTime = performance.now();
+  let retries = 0;
 
   while (true) {
+    // Wait for any active rate limit gate on this endpoint
+    const waitUntil = rateLimitUntil.get(endpoint);
+    if (waitUntil != null) {
+      const waitMs = waitUntil - Date.now();
+      if (waitMs > 0) {
+        const elapsedMs = performance.now() - startTime;
+        if (elapsedMs + waitMs > RATE_LIMIT_TIMEOUT_MS) {
+          log.error(
+            { endpoint, elapsedMs, waitMs, retries },
+            "Discord rate limit exceeded max wait time",
+          );
+          throw new Error(`Discord rate limit exceeded`);
+        }
+        await new Promise((resolve) => setTimeout(resolve, waitMs));
+      }
+    }
+
     const response = await fetch(url, {
       method,
       body: method === "POST" ? JSON.stringify(params) : undefined,
@@ -53,6 +80,8 @@ async function call<T extends z.ZodType>(
     const json = await response.json();
 
     if (response.status === 429) {
+      retries++;
+
       const rateLimit = RateLimitResponseSchema.safeParse(json);
       const retryAfterMs = rateLimit.success
         ? rateLimit.data.retry_after * 1000
@@ -61,16 +90,24 @@ async function call<T extends z.ZodType>(
 
       const elapsedMs = performance.now() - startTime;
 
-      if (elapsedMs + retryAfterMs > RATE_LIMIT_TIMEOUT_MS) {
+      if (
+        retries > MAX_RETRIES ||
+        elapsedMs + retryAfterMs > RATE_LIMIT_TIMEOUT_MS
+      ) {
         log.error(
-          { endpoint, elapsedMs, retryAfterMs, global },
+          { endpoint, elapsedMs, retryAfterMs, global, retries },
           "Discord rate limit exceeded max wait time",
         );
         throw new Error(`Discord rate limit exceeded`);
       }
 
+      // Update shared rate limit gate (Math.max avoids overwriting a longer wait)
+      const newUntil = Date.now() + retryAfterMs;
+      const existing = rateLimitUntil.get(endpoint) ?? 0;
+      rateLimitUntil.set(endpoint, Math.max(existing, newUntil));
+
       log.warn(
-        { endpoint, retryAfterMs, global },
+        { endpoint, retryAfterMs, global, retries },
         "Discord rate limited, retrying",
       );
 
@@ -78,6 +115,9 @@ async function call<T extends z.ZodType>(
       continue;
     }
 
+    // Clear rate limit gate on success
+    rateLimitUntil.delete(endpoint);
+
     if (!response.ok) {
       log.error(
         { endpoint, status: response.status, body: json },

From c7464457ddb6e94b3991a08912bc941fc0d46c40 Mon Sep 17 00:00:00 2001
From: Simon Kjellberg <simon.kjellberg@gmail.com>
Date: Tue, 10 Feb 2026 20:50:11 +0100
Subject: [PATCH 2/2] test: cover gate timeout path for 100% patch coverage

Add _setRateLimitGate test helper to directly set a rate limit gate,
since the gate timeout path is unreachable through natural 429 responses
alone (the gate is bounded by RATE_LIMIT_TIMEOUT_MS).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 app/lib/discord/api.test.ts | 19 +++++++++++++++++++
 app/lib/discord/api.ts      |  5 +++++
 2 files changed, 24 insertions(+)

diff --git a/app/lib/discord/api.test.ts b/app/lib/discord/api.test.ts
index 9e5b5d7b..5b9ad5b8 100644
--- a/app/lib/discord/api.test.ts
+++ b/app/lib/discord/api.test.ts
@@ -7,6 +7,7 @@ import { server } from "@/mocks/node";
 
 import {
   _resetRateLimitState,
+  _setRateLimitGate,
   getChannelMessages,
   getMessageChain,
   postChannelMessage,
@@ -921,6 +922,24 @@ describe("rate limiting", () => {
     expect(fetchCount).toBe(3);
   });
 
+  it("should throw when shared gate wait exceeds timeout", async () => {
+    const logErrorSpy = vi.spyOn(log, "error").mockImplementation(() => {});
+
+    // Directly set a gate 31 seconds in the future — exceeds RATE_LIMIT_TIMEOUT_MS (30s)
+    const endpoint = "channels/test-discord-channel-id/messages";
+    _setRateLimitGate(endpoint, Date.now() + 31_000);
+
+    // A fresh request checks the gate: waitMs=31000, elapsedMs≈0, 0+31000 > 30000 → throws
+    await expect(getChannelMessages()).rejects.toThrow(
+      "Discord rate limit exceeded",
+    );
+
+    expect(logErrorSpy).toHaveBeenCalledWith(
+      { endpoint, elapsedMs: expect.any(Number), waitMs: 31_000, retries: 0 },
+      "Discord rate limit exceeded max wait time",
+    );
+  });
+
   it("should include retry count in log messages", async () => {
     const logWarnSpy = vi.spyOn(log, "warn").mockImplementation(() => {});
     let attempts = 0;
diff --git a/app/lib/discord/api.ts b/app/lib/discord/api.ts
index e6a29f15..7be578e8 100644
--- a/app/lib/discord/api.ts
+++ b/app/lib/discord/api.ts
@@ -32,6 +32,11 @@ export function _resetRateLimitState(): void {
   rateLimitUntil.clear();
 }
 
+/** @internal Exported for testing the gate timeout path */
+export function _setRateLimitGate(endpoint: string, until: number): void {
+  rateLimitUntil.set(endpoint, until);
+}
+
 async function call<T extends z.ZodType>(
   method: string,
   endpoint: string,