diff --git a/EDGE_CASES_AND_RECOVERY.md b/EDGE_CASES_AND_RECOVERY.md new file mode 100644 index 000000000..ed3fe1d52 --- /dev/null +++ b/EDGE_CASES_AND_RECOVERY.md @@ -0,0 +1,798 @@ +# Frontend Transport: Edge Cases, Viability & Recovery Strategies + +## Your Understanding is Correct ✅ + +Yes, you've got it exactly right: + +1. **Unified Chunk Interface**: AI SDK uses a single `UIMessageChunk` union type with different `type` fields +2. **Bi-directional Conversion**: + - Pgflow events (Supabase Realtime) → AI SDK chunks (frontend receives) + - AI responses in steps → Supabase Realtime events → AI SDK chunks (full cycle) + +```typescript +// In pgflow step (backend) +for await (const token of openai.stream()) { + await ctx.stream.emitText(token); // → Supabase Realtime event +} + +// Supabase broadcasts: { event_type: 'step:stream', chunk_type: 'text', chunk: { text: token } } + +// PgflowChatTransport (frontend) +onStepStreamEvent((event) => { + controller.enqueue({ + type: 'text-delta', // ← AI SDK chunk type + text: event.chunk.text, + }); +}); + +// useChat displays token in UI +``` + +--- + +## Critical Edge Case: Edge Runtime Shutdown Mid-Stream + +### The Problem You Identified 🔴 + +**Scenario:** +``` +1. Edge function executes final step 'generate_response' +2. LLM streams 80% of response via Supabase Realtime +3. Edge runtime hits timeout (Vercel Edge: 25s max) or crashes +4. Step never completes → No step:completed event +5. Run never completes → No run:completed event +6. Final output NOT saved to database +``` + +**What the user sees:** +``` +AI: "Based on my analysis, I recommend you should invest in renewable ener" +[stops mid-sentence] +[stuck in "streaming..." state forever] +``` + +**Why this is devastating:** +- ❌ Partial response is useless +- ❌ Can't retry (don't know where it stopped) +- ❌ Database has no record of output +- ❌ User's message is lost +- ❌ Frontend stuck in limbo + +**You're absolutely right: Without intervention, this is UNRECOVERABLE.** + +--- + +## Root Causes + +### 1. **Ephemeral Streaming Events** + +Supabase Realtime broadcasts are **not durable**: +```typescript +// This broadcast is gone forever once sent +await supabase.channel(`pgflow:run:${runId}`).send({ + type: 'broadcast', + event: 'step:stream', + payload: { chunk: 'Hello' }, // ← Ephemeral, not stored +}); +``` + +If client disconnects or edge function dies: +- Those chunks are **lost** +- Cannot be replayed +- Cannot be recovered + +### 2. **Step Output Depends on Completion** + +Pgflow's architecture: +```typescript +// Step function returns final output +const output = await stepFunction(input, ctx); + +// ONLY saved to database when step completes successfully +await supabase + .from('flow_steps') + .update({ status: 'completed', output }) + .eq('step_id', stepId); +``` + +If edge function dies before return: +- Output is **never saved** +- Database shows step as "started" +- No way to reconstruct final output from streamed chunks + +### 3. 
**Edge Runtime Timeouts are Aggressive** + +| Platform | Max Duration | +|----------|--------------| +| Vercel Edge | 25 seconds | +| Cloudflare Workers | 30 seconds (free), 15 min (paid) | +| Supabase Edge Functions | 120 seconds | +| AWS Lambda@Edge | 30 seconds | + +LLM responses can easily take 30+ seconds: +- GPT-4: ~20-30 tokens/sec +- 600 token response = 20-30 seconds +- ⚠️ Dangerously close to timeout + +--- + +## Solution 1: Store Chunks as They Stream (Recommended) + +### Architecture: Dual-Write Pattern + +Write chunks to **both** Realtime (fast) and Database (durable): + +```typescript +// Backend: Enhanced streaming context +export function createStreamingContext(supabase, runId, stepSlug) { + const chunkBuffer = []; + let chunkIndex = 0; + + return { + async emitText(text: string) { + // 1. Broadcast immediately (fast, ephemeral) + await supabase.channel(`pgflow:run:${runId}`).send({ + type: 'broadcast', + event: 'step:stream', + payload: { + run_id: runId, + step_slug: stepSlug, + chunk_type: 'text', + chunk_index: chunkIndex, + chunk: { text }, + }, + }); + + // 2. Buffer for database write (durable) + chunkBuffer.push({ + run_id: runId, + step_slug: stepSlug, + chunk_index: chunkIndex++, + chunk_type: 'text', + chunk_data: { text }, + created_at: new Date(), + }); + + // 3. Batch insert every 10 chunks or 1 second + if (chunkBuffer.length >= 10 || shouldFlush()) { + await this.flushChunks(); + } + }, + + async flushChunks() { + if (chunkBuffer.length === 0) return; + + await supabase + .from('streaming_chunks') + .insert([...chunkBuffer]); + + chunkBuffer.length = 0; // Clear buffer + }, + + // Called when step completes (or on error) + async finalize() { + await this.flushChunks(); // Flush remaining chunks + }, + }; +} +``` + +### Database Schema + +```sql +CREATE TABLE streaming_chunks ( + id BIGSERIAL PRIMARY KEY, + run_id UUID NOT NULL REFERENCES flow_runs(run_id), + step_slug TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + chunk_type TEXT NOT NULL, -- 'text' | 'data' | 'reasoning' + chunk_data JSONB NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW(), + + UNIQUE(run_id, step_slug, chunk_index) +); + +CREATE INDEX idx_streaming_chunks_run_step + ON streaming_chunks(run_id, step_slug, chunk_index); + +-- RLS policy +CREATE POLICY "users_read_own_chunks" + ON streaming_chunks FOR SELECT + USING ( + EXISTS ( + SELECT 1 FROM flow_runs + WHERE flow_runs.run_id = streaming_chunks.run_id + AND flow_runs.user_id = auth.uid() + ) + ); +``` + +### Recovery: Frontend Reconnection + +```typescript +// PgflowChatTransport.reconnectToStream() +async reconnectToStream(options: { chatId: string }) { + const run = await this.pgflowClient.getRun(chatId); + + if (!run) return null; + + return new ReadableStream({ + start: async (controller) => { + // 1. Fetch stored chunks from database + const { data: chunks } = await this.supabase + .from('streaming_chunks') + .select('*') + .eq('run_id', chatId) + .order('chunk_index'); + + // 2. Replay all chunks + for (const chunk of chunks || []) { + controller.enqueue({ + type: 'text-delta', + text: chunk.chunk_data.text, + }); + } + + // 3. Subscribe to new live chunks + if (run.status === 'started') { + // Still streaming, subscribe to Realtime + const unsubscribe = this.pgflowClient.onStepStreamEvent((event) => { + if (event.run_id === chatId && event.chunk_index > chunks.length) { + controller.enqueue(this.mapStreamEventToChunk(event)); + } + }); + + // Wait for completion + await run.waitForStatus('completed'); + unsubscribe(); + } + + // 4. 
If completed, get final output + if (run.status === 'completed') { + controller.enqueue({ + type: 'finish', + finishReason: 'stop', + }); + } + + controller.close(); + }, + }); +} +``` + +### Recovery: Edge Shutdown Scenario + +**Scenario:** +``` +1. Edge function streams 200 chunks via Realtime +2. Edge function dies at chunk 200 +3. No step:completed event +``` + +**What happens:** + +1. **Database has chunks 0-200** (durable storage) +2. **Frontend detects timeout** (no events for 30s) +3. **Frontend triggers recovery:** + ```typescript + // Detect stuck streaming + if (status === 'streaming' && noEventsSince(30000)) { + // Check database directly + const { data: chunks } = await supabase + .from('streaming_chunks') + .select('*') + .eq('run_id', runId) + .order('chunk_index'); + + if (chunks && chunks.length > 0) { + // We have partial response! + const partialText = chunks.map(c => c.chunk_data.text).join(''); + + // Show to user with indication + setMessages([ + ...messages, + { + role: 'assistant', + content: partialText, + metadata: { + incomplete: true, + error: 'Response generation was interrupted', + canRetry: true, + }, + }, + ]); + + // Offer retry + showRetryButton(); + } + } + ``` + +4. **User can retry** with context preserved + +--- + +## Solution 2: Checkpoint Pattern (Intermediate Outputs) + +Don't wait until the end to save output - checkpoint during streaming: + +```typescript +.step('generate_response', async (input, ctx) => { + let fullResponse = ''; + let checkpointCounter = 0; + + for await (const chunk of llm.stream(prompt)) { + // Stream to frontend + await ctx.stream.emitText(chunk); + fullResponse += chunk; + + // Checkpoint every 50 tokens + checkpointCounter++; + if (checkpointCounter % 50 === 0) { + await ctx.checkpoint({ + partial_response: fullResponse, + tokens_generated: checkpointCounter, + }); + } + } + + return { response: fullResponse }; +}) +``` + +**Backend implementation:** +```typescript +// In streaming context +async checkpoint(data: any) { + // Update step with partial output + await supabase + .from('flow_steps') + .update({ + checkpoint_data: data, + checkpoint_at: new Date(), + }) + .eq('run_id', this.runId) + .eq('step_slug', this.stepSlug); +} +``` + +**Recovery:** +```typescript +// If edge function dies, frontend can recover checkpoint +const step = run.step('generate_response'); +if (step.checkpoint_data?.partial_response) { + // Show partial response to user + displayPartialResponse(step.checkpoint_data.partial_response); + + // Ask if they want to retry or continue + askUserToRetry(); +} +``` + +--- + +## Solution 3: Two-Phase Commit (Optimistic + Confirmed) + +Show streamed content immediately, but mark as "pending" until confirmed: + +```typescript +// Frontend state +const [messages, setMessages] = useState([]); +const [pendingChunks, setPendingChunks] = useState(''); + +// As chunks arrive +onStepStreamEvent((event) => { + if (event.chunk_type === 'text') { + setPendingChunks(prev => prev + event.chunk.text); + + // Show immediately (optimistic) + setMessages(prev => [ + ...prev.slice(0, -1), + { + ...prev[prev.length - 1], + content: pendingChunks + event.chunk.text, + status: 'streaming', // ← Marked as unconfirmed + }, + ]); + } +}); + +// When step completes +run.step('generate').on('completed', (event) => { + // Mark as confirmed + setMessages(prev => [ + ...prev.slice(0, -1), + { + ...prev[prev.length - 1], + content: event.output.response, // ← Final confirmed output from DB + status: 'completed', // ← Confirmed + }, + ]); 
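+  // The confirmed output from the database now backs the message,
+  // so the optimistic chunk buffer can be safely reset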
+ setPendingChunks(''); +}); + +// If step fails +run.step('generate').on('failed', () => { + // Discard optimistic updates + setMessages(prev => [ + ...prev.slice(0, -1), + { + ...prev[prev.length - 1], + content: pendingChunks, + status: 'failed', + canRetry: true, + }, + ]); +}); +``` + +--- + +## Solution 4: Hybrid Approach (Recommended for Production) + +**Different strategies for different step types:** + +### Type A: Non-Streaming Steps (Most steps) +```typescript +// No per-token streaming needed +.step('search', async (input, ctx) => { + await ctx.stream.emitReasoning('Searching knowledge base...'); + + const results = await search(input.message); + + // Emit complete result when done + await ctx.stream.emitData('search_results', { + count: results.length, + preview: results.slice(0, 3), + }); + + // Output saved to database on completion + return { results }; +}) +``` + +**Recovery:** Easy! Step output is in database. + +### Type B: Streaming Steps with Persistence +```typescript +// LLM streaming with chunk storage +.step('generate', async (input, ctx) => { + // Enable automatic chunk persistence + ctx.stream.enablePersistence({ batchSize: 10 }); + + let fullResponse = ''; + for await (const token of llm.stream()) { + await ctx.stream.emitText(token); // Broadcasts + stores + fullResponse += token; + } + + // Final output saved to database + return { response: fullResponse }; +}) +``` + +**Recovery:** Can replay from stored chunks OR use final output. + +### Type C: Fire-and-Forget Steps (Analytics, Logging) +```typescript +// Don't block on these +.step('log_analytics', async (input, ctx) => { + // Fire and forget - don't care if it fails + await ctx.stream.emitData('analytics', { event: 'response_generated' }); + + return { logged: true }; +}) +``` + +--- + +## Latency Analysis + +### Per-Token Streaming Latency + +**Backend API Route (Direct SSE):** +``` +Token from OpenAI: 0ms + ↓ +Write to response stream: +1ms + ↓ +Network to client: +10-30ms + ↓ +useChat receives: +1ms + ↓ +React render: +16ms +--- +Total: ~28-48ms per token +``` + +**Frontend Transport (via Supabase Realtime):** +``` +Token from OpenAI: 0ms + ↓ +ctx.stream.emitText(): +2ms + ↓ +Supabase broadcast send: +10-30ms + ↓ +Supabase Realtime routing: +50-150ms ⚠️ + ↓ +WebSocket to client: +10-30ms + ↓ +PgflowChatTransport: +2ms + ↓ +useChat receives: +1ms + ↓ +React render: +16ms +--- +Total: ~91-231ms per token +``` + +**Verdict:** Frontend transport is **3-5x slower** due to Supabase Realtime overhead. + +### Mitigation: Batch Tokens + +```typescript +// Batch tokens to reduce events +async emitText(text: string) { + this.textBuffer += text; + + // Emit every 5 tokens OR every 100ms + const tokenCount = this.textBuffer.split(/\s+/).length; + const timeSinceLastEmit = Date.now() - this.lastEmit; + + if (tokenCount >= 5 || timeSinceLastEmit > 100) { + await this.broadcast('text', { text: this.textBuffer }); + this.textBuffer = ''; + this.lastEmit = Date.now(); + } +} +``` + +**Result:** +- Reduces events by 80% (1 event per 5 tokens instead of 5 events) +- Latency for first token: ~100-200ms +- Subsequent tokens appear in batches every 100ms +- Much more reasonable! 
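+
+Note that the snippet above never flushes a trailing partial batch — tokens that arrive after the last threshold crossing would sit in the buffer indefinitely. Below is a minimal sketch of a complete batcher with a timer-based flush; the `TokenBatcher` name and the injected `broadcast` callback are illustrative, not part of pgflow's API:
+
+```typescript
+// Illustrative token batcher (not pgflow API): flushes on a size
+// threshold, on a timer, or explicitly when the stream ends.
+type BroadcastFn = (payload: { text: string }) => Promise<void>;
+
+class TokenBatcher {
+  private buffer = '';
+  private timer: ReturnType<typeof setTimeout> | null = null;
+
+  constructor(
+    private broadcast: BroadcastFn,
+    private maxTokens = 5,
+    private flushIntervalMs = 100,
+  ) {}
+
+  async push(text: string): Promise<void> {
+    this.buffer += text;
+
+    // Flush immediately once enough tokens have accumulated
+    if (this.buffer.split(/\s+/).length >= this.maxTokens) {
+      await this.flush();
+      return;
+    }
+
+    // Otherwise guarantee a flush within flushIntervalMs
+    if (!this.timer) {
+      this.timer = setTimeout(() => void this.flush(), this.flushIntervalMs);
+    }
+  }
+
+  // Call once after the LLM stream ends so the final partial batch is not lost
+  async flush(): Promise<void> {
+    if (this.timer) {
+      clearTimeout(this.timer);
+      this.timer = null;
+    }
+    if (this.buffer.length === 0) return;
+    const text = this.buffer;
+    this.buffer = '';
+    await this.broadcast({ text });
+  }
+}
+```
+
+A streaming step would call `push()` for each token and `flush()` once the stream completes, mirroring the `finalize()` hook in the dual-write context shown earlier.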
+ +--- + +## Edge Runtime Timeout Strategies + +### Strategy 1: Step Timeout Limits + +Configure per-step timeouts and handle gracefully: + +```typescript +.step('generate', async (input, ctx) => { + // Set max execution time + ctx.setTimeout(20000); // 20 seconds + + try { + const response = await withTimeout( + streamLLMResponse(input, ctx), + 20000 + ); + return { response }; + } catch (error) { + if (error.name === 'TimeoutError') { + // Gracefully handle timeout + const partial = ctx.getStreamedContent(); + + // Save partial result + await ctx.checkpoint({ partial_response: partial }); + + // Let user know + throw new Error('Response generation timed out. Partial response saved.'); + } + throw error; + } +}) +``` + +### Strategy 2: Chunked Generation + +Break long generations into multiple steps: + +```typescript +// Instead of one long streaming step +.step('generate_part_1', async (input, ctx) => { + const partial = await llm.generate({ max_tokens: 200 }); + await ctx.stream.emitText(partial); + return { partial }; +}) +.step('generate_part_2', async (input, ctx) => { + const continuation = await llm.generate({ + prompt: input.partial + '...', + max_tokens: 200 + }); + await ctx.stream.emitText(continuation); + return { response: input.partial + continuation }; +}) +``` + +**Advantages:** +- Each step is short (under timeout) +- Progress saved between steps +- Can recover from any step failure + +### Strategy 3: Streaming Platforms with Longer Timeouts + +| Platform | Timeout | Streaming Support | Recommendation | +|----------|---------|-------------------|----------------| +| Vercel Edge | 25s | ❌ Too short | Don't use for LLM streaming | +| Cloudflare Workers | 15min (paid) | ✅ Good | Good choice | +| Supabase Edge Functions | 120s | ✅ Decent | Works for most cases | +| AWS Lambda | 15min | ✅ Good | Good but more complex | +| Self-hosted | Unlimited | ✅ Best | Full control | + +**Recommendation:** Use Supabase Edge Functions (120s timeout) or self-hosted workers for LLM streaming steps. + +--- + +## Viability Assessment + +### ✅ **VIABLE** for These Use Cases: + +**1. Multi-Step Pipelines (Your Use Case!)** +```typescript +// Perplexity-style research assistant +.step('expand_query', ...) // 2 seconds +.step('search_sources', ...) // 5 seconds +.step('rerank', ...) // 3 seconds +.step('extract', ...) // 4 seconds +.step('synthesize', ...) // 15 seconds → stream this +``` + +**Why it works:** +- Most steps don't need fine-grained streaming +- Only final synthesis step streams tokens +- Intermediate progress is valuable (user sees each step) +- Total time is long (30s+), so Realtime latency doesn't matter +- **Steps 1-4 outputs saved before step 5 streams** + +**2. Tool Calling / Multi-Agent** +```typescript +.step('route_to_agent', ...) // Decide which agent +.step('agent_1_research', ...) // First agent works +.step('agent_2_analyze', ...) // Second agent works +.step('synthesize', ...) // Combine results → stream +``` + +**Why it works:** +- Each agent's output is durably stored +- Can retry individual agents if they fail +- Streaming is optional (can emit complete results) + +### ⚠️ **PROBLEMATIC** for These Use Cases: + +**1. 
Real-Time Chat (ChatGPT-style)** +```typescript +// User expects instant token-by-token streaming +.step('generate', async (input, ctx) => { + for await (const token of llm.stream()) { + await ctx.stream.emitText(token); // Too slow via Realtime + } +}) +``` + +**Problems:** +- Supabase Realtime adds 50-150ms per token +- User perceives lag (especially on mobile) +- Traditional SSE API route is 3x faster + +**Solution:** Use backend API route pattern for simple chat. + +**2. Voice Assistants** +- Latency is critical (need <50ms) +- Frontend transport is too slow +- Use WebRTC or direct streaming + +### ❌ **NOT VIABLE** for These Use Cases: + +**1. High-Volume / Cost-Sensitive** +- Supabase Realtime pricing: $10/month per 1M messages +- 1 LLM response = 100-500 tokens = 100-500 Realtime messages +- 1000 responses/day = 150k messages = ~$1.50/day = $45/month just for Realtime +- Database writes for chunk storage add more cost + +**2. Guaranteed Delivery** +- Realtime is "best effort," not guaranteed +- Chunks can be lost if Realtime has issues +- Critical applications should use API routes + database persistence + +--- + +## Recommended Architecture Decision Tree + +``` +Do you have multi-step workflows (3+ steps)? +├─ YES → Continue +└─ NO → Use backend API route (simpler) + +Do most steps need token-by-token streaming? +├─ YES → Use backend API route (lower latency) +└─ NO → Continue + +Do you need intermediate progress visibility? +├─ YES → Continue +└─ NO → Use backend API route (simpler) + +Is latency <100ms per token critical? +├─ YES → Use backend API route +└─ NO → Continue + +Can you afford Supabase Realtime costs? +├─ YES → Use frontend transport with stored chunks ✅ +└─ NO → Use backend API route + +Are you okay with chunk storage overhead? +├─ YES → Use frontend transport with stored chunks ✅ +└─ NO → Use frontend transport with step outputs only +``` + +--- + +## Production-Ready Implementation Plan + +### Phase 1: Proof of Concept (No Chunk Storage) +- Implement basic PgflowChatTransport +- Stream only step completion events (not tokens) +- Test with multi-step pipeline +- Measure latency and user experience + +### Phase 2: Add Chunk Storage (If Needed) +- Add `streaming_chunks` table +- Implement dual-write in `createStreamingContext` +- Add reconnection logic with chunk replay +- Test edge function shutdown recovery + +### Phase 3: Optimize Performance +- Implement token batching (5 tokens per event) +- Add chunk buffer/flush strategy +- Monitor Supabase Realtime costs +- Optimize database writes + +### Phase 4: Production Hardening +- Add comprehensive error handling +- Implement retry logic +- Add monitoring/observability +- Load testing +- Cost analysis + +--- + +## Conclusion + +### Is Frontend Transport Viable? + +**YES**, but with important caveats: + +✅ **Perfect for:** +- Multi-step AI pipelines (Perplexity-style) +- Tool calling / multi-agent workflows +- Apps where intermediate progress matters +- Non-latency-critical applications + +⚠️ **Requires care for:** +- Token-by-token streaming (need chunk storage) +- Edge runtime timeouts (need checkpointing) +- Cost management (Realtime + DB writes) + +❌ **Avoid for:** +- Simple request/response chat (use API route) +- Ultra-low latency requirements +- High-volume / cost-sensitive apps + +### Your Edge Shutdown Concern is Valid ✅ + +**Without chunk storage:** Unrecoverable data loss +**With chunk storage:** Fully recoverable + +**Recommendation:** +Implement the **hybrid approach**: +1. 
Start without chunk storage (simpler, test viability) +2. Add chunk storage for critical streaming steps +3. Use checkpointing for long-running steps +4. Monitor edge function durations and timeout rates + +This gives you the benefits of frontend transport (no API routes, real-time progress) with recovery guarantees (chunk storage, checkpoints) when you need them. diff --git a/FRONTEND_TRANSPORT_DESIGN.md b/FRONTEND_TRANSPORT_DESIGN.md new file mode 100644 index 000000000..984aa8eca --- /dev/null +++ b/FRONTEND_TRANSPORT_DESIGN.md @@ -0,0 +1,1100 @@ +# Pgflow Frontend Transport with Streaming Steps + +## Architecture Overview + +**Philosophy:** Use pgflow client in the **frontend** as a custom ChatTransport, and implement **streaming helpers** in backend flows to emit incremental data that the frontend consumes as AI SDK chunks. + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Frontend (React + AI SDK) │ +│ │ +│ useChat({ transport: PgflowChatTransport }) │ +│ │ │ +│ └─→ PgflowClient (browser) │ +│ │ │ +│ └─→ Supabase Realtime (WebSocket) │ +│ ↕ │ +└──────────────┼───────────────────────────────────────────────┘ + │ + │ Broadcast Events + │ +┌──────────────┼───────────────────────────────────────────────┐ +│ ↕ │ +│ Supabase Database + Realtime │ +│ │ +│ Pgflow Flows (Backend - Supabase Functions/Edge) │ +│ │ │ +│ └─→ Steps with streaming context │ +│ .step('generate', async (input, ctx) => { │ +│ for await (chunk of llm.stream()) { │ +│ await ctx.stream.emit('text', chunk); │ +│ } │ +│ }) │ +└──────────────────────────────────────────────────────────────┘ +``` + +**Key Advantages:** +- ✅ No backend API routes needed (direct Supabase connection) +- ✅ Streaming works naturally via Supabase Realtime +- ✅ Type-safe frontend → backend communication +- ✅ RLS policies enforce authorization +- ✅ Works offline/reconnects automatically + +--- + +## 1. 
Streaming Context API for Pgflow Steps + +### 1.1 New Streaming Events + +Extend pgflow's broadcast events to support streaming chunks: + +```typescript +// pkgs/client/src/lib/types/events.ts + +/** + * New event type: step streaming chunks + * Emitted during step execution for incremental data + */ +export type BroadcastStepStreamEvent = { + event_type: 'step:stream'; + run_id: string; + step_slug: string; + stream_type: 'text' | 'data' | 'reasoning' | 'tool-input'; + chunk: Json; // The incremental data + index: number; // Chunk sequence number + timestamp: string; +}; + +// Add to existing BroadcastEvent union +export type BroadcastEvent = + | BroadcastRunEvent + | BroadcastStepEvent + | BroadcastStepStreamEvent; // NEW +``` + +### 1.2 Streaming Context Interface + +```typescript +// pkgs/dsl/src/lib/streaming-context.ts + +/** + * Streaming context passed to step functions + * Allows steps to emit incremental chunks during execution + */ +export interface StreamingContext { + /** + * Emit a streaming chunk to connected clients + */ + emit(type: 'text' | 'data' | 'reasoning' | 'tool-input', chunk: any): Promise; + + /** + * Emit text delta (for LLM streaming) + */ + emitText(text: string): Promise; + + /** + * Emit custom data + */ + emitData(key: string, data: any): Promise; + + /** + * Emit reasoning/thinking + */ + emitReasoning(reasoning: string): Promise; + + /** + * Emit tool execution progress + */ + emitToolInput(toolName: string, input: any): Promise; +} + +/** + * Step function signature with streaming context + */ +export type StepFunctionWithStreaming = ( + input: TInput, + ctx: { + stream: StreamingContext; + runId: string; + stepSlug: string; + } +) => Promise; +``` + +### 1.3 Implementation (Backend - Supabase Edge Function) + +```typescript +// supabase/functions/pgflow-streaming-helper/index.ts + +import { createClient } from '@supabase/supabase-js'; + +/** + * Create a streaming context for a pgflow step + * Broadcasts chunks via Supabase Realtime + */ +export function createStreamingContext( + supabase: SupabaseClient, + runId: string, + stepSlug: string +): StreamingContext { + let chunkIndex = 0; + + const emit = async ( + streamType: 'text' | 'data' | 'reasoning' | 'tool-input', + chunk: any + ) => { + const event: BroadcastStepStreamEvent = { + event_type: 'step:stream', + run_id: runId, + step_slug: stepSlug, + stream_type: streamType, + chunk, + index: chunkIndex++, + timestamp: new Date().toISOString(), + }; + + // Broadcast to the run's channel + await supabase.channel(`pgflow:run:${runId}`).send({ + type: 'broadcast', + event: 'step:stream', + payload: event, + }); + }; + + return { + emit, + + emitText: async (text: string) => { + await emit('text', { text }); + }, + + emitData: async (key: string, data: any) => { + await emit('data', { key, data }); + }, + + emitReasoning: async (reasoning: string) => { + await emit('reasoning', { reasoning }); + }, + + emitToolInput: async (toolName: string, input: any) => { + await emit('tool-input', { toolName, input }); + }, + }; +} +``` + +### 1.4 Usage in Flows + +```typescript +// Example: Chat flow with streaming LLM response + +import { Flow } from '@pgflow/dsl'; +import { createStreamingContext } from './pgflow-streaming-helper'; +import { OpenAI } from 'openai'; + +export const ChatFlow = new Flow<{ + message: string; + conversationId: string; +}>({ slug: 'streaming_chat' }) + + .step('retrieve_context', async (input, ctx) => { + // Emit reasoning about what we're doing + await 
ctx.stream.emitReasoning('Searching knowledge base...'); + + const results = await vectorSearch(input.message); + + // Emit the retrieved data + await ctx.stream.emitData('search_results', results); + + return { context: results }; + }) + + .step('generate_response', async (input, ctx) => { + const openai = new OpenAI(); + + // Stream LLM response token by token + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { role: 'system', content: `Context: ${input.context}` }, + { role: 'user', content: input.message }, + ], + stream: true, + }); + + let fullResponse = ''; + + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta?.content || ''; + if (delta) { + // Emit each token to frontend immediately + await ctx.stream.emitText(delta); + fullResponse += delta; + } + } + + // Return final result (stored in database) + return { response: fullResponse }; + }) + + .step('format', async (input) => { + return { + response: input.response, + formatted: true + }; + }); +``` + +--- + +## 2. Frontend PgflowChatTransport + +### 2.1 Implementation + +```typescript +// lib/pgflow-chat-transport.ts + +import { PgflowClient } from '@pgflow/client/browser'; +import type { + ChatTransport, + UIMessage, + UIMessageChunk +} from '@ai-sdk/react'; +import type { BroadcastStepStreamEvent } from '@pgflow/client'; + +export class PgflowChatTransport implements ChatTransport { + constructor( + private pgflowClient: PgflowClient, + private flowSlug: string, + ) {} + + async sendMessages(options: { + trigger: 'submit-message' | 'regenerate-message'; + chatId: string; + messageId: string | undefined; + messages: UIMessage[]; + abortSignal: AbortSignal | undefined; + }): Promise> { + + const { messages, chatId, abortSignal } = options; + const lastMessage = messages[messages.length - 1]; + + return new ReadableStream({ + start: async (controller) => { + try { + // Start the pgflow flow + const run = await this.pgflowClient.startFlow( + this.flowSlug, + { + message: lastMessage.content, + conversationId: chatId, + history: messages.slice(0, -1), + }, + chatId // Use chatId as runId for consistency + ); + + // Send start chunk + controller.enqueue({ + type: 'start', + id: run.run_id, + } as UIMessageChunk); + + // Listen to streaming events + const unsubscribeStream = this.pgflowClient.onStepEvent((event) => { + if (event.run_id !== run.run_id) return; + + // Handle streaming chunks + if (event.event_type === 'step:stream') { + const streamEvent = event as BroadcastStepStreamEvent; + const chunks = this.mapStreamEventToChunks(streamEvent); + chunks.forEach(chunk => controller.enqueue(chunk)); + } + }); + + // Listen to step completion + const unsubscribeSteps = this.pgflowClient.onStepEvent((event) => { + if (event.run_id !== run.run_id) return; + + if (event.event_type === 'step:completed') { + // Optionally emit step completion as data + controller.enqueue({ + type: 'data-step-complete', + data: { + step: event.step_slug, + output: event.output, + }, + } as UIMessageChunk); + } + }); + + // Listen to run completion/failure + const unsubscribeRun = run.on('*', (runEvent) => { + if (runEvent.event_type === 'run:completed') { + // Send finish chunk + controller.enqueue({ + type: 'finish', + finishReason: 'stop', + } as UIMessageChunk); + + // Cleanup + unsubscribeStream(); + unsubscribeSteps(); + unsubscribeRun(); + controller.close(); + } + + if (runEvent.event_type === 'run:failed') { + // Send error chunk + controller.enqueue({ + type: 'error', + error: new 
Error(runEvent.error_message), + } as UIMessageChunk); + + // Cleanup + unsubscribeStream(); + unsubscribeSteps(); + unsubscribeRun(); + controller.close(); + } + }); + + // Handle abort signal + if (abortSignal) { + abortSignal.addEventListener('abort', () => { + unsubscribeStream(); + unsubscribeSteps(); + unsubscribeRun(); + controller.close(); + }); + } + + // Wait for completion + await run.waitForStatus('completed', { + timeoutMs: 5 * 60 * 1000, // 5 minutes + signal: abortSignal, + }).catch(async () => { + // Check if it failed + await run.waitForStatus('failed', { + timeoutMs: 1000, + signal: abortSignal, + }); + }); + + } catch (error) { + controller.error(error); + } + }, + + cancel() { + // Cleanup on cancel (handled by abort signal) + }, + }); + } + + async reconnectToStream(options: { + chatId: string; + }): Promise | null> { + const { chatId } = options; + + // Try to get existing run + const run = await this.pgflowClient.getRun(chatId); + + if (!run) return null; + + // If already completed, return null + if (run.status === 'completed' || run.status === 'failed') { + return null; + } + + // Re-subscribe to the stream + return new ReadableStream({ + start: async (controller) => { + // Similar logic to sendMessages but without starting new run + // Just resubscribe to events... + + const unsubscribe = run.on('*', (event) => { + // Map events to chunks + // ... + }); + + await run.waitForStatus('completed', { timeoutMs: 60000 }); + unsubscribe(); + controller.close(); + }, + }); + } + + /** + * Map pgflow streaming events to AI SDK chunks + */ + private mapStreamEventToChunks( + event: BroadcastStepStreamEvent + ): UIMessageChunk[] { + switch (event.stream_type) { + case 'text': + return [{ + type: 'text-delta', + text: event.chunk.text, + }]; + + case 'reasoning': + return [{ + type: 'reasoning-delta', + reasoning: event.chunk.reasoning, + }]; + + case 'data': + return [{ + type: `data-${event.chunk.key}`, + data: event.chunk.data, + }]; + + case 'tool-input': + return [{ + type: 'tool-input-delta', + toolCallId: event.step_slug, + toolName: event.chunk.toolName, + argsTextDelta: JSON.stringify(event.chunk.input), + }]; + + default: + return []; + } + } +} +``` + +### 2.2 Frontend Usage + +```typescript +// app/page.tsx +'use client'; + +import { useChat } from '@ai-sdk/react'; +import { PgflowClient } from '@pgflow/client/browser'; +import { createClient } from '@supabase/supabase-js'; +import { PgflowChatTransport } from '@/lib/pgflow-chat-transport'; +import { useMemo } from 'react'; + +export default function ChatPage() { + const transport = useMemo(() => { + const supabase = createClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY! + ); + + const pgflow = new PgflowClient(supabase); + + return new PgflowChatTransport(pgflow, 'streaming_chat'); + }, []); + + const { + messages, + sendMessage, + status, + data // Custom data from streaming events + } = useChat({ + transport, + onData: (chunk) => { + // Handle custom streaming data + if (chunk.type === 'data-search_results') { + console.log('Search results:', chunk.data); + // Update UI with search progress + } + }, + }); + + return ( +
+    <div>
+      <div>
+        {messages.map((message) => (
+          <div key={message.id}>
+            <div>{message.role === 'user' ? 'You' : 'AI'}</div>
+            <div>{message.content}</div>
+          </div>
+        ))}
+
+        {/* Show custom streaming data */}
+        {data && data['search_results'] && (
+          <div>Searching: {JSON.stringify(data['search_results'])}</div>
+        )}
+
+        {status === 'streaming' && <div>AI is thinking...</div>}
+      </div>
+
+      <form
+        onSubmit={(e) => {
+          e.preventDefault();
+          const input = e.currentTarget.elements.namedItem('message') as HTMLInputElement;
+          sendMessage({ content: input.value });
+          input.value = '';
+        }}
+      >
+        <input name="message" />
+        <button type="submit">Send</button>
+      </form>
+    </div>
+ ); +} +``` + +--- + +## 3. Required Changes to Pgflow + +### 3.1 Client Changes + +```typescript +// pkgs/client/src/lib/PgflowClient.ts + +export class PgflowClient { + // ... existing code ... + + /** + * Listen to step streaming events (NEW) + */ + onStepStreamEvent( + callback: (event: BroadcastStepStreamEvent) => void + ): Unsubscribe { + return this.#realtimeAdapter.on('stepStreamEvent', callback); + } +} +``` + +```typescript +// pkgs/client/src/lib/adapters/SupabaseBroadcastAdapter.ts + +export class SupabaseBroadcastAdapter { + // ... existing code ... + + #handleBroadcastMessage(payload: unknown) { + const parsed = this.#parseJsonFields(payload); + + switch (parsed.event_type) { + case 'run:started': + case 'run:completed': + case 'run:failed': + this.#emitter.emit('runEvent', parsed as BroadcastRunEvent); + break; + + case 'step:started': + case 'step:completed': + case 'step:failed': + this.#emitter.emit('stepEvent', parsed as BroadcastStepEvent); + break; + + case 'step:stream': // NEW + this.#emitter.emit('stepStreamEvent', parsed as BroadcastStepStreamEvent); + break; + } + } +} +``` + +### 3.2 Backend Flow Executor Changes + +```typescript +// pkgs/executor/src/lib/step-executor.ts (or wherever steps are executed) + +import { createStreamingContext } from './streaming-context'; + +export async function executeStep( + step: Step, + input: any, + context: ExecutionContext +) { + const { runId, stepSlug, supabase } = context; + + // Create streaming context + const streamingContext = createStreamingContext(supabase, runId, stepSlug); + + // Call step function with streaming context + const output = await step.execute(input, { + stream: streamingContext, + runId, + stepSlug, + }); + + return output; +} +``` + +--- + +## 4. Helper Utilities + +### 4.1 AI SDK Streaming Adapter + +Helper to easily stream from AI SDK providers in pgflow steps: + +```typescript +// lib/pgflow-ai-sdk-adapter.ts + +import { streamText } from 'ai'; +import type { StreamingContext } from '@pgflow/dsl'; + +/** + * Stream AI SDK results through pgflow streaming context + */ +export async function streamAISDKResponse( + streamTextResult: ReturnType, + ctx: StreamingContext +): Promise { + let fullText = ''; + + for await (const chunk of streamTextResult.textStream) { + await ctx.emitText(chunk); + fullText += chunk; + } + + return fullText; +} + +// Usage in flow: +.step('generate', async (input, ctx) => { + const result = streamText({ + model: openai('gpt-4'), + prompt: input.message, + }); + + const response = await streamAISDKResponse(result, ctx.stream); + + return { response }; +}) +``` + +### 4.2 OpenAI Streaming Adapter + +```typescript +// lib/pgflow-openai-adapter.ts + +import type { OpenAI } from 'openai'; +import type { StreamingContext } from '@pgflow/dsl'; + +export async function streamOpenAIResponse( + stream: AsyncIterable, + ctx: StreamingContext +): Promise { + let fullResponse = ''; + + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta?.content || ''; + if (delta) { + await ctx.emitText(delta); + fullResponse += delta; + } + } + + return fullResponse; +} + +// Usage: +.step('generate', async (input, ctx) => { + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [...], + stream: true, + }); + + const response = await streamOpenAIResponse(stream, ctx.stream); + return { response }; +}) +``` + +### 4.3 Generic Async Iterator Adapter + +```typescript +// lib/pgflow-stream-adapter.ts + +import type { StreamingContext } from 
'@pgflow/dsl'; + +/** + * Stream any async iterable through pgflow + */ +export async function* streamToContext( + iterable: AsyncIterable, + ctx: StreamingContext, + mapper: (item: T) => { type: string; data: any } +): AsyncGenerator { + for await (const item of iterable) { + const { type, data } = mapper(item); + await ctx.emit(type as any, data); + yield item; + } +} + +// Usage: +.step('process', async (input, ctx) => { + const results = []; + + for await (const item of streamToContext( + processLargeDataset(input), + ctx.stream, + (item) => ({ type: 'data', data: { progress: item.progress } }) + )) { + results.push(item); + } + + return { results }; +}) +``` + +--- + +## 5. Authentication & Security + +### 5.1 Supabase RLS Policies + +```sql +-- Only authenticated users can start flows +CREATE POLICY "authenticated_users_can_start_flows" +ON flow_runs +FOR INSERT +TO authenticated +WITH CHECK (auth.uid() = user_id); + +-- Users can only read their own runs +CREATE POLICY "users_read_own_runs" +ON flow_runs +FOR SELECT +TO authenticated +USING (auth.uid() = user_id); + +-- Users can only subscribe to their own run channels +-- (Enforced via Supabase Realtime RLS) +ALTER PUBLICATION supabase_realtime ADD TABLE flow_runs; +``` + +### 5.2 Frontend Auth + +```typescript +// lib/pgflow-chat-transport.ts (updated) + +export class PgflowChatTransport implements ChatTransport { + constructor( + private supabaseClient: SupabaseClient, // Pass supabase client + private flowSlug: string, + ) {} + + async sendMessages(options) { + // Check authentication + const { data: { session } } = await this.supabaseClient.auth.getSession(); + + if (!session) { + throw new Error('User must be authenticated'); + } + + const pgflow = new PgflowClient(this.supabaseClient); + + // Rest of implementation... + } +} +``` + +--- + +## 6. 
Complete Example + +### 6.1 Backend Flow (Supabase Edge Function) + +```typescript +// supabase/functions/flows/streaming-chat.ts + +import { Flow } from '@pgflow/dsl'; +import { createClient } from '@supabase/supabase-js'; +import { OpenAI } from 'openai'; +import { streamOpenAIResponse } from './helpers/streaming'; + +export const StreamingChatFlow = new Flow<{ + message: string; + conversationId: string; + userId: string; +}>({ slug: 'streaming_chat' }) + + .step('classify_intent', async (input, ctx) => { + await ctx.stream.emitReasoning('Analyzing your message...'); + + const openai = new OpenAI(); + const intent = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo', + messages: [{ + role: 'user', + content: `Classify this message intent: "${input.message}"` + }], + }); + + const classification = intent.choices[0].message.content; + + await ctx.stream.emitData('intent', { classification }); + + return { intent: classification }; + }) + + .step('retrieve_context', async (input, ctx) => { + await ctx.stream.emitReasoning('Searching knowledge base...'); + + // Simulate vector search + const results = await vectorSearch(input.message); + + await ctx.stream.emitData('search_results', { + count: results.length, + sources: results.map(r => r.source), + }); + + return { context: results }; + }) + + .step('generate_response', async (input, ctx) => { + await ctx.stream.emitReasoning('Generating response...'); + + const openai = new OpenAI(); + + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { + role: 'system', + content: `Context: ${JSON.stringify(input.context)}` + }, + { + role: 'user', + content: input.message + } + ], + stream: true, + }); + + // Stream each token to frontend + const response = await streamOpenAIResponse(stream, ctx.stream); + + return { response }; + }); +``` + +### 6.2 Frontend with Progress Indicators + +```typescript +// app/chat/page.tsx +'use client'; + +import { useChat } from '@ai-sdk/react'; +import { PgflowChatTransport } from '@/lib/pgflow-chat-transport'; +import { createBrowserClient } from '@/lib/supabase'; +import { useMemo, useState } from 'react'; + +export default function ChatPage() { + const [customData, setCustomData] = useState>({}); + + const supabase = useMemo(() => createBrowserClient(), []); + + const transport = useMemo(() => { + return new PgflowChatTransport(supabase, 'streaming_chat'); + }, [supabase]); + + const { messages, sendMessage, status } = useChat({ + transport, + onData: (chunk) => { + // Capture custom streaming data + if (chunk.type.startsWith('data-')) { + const key = chunk.type.replace('data-', ''); + setCustomData(prev => ({ ...prev, [key]: chunk.data })); + } + }, + }); + + return ( +
+    <div>
+      {/* Messages */}
+      <div>
+        {messages.map((msg) => (
+          <div key={msg.id}>
+            <div>{msg.role === 'user' ? 'You' : 'AI'}</div>
+            <div>{msg.content}</div>
+          </div>
+        ))}
+      </div>
+
+      {/* Progress Indicators */}
+      {status === 'streaming' && (
+        <div>
+          <div>Processing...</div>
+
+          {customData.intent && (
+            <div>✓ Intent: {customData.intent.classification}</div>
+          )}
+
+          {customData.search_results && (
+            <div>✓ Found {customData.search_results.count} results</div>
+          )}
+        </div>
+      )}
+
+      {/* Input */}
+      <form
+        onSubmit={(e) => {
+          e.preventDefault();
+          const formData = new FormData(e.currentTarget);
+          const message = formData.get('message') as string;
+          sendMessage({ content: message });
+          e.currentTarget.reset();
+        }}
+        className="flex gap-2"
+      >
+        <input name="message" />
+        <button type="submit">Send</button>
+      </form>
+    </div>
+ ); +} +``` + +--- + +## 7. Implementation Roadmap + +### Phase 1: Core Streaming Infrastructure (Week 1-2) + +- [ ] Add `BroadcastStepStreamEvent` type to client +- [ ] Update `SupabaseBroadcastAdapter` to handle streaming events +- [ ] Add `onStepStreamEvent` to `PgflowClient` +- [ ] Create `StreamingContext` interface in DSL package + +### Phase 2: Backend Helpers (Week 2-3) + +- [ ] Implement `createStreamingContext` in executor +- [ ] Update step executor to pass streaming context +- [ ] Create OpenAI streaming adapter +- [ ] Create AI SDK streaming adapter +- [ ] Add tests for streaming context + +### Phase 3: Frontend Transport (Week 3-4) + +- [ ] Implement `PgflowChatTransport` class +- [ ] Add event mapping utilities +- [ ] Create TypeScript types for streaming events +- [ ] Add reconnection support +- [ ] Add tests for transport + +### Phase 4: Documentation & Examples (Week 4) + +- [ ] Add streaming examples to `/examples` +- [ ] Document streaming API +- [ ] Create migration guide +- [ ] Add troubleshooting guide + +### Phase 5: Polish & Optimization (Week 5) + +- [ ] Performance testing (measure latency) +- [ ] Error handling improvements +- [ ] Add telemetry/monitoring hooks +- [ ] Production hardening + +--- + +## 8. Advantages of This Approach + +### ✅ **No Backend API Routes** +- Frontend connects directly to Supabase +- Flows execute in Supabase Edge Functions or self-hosted workers +- Simpler architecture, fewer moving parts + +### ✅ **Native Streaming** +- Pgflow's event system is built for streaming +- Supabase Realtime handles WebSocket complexity +- Automatic reconnection on network failures + +### ✅ **Type Safety End-to-End** +- Flow input/output types +- Streaming event types +- AI SDK chunk types +- Full TypeScript inference + +### ✅ **Better Developer Experience** +- Write flows with familiar async/await +- Stream data with simple `ctx.stream.emit()` +- Frontend automatically receives chunks +- No manual SSE formatting + +### ✅ **Scalability** +- Supabase handles connection pooling +- Database-backed state +- RLS policies enforce security +- Works with serverless edge functions + +--- + +## 9. Comparison with Backend Approach + +| Aspect | Frontend Transport | Backend API Route | +|--------|-------------------|-------------------| +| Architecture | Direct Supabase connection | Client → API → Supabase | +| Latency | Lower (one less hop) | Higher (extra hop) | +| Auth | RLS policies | API middleware | +| Offline | Automatic (Supabase) | Manual implementation | +| Complexity | Medium (need RLS) | Lower (traditional) | +| Scalability | High (Supabase infra) | Medium (API scaling) | +| Cost | Realtime connections | API route compute | + +**Recommendation:** Frontend transport for most use cases. Backend API route only if you need server-side preprocessing or have strict security requirements. + +--- + +## 10. Next Steps + +1. **Prototype the streaming context API** + - Implement `createStreamingContext` helper + - Test with simple OpenAI streaming example + - Validate event broadcasting works + +2. **Build PgflowChatTransport** + - Implement basic transport class + - Test with useChat hook + - Verify reconnection logic + +3. **Create example application** + - Simple chat with streaming + - Show intermediate progress (search results, reasoning) + - Document the pattern + +4. 
**Gather feedback** + - Test with real AI workflows + - Measure performance (latency, throughput) + - Iterate on API design + +This approach gives you the **best of both worlds**: pgflow's powerful workflow orchestration with AI SDK's excellent frontend primitives, connected via Supabase Realtime for native streaming support. diff --git a/FRONTEND_TRANSPORT_SUMMARY.md b/FRONTEND_TRANSPORT_SUMMARY.md new file mode 100644 index 000000000..d551628d6 --- /dev/null +++ b/FRONTEND_TRANSPORT_SUMMARY.md @@ -0,0 +1,420 @@ +# Frontend Transport Approach - Summary + +## What We Built + +A **frontend-first integration** between pgflow and Vercel AI SDK's `useChat` hook, where: + +1. **Pgflow client runs in the browser** (not in API routes) +2. **Custom ChatTransport** connects `useChat` → pgflow → Supabase Realtime +3. **Streaming context API** allows pgflow steps to emit incremental data +4. **No backend API routes needed** - everything flows through Supabase + +## Why This Approach is Better + +### ❌ What You DIDN'T Want (Backend API Route Pattern) + +``` +Frontend useChat + ↓ HTTP POST +Backend API Route + ↓ Uses pgflow client + ↓ Transforms events to SSE +Frontend receives SSE +``` + +**Problems:** +- Backend API route needed +- Manual SSE formatting +- Extra network hop +- More code to maintain + +### ✅ What You DO Want (Frontend Transport Pattern) + +``` +Frontend useChat + PgflowChatTransport + ↓ Direct connection +Supabase Realtime (WebSocket) + ↓ Broadcasts +Backend pgflow flows (Supabase Edge Functions) + └─ ctx.stream.emitText(chunk) +``` + +**Benefits:** +- ✅ No API routes +- ✅ Native WebSocket streaming (Supabase Realtime) +- ✅ Simpler architecture +- ✅ Automatic reconnection +- ✅ RLS security built-in + +## Key Components + +### 1. Streaming Context API (Backend) + +Allows pgflow steps to stream data: + +```typescript +.step('generate', async (input, ctx) => { + // Stream LLM tokens as they come + for await (const chunk of llm.stream(prompt)) { + await ctx.stream.emitText(chunk); // ← Broadcasts to frontend + } + return { response: fullText }; +}) +``` + +**Events broadcast via Supabase Realtime:** +- `step:stream` event type +- Contains: `{ stream_type: 'text', chunk: { text: '...' } }` +- Received by frontend via WebSocket + +### 2. PgflowChatTransport (Frontend) + +Implements the `ChatTransport` interface from AI SDK: + +```typescript +const transport = new PgflowChatTransport(supabase, 'streaming_chat'); + +const { messages, sendMessage, status } = useChat({ + transport, // ← Custom transport +}); +``` + +**What it does:** +1. Starts pgflow flow when user sends message +2. Subscribes to Supabase Realtime channel +3. Receives `step:stream` events +4. Converts to AI SDK `UIMessageChunk` format +5. `useChat` automatically updates UI + +### 3. 
Event Mapping + +**Pgflow streaming events** → **AI SDK chunks:** + +| Pgflow Event | AI SDK Chunk | Use Case | +|--------------|--------------|----------| +| `ctx.stream.emitText(chunk)` | `{ type: 'text-delta', text: chunk }` | LLM token streaming | +| `ctx.stream.emitReasoning(msg)` | `{ type: 'data-reasoning', data: msg }` | Show AI thinking | +| `ctx.stream.emitData(key, val)` | `{ type: 'data-{key}', data: val }` | Custom progress | + +## Complete Flow Example + +### Backend: Pgflow Flow + +```typescript +export const ChatFlow = new Flow<{ message: string }>() + + .step('search', async (input, ctx) => { + await ctx.stream.emitReasoning('Searching...'); + const results = await search(input.message); + await ctx.stream.emitData('results', { count: results.length }); + return { results }; + }) + + .step('generate', async (input, ctx) => { + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [...], + stream: true, + }); + + let fullText = ''; + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta?.content || ''; + if (delta) { + await ctx.stream.emitText(delta); // Stream each token + fullText += delta; + } + } + + return { response: fullText }; + }); +``` + +### Frontend: React Component + +```typescript +'use client'; + +export default function Chat() { + const [progress, setProgress] = useState({}); + + const transport = useMemo(() => { + const supabase = createClient(...); + return new PgflowChatTransport(supabase, 'streaming_chat'); + }, []); + + const { messages, sendMessage, status } = useChat({ + transport, + onData: (chunk) => { + // Handle custom data + if (chunk.type === 'data-results') { + setProgress({ searchCount: chunk.data.count }); + } + }, + }); + + return ( +
+    <div>
+      {messages.map(m => <Message key={m.id} message={m} />)}
+
+      {status === 'streaming' && (
+        <div>Processing... {progress.searchCount} results found</div>
+      )}
+
+      <form onSubmit={(e) => {
+        e.preventDefault();
+        sendMessage({ content: e.target.message.value });
+      }}>
+        <input name="message" />
+        <button type="submit">Send</button>
+      </form>
+    </div>
+ ); +} +``` + +## Implementation Requirements + +### For Pgflow Core + +**Need to add:** + +1. **Event type**: `BroadcastStepStreamEvent` + - Location: `pkgs/client/src/lib/types/events.ts` + - Fields: `event_type: 'step:stream'`, `stream_type`, `chunk`, etc. + +2. **Client method**: `onStepStreamEvent(callback)` + - Location: `pkgs/client/src/lib/PgflowClient.ts` + - Listens to streaming events globally + +3. **Adapter update**: Handle `step:stream` events + - Location: `pkgs/client/src/lib/adapters/SupabaseBroadcastAdapter.ts` + - Parse and emit streaming events + +4. **Streaming context**: `createStreamingContext(supabase, runId, stepSlug)` + - Location: New package or in executor + - Returns: `{ emitText, emitData, emitReasoning, emitToolInput }` + +5. **Step executor**: Pass streaming context to step functions + - Location: Wherever steps are executed + - Modify signature: `step.execute(input, { stream, runId, stepSlug })` + +### For Applications Using This + +**Need to provide:** + +1. Supabase project with Realtime enabled +2. RLS policies for security +3. Pgflow flows deployed (Supabase Edge Functions or self-hosted) +4. Frontend with `PgflowChatTransport` implementation + +## Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────┐ +│ FRONTEND (React) │ +│ │ +│ useChat({ transport: PgflowChatTransport }) │ +│ │ │ +│ └─→ transport.sendMessages() │ +│ │ │ +│ └─→ pgflow.startFlow('chat', input) │ +│ │ │ +│ └─→ Supabase Realtime Subscribe │ +│ ↓ WebSocket │ +└───────────────────┼─────────────────────────────────────┘ + │ + │ Broadcast Events + │ +┌───────────────────┼─────────────────────────────────────┐ +│ ↓ │ +│ SUPABASE DATABASE + REALTIME │ +│ │ +│ Channel: pgflow:run:{run_id} │ +│ ↑ │ +│ │ broadcast({ event: 'step:stream', payload }) │ +│ │ │ +│ PGFLOW FLOWS (Edge Functions) │ +│ │ │ +│ └─→ .step('generate', async (input, ctx) => { │ +│ for await (const chunk of llm.stream()) { │ +│ await ctx.stream.emitText(chunk); ──────────┘ +│ } +│ }) +└─────────────────────────────────────────────────────────┘ +``` + +## Security Model + +**RLS Policies:** + +```sql +-- Users can only start their own flows +CREATE POLICY "users_start_own_flows" +ON flow_runs FOR INSERT +TO authenticated +WITH CHECK (auth.uid() = user_id); + +-- Users can only read their own runs +CREATE POLICY "users_read_own_runs" +ON flow_runs FOR SELECT +TO authenticated +USING (auth.uid() = user_id); + +-- Realtime RLS (enforced by Supabase) +-- Users automatically can only subscribe to channels for their runs +``` + +**Authentication Flow:** + +1. User signs in via Supabase Auth +2. Frontend gets session token +3. `PgflowChatTransport` checks session before starting flows +4. RLS policies enforce access control at database level +5. Realtime channels inherit RLS permissions + +## Performance Characteristics + +**Latency:** + +- **Token-to-display**: ~50-100ms (Supabase Realtime + WebSocket) +- **vs. SSE approach**: Similar (SSE is also ~50-100ms) +- **vs. HTTP polling**: Much faster (polling is 200ms+ minimum) + +**Throughput:** + +- Supabase Realtime: Thousands of concurrent connections per project +- Database writes: Each `emitText()` broadcasts (not writes to DB) +- No database bottleneck for streaming + +**Cost:** + +- Supabase Free Tier: 2GB database, 500MB bandwidth, Realtime included +- Pro: $25/mo for production workloads +- vs. 
API route approach: Similar (both need compute + database) + +## Migration from API Routes + +**If you have existing API routes:** + +```typescript +// Before: Backend API route +export async function POST(req) { + const { messages } = await req.json(); + const result = streamText({ model: openai('gpt-4'), messages }); + return result.toDataStreamResponse(); +} + +// After: Pgflow flow (deployed separately) +export const ChatFlow = new Flow() + .step('generate', async (input, ctx) => { + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: input.messages, + stream: true, + }); + return { response: await streamOpenAIResponse(stream, ctx.stream) }; + }); + +// Frontend changes from: +const { messages } = useChat({ api: '/api/chat' }); + +// To: +const transport = new PgflowChatTransport(supabase, 'chat_flow'); +const { messages } = useChat({ transport }); +``` + +**Migration strategy:** + +1. Deploy pgflow flow to Supabase Edge Function +2. Test flow works independently +3. Create `PgflowChatTransport` in frontend +4. Switch `useChat` to use transport +5. Remove old API route + +## Next Steps + +### Phase 1: Prototype (You Can Do Now) + +1. Copy `PgflowChatTransport` implementation to your project +2. Create a simple streaming flow manually +3. Test with `useChat` hook +4. Validate the approach works + +### Phase 2: Pgflow Core Support (Requires Core Changes) + +1. Add `BroadcastStepStreamEvent` type to client +2. Implement `onStepStreamEvent()` in `PgflowClient` +3. Update `SupabaseBroadcastAdapter` to handle streaming events +4. Create `StreamingContext` interface and implementation + +### Phase 3: Production Ready (Polish) + +1. Add TypeScript types package for streaming events +2. Create `@pgflow/ai-sdk` integration package +3. Add comprehensive tests +4. Document best practices +5. Create templates/examples + +## Comparison with Alternatives + +### vs. Backend API Routes (Original Design) + +| Aspect | Frontend Transport | Backend API | +|--------|-------------------|-------------| +| **Architecture** | Browser → Supabase Realtime | Browser → API → Supabase | +| **Latency** | Lower (one less hop) | Higher | +| **Code Complexity** | Medium (need RLS) | Lower (traditional) | +| **Streaming** | Native (WebSocket) | Manual (SSE) | +| **Reconnection** | Automatic (Supabase) | Manual | +| **Security** | RLS policies | API middleware | + +**Winner:** Frontend transport (simpler overall, leverages Supabase) + +### vs. 
Pure AI SDK (No Pgflow) + +| Aspect | Pgflow + AI SDK | AI SDK Alone | +|--------|----------------|--------------| +| **Workflow Orchestration** | ✅ Multi-step DAGs | ❌ Manual | +| **State Persistence** | ✅ Database-backed | ❌ Ephemeral | +| **Observability** | ✅ SQL queries | ❌ Logs only | +| **Complexity** | Higher (two systems) | Lower | +| **Time to Ship** | Slower (more setup) | Faster | + +**Winner:** Depends on use case (pgflow for complex, AI SDK for simple) + +## Conclusion + +**This approach is ideal when:** + +- ✅ You're building on Supabase already +- ✅ You need workflow orchestration (multi-step) +- ✅ State persistence is important +- ✅ You want to avoid managing API routes +- ✅ WebSocket streaming is acceptable + +**Stick with API routes when:** + +- ❌ You're not using Supabase +- ❌ You need server-side preprocessing +- ❌ Your team is unfamiliar with RLS +- ❌ You prefer traditional REST patterns + +**The frontend transport pattern is the cleanest way to integrate pgflow with Vercel AI SDK, leveraging both systems' strengths without unnecessary complexity.** + +--- + +## Files Created + +1. **FRONTEND_TRANSPORT_DESIGN.md** - Complete technical design document +2. **examples/vercel-ai-sdk-integration/** - Example implementations: + - `frontend/lib/pgflow-chat-transport.ts` - ChatTransport implementation + - `backend/helpers/streaming-context.ts` - Streaming context API + - `backend/flows/streaming-chat.example.ts` - Example flow + - `frontend/components/chat.example.tsx` - Example React component +3. **INTEGRATION_VALUE_ASSESSMENT.md** - Honest assessment of use cases +4. **PGFLOW_VERCEL_AI_SDK_INTEGRATION.md** - Original comprehensive analysis + +All committed to branch: `claude/explore-vercel-usechat-0pUZT` diff --git a/INTEGRATION_VALUE_ASSESSMENT.md b/INTEGRATION_VALUE_ASSESSMENT.md new file mode 100644 index 000000000..a72148975 --- /dev/null +++ b/INTEGRATION_VALUE_ASSESSMENT.md @@ -0,0 +1,533 @@ +# Pgflow + Vercel AI SDK Integration: Value Assessment + +## TL;DR: Does This Integration Make Sense? + +**Yes, but only for specific use cases.** This integration is **NOT** a replacement for the AI SDK's built-in capabilities. Instead, it's a specialized solution for applications that need **database-backed workflow orchestration** for their AI chat features. + +**Target Audience:** ~20% of AI SDK users with complex, production-grade requirements +**Not For:** Simple chatbots or prototype applications (80% of AI SDK users) + +--- + +## What AI SDK Users Gain + +### 1. ✅ **Production-Grade State Persistence** + +**Problem AI SDK Has:** +- Chat state is ephemeral (stored in React state or memory) +- Server restarts lose conversation context +- No built-in conversation history storage + +**What Pgflow Adds:** +```typescript +// User closes browser mid-conversation +// 2 hours later, they come back... + +const existingRun = await pgflow.getRun(conversationId); +// Full state recovered: all steps, outputs, context + +// Resume exactly where they left off +const { messages } = useChat({ id: conversationId }); +// All previous messages automatically loaded +``` + +**Real-World Value:** +- Long-running AI workflows (data analysis, research, content generation) +- Mobile users with flaky connections +- Enterprise dashboards where users expect persistence +- Compliance/audit requirements (full conversation history in database) + +--- + +### 2. 
✅ **Complex Multi-Step Workflow Orchestration** + +**Problem AI SDK Has:** +- Built for linear request/response patterns +- Complex orchestration requires manual state management in API routes +- No built-in DAG execution or dependency management + +**What Pgflow Adds:** +```typescript +// Example: Research assistant with complex pipeline +const ResearchFlow = new Flow<{ query: string }>() + .step('expand_query', async ({ query }) => { + // Generate search variations + return { queries: ['q1', 'q2', 'q3'] }; + }) + .step('parallel_search', async ({ queries }) => { + // Search multiple sources in parallel + return { results: [...] }; + }) + .step('rerank', async ({ results }) => { + // ML-based reranking + return { ranked: [...] }; + }) + .step('extract_insights', async ({ ranked }) => { + // LLM extraction from each source + return { insights: [...] }; + }) + .step('synthesize', async ({ insights }) => { + // Final synthesis + return { response: '...' }; + }); + +// Each step streams progress to UI via useChat +// Database tracks execution, enables retry on failure +// Can pause/resume between steps +``` + +**Real-World Value:** +- RAG pipelines with multiple retrieval/reranking stages +- Multi-agent systems (different AI agents for different steps) +- Workflows with human-in-the-loop approvals +- Error recovery (retry individual steps, not entire conversation) + +**Without Pgflow:** +You'd need to build all this orchestration logic manually in API routes, manage state in Redis/memory, implement retries, etc. + +--- + +### 3. ✅ **Deep Observability & Debugging** + +**Problem AI SDK Has:** +- Limited visibility into what happened during a conversation +- Debugging requires logs/traces (if you set them up) +- No built-in analytics on workflow performance + +**What Pgflow Adds:** +```sql +-- Every step is in the database +SELECT + step_slug, + status, + started_at, + completed_at, + completed_at - started_at as duration, + output +FROM flow_steps +WHERE run_id = 'abc123' +ORDER BY started_at; + +-- Analyze performance across all conversations +SELECT + step_slug, + AVG(completed_at - started_at) as avg_duration, + COUNT(*) as executions, + COUNT(*) FILTER (WHERE status = 'failed') as failures +FROM flow_steps +WHERE flow_slug = 'chat_workflow' +GROUP BY step_slug; +``` + +**Real-World Value:** +- Debug why a specific conversation failed (full step-by-step history) +- Identify bottleneck steps in your workflow +- A/B test different workflow configurations +- Compliance/audit trails (required for healthcare, finance, legal) +- Analytics on user interaction patterns + +--- + +### 4. 
✅ **Reliability & Error Recovery** + +**Problem AI SDK Has:** +- If API route crashes mid-stream, conversation state is lost +- No built-in retry logic for individual steps +- User has to restart entire conversation + +**What Pgflow Adds:** +```typescript +// Step 3 of 5 fails due to API rate limit +// Pgflow automatically marks it as 'failed' in database + +// User clicks "Retry" +const run = await pgflow.getRun(conversationId); + +// Smart retry: only re-run failed step, not entire workflow +if (run.step('expensive_api_call').status === 'failed') { + await retryStep(run.run_id, 'expensive_api_call'); +} + +// Steps 1-2 outputs already in database, reused +// Only step 3 re-executes +``` + +**Real-World Value:** +- Expensive LLM calls (GPT-4 Claude) that you don't want to re-run +- Workflows with external API calls that might fail +- Long-running processes (30+ seconds) where partial progress matters +- Better UX for users (don't lose their work) + +--- + +### 5. ✅ **Scalability for High-Concurrency Scenarios** + +**Problem AI SDK Has:** +- In-memory state management in API routes +- Scaling requires sticky sessions or external state store +- No built-in queueing or rate limiting + +**What Pgflow Adds:** +```typescript +// 1000 concurrent conversations +// Each conversation is a database row +// PostgreSQL handles concurrency, not your API route + +// Natural rate limiting via database connection pool +const pgflow = new PgflowClient(supabase, { + maxPgConnections: 10 // Prevent overload +}); + +// Failed workflows automatically queued for retry +// No in-memory state to lose during deployments +``` + +**Real-World Value:** +- Enterprise apps with thousands of concurrent users +- Serverless deployments (stateless API routes) +- Zero-downtime deployments (state in database, not memory) +- Natural backpressure (database queue prevents overload) + +--- + +### 6. 
✅ **Multi-Tenant & Collaboration Features** + +**Problem AI SDK Has:** +- Built for single-user chat experiences +- No built-in multi-user collaboration + +**What Pgflow Adds:** +```typescript +// Multiple users collaborate on same conversation +const run = await pgflow.getRun(sharedConversationId); + +// User A adds message +run.on('*', (event) => { + // Broadcast to all connected users via Supabase Realtime + broadcastToRoom(conversationId, event); +}); + +// User B sees updates in real-time in their useChat UI + +// Database enforces access control via RLS +CREATE POLICY "team_conversations" +ON flow_runs +USING ( + team_id IN (SELECT team_id FROM team_members WHERE user_id = auth.uid()) +); +``` + +**Real-World Value:** +- Team collaboration (multiple people in same AI conversation) +- Customer support (agent takes over from bot) +- Shared research/brainstorming sessions +- Approval workflows (manager reviews AI output before sending) + +--- + +## What AI SDK Users DON'T Gain (Honest Limitations) + +### ❌ **Simplicity** +- **Complexity overhead**: Now managing two systems (AI SDK + pgflow + Supabase) +- **Learning curve**: Developers need to understand workflow orchestration +- **More moving parts**: Database migrations, Supabase setup, pgflow configuration + +**When this matters:** Prototypes, MVPs, simple chatbots, hackathons + +--- + +### ❌ **Latency** +- **Database roundtrips**: Each step writes to database (adds 10-50ms per step) +- **Supabase Realtime delay**: 300ms stabilization delay by default +- **Not optimal for speed**: Direct LLM streaming is faster + +**When this matters:** Real-time conversational AI, voice assistants, speed-critical apps + +--- + +### ❌ **Cost Efficiency for Simple Use Cases** +- **Supabase costs**: Database storage, realtime connections +- **Database writes**: Every step/event writes to database +- **Overkill for simple chat**: Just using AI SDK is cheaper + +**When this matters:** Side projects, low-budget apps, simple Q&A bots + +--- + +### ❌ **Built-in AI SDK Features** +- **Tool calling**: AI SDK has native, well-tested tool calling. Pgflow requires custom implementation +- **Provider switching**: AI SDK supports 30+ providers out-of-box. Pgflow requires integration code +- **Streaming tokens**: AI SDK streams individual tokens. 
Pgflow streams step completions (coarser granularity) + +**When this matters:** Apps that need fine-grained token streaming, multi-provider support, complex tool calling + +--- + +## The Honest Use Case Assessment + +### 🟢 **STRONG FIT** - Pgflow Integration Makes Sense + +**Production Enterprise AI Applications:** +- Multi-step RAG pipelines (vector search → reranking → synthesis) +- AI research assistants (complex multi-source queries) +- AI-powered data analysis (long-running, multi-stage) +- Customer support AI with escalation workflows +- Content generation with approval steps +- Multi-agent systems (different AI models for different tasks) +- Compliance-critical applications (audit trails required) + +**Characteristics:** +- 5+ step workflows +- Need state persistence across sessions +- Human-in-the-loop approvals +- Must survive server restarts +- Debugging/observability critical +- High concurrency (100+ concurrent users) +- Budget for infrastructure + +**Example Companies:** +- Notion AI (complex document processing) +- Perplexity (multi-source research synthesis) +- Intercom (customer support with escalation) +- Jasper (content generation with review steps) + +--- + +### 🟡 **MEDIUM FIT** - Consider Carefully + +**Moderate Complexity Apps:** +- Basic RAG (single vector search → LLM) +- Chatbots with 2-3 step workflows +- Apps with occasional need for persistence +- Growing startups planning for scale + +**Decision Factors:** +- If you **already use PostgreSQL/Supabase**: Lower integration cost +- If you **plan to add complexity later**: Good foundation +- If you **need observability now**: Worth the investment +- If you're **prototyping**: Probably too heavy + +**Recommendation:** Start with pure AI SDK, migrate to pgflow when you hit limitations + +--- + +### 🔴 **POOR FIT** - Don't Use Pgflow + +**Simple Chatbots:** +- Prompt → LLM → Response (single step) +- No need for state persistence +- Low traffic (<100 users) +- Prototype/MVP stage +- Speed is critical (real-time voice, gaming) + +**Use AI SDK alone:** +```typescript +// This is perfectly fine without pgflow +const { messages, sendMessage } = useChat({ + api: '/api/chat' +}); + +// API route +export async function POST(req) { + const { messages } = await req.json(); + + const result = streamText({ + model: openai('gpt-4'), + messages, + }); + + return result.toDataStreamResponse(); +} +``` + +**Characteristics:** +- Simple request/response pattern +- State can be in React/memory +- Budget-conscious +- Need to ship fast + +--- + +## Decision Framework + +### When to Choose Pgflow Integration: + +``` +Answer these questions: + +1. Do you have 5+ step workflows? + YES → +1 for pgflow + +2. Do conversations need to survive server restarts? + YES → +1 for pgflow + +3. Do you need human approval steps? + YES → +1 for pgflow + +4. Is observability/debugging critical? + YES → +1 for pgflow + +5. Do you have 100+ concurrent users? + YES → +1 for pgflow + +6. Can you afford infrastructure complexity? + NO → -2 for pgflow + +7. Is latency critical (<100ms)? + YES → -2 for pgflow + +8. Is this a prototype/MVP? + YES → -2 for pgflow + +SCORE: + 4+ → Strong fit, use pgflow integration + 1-3 → Medium fit, evaluate trade-offs + ≤0 → Poor fit, use AI SDK alone +``` + +--- + +## Competitive Alternatives + +### vs. 
LangChain + useChat + +**LangChain:** +- More mature ecosystem +- Better tool calling, agents +- Memory management built-in +- BUT: In-memory (state lost on restart) +- BUT: Harder to debug (no database) +- BUT: Observability requires LangSmith ($$) + +**Pgflow:** +- Database-backed (state persists) +- Native observability (SQL queries) +- Simpler mental model (DAG in database) +- BUT: Younger ecosystem +- BUT: Less AI tooling out-of-box + +**When to choose pgflow:** State persistence and observability are critical + +--- + +### vs. Temporal/Inngest + AI SDK + +**Temporal/Inngest:** +- Purpose-built workflow engines +- Better developer experience for workflows +- More features (scheduling, cron, fan-out) +- BUT: Separate infrastructure to manage +- BUT: Higher complexity +- BUT: Not built specifically for PostgreSQL + +**Pgflow:** +- PostgreSQL-native (single database) +- Simpler for teams already using Postgres +- Supabase integration is seamless +- BUT: Less mature workflow features + +**When to choose pgflow:** Already using PostgreSQL/Supabase, simpler stack + +--- + +### vs. Pure AI SDK (Recommended for Most) + +**AI SDK Alone:** +- Fastest development +- Lowest complexity +- Best DX for simple cases +- Works out-of-box + +**When to choose AI SDK alone:** 80% of use cases + +**When to add pgflow:** When you hit the wall with state management, observability, or workflow complexity + +--- + +## Recommendation: Gradual Adoption Path + +### Phase 1: Start with AI SDK Only +```typescript +const { messages, sendMessage } = useChat(); +``` +- Ship fast +- Validate product-market fit +- Keep it simple + +### Phase 2: Add Complexity Detection +When you notice: +- API routes getting complex (>50 lines) +- Manual state management becoming painful +- Need to debug production conversations +- Users complaining about lost state + +### Phase 3: Evaluate Pgflow +- Prototype pgflow integration for most complex workflow +- Measure latency impact +- Assess observability benefits +- Calculate infrastructure costs + +### Phase 4: Selective Migration +- Keep simple endpoints on AI SDK alone +- Migrate complex workflows to pgflow +- Hybrid approach (not all-or-nothing) + +--- + +## Conclusion + +### Should You Implement This Integration? + +**YES, if:** +1. You're building **production enterprise AI apps** with complex workflows +2. **State persistence** across sessions is a hard requirement +3. **Observability/debugging** is critical (compliance, support) +4. You already use **PostgreSQL/Supabase** (lower integration cost) +5. You have **budget for infrastructure** complexity +6. You're solving **real workflow orchestration problems**, not theoretical ones + +**NO, if:** +1. Building **simple chatbots** or prototypes +2. **Speed to market** is priority #1 +3. Workflows are **linear** (prompt → LLM → response) +4. **Latency** is critical (<100ms requirement) +5. Team is **small** and can't maintain two systems +6. You haven't tried **AI SDK alone** and hit its limits yet + +### The Pragmatic Take + +**For 80% of AI SDK users:** Stick with AI SDK alone. It's excellent for most chat use cases. 
+ +**For the 20% building complex, production-grade AI workflows:** This integration provides genuine value: +- Database-backed state you can query with SQL +- Workflow orchestration that scales +- Observability that actually helps debug issues +- Reliability that enterprises demand + +**The integration makes sense, but it's a specialized tool for specialized needs.** + +--- + +## Next Steps if You Decide to Proceed + +1. **Validate the need:** + - List your actual workflow steps (be specific) + - Identify pain points with current AI SDK setup + - Estimate value of state persistence, observability + +2. **Start small:** + - Pick ONE complex workflow to migrate + - Keep simple endpoints on AI SDK + - Measure actual benefits (latency, debuggability, reliability) + +3. **Build incrementally:** + - Don't rewrite everything at once + - Create `@pgflow/ai-sdk` package for reusable integration code + - Share learnings with team + +4. **Monitor impact:** + - Latency metrics (does database slow things down?) + - Developer productivity (easier to debug?) + - Cost (Supabase bill vs. value gained) + +**The best integration is the one you actually need, not the one that sounds cool.** diff --git a/PGFLOW_VERCEL_AI_SDK_INTEGRATION.md b/PGFLOW_VERCEL_AI_SDK_INTEGRATION.md new file mode 100644 index 000000000..5b0b416cf --- /dev/null +++ b/PGFLOW_VERCEL_AI_SDK_INTEGRATION.md @@ -0,0 +1,1035 @@ +# Pgflow + Vercel AI SDK Integration Analysis + +## Executive Summary + +This document analyzes how **pgflow** (especially `@pgflow/client`) can be leveraged as a backend for chat applications built with **Vercel AI SDK's `useChat` hook** and related features. + +**Key Finding:** Pgflow's event-driven streaming architecture and the Vercel AI SDK's extensible transport system are highly compatible. Integration can happen at multiple levels: + +1. **Backend Integration**: Use pgflow flows in Next.js API routes that stream responses to `useChat` +2. **Custom Transport**: Create a `PgflowChatTransport` that directly connects frontend to pgflow client +3. **Hybrid Approach**: Combine pgflow workflows with AI SDK's native LLM capabilities + +--- + +## 1. Understanding the Components + +### 1.1 Pgflow Client Architecture + +**Core Capabilities:** +- Event-driven workflow execution with real-time streaming +- Type-safe TypeScript API with generics +- Supabase Realtime for WebSocket-based event propagation +- Multi-step workflow orchestration with DAG execution +- Per-run and per-step event subscriptions +- State management with snapshot loading and incremental updates + +**Key Classes:** +- `PgflowClient`: Main client for starting/managing flows +- `FlowRun`: Represents a single workflow execution with events +- `FlowStep`: Individual step within a flow with state tracking +- `SupabaseBroadcastAdapter`: Handles Supabase Realtime communication + +**Event Model:** +```typescript +// Run events +run.on('completed', (event) => { + console.log(event.output); // Type-safe output +}); + +// Step events +run.step('generate_response').on('completed', (event) => { + console.log(event.output); // Type-safe step output +}); +``` + +**Reference:** See comprehensive pgflow client analysis in previous Task output (agent ID: ad920c5) + +### 1.2 Vercel AI SDK useChat Hook + +**Architecture (AI SDK 5+):** +- Transport-based modular design +- Decoupled state management (compatible with Zustand, Redux, etc.) 
+- Server-Sent Events (SSE) streaming protocol
+- Default `/api/chat` endpoint with `DefaultChatTransport`
+
+**Core API:**
+```typescript
+const { messages, sendMessage, status } = useChat({
+  transport: customTransport, // Custom ChatTransport implementation
+  onFinish: (message, options) => {},
+  onError: (error) => {},
+});
+```
+
+**ChatTransport Interface:**
+```typescript
+interface ChatTransport<UI_MESSAGE extends UIMessage> {
+  sendMessages(options: {
+    trigger: 'submit-message' | 'regenerate-message';
+    chatId: string;
+    messageId: string | undefined;
+    messages: UI_MESSAGE[];
+    abortSignal: AbortSignal | undefined;
+  } & ChatRequestOptions): Promise<ReadableStream<UIMessageChunk>>;
+
+  reconnectToStream(options: {
+    chatId: string;
+  } & ChatRequestOptions): Promise<ReadableStream<UIMessageChunk> | null>;
+}
+```
+
+**Stream Protocol Requirements:**
+- Header: `x-vercel-ai-ui-message-stream: v1`
+- Format: Server-Sent Events (SSE)
+- Data chunks: `data: {"type":"text-delta","text":"..."}\n\n`
+- Termination: `data: [DONE]\n\n`
+
+---
+
+## 2. Integration Patterns
+
+### 2.1 Pattern A: Backend API Route Integration (Recommended)
+
+**Architecture:**
+```
+React UI (useChat)
+  → Next.js API Route (/api/chat)
+    → Pgflow Client → Pgflow Database
+    → Transform events to SSE stream
+  ← SSE Response
+← Update UI
+```
+
+**Implementation:**
+
+```typescript
+// 
app/page.tsx +'use client'; +import { useChat } from '@ai-sdk/react'; + +export default function Chat() { + const { messages, sendMessage, status } = useChat({ + api: '/api/chat', + }); + + return ( +
+    <div>
+      {messages.map(m => (
+        <div key={m.id}>{m.content}</div>
+      ))}
+      {/* message input form */}
+    </div>
+ ); +} +``` + +**Advantages:** +- ✅ Simple to implement +- ✅ Works with existing pgflow infrastructure +- ✅ Full server-side control and security +- ✅ Compatible with Vercel deployment +- ✅ Can leverage Next.js middleware + +**Disadvantages:** +- ❌ Requires server component for pgflow client +- ❌ Additional network hop (client → API → pgflow) + +--- + +### 2.2 Pattern B: Custom PgflowChatTransport + +**Architecture:** +``` +React UI (useChat with custom transport) + → PgflowChatTransport + → Pgflow Client (browser) + → Supabase Realtime (WebSocket) + → Pgflow Database +``` + +**Implementation:** + +```typescript +// lib/pgflow-chat-transport.ts +import { PgflowClient } from '@pgflow/client/browser'; +import type { ChatTransport, UIMessageChunk } from '@ai-sdk/react'; + +export class PgflowChatTransport implements ChatTransport { + constructor( + private pgflowClient: PgflowClient, + private flowSlug: string, + ) {} + + async sendMessages(options): Promise> { + const { messages, chatId } = options; + const lastMessage = messages[messages.length - 1]; + + // Start or continue flow + const run = await this.pgflowClient.startFlow(this.flowSlug, { + message: lastMessage.content, + conversation_id: chatId, + history: messages.slice(0, -1), + }, chatId); // Use chatId as run_id for continuity + + return new ReadableStream({ + async start(controller) { + // Listen to all events + const unsubscribe = run.on('*', (event) => { + switch (event.event_type) { + case 'run:started': + controller.enqueue({ + type: 'start', + id: run.run_id, + } as UIMessageChunk); + break; + + case 'run:completed': + // Enqueue text content + controller.enqueue({ + type: 'text-delta', + text: event.output.response, + } as UIMessageChunk); + + // Finish message + controller.enqueue({ + type: 'finish', + finishReason: 'stop', + } as UIMessageChunk); + + unsubscribe(); + controller.close(); + break; + + case 'run:failed': + controller.enqueue({ + type: 'error', + error: new Error(event.error_message), + } as UIMessageChunk); + unsubscribe(); + controller.close(); + break; + } + }); + + // Stream intermediate steps + if (run.hasStep('reasoning')) { + run.step('reasoning').on('completed', (event) => { + controller.enqueue({ + type: 'data-reasoning', + data: event.output, + } as UIMessageChunk); + }); + } + + // Wait for completion + try { + await run.waitForStatus('completed', { + timeoutMs: 60000, + signal: options.abortSignal, + }); + } catch (error) { + if (error.name !== 'AbortError') { + controller.error(error); + } + } + }, + + cancel() { + // Cleanup if needed + run.dispose?.(); + }, + }); + } + + async reconnectToStream(options): Promise | null> { + const { chatId } = options; + + // Try to get existing run + const run = await this.pgflowClient.getRun(chatId); + if (!run) return null; + + // If already completed, return null + if (run.status === 'completed' || run.status === 'failed') { + return null; + } + + // Re-subscribe to events + return new ReadableStream({ + async start(controller) { + const unsubscribe = run.on('*', (event) => { + // Same event handling as sendMessages + // ... 
(similar logic) + }); + + await run.waitForStatus('completed', { timeoutMs: 60000 }); + }, + }); + } +} +``` + +**Usage:** +```typescript +// app/page.tsx +'use client'; +import { useChat } from '@ai-sdk/react'; +import { PgflowClient } from '@pgflow/client/browser'; +import { createClient } from '@supabase/supabase-js'; +import { PgflowChatTransport } from '@/lib/pgflow-chat-transport'; +import { useMemo } from 'react'; + +export default function Chat() { + const transport = useMemo(() => { + const supabase = createClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY! + ); + const pgflow = new PgflowClient(supabase); + return new PgflowChatTransport(pgflow, 'chat_workflow'); + }, []); + + const { messages, sendMessage } = useChat({ + transport, + }); + + return ( +
+    <div>
+      {messages.map(m => (
+        <div key={m.id}>{m.content}</div>
+      ))}
+      {/* message input form */}
+    </div>
+ ); +} +``` + +**Advantages:** +- ✅ Direct browser → database communication (no API middleman) +- ✅ Leverages Supabase Realtime's WebSocket infrastructure +- ✅ Automatic reconnection via Supabase +- ✅ Works in offline-first scenarios +- ✅ Real-time progress from all workflow steps + +**Disadvantages:** +- ❌ Requires exposing Supabase credentials to browser +- ❌ Limited server-side control (auth, validation) +- ❌ RLS policies must be carefully configured +- ❌ Cannot use service role key (security risk) + +--- + +### 2.3 Pattern C: Hybrid with AI SDK Core + +**Architecture:** +``` +useChat + → API Route + → Pgflow flow for orchestration + → AI SDK Core for LLM calls (streamText) + → Combine streams + ← SSE Response +``` + +**Use Case:** Use pgflow for complex workflow orchestration (RAG, tool calling, multi-agent) while using AI SDK Core's native LLM streaming for the final response generation. + +**Implementation:** + +```typescript +// app/api/chat/route.ts +import { streamText } from 'ai'; +import { openai } from '@ai-sdk/openai'; +import { PgflowClient } from '@pgflow/client'; + +export async function POST(req: Request) { + const { messages } = await req.json(); + const supabase = createClient(...); + const pgflow = new PgflowClient(supabase); + + // Run pgflow for context retrieval/preparation + const prepRun = await pgflow.startFlow('chat_context_prep', { + message: messages[messages.length - 1].content, + }); + + await prepRun.waitForStatus('completed'); + const context = prepRun.output; + + // Use AI SDK Core for streaming LLM response + const result = streamText({ + model: openai('gpt-4'), + messages: [ + { role: 'system', content: context.systemPrompt }, + ...messages, + ], + }); + + return result.toDataStreamResponse(); +} +``` + +**Advantages:** +- ✅ Best of both worlds: pgflow orchestration + AI SDK streaming +- ✅ Leverages AI SDK's built-in provider integrations +- ✅ Native tool calling support +- ✅ Automatic token counting, retries, etc. + +**Disadvantages:** +- ❌ More complex architecture +- ❌ Two systems to maintain + +--- + +## 3. Key Integration Points + +### 3.1 Event Mapping + +**Pgflow Events → AI SDK Stream Protocol:** + +| Pgflow Event | AI SDK Chunk Type | Implementation | +|--------------|-------------------|----------------| +| `run:started` | `start` | Send start chunk with run ID | +| `run:completed` | `text-delta` + `finish` | Send final output as text, then finish | +| `run:failed` | `error` | Send error chunk | +| `step:completed` (reasoning) | `reasoning-delta` | Stream intermediate reasoning | +| `step:completed` (tool) | `tool-input-delta` | Stream tool execution progress | +| Custom step output | `data-[type]` | Custom data parts | + +**Example Mapping Function:** +```typescript +function mapPgflowEventToAISDK( + event: FlowRunEvent | StepEvent +): UIMessageChunk[] { + switch (event.event_type) { + case 'run:started': + return [{ type: 'start', id: event.run_id }]; + + case 'run:completed': + return [ + { type: 'text-delta', text: event.output.response }, + { type: 'finish', finishReason: 'stop' } + ]; + + case 'run:failed': + return [{ + type: 'error', + error: new Error(event.error_message) + }]; + + case 'step:completed': + // Map based on step type + if (event.step_slug === 'reasoning') { + return [{ + type: 'data-reasoning', + data: event.output + }]; + } + return []; + + default: + return []; + } +} +``` + +### 3.2 Type Safety Bridge + +**Challenge:** Maintain type safety across pgflow flows and AI SDK messages. 
+
+**Solution:** Define shared types and conversion utilities.
+
+```typescript
+// types/chat.ts
+import { Flow } from '@pgflow/dsl';
+import type { ExtractFlowInput, ExtractFlowOutput } from '@pgflow/dsl';
+import type { UIMessage } from '@ai-sdk/react';
+
+// Define your flow types
+export const ChatFlow = new Flow<{
+  message: string;
+  history: UIMessage[];
+}>({ slug: 'chat_workflow' })
+  .step('parse', async (input) => ({ intent: 'question' }))  // { intent: string }
+  .step('generate', async () => ({ response: 'draft' }))     // { response: string }
+  .step('format', async () => ({ response: 'formatted' }));  // { response: string }
+
+export type ChatFlowInput = ExtractFlowInput<typeof ChatFlow>;
+export type ChatFlowOutput = ExtractFlowOutput<typeof ChatFlow>;
+
+// Converters
+export function uiMessagesToFlowInput(
+  messages: UIMessage[]
+): ChatFlowInput {
+  return {
+    message: messages[messages.length - 1].content,
+    history: messages.slice(0, -1),
+  };
+}
+
+export function flowOutputToUIMessage(
+  output: ChatFlowOutput,
+  id: string
+): UIMessage {
+  return {
+    id,
+    role: 'assistant',
+    content: output.response,
+  };
+}
+```
+
+### 3.3 Authentication & Authorization
+
+**For Backend Pattern (A):**
+- Use Next.js middleware for auth
+- Pass user context to pgflow flows
+- Configure Supabase RLS policies
+
+```typescript
+// middleware.ts
+import { NextRequest, NextResponse } from 'next/server';
+import { createMiddlewareClient } from '@supabase/auth-helpers-nextjs';
+
+export async function middleware(req: NextRequest) {
+  const res = NextResponse.next();
+  const supabase = createMiddlewareClient({ req, res });
+
+  const { data: { session } } = await supabase.auth.getSession();
+
+  if (!session && req.nextUrl.pathname.startsWith('/api/chat')) {
+    return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
+  }
+
+  return res;
+}
+```
+
+**For Custom Transport Pattern (B):**
+- Configure Supabase RLS policies to restrict flow access by user ID
+- Use anon key in browser with RLS enforcement
+- Example RLS policy:
+
+```sql
+-- Only allow users to start flows for themselves
+CREATE POLICY "Users can start their own flows"
+ON flow_runs
+FOR INSERT
+WITH CHECK (auth.uid() = user_id);
+
+-- Only allow users to read their own flow runs
+CREATE POLICY "Users can read their own runs"
+ON flow_runs
+FOR SELECT
+USING (auth.uid() = user_id);
+```
+
+---
+
+## 4. 
Advanced Use Cases + +### 4.1 Multi-Step Conversation with Progress + +Show intermediate workflow steps to users: + +```typescript +// Frontend +const { messages, sendMessage, data } = useChat({ + api: '/api/chat', + onData: (chunk) => { + if (chunk.type === 'data-search') { + console.log('Search results:', chunk.data); + // Update UI with search progress + } + if (chunk.type === 'data-reasoning') { + console.log('AI reasoning:', chunk.data); + // Show reasoning to user + } + }, +}); + +// Backend +run.step('search').on('completed', (event) => { + controller.enqueue(encoder.encode( + `data: ${JSON.stringify({ + type: 'data-search', + data: event.output + })}\n\n` + )); +}); + +run.step('reasoning').on('completed', (event) => { + controller.enqueue(encoder.encode( + `data: ${JSON.stringify({ + type: 'data-reasoning', + data: event.output + })}\n\n` + )); +}); +``` + +### 4.2 Tool Calling Integration + +Pgflow orchestrates tool execution, AI SDK displays it: + +```typescript +// Define flow with tool steps +const ChatFlow = new Flow({ slug: 'chat_with_tools' }) + .step('detect_intent', async (input) => ({ + needsTool: boolean, + tool: string + })) + .step('execute_tool', async () => ({ result: any })) + .step('generate_response', async () => ({ response: string })); + +// Backend: Map tool execution to AI SDK format +run.step('execute_tool').on('started', (event) => { + controller.enqueue(encoder.encode( + `data: ${JSON.stringify({ + type: 'tool-input-start', + toolCallId: event.step_slug, + toolName: 'search', + })}\n\n` + )); +}); + +run.step('execute_tool').on('completed', (event) => { + controller.enqueue(encoder.encode( + `data: ${JSON.stringify({ + type: 'tool-input-available', + toolCallId: event.step_slug, + result: event.output, + })}\n\n` + )); +}); +``` + +### 4.3 Multimodal Messages + +Pgflow processes images/files, streams results: + +```typescript +// Flow with image processing +const VisionFlow = new Flow<{ + message: string; + image_url: string; +}>({ slug: 'vision_chat' }) + .step('analyze_image', async (input) => ({ + description: string + })) + .step('generate_response', async () => ({ + response: string + })); + +// Frontend: Send multimodal message +sendMessage({ + content: 'What is in this image?', + experimental_attachments: [{ + url: imageUrl, + contentType: 'image/png', + }], +}); + +// Backend: Process in pgflow +const run = await pgflow.startFlow('vision_chat', { + message: lastMessage.content, + image_url: lastMessage.experimental_attachments[0].url, +}); +``` + +### 4.4 Conversation Memory & History + +Leverage pgflow's database persistence: + +```typescript +// Backend +export async function POST(req: Request) { + const { messages, id: chatId } = await req.json(); + + // Check if this is a continuing conversation + const existingRun = await pgflow.getRun(chatId); + + if (existingRun && existingRun.status === 'completed') { + // Start new run in same conversation + const run = await pgflow.startFlow('chat_workflow', { + message: lastMessage.content, + conversation_id: chatId, + previous_run_id: existingRun.run_id, + }); + // ... + } +} + +// Query conversation history via pgflow +const conversationHistory = await supabase + .from('flow_runs') + .select('input, output, created_at') + .eq('flow_slug', 'chat_workflow') + .eq('input->conversation_id', conversationId) + .order('created_at', { ascending: true }); +``` + +--- + +## 5. Implementation Recommendations + +### 5.1 Getting Started (Recommended Path) + +1. 
**Start with Pattern A (Backend Integration):** + - Simplest to implement and secure + - Create `/api/chat` route in Next.js + - Use pgflow client server-side + - Map pgflow events to SSE chunks + +2. **Define Your Chat Flow:** + ```typescript + // flows/chat-flow.ts + import { Flow } from '@pgflow/dsl'; + + export const ChatFlow = new Flow<{ + message: string; + user_id: string; + }>({ slug: 'chat_workflow' }) + .step('parse_intent', async (input) => { + // Intent classification + return { intent: 'question', entities: [] }; + }) + .step('retrieve_context', async ({ intent }) => { + // RAG / vector search + return { documents: [] }; + }) + .step('generate_response', async ({ documents, message }) => { + // LLM call + return { response: 'AI response here' }; + }); + ``` + +3. **Test Integration:** + - Start with simple message → response flow + - Add step-by-step progress indicators + - Implement error handling + +4. **Iterate:** + - Add tool calling + - Add multimodal support + - Optimize performance + +### 5.2 Production Considerations + +**Performance:** +- Use Supabase connection pooling for pgflow +- Configure `realtimeStabilizationDelayMs` for latency +- Set appropriate timeouts in `waitForStatus` +- Consider caching flow definitions + +**Error Handling:** +```typescript +// Retry logic for transient failures +let retries = 3; +while (retries > 0) { + try { + const run = await pgflow.startFlow(flowSlug, input); + await run.waitForStatus('completed', { timeoutMs: 60000 }); + break; + } catch (error) { + retries--; + if (retries === 0) throw error; + await new Promise(resolve => setTimeout(resolve, 1000)); + } +} +``` + +**Monitoring:** +```typescript +// Log all pgflow events +pgflow.onRunEvent((event) => { + analytics.track('pgflow_run_event', { + event_type: event.event_type, + run_id: event.run_id, + flow_slug: event.flow_slug, + }); +}); + +// Track latency +const startTime = Date.now(); +run.on('completed', () => { + const duration = Date.now() - startTime; + metrics.histogram('pgflow_run_duration_ms', duration); +}); +``` + +**Testing:** +```typescript +// Test pgflow flows independently +describe('ChatFlow', () => { + it('should generate response', async () => { + const run = await pgflow.startFlow('chat_workflow', { + message: 'Hello', + user_id: 'test-user', + }); + + await run.waitForStatus('completed', { timeoutMs: 10000 }); + + expect(run.status).toBe('completed'); + expect(run.output).toHaveProperty('response'); + }); +}); + +// Test API route integration +describe('/api/chat', () => { + it('should stream SSE response', async () => { + const response = await fetch('/api/chat', { + method: 'POST', + body: JSON.stringify({ + messages: [{ role: 'user', content: 'Hello' }], + }), + }); + + expect(response.headers.get('x-vercel-ai-ui-message-stream')).toBe('v1'); + + const reader = response.body.getReader(); + const chunks = []; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + chunks.push(new TextDecoder().decode(value)); + } + + expect(chunks.some(c => c.includes('text-delta'))).toBe(true); + expect(chunks.some(c => c.includes('[DONE]'))).toBe(true); + }); +}); +``` + +### 5.3 Migration Path + +**From Existing AI SDK App:** + +1. Identify complex workflows in your current API routes +2. Extract them into pgflow flows +3. Replace direct LLM calls with pgflow client calls +4. Keep simple requests using AI SDK Core + +**From Existing Pgflow App:** + +1. Add Vercel AI SDK to your Next.js app +2. Create API route that wraps pgflow client +3. 
Replace custom chat UI with `useChat` hook +4. Migrate event handling to AI SDK patterns + +--- + +## 6. Comparison with Other Approaches + +### 6.1 vs. Direct AI SDK Core Usage + +| Aspect | Pgflow + useChat | AI SDK Core Only | +|--------|------------------|------------------| +| Workflow Orchestration | ✅ Native multi-step DAGs | ❌ Manual orchestration | +| State Persistence | ✅ Database-backed | ❌ Ephemeral | +| Real-time Progress | ✅ Event-driven per step | ⚠️ Only final stream | +| Type Safety | ✅ Full generics | ✅ Full generics | +| Tool Calling | ⚠️ Manual implementation | ✅ Native support | +| Provider Support | ⚠️ Custom integration | ✅ 30+ providers | +| Complexity | ⚠️ Higher (two systems) | ✅ Lower (single SDK) | + +**Recommendation:** Use pgflow for complex, multi-step workflows. Use AI SDK Core for simple chat. + +### 6.2 vs. LangChain + useChat + +| Aspect | Pgflow | LangChain | +|--------|--------|-----------| +| Execution Model | Database-backed workflow engine | In-memory chains | +| Streaming | Native event-driven | Via callbacks | +| State Management | PostgreSQL persistence | Redis/memory | +| Observability | Built-in via database | Requires LangSmith | +| Scalability | Database-native concurrency | Process-based | + +### 6.3 vs. Custom WebSocket Implementation + +| Aspect | Pgflow + Supabase Realtime | Custom WebSocket | +|--------|----------------------------|------------------| +| Connection Management | ✅ Handled by Supabase | ❌ Manual implementation | +| Reconnection | ✅ Automatic | ❌ Manual logic | +| Scaling | ✅ Supabase infrastructure | ❌ Custom load balancing | +| Security | ✅ RLS policies | ❌ Custom auth | + +--- + +## 7. Next Steps + +### Proof of Concept + +1. Create a simple chat flow in pgflow: + ```bash + cd /home/user/pgflow + # Define flow in examples/ or test in your app + ``` + +2. Implement basic API route: + ```typescript + // app/api/chat/route.ts + // Use Pattern A implementation from section 2.1 + ``` + +3. Test with `useChat`: + ```typescript + // app/page.tsx + // Use basic useChat example from section 2.1 + ``` + +### Enhancements + +- [ ] Create `@pgflow/ai-sdk` integration package +- [ ] Add helper utilities for event mapping +- [ ] Implement `PgflowChatTransport` class +- [ ] Add example flows for common patterns (RAG, agents, etc.) +- [ ] Create Next.js template with pgflow + AI SDK +- [ ] Add monitoring/observability helpers + +### Documentation + +- [ ] Add pgflow examples in `/examples/ai-chat` +- [ ] Document best practices for chat workflows +- [ ] Create migration guide for AI SDK users +- [ ] Add troubleshooting guide + +--- + +## 8. Conclusion + +**Pgflow and Vercel AI SDK are highly complementary:** + +- **Pgflow** excels at orchestrating complex, multi-step AI workflows with database-backed state management +- **Vercel AI SDK** provides excellent frontend primitives and streaming UX + +**Best Integration:** Use pgflow as the backend workflow engine with AI SDK's `useChat` hook for the frontend, connecting them via either: +1. A Next.js API route that transforms pgflow events to SSE (recommended for most cases) +2. 
A custom `PgflowChatTransport` for direct browser-to-database communication (advanced use cases) + +**This integration enables:** +- Type-safe, multi-step AI workflows +- Real-time streaming of intermediate progress +- Database-backed conversation history +- Scalable, production-ready architecture +- Best-in-class developer experience + +The complementary strengths of both tools make them an excellent foundation for building sophisticated AI chat applications. + +--- + +## References + +### Pgflow Resources +- Pgflow Client Documentation: `/home/user/pgflow/pkgs/client/` +- Flow DSL: `/home/user/pgflow/pkgs/dsl/` +- Examples: `/home/user/pgflow/examples/` + +### Vercel AI SDK Resources +- [AI SDK Documentation](https://ai-sdk.dev) +- [useChat Reference](https://ai-sdk.dev/docs/reference/ai-sdk-ui/use-chat) +- [Stream Protocol](https://ai-sdk.dev/docs/ai-sdk-ui/stream-protocol) +- [Custom Transports](https://ai-sdk.dev/docs/ai-sdk-ui/transport) +- [GitHub Repository](https://github.com/vercel/ai) +- [AI SDK 5 Announcement](https://vercel.com/blog/ai-sdk-5) +- [AI SDK 6 Announcement](https://vercel.com/blog/ai-sdk-6) + +### Community Examples +- [WebSocket Transport Discussion](https://github.com/vercel/ai/discussions/5607) +- [Custom Provider Implementation](https://ai-sdk.dev/providers/community-providers/custom-providers) +- [WorkflowChatTransport](https://useworkflow.dev/docs/api-reference/workflow-ai/workflow-chat-transport) (similar concept) diff --git a/PRAGMATIC_HYBRID_ARCHITECTURE.md b/PRAGMATIC_HYBRID_ARCHITECTURE.md new file mode 100644 index 000000000..e5062c468 --- /dev/null +++ b/PRAGMATIC_HYBRID_ARCHITECTURE.md @@ -0,0 +1,440 @@ +# The Pragmatic Architecture: Hybrid Approach + +After critical analysis, here's what actually makes sense: + +## Problems with "Frontend Transport via Realtime" Approach + +### ❌ Problem 1: Realtime is Not Streaming +- Supabase Realtime = pub/sub messaging (discrete events) +- Not designed for high-frequency token delivery +- 3-5x latency overhead per token +- No backpressure, no flow control +- Calling it "streaming" is misleading + +### ❌ Problem 2: Edge Runtime Timeout is Universal +- Affects ALL edge functions (not just pgflow) +- 25s Vercel Edge limit is too short for LLM streaming +- ANY proxy that streams LLM responses has this issue +- Not unique to our approach + +### ❌ Problem 3: Over-Engineering +- Chunk storage in database for every token +- Complex recovery logic +- High Realtime message costs +- Solving a problem the wrong way + +--- + +## The Right Architecture: Hybrid Approach + +**Use the right tool for each job:** + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Frontend (React + useChat) │ +│ └─ Uses standard fetch to /api/chat │ +└─────────────────┬───────────────────────────────────────────┘ + │ + ↓ HTTP POST + │ +┌─────────────────┴───────────────────────────────────────────┐ +│ Next.js API Route (Node.js Runtime, not Edge!) 
│ +│ ├─ Timeout: 300s (Vercel), 10min (self-hosted) │ +│ ├─ Authenticates user │ +│ ├─ Starts pgflow flow in database │ +│ ├─ Subscribes to pgflow events via Supabase Realtime │ +│ ├─ Streams LLM response via SSE (traditional streaming) │ +│ └─ Maps pgflow events → SSE data chunks │ +└─────────────────┬───────────────────────────────────────────┘ + │ + ├─ SSE Stream ─→ Frontend (fast, low latency) + │ + └─ Supabase Realtime ←→ Pgflow Database + (for step events, not token streaming) +``` + +--- + +## Implementation + +### Backend: Node.js API Route (NOT Edge) + +```typescript +// app/api/chat/route.ts +export const runtime = 'nodejs'; // ← 300s timeout (not 25s!) +export const dynamic = 'force-dynamic'; + +import { PgflowClient } from '@pgflow/client'; +import { createClient } from '@supabase/supabase-js'; +import { OpenAI } from 'openai'; + +export async function POST(req: Request) { + // 1. Authenticate + const session = await getServerSession(req); + if (!session) { + return new Response('Unauthorized', { status: 401 }); + } + + const { messages, conversationId } = await req.json(); + + // 2. Initialize pgflow client + const supabase = createClient( + process.env.SUPABASE_URL!, + process.env.SUPABASE_SERVICE_KEY! + ); + const pgflow = new PgflowClient(supabase); + + // 3. Start multi-step flow + const run = await pgflow.startFlow('chat_pipeline', { + message: messages[messages.length - 1].content, + userId: session.user.id, + conversationId, + }); + + // 4. Create SSE stream + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + async start(controller) { + // Helper to send SSE event + const sendEvent = (data: any) => { + const chunk = `data: ${JSON.stringify(data)}\n\n`; + controller.enqueue(encoder.encode(chunk)); + }; + + try { + // Subscribe to pgflow events (non-streaming steps) + run.on('*', (event) => { + if (event.event_type === 'run:started') { + sendEvent({ type: 'start', id: run.run_id }); + } + if (event.event_type === 'run:failed') { + sendEvent({ type: 'error', error: event.error_message }); + controller.close(); + } + }); + + // Listen to step completions (for progress) + run.step('search').on('completed', (event) => { + sendEvent({ + type: 'data-search-complete', + data: { + count: event.output.results.length, + sources: event.output.sources, + }, + }); + }); + + run.step('analyze').on('completed', (event) => { + sendEvent({ + type: 'data-analysis', + data: event.output, + }); + }); + + // Wait for all non-streaming steps to complete + await run.step('search').waitForStatus('completed'); + await run.step('analyze').waitForStatus('completed'); + + // 5. Get context from completed steps + const searchResults = run.step('search').output?.results || []; + const analysis = run.step('analyze').output; + + // 6. NOW stream LLM response via traditional SSE + // This happens in the API route (long timeout, reliable) + const openai = new OpenAI(); + const llmStream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { + role: 'system', + content: `Context: ${JSON.stringify(searchResults)}\nAnalysis: ${JSON.stringify(analysis)}`, + }, + ...messages, + ], + stream: true, + }); + + // Stream tokens via SSE (fast, low latency) + for await (const chunk of llmStream) { + const delta = chunk.choices[0]?.delta?.content || ''; + if (delta) { + sendEvent({ + type: 'text-delta', + text: delta, + }); + } + } + + // 7. 
Mark run as completed in database + // (or let pgflow executor handle this if running in worker) + + sendEvent({ + type: 'finish', + finishReason: 'stop', + }); + + controller.close(); + } catch (error) { + sendEvent({ + type: 'error', + error: error.message, + }); + controller.close(); + } + }, + }); + + return new Response(stream, { + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'x-vercel-ai-data-stream': 'v1', + }, + }); +} +``` + +### Frontend: Standard useChat + +```typescript +'use client'; + +import { useChat } from '@ai-sdk/react'; + +export default function Chat() { + const { messages, sendMessage, status, data } = useChat({ + api: '/api/chat', // ← Standard fetch, no custom transport! + onData: (chunk) => { + // Custom data from pgflow steps + if (chunk.type === 'data-search-complete') { + console.log('Search done:', chunk.data); + } + if (chunk.type === 'data-analysis') { + console.log('Analysis:', chunk.data); + } + }, + }); + + return ( +
+    <div>
+      {/* Show intermediate progress */}
+      {status === 'streaming' && data && (
+        <div>
+          {data['search-complete'] && (
+            <div>✓ Found {data['search-complete'].count} results</div>
+          )}
+          {data.analysis && (
+            <div>✓ Analysis: {data.analysis.summary}</div>
+          )}
+        </div>
+      )}
+
+      {/* Messages */}
+      {messages.map(m => (
+        <div key={m.id}>{m.content}</div>
+      ))}
+
+      {/* Input */}
+    </div>
+ ); +} +``` + +--- + +## What Each Component Does + +### Pgflow (Database Orchestration) +- **Executes multi-step pipeline** (search → analyze → prepare context) +- **Stores step outputs** in database (durable, queryable) +- **Broadcasts step events** via Supabase Realtime (progress updates) +- **NOT used for token streaming** (that's SSE's job) + +### API Route (Node.js Runtime) +- **Long timeout** (300s on Vercel, unlimited self-hosted) +- **Authenticates requests** (user session, rate limits) +- **Subscribes to pgflow events** (listens for step completions) +- **Streams LLM tokens via SSE** (traditional, fast, reliable) +- **Maps pgflow progress → SSE data chunks** (for frontend) + +### Supabase Realtime +- **Used for coarse-grained events** (step completed, run status) +- **NOT for fine-grained token streaming** (that's wasteful) +- Frequency: ~5-10 events per conversation (not 300 tokens!) + +### SSE Streaming +- **Used for token streaming** (fast, low latency) +- Direct connection API route → frontend +- No intermediate hops, no extra overhead +- Standard streaming protocol + +--- + +## Advantages of This Approach + +### ✅ **Fast Token Streaming** +- SSE latency: 28-48ms per token (same as direct OpenAI) +- No Realtime overhead +- No JSON wrapping per token +- Standard streaming protocol + +### ✅ **Long Timeouts** +- Node.js runtime: 300s on Vercel +- Self-hosted: Unlimited +- No edge timeout issues + +### ✅ **Pgflow Benefits Preserved** +- Multi-step orchestration still works +- Step outputs stored in database +- Can retry individual steps +- Full observability (SQL queries) + +### ✅ **Simpler Architecture** +- No chunk storage needed +- No complex recovery logic +- Standard patterns (SSE, fetch) +- Less code to maintain + +### ✅ **Lower Costs** +- Realtime: ~10 messages/conversation (not 300!) +- Cost: <$1/month for 1000 daily chats +- 90% cheaper than token-per-message approach + +### ✅ **Better UX** +- Intermediate progress via data chunks +- Fast token streaming for final response +- Best of both worlds + +--- + +## What Gets Streamed Where + +| Data Type | Transport | Frequency | Latency | +|-----------|-----------|-----------|---------| +| Step completions | Realtime | ~5/conversation | 100-200ms (acceptable) | +| Progress updates | Realtime | ~10/conversation | 100-200ms (acceptable) | +| LLM tokens | SSE | ~300/conversation | 28-48ms (fast!) | +| Final outputs | Database | 1/step | N/A (durable) | + +--- + +## Edge Cases Handled + +### 1. API Route Timeout (300s) +**Very rare** (would need 300s LLM response) + +If it happens: +- Frontend detects timeout +- Queries database for completed steps +- Shows partial progress +- User can retry + +**Recovery:** +```typescript +if (timeout detected) { + const run = await fetch('/api/runs/' + conversationId); + const completedSteps = run.steps.filter(s => s.status === 'completed'); + + // Show what completed + displayProgress(completedSteps); + + // Offer retry + showRetryButton(); +} +``` + +### 2. Network Disconnection +**Standard SSE reconnection:** +```typescript +useChat({ + api: '/api/chat', + onError: (error) => { + if (error.message.includes('fetch')) { + // Network error, retry with exponential backoff + retryConnection(); + } + }, +}); +``` + +### 3. 
Flow Step Failure +**Pgflow handles this:** +- Step fails → marked as 'failed' in database +- API route receives 'step:failed' event +- Sends error to frontend via SSE +- User can retry individual step + +--- + +## When to Use What + +### Use This Hybrid Approach ✅ +- Multi-step AI pipelines (2+ steps before LLM) +- Need intermediate progress updates +- Want pgflow orchestration benefits +- Reliable token streaming required + +### Use Frontend Transport ⚠️ +- **Only if you NEED browser-to-database direct connection** +- Offline-first applications +- No backend API allowed (browser extensions, static sites) +- Accept 3-5x latency overhead + +### Use Simple Backend API ✅ +- Single-step chat (prompt → LLM → response) +- No multi-step orchestration needed +- Maximum simplicity required + +--- + +## Migration Path + +### Phase 1: Start Simple (No Pgflow) +```typescript +// Simple API route with SSE streaming +export async function POST(req: Request) { + const openai = new OpenAI(); + const stream = await openai.chat.completions.create({ stream: true }); + return new Response(stream); +} +``` + +### Phase 2: Add Pgflow When Complexity Grows +When you need: +- Multi-step workflows (search before generate) +- State persistence (recover from failures) +- Observability (query past conversations) + +Then add the hybrid approach. + +--- + +## Conclusion + +**You were right to question both assumptions:** + +1. ❌ **Supabase Realtime is not streaming** - It's pub/sub messaging + - **Solution:** Use SSE for token streaming, Realtime for coarse events + +2. ❌ **Edge timeout affects everyone** - Not pgflow-specific + - **Solution:** Use Node.js runtime (longer timeout), not Edge + +**The pragmatic architecture:** +- **Pgflow:** Multi-step orchestration (database) +- **Realtime:** Step completion events (coarse-grained) +- **SSE:** Token streaming (fast, low latency) +- **Node.js API Route:** Long timeout (300s+) + +This gives you: +- ✅ Fast token streaming (28-48ms) +- ✅ Multi-step orchestration (pgflow) +- ✅ Intermediate progress (Realtime events) +- ✅ Reliable delivery (long timeouts) +- ✅ Simple architecture (standard patterns) +- ✅ Low cost (<$1/month for 1000 chats) + +**Much better than trying to force Realtime to be a streaming protocol.** diff --git a/SPLIT_ARCHITECTURE.md b/SPLIT_ARCHITECTURE.md new file mode 100644 index 000000000..7b2e19849 --- /dev/null +++ b/SPLIT_ARCHITECTURE.md @@ -0,0 +1,679 @@ +# Split Architecture: Pgflow Preparation + Separate Streaming + +## The Elegant Solution + +Split the pipeline into two distinct phases: + +1. **Pgflow Flow**: Multi-step preparation (search, rank, analyze) - durable, database-backed +2. **Streaming Endpoint**: Simple LLM proxy - fast, standard SSE streaming + +**Key Insight:** Pgflow orchestrates **preparation**, not **streaming**. + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Frontend (React + useChat) │ +└────┬─────────────────────────────────────────────┬──────────┘ + │ │ + │ 1. Start preparation │ 3. 
Start streaming + │ │ + ↓ ↓ +┌────────────────────────┐ ┌─────────────────────┐ +│ POST /api/prepare │ │ POST /api/stream │ +│ (Node.js, 300s) │ │ (Edge, 25s is fine) │ +│ │ │ │ +│ Starts pgflow flow │ │ Reads context │ +│ Returns runId │ │ Proxies LLM │ +└────┬───────────────────┘ └──────┬──────────────┘ + │ │ + ↓ ↓ +┌────────────────────────────────────────────────────────────┐ +│ Supabase Database │ +│ │ +│ Pgflow Flow Execution: │ +│ ├─ Step 1: search (5s) → results stored │ +│ ├─ Step 2: rerank (3s) → rankings stored │ +│ ├─ Step 3: extract (4s) → chunks stored │ +│ └─ Status: completed │ +│ │ +│ 2. Frontend polls or listens via Realtime │ +│ When all steps complete → trigger streaming │ +└────────────────────────────────────────────────────────────┘ +``` + +--- + +## Implementation + +### Phase 1: Preparation Flow (Pgflow) + +#### Backend: Pgflow Flow Definition + +```typescript +// flows/chat-preparation.ts +import { Flow } from '@pgflow/dsl'; + +export const ChatPreparationFlow = new Flow<{ + message: string; + conversationId: string; + userId: string; +}>({ slug: 'chat_preparation' }) + + .step('search', async (input) => { + // Vector search, web search, etc. + const results = await vectorSearch(input.message); + + return { + results: results.map(r => ({ + content: r.content, + source: r.source, + score: r.score, + })), + count: results.length, + }; + }) + + .step('rerank', async (input) => { + // Rerank results by relevance + const reranked = await rerankResults( + input.message, + input.results + ); + + return { + topResults: reranked.slice(0, 5), + scores: reranked.map(r => r.score), + }; + }) + + .step('extract', async (input) => { + // Extract relevant chunks from top results + const chunks = await extractRelevantChunks( + input.message, + input.topResults + ); + + return { + chunks: chunks, + sources: chunks.map(c => c.source), + }; + }) + + .step('prepare_context', async (input) => { + // Format context for LLM + const context = formatContext(input.chunks); + + return { + systemPrompt: `You are a helpful assistant. Use the following context to answer the user's question:\n\n${context}`, + sources: input.sources, + ready: true, // Signal that we're ready to stream + }; + }); +``` + +#### API Route: Start Preparation + +```typescript +// app/api/prepare/route.ts +export const runtime = 'nodejs'; // Long timeout for multi-step flow + +import { PgflowClient } from '@pgflow/client'; +import { createClient } from '@supabase/supabase-js'; + +export async function POST(req: Request) { + // Authenticate + const session = await getServerSession(req); + if (!session) { + return Response.json({ error: 'Unauthorized' }, { status: 401 }); + } + + const { message, conversationId } = await req.json(); + + // Initialize pgflow + const supabase = createClient( + process.env.SUPABASE_URL!, + process.env.SUPABASE_SERVICE_KEY! + ); + const pgflow = new PgflowClient(supabase); + + // Start preparation flow + const run = await pgflow.startFlow('chat_preparation', { + message, + conversationId: conversationId || crypto.randomUUID(), + userId: session.user.id, + }); + + // Return immediately with runId + // Frontend will poll/listen for completion + return Response.json({ + runId: run.run_id, + status: 'preparing', + }); +} +``` + +### Phase 2: Streaming Endpoint (Simple Proxy) + +```typescript +// app/api/stream/route.ts +export const runtime = 'edge'; // Fast, can use Edge runtime! 
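+// Edge's short timeout (~25s) is acceptable here by design: the multi-step
+// preparation already ran and persisted its outputs in the pgflow flow, so
+// this route only has to proxy a single LLM call and stream tokens through.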
+ +import { OpenAI } from 'openai'; +import { createClient } from '@supabase/supabase-js'; + +export async function POST(req: Request) { + // Authenticate + const authHeader = req.headers.get('Authorization'); + if (!authHeader) { + return Response.json({ error: 'Unauthorized' }, { status: 401 }); + } + + const { runId, message, history } = await req.json(); + + // Get preparation context from database + const supabase = createClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.SUPABASE_ANON_KEY!, + { + global: { + headers: { Authorization: authHeader }, + }, + } + ); + + // Fetch pgflow run output + const { data: run, error } = await supabase + .from('flow_runs') + .select('status, output') + .eq('run_id', runId) + .single(); + + if (error || !run) { + return Response.json({ error: 'Run not found' }, { status: 404 }); + } + + if (run.status !== 'completed') { + return Response.json( + { error: 'Preparation not complete' }, + { status: 400 } + ); + } + + // Get context from final step output + const context = run.output; + + // Stream LLM response (standard, fast, no pgflow involved) + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + }); + + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { role: 'system', content: context.systemPrompt }, + ...(history || []), + { role: 'user', content: message }, + ], + stream: true, + }); + + // Return standard SSE stream (fast!) + const encoder = new TextEncoder(); + const readable = new ReadableStream({ + async start(controller) { + try { + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta?.content || ''; + if (delta) { + controller.enqueue( + encoder.encode(`data: ${JSON.stringify({ type: 'text-delta', text: delta })}\n\n`) + ); + } + } + + // Send sources as custom data + controller.enqueue( + encoder.encode(`data: ${JSON.stringify({ type: 'data-sources', data: context.sources })}\n\n`) + ); + + // Finish + controller.enqueue( + encoder.encode(`data: ${JSON.stringify({ type: 'finish', finishReason: 'stop' })}\n\n`) + ); + + controller.close(); + } catch (error) { + controller.enqueue( + encoder.encode(`data: ${JSON.stringify({ type: 'error', error: error.message })}\n\n`) + ); + controller.close(); + } + }, + }); + + return new Response(readable, { + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + }, + }); +} +``` + +--- + +## Frontend: Two-Phase Flow + +### Option A: Custom Hook (Recommended) + +```typescript +// hooks/use-pgflow-chat.ts +import { useState, useCallback } from 'react'; +import { useChat } from '@ai-sdk/react'; +import { createBrowserClient } from '@supabase/supabase-js'; +import { PgflowClient } from '@pgflow/client/browser'; + +export function usePgflowChat() { + const [preparationStatus, setPreparationStatus] = useState< + 'idle' | 'preparing' | 'ready' | 'error' + >('idle'); + const [runId, setRunId] = useState(null); + const [preparationData, setPreparationData] = useState(null); + + const supabase = createBrowserClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY! 
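+    // Uses the public anon key: RLS policies on the pgflow tables decide
+    // what this signed-in browser session is allowed to read.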
+ ); + + const pgflow = new PgflowClient(supabase); + + // Phase 1: Start preparation + const startPreparation = useCallback(async (message: string) => { + setPreparationStatus('preparing'); + + try { + // Start pgflow preparation flow + const run = await pgflow.startFlow('chat_preparation', { + message, + conversationId: crypto.randomUUID(), + userId: 'current-user', // Get from auth + }); + + setRunId(run.run_id); + + // Listen to step completions + run.step('search').on('completed', (event) => { + setPreparationData((prev) => ({ + ...prev, + search: event.output, + })); + }); + + run.step('rerank').on('completed', (event) => { + setPreparationData((prev) => ({ + ...prev, + rerank: event.output, + })); + }); + + run.step('extract').on('completed', (event) => { + setPreparationData((prev) => ({ + ...prev, + extract: event.output, + })); + }); + + // Wait for completion + await run.waitForStatus('completed'); + + setPreparationStatus('ready'); + return run.run_id; + } catch (error) { + console.error('Preparation failed:', error); + setPreparationStatus('error'); + throw error; + } + }, [pgflow]); + + // Phase 2: Stream response + const { messages, sendMessage, status, ...rest } = useChat({ + api: '/api/stream', + body: { runId }, // Pass runId to streaming endpoint + }); + + // Combined send: prepare then stream + const sendMessageWithPreparation = useCallback( + async (content: string) => { + // Phase 1: Prepare + const preparedRunId = await startPreparation(content); + + // Phase 2: Stream (once preparation completes) + sendMessage({ content, data: { runId: preparedRunId } }); + }, + [startPreparation, sendMessage] + ); + + return { + messages, + sendMessage: sendMessageWithPreparation, + status: preparationStatus === 'preparing' ? 'preparing' : status, + preparationStatus, + preparationData, + ...rest, + }; +} +``` + +### Usage in Component + +```typescript +// app/chat/page.tsx +'use client'; + +import { usePgflowChat } from '@/hooks/use-pgflow-chat'; + +export default function ChatPage() { + const { + messages, + sendMessage, + status, + preparationStatus, + preparationData, + } = usePgflowChat(); + + return ( +
+      <div className="mx-auto flex max-w-2xl flex-col gap-4 p-4">
+        {/* Messages */}
+        <div className="flex flex-col gap-2">
+          {messages.map((message) => (
+            <div key={message.id} className="rounded border p-3">
+              {message.content}
+            </div>
+          ))}
+        </div>
+
+        {/* Preparation Progress */}
+        {preparationStatus === 'preparing' && (
+          <div className="rounded bg-gray-50 p-3 text-sm">
+            <div className="font-medium">
+              🔍 Preparing your answer...
+            </div>
+
+            {preparationData?.search && (
+              <div>
+                ✓ Searched {preparationData.search.count} sources
+              </div>
+            )}
+
+            {preparationData?.rerank && (
+              <div>
+                ✓ Ranked top {preparationData.rerank.topResults.length} results
+              </div>
+            )}
+
+            {preparationData?.extract && (
+              <div>
+                ✓ Extracted relevant information
+              </div>
+            )}
+          </div>
+        )}
+
+        {/* Streaming Progress */}
+        {status === 'streaming' && (
+          <div className="rounded bg-blue-50 p-3 text-sm">
+            <div className="flex items-center gap-2">
+              <span>💬</span>
+              Writing response...
+            </div>
+          </div>
+        )}
+
+        {/* Input */}
+        <form
+          onSubmit={(e) => {
+            e.preventDefault();
+            const formData = new FormData(e.currentTarget);
+            const message = formData.get('message') as string;
+            if (message.trim()) {
+              sendMessage(message);
+              e.currentTarget.reset();
+            }
+          }}
+          className="flex gap-2"
+        >
+          <input
+            name="message"
+            placeholder="Ask anything..."
+            className="flex-1 rounded border p-2"
+          />
+          <button type="submit" className="rounded border px-4">
+            Send
+          </button>
+        </form>
+      </div>
+ ); +} +``` + +### Option B: Sequential API Calls (Simpler) + +```typescript +// hooks/use-pgflow-chat-simple.ts +import { useChat } from '@ai-sdk/react'; +import { useState } from 'react'; + +export function usePgflowChatSimple() { + const [preparing, setPreparing] = useState(false); + + const { messages, sendMessage, status, ...rest } = useChat({ + api: '/api/stream', + + // Intercept before sending + onBeforeSend: async ({ content }) => { + setPreparing(true); + + // Phase 1: Call preparation endpoint + const res = await fetch('/api/prepare', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ message: content }), + }); + + const { runId } = await res.json(); + + // Poll until ready (or use Realtime) + let ready = false; + while (!ready) { + await new Promise((resolve) => setTimeout(resolve, 500)); + + const statusRes = await fetch(`/api/runs/${runId}`); + const { status } = await statusRes.json(); + + if (status === 'completed') { + ready = true; + } + } + + setPreparing(false); + + // Return modified body with runId + return { runId, message: content }; + }, + }); + + return { + messages, + sendMessage, + status: preparing ? 'preparing' : status, + preparing, + ...rest, + }; +} +``` + +--- + +## Advantages of Split Architecture + +### ✅ **Clean Separation of Concerns** +- Pgflow: Orchestration (what it's good at) +- Streaming endpoint: Proxying (what it's good at) +- No mixing of concerns + +### ✅ **Fast Streaming** +- Streaming endpoint can use Edge runtime (25s is fine for LLM proxy) +- Standard SSE streaming (28-48ms per token) +- No pgflow overhead during streaming + +### ✅ **Durable Preparation** +- All prep steps stored in database +- Can query past preparations +- Full observability (SQL) +- Can retry individual steps + +### ✅ **Flexible Frontend** +- Choose how to wait for preparation (polling vs Realtime) +- Show granular progress during preparation +- Standard useChat for streaming phase + +### ✅ **Simple Streaming Endpoint** +- Just reads context from DB +- Proxies to LLM +- No complex logic +- Easy to test + +### ✅ **Reusable Context** +- Preparation result is in database +- Can regenerate response without re-preparing +- Can use same context for multiple prompts +- Cache preparation results + +### ✅ **Better Error Handling** +- Preparation failures are separate from streaming failures +- Can retry preparation independently +- Streaming endpoint is simple, less likely to fail + +--- + +## Comparison with Other Approaches + +| Approach | Prep Timeout | Stream Timeout | Stream Latency | Complexity | +|----------|--------------|----------------|----------------|------------| +| **Split (This)** | 300s (Node.js) | 25s (Edge, fine) | 28-48ms (SSE) | Low | +| Hybrid | 300s (Node.js) | 300s (same route) | 28-48ms (SSE) | Medium | +| Frontend Transport | 120s (Supabase) | 120s (Supabase) | 91-231ms (Realtime) | High | + +--- + +## Advanced: Caching & Regeneration + +### Cache Preparation Results + +```typescript +// User asks: "What is quantum computing?" +// 1. Prepare (search, rank, extract) → cache result + +// Later, user asks: "Explain it simpler" +// 2. Reuse cached preparation +// 3. 
Only stream new response with different system prompt + +const { data: cachedRun } = await supabase + .from('flow_runs') + .select('output') + .eq('input->message', 'What is quantum computing?') + .eq('flow_slug', 'chat_preparation') + .eq('status', 'completed') + .order('created_at', { ascending: false }) + .limit(1) + .single(); + +if (cachedRun && isFresh(cachedRun.created_at)) { + // Skip preparation, use cached context + streamResponse(cachedRun.output); +} else { + // Run fresh preparation + await startPreparation(message); +} +``` + +### Regenerate Without Re-preparing + +```typescript +// User clicks "Regenerate response" +// Don't re-run search, just re-stream with same context + +const regenerate = async () => { + sendMessage({ + content: lastMessage, + data: { + runId: existingRunId, // Reuse preparation + regenerate: true, + }, + }); +}; +``` + +--- + +## Implementation Checklist + +### Phase 1: Basic Split +- [ ] Create chat_preparation flow in pgflow +- [ ] Implement /api/prepare endpoint (Node.js) +- [ ] Implement /api/stream endpoint (Edge) +- [ ] Frontend: Sequential calls (prepare → wait → stream) +- [ ] Test with polling for completion + +### Phase 2: Real-time Updates +- [ ] Add Realtime listener in frontend +- [ ] Show step-by-step progress +- [ ] Remove polling, use event-driven + +### Phase 3: Optimization +- [ ] Add preparation result caching +- [ ] Implement regeneration without re-prep +- [ ] Add error recovery +- [ ] Performance monitoring + +### Phase 4: Advanced Features +- [ ] Parallel preparation for multiple queries +- [ ] Incremental context updates +- [ ] Adaptive preparation (skip steps if cached) +- [ ] Cost optimization (cache expensive operations) + +--- + +## Conclusion + +**This split architecture is the best approach for your use case:** + +✅ **Pgflow does orchestration** (multi-step preparation, durable) +✅ **Streaming endpoint is simple** (just proxy, fast) +✅ **Clean separation** (easy to reason about) +✅ **Fast streaming** (28-48ms, can use Edge) +✅ **Flexible** (cache, regenerate, reuse context) +✅ **Reliable** (prep stored in DB, streaming is standard) + +**Perfect for Perplexity-style apps where:** +- Multi-step preparation is complex and slow +- Streaming response is fast and simple +- Want to show step-by-step progress +- May regenerate response without re-searching + +**This is the architecture I recommend implementing.** diff --git a/UNIFIED_STREAM_ARCHITECTURE.md b/UNIFIED_STREAM_ARCHITECTURE.md new file mode 100644 index 000000000..7ed688a36 --- /dev/null +++ b/UNIFIED_STREAM_ARCHITECTURE.md @@ -0,0 +1,792 @@ +# Unified Stream Architecture: Pgflow Events + LLM Streaming + +## The Vision + +**Single continuous stream that includes:** +1. Pgflow step events (preparation progress) → displayed as AI SDK data chunks +2. Final LLM streaming (actual response) → displayed as message content + +**User experience:** +``` +User: "What is quantum computing?" + +AI: [Searching knowledge base...] + [Found 15 results] + [Ranking by relevance...] + [Top 5 results selected] + [Extracting key information...] + [Generating response...] + + Quantum computing is a type of computation that harnesses... 
+ [streams token by token] +``` + +**All in one useChat conversation!** + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Frontend │ +│ useChat({ api: '/api/chat' }) │ +│ ↓ receives SSE stream with: │ +│ - data-search (pgflow event) │ +│ - data-rank (pgflow event) │ +│ - text-delta (LLM tokens) │ +│ - finish │ +└─────────────────┬───────────────────────────────────────────┘ + │ + ↓ Single SSE connection + │ +┌─────────────────┴───────────────────────────────────────────┐ +│ POST /api/chat (Node.js, 300s timeout) │ +│ │ +│ 1. Start pgflow flow │ +│ 2. Subscribe to step events (Realtime) │ +│ 3. Convert pgflow events → SSE data chunks │ +│ 4. When preparation complete, start LLM streaming │ +│ 5. Stream LLM tokens → SSE text-delta chunks │ +│ 6. Close stream when done │ +└─────────────────┬───────────────────────────────────────────┘ + │ + ↕ +┌─────────────────┴───────────────────────────────────────────┐ +│ Supabase Realtime + Database │ +│ │ +│ Pgflow Flow Execution: │ +│ ├─ search → broadcasts 'step:completed' │ +│ ├─ rank → broadcasts 'step:completed' │ +│ └─ extract → broadcasts 'step:completed' │ +└──────────────────────────────────────────────────────────────┘ +``` + +--- + +## Implementation + +### Backend: Single Unified Endpoint + +```typescript +// app/api/chat/route.ts +export const runtime = 'nodejs'; // 300s timeout +export const dynamic = 'force-dynamic'; + +import { PgflowClient } from '@pgflow/client'; +import { createClient } from '@supabase/supabase-js'; +import { OpenAI } from 'openai'; + +export async function POST(req: Request) { + const { messages } = await req.json(); + const userMessage = messages[messages.length - 1].content; + + // Auth check + const session = await getServerSession(req); + if (!session) { + return new Response('Unauthorized', { status: 401 }); + } + + // Initialize + const supabase = createClient( + process.env.SUPABASE_URL!, + process.env.SUPABASE_SERVICE_KEY! 
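+    // Service-role client: this route subscribes to pgflow step events and
+    // reads run state server-side; the user was already authenticated above.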
+ ); + const pgflow = new PgflowClient(supabase); + + // Create SSE stream + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + async start(controller) { + const sendChunk = (data: any) => { + const chunk = `data: ${JSON.stringify(data)}\n\n`; + controller.enqueue(encoder.encode(chunk)); + }; + + try { + // Send start event + sendChunk({ type: 'start', id: crypto.randomUUID() }); + + // ========================================== + // PHASE 1: Pgflow Preparation with Progress + // ========================================== + + // Start pgflow flow + const run = await pgflow.startFlow('chat_preparation', { + message: userMessage, + userId: session.user.id, + }); + + // Track preparation data for LLM context + let preparationContext: any = {}; + + // Listen to step events and convert to AI SDK chunks + const stepEventListener = pgflow.onStepEvent((event) => { + if (event.run_id !== run.run_id) return; + + // Step started - send progress + if (event.event_type === 'step:started') { + const messages = { + search: 'Searching knowledge base...', + rerank: 'Ranking results by relevance...', + extract: 'Extracting key information...', + prepare_context: 'Preparing response context...', + }; + + sendChunk({ + type: 'data-progress', + data: { + step: event.step_slug, + status: 'started', + message: messages[event.step_slug] || `Processing ${event.step_slug}...`, + }, + }); + } + + // Step completed - send results + if (event.event_type === 'step:completed') { + // Store for context + preparationContext[event.step_slug] = event.output; + + // Send to frontend + sendChunk({ + type: `data-${event.step_slug}`, + data: event.output, + }); + + // Send completion message + const completionMessages = { + search: `Found ${event.output?.count || 0} results`, + rerank: `Selected top ${event.output?.topResults?.length || 0} results`, + extract: `Extracted ${event.output?.chunks?.length || 0} relevant passages`, + prepare_context: 'Context ready', + }; + + sendChunk({ + type: 'data-progress', + data: { + step: event.step_slug, + status: 'completed', + message: completionMessages[event.step_slug] || 'Completed', + }, + }); + } + + // Step failed + if (event.event_type === 'step:failed') { + sendChunk({ + type: 'error', + error: `Failed at ${event.step_slug}: ${event.error_message}`, + }); + } + }); + + // Wait for preparation to complete + try { + await run.waitForStatus('completed', { + timeoutMs: 60000, // 60 seconds for preparation + }); + } catch (error) { + stepEventListener(); // Unsubscribe + sendChunk({ + type: 'error', + error: 'Preparation timed out', + }); + controller.close(); + return; + } + + // Unsubscribe from step events + stepEventListener(); + + // Get final context + const finalContext = run.output || preparationContext.prepare_context; + + if (!finalContext) { + sendChunk({ + type: 'error', + error: 'Preparation completed but no context available', + }); + controller.close(); + return; + } + + // ========================================== + // PHASE 2: LLM Streaming + // ========================================== + + sendChunk({ + type: 'data-progress', + data: { + step: 'generate', + status: 'started', + message: 'Generating response...', + }, + }); + + // Initialize OpenAI + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + }); + + // Stream LLM response + const llmStream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { + role: 'system', + content: finalContext.systemPrompt || `Context: ${JSON.stringify(finalContext)}`, + 
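+          // Fallback: if prepare_context produced no systemPrompt, inline the
+          // raw context object (assumes it is small enough for a prompt).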
}, + ...messages, + ], + stream: true, + }); + + // Stream tokens + for await (const chunk of llmStream) { + const delta = chunk.choices[0]?.delta?.content || ''; + if (delta) { + sendChunk({ + type: 'text-delta', + text: delta, + }); + } + } + + // Send sources as final data + if (finalContext.sources) { + sendChunk({ + type: 'data-sources', + data: { + sources: finalContext.sources, + }, + }); + } + + // Send finish + sendChunk({ + type: 'finish', + finishReason: 'stop', + }); + + controller.close(); + } catch (error) { + console.error('Stream error:', error); + sendChunk({ + type: 'error', + error: error.message, + }); + controller.close(); + } + }, + }); + + return new Response(stream, { + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + }, + }); +} +``` + +--- + +## Frontend: Standard useChat with Progress Display + +```typescript +// app/chat/page.tsx +'use client'; + +import { useChat } from '@ai-sdk/react'; +import { useState } from 'react'; + +export default function ChatPage() { + const [progressSteps, setProgressSteps] = useState< + Array<{ step: string; status: string; message: string }> + >([]); + const [searchData, setSearchData] = useState(null); + const [rankData, setRankData] = useState(null); + const [sources, setSources] = useState([]); + + const { messages, sendMessage, status } = useChat({ + api: '/api/chat', + + // Handle custom data chunks + onData: (chunk) => { + // Progress updates + if (chunk.type === 'data-progress') { + setProgressSteps((prev) => { + const existing = prev.findIndex((p) => p.step === chunk.data.step); + if (existing >= 0) { + // Update existing step + const updated = [...prev]; + updated[existing] = chunk.data; + return updated; + } else { + // Add new step + return [...prev, chunk.data]; + } + }); + } + + // Search results + if (chunk.type === 'data-search') { + setSearchData(chunk.data); + } + + // Rank results + if (chunk.type === 'data-rerank') { + setRankData(chunk.data); + } + + // Sources + if (chunk.type === 'data-sources') { + setSources(chunk.data.sources); + } + }, + + // Clear progress on new message + onBeforeSend: () => { + setProgressSteps([]); + setSearchData(null); + setRankData(null); + setSources([]); + }, + }); + + return ( +
+      <div className="mx-auto flex max-w-2xl flex-col gap-4 p-4">
+        {/* Messages */}
+        <div className="flex flex-col gap-2">
+          {messages.map((message) => (
+            <div key={message.id} className="rounded border p-3">
+              <div className="text-sm font-medium">
+                {message.role === 'user' ? '🧑 You' : '🤖 AI'}
+              </div>
+              <div className="whitespace-pre-wrap">{message.content}</div>
+
+              {/* Show sources after message */}
+              {message.role === 'assistant' && sources.length > 0 && (
+                <div className="mt-2 text-xs text-gray-500">
+                  <div className="font-medium">
+                    Sources:
+                  </div>
+                  <div>
+                    {sources.map((source, idx) => (
+                      <div key={idx}>
+                        [{idx + 1}] {source}
+                      </div>
+                    ))}
+                  </div>
+                </div>
+              )}
+            </div>
+          ))}
+
+          {/* Progress indicator during streaming */}
+          {status === 'streaming' && progressSteps.length > 0 && (
+            <div className="rounded bg-gray-50 p-3 text-sm">
+              <div className="font-medium">
+                🔄 Processing...
+              </div>
+
+              {/* Step progress */}
+              <div className="mt-2 flex flex-col gap-1">
+                {progressSteps.map((step, idx) => (
+                  <div key={idx} className="flex items-center gap-2">
+                    {step.status === 'started' && (
+                      <span className="animate-pulse">⏳</span>
+                    )}
+                    {step.status === 'completed' && (
+                      <span>✓</span>
+                    )}
+                    {step.message}
+                  </div>
+                ))}
+              </div>
+
+              {/* Detailed data (expandable) */}
+              {searchData && (
+                <details className="mt-2">
+                  <summary>Search results ({searchData.count})</summary>
+                  <pre className="overflow-x-auto text-xs">
+                    {JSON.stringify(searchData, null, 2)}
+                  </pre>
+                </details>
+              )}
+
+              {rankData && (
+                <details className="mt-2">
+                  <summary>Ranking details</summary>
+                  <pre className="overflow-x-auto text-xs">
+                    {JSON.stringify(rankData, null, 2)}
+                  </pre>
+                </details>
+              )}
+            </div>
+          )}
+        </div>
+
+        {/* Input */}
+        <form
+          onSubmit={(e) => {
+            e.preventDefault();
+            const formData = new FormData(e.currentTarget);
+            const message = formData.get('message') as string;
+            if (message.trim()) {
+              sendMessage({ content: message });
+              e.currentTarget.reset();
+            }
+          }}
+          className="flex gap-2"
+        >
+          <input
+            name="message"
+            placeholder="Ask anything..."
+            className="flex-1 rounded border p-2"
+          />
+          <button type="submit" className="rounded border px-4">
+            Send
+          </button>
+        </form>
+      </div>
+ ); +} +``` + +--- + +## Event Mapping: Pgflow → AI SDK + +### Pgflow Events + +```typescript +// Step started +{ + event_type: 'step:started', + run_id: 'abc123', + step_slug: 'search', +} + +// Step completed +{ + event_type: 'step:completed', + run_id: 'abc123', + step_slug: 'search', + output: { count: 10, results: [...] } +} +``` + +### AI SDK Chunks (sent via SSE) + +```typescript +// Progress indicator (custom data) +data: {"type":"data-progress","data":{"step":"search","status":"started","message":"Searching knowledge base..."}} + +// Step result (custom data) +data: {"type":"data-search","data":{"count":10,"results":[...]}} + +// Completion indicator (custom data) +data: {"type":"data-progress","data":{"step":"search","status":"completed","message":"Found 10 results"}} + +// LLM token (text) +data: {"type":"text-delta","text":"Quantum"} + +// Finish +data: {"type":"finish","finishReason":"stop"} +``` + +--- + +## Alternative: Custom Wrapper Hook + +For even better DX, create a wrapper around `useChat`: + +```typescript +// hooks/use-pgflow-chat.ts +import { useChat } from '@ai-sdk/react'; +import { useState } from 'react'; + +interface ProgressStep { + step: string; + status: 'started' | 'completed' | 'failed'; + message: string; + data?: any; +} + +export function usePgflowChat(options?: { + api?: string; + onStepComplete?: (step: string, data: any) => void; +}) { + const [progressSteps, setProgressSteps] = useState([]); + const [preparationData, setPreparationData] = useState>({}); + + const chat = useChat({ + api: options?.api || '/api/chat', + + onData: (chunk) => { + // Handle progress updates + if (chunk.type === 'data-progress') { + setProgressSteps((prev) => { + const existing = prev.findIndex((p) => p.step === chunk.data.step); + if (existing >= 0) { + const updated = [...prev]; + updated[existing] = { ...updated[existing], ...chunk.data }; + return updated; + } + return [...prev, chunk.data]; + }); + } + + // Handle step data + if (chunk.type.startsWith('data-') && chunk.type !== 'data-progress') { + const stepName = chunk.type.replace('data-', ''); + setPreparationData((prev) => ({ + ...prev, + [stepName]: chunk.data, + })); + + options?.onStepComplete?.(stepName, chunk.data); + } + }, + + onBeforeSend: () => { + // Clear progress on new message + setProgressSteps([]); + setPreparationData({}); + }, + }); + + return { + ...chat, + progressSteps, + preparationData, + isPrepairing: progressSteps.some((s) => s.status === 'started'), + }; +} +``` + +### Usage + +```typescript +const { + messages, + sendMessage, + status, + progressSteps, + preparationData, + isPrepairing, +} = usePgflowChat({ + onStepComplete: (step, data) => { + console.log(`Step ${step} completed:`, data); + }, +}); + +return ( +
+  <div>
+    {/* Show progress */}
+    {isPrepairing && (
+      <div>⏳ Preparing...</div>
+    )}
+
+    {/* Show messages */}
+    {messages.map(m => (
+      <div key={m.id}>{m.content}</div>
+    ))}
+
+    {/* Show search results while preparing */}
+    {preparationData.search && (
+      <div>🔍 Found {preparationData.search.count} results</div>
+    )}
+  </div>
+); +``` + +--- + +## Visual Timeline + +What the user sees during a conversation: + +``` +[User sends: "What is quantum computing?"] + +Time: 0s + 🔄 Processing... + ⏳ Searching knowledge base... + +Time: 2s + 🔄 Processing... + ✓ Found 15 results + ⏳ Ranking results by relevance... + +Time: 5s + 🔄 Processing... + ✓ Found 15 results + ✓ Selected top 5 results + ⏳ Extracting key information... + +Time: 8s + 🔄 Processing... + ✓ Found 15 results + ✓ Selected top 5 results + ✓ Extracted 12 relevant passages + ⏳ Generating response... + +Time: 9s + 🤖 AI: Quantum| + +Time: 9.05s + 🤖 AI: Quantum computing| + +Time: 9.1s + 🤖 AI: Quantum computing is| + +[continues streaming tokens...] + +Time: 20s + 🤖 AI: Quantum computing is a type of computation that... + [complete response] + + Sources: + [1] Wikipedia: Quantum Computing + [2] MIT OpenCourseware: Quantum Information +``` + +--- + +## Advantages + +### ✅ **Unified User Experience** +- Everything in one chat conversation +- Progress updates feel native +- Smooth transition from prep → streaming + +### ✅ **Standard AI SDK Patterns** +- Uses regular `useChat` hook +- Custom data chunks for progress +- No custom transport needed + +### ✅ **Real-time Progress** +- User sees each step as it happens +- Can show detailed data (search results, etc.) +- Better UX than "loading..." + +### ✅ **Pgflow Benefits** +- Multi-step orchestration +- Durable storage +- Can query past preparations +- Retry individual steps + +### ✅ **Fast Final Streaming** +- LLM tokens stream via SSE (28-48ms) +- No Realtime overhead for tokens +- Only Realtime used for coarse step events + +--- + +## Comparison + +| Approach | Progress Updates | Final Streaming | Complexity | UX | +|----------|-----------------|-----------------|------------|-----| +| **Unified Stream** | ✅ Real-time in chat | ✅ Fast (SSE) | Medium | ⭐⭐⭐⭐⭐ | +| Split Architecture | ⚠️ Separate UI | ✅ Fast (SSE) | Low | ⭐⭐⭐ | +| Frontend Transport | ✅ Real-time in chat | ❌ Slow (Realtime) | High | ⭐⭐⭐ | + +--- + +## Implementation Checklist + +- [ ] Create pgflow preparation flow with meaningful step names +- [ ] Implement /api/chat endpoint with dual-phase streaming +- [ ] Subscribe to pgflow step events in API route +- [ ] Map step events → AI SDK data chunks +- [ ] Frontend: Use `useChat` with `onData` handler +- [ ] Display progress steps in chat UI +- [ ] Show detailed step data (collapsible) +- [ ] Test with realistic multi-step flow +- [ ] Add error handling for step failures +- [ ] Polish progress indicators (animations, icons) + +--- + +## Potential Issues & Solutions + +### Issue 1: API Route Timeout During Preparation + +**Problem:** Preparation takes 60s, but connection times out. + +**Solution:** +```typescript +// Keep connection alive with periodic heartbeats +const heartbeat = setInterval(() => { + sendChunk({ type: 'heartbeat' }); +}, 10000); // Every 10 seconds + +// Clear on completion +clearInterval(heartbeat); +``` + +### Issue 2: Realtime Event Delay + +**Problem:** Step events arrive with 100-200ms delay. + +**Solution:** This is acceptable for coarse-grained progress. Users won't notice 200ms delay when each step takes 3-5 seconds. + +### Issue 3: Failed Steps + +**Problem:** Step fails, but user sees partial progress. 
+ +**Solution:** +```typescript +if (event.event_type === 'step:failed') { + sendChunk({ + type: 'data-progress', + data: { + step: event.step_slug, + status: 'failed', + message: `Failed: ${event.error_message}`, + }, + }); + + sendChunk({ + type: 'error', + error: `Preparation failed at ${event.step_slug}`, + }); + + controller.close(); +} +``` + +--- + +## Conclusion + +**This unified stream architecture gives you:** + +✅ **Everything in one conversation** (prep progress + final response) +✅ **Real-time updates** (pgflow events → AI SDK data chunks) +✅ **Fast LLM streaming** (standard SSE, 28-48ms) +✅ **Standard patterns** (useChat, no custom transport) +✅ **Great UX** (users see step-by-step progress) +✅ **Pgflow benefits** (orchestration, durability, observability) + +**Perfect for Perplexity-style multi-step AI apps.** + +The key insight: **Pgflow events become AI SDK data chunks**, displayed alongside the final streaming response in one unified chat experience. diff --git a/examples/vercel-ai-sdk-integration/COMPLETE_EXAMPLE.md b/examples/vercel-ai-sdk-integration/COMPLETE_EXAMPLE.md new file mode 100644 index 000000000..9a6979878 --- /dev/null +++ b/examples/vercel-ai-sdk-integration/COMPLETE_EXAMPLE.md @@ -0,0 +1,641 @@ +# Complete Example: Production-Ready Streaming Chat with Recovery + +This example demonstrates the full implementation of pgflow + Vercel AI SDK integration with: +- Real-time streaming via Supabase Realtime +- Chunk persistence in database +- Automatic recovery from edge function timeouts +- Reconnection support +- Graceful handling of partial responses + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Frontend (React + useChat) │ +│ └─ PgflowChatTransport │ +│ ├─ Realtime: Fast streaming │ +│ └─ Database: Recovery + reconnection │ +└─────────────────┬───────────────────────────────────────────────┘ + │ + ↕ WebSocket + SQL + │ +┌─────────────────┴───────────────────────────────────────────────┐ +│ Supabase │ +│ ├─ Realtime: Broadcasts streaming chunks │ +│ ├─ Database: Stores chunks + checkpoints │ +│ └─ Edge Functions: Executes pgflow flows │ +└─────────────────┬───────────────────────────────────────────────┘ + │ + ↓ + Pgflow Flows + ├─ Step 1: Search (fast, no streaming) + ├─ Step 2: Analyze (fast, emit reasoning) + └─ Step 3: Generate (slow, stream tokens + persist) +``` + +--- + +## 1. 
Database Setup + +### Run Migrations + +```sql +-- streaming_chunks table +CREATE TABLE IF NOT EXISTS streaming_chunks ( + id BIGSERIAL PRIMARY KEY, + run_id UUID NOT NULL REFERENCES flow_runs(run_id) ON DELETE CASCADE, + step_slug TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + chunk_type TEXT NOT NULL CHECK (chunk_type IN ('text', 'data', 'reasoning', 'tool-input')), + chunk_data JSONB NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW(), + + UNIQUE(run_id, step_slug, chunk_index) +); + +CREATE INDEX idx_streaming_chunks_run_step + ON streaming_chunks(run_id, step_slug, chunk_index); + +-- Enable RLS +ALTER TABLE streaming_chunks ENABLE ROW LEVEL SECURITY; + +CREATE POLICY "users_read_own_chunks" + ON streaming_chunks FOR SELECT + USING ( + EXISTS ( + SELECT 1 FROM flow_runs + WHERE flow_runs.run_id = streaming_chunks.run_id + AND flow_runs.user_id = auth.uid() + ) + ); + +CREATE POLICY "service_role_insert_chunks" + ON streaming_chunks FOR INSERT + WITH CHECK (true); + +-- Add checkpoint support to flow_steps +ALTER TABLE flow_steps + ADD COLUMN IF NOT EXISTS checkpoint_data JSONB, + ADD COLUMN IF NOT EXISTS checkpoint_at TIMESTAMPTZ; +``` + +--- + +## 2. Backend Flow (Supabase Edge Function) + +### `supabase/functions/execute-streaming-chat/index.ts` + +```typescript +import { serve } from 'https://deno.land/std@0.168.0/http/server.ts'; +import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'; +import { OpenAI } from 'https://esm.sh/openai@4'; +import { StreamingChatFlow } from './flows/streaming-chat.ts'; +import { createStreamingContext } from './helpers/streaming-context.ts'; + +serve(async (req) => { + try { + // Initialize Supabase client + const supabase = createClient( + Deno.env.get('SUPABASE_URL')!, + Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')! 
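+      // The service-role key bypasses RLS, so it must stay server-side.
+      // The caller's identity is still verified below via auth.getUser().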
+ ); + + // Get user from auth header + const authHeader = req.headers.get('Authorization'); + const token = authHeader?.replace('Bearer ', ''); + + const { data: { user }, error: authError } = await supabase.auth.getUser(token); + + if (authError || !user) { + return new Response('Unauthorized', { status: 401 }); + } + + // Parse request + const { message, conversationId, history } = await req.json(); + + // Execute flow (pgflow executor would do this automatically) + // For this example, we'll execute manually + const runId = conversationId || crypto.randomUUID(); + + // Execute the flow + const result = await executeStreamingChatFlow( + supabase, + runId, + { + message, + conversationId, + userId: user.id, + history, + } + ); + + return new Response(JSON.stringify(result), { + headers: { 'Content-Type': 'application/json' }, + }); + } catch (error) { + console.error('Error:', error); + return new Response(JSON.stringify({ error: error.message }), { + status: 500, + headers: { 'Content-Type': 'application/json' }, + }); + } +}); + +async function executeStreamingChatFlow( + supabase: any, + runId: string, + input: any +) { + // Step 1: Search + const searchCtx = createStreamingContext(supabase, runId, 'search', { + enabled: false, // Don't persist search chunks + }); + + await searchCtx.emitReasoning('Searching knowledge base...'); + + const searchResults = await performSearch(input.message); + + await searchCtx.emitData('search_results', { + count: searchResults.length, + sources: searchResults.map(r => r.source), + }); + + // Step 2: Analyze + const analyzeCtx = createStreamingContext(supabase, runId, 'analyze', { + enabled: false, + }); + + await analyzeCtx.emitReasoning('Analyzing results...'); + + const analysis = await analyzeResults(searchResults); + + await analyzeCtx.emitData('analysis', analysis); + + // Step 3: Generate (with persistence) + const generateCtx = createStreamingContext(supabase, runId, 'generate', { + enabled: true, + batchSize: 10, + flushIntervalMs: 1000, + }); + + await generateCtx.emitReasoning('Generating response...'); + + const openai = new OpenAI({ + apiKey: Deno.env.get('OPENAI_API_KEY'), + }); + + try { + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { + role: 'system', + content: `Context: ${JSON.stringify(searchResults)}\nAnalysis: ${JSON.stringify(analysis)}`, + }, + ...(input.history || []), + { + role: 'user', + content: input.message, + }, + ], + stream: true, + max_tokens: 1000, + }); + + let fullResponse = ''; + let tokenCount = 0; + + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta?.content || ''; + if (delta) { + await generateCtx.emitText(delta); + fullResponse += delta; + tokenCount++; + + // Checkpoint every 50 tokens + if (tokenCount % 50 === 0) { + await generateCtx.checkpoint({ + partial_response: fullResponse, + tokens_generated: tokenCount, + }); + } + } + } + + // Finalize (flush remaining chunks) + await generateCtx.finalize(); + + return { + response: fullResponse, + metadata: { + sources: searchResults.length, + tokens: tokenCount, + }, + }; + } catch (error) { + // On error, finalize and save partial progress + const partial = generateCtx.getStreamedText(); + await generateCtx.checkpoint({ + partial_response: partial, + error: error.message, + }); + await generateCtx.finalize(); + + throw error; + } +} + +async function performSearch(query: string) { + // Mock implementation + return [ + { source: 'doc1', content: 'Result 1' }, + { source: 'doc2', content: 
'Result 2' }, + ]; +} + +async function analyzeResults(results: any[]) { + // Mock implementation + return { relevance: 'high', confidence: 0.9 }; +} +``` + +--- + +## 3. Frontend Component + +### `app/chat/page.tsx` + +```typescript +'use client'; + +import { useChat } from '@ai-sdk/react'; +import { createBrowserClient } from '@supabase/ssr'; +import { PgflowChatTransport } from '@/lib/pgflow-chat-transport-with-recovery'; +import { useMemo, useState } from 'react'; + +export default function ChatPage() { + const [customData, setCustomData] = useState>({}); + const [partialResponse, setPartialResponse] = useState(null); + + // Initialize Supabase client + const supabase = useMemo( + () => + createBrowserClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY! + ), + [] + ); + + // Create transport with recovery options + const transport = useMemo( + () => + new PgflowChatTransport(supabase, 'streaming_chat', { + streamTimeoutMs: 30000, + enableChunkRecovery: true, + showPartialOnTimeout: true, + onStreamTimeout: (runId, partial) => { + console.warn(`Stream timeout for ${runId}, partial: ${partial}`); + setPartialResponse(partial); + }, + }), + [supabase] + ); + + // Use the chat hook + const { + messages, + sendMessage, + status, + error, + reload, + } = useChat({ + transport, + onData: (chunk) => { + // Handle custom streaming data + if (chunk.type.startsWith('data-')) { + const key = chunk.type.replace('data-', ''); + setCustomData((prev) => ({ ...prev, [key]: chunk.data })); + } + + // Handle partial response on timeout + if (chunk.type === 'data-partial-response') { + setPartialResponse(chunk.data.text); + } + }, + onError: (error) => { + console.error('Chat error:', error); + }, + }); + + return ( +
+      <div className="mx-auto flex max-w-2xl flex-col gap-4 p-4">
+        {/* Header */}
+        <div>
+          <h1 className="text-xl font-semibold">
+            AI Chat with Recovery
+          </h1>
+          <p className="text-sm text-gray-500">
+            Powered by pgflow + Vercel AI SDK
+          </p>
+        </div>
+
+        {/* Messages */}
+        <div className="flex flex-col gap-2">
+          {messages.map((message) => (
+            <div key={message.id} className="rounded border p-3">
+              <div className="text-sm font-medium">
+                {message.role === 'user' ? '🧑 You' : '🤖 AI'}
+              </div>
+              <div className="whitespace-pre-wrap">{message.content}</div>
+
+              {/* Show metadata if available */}
+              {message.metadata && (
+                <div className="mt-1 text-xs">
+                  {message.metadata.incomplete && (
+                    <span className="text-amber-600">
+                      ⚠️ Incomplete response
+                    </span>
+                  )}
+                </div>
+              )}
+            </div>
+          ))}
+
+          {/* Partial response warning */}
+          {partialResponse && (
+            <div className="rounded border border-amber-300 bg-amber-50 p-3 text-sm">
+              <div className="font-medium">
+                ⚠️ Response Interrupted
+              </div>
+              <div>
+                The AI's response was interrupted. Here's what was generated:
+              </div>
+              <div className="mt-2 whitespace-pre-wrap">
+                {partialResponse}
+              </div>
+              {/* Retry via useChat's reload() */}
+              <button onClick={() => reload()} className="mt-2 rounded border px-3">
+                Retry
+              </button>
+            </div>
+          )}
+        </div>
+
+        {/* Progress Indicators */}
+        {status === 'streaming' && (
+          <div className="rounded bg-gray-50 p-3 text-sm">
+            <div className="flex items-center gap-2">
+              <span className="animate-spin">⏳</span>
+              Processing...
+            </div>
+
+            {/* Show intermediate progress */}
+            {customData.reasoning && (
+              <div>
+                💭 {customData.reasoning}
+              </div>
+            )}
+
+            {customData.search_results && (
+              <div>
+                🔍 Found {customData.search_results.count} results
+              </div>
+            )}
+
+            {customData.analysis && (
+              <div>
+                📊 Analysis: {customData.analysis.relevance} relevance
+              </div>
+            )}
+
+            {customData['step-complete'] && (
+              <div>
+                ✓ Step completed: {customData['step-complete'].step}
+              </div>
+            )}
+          </div>
+        )}
+
+        {/* Error Display */}
+        {error && (
+          <div className="rounded border border-red-300 bg-red-50 p-3 text-sm">
+            <div className="font-medium">Error</div>
+            <div>{error.message}</div>
+            <button onClick={() => reload()} className="mt-2 rounded border px-3">
+              Retry
+            </button>
+          </div>
+        )}
+
+        {/* Input Form */}
+        <form
+          onSubmit={(e) => {
+            e.preventDefault();
+            const formData = new FormData(e.currentTarget);
+            const message = formData.get('message') as string;
+
+            if (message.trim()) {
+              sendMessage({ content: message });
+              e.currentTarget.reset();
+              setCustomData({});
+              setPartialResponse(null);
+            }
+          }}
+          className="flex gap-2"
+        >
+          <input
+            name="message"
+            placeholder="Ask anything..."
+            className="flex-1 rounded border p-2"
+          />
+          <button type="submit" className="rounded border px-4">
+            Send
+          </button>
+        </form>
+      </div>
+ ); +} +``` + +--- + +## 4. Testing Recovery Scenarios + +### Test 1: Normal Flow +``` +1. User sends message +2. Flow executes normally +3. All tokens stream via Realtime +4. Final output saved to database +5. Frontend displays complete response +✅ SUCCESS +``` + +### Test 2: Edge Function Timeout +``` +1. User sends message +2. Flow starts streaming +3. Edge function times out at 50% (simulate by killing function) +4. Frontend detects timeout (no events for 30s) +5. Frontend queries database for stored chunks +6. Displays partial response with retry option +7. User clicks retry +8. Flow restarts and completes +✅ RECOVERED +``` + +### Test 3: Network Disconnection +``` +1. User sends message +2. Flow streams 30% of tokens +3. User's network disconnects (close browser, lose WiFi) +4. Flow continues executing on server +5. All chunks stored in database +6. User reconnects +7. Frontend calls reconnectToStream() +8. Replays all 30% from database +9. Subscribes to live stream for remaining 70% +10. Displays complete response +✅ RECOVERED +``` + +### Test 4: Checkpoint Recovery +``` +1. User sends long generation request (1000 tokens) +2. Flow checkpoints every 50 tokens +3. Edge function crashes at token 300 +4. Frontend queries checkpoint_data +5. Finds checkpoint at token 250 +6. Displays partial response +7. User retries +8. New flow continues from context (not exact token) +✅ PARTIAL RECOVERY +``` + +--- + +## 5. Performance Monitoring + +```typescript +// Add to frontend +const [metrics, setMetrics] = useState({ + firstTokenMs: null, + totalTokens: 0, + averageLatencyMs: null, +}); + +useChat({ + transport, + onData: (chunk) => { + if (chunk.type === 'text-delta') { + setMetrics(prev => ({ + firstTokenMs: prev.firstTokenMs ?? Date.now() - startTime, + totalTokens: prev.totalTokens + 1, + averageLatencyMs: (Date.now() - startTime) / (prev.totalTokens + 1), + })); + } + }, +}); + +// Display metrics +
+<div className="text-xs text-gray-500">
+  First token: {metrics.firstTokenMs}ms |
+  Total: {metrics.totalTokens} tokens |
+  Avg latency: {metrics.averageLatencyMs?.toFixed(0)}ms/token
+</div>
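+
+// Note: `startTime` is assumed to be captured when the message is sent
+// (for example, stored in a ref inside the submit handler); useChat does
+// not provide it.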
+``` + +--- + +## 6. Cost Analysis + +### Database Costs (Supabase) +``` +Assumptions: +- 1000 chat responses per day +- 300 tokens per response (avg) +- 10 tokens per chunk (batched) + +Storage: +- 1000 responses × 30 chunks × 100 bytes = 3 MB/day +- 90 MB/month +- Cost: ~$0 (well within free tier) + +Realtime: +- 1000 responses × 30 chunks = 30,000 messages/day +- 900,000 messages/month +- Cost: ~$9/month ($10 per 1M messages) + +Total: ~$9/month for 1000 daily conversations +``` + +### Comparison with API Route +``` +API Route: +- Compute: Vercel Function invocations +- Bandwidth: SSE streaming +- Cost: ~$5-10/month for same volume + +Verdict: Similar cost, frontend transport adds $9 Realtime cost +``` + +--- + +## 7. Production Checklist + +- [ ] Enable RLS policies on streaming_chunks table +- [ ] Set up automatic chunk cleanup (24h retention) +- [ ] Configure appropriate timeouts per platform +- [ ] Add monitoring for timeout rates +- [ ] Set up alerts for high failure rates +- [ ] Test reconnection on mobile networks +- [ ] Optimize chunk batch size (balance latency vs writes) +- [ ] Add rate limiting per user +- [ ] Implement retry backoff strategy +- [ ] Add telemetry/analytics +- [ ] Load test with concurrent users +- [ ] Document edge runtime limits + +--- + +## Conclusion + +This implementation provides **production-grade reliability** for streaming AI chat with: + +✅ **Real-time streaming** via Supabase Realtime +✅ **Chunk persistence** for recovery +✅ **Timeout detection** and graceful handling +✅ **Reconnection support** with chunk replay +✅ **Checkpoint system** for long operations +✅ **Partial response** display with retry +✅ **Type-safe** end-to-end +✅ **Cost-effective** (~$9/month for 1000 daily chats) + +**This is viable for production use**, especially for multi-step AI pipelines like Perplexity, where: +- Intermediate progress is valuable +- Total latency is 30+ seconds +- Reliability > speed +- State persistence is critical diff --git a/examples/vercel-ai-sdk-integration/README.md b/examples/vercel-ai-sdk-integration/README.md new file mode 100644 index 000000000..d9d2cfb0e --- /dev/null +++ b/examples/vercel-ai-sdk-integration/README.md @@ -0,0 +1,148 @@ +# Vercel AI SDK Integration with Pgflow + +This example demonstrates how to use **pgflow client in the frontend** as a custom `ChatTransport` for Vercel AI SDK's `useChat` hook, with streaming support. + +## Architecture + +``` +Frontend (React) + ├─ useChat({ transport: PgflowChatTransport }) + └─ PgflowClient (browser) → Supabase Realtime + ↕ +Backend (Supabase Edge Functions) + └─ Pgflow Flows with streaming context + └─ ctx.stream.emitText(chunk) +``` + +## Features + +- ✅ **Frontend pgflow client** - Direct Supabase connection, no API routes +- ✅ **Streaming responses** - LLM tokens streamed in real-time via Supabase Realtime +- ✅ **Type-safe** - Full TypeScript support end-to-end +- ✅ **Progress indicators** - Show intermediate workflow steps +- ✅ **Auto-reconnection** - Supabase handles network failures +- ✅ **RLS security** - Database policies enforce access control + +## Quick Start + +### 1. Install Dependencies + +```bash +npm install @pgflow/client @pgflow/dsl @ai-sdk/react ai @supabase/supabase-js +``` + +### 2. Create Streaming Flow + +See `./backend/flows/streaming-chat.ts` for a complete example. 
+ +```typescript +import { Flow } from '@pgflow/dsl'; + +export const ChatFlow = new Flow<{ message: string }>({ slug: 'streaming_chat' }) + .step('generate', async (input, ctx) => { + // Stream LLM response + for await (const chunk of llm.stream(input.message)) { + await ctx.stream.emitText(chunk); + } + return { response: fullText }; + }); +``` + +### 3. Set Up Frontend Transport + +See `./frontend/lib/pgflow-chat-transport.ts` for implementation. + +```typescript +import { PgflowChatTransport } from './lib/pgflow-chat-transport'; + +const transport = new PgflowChatTransport(supabase, 'streaming_chat'); + +const { messages, sendMessage } = useChat({ transport }); +``` + +## File Structure + +``` +examples/vercel-ai-sdk-integration/ +├── README.md (this file) +├── backend/ +│ ├── flows/ +│ │ └── streaming-chat.ts # Example flow with streaming +│ ├── helpers/ +│ │ ├── streaming-context.ts # Streaming context implementation +│ │ └── openai-adapter.ts # OpenAI streaming helper +│ └── types/ +│ └── streaming-events.ts # TypeScript types for events +├── frontend/ +│ ├── lib/ +│ │ └── pgflow-chat-transport.ts # Custom ChatTransport implementation +│ ├── components/ +│ │ └── chat.tsx # Example chat UI +│ └── hooks/ +│ └── use-pgflow-chat.ts # React hook wrapper +└── supabase/ + └── migrations/ + └── 001_streaming_support.sql # Database setup +``` + +## How It Works + +### 1. Backend: Streaming Context + +Flows receive a `StreamingContext` that allows emitting incremental data: + +```typescript +.step('generate', async (input, ctx) => { + // ctx.stream is the streaming context + await ctx.stream.emitText('Hello'); + await ctx.stream.emitText(' world'); + + return { response: 'Hello world' }; +}) +``` + +Events are broadcast via Supabase Realtime to connected clients. + +### 2. Frontend: PgflowChatTransport + +The custom transport: +- Starts pgflow flows when messages are sent +- Subscribes to streaming events via Supabase Realtime +- Converts pgflow events → AI SDK `UIMessageChunk`s +- Handles reconnection automatically + +```typescript +const transport = new PgflowChatTransport(supabase, 'streaming_chat'); + +// useChat automatically uses the transport +const { messages, sendMessage, status } = useChat({ transport }); +``` + +### 3. Event Flow + +``` +1. User sends message + → useChat calls transport.sendMessages() + +2. Transport starts pgflow flow + → pgflow.startFlow('streaming_chat', { message }) + +3. Backend flow executes + → ctx.stream.emitText('chunk') + → Broadcasts to Supabase channel + +4. Frontend receives event + → BroadcastStepStreamEvent + → Mapped to UIMessageChunk { type: 'text-delta', text: 'chunk' } + → useChat updates UI +``` + +## Next Steps + +See the implementation files in `./backend` and `./frontend` for complete examples. + +For production use, you'll need to: +1. Implement the streaming context in pgflow executor +2. Add RLS policies for security +3. Deploy flows to Supabase Edge Functions +4. Configure Supabase Realtime channels diff --git a/examples/vercel-ai-sdk-integration/backend/flows/streaming-chat.example.ts b/examples/vercel-ai-sdk-integration/backend/flows/streaming-chat.example.ts new file mode 100644 index 000000000..fdb8451fd --- /dev/null +++ b/examples/vercel-ai-sdk-integration/backend/flows/streaming-chat.example.ts @@ -0,0 +1,211 @@ +/** + * Example: Streaming Chat Flow for Vercel AI SDK Integration + * + * This flow demonstrates how to use pgflow with streaming context + * to provide real-time updates to a frontend using Vercel AI SDK's useChat hook. 
+ * + * Key Features: + * - Streams LLM responses token-by-token + * - Emits intermediate progress (reasoning, search results) + * - Type-safe input/output + * - Works with PgflowChatTransport on frontend + */ + +import { Flow } from '@pgflow/dsl'; +import type { StreamingContext } from '../helpers/streaming-context'; +import { streamOpenAIResponse } from '../helpers/streaming-context'; + +// Mock implementations (replace with real implementations) +import { OpenAI } from 'openai'; + +/** + * Flow input type + */ +interface ChatInput { + message: string; + conversationId: string; + userId: string; + history?: Array<{ role: string; content: string }>; +} + +/** + * Streaming chat flow + */ +export const StreamingChatFlow = new Flow({ + slug: 'streaming_chat', +}) + + /** + * Step 1: Classify user intent + * Shows reasoning to user + */ + .step('classify_intent', async (input, ctx: { stream: StreamingContext }) => { + // Show progress + await ctx.stream.emitReasoning('Analyzing your message...'); + + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + }); + + const response = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo', + messages: [{ + role: 'user', + content: `Classify the intent of this message in one word: "${input.message}"` + }], + temperature: 0, + }); + + const intent = response.choices[0]?.message?.content || 'general'; + + // Emit the classification result + await ctx.stream.emitData('intent', { + classification: intent, + confidence: 0.9, + }); + + return { intent }; + }) + + /** + * Step 2: Retrieve relevant context + * Shows search progress to user + */ + .step('retrieve_context', async (input, ctx: { stream: StreamingContext }) => { + await ctx.stream.emitReasoning('Searching knowledge base...'); + + // Simulate vector search (replace with real implementation) + await new Promise(resolve => setTimeout(resolve, 500)); + + const mockResults = [ + { id: '1', content: 'Document 1 content...', score: 0.95 }, + { id: '2', content: 'Document 2 content...', score: 0.87 }, + { id: '3', content: 'Document 3 content...', score: 0.76 }, + ]; + + // Emit search results as they come in + await ctx.stream.emitData('search_results', { + count: mockResults.length, + sources: mockResults.map(r => ({ id: r.id, score: r.score })), + }); + + return { + context: mockResults.map(r => r.content).join('\n\n'), + sources: mockResults, + }; + }) + + /** + * Step 3: Generate streaming response + * Streams LLM tokens in real-time + */ + .step('generate_response', async (input, ctx: { stream: StreamingContext }) => { + await ctx.stream.emitReasoning('Generating response...'); + + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + }); + + // Create streaming completion + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { + role: 'system', + content: `You are a helpful assistant. 
Use the following context to answer the user's question: + +Context: +${input.context} + +Answer concisely and cite sources when possible.` + }, + ...(input.history || []), + { + role: 'user', + content: input.message, + } + ], + stream: true, + temperature: 0.7, + }); + + // Stream response through pgflow streaming context + // Each token is emitted to frontend immediately + const fullResponse = await streamOpenAIResponse(stream, ctx.stream); + + return { + response: fullResponse, + model: 'gpt-4', + tokensUsed: fullResponse.split(' ').length * 1.3, // Rough estimate + }; + }) + + /** + * Step 4: Format and finalize + * Add metadata, citations, etc. + */ + .step('finalize', async (input, ctx: { stream: StreamingContext }) => { + // Add citations if sources were used + const citations = input.sources.slice(0, 3).map((s: any, i: number) => + `[${i + 1}] Source ${s.id}` + ); + + await ctx.stream.emitData('citations', citations); + + return { + response: input.response, + citations, + metadata: { + intent: input.intent, + sourcesUsed: input.sources.length, + model: input.model, + tokensUsed: input.tokensUsed, + }, + }; + }); + +/** + * Alternative: Simpler streaming chat without context retrieval + */ +export const SimpleStreamingChatFlow = new Flow<{ + message: string; + conversationId: string; +}>({ slug: 'simple_streaming_chat' }) + + .step('generate', async (input, ctx: { stream: StreamingContext }) => { + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + }); + + const stream = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [{ role: 'user', content: input.message }], + stream: true, + }); + + const response = await streamOpenAIResponse(stream, ctx.stream); + + return { response }; + }); + +/** + * Usage in Supabase Edge Function: + * + * ```typescript + * import { serve } from 'https://deno.land/std@0.168.0/http/server.ts'; + * import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'; + * import { StreamingChatFlow } from './flows/streaming-chat.ts'; + * + * serve(async (req) => { + * const supabase = createClient( + * Deno.env.get('SUPABASE_URL')!, + * Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')! 
+ * );
+ *
+ * // Flow execution happens here
+ * // Streaming context is provided automatically by executor
+ *
+ * return new Response('Flow started', { status: 200 });
+ * });
+ * ```
+ */
diff --git a/examples/vercel-ai-sdk-integration/backend/helpers/streaming-context-with-persistence.ts b/examples/vercel-ai-sdk-integration/backend/helpers/streaming-context-with-persistence.ts
new file mode 100644
index 000000000..a22d03a88
--- /dev/null
+++ b/examples/vercel-ai-sdk-integration/backend/helpers/streaming-context-with-persistence.ts
@@ -0,0 +1,398 @@
+/**
+ * Production-Ready Streaming Context with Chunk Persistence
+ *
+ * This implementation provides:
+ * - Real-time streaming via Supabase Realtime (fast, ephemeral)
+ * - Chunk storage in the database (durable, recoverable)
+ * - Batched writes to reduce database overhead
+ * - Recovery from edge function timeouts
+ * - Checkpoint support for long-running operations
+ */
+
+import type { SupabaseClient } from '@supabase/supabase-js';
+
+export interface StreamingContext {
+  /**
+   * Emit a text delta (for LLM token streaming).
+   * Broadcasts via Realtime and stores in the database.
+   */
+  emitText(text: string): Promise<void>;
+
+  /**
+   * Emit custom data
+   */
+  emitData(key: string, data: any): Promise<void>;
+
+  /**
+   * Emit reasoning/thinking progress
+   */
+  emitReasoning(reasoning: string): Promise<void>;
+
+  /**
+   * Emit tool execution progress
+   */
+  emitToolInput(toolName: string, input: any): Promise<void>;
+
+  /**
+   * Save a checkpoint (partial progress).
+   * Useful for long-running operations that might time out.
+   */
+  checkpoint(data: any): Promise<void>;
+
+  /**
+   * Finalize streaming (flush remaining chunks).
+   * Should be called when the step completes or fails.
+   */
+  finalize(): Promise<void>;
+
+  /**
+   * Get all streamed text so far
+   */
+  getStreamedText(): string;
+
+  /**
+   * Configure persistence settings
+   */
+  enablePersistence(options?: PersistenceOptions): void;
+  disablePersistence(): void;
+}
+
+export interface PersistenceOptions {
+  /**
+   * Number of chunks to buffer before a database write.
+   * Default: 10
+   */
+  batchSize?: number;
+
+  /**
+   * Maximum time to wait before flushing the buffer (ms).
+   * Default: 1000
+   */
+  flushIntervalMs?: number;
+
+  /**
+   * Whether to store chunks in the database.
+   * Default: true
+   */
+  enabled?: boolean;
+}
+
+interface ChunkBuffer {
+  run_id: string;
+  step_slug: string;
+  chunk_index: number;
+  chunk_type: 'text' | 'data' | 'reasoning' | 'tool-input';
+  chunk_data: any;
+  created_at: string;
+}
+
+/**
+ * Create a streaming context for a pgflow step
+ */
+export function createStreamingContext(
+  supabase: SupabaseClient,
+  runId: string,
+  stepSlug: string,
+  options?: PersistenceOptions
+): StreamingContext {
+  // Configuration
+  const persistenceOptions: Required<PersistenceOptions> = {
+    batchSize: options?.batchSize ?? 10,
+    flushIntervalMs: options?.flushIntervalMs ?? 1000,
+    enabled: options?.enabled ?? true,
+  };
+
+  // State
+  let chunkIndex = 0;
+  let chunkBuffer: ChunkBuffer[] = [];
+  let lastFlushTime = Date.now();
+  let streamedText = '';
+  let flushInterval: NodeJS.Timeout | null = null;
+
+  // Start the periodic flush
+  if (persistenceOptions.enabled) {
+    flushInterval = setInterval(() => {
+      if (Date.now() - lastFlushTime >= persistenceOptions.flushIntervalMs) {
+        flushChunks().catch(console.error);
+      }
+    }, persistenceOptions.flushIntervalMs);
+  }
+
+  /**
+   * Broadcast an event via Supabase Realtime
+   */
+  async function broadcast(
+    chunkType: string,
+    chunk: any,
+    index: number
+  ): Promise<void> {
+    const channel = supabase.channel(`pgflow:run:${runId}`);
+
+    await channel.send({
+      type: 'broadcast',
+      event: 'step:stream',
+      payload: {
+        event_type: 'step:stream',
+        run_id: runId,
+        step_slug: stepSlug,
+        stream_type: chunkType,
+        chunk,
+        index,
+        timestamp: new Date().toISOString(),
+      },
+    });
+  }
+
+  /**
+   * Buffer a chunk for the next database write
+   */
+  function bufferChunk(
+    chunkType: ChunkBuffer['chunk_type'],
+    chunkData: any,
+    index: number
+  ): void {
+    if (!persistenceOptions.enabled) return;
+
+    chunkBuffer.push({
+      run_id: runId,
+      step_slug: stepSlug,
+      chunk_index: index, // Use the same index as the broadcast so replay stays aligned
+      chunk_type: chunkType,
+      chunk_data: chunkData,
+      created_at: new Date().toISOString(),
+    });
+  }
+
+  /**
+   * Flush buffered chunks to the database
+   */
+  async function flushChunks(): Promise<void> {
+    if (chunkBuffer.length === 0) return;
+
+    const chunksToWrite = [...chunkBuffer];
+    chunkBuffer = []; // Clear the buffer immediately
+    lastFlushTime = Date.now();
+
+    try {
+      const { error } = await supabase
+        .from('streaming_chunks')
+        .insert(chunksToWrite);
+
+      if (error) {
+        console.error('Failed to persist chunks:', error);
+        // Could implement retry logic here
+      }
+    } catch (err) {
+      console.error('Error flushing chunks:', err);
+    }
+  }
+
+  /**
+   * Core emit function (used by all emit methods)
+   */
+  async function emit(
+    chunkType: ChunkBuffer['chunk_type'],
+    chunkData: any
+  ): Promise<void> {
+    const index = chunkIndex++;
+
+    // 1. Broadcast via Realtime (fast, ephemeral)
+    await broadcast(chunkType, chunkData, index);
+
+    // 2. Buffer for the database write (durable)
+    bufferChunk(chunkType, chunkData, index);
+
+    // 3. Auto-flush if the buffer is full
+    if (chunkBuffer.length >= persistenceOptions.batchSize) {
+      await flushChunks();
+    }
+  }
+
+  // Public API
+  return {
+    async emitText(text: string): Promise<void> {
+      streamedText += text;
+      await emit('text', { text });
+    },
+
+    async emitData(key: string, data: any): Promise<void> {
+      await emit('data', { key, data });
+    },
+
+    async emitReasoning(reasoning: string): Promise<void> {
+      await emit('reasoning', { reasoning });
+    },
+
+    async emitToolInput(toolName: string, input: any): Promise<void> {
+      await emit('tool-input', { toolName, input });
+    },
+
+    async checkpoint(data: any): Promise<void> {
+      // Flush all pending chunks first
+      await flushChunks();
+
+      // Update the step with checkpoint data
+      await supabase
+        .from('flow_steps')
+        .update({
+          checkpoint_data: data,
+          checkpoint_at: new Date().toISOString(),
+        })
+        .eq('run_id', runId)
+        .eq('step_slug', stepSlug);
+    },
+
+    async finalize(): Promise<void> {
+      // Flush remaining chunks
+      await flushChunks();
+
+      // Stop the periodic flush
+      if (flushInterval) {
+        clearInterval(flushInterval);
+        flushInterval = null;
+      }
+    },
+
+    getStreamedText(): string {
+      return streamedText;
+    },
+
+    enablePersistence(options?: PersistenceOptions): void {
+      Object.assign(persistenceOptions, options, { enabled: true });
+    },
+
+    disablePersistence(): void {
+      persistenceOptions.enabled = false;
+    },
+  };
+}
+
+/**
+ * Helper: Stream an OpenAI response through the streaming context
+ */
+export async function streamOpenAIResponse(
+  stream: AsyncIterable<any>,
+  ctx: StreamingContext
+): Promise<string> {
+  let fullResponse = '';
+
+  try {
+    for await (const chunk of stream) {
+      const delta = chunk.choices?.[0]?.delta?.content || '';
+      if (delta) {
+        await ctx.emitText(delta);
+        fullResponse += delta;
+      }
+    }
+
+    // Finalize to flush remaining chunks
+    await ctx.finalize();
+
+    return fullResponse;
+  } catch (error) {
+    // Finalize even on error
+    await ctx.finalize();
+    throw error;
+  }
+}
+
+/**
+ * Helper: Stream with timeout protection
+ */
+export async function streamWithTimeout<T>(
+  streamFn: (ctx: StreamingContext) => Promise<T>,
+  ctx: StreamingContext,
+  timeoutMs: number = 25000
+): Promise<T> {
+  let timer: NodeJS.Timeout | undefined;
+  const timeoutPromise = new Promise<never>((_, reject) => {
+    timer = setTimeout(() => {
+      reject(new Error(`Step timed out after ${timeoutMs}ms`));
+    }, timeoutMs);
+  });
+
+  try {
+    const result = await Promise.race([streamFn(ctx), timeoutPromise]);
+    await ctx.finalize();
+    return result;
+  } catch (error) {
+    // Save a checkpoint with the partial progress
+    const partial = ctx.getStreamedText();
+    if (partial) {
+      await ctx.checkpoint({
+        partial_response: partial,
+        error: error instanceof Error ? error.message : String(error),
+        timed_out: true,
+      });
+    }
+
+    await ctx.finalize();
+    throw error;
+  } finally {
+    // Don't leave the timeout timer running after the race settles
+    if (timer) clearTimeout(timer);
+  }
+}
+
+/**
+ * Migration: Create streaming_chunks table
+ */
+export const STREAMING_CHUNKS_MIGRATION = `
+-- Table for storing streaming chunks
+CREATE TABLE IF NOT EXISTS streaming_chunks (
+  id BIGSERIAL PRIMARY KEY,
+  run_id UUID NOT NULL REFERENCES flow_runs(run_id) ON DELETE CASCADE,
+  step_slug TEXT NOT NULL,
+  chunk_index INTEGER NOT NULL,
+  chunk_type TEXT NOT NULL CHECK (chunk_type IN ('text', 'data', 'reasoning', 'tool-input')),
+  chunk_data JSONB NOT NULL,
+  created_at TIMESTAMPTZ DEFAULT NOW(),
+
+  UNIQUE(run_id, step_slug, chunk_index)
+);
+
+-- Index for fast retrieval
+CREATE INDEX IF NOT EXISTS idx_streaming_chunks_run_step
+  ON streaming_chunks(run_id, step_slug, chunk_index);
+
+-- RLS policies
+ALTER TABLE streaming_chunks ENABLE ROW LEVEL SECURITY;
+
+-- Users can read chunks for their own runs
+CREATE POLICY "users_read_own_chunks"
+  ON streaming_chunks FOR SELECT
+  USING (
+    EXISTS (
+      SELECT 1 FROM flow_runs
+      WHERE flow_runs.run_id = streaming_chunks.run_id
+      AND flow_runs.user_id = auth.uid()
+    )
+  );
+
+-- Service role can insert chunks
+CREATE POLICY "service_role_insert_chunks"
+  ON streaming_chunks FOR INSERT
+  WITH CHECK (true);
+
+-- Auto-cleanup old chunks (optional)
+CREATE OR REPLACE FUNCTION cleanup_old_streaming_chunks()
+RETURNS void AS $$
+BEGIN
+  DELETE FROM streaming_chunks
+  WHERE created_at < NOW() - INTERVAL '24 hours';
+END;
+$$ LANGUAGE plpgsql;
+
+-- Schedule cleanup (if pg_cron is available)
+-- SELECT cron.schedule('cleanup-chunks', '0 2 * * *', 'SELECT cleanup_old_streaming_chunks()');
+`;
+
+/**
+ * Add checkpoint columns to the flow_steps table
+ */
+export const CHECKPOINT_MIGRATION = `
+-- Add checkpoint support to flow_steps
+ALTER TABLE flow_steps
+  ADD COLUMN IF NOT EXISTS checkpoint_data JSONB,
+  ADD COLUMN IF NOT EXISTS checkpoint_at TIMESTAMPTZ;
+
+-- Index for checkpoint queries
+CREATE INDEX IF NOT EXISTS idx_flow_steps_checkpoint
+  ON flow_steps(run_id, step_slug)
+  WHERE checkpoint_data IS NOT NULL;
+`;
diff --git a/examples/vercel-ai-sdk-integration/backend/helpers/streaming-context.ts b/examples/vercel-ai-sdk-integration/backend/helpers/streaming-context.ts
new file mode 100644
index 000000000..bbbe03388
--- /dev/null
+++ b/examples/vercel-ai-sdk-integration/backend/helpers/streaming-context.ts
@@ -0,0 +1,205 @@
+/**
+ * Streaming Context for Pgflow Steps
+ *
+ * Allows pgflow flow steps to emit incremental data (text chunks, progress updates)
+ * that are broadcast to connected clients via Supabase Realtime.
+ */
+
+import type { SupabaseClient } from '@supabase/supabase-js';
+
+/**
+ * Streaming event broadcast via Supabase Realtime
+ */
+export interface BroadcastStepStreamEvent {
+  event_type: 'step:stream';
+  run_id: string;
+  step_slug: string;
+  stream_type: 'text' | 'data' | 'reasoning' | 'tool-input';
+  chunk: any;
+  index: number;
+  timestamp: string;
+}
+
+/**
+ * Streaming context provided to step functions
+ */
+export interface StreamingContext {
+  /**
+   * Emit a generic streaming chunk
+   */
+  emit(type: 'text' | 'data' | 'reasoning' | 'tool-input', chunk: any): Promise<void>;
+
+  /**
+   * Emit a text delta (for LLM streaming)
+   */
+  emitText(text: string): Promise<void>;
+
+  /**
+   * Emit custom data with a key
+   */
+  emitData(key: string, data: any): Promise<void>;
+
+  /**
+   * Emit reasoning/thinking process
+   */
+  emitReasoning(reasoning: string): Promise<void>;
+
+  /**
+   * Emit tool execution progress
+   */
+  emitToolInput(toolName: string, input: any): Promise<void>;
+}
+
+/**
+ * Create a streaming context for a pgflow step
+ *
+ * @param supabase - Supabase client
+ * @param runId - Flow run ID
+ * @param stepSlug - Step slug
+ * @param options - Configuration options
+ * @returns StreamingContext instance
+ */
+export function createStreamingContext(
+  supabase: SupabaseClient,
+  runId: string,
+  stepSlug: string,
+  options: {
+    debug?: boolean;
+    batchDelayMs?: number; // Delay between broadcasts (rate limiting)
+  } = {}
+): StreamingContext {
+  let chunkIndex = 0;
+  let lastBroadcastTime = 0;
+
+  /**
+   * Broadcast a streaming event to Supabase Realtime
+   */
+  const broadcast = async (
+    streamType: 'text' | 'data' | 'reasoning' | 'tool-input',
+    chunk: any
+  ): Promise<void> => {
+    // Rate limiting (optional)
+    if (options.batchDelayMs) {
+      const now = Date.now();
+      const elapsed = now - lastBroadcastTime;
+      if (elapsed < options.batchDelayMs) {
+        const waitMs = options.batchDelayMs - elapsed;
+        await new Promise<void>((resolve) => setTimeout(resolve, waitMs));
+      }
+      lastBroadcastTime = Date.now();
+    }
+
+    const event: BroadcastStepStreamEvent = {
+      event_type: 'step:stream',
+      run_id: runId,
+      step_slug: stepSlug,
+      stream_type: streamType,
+      chunk,
+      index: chunkIndex++,
+      timestamp: new Date().toISOString(),
+    };
+
+    if (options.debug) {
+      console.log('[StreamingContext] Broadcasting:', event);
+    }
+
+    try {
+      // Broadcast to the run's channel
+      const channel = supabase.channel(`pgflow:run:${runId}`);
+
+      await channel.send({
+        type: 'broadcast',
+        event: 'step:stream',
+        payload: event,
+      });
+
+      if (options.debug) {
+        console.log('[StreamingContext] Broadcast successful');
+      }
+    } catch (error) {
+      console.error('[StreamingContext] Broadcast error:', error);
+      // Don't throw - streaming errors shouldn't fail the step
+    }
+  };
+
+  return {
+    emit: async (type, chunk) => {
+      await broadcast(type, chunk);
+    },
+
+    emitText: async (text: string) => {
+      await broadcast('text', { text });
+    },
+
+    emitData: async (key: string, data: any) => {
+      await broadcast('data', { key, data });
+    },
+
+    emitReasoning: async (reasoning: string) => {
+      await broadcast('reasoning', { reasoning });
+    },
+
+    emitToolInput: async (toolName: string, input: any) => {
+      await broadcast('tool-input', { toolName, input });
+    },
+  };
+}
+
+/**
+ * Helper: Stream an OpenAI response through the streaming context
+ *
+ * @example
+ * ```typescript
+ * const stream = await openai.chat.completions.create({
+ *   model: 'gpt-4',
+ *   messages: [...],
+ *   stream: true,
+ * });
+ *
+ * const fullText = await streamOpenAIResponse(stream, ctx.stream);
+ * return { response: fullText };
+ * ```
+ */
+export async function streamOpenAIResponse(
+  stream: AsyncIterable<any>,
+  ctx: StreamingContext
+): Promise<string> {
+  let fullResponse = '';
+
+  for await (const chunk of stream) {
+    const delta = chunk.choices?.[0]?.delta?.content || '';
+    if (delta) {
+      await ctx.emitText(delta);
+      fullResponse += delta;
+    }
+  }
+
+  return fullResponse;
+}
+
+/**
+ * Helper: Stream a Vercel AI SDK streamText result through the streaming context
+ *
+ * @example
+ * ```typescript
+ * const result = streamText({
+ *   model: openai('gpt-4'),
+ *   prompt: input.message,
+ * });
+ *
+ * const fullText = await streamAISDKResponse(result, ctx.stream);
+ * return { response: fullText };
+ * ```
+ */
+export async function streamAISDKResponse(
+  result: { textStream: AsyncIterable<string> },
+  ctx: StreamingContext
+): Promise<string> {
+  let fullText = '';
+
+  for await (const chunk of result.textStream) {
+    await ctx.emitText(chunk);
+    fullText += chunk;
+  }
+
+  return fullText;
+}
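+
+/**
+ * Sketch: emitting custom data and reasoning from a step. The keys used here
+ * ('intent', 'search_results') are only examples — whatever key you pass to
+ * emitData becomes a `data-${key}` chunk on the frontend (see
+ * pgflow-chat-transport.ts):
+ *
+ * ```typescript
+ * await ctx.stream.emitReasoning('Classifying the user intent...');
+ * await ctx.stream.emitData('intent', { classification: 'question', confidence: 0.92 });
+ * await ctx.stream.emitData('search_results', {
+ *   count: 3,
+ *   sources: [{ id: 'doc_1', score: 0.87 }],
+ * });
+ * ```
+ */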
diff --git a/examples/vercel-ai-sdk-integration/frontend/components/chat.example.tsx b/examples/vercel-ai-sdk-integration/frontend/components/chat.example.tsx
new file mode 100644
index 000000000..361ffbe35
--- /dev/null
+++ b/examples/vercel-ai-sdk-integration/frontend/components/chat.example.tsx
@@ -0,0 +1,283 @@
+/**
+ * Example: Chat Component using Pgflow + Vercel AI SDK
+ *
+ * Demonstrates how to use PgflowChatTransport with the useChat hook
+ * for a fully functional streaming chat interface.
+ */
+
+'use client';
+
+import { useChat } from '@ai-sdk/react';
+import { createPgflowChatTransport } from '../lib/pgflow-chat-transport';
+import { createBrowserClient } from '@supabase/ssr';
+import { useMemo, useState, useEffect } from 'react';
+
+/**
+ * Custom data from streaming events
+ */
+interface ChatData {
+  intent?: {
+    classification: string;
+    confidence: number;
+  };
+  search_results?: {
+    count: number;
+    sources: Array<{ id: string; score: number }>;
+  };
+  citations?: string[];
+  reasoning?: {
+    step: string;
+    reasoning: string;
+  };
+}
+
+export default function ChatExample() {
+  const [customData, setCustomData] = useState<ChatData>({});
+  const [reasoning, setReasoning] = useState('');
+
+  // Initialize Supabase client
+  const supabase = useMemo(() => {
+    return createBrowserClient(
+      process.env.NEXT_PUBLIC_SUPABASE_URL!,
+      process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!
+    );
+  }, []);
+
+  // Create pgflow chat transport
+  const transport = useMemo(() => {
+    return createPgflowChatTransport(supabase, 'streaming_chat', {
+      debug: process.env.NODE_ENV === 'development',
+      timeout: 5 * 60 * 1000, // 5 minutes
+    });
+  }, [supabase]);
+
+  // Use the chat hook with our custom transport
+  const {
+    messages,
+    input,
+    handleInputChange,
+    handleSubmit,
+    status,
+    error,
+  } = useChat({
+    transport,
+    onData: (chunk) => {
+      // Handle custom streaming data
+      if (chunk.type === 'data-intent') {
+        setCustomData(prev => ({ ...prev, intent: chunk.data }));
+      } else if (chunk.type === 'data-search_results') {
+        setCustomData(prev => ({ ...prev, search_results: chunk.data }));
+      } else if (chunk.type === 'data-citations') {
+        setCustomData(prev => ({ ...prev, citations: chunk.data }));
+      } else if (chunk.type === 'data-reasoning') {
+        setReasoning(chunk.data.reasoning);
+      }
+    },
+    onFinish: () => {
+      // Clear progress indicators when done
+      setReasoning('');
+      setCustomData({});
+    },
+    onError: (error) => {
+      console.error('Chat error:', error);
+    },
+  });
+
+  // Check authentication on mount
+  useEffect(() => {
+    const checkAuth = async () => {
+      const { data: { session } } = await supabase.auth.getSession();
+      if (!session) {
+        console.warn('User not authenticated. Please sign in.');
+        // Optionally redirect to login
+      }
+    };
+    checkAuth();
+  }, [supabase]);
+
+  return (
+    <div className="chat">
+      {/* Header */}
+      <header className="chat-header">
+        <h1>Pgflow AI Chat</h1>
+        <p>Powered by Vercel AI SDK + Pgflow</p>
+      </header>
+
+      {/* Messages */}
+      <div className="chat-messages">
+        {messages.length === 0 && (
+          <div className="chat-welcome">
+            <p>👋 Welcome!</p>
+            <p>Ask me anything. I'll show you my thought process as I work.</p>
+          </div>
+        )}
+
+        {messages.map((message) => (
+          <div key={message.id} className={`chat-message ${message.role}`}>
+            <div className="chat-message-role">
+              {message.role === 'user' ? 'You' : 'AI Assistant'}
+            </div>
+            <div className="chat-message-content">{message.content}</div>
+          </div>
+        ))}
+
+        {/* Streaming progress indicators */}
+        {status === 'streaming' && (
+          <div className="chat-progress">
+            <div className="chat-progress-status">
+              <span className="chat-progress-spinner" />
+              <span>Processing...</span>
+            </div>
+
+            {/* Show reasoning */}
+            {reasoning && (
+              <div className="chat-progress-item">💭 {reasoning}</div>
+            )}
+
+            {/* Show intent classification */}
+            {customData.intent && (
+              <div className="chat-progress-item">
+                ✓ Intent: {customData.intent.classification}
+                {' '}({Math.round(customData.intent.confidence * 100)}% confidence)
+              </div>
+            )}
+
+            {/* Show search results */}
+            {customData.search_results && (
+              <div className="chat-progress-item">
+                ✓ Found {customData.search_results.count} relevant sources
+              </div>
+            )}
+
+            {/* Show citations */}
+            {customData.citations && customData.citations.length > 0 && (
+              <div className="chat-progress-item">
+                ✓ Added {customData.citations.length} citations
+              </div>
+            )}
+          </div>
+        )}
+
+        {/* Error display */}
+        {error && (
+          <div className="chat-error">
+            <div className="chat-error-title">Error</div>
+            <div>{error.message}</div>
+          </div>
+        )}
+      </div>
+
+      {/* Input form */}
+      <div className="chat-input">
+        <form onSubmit={handleSubmit}>
+          <input
+            value={input}
+            onChange={handleInputChange}
+            placeholder="Ask me anything..."
+            disabled={status !== 'ready'}
+          />
+          <button type="submit" disabled={status !== 'ready' || !input.trim()}>
+            Send
+          </button>
+        </form>
+
+        {/* Status indicator */}
+        <div className="chat-status">
+          <span>
+            {status === 'ready' && 'Ready'}
+            {status === 'streaming' && 'AI is thinking...'}
+            {status === 'submitted' && 'Submitting...'}
+            {status === 'error' && 'Error occurred'}
+          </span>
+        </div>
+      </div>
+    </div>
+ ); +} + +/** + * Simpler example without custom data handling + */ +export function SimpleChatExample() { + const supabase = useMemo(() => { + return createBrowserClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY! + ); + }, []); + + const transport = useMemo(() => { + return createPgflowChatTransport(supabase, 'simple_streaming_chat'); + }, [supabase]); + + const { messages, input, handleInputChange, handleSubmit, status } = useChat({ + transport, + }); + + return ( +
+    <div className="chat chat--simple">
+      <div className="chat-messages">
+        {messages.map((msg) => (
+          <div key={msg.id} className={`chat-message ${msg.role}`}>
+            {msg.content}
+          </div>
+        ))}
+      </div>
+
+      <form onSubmit={handleSubmit}>
+        <input
+          value={input}
+          onChange={handleInputChange}
+          placeholder="Type a message..."
+        />
+        <button type="submit" disabled={status !== 'ready'}>
+          Send
+        </button>
+      </form>
+    </div>
+ ); +} diff --git a/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport-with-recovery.ts b/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport-with-recovery.ts new file mode 100644 index 000000000..11e6067b9 --- /dev/null +++ b/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport-with-recovery.ts @@ -0,0 +1,549 @@ +/** + * PgflowChatTransport with Chunk Recovery + * + * This transport handles: + * - Real-time streaming via Supabase Realtime + * - Automatic recovery from disconnections + * - Chunk replay from database on reconnection + * - Edge function timeout detection and recovery + * - Graceful handling of partial responses + */ + +import { PgflowClient } from '@pgflow/client/browser'; +import { SupabaseClient } from '@supabase/supabase-js'; +import type { + ChatTransport, + UIMessage, + UIMessageChunk, +} from '@ai-sdk/react'; + +interface StreamingChunk { + id: number; + run_id: string; + step_slug: string; + chunk_index: number; + chunk_type: 'text' | 'data' | 'reasoning' | 'tool-input'; + chunk_data: any; + created_at: string; +} + +export interface PgflowChatTransportOptions { + /** + * Timeout for detecting stuck streams (ms) + * Default: 30000 (30 seconds) + */ + streamTimeoutMs?: number; + + /** + * Enable chunk recovery from database + * Default: true + */ + enableChunkRecovery?: boolean; + + /** + * Show partial responses on timeout + * Default: true + */ + showPartialOnTimeout?: boolean; + + /** + * Callback when stream times out + */ + onStreamTimeout?: (runId: string, partialText: string) => void; +} + +export class PgflowChatTransport implements ChatTransport { + private pgflowClient: PgflowClient; + + constructor( + private supabaseClient: SupabaseClient, + private flowSlug: string, + private options: PgflowChatTransportOptions = {} + ) { + // Initialize defaults + this.options = { + streamTimeoutMs: 30000, + enableChunkRecovery: true, + showPartialOnTimeout: true, + ...options, + }; + + this.pgflowClient = new PgflowClient(supabaseClient); + } + + async sendMessages(options: { + trigger: 'submit-message' | 'regenerate-message'; + chatId: string; + messageId: string | undefined; + messages: UIMessage[]; + abortSignal: AbortSignal | undefined; + }): Promise> { + const { messages, chatId, abortSignal } = options; + const lastMessage = messages[messages.length - 1]; + + return new ReadableStream({ + start: async (controller) => { + let lastEventTime = Date.now(); + let timeoutCheckInterval: NodeJS.Timeout | null = null; + let unsubscribers: Array<() => void> = []; + + try { + // Start the pgflow flow + const run = await this.pgflowClient.startFlow( + this.flowSlug, + { + message: lastMessage.content, + conversationId: chatId, + history: messages.slice(0, -1).map((m) => ({ + role: m.role, + content: m.content, + })), + }, + chatId // Use chatId as runId + ); + + // Send start chunk + controller.enqueue({ + type: 'start', + id: run.run_id, + } as UIMessageChunk); + + // Track streamed text for partial response + let streamedText = ''; + + // Listen to streaming chunks + const unsubscribeStream = this.pgflowClient.onStepEvent( + async (event) => { + if (event.run_id !== run.run_id) return; + + lastEventTime = Date.now(); // Reset timeout + + // Handle streaming chunks + if (event.event_type === 'step:stream') { + const streamEvent = event as any; // BroadcastStepStreamEvent + const chunks = this.mapStreamEventToChunks(streamEvent); + + chunks.forEach((chunk) => { + controller.enqueue(chunk); + + // Track text for partial 
response recovery + if (chunk.type === 'text-delta') { + streamedText += chunk.text; + } + }); + } + + // Handle step completion + if (event.event_type === 'step:completed') { + controller.enqueue({ + type: 'data-step-complete', + data: { + step: event.step_slug, + status: 'completed', + }, + } as UIMessageChunk); + } + + // Handle step failure + if (event.event_type === 'step:failed') { + controller.enqueue({ + type: 'data-step-failed', + data: { + step: event.step_slug, + error: event.error_message, + }, + } as UIMessageChunk); + } + } + ); + unsubscribers.push(unsubscribeStream); + + // Listen to run events + const unsubscribeRun = run.on('*', (runEvent) => { + lastEventTime = Date.now(); // Reset timeout + + if (runEvent.event_type === 'run:completed') { + // Send finish chunk + controller.enqueue({ + type: 'finish', + finishReason: 'stop', + } as UIMessageChunk); + + // Cleanup + this.cleanup(unsubscribers, timeoutCheckInterval); + controller.close(); + } + + if (runEvent.event_type === 'run:failed') { + // Send error chunk + controller.enqueue({ + type: 'error', + error: new Error(runEvent.error_message), + } as UIMessageChunk); + + // Cleanup + this.cleanup(unsubscribers, timeoutCheckInterval); + controller.close(); + } + }); + unsubscribers.push(unsubscribeRun); + + // Timeout detection + timeoutCheckInterval = setInterval(async () => { + const timeSinceLastEvent = Date.now() - lastEventTime; + + if (timeSinceLastEvent > this.options.streamTimeoutMs!) { + console.warn( + `Stream timeout detected for run ${run.run_id} (no events for ${timeSinceLastEvent}ms)` + ); + + // Clear interval + clearInterval(timeoutCheckInterval!); + + // Try to recover + const recovered = await this.recoverFromTimeout( + run.run_id, + streamedText, + controller + ); + + if (!recovered) { + // Failed to recover + controller.enqueue({ + type: 'error', + error: new Error( + 'Stream timed out and recovery failed' + ), + } as UIMessageChunk); + } + + // Cleanup + this.cleanup(unsubscribers, null); + controller.close(); + } + }, 5000); // Check every 5 seconds + + // Handle abort signal + if (abortSignal) { + abortSignal.addEventListener('abort', () => { + this.cleanup(unsubscribers, timeoutCheckInterval); + controller.close(); + }); + } + + // Wait for completion (with timeout) + const completionPromise = run + .waitForStatus('completed', { + timeoutMs: 5 * 60 * 1000, // 5 minutes + signal: abortSignal, + }) + .catch(async () => { + // Check if it failed + await run.waitForStatus('failed', { + timeoutMs: 1000, + signal: abortSignal, + }); + }); + + await completionPromise; + } catch (error) { + this.cleanup(unsubscribers, timeoutCheckInterval); + controller.error(error); + } + }, + + cancel() { + // Cleanup handled by abort signal + }, + }); + } + + async reconnectToStream(options: { + chatId: string; + }): Promise | null> { + const { chatId } = options; + + try { + // Get existing run + const run = await this.pgflowClient.getRun(chatId); + + if (!run) return null; + + // If already in terminal state + if (run.status === 'completed' || run.status === 'failed') { + // Try to replay from stored chunks + if (this.options.enableChunkRecovery) { + return this.replayFromChunks(chatId, run); + } + return null; + } + + // Still in progress - reconnect to live stream + return new ReadableStream({ + start: async (controller) => { + // 1. 
Fetch and replay stored chunks first + if (this.options.enableChunkRecovery) { + const replayed = await this.replayStoredChunks( + chatId, + controller + ); + console.log(`Replayed ${replayed} chunks from database`); + } + + // 2. Subscribe to new live chunks + const unsubscribe = run.on('*', (event) => { + if (event.event_type === 'run:completed') { + controller.enqueue({ + type: 'finish', + finishReason: 'stop', + } as UIMessageChunk); + unsubscribe(); + controller.close(); + } + + if (event.event_type === 'run:failed') { + controller.enqueue({ + type: 'error', + error: new Error(event.error_message), + } as UIMessageChunk); + unsubscribe(); + controller.close(); + } + }); + + // Wait for completion + await run.waitForStatus('completed', { + timeoutMs: 60000, + }); + }, + }); + } catch (error) { + console.error('Failed to reconnect to stream:', error); + return null; + } + } + + /** + * Map pgflow streaming events to AI SDK chunks + */ + private mapStreamEventToChunks(event: any): UIMessageChunk[] { + switch (event.stream_type) { + case 'text': + return [ + { + type: 'text-delta', + text: event.chunk.text, + }, + ]; + + case 'reasoning': + return [ + { + type: 'data-reasoning', + data: event.chunk.reasoning, + }, + ]; + + case 'data': + return [ + { + type: `data-${event.chunk.key}`, + data: event.chunk.data, + }, + ]; + + case 'tool-input': + return [ + { + type: 'tool-input-delta', + toolCallId: event.step_slug, + toolName: event.chunk.toolName, + argsTextDelta: JSON.stringify(event.chunk.input), + }, + ]; + + default: + return []; + } + } + + /** + * Replay stored chunks from database + */ + private async replayStoredChunks( + runId: string, + controller: ReadableStreamDefaultController + ): Promise { + try { + const { data: chunks, error } = await this.supabaseClient + .from('streaming_chunks') + .select('*') + .eq('run_id', runId) + .order('chunk_index'); + + if (error) throw error; + + if (chunks && chunks.length > 0) { + for (const chunk of chunks) { + const uiChunks = this.mapStoredChunkToUIChunks(chunk); + uiChunks.forEach((c) => controller.enqueue(c)); + } + } + + return chunks?.length || 0; + } catch (error) { + console.error('Failed to replay chunks:', error); + return 0; + } + } + + /** + * Replay chunks for a completed run + */ + private async replayFromChunks( + runId: string, + run: any + ): Promise> { + return new ReadableStream({ + start: async (controller) => { + // Replay all stored chunks + await this.replayStoredChunks(runId, controller); + + // Send final status + if (run.status === 'completed') { + controller.enqueue({ + type: 'finish', + finishReason: 'stop', + } as UIMessageChunk); + } else if (run.status === 'failed') { + controller.enqueue({ + type: 'error', + error: new Error(run.error_message), + } as UIMessageChunk); + } + + controller.close(); + }, + }); + } + + /** + * Recover from stream timeout + */ + private async recoverFromTimeout( + runId: string, + streamedText: string, + controller: ReadableStreamDefaultController + ): Promise { + try { + // Check if we have chunks in database + const { data: chunks } = await this.supabaseClient + .from('streaming_chunks') + .select('*') + .eq('run_id', runId) + .eq('chunk_type', 'text') + .order('chunk_index'); + + if (chunks && chunks.length > 0) { + // We have stored chunks, emit them + const fullText = chunks + .map((c: any) => c.chunk_data.text) + .join(''); + + if (fullText.length > streamedText.length) { + // We have more text in database + const missingText = fullText.slice(streamedText.length); + + 
controller.enqueue({ + type: 'text-delta', + text: missingText, + } as UIMessageChunk); + + console.log( + `Recovered ${missingText.length} characters from database` + ); + } + + // Check step checkpoint for final output + const { data: step } = await this.supabaseClient + .from('flow_steps') + .select('checkpoint_data, output, status') + .eq('run_id', runId) + .single(); + + if (step?.checkpoint_data?.partial_response) { + // Show partial response + if (this.options.showPartialOnTimeout) { + controller.enqueue({ + type: 'data-partial-response', + data: { + text: step.checkpoint_data.partial_response, + reason: 'timeout', + }, + } as UIMessageChunk); + } + + // Notify callback + this.options.onStreamTimeout?.( + runId, + step.checkpoint_data.partial_response + ); + } + + return true; + } + + return false; + } catch (error) { + console.error('Recovery failed:', error); + return false; + } + } + + /** + * Map stored chunk to UI chunks + */ + private mapStoredChunkToUIChunks( + chunk: StreamingChunk + ): UIMessageChunk[] { + switch (chunk.chunk_type) { + case 'text': + return [ + { + type: 'text-delta', + text: chunk.chunk_data.text, + }, + ]; + + case 'reasoning': + return [ + { + type: 'data-reasoning', + data: chunk.chunk_data.reasoning, + }, + ]; + + case 'data': + return [ + { + type: `data-${chunk.chunk_data.key}`, + data: chunk.chunk_data.data, + }, + ]; + + default: + return []; + } + } + + /** + * Cleanup subscriptions and intervals + */ + private cleanup( + unsubscribers: Array<() => void>, + interval: NodeJS.Timeout | null + ): void { + unsubscribers.forEach((unsub) => unsub()); + if (interval) clearInterval(interval); + } +} diff --git a/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport.ts b/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport.ts new file mode 100644 index 000000000..e79fb7784 --- /dev/null +++ b/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport.ts @@ -0,0 +1,373 @@ +/** + * PgflowChatTransport - Custom ChatTransport for Vercel AI SDK + * + * Connects useChat hook directly to pgflow client in the browser, + * enabling real-time streaming via Supabase Realtime without API routes. 
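+
+/**
+ * Wiring sketch — surfacing partial responses when a stream times out.
+ * `notifyUser` is a placeholder for whatever feedback mechanism your app uses:
+ *
+ * ```typescript
+ * const transport = new PgflowChatTransport(supabase, 'streaming_chat', {
+ *   streamTimeoutMs: 30_000,
+ *   showPartialOnTimeout: true,
+ *   onStreamTimeout: (runId, partialText) => {
+ *     notifyUser(`Response was cut short; recovered ${partialText.length} characters.`);
+ *   },
+ * });
+ * ```
+ */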
diff --git a/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport.ts b/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport.ts
new file mode 100644
index 000000000..e79fb7784
--- /dev/null
+++ b/examples/vercel-ai-sdk-integration/frontend/lib/pgflow-chat-transport.ts
@@ -0,0 +1,373 @@
+/**
+ * PgflowChatTransport - Custom ChatTransport for Vercel AI SDK
+ *
+ * Connects the useChat hook directly to the pgflow client in the browser,
+ * enabling real-time streaming via Supabase Realtime without API routes.
+ */
+
+import type { PgflowClient } from '@pgflow/client/browser';
+import type { SupabaseClient } from '@supabase/supabase-js';
+
+// AI SDK types (from @ai-sdk/react)
+interface UIMessage {
+  id: string;
+  role: 'user' | 'assistant' | 'system';
+  content: string;
+}
+
+type UIMessageChunk =
+  | { type: 'start'; id: string }
+  | { type: 'text-delta'; text: string }
+  | { type: 'finish'; finishReason: string }
+  | { type: 'error'; error: Error }
+  | { type: 'data-reasoning'; data: any }
+  | { type: 'data-search_results'; data: any }
+  | { type: string; data?: any }; // Generic data chunks
+
+interface ChatTransport<T extends UIMessage = UIMessage> {
+  sendMessages(options: {
+    trigger: 'submit-message' | 'regenerate-message';
+    chatId: string;
+    messageId: string | undefined;
+    messages: T[];
+    abortSignal: AbortSignal | undefined;
+  }): Promise<ReadableStream<UIMessageChunk>>;
+
+  reconnectToStream(options: {
+    chatId: string;
+  }): Promise<ReadableStream<UIMessageChunk> | null>;
+}
+
+// Pgflow streaming event types
+interface BroadcastStepStreamEvent {
+  event_type: 'step:stream';
+  run_id: string;
+  step_slug: string;
+  stream_type: 'text' | 'data' | 'reasoning' | 'tool-input';
+  chunk: any;
+  index: number;
+  timestamp: string;
+}
+
+/**
+ * Custom ChatTransport that uses the pgflow client for backend communication
+ */
+export class PgflowChatTransport implements ChatTransport<UIMessage> {
+  private pgflowClient: PgflowClient | null = null;
+
+  constructor(
+    private supabase: SupabaseClient,
+    private flowSlug: string,
+    private options: {
+      debug?: boolean;
+      timeout?: number; // Default: 5 minutes
+    } = {}
+  ) {}
+
+  /**
+   * Lazy initialization of the pgflow client
+   */
+  private async getPgflowClient(): Promise<PgflowClient> {
+    if (!this.pgflowClient) {
+      // Dynamic import to avoid bundling server-side code
+      const { PgflowClient } = await import('@pgflow/client/browser');
+      this.pgflowClient = new PgflowClient(this.supabase);
+    }
+    return this.pgflowClient;
+  }
+
+  /**
+   * Send messages and stream the response
+   */
+  async sendMessages(options: {
+    trigger: 'submit-message' | 'regenerate-message';
+    chatId: string;
+    messageId: string | undefined;
+    messages: UIMessage[];
+    abortSignal: AbortSignal | undefined;
+  }): Promise<ReadableStream<UIMessageChunk>> {
+    const { messages, chatId, abortSignal } = options;
+    const lastMessage = messages[messages.length - 1];
+
+    if (this.options.debug) {
+      console.log('[PgflowChatTransport] Sending messages:', {
+        chatId,
+        messageCount: messages.length,
+        lastMessage: lastMessage.content.substring(0, 50),
+      });
+    }
+
+    const pgflow = await this.getPgflowClient();
+
+    return new ReadableStream<UIMessageChunk>({
+      start: async (controller) => {
+        // Guard against double-close (completion racing the abort signal)
+        let closed = false;
+        const closeOnce = () => {
+          if (!closed) {
+            closed = true;
+            controller.close();
+          }
+        };
+
+        try {
+          // Check authentication
+          const { data: { session } } = await this.supabase.auth.getSession();
+          if (!session) {
+            throw new Error('User must be authenticated to send messages');
+          }
+
+          // Start the pgflow flow
+          const run = await pgflow.startFlow(
+            this.flowSlug,
+            {
+              message: lastMessage.content,
+              conversationId: chatId,
+              userId: session.user.id,
+              history: messages.slice(0, -1).map(m => ({
+                role: m.role,
+                content: m.content,
+              })),
+            },
+            chatId // Use chatId as run_id for consistency
+          );
+
+          if (this.options.debug) {
+            console.log('[PgflowChatTransport] Flow started:', run.run_id);
+          }
+
+          // Send start chunk
+          controller.enqueue({
+            type: 'start',
+            id: run.run_id,
+          });
+
+          // Track cleanup functions
+          const cleanups: Array<() => void> = [];
+
+          // Listen to streaming events
+          const unsubscribeStream = pgflow.onStepEvent((event: any) => {
+            if (event.run_id !== run.run_id) return;
+
+            // Handle streaming chunks
+            if (event.event_type === 'step:stream') {
+              const streamEvent = event as BroadcastStepStreamEvent;
+              const chunks = this.mapStreamEventToChunks(streamEvent);
+
+              if (this.options.debug) {
+                console.log('[PgflowChatTransport] Stream chunk:', streamEvent);
+              }
+
+              chunks.forEach(chunk => controller.enqueue(chunk));
+            }
+
+            // Handle step completion (optional metadata)
+            if (event.event_type === 'step:completed') {
+              if (this.options.debug) {
+                console.log('[PgflowChatTransport] Step completed:', event.step_slug);
+              }
+            }
+          });
+          cleanups.push(unsubscribeStream);
+
+          // Listen to run completion/failure
+          const unsubscribeRun = run.on('*', (runEvent: any) => {
+            if (runEvent.event_type === 'run:completed') {
+              if (this.options.debug) {
+                console.log('[PgflowChatTransport] Run completed');
+              }
+
+              // Send finish chunk
+              controller.enqueue({
+                type: 'finish',
+                finishReason: 'stop',
+              });
+
+              // Cleanup and close
+              cleanups.forEach(cleanup => cleanup());
+              closeOnce();
+            }
+
+            if (runEvent.event_type === 'run:failed') {
+              console.error('[PgflowChatTransport] Run failed:', runEvent.error_message);
+
+              // Send error chunk
+              controller.enqueue({
+                type: 'error',
+                error: new Error(runEvent.error_message),
+              });
+
+              // Cleanup and close
+              cleanups.forEach(cleanup => cleanup());
+              closeOnce();
+            }
+          });
+          cleanups.push(unsubscribeRun);
+
+          // Handle abort signal
+          if (abortSignal) {
+            abortSignal.addEventListener('abort', () => {
+              if (this.options.debug) {
+                console.log('[PgflowChatTransport] Request aborted');
+              }
+
+              cleanups.forEach(cleanup => cleanup());
+              closeOnce();
+            });
+          }
+
+          // Wait for completion with a timeout
+          const timeout = this.options.timeout || 5 * 60 * 1000;
+
+          await run.waitForStatus('completed', {
+            timeoutMs: timeout,
+            signal: abortSignal,
+          }).catch(async () => {
+            // If not completed, check whether it failed
+            try {
+              await run.waitForStatus('failed', {
+                timeoutMs: 1000,
+                signal: abortSignal,
+              });
+            } catch {
+              // Timeout - flow still running
+              console.warn('[PgflowChatTransport] Flow timed out');
+            }
+          });
+
+        } catch (error) {
+          console.error('[PgflowChatTransport] Error:', error);
+          controller.error(error);
+        }
+      },
+
+      // Arrow function so `this` still refers to the transport; a shorthand
+      // method here would rebind `this` to the underlying source object
+      cancel: () => {
+        if (this.options.debug) {
+          console.log('[PgflowChatTransport] Stream cancelled');
+        }
+        // Cleanup handled by abort signal
+      },
+    });
+  }
+
+  /**
+   * Reconnect to an interrupted stream
+   */
+  async reconnectToStream(options: {
+    chatId: string;
+  }): Promise<ReadableStream<UIMessageChunk> | null> {
+    const { chatId } = options;
+
+    if (this.options.debug) {
+      console.log('[PgflowChatTransport] Reconnecting to stream:', chatId);
+    }
+
+    const pgflow = await this.getPgflowClient();
+
+    // Try to get the existing run
+    const run = await pgflow.getRun(chatId);
+
+    if (!run) {
+      if (this.options.debug) {
+        console.log('[PgflowChatTransport] No run found for reconnection');
+      }
+      return null;
+    }
+
+    // If already completed or failed, there is no stream to reconnect to
+    if (run.status === 'completed' || run.status === 'failed') {
+      if (this.options.debug) {
+        console.log('[PgflowChatTransport] Run already finished:', run.status);
+      }
+      return null;
+    }
+
+    // Re-subscribe to events
+    return new ReadableStream<UIMessageChunk>({
+      start: async (controller) => {
+        if (this.options.debug) {
+          console.log('[PgflowChatTransport] Resuming stream for:', run.run_id);
+        }
+
+        // Same event handling as sendMessages
+        const unsubscribe = run.on('*', (event: any) => {
+          if (event.event_type === 'run:completed') {
+            controller.enqueue({
+              type: 'finish',
+              finishReason: 'stop',
+            });
+            unsubscribe();
+            controller.close();
+          }
+
+          if (event.event_type === 'run:failed') {
+            controller.enqueue({
+              type: 'error',
+              error: new Error(event.error_message),
+            });
+            unsubscribe();
+            controller.close();
+          }
+        });
+
+        await run.waitForStatus('completed', {
+          timeoutMs: this.options.timeout || 5 * 60 * 1000,
+        }).catch(() => {
+          // Ignore timeout - already handled
+        });
+      },
+    });
+  }
+
+  /**
+   * Map pgflow streaming events to the AI SDK UIMessageChunk format
+   */
+  private mapStreamEventToChunks(
+    event: BroadcastStepStreamEvent
+  ): UIMessageChunk[] {
+    switch (event.stream_type) {
+      case 'text':
+        // Text streaming (LLM tokens)
+        return [{
+          type: 'text-delta',
+          text: event.chunk.text,
+        }];
+
+      case 'reasoning':
+        // Reasoning/thinking process
+        return [{
+          type: 'data-reasoning',
+          data: {
+            step: event.step_slug,
+            reasoning: event.chunk.reasoning,
+          },
+        }];
+
+      case 'data':
+        // Custom data (search results, progress, etc.)
+        return [{
+          type: `data-${event.chunk.key}`,
+          data: event.chunk.data,
+        }];
+
+      case 'tool-input':
+        // Tool execution (if supported by the AI SDK)
+        return [{
+          type: 'tool-input-delta',
+          // @ts-ignore - AI SDK may not have this type yet
+          toolCallId: event.step_slug,
+          toolName: event.chunk.toolName,
+          argsTextDelta: JSON.stringify(event.chunk.input),
+        }];
+
+      default:
+        console.warn('[PgflowChatTransport] Unknown stream type:', event.stream_type);
+        return [];
+    }
+  }
+}
+
+/**
+ * Factory function for easier usage
+ */
+export function createPgflowChatTransport(
+  supabase: SupabaseClient,
+  flowSlug: string,
+  options?: {
+    debug?: boolean;
+    timeout?: number;
+  }
+): PgflowChatTransport {
+  return new PgflowChatTransport(supabase, flowSlug, options);
+}
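+
+/**
+ * Mapping contract, for reference. A backend call like
+ * `ctx.stream.emitData('citations', ['a', 'b'])` arrives as a Realtime event:
+ *
+ * ```typescript
+ * {
+ *   event_type: 'step:stream',
+ *   stream_type: 'data',
+ *   chunk: { key: 'citations', data: ['a', 'b'] },
+ *   // run_id, step_slug, index, timestamp ...
+ * }
+ * ```
+ *
+ * and mapStreamEventToChunks() turns it into the AI SDK chunk that the
+ * onData handler in chat.example.tsx matches on:
+ *
+ * ```typescript
+ * { type: 'data-citations', data: ['a', 'b'] }
+ * ```
+ */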