From c7ea21dfe04eb07fc702d89a93d3c0bf99830c42 Mon Sep 17 00:00:00 2001 From: aspectrr Date: Wed, 19 Nov 2025 10:17:06 -0500 Subject: [PATCH 1/4] fix: add pplx guide --- content/docs/overview/guides/aeo.mdx | 6 + content/docs/overview/guides/meta.json | 3 +- content/docs/overview/guides/perplexity.mdx | 574 ++++++++++++++++++ .../docs/overview/guides/playwright-node.mdx | 2 +- 4 files changed, 583 insertions(+), 2 deletions(-) create mode 100644 content/docs/overview/guides/aeo.mdx create mode 100644 content/docs/overview/guides/perplexity.mdx diff --git a/content/docs/overview/guides/aeo.mdx b/content/docs/overview/guides/aeo.mdx new file mode 100644 index 00000000..32976d71 --- /dev/null +++ b/content/docs/overview/guides/aeo.mdx @@ -0,0 +1,6 @@ +--- +title: Build an AEO Scraper (Node) +description: Scrape LLM providers with Steel and synthesize answers with OpenAI +sidebarTitle: AEO Scraper (Node) +llm: true +--- diff --git a/content/docs/overview/guides/meta.json b/content/docs/overview/guides/meta.json index 9e2294ad..a84d93ad 100644 --- a/content/docs/overview/guides/meta.json +++ b/content/docs/overview/guides/meta.json @@ -7,6 +7,7 @@ "playwright-node", "playwright-python", "puppeteer", - "selenium" + "selenium", + "perplexity" ] } diff --git a/content/docs/overview/guides/perplexity.mdx b/content/docs/overview/guides/perplexity.mdx new file mode 100644 index 00000000..c9cfe660 --- /dev/null +++ b/content/docs/overview/guides/perplexity.mdx @@ -0,0 +1,574 @@ +--- +title: Build a Perplexity‑style Search Engine +description: Search with Brave, scrape with Steel, and synthesize with OpenAI using a TypeScript CLI +sidebarTitle: Perplexity Clone (Node) +llm: true +--- + +This guide shows you how to build a Perplexity-like research workflow in Node.js/TypeScript that: +- Generates targeted search queries with OpenAI +- Finds relevant links with the Brave Search API +- Scrapes those links to Markdown via Steel’s /v1/scrape endpoint +- Synthesizes a well-cited answer with inline citations + +Looking for a ready-made starter? Skip to the example project section. + +Quick Start +----------- + +Clone the example and run it locally: + +```bash +git clone https://github.com/steel-dev/steel-cookbook +cd steel-cookbook/examples/steel-perplexity-clone +npm install + +# Create a .env file in this directory with your credentials +# See "Configuration" below for required variables. + +# Option A: put QUERY in .env +npm start + +# Option B: pass QUERY on the fly +QUERY="What are the latest improvements in WebAssembly?" npm start +``` + +- Node.js: Requires Node 18+ +- Credentials: You’ll need API keys for Steel.dev, OpenAI, and Brave Search. + +Project Structure +----------------- + +```bash +examples/steel-perplexity-clone + ├─ src/ + │ ├─ config.ts # Env parsing, defaults, feature flags + │ ├─ clients.ts # Brave search, Steel scrape, OpenAI synthesis + │ └─ index.ts # Main pipeline orchestration + ├─ package.json + ├─ tsconfig.json + └─ README.md +``` + +Configuration +------------- + +Create a `.env` file in `examples/steel-perplexity-clone`: + +```env +NODE_ENV=development + +# OpenAI +OPENAI_API_KEY=sk-... +OPENAI_ORG_ID= +OPENAI_MODEL=gpt-5-nano +OPENAI_ENABLE_WEB_SEARCH=true + +# Steel.dev +STEEL_API_KEY=steel_... +STEEL_SCRAPE_ENDPOINT=https://api.steel.dev/v1/scrape +# Optional pacing between scrape requests (ms), useful for rate limits +STEEL_TIMEOUT=3000 + +# Brave Search +BRAVE_API_KEY=brv_... 
BRAVE_SEARCH_ENDPOINT=https://api.search.brave.com/res/v1/web/search
BRAVE_SEARCH_COUNTRY=US
BRAVE_SEARCH_LANG=en
BRAVE_SAFESEARCH=moderate

# Search behavior
SEARCH_TOP_K=3
REQUEST_TIMEOUT_MS=30000
CONCURRENCY=2

# Your question to research
QUERY="What are the latest improvements in WebAssembly and their benefits?"
```

What this example does
----------------------

At a high level:

1) Generate multiple targeted queries for better coverage
- Uses OpenAI to turn the user query into 3 high‑signal search queries

2) Search and rank URLs with Brave
- Calls Brave’s Web Search API for each generated query
- Aggregates and ranks URLs using a reciprocal-rank strategy

3) Scrape sources to Markdown with Steel
- Sends each URL to Steel’s `/v1/scrape` to obtain clean Markdown

4) Synthesize a well‑cited answer with OpenAI
- Builds a context block from scraped Markdown
- Instructs the model to produce inline [n] citations, matching the material order

The core orchestration happens here:

```typescript
import { config } from "./config";
import {
  scrapeUrlsToMarkdown,
  synthesizeWithCitations,
  multiQueryBraveSearch,
} from "./clients";

type SearchResponse = {
  query: string;
  answer: string;
  citations: Array<{ index: number; url: string }>;
  model: string;
  meta: {
    tookMs: number;
  };
};

async function main() {
  const started = Date.now();

  const query = config.query;
  const topK = config.search.topK;
  const concurrency = config.concurrency;

  console.info("Search request received", {
    query,
    topK,
  });

  // 1) Use Brave to get top relevant URLs (do double to get more relevant results to search)
  const { urls } = await multiQueryBraveSearch(query, topK * 2);
  // const searchRes = await searchTopRelevantUrls(query, requestedTopK * 2);
  // const urls = (searchRes.urls || []).slice(0, requestedTopK * 2);

  // console.log(urls, urls.length);

  if (urls.length === 0) {
    return console.error("No URLs found for the given query.");
  }

  // 2) Scrape each URL into markdown using Steel.dev
  const materials = await scrapeUrlsToMarkdown(urls, concurrency, topK);

  if (materials.length === 0) {
    console.error("Failed to scrape any URLs. Try again or refine your query.");
    return;
  }

  // 3) Use OpenAI to synthesize an answer with inline citations
  const synthesis = await synthesizeWithCitations({
    query,
    materials,
  });

  const tookMs = Date.now() - started;

  const response: SearchResponse = {
    query,
    answer: synthesis.answer,
    citations: synthesis.sources,
    model: config.openai.model,
    meta: { tookMs },
  };

  console.log(response);
}

// Execute the demo
main()
  .then(() => {
    process.exit(0);
  })
  .catch((error) => {
    console.error("Task execution failed:", error);
    process.exit(1);
  });
```

Step 1: Generate richer search coverage
---------------------------------------

- The example asks OpenAI to produce 3 specific queries that maximize recall and signal.
- It then calls Brave Search for each query, pausing briefly between calls.
- Results are aggregated and ranked by frequency and reciprocal rank.

```typescript
export async function multiQueryBraveSearch(
  userQuery: string,
  topKPerQuery = config.search.topK,
): Promise {
  // 1) Ask OpenAI to produce exactly 3 queries as strict JSON.
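  // (On models that support it, response_format: { type: "json_object" } can be
  // passed to chat.completions.create to enforce JSON output, rather than
  // relying on the prompt alone; this example keeps it prompt-only.)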
+ const prompt = [ + "You are a search strategist.", + "Given the user's query, generate exactly 3 search queries that maximize the likelihood of finding relevant, recent, and factual information.", + "Avoid generic questions; use specific keywords.", + "", + "Return strict JSON with this shape:", + '{ "queries": ["...", "...", "..."] }', + "", + `User query: ${userQuery}`, + ].join("\n"); + + const completion = await openai.chat.completions.create({ + model: config.openai.model, + messages: [ + { role: "system", content: "You produce JSON only. No prose." }, + { role: "user", content: prompt }, + ], + }); + + const rawContent = + completion.choices?.[0]?.message?.content?.trim() ?? '{"queries": []}'; + + let queries: string[] = []; + try { + const parsed = JSON.parse(rawContent); + if (Array.isArray(parsed?.queries)) { + queries = parsed.queries.map((q: unknown) => + typeof q === "string" ? q.trim() : "", + ); + } + } catch { + // Fallback: split lines + queries = rawContent + .split("\n") + .map((l) => l.replace(/^[-*\d.)\s]+/, "").trim()) + .filter(Boolean) + .slice(0, 3); + } + + // Ensure exactly 3 queries, fall back to the original user query variations if needed + queries = Array.from( + new Set( + queries + .filter(Boolean) + .map((q) => q.replace(/\s+/g, " ").trim()) + .slice(0, 3), + ), + ); + while (queries.length < 3) { + if (queries.length === 0) queries.push(userQuery); + else queries.push(`${userQuery} ${queries.length + 1}`); + } + queries = queries.slice(0, 3); + + console.info("Generated queries", { queries }); + + // 2) For each query, call Brave Search with a 1s delay between calls. + const perQueryUrls: string[][] = []; + for (let i = 0; i < queries.length; i++) { + const q = queries[i]; + if (q == null) { + perQueryUrls.push([]); + continue; + } + if (i > 0) { + await new Promise((r) => setTimeout(r, 1000)); + } + try { + const { urls } = await searchTopRelevantUrls( + q, + topKPerQuery ?? config.search.topK, + ); + perQueryUrls.push(urls); + } catch (err) { + console.warn("Brave search failed for generated query", { + query: q, + err: (err as Error)?.message, + }); + perQueryUrls.push([]); + } + } + + // 3) Rank aggregation: reciprocal rank sum + frequency and best rank tiebreakers + type Acc = { + score: number; + occurrences: number; + ranks: number[]; + }; + const scores = new Map(); + + perQueryUrls.forEach((urls) => { + urls.forEach((u, idx) => { + const url = u.trim(); + if (!url) return; + const rank = idx + 1; // 1-based + const inc = 1 / rank; // reciprocal rank + const prev = scores.get(url) ?? { score: 0, occurrences: 0, ranks: [] }; + prev.score += inc; + prev.occurrences += 1; + prev.ranks.push(rank); + scores.set(url, prev); + }); + }); + + // 4) Deduplicate and sort + const ranked: RankedUrl[] = Array.from(scores.entries()) + .map(([url, acc]) => ({ + url, + score: acc.score, + occurrences: acc.occurrences, + ranks: acc.ranks.sort((a, b) => a - b), + })) + .sort((a, b) => { + if (b.score !== a.score) return b.score - a.score; // primary: score + if (b.occurrences !== a.occurrences) return b.occurrences - a.occurrences; // secondary: frequency + // tertiary: best (lowest) rank + const aBest = a.ranks[0] ?? Number.POSITIVE_INFINITY; + const bBest = b.ranks[0] ?? 
Number.POSITIVE_INFINITY; + return aBest - bBest; + }); + + console.info("Ranked URLs across multi-query search", { + unique: ranked.length, + }); + + return { + queries, + urls: ranked.map((url) => url.url), + _raw: { openai: completion, perQueryUrls }, + }; +} +``` + +Under the hood, the Brave call itself looks like this: + +```typescript +export async function searchTopRelevantUrls( + query: string, + topK = config.search.topK, +): Promise { + // Build Brave Search request URL with query params + const endpoint = new URL(config.brave.endpoint); + endpoint.searchParams.set("q", query); + endpoint.searchParams.set("country", config.brave.country); + endpoint.searchParams.set("search_lang", config.brave.lang); + endpoint.searchParams.set("safesearch", config.brave.safesearch); + endpoint.searchParams.set( + "count", + String(Math.min(topK, config.search.topK)), + ); + + const res = await fetchWithTimeout(endpoint.toString(), { + headers: { + Accept: "application/json", + "X-Subscription-Token": config.brave.apiKey, + }, + }); + + if (!res.ok) { + const text = await res.text().catch(() => ""); + console.error("Brave search failed", { + status: res.status, + statusText: res.statusText, + response: text?.slice(0, 1000), + }); + throw new Error(`Brave search failed: ${res.status} ${res.statusText}`); + } + + const data = (await res.json()) as any; + + // Extract URLs from Brave response + const urls: string[] = []; + if (data?.web?.results && Array.isArray(data.web.results)) { + for (const r of data.web.results) { + if (typeof r?.url === "string") urls.push(r.url); + } + } else if (Array.isArray(data?.results)) { + for (const r of data.results) { + if (typeof r?.url === "string") urls.push(r.url); + } + } + + if (urls.length === 0) { + console.warn("No URLs returned from Brave, attempting salvage from raw", { + raw: JSON.stringify(data).slice(0, 1000), + }); + const rawText = JSON.stringify(data); + const regex = /\bhttps?:\/\/[^\s"'<>]+/gi; + const salvaged = (rawText.match(regex) ?? []) as string[]; + urls.push(...salvaged); + } + + // Normalize and dedupe + const normalized = Array.from(new Set(urls.map((u) => u.trim()))) + .filter(Boolean) + .slice(0, topK); + + console.info("Collected URLs from Brave", { count: normalized.length }); + + return { + urls: normalized, + _raw: data, + }; +} +``` + +Step 2: Scrape each URL to Markdown with Steel +---------------------------------------------- + +- For each URL, POST to Steel’s scrape endpoint. +- Request Markdown by setting `format: ["markdown"]`. +- The response contains `content.markdown`, `links`, and metadata. 
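
The request shape is easy to verify outside the app first; here is an equivalent call with curl, using the same endpoint, header, and body the helper below sends (substitute your own target URL):

```bash
# POST a page to Steel's scrape endpoint and get Markdown back
curl -X POST https://api.steel.dev/v1/scrape \
  -H "Steel-Api-Key: $STEEL_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"url": "https://example.com", "format": ["markdown"]}'
```

The TypeScript helper wraps the same call: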
+ +```typescript +export async function scrapeUrlToMarkdown(url: string): Promise { + const endpoint = config.steel.scrapeEndpoint; + + const body: SteelScrapeRequest = { + url, + format: ["markdown"], + }; + + const res = await fetchWithTimeout(endpoint, { + method: "POST", + headers: { + "Steel-Api-Key": config.steel.apiKey, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }); + + if (!res.ok) { + const text = await res.text().catch(() => ""); + console.error("Steel.dev scrape failed", { + status: res.status, + statusText: res.statusText, + url, + response: text?.slice(0, 1000), + }); + throw new Error( + `Steel.dev scrape failed for ${url}: ${res.status} ${res.statusText}`, + ); + } + + const payload = (await res.json()) as SteelScrapeResponse; + const markdown = payload?.content?.markdown; + const links = payload?.links; + + if (!markdown) { + console.warn("Steel.dev response did not include recognizable markdown", { + url, + payload: JSON.stringify(payload).slice(0, 1000), + }); + throw new Error(`Steel.dev response missing markdown content for ${url}`); + } + + return { url, markdown, links }; +} +``` + +Step 3: Synthesize an answer with inline citations +-------------------------------------------------- + +- Build a context that enumerates materials like `[1] URL`, then the Markdown. +- Prompt the model to cite with `[n]` as it writes. +- Return an answer plus a `sources` array mapping `[n] -> url`. + +```typescript +export async function synthesizeWithCitations( + input: SynthesisInput, +): Promise { + // Build context block + const contextHeader = + "Context materials (each item shows [index] and URL, followed by markdown content)"; + const contextLines: string[] = [contextHeader]; + input.materials.forEach((m, i) => { + const idx = i + 1; + contextLines.push(`\n[${idx}] ${m.url}\n---\n${m.markdown}\n`); + }); + + const system = ` You are Perplexity, ... `; + + const user = [`User query: ${input.query}`, "", contextLines.join("\n")].join( + "\n", + ); + + const completion = await openai.chat.completions.create({ + model: config.openai.model, + messages: [ + { role: "system", content: system }, + { role: "user", content: user }, + ], + }); + + const answer = completion.choices?.[0]?.message?.content?.trim() ?? ""; + + // Collect sources in index order for convenience + const sources = input.materials.map((m, i) => ({ index: i + 1, url: m.url })); + + console.info("Synthesis complete", { + answerPreview: answer.slice(0, 160), + }); + + return { + answer, + sources, + _raw: completion, + }; +} +``` + +Run and interpret the output +---------------------------- + +After `npm start`, the script logs a JSON result like: + +```json +{ + "query": "What are the latest improvements in WebAssembly and their benefits?", + "answer": "Recent WebAssembly updates improved component model support and tooling, enabling easier interop and faster iterations.[1][2] These changes reduce bundle size, improve portability, and speed up non‑JS language performance across platforms.[2][3]", + "citations": [ + { "index": 1, "url": "https://example.com/article-1" }, + { "index": 2, "url": "https://example.com/article-2" }, + { "index": 3, "url": "https://example.com/article-3" } + ], + "model": "gpt-5-nano", + "meta": { "tookMs": 12345 } +} +``` + +Tuning and tips +--------------- + +- Expand coverage + - Increase `SEARCH_TOP_K` to retrieve and scrape more URLs. + - `CONCURRENCY` controls how many pages you scrape at once. 
+ +- Respect rate limits + - Steel Hobby plan allows ~20 requests/min. To add an actual delay between scrapes, replace the no-op `setTimeout(() => {}, config.steel.timeout)` with an awaited delay: +```typescript +// Replace this in scrapeUrlsToMarkdown's worker loop: +await new Promise((r) => setTimeout(r, config.steel.timeout)); +``` + +- Timeouts + - `REQUEST_TIMEOUT_MS` applies to both Brave and Steel requests. + +- Models + - Use `OPENAI_MODEL` to choose a cost-effective model for both query generation and synthesis. + +- Debugging + - The code returns `_raw` payloads in some helpers to aid troubleshooting. + - Log the ranked URL list before scraping if you need to inspect relevance. + +Example project +--------------- + +- GitHub: https://github.com/steel-dev/steel-cookbook/tree/main/examples/steel-perplexity-clone + +What to customize next +---------------------- + +- Swap Brave for another Search API if you prefer +- Add caching for search and scrapes +- Stream synthesis tokens for a live UI +- Persist answers and materials to a database +- Filter sources by domain whitelist/blacklist + +Support +------- + +- Steel Documentation: https://docs.steel.dev +- API Reference: https://docs.steel.dev/api-reference +- Discord Community: https://discord.gg/steel-dev diff --git a/content/docs/overview/guides/playwright-node.mdx b/content/docs/overview/guides/playwright-node.mdx index e764acf9..9e8252c8 100644 --- a/content/docs/overview/guides/playwright-node.mdx +++ b/content/docs/overview/guides/playwright-node.mdx @@ -11,7 +11,7 @@ Steel sessions are designed to be easily driven by Playwright. There are two mai -**Quick Start:** Want to jump right in? [Skip to example project](https://docs.steel.dev/overview/guides/connect-with-playwright-node#example-project-scraping-hacker-news). +**Quick Start:** Want to jump right in? [Skip to example project](https://docs.steel.dev/overview/guides/playwright-node#example-project-scraping-hacker-news). Method #1: One-line change (_easiest)_ -------------------------------------- From 343dad07777481af3e84cc413d8d9c29a2033463 Mon Sep 17 00:00:00 2001 From: aspectrr Date: Wed, 19 Nov 2025 10:19:47 -0500 Subject: [PATCH 2/4] fix: update .env --- content/docs/overview/guides/perplexity.mdx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/content/docs/overview/guides/perplexity.mdx b/content/docs/overview/guides/perplexity.mdx index c9cfe660..138b1a82 100644 --- a/content/docs/overview/guides/perplexity.mdx +++ b/content/docs/overview/guides/perplexity.mdx @@ -62,7 +62,6 @@ NODE_ENV=development OPENAI_API_KEY=sk-... OPENAI_ORG_ID= OPENAI_MODEL=gpt-5-nano -OPENAI_ENABLE_WEB_SEARCH=true # Steel.dev STEEL_API_KEY=steel_... 
@@ -78,7 +77,7 @@ BRAVE_SEARCH_LANG=en BRAVE_SAFESEARCH=moderate # Search behavior -SEARCH_TOP_K=3 +SEARCH_TOP_K=10 REQUEST_TIMEOUT_MS=30000 CONCURRENCY=2 From 5d867b75e71e6726306d49b3e112e78ce30bb0d6 Mon Sep 17 00:00:00 2001 From: aspectrr Date: Tue, 2 Dec 2025 08:41:23 -0500 Subject: [PATCH 3/4] feat: updated perplexity docs --- content/docs/overview/guides/perplexity.mdx | 304 +++++++------------- 1 file changed, 107 insertions(+), 197 deletions(-) diff --git a/content/docs/overview/guides/perplexity.mdx b/content/docs/overview/guides/perplexity.mdx index 138b1a82..e4f71617 100644 --- a/content/docs/overview/guides/perplexity.mdx +++ b/content/docs/overview/guides/perplexity.mdx @@ -6,7 +6,6 @@ llm: true --- This guide shows you how to build a Perplexity-like research workflow in Node.js/TypeScript that: -- Generates targeted search queries with OpenAI - Finds relevant links with the Brave Search API - Scrapes those links to Markdown via Steel’s /v1/scrape endpoint - Synthesizes a well-cited answer with inline citations @@ -66,8 +65,6 @@ OPENAI_MODEL=gpt-5-nano # Steel.dev STEEL_API_KEY=steel_... STEEL_SCRAPE_ENDPOINT=https://api.steel.dev/v1/scrape -# Optional pacing between scrape requests (ms), useful for rate limits -STEEL_TIMEOUT=3000 # Brave Search BRAVE_API_KEY=brv_... @@ -78,8 +75,8 @@ BRAVE_SAFESEARCH=moderate # Search behavior SEARCH_TOP_K=10 -REQUEST_TIMEOUT_MS=30000 -CONCURRENCY=2 +REQUEST_TIMEOUT_MS=5000 +CONCURRENCY=5 # Your question to research QUERY="What are the latest improvements in WebAssembly and their benefits?" @@ -137,11 +134,7 @@ async function main() { }); // 1) Use Brave to get top relevant URLs (do double to get more relevant results to search) - const { urls } = await multiQueryBraveSearch(query, topK * 2); - // const searchRes = await searchTopRelevantUrls(query, requestedTopK * 2); - // const urls = (searchRes.urls || []).slice(0, requestedTopK * 2); - - // console.log(urls, urls.length); + const { urls } = await singleQueryBraveSearch(query, topK * 2); if (urls.length === 0) { return console.error("No URLs found for the given query."); @@ -192,141 +185,40 @@ Step 1: Generate richer search coverage - Results are aggregated and ranked by frequency and reciprocal rank. ```typescript -export async function multiQueryBraveSearch( +export async function singleQueryBraveSearch( userQuery: string, topKPerQuery = config.search.topK, ): Promise { - // 1) Ask OpenAI to produce exactly 3 queries as strict JSON. - const prompt = [ - "You are a search strategist.", - "Given the user's query, generate exactly 3 search queries that maximize the likelihood of finding relevant, recent, and factual information.", - "Avoid generic questions; use specific keywords.", - "", - "Return strict JSON with this shape:", - '{ "queries": ["...", "...", "..."] }', - "", - `User query: ${userQuery}`, - ].join("\n"); + const spinner = ora("Searching...").start(); + const normalizedQuery = userQuery.trim() || userQuery; + const queries = [normalizedQuery]; - const completion = await openai.chat.completions.create({ - model: config.openai.model, - messages: [ - { role: "system", content: "You produce JSON only. No prose." }, - { role: "user", content: prompt }, - ], - }); - - const rawContent = - completion.choices?.[0]?.message?.content?.trim() ?? '{"queries": []}'; - - let queries: string[] = []; try { - const parsed = JSON.parse(rawContent); - if (Array.isArray(parsed?.queries)) { - queries = parsed.queries.map((q: unknown) => - typeof q === "string" ? 
q.trim() : "", - ); - } - } catch { - // Fallback: split lines - queries = rawContent - .split("\n") - .map((l) => l.replace(/^[-*\d.)\s]+/, "").trim()) - .filter(Boolean) - .slice(0, 3); - } - - // Ensure exactly 3 queries, fall back to the original user query variations if needed - queries = Array.from( - new Set( - queries - .filter(Boolean) - .map((q) => q.replace(/\s+/g, " ").trim()) - .slice(0, 3), - ), - ); - while (queries.length < 3) { - if (queries.length === 0) queries.push(userQuery); - else queries.push(`${userQuery} ${queries.length + 1}`); - } - queries = queries.slice(0, 3); - - console.info("Generated queries", { queries }); - - // 2) For each query, call Brave Search with a 1s delay between calls. - const perQueryUrls: string[][] = []; - for (let i = 0; i < queries.length; i++) { - const q = queries[i]; - if (q == null) { - perQueryUrls.push([]); - continue; - } - if (i > 0) { - await new Promise((r) => setTimeout(r, 1000)); - } - try { - const { urls } = await searchTopRelevantUrls( - q, - topKPerQuery ?? config.search.topK, - ); - perQueryUrls.push(urls); - } catch (err) { - console.warn("Brave search failed for generated query", { - query: q, - err: (err as Error)?.message, - }); - perQueryUrls.push([]); - } - } - - // 3) Rank aggregation: reciprocal rank sum + frequency and best rank tiebreakers - type Acc = { - score: number; - occurrences: number; - ranks: number[]; - }; - const scores = new Map(); - - perQueryUrls.forEach((urls) => { - urls.forEach((u, idx) => { - const url = u.trim(); - if (!url) return; - const rank = idx + 1; // 1-based - const inc = 1 / rank; // reciprocal rank - const prev = scores.get(url) ?? { score: 0, occurrences: 0, ranks: [] }; - prev.score += inc; - prev.occurrences += 1; - prev.ranks.push(rank); - scores.set(url, prev); - }); - }); + const { urls } = await searchTopRelevantUrls( + normalizedQuery, + topKPerQuery ?? config.search.topK, + ); - // 4) Deduplicate and sort - const ranked: RankedUrl[] = Array.from(scores.entries()) - .map(([url, acc]) => ({ - url, - score: acc.score, - occurrences: acc.occurrences, - ranks: acc.ranks.sort((a, b) => a - b), - })) - .sort((a, b) => { - if (b.score !== a.score) return b.score - a.score; // primary: score - if (b.occurrences !== a.occurrences) return b.occurrences - a.occurrences; // secondary: frequency - // tertiary: best (lowest) rank - const aBest = a.ranks[0] ?? Number.POSITIVE_INFINITY; - const bBest = b.ranks[0] ?? Number.POSITIVE_INFINITY; - return aBest - bBest; + spinner.succeed("Search complete"); + + return { + queries, + urls, + _raw: { perQueryUrls: [urls] }, + }; + } catch (err) { + spinner.fail("Search failed"); + console.warn("Brave search failed for query", { + query: normalizedQuery, + err: (err as Error)?.message, }); - console.info("Ranked URLs across multi-query search", { - unique: ranked.length, - }); - - return { - queries, - urls: ranked.map((url) => url.url), - _raw: { openai: completion, perQueryUrls }, - }; + return { + queries, + urls: [], + _raw: { error: err }, + }; + } } ``` @@ -411,49 +303,31 @@ Step 2: Scrape each URL to Markdown with Steel - The response contains `content.markdown`, `links`, and metadata. 
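
The `scrapeUrlsToMarkdown` helper that `main()` calls is not shown in the diff; below is a minimal sketch of one way to implement it, assuming a hand-rolled worker pool over the per-URL `scrapeUrlToMarkdown` (the signature and `Material` shape are illustrative, not the cookbook's actual code):

```typescript
// Illustrative sketch only; the real helper in clients.ts may differ.
type Material = { url: string; markdown: string; links?: unknown };

export async function scrapeUrlsToMarkdown(
  urls: string[],
  concurrency: number,
  limit: number,
): Promise<Material[]> {
  const results: Material[] = [];
  let next = 0;

  // Each worker repeatedly claims the next URL until the list is exhausted
  // or enough pages have been scraped successfully.
  async function worker(): Promise<void> {
    while (next < urls.length && results.length < limit) {
      const url = urls[next++];
      if (!url) continue;
      const material = await scrapeUrlToMarkdown(url); // null on failure (see below)
      if (material) results.push(material);
    }
  }

  await Promise.all(
    Array.from({ length: Math.min(concurrency, urls.length) }, () => worker()),
  );
  return results.slice(0, limit);
}
```

The rewritten per-URL scraper it depends on: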
```typescript -export async function scrapeUrlToMarkdown(url: string): Promise { - const endpoint = config.steel.scrapeEndpoint; - - const body: SteelScrapeRequest = { - url, - format: ["markdown"], - }; - - const res = await fetchWithTimeout(endpoint, { - method: "POST", - headers: { - "Steel-Api-Key": config.steel.apiKey, - "Content-Type": "application/json", - }, - body: JSON.stringify(body), - }); +export async function scrapeUrlToMarkdown( + url: string, +): Promise { + try { + const client = new Steel({ + steelAPIKey: config.steel.apiKey, + timeout: config.requestTimeoutMs, + }); - if (!res.ok) { - const text = await res.text().catch(() => ""); - console.error("Steel.dev scrape failed", { - status: res.status, - statusText: res.statusText, + const res = await client.scrape({ url, - response: text?.slice(0, 1000), + format: ["markdown"], }); - throw new Error( - `Steel.dev scrape failed for ${url}: ${res.status} ${res.statusText}`, - ); - } - const payload = (await res.json()) as SteelScrapeResponse; - const markdown = payload?.content?.markdown; - const links = payload?.links; + const markdown = res?.content?.markdown; + const links = res?.links; - if (!markdown) { - console.warn("Steel.dev response did not include recognizable markdown", { - url, - payload: JSON.stringify(payload).slice(0, 1000), - }); - throw new Error(`Steel.dev response missing markdown content for ${url}`); - } + if (!markdown) { + throw new Error(`Steel.dev response missing markdown content for ${url}`); + } - return { url, markdown, links }; + return { url, markdown, links }; + } catch { + return null; + } } ``` @@ -468,6 +342,7 @@ Step 3: Synthesize an answer with inline citations export async function synthesizeWithCitations( input: SynthesisInput, ): Promise { + const spinner = ora("Synthesizing answer...").start(); // Build context block const contextHeader = "Context materials (each item shows [index] and URL, followed by markdown content)"; @@ -477,11 +352,39 @@ export async function synthesizeWithCitations( contextLines.push(`\n[${idx}] ${m.url}\n---\n${m.markdown}\n`); }); - const system = ` You are Perplexity, ... `; + const now = new Date(); + + // Day of week, month, day, year + const dateFormatter = new Intl.DateTimeFormat("en-NZ", { + weekday: "long", + month: "long", + day: "2-digit", + year: "numeric", + timeZone: "Pacific/Auckland", + }); + + // Time with hour + timezone abbreviation + const timeFormatter = new Intl.DateTimeFormat("en-NZ", { + hour: "numeric", + minute: "2-digit", + hour12: true, + timeZone: "Pacific/Auckland", + timeZoneName: "short", // gives "NZDT" + }); + + const dateStr = dateFormatter.format(now); + const timeStr = timeFormatter.format(now); + + // Combine + remove the minutes (":00") if you want "7 PM" instead of "7:00 PM" + const final = `${dateStr}, ${timeStr.replace(/:00/, "")}`; + + const system = ` You are ...` const user = [`User query: ${input.query}`, "", contextLines.join("\n")].join( "\n", ); + let answer = ""; + let started = false; const completion = await openai.chat.completions.create({ model: config.openai.model, @@ -489,15 +392,28 @@ export async function synthesizeWithCitations( { role: "system", content: system }, { role: "user", content: user }, ], + stream: true, }); - const answer = completion.choices?.[0]?.message?.content?.trim() ?? 
""; + for await (const chunk of completion) { + const content = chunk.choices[0]?.delta?.content; + if (content) { + if (!started) { + started = true; + spinner.succeed("Answer synthesized"); + process.stdout.write("\n"); + } + answer += content; + process.stdout.write(content); + } + } // Collect sources in index order for convenience const sources = input.materials.map((m, i) => ({ index: i + 1, url: m.url })); - console.info("Synthesis complete", { - answerPreview: answer.slice(0, 160), + console.log("\n\nSources:"); + sources.forEach((source) => { + console.log(`[${source.index}] ${source.url}`); }); return { @@ -511,20 +427,17 @@ export async function synthesizeWithCitations( Run and interpret the output ---------------------------- -After `npm start`, the script logs a JSON result like: - -```json -{ - "query": "What are the latest improvements in WebAssembly and their benefits?", - "answer": "Recent WebAssembly updates improved component model support and tooling, enabling easier interop and faster iterations.[1][2] These changes reduce bundle size, improve portability, and speed up non‑JS language performance across platforms.[2][3]", - "citations": [ - { "index": 1, "url": "https://example.com/article-1" }, - { "index": 2, "url": "https://example.com/article-2" }, - { "index": 3, "url": "https://example.com/article-3" } - ], - "model": "gpt-5-nano", - "meta": { "tookMs": 12345 } -} +After `npm start`, the script logs the result step-by-step: + +```bash +✔ Search complete +✔ Scraping complete +✔ Answer synthesized + +Prediction markets offer a practical way to hedge specific risks and to add liquidity to broader market positions by turning uncertain outcomes into tradable, cash-settled contracts. Their price signals aggregate diverse information in real time, creating hedging tools and a more liquid trading environment than many traditional markets. [1] +... + +## How prediction markets create hedging opportunities ``` Tuning and tips @@ -535,11 +448,8 @@ Tuning and tips - `CONCURRENCY` controls how many pages you scrape at once. - Respect rate limits - - Steel Hobby plan allows ~20 requests/min. To add an actual delay between scrapes, replace the no-op `setTimeout(() => {}, config.steel.timeout)` with an awaited delay: -```typescript -// Replace this in scrapeUrlsToMarkdown's worker loop: -await new Promise((r) => setTimeout(r, config.steel.timeout)); -``` +- Steel Hobby plan allows ~20 requests/min. To add an actual delay between scrapes, replace the no-op `setTimeout(() => {}, config.steel.timeout)` with an awaited delay: + - Timeouts - `REQUEST_TIMEOUT_MS` applies to both Brave and Steel requests. From f46e86660c64143297fdfccf0847d409d76ea406 Mon Sep 17 00:00:00 2001 From: aspectrr Date: Wed, 3 Dec 2025 07:13:57 -0500 Subject: [PATCH 4/4] fix: update perplexity docs --- content/docs/overview/guides/perplexity.mdx | 62 ++++++++------------- 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/content/docs/overview/guides/perplexity.mdx b/content/docs/overview/guides/perplexity.mdx index e4f71617..876c4e68 100644 --- a/content/docs/overview/guides/perplexity.mdx +++ b/content/docs/overview/guides/perplexity.mdx @@ -64,7 +64,6 @@ OPENAI_MODEL=gpt-5-nano # Steel.dev STEEL_API_KEY=steel_... -STEEL_SCRAPE_ENDPOINT=https://api.steel.dev/v1/scrape # Brave Search BRAVE_API_KEY=brv_... @@ -87,23 +86,18 @@ What this example does At a high level: -1) Generate multiple targeted queries for better coverage -- Uses OpenAI to turn the user query into 3 high‑signal search queries +1. 
Search Brave for relevant URLs
 
-2) Search and rank URLs with Brave
-- Calls Brave’s Web Search API for each generated query
-- Aggregates and ranks URLs using a reciprocal-rank strategy
+2. Scrape sources to Markdown with Steel
+   - Sends each URL to Steel’s `/v1/scrape` to obtain clean Markdown
 
-3) Scrape sources to Markdown with Steel
-- Sends each URL to Steel’s `/v1/scrape` to obtain clean Markdown
+3. Synthesize a well‑cited answer with OpenAI
+   - Builds a context block from scraped Markdown
+   - Instructs the model to produce inline [n] citations, matching the material order
 
-4) Synthesize a well‑cited answer with OpenAI
-- Builds a context block from scraped Markdown
-- Instructs the model to produce inline [n] citations, matching the material order
 
 The core orchestration happens here:
 
-```typescript
+```typescript Typescript -wcn -f index.ts
 import { config } from "./config";
 import {
   scrapeUrlsToMarkdown,
   synthesizeWithCitations,
-  multiQueryBraveSearch,
+  singleQueryBraveSearch,
 } from "./clients";
@@ -177,14 +171,12 @@ main()
   });
 ```
 
-Step 1: Generate richer search coverage
+Step 1: Get relevant URLs
 ---------------------------------------
 
-- The example asks OpenAI to produce 3 specific queries that maximize recall and signal.
-- It then calls Brave Search for each query, pausing briefly between calls.
-- Results are aggregated and ranked by frequency and reciprocal rank.
+- The example calls the Brave API to retrieve relevant URLs for the user query.
 
-```typescript
+```typescript Typescript -wcn
 export async function singleQueryBraveSearch(
   userQuery: string,
   topKPerQuery = config.search.topK,
 
 Under the hood, the Brave call itself looks like this:
 
-```typescript
+```typescript Typescript -wcn
 export async function searchTopRelevantUrls(
   query: string,
   topK = config.search.topK,
 
 Step 2: Scrape each URL to Markdown with Steel
 ----------------------------------------------
 
-- For each URL, POST to Steel’s scrape endpoint.
+- For each URL, make a request to Steel's `/v1/scrape` endpoint.
 - Request Markdown by setting `format: ["markdown"]`.
-- The response contains `content.markdown`, `links`, and metadata.
+- The response contains `content.markdown` and metadata.
 
-```typescript
+```typescript Typescript -wcn
 export async function scrapeUrlToMarkdown(
   url: string,
 ): Promise {
 
 Step 3: Synthesize an answer with inline citations
 --------------------------------------------------
 
 - Build a context that enumerates materials like `[1] URL`, then the Markdown.
 - Prompt the model to cite with `[n]` as it writes.
 - Return an answer plus a `sources` array mapping `[n] -> url`.
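
Because the `[n]` markers come back as plain text, it is worth checking them against the source list after synthesis; here is a small sketch of such a post-check (a hypothetical helper, not part of the example code):

```typescript Typescript -wcn
// Collect the distinct [n] indices cited in an answer and flag any that
// don't correspond to a scraped material.
function auditCitations(answer: string, sourceCount: number) {
  const used = new Set<number>();
  for (const match of answer.matchAll(/\[(\d+)\]/g)) {
    used.add(Number(match[1]));
  }
  const unknown = [...used].filter((n) => n < 1 || n > sourceCount);
  return { used: [...used].sort((a, b) => a - b), unknown };
}
```

Anything in `unknown` points at a citation the model invented. The synthesis function itself: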
-```typescript +```typescript Typescript -wcn export async function synthesizeWithCitations( input: SynthesisInput, ): Promise { @@ -419,7 +405,6 @@ export async function synthesizeWithCitations( return { answer, sources, - _raw: completion, }; } ``` @@ -429,15 +414,18 @@ Run and interpret the output After `npm start`, the script logs the result step-by-step: -```bash +``` ✔ Search complete ✔ Scraping complete ✔ Answer synthesized -Prediction markets offer a practical way to hedge specific risks and to add liquidity to broader market positions by turning uncertain outcomes into tradable, cash-settled contracts. Their price signals aggregate diverse information in real time, creating hedging tools and a more liquid trading environment than many traditional markets. [1] -... - -## How prediction markets create hedging opportunities +## Prediction Markets +Prediction markets offer a practical way to +hedge specific risks and to add liquidity to broader +market positions by turning uncertain outcomes into tradable, +cash-settled contracts. Their price signals aggregate diverse +information in real time, creating hedging tools and a more +liquid trading environment than many traditional markets. [1] ... ``` Tuning and tips @@ -448,7 +436,7 @@ Tuning and tips - `CONCURRENCY` controls how many pages you scrape at once. - Respect rate limits -- Steel Hobby plan allows ~20 requests/min. To add an actual delay between scrapes, replace the no-op `setTimeout(() => {}, config.steel.timeout)` with an awaited delay: +- Steel Hobby plan allows ~20 requests/min. - Timeouts @@ -458,7 +446,6 @@ Tuning and tips - Use `OPENAI_MODEL` to choose a cost-effective model for both query generation and synthesis. - Debugging - - The code returns `_raw` payloads in some helpers to aid troubleshooting. - Log the ranked URL list before scraping if you need to inspect relevance. Example project @@ -471,7 +458,6 @@ What to customize next - Swap Brave for another Search API if you prefer - Add caching for search and scrapes -- Stream synthesis tokens for a live UI - Persist answers and materials to a database - Filter sources by domain whitelist/blacklist