diff --git a/tools/media/README.md b/tools/media/README.md new file mode 100644 index 0000000..9d1a795 --- /dev/null +++ b/tools/media/README.md @@ -0,0 +1,123 @@ +# Media Indexer + +A tool for building and maintaining a media index from DA Live's medialog and auditlog APIs. + +## Architecture + +The indexer is now modular for better maintainability and extensibility: + +``` +tools/media/ +├── indexer.html # Entry point HTML +├── indexer.css # Styles +├── indexer.js # Main entry (initialization & auth) +├── indexer-old.js # Backup of monolithic version +└── lib/ + ├── config.js # Configuration & state management + ├── api.js # DA Admin API calls + ├── helpers.js # Utility functions (normalizePath, isPage, etc.) + ├── builder.js # Core index building logic + └── ui.js # UI rendering & event handling +``` + +## Modules + +### `lib/config.js` +- URL parameter parsing (org, repo, ref) +- Global state management +- Constants (DA_ADMIN, sitePath) + +### `lib/api.js` +- `fetchWithAuth()` - Authenticated fetch wrapper +- `daFetch()` - DA API fetch wrapper +- `loadMeta()` - Load metadata from DA +- `createSheet()` - Create sheet format for DA +- `saveMeta()` - Save metadata to DA +- `fetchFromAdminAPI()` - Fetch from auditlog/medialog with pagination + +### `lib/helpers.js` +- `normalizePath()` - Normalize paths (add .md, remove query params) +- `isPage()` - Detect if path is a page vs media file +- `extractName()` - Extract filename from medialog entry +- `detectMediaType()` - Detect media type from contentType + +### `lib/builder.js` +- `getIndexStatus()` - Get current index metadata +- `buildInitialIndex()` - Core indexing logic: + 1. Fetch auditlog entries + 2. Fetch medialog entries + 3. Match media to pages (5-second time window) + 4. Deduplicate by hash + 5. 
Save index to DA + +### `lib/ui.js` +- `render()` - Render UI with status, progress, logs, errors +- `attachEventListeners()` - Handle button clicks + +### `indexer.js` +- Main entry point +- DA SDK authentication +- Initialize UI + +## Index Schema + +Each entry in the media index: + +```javascript +{ + hash: "abc123", // Media hash (unique identifier) + pages: "/page1.md|/page2.md", // Pipe-separated list of pages using this media + url: "https://.../media_abc.jpg", // Full URL to media + name: "photo.jpg", // Filename (extracted from URL) + timestamp: 1771704070155, // Latest usage timestamp + user: "user@example.com", // User who uploaded/used it + operation: "reuse", // Latest operation (ingest/reuse) + type: "img > jpeg", // Media type (category > extension) + status: "referenced" // Status (referenced/unused) +} +``` + +## Indexing Rules + +- **Latest event only:** For each page, use only the latest auditlog event. Skip all others. Multiple events in a batch are sorted by timestamp; only the most recent determines the current page state. + +## Phase 1 (Current) + +✅ Media Bus items (images/videos) from medialog API +✅ Deduplicated by hash +✅ Pipe-separated pages for multi-page usage +✅ Latest usage tracking + +## Phase 2 (Current) + +- Linked content (PDFs, SVGs, fragments) from auditlog +- HTML parsing for usage detection (extractFragmentReferences, extractLinks) +- Source: "auditlog-parsed" +- Index entries: path, usedIn, timestamp, type, status + +## Phase 3 (Future) + +- Streaming architecture for large sites +- Chunked processing +- Memory optimization + +## Usage + +1. Open in browser: `https://main--repo--org.aem.page/tools/media/indexer.html?org=yourorg&repo=yourrepo` +2. Authenticate with DA Live +3. Click "Build Initial Index" +4. 
Index saved to `/.da/mediaindex/media-index.json` + +## Development + +Run linting: +```bash +npm run lint:js +npm run lint:css +``` + +Test locally: +```bash +npx @adobe/aem-cli up +# Open http://localhost:3000/tools/media/indexer.html?org=yourorg&repo=yourrepo +``` diff --git a/tools/media/indexer-old.js b/tools/media/indexer-old.js new file mode 100644 index 0000000..1ca1414 --- /dev/null +++ b/tools/media/indexer-old.js @@ -0,0 +1,507 @@ +/* eslint-disable import/no-absolute-path, import/no-unresolved */ +/* The DA SDK is loaded from the da.live CDN and is required for authentication */ +import DA_SDK from 'https://da.live/nx/utils/sdk.js'; + +// Parse URL parameters +const params = new URLSearchParams(window.location.search); +const org = params.get('org'); +const repo = params.get('repo') || params.get('site'); +const ref = 'main'; +const sitePath = `/${org}/${repo}`; + +const state = { + building: false, + progress: { stage: 'idle', message: '', percent: 0 }, + errors: [], + logs: [], + status: null, + daToken: null, +}; + +async function fetchWithAuth(url, opts = {}) { + opts.headers ||= {}; + if (state.daToken) { + opts.headers.Authorization = `Bearer ${state.daToken}`; + } + return fetch(url, opts); +} + +const DA_ADMIN = 'https://admin.da.live'; + +async function daFetch(url, opts = {}) { + opts.headers ||= {}; + if (state.daToken) { + opts.headers.Authorization = `Bearer ${state.daToken}`; + } + return fetch(url, opts); +} + +async function loadMeta(path) { + try { + const resp = await daFetch(`${DA_ADMIN}/source${path}`); + if (resp.ok) { + const data = await resp.json(); + return data.data?.[0] || data; + } + } catch { + return null; + } + return null; +} + +async function createSheet(data, type = 'sheet') { + const sheetMeta = { + total: data.length, + limit: data.length, + offset: 0, + data, + ':type': type, + }; + const blob = new Blob([JSON.stringify(sheetMeta, null, 2)], { type: 'application/json' }); + const formData = new FormData(); + 
formData.append('data', blob); + return formData; +} + +async function saveMeta(meta, path) { + const metaArray = Array.isArray(meta) ? meta : [meta]; + const formData = await createSheet(metaArray); + return daFetch(`${DA_ADMIN}/source${path}`, { + method: 'POST', + body: formData, + }); +} + +function timestampToDuration(timestamp) { + if (!timestamp) return '90d'; + const ageMs = Date.now() - timestamp; + const days = Math.ceil(ageMs / (24 * 60 * 60 * 1000)); + if (days < 1) { + const hours = Math.ceil(ageMs / (60 * 60 * 1000)); + return hours > 0 ? `${hours}h` : '1h'; + } + return `${Math.min(days, 90)}d`; +} + +async function fetchFromAdminAPI(endpoint, orgName, repoName, refName, since, limit, onPageLoaded) { + const fetchParams = new URLSearchParams(); + fetchParams.append('limit', limit.toString()); + + const sinceDuration = since ? timestampToDuration(since) : '90d'; + fetchParams.append('since', sinceDuration); + + const baseUrl = `https://admin.hlx.page/${endpoint}/${orgName}/${repoName}/${refName}`; + const separator = endpoint === 'medialog' ? 
'/' : ''; + const url = `${baseUrl}${separator}?${fetchParams.toString()}`; + + const resp = await fetchWithAuth(url); + + if (!resp.ok) { + throw new Error(`${endpoint} API error: ${resp.status} ${resp.statusText}`); + } + + const data = await resp.json(); + const entries = data.entries || data.data || []; + const { nextToken } = data; + + if (onPageLoaded && entries.length > 0) { + onPageLoaded(entries, !!nextToken); + } + + async function fetchNextPage(token) { + if (!token) return []; + + fetchParams.set('nextToken', token); + const nextUrl = `${baseUrl}${separator}?${fetchParams.toString()}`; + const nextResp = await fetchWithAuth(nextUrl); + + if (!nextResp.ok) return []; + + const nextData = await nextResp.json(); + const nextEntries = nextData.entries || nextData.data || []; + + if (!nextEntries || nextEntries.length === 0) return []; + + if (onPageLoaded) { + onPageLoaded([...entries, ...nextEntries], !!nextData.nextToken); + } + + const remainingEntries = await fetchNextPage(nextData.nextToken); + return [...nextEntries, ...remainingEntries]; + } + + const additionalEntries = await fetchNextPage(nextToken); + return [...entries, ...additionalEntries]; +} + +/** + * Normalize a path by removing query params/fragments and adding .md for pages + * @param {string} path - The path to normalize + * @returns {string} Normalized path + */ +function normalizePath(path) { + if (!path) return ''; + let cleanPath = path.split('?')[0].split('#')[0]; + // Add .md for pages: /drafts/page -> /drafts/page.md + if (!cleanPath.includes('.') && !cleanPath.startsWith('/media/')) { + cleanPath = `${cleanPath}.md`; + } + return cleanPath; +} + +/** + * Detect if a path represents a page (not a media file or fragment) + * @param {string} path - The path to check + * @returns {boolean} True if path is a page + */ +function isPage(path) { + if (!path || typeof path !== 'string') return false; + return (path.endsWith('.md') + || (!path.includes('.') && !path.startsWith('/media/'))) 
+ && !path.includes('/fragments/'); +} + +/** + * Extract the filename from a medialog entry + * @param {object} mediaEntry - The medialog entry + * @returns {string} The filename without query params or fragments + */ +function extractName(mediaEntry) { + if (!mediaEntry) return ''; + if (mediaEntry.operation === 'ingest' && mediaEntry.originalFilename) { + return mediaEntry.originalFilename.split('/').pop(); + } + if (!mediaEntry.path) return ''; + // Remove query params (?...) and URL fragments (#...) + return mediaEntry.path.split('?')[0].split('#')[0].split('/').pop(); +} + +/** + * Detect media type from contentType in structured format + * @param {object} mediaEntry - The medialog entry + * @returns {string} Type in format "category > extension" + */ +function detectMediaType(mediaEntry) { + const contentType = mediaEntry.contentType || ''; + if (contentType.startsWith('image/')) { + const ext = contentType.split('/')[1]; + return `img > ${ext}`; + } + if (contentType.startsWith('video/')) { + const ext = contentType.split('/')[1]; + return `video > ${ext}`; + } + return 'unknown'; +} + +async function getIndexStatus() { + const metaPath = `${sitePath}/.da/mediaindex/medialog-meta.json`; + const meta = await loadMeta(metaPath); + + return { + lastRefresh: meta?.lastFetchTime || null, + entriesCount: meta?.entriesCount || 0, + }; +} + +async function buildInitialIndex(onProgress) { + const index = []; + + // Phase 1: Fetch auditlog entries + onProgress({ stage: 'fetching', message: 'Fetching auditlog entries...', percent: 10 }); + + const auditlogEntries = await fetchFromAdminAPI('log', org, repo, ref, null, 1000, (entries, hasMore) => { + onProgress({ + stage: 'fetching', + message: `Fetched ${entries.length} auditlog entries${hasMore ? 
' (more available)' : ''}...`, + percent: 20, + }); + }); + + // Separate pages from files (filter out entries with invalid paths) + const validEntries = auditlogEntries.filter((e) => e && e.path); + const pages = validEntries.filter((e) => isPage(e.path)); + const files = validEntries.filter((e) => !isPage(e.path)); + + onProgress({ + stage: 'fetching', + message: `Identified ${pages.length} pages and ${files.length} files from auditlog`, + percent: 30, + }); + + // Phase 2: Fetch medialog entries + onProgress({ stage: 'fetching', message: 'Fetching medialog entries...', percent: 40 }); + + const medialogEntries = await fetchFromAdminAPI('medialog', org, repo, ref, null, 1000, (entries, hasMore) => { + onProgress({ + stage: 'fetching', + message: `Fetched ${entries.length} medialog entries${hasMore ? ' (more available)' : ''}...`, + percent: 50, + }); + }); + + onProgress({ + stage: 'processing', + message: `Processing ${pages.length} pages with ${medialogEntries.length} medialog entries...`, + percent: 60, + }); + + // Phase 3: Build hash map (deduplicate by hash, track all pages) + const hashMap = new Map(); + + // Process page-referenced media + pages.forEach((pageEvent) => { + const normalizedPath = normalizePath(pageEvent.path); + + // Find matching medialog entries within 5-second time window + const pageMedia = medialogEntries.filter((m) => { + if (!m.resourcePath) return false; + if (m.resourcePath !== normalizedPath) return false; + + const TIME_WINDOW_MS = 5000; + return m.timestamp >= pageEvent.timestamp + && m.timestamp < pageEvent.timestamp + TIME_WINDOW_MS; + }); + + // Add to hash map + pageMedia.forEach((media) => { + const hash = media.mediaHash; + if (!hashMap.has(hash)) { + // First time seeing this hash - initialize entry + hashMap.set(hash, { + hash, + pages: new Set([normalizedPath]), + url: media.path, + name: extractName(media), + timestamp: media.timestamp, + user: media.user, + operation: media.operation, + type: detectMediaType(media), + 
status: 'referenced', + }); + } else { + // Hash exists - update with latest info + const entry = hashMap.get(hash); + entry.pages.add(normalizedPath); + + // Keep latest timestamp (since logs are sorted newest first) + if (media.timestamp > entry.timestamp) { + entry.timestamp = media.timestamp; + entry.operation = media.operation; + } + } + }); + }); + + onProgress({ + stage: 'processing', + message: `Processed ${pages.length} pages, found ${hashMap.size} unique media items`, + percent: 70, + }); + + // Phase 4: Process standalone uploads (not on any page yet) + const standaloneUploads = medialogEntries.filter((m) => !m.resourcePath && m.originalFilename); + + standaloneUploads.forEach((media) => { + const hash = media.mediaHash; + if (!hashMap.has(hash)) { + // Only add if not already referenced on a page + hashMap.set(hash, { + hash, + pages: new Set(), + url: media.path, + name: media.originalFilename.split('/').pop(), + timestamp: media.timestamp, + user: media.user, + operation: media.operation, + type: detectMediaType(media), + status: 'unused', + }); + } + }); + + onProgress({ + stage: 'processing', + message: `Added ${standaloneUploads.length} standalone uploads, total unique: ${hashMap.size}`, + percent: 80, + }); + + // Convert Map to array with pipe-separated pages + hashMap.forEach((entry) => { + index.push({ + hash: entry.hash, + pages: Array.from(entry.pages).join('|'), + url: entry.url, + name: entry.name, + timestamp: entry.timestamp, + user: entry.user, + operation: entry.operation, + type: entry.type, + status: entry.status, + }); + }); + + // Phase 5: Save index + onProgress({ stage: 'saving', message: `Saving ${index.length} entries...`, percent: 90 }); + + const indexPath = `${sitePath}/.da/mediaindex/media-index.json`; + const formData = await createSheet(index); + await daFetch(`${DA_ADMIN}/source${indexPath}`, { + method: 'POST', + body: formData, + }); + + await saveMeta({ + lastFetchTime: Date.now(), + entriesCount: index.length, + 
lastRefreshBy: 'media-indexer', + }, `${sitePath}/.da/mediaindex/medialog-meta.json`); + + onProgress({ stage: 'complete', message: `Complete! ${index.length} entries indexed`, percent: 100 }); + + return { entriesCount: index.length }; +} + +function render() { + const app = document.getElementById('app'); + + const statusHtml = state.status ? ` +
+

Current Index Status

+
+
+ + ${state.status.lastRefresh ? new Date(state.status.lastRefresh).toLocaleString() : 'Never'} +
+
+ + ${state.status.entriesCount || 0} +
+
+
+ ` : '
Checking status...
'; + + const progressHtml = state.building || state.progress.stage !== 'idle' ? ` +
+

Progress

+
+
+
+
+ ${state.progress.stage} + ${state.progress.message} +
+
+ ` : ''; + + const logsHtml = state.logs.length > 0 ? ` +
+

Logs (${state.logs.length})

+ +
+ ` : ''; + + const errorsHtml = state.errors.length > 0 ? ` +
+

Errors (${state.errors.length})

+ +
+ ` : ''; + + app.innerHTML = ` +

Media Index Builder

+

Building index for: ${org}/${repo}

+ + ${statusHtml} + +
+ +
+ + ${progressHtml} + ${errorsHtml} + ${logsHtml} + `; +} + +function attachEventListeners() { + if (!state.building) { + const buildBtn = document.getElementById('buildBtn'); + if (buildBtn) { + buildBtn.addEventListener('click', () => { + state.building = true; + state.errors = []; + state.logs = []; + state.progress = { stage: 'starting', message: 'Starting build...', percent: 0 }; + render(); + + buildInitialIndex((progress) => { + state.progress = progress; + state.logs.push({ message: progress.message, type: 'info' }); + render(); + }) + .then((result) => { + state.logs.push({ message: `Index built successfully: ${result.entriesCount} entries`, type: 'success' }); + return getIndexStatus(); + }) + .then((status) => { + state.status = status; + }) + .catch((error) => { + state.errors.push({ message: error.message }); + state.logs.push({ message: `Error: ${error.message}`, type: 'error' }); + state.progress = { stage: 'error', message: error.message, percent: 0 }; + }) + .finally(() => { + state.building = false; + render(); + attachEventListeners(); + }); + }); + } + } +} + +async function init() { + if (!org || !repo) { + document.getElementById('app').innerHTML = ` +
+

Missing Parameters

+

Please provide org and repo parameters in the URL:

+
?org=yourorg&repo=yourrepo
+
+ `; + return; + } + + // Get DA token with timeout + try { + const tokenPromise = DA_SDK; + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error('Authentication timeout')), 5000); + }); + + const result = await Promise.race([tokenPromise, timeoutPromise]); + state.daToken = result?.token; + } catch (error) { + state.errors.push({ message: `Failed to get DA token: ${error.message}` }); + } + + if (!state.daToken) { + const returnUrl = encodeURIComponent(window.location.href); + window.location.href = `https://da.live/?returnUrl=${returnUrl}`; + return; + } + + state.status = await getIndexStatus(); + render(); + attachEventListeners(); +} + +init(); diff --git a/tools/media/indexer.css b/tools/media/indexer.css new file mode 100644 index 0000000..36a1647 --- /dev/null +++ b/tools/media/indexer.css @@ -0,0 +1,315 @@ +/* ========== Base & Variables ========== */ + +:root { + /* Gray Scale */ + --s2-gray-50: #f9fafb; + --s2-gray-100: #f3f4f6; + --s2-gray-200: #e5e7eb; + --s2-gray-300: #d1d5db; + --s2-gray-400: #9ca3af; + --s2-gray-500: #6b7280; + --s2-gray-600: #4b5563; + --s2-gray-700: #374151; + --s2-gray-900: #111827; + + /* Blue Scale */ + --s2-blue-50: #eff6ff; + --s2-blue-100: #dbeafe; + --s2-blue-200: #bfdbfe; + --s2-blue-300: #93c5fd; + --s2-blue-500: #3b82f6; + --s2-blue-600: #2563eb; + --s2-blue-700: #1d4ed8; + --s2-blue-900: #1e3a8a; + + /* Green Scale */ + --s2-green-100: rgb(215 247 225); + --s2-green-900: #065f46; + + /* Red Scale */ + --s2-red-100: rgb(255 214 209); + --s2-red-700: #991b1b; + + /* Spacing */ + --spacing-100: 4px; + --spacing-200: 8px; + --spacing-300: 12px; + --spacing-400: 16px; + --spacing-500: 24px; + --spacing-600: 32px; + --spacing-700: 40px; + + /* Border Radius */ + --s2-radius-100: 4px; + --s2-radius-200: 8px; + --s2-radius-300: 18px; + + /* Typography */ + --body-font-family: 'Adobe Clean', adobe-clean, 'Trebuchet MS', sans-serif; + --mono-font-family: 'Roboto Mono', menlo, consolas, 
'Liberation Mono', monospace; + --s2-font-size-200: 14px; + --s2-font-size-300: 16px; + --s2-font-size-400: 16px; + --s2-font-size-600: 24px; + --s2-font-size-700: 32px; +} + +* { + box-sizing: border-box; +} + +body { + font-family: var(--body-font-family); + color: var(--s2-gray-900); + line-height: 1.6; + margin: 0; + padding: 0; + background: var(--s2-gray-50); +} + +#app { + max-width: 1200px; + margin: var(--spacing-700) auto; + padding: 0 var(--spacing-400); +} + +h1 { + font-size: var(--s2-font-size-700); + font-weight: 700; + line-height: 1.2; + margin: 0 0 var(--spacing-200); + color: var(--s2-gray-900); +} + +h2 { + font-size: 20px; + font-weight: 600; + margin: 0 0 var(--spacing-400); + color: var(--s2-gray-900); +} + +h3 { + font-size: 18px; + font-weight: 600; + margin: 0 0 var(--spacing-300); + color: var(--s2-gray-900); +} + +p { + font-size: var(--s2-font-size-400); + color: var(--s2-gray-600); + margin: 0 0 var(--spacing-600); +} + +/* ========== Status Panel ========== */ + +.status-panel { + background: white; + border: 1px solid var(--s2-gray-200); + border-radius: var(--s2-radius-200); + padding: var(--spacing-500); + margin-bottom: var(--spacing-600); +} + +.status-loading { + padding: var(--spacing-600); + text-align: center; + color: var(--s2-gray-600); + background: white; + border: 1px solid var(--s2-gray-200); + border-radius: var(--s2-radius-200); + margin-bottom: var(--spacing-600); +} + +.status-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + gap: var(--spacing-500); +} + +.status-item { + display: flex; + flex-direction: column; + gap: var(--spacing-100); +} + +.status-item label { + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + color: var(--s2-gray-400); + letter-spacing: 1px; +} + +.status-item span { + font-size: var(--s2-font-size-300); + color: var(--s2-gray-900); + font-weight: 500; +} + +/* ========== Actions ========== */ + +.actions { + margin-bottom: 
var(--spacing-600); +} + +button, +.button { + font-family: var(--body-font-family); + font-size: 15px; + font-weight: 700; + padding: 8px 24px; + line-height: 18px; + border: 2px solid #000; + color: #000; + border-radius: var(--s2-radius-300); + background: none; + cursor: pointer; + transition: all 0.2s; + text-align: center; +} + +button:disabled { + background-color: #efefef; + border: 2px solid #efefef; + color: var(--s2-gray-700); + cursor: not-allowed; +} + +button:hover:not(:disabled) { + background: var(--s2-gray-100); +} + +button.accent, +.btn-primary { + background: #3b63fb; + border: 2px solid #3b63fb; + color: #fff; +} + +button.accent:hover:not(:disabled), +.btn-primary:hover:not(:disabled) { + background: #2952e8; + border: 2px solid #2952e8; +} + +/* ========== Progress Section ========== */ + +.progress-section { + background: white; + border: 1px solid var(--s2-gray-200); + border-radius: var(--s2-radius-200); + padding: var(--spacing-500); + margin-bottom: var(--spacing-600); +} + +.progress-bar { + width: 100%; + height: 8px; + background: var(--s2-gray-200); + border-radius: var(--s2-radius-100); + overflow: hidden; + margin-bottom: var(--spacing-300); +} + +.progress-fill { + height: 100%; + background: #3b63fb; + transition: width 0.3s ease; +} + +.progress-info { + display: flex; + gap: var(--spacing-300); + align-items: center; + font-size: var(--s2-font-size-200); +} + +.progress-stage { + font-weight: 600; + color: var(--s2-gray-900); + text-transform: capitalize; +} + +.progress-message { + color: var(--s2-gray-600); + flex: 1; +} + +.progress-timing { + display: flex; + gap: var(--spacing-500); + margin-top: var(--spacing-200); + font-size: var(--s2-font-size-200); + color: var(--s2-gray-500); +} + +/* ========== Logs Section ========== */ + +.logs-section, +.errors-section { + background: white; + border: 1px solid var(--s2-gray-200); + border-radius: var(--s2-radius-200); + padding: var(--spacing-500); + margin-bottom: 
var(--spacing-600); +} + +.logs-list, +.errors-list { + list-style: none; + margin: 0; + padding: 0; + max-height: 400px; + overflow-y: auto; +} + +.logs-list li, +.errors-list li { + padding: var(--spacing-200) var(--spacing-300); + margin-bottom: var(--spacing-100); + border-radius: var(--s2-radius-100); + font-size: 13px; + font-family: var(--mono-font-family); +} + +.log-info { + background: var(--s2-gray-100); + color: var(--s2-gray-900); +} + +.log-success { + background: var(--s2-green-100); + color: var(--s2-green-900); +} + +.log-error, +.errors-list li { + background: var(--s2-red-100); + color: var(--s2-red-700); +} + +/* ========== Error Page ========== */ + +.error { + background: white; + border: 1px solid var(--s2-gray-200); + border-radius: var(--s2-radius-200); + padding: var(--spacing-700); + text-align: center; +} + +.error h1 { + color: var(--s2-red-700); +} + +.error pre { + background: var(--s2-gray-100); + padding: var(--spacing-400); + border-radius: var(--s2-radius-100); + text-align: left; + display: inline-block; + margin-top: var(--spacing-400); + font-family: var(--mono-font-family); + font-size: 13px; +} diff --git a/tools/media/indexer.html b/tools/media/indexer.html new file mode 100644 index 0000000..1c75061 --- /dev/null +++ b/tools/media/indexer.html @@ -0,0 +1,14 @@ + + + + + + Media Index Builder + + + + +
+ + + diff --git a/tools/media/indexer.js b/tools/media/indexer.js new file mode 100644 index 0000000..ede955e --- /dev/null +++ b/tools/media/indexer.js @@ -0,0 +1,60 @@ +/* eslint-disable import/no-absolute-path, import/no-unresolved */ +/* The DA SDK is loaded from the da.live CDN and is required for authentication */ +import DA_SDK from 'https://da.live/nx/utils/sdk.js'; + +import { state, org, repo } from './lib/config.js'; +import { getIndexStatus } from './lib/builder.js'; +import { render, attachEventListeners } from './lib/ui.js'; + +/** Constants */ +const AUTH_TIMEOUT_MS = 5000; // Timeout for DA authentication + +async function init() { + if (!org || !repo) { + const params = new URLSearchParams(window.location.search); + const rawOrg = params.get('org'); + const rawRepo = params.get('repo') || params.get('site'); + + let errorMsg = '

Please provide valid org and repo parameters in the URL:

'; + if (!rawOrg || !rawRepo) { + errorMsg += '

Missing required parameters.

'; + } else { + errorMsg += '

Invalid parameter format. Names must be alphanumeric with optional hyphens, underscores, or dots.

'; + } + + document.getElementById('app').innerHTML = ` +
+

Configuration Error

+ ${errorMsg} +
?org=yourorg&repo=yourrepo
+

Example: ?org=mycompany&repo=myproject

+
+ `; + return; + } + + // Get DA token with timeout + try { + const tokenPromise = DA_SDK; + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error('Authentication timeout')), AUTH_TIMEOUT_MS); + }); + + const result = await Promise.race([tokenPromise, timeoutPromise]); + state.daToken = result?.token; + } catch (error) { + state.errors.push({ message: `Failed to get DA token: ${error.message}` }); + } + + if (!state.daToken) { + const returnUrl = encodeURIComponent(window.location.href); + window.location.href = `https://da.live/?returnUrl=${returnUrl}`; + return; + } + + state.status = await getIndexStatus(); + render(); + attachEventListeners(); +} + +init(); diff --git a/tools/media/lib/api.js b/tools/media/lib/api.js new file mode 100644 index 0000000..f916f8c --- /dev/null +++ b/tools/media/lib/api.js @@ -0,0 +1,314 @@ +/** + * DA Admin API functions for fetching logs and saving data + */ + +import { + state, DA_ADMIN, org, repo, ref, +} from './config.js'; +import * as logger from './logger.js'; + +/** Constants */ +const RATE_LIMIT_DELAY_MS = 100; // Delay between paginated API requests + +/** + * Fetch with DA authentication token + * @param {string} url - URL to fetch + * @param {object} opts - Fetch options + * @returns {Promise} Fetch response + */ +export async function daFetch(url, opts = {}) { + opts.headers ||= {}; + if (state.daToken) { + opts.headers.Authorization = `Bearer ${state.daToken}`; + } + return fetch(url, opts); +} + +/** CORS proxy for cross-origin fetches (same as media-library block) */ +const CORS_PROXY_URL = 'https://media-library-cors-proxy.aem-poc-lab.workers.dev/'; + +/** + * Fetch with CORS proxy. Uses proxy first when cross-origin (e.g. localhost → aem.page) + * to avoid CORS errors; direct fetch when same-origin. 
+ * @param {string} url - URL to fetch + * @param {object} options - Fetch options + * @returns {Promise} Fetch response + */ +async function fetchWithCorsProxy(url, options = {}) { + const targetOrigin = url.startsWith('http') ? new URL(url).origin : null; + const isCrossOrigin = targetOrigin && window.location.origin !== targetOrigin; + + if (isCrossOrigin) { + const proxyUrl = `${CORS_PROXY_URL}?url=${encodeURIComponent(url)}`; + return fetch(proxyUrl, options); + } + + try { + const response = await fetch(url, options); + if (!response.ok) { + const proxyUrl = `${CORS_PROXY_URL}?url=${encodeURIComponent(url)}`; + return fetch(proxyUrl, options); + } + return response; + } catch (directError) { + if (directError.name === 'TypeError' + && (directError.message.includes('CORS') + || directError.message.includes('blocked') + || directError.message.includes('Access-Control-Allow-Origin') + || directError.message.includes('Failed to fetch'))) { + const proxyUrl = `${CORS_PROXY_URL}?url=${encodeURIComponent(url)}`; + return fetch(proxyUrl, options); + } + throw directError; + } +} + +export async function loadMeta(path) { + try { + const resp = await daFetch(`${DA_ADMIN}/source${path}`); + if (resp.ok) { + const data = await resp.json(); + return data.data?.[0] || data; + } + } catch (error) { + logger.error(`Failed to load meta from ${path}:`, error.message); + return null; + } + return null; +} + +/** + * Fetch page markdown from preview URL (org, repo from query params). + * Uses CORS proxy fallback when direct fetch fails (e.g. cross-origin). + * @param {string} pagePath - Path e.g. /drafts/page.md + * @returns {Promise} - Raw markdown or null + */ +export async function fetchPageMarkdown(pagePath) { + try { + if (!org || !repo) return null; + const path = pagePath.startsWith('/') ? 
pagePath : `/${pagePath}`; + const url = `https://${ref}--${repo}--${org}.aem.page${path}`; + const resp = await fetchWithCorsProxy(url); + if (!resp.ok) return null; + return resp.text(); + } catch (error) { + logger.error(`Failed to fetch page markdown ${pagePath}:`, error.message); + return null; + } +} + +/** + * Load media-index.json from DA (sheet format). + * @param {string} path - Path to media-index.json + * @returns {Promise} - Array of index entries, or [] if not found + */ +export async function loadIndex(path) { + try { + const resp = await daFetch(`${DA_ADMIN}/source${path}`); + if (!resp.ok) return []; + const data = await resp.json(); + const entries = data.data || []; + return Array.isArray(entries) ? entries : []; + } catch (error) { + logger.error(`Failed to load index from ${path}:`, error.message); + return []; + } +} + +/** + * List children of a DA path using the DA Admin List API. + * Returns array of items; each item may have path, name, ext, props (with lastModified). + * @param {string} path - Path within org/repo (e.g. /.da/mediaindex) + * @returns {Promise>} + */ +export async function daList(path) { + const normalizedPath = path.replace(/^\//, '') || ''; + const url = `${DA_ADMIN}/list/${org}/${repo}/${normalizedPath}`; + const resp = await daFetch(url); + if (!resp.ok) return []; + const data = await resp.json(); + const items = Array.isArray(data) ? data : (data.sources || []); + return items; +} + +/** + * Get media-index.json info from DA Admin List API (not Franklin Admin API). + * Uses DA List API since the index is stored in DA. + * @param {string} folderPath - Path to mediaindex folder within repo (e.g. 
.da/mediaindex) + * @returns {Promise<{exists: boolean, lastModified: number|null}>} + */ +export async function getMediaIndexInfo(folderPath = '.da/mediaindex') { + const items = await daList(folderPath); + const indexFile = items.find( + (item) => (item.name === 'media-index' && item.ext === 'json') + || (item.path && item.path.endsWith('/media-index.json')), + ); + if (!indexFile) return { exists: false, lastModified: null }; + // DA List API: lastModified is Unix timestamp (ms) on item (docs.da.live/developers/api/list) + const lastMod = indexFile.lastModified ?? indexFile.props?.lastModified; + const ts = lastMod != null && typeof lastMod === 'number' ? lastMod : null; + return { exists: true, lastModified: ts }; +} + +export async function createSheet(data, type = 'sheet') { + const sheetMeta = { + total: data.length, + limit: data.length, + offset: 0, + data, + ':type': type, + }; + const blob = new Blob([JSON.stringify(sheetMeta, null, 2)], { type: 'application/json' }); + const formData = new FormData(); + formData.append('data', blob); + return formData; +} + +export async function saveMeta(meta, path) { + const metaArray = Array.isArray(meta) ? meta : [meta]; + const formData = await createSheet(metaArray); + return daFetch(`${DA_ADMIN}/source${path}`, { + method: 'POST', + body: formData, + }); +} + +function timestampToDuration(timestamp) { + if (!timestamp) return '90d'; + const ageMs = Date.now() - timestamp; + const days = Math.ceil(ageMs / (24 * 60 * 60 * 1000)); + if (days < 1) { + const hours = Math.ceil(ageMs / (60 * 60 * 1000)); + return hours > 0 ? `${hours}h` : '1h'; + } + return `${Math.min(days, 90)}d`; +} + +export async function fetchFromAdminAPI( + endpoint, + orgName, + repoName, + refName, + since, + limit, + onPageLoaded, +) { + const fetchParams = new URLSearchParams(); + fetchParams.append('limit', limit.toString()); + + // API default (no since) = from=now-15min, to=now. For initial index use max span. 
+ const sinceDuration = since != null ? timestampToDuration(since) : '36500d'; + fetchParams.append('since', sinceDuration); + + const baseUrl = `https://admin.hlx.page/${endpoint}/${orgName}/${repoName}/${refName}`; + const separator = endpoint === 'medialog' ? '/' : ''; + const url = `${baseUrl}${separator}?${fetchParams.toString()}`; + + const resp = await daFetch(url); + + if (!resp.ok) { + throw new Error(`${endpoint} API error: ${resp.status} ${resp.statusText}`); + } + + const data = await resp.json(); + const entries = data.entries || data.data || []; + const { nextToken } = data; + + if (onPageLoaded && entries.length > 0) { + onPageLoaded(entries, !!nextToken); + } + + async function fetchNextPage(token) { + if (!token) return []; + + fetchParams.set('nextToken', token); + const nextUrl = `${baseUrl}${separator}?${fetchParams.toString()}`; + const nextResp = await daFetch(nextUrl); + + if (!nextResp.ok) return []; + + const nextData = await nextResp.json(); + const nextEntries = nextData.entries || nextData.data || []; + + if (onPageLoaded && nextEntries?.length > 0) { + onPageLoaded(nextEntries, !!nextData.nextToken); + } + + const remainingEntries = nextData.nextToken + ? await fetchNextPage(nextData.nextToken) + : []; + return [...(nextEntries || []), ...remainingEntries]; + } + + const additionalEntries = await fetchNextPage(nextToken); + return [...entries, ...additionalEntries]; +} + +/** Delay helper for rate limiting */ +function sleep(ms) { + return new Promise((resolve) => { + setTimeout(resolve, ms); + }); +} + +/** + * Stream fetch from Admin API - yields chunks to onChunk, does not accumulate in memory. + * @param {string} endpoint - 'log' or 'medialog' + * @param {string} orgName - Org + * @param {string} repoName - Repo + * @param {string} refName - Ref (e.g. 
'main')
 * @param {number|null} since - Timestamp for incremental, or null for full
 * @param {number} limit - Page size
 * @param {Function} onChunk - (entries: Array<object>) => void|Promise<void> - called per chunk
 */
export async function fetchFromAdminAPIStreaming(
  endpoint,
  orgName,
  repoName,
  refName,
  since,
  limit,
  onChunk,
) {
  const fetchParams = new URLSearchParams();
  fetchParams.append('limit', limit.toString());
  const sinceDuration = since != null ? timestampToDuration(since) : '36500d';
  fetchParams.append('since', sinceDuration);

  const baseUrl = `https://admin.hlx.page/${endpoint}/${orgName}/${repoName}/${refName}`;
  // medialog requires a trailing slash before the query string; log does not.
  const separator = endpoint === 'medialog' ? '/' : '';
  let nextUrl = `${baseUrl}${separator}?${fetchParams.toString()}`;

  /* eslint-disable no-await-in-loop -- sequential fetch required for pagination */
  while (nextUrl) {
    const resp = await daFetch(nextUrl);

    if (!resp.ok) {
      throw new Error(`${endpoint} API error: ${resp.status} ${resp.statusText}`);
    }

    const data = await resp.json();
    const entries = data.entries || data.data || [];

    // Hand each page to the caller immediately; awaiting lets the consumer
    // apply backpressure (e.g. flush to storage) before the next request.
    if (entries.length > 0 && onChunk) {
      await onChunk(entries);
    }

    // Pagination: prefer an absolute/relative links.next URL when present,
    // otherwise fall back to nextToken appended to the original params.
    const nextLink = data.links?.next;
    const token = data.nextToken;
    logger.debug(`[${endpoint}] page: ${entries.length} entries | response keys: ${Object.keys(data).join(', ')} | nextToken=${token ?? 'null'} | links.next=${nextLink ?? 'null'}`);

    if (nextLink && typeof nextLink === 'string' && nextLink.trim()) {
      const base = `${baseUrl}${separator}`;
      nextUrl = nextLink.startsWith('http') ? nextLink : new URL(nextLink, base).href;
    } else if (token) {
      fetchParams.set('nextToken', token);
      nextUrl = `${baseUrl}${separator}?${fetchParams.toString()}`;
    } else {
      nextUrl = null;
    }

    // Throttle between pages only when another page is pending.
    if (nextUrl) await sleep(RATE_LIMIT_DELAY_MS);
  }
  /* eslint-enable no-await-in-loop */
}
diff --git a/tools/media/lib/builder.js b/tools/media/lib/builder.js
new file mode 100644
index 0000000..7457bbb
--- /dev/null
+++ b/tools/media/lib/builder.js
@@ -0,0 +1,880 @@
/**
 * Core index building logic
 */

import {
  org, repo, ref, sitePath, DA_ADMIN,
} from './config.js';
import {
  fetchFromAdminAPI, fetchFromAdminAPIStreaming, createSheet, daFetch, saveMeta, loadMeta,
  loadIndex, getMediaIndexInfo, fetchPageMarkdown,
} from './api.js';
import {
  normalizePath, isPage, extractName, detectMediaType,
  isPdf, isSvg, isFragment, isPdfOrSvg, getFileType,
  isLinkedContentPath, normalizeFilePath,
  extractFragmentReferences, extractLinks, extractIconReferences,
} from './helpers.js';
import * as logger from './logger.js';

/** Constants */
// 2 minutes tolerance for index/meta alignment
const INDEX_ALIGNMENT_TOLERANCE_MS = 120_000;
// 5s window for matching media to page events (full build)
const MEDIA_ASSOCIATION_WINDOW_MS = 5000;
// 10s window for incremental media updates
const INCREMENTAL_WINDOW_MS = 10000;
// Default page size for Admin API requests
const API_PAGE_SIZE = 1000;
// Max concurrent page markdown fetches to avoid overwhelming browser/server
const MAX_CONCURRENT_FETCHES = 10;

/**
 * Report the current state of the media index for the UI.
 * Combines the builder's own meta sheet with the DA List API's view of
 * media-index.json so staleness can be detected.
 * @returns {Promise<{lastRefresh: number|null, entriesCount: number,
 *   lastBuildMode: string|null, indexExists: boolean,
 *   indexLastModified: number|null}>}
 */
export async function getIndexStatus() {
  const metaPath = `${sitePath}/.da/mediaindex/medialog-meta.json`;
  const meta = await loadMeta(metaPath);
  const { exists: indexExists, lastModified: indexLastModified } = await getMediaIndexInfo('.da/mediaindex');

  return {
    lastRefresh: meta?.lastFetchTime || null,
    entriesCount: meta?.entriesCount || 0,
    lastBuildMode: meta?.lastBuildMode || null,
    indexExists,
    indexLastModified,
  };
}

/**
 * Determine if we can do incremental re-index instead of full build.
 * Re-index when: meta has lastFetchTime, index exists, and index lastModified aligns with meta.
 * @returns {Promise<{shouldReindex: boolean, reason?: string}>}
 */
export async function shouldReindex() {
  const metaPath = `${sitePath}/.da/mediaindex/medialog-meta.json`;
  const meta = await loadMeta(metaPath);
  const { exists: indexExists, lastModified: indexLastModified } = await getMediaIndexInfo('.da/mediaindex');

  if (!meta?.lastFetchTime) {
    return { shouldReindex: false, reason: 'No previous fetch (meta missing lastFetchTime)' };
  }
  if (!indexExists) {
    return { shouldReindex: false, reason: 'Index file does not exist in DA' };
  }
  if (indexLastModified == null) {
    return { shouldReindex: false, reason: 'DA List API did not return lastModified for media-index.json' };
  }

  // Index and meta must have been written together (within tolerance);
  // otherwise the index may be from a different build and a full rebuild is safer.
  const lastFetch = meta.lastFetchTime;
  const diff = Math.abs(lastFetch - indexLastModified);
  if (diff > INDEX_ALIGNMENT_TOLERANCE_MS) {
    return {
      shouldReindex: false,
      reason: `Index lastModified (${indexLastModified}) does not align with meta lastFetchTime (${lastFetch})`,
    };
  }

  return { shouldReindex: true };
}

/**
 * Execute async tasks with concurrency limit
 * Standard promise-pool: start tasks eagerly, track in-flight promises,
 * and await Promise.race once the pool is full. Results keep input order.
 * @param {Array} items - Items to process
 * @param {Function} fn - Async function to execute per item; receives (item, index)
 * @param {number} concurrency - Max concurrent operations
 * @returns {Promise<Array>} Results in order
 */
async function processConcurrently(items, fn, concurrency) {
  const results = [];
  const executing = [];

  for (let i = 0; i < items.length; i += 1) {
    const item = items[i];
    // Promise.resolve().then(...) defers fn so a synchronous throw rejects
    // the promise instead of escaping the loop.
    const promise = Promise.resolve().then(() => fn(item, i));
    results.push(promise);

    // NOTE(review): when concurrency > items.length no throttling is applied
    // at all — presumably intentional (pool can never fill); confirm.
    if (concurrency <= items.length) {
      const executingPromise = promise.then(() => {
        // Remove self from the in-flight set on settle.
        executing.splice(executing.indexOf(executingPromise), 1);
      });
      executing.push(executingPromise);

      if (executing.length >= concurrency) {
        // eslint-disable-next-line no-await-in-loop
        await Promise.race(executing);
      }
    }
  }

  return Promise.all(results);
}

/**
 * Build usage map for linked content (PDFs, SVGs, fragments).
 * Fetches .md from preview URL and parses markdown link syntax.
 * @param {Array<{path: string}>} pageEntries - Auditlog entries for pages
 * @param {Function} onProgress - Progress callback
 * @returns {Promise<{pdfs: Map, svgs: Map, fragments: Map}>}
 */
async function buildContentUsageMap(pageEntries, onProgress) {
  const usageMap = {
    fragments: new Map(),
    pdfs: new Map(),
    svgs: new Map(),
  };

  // Group auditlog events by normalized page path, newest first per page.
  const pagesByPath = new Map();
  pageEntries.forEach((e) => {
    const p = normalizePath(e.path);
    if (!pagesByPath.has(p)) pagesByPath.set(p, []);
    pagesByPath.get(p).push(e);
  });
  pagesByPath.forEach((events) => {
    events.sort((a, b) => b.timestamp - a.timestamp);
  });

  const uniquePages = [...pagesByPath.keys()];
  logger.debug(`[buildContentUsageMap] parsing ${uniquePages.length} unique pages: [${uniquePages.slice(0, 10).join(', ')}${uniquePages.length > 10 ? '...' : ''}]`);

  // Fetch each page's markdown with bounded concurrency; failures yield md=null.
  const results = await processConcurrently(
    uniquePages,
    async (normalizedPath, i) => {
      onProgress?.({ message: `Parsing page ${i + 1}/${uniquePages.length}: ${normalizedPath}` });
      const md = await fetchPageMarkdown(normalizedPath);
      return { normalizedPath, md };
    },
    MAX_CONCURRENT_FETCHES,
  );

  const failed = results.filter((r) => !r.md);
  if (failed.length > 0) {
    logger.warn(`[buildContentUsageMap] failed to fetch markdown for ${failed.length} pages: [${failed.map((r) => r.normalizedPath).join(', ')}]`);
  }

  // Invert: for each referenced asset path, record the pages that use it.
  results.forEach(({ normalizedPath, md }) => {
    if (!md) return;

    const fragments = extractFragmentReferences(md);
    const pdfs = extractLinks(md, /\.pdf$/);
    const svgs = extractLinks(md, /\.svg$/);
    const icons = extractIconReferences(md);

    const addToMap = (map, path) => {
      if (!map.has(path)) map.set(path, []);
      if (!map.get(path).includes(normalizedPath)) {
        map.get(path).push(normalizedPath);
      }
    };

    fragments.forEach((f) => addToMap(usageMap.fragments, f));
    pdfs.forEach((p) => addToMap(usageMap.pdfs, p));
    svgs.forEach((s) => addToMap(usageMap.svgs, s));
    // Icon references are SVGs too; they share the svgs map.
    icons.forEach((s) => addToMap(usageMap.svgs, s));
  });

  const iconPathsFromUsage = [...usageMap.svgs.keys()].filter((p) => p.includes('/icons/'));
  logger.debug(`[buildContentUsageMap] usageMap: pdfs=${usageMap.pdfs.size}, svgs=${usageMap.svgs.size}, fragments=${usageMap.fragments.size} | icon paths from parsing: [${iconPathsFromUsage.join(', ') || 'none'}]`);

  return usageMap;
}

/** No-op placeholder callback. */
function noop() {}

/**
 * Find page events matching media within time window
 * Matches media to page events that occurred BEFORE media timestamp within window
 * Time window: MEDIA_ASSOCIATION_WINDOW_MS (5s for full build)
 * Rationale: Media operations typically follow page preview within seconds
 * Example: Page preview at T, media upload at T+2s → matched (within 5s window)
 * Edge case: Media uploaded, then page previewed → not matched (preview must come first)
 * @param {Map} pagesByPath - Map of normalized path to page events
 * @param {string} resourcePath - Media resource path
 * @param {number} mediaTimestamp - Media operation timestamp
 * @returns {Array} Matching page events
 */
function findMatchingPageEvents(pagesByPath, resourcePath, mediaTimestamp) {
  const events = pagesByPath.get(resourcePath);
  if (!events || events.length === 0) return [];
  const minTs = mediaTimestamp - MEDIA_ASSOCIATION_WINDOW_MS;
  // Half-open window (minTs, mediaTimestamp]: page event precedes the media op.
  return events.filter(
    (e) => e.timestamp <= mediaTimestamp && e.timestamp > minTs,
  );
}

/** Check memory (Chrome/Edge); returns { warning, usedMB, limitMB } or { warning: false } */
function checkMemory() {
  // performance.memory is a non-standard Chromium API; absent elsewhere.
  if (typeof performance !== 'undefined' && performance.memory) {
    const used = performance.memory.usedJSHeapSize / (1024 * 1024);
    const limit = performance.memory.jsHeapSizeLimit / (1024 * 1024);
    // Warn when the heap crosses 80% of the browser-imposed limit.
    return { warning: used > limit * 0.8, usedMB: used, limitMB: limit };
  }
  return { warning: false };
}

/**
 * Remove media entry from index; handle orphaned media
 * Strategy: If removing last reference to a hash, mark as "unused" vs deleting
 * Exception: Don't add "unused" if medialog has explicit "delete" for this hash
 * Rationale: Media files persist in storage when unreferenced; track for cleanup
 * Example: Media on 2 pages, remove from 1 → still referenced
 *          Remove from both → becomes "unused"
 * @param {Array} idx - Index array
 * @param {object} entry - Entry to remove
 * @param {string} path - Page path
 * @param {Array} medialog - Medialog entries for delete detection
 * @returns {number} removed count (0 or 1)
 */
function removeMediaMaybeAddOrphan(idx, entry, path, medialog) {
  const i = idx.findIndex((e) => e.hash === entry.hash && e.page === path);
  if (i === -1) return 0;
  const { hash } = entry;
  const hasDelete = medialog.some((m) => m.mediaHash === hash && m.operation === 'delete');
  idx.splice(i, 1);
  const stillHasEntry =
idx.some((e) => e.hash === hash);
  const alreadyUnused = idx.some((e) => e.hash === hash && !e.page);
  if (!stillHasEntry && !hasDelete && !alreadyUnused) {
    // Last reference gone and no explicit delete: keep the media visible
    // as an "unused" row (empty page) so cleanup tooling can find it.
    idx.push({
      hash,
      page: '',
      url: entry.url,
      name: entry.name,
      timestamp: entry.timestamp,
      user: entry.user,
      operation: entry.operation,
      type: entry.type,
      status: 'unused',
    });
  }
  return 1;
}

/**
 * Create a linked-content index entry using the same schema as media entries
 * so the DA sheet stores all rows correctly (no column misalignment).
 * The file path doubles as the unique `hash` key for linked content.
 * @param {string} filePath - Path e.g. /icons/headset.svg
 * @param {string[]} linkedPages - Pages that reference this file
 * @param {{timestamp: number, user?: string}} fileEvent - Auditlog event
 * @param {string} status - 'referenced' or 'file-unused'
 * @returns {object} Entry matching media schema (hash, page, url, name, etc.)
 */
function toLinkedContentEntry(filePath, linkedPages, fileEvent, status) {
  // NOTE(review): pages are comma-joined here, while the README documents
  // pipe-separated page lists for media entries — confirm consumers accept both.
  const pageVal = linkedPages.length > 0 ? linkedPages.join(',') : '';
  return {
    hash: filePath,
    page: pageVal,
    url: '',
    name: filePath.split('/').pop() || filePath,
    timestamp: fileEvent.timestamp,
    user: fileEvent.user || '',
    operation: 'auditlog-parsed',
    type: getFileType(filePath),
    status,
    source: 'auditlog-parsed',
  };
}

/**
 * Process page-level media updates for incremental indexing
 * Compares old index entries with new medialog to detect additions/removals
 * @param {Array} updatedIndex - Index being built (mutated)
 * @param {Map} pagesByPath - Map of page path to events
 * @param {Array} medialogEntries - New medialog entries
 * @param {Function} onLog - Logging callback
 * @returns {{added: number, removed: number}} Counts
 */
function processPageMediaUpdates(updatedIndex, pagesByPath, medialogEntries, onLog) {
  let added = 0;
  let removed = 0;

  pagesByPath.forEach((pageEvents, normalizedPath) => {
    // pageEvents are sorted newest-first; [0] is the latest preview event.
    const latestEvent = pageEvents[0];
    const latestTs = latestEvent.timestamp;
const windowStart = latestTs; + const windowEnd = latestTs + INCREMENTAL_WINDOW_MS; + + onLog(`--- Page: ${normalizedPath} ---`); + onLog(` Latest preview: ${latestTs} (${new Date(latestTs).toISOString()})`); + onLog(` Window: [${windowStart}-${windowEnd}] (${INCREMENTAL_WINDOW_MS / 1000}s)`); + + const matchesPage = (m) => m.resourcePath && m.resourcePath === normalizedPath; + const pageMedialogAll = medialogEntries.filter(matchesPage); + const inWindow = (m) => m.timestamp >= windowStart && m.timestamp < windowEnd; + const newPageMedia = pageMedialogAll.filter(inWindow); + const outsideWindow = pageMedialogAll.filter((m) => !newPageMedia.includes(m)); + + if (pageMedialogAll.length > 0) { + onLog(` Medialog for page: ${pageMedialogAll.length} total, ${newPageMedia.length} in window, ${outsideWindow.length} outside`); + if (outsideWindow.length > 0) { + outsideWindow.slice(0, 3).forEach((m) => { + onLog(` Outside: hash=${m.mediaHash} ts=${m.timestamp} (${new Date(m.timestamp).toISOString()})`); + }); + } + } + + const oldPageEntries = updatedIndex.filter((e) => e.page === normalizedPath); + const oldHashes = new Set(oldPageEntries.map((e) => e.hash)); + const newHashes = new Set(newPageMedia.map((m) => m.mediaHash)); + + onLog(` Old (index): ${oldHashes.size} hashes ${[...oldHashes].slice(0, 5).join(', ')}${oldHashes.size > 5 ? '...' : ''}`); + onLog(` New (medialog in window): ${newHashes.size} hashes ${[...newHashes].slice(0, 5).join(', ')}${newHashes.size > 5 ? '...' 
: ''}`); + + /** + * Edge case: Page was previewed but no media in the time window + * Scenario: User previewed page, removed all media, then previewed again + * Decision: Remove all old media entries for this page (assume removal intended) + * Alternative considered: Keep old entries (assume no change) + * Rationale: Preview action signals intent to update; empty medialog = intentional removal + * Assumption: Events are processed in timestamp order + */ + if (newPageMedia.length === 0 && oldPageEntries.length > 0) { + onLog(' Edge case: Page previewed with no media in window - removing old entries'); + const rm = removeMediaMaybeAddOrphan; + oldPageEntries.forEach((oldEntry) => { + removed += rm(updatedIndex, oldEntry, normalizedPath, medialogEntries); + }); + return; + } + + const toRemove = [...oldHashes].filter((h) => !newHashes.has(h)); + const toAdd = [...newHashes].filter((h) => !oldHashes.has(h)); + const unchanged = [...newHashes].filter((h) => oldHashes.has(h)); + + if (toRemove.length || toAdd.length) { + onLog(` Diff: remove ${toRemove.length} (${toRemove.slice(0, 3).join(', ')}${toRemove.length > 3 ? '...' 
: ''}), add ${toAdd.length}`); + } + + const rm = removeMediaMaybeAddOrphan; + toRemove.forEach((hash) => { + const oldEntry = oldPageEntries.find((e) => e.hash === hash); + if (oldEntry) { + removed += rm(updatedIndex, oldEntry, normalizedPath, medialogEntries); + } + }); + + toAdd.forEach((hash) => { + const media = newPageMedia.find((m) => m.mediaHash === hash); + if (media) { + updatedIndex.push({ + hash: media.mediaHash, + page: normalizedPath, + url: media.path, + name: extractName(media), + timestamp: media.timestamp, + user: media.user, + operation: media.operation, + type: detectMediaType(media), + status: 'referenced', + }); + added += 1; + } + }); + + unchanged.forEach((hash) => { + const idx = updatedIndex.findIndex((e) => e.hash === hash && e.page === normalizedPath); + const media = newPageMedia.find((m) => m.mediaHash === hash); + if (idx !== -1 && media) { + updatedIndex[idx].timestamp = media.timestamp; + } + }); + }); + + return { added, removed }; +} + +/** + * Process standalone media uploads (no page association) + * @param {Array} updatedIndex - Index being built (mutated) + * @param {Array} medialogEntries - New medialog entries + * @param {Set} referencedHashes - Already referenced media hashes + * @returns {number} Added count + */ +function processStandaloneUploads(updatedIndex, medialogEntries, referencedHashes) { + let added = 0; + const standaloneUploads = medialogEntries.filter((m) => !m.resourcePath && m.originalFilename); + + standaloneUploads.forEach((media) => { + if (!referencedHashes.has(media.mediaHash)) { + const exists = updatedIndex.some((e) => e.hash === media.mediaHash && !e.page); + if (!exists) { + updatedIndex.push({ + hash: media.mediaHash, + page: '', + url: media.path, + name: media.originalFilename.split('/').pop(), + timestamp: media.timestamp, + user: media.user, + operation: media.operation, + type: detectMediaType(media), + status: 'unused', + }); + added += 1; + } + } + }); + + return added; +} + +/** + * 
Process linked content (PDFs, SVGs, fragments) for incremental index + * @param {Array} updatedIndex - Index being built (mutated) + * @param {Array} files - File events from auditlog + * @param {Array} pages - Page events + * @param {Function} onProgress - Progress callback + * @param {Function} onLog - Log callback + * @returns {Promise<{added: number, removed: number}>} Counts + */ +async function processLinkedContentIncremental(updatedIndex, files, pages, onProgress, onLog) { + let added = 0; + let removed = 0; + + const filesByPath = new Map(); + files.forEach((e) => { + if (!isPdfOrSvg(e.path) && !isFragment(e.path)) return; + const p = e.path; + const existing = filesByPath.get(p); + if (!existing || e.timestamp > existing.timestamp) filesByPath.set(p, e); + }); + + const deletedPaths = new Set(); + filesByPath.forEach((event, path) => { + if (event.method === 'DELETE') deletedPaths.add(path); + }); + + // Remove deleted linked content + deletedPaths.forEach((path) => { + const idx = updatedIndex.findIndex( + (e) => (e.operation === 'auditlog-parsed' || e.source === 'auditlog-parsed') && e.hash === path, + ); + if (idx !== -1) { + updatedIndex.splice(idx, 1); + removed += 1; + onLog(`Removed linked content (DELETE): ${path}`); + } + }); + + // Build usage map + onProgress({ stage: 'processing', message: 'Building usage map for linked content...', percent: 83 }); + const usageMap = await buildContentUsageMap(pages, (p) => onProgress(p)); + + const allLinkedPaths = new Set(filesByPath.keys()); + ['pdfs', 'svgs', 'fragments'].forEach((key) => { + usageMap[key]?.forEach((_, path) => allLinkedPaths.add(path)); + }); + + // Add existing linked content paths whose pages were parsed + const parsedPages = new Set(pages.map((p) => normalizePath(p.path))); + updatedIndex.forEach((e) => { + const isLinkedContent = e.operation === 'auditlog-parsed' || e.source === 'auditlog-parsed'; + if (!isLinkedContent) return; + const entryPages = (e.page || '').split(',').map((p) => 
p.trim()).filter(Boolean); + if (entryPages.some((p) => parsedPages.has(p))) { + allLinkedPaths.add(e.hash); + } + }); + + allLinkedPaths.forEach((filePath) => { + if (deletedPaths.has(filePath)) return; + + let key = 'fragments'; + if (isPdf(filePath)) key = 'pdfs'; + else if (isSvg(filePath)) key = 'svgs'; + const linkedPages = usageMap[key]?.get(filePath) || []; + const status = linkedPages.length > 0 ? 'referenced' : 'file-unused'; + const fileEvent = filesByPath.get(filePath) || { timestamp: 0, user: '' }; + + const isLinked = (e) => (e.operation === 'auditlog-parsed' || e.source === 'auditlog-parsed') + && e.hash === filePath; + const existingIdx = updatedIndex.findIndex(isLinked); + + if (existingIdx !== -1) { + updatedIndex[existingIdx].page = linkedPages.length > 0 ? linkedPages.join(',') : ''; + updatedIndex[existingIdx].timestamp = fileEvent.timestamp; + updatedIndex[existingIdx].status = status; + } else { + updatedIndex.push(toLinkedContentEntry(filePath, linkedPages, fileEvent, status)); + added += 1; + } + }); + + return { added, removed }; +} + +/** + * Incremental re-index: fetch logs since lastFetchTime, merge with existing index. + * Detects additions, removals, and updates per page. 
+ * @param {Function} onProgress - Progress callback + * @param {Function} [onLog] - Optional debug log callback for per-page details + */ +export async function buildIncrementalIndex(onProgress, onLog = noop) { + const metaPath = `${sitePath}/.da/mediaindex/medialog-meta.json`; + const indexPath = `${sitePath}/.da/mediaindex/media-index.json`; + const meta = await loadMeta(metaPath); + const lastFetchTime = meta?.lastFetchTime; + + if (!lastFetchTime) { + throw new Error('Cannot run incremental: meta missing lastFetchTime'); + } + + onLog(`lastFetchTime: ${lastFetchTime} (${new Date(lastFetchTime).toISOString()})`); + onProgress({ + stage: 'starting', + message: 'Mode: Incremental re-index (since last build)', + percent: 5, + }); + + onProgress({ stage: 'loading', message: 'Loading existing index...', percent: 8 }); + const existingIndex = await loadIndex(indexPath); + + onLog(`Fetching auditlog since ${new Date(lastFetchTime).toISOString()}`); + onProgress({ stage: 'fetching', message: 'Fetching new auditlog entries...', percent: 15 }); + const auditlogEntries = await fetchFromAdminAPI('log', org, repo, ref, lastFetchTime, API_PAGE_SIZE, (entries, hasMore) => { + onProgress({ + stage: 'fetching', + message: `Fetched ${entries.length} auditlog entries${hasMore ? ' (more available)' : ''}...`, + percent: 25, + }); + }); + + const validEntries = auditlogEntries.filter((e) => e && e.path && e.route === 'preview'); + const pages = validEntries.filter((e) => isPage(e.path)); + + onProgress({ stage: 'fetching', message: 'Fetching new medialog entries...', percent: 35 }); + const medialogEntries = await fetchFromAdminAPI('medialog', org, repo, ref, lastFetchTime, API_PAGE_SIZE, (entries, hasMore) => { + onProgress({ + stage: 'fetching', + message: `Fetched ${entries.length} medialog entries${hasMore ? 
' (more available)' : ''}...`, + percent: 45, + }); + }); + + if (pages.length === 0 && medialogEntries.length === 0) { + onProgress({ + stage: 'complete', + message: 'No new activity since last build - index unchanged', + percent: 100, + }); + return existingIndex; + } + + onLog(`Auditlog: ${auditlogEntries.length} entries, ${pages.length} pages`); + onLog(`Medialog: ${medialogEntries.length} entries (all since lastFetchTime)`); + onProgress({ + stage: 'processing', + message: `Processing ${pages.length} pages with ${medialogEntries.length} medialog entries...`, + percent: 55, + }); + + const updatedIndex = [...existingIndex]; + + const pagesByPath = new Map(); + pages.forEach((e) => { + const p = normalizePath(e.path); + if (!pagesByPath.has(p)) pagesByPath.set(p, []); + pagesByPath.get(p).push(e); + }); + + /** + * Indexing strategy for multiple preview events per page + * Rule: Process only the LATEST preview event per page, skip others + * Rationale: Latest preview represents current state; earlier previews are superseded + * Example: Page previewed at T1, T2, T3 → only process T3's media associations + * Trade-off: Simpler logic, potential to miss media if window misaligned (acceptable) + */ + pagesByPath.forEach((events) => { + events.sort((a, b) => b.timestamp - a.timestamp); + }); + onLog(`Time window: ${INCREMENTAL_WINDOW_MS / 1000}s (medialog within window of latest preview)`); + onLog(`Pages to process: ${pagesByPath.size} (${[...pagesByPath.keys()].join(', ')})`); + onLog(`Medialog entries since lastFetch: ${medialogEntries.length}`); + + // Process page-level media updates + const pageResults = processPageMediaUpdates(updatedIndex, pagesByPath, medialogEntries, onLog); + let { added, removed } = pageResults; + + // Calculate referenced hashes for standalone upload processing + const referencedHashes = new Set( + updatedIndex.filter((e) => e.page).flatMap((e) => e.hash), + ); + + // Process standalone uploads + const standaloneAdded = 
processStandaloneUploads(updatedIndex, medialogEntries, referencedHashes); + added += standaloneAdded; + + // Process linked content + const files = validEntries.filter((e) => !isPage(e.path)); + const linkedResults = await processLinkedContentIncremental( + updatedIndex, + files, + pages, + onProgress, + onLog, + ); + added += linkedResults.added; + removed += linkedResults.removed; + + onProgress({ + stage: 'processing', + message: `Incremental: +${added} added, -${removed} removed, total: ${updatedIndex.length}`, + percent: 85, + }); + + onProgress({ stage: 'saving', message: `Saving ${updatedIndex.length} entries...`, percent: 90 }); + + const formData = await createSheet(updatedIndex); + await daFetch(`${DA_ADMIN}/source${indexPath}`, { + method: 'POST', + body: formData, + }); + + await saveMeta({ + lastFetchTime: Date.now(), + entriesCount: updatedIndex.length, + lastRefreshBy: 'media-indexer', + lastBuildMode: 'incremental', + }, metaPath); + + onProgress({ + stage: 'complete', + message: `Incremental complete! 
${updatedIndex.length} entries (${added} added, ${removed} removed)`, + percent: 100, + }); + + return updatedIndex; +} + +export async function buildInitialIndex(onProgress) { + const index = []; + const buildMode = 'full'; // incremental not yet implemented + + onProgress({ + stage: 'starting', + message: 'Mode: Full build (rebuilding from auditlog + medialog)', + percent: 5, + }); + + // Phase 1: Stream auditlog, build maps (no full accumulation) + onProgress({ stage: 'fetching', message: 'Fetching auditlog (streaming)...', percent: 10 }); + + const pagesByPath = new Map(); // normalizedPath -> [events] sorted desc + const filesByPath = new Map(); // path -> latest event + const deletedPaths = new Set(); + let auditlogCount = 0; + + await fetchFromAdminAPIStreaming('log', org, repo, ref, null, API_PAGE_SIZE, (chunk) => { + const rawCount = chunk.length; + const droppedNoPath = chunk.filter((e) => !e?.path).length; + const droppedRoute = chunk.filter((e) => e?.path && e.route !== 'preview').length; + if (droppedNoPath > 0 || droppedRoute > 0) { + logger.debug(`[auditlog chunk] raw=${rawCount}, dropped(no path)=${droppedNoPath}, dropped(route!==preview)=${droppedRoute}`); + } + chunk.forEach((e) => { + if (!e?.path || e.route !== 'preview') return; + auditlogCount += 1; + if (isPage(e.path)) { + const p = normalizePath(e.path); + if (!pagesByPath.has(p)) pagesByPath.set(p, []); + pagesByPath.get(p).push(e); + } else { + const fp = normalizeFilePath(e.path); + const existing = filesByPath.get(fp); + if (!existing || e.timestamp > existing.timestamp) { + filesByPath.set(fp, e); + } + } + }); + onProgress({ + stage: 'fetching', + message: `Auditlog: ${auditlogCount} entries, ${pagesByPath.size} pages...`, + percent: 15, + }); + }); + + pagesByPath.forEach((events) => events.sort((a, b) => b.timestamp - a.timestamp)); + + const pages = []; + pagesByPath.forEach((events) => pages.push(...events)); + + /** + * Deletion detection strategy: Only mark as deleted if LATEST 
event is DELETE + * Rationale: If a file was deleted then re-added, the latest event reflects current state + * Assumption: filesByPath contains only the latest event per path (maintained above) + * Example timeline: DELETE at T1, POST at T2 → latest=POST → not deleted (correct) + */ + filesByPath.forEach((event, path) => { + if (isLinkedContentPath(path) && event.method === 'DELETE') { + deletedPaths.add(path); + } + }); + + const iconPathsFromAuditlog = [...filesByPath.keys()].filter((p) => p.includes('/icons/')); + const iconPathsInDeleted = [...deletedPaths].filter((p) => p.includes('/icons/')); + logger.debug(`[auditlog done] total=${auditlogCount}, pages=${pagesByPath.size}, files=${filesByPath.size}, deleted=${deletedPaths.size}`); + logger.debug(` icon paths from auditlog: [${iconPathsFromAuditlog.join(', ') || 'none'}]`); + logger.debug(` icon paths in deletedPaths: [${iconPathsInDeleted.join(', ') || 'none'}]`); + + onProgress({ + stage: 'fetching', + message: `Identified ${pages.length} page events, ${filesByPath.size} files`, + percent: 25, + }); + + // Phase 2: Stream medialog, process each chunk (no full accumulation) + onProgress({ stage: 'fetching', message: 'Fetching medialog (streaming)...', percent: 30 }); + + const entryMap = new Map(); + const referencedHashes = new Set(); + const standaloneBuffer = []; + let medialogCount = 0; + + await fetchFromAdminAPIStreaming('medialog', org, repo, ref, null, API_PAGE_SIZE, (chunk) => { + logger.debug(`[medialog chunk] ${chunk.length} entries`); + chunk.forEach((media) => { + medialogCount += 1; + if (media.resourcePath) { + const matches = findMatchingPageEvents(pagesByPath, media.resourcePath, media.timestamp); + matches.forEach((pageEvent) => { + const normalizedPath = normalizePath(pageEvent.path); + const hash = media.mediaHash; + const key = `${hash}|${normalizedPath}`; + const existing = entryMap.get(key); + if (!existing || media.timestamp > existing.timestamp) { + entryMap.set(key, { + hash, + 
page: normalizedPath, + url: media.path, + name: extractName(media), + timestamp: media.timestamp, + user: media.user, + operation: media.operation, + type: detectMediaType(media), + status: 'referenced', + }); + } + referencedHashes.add(hash); + }); + } else if (media.originalFilename) { + standaloneBuffer.push(media); + } + }); + const mem = checkMemory(); + if (mem.warning) { + onProgress({ + stage: 'processing', + message: `Memory: ${mem.usedMB.toFixed(0)}MB / ${mem.limitMB.toFixed(0)}MB`, + percent: 35, + }); + } else { + onProgress({ + stage: 'fetching', + message: `Medialog: ${medialogCount} entries processed...`, + percent: 35, + }); + } + }); + + onProgress({ + stage: 'processing', + message: `Processed ${medialogCount} medialog, ${entryMap.size} page refs`, + percent: 60, + }); + + // Phase 3: Process standalone uploads + standaloneBuffer.forEach((media) => { + const hash = media.mediaHash; + if (!referencedHashes.has(hash)) { + const key = `${hash}|`; + const existing = entryMap.get(key); + if (!existing || media.timestamp > existing.timestamp) { + entryMap.set(key, { + hash, + page: '', + url: media.path, + name: media.originalFilename.split('/').pop(), + timestamp: media.timestamp, + user: media.user, + operation: media.operation, + type: detectMediaType(media), + status: 'unused', + }); + } + } + }); + + onProgress({ + stage: 'processing', + message: `Standalone: ${standaloneBuffer.length}, total: ${entryMap.size}`, + percent: 70, + }); + + // Convert Map to array + entryMap.forEach((entry) => { + index.push(entry); + }); + + // Phase 5: Linked content (PDFs, SVGs, fragments) - parse pages for usage + onProgress({ stage: 'processing', message: 'Building content usage map (parsing pages)...', percent: 78 }); + const usageMap = await buildContentUsageMap(pages, (p) => onProgress(p)); + + const linkedFilesByPath = new Map(); + filesByPath.forEach((e, p) => { + if (!isPdfOrSvg(p) && !isFragment(p)) return; + linkedFilesByPath.set(p, e); + }); + + const 
usageKey = (path) => { + if (isPdf(path)) return 'pdfs'; + if (isSvg(path)) return 'svgs'; + return 'fragments'; + }; + + const allLinkedPaths = new Set(linkedFilesByPath.keys()); + ['pdfs', 'svgs', 'fragments'].forEach((key) => { + usageMap[key]?.forEach((_, path) => allLinkedPaths.add(path)); + }); + + const iconPathsInAllLinked = [...allLinkedPaths].filter((p) => p.includes('/icons/')); + logger.debug(`[linked content] linkedFilesByPath=${linkedFilesByPath.size}, allLinkedPaths=${allLinkedPaths.size} (after merge with usageMap) | icon paths: [${iconPathsInAllLinked.join(', ') || 'none'}]`); + + allLinkedPaths.forEach((filePath) => { + if (deletedPaths.has(filePath)) { + if (filePath.includes('/icons/')) { + logger.debug(`[linked content] SKIP (in deletedPaths): ${filePath}`); + } + return; + } + const key = usageKey(filePath); + const linkedPages = usageMap[key]?.get(filePath) || []; + const status = linkedPages.length > 0 ? 'referenced' : 'file-unused'; + const fileEvent = linkedFilesByPath.get(filePath) || { timestamp: 0, user: '' }; + index.push(toLinkedContentEntry(filePath, linkedPages, fileEvent, status)); + }); + + const linkedContentCount = index.length - entryMap.size; + const iconEntriesInIndex = index.filter((e) => e.hash?.includes?.('/icons/')); + logger.debug(`[full build done] media=${entryMap.size}, linked content=${linkedContentCount}, total=${index.length} | icon entries in index: [${iconEntriesInIndex.map((e) => e.hash).join(', ') || 'none'}]`); + + onProgress({ + stage: 'processing', + message: `Added ${allLinkedPaths.size} linked content entries (PDFs, SVGs, fragments)`, + percent: 82, + }); + + onProgress({ stage: 'saving', message: `Saving ${index.length} entries...`, percent: 90 }); + + const indexPath = `${sitePath}/.da/mediaindex/media-index.json`; + const formData = await createSheet(index); + await daFetch(`${DA_ADMIN}/source${indexPath}`, { + method: 'POST', + body: formData, + }); + + await saveMeta({ + lastFetchTime: Date.now(), + 
entriesCount: index.length, + lastRefreshBy: 'media-indexer', + lastBuildMode: buildMode, + }, `${sitePath}/.da/mediaindex/medialog-meta.json`); + + onProgress({ stage: 'complete', message: `Complete! ${index.length} entries indexed`, percent: 100 }); + + return index; +} diff --git a/tools/media/lib/config.js b/tools/media/lib/config.js new file mode 100644 index 0000000..621094a --- /dev/null +++ b/tools/media/lib/config.js @@ -0,0 +1,39 @@ +/** + * Configuration and state management for media indexer + */ + +/** + * Validate GitHub org/repo name to prevent injection attacks + * Allows: alphanumeric, hyphens, underscores, dots (standard GitHub naming) + * @param {string} name - Org or repo name + * @returns {string|null} Validated name or null if invalid + */ +function validateGitHubName(name) { + if (!name || typeof name !== 'string') return null; + // GitHub allows alphanumeric, hyphens, underscores, dots + // Must not start/end with special chars, max 100 chars + const validPattern = /^[a-zA-Z0-9]([a-zA-Z0-9._-]{0,98}[a-zA-Z0-9])?$/; + return validPattern.test(name) ? name : null; +} + +// Parse URL parameters +const params = new URLSearchParams(window.location.search); +const rawOrg = params.get('org'); +const rawRepo = params.get('repo') || params.get('site'); + +export const org = validateGitHubName(rawOrg); +export const repo = validateGitHubName(rawRepo); +export const ref = 'main'; +export const sitePath = org && repo ? 
`/${org}/${repo}` : null; + +export const DA_ADMIN = 'https://admin.da.live'; + +export const state = { + building: false, + progress: { stage: 'idle', message: '', percent: 0 }, + buildStartTime: null, + errors: [], + logs: [], + status: null, + daToken: null, +}; diff --git a/tools/media/lib/helpers.js b/tools/media/lib/helpers.js new file mode 100644 index 0000000..23a0c5f --- /dev/null +++ b/tools/media/lib/helpers.js @@ -0,0 +1,177 @@ +/** + * Helper functions for path normalization, type detection, and name extraction + */ + +import * as logger from './logger.js'; + +/** + * Normalize path by removing query params and adding .md for pages + * @param {string} path - The path to normalize + * @returns {string} Normalized path + */ +export function normalizePath(path) { + if (!path) return ''; + let cleanPath = path.split('?')[0].split('#')[0]; + if (!cleanPath.includes('.') && !cleanPath.startsWith('/media/')) { + cleanPath = cleanPath === '/' || cleanPath === '' ? '/index.md' : `${cleanPath}.md`; + } + return cleanPath; +} + +/** + * Check if a path represents a page (not a media file or fragment) + * @param {string} path - The path to check + * @returns {boolean} True if path is a page + */ +export function isPage(path) { + if (!path || typeof path !== 'string') return false; + return (path.endsWith('.md') + || (!path.includes('.') && !path.startsWith('/media/'))) + && !path.includes('/fragments/'); +} + +/** + * Extract filename from medialog entry or path + * @param {object} mediaEntry - The medialog entry + * @returns {string} Extracted filename + */ +export function extractName(mediaEntry) { + if (!mediaEntry) return ''; + if (mediaEntry.originalFilename) { + return mediaEntry.originalFilename.split('/').pop(); + } + if (!mediaEntry.path) return ''; + return mediaEntry.path.split('?')[0].split('#')[0].split('/').pop(); +} + +/** Phase 2: Linked content type detection */ +export function isPdf(path) { + return path && path.toLowerCase().endsWith('.pdf'); 
+} + +export function isSvg(path) { + return path && path.toLowerCase().endsWith('.svg'); +} + +export function isFragment(path) { + return path && path.includes('/fragments/'); +} + +/** True if path is PDF, SVG, or fragment (linked content from auditlog) */ +export function isLinkedContentPath(path) { + return path && (isPdf(path) || isSvg(path) || isFragment(path)); +} + +/** Normalize file path for matching (ensure leading slash) */ +export function normalizeFilePath(path) { + if (!path) return ''; + const p = path.split('?')[0].split('#')[0].trim(); + return p.startsWith('/') ? p : `/${p}`; +} + +export function isPdfOrSvg(path) { + return isPdf(path) || isSvg(path); +} + +/** + * Get file type in same format as media: "category > extension" + * @param {string} path - File path + * @returns {string} e.g. "document > pdf", "image > svg", "content > fragment" + */ +export function getFileType(path) { + if (isPdf(path)) return 'document > pdf'; + if (isSvg(path)) return 'image > svg'; + if (isFragment(path)) return 'content > fragment'; + return 'unknown'; +} + +function toPath(href) { + if (!href) return ''; + try { + if (href.startsWith('http')) { + return new URL(href).pathname; + } + return href.startsWith('/') ? 
href : `/${href}`; + } catch (error) { + logger.error(`Failed to parse URL ${href}:`, error.message); + return href; + } +} + +/** Markdown link regex: [text](url) or ![alt](url) - captures URL in group 1 */ +const MD_LINK_RE = /\[[^\]]*\]\(([^)]+)\)/gi; + +/** Markdown autolink: - captures URL in group 1 */ +const MD_AUTOLINK_RE = /<(https?:\/\/[^>]+|\/[^>\s]*)>/g; + +/** Icon shorthand: :iconname: → /icons/iconname.svg */ +const ICON_RE = /:([a-zA-Z0-9-]+):/g; +/** Exclude doc terms like "with :svg: syntax" to avoid false positives */ +const ICON_DOC_EXCLUDE = new Set(['svg', 'pdf', 'image', 'link', 'syntax']); + +/** + * Extract all URLs from markdown: [text](url), ![alt](url), and autolinks + * @param {string} md - Raw markdown + * @returns {string[]} - URLs from link syntax + */ +function extractUrlsFromMarkdown(md) { + if (!md || typeof md !== 'string') return []; + const fromLinks = [...md.matchAll(MD_LINK_RE)].map((m) => m[1].trim()); + const fromAutolinks = [...md.matchAll(MD_AUTOLINK_RE)].map((m) => m[1].trim()); + return [...fromLinks, ...fromAutolinks]; +} + +/** + * Extract icon references from :iconname: shorthand (resolves to /icons/iconname.svg) + * @param {string} md - Raw markdown + * @returns {string[]} - Normalized paths like /icons/headset.svg + */ +export function extractIconReferences(md) { + if (!md || typeof md !== 'string') return []; + const matches = [...md.matchAll(ICON_RE)]; + return [...new Set( + matches + .filter((m) => !ICON_DOC_EXCLUDE.has(m[1].toLowerCase())) + .map((m) => `/icons/${m[1]}.svg`), + )]; +} + +/** + * Extract fragment references from markdown (links to /fragments/...) + * @param {string} md - Raw markdown + * @returns {string[]} - Normalized paths + */ +export function extractFragmentReferences(md) { + const urls = extractUrlsFromMarkdown(md); + return [...new Set(urls.filter((u) => u.includes('/fragments/')).map((u) => toPath(u)))]; +} + +/** + * Extract links matching pattern (e.g. 
.pdf, .svg) from markdown + * @param {string} md - Raw markdown + * @param {RegExp} pattern - Pattern to match (e.g. /\.pdf$/) + * @returns {string[]} - Normalized paths + */ +export function extractLinks(md, pattern) { + const urls = extractUrlsFromMarkdown(md); + const pathPart = (u) => u.split('?')[0].split('#')[0]; + return [...new Set(urls.filter((u) => pattern.test(pathPart(u))).map((u) => toPath(u)))]; +} + +/** + * Detect media type from contentType in structured format + * @param {object} mediaEntry - The medialog entry + * @returns {string} Type in format "category > extension" + */ +export function detectMediaType(mediaEntry) { + const contentType = mediaEntry.contentType || ''; + if (contentType.startsWith('image/')) { + const ext = contentType.split('/')[1]; + return `img > ${ext}`; + } + if (contentType.startsWith('video/')) { + const ext = contentType.split('/')[1]; + return `video > ${ext}`; + } + return 'unknown'; +} diff --git a/tools/media/lib/logger.js b/tools/media/lib/logger.js new file mode 100644 index 0000000..5d7f752 --- /dev/null +++ b/tools/media/lib/logger.js @@ -0,0 +1,79 @@ +/** + * Logging utility with configurable log levels + */ + +const LOG_LEVELS = { + DEBUG: 0, + INFO: 1, + WARN: 2, + ERROR: 3, + NONE: 4, +}; + +/** + * Logger configuration + * Set LOG_LEVEL to control verbosity in production + */ +const config = { + // Change to LOG_LEVELS.INFO or LOG_LEVELS.WARN for production + level: LOG_LEVELS.DEBUG, + prefix: '[MediaIndexer]', +}; + +/** + * Set log level + * @param {number} level - Log level from LOG_LEVELS + */ +export function setLogLevel(level) { + config.level = level; +} + +/** + * Debug logging - verbose details for development + * @param {string} message - Log message + * @param {...any} args - Additional arguments + */ +export function debug(message, ...args) { + if (config.level <= LOG_LEVELS.DEBUG) { + // eslint-disable-next-line no-console + console.log(`${config.prefix}[DEBUG]`, message, ...args); + } +} + 
+/** + * Info logging - general information + * @param {string} message - Log message + * @param {...any} args - Additional arguments + */ +export function info(message, ...args) { + if (config.level <= LOG_LEVELS.INFO) { + // eslint-disable-next-line no-console + console.log(`${config.prefix}[INFO]`, message, ...args); + } +} + +/** + * Warning logging - potential issues + * @param {string} message - Log message + * @param {...any} args - Additional arguments + */ +export function warn(message, ...args) { + if (config.level <= LOG_LEVELS.WARN) { + // eslint-disable-next-line no-console + console.warn(`${config.prefix}[WARN]`, message, ...args); + } +} + +/** + * Error logging - failures and exceptions + * @param {string} message - Log message + * @param {...any} args - Additional arguments + */ +export function error(message, ...args) { + if (config.level <= LOG_LEVELS.ERROR) { + // eslint-disable-next-line no-console + console.error(`${config.prefix}[ERROR]`, message, ...args); + } +} + +export { LOG_LEVELS }; diff --git a/tools/media/lib/ui.js b/tools/media/lib/ui.js new file mode 100644 index 0000000..2e4e0cc --- /dev/null +++ b/tools/media/lib/ui.js @@ -0,0 +1,175 @@ +/** + * UI rendering and event handling + */ + +import { state, org, repo } from './config.js'; +import { + buildInitialIndex, buildIncrementalIndex, shouldReindex, getIndexStatus, +} from './builder.js'; + +export function render() { + const app = document.getElementById('app'); + + const statusHtml = state.status ? ` +
+

Current Index Status

+
+
+ + ${state.status.lastRefresh ? new Date(state.status.lastRefresh).toLocaleString() : 'Never'} +
+
+ + ${state.status.entriesCount || 0} +
+ ${state.status.lastBuildMode ? ` +
+ + ${state.status.lastBuildMode === 'full' ? 'Full rebuild' : 'Incremental'} +
+ ` : ''} + ${state.status.indexLastModified != null ? ` +
+ + ${new Date(state.status.indexLastModified).toLocaleString()} +
+ ` : ''} +
+
+ ` : '
Checking status...
'; + + const elapsedMs = state.buildStartTime ? Date.now() - state.buildStartTime : 0; + const elapsedStr = elapsedMs >= 1000 ? `${(elapsedMs / 1000).toFixed(1)}s` : `${elapsedMs}ms`; + const pct = state.progress.percent; + const etaMs = pct > 0 && pct < 100 ? (elapsedMs / pct) * (100 - pct) : 0; + const etaStr = etaMs > 0 ? `~${(etaMs / 1000).toFixed(1)}s` : ''; + const { totalMs } = state.progress; + let totalStr = ''; + if (totalMs != null) { + totalStr = totalMs >= 1000 ? `${(totalMs / 1000).toFixed(1)}s` : `${totalMs}ms`; + } + + const timingHtml = state.progress.stage === 'complete' && totalStr + ? `Total: ${totalStr}` + : `Elapsed: ${elapsedStr}${etaStr ? `ETA: ${etaStr}` : ''}`; + + const progressHtml = state.building || state.progress.stage !== 'idle' ? ` +
+

Progress

+
+
+
+
+ ${state.progress.stage} + ${state.progress.message} +
+
+ ${timingHtml} +
+
+ ` : ''; + + const logsHtml = state.logs.length > 0 ? ` +
+

Logs (${state.logs.length})

+
    + ${state.logs.map((log) => `
  • ${log.message}
  • `).join('')} +
+
+ ` : ''; + + const errorsHtml = state.errors.length > 0 ? ` +
+

Errors (${state.errors.length})

+
    + ${state.errors.map((err) => `
  • ${err.message}
  • `).join('')} +
+
+ ` : ''; + + app.innerHTML = ` +

Media Index Builder

+

Building index for: ${org}/${repo}

+ + ${statusHtml} + +
+ +
+ + ${progressHtml} + ${errorsHtml} + ${logsHtml} + `; +} + +export function attachEventListeners() { + if (!state.building) { + const buildBtn = document.getElementById('buildBtn'); + if (buildBtn) { + buildBtn.addEventListener('click', () => { + state.building = true; + state.buildStartTime = Date.now(); + state.errors = []; + state.logs = []; + state.progress = { stage: 'starting', message: 'Checking build mode...', percent: 0 }; + render(); + + const runBuild = (useIncremental) => { + const buildFn = useIncremental ? buildIncrementalIndex : buildInitialIndex; + const onLog = (msg) => { + state.logs.push({ message: msg, type: 'info' }); + render(); + }; + return buildFn( + (progress) => { + let finalProgress = progress; + if (progress.stage === 'complete' && state.buildStartTime) { + const totalMs = Date.now() - state.buildStartTime; + const totalStr = totalMs >= 1000 ? `${(totalMs / 1000).toFixed(1)}s` : `${totalMs}ms`; + finalProgress = { ...progress, message: `${progress.message} (${totalStr})`, totalMs }; + } + state.progress = finalProgress; + state.logs.push({ message: finalProgress.message, type: 'info' }); + render(); + }, + useIncremental ? onLog : undefined, + ); + }; + + shouldReindex() + .then(({ shouldReindex: useIncremental, reason }) => { + if (reason && !useIncremental) { + state.logs.push({ message: `Full build: ${reason}`, type: 'info' }); + } + return runBuild(useIncremental); + }) + .then((result) => { + const totalMs = state.buildStartTime ? Date.now() - state.buildStartTime : 0; + const totalStr = totalMs >= 1000 ? 
`${(totalMs / 1000).toFixed(1)}s` : `${totalMs}ms`; + state.logs.push({ + message: `Index built successfully: ${result.length} entries (${totalStr})`, + type: 'success', + }); + return getIndexStatus(); + }) + .then((status) => { + state.status = status; + }) + .catch((error) => { + state.errors.push({ message: error.message }); + state.logs.push({ message: `Error: ${error.message}`, type: 'error' }); + state.progress = { stage: 'error', message: error.message, percent: 0 }; + }) + .finally(() => { + state.building = false; + state.buildStartTime = null; + render(); + attachEventListeners(); + }); + }); + } + } +} diff --git a/tools/media/media-indexing-strategy.md b/tools/media/media-indexing-strategy.md new file mode 100644 index 0000000..752d5da --- /dev/null +++ b/tools/media/media-indexing-strategy.md @@ -0,0 +1,2160 @@ +# Media Indexing Strategy for AEM Sites +**Date:** February 24, 2026 +**Author:** Testing & Analysis with Claude Code +**Project:** Media Library Integration with Medialog & Auditlog + +--- + +## Summary + +Strategy for building a media index by combining **AEM Auditlog** and **Medialog** APIs, including operational architecture for backfilling historical data, initial index population, and ongoing refresh mechanisms. 
+ +### Key Findings + +- **Two Log Sources:** Auditlog tracks page/file previews; Medialog tracks Media Bus items (images/videos) +- **Temporal Relationship:** Auditlog entries precede medialog by 1.5-2 seconds +- **Path Matching:** Critical to normalize paths (`/drafts/page` vs `/drafts/page.md`) +- **Media Bus vs Content Delivery:** Images/videos use Media Bus (tracked in medialog), PDFs/SVGs/Fragments use content delivery (auditlog only) +- **Parsing Required:** Fragments, PDFs, and SVGs require parsing page content to determine usage relationships +- **Duplicate Events:** Every preview creates new log entries, even without content changes +- **Time Window:** Use 5-second window to match medialog entries to auditlog events + +### Operational Architecture + +- **Two-Tier Backfill:** Separate CLI tool for medialog backfill, separate DA app for index population +- **Historical Coverage:** Supports sites created before medialog existed (2023+) via Status API parsing +- **Initial Setup:** One-time engineer-run process per repository (30-60 minutes total) +- **Ongoing Refresh:** Browser-based auto-refresh every 10 minutes with distributed locking +- **Multi-User Support:** Distributed lock prevents race conditions across concurrent users +- **Scalability:** Handles sites with 10,000+ pages and 50,000+ media items + +--- + +## Log Relationships + +### Auditlog vs Medialog + +| Aspect | Auditlog | Medialog | +|--------|----------|----------| +| **Purpose** | Tracks all preview/publish actions | Tracks Media Bus activity only | +| **Scope** | Pages, PDFs, SVGs, Fragments, images, videos | Images and videos only | +| **Timing** | Logged first (T) | Logged ~1.5-2s later (T+1500ms) | +| **Path Format** | `/drafts/page` | `/drafts/page.md` | +| **Contains** | Page-level events | Media-level events with `resourcePath` | + +### Linking Strategy + +``` +Auditlog Entry Medialog Entries +┌─────────────────────┐ ┌──────────────────────────┐ +│ path: /drafts/page │ ───────>│ 
resourcePath: /drafts/ │ +│ timestamp: 1000 │ match │ page.md │ +│ │ by: │ timestamp: 1001-1005 │ +└─────────────────────┘ └──────────────────────────┘ + │ + │ Multiple media + ▼ + All have same timestamp +``` + +**Matching Rules:** +1. Normalize paths: `auditlog.path` + `.md` = `medialog.resourcePath` +2. Time window: `medialog.timestamp` within 5 seconds after `auditlog.timestamp` +3. Group medialog entries by `(resourcePath, timestamp)` to find page's media + +--- + +## Content Types & Tracking Methods + +### Complete Matrix + +| Content Type | Delivery Method | Auditlog | Medialog | Tracking Method | Usage Relationship | +|-------------|-----------------|----------|----------|-----------------|-------------------| +| **Images (embedded)** | Media Bus | Page event | Entry with resourcePath | Medialog linking | From resourcePath field | +| **Images (standalone)** | Media Bus | File event | Entry with originalFilename | Medialog linking | No usage (standalone) | +| **Videos (embedded)** | Media Bus | Page event | Entry with resourcePath | Medialog linking | From resourcePath field | +| **Videos (standalone)** | Media Bus | File event | Entry with originalFilename | Medialog linking | No usage (standalone) | +| **PDFs** | Content delivery | File event | None | Auditlog + Parse | Parse page HTML for links | +| **SVGs** | Content delivery | File event | None | Auditlog + Parse | Parse page HTML for links | +| **Fragments** | Content delivery | File event | None | Auditlog + Parse | Parse page HTML for references | +| **Icons** | Icons folder | None | None | Not tracked | N/A | + +### Why Different Tracking Methods? 
+ +**Media Bus Items (Images, Videos):** +- Deduplicated, content-addressed storage +- Hash-based URLs: `media_/` +- Tracked in medialog with `resourcePath` linking to pages +- **No parsing required** - logs provide complete relationships + +**Content Delivery Items (PDFs, SVGs, Fragments):** +- Regular file delivery +- Standard preview/publish lifecycle +- Not content-addressed +- **Parsing required** - logs don't link to containing pages + +**Source:** [AEM Media Documentation](https://www.aem.live/docs/media) + +--- + +## Test Scenarios & Results + +### Scenario A: Page with 3 Images + +**Action:** Created `/drafts/scenario-a.md` with 3 embedded images, previewed once + +**Auditlog Response:** +```json +{ + "entries": [ + { + "path": "/drafts/scenario-a", + "timestamp": 1771936397105, + "route": "preview", + "user": "user@example.com", + "status": 200 + } + ] +} +``` + +**Medialog Response:** +```json +{ + "entries": [ + { + "path": "https://main--repo--org.aem.live/media_a1b2c3/image1.jpg", + "operation": "ingest", + "timestamp": 1771936400523, + "resourcePath": "/drafts/scenario-a.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "a1b2c3", + "width": "2000", + "height": "1333" + }, + { + "path": "https://main--repo--org.aem.live/media_d4e5f6/image2.jpg", + "operation": "ingest", + "timestamp": 1771936400523, + "resourcePath": "/drafts/scenario-a.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "d4e5f6", + "width": "2000", + "height": "1500" + }, + { + "path": "https://main--repo--org.aem.live/media_g7h8i9/image3.jpg", + "operation": "ingest", + "timestamp": 1771936400523, + "resourcePath": "/drafts/scenario-a.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "g7h8i9", + "width": "2000", + "height": "1600" + } + ] +} +``` + +**Learning:** +- All media on same page share identical timestamp +- Media added through markup: `operation: "ingest"` WITHOUT `originalFilename` 
+- 3.4 second processing delay between logs +- `resourcePath` links media to page + +--- + +### Scenario B: Text-Only Page + +**Action:** Created `/drafts/scenario-b.md` with only text, previewed + +**Auditlog Response:** +```json +{ + "entries": [ + { + "path": "/drafts/scenario-b", + "timestamp": 1771936758410, + "route": "preview", + "user": "user@example.com", + "duration": 1112, + "status": 200 + } + ] +} +``` + +**Medialog Response:** +```json +{ + "entries": [] +} +``` + +**Learning:** +- Auditlog logs text-only pages +- Empty medialog = no Media Bus items on page +- Can detect "all media removed" pattern (with caveats) + +--- + +### Scenario H: Standalone Media Preview + +**Action:** Uploaded and previewed 3 standalone files: +- `/media/standalone-image.jpg` (image) +- `/media/standalone-doc.pdf` (PDF) +- `/media/standalone-graphic.svg` (SVG) + +**Auditlog Response:** +```json +{ + "entries": [ + { + "path": "/media/standalone-image.jpg", + "timestamp": 1771937123456, + "route": "preview", + "user": "user@example.com", + "status": 200 + }, + { + "path": "/media/standalone-doc.pdf", + "timestamp": 1771937125789, + "route": "preview", + "user": "user@example.com", + "status": 200 + }, + { + "path": "/media/standalone-graphic.svg", + "timestamp": 1771937128012, + "route": "preview", + "user": "user@example.com", + "status": 200 + } + ] +} +``` + +**Medialog Response:** +```json +{ + "entries": [ + { + "path": "https://main--repo--org.aem.live/media_j1k2l3/standalone-image.jpg", + "operation": "ingest", + "timestamp": 1771937124567, + "originalFilename": "/media/standalone-image.jpg", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "j1k2l3", + "owner": "2d0fcd52abc", + "repo": "2d0fcd52abc", + "width": "1920", + "height": "1080" + } + ] +} +``` + +**Learning:** +- Images: Appear in BOTH logs +- PDFs/SVGs: Auditlog ONLY (not on Media Bus) +- Standalone uploads have `originalFilename` + `owner` + `repo` +- NO `resourcePath` (not linked 
to page) + +--- + +### Scenario G: Page with Mixed Media + +**Action:** Created `/drafts/scenario-g.md` with: +- 2 embedded images +- 1 PDF preview link +- 1 SVG preview link +- 1 icon (`:headset:`) + +**Auditlog Response (page preview):** +```json +{ + "entries": [ + { + "path": "/drafts/scenario-g", + "timestamp": 1771937500000, + "route": "preview", + "user": "user@example.com", + "status": 200 + } + ] +} +``` + +**Auditlog Response (when user clicks PDF/SVG links):** +```json +{ + "entries": [ + { + "path": "/media/standalone-doc.pdf", + "timestamp": 1771937510000, + "route": "preview", + "user": "user@example.com", + "status": 200 + }, + { + "path": "/media/standalone-graphic.svg", + "timestamp": 1771937515000, + "route": "preview", + "user": "user@example.com", + "status": 200 + } + ] +} +``` + +**Medialog Response:** +```json +{ + "entries": [ + { + "path": "https://main--repo--org.aem.live/media_m4n5o6/image1.jpg", + "operation": "ingest", + "timestamp": 1771937501500, + "resourcePath": "/drafts/scenario-g.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "m4n5o6", + "width": "1800", + "height": "1200" + }, + { + "path": "https://main--repo--org.aem.live/media_p7q8r9/image2.jpg", + "operation": "ingest", + "timestamp": 1771937501500, + "resourcePath": "/drafts/scenario-g.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "p7q8r9", + "width": "2000", + "height": "1500" + } + ] +} +``` + +**Learning:** +- Only embedded images tracked in medialog +- Icons: Not tracked +- PDF/SVG links: Create separate auditlog entries when clicked (unrelated timestamps) +- **Cannot determine page→PDF/SVG relationships from logs** +- **Parsing required** to find which pages reference PDFs/SVGs/Fragments + +--- + +### Re-Preview Test: Duplicate Events + +**Action:** Re-previewed scenario-a, scenario-b, scenario-g without changes + +**Auditlog Response:** +```json +{ + "entries": [ + { + "path": "/drafts/scenario-b", + 
"timestamp": 1771938338331, + "route": "preview", + "user": "user@example.com", + "status": 200 + }, + { + "path": "/drafts/scenario-a", + "timestamp": 1771938338335, + "route": "preview", + "user": "user@example.com", + "status": 200 + }, + { + "path": "/drafts/scenario-g", + "timestamp": 1771938338340, + "route": "preview", + "user": "user@example.com", + "status": 200 + } + ] +} +``` + +**Medialog Response:** +```json +{ + "entries": [ + { + "path": "https://main--repo--org.aem.live/media_m4n5o6/image1.jpg", + "operation": "reuse", + "timestamp": 1771938339903, + "resourcePath": "/drafts/scenario-g.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "m4n5o6" + }, + { + "path": "https://main--repo--org.aem.live/media_p7q8r9/image2.jpg", + "operation": "reuse", + "timestamp": 1771938339903, + "resourcePath": "/drafts/scenario-g.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "p7q8r9" + }, + { + "path": "https://main--repo--org.aem.live/media_a1b2c3/image1.jpg", + "operation": "reuse", + "timestamp": 1771938340350, + "resourcePath": "/drafts/scenario-a.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "a1b2c3" + }, + { + "path": "https://main--repo--org.aem.live/media_d4e5f6/image2.jpg", + "operation": "reuse", + "timestamp": 1771938340350, + "resourcePath": "/drafts/scenario-a.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "d4e5f6" + }, + { + "path": "https://main--repo--org.aem.live/media_g7h8i9/image3.jpg", + "operation": "reuse", + "timestamp": 1771938340350, + "resourcePath": "/drafts/scenario-a.md", + "contentType": "image/jpeg", + "user": "user@example.com", + "mediaHash": "g7h8i9" + } + ] +} +``` + +**Learning:** +- Every preview creates new events (even without changes) +- `operation: "reuse"` indicates media already exists +- Cannot rely on "new events = new content" +- Must compare current state vs previous state + +--- + +## 
Parsing Strategy for Linked Content + +### Why Parsing is Needed + +**Media Bus items** (images/videos) have `resourcePath` in medialog that directly links them to pages. **Content delivery items** (PDFs/SVGs/Fragments) do NOT have this linking - you must parse page HTML to find references. + +### Current Implementation + +The codebase already parses for fragments: + +```javascript +// From media-library.js (existing code) +const [fragmentLogEntries, pageLogEntries] = await Promise.all([ + fetchFragments(this.org, this.repo, 'main', since), + fetchPages(this.org, this.repo, 'main', since), +]); + +// Parse pages to build fragment usage map +const usageMap = await buildFragmentUsageMap(pageLogEntries, this.sitePath); + +const mergedData = mergeFragmentEntries( + fragmentsData, + fragmentLogEntries, + this.org, + this.repo, + usageMap // Usage map from parsing +); +``` + +### Unified Parsing Approach + +Extend the existing fragment parsing to include PDFs and SVGs: + +```javascript +async function buildContentUsageMap(pageLogEntries, org, repo) { + const usageMap = { + fragments: new Map(), // fragment path -> [page paths] + pdfs: new Map(), // pdf path -> [page paths] + svgs: new Map(), // svg path -> [page paths] + }; + + for (const pageEvent of pageLogEntries) { + // Fetch page HTML (single fetch per page) + const html = await fetchPageHtml(pageEvent.path, org, repo); + + // Extract all content types in one pass + const fragments = extractFragmentReferences(html); + const pdfs = extractLinks(html, /\.pdf$/); + const svgs = extractLinks(html, /\.svg$/); + + // Build usage maps + fragments.forEach(f => { + if (!usageMap.fragments.has(f)) { + usageMap.fragments.set(f, []); + } + usageMap.fragments.get(f).push(pageEvent.path); + }); + + pdfs.forEach(p => { + if (!usageMap.pdfs.has(p)) { + usageMap.pdfs.set(p, []); + } + usageMap.pdfs.get(p).push(pageEvent.path); + }); + + svgs.forEach(s => { + if (!usageMap.svgs.has(s)) { + usageMap.svgs.set(s, []); + } + 
usageMap.svgs.get(s).push(pageEvent.path);
+    });
+  }
+
+  return usageMap;
+}
+```
+
+### Extraction Helper Functions
+
+```javascript
+function extractFragmentReferences(html) {
+  // Fragments typically appear in href attributes
+  const fragmentPattern = /href="([^"]*\/fragments\/[^"]+)"/g;
+  const matches = [...html.matchAll(fragmentPattern)];
+  return matches.map(m => m[1]).map(normalizePath);
+}
+
+function extractLinks(html, pattern) {
+  // Extract href or src attributes matching pattern.
+  // Strip a trailing "$" anchor from the pattern first: the built regex
+  // still requires a closing quote after it, so an end-of-string anchor
+  // there would prevent any match (callers pass patterns like /\.pdf$/).
+  const source = pattern.source.replace(/\$$/, '');
+  const linkPattern = new RegExp(`(?:href|src)="([^"]*${source})"`, 'gi');
+  const matches = [...html.matchAll(linkPattern)];
+  return matches.map(m => m[1]).map(normalizePath);
+}
+
+function normalizePath(path) {
+  // Remove query params, hashes
+  return path.split('?')[0].split('#')[0];
+}
+```
+
+### Performance Optimization
+
+**Single-pass parsing:**
+```javascript
+// GOOD: Fetch once, extract all
+const html = await fetchPageHtml(page);
+const allContent = {
+  fragments: extractFragments(html),
+  pdfs: extractPdfs(html),
+  svgs: extractSvgs(html),
+};
+
+// BAD: Multiple fetches
+const fragments = extractFragments(await fetchPageHtml(page));
+const pdfs = extractPdfs(await fetchPageHtml(page)); // Duplicate fetch!
+```
+
+### When to Parse
+
+**Parse frequency:**
+- **Initial build:** Parse all pages to establish complete usage map
+- **Incremental update:** Parse only pages with new auditlog events
+- **Validation:** Periodic full re-parse (weekly/monthly) to catch any drift
+
+---
+
+## Operational Architecture
+
+### Overview
+
+Two-tier approach: historical data backfill (Tier 1) + ongoing incremental updates (Tier 2). Separates concerns, handles large sites efficiently, no server infrastructure for ongoing operations.
+ +### Architecture Diagram + +``` +Historical Data (2023-2026) Live Data (2026+) +┌──────────────────────┐ ┌────────────────────┐ +│ Status API │ │ Auditlog API │ +│ (all pages) │ │ Medialog API │ +└──────────┬───────────┘ └─────────┬──────────┘ + │ │ + v v + ┌──────────────┐ ┌──────────────┐ + │ Tier 1: │ │ Incremental │ + │ Medialog │ │ Refresh │ + │ Backfill CLI │ │ (10-min) │ + └──────┬───────┘ └──────┬───────┘ + │ │ + v │ + ┌──────────────┐ │ + │ Medialog API │ │ + │ (populated) │ │ + └──────┬───────┘ │ + │ │ + └─────────────┬────────────────────┘ + v + ┌──────────────┐ + │ Tier 2: │ + │ Index │ + │ Population │ + └──────┬───────┘ + v + ┌──────────────┐ + │ .da/ │ + │ mediaindex/ │ + │ index.json │ + └──────────────┘ +``` + +--- + +### Tier 1: Medialog Backfill (One-Time) + +**Purpose:** Populate medialog API with historical data for sites created before medialog existed + +**Implementation:** CLI tool at `/media-log-ingestor` + +**Process:** +1. Engineer runs CLI tool with org/repo credentials +2. Tool fetches all pages via Status API +3. Parses markdown content to extract media references +4. Sends entries to Medialog API in batches +5. Deduplicates based on media hash (first = ingest, subsequent = reuse) +6. 
Enriches with user information from preview logs + +**Command:** +```bash +logmedia ingest --org myorg --repo myrepo --token +``` + +**Characteristics:** +- One-time operation per repository +- Takes 5-30 minutes depending on site size +- Handles rate limits (10 req/sec) +- Resumable on failure +- Creates historical medialog entries with "ingest" operations + +**Output:** +- Medialog API populated with historical media references +- All pages analyzed, media tracked back to 2023 +- Ready for Tier 2 index building + +--- + +### Tier 2: Index Population & Refresh + +#### Initial Population (One-Time per Site) + +**Purpose:** Build complete media index from medialog + auditlog data + +**Implementation:** Separate DA app at `/tools/media-indexer` (to be created) + +**Process:** +1. Engineer navigates to `/tools/media-indexer` +2. Clicks "Build Initial Index" button +3. Server-side process: + - Fetches all medialog entries (from Tier 1 backfill) + - Fetches all auditlog entries (last 90 days) + - Processes and combines data + - Parses pages for PDFs/SVGs/Fragments + - Deduplicates and sorts + - Writes to `.da/mediaindex/index.json` +4. Displays progress (X of Y pages processed) +5. Completes in 30-60 seconds + +**Characteristics:** +- One-time per repository +- Server-side execution (handles large datasets) +- Shows progress indicator +- Atomic operation (succeeds or fails completely) +- Creates complete index with all historical + recent data + +**Output:** +``` +.da/mediaindex/ +├── index.json # Complete media index +├── medialog-meta.json # { lastFetchTime, entriesCount, lastRefreshBy } +└── lock.json # Distributed lock (initially unlocked) +``` + +--- + +#### Incremental Refresh (Ongoing) + +**Purpose:** Keep index up-to-date with new preview activity + +**Two Modes:** + +**1. 
User-Triggered Refresh** +- User clicks "Refresh" button in media library +- Acquires distributed lock +- Fetches logs since last update (incremental) +- Merges with existing index +- Updates UI immediately +- Takes 2-5 seconds + +**2. Background Auto-Refresh** +- Runs every 10 minutes from any open browser +- Checks if lock is available +- Checks if index is stale (> 5 minutes old) +- If both true, performs incremental refresh +- Silent operation, no UI disruption +- Dispatches event for UI refresh when complete + +--- + +### Distributed Locking Strategy + +**Problem:** Multiple users may have media library open simultaneously, each browser trying to refresh every 10 minutes. Without coordination, this creates race conditions and corrupts the index. + +**Solution:** Distributed lock using `.da/mediaindex/lock.json` + +#### Lock Structure + +```json +{ + "locked": true, + "lockedBy": "user@example.com|session-abc123", + "lockedAt": 1709567890000, + "operation": "auto-refresh", + "timeout": 300000 +} +``` + +#### Lock Behavior + +**Acquiring Lock:** +```javascript +1. Read current lock.json +2. If locked=false OR (now - lockedAt) > timeout: + - Write new lock with your identity + - Wait 500ms + - Re-read to verify (race condition check) + - If lockedBy matches yours, lock acquired + - Else, retry (max 3 attempts with 2s delay) +3. If locked by someone else: + - Return "lock held by X" +``` + +**Releasing Lock:** +```javascript +1. Read current lock.json +2. If lockedBy matches your identity: + - Write { locked: false, releasedAt: now } +3. 
Else, skip (don't own lock) +``` + +**Lock Timeout:** +- Default: 5 minutes (300000ms) +- After 5 minutes, lock considered expired +- Next process treats expired lock as unlocked +- Handles browser crashes gracefully + +**Priority:** +- User-triggered refresh > Background refresh +- User clicks button: Attempts lock immediately +- Background timer: Checks lock first, skips if held + +--- + +### File Structure & Metadata + +#### .da/mediaindex/index.json + +Main index file containing processed media entries: + +```json +[ + { + "hash": "abc123", + "url": "https://main--repo--org.aem.live/media_abc123/image.jpg", + "name": "image.jpg", + "page": "/drafts/my-page.md", + "timestamp": 1709567890000, + "user": "user@example.com", + "operation": "ingest", + "type": "img > jpg", + "source": "medialog" + }, + { + "path": "/media/doc.pdf", + "usedIn": ["/drafts/page1.md", "/drafts/page2.md"], + "timestamp": 1709567890000, + "user": "user@example.com", + "type": "document > pdf", + "status": "referenced", + "source": "auditlog-parsed" + } +] +``` + +#### .da/mediaindex/medialog-meta.json + +Metadata tracking last refresh: + +```json +{ + "lastFetchTime": 1709567890000, + "entriesCount": 1523, + "lastRefreshBy": "user@example.com" +} +``` + +#### .da/mediaindex/lock.json + +Distributed lock state: + +```json +{ + "locked": false, + "lockedBy": null, + "lockedAt": null, + "operation": null, + "releasedAt": 1709567890000 +} +``` + +--- + +### Operational Procedures + +#### Initial Setup (One-Time per Repository) + +**Step 1: Backfill Medialog (if site existed before 2026)** + +```bash +# Clone backfill tool +git clone +cd media-log-ingestor + +# Install dependencies +npm install + +# Get authentication token +npm run token # Shows instructions + +# Run backfill +npm run ingest -- --org myorg --repo myrepo --token + +# Wait for completion (5-30 minutes) +``` + +**Step 2: Build Initial Index** + +``` +1. Navigate to: https://main--repo--org.aem.live/tools/media-indexer +2. 
Click "Build Initial Index" +3. Wait for progress bar to complete (30-60 seconds) +4. Verify: "Index built successfully: 1523 entries" +``` + +**Step 3: Enable Auto-Refresh** + +``` +1. Open media library: https://main--repo--org.aem.live/tools/media +2. Auto-refresh starts automatically (10-minute interval) +3. Verify in console: "Background auto-refresh started" +``` + +--- + +#### Ongoing Operations + +**User-Triggered Refresh:** +- User clicks "Refresh" button +- Shows progress: "Fetching logs..." → "Processing..." → "Complete" +- Updates UI with latest media +- Frequency: As needed (typically when adding new content) + +**Background Auto-Refresh:** +- Runs silently every 10 minutes +- Logs to console: "Auto-refresh starting..." or "Index fresh, skipping" +- No user interaction required +- Handles multiple users gracefully via locking + +**Manual Unlock (Admin Only):** +- If lock stuck (rare), admin can force unlock +- Click "Force Unlock" button in index manager +- Confirms before releasing lock +- Use only when certain no other process is running + +--- + +#### Monitoring & Troubleshooting + +**Check Index Status:** + +```javascript +// In browser console +const status = await getIndexStatus(sitePath); +console.log(status); +// { +// lastRefresh: 1709567890000, +// entriesCount: 1523, +// isStale: false, +// locked: false +// } +``` + +**Common Issues:** + +| Issue | Symptom | Solution | +|-------|---------|----------| +| Index not updating | Old timestamps | Check lock status, force unlock if stuck | +| Missing recent media | Added but not shown | Trigger manual refresh | +| Duplicate entries | Same media appears multiple times | Full rebuild (weekly maintenance) | +| Lock timeout | "Cannot acquire lock" errors | Wait 5 minutes or force unlock | +| Parse failures | PDFs/SVGs not tracked | Check page accessibility, retry | + +**Maintenance Tasks:** + +- **Daily:** None (auto-refresh handles updates) +- **Weekly:** Review error logs, check for parse failures 
+- **Monthly:** Full index rebuild to eliminate drift +- **Quarterly:** Verify backfill integrity, re-run if needed + +--- + +### Performance & Scalability + +#### Incremental Refresh Performance + +| Site Size | Pages | Media | Refresh Time | +|-----------|-------|-------|--------------| +| Small | < 100 | < 500 | 1-2 seconds | +| Medium | 100-1000 | 500-5000 | 2-5 seconds | +| Large | 1000-10000 | 5000-50000 | 5-10 seconds | +| Very Large | > 10000 | > 50000 | 10-30 seconds | + +**Optimization Techniques:** +- Hash-based lookups (O(1) instead of O(N)) +- Group medialog by resourcePath (reduce iterations) +- Parse only changed pages (not entire site) +- Batch writes to DA (reduce API calls) +- Cache parsed HTML (avoid re-fetching) + +#### Lock Contention + +**Scenario:** 10 users have media library open + +- Each browser runs auto-refresh every 10 minutes +- On average, 1 lock attempt per minute across all users +- Lock held for 2-5 seconds during refresh +- Contention rate: < 10% (most attempts succeed) + +**Mitigation:** +- Lock timeout ensures stale locks don't block +- Retry logic with exponential backoff +- Background refresh skips if locked (no retry spam) +- User-triggered refresh has higher priority + +--- + +### Future Enhancements + +**Tier 1 Improvements:** +- Incremental medialog backfill (only new pages) +- Scheduled re-backfill for updated pages +- Webhook integration for real-time updates + +**Tier 2 Improvements:** +- Worker-based refresh (dedicated service vs browser-based) +- Streaming updates (websocket for live refresh) +- Index compression for very large sites +- Pagination for index loading + +**Locking Improvements:** +- Leader election (one browser becomes "leader" for all refreshes) +- Heartbeat mechanism (detect crashed processes faster) +- Lock priority queue (order competing processes) + +--- + +## Indexing Flowcharts + +### 1. 
Initial Index Build (First Pull) + +```mermaid +flowchart TD + Start([Start: First Pull / Backfill]) --> FetchLogs[Fetch ALL available logs:
- Auditlog
- Medialog
Note: Medialog is new, get all available] + FetchLogs --> FilterAudit{Filter auditlog
by file type} + + FilterAudit -->|.md files| ProcessPages[Group: Pages to process] + FilterAudit -->|.pdf, .svg, /fragments/| ProcessFiles[Group: Content delivery files] + + ProcessPages --> ParseContent[Parse page HTML:
- Extract fragment references
- Extract PDF links
- Extract SVG links] + + ParseContent --> BuildUsageMap[Build usage maps:
- fragments -> pages
- pdfs -> pages
- svgs -> pages] + + ProcessPages --> LoopPages{For each page} + LoopPages --> NormalizePath[Normalize: page -> page.md] + NormalizePath --> FindMedia[Find medialog entries WHERE:
- resourcePath = normalized path
- timestamp within 5s of audit timestamp] + + FindMedia --> HasMedia{Media found?} + + HasMedia -->|Yes| CreateRefs[Create index entries:
hash, page, timestamp, status='referenced'] + HasMedia -->|No| SkipPage[Page has no Media Bus items
May have PDFs/SVGs/fragments] + + CreateRefs --> MorePages{More pages?} + SkipPage --> MorePages + MorePages -->|Yes| LoopPages + MorePages -->|No| ProcessFiles + + ProcessFiles --> LoopFiles{For each file} + LoopFiles --> FileType{File type?} + + FileType -->|PDF/SVG/Fragment| CheckUsage[Check in usage map:
Referenced by any page?] + FileType -->|Image standalone| FindStandalone[Find in medialog:
originalFilename present?] + + CheckUsage -->|Referenced| CreateLinkedEntry[Create index entry:
path, usedIn pages, type, status='referenced'] + CheckUsage -->|Not referenced| CreateStandaloneFile[Create index entry:
path, type, status='file-unused'] + + FindStandalone -->|Found| CreateStandaloneEntry[Create index entry:
hash, originalFilename, status='uploaded-unused'] + FindStandalone -->|Not found| SkipFile[Skip: Not Media Bus item] + + CreateLinkedEntry --> MoreFiles{More files?} + CreateStandaloneFile --> MoreFiles + CreateStandaloneEntry --> MoreFiles + SkipFile --> MoreFiles + + MoreFiles -->|Yes| LoopFiles + MoreFiles -->|No| MergeUsage[Merge usage map into index] + + BuildUsageMap --> MergeUsage + MergeUsage --> SaveIndex[Save index to DA:
/.da/mediaindex/media.json] + + SaveIndex --> SaveMeta[Save metadata:
lastFetchTime, itemCount] + SaveMeta --> End([End: Index Built]) +``` + +--- + +### 2. Incremental Update Flow + +```mermaid +flowchart TD + Start([Start: Incremental Update]) --> LoadMeta[Load index metadata:
Get lastFetchTime] + LoadMeta --> FetchNew[Fetch NEW logs since lastFetchTime:
- Auditlog
- Medialog] + + FetchNew --> HasNewEvents{New events exist?} + HasNewEvents -->|No| EndEarly([End: No updates needed]) + HasNewEvents -->|Yes| LoadIndex[Load existing index from DA] + + LoadIndex --> ParseNewPages[Parse newly previewed pages:
Update usage maps for:
- Fragments
- PDFs
- SVGs] + + ParseNewPages --> LoopNewPages{For each page
in new auditlog} + + LoopNewPages --> NormalizePath[Normalize: page -> page.md] + NormalizePath --> FindNewMedia[Find NEW medialog entries:
- resourcePath = normalized path
- timestamp within 5s of audit timestamp] + + FindNewMedia --> LoadOldState[Load OLD index entries
for this page] + + LoadOldState --> CompareState{Compare:
Old vs New} + + CompareState --> ExtractHashes[Extract:
- oldHashes from index
- newHashes from medialog] + + ExtractHashes --> CheckChanges{Content
changed?} + + CheckChanges -->|newHashes empty| CheckAmbiguous{Old state
had media?} + CheckChanges -->|newHashes exist| CompareHashes[Compare hash sets] + + CheckAmbiguous -->|Yes| AmbiguousCase[Ambiguous: May have PDFs/SVGs
or all removed or processing delay] + CheckAmbiguous -->|No| StillText[Still text-only
No action needed] + + AmbiguousCase --> VerifyParse{Parse to verify?} + VerifyParse -->|Yes| QuickParse[Quick parse: Check for img tags] + VerifyParse -->|No| AssumeRemoved[Assume removed
Mark as medium confidence] + + QuickParse --> HasImages{Images found?} + HasImages -->|No| ConfirmedRemoved[Confirmed: All removed] + HasImages -->|Yes| DataInconsistency[Data inconsistency
Flag for investigation] + + ConfirmedRemoved --> MarkUnreferenced[DELETE or UPDATE all old entries:
status = 'unreferenced'] + AssumeRemoved --> MarkUnreferenced + + CompareHashes --> FindAdded[Added = newHashes NOT IN oldHashes] + CompareHashes --> FindRemoved[Removed = oldHashes NOT IN newHashes] + CompareHashes --> FindUnchanged[Unchanged = intersection] + + FindAdded --> HasAdded{Additions?} + HasAdded -->|Yes| InsertNew[INSERT new index entries:
hash, page, timestamp, status='referenced'] + HasAdded -->|No| CheckRemoved + + FindRemoved --> CheckRemoved{Removals?} + CheckRemoved -->|Yes| DeleteOld[DELETE or UPDATE removed entries:
status = 'unreferenced'] + CheckRemoved -->|No| CheckUnchanged + + FindUnchanged --> CheckUnchanged{Unchanged?} + CheckUnchanged -->|Yes| UpdateTimestamp[UPDATE timestamp only
for unchanged entries] + CheckUnchanged -->|No| NextPage + + InsertNew --> NextPage + DeleteOld --> NextPage + UpdateTimestamp --> NextPage + MarkUnreferenced --> NextPage + StillText --> NextPage + DataInconsistency --> NextPage + + NextPage{More pages?} + NextPage -->|Yes| LoopNewPages + NextPage -->|No| UpdateLinkedContent[Update linked content from usage maps:
- Add new fragment references
- Remove old references
- Update PDF/SVG usage] + + UpdateLinkedContent --> SaveUpdated[Save updated index to DA] + + SaveUpdated --> UpdateMeta[Update metadata:
lastFetchTime = now
itemCount = index.length] + + UpdateMeta --> End([End: Index Updated]) +``` + +--- + +### 3. Page State Detection Logic + +```mermaid +flowchart TD + Start([Page Preview Event]) --> GetAudit[Auditlog entry:
page X at time T] + + GetAudit --> SearchMedia[Search medialog for entries:
- resourcePath = X.md
- timestamp in T, T+5000ms] + + SearchMedia --> MediaCount{Count of
media entries} + + MediaCount -->|0 entries| CheckHistory1{Check index:
Page existed before?} + MediaCount -->|N entries| HasMedia[Scenario: Page has N Media Bus items
May also have PDFs/SVGs/fragments] + + CheckHistory1 -->|No| NewTextPage[New text-only page
OR page with only PDFs/SVGs/fragments
Action: Check parsed content] + CheckHistory1 -->|Yes, had media| Ambiguous[Ambiguous scenario:
- All Media Bus items removed?
- Or page has PDFs/SVGs only?
- Or processing delay?] + CheckHistory1 -->|Yes, no media| StillText[Still text-only or non-Media Bus
Action: Update timestamp only] + + Ambiguous --> DecideParse{Parse to verify?} + DecideParse -->|Yes| ParseCheck[Parse HTML for img tags] + DecideParse -->|No| AssumeRemoved[Assume removed
Medium confidence] + + ParseCheck --> ImagesFound{Images in HTML?} + ImagesFound -->|No| ConfirmRemoved[Confirmed: All removed
Action: Mark old entries unreferenced] + ImagesFound -->|Yes| Inconsistent[Inconsistency detected
Action: Flag for investigation] + + HasMedia --> GroupByTimestamp[Group media by timestamp:
All should have same timestamp] + + GroupByTimestamp --> ExtractHashes[Extract: List of mediaHashes] + + ExtractHashes --> CheckPrevious{Check index:
Page existed before?} + + CheckPrevious -->|No| NewPage[New page with media
Action: Create all entries] + CheckPrevious -->|Yes| CompareHashes[Compare old vs new hashes] + + CompareHashes --> DiffResult{Difference?} + + DiffResult -->|Same hashes| NoChange[No content change
Action: Update timestamps] + DiffResult -->|Added hashes| MediaAdded[Media added
Action: Insert new entries] + DiffResult -->|Removed hashes| SomeRemoved[Media removed
Action: Delete/flag entries] + DiffResult -->|Both added & removed| MediaChanged[Media changed
Action: Insert + Delete] + + NewTextPage --> End([End]) + ConfirmRemoved --> End + AssumeRemoved --> End + Inconsistent --> End + StillText --> End + NewPage --> End + NoChange --> End + MediaAdded --> End + SomeRemoved --> End + MediaChanged --> End +``` + +--- + +### 4. Medialog Entry Classification + +```mermaid +flowchart TD + Start([Medialog Entry]) --> CheckOperation{Check:
operation field} + + CheckOperation -->|"ingest"| CheckPath{Has
resourcePath?} + CheckOperation -->|"reuse"| ReuseCase[Reuse Operation] + CheckOperation -->|"delete"| DeleteCase[Delete Operation
Future: TBD by API team] + + CheckPath -->|Yes| IngestInPage[Ingest via Markup
New media added to page] + CheckPath -->|No| CheckOriginal{Has
originalFilename?} + + CheckOriginal -->|Yes| StandaloneUpload[Standalone Upload
Media previewed alone] + CheckOriginal -->|No| AnomalyCase[Anomaly: No path, no filename
Should not occur] + + IngestInPage --> ExtractName1[Extract name from URL path:
media_hash.jpg] + IngestInPage --> UseResource1[Use resourcePath for doc field] + IngestInPage --> NoOwner1[NO owner/repo fields] + + StandaloneUpload --> ExtractName2[Extract name from originalFilename:
clay-banks-cabin.jpg] + StandaloneUpload --> NoDoc2[Doc field = empty/null] + StandaloneUpload --> HasOwner2[HAS owner/repo fields] + + ReuseCase --> ExtractName3[Extract name from URL path:
media_hash.jpg] + ReuseCase --> UseResource3[Use resourcePath for doc field] + ReuseCase --> NoOwner3[NO owner/repo fields] + + DeleteCase --> WaitSpec[Wait for API spec confirmation] + + ExtractName1 --> CreateIndex1[Create index entry:
source='medialog-ingest-page'] + UseResource1 --> CreateIndex1 + NoOwner1 --> CreateIndex1 + + ExtractName2 --> CreateIndex2[Create index entry:
source='medialog-ingest-standalone'] + NoDoc2 --> CreateIndex2 + HasOwner2 --> CreateIndex2 + + ExtractName3 --> CreateIndex3[Create index entry:
source='medialog-reuse'] + UseResource3 --> CreateIndex3 + NoOwner3 --> CreateIndex3 + + CreateIndex1 --> End([Process Complete]) + CreateIndex2 --> End + CreateIndex3 --> End + AnomalyCase --> End + WaitSpec --> End +``` + +--- + +## Decision Tables + +### Table 1: Matching Auditlog to Medialog + +| Auditlog Entry | Expected Medialog | Action | +|----------------|-------------------|--------| +| Page preview @ T | N entries with resourcePath=page.md, timestamp in [T, T+5000] | Link entries to page, extract media list | +| Page preview @ T | 0 entries matching | Page is text-only OR all media removed OR has only PDFs/SVGs/fragments | +| PDF/SVG/Fragment preview @ T | 0 entries | Expected - Not on Media Bus | +| Image preview @ T | 1 entry with originalFilename=image.jpg | Standalone image upload | + +### Table 2: Index Update Actions + +| Old Index State | New Medialog State | Action | Index Update | +|-----------------|-------------------|--------|--------------| +| Page not in index | Medialog has N hashes | New page | INSERT N entries with status='referenced' | +| Page has [A,B] | Medialog has [A,B] | No change | UPDATE timestamps only | +| Page has [A,B] | Medialog has [A,B,C] | Media added | INSERT entry for C | +| Page has [A,B,C] | Medialog has [A,B] | Media removed | DELETE or FLAG entry for C as 'unreferenced' | +| Page has [A,B] | Medialog empty | Ambiguous | Parse to verify or assume all removed | +| Page has [A,B] | Medialog has [C,D] | Complete change | DELETE [A,B], INSERT [C,D] | + +### Table 3: Processing Optimization + +| Condition | Optimization | Benefit | +|-----------|--------------|---------| +| Event timestamp < lastFetchTime | Skip event | Avoid reprocessing old data | +| Same page, multiple events in batch | Process only latest | Reduce redundant work | +| No changes detected in comparison | Skip write operation | Reduce DA API calls | +| medialog entries have same timestamp | Batch process as single page state | Improve efficiency | +| 
Parsing multiple content types | Single fetch, extract all | Minimize network calls | + +### Table 4: Content Type Decision Matrix + +| Content Type | Found In | Requires Parsing | Usage Tracking Method | +|-------------|----------|------------------|----------------------| +| Images | Medialog | No | resourcePath field provides direct link | +| Videos | Medialog | No | resourcePath field provides direct link | +| PDFs | Auditlog only | Yes | Parse page HTML for PDF links | +| SVGs | Auditlog only | Yes | Parse page HTML for SVG links | +| Fragments | Auditlog only | Yes | Parse page HTML for fragment references | +| Icons | Not tracked | N/A | Not included in index | + +--- + +## Implementation Pseudocode + +### Initial Build Algorithm + +```javascript +async function buildInitialIndex(org, repo, ref = 'main') { + const index = []; + + // 1. Fetch all available logs (medialog is new, backfill what exists) + const auditLogEntries = await fetchAuditLog(org, repo, ref, since=null); + const mediaLogEntries = await fetchMediaLog(org, repo, ref, since=null); + + // 2. Separate pages from files + const pages = auditLogEntries.filter(e => isPage(e.path)); + const files = auditLogEntries.filter(e => !pages.includes(e)); + + // 3. Parse pages to build usage maps for linked content + const usageMap = await buildContentUsageMap(pages, org, repo); + + // 4. 
Process each page + for (const pageEvent of pages) { + const normalizedPath = normalizePath(pageEvent.path); // Add .md if needed + + // Find matching medialog entries within 5-second window + const pageMedia = mediaLogEntries.filter(m => + m.resourcePath === normalizedPath && + m.timestamp >= pageEvent.timestamp && + m.timestamp < pageEvent.timestamp + 5000 + ); + + // Create index entries for Media Bus items + for (const media of pageMedia) { + index.push({ + hash: media.mediaHash, + page: normalizedPath, + url: media.path, + name: extractName(media), + timestamp: media.timestamp, + user: media.user, + operation: media.operation, + status: 'referenced', + source: 'medialog', + type: 'image' // or 'video' + }); + } + } + + // 5. Process standalone files and linked content + for (const fileEvent of files) { + const filePath = fileEvent.path; + + if (isPdfOrSvg(filePath)) { + // Check if referenced by any page + const linkedPages = usageMap.pdfs.get(filePath) || usageMap.svgs.get(filePath) || []; + + index.push({ + path: filePath, + usedIn: linkedPages, + timestamp: fileEvent.timestamp, + user: fileEvent.user, + type: getFileType(filePath), + status: linkedPages.length > 0 ? 'referenced' : 'file-unused', + source: 'auditlog-parsed' + }); + + } else if (isFragment(filePath)) { + // Check if referenced by any page + const linkedPages = usageMap.fragments.get(filePath) || []; + + index.push({ + path: filePath, + usedIn: linkedPages, + timestamp: fileEvent.timestamp, + user: fileEvent.user, + type: 'fragment', + status: linkedPages.length > 0 ? 
'referenced' : 'file-unused', + source: 'auditlog-parsed' + }); + + } else if (isImage(filePath)) { + // Check if in medialog (standalone upload) + const mediaEntry = mediaLogEntries.find(m => + m.originalFilename === filePath && + Math.abs(m.timestamp - fileEvent.timestamp) < 5000 + ); + + if (mediaEntry) { + index.push({ + hash: mediaEntry.mediaHash, + url: mediaEntry.path, + name: extractFromOriginalFilename(mediaEntry.originalFilename), + originalFilename: mediaEntry.originalFilename, + timestamp: mediaEntry.timestamp, + user: mediaEntry.user, + status: 'uploaded-unused', + source: 'medialog', + type: 'image' + }); + } + } + } + + // 6. Save index + await saveMediaSheet(index, `/${org}/${repo}`); + await saveLogMeta(`/${org}/${repo}`, { + lastFetchTime: Date.now(), + processedItems: index.length + }); + + return index; +} +``` + +### Incremental Update Algorithm + +```javascript +async function updateIndex(org, repo, ref = 'main') { + // 1. Load existing state + const meta = await loadLogMeta(`/${org}/${repo}`); + const existingIndex = await loadMediaSheet(`/${org}/${repo}`); + const lastFetchTime = meta?.lastFetchTime || null; + + // 2. Fetch new events since last update + const newAuditLog = await fetchAuditLog(org, repo, ref, since=lastFetchTime); + const newMediaLog = await fetchMediaLog(org, repo, ref, since=lastFetchTime); + + if (newAuditLog.length === 0 && newMediaLog.length === 0) { + return existingIndex; // No updates needed + } + + // 3. Parse newly previewed pages + const newPages = newAuditLog.filter(e => isPage(e.path)); + const newUsageMap = await buildContentUsageMap(newPages, org, repo); + + // 4. 
Process each new page event + const updatedIndex = [...existingIndex]; + + for (const pageEvent of newPages) { + const normalizedPath = normalizePath(pageEvent.path); + + // Find new medialog entries for this page + const newPageMedia = newMediaLog.filter(m => + m.resourcePath === normalizedPath && + m.timestamp >= pageEvent.timestamp && + m.timestamp < pageEvent.timestamp + 5000 + ); + + // Get old state from index + const oldPageEntries = existingIndex.filter(e => + e.page === normalizedPath && e.source === 'medialog' + ); + const oldHashes = new Set(oldPageEntries.map(e => e.hash)); + const newHashes = new Set(newPageMedia.map(m => m.mediaHash)); + + // Handle ambiguous case: no medialog entries + if (newPageMedia.length === 0 && oldPageEntries.length > 0) { + // Option A: Parse to verify (recommended for high-value pages) + const shouldVerify = oldPageEntries.length >= 5 || isImportantPage(normalizedPath); + + if (shouldVerify) { + const hasImages = await quickParseForImages(pageEvent.path, org, repo); + + if (hasImages === false) { + // Confirmed: All removed + for (const oldEntry of oldPageEntries) { + const idx = updatedIndex.indexOf(oldEntry); + if (idx !== -1) updatedIndex.splice(idx, 1); + } + } else if (hasImages === true) { + // Data inconsistency - log for investigation + console.warn('Data inconsistency detected:', normalizedPath); + } + } else { + // Option B: Trust logs, assume removed + for (const oldEntry of oldPageEntries) { + const idx = updatedIndex.indexOf(oldEntry); + if (idx !== -1) updatedIndex.splice(idx, 1); + } + } + + continue; + } + + // Detect changes + const added = [...newHashes].filter(h => !oldHashes.has(h)); + const removed = [...oldHashes].filter(h => !newHashes.has(h)); + const unchanged = [...newHashes].filter(h => oldHashes.has(h)); + + // Apply changes + + // 1. 
Remove deleted media + for (const hash of removed) { + const idx = updatedIndex.findIndex(e => + e.hash === hash && e.page === normalizedPath + ); + if (idx !== -1) { + updatedIndex.splice(idx, 1); + } + } + + // 2. Add new media + for (const hash of added) { + const mediaEntry = newPageMedia.find(m => m.mediaHash === hash); + updatedIndex.push({ + hash: mediaEntry.mediaHash, + page: normalizedPath, + url: mediaEntry.path, + name: extractName(mediaEntry), + timestamp: mediaEntry.timestamp, + user: mediaEntry.user, + operation: mediaEntry.operation, + status: 'referenced', + source: 'medialog', + type: 'image' + }); + } + + // 3. Update timestamps for unchanged media + for (const hash of unchanged) { + const idx = updatedIndex.findIndex(e => + e.hash === hash && e.page === normalizedPath + ); + if (idx !== -1) { + const mediaEntry = newPageMedia.find(m => m.mediaHash === hash); + updatedIndex[idx].timestamp = mediaEntry.timestamp; + } + } + } + + // 5. Update linked content (PDFs, SVGs, Fragments) from usage map + for (const fileEvent of newAuditLog.filter(e => !isPage(e.path))) { + const filePath = fileEvent.path; + + if (isPdfOrSvg(filePath) || isFragment(filePath)) { + const usageKey = isPdf(filePath) ? 'pdfs' : + isSvg(filePath) ? 'svgs' : 'fragments'; + const linkedPages = newUsageMap[usageKey].get(filePath) || []; + + // Update or create entry + const existingIdx = updatedIndex.findIndex(e => e.path === filePath); + + if (existingIdx !== -1) { + // Update existing entry + updatedIndex[existingIdx].usedIn = linkedPages; + updatedIndex[existingIdx].timestamp = fileEvent.timestamp; + updatedIndex[existingIdx].status = linkedPages.length > 0 ? 'referenced' : 'file-unused'; + } else { + // Create new entry + updatedIndex.push({ + path: filePath, + usedIn: linkedPages, + timestamp: fileEvent.timestamp, + user: fileEvent.user, + type: getFileType(filePath), + status: linkedPages.length > 0 ? 
'referenced' : 'file-unused', + source: 'auditlog-parsed' + }); + } + } + } + + // 6. Save updated index + await saveMediaSheet(updatedIndex, `/${org}/${repo}`); + await saveLogMeta(`/${org}/${repo}`, { + lastFetchTime: Date.now(), + processedItems: updatedIndex.length + }); + + return updatedIndex; +} +``` + +### Content Usage Map Builder + +```javascript +async function buildContentUsageMap(pageLogEntries, org, repo) { + const usageMap = { + fragments: new Map(), + pdfs: new Map(), + svgs: new Map(), + }; + + for (const pageEvent of pageLogEntries) { + try { + // Fetch page HTML + const html = await fetchPageHtml(pageEvent.path, org, repo); + + // Extract all content types in single pass + const fragments = extractFragmentReferences(html); + const pdfs = extractLinks(html, /\.pdf$/); + const svgs = extractLinks(html, /\.svg$/); + + const normalizedPage = normalizePath(pageEvent.path); + + // Build usage maps + fragments.forEach(f => { + if (!usageMap.fragments.has(f)) { + usageMap.fragments.set(f, []); + } + if (!usageMap.fragments.get(f).includes(normalizedPage)) { + usageMap.fragments.get(f).push(normalizedPage); + } + }); + + pdfs.forEach(p => { + if (!usageMap.pdfs.has(p)) { + usageMap.pdfs.set(p, []); + } + if (!usageMap.pdfs.get(p).includes(normalizedPage)) { + usageMap.pdfs.get(p).push(normalizedPage); + } + }); + + svgs.forEach(s => { + if (!usageMap.svgs.has(s)) { + usageMap.svgs.set(s, []); + } + if (!usageMap.svgs.get(s).includes(normalizedPage)) { + usageMap.svgs.get(s).push(normalizedPage); + } + }); + + } catch (error) { + console.error(`Failed to parse page ${pageEvent.path}:`, error); + // Continue with other pages + } + } + + return usageMap; +} + +async function fetchPageHtml(pagePath, org, repo, ref = 'main') { + const url = `https://${ref}--${repo}--${org}.aem.page${pagePath}`; + const response = await fetch(url); + if (!response.ok) { + throw new Error(`Failed to fetch ${url}: ${response.status}`); + } + return response.text(); +} + +function 
extractFragmentReferences(html) {
+  const fragmentPattern = /href="([^"]*\/fragments\/[^"]+)"/g;
+  const matches = [...html.matchAll(fragmentPattern)];
+  return matches.map(m => normalizePath(m[1]));
+}
+
+function extractLinks(html, pattern) {
+  const linkPattern = new RegExp(`(?:href|src)="([^"]*${pattern.source})"`, 'gi');
+  const matches = [...html.matchAll(linkPattern)];
+  return matches.map(m => normalizePath(m[1]));
+}
+
+async function quickParseForImages(pagePath, org, repo, ref = 'main') {
+  try {
+    const html = await fetchPageHtml(pagePath, org, repo, ref);
+    // Simple check: Does it contain <img> tags with media_ URLs?
+    return html.includes('media_') && html.includes('<img');
+  } catch (error) {
+    return null; // Could not verify (fetch/parse failure)
+  }
+}
+```
+
+### Helper Functions
+
+```javascript
+function normalizePath(path) {
+  // Strip query params and anchors
+  let cleanPath = path.split('?')[0].split('#')[0];
+
+  // Add .md extension for extensionless pages: /drafts/page -> /drafts/page.md
+  if (!cleanPath.includes('.') && !cleanPath.startsWith('/media/')) {
+    cleanPath = `${cleanPath}.md`;
+  }
+
+  return cleanPath;
+}
+
+function extractName(mediaEntry) {
+  // For "ingest" with originalFilename
+  if (mediaEntry.operation === 'ingest' && mediaEntry.originalFilename) {
+    return mediaEntry.originalFilename.split('/').pop();
+  }
+
+  // For "reuse" or "ingest" without originalFilename
+  const cleanPath = mediaEntry.path.split('?')[0].split('#')[0];
+  return cleanPath.split('/').pop();
+}
+
+function isPage(path) {
+  return (path.endsWith('.md') ||
+          (!path.includes('.') && !path.startsWith('/media/'))) &&
+         !path.includes('/fragments/');
+}
+
+function isPdfOrSvg(path) {
+  return path.endsWith('.pdf') || path.endsWith('.svg');
+}
+
+function isPdf(path) {
+  return path.endsWith('.pdf');
+}
+
+function isSvg(path) {
+  return path.endsWith('.svg');
+}
+
+function isFragment(path) {
+  return path.includes('/fragments/');
+}
+
+function isImage(path) {
+  const imageExts = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
+  return imageExts.some(ext => path.toLowerCase().endsWith(ext));
+}
+
+function getFileType(path) {
+  if (path.endsWith('.pdf')) return 'pdf';
+  if (path.endsWith('.svg')) return 'svg';
+  if (path.includes('/fragments/')) return 'fragment';
+  if 
(isImage(path)) return 'image'; + return 'unknown'; +} +``` + +--- + +## Edge Cases & Handling + +### 1. Missing Auditlog Entry for Medialog Events + +**Scenario:** Medialog has entries but no matching auditlog entry + +**Causes:** +- Processing delay (auditlog slower than medialog) +- Auditlog API failure +- Events outside fetched time range + +**Handling:** +```javascript +// Queue orphaned medialog entries for next processing cycle +const orphanedMedia = mediaLog.filter(m => { + return !auditLog.some(a => + normalizePath(a.path) === m.resourcePath && + Math.abs(a.timestamp - m.timestamp) < 10000 + ); +}); + +// Retry on next incremental update with wider time range +``` + +### 2. Timestamp Drift Beyond 5 Seconds + +**Scenario:** Medialog timestamp > 5 seconds after auditlog + +**Causes:** +- Heavy server load +- Batch processing delays +- Queue backlog + +**Handling:** +```javascript +// Adaptive time window +const timeWindow = calculateAdaptiveWindow(processingLoad); +// Start at 5s, increase to 10s or 15s if needed + +// Or use backup matching by sequence +matchByTimestampProximity(auditLog, mediaLog); +``` + +### 3. Duplicate Hash in Multiple Pages + +**Scenario:** Same image used in 5 different pages + +**Handling:** +```javascript +// Create separate index entry for each page reference +// Reference count = unique pages +const referenceCount = index.filter(e => e.hash === targetHash).length; + +// Each entry tracks its specific page usage +``` + +### 4. 
Page Deleted (No Preview Events) + +**Scenario:** User deletes page entirely via DA + +**Handling:** +```javascript +// After X days (e.g., 30) without preview events: +const staleThresholdMs = 30 * 24 * 60 * 60 * 1000; +const cutoffTime = Date.now() - staleThresholdMs; + +const staleEntries = index.filter(e => + e.timestamp < cutoffTime +); + +// Option A: Flag as potentially stale +staleEntries.forEach(e => e.status = 'potentially-stale'); + +// Option B: Verify by fetching actual document +// If 404, mark as unreferenced +``` + +### 5. Race Condition: Concurrent Previews + +**Scenario:** Two users preview same page simultaneously + +**Handling:** +```javascript +// Use "last-write-wins" with timestamp comparison +if (newEntry.timestamp > existingEntry.timestamp) { + // New entry is more recent, use it + replaceEntry(existingEntry, newEntry); +} else { + // Existing entry is more recent, skip + skipEntry(newEntry); +} + +// OR: Keep both entries and deduplicate later +// based on latest timestamp per hash+page combo +``` + +### 6. 
Ambiguous Empty Medialog + +**Scenario:** Auditlog shows page preview, medialog is empty, old state had media + +**Possible Causes:** +- All Media Bus items removed +- Page now has only PDFs/SVGs/Fragments +- Processing delay +- API failure + +**Handling:** +```javascript +async function handleAmbiguousCase(pageEvent, oldEntries) { + // Wait for processing delay + await sleep(3000); + + // Retry medialog fetch + const retryEntries = await fetchMediaLog(...); + + if (retryEntries.length > 0) { + return { action: 'update', entries: retryEntries }; + } + + // Still empty - parse to verify for high-value pages + const shouldVerify = oldEntries.length >= 5 || isImportantPage(pageEvent.path); + + if (shouldVerify) { + const hasImages = await quickParseForImages(pageEvent.path); + + if (hasImages === false) { + return { action: 'remove-all', confidence: 'high' }; + } else if (hasImages === true) { + return { action: 'flag-investigation', confidence: 'low' }; + } + } + + // Default: assume removed + return { action: 'remove-all', confidence: 'medium' }; +} +``` + +### 7. Parsing Failures + +**Scenario:** Cannot fetch or parse page HTML + +**Handling:** +```javascript +async function buildContentUsageMap(pageLogEntries, org, repo) { + const usageMap = { fragments: new Map(), pdfs: new Map(), svgs: new Map() }; + const failures = []; + + for (const pageEvent of pageLogEntries) { + try { + const html = await fetchPageHtml(pageEvent.path, org, repo); + // Extract content... 
+ } catch (error) { + failures.push({ page: pageEvent.path, error: error.message }); + // Continue with other pages + } + } + + // Log failures for investigation + if (failures.length > 0) { + console.warn('Failed to parse pages:', failures); + } + + return usageMap; +} +``` + +--- + +## Performance Considerations + +### Scaling Factors + +| Factor | Impact | Mitigation | +|--------|--------|------------| +| **Number of pages** | O(N) processing time | Batch processing, parallel processing | +| **Media per page** | O(M) comparison operations | Hash-based lookups instead of linear search | +| **Event frequency** | Incremental update frequency | Adaptive polling (more frequent when active) | +| **Index size** | Storage and read time | Compress, paginate, or archive old entries | +| **Time window size** | False matches | Optimize to 5s, expand only if needed | +| **Parsing pages** | Network and CPU cost | Cache parsed results, parse only changed pages | + +### Optimization Strategies + +```javascript +// 1. Use Map for O(1) lookups instead of Array.filter +const indexMap = new Map(); +existingIndex.forEach(e => { + const key = `${e.hash}|${e.page}`; + indexMap.set(key, e); +}); + +// 2. Group medialog entries by resourcePath first +const mediaByPage = groupBy(mediaLog, 'resourcePath'); + +// 3. Process only changed pages +const changedPages = new Set(newAuditLog.map(e => e.path)); + +// 4. Batch writes to DA +const BATCH_SIZE = 100; +await saveBatch(updatedEntries, BATCH_SIZE); + +// 5. Cache parsed HTML results +const parseCache = new Map(); +const getCachedHtml = async (page) => { + if (!parseCache.has(page)) { + parseCache.set(page, await fetchPageHtml(page)); + } + return parseCache.get(page); +}; + +// 6. 
Parallel parsing for multiple pages
+const htmlResults = await Promise.all(
+  pages.map(p => fetchPageHtml(p.path, org, repo))
+);
+```
+
+### Parsing Performance
+
+**Minimize parsing overhead:**
+```javascript
+// Parse only when necessary
+const shouldParse = (pageEvent, oldState) => {
+  // Always parse for initial build
+  if (!oldState) return true;
+
+  // Parse if page has new auditlog event
+  if (pageEvent.timestamp > oldState.lastParsed) return true;
+
+  // Skip if recently parsed
+  const cacheAge = Date.now() - oldState.lastParsed;
+  return cacheAge > (24 * 60 * 60 * 1000); // 24 hours
+};
+
+// Single-pass extraction
+const parsePageContent = (html) => {
+  return {
+    fragments: extractFragmentReferences(html),
+    pdfs: extractLinks(html, /\.pdf$/),
+    svgs: extractLinks(html, /\.svg$/),
+    hasImages: html.includes('media_') && html.includes('<img'),
+  };
+};
+```
+
+---
+
+## Query Patterns
+
+### Reference Count Calculation
+
+```javascript
+function calculateReferenceCounts(index) {
+  const counts = new Map();
+
+  index.forEach(entry => {
+    if (entry.status !== 'referenced') return; // Skip unreferenced
+
+    const key = entry.hash || entry.path;
+    if (!counts.has(key)) {
+      counts.set(key, {
+        key,
+        pages: new Set(),
+        lastUsed: 0,
+        type: entry.type
+      });
+    }
+
+    const count = counts.get(key);
+
+    // For Media Bus items (have page field)
+    if (entry.page) {
+      count.pages.add(entry.page);
+    }
+
+    // For linked content (have usedIn field)
+    if (entry.usedIn) {
+      entry.usedIn.forEach(p => count.pages.add(p));
+    }
+
+    if (entry.timestamp > count.lastUsed) {
+      count.lastUsed = entry.timestamp;
+    }
+  });
+
+  // Convert to array with reference counts
+  return Array.from(counts.values()).map(c => ({
+    key: c.key,
+    type: c.type,
+    referenceCount: c.pages.size,
+    pages: Array.from(c.pages),
+    lastUsed: c.lastUsed
+  }));
+}
+```
+
+### Filtering by Content Type
+
+```javascript
+function filterByType(index, type) {
+  return index.filter(e => e.type === type && e.status === 'referenced');
+}
+
+// Examples
+const images = filterByType(index, 'image');
+const pdfs = filterByType(index, 'pdf');
+const fragments = filterByType(index, 'fragment');
+```
+
+### 
Filtering Unreferenced Media + +```javascript +function getUnreferencedMedia(index) { + return index.filter(e => + e.status === 'unreferenced' || + e.status === 'uploaded-unused' || + e.status === 'file-unused' + ); +} +``` + +### Getting Usage Details for Media Info Panel + +```javascript +function getMediaUsage(index, identifier) { + // identifier can be hash (for images) or path (for PDFs/SVGs/fragments) + const usageEntries = index.filter(e => + (e.hash === identifier || e.path === identifier) && + e.status === 'referenced' + ); + + if (usageEntries.length === 0) return []; + + // For Media Bus items (images/videos) + if (usageEntries[0].hash) { + const byPage = groupBy(usageEntries, 'page'); + + return Object.entries(byPage).map(([page, entries]) => ({ + page, + previewCount: entries.length, + lastPreview: Math.max(...entries.map(e => e.timestamp)), + users: [...new Set(entries.map(e => e.user))] + })); + } + + // For linked content (PDFs/SVGs/fragments) + if (usageEntries[0].usedIn) { + return usageEntries[0].usedIn.map(page => ({ + page, + previewCount: 1, // Can't track individual previews for linked content + lastPreview: usageEntries[0].timestamp, + users: [usageEntries[0].user] + })); + } + + return []; +} +``` + +--- + +## Next Steps + +### 1. Infrastructure Setup + +- [ ] Verify medialog backfill CLI tool is production-ready +- [ ] Create `/tools/media-indexer` DA app for initial index population +- [ ] Set up `.da/mediaindex/` directory structure +- [ ] Document authentication requirements and token management +- [ ] Test on small pilot repository first + +### 2. 
Implementation Phase + +- [ ] Implement initial index build function (in media-indexer app) +- [ ] Implement incremental update function (in browser) +- [ ] Add content usage map builder (fragments, PDFs, SVGs) +- [ ] Implement distributed locking mechanism +- [ ] Add background auto-refresh with 10-minute interval +- [ ] Create user-triggered refresh UI +- [ ] Add error handling and retry logic +- [ ] Test with production data at scale + +### 3. Testing & Validation + +- [ ] Unit tests for matching logic +- [ ] Integration tests with real logs +- [ ] Performance testing with large datasets (10,000+ pages) +- [ ] Validate reference counts accuracy +- [ ] Test parsing extraction functions +- [ ] Test distributed lock under concurrent load +- [ ] Test browser crash recovery (lock timeout) +- [ ] Validate medialog backfill completeness + +### 4. Operational Readiness + +- [ ] Write operational runbooks for engineers +- [ ] Create monitoring dashboards for index health +- [ ] Document troubleshooting procedures +- [ ] Set up alerts for failures (lock timeouts, parse errors) +- [ ] Establish maintenance schedule (monthly rebuilds) +- [ ] Train support team on index operations + +### 5. Monitoring & Maintenance + +- [ ] Log processing metrics (time, entries, errors) +- [ ] Alert on anomalies (orphaned entries, large drifts) +- [ ] Periodic full rebuild (weekly/monthly) +- [ ] Dashboard for index health +- [ ] Track parsing failures and success rates +- [ ] Monitor lock contention and timeout rates +- [ ] Track refresh performance across site sizes + +### 6. 
Future Enhancements + +- [ ] Handle "delete" operations when API confirmed +- [ ] Add support for video tracking (similar to images) +- [ ] Implement pagination for large indexes +- [ ] Add caching layer for frequent queries +- [ ] Explore real-time updates via webhooks +- [ ] Optimize parsing performance (parallel processing, caching) +- [ ] Implement leader election for background refresh +- [ ] Add incremental medialog backfill for updated pages +- [ ] Explore worker-based refresh (replace browser-based) + +--- + +## References + +- **AEM Media Documentation:** https://www.aem.live/docs/media +- **Auditlog API:** https://www.aem.live/docs/admin.html#tag/log/operation/getLogs +- **Medialog API:** (Similar to auditlog, dedicated for Media Bus) +- **Testing Repository:** `kmurugulla/brightpath` +- **Test Date:** February 24, 2026 + +--- + +## Appendix: Test Data Summary + +### All Scenarios Tested + +| Scenario | Pages | Media | Key Learning | +|----------|-------|-------|--------------| +| A | scenario-a.md | 3 images | First-time ingest via markup | +| B | scenario-b.md | 0 (text) | Auditlog without medialog | +| H | Standalone files | 1 image, 1 PDF, 1 SVG | Standalone vs embedded | +| G | scenario-g.md | 2 images, links | Mixed media behavior, parsing required | +| Re-preview | All 3 pages | Same media | Duplicate event handling | + +### Timestamp Patterns Observed + +| Event Type | Typical Delay | Range Observed | +|------------|--------------|----------------| +| Auditlog to Medialog | 1.5-2 seconds | 800ms - 3400ms | +| Multi-page bulk preview | Nearly simultaneous | 4-9ms apart | +| Media on same page | Identical timestamp | 0ms (exact match) | + +### Content Type Tracking Summary + +| Content | Logs | Parsing | Usage Link | +|---------|------|---------|------------| +| Images/Videos | Medialog | No | resourcePath field | +| PDFs/SVGs/Fragments | Auditlog only | Yes | Parse HTML | + +--- + +**Last Updated:** February 24, 2026