// Global WASM initialization state.
// `wasmReady` flips to true only after init() has resolved successfully.
let wasmReady = false;
// Promise for the initialization attempt that is in flight (or has completed).
// It is reset to null when an attempt fails so that a later call can retry.
let wasmInitPromise = null;

/**
 * Initialize the WASM module; safe to call repeatedly and concurrently.
 *
 * - Returns immediately once the module is ready.
 * - Concurrent callers share the single in-flight attempt.
 * - A failed attempt is NOT cached: the shared promise is cleared so the next
 *   call starts a fresh attempt instead of replaying the old rejection forever.
 *
 * Relies on the statically imported `init` (dynamic import() is not available
 * in a service worker, per the file header).
 *
 * @returns {Promise<void>} Resolves when the module is ready; rejects with the
 *   error raised by init() when loading fails.
 */
async function initWASM() {
  if (wasmReady) return;
  if (wasmInitPromise) return wasmInitPromise;

  const attempt = (async () => {
    console.log('[Sentience Background] Loading WASM module...');

    // The WASM module expects a js_click_element import. There is no `window`
    // in a service worker, so it is installed on globalThis; clicking is not
    // possible from the background context, so the stub only warns.
    globalThis.js_click_element = (_id) => {
      console.warn('[Sentience Background] js_click_element called in background (ignored)');
    };

    // init() comes from the static import of the generated WASM glue module.
    await init();

    wasmReady = true;
    console.log('[Sentience Background] ✓ WASM ready!');
    console.log('[Sentience Background] Available functions: analyze_page, analyze_page_with_options, prune_for_api');
  })();

  // The failure handler runs asynchronously, i.e. after `wasmInitPromise` has
  // been assigned below, so the reset also works when init() throws
  // synchronously inside the attempt.
  const tracked = attempt.catch((error) => {
    console.error('[Sentience Background] WASM initialization failed:', error);
    if (wasmInitPromise === tracked) {
      wasmInitPromise = null;
    }
    throw error;
  });

  wasmInitPromise = tracked;
  return tracked;
}

// Initialize WASM on service worker startup. Failures are only logged here;
// handlers call initWASM() again before using the module.
initWASM().catch((err) => {
  console.error('[Sentience Background] Failed to initialize WASM:', err);
});
/**
 * Message handler for all extension communication.
 *
 * Supported actions:
 *  - 'captureScreenshot': answers { success, screenshot } or { success: false, error }
 *  - 'processSnapshot':   answers { success, result }     or { success: false, error }
 *
 * Returns true when the response is delivered asynchronously (keeps the
 * message channel open) and false when it has already been sent.
 * The whole body is wrapped in try/catch so a malformed message produces an
 * error response instead of an uncaught exception in the service worker.
 */
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  try {
    // `request` is whatever the sender passed; tolerate null/undefined.
    const action = request?.action;

    if (action === 'captureScreenshot') {
      // sender.tab is only present when the message comes from a tab, so it
      // must not be dereferenced unconditionally (the id is unused anyway).
      handleScreenshotCapture(sender?.tab?.id, request.options)
        .then((screenshot) => {
          sendResponse({ success: true, screenshot });
        })
        .catch((error) => {
          console.error('[Sentience Background] Screenshot capture failed:', error);
          sendResponse({
            success: false,
            error: error?.message || 'Screenshot capture failed'
          });
        });
      return true; // Async response
    }

    if (action === 'processSnapshot') {
      handleSnapshotProcessing(request.rawData, request.options)
        .then((result) => {
          sendResponse({ success: true, result });
        })
        .catch((error) => {
          console.error('[Sentience Background] Snapshot processing failed:', error);
          sendResponse({
            success: false,
            error: error?.message || 'Snapshot processing failed'
          });
        });
      return true; // Async response
    }

    console.warn('[Sentience Background] Unknown action:', action);
    sendResponse({ success: false, error: 'Unknown action' });
    return false;
  } catch (error) {
    console.error('[Sentience Background] Fatal error in message handler:', error);
    try {
      sendResponse({
        success: false,
        error: `Fatal error: ${error?.message || 'Unknown error'}`
      });
    } catch (e) {
      // Deliberate best-effort: sendResponse may already have been used or the
      // channel may be closed; there is nobody left to notify.
    }
    return false;
  }
});

/**
 * Capture the visible area of the active tab in the current window.
 *
 * @param {number|undefined} _tabId - Sender tab id; unused because
 *   captureVisibleTab is called with a null window id.
 * @param {Object} [options] - Screenshot options.
 * @param {string} [options.format='png'] - Image format passed to captureVisibleTab.
 * @param {number} [options.quality=90] - Quality value passed to captureVisibleTab.
 * @returns {Promise<string>} Data URL of the captured image.
 * @throws {Error} Wrapping the underlying failure message.
 */
async function handleScreenshotCapture(_tabId, options = {}) {
  try {
    // The default parameter only covers `undefined`; guard `null` as well.
    const { format = 'png', quality = 90 } = options ?? {};

    const dataUrl = await chrome.tabs.captureVisibleTab(null, { format, quality });

    console.log(`[Sentience Background] Screenshot captured: ${format}, size: ${dataUrl.length} bytes`);
    return dataUrl;
  } catch (error) {
    console.error('[Sentience Background] Screenshot error:', error);
    throw new Error(`Failed to capture screenshot: ${error.message}`);
  }
}
/**
 * Run the WASM analysis over raw element data collected by injected_api.js.
 *
 * Steps: validate input, cap the element count, make sure WASM is loaded,
 * analyze (with or without options), then prune the raw data for the API.
 *
 * @param {Array} rawData - Raw element data; anything beyond MAX_ELEMENTS is dropped.
 * @param {Object} [options] - Snapshot options. `analyze_page_with_options` is
 *   used when `limit` or `filter` is truthy, `analyze_page` otherwise.
 * @returns {Promise<{elements: *, raw_elements: *}>} Analyzed elements and the
 *   pruned raw data (the capped raw data itself if pruning throws).
 * @throws {Error} If rawData is not an array, WASM is not initialized, or the
 *   analysis fails / loses the race against the timeout.
 */
async function handleSnapshotProcessing(rawData, options = {}) {
  const MAX_ELEMENTS = 10000; // Safety limit to prevent hangs
  const WASM_TIMEOUT_MS = 18000; // Shorter than the 20 s timeout in content.js
  const startTime = performance.now();
  // The default parameter only covers `undefined`; a null `options` would
  // otherwise crash on `options.limit`.
  const opts = options ?? {};

  try {
    if (!Array.isArray(rawData)) {
      throw new Error('rawData must be an array');
    }

    let elements = rawData;
    if (elements.length > MAX_ELEMENTS) {
      console.warn(`[Sentience Background] ⚠️ Large dataset: ${elements.length} elements. Limiting to ${MAX_ELEMENTS} to prevent hangs.`);
      elements = elements.slice(0, MAX_ELEMENTS);
    }

    // Ensure WASM is initialized
    await initWASM();
    if (!wasmReady) {
      throw new Error('WASM module not initialized');
    }

    console.log(`[Sentience Background] Processing ${elements.length} elements with options:`, opts);

    let analyzedElements;
    let timeoutId;
    try {
      // The async wrapper turns a synchronous throw into a rejection.
      // NOTE(review): if the WASM call is synchronous it blocks this thread
      // and the timer below can only win when the call returns a thenable.
      const wasmPromise = (async () => (
        opts.limit || opts.filter
          ? analyze_page_with_options(elements, opts)
          : analyze_page(elements)
      ))();

      const timeoutPromise = new Promise((_, reject) => {
        timeoutId = setTimeout(
          () => reject(new Error('WASM processing timeout (>18s)')),
          WASM_TIMEOUT_MS
        );
      });

      analyzedElements = await Promise.race([wasmPromise, timeoutPromise]);
    } catch (e) {
      const errorMsg = e.message || 'Unknown WASM error';
      console.error(`[Sentience Background] WASM analyze_page failed: ${errorMsg}`, e);
      throw new Error(`WASM analyze_page failed: ${errorMsg}`);
    } finally {
      // Always release the timer; otherwise every call leaves an 18 s timer
      // pending after the result is already known.
      clearTimeout(timeoutId);
    }

    // Prune elements for API (prevents 413 errors on large sites).
    // Best-effort: fall back to the unpruned data when pruning fails.
    let prunedRawData;
    try {
      prunedRawData = prune_for_api(elements);
    } catch (e) {
      console.warn('[Sentience Background] prune_for_api failed, using original data:', e);
      prunedRawData = elements;
    }

    const duration = performance.now() - startTime;
    console.log(`[Sentience Background] ✓ Processed: ${analyzedElements.length} analyzed, ${prunedRawData.length} pruned (${duration.toFixed(1)}ms)`);

    return {
      elements: analyzedElements,
      raw_elements: prunedRawData
    };
  } catch (error) {
    const duration = performance.now() - startTime;
    console.error(`[Sentience Background] Processing error after ${duration.toFixed(1)}ms:`, error);
    throw error;
  }
}
throw error; + } } + +console.log('[Sentience Background] Service worker ready'); + +// Global error handlers to prevent extension crashes +self.addEventListener('error', (event) => { + console.error('[Sentience Background] Global error caught:', event.error); + event.preventDefault(); // Prevent extension crash +}); + +self.addEventListener('unhandledrejection', (event) => { + console.error('[Sentience Background] Unhandled promise rejection:', event.reason); + event.preventDefault(); // Prevent extension crash +}); diff --git a/src/extension/content.js b/src/extension/content.js index de24fa5a..62ae4086 100644 --- a/src/extension/content.js +++ b/src/extension/content.js @@ -1,22 +1,298 @@ -// content.js - ISOLATED WORLD -console.log('[Sentience] Bridge loaded.'); +// content.js - ISOLATED WORLD (Bridge between Main World and Background) +console.log('[Sentience Bridge] Loaded.'); -// 1. Pass Extension ID to Main World (So WASM knows where to load from) +// Detect if we're in a child frame (for iframe support) +const isChildFrame = window !== window.top; +if (isChildFrame) { + console.log('[Sentience Bridge] Running in child frame:', window.location.href); +} + +// 1. Pass Extension ID to Main World (So API knows where to find resources) document.documentElement.dataset.sentienceExtensionId = chrome.runtime.id; -// 2. Proxy for Screenshots (The only thing Isolated World needs to do) +// 2. 
// 2. Message Router - Handles all communication between page and background
window.addEventListener('message', (event) => {
  // Security check: only accept messages from same window
  if (event.source !== window) return;

  // event.data is page-controlled and may be null, undefined or a primitive;
  // optional chaining keeps such messages from throwing inside the listener.
  switch (event.data?.type) {
    case 'SENTIENCE_SCREENSHOT_REQUEST':
      handleScreenshotRequest(event.data);
      break;

    case 'SENTIENCE_SNAPSHOT_REQUEST':
      handleSnapshotRequest(event.data);
      break;

    case 'SENTIENCE_SHOW_OVERLAY':
      handleShowOverlay(event.data);
      break;

    case 'SENTIENCE_CLEAR_OVERLAY':
      handleClearOverlay();
      break;

    default:
      // Ignore unknown message types
      break;
  }
});

/**
 * Forward a screenshot request to the background worker and post the outcome
 * back to the page as a SENTIENCE_SCREENSHOT_RESULT message.
 *
 * The result always carries `requestId`, a `screenshot` (data URL or null) and
 * an `error` (undefined on success). Runtime errors reported through
 * chrome.runtime.lastError and synchronous sendMessage failures are reported
 * the same way, mirroring the snapshot path.
 *
 * @param {Object} data - Message payload from the page.
 * @param {string} data.requestId - Correlation id echoed back in the result.
 * @param {Object} [data.options] - Screenshot options forwarded unchanged.
 */
function handleScreenshotRequest(data) {
  const postResult = (screenshot, error) => {
    window.postMessage({
      type: 'SENTIENCE_SCREENSHOT_RESULT',
      requestId: data.requestId,
      screenshot,
      error
    }, '*');
  };

  try {
    chrome.runtime.sendMessage(
      { action: 'captureScreenshot', options: data.options },
      (response) => {
        // Reading lastError inside the callback also marks it as checked.
        const runtimeError = chrome.runtime.lastError;
        if (runtimeError) {
          console.error('[Sentience Bridge] Chrome runtime error:', runtimeError.message);
          postResult(null, `Chrome runtime error: ${runtimeError.message}`);
          return;
        }
        postResult(response?.success ? response.screenshot : null, response?.error);
      }
    );
  } catch (error) {
    // Without this reply the page would wait for its own timeout.
    console.error('[Sentience Bridge] Exception sending message:', error);
    postResult(null, `Failed to send message: ${error.message}`);
  }
}
/**
 * Handle snapshot processing requests.
 * Sends raw DOM data to the background worker for WASM processing and posts
 * exactly one SENTIENCE_SNAPSHOT_RESULT message back to the page, carrying
 * either `elements` + `raw_elements` or an `error`, plus the elapsed `duration`.
 *
 * Exactly-once delivery is enforced with the `responded` flag, which is shared
 * by the timeout, the response callback and the synchronous-failure path.
 *
 * @param {Object} data - Message payload from the page.
 * @param {string} data.requestId - Correlation id echoed back in the result.
 * @param {Array} data.rawData - Raw element data to process.
 * @param {Object} [data.options] - Snapshot options forwarded unchanged.
 */
function handleSnapshotRequest(data) {
  const startTime = performance.now();
  // 20 seconds: longer than the background worker's 18 s WASM timeout and
  // shorter than the 25 s timeout in injected_api.js, so the more specific
  // error always arrives first.
  const TIMEOUT_MS = 20000;
  let responded = false;

  const postResult = (fields, duration) => {
    window.postMessage({
      type: 'SENTIENCE_SNAPSHOT_RESULT',
      requestId: data.requestId,
      ...fields,
      duration
    }, '*');
  };

  // Timeout protection: if background doesn't respond, send error
  const timeoutId = setTimeout(() => {
    if (responded) return;
    responded = true;
    const duration = performance.now() - startTime;
    console.error(`[Sentience Bridge] ⚠️ WASM processing timeout after ${duration.toFixed(1)}ms`);
    postResult({ error: 'WASM processing timeout - background script may be unresponsive' }, duration);
  }, TIMEOUT_MS);

  try {
    chrome.runtime.sendMessage(
      {
        action: 'processSnapshot',
        rawData: data.rawData,
        options: data.options
      },
      (response) => {
        if (responded) return; // Already responded via timeout
        responded = true;
        clearTimeout(timeoutId);

        const duration = performance.now() - startTime;

        // Handle Chrome extension errors (e.g., background script crashed)
        const runtimeError = chrome.runtime.lastError;
        if (runtimeError) {
          console.error('[Sentience Bridge] Chrome runtime error:', runtimeError.message);
          postResult({ error: `Chrome runtime error: ${runtimeError.message}` }, duration);
          return;
        }

        if (response?.success && response.result) {
          console.log(`[Sentience Bridge] ✓ WASM processing complete in ${duration.toFixed(1)}ms`);
          postResult({
            elements: response.result.elements,
            raw_elements: response.result.raw_elements
          }, duration);
          return;
        }

        // A "successful" response without a result used to throw here, after
        // `responded` was already set, leaving the page without any reply.
        const reason = response?.success
          ? 'Malformed response from background: missing result'
          : (response?.error || 'Processing failed');
        console.error('[Sentience Bridge] WASM processing failed:', reason);
        postResult({ error: reason }, duration);
      }
    );
  } catch (error) {
    if (responded) return;
    responded = true;
    clearTimeout(timeoutId);
    const duration = performance.now() - startTime;
    console.error('[Sentience Bridge] Exception sending message:', error);
    postResult({ error: `Failed to send message: ${error.message}` }, duration);
  }
}
// ============================================================================
// Visual Overlay - Shadow DOM Implementation
// ============================================================================

const OVERLAY_HOST_ID = 'sentience-overlay-host';
// Handle of the pending auto-clear timer, or null when no overlay is scheduled.
let overlayTimeout = null;

/**
 * Build the highlight box for one element, or return null when the element
 * cannot be drawn (missing entry, missing bbox, non-numeric geometry).
 *
 * Geometry comes from a page-posted message, so every bbox value is coerced to
 * a finite number before being interpolated into CSS text; anything else
 * (e.g. a string carrying extra declarations) is rejected.
 *
 * @param {Object} element - Element descriptor ({ id, bbox, importance, visual_cues }).
 * @param {*} targetElementId - Id of the element to mark as the target.
 * @param {number} maxImportance - Largest importance in the batch (>= 1).
 * @returns {HTMLElement|null} The box element, including badge/target marker.
 */
function buildOverlayBox(element, targetElementId, maxImportance) {
  const bbox = element?.bbox;
  if (!bbox) return null;

  const left = Number(bbox.x);
  const top = Number(bbox.y);
  const width = Number(bbox.width);
  const height = Number(bbox.height);
  if (![left, top, width, height].every(Number.isFinite)) return null;

  const isTarget = element.id === targetElementId;
  const isPrimary = element.visual_cues?.is_primary || false;
  const importance = element.importance || 0;

  // Color: Red (target), Blue (primary), Green (regular)
  let color;
  if (isTarget) color = '#FF0000';
  else if (isPrimary) color = '#0066FF';
  else color = '#00FF00';

  // Scale opacity and border width based on importance
  const importanceRatio = maxImportance > 0 ? importance / maxImportance : 0.5;
  const borderOpacity = isTarget ? 1.0 : (isPrimary ? 0.9 : Math.max(0.4, 0.5 + importanceRatio * 0.5));
  const fillOpacity = borderOpacity * 0.2;
  const borderWidth = isTarget ? 2 : (isPrimary ? 1.5 : Math.max(0.5, Math.round(importanceRatio * 2)));

  // Fill opacity as a two-digit hex alpha suffix for the #RRGGBB color.
  const hexOpacity = Math.round(fillOpacity * 255).toString(16).padStart(2, '0');

  const box = document.createElement('div');
  box.style.cssText = `
    position: absolute;
    left: ${left}px;
    top: ${top}px;
    width: ${width}px;
    height: ${height}px;
    border: ${borderWidth}px solid ${color};
    background-color: ${color}${hexOpacity};
    box-sizing: border-box;
    opacity: ${borderOpacity};
    pointer-events: none;
  `;

  // Badge showing the importance score (textContent, so no markup injection).
  if (importance > 0 || isPrimary) {
    const badge = document.createElement('span');
    badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`;
    badge.style.cssText = `
      position: absolute;
      top: -18px;
      left: 0;
      background: ${color};
      color: white;
      font-size: 11px;
      font-weight: bold;
      padding: 2px 6px;
      font-family: Arial, sans-serif;
      border-radius: 3px;
      opacity: 0.95;
      white-space: nowrap;
      pointer-events: none;
    `;
    box.appendChild(badge);
  }

  if (isTarget) {
    const targetIndicator = document.createElement('span');
    targetIndicator.textContent = '🎯';
    targetIndicator.style.cssText = `
      position: absolute;
      top: -18px;
      right: 0;
      font-size: 16px;
      pointer-events: none;
    `;
    box.appendChild(targetIndicator);
  }

  return box;
}

/**
 * Show visual overlay highlighting elements using Shadow DOM.
 * Replaces any existing overlay and schedules auto-removal after 5 seconds.
 *
 * @param {Object} data - Message data.
 * @param {Array} data.elements - Element descriptors to highlight.
 * @param {*} [data.targetElementId] - Id of the element drawn as the target.
 */
function handleShowOverlay(data) {
  const { elements, targetElementId } = data;

  if (!elements || !Array.isArray(elements)) {
    console.warn('[Sentience Bridge] showOverlay: elements must be an array');
    return;
  }

  removeOverlay();

  // Create host with Shadow DOM for CSS isolation
  const host = document.createElement('div');
  host.id = OVERLAY_HOST_ID;
  host.style.cssText = `
    position: fixed !important;
    top: 0 !important;
    left: 0 !important;
    width: 100vw !important;
    height: 100vh !important;
    pointer-events: none !important;
    z-index: 2147483647 !important;
    margin: 0 !important;
    padding: 0 !important;
  `;
  // document.body can still be null early in page load.
  (document.body || document.documentElement).appendChild(host);

  // Attach shadow root (closed mode for security and CSS isolation)
  const shadow = host.attachShadow({ mode: 'closed' });

  // Max importance for scaling, floored at 1. A reduce is used instead of
  // Math.max(...array) because spreading a very large array into call
  // arguments can exceed the engine's argument limit.
  const maxImportance = elements.reduce(
    (max, entry) => Math.max(max, entry?.importance || 0),
    1
  );

  elements.forEach((element) => {
    const box = buildOverlayBox(element, targetElementId, maxImportance);
    if (box) shadow.appendChild(box);
  });

  console.log(`[Sentience Bridge] Overlay shown for ${elements.length} elements`);

  // Auto-remove after 5 seconds
  overlayTimeout = setTimeout(() => {
    removeOverlay();
    console.log('[Sentience Bridge] Overlay auto-cleared after 5 seconds');
  }, 5000);
}

/**
 * Clear overlay manually
 */
function handleClearOverlay() {
  removeOverlay();
  console.log('[Sentience Bridge] Overlay cleared manually');
}

/**
 * Remove the overlay host from the DOM (if present) and cancel the pending
 * auto-clear timer (if any).
 */
function removeOverlay() {
  const existing = document.getElementById(OVERLAY_HOST_ID);
  if (existing) {
    existing.remove();
  }

  if (overlayTimeout) {
    clearTimeout(overlayTimeout);
    overlayTimeout = null;
  }
}
// --- HELPER: Deep Walker with Native Filter ---
/**
 * Collect every connected element under `root`, descending into open shadow
 * roots, while skipping metadata/script tags and the descendants of <svg>.
 *
 * Tag names are compared case-insensitively: HTML elements report upper-case
 * tag names in HTML documents, but SVG-namespace elements keep their original
 * case ("svg"), so an exact comparison against 'SVG' never matches.
 * The filter returns FILTER_REJECT, which makes the TreeWalker skip the
 * rejected node together with its whole subtree.
 *
 * @param {Node} [root=document] - Node whose descendants are walked.
 * @returns {Element[]} Elements in document order; shadow-tree elements follow
 *   their host.
 */
function getAllElements(root = document) {
  const SKIPPED_TAGS = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'HEAD']);
  const tagOf = (node) => (typeof node?.tagName === 'string' ? node.tagName.toUpperCase() : '');

  const filter = {
    acceptNode(node) {
      // Skip metadata and script/style tags
      if (SKIPPED_TAGS.has(tagOf(node))) {
        return NodeFilter.FILTER_REJECT;
      }
      // Skip deep SVG children: keep the <svg> itself, drop what is inside it.
      if (tagOf(node.parentNode) === 'SVG' && tagOf(node) !== 'SVG') {
        return NodeFilter.FILTER_REJECT;
      }
      return NodeFilter.FILTER_ACCEPT;
    }
  };

  const elements = [];
  const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, filter);
  while (walker.nextNode()) {
    const node = walker.currentNode;
    if (!node.isConnected) continue;

    elements.push(node);
    if (node.shadowRoot) {
      // Pushed one by one rather than spread, so a huge shadow tree cannot
      // exceed the call-argument limit.
      for (const inner of getAllElements(node.shadowRoot)) {
        elements.push(inner);
      }
    }
  }
  return elements;
}
// --- HELPER: Safe Class Name Extractor (Handles SVGAnimatedString) ---
/**
 * Return an element's class name as a plain string.
 *
 * HTML elements expose `className` as a string; SVG elements expose an object
 * with `baseVal` / `animVal`. Anything missing, falsy or of another type
 * yields ''.
 *
 * @param {Object|null|undefined} el - Element-like object.
 * @returns {string} The class name, or '' when none can be extracted.
 */
function getClassName(el) {
  const raw = el ? el.className : undefined;
  if (!raw) return '';

  switch (typeof raw) {
    case 'string':
      return raw;

    case 'object': {
      // Prefer baseVal, then animVal, but only when they really are strings.
      for (const key of ['baseVal', 'animVal']) {
        if (key in raw && typeof raw[key] === 'string') {
          return raw[key];
        }
      }
      // Last resort: stringify the object itself.
      try {
        return String(raw);
      } catch (e) {
        return '';
      }
    }

    default:
      return '';
  }
}

// --- HELPER: Paranoid String Converter (Handles SVGAnimatedString) ---
/**
 * Convert an arbitrary DOM property value into a string or null.
 *
 * - null / undefined  -> null
 * - string            -> unchanged
 * - object            -> string `baseVal`, else string `animVal`, else String(value)
 * - other primitives  -> String(value)
 * A value that cannot be stringified yields null instead of throwing.
 *
 * @param {*} value - Value to convert.
 * @returns {string|null} String form of the value, or null.
 */
function toSafeString(value) {
  if (value == null) return null;
  if (typeof value === 'string') return value;

  if (typeof value === 'object') {
    // SVG animated values: take the first string-valued standard property.
    for (const key of ['baseVal', 'animVal']) {
      if (key in value && typeof value[key] === 'string') {
        return value[key];
      }
    }
  }

  // Objects without a usable SVG property and all remaining primitives are
  // force-cast; a failing cast degrades to null.
  try {
    return String(value);
  } catch (e) {
    return null;
  }
}
// --- HELPER: Get SVG Fill/Stroke Color ---
/**
 * Return the effective paint color of an <svg> element as `rgb(r, g, b)`.
 * SVGs are colored through fill/stroke rather than backgroundColor, so the
 * computed `fill` is tried first and `stroke` second.
 *
 * A paint is used only when it is not none/transparent and, for comma-separated
 * rgb()/rgba() values, its alpha is at least 0.9. Other values starting with
 * "rgb(" are returned unchanged; everything else is ignored.
 *
 * The tag check is case-insensitive because SVG-namespace elements report
 * their tag name in its original lower case ("svg").
 *
 * @param {Element|null|undefined} el - Candidate element.
 * @returns {string|null} Color string, or null when `el` is not an <svg> or
 *   has no usable fill/stroke.
 */
function getSVGColor(el) {
  if (!el || typeof el.tagName !== 'string' || el.tagName.toLowerCase() !== 'svg') {
    return null;
  }

  // Normalize one computed paint value; null means "not usable".
  const toOpaqueRgb = (paint) => {
    if (!paint || paint === 'none' || paint === 'transparent' || paint === 'rgba(0, 0, 0, 0)') {
      return null;
    }
    const rgbaMatch = paint.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
    if (rgbaMatch) {
      const alpha = rgbaMatch[4] ? parseFloat(rgbaMatch[4]) : 1.0;
      return alpha >= 0.9 ? `rgb(${rgbaMatch[1]}, ${rgbaMatch[2]}, ${rgbaMatch[3]})` : null;
    }
    return paint.startsWith('rgb(') ? paint : null;
  };

  const style = window.getComputedStyle(el);
  // Fill first (most common for icons), stroke as the fallback.
  return toOpaqueRgb(style.fill) ?? toOpaqueRgb(style.stroke);
}
// --- HELPER: Get Effective Background Color ---
/**
 * Find the nearest usable color for an element by walking up its ancestors
 * (the element itself first, at most 10 levels).
 *
 * At each level:
 *  - an <svg> contributes its fill/stroke color via getSVGColor();
 *  - otherwise the computed backgroundColor is used unless it is transparent.
 *    Comma-separated rgb()/rgba() values with alpha >= 0.9 are normalized to
 *    `rgb(r, g, b)`; more transparent ones are skipped so the walk continues;
 *    any other non-transparent value (e.g. a named color) is returned as-is.
 *
 * The <svg> check is case-insensitive because SVG-namespace elements report
 * their tag name in lower case.
 *
 * @param {Element|null|undefined} el - Starting element.
 * @returns {string|null} Color string, or null when nothing is found.
 */
function getEffectiveBackgroundColor(el) {
  if (!el) return null;

  const MAX_DEPTH = 10; // Bounds the walk on very deep trees
  const isSvg = (node) => typeof node.tagName === 'string' && node.tagName.toLowerCase() === 'svg';

  let current = el;
  for (let depth = 0; current && depth < MAX_DEPTH; depth += 1) {
    // SVGs use fill/stroke instead of backgroundColor.
    if (isSvg(current)) {
      const svgColor = getSVGColor(current);
      if (svgColor) return svgColor;
    }

    const bgColor = window.getComputedStyle(current).backgroundColor;

    if (bgColor && bgColor !== 'transparent' && bgColor !== 'rgba(0, 0, 0, 0)') {
      const rgbaMatch = bgColor.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
      if (!rgbaMatch) {
        // Un-normalized rgb(), named color or other format: return as-is.
        return bgColor;
      }
      const alpha = rgbaMatch[4] ? parseFloat(rgbaMatch[4]) : 1.0;
      if (alpha >= 0.9) {
        // Convert to rgb() format for Gateway compatibility
        return `rgb(${rgbaMatch[1]}, ${rgbaMatch[2]}, ${rgbaMatch[3]})`;
      }
      // Semi-transparent: keep climbing.
    }

    current = current.parentElement;
  }

  return null;
}
// --- HELPER: Occlusion Check (Optimized to avoid layout thrashing) ---
/**
 * Decide whether another element covers the center point of `el`.
 *
 * To avoid calling elementFromPoint for every element, statically positioned
 * elements whose z-index is unset or at most 10 are assumed to be visible.
 * For everything else the element at the rect's center is probed; `el` counts
 * as visible when that element is `el`, a descendant of `el`, or an ancestor
 * of `el`. An off-screen center or an empty probe result also counts as
 * "not occluded".
 *
 * @param {Element} el - Element under test.
 * @param {{x: number, y: number, width: number, height: number}} rect - Its bounding rect.
 * @param {{zIndex: string, position: string}} style - Its computed style.
 * @returns {boolean} true when a different, unrelated element is on top.
 */
function isOccluded(el, rect, style) {
  const stackLevel = parseInt(style.zIndex, 10);
  const inNormalFlow = style.position === 'static';
  const hasLowStackLevel = isNaN(stackLevel) || stackLevel <= 10;

  // Fast path: the vast majority of elements never reach the expensive probe.
  if (inNormalFlow && hasLowStackLevel) {
    return false;
  }

  const centerX = rect.x + rect.width / 2;
  const centerY = rect.y + rect.height / 2;

  const centerOffScreen =
    centerX < 0 || centerX > window.innerWidth ||
    centerY < 0 || centerY > window.innerHeight;
  if (centerOffScreen) {
    return false;
  }

  const hit = document.elementFromPoint(centerX, centerY);
  if (!hit) {
    return false;
  }

  const sameOrRelated = el === hit || el.contains(hit) || hit.contains(el);
  return !sameOrRelated;
}
// --- HELPER: Snapshot Processing Bridge ---
/**
 * Send raw element data to the content script (which relays it to the
 * background worker) and wait for the matching SENTIENCE_SNAPSHOT_RESULT.
 *
 * The promise settles exactly once: with the result, with the error carried by
 * the result message, with a timeout error after 25 s, or with a send error if
 * postMessage itself throws. The message listener and the timer are released
 * on every path.
 *
 * @param {Array} rawData - Raw element data to process.
 * @param {Object} options - Snapshot options forwarded unchanged.
 * @returns {Promise<{elements: *, raw_elements: *, duration: *}>}
 */
function processSnapshotInBackground(rawData, options) {
  return new Promise((resolve, reject) => {
    // Timestamp + random suffix: never empty, and far less collision-prone
    // than a bare `Math.random().toString(36).substring(7)`, which can yield
    // an empty or one-character id.
    const requestId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`;
    const TIMEOUT_MS = 25000; // 25 seconds (longer than content.js timeout)
    let resolved = false;
    let timeout = null;

    const listener = (e) => {
      // The page can receive arbitrary messages (null, strings, ...), so the
      // payload is read defensively.
      const message = e.data;
      if (message?.type !== 'SENTIENCE_SNAPSHOT_RESULT' || message.requestId !== requestId) return;
      if (resolved) return; // Already handled

      settle();
      if (message.error) {
        reject(new Error(message.error));
      } else {
        resolve({
          elements: message.elements,
          raw_elements: message.raw_elements,
          duration: message.duration
        });
      }
    };

    // Marks the request as finished and releases the timer and the listener.
    const settle = () => {
      resolved = true;
      clearTimeout(timeout);
      window.removeEventListener('message', listener);
    };

    timeout = setTimeout(() => {
      if (resolved) return;
      settle();
      reject(new Error('WASM processing timeout - extension may be unresponsive. Try reloading the extension.'));
    }, TIMEOUT_MS);

    window.addEventListener('message', listener);

    try {
      window.postMessage({
        type: 'SENTIENCE_SNAPSHOT_REQUEST',
        requestId,
        rawData,
        options
      }, '*');
    } catch (error) {
      if (resolved) return;
      settle();
      reject(new Error(`Failed to send snapshot request: ${error.message}`));
    }
  });
}
Try reloading the extension.')); + } + }, TIMEOUT_MS); + + const listener = (e) => { + if (e.data.type === 'SENTIENCE_SNAPSHOT_RESULT' && e.data.requestId === requestId) { + if (resolved) return; // Already handled + resolved = true; + clearTimeout(timeout); + window.removeEventListener('message', listener); + + if (e.data.error) { + reject(new Error(e.data.error)); + } else { + resolve({ + elements: e.data.elements, + raw_elements: e.data.raw_elements, + duration: e.data.duration + }); + } + } + }; + + window.addEventListener('message', listener); + + try { + window.postMessage({ + type: 'SENTIENCE_SNAPSHOT_REQUEST', + requestId, + rawData, + options + }, '*'); + } catch (error) { + if (!resolved) { + resolved = true; + clearTimeout(timeout); + window.removeEventListener('message', listener); + reject(new Error(`Failed to send snapshot request: ${error.message}`)); + } + } }); } - // --- HELPER: Get Raw HTML for Turndown/External Processing --- - // Returns cleaned HTML that can be processed by Turndown or other Node.js libraries + // --- HELPER: Raw HTML Extractor (unchanged) --- function getRawHTML(root) { const sourceRoot = root || document.body; const clone = sourceRoot.cloneNode(true); - - // Remove unwanted elements by tag name (simple and reliable) + const unwantedTags = ['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg']; unwantedTags.forEach(tag => { const elements = clone.querySelectorAll(tag); elements.forEach(el => { - if (el.parentNode) { - el.parentNode.removeChild(el); - } + if (el.parentNode) el.parentNode.removeChild(el); }); }); - // Remove invisible elements from original DOM and find matching ones in clone - // We'll use a simple approach: mark elements in original, then remove from clone + // Remove invisible elements const invisibleSelectors = []; - const walker = document.createTreeWalker( - sourceRoot, - NodeFilter.SHOW_ELEMENT, - null, - false - ); - + const walker = document.createTreeWalker(sourceRoot, 
NodeFilter.SHOW_ELEMENT, null, false); let node; while (node = walker.nextNode()) { const tag = node.tagName.toLowerCase(); if (tag === 'head' || tag === 'title') continue; - + const style = window.getComputedStyle(node); if (style.display === 'none' || style.visibility === 'hidden' || (node.offsetWidth === 0 && node.offsetHeight === 0)) { - // Build a selector for this element let selector = tag; if (node.id) { selector = `#${node.id}`; @@ -128,30 +364,25 @@ } } - // Remove invisible elements from clone (if we can find them) invisibleSelectors.forEach(selector => { try { const elements = clone.querySelectorAll(selector); elements.forEach(el => { - if (el.parentNode) { - el.parentNode.removeChild(el); - } + if (el.parentNode) el.parentNode.removeChild(el); }); } catch (e) { // Invalid selector, skip } }); - // Resolve relative URLs in links and images + // Resolve relative URLs const links = clone.querySelectorAll('a[href]'); links.forEach(link => { const href = link.getAttribute('href'); if (href && !href.startsWith('http://') && !href.startsWith('https://') && !href.startsWith('#')) { try { link.setAttribute('href', new URL(href, document.baseURI).href); - } catch (e) { - // Keep original href if URL parsing fails - } + } catch (e) {} } }); @@ -161,32 +392,24 @@ if (src && !src.startsWith('http://') && !src.startsWith('https://') && !src.startsWith('data:')) { try { img.setAttribute('src', new URL(src, document.baseURI).href); - } catch (e) { - // Keep original src if URL parsing fails - } + } catch (e) {} } }); return clone.innerHTML; } - // --- HELPER: Simple Markdown Converter (Lightweight) --- - // Uses getRawHTML() and then converts to markdown for consistency + // --- HELPER: Markdown Converter (unchanged) --- function convertToMarkdown(root) { - // Get cleaned HTML first const rawHTML = getRawHTML(root); - - // Create a temporary container to parse the HTML const tempDiv = document.createElement('div'); tempDiv.innerHTML = rawHTML; - + let markdown = ''; - 
let insideLink = false; // Track if we're inside an tag + let insideLink = false; function walk(node) { if (node.nodeType === Node.TEXT_NODE) { - // Keep minimal whitespace to prevent words merging - // Strip newlines inside text nodes to prevent broken links const text = node.textContent.replace(/[\r\n]+/g, ' ').replace(/\s+/g, ' '); if (text.trim()) markdown += text; return; @@ -201,13 +424,12 @@ if (tag === 'h2') markdown += '\n## '; if (tag === 'h3') markdown += '\n### '; if (tag === 'li') markdown += '\n- '; - // IMPORTANT: Don't add newlines for block elements when inside a link if (!insideLink && (tag === 'p' || tag === 'div' || tag === 'br')) markdown += '\n'; if (tag === 'strong' || tag === 'b') markdown += '**'; if (tag === 'em' || tag === 'i') markdown += '_'; if (tag === 'a') { markdown += '['; - insideLink = true; // Mark that we're entering a link + insideLink = true; } // Children @@ -219,25 +441,21 @@ // Suffix if (tag === 'a') { - // Get absolute URL from href attribute (already resolved in getRawHTML) const href = node.getAttribute('href'); if (href) markdown += `](${href})`; else markdown += ']'; - insideLink = false; // Mark that we're exiting the link + insideLink = false; } if (tag === 'strong' || tag === 'b') markdown += '**'; if (tag === 'em' || tag === 'i') markdown += '_'; - // IMPORTANT: Don't add newlines for block elements when inside a link (suffix section too) if (!insideLink && (tag === 'h1' || tag === 'h2' || tag === 'h3' || tag === 'p' || tag === 'div')) markdown += '\n'; } walk(tempDiv); - - // Cleanup: remove excessive newlines return markdown.replace(/\n{3,}/g, '\n\n').trim(); } - // --- HELPER: Raw Text Extractor --- + // --- HELPER: Text Extractor (unchanged) --- function convertToText(root) { let text = ''; function walk(node) { @@ -247,22 +465,20 @@ } if (node.nodeType === Node.ELEMENT_NODE) { const tag = node.tagName.toLowerCase(); - // Skip nav/footer/header/script/style/noscript/iframe/svg if (['nav', 'footer', 'header', 
'script', 'style', 'noscript', 'iframe', 'svg'].includes(tag)) return; const style = window.getComputedStyle(node); if (style.display === 'none' || style.visibility === 'hidden') return; - - // Block level elements get a newline + const isBlock = style.display === 'block' || style.display === 'flex' || node.tagName === 'P' || node.tagName === 'DIV'; if (isBlock) text += ' '; - + if (node.shadowRoot) { Array.from(node.shadowRoot.childNodes).forEach(walk); } else { node.childNodes.forEach(walk); } - + if (isBlock) text += '\n'; } } @@ -270,155 +486,597 @@ return text.replace(/\n{3,}/g, '\n\n').trim(); } - // Load WASM - try { - const wasmUrl = EXT_URL + 'pkg/sentience_core.js'; - const module = await import(wasmUrl); - const imports = { - env: { - js_click_element: (id) => { - const el = window.sentience_registry[id]; - if (el) { el.click(); el.focus(); } + // --- HELPER: Clean null/undefined fields --- + function cleanElement(obj) { + if (Array.isArray(obj)) { + return obj.map(cleanElement); + } + if (obj !== null && typeof obj === 'object') { + const cleaned = {}; + for (const [key, value] of Object.entries(obj)) { + if (value !== null && value !== undefined) { + if (typeof value === 'object') { + const deepClean = cleanElement(value); + if (Object.keys(deepClean).length > 0) { + cleaned[key] = deepClean; + } + } else { + cleaned[key] = value; + } } } - }; - await module.default(undefined, imports); - wasmModule = module; - - // Verify functions are available - if (!wasmModule.analyze_page) { - console.error('[SentienceAPI.com] WASM functions not available'); - } else { - console.log('[SentienceAPI.com] ✓ API Ready!'); - console.log('[SentienceAPI.com] Available functions:', Object.keys(wasmModule).filter(k => k.startsWith('analyze'))); + return cleaned; } - } catch (e) { - console.error('[SentienceAPI.com] WASM Load Failed:', e); + return obj; } - // REMOVED: Headless detection - no longer needed (license system removed) - - // --- GLOBAL API --- - 
// --- HELPER: Extract Raw Element Data (for Golden Set) ---
/**
 * Collects the raw, model-independent signals recorded for an element:
 * rounded bounding rect, a fixed set of computed styles, and a few attributes.
 * Empty or missing values are normalised to `null`.
 *
 * @param {Element} el - Element to describe.
 * @returns {{tag: string, rect: object, styles: object, attributes: object}}
 */
function extractRawElementData(el) {
    const style = window.getComputedStyle(el);
    const rect = el.getBoundingClientRect();
    const orNull = (value) => value || null;

    // SVG elements expose `className` as an SVGAnimatedString object, which would
    // serialise as an object instead of a class list; read the attribute instead.
    const className = typeof el.className === 'string' ? el.className : el.getAttribute('class');

    return {
        tag: el.tagName,
        rect: {
            x: Math.round(rect.x),
            y: Math.round(rect.y),
            width: Math.round(rect.width),
            height: Math.round(rect.height)
        },
        styles: {
            cursor: orNull(style.cursor),
            backgroundColor: orNull(style.backgroundColor),
            color: orNull(style.color),
            fontWeight: orNull(style.fontWeight),
            fontSize: orNull(style.fontSize),
            display: orNull(style.display),
            position: orNull(style.position),
            zIndex: orNull(style.zIndex),
            opacity: orNull(style.opacity),
            visibility: orNull(style.visibility)
        },
        attributes: {
            role: orNull(el.getAttribute('role')),
            type: orNull(el.getAttribute('type')),
            ariaLabel: orNull(el.getAttribute('aria-label')),
            id: orNull(el.id),
            className: orNull(className)
        }
    };
}

// --- HELPER: Generate Unique CSS Selector (for Golden Set) ---
/**
 * Builds a CSS selector for `el`, trying in order: its id, its first `data-*` or
 * `aria-label` attribute, then a `tag.class:nth-of-type(n)` path up to (not
 * including) <body>, stopping early at the first ancestor that has an id.
 *
 * NOTE(review): the attribute shortcut returns the first matching attribute without
 * checking that it matches only one element, so the result is not guaranteed unique.
 *
 * @param {Element} el - Element to build a selector for.
 * @returns {string} Selector string, or '' when `el` is missing or has no tagName.
 */
function getUniqueSelector(el) {
    if (!el || !el.tagName) return '';

    // Ids and class names may contain characters that are not valid in a bare CSS
    // identifier (".", ":", a leading digit); escape them where CSS.escape exists.
    const escapeIdent = (ident) =>
        (typeof CSS !== 'undefined' && typeof CSS.escape === 'function') ? CSS.escape(ident) : ident;

    if (el.id) {
        return `#${escapeIdent(el.id)}`;
    }

    for (const attr of el.attributes) {
        if (attr.name.startsWith('data-') || attr.name === 'aria-label') {
            // Escape backslashes as well as quotes so the quoted value stays intact.
            const value = (attr.value || '').replace(/["\\]/g, '\\$&');
            return `${el.tagName.toLowerCase()}[${attr.name}="${value}"]`;
        }
    }

    const path = [];
    let current = el;

    while (current && current !== document.body && current !== document.documentElement) {
        // An ancestor id anchors the path; nothing above it is needed.
        if (current.id) {
            path.unshift(`#${escapeIdent(current.id)}`);
            break;
        }

        let selector = current.tagName.toLowerCase();

        if (current.className && typeof current.className === 'string') {
            const classes = current.className.trim().split(/\s+/).filter(Boolean);
            if (classes.length > 0) {
                // Only the first class is used, to keep selectors short.
                selector += `.${escapeIdent(classes[0])}`;
            }
        }

        if (current.parentElement) {
            const tagName = current.tagName;
            const sameTagSiblings = Array.from(current.parentElement.children)
                .filter((sibling) => sibling.tagName === tagName);
            if (sameTagSiblings.length > 1) {
                selector += `:nth-of-type(${sameTagSiblings.indexOf(current) + 1})`;
            }
        }

        path.unshift(selector);
        current = current.parentElement;
    }

    return path.join(' > ') || el.tagName.toLowerCase();
}

// --- HELPER: Wait for DOM Stability (SPA Hydration) ---
/**
 * Resolves once the DOM looks settled: first the document must hold at least
 * `minNodeCount` elements, then no child-list mutation may occur for `quietPeriod`
 * ms. Never rejects; after `maxWait` ms in total it logs a warning and resolves.
 *
 * @param {object} [options]
 * @param {number} [options.minNodeCount=500] - Element count regarded as "rendered".
 * @param {number} [options.quietPeriod=200] - Required mutation-free time in ms.
 * @param {number} [options.maxWait=5000] - Upper bound on the total wait in ms.
 * @returns {Promise<void>}
 */
async function waitForStability(options = {}) {
    const {
        minNodeCount = 500,
        quietPeriod = 200,
        maxWait = 5000
    } = options;

    const startTime = Date.now();
    // <body> can be absent very early in page load; observing null would throw.
    const observeTarget = document.body || document.documentElement;
    const countNodes = () => document.querySelectorAll('*').length;

    return new Promise((resolve) => {
        if (!observeTarget) {
            resolve();
            return;
        }

        let settled = false;
        let observer = null;
        let pollTimer = null;
        let fallbackTimer = null;

        // Single exit point: tears down the observer and every timer so nothing
        // fires (or logs) after the promise has resolved.
        const finish = (warning) => {
            if (settled) return;
            settled = true;
            observer?.disconnect();
            clearTimeout(pollTimer);
            clearTimeout(fallbackTimer);
            if (warning) console.warn(warning);
            resolve();
        };

        // Replaces the active observer with one that runs `onMutation`.
        const watch = (onMutation) => {
            observer?.disconnect();
            observer = new MutationObserver(onMutation);
            observer.observe(observeTarget, {
                childList: true,
                subtree: true,
                attributes: false
            });
        };

        // Phase 2: poll every 50ms until no mutation was seen for `quietPeriod`.
        const waitForQuiet = () => {
            let lastChange = Date.now();
            watch(() => {
                lastChange = Date.now();
            });

            const poll = () => {
                const now = Date.now();
                if (now - lastChange >= quietPeriod) {
                    finish();
                } else if (now - startTime >= maxWait) {
                    finish('[SentienceAPI] DOM stability timeout - proceeding anyway');
                } else {
                    pollTimer = setTimeout(poll, 50);
                }
            };

            poll();
        };

        if (countNodes() >= minNodeCount) {
            waitForQuiet();
            return;
        }

        // Phase 1: the DOM is still sparse, so re-count whenever it mutates.
        watch(() => {
            if (countNodes() >= minNodeCount) {
                waitForQuiet();
            } else if (Date.now() - startTime >= maxWait) {
                finish('[SentienceAPI] DOM node count timeout - proceeding anyway');
            }
        });

        // A page that never mutates again would otherwise never resolve.
        fallbackTimer = setTimeout(() => {
            finish('[SentienceAPI] DOM stability max wait reached - proceeding');
        }, maxWait);
    });
}

// --- HELPER: Collect Iframe Snapshots (Frame Stitching) ---
// Recursively collects snapshot data from all child iframes
// This enables detection of elements inside iframes (e.g., Stripe forms)
//
// NOTE: Cross-origin iframes cannot be accessed due to browser security (Same-Origin Policy).
// Only same-origin iframes will return snapshot data. Cross-origin iframes will be skipped
// with a warning. For cross-origin iframes, users must manually switch frames using
// Playwright's page.frame() API.
/**
 * Asks every child <iframe> of the current document for its own snapshot via
 * postMessage and returns the answers keyed by iframe element.
 *
 * Each request carries `collectIframes: true` in its options so that the frame
 * that answers also collects from its own child iframes. Iframes that do not
 * answer within 5 seconds, answer with an error, have no reachable contentWindow,
 * or look like ad frames are omitted from the result.
 *
 * @param {object} [options] - Snapshot options forwarded to each child frame.
 * @returns {Promise<Map<HTMLIFrameElement, object>>} iframe element -> snapshot.
 */
async function collectIframeSnapshots(options = {}) {
    const RESPONSE_TYPE = 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE';
    const RESPONSE_TIMEOUT_MS = 5000;
    // Substrings of iframe URLs that are skipped to save time. The last marker used
    // to be 'ads system', which contains a space and so could never match a URL.
    const AD_URL_MARKERS = ['doubleclick', 'googleadservices', 'adsystem'];

    const iframeData = new Map();
    const iframes = Array.from(document.querySelectorAll('iframe'));

    if (iframes.length === 0) {
        return iframeData;
    }

    console.log(`[SentienceAPI] Found ${iframes.length} iframe(s), requesting snapshots...`);

    // Resolves with the child's snapshot, or null when none could be obtained.
    const requestSnapshot = (iframe, idx) => {
        const src = iframe.src || '';
        if (AD_URL_MARKERS.some((marker) => src.includes(marker))) {
            console.log(`[SentienceAPI] Skipping ad iframe: ${src.substring(0, 30)}...`);
            return Promise.resolve(null);
        }

        return new Promise((resolve) => {
            const requestId = `iframe-${idx}-${Date.now()}`;
            const target = iframe.contentWindow;
            let timeout = null;

            // Always detach the listener and timer before resolving.
            const settle = (value) => {
                clearTimeout(timeout);
                window.removeEventListener('message', listener);
                resolve(value);
            };

            function listener(event) {
                // Accept only the reply to this request, and only from the window
                // it was sent to; other frames could otherwise inject a snapshot.
                if (event.source !== target) return;
                if (event.data?.type !== RESPONSE_TYPE || event.data?.requestId !== requestId) return;

                if (event.data.error) {
                    console.warn(`[SentienceAPI] Iframe ${idx} returned error:`, event.data.error);
                    settle(null);
                    return;
                }

                const elementCount = event.data.snapshot?.raw_elements?.length || 0;
                console.log(`[SentienceAPI] ✓ Received ${elementCount} elements from Iframe ${idx} (id: ${requestId})`);
                settle(event.data.snapshot || null);
            }

            if (!target) {
                console.warn(`[SentienceAPI] Iframe ${idx} contentWindow is inaccessible (Cross-Origin?)`);
                resolve(null);
                return;
            }

            timeout = setTimeout(() => {
                console.warn(`[SentienceAPI] ⚠️ Iframe ${idx} snapshot TIMEOUT (id: ${requestId})`);
                settle(null);
            }, RESPONSE_TIMEOUT_MS);
            window.addEventListener('message', listener);

            try {
                // '*' because the child's origin is not known here; the reply is
                // validated above by source window and request id.
                target.postMessage({
                    type: 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST',
                    requestId: requestId,
                    options: {
                        ...options,
                        collectIframes: true // Enable recursion for nested iframes
                    }
                }, '*');
            } catch (error) {
                console.error(`[SentienceAPI] Failed to postMessage to Iframe ${idx}:`, error);
                settle(null);
            }
        });
    };

    const snapshots = await Promise.all(iframes.map(requestSnapshot));

    snapshots.forEach((snapshot, idx) => {
        if (snapshot) {
            iframeData.set(iframes[idx], snapshot);
            console.log(`[SentienceAPI] ✓ Collected snapshot from iframe ${idx}`);
        } else {
            console.warn(`[SentienceAPI] Iframe ${idx} returned no data (timeout or error)`);
        }
    });

    return iframeData;
}
Get Geometry from WASM - let result; - try { - if (options.limit || options.filter) { - result = wasmModule.analyze_page_with_options(rawData, options); - } else { - result = wasmModule.analyze_page(rawData); + // --- HELPER: Handle Iframe Snapshot Request (for child frames) --- + // When a parent frame requests snapshot, this handler responds with local snapshot + // NOTE: Recursion is safe because querySelectorAll('iframe') only finds direct children. + // Iframe A can ask Iframe B, but won't go back up to parent (no circular dependency risk). + function setupIframeSnapshotHandler() { + window.addEventListener('message', async (event) => { + // Security: only respond to snapshot requests from parent frames + if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST') { + const { requestId, options } = event.data; + + try { + // Generate snapshot for this iframe's content + // Allow recursive collection - querySelectorAll('iframe') only finds direct children, + // so Iframe A will ask Iframe B, but won't go back up to parent (safe recursion) + // waitForStability: false makes performance better - i.e. don't wait for children frames + const snapshotOptions = { ...options, collectIframes: true, waitForStability: options.waitForStability === false ? 
false : false }; + const snapshot = await window.sentience.snapshot(snapshotOptions); + + // Send response back to parent + if (event.source && event.source.postMessage) { + event.source.postMessage({ + type: 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE', + requestId: requestId, + snapshot: snapshot, + error: null + }, '*'); + } + } catch (error) { + // Send error response + if (event.source && event.source.postMessage) { + event.source.postMessage({ + type: 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE', + requestId: requestId, + snapshot: null, + error: error.message + }, '*'); + } } - } catch (e) { - return { status: "error", error: e.message }; } + }); + } + + // Setup iframe handler when script loads (only once) + if (!window.sentience_iframe_handler_setup) { + setupIframeSnapshotHandler(); + window.sentience_iframe_handler_setup = true; + } - // Hydration step removed as WASM now returns populated structs + // --- GLOBAL API --- + window.sentience = { + // 1. Geometry snapshot (NEW ARCHITECTURE - No WASM in Main World!) + snapshot: async (options = {}) => { + try { + // Step 0: Wait for DOM stability if requested (for SPA hydration) + if (options.waitForStability !== false) { + await waitForStability(options.waitForStability || {}); + } + + // Step 1: Collect raw DOM data (Main World - CSP can't block this!) 
+ const rawData = []; + window.sentience_registry = []; - // Capture Screenshot - let screenshot = null; - if (options.screenshot) { - screenshot = await captureScreenshot(options.screenshot); - } + const nodes = getAllElements(); + + nodes.forEach((el, idx) => { + if (!el.getBoundingClientRect) return; + const rect = el.getBoundingClientRect(); + if (rect.width < 5 || rect.height < 5) return; + + window.sentience_registry[idx] = el; + + const textVal = getText(el); + const inView = isInViewport(rect); + + // Get computed style once (needed for both occlusion check and data collection) + const style = window.getComputedStyle(el); + + // Only check occlusion for elements likely to be occluded (optimized) + // This avoids layout thrashing for the vast majority of elements + const occluded = inView ? isOccluded(el, rect, style) : false; + + // Get effective background color (traverses DOM to find non-transparent color) + const effectiveBgColor = getEffectiveBackgroundColor(el); + + rawData.push({ + id: idx, + tag: el.tagName.toLowerCase(), + rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }, + styles: { + display: toSafeString(style.display), + visibility: toSafeString(style.visibility), + opacity: toSafeString(style.opacity), + z_index: toSafeString(style.zIndex || "auto"), + position: toSafeString(style.position), + bg_color: toSafeString(effectiveBgColor || style.backgroundColor), + color: toSafeString(style.color), + cursor: toSafeString(style.cursor), + font_weight: toSafeString(style.fontWeight), + font_size: toSafeString(style.fontSize) + }, + attributes: { + role: toSafeString(el.getAttribute('role')), + type_: toSafeString(el.getAttribute('type')), + aria_label: toSafeString(el.getAttribute('aria-label')), + href: toSafeString(el.href || el.getAttribute('href') || null), + class: toSafeString(getClassName(el)), + // Capture dynamic input state (not just initial attributes) + value: el.value !== undefined ? 
toSafeString(el.value) : toSafeString(el.getAttribute('value')), + checked: el.checked !== undefined ? String(el.checked) : null + }, + text: toSafeString(textVal), + in_viewport: inView, + is_occluded: occluded + }); + }); + + console.log(`[SentienceAPI] Collected ${rawData.length} elements from main frame`); - // C. Clean up null/undefined fields to save tokens (Your existing cleaner) - const cleanElement = (obj) => { - if (Array.isArray(obj)) { - return obj.map(cleanElement); - } else if (obj !== null && typeof obj === 'object') { - const cleaned = {}; - for (const [key, value] of Object.entries(obj)) { - // Keep boolean false for critical flags if desired, or remove to match Rust defaults - if (value !== null && value !== undefined) { - cleaned[key] = cleanElement(value); + // Step 1.5: Collect iframe snapshots and FLATTEN immediately + // "Flatten Early" architecture: Merge iframe elements into main array before WASM + // This allows WASM to process all elements uniformly (no recursion needed) + let allRawElements = [...rawData]; // Start with main frame elements + let totalIframeElements = 0; + + if (options.collectIframes !== false) { + try { + console.log(`[SentienceAPI] Starting iframe collection...`); + const iframeSnapshots = await collectIframeSnapshots(options); + console.log(`[SentienceAPI] Iframe collection complete. Received ${iframeSnapshots.size} snapshot(s)`); + + if (iframeSnapshots.size > 0) { + // FLATTEN IMMEDIATELY: Don't nest them. Just append them with coordinate translation. 
+ iframeSnapshots.forEach((iframeSnapshot, iframeEl) => { + // Debug: Log structure to verify data is correct + // console.log(`[SentienceAPI] Processing iframe snapshot:`, iframeSnapshot); + + if (iframeSnapshot && iframeSnapshot.raw_elements) { + const rawElementsCount = iframeSnapshot.raw_elements.length; + console.log(`[SentienceAPI] Processing ${rawElementsCount} elements from iframe (src: ${iframeEl.src || 'unknown'})`); + // Get iframe's bounding rect (offset for coordinate translation) + const iframeRect = iframeEl.getBoundingClientRect(); + const offset = { x: iframeRect.x, y: iframeRect.y }; + + // Get iframe context for frame switching (Playwright needs this) + const iframeSrc = iframeEl.src || iframeEl.getAttribute('src') || ''; + let isSameOrigin = false; + try { + // Try to access contentWindow to check if same-origin + isSameOrigin = iframeEl.contentWindow !== null; + } catch (e) { + isSameOrigin = false; + } + + // Adjust coordinates and add iframe context to each element + const adjustedElements = iframeSnapshot.raw_elements.map(el => { + const adjusted = { ...el }; + + // Adjust rect coordinates to parent viewport + if (adjusted.rect) { + adjusted.rect = { + ...adjusted.rect, + x: adjusted.rect.x + offset.x, + y: adjusted.rect.y + offset.y + }; + } + + // Add iframe context so agents can switch frames in Playwright + adjusted.iframe_context = { + src: iframeSrc, + is_same_origin: isSameOrigin + }; + + return adjusted; + }); + + // Append flattened iframe elements to main array + allRawElements.push(...adjustedElements); + totalIframeElements += adjustedElements.length; + } + }); + + // console.log(`[SentienceAPI] Merged ${iframeSnapshots.size} iframe(s). 
Total elements: ${allRawElements.length} (${rawData.length} main + ${totalIframeElements} iframe)`); } + } catch (error) { + console.warn('[SentienceAPI] Iframe collection failed:', error); } - return cleaned; } - return obj; - }; - const cleanedElements = cleanElement(result); + // Step 2: Send EVERYTHING to WASM (One giant flat list) + // Now WASM prunes iframe elements and main elements in one pass! + // No recursion needed - everything is already flat + console.log(`[SentienceAPI] Sending ${allRawElements.length} total elements to WASM (${rawData.length} main + ${totalIframeElements} iframe)`); + const processed = await processSnapshotInBackground(allRawElements, options); + + if (!processed || !processed.elements) { + throw new Error('WASM processing returned invalid result'); + } + + // Step 3: Capture screenshot if requested + let screenshot = null; + if (options.screenshot) { + screenshot = await captureScreenshot(options.screenshot); + } - return { - status: "success", - url: window.location.href, - elements: cleanedElements, - raw_elements: rawData, // Include raw data for server-side processing (safe to expose - no proprietary value) - screenshot: screenshot - }; + // Step 4: Clean and return + const cleanedElements = cleanElement(processed.elements); + const cleanedRawElements = cleanElement(processed.raw_elements); + + // FIXED: Removed undefined 'totalIframeRawElements' + // FIXED: Logic updated for "Flatten Early" architecture. + // processed.elements ALREADY contains the merged iframe elements, + // so we simply use .length. No addition needed. 
+ + const totalCount = cleanedElements.length; + const totalRaw = cleanedRawElements.length; + const iframeCount = totalIframeElements || 0; + + console.log(`[SentienceAPI] ✓ Complete: ${totalCount} Smart Elements, ${totalRaw} Raw Elements (includes ${iframeCount} from iframes) (WASM took ${processed.duration?.toFixed(1)}ms)`); + + return { + status: "success", + url: window.location.href, + viewport: { + width: window.innerWidth, + height: window.innerHeight + }, + elements: cleanedElements, + raw_elements: cleanedRawElements, + screenshot: screenshot + }; + } catch (error) { + console.error('[SentienceAPI] snapshot() failed:', error); + console.error('[SentienceAPI] Error stack:', error.stack); + return { + status: "error", + error: error.message || 'Unknown error', + stack: error.stack + }; + } }, - // 2. Read Content (New) + + // 2. Read Content (unchanged) read: (options = {}) => { - const format = options.format || 'raw'; // 'raw', 'text', or 'markdown' + const format = options.format || 'raw'; let content; - + if (format === 'raw') { - // Return raw HTML suitable for Turndown or other Node.js libraries content = getRawHTML(document.body); } else if (format === 'markdown') { - // Return lightweight markdown conversion content = convertToMarkdown(document.body); } else { - // Default to text content = convertToText(document.body); } - + return { status: "success", url: window.location.href, @@ -428,11 +1086,388 @@ }; }, - // 3. Action + // 2b. Find Text Rectangle - Get exact pixel coordinates of specific text + findTextRect: (options = {}) => { + const { + text, + containerElement = document.body, + caseSensitive = false, + wholeWord = false, + maxResults = 10 + } = options; + + if (!text || text.trim().length === 0) { + return { + status: "error", + error: "Text parameter is required" + }; + } + + const results = []; + const searchText = caseSensitive ? 
text : text.toLowerCase(); + + // Helper function to find text in a single text node + function findInTextNode(textNode) { + const nodeText = textNode.nodeValue; + const searchableText = caseSensitive ? nodeText : nodeText.toLowerCase(); + + let startIndex = 0; + while (startIndex < nodeText.length && results.length < maxResults) { + const foundIndex = searchableText.indexOf(searchText, startIndex); + + if (foundIndex === -1) break; + + // Check whole word matching if required + if (wholeWord) { + const before = foundIndex > 0 ? nodeText[foundIndex - 1] : ' '; + const after = foundIndex + text.length < nodeText.length + ? nodeText[foundIndex + text.length] + : ' '; + + // Check if surrounded by word boundaries + if (!/\s/.test(before) || !/\s/.test(after)) { + startIndex = foundIndex + 1; + continue; + } + } + + try { + // Create range for this occurrence + const range = document.createRange(); + range.setStart(textNode, foundIndex); + range.setEnd(textNode, foundIndex + text.length); + + const rect = range.getBoundingClientRect(); + + // Only include visible rectangles + if (rect.width > 0 && rect.height > 0) { + results.push({ + text: nodeText.substring(foundIndex, foundIndex + text.length), + rect: { + x: rect.left + window.scrollX, + y: rect.top + window.scrollY, + width: rect.width, + height: rect.height, + left: rect.left + window.scrollX, + top: rect.top + window.scrollY, + right: rect.right + window.scrollX, + bottom: rect.bottom + window.scrollY + }, + viewport_rect: { + x: rect.left, + y: rect.top, + width: rect.width, + height: rect.height + }, + context: { + before: nodeText.substring(Math.max(0, foundIndex - 20), foundIndex), + after: nodeText.substring(foundIndex + text.length, Math.min(nodeText.length, foundIndex + text.length + 20)) + }, + in_viewport: ( + rect.top >= 0 && + rect.left >= 0 && + rect.bottom <= window.innerHeight && + rect.right <= window.innerWidth + ) + }); + } + } catch (e) { + console.warn('[SentienceAPI] Failed to get rect for 
text:', e); + } + + startIndex = foundIndex + 1; + } + } + + // Tree walker to find all text nodes + const walker = document.createTreeWalker( + containerElement, + NodeFilter.SHOW_TEXT, + { + acceptNode: function(node) { + // Skip script, style, and empty text nodes + const parent = node.parentElement; + if (!parent) return NodeFilter.FILTER_REJECT; + + const tagName = parent.tagName.toLowerCase(); + if (tagName === 'script' || tagName === 'style' || tagName === 'noscript') { + return NodeFilter.FILTER_REJECT; + } + + // Skip whitespace-only nodes + if (!node.nodeValue || node.nodeValue.trim().length === 0) { + return NodeFilter.FILTER_REJECT; + } + + // Check if element is visible + const computedStyle = window.getComputedStyle(parent); + if (computedStyle.display === 'none' || + computedStyle.visibility === 'hidden' || + computedStyle.opacity === '0') { + return NodeFilter.FILTER_REJECT; + } + + return NodeFilter.FILTER_ACCEPT; + } + } + ); + + // Walk through all text nodes + let currentNode; + while ((currentNode = walker.nextNode()) && results.length < maxResults) { + findInTextNode(currentNode); + } + + return { + status: "success", + query: text, + case_sensitive: caseSensitive, + whole_word: wholeWord, + matches: results.length, + results: results, + viewport: { + width: window.innerWidth, + height: window.innerHeight, + scroll_x: window.scrollX, + scroll_y: window.scrollY + } + }; + }, + + // 3. Click Action (unchanged) click: (id) => { const el = window.sentience_registry[id]; - if (el) { el.click(); el.focus(); return true; } + if (el) { + el.click(); + el.focus(); + return true; + } return false; + }, + + // 4. Inspector Mode: Start Recording for Golden Set Collection + startRecording: (options = {}) => { + const { + highlightColor = '#ff0000', + successColor = '#00ff00', + autoDisableTimeout = 30 * 60 * 1000, // 30 minutes default + keyboardShortcut = 'Ctrl+Shift+I' + } = options; + + console.log("🔴 [Sentience] Recording Mode STARTED. 
Click an element to copy its Ground Truth JSON."); + console.log(` Press ${keyboardShortcut} or call stopRecording() to stop.`); + + // Validate registry is populated + if (!window.sentience_registry || window.sentience_registry.length === 0) { + console.warn("⚠️ Registry empty. Call `await window.sentience.snapshot()` first to populate registry."); + alert("Registry empty. Run `await window.sentience.snapshot()` first!"); + return () => {}; // Return no-op cleanup function + } + + // Create reverse mapping for O(1) lookup (fixes registry lookup bug) + window.sentience_registry_map = new Map(); + window.sentience_registry.forEach((el, idx) => { + if (el) window.sentience_registry_map.set(el, idx); + }); + + // Create highlight box overlay + let highlightBox = document.getElementById('sentience-highlight-box'); + if (!highlightBox) { + highlightBox = document.createElement('div'); + highlightBox.id = 'sentience-highlight-box'; + highlightBox.style.cssText = ` + position: fixed; + pointer-events: none; + z-index: 2147483647; + border: 2px solid ${highlightColor}; + background: rgba(255, 0, 0, 0.1); + display: none; + transition: all 0.1s ease; + box-sizing: border-box; + `; + document.body.appendChild(highlightBox); + } + + // Create visual indicator (red border on page when recording) + let recordingIndicator = document.getElementById('sentience-recording-indicator'); + if (!recordingIndicator) { + recordingIndicator = document.createElement('div'); + recordingIndicator.id = 'sentience-recording-indicator'; + recordingIndicator.style.cssText = ` + position: fixed; + top: 0; + left: 0; + right: 0; + height: 3px; + background: ${highlightColor}; + z-index: 2147483646; + pointer-events: none; + `; + document.body.appendChild(recordingIndicator); + } + recordingIndicator.style.display = 'block'; + + // Hover handler (visual feedback) + const mouseOverHandler = (e) => { + const el = e.target; + if (!el || el === highlightBox || el === recordingIndicator) return; + + 
const rect = el.getBoundingClientRect(); + highlightBox.style.display = 'block'; + highlightBox.style.top = (rect.top + window.scrollY) + 'px'; + highlightBox.style.left = (rect.left + window.scrollX) + 'px'; + highlightBox.style.width = rect.width + 'px'; + highlightBox.style.height = rect.height + 'px'; + }; + + // Click handler (capture ground truth data) + const clickHandler = (e) => { + e.preventDefault(); + e.stopPropagation(); + + const el = e.target; + if (!el || el === highlightBox || el === recordingIndicator) return; + + // Use Map for reliable O(1) lookup + const sentienceId = window.sentience_registry_map.get(el); + if (sentienceId === undefined) { + console.warn("⚠️ Element not found in Sentience Registry. Did you run snapshot() first?"); + alert("Element not in registry. Run `await window.sentience.snapshot()` first!"); + return; + } + + // Extract raw data (ground truth + raw signals, NOT model outputs) + const rawData = extractRawElementData(el); + const selector = getUniqueSelector(el); + const role = el.getAttribute('role') || el.tagName.toLowerCase(); + const text = getText(el); + + // Build golden set JSON (ground truth + raw signals only) + const snippet = { + task: `Interact with ${text.substring(0, 20)}${text.length > 20 ? '...' 
: ''}`, + url: window.location.href, + timestamp: new Date().toISOString(), + target_criteria: { + id: sentienceId, + selector: selector, + role: role, + text: text.substring(0, 50) + }, + debug_snapshot: rawData + }; + + // Copy to clipboard + const jsonString = JSON.stringify(snippet, null, 2); + navigator.clipboard.writeText(jsonString).then(() => { + console.log("✅ Copied Ground Truth to clipboard:", snippet); + + // Flash green to indicate success + highlightBox.style.border = `2px solid ${successColor}`; + highlightBox.style.background = 'rgba(0, 255, 0, 0.2)'; + setTimeout(() => { + highlightBox.style.border = `2px solid ${highlightColor}`; + highlightBox.style.background = 'rgba(255, 0, 0, 0.1)'; + }, 500); + }).catch(err => { + console.error("❌ Failed to copy to clipboard:", err); + alert("Failed to copy to clipboard. Check console for JSON."); + }); + }; + + // Auto-disable timeout + let timeoutId = null; + + // Cleanup function to stop recording (defined before use) + const stopRecording = () => { + document.removeEventListener('mouseover', mouseOverHandler, true); + document.removeEventListener('click', clickHandler, true); + document.removeEventListener('keydown', keyboardHandler, true); + + if (timeoutId) { + clearTimeout(timeoutId); + timeoutId = null; + } + + if (highlightBox) { + highlightBox.style.display = 'none'; + } + + if (recordingIndicator) { + recordingIndicator.style.display = 'none'; + } + + // Clean up registry map (optional, but good practice) + if (window.sentience_registry_map) { + window.sentience_registry_map.clear(); + } + + // Remove global reference + if (window.sentience_stopRecording === stopRecording) { + delete window.sentience_stopRecording; + } + + console.log("⚪ [Sentience] Recording Mode STOPPED."); + }; + + // Keyboard shortcut handler (defined after stopRecording) + const keyboardHandler = (e) => { + // Ctrl+Shift+I or Cmd+Shift+I + if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === 'I') { + e.preventDefault(); + 
stopRecording(); + } + }; + + // Attach event listeners (use capture phase to intercept early) + document.addEventListener('mouseover', mouseOverHandler, true); + document.addEventListener('click', clickHandler, true); + document.addEventListener('keydown', keyboardHandler, true); + + // Set up auto-disable timeout + if (autoDisableTimeout > 0) { + timeoutId = setTimeout(() => { + console.log("⏰ [Sentience] Recording Mode auto-disabled after timeout."); + stopRecording(); + }, autoDisableTimeout); + } + + // Store stop function globally for keyboard shortcut access + window.sentience_stopRecording = stopRecording; + + return stopRecording; + } + }; + + /** + * Show overlay highlighting specific elements with Shadow DOM + * @param {Array} elements - List of elements with bbox, importance, visual_cues + * @param {number} targetElementId - Optional ID of target element (shown in red) + */ + window.sentience.showOverlay = function(elements, targetElementId = null) { + if (!elements || !Array.isArray(elements)) { + console.warn('[Sentience] showOverlay: elements must be an array'); + return; } + + window.postMessage({ + type: 'SENTIENCE_SHOW_OVERLAY', + elements: elements, + targetElementId: targetElementId, + timestamp: Date.now() + }, '*'); + + console.log(`[Sentience] Overlay requested for ${elements.length} elements`); }; -})(); \ No newline at end of file + + /** + * Clear overlay manually + */ + window.sentience.clearOverlay = function() { + window.postMessage({ + type: 'SENTIENCE_CLEAR_OVERLAY' + }, '*'); + console.log('[Sentience] Overlay cleared'); + }; + + console.log('[SentienceAPI] ✓ Ready! 
(CSP-Resistant - WASM runs in background)'); +})(); diff --git a/src/extension/manifest.json b/src/extension/manifest.json index 9d979cb1..f75c6817 100644 --- a/src/extension/manifest.json +++ b/src/extension/manifest.json @@ -1,10 +1,14 @@ { "manifest_version": 3, "name": "Sentience Semantic Visual Grounding Extractor", - "version": "1.0.5", + "version": "2.0.7", "description": "Extract semantic visual grounding data from web pages", "permissions": ["activeTab", "scripting"], "host_permissions": ["<all_urls>"], + "background": { + "service_worker": "background.js", + "type": "module" + }, "web_accessible_resources": [ { "resources": ["pkg/*"], @@ -15,16 +19,18 @@ { "matches": ["<all_urls>"], "js": ["content.js"], - "run_at": "document_start" + "run_at": "document_start", + "all_frames": true }, { "matches": ["<all_urls>"], "js": ["injected_api.js"], "run_at": "document_idle", - "world": "MAIN" + "world": "MAIN", + "all_frames": true } ], "content_security_policy": { "extension_pages": "script-src 'self' 'wasm-unsafe-eval'; object-src 'self'" } -} \ No newline at end of file +} diff --git a/src/extension/pkg/README.md b/src/extension/pkg/README.md index ca9d035c..7f0d49b0 100644 --- a/src/extension/pkg/README.md +++ b/src/extension/pkg/README.md @@ -21,6 +21,7 @@ Perfect for AI agents, automation scripts, visual grounding, and accessibility t 11. [API Reference](#api-reference) 12. [Performance](#performance) 13. [Troubleshooting](#troubleshooting) +14. Transferred to SentienceAPI Org --- @@ -151,13 +152,20 @@ Claude/ ## User API -### The Only Function You Need +### Core Functions + +The extension provides two main functions: + +1. **`window.sentience.snapshot(options?)`** - Extract page geometry and elements +2. **`window.sentience.findTextRect(options)`** - Find exact pixel coordinates of text + +### snapshot() - Geometry Extraction ```javascript window.sentience.snapshot(options?) 
``` -**One function, many capabilities:** +**Capabilities:** - Get geometry map - Capture screenshot - Filter by role/size/z-index @@ -213,6 +221,159 @@ await window.sentience.snapshot({ } ``` +### findTextRect() - Text Location Finder + +Find exact pixel coordinates of any text on the page using the DOM Range API. Perfect for highlighting specific words, clicking on text, or text-based navigation **without Vision Models**. + +```javascript +window.sentience.findTextRect(options) +``` + +**Parameters:** +```typescript +{ + text: string, // Required: Text to find + containerElement?: Element, // Optional: Search within (default: document.body) + caseSensitive?: boolean, // Optional: Case-sensitive search (default: false) + wholeWord?: boolean, // Optional: Match whole words only (default: false) + maxResults?: number // Optional: Limit results (default: 10) +} +``` + +**Returns:** +```typescript +{ + status: "success" | "error", + query: string, // The search text + case_sensitive: boolean, + whole_word: boolean, + matches: number, // Total matches found + results: [{ + text: string, // Actual matched text + rect: { // Absolute coordinates (with scroll) + x: number, + y: number, + width: number, + height: number, + left: number, + top: number, + right: number, + bottom: number + }, + viewport_rect: { // Viewport-relative coordinates + x: number, + y: number, + width: number, + height: number + }, + context: { // Surrounding text + before: string, // 20 chars before + after: string // 20 chars after + }, + in_viewport: boolean // Is it currently visible? 
+ }], + viewport: { + width: number, + height: number, + scroll_x: number, + scroll_y: number + }, + error?: string // Error message if status is "error" +} +``` + +**Usage Examples:** + +```javascript +// Example 1: Find "Add to Cart" text +const result = await window.sentience.findTextRect({ + text: "Add to Cart" +}); + +if (result.status === "success") { + console.log(`Found ${result.matches} occurrences`); + result.results.forEach((match, i) => { + console.log(`${i+1}. At (${match.rect.x}, ${match.rect.y})`); + console.log(` Context: "${match.context.before}${match.text}${match.context.after}"`); + }); +} + +// Example 2: Highlight all matches +const result = await window.sentience.findTextRect({ + text: "price", + caseSensitive: false, + maxResults: 20 +}); + +result.results.forEach(match => { + const highlight = document.createElement('div'); + highlight.style.cssText = ` + position: absolute; + left: ${match.rect.x}px; + top: ${match.rect.y}px; + width: ${match.rect.width}px; + height: ${match.rect.height}px; + background: yellow; + opacity: 0.5; + pointer-events: none; + z-index: 9999; + `; + document.body.appendChild(highlight); +}); + +// Example 3: Click on specific text (not button!) 
+const result = await window.sentience.findTextRect({ + text: "Terms of Service", + wholeWord: true +}); + +if (result.matches > 0) { + const first = result.results[0]; + // Click the center of the text + const centerX = first.viewport_rect.x + first.viewport_rect.width / 2; + const centerY = first.viewport_rect.y + first.viewport_rect.height / 2; + + document.elementFromPoint(centerX, centerY)?.click(); +} + +// Example 4: Find text only in header +const header = document.querySelector('header'); +const result = await window.sentience.findTextRect({ + text: "Login", + containerElement: header +}); + +// Example 5: Scroll to first match +const result = await window.sentience.findTextRect({ + text: "Contact Us" +}); + +if (result.matches > 0) { + const first = result.results[0]; + window.scrollTo({ + top: first.rect.y - 100, // Offset for header + behavior: 'smooth' + }); +} +``` + +**Use Cases:** +- 🎯 **Text-based clicking** - Click on text that's not in a button +- 🖍️ **Text highlighting** - Draw bounding boxes around specific words +- 📍 **Text navigation** - Scroll to specific content +- ♿ **Accessibility** - Find and highlight important text +- 🤖 **AI Agents** - Locate text without vision models +- 🔍 **Search results** - Find and highlight search terms + +**Features:** +- ✅ Pixel-perfect coordinates using DOM Range API +- ✅ Filters invisible/hidden text automatically +- ✅ Returns both absolute and viewport-relative coordinates +- ✅ Provides context for ambiguous matches +- ✅ Handles multiple occurrences +- ✅ Performance-safe with result limits +- ✅ Works with case-insensitive and whole-word matching + --- ## Usage Examples diff --git a/src/extension/pkg/sentience_core.d.ts b/src/extension/pkg/sentience_core.d.ts index 017160d8..e280c268 100644 --- a/src/extension/pkg/sentience_core.d.ts +++ b/src/extension/pkg/sentience_core.d.ts @@ -7,6 +7,14 @@ export function analyze_page_with_options(val: any, options: any): any; export function decide_and_act(_raw_elements: 
any): void; +/** + * Prune raw elements before sending to API + * This is a "dumb" filter that reduces payload size without leaking proprietary IP + * Filters out: tiny elements, invisible elements, non-interactive wrapper divs + * Amazon: 5000-6000 elements -> ~200-400 elements (~95% reduction) + */ +export function prune_for_api(val: any): any; + export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module; export interface InitOutput { @@ -14,6 +22,7 @@ export interface InitOutput { readonly analyze_page: (a: number) => number; readonly analyze_page_with_options: (a: number, b: number) => number; readonly decide_and_act: (a: number) => void; + readonly prune_for_api: (a: number) => number; readonly __wbindgen_export: (a: number, b: number) => number; readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number; readonly __wbindgen_export3: (a: number) => void; diff --git a/src/extension/pkg/sentience_core.js b/src/extension/pkg/sentience_core.js index bb44be74..b232d138 100644 --- a/src/extension/pkg/sentience_core.js +++ b/src/extension/pkg/sentience_core.js @@ -223,6 +223,19 @@ export function decide_and_act(_raw_elements) { wasm.decide_and_act(addHeapObject(_raw_elements)); } +/** + * Prune raw elements before sending to API + * This is a "dumb" filter that reduces payload size without leaking proprietary IP + * Filters out: tiny elements, invisible elements, non-interactive wrapper divs + * Amazon: 5000-6000 elements -> ~200-400 elements (~95% reduction) + * @param {any} val + * @returns {any} + */ +export function prune_for_api(val) { + const ret = wasm.prune_for_api(addHeapObject(val)); + return takeObject(ret); +} + const EXPECTED_RESPONSE_TYPES = new Set(['basic', 'cors', 'default']); async function __wbg_load(module, imports) { @@ -338,6 +351,9 @@ function __wbg_get_imports() { const ret = getObject(arg0).done; return ret; }; + imports.wbg.__wbg_error_7bc7d576a6aaf855 = function(arg0) { + 
console.error(getObject(arg0)); + }; imports.wbg.__wbg_get_6b7bd52aca3f9671 = function(arg0, arg1) { const ret = getObject(arg0)[arg1 >>> 0]; return addHeapObject(ret); diff --git a/src/extension/pkg/sentience_core_bg.wasm b/src/extension/pkg/sentience_core_bg.wasm index 10a312c9..ddb4659c 100644 Binary files a/src/extension/pkg/sentience_core_bg.wasm and b/src/extension/pkg/sentience_core_bg.wasm differ diff --git a/src/extension/pkg/sentience_core_bg.wasm.d.ts b/src/extension/pkg/sentience_core_bg.wasm.d.ts index 35441434..dccf049f 100644 --- a/src/extension/pkg/sentience_core_bg.wasm.d.ts +++ b/src/extension/pkg/sentience_core_bg.wasm.d.ts @@ -4,6 +4,7 @@ export const memory: WebAssembly.Memory; export const analyze_page: (a: number) => number; export const analyze_page_with_options: (a: number, b: number) => number; export const decide_and_act: (a: number) => void; +export const prune_for_api: (a: number) => number; export const __wbindgen_export: (a: number, b: number) => number; export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number; export const __wbindgen_export3: (a: number) => void; diff --git a/src/extension/release.json b/src/extension/release.json new file mode 100644 index 00000000..ebade77e --- /dev/null +++ b/src/extension/release.json @@ -0,0 +1,115 @@ +{ + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/273122615", + "assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/273122615/assets", + "upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/273122615/assets{?name,label}", + "html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v2.0.7", + "id": 273122615, + "author": { + "login": "github-actions[bot]", + "id": 41898282, + "node_id": "MDM6Qm90NDE4OTgyODI=", + "avatar_url": "https://avatars.githubusercontent.com/in/15368?v=4", + 
"gravatar_id": "", + "url": "https://api.github.com/users/github-actions%5Bbot%5D", + "html_url": "https://github.com/apps/github-actions", + "followers_url": "https://api.github.com/users/github-actions%5Bbot%5D/followers", + "following_url": "https://api.github.com/users/github-actions%5Bbot%5D/following{/other_user}", + "gists_url": "https://api.github.com/users/github-actions%5Bbot%5D/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github-actions%5Bbot%5D/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github-actions%5Bbot%5D/subscriptions", + "organizations_url": "https://api.github.com/users/github-actions%5Bbot%5D/orgs", + "repos_url": "https://api.github.com/users/github-actions%5Bbot%5D/repos", + "events_url": "https://api.github.com/users/github-actions%5Bbot%5D/events{/privacy}", + "received_events_url": "https://api.github.com/users/github-actions%5Bbot%5D/received_events", + "type": "Bot", + "user_view_type": "public", + "site_admin": false + }, + "node_id": "RE_kwDOQshiJ84QR4U3", + "tag_name": "v2.0.7", + "target_commitish": "main", + "name": "Release v2.0.7", + "draft": false, + "immutable": false, + "prerelease": false, + "created_at": "2025-12-29T03:56:13Z", + "updated_at": "2025-12-29T03:57:09Z", + "published_at": "2025-12-29T03:57:08Z", + "assets": [ + { + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/333966751", + "id": 333966751, + "node_id": "RA_kwDOQshiJ84T5-2f", + "name": "extension-files.tar.gz", + "label": "", + "uploader": { + "login": "github-actions[bot]", + "id": 41898282, + "node_id": "MDM6Qm90NDE4OTgyODI=", + "avatar_url": "https://avatars.githubusercontent.com/in/15368?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/github-actions%5Bbot%5D", + "html_url": "https://github.com/apps/github-actions", + "followers_url": "https://api.github.com/users/github-actions%5Bbot%5D/followers", + "following_url": 
"https://api.github.com/users/github-actions%5Bbot%5D/following{/other_user}", + "gists_url": "https://api.github.com/users/github-actions%5Bbot%5D/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github-actions%5Bbot%5D/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github-actions%5Bbot%5D/subscriptions", + "organizations_url": "https://api.github.com/users/github-actions%5Bbot%5D/orgs", + "repos_url": "https://api.github.com/users/github-actions%5Bbot%5D/repos", + "events_url": "https://api.github.com/users/github-actions%5Bbot%5D/events{/privacy}", + "received_events_url": "https://api.github.com/users/github-actions%5Bbot%5D/received_events", + "type": "Bot", + "user_view_type": "public", + "site_admin": false + }, + "content_type": "application/gzip", + "state": "uploaded", + "size": 78091, + "digest": "sha256:e281f8b755b61da4b8015d6172064aa9a337c14133ceceff4ab29199ee53307e", + "download_count": 2, + "created_at": "2025-12-29T03:57:09Z", + "updated_at": "2025-12-29T03:57:09Z", + "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.0.7/extension-files.tar.gz" + }, + { + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/333966752", + "id": 333966752, + "node_id": "RA_kwDOQshiJ84T5-2g", + "name": "extension-package.zip", + "label": "", + "uploader": { + "login": "github-actions[bot]", + "id": 41898282, + "node_id": "MDM6Qm90NDE4OTgyODI=", + "avatar_url": "https://avatars.githubusercontent.com/in/15368?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/github-actions%5Bbot%5D", + "html_url": "https://github.com/apps/github-actions", + "followers_url": "https://api.github.com/users/github-actions%5Bbot%5D/followers", + "following_url": "https://api.github.com/users/github-actions%5Bbot%5D/following{/other_user}", + "gists_url": 
"https://api.github.com/users/github-actions%5Bbot%5D/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github-actions%5Bbot%5D/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github-actions%5Bbot%5D/subscriptions", + "organizations_url": "https://api.github.com/users/github-actions%5Bbot%5D/orgs", + "repos_url": "https://api.github.com/users/github-actions%5Bbot%5D/repos", + "events_url": "https://api.github.com/users/github-actions%5Bbot%5D/events{/privacy}", + "received_events_url": "https://api.github.com/users/github-actions%5Bbot%5D/received_events", + "type": "Bot", + "user_view_type": "public", + "site_admin": false + }, + "content_type": "application/zip", + "state": "uploaded", + "size": 80179, + "digest": "sha256:a025edeb8b6d05bfb25c57f913b68507060653ecbdf616000a46df4cb8dec377", + "download_count": 0, + "created_at": "2025-12-29T03:57:09Z", + "updated_at": "2025-12-29T03:57:09Z", + "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.0.7/extension-package.zip" + } + ], + "tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v2.0.7", + "zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v2.0.7", + "body": "**Full Changelog**: https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/compare/v2.0.6...v2.0.7" +} diff --git a/src/extension/test-content.js b/src/extension/test-content.js new file mode 100644 index 00000000..7ca4ddc9 --- /dev/null +++ b/src/extension/test-content.js @@ -0,0 +1,4 @@ +// test-content.js - Simple test script +console.log('TEST: Extension content script is loading!'); +window.testExtension = true; +alert('Extension loaded!');