From 337ec8c4987478c864ec4b47652f131d644a295c Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 21 Dec 2025 21:42:05 -0800 Subject: [PATCH 1/4] fix sync pipeline --- .github/.DS_Store | Bin 0 -> 6148 bytes .github/workflows/sync-extension.yml | 274 +++++++-------------------- 2 files changed, 68 insertions(+), 206 deletions(-) create mode 100644 .github/.DS_Store diff --git a/.github/.DS_Store b/.github/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b681ead2b85400ce6e4dc8dcd17ba4713e05df48 GIT binary patch literal 6148 zcmeHK%}N6?5T3Nv?oxyv6!aGGTClBBC|;IYU%(YTsMId4x^&%0w{;Jtuy=hSU&QBe zCdrDW6+DU98JK*@{A|dVC6fSv=u8Iu#Z zQZyU>A_KH{F7B9uF^u5L{v}UAqyqH51y50!OzQP_Q7D!+Hp@=gsW>+I|w9G~?@(POM$44(pjJuO=nb9lwhilsgK<2X|B zJ$m!ZJcE%LU> $GITHUB_OUTPUT + exit 0 + fi fi - if [ -z "$TAG" ] || [ "$TAG" == "null" ]; then - echo "No release found, skipping" - echo "skip=true" >> $GITHUB_OUTPUT - exit 0 + if [ -z "$TAG" ]; then + echo "Could not determine release tag." + exit 1 fi + echo "Syncing tag: $TAG" echo "tag=$TAG" >> $GITHUB_OUTPUT - echo "Release tag: $TAG" - name: Download extension files if: steps.release.outputs.skip != 'true' @@ -59,229 +66,86 @@ jobs: TAG="${{ steps.release.outputs.tag }}" REPO="${{ secrets.SENTIENCE_CHROME_REPO }}" - # Download release assets + # Setup temp directory mkdir -p extension-temp cd extension-temp - # Download individual files from release (reliable method - no zip) - echo "๐Ÿ“ Downloading individual files from release..." - echo "Available release assets:" - curl -s -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \ - "https://api.github.com/repos/$REPO/releases/tags/$TAG" | \ - jq -r '.assets[] | select(.name | endswith(".js") or endswith(".wasm") or endswith(".json") or endswith(".d.ts")) | .name' || true + echo "โฌ‡๏ธ Fetching release assets for $TAG from $REPO..." - curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \ + # 1. Get the URL for 'extension-files.tar.gz' specifically + # We query the release assets API and filter by name + ASSET_URL=$(curl -s -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \ "https://api.github.com/repos/$REPO/releases/tags/$TAG" | \ - jq -r '.assets[] | select(.name | endswith(".js") or endswith(".wasm") or endswith(".json") or endswith(".d.ts")) | "\(.browser_download_url)|\(.name)"' | \ - while IFS='|' read -r url name; do - if [ -n "$url" ] && [ "$url" != "null" ] && [ -n "$name" ]; then - # Handle asset names that might have paths like "pkg/sentience_core.js" or "extension-package/manifest.json" - # GitHub releases might preserve directory structure in asset names - # Strip "extension-package/" prefix if present, as we'll handle it in copy step - if [[ "$name" == extension-package/* ]]; then - # Asset name is "extension-package/manifest.json" - strip prefix - filename="${name#extension-package/}" - dir=$(dirname "$filename") - if [ "$dir" != "." ]; then - mkdir -p "$dir" - fi - echo " Downloading $name -> $filename" - curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$url" -o "$filename" - elif [[ "$name" == pkg/* ]]; then - # Asset name is "pkg/sentience_core.js" - create pkg directory - mkdir -p pkg - filename=$(basename "$name") - echo " Downloading $name -> pkg/$filename" - curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$url" -o "pkg/$filename" - else - # Asset name is just "manifest.json" - put at root - dir=$(dirname "$name") - if [ "$dir" != "." ]; then - mkdir -p "$dir" - fi - echo " Downloading $name" - curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$url" -o "$name" - fi - fi - done - - # Verify downloaded files - echo "๐Ÿ“‹ Downloaded files structure:" - find . -type f -name "*.js" -o -name "*.wasm" -o -name "*.json" | sort - echo "" - echo "Directory structure:" - ls -laR . | head -50 - echo "" - echo "๐Ÿ” Verifying critical files:" - if [ -f "manifest.json" ]; then - echo "โœ… manifest.json found ($(wc -c < manifest.json) bytes)" - head -5 manifest.json - else - echo "โŒ manifest.json NOT FOUND" - fi - if [ -d "pkg" ]; then - echo "โœ… pkg directory found with $(ls -1 pkg | wc -l) files" - else - echo "โŒ pkg directory NOT FOUND" - fi - - - name: Copy extension files - if: steps.release.outputs.skip != 'true' - run: | - # Create extension directory structure - mkdir -p src/extension/pkg - - # Copy extension files (handle both root and extension-package/ subdirectory) - # Check root first, then extension-package/ subdirectory - if [ -f "extension-temp/manifest.json" ]; then - size=$(wc -c < extension-temp/manifest.json) - if [ "$size" -gt 0 ]; then - echo "โœ… Copying manifest.json ($size bytes)" - cp extension-temp/manifest.json src/extension/ - # Verify copy - if [ -f "src/extension/manifest.json" ] && [ "$(wc -c < src/extension/manifest.json)" -gt 0 ]; then - echo "โœ… manifest.json copied successfully" - else - echo "โŒ manifest.json copy failed or file is empty" - exit 1 - fi - else - echo "โŒ manifest.json is empty ($size bytes)" - exit 1 - fi - elif [ -f "extension-temp/extension-package/manifest.json" ]; then - size=$(wc -c < extension-temp/extension-package/manifest.json) - if [ "$size" -gt 0 ]; then - echo "โœ… Copying manifest.json from extension-package/ ($size bytes)" - cp extension-temp/extension-package/manifest.json src/extension/ - # Verify copy - if [ -f "src/extension/manifest.json" ] && [ "$(wc -c < src/extension/manifest.json)" -gt 0 ]; then - echo "โœ… manifest.json copied successfully" - else - echo "โŒ manifest.json copy failed or file is empty" - exit 1 - fi - else - echo "โŒ manifest.json is empty ($size bytes)" - exit 1 - fi - else - echo "โŒ manifest.json not found in extension-temp/" - echo "Available files:" - find extension-temp -type f | head -20 + jq -r '.assets[] | select(.name == "extension-files.tar.gz") | .browser_download_url') + + if [ -z "$ASSET_URL" ] || [ "$ASSET_URL" == "null" ]; then + echo "โŒ Critical Error: extension-files.tar.gz not found in release assets!" + echo "Debug: Listing available assets..." + curl -s -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \ + "https://api.github.com/repos/$REPO/releases/tags/$TAG" | jq -r '.assets[].name' exit 1 fi + + # 2. Download the tarball + echo "๐Ÿ“ฆ Downloading tarball from $ASSET_URL..." + curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \ + -H "Accept: application/octet-stream" \ + "$ASSET_URL" -o extension.tar.gz + + # 3. Extract it + echo "๐Ÿ“‚ Extracting..." + tar -xzf extension.tar.gz + rm extension.tar.gz - if [ -f "extension-temp/content.js" ]; then - cp extension-temp/content.js src/extension/ - elif [ -f "extension-temp/extension-package/content.js" ]; then - cp extension-temp/extension-package/content.js src/extension/ - else - echo "โš ๏ธ content.js not found" - fi - - if [ -f "extension-temp/background.js" ]; then - cp extension-temp/background.js src/extension/ - elif [ -f "extension-temp/extension-package/background.js" ]; then - cp extension-temp/extension-package/background.js src/extension/ - else - echo "โš ๏ธ background.js not found" - fi + # 4. Verify extraction + echo "โœ… Extraction complete. Contents:" + ls -la - if [ -f "extension-temp/injected_api.js" ]; then - cp extension-temp/injected_api.js src/extension/ - elif [ -f "extension-temp/extension-package/injected_api.js" ]; then - cp extension-temp/extension-package/injected_api.js src/extension/ - else - echo "โš ๏ธ injected_api.js not found" + if [ ! -f "manifest.json" ]; then + echo "โŒ Error: manifest.json missing after extraction" + exit 1 fi - # Copy WASM files - try multiple locations and patterns - echo "๐Ÿ” Searching for pkg directory and WASM files..." + - name: Update extension files + if: steps.release.outputs.skip != 'true' + run: | + # Target directory in sdk-ts + TARGET_DIR="sentience-chrome" - # Check all possible locations - if [ -d "extension-temp/pkg" ]; then - echo "โœ… Found pkg directory at extension-temp/pkg" - cp -r extension-temp/pkg/* src/extension/pkg/ 2>/dev/null || true - elif [ -d "extension-temp/extension-package/pkg" ]; then - echo "โœ… Found pkg directory at extension-temp/extension-package/pkg" - cp -r extension-temp/extension-package/pkg/* src/extension/pkg/ 2>/dev/null || true - else - echo "โš ๏ธ pkg directory not found, searching for individual files..." - - # Search for files in various locations - find extension-temp -name "sentience_core.js" -type f | while read file; do - echo " Found: $file" - cp "$file" src/extension/pkg/ 2>/dev/null || true - done - - find extension-temp -name "sentience_core_bg.wasm" -type f | while read file; do - echo " Found: $file" - cp "$file" src/extension/pkg/ 2>/dev/null || true - done - - find extension-temp -name "*.d.ts" -type f | while read file; do - echo " Found: $file" - cp "$file" src/extension/pkg/ 2>/dev/null || true - done - fi + # Ensure target directory exists and is clean + rm -rf "$TARGET_DIR" + mkdir -p "$TARGET_DIR" - # Verify copied files - echo "๐Ÿ“‹ Copied files:" - echo "Extension root:" - ls -la src/extension/ || echo "โš ๏ธ Extension directory empty" - echo "" - echo "WASM files (pkg directory):" - if [ -d "src/extension/pkg" ]; then - ls -la src/extension/pkg/ || echo "โš ๏ธ pkg directory empty" - else - echo "โŒ ERROR: pkg directory not created!" - exit 1 - fi + # Copy files from temp directory + cp -r extension-temp/* "$TARGET_DIR/" - # Verify required files exist - if [ ! -f "src/extension/pkg/sentience_core.js" ]; then - echo "โŒ ERROR: sentience_core.js not found!" - exit 1 - fi - if [ ! -f "src/extension/pkg/sentience_core_bg.wasm" ]; then - echo "โŒ ERROR: sentience_core_bg.wasm not found!" + # Verify copy + if [ ! -f "$TARGET_DIR/manifest.json" ]; then + echo "โŒ Failed to copy manifest.json to $TARGET_DIR" exit 1 fi - echo "โœ… All required WASM files verified" - # Clean up temporary directory - cd .. + # Cleanup rm -rf extension-temp - echo "๐Ÿงน Cleaned up extension-temp directory" + + echo "โœ… Extension files updated in $TARGET_DIR" + ls -la "$TARGET_DIR" - name: Check for changes if: steps.release.outputs.skip != 'true' id: changes run: | - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - - # Show what files exist before adding - echo "๐Ÿ“‹ Files in src/extension before git add:" - find src/extension -type f | sort || echo "No files found" - - # Add all files including binary files - # Use -f to force add in case files are in .gitignore - git add -f src/extension/ || true + git add sentience-chrome/ - # Show what was staged - echo "๐Ÿ“‹ Staged files:" - git diff --staged --name-only || echo "No staged files" - - # Check if there are actual changes + # Check if anything actually changed if git diff --staged --quiet; then + echo "No changes detected." echo "changed=false" >> $GITHUB_OUTPUT - echo "No changes detected" else + echo "Changes detected." echo "changed=true" >> $GITHUB_OUTPUT - echo "Changes detected" - # Show file sizes to verify binary files are included + + # Show staged files to verify binary files are included echo "๐Ÿ“Š Staged file sizes:" git diff --staged --name-only | while read file; do if [ -f "$file" ]; then @@ -295,9 +159,8 @@ jobs: if: steps.release.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v5 with: - # Use PR_TOKEN if available (for repos with org restrictions), otherwise use GITHUB_TOKEN - # To use PAT: create secret named PR_TOKEN with a Personal Access Token that has 'repo' scope - token: ${{ secrets.PR_TOKEN }} + # Use PR_TOKEN if available, otherwise GITHUB_TOKEN + token: ${{ secrets.PR_TOKEN || secrets.GITHUB_TOKEN }} commit-message: "chore: sync extension files from sentience-chrome ${{ steps.release.outputs.tag }}" title: "Sync Extension: ${{ steps.release.outputs.tag }}" body: | @@ -312,5 +175,4 @@ jobs: delete-branch: true labels: | automated - extension-sync - + extension-sync \ No newline at end of file From a79fa6391e2539ae956a1ac1f7ac23c6c25f24a6 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 21 Dec 2025 22:01:31 -0800 Subject: [PATCH 2/4] fix: use --headless=new for extension support and improve WASM wait logic --- src/browser.ts | 89 +++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 51 deletions(-) diff --git a/src/browser.ts b/src/browser.ts index 36b59e90..af25fe2b 100644 --- a/src/browser.ts +++ b/src/browser.ts @@ -15,7 +15,6 @@ export class SentienceBrowser { private userDataDir: string | null = null; private _apiKey?: string; private _apiUrl?: string; - private headless: boolean; constructor( apiKey?: string, @@ -23,12 +22,11 @@ export class SentienceBrowser { headless?: boolean ) { this._apiKey = apiKey; - // Default to headless=True in CI (no X server), headless=False locally - if (headless === undefined) { - const ci = process.env.CI?.toLowerCase(); - this.headless = ci === 'true' || ci === '1' || ci === 'yes'; - } else { - this.headless = headless; + // Note: headless parameter is accepted but ignored for extensions + // Extensions REQUIRE --headless=new mode which is set in browser args + // We keep the parameter for API compatibility + if (headless !== undefined) { + console.log('[Sentience] Note: headless parameter ignored for extensions (using --headless=new)'); } // Only set apiUrl if apiKey is provided, otherwise undefined (free tier) // Default to https://api.sentienceapi.com if apiKey is provided but apiUrl is not @@ -141,56 +139,39 @@ export class SentienceBrowser { // Extensions load more reliably with persistent contexts const launchTimeout = 30000; // 30 seconds const userDataDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sentience-profile-')); - + this.userDataDir = userDataDir; + // Stealth arguments for bot evasion + // IMPORTANT: Always use --headless=new for extensions (required even when headless=false in config) const stealthArgs = [ `--load-extension=${tempDir}`, `--disable-extensions-except=${tempDir}`, + '--headless=new', // Required for extensions to work '--disable-blink-features=AutomationControlled', // Hide automation indicators '--no-sandbox', // Required for some environments '--disable-infobars', // Hide "Chrome is being controlled" message ]; - + // Realistic viewport and user-agent for better evasion const viewportConfig = { width: 1920, height: 1080 }; const userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'; - + // Launch browser with extension - // Note: channel="chrome" (system Chrome) has known issues with extension loading - // We use bundled Chromium for reliable extension loading, but still apply stealth features - const useChromeChannel = false; // Disabled for now due to extension loading issues - + // Note: We use bundled Chromium for reliable extension loading + // headless: false in config, but --headless=new in args ensures extension compatibility try { - if (useChromeChannel) { - // Try with system Chrome first (better evasion, but may have extension issues) - this.context = await Promise.race([ - chromium.launchPersistentContext(userDataDir, { - channel: 'chrome', // Use system Chrome (better evasion) - headless: this.headless, - args: stealthArgs, - viewport: viewportConfig, - userAgent: userAgent, - timeout: launchTimeout, - }), - new Promise((_, reject) => - setTimeout(() => reject(new Error(`Browser launch timed out after ${launchTimeout}ms. Make sure Playwright browsers are installed: npx playwright install chromium`)), launchTimeout) - ), - ]); - } else { - // Use bundled Chromium (more reliable for extensions) - this.context = await Promise.race([ - chromium.launchPersistentContext(userDataDir, { - headless: this.headless, - args: stealthArgs, - viewport: viewportConfig, - userAgent: userAgent, - timeout: launchTimeout, - }), - new Promise((_, reject) => - setTimeout(() => reject(new Error(`Browser launch timed out after ${launchTimeout}ms. Make sure Playwright browsers are installed: npx playwright install chromium`)), launchTimeout) - ), - ]); - } + this.context = await Promise.race([ + chromium.launchPersistentContext(userDataDir, { + headless: false, // Must be false for extensions, but we pass --headless=new in args + args: stealthArgs, + viewport: viewportConfig, + userAgent: userAgent, + timeout: launchTimeout, + }), + new Promise((_, reject) => + setTimeout(() => reject(new Error(`Browser launch timed out after ${launchTimeout}ms. Make sure Playwright browsers are installed: npx playwright install chromium`)), launchTimeout) + ), + ]); } catch (launchError: any) { // Clean up user data dir on failure try { @@ -319,7 +300,7 @@ export class SentienceBrowser { const start = Date.now(); let lastError: string | null = null; - + while (Date.now() - start < timeout) { try { const result = await this.page.evaluate(() => { @@ -335,20 +316,26 @@ export class SentienceBrowser { if ((window as any).sentience_registry === undefined) { return { ready: false, reason: 'registry not initialized' }; } - // Check if WASM module itself is loaded (check internal _wasmModule if available) + // IMPORTANT: Check if WASM module is actually loaded (not null) const sentience = (window as any).sentience; + if (sentience._wasmModule === null) { + return { ready: false, reason: 'WASM module is null (still loading)' }; + } + if (sentience._wasmModule === undefined) { + return { ready: false, reason: 'WASM module not initialized' }; + } + // Verify WASM module has required function if (sentience._wasmModule && !sentience._wasmModule.analyze_page) { - return { ready: false, reason: 'WASM module not fully loaded' }; + return { ready: false, reason: 'WASM module missing analyze_page function' }; } - // If _wasmModule is not exposed, that's okay - it might be internal - // Just verify the API structure is correct + // Everything is ready return { ready: true }; }); if (result && (result as any).ready) { return true; } - + // Track the last error for debugging if (result && (result as any).reason) { lastError = (result as any).reason; @@ -365,7 +352,7 @@ export class SentienceBrowser { if (lastError) { console.warn(`Extension wait timeout. Last status: ${lastError}`); } - + return false; } From 3b4d3eaecb872899a7487050b8941a599bf6e48d Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 21 Dec 2025 22:40:16 -0800 Subject: [PATCH 3/4] fix browser.ts --- src/browser.ts | 409 ++++++++++++--------------------------------- tests/read.test.ts | 10 +- 2 files changed, 117 insertions(+), 302 deletions(-) diff --git a/src/browser.ts b/src/browser.ts index af25fe2b..55a54bd9 100644 --- a/src/browser.ts +++ b/src/browser.ts @@ -15,6 +15,7 @@ export class SentienceBrowser { private userDataDir: string | null = null; private _apiKey?: string; private _apiUrl?: string; + private headless: boolean; constructor( apiKey?: string, @@ -22,14 +23,17 @@ export class SentienceBrowser { headless?: boolean ) { this._apiKey = apiKey; - // Note: headless parameter is accepted but ignored for extensions - // Extensions REQUIRE --headless=new mode which is set in browser args - // We keep the parameter for API compatibility - if (headless !== undefined) { - console.log('[Sentience] Note: headless parameter ignored for extensions (using --headless=new)'); + + // Determine headless mode + if (headless === undefined) { + // Default to true in CI, false locally + const ci = process.env.CI?.toLowerCase(); + this.headless = ci === 'true' || ci === '1' || ci === 'yes'; + } else { + this.headless = headless; } - // Only set apiUrl if apiKey is provided, otherwise undefined (free tier) - // Default to https://api.sentienceapi.com if apiKey is provided but apiUrl is not + + // Configure API URL if (apiKey) { this._apiUrl = apiUrl || 'https://api.sentienceapi.com'; } else { @@ -38,321 +42,121 @@ export class SentienceBrowser { } async start(): Promise { - // Try to find extension in multiple locations: - // 1. Embedded extension (src/extension/) - for production/CI - // 2. Development mode (../sentience-chrome/) - for local development + // 1. Resolve Extension Path + // Handle: src/extension (local dev), dist/extension (prod), or ../sentience-chrome (monorepo) + let extensionSource = ''; - // Handle both ts-node (src/) and compiled (dist/src/) cases - let sdkRoot: string; - let repoRoot: string; - if (__dirname.includes('dist')) { - // Compiled: dist/src/ -> go up 2 levels to sdk-ts/ - sdkRoot = path.resolve(__dirname, '../..'); - // Go up 1 more level to project root (Sentience/) - repoRoot = path.resolve(sdkRoot, '..'); - } else { - // ts-node: src/ -> go up 1 level to sdk-ts/ - sdkRoot = path.resolve(__dirname, '..'); - // Go up 1 more level to project root (Sentience/) - repoRoot = path.resolve(sdkRoot, '..'); - } - - // Check for embedded extension first (production/CI) - const embeddedExtension = path.join(sdkRoot, 'src', 'extension'); - - // Check for development extension (local development) - const devExtension = path.join(repoRoot, 'sentience-chrome'); - - // Prefer embedded extension, fall back to dev extension - let extensionSource: string; - if (fs.existsSync(embeddedExtension) && fs.existsSync(path.join(embeddedExtension, 'manifest.json'))) { - extensionSource = embeddedExtension; - } else if (fs.existsSync(devExtension) && fs.existsSync(path.join(devExtension, 'manifest.json'))) { - extensionSource = devExtension; - } else { - throw new Error( - `Extension not found. Checked:\n` + - ` 1. ${embeddedExtension}\n` + - ` 2. ${devExtension}\n` + - 'Make sure extension files are available. ' + - 'For development: cd ../sentience-chrome && ./build.sh' - ); - } - - // Create temporary extension bundle - const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sentience-ext-')); - this.extensionPath = tempDir; // tempDir is already a string - - // Copy extension files - const filesToCopy = [ - 'manifest.json', - 'content.js', - 'background.js', - 'injected_api.js', + const candidates = [ + // Production / Installed Package + path.resolve(__dirname, '../extension'), + path.resolve(__dirname, 'extension'), + // Local Monorepo Dev + path.resolve(__dirname, '../../sentience-chrome'), + path.resolve(__dirname, '../../../sentience-chrome'), + // CI Artifact + path.resolve(process.cwd(), 'extension') ]; - const missingFiles: string[] = []; - for (const file of filesToCopy) { - const src = path.join(extensionSource, file); - if (fs.existsSync(src)) { - fs.copyFileSync(src, path.join(tempDir, file)); - } else { - missingFiles.push(file); + for (const loc of candidates) { + if (fs.existsSync(path.join(loc, 'manifest.json'))) { + extensionSource = loc; + break; } } - if (missingFiles.length > 0) { - throw new Error( - `Missing required extension files: ${missingFiles.join(', ')}\n` + - `Extension source: ${extensionSource}` - ); + if (!extensionSource) { + throw new Error( + `Sentience extension not found. Checked:\n${candidates.map(c => `- ${c}`).join('\n')}\n` + + 'Ensure the extension is built/downloaded.' + ); } - // Copy pkg directory (WASM) - const pkgSource = path.join(extensionSource, 'pkg'); - if (!fs.existsSync(pkgSource)) { - throw new Error( - `WASM package directory not found at ${pkgSource}\n` + - 'Make sure extension files are available. ' + - 'For development: cd ../sentience-chrome && ./build.sh' - ); - } + // 2. Setup Temp Profile (Avoids locking issues) + this.userDataDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sentience-ts-')); + this.extensionPath = path.join(this.userDataDir, 'extension'); - // Verify WASM files exist - const wasmJs = path.join(pkgSource, 'sentience_core.js'); - const wasmBinary = path.join(pkgSource, 'sentience_core_bg.wasm'); - if (!fs.existsSync(wasmJs) || !fs.existsSync(wasmBinary)) { - throw new Error( - `WASM files not found. Expected:\n` + - ` - ${wasmJs}\n` + - ` - ${wasmBinary}\n` + - 'Make sure extension files are available. ' + - 'For development: cd ../sentience-chrome && ./build.sh' - ); - } - - const pkgDest = path.join(tempDir, 'pkg'); - fs.mkdirSync(pkgDest, { recursive: true }); - this.copyDirectory(pkgSource, pkgDest); - - // Use launchPersistentContext for better extension support - // Extensions load more reliably with persistent contexts - const launchTimeout = 30000; // 30 seconds - const userDataDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sentience-profile-')); - this.userDataDir = userDataDir; - - // Stealth arguments for bot evasion - // IMPORTANT: Always use --headless=new for extensions (required even when headless=false in config) - const stealthArgs = [ - `--load-extension=${tempDir}`, - `--disable-extensions-except=${tempDir}`, - '--headless=new', // Required for extensions to work - '--disable-blink-features=AutomationControlled', // Hide automation indicators - '--no-sandbox', // Required for some environments - '--disable-infobars', // Hide "Chrome is being controlled" message + // Copy extension to temp dir + this._copyRecursive(extensionSource, this.extensionPath); + + // 3. Build Args + const args = [ + `--disable-extensions-except=${this.extensionPath}`, + `--load-extension=${this.extensionPath}`, + '--disable-blink-features=AutomationControlled', + '--no-sandbox', + '--disable-infobars', ]; - // Realistic viewport and user-agent for better evasion - const viewportConfig = { width: 1920, height: 1080 }; - const userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'; - - // Launch browser with extension - // Note: We use bundled Chromium for reliable extension loading - // headless: false in config, but --headless=new in args ensures extension compatibility - try { - this.context = await Promise.race([ - chromium.launchPersistentContext(userDataDir, { - headless: false, // Must be false for extensions, but we pass --headless=new in args - args: stealthArgs, - viewport: viewportConfig, - userAgent: userAgent, - timeout: launchTimeout, - }), - new Promise((_, reject) => - setTimeout(() => reject(new Error(`Browser launch timed out after ${launchTimeout}ms. Make sure Playwright browsers are installed: npx playwright install chromium`)), launchTimeout) - ), - ]); - } catch (launchError: any) { - // Clean up user data dir on failure - try { - fs.rmSync(userDataDir, { recursive: true, force: true }); - } catch (cleanupError) { - // Ignore cleanup errors - } - throw new Error( - `Failed to launch browser: ${launchError.message}\n` + - 'Make sure Playwright browsers are installed: npx playwright install chromium' - ); + // CRITICAL: Headless Extensions Support + // headless: true -> NO extensions. + // headless: false + args: '--headless=new' -> YES extensions. + if (this.headless) { + args.push('--headless=new'); } - // Get first page or create new one - const pages = this.context.pages(); - if (pages.length > 0) { - this.page = pages[0]; - } else { - this.page = await this.context.newPage(); - } - - // Store user data dir for cleanup - this.userDataDir = userDataDir; + // 4. Launch Browser + this.context = await chromium.launchPersistentContext(this.userDataDir, { + headless: false, // Must be false here, handled via args above + args: args, + viewport: { width: 1920, height: 1080 }, + // Clean User-Agent to avoid "HeadlessChrome" detection + userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36' + }); + + this.page = this.context.pages()[0] || await this.context.newPage(); - // Apply basic stealth patches for bot evasion - // Note: TypeScript doesn't have playwright-stealth equivalent, so we apply basic patches + // 5. Apply Stealth (Basic) await this.page.addInitScript(() => { - // Override navigator.webdriver - Object.defineProperty(navigator, 'webdriver', { - get: () => false, - }); - - // Override chrome runtime - (window as any).chrome = { - runtime: {}, - }; - - // Override permissions - const originalQuery = (window.navigator as any).permissions?.query; - if (originalQuery) { - (window.navigator as any).permissions.query = (parameters: any) => - parameters.name === 'notifications' - ? Promise.resolve({ state: Notification.permission } as PermissionStatus) - : originalQuery(parameters); - } - - // Override plugins - Object.defineProperty(navigator, 'plugins', { - get: () => [1, 2, 3, 4, 5], - }); + Object.defineProperty(navigator, 'webdriver', { get: () => false }); }); - // Navigate to a real page so extension can inject - // Extension content scripts only run on actual pages (not about:blank) - // Use a simple page that loads quickly - await this.page.goto('https://example.com', { - waitUntil: 'domcontentloaded', - timeout: 15000, // 15 second timeout for navigation - }); - - // Give extension time to initialize (WASM loading is async) - // Content scripts run at document_idle, so we need to wait for that - await this.page.waitForTimeout(3000); - - // Wait for extension to load - if (!(await this.waitForExtension(25000))) { - // Extension might need more time, try waiting a bit longer - await this.page.waitForTimeout(3000); - - // Try to get more diagnostic info - let diagnosticInfo = ''; - try { - diagnosticInfo = await this.page.evaluate(() => { - const info: any = { - sentience_defined: typeof (window as any).sentience !== 'undefined', - registry_defined: typeof (window as any).sentience_registry !== 'undefined', - snapshot_defined: typeof (window as any).sentience?.snapshot === 'function', - wasm_loaded: !!(window as any).sentience?._wasmModule, - extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set', - url: window.location.href, - }; - // Check console errors if possible - if ((window as any).sentience) { - info.sentience_keys = Object.keys((window as any).sentience); - } - return JSON.stringify(info, null, 2); - }); - } catch (e) { - diagnosticInfo = `Could not get diagnostic info: ${e}`; - } - - if (!(await this.waitForExtension(15000))) { - throw new Error( - 'Extension failed to load after navigation. Make sure:\n' + - '1. Extension is built (cd sentience-chrome && ./build.sh)\n' + - '2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n' + - '3. Check browser console for errors (run with headless=false to see console)\n' + - `4. Extension path: ${tempDir}\n` + - `5. Diagnostic info: ${diagnosticInfo}` - ); - } - } - } - - private copyDirectory(src: string, dest: string): void { - if (!fs.existsSync(dest)) { - fs.mkdirSync(dest, { recursive: true }); + // Inject API Key if present + if (this._apiKey) { + await this.page.addInitScript((key) => { + (window as any).__SENTIENCE_API_KEY__ = key; + }, this._apiKey); } - const entries = fs.readdirSync(src, { withFileTypes: true }); - - for (const entry of entries) { - const srcPath = path.join(src, entry.name); - const destPath = path.join(dest, entry.name); + // Wait for extension background pages to spin up + await new Promise(r => setTimeout(r, 500)); + } - if (entry.isDirectory()) { - this.copyDirectory(srcPath, destPath); - } else { - fs.copyFileSync(srcPath, destPath); - } + async goto(url: string): Promise { + const page = this.getPage(); + await page.goto(url, { waitUntil: 'domcontentloaded' }); + + if (!(await this.waitForExtension(15000))) { + // Gather Debug Info + const diag = await page.evaluate(() => ({ + sentience_global: typeof (window as any).sentience !== 'undefined', + wasm_ready: (window as any).sentience && (window as any).sentience._wasmModule !== null, + ext_id: document.documentElement.dataset.sentienceExtensionId || 'not set', + url: window.location.href + })).catch(e => ({ error: String(e) })); + + throw new Error( + 'Extension failed to load after navigation.\n' + + `Path: ${this.extensionPath}\n` + + `Diagnostics: ${JSON.stringify(diag, null, 2)}` + ); } } - private async waitForExtension(timeout: number = 20000): Promise { - if (!this.page) return false; - + private async waitForExtension(timeoutMs: number): Promise { const start = Date.now(); - let lastError: string | null = null; - - while (Date.now() - start < timeout) { + while (Date.now() - start < timeoutMs) { try { - const result = await this.page.evaluate(() => { - // Check if sentience API exists - if (typeof (window as any).sentience === 'undefined') { - return { ready: false, reason: 'window.sentience not defined' }; - } - // Check if snapshot function exists - if (typeof (window as any).sentience.snapshot !== 'function') { - return { ready: false, reason: 'snapshot function not available' }; - } - // Check if WASM module is loaded - if ((window as any).sentience_registry === undefined) { - return { ready: false, reason: 'registry not initialized' }; - } - // IMPORTANT: Check if WASM module is actually loaded (not null) - const sentience = (window as any).sentience; - if (sentience._wasmModule === null) { - return { ready: false, reason: 'WASM module is null (still loading)' }; - } - if (sentience._wasmModule === undefined) { - return { ready: false, reason: 'WASM module not initialized' }; - } - // Verify WASM module has required function - if (sentience._wasmModule && !sentience._wasmModule.analyze_page) { - return { ready: false, reason: 'WASM module missing analyze_page function' }; - } - // Everything is ready - return { ready: true }; + const ready = await this.page!.evaluate(() => { + // Check for API AND Wasm Module (set by injected_api.js) + const s = (window as any).sentience; + return s && s._wasmModule !== null; // Strict check for null (it's initialized as null) }); - - if (result && (result as any).ready) { - return true; - } - - // Track the last error for debugging - if (result && (result as any).reason) { - lastError = (result as any).reason; - } - } catch (e: any) { - lastError = `Evaluation error: ${e.message}`; - // Continue waiting on errors + if (ready) return true; + } catch (e) { + // Context invalid errors expected during navigation } - - await new Promise((resolve) => setTimeout(resolve, 300)); - } - - // Log the last error for debugging - if (lastError) { - console.warn(`Extension wait timeout. Last status: ${lastError}`); + await new Promise(r => setTimeout(r, 100)); } - return false; } @@ -363,6 +167,18 @@ export class SentienceBrowser { return this.page; } + // Helper for recursive copy (fs.cp is Node 16.7+) + private _copyRecursive(src: string, dest: string) { + if (fs.statSync(src).isDirectory()) { + if (!fs.existsSync(dest)) fs.mkdirSync(dest); + fs.readdirSync(src).forEach(child => { + this._copyRecursive(path.join(src, child), path.join(dest, child)); + }); + } else { + fs.copyFileSync(src, dest); + } + } + // Expose API configuration (read-only) getApiKey(): string | undefined { return this._apiKey; @@ -417,8 +233,5 @@ export class SentienceBrowser { } this.userDataDir = null; } - - this.page = null; } -} - +} \ No newline at end of file diff --git a/tests/read.test.ts b/tests/read.test.ts index 758f5eb5..cc55e7db 100644 --- a/tests/read.test.ts +++ b/tests/read.test.ts @@ -18,7 +18,8 @@ describe('read', () => { expect(result.format).toBe('text'); expect(result.content).toBeDefined(); expect(result.length).toBeGreaterThan(0); - expect(result.url).toBe('https://example.com'); + // Browser may normalize URL with trailing slash + expect(result.url).toMatch(/^https:\/\/example\.com\/?$/); } finally { await browser.close(); } @@ -36,7 +37,8 @@ describe('read', () => { expect(result.format).toBe('markdown'); expect(result.content).toBeDefined(); expect(result.length).toBeGreaterThan(0); - expect(result.url).toBe('https://example.com'); + // Browser may normalize URL with trailing slash + expect(result.url).toMatch(/^https:\/\/example\.com\/?$/); } finally { await browser.close(); } @@ -51,7 +53,7 @@ describe('read', () => { // Test with enhancement (default) const resultEnhanced = await read(browser, { format: 'markdown', - enhance_markdown: true, + enhanceMarkdown: true, }); expect(resultEnhanced.status).toBe('success'); @@ -61,7 +63,7 @@ describe('read', () => { // Test without enhancement const resultBasic = await read(browser, { format: 'markdown', - enhance_markdown: false, + enhanceMarkdown: false, }); expect(resultBasic.status).toBe('success'); From 6987317e3d486dca348deb3915864f9e50541cc4 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 21 Dec 2025 22:46:53 -0800 Subject: [PATCH 4/4] delete ds --- .github/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .github/.DS_Store diff --git a/.github/.DS_Store b/.github/.DS_Store deleted file mode 100644 index b681ead2b85400ce6e4dc8dcd17ba4713e05df48..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}N6?5T3Nv?oxyv6!aGGTClBBC|;IYU%(YTsMId4x^&%0w{;Jtuy=hSU&QBe zCdrDW6+DU98JK*@{A|dVC6fSv=u8Iu#Z zQZyU>A_KH{F7B9uF^u5L{v}UAqyqH51y50!OzQP_Q7D!+Hp@=gsW>+I|w9G~?@(POM$44(pjJuO=nb9lwhilsgK<2X|B zJ$m!ZJcE%LU