Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 72 additions & 18 deletions .github/workflows/sync-extension.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
uses: actions/checkout@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0 # Fetch all history for proper branching

- name: Set up Node.js
uses: actions/setup-node@v4
Expand Down Expand Up @@ -62,31 +63,79 @@ jobs:
mkdir -p extension-temp
cd extension-temp

# Download each file from release
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \
# First, try to download the zip archive if available
ZIP_URL=$(curl -s -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \
"https://api.github.com/repos/$REPO/releases/tags/$TAG" | \
jq -r '.assets[] | select(.name | endswith(".js") or endswith(".wasm") or endswith(".json") or endswith(".d.ts")) | .browser_download_url' | \
while read url; do
filename=$(basename "$url")
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$url" -o "$filename"
done
jq -r '.assets[] | select(.name == "extension-package.zip") | .browser_download_url')

if [ -n "$ZIP_URL" ] && [ "$ZIP_URL" != "null" ]; then
echo "📦 Downloading extension-package.zip..."
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$ZIP_URL" -o extension-package.zip
unzip -q extension-package.zip -d .
# Files should now be in extension-temp/extension-package/ or extension-temp/
if [ -d "extension-package" ]; then
mv extension-package/* . 2>/dev/null || true
rmdir extension-package 2>/dev/null || true
fi
else
echo "📁 Downloading individual files from release..."
# Download each file from release
curl -s -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" \
"https://api.github.com/repos/$REPO/releases/tags/$TAG" | \
jq -r '.assets[] | select(.name | endswith(".js") or endswith(".wasm") or endswith(".json") or endswith(".d.ts")) | .browser_download_url' | \
while read url; do
if [ -n "$url" ] && [ "$url" != "null" ]; then
filename=$(basename "$url")
echo " Downloading $filename..."
curl -L -H "Authorization: token ${{ secrets.SENTIENCE_CHROME_TOKEN }}" "$url" -o "$filename"
fi
done
fi

# Verify files were downloaded
echo "📋 Downloaded files:"
ls -la

- name: Copy extension files
if: steps.release.outputs.skip != 'true'
run: |
# Create extension directory structure
mkdir -p src/extension/pkg

# Copy extension files
cp extension-temp/manifest.json src/extension/ 2>/dev/null || echo "manifest.json not found in release"
cp extension-temp/content.js src/extension/ 2>/dev/null || echo "content.js not found in release"
cp extension-temp/background.js src/extension/ 2>/dev/null || echo "background.js not found in release"
cp extension-temp/injected_api.js src/extension/ 2>/dev/null || echo "injected_api.js not found in release"
# Copy extension files (expected at the root of extension-temp)
cp extension-temp/manifest.json src/extension/ 2>/dev/null || echo "⚠️ manifest.json not found in release"
cp extension-temp/content.js src/extension/ 2>/dev/null || echo "⚠️ content.js not found in release"
cp extension-temp/background.js src/extension/ 2>/dev/null || echo "⚠️ background.js not found in release"
cp extension-temp/injected_api.js src/extension/ 2>/dev/null || echo "⚠️ injected_api.js not found in release"

# Copy WASM files
cp extension-temp/pkg/sentience_core.js src/extension/pkg/ 2>/dev/null || echo "sentience_core.js not found"
cp extension-temp/pkg/sentience_core_bg.wasm src/extension/pkg/ 2>/dev/null || echo "sentience_core_bg.wasm not found"
cp extension-temp/pkg/*.d.ts src/extension/pkg/ 2>/dev/null || echo "Type definitions not found"
# Copy WASM files (check both root and pkg subdirectory)
if [ -f "extension-temp/pkg/sentience_core.js" ]; then
cp extension-temp/pkg/sentience_core.js src/extension/pkg/
elif [ -f "extension-temp/sentience_core.js" ]; then
cp extension-temp/sentience_core.js src/extension/pkg/
else
echo "⚠️ sentience_core.js not found"
fi

if [ -f "extension-temp/pkg/sentience_core_bg.wasm" ]; then
cp extension-temp/pkg/sentience_core_bg.wasm src/extension/pkg/
elif [ -f "extension-temp/sentience_core_bg.wasm" ]; then
cp extension-temp/sentience_core_bg.wasm src/extension/pkg/
else
echo "⚠️ sentience_core_bg.wasm not found"
fi

# Copy TypeScript definitions
if [ -d "extension-temp/pkg" ]; then
cp extension-temp/pkg/*.d.ts src/extension/pkg/ 2>/dev/null || echo "⚠️ Type definitions not found"
elif [ -d "extension-temp" ]; then
cp extension-temp/*.d.ts src/extension/pkg/ 2>/dev/null || echo "⚠️ Type definitions not found"
fi

# Verify copied files
echo "📋 Copied files:"
ls -la src/extension/
ls -la src/extension/pkg/ 2>/dev/null || echo "⚠️ pkg directory not created"

- name: Check for changes
if: steps.release.outputs.skip != 'true'
Expand All @@ -107,7 +156,9 @@ jobs:
if: steps.release.outputs.skip != 'true' && steps.changes.outputs.changed == 'true'
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}
# Prefer PR_TOKEN (a PAT) when that secret is set; otherwise fall back to the built-in GITHUB_TOKEN
# To use a PAT: create a secret named PR_TOKEN holding a Personal Access Token that has 'repo' scope
token: ${{ secrets.PR_TOKEN || secrets.GITHUB_TOKEN }}
commit-message: "chore: sync extension files from sentience-chrome ${{ steps.release.outputs.tag }}"
title: "Sync Extension: ${{ steps.release.outputs.tag }}"
body: |
Expand All @@ -117,7 +168,10 @@ jobs:
- Extension manifest and scripts
- WASM binary and bindings

**Source:** [sentience-chrome release ${{ steps.release.outputs.tag }}](${{ secrets.SENTIENCE_CHROME_REPO }}/releases/tag/${{ steps.release.outputs.tag }})
**Source:** [sentience-chrome release ${{ steps.release.outputs.tag }}](https://github.com/${{ secrets.SENTIENCE_CHROME_REPO }}/releases/tag/${{ steps.release.outputs.tag }})
branch: sync-extension-${{ steps.release.outputs.tag }}
delete-branch: true
labels: |
automated
extension-sync

52 changes: 52 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ npm run build
- `snapshot(browser, options)` - Capture page state
- TypeScript types for type safety

### Content Reading & Screenshots
- `read(browser, options)` - Read page content as text or markdown
- Enhanced markdown conversion using `turndown` (better than extension's lightweight conversion)
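- `enhance_markdown` option (enabled by default for `format: 'markdown'`) switches to the improved conversion; set it to `false` to keep the extension's own output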
- `screenshot(browser, options)` - Capture standalone screenshot
- Returns base64-encoded data URL
- Supports PNG and JPEG formats with quality control

### Day 4: Query Engine
- `query(snapshot, selector)` - Find elements matching selector
- `find(snapshot, selector)` - Find single best match
Expand Down Expand Up @@ -105,6 +113,50 @@ See `examples/` directory:
- `query-demo.ts` - Query engine
- `wait-and-click.ts` - Wait and actions

### Content Reading Example

```typescript
import { SentienceBrowser, read } from './src';

const browser = new SentienceBrowser();
await browser.start();

await browser.getPage().goto('https://example.com');
await browser.getPage().waitForLoadState('networkidle');

// Read as enhanced markdown (better quality)
const result = await read(browser, {
format: 'markdown',
enhance_markdown: true
});
console.log(result.content); // High-quality markdown

await browser.close();
```

### Screenshot Example

```typescript
import { SentienceBrowser, screenshot } from './src';
import { writeFileSync } from 'fs';

const browser = new SentienceBrowser();
await browser.start();

await browser.getPage().goto('https://example.com');
await browser.getPage().waitForLoadState('networkidle');

// Capture PNG screenshot
const dataUrl = await screenshot(browser, { format: 'png' });

// Save to file
const base64Data = dataUrl.split(',')[1];
const imageData = Buffer.from(base64Data, 'base64');
writeFileSync('screenshot.png', imageData);

await browser.close();
```

## Testing

```bash
Expand Down
28 changes: 28 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
},
"dependencies": {
"playwright": "^1.40.0",
"turndown": "^7.2.2",
"zod": "^3.22.0"
},
"devDependencies": {
"@types/jest": "^29.5.14",
"@types/node": "^20.0.0",
"@types/turndown": "^5.0.3",
"jest": "^29.0.0",
"ts-jest": "^29.0.0",
"ts-node": "^10.9.0",
Expand Down
2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,7 @@ export { expect, Expectation } from './expect';
export { Inspector, inspect } from './inspector';
export { Recorder, Trace, TraceStep, record } from './recorder';
export { ScriptGenerator, generate } from './generator';
// `read` is a runtime value; `ReadOptions` and `ReadResult` are interfaces, so
// they are re-exported as types to stay valid under `isolatedModules` and
// single-file transpilers.
export { read } from './read';
export type { ReadOptions, ReadResult } from './read';
export { screenshot, ScreenshotOptions } from './screenshot';
export * from './types';

80 changes: 80 additions & 0 deletions src/read.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* Read page content - enhanced markdown conversion
*/

import { SentienceBrowser } from './browser';
import TurndownService from 'turndown';

/**
 * Options accepted by `read`. Every member is optional.
 */
export interface ReadOptions {
  /**
   * Output format requested from the extension.
   * `read` uses `'text'` when this is omitted.
   */
  format?: 'text' | 'markdown';

  /**
   * Whether `read` re-converts the page HTML with turndown.
   * Treated as enabled unless explicitly `false`, and only takes effect
   * when `format` is `'markdown'`.
   */
  enhance_markdown?: boolean;
}

/**
 * Result object returned by `read`.
 */
export interface ReadResult {
  /** Outcome flag; `read` only attempts markdown enhancement when this is `'success'`. */
  status: 'success' | 'error';

  /** Format label reported for `content`. */
  format: 'text' | 'markdown';

  /** NOTE(review): presumably the URL of the page that was read — confirm against the extension. */
  url: string;

  /** Page content; replaced by the turndown output when markdown enhancement succeeds. */
  content: string;

  /** Size of `content`; set to the string length of the enhanced markdown when enhancement succeeds. */
  length: number;

  /** Failure description; `read` fills it in when markdown enhancement throws. */
  error?: string;
}

/**
 * Read the current page's content as plain text or markdown.
 *
 * The base result comes from the in-page extension API
 * (`window.sentience.read`). When `format` is `'markdown'`, enhancement is
 * not disabled, and the extension reported `'success'`, the page's full HTML
 * is re-converted with turndown and replaces `content` and `length`.
 *
 * If the enhancement step throws, the extension's content is kept, `status`
 * stays `'success'`, and the failure is described in `error`.
 *
 * @param browser - SentienceBrowser instance whose current page is read
 * @param options - Read options; `format` defaults to `'text'` and
 *   `enhance_markdown` defaults to `true`
 * @returns ReadResult with page content, or a `status: 'error'` result when
 *   the extension call yields no object
 */
export async function read(
  browser: SentienceBrowser,
  options: ReadOptions = {}
): Promise<ReadResult> {
  const page = browser.getPage();
  const format = options.format ?? 'text';
  const enhanceMarkdown = options.enhance_markdown !== false; // Default to true

  // Get basic content from extension. The value crosses the page boundary
  // unvalidated, so it is held as `unknown` until checked.
  const extensionResult: unknown = await page.evaluate(
    (opts) => {
      // `sentience` is injected into the page by the extension and has no typings here.
      return (window as any).sentience.read(opts);
    },
    { format }
  );

  // Without this guard a null/primitive result would be returned as-is for
  // text, or crash with a TypeError on `result.status` for markdown.
  if (typeof extensionResult !== 'object' || extensionResult === null) {
    return {
      status: 'error',
      url: page.url(),
      format,
      content: '',
      length: 0,
      error: 'Extension returned no read result',
    };
  }

  const result = extensionResult as ReadResult;

  // Enhance markdown if requested and format is markdown
  if (format === 'markdown' && enhanceMarkdown && result.status === 'success') {
    try {
      // Get full HTML from page
      const htmlContent = await page.evaluate(
        () => document.documentElement.outerHTML
      );

      // Use turndown for better conversion
      const turndownService = new TurndownService({
        headingStyle: 'atx', // Use # for headings
        bulletListMarker: '-', // Use - for lists
        codeBlockStyle: 'fenced', // Use ``` for code blocks
      });

      // Strikethrough rule. A filter function is used instead of a tag-name
      // array so the deprecated `strike` tag needs no `as any` cast.
      const strikeTags = ['del', 's', 'strike'];
      turndownService.addRule('strikethrough', {
        filter: (node) => strikeTags.includes(node.nodeName.toLowerCase()),
        replacement: (content: string) => `~~${content}~~`,
      });

      // Strip unwanted tags
      turndownService.remove(['script', 'style', 'nav', 'footer', 'header', 'noscript']);

      const enhancedMarkdown = turndownService.turndown(htmlContent);
      result.content = enhancedMarkdown;
      result.length = enhancedMarkdown.length;
    } catch (e: unknown) {
      // If enhancement fails, use extension's result and record the reason
      result.error = `Markdown enhancement failed: ${String(e)}`;
    }
  }

  return result;
}

Loading
Loading