BrightLocal · mprytyka · Apr 7, 2025
diff --git a/.env.example b/.env.example
@@ -1,8 +1,15 @@
 OPENAI_API_KEY=
 ANTHROPIC_API_KEY=
+AZURE_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+GEMINI_API_KEY=
+DEEPSEEK_API_KEY=
 
 # Set to false to disable anonymized telemetry
 ANONYMIZED_TELEMETRY=true
 
 # LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
 BROWSER_USE_LOGGING_LEVEL=info
+
+# set this to true to optimize browser-use's chrome for running inside docker
+IN_DOCKER=false
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -81,4 +81,4 @@ body:
     attributes:
       label: Relevant Log Output
       description: Please copy and paste any relevant log output. This will be automatically formatted into code.
-      render: shell
+      render: shell
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
@@ -8,4 +8,4 @@ contact_links:
     about: Please ask questions in our Discord community
   - name: 📖 Documentation
     url: https://docs.browser-use.com
-    about: Check our documentation for answers first 
+    about: Check our documentation for answers first
diff --git a/.github/ISSUE_TEMPLATE/docs_issue.yml b/.github/ISSUE_TEMPLATE/docs_issue.yml
@@ -46,10 +46,10 @@ body:
       description: If you have specific suggestions for how to improve the documentation, please share them
       placeholder: |
         The documentation could be improved by...
-        
+
         Example:
         ```python
         # Your suggested code example or text here
         ```
     validations:
-      required: true
+      required: true
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -40,4 +40,4 @@ body:
       placeholder: |
         - Example use cases
         - Screenshots or mockups
-        - Related issues or discussions 
+        - Related issues or discussions
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,24 @@
+name: Lint
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v5
+      - run: uv run ruff format
+      - run: uv run pre-commit run --all-files
+      # TODO: Fix the ignored pytests.
+      # openai.OpenAIError: The api_key client option must be set either by passing
+      # api_key to the client or by setting the OPENAI_API_KEY environment variable
+      - run: uv run --with=dotenv pytest
+                --ignore=tests/test_dropdown_error.py
+                --ignore=tests/test_gif_path.py
+                --ignore=tests/test_models.py
+                --ignore=tests/test_react_dropdown.py
+                --ignore=tests/test_save_conversation.py
+                --ignore=tests/test_vision.py
+                --ignore=tests/test_wait_for_element.py || true
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -6,33 +6,87 @@
 # separate terms of service, privacy policy, and support
 # documentation.
 
-name: Upload Python Package
+name: Python Package Workflow
 
 on:
+  push:
+    branches:
+      - main
   release:
     types: [published]
+  schedule:
+    - cron: "0 17 * * FRI"  # Every Friday at 5 PM UTC
 
 permissions:
-  contents: read
+  contents: write
 
 jobs:
-  deploy:
+  pre_commit_and_tests:
+    if: github.event_name == 'push' && github.ref_name == 'main'
     runs-on: ubuntu-latest
-
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: "3.x"
-      - name: Install dependencies
+      - uses: astral-sh/setup-uv@v5
+      - run: uv run ruff check --no-fix --select PLE  # check only for syntax errors
+      - run: uv build
+      - run: uv run --isolated --no-project --with pytest --with dist/*.whl tests/conftest.py
+      - run: uv run --isolated --no-project --with pytest --with dist/*.tar.gz tests/conftest.py
+      - run: uv run --with=dotenv pytest \
+                --ignore=tests/test_dropdown_error.py \
+                --ignore=tests/test_gif_path.py \
+                --ignore=tests/test_models.py \
+                --ignore=tests/test_react_dropdown.py \
+                --ignore=tests/test_save_conversation.py \
+                --ignore=tests/test_vision.py \
+                --ignore=tests/test_wait_for_element.py || true
+      - run: uv publish --trusted-publishing always
+
+  tag_pre_release:
+    if: github.event_name == 'schedule'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Create pre-release tag
         run: |
-          python -m pip install --upgrade pip
-          pip install build hatch
-      - name: Build package
-        run: python -m build
-      - name: Publish package
-        uses: pypa/gh-action-pypi-publish@release/v1
+          git fetch --tags
+          latest_tag=$(git tag --list --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+rc[0-9]+$' | head -n 1)
+          if [ -z "$latest_tag" ]; then
+            new_tag="v0.1.0rc1"
+          else
+            new_tag=$(echo $latest_tag | awk -F'rc' '{print $1 "rc" $2+1}')
+          fi
+          git tag $new_tag
+          git push origin $new_tag
+
+  deploy:
+    if: github.event_name == 'release'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
         with:
-          user: __token__
-          password: ${{ secrets.PYPI_API_TOKEN }}
+          python-version: "3.x"
+      - uses: astral-sh/setup-uv@v5
+      - run: uv run ruff check --no-fix --select PLE  # check only for syntax errors
+      - run: uv build
+      - run: uv run --isolated --no-project --with pytest --with dist/*.whl tests/conftest.py
+      - run: uv run --isolated --no-project --with pytest --with dist/*.tar.gz tests/conftest.py
+      - run: uv run --with=dotenv pytest \
+                --ignore=tests/test_dropdown_error.py \
+                --ignore=tests/test_gif_path.py \
+                --ignore=tests/test_models.py \
+                --ignore=tests/test_react_dropdown.py \
+                --ignore=tests/test_save_conversation.py \
+                --ignore=tests/test_vision.py \
+                --ignore=tests/test_wait_for_element.py || true
+      - run: uv publish --trusted-publishing always
+      - name: Push to stable branch (if stable release)
+        if: startsWith(github.ref_name, 'v') && !contains(github.ref_name, 'rc')
+        run: |
+          git checkout -b stable
+          git push origin stable
diff --git a/.gitignore b/.gitignore
@@ -187,4 +187,4 @@ gcp-login.json
 *.json
 *.jsonl
 
-uv.lock
+uv.lock
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,18 +1,32 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.3.0
+    rev: v0.11.2
     hooks:
       - id: ruff
-        args: [
-            --line-length=130,
-            --select=E,F,I,
-            --fix,
-        ]
+      - id: ruff-format
+      # see pyproject.toml for more details on ruff config
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v5.0.0
     hooks:
-      - id: trailing-whitespace
-      - id: end-of-file-fixer
-      - id: check-yaml
       - id: check-toml
+      - id: check-yaml
+      - id: check-json
+      - id: end-of-file-fixer
+      - id: check-merge-conflict
+      - id: check-illegal-windows-names
+      - id: check-case-conflict
+      - id: check-added-large-files
+      - id: check-shebang-scripts-are-executable
+      - id: check-symlinks
+      - id: destroyed-symlinks
+      - id: detect-private-key
+      - id: mixed-line-ending
+      - id: fix-byte-order-marker
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.4.1
+    hooks:
+      - id: codespell # See pyproject.toml for args
+        additional_dependencies:
+          - tomli
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -39,7 +39,7 @@
                 "-v",
                 "-k",
                 "test_captcha_solver",
-                "--capture=no",
+                "--capture=no"
             ],
             "console": "integratedTerminal",
             "justMyCode": false
@@ -54,7 +54,7 @@
                 "-v",
                 "-k",
                 "test_ecommerce_interaction",
-                "--capture=no",
+                "--capture=no"
             ],
             "console": "integratedTerminal",
             "justMyCode": false
@@ -85,4 +85,4 @@
             "justMyCode": false
         }
     ]
-}
+}
diff --git a/README.md b/README.md
@@ -28,10 +28,9 @@ With pip (Python>=3.11):
 pip install browser-use
 ```
 
-install playwright:
-
+Install Playwright:
 ```bash
-playwright install
+playwright install chromium
 ```
 
 Spin up your agent:
@@ -57,6 +56,11 @@ Add your API keys for the provider you want to use to your `.env` file.
 
 ```bash
 OPENAI_API_KEY=
+ANTHROPIC_API_KEY=
+AZURE_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+GEMINI_API_KEY=
+DEEPSEEK_API_KEY=
 ```
 
 For other settings, models, and more, check out the [documentation 📕](https://docs.browser-use.com).
@@ -133,7 +137,7 @@ Tell your computer what to do, and it gets it done.
 ### Rerunning tasks
 
 - [ ] LLM as fallback
-- [ ] Make it easy to define workfows templates where LLM fills in the details
+- [ ] Make it easy to define workflow templates where LLM fills in the details
 - [ ] Return playwright script from the agent
 
 ### Datasets
@@ -156,6 +160,11 @@ We love contributions! Feel free to open issues for bugs or feature requests. To
 
 To learn more about the library, check out the [local setup 📕](https://docs.browser-use.com/development/local-setup).
 
+
+`main` is the primary development branch with frequent changes. For production use, install a stable [versioned release](https://github.com/browser-use/browser-use/releases) instead.
+
+---
+
 ## Cooperations
 
 We are forming a commission to define best practices for UI/UX design for browser agents.
@@ -181,7 +190,7 @@ If you use Browser Use in your research or project, please cite:
 }
 ```
 
- <div align="center"> <img src="https://github.com/user-attachments/assets/402b2129-b6ac-44d3-a217-01aea3277dce" width="400"/> 
+ <div align="center"> <img src="https://github.com/user-attachments/assets/06fa3078-8461-4560-b434-445510c1766f" width="400"/> 
 
 [![Twitter Follow](https://img.shields.io/twitter/follow/Gregor?style=social)](https://x.com/gregpr07)
 [![Twitter Follow](https://img.shields.io/twitter/follow/Magnus?style=social)](https://x.com/mamagnus00)

diff --git a/SECURITY.md b/SECURITY.md
@@ -17,4 +17,3 @@ Please include as much of the information listed below as you can to help me bet
 * Impact of the issue, including how an attacker might exploit the issue
 
 This information will help me triage your report more quickly.
-
diff --git a/browser_use/agent/gif.py b/browser_use/agent/gif.py
@@ -155,10 +155,28 @@ def _create_task_frame(
 	# Calculate vertical center of image
 	center_y = image.height // 2
 
-	# Draw task text with increased font size
+	# Draw task text with dynamic font size based on task length
 	margin = 140  # Increased margin
 	max_width = image.width - (2 * margin)
-	larger_font = ImageFont.truetype(regular_font.path, regular_font.size + 16)  # Increase font size more
+
+	# Dynamic font size calculation based on task length
+	# Start with base font size (regular + 16)
+	base_font_size = regular_font.size + 16
+	min_font_size = max(regular_font.size - 10, 16)  # Don't go below 16pt
+	max_font_size = base_font_size  # Cap at the base font size
+
+	# Calculate dynamic font size based on text length and complexity
+	# Longer texts get progressively smaller fonts
+	text_length = len(task)
+	if text_length > 200:
+		# For very long text, reduce font size logarithmically
+		font_size = max(base_font_size - int(10 * (text_length / 200)), min_font_size)
+	else:
+		font_size = base_font_size
+
+	larger_font = ImageFont.truetype(regular_font.path, font_size)
+
+	# Generate wrapped text with the calculated font size
 	wrapped_text = _wrap_text(task, larger_font, max_width)
 
 	# Calculate line height with spacing

diff --git a/browser_use/agent/memory/__init__.py b/browser_use/agent/memory/__init__.py
@@ -0,0 +1,3 @@
+from browser_use.agent.memory.service import Memory, MemorySettings
+
+__all__ = ['Memory', 'MemorySettings']
-Original file line number
+Diff line change
@@ Expand Up / @@ -187,4 +187,4 @@ gcp-login.json @@
     *.json
     *.jsonl
-    uv.lock
+    uv.lock
Original file line number	Diff line number	Diff line change
Expand Up		@@ -17,4 +17,3 @@ Please include as much of the information listed below as you can to help me bet
		* Impact of the issue, including how an attacker might exploit the issue

		This information will help me triage your report more quickly.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from browser_use.agent.memory.service import Memory, MemorySettings

		__all__ = ['Memory', 'MemorySettings']